From 70eec24a28aad1cda06053bce925819c50e765b7 Mon Sep 17 00:00:00 2001 From: Guanzhong Chen Date: Mon, 27 Nov 2023 20:32:32 +0800 Subject: [PATCH 1/2] 1 --- .../efficientdet/EfficientDetD0_preprocess.py | 87 +++ .../cv/detection/efficientdet/README.md | 35 + .../efficientdet/efficientdet-pytorch/LICENSE | 201 +++++ .../efficientdet-pytorch/README.md | 509 +++++++++++++ .../efficientdet-pytorch/avg_checkpoints.py | 122 +++ .../efficientdet-pytorch/clean_checkpoint.py | 82 +++ .../efficientdet-pytorch/distributed_train.sh | 5 + .../efficientdet-pytorch/effdet/__init__.py | 7 + .../efficientdet-pytorch/effdet/anchors.py | 415 +++++++++++ .../efficientdet-pytorch/effdet/bench.py | 117 +++ .../effdet/config/__init__.py | 4 + .../effdet/config/config_utils.py | 9 + .../effdet/config/fpn_config.py | 184 +++++ .../effdet/config/model_config.py | 116 +++ .../effdet/config/train_config.py | 34 + .../effdet/data/__init__.py | 6 + .../effdet/data/dataset.py | 97 +++ .../effdet/data/dataset_config.py | 179 +++++ .../effdet/data/dataset_factory.py | 103 +++ .../effdet/data/input_config.py | 70 ++ .../effdet/data/loader.py | 213 ++++++ .../effdet/data/parsers/__init__.py | 2 + .../effdet/data/parsers/parser.py | 82 +++ .../effdet/data/parsers/parser_coco.py | 93 +++ .../effdet/data/parsers/parser_config.py | 49 ++ .../effdet/data/parsers/parser_factory.py | 19 + .../effdet/data/parsers/parser_open_images.py | 211 ++++++ .../effdet/data/parsers/parser_voc.py | 148 ++++ .../effdet/data/random_erasing.py | 94 +++ .../effdet/data/transforms.py | 153 ++++ .../effdet/distributed.py | 308 ++++++++ .../effdet/efficientdet.py | 620 ++++++++++++++++ .../effdet/evaluation/README.md | 7 + .../effdet/evaluation/__init__.py | 0 .../effdet/evaluation/detection_evaluator.py | 590 +++++++++++++++ .../effdet/evaluation/fields.py | 105 +++ .../effdet/evaluation/metrics.py | 148 ++++ .../effdet/evaluation/np_box_list.py | 696 ++++++++++++++++++ .../effdet/evaluation/np_mask_list.py | 478 ++++++++++++ .../evaluation/object_detection_evaluation.py | 273 +++++++ .../effdet/evaluation/per_image_evaluation.py | 538 ++++++++++++++ .../efficientdet-pytorch/effdet/evaluator.py | 184 +++++ .../efficientdet-pytorch/effdet/factory.py | 55 ++ .../efficientdet-pytorch/effdet/helpers.py | 22 + .../efficientdet-pytorch/effdet/loss.py | 259 +++++++ .../effdet/object_detection/README.md | 3 + .../effdet/object_detection/__init__.py | 22 + .../effdet/object_detection/argmax_matcher.py | 174 +++++ .../effdet/object_detection/box_coder.py | 172 +++++ .../effdet/object_detection/box_list.py | 197 +++++ .../effdet/object_detection/matcher.py | 179 +++++ .../region_similarity_calculator.py | 101 +++ .../object_detection/target_assigner.py | 266 +++++++ .../efficientdet-pytorch/effdet/soft_nms.py | 170 +++++ .../efficientdet-pytorch/effdet/version.py | 1 + .../requirements-sotabench.txt | 16 + .../efficientdet-pytorch/requirements.txt | 10 + .../efficientdet-pytorch/setup.py | 47 ++ .../efficientdet-pytorch/sotabench.py | 148 ++++ .../efficientdet-pytorch/sotabench_setup.sh | 9 + .../efficientdet-pytorch/train.py | 656 +++++++++++++++++ .../efficientdet-pytorch/validate.py | 198 +++++ .../cv/detection/efficientdet/export.py | 42 ++ .../cv/detection/efficientdet/perf.py | 93 +++ .../detection/efficientdet/requirements.txt | 67 ++ .../built-in/cv/detection/efficientdet/run.py | 152 ++++ 66 files changed, 10452 insertions(+) create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/EfficientDetD0_preprocess.py create mode 100644 
AscendIE/TorchAIE/built-in/cv/detection/efficientdet/README.md create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/LICENSE create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/README.md create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/avg_checkpoints.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/clean_checkpoint.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/distributed_train.sh create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/__init__.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/anchors.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/bench.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/config/__init__.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/config/config_utils.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/config/fpn_config.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/config/model_config.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/config/train_config.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/__init__.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/dataset.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/dataset_config.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/dataset_factory.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/input_config.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/loader.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/parsers/__init__.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/parsers/parser.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/parsers/parser_coco.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/parsers/parser_config.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/parsers/parser_factory.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/parsers/parser_open_images.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/parsers/parser_voc.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/random_erasing.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/transforms.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/distributed.py create mode 100644 
AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/efficientdet.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/README.md create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/__init__.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/detection_evaluator.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/fields.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/metrics.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/np_box_list.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/np_mask_list.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/object_detection_evaluation.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/per_image_evaluation.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluator.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/factory.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/helpers.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/loss.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/object_detection/README.md create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/object_detection/__init__.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/object_detection/argmax_matcher.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/object_detection/box_coder.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/object_detection/box_list.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/object_detection/matcher.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/object_detection/region_similarity_calculator.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/object_detection/target_assigner.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/soft_nms.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/version.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/requirements-sotabench.txt create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/requirements.txt create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/setup.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/sotabench.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/sotabench_setup.sh create mode 100644 
AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/train.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/validate.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/export.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/perf.py create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/requirements.txt create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/efficientdet/run.py
diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/EfficientDetD0_preprocess.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/EfficientDetD0_preprocess.py
new file mode 100644
index 0000000000..7161ecc377
--- /dev/null
+++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/EfficientDetD0_preprocess.py
@@ -0,0 +1,87 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+sys.path.append(r'./efficientdet-pytorch')
+import os
+import argparse
+from effdet import create_dataset, create_loader
+from effdet.data import resolve_input_config
+from timm.utils import setup_default_logging
+from tqdm import tqdm
+import numpy as np
+import torch
+
+parser = argparse.ArgumentParser(description='EfficientDet COCO preprocessing (dump model inputs as .bin files)')
+
+parser.add_argument('--root', default='/home/ascend/coco2017', type=str, metavar='DIR',
+                    help='path to dataset root')
+parser.add_argument('--dataset', default='coco', type=str, metavar='DATASET',
+                    help='Name of dataset (default: "coco")')
+parser.add_argument('--split', default='val',
+                    help='validation split')
+parser.add_argument('--model', '-m', metavar='MODEL', default='tf_efficientdet_d0',
+                    help='model architecture (default: tf_efficientdet_d0)')
+parser.add_argument('--bin-save', default='bin_save', type=str, metavar='save',
+                    help='directory to save the preprocessed .bin files')
+parser.add_argument('-b', '--batch-size', default=1, type=int,
+                    metavar='N', help='mini-batch size (default: 1)')
+parser.add_argument('--img-size', default=None, type=int,
+                    metavar='N', help='Input image dimension, uses model default if empty')
+
+if __name__ == '__main__':
+    args = parser.parse_args()
+    setup_default_logging()
+    dataset = create_dataset(args.dataset, args.root, args.split)
+    if args.model == 'tf_efficientdet_d0':
+        model_config = {'input_size': (3, 512, 512),
+                        'interpolation': 'bilinear',
+                        'mean': (0.485, 0.456, 0.406),
+                        'std': (0.229, 0.224, 0.225),
+                        'fill_color': 'mean'}
+    elif args.model == 'tf_efficientdet_d7':
+        model_config = {'input_size': (3, 1536, 1536),
+                        'interpolation': 'bilinear',
+                        'mean': (0.485, 0.456, 0.406),
+                        'std': (0.229, 0.224, 0.225),
+                        'fill_color': 'mean'}
+    input_config = resolve_input_config(args, model_config)
+    print(args)
+    loader = create_loader(
+        dataset,
+        input_size=input_config['input_size'],
+        batch_size=args.batch_size,
+        use_prefetcher=True,
+        interpolation=input_config['interpolation'],
+        fill_color=input_config['fill_color'],
+        mean=input_config['mean'],
+        std=input_config['std'],
+        num_workers=4,
+        pin_mem=True,
+    )
+    pic = os.listdir(os.path.join(args.root, 'val2017'))
+    pic.sort()
+
+    if not os.path.exists(args.bin_save):
+        os.makedirs(args.bin_save)
+    one_batch = tqdm(zip(loader, pic))
+    for i, file in one_batch:
+        img = i[0].numpy()
+        print(img.shape)  # (1, 3, 512, 512)
+        print(img.dtype)  # float32
+        img.tofile(os.path.join(args.bin_save, file.split('.')[0] + ".bin"))
+        # sanity check: reload the dumped .bin and cast to the dtype used at inference time
+        input_np_arr = np.fromfile(os.path.join(args.bin_save, file.split('.')[0] + ".bin"), dtype=np.float32).reshape((1, 3, 512, 512))
+        input_tensor = torch.tensor(input_np_arr, dtype=torch.float16)
+        print("readback check:", input_tensor.shape)
\ No newline at end of file
diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/README.md b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/README.md
new file mode 100644
index 0000000000..22cfa41616
--- /dev/null
+++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/README.md
@@ -0,0 +1,35 @@
+## Create a virtual environment
+
+`conda create --name effdetd0 python=3.9`
+Activate it: `conda activate effdetd0`
+
+## Install dependencies
+
+`pip3 install -r requirements.txt`
+
+Build the pt (Torch-AIE) plugin, then install torch_aie from its dist directory.
+
+## Download the pth model
+
+Download the checkpoint below and place it under the efficientdet directory.
+Link: https://gitee.com/link?target=https%3A%2F%2Fascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com%2Fmodel%2F1_PyTorch_PTH%2FEffcientDet-D0%2FPTH%2Fd0.pth
+
+## Trace the model into a ts file
+
+Change model_path to the path of the checkpoint you just downloaded:
+`python3 export.py --checkpoint=./d0.pth`
+
+## Data preprocessing
+
+Change root to the path of the dataset images in your own directory:
+`python3 EfficientDetD0_preprocess.py --root /home/ascend/coco2017`
+
+## Model inference - accuracy
+
+Change annotation_file_path to the path of the dataset labels in your own directory:
+`python3 run.py --root /home/ascend/coco2017 --ts_model_path ./d0.ts`
+
+## Performance test - ts
+
+Change --ts_path to the path of the ts file in your own directory:
+`python3 perf.py --mode=ts --ts_path=/onnx/mobilenetv1/mobilenetv1.ts`
\ No newline at end of file
diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/LICENSE b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/LICENSE
new file mode 100644
index 0000000000..96bfb826e3
--- /dev/null
+++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/LICENSE
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2020 Ross Wightman + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/README.md b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/README.md new file mode 100644 index 0000000000..78dbcd1011 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/README.md @@ -0,0 +1,509 @@ +# EfficientDet (PyTorch) + +A PyTorch implementation of EfficientDet. + +It is based on the +* official Tensorflow implementation by [Mingxing Tan and the Google Brain team](https://github.com/google/automl) +* paper by Mingxing Tan, Ruoming Pang, Quoc V. Le [EfficientDet: Scalable and Efficient Object Detection](https://arxiv.org/abs/1911.09070) + +There are other PyTorch implementations. Either their approach didn't fit my aim to correctly reproduce the Tensorflow models (but with a PyTorch feel and flexibility) or they cannot come close to replicating MS COCO training from scratch. 
+ +Aside from the default model configs, there is a lot of flexibility to facilitate experiments and rapid improvements here -- some options based on the official Tensorflow impl, some of my own: +* BiFPN connections and combination mode are fully configurable and not baked into the model code +* BiFPN and head modules can be switched between depthwise separable or standard convolutions +* Activations, batch norm layers are switchable via arguments (soon config) +* Any backbone in my `timm` model collection that supports feature extraction (`features_only` arg) can be used as a bacbkone. + +## Updates + +### 2021-07-28 +* Add training example to README provided by Chris Hughes for training w/ custom dataset & Lightning training code + * [Medium blog post](https://medium.com/data-science-at-microsoft/training-efficientdet-on-custom-data-with-pytorch-lightning-using-an-efficientnetv2-backbone-1cdf3bd7921f) + * [Python notebook](https://gist.github.com/Chris-hughes10/73628b1d8d6fc7d359b3dcbbbb8869d7) + +### 2021-04-30 +* Add EfficientDet AdvProp-AA weights for D0-D5 from TF impl. Model names `tf_efficientdet_d?_ap` + * See https://github.com/google/automl/blob/master/efficientdet/Det-AdvProp.md + +### 2021-02-18 +* Add some new model weights with bilinear interpolation for upsample and downsample in FPN. + * 40.9 mAP - `efficientdet_q1` (replace prev model at 40.6) + * 43.2 mAP -`cspresdet50` + * 45.2 mAP - `cspdarkdet53m` + +### 2020-12-07 +* Training w/ fully jit scripted model + bench (`--torchscript`) is possible with inclusion of ModelEmaV2 from `timm` and previous torchscript compat additions. Big speed gains for CPU bound training. +* Add weights for alternate FPN layouts. QuadFPN experiments (`efficientdet_q0/q1/q2`) and CSPResDeXt + PAN (`cspresdext50pan`). See updated table below. Special thanks to [Artus](https://twitter.com/artuskg) for providing resources for training the Q2 model. +* Heads can have a different activation from FPN via config +* FPN resample (interpolation) can be specified via config and include any F.interpolation method or `max`/`avg` pool +* Default focal loss changed back to `new_focal`, use `--legacy-focal` arg to use the original. Legacy uses less memory, but has more numerical stability issues. +* custom augmentation transform and collate fn can be passed to loader factory +* `timm` >= 0.3.2 required, NOTE double check any custom defined model config for breaking change +* PyTorch >= 1.6 now required + +### 2020-11-12 +* add experimental PAN and Quad FPN configs to the existing EfficientDet BiFPN w/ two test model configs +* switch untrained experimental model configs to use torchscript compat bn head layout by default + +### 2020-11-09 +* set model config to read-only after creation to reduce likelyhood of misuse +* no accessing model or bench .config attr in forward() call chain (for torcscript compat) +* numerous smaller changes that allow jit scripting of the model or train/predict bench + +### 2020-10-30 +Merged a few months of accumulated fixes and additions. 
+* Proper fine-tuning compatible model init (w/ changeable # classes and proper init, demoed in train.py) +* A new dataset interface with dataset support (via parser classes) for COCO, VOC 2007/2012, and OpenImages V5/Challenge2019 +* New focal loss def w/ label smoothing available as an option, support for jit of loss fn for (potential) speedup +* Improved a few hot spots that squeek out a couple % of throughput gains, higher GPU utilization +* Pascal / OpenImages evaluators based on Tensorflow Models Evaluator framework (usable for other datasets as well) +* Support for native PyTorch DDP, SyncBN, and AMP in PyTorch >= 1.6. Still defaults to APEX if installed. +* Non-square input image sizes are allowed for the model (the anchor layout). Specified by image_size tuple in model config. Currently still restricted to `size % 128 = 0` on each dim. +* Allow anchor target generation to be done in either dataloader process' via collate or in model as in past. Can help balance compute. +* Filter out unused target cls/box from dataset annotations in fixed size batch tensors before passing to target assigner. Seems to speed convergence. +* Letterbox aware Random Erasing augmentation added. +* A (very slow) SoftNMS impl added for inference/validation use. It can be manually enabled right now, can add arg if demand. +* Tested with PyTorch 1.7 +* Add ResDet50 model weights, 41.6 mAP. + +A few things on priority list I haven't tackled yet: +* Mosaic augmentation +* bbox IOU loss (tried a bit but so far not a great result, need time to debug/improve) + +**NOTE** There are some breaking changes: +* Predict and Train benches now output XYXY boxes, NOT XYWH as before. This was done to support other datasets as XYWH is COCO's evaluator requirement. +* The TF Models Evaluator operates on YXYX boxes like the models. Conversion from XYXY is currently done by default. Why don't I just keep everything YXYX? Because PyTorch GPU NMS operates in XYXY. +* You must update your version of `timm` to the latest (>=0.3), as some APIs for helpers changed a bit. + +Training sanity checks were done on VOC and OI + * 80.0 @ 50 mAP finetune on voc0712 with no attempt to tune params (roughly as per command below) + * 18.0 mAP @ 50 for OI Challenge2019 after couple days of training (only 6 epochs, eek!). It's much bigger, and takes a LOONG time, many classes are quite challenging. + +### 2020-09-03 +* All models updated to latest checkpoints from TF original. +* Add experimental soft-nms code, must be manually enabled right now. It is REALLY slow, .1-.2 mAP increase. + +### 2020-07-27 +* Add updated TF ported weights for D3 model (better training) and model def and weights for new D7X model (54.3 val mAP) +* Fix Windows bug so it at least trains in non-distributed mode + +### 2020-06-15 +Add updated D7 weights from Tensorflow impl, 53.1 validation mAP here (53.4 in TF) + +### 2020-06-14 +New model results, I've trained a D1 model with some WIP augmentation enhancements (not commited), just squeaking by official weights. 
+ +EfficientDet-D1: +``` + Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.393798 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.586831 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.420305 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.191880 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.455586 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.571316 +``` + +Also, [Soyeb Nagori](https://github.com/soyebn) trained an EfficientDet-Lite0 config using this code and contributed the weights. +``` + Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.319861 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.500062 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.336777 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.111257 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.378062 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.501938 +``` + +Unlike the other tf_ prefixed models this is not ported from (as of yet unreleased) TF official model, but it used +TF ported weights from `timm` for the pretrained imagenet model as the backbone init, thus it uses SAME padding. + + +## Models + +The table below contains models with pretrained weights. There are quite a number of other models that I have defined in [model configurations](effdet/config/model_config.py) that use various `timm` backbones. + +| Variant | mAP (val2017) | mAP (test-dev2017) | mAP (TF official val2017) | mAP (TF official test-dev2017) | Params (M) | +| --- | :---: | :---: | :---: | :---: | :---: | +| tf_efficientdet_lite0 | 32.0 | TBD | N/A | N/A | 3.24 | +| efficientdet_d0 | 33.6 | TBD | 33.5 | 33.8 | 3.88 | +| tf_efficientdet_d0 | 34.2 | TBD | 34.3 | 34.6 | 3.88 | +| tf_efficientdet_d0_ap | 34.8 | TBD | 35.2 | 35.3 | 3.88 | +| efficientdet_q0 | 35.7 | TBD | N/A | N/A | 4.13 | +| efficientdet_d1 | 39.4 | 39.5 | 39.1 | 39.6 | 6.62 | +| tf_efficientdet_d1 | 40.1 | TBD | 40.2 | 40.5 | 6.63 | +| tf_efficientdet_d1_ap | 40.8 | TBD | 40.9 | 40.8 | 6.63 | +| efficientdet_q1 | 40.9 | TBD | N/A | N/A | 6.98 | +| cspresdext50pan | 41.2 | TBD | N/A | N/A | 22.2 | +| resdet50 | 41.6 | TBD | N/A | N/A | 27.6 | +| efficientdet_q2 | 43.1 | TBD | N/A | N/A | 8.81 | +| cspresdet50 | 43.2 | TBD | N/A | N/A | 24.3 | +| tf_efficientdet_d2 | 43.4 | TBD | 42.5 | 43 | 8.10 | +| tf_efficientdet_d2_ap | 44.2 | TBD | 44.3 | 44.3 | 8.10 | +| cspdarkdet53m | 45.2 | TBD | N/A | N/A | 35.6 | +| tf_efficientdet_d3 | 47.1 | TBD | 47.2 | 47.5 | 12.0 | +| tf_efficientdet_d3_ap | 47.7 | TBD | 48.0 | 47.7 | 12.0 | +| tf_efficientdet_d4 | 49.2 | TBD | 49.3 | 49.7 | 20.7 | +| tf_efficientdet_d4_ap | 50.2 | TBD | 50.4 | 50.4 | 20.7 | +| tf_efficientdet_d5 | 51.2 | TBD | 51.2 | 51.5 | 33.7 | +| tf_efficientdet_d6 | 52.0 | TBD | 52.1 | 52.6 | 51.9 | +| tf_efficientdet_d5_ap | 52.1 | TBD | 52.2 | 52.5 | 33.7 | +| tf_efficientdet_d7 | 53.1 | 53.4 | 53.4 | 53.7 | 51.9 | +| tf_efficientdet_d7x | 54.3 | TBD | 54.4 | 55.1 | 77.1 | + + +See [model configurations](effdet/config/model_config.py) for model checkpoint urls and differences. 
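+
+A minimal usage sketch for loading one of the pretrained variants above via the `create_model` factory (see `effdet/factory.py`); exact keyword arguments may differ slightly between versions:
+
+```
+from effdet import create_model
+
+# bench_task='predict' wraps the detector in DetBenchPredict so anchor decoding
+# and NMS run inside forward(); use bench_task='train' for the training bench.
+model = create_model('tf_efficientdet_d0', bench_task='predict', pretrained=True)
+model.eval()
+```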
+ +_NOTE: Official scores for all modules now using soft-nms, but still using normal NMS here._ + +_NOTE: In training some experimental models, I've noticed some potential issues with the combination of synchronized BatchNorm (`--sync-bn`) and model EMA weight everaging (`--model-ema`) during distributed training. The result is either a model that fails to converge, or appears to converge (training loss) but the eval loss (running BN stats) is garbage. I haven't observed this with EfficientNets, but have with some backbones like CspResNeXt, VoVNet, etc. Disabling either EMA or sync bn seems to eliminate the problem and result in good models. I have not fully characterized this issue._ + +## Environment Setup + +Tested in a Python 3.7 or 3.8 conda environment in Linux with: +* PyTorch 1.6, 1.7, 1.7.1 +* PyTorch Image Models (timm) >= 0.3.2, `pip install timm` or local install from (https://github.com/rwightman/pytorch-image-models) +* Apex AMP master (as of 2020-08) + +*NOTE* - There is a conflict/bug with Numpy 1.18+ and pycocotools 2.0, force install numpy <= 1.17.5 or ensure you install pycocotools >= 2.0.2 + +## Dataset Setup and Use + +### COCO +MSCOCO 2017 validation data: +``` +wget http://images.cocodataset.org/zips/val2017.zip +wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip +unzip val2017.zip +unzip annotations_trainval2017.zip +``` + +MSCOCO 2017 test-dev data: +``` +wget http://images.cocodataset.org/zips/test2017.zip +unzip -q test2017.zip +wget http://images.cocodataset.org/annotations/image_info_test2017.zip +unzip image_info_test2017.zip +``` + +#### COCO Evaluation + +Run validation (val2017 by default) with D2 model: `python validate.py /localtion/of/mscoco/ --model tf_efficientdet_d2` + + +Run test-dev2017: `python validate.py /localtion/of/mscoco/ --model tf_efficientdet_d2 --split testdev` + +#### COCO Training + +`./distributed_train.sh 4 /mscoco --model tf_efficientdet_d0 -b 16 --amp --lr .09 --warmup-epochs 5 --sync-bn --opt fusedmomentum --model-ema` + +NOTE: +* Training script currently defaults to a model that does NOT have redundant conv + BN bias layers like the official models, set correct flag when validating. +* I've only trained with img mean (`--fill-color mean`) as the background for crop/scale/aspect fill, the official repo uses black pixel (0) (`--fill-color 0`). Both likely work fine. +* The official training code uses EMA weight averaging by default, it's not clear there is a point in doing this with the cosine LR schedule, I find the non-EMA weights end up better than EMA in the last 10-20% of training epochs +* The default h-params is a very close to unstable (exploding loss), don't try using Nesterov momentum. Try to keep the batch size up, use sync-bn. + + +### Pascal VOC + +2007, 2012, and combined 2007 + 2012 w/ labeled 2007 test for validation are supported. + +``` +wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar +wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar +wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar +find . -name '*.tar' -exec tar xf {} \; +``` + +There should be a `VOC2007` and `VOC2012` folder within `VOCdevkit`, dataset root for cmd line will be VOCdevkit. 
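+
+For reference, the extracted tree should roughly follow the standard VOC layout (sketch):
+```
+VOCdevkit/
+  VOC2007/
+    Annotations/
+    ImageSets/
+    JPEGImages/
+  VOC2012/
+    Annotations/
+    ImageSets/
+    JPEGImages/
+```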
+ +Alternative download links, slower but up more often than ox.ac.uk: +``` +http://pjreddie.com/media/files/VOCtrainval_11-May-2012.tar +http://pjreddie.com/media/files/VOCtrainval_06-Nov-2007.tar +http://pjreddie.com/media/files/VOCtest_06-Nov-2007.tar +``` + +#### VOC Evaluation + +Evaluate on VOC2012 validation set: +`python validate.py /data/VOCdevkit --model efficientdet_d0 --num-gpu 2 --dataset voc2007 --checkpoint mychekpoint.pth --num-classes 20` + +#### VOC Training + +Fine tune COCO pretrained weights to VOC 2007 + 2012: +`/distributed_train.sh 4 /data/VOCdevkit --model efficientdet_d0 --dataset voc0712 -b 16 --amp --lr .008 --sync-bn --opt fusedmomentum --warmup-epochs 3 --model-ema --model-ema-decay 0.9966 --epochs 150 --num-classes 20 --pretrained` + +### OpenImages + +Setting up OpenImages dataset is a commitment. I've tried to make it a bit easier wrt to the annotations, but grabbing the dataset is still going to take some time. It will take approx 560GB of storage space. + +To download the image data, I prefer the CVDF packaging. The main OpenImages dataset page, annotations, dataset license info can be found at: https://storage.googleapis.com/openimages/web/index.html + +#### CVDF Images Download + +Follow the s3 download directions here: https://github.com/cvdfoundation/open-images-dataset#download-images-with-bounding-boxes-annotations + +Each `train_.tar.gz` should be extracted to `train/` folder, where x is a hex digit from 0-F. `validation.tar.gz` can be extracted as flat files into `validation/`. + +#### Annotations Download + +Annotations can be downloaded separately from the OpenImages home page above. For convenience, I've packaged them all together with some additional 'info' csv files that contain ids and stats for all image files. My datasets rely on the `-info.csv` files. Please see https://storage.googleapis.com/openimages/web/factsfigures.html for the License of these annotations. The annotations are licensed by Google LLC under CC BY 4.0 license. The images are listed as having a CC BY 2.0 license. +``` +wget https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1-anno/openimages-annotations.tar.bz2 +wget https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1-anno/openimages-annotations-challenge-2019.tar.bz2 +find . -name '*.tar.bz2' -exec tar xf {} \; +``` + +#### Layout + +Once everything is downloaded and extracted the root of your openimages data folder should contain: +``` +annotations/ +annotations/challenge-2019/ +train/0/ +. +. +. +train/f/ +validation/ +``` + +#### OpenImages Training +Training with Challenge2019 annotations (500 classes): +`./distributed_train.sh 4 /data/openimages --model efficientdet_d0 --dataset openimages-challenge2019 -b 7 --amp --lr .042 --sync-bn --opt fusedmomentum --warmup-epochs 1 --lr-noise 0.4 0.9 --model-ema --model-ema-decay 0.999966 --epochs 100 --remode pixel --reprob 0.15 --recount 4 --num-classes 500 --val-skip 2` + +The 500 (Challenge2019) or 601 (V5/V6) class head for OI takes up a LOT more GPU memory vs COCO. You'll likely need to half batch sizes. + +### Examples of Training / Fine-Tuning on Custom Datasets + +The models here have been used with custom training routines and datasets with great results. There are lots of details to figure out so please don't file any 'I get crap results on my custom dataset issues'. 
If you can illustrate a reproducible problem on a public, non-proprietary, downloadable dataset, with public github fork of this repo including working dataset/parser implementations, I MAY have time to take a look. + +Examples: +* Chris Hughes has put together a great example of training w/ `timm` EfficientNetV2 backbones and the latest versions of the EfficientDet models here + * [Medium blog post](https://medium.com/data-science-at-microsoft/training-efficientdet-on-custom-data-with-pytorch-lightning-using-an-efficientnetv2-backbone-1cdf3bd7921f) + * [Python notebook](https://gist.github.com/Chris-hughes10/73628b1d8d6fc7d359b3dcbbbb8869d7) +* Alex Shonenkov has a clear and concise Kaggle kernel which illustrates fine-tuning these models for detecting wheat heads: https://www.kaggle.com/shonenkov/training-efficientdet (NOTE: this is out of date wrt to latest versions here, many details have changed) + +If you have a good example script or kernel training these models with a different dataset, feel free to notify me for inclusion here... + +## Results + +### My Training + +#### EfficientDet-D0 + +Latest training run with .336 for D0 (on 4x 1080ti): +`./distributed_train.sh 4 /mscoco --model efficientdet_d0 -b 22 --amp --lr .12 --sync-bn --opt fusedmomentum --warmup-epochs 5 --lr-noise 0.4 0.9 --model-ema --model-ema-decay 0.9999` + +These hparams above resulted in a good model, a few points: +* the mAP peaked very early (epoch 200 of 300) and then appeared to overfit, so likely still room for improvement +* I enabled my experimental LR noise which tends to work well with EMA enabled +* the effective LR is a bit higher than official. Official is .08 for batch 64, this works out to .0872 +* drop_path (aka survival_prob / drop_connect) rate of 0.1, which is higher than the suggested 0.0 for D0 in official, but lower than the 0.2 for the other models +* longer EMA period than default + +VAL2017 +``` + Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.336251 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.521584 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.356439 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.123988 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.395033 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.521695 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.287121 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.441450 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.467914 + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.197697 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.552515 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.689297 +``` + +#### EfficientDet-D1 + +Latest run with .394 mAP (on 4x 1080ti): +`./distributed_train.sh 4 /mscoco --model efficientdet_d1 -b 10 --amp --lr .06 --sync-bn --opt fusedmomentum --warmup-epochs 5 --lr-noise 0.4 0.9 --model-ema --model-ema-decay 0.99995` + +For this run I used some improved augmentations, still experimenting so not ready for release, should work well without them but will likely start overfitting a bit sooner and possibly end up a in the .385-.39 range. 
+ + +### Ported Tensorflow weights + +#### TEST-DEV2017 + +NOTE: I've only tried submitting D7 to dev server for sanity check so far + +##### TF-EfficientDet-D7 +``` + Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.534 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.726 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.577 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.356 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.569 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.660 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.397 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.644 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.682 + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.508 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.718 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.818 + ``` + +#### VAL2017 + +##### TF-EfficientDet-D0 +``` + Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.341877 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.525112 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.360218 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.131366 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.399686 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.537368 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.293137 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.447829 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.472954 + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.195282 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.558127 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.695312 +``` + +##### TF-EfficientDet-D1 +``` + Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.401070 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.590625 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.422998 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.211116 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.459650 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.577114 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.326565 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.507095 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.537278 + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.308963 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.610450 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.731814 +``` + +##### TF-EfficientDet-D2 +``` + Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.434042 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.627834 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.463488 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.237414 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.486118 + Average Precision (AP) @[ 
IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.606151 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.343016 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.538328 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.571489 + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.350301 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.638884 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.746671 +``` + +##### TF EfficientDet-D3 + +``` + Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.471223 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.661550 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.505127 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.301385 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.518339 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.626571 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.365186 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.582691 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.617252 + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.424689 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.670761 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.779611 +``` + +##### TF-EfficientDet-D4 + ``` + Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.491759 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.686005 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.527791 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.325658 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.536508 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.635309 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.373752 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.601733 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.638343 + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.463057 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.685103 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.789180 +``` + +##### TF-EfficientDet-D5 +``` + Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.511767 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.704835 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.552920 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.355680 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.551341 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.650184 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.384516 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.619196 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.657445 + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.499319 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.695617 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.788889 +``` + +##### TF-EfficientDet-D6 +``` + 
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.520200 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.713204 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.560973 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.361596 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.567414 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.657173 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.387733 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.629269 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.667495 + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.499002 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.711909 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.802336 +``` + +##### TF-EfficientDet-D7 + ``` + Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.531256 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.724700 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.571787 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.368872 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.573938 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.668253 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.393620 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.637601 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.676987 + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.524850 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.717553 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.806352 + ``` + +##### TF-EfficientDet-D7X + +``` + Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.543 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.737 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.585 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.401 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.579 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.680 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.398 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.649 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.689 + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.550 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.725 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.823 +``` + +## TODO +- [x] Basic Training (object detection) reimplementation +- [ ] Mosaic Augmentation +- [ ] Rand/AutoAugment +- [ ] BBOX IoU loss (giou, diou, ciou, etc) +- [ ] Training (semantic segmentation) experiments +- [ ] Integration with Detectron2 / MMDetection codebases +- [ ] Addition and cleanup of EfficientNet based U-Net and DeepLab segmentation models that I've used in past projects +- [x] Addition and cleanup of OpenImages dataset/training support from a past project +- [ ] Exploration of instance segmentation possibilities... 
+ +If you are an organization interested in sponsoring any of this work, or if prioritization of the possible future directions interests you, feel free to contact me (issue, LinkedIn, Twitter, hello at rwightman dot com). I will set up GitHub sponsorship if there is any interest. diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/avg_checkpoints.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/avg_checkpoints.py new file mode 100644 index 0000000000..396e2abfb5 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/avg_checkpoints.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python +""" Checkpoint Averaging Script + +This script averages all model weights for checkpoints in the specified path that match +the specified filter wildcard. All checkpoints must be from the exact same model. + +For any hope of decent results, the checkpoints should be from the same or child +(via resumes) training session. This can be viewed as similar to maintaining running +EMA (exponential moving average) of the model weights or performing SWA (stochastic +weight averaging), but post-training. + +Hacked together by Ross Wightman (https://github.com/rwightman) +""" +import torch +import argparse +import os +import glob +import hashlib +from timm.models.helpers import load_state_dict + +parser = argparse.ArgumentParser(description='PyTorch Checkpoint Averager') +parser.add_argument('--input', default='', type=str, metavar='PATH', + help='path to base input folder containing checkpoints') +parser.add_argument('--filter', default='*.pth.tar', type=str, metavar='WILDCARD', + help='checkpoint filter (path wildcard)') +parser.add_argument('--output', default='./averaged.pth', type=str, metavar='PATH', + help='output filename') +parser.add_argument('--no-use-ema', dest='no_use_ema', action='store_true', + help='Force not using ema version of weights (if present)') +parser.add_argument('--descending', dest='descending', action='store_true', + help='Set if eval metric is descending (like loss)') +parser.add_argument('--no-sort', dest='no_sort', action='store_true', + help='Do not sort and select by checkpoint metric, also makes "n" argument irrelevant') +parser.add_argument('-n', type=int, default=10, metavar='N', + help='Number of checkpoints to average') + + +def checkpoint_metric(checkpoint_path): + if not checkpoint_path or not os.path.isfile(checkpoint_path): + return {} + print("=> Extracting metric from checkpoint '{}'".format(checkpoint_path)) + checkpoint = torch.load(checkpoint_path, map_location='cpu') + metric = None + if 'metric' in checkpoint: + metric = checkpoint['metric'] + return metric + + +def main(): + args = parser.parse_args() + # by default use the EMA weights (if present) + args.use_ema = not args.no_use_ema + # by default sort by checkpoint metric (if present) and avg top n checkpoints + args.sort = not args.no_sort + + if os.path.exists(args.output): + print("Error: Output filename ({}) already exists.".format(args.output)) + exit(1) + + pattern = args.input + if not args.input.endswith(os.path.sep) and not args.filter.startswith(os.path.sep): + pattern += os.path.sep + pattern += args.filter + checkpoints = glob.glob(pattern, recursive=True) + if not checkpoints: + print("Error: No checkpoints to average.") + exit(1) + + if args.sort: + checkpoint_metrics = [] + for c in checkpoints: + metric = checkpoint_metric(c) + if metric is not None: + checkpoint_metrics.append((metric, c)) +
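# Note (illustrative): checkpoint_metrics now holds (metric, path) pairs, e.g. a hypothetical
# (0.336, './output/train/checkpoint-17.pth.tar'); the list is sorted next and only the top `-n`
# entries are kept for averaging.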
checkpoint_metrics = list(sorted(checkpoint_metrics, reverse=not args.descending)) + checkpoint_metrics = checkpoint_metrics[:args.n] + print("Selected checkpoints:") + [print(m, c) for m, c in checkpoint_metrics] + avg_checkpoints = [c for m, c in checkpoint_metrics] + else: + avg_checkpoints = checkpoints + print("Selected checkpoints:") + [print(c) for c in checkpoints] + + avg_state_dict = {} + avg_counts = {} + for c in avg_checkpoints: + new_state_dict = load_state_dict(c, args.use_ema) + if not new_state_dict: + print("Error: Checkpoint ({}) doesn't exist".format(c)) + continue + + for k, v in new_state_dict.items(): + if k not in avg_state_dict: + avg_state_dict[k] = v.clone().to(dtype=torch.float64) + avg_counts[k] = 1 + else: + avg_state_dict[k] += v.to(dtype=torch.float64) + avg_counts[k] += 1 + + for k, v in avg_state_dict.items(): + v.div_(avg_counts[k]) + + # float32 overflow seems unlikely based on weights seen to date, but who knows + float32_info = torch.finfo(torch.float32) + final_state_dict = {} + for k, v in avg_state_dict.items(): + v = v.clamp(float32_info.min, float32_info.max) + final_state_dict[k] = v.to(dtype=torch.float32) + + try: + torch.save(final_state_dict, args.output, _use_new_zipfile_serialization=False) + except: + torch.save(final_state_dict, args.output) + + with open(args.output, 'rb') as f: + sha_hash = hashlib.sha256(f.read()).hexdigest() + print("=> Saved state_dict to '{}, SHA256: {}'".format(args.output, sha_hash)) + + +if __name__ == '__main__': + main() diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/clean_checkpoint.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/clean_checkpoint.py new file mode 100644 index 0000000000..94f184d1b6 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/clean_checkpoint.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python +""" Checkpoint Cleaning Script + +Takes training checkpoints with GPU tensors, optimizer state, extra dict keys, etc. +and outputs a CPU tensor checkpoint with only the `state_dict` along with SHA256 +calculation for model zoo compatibility.
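Typical invocation (illustrative paths, not files shipped here): python clean_checkpoint.py --checkpoint output/train/model_best.pth.tar --output cleaned.pth --use-ema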
+ +Hacked together by / Copyright 2020 Ross Wightman (https://github.com/rwightman) +""" +import torch +import argparse +import os +import hashlib +import shutil +from collections import OrderedDict + +parser = argparse.ArgumentParser(description='PyTorch Checkpoint Cleaner') +parser.add_argument('--checkpoint', default='', type=str, metavar='PATH', + help='path to latest checkpoint (default: none)') +parser.add_argument('--output', default='', type=str, metavar='PATH', + help='output path') +parser.add_argument('--use-ema', dest='use_ema', action='store_true', + help='use ema version of weights if present') +parser.add_argument('--clean-aux-bn', dest='clean_aux_bn', action='store_true', + help='remove auxiliary batch norm layers (from SplitBN training) from checkpoint') + +_TEMP_NAME = './_checkpoint.pth' + + +def main(): + args = parser.parse_args() + + if os.path.exists(args.output): + print("Error: Output filename ({}) already exists.".format(args.output)) + exit(1) + + # Load an existing checkpoint to CPU, strip everything but the state_dict and re-save + if args.checkpoint and os.path.isfile(args.checkpoint): + print("=> Loading checkpoint '{}'".format(args.checkpoint)) + checkpoint = torch.load(args.checkpoint, map_location='cpu') + + new_state_dict = OrderedDict() + if isinstance(checkpoint, dict): + state_dict_key = 'state_dict_ema' if args.use_ema else 'state_dict' + if state_dict_key in checkpoint: + state_dict = checkpoint[state_dict_key] + else: + state_dict = checkpoint + else: + assert False + for k, v in state_dict.items(): + if args.clean_aux_bn and 'aux_bn' in k: + # If all aux_bn keys are removed, the SplitBN layers will end up as normal and + # load with the unmodified model using BatchNorm2d. + continue + name = k[7:] if k.startswith('module') else k + new_state_dict[name] = v + print("=> Loaded state_dict from '{}'".format(args.checkpoint)) + + try: + torch.save(new_state_dict, _TEMP_NAME, _use_new_zipfile_serialization=False) + except: + torch.save(new_state_dict, _TEMP_NAME) + + with open(_TEMP_NAME, 'rb') as f: + sha_hash = hashlib.sha256(f.read()).hexdigest() + + if args.output: + checkpoint_root, checkpoint_base = os.path.split(args.output) + checkpoint_base = os.path.splitext(checkpoint_base)[0] + else: + checkpoint_root = '' + checkpoint_base = os.path.splitext(args.checkpoint)[0] + final_filename = '-'.join([checkpoint_base, sha_hash[:8]]) + '.pth' + shutil.move(_TEMP_NAME, os.path.join(checkpoint_root, final_filename)) + print("=> Saved state_dict to '{}, SHA256: {}'".format(final_filename, sha_hash)) + else: + print("Error: Checkpoint ({}) doesn't exist".format(args.checkpoint)) + + +if __name__ == '__main__': + main() diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/distributed_train.sh b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/distributed_train.sh new file mode 100644 index 0000000000..884069974a --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/distributed_train.sh @@ -0,0 +1,5 @@ +#!/bin/bash +NUM_PROC=$1 +shift +python -m torch.distributed.launch --nproc_per_node=$NUM_PROC train.py "$@" + diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/__init__.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/__init__.py new file mode 100644 index 0000000000..ae656622e2 --- /dev/null +++ 
b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/__init__.py @@ -0,0 +1,7 @@ +from .efficientdet import EfficientDet +from .bench import DetBenchPredict, unwrap_bench +from .data import create_dataset, create_loader, create_parser, DetectionDatset, SkipSubset +from .evaluator import CocoEvaluator, PascalEvaluator, OpenImagesEvaluator, create_evaluator +from .config import get_efficientdet_config, default_detection_model_configs +from .factory import create_model, create_model_from_config +from .helpers import load_checkpoint, load_pretrained diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/anchors.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/anchors.py new file mode 100644 index 0000000000..5db0f37513 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/anchors.py @@ -0,0 +1,415 @@ +""" RetinaNet / EfficientDet Anchor Gen + +Adapted for PyTorch from Tensorflow impl at + https://github.com/google/automl/blob/6f6694cec1a48cdb33d5d1551a2d5db8ad227798/efficientdet/anchors.py + +Hacked together by Ross Wightman, original copyright below +""" +# Copyright 2020 Google Research. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Anchor definition. + +This module is borrowed from TPU RetinaNet implementation: +https://github.com/tensorflow/tpu/blob/master/models/official/retinanet/anchors.py +""" +from typing import Optional, Tuple, Sequence + +import numpy as np +import torch +import torch.nn as nn +#import torchvision.ops.boxes as tvb +from torchvision.ops.boxes import batched_nms, remove_small_boxes +from typing import List + +from effdet.object_detection import ArgMaxMatcher, FasterRcnnBoxCoder, BoxList, IouSimilarity, TargetAssigner +from .soft_nms import batched_soft_nms + + +# The minimum score to consider a logit for identifying detections. +MIN_CLASS_SCORE = -5.0 + +# The score for a dummy detection +_DUMMY_DETECTION_SCORE = -1e5 + + +def decode_box_outputs(rel_codes, anchors, output_xyxy: bool=False): + """Transforms relative regression coordinates to absolute positions. + + Network predictions are normalized and relative to a given anchor; this + reverses the transformation and outputs absolute coordinates for the input image. + + Args: + rel_codes: box regression targets. + + anchors: anchors on all feature levels. + + Returns: + outputs: bounding boxes. + + """ + ycenter_a = (anchors[:, 0] + anchors[:, 2]) / 2 + xcenter_a = (anchors[:, 1] + anchors[:, 3]) / 2 + ha = anchors[:, 2] - anchors[:, 0] + wa = anchors[:, 3] - anchors[:, 1] + + ty, tx, th, tw = rel_codes.unbind(dim=1) + + w = torch.exp(tw) * wa + h = torch.exp(th) * ha + ycenter = ty * ha + ycenter_a + xcenter = tx * wa + xcenter_a + ymin = ycenter - h / 2. + xmin = xcenter - w / 2. + ymax = ycenter + h / 2. + xmax = xcenter + w / 2. 
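# Quick sanity check of the decode above (illustrative): with rel_codes all zero, exp(0) == 1 and the
# centers are unchanged, so the decoded box equals its anchor; positive th / tw scale the box
# height / width by exp(th) / exp(tw).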
+ if output_xyxy: + out = torch.stack([xmin, ymin, xmax, ymax], dim=1) + else: + out = torch.stack([ymin, xmin, ymax, xmax], dim=1) + return out + + +def clip_boxes_xyxy(boxes: torch.Tensor, size: torch.Tensor): + boxes = boxes.clamp(min=0) + size = torch.cat([size, size], dim=0) + boxes = boxes.min(size) + return boxes + + +def generate_detections( + cls_outputs, box_outputs, anchor_boxes, indices, classes, + img_scale: Optional[torch.Tensor], img_size: Optional[torch.Tensor], + max_det_per_image: int = 100, soft_nms: bool = False): + """Generates detections with RetinaNet model outputs and anchors. + + Args: + cls_outputs: a torch tensor with shape [N, 1], which has the highest class + scores on all feature levels. The N is the number of selected + top-K total anchors on all levels. + + box_outputs: a torch tensor with shape [N, 4], which stacks box regression + outputs on all feature levels. The N is the number of selected top-k + total anchors on all levels. + + anchor_boxes: a torch tensor with shape [N, 4], which stacks anchors on all + feature levels. The N is the number of selected top-k total anchors on all levels. + + indices: a torch tensor with shape [N], which is the indices from top-k selection. + + classes: a torch tensor with shape [N], which represents the class + prediction on all selected anchors from top-k selection. + + img_scale: a float tensor representing the scale between original image + and input image for the detector. It is used to rescale detections for + evaluating with the original groundtruth annotations. + + max_det_per_image: an int constant, added as argument to make torchscript happy + + Returns: + detections: detection results in a tensor with shape [max_det_per_image, 6], + each row representing [x_min, y_min, x_max, y_max, score, class] + """ + assert box_outputs.shape[-1] == 4 + assert anchor_boxes.shape[-1] == 4 + assert cls_outputs.shape[-1] == 1 + + anchor_boxes = anchor_boxes[indices, :] + + # Appply bounding box regression to anchors, boxes are converted to xyxy + # here since PyTorch NMS expects them in that form. + boxes = decode_box_outputs(box_outputs.float(), anchor_boxes, output_xyxy=True) + if img_scale is not None and img_size is not None: + boxes = clip_boxes_xyxy(boxes, img_size / img_scale) # clip before NMS better? + + scores = cls_outputs.sigmoid().squeeze(1).float() + if soft_nms: + top_detection_idx, soft_scores = batched_soft_nms( + boxes, scores, classes, method_gaussian=True, iou_threshold=0.3, score_threshold=.001) + scores[top_detection_idx] = soft_scores + else: + top_detection_idx = batched_nms(boxes, scores, classes, iou_threshold=0.5) + + # keep only top max_det_per_image scoring predictions + top_detection_idx = top_detection_idx[:max_det_per_image] + boxes = boxes[top_detection_idx] + scores = scores[top_detection_idx, None] + classes = classes[top_detection_idx, None] + 1 # back to class idx with background class = 0 + + if img_scale is not None: + boxes = boxes * img_scale + + # FIXME add option to convert boxes back to yxyx? Otherwise must be handled downstream if + # that is the preferred output format. 
+ + # stack em and pad out to max_det_per_image if necessary + num_det = len(top_detection_idx) + detections = torch.cat([boxes, scores, classes.float()], dim=1) + if num_det < max_det_per_image: + detections = torch.cat([ + detections, + torch.zeros((max_det_per_image - num_det, 6), device=detections.device, dtype=detections.dtype) + ], dim=0) + return detections + + +def get_feat_sizes(image_size: Tuple[int, int], max_level: int): + """Get feat widths and heights for all levels. + Args: + image_size: a tuple (H, W) + max_level: maximum feature level. + Returns: + feat_sizes: a list of tuples (height, width) for each level. + """ + feat_size = image_size + feat_sizes = [feat_size] + for _ in range(1, max_level + 1): + feat_size = ((feat_size[0] - 1) // 2 + 1, (feat_size[1] - 1) // 2 + 1) + feat_sizes.append(feat_size) + return feat_sizes + + +class Anchors(nn.Module): + """RetinaNet Anchors class.""" + + def __init__(self, min_level, max_level, num_scales, aspect_ratios, anchor_scale, image_size: Tuple[int, int]): + """Constructs multiscale RetinaNet anchors. + + Args: + min_level: integer number of minimum level of the output feature pyramid. + + max_level: integer number of maximum level of the output feature pyramid. + + num_scales: integer number representing intermediate scales added + on each level. For instances, num_scales=2 adds two additional + anchor scales [2^0, 2^0.5] on each level. + + aspect_ratios: list of tuples representing the aspect ratio anchors added + on each level. For instances, aspect_ratios = + [(1, 1), (1.4, 0.7), (0.7, 1.4)] adds three anchors on each level. + + anchor_scale: float number representing the scale of size of the base + anchor to the feature stride 2^level. + + image_size: Sequence specifying input image size of model (H, W). + The image_size should be divided by the largest feature stride 2^max_level. 
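For example (illustrative arithmetic): with min_level=3, max_level=7, num_scales=3, three aspect ratios and image_size=(512, 512), anchors are placed at 64*64 + 32*32 + 16*16 + 8*8 + 4*4 = 5456 locations with 9 boxes each, i.e. 49104 anchors in total.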
+ """ + super(Anchors, self).__init__() + self.min_level = min_level + self.max_level = max_level + self.num_scales = num_scales + self.aspect_ratios = aspect_ratios + if isinstance(anchor_scale, Sequence): + assert len(anchor_scale) == max_level - min_level + 1 + self.anchor_scales = anchor_scale + else: + self.anchor_scales = [anchor_scale] * (max_level - min_level + 1) + + assert isinstance(image_size, Sequence) and len(image_size) == 2 + # FIXME this restriction can likely be relaxed with some additional changes + assert image_size[0] % 2 ** max_level == 0, 'Image size must be divisible by 2 ** max_level (128)' + assert image_size[1] % 2 ** max_level == 0, 'Image size must be divisible by 2 ** max_level (128)' + self.image_size = tuple(image_size) + self.feat_sizes = get_feat_sizes(image_size, max_level) + self.config = self._generate_configs() + self.register_buffer('boxes', self._generate_boxes()) + + @classmethod + def from_config(cls, config): + return cls( + config.min_level, config.max_level, + config.num_scales, config.aspect_ratios, + config.anchor_scale, config.image_size) + + def _generate_configs(self): + """Generate configurations of anchor boxes.""" + anchor_configs = {} + feat_sizes = self.feat_sizes + for level in range(self.min_level, self.max_level + 1): + anchor_configs[level] = [] + for scale_octave in range(self.num_scales): + for aspect in self.aspect_ratios: + anchor_configs[level].append( + ((feat_sizes[0][0] // feat_sizes[level][0], + feat_sizes[0][1] // feat_sizes[level][1]), + scale_octave / float(self.num_scales), aspect, + self.anchor_scales[level - self.min_level])) + return anchor_configs + + def _generate_boxes(self): + """Generates multiscale anchor boxes.""" + boxes_all = [] + for _, configs in self.config.items(): + boxes_level = [] + for config in configs: + stride, octave_scale, aspect, anchor_scale = config + base_anchor_size_x = anchor_scale * stride[1] * 2 ** octave_scale + base_anchor_size_y = anchor_scale * stride[0] * 2 ** octave_scale + if isinstance(aspect, Sequence): + aspect_x = aspect[0] + aspect_y = aspect[1] + else: + aspect_x = np.sqrt(aspect) + aspect_y = 1.0 / aspect_x + anchor_size_x_2 = base_anchor_size_x * aspect_x / 2.0 + anchor_size_y_2 = base_anchor_size_y * aspect_y / 2.0 + + x = np.arange(stride[1] / 2, self.image_size[1], stride[1]) + y = np.arange(stride[0] / 2, self.image_size[0], stride[0]) + xv, yv = np.meshgrid(x, y) + xv = xv.reshape(-1) + yv = yv.reshape(-1) + + boxes = np.vstack((yv - anchor_size_y_2, xv - anchor_size_x_2, + yv + anchor_size_y_2, xv + anchor_size_x_2)) + boxes = np.swapaxes(boxes, 0, 1) + boxes_level.append(np.expand_dims(boxes, axis=1)) + + # concat anchors on the same level to the reshape NxAx4 + boxes_level = np.concatenate(boxes_level, axis=1) + boxes_all.append(boxes_level.reshape([-1, 4])) + + anchor_boxes = np.vstack(boxes_all) + anchor_boxes = torch.from_numpy(anchor_boxes).float() + return anchor_boxes + + def get_anchors_per_location(self): + return self.num_scales * len(self.aspect_ratios) + + +class AnchorLabeler(object): + """Labeler for multiscale anchor boxes. + """ + + def __init__(self, anchors, num_classes: int, match_threshold: float = 0.5): + """Constructs anchor labeler to assign labels to anchors. + + Args: + anchors: an instance of class Anchors. + + num_classes: integer number representing number of classes in the dataset. + + match_threshold: float number between 0 and 1 representing the threshold + to assign positive labels for anchors. 
+ """ + similarity_calc = IouSimilarity() + matcher = ArgMaxMatcher( + match_threshold, + unmatched_threshold=match_threshold, + negatives_lower_than_unmatched=True, + force_match_for_each_row=True) + box_coder = FasterRcnnBoxCoder() + + self.target_assigner = TargetAssigner(similarity_calc, matcher, box_coder) + self.anchors = anchors + self.match_threshold = match_threshold + self.num_classes = num_classes + self.indices_cache = {} + + def label_anchors(self, gt_boxes, gt_classes, filter_valid=True): + """Labels anchors with ground truth inputs. + + Args: + gt_boxes: A float tensor with shape [N, 4] representing groundtruth boxes. + For each row, it stores [y0, x0, y1, x1] for four corners of a box. + + gt_classes: A integer tensor with shape [N, 1] representing groundtruth classes. + + filter_valid: Filter out any boxes w/ gt class <= -1 before assigning + + Returns: + cls_targets_dict: ordered dictionary with keys [min_level, min_level+1, ..., max_level]. + The values are tensor with shape [height_l, width_l, num_anchors]. The height_l and width_l + represent the dimension of class logits at l-th level. + + box_targets_dict: ordered dictionary with keys [min_level, min_level+1, ..., max_level]. + The values are tensor with shape [height_l, width_l, num_anchors * 4]. The height_l and + width_l represent the dimension of bounding box regression output at l-th level. + + num_positives: scalar tensor storing number of positives in an image. + """ + cls_targets_out = [] + box_targets_out = [] + + if filter_valid: + valid_idx = gt_classes > -1 # filter gt targets w/ label <= -1 + gt_boxes = gt_boxes[valid_idx] + gt_classes = gt_classes[valid_idx] + + cls_targets, box_targets, matches = self.target_assigner.assign( + BoxList(self.anchors.boxes), BoxList(gt_boxes), gt_classes) + + # class labels start from 1 and the background class = -1 + cls_targets = (cls_targets - 1).long() + + # Unpack labels. + """Unpacks an array of cls/box into multiple scales.""" + count = 0 + for level in range(self.anchors.min_level, self.anchors.max_level + 1): + feat_size = self.anchors.feat_sizes[level] + steps = feat_size[0] * feat_size[1] * self.anchors.get_anchors_per_location() + cls_targets_out.append(cls_targets[count:count + steps].view([feat_size[0], feat_size[1], -1])) + box_targets_out.append(box_targets[count:count + steps].view([feat_size[0], feat_size[1], -1])) + count += steps + + num_positives = (matches.match_results > -1).float().sum() + + return cls_targets_out, box_targets_out, num_positives + + def batch_label_anchors(self, gt_boxes, gt_classes, filter_valid=True): + batch_size = len(gt_boxes) + assert batch_size == len(gt_classes) + num_levels = self.anchors.max_level - self.anchors.min_level + 1 + cls_targets_out = [[] for _ in range(num_levels)] + box_targets_out = [[] for _ in range(num_levels)] + num_positives_out = [] + + anchor_box_list = BoxList(self.anchors.boxes) + for i in range(batch_size): + last_sample = i == batch_size - 1 + + if filter_valid: + valid_idx = gt_classes[i] > -1 # filter gt targets w/ label <= -1 + gt_box_list = BoxList(gt_boxes[i][valid_idx]) + gt_class_i = gt_classes[i][valid_idx] + else: + gt_box_list = BoxList(gt_boxes[i]) + gt_class_i = gt_classes[i] + cls_targets, box_targets, matches = self.target_assigner.assign(anchor_box_list, gt_box_list, gt_class_i) + + # class labels start from 1 and the background class = -1 + cls_targets = (cls_targets - 1).long() + + # Unpack labels. 
+ """Unpacks an array of cls/box into multiple scales.""" + count = 0 + for level in range(self.anchors.min_level, self.anchors.max_level + 1): + level_idx = level - self.anchors.min_level + feat_size = self.anchors.feat_sizes[level] + steps = feat_size[0] * feat_size[1] * self.anchors.get_anchors_per_location() + cls_targets_out[level_idx].append( + cls_targets[count:count + steps].view([feat_size[0], feat_size[1], -1])) + box_targets_out[level_idx].append( + box_targets[count:count + steps].view([feat_size[0], feat_size[1], -1])) + count += steps + if last_sample: + cls_targets_out[level_idx] = torch.stack(cls_targets_out[level_idx]) + box_targets_out[level_idx] = torch.stack(box_targets_out[level_idx]) + + num_positives_out.append((matches.match_results > -1).float().sum()) + if last_sample: + num_positives_out = torch.stack(num_positives_out) + + return cls_targets_out, box_targets_out, num_positives_out + diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/bench.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/bench.py new file mode 100644 index 0000000000..7cc6864684 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/bench.py @@ -0,0 +1,117 @@ +""" PyTorch EfficientDet support benches + +Hacked together by Ross Wightman +""" +from typing import Optional, Dict, List +import torch +import torch.nn as nn +from .anchors import Anchors, AnchorLabeler, generate_detections +from .loss import DetectionLoss + + +def _post_process( + cls_outputs: List[torch.Tensor], + box_outputs: List[torch.Tensor], + num_levels: int, + num_classes: int, + max_detection_points: int = 5000, +): + """Selects top-k predictions. + + Post-proc code adapted from Tensorflow version at: https://github.com/google/automl/tree/master/efficientdet + and optimized for PyTorch. + + Args: + cls_outputs: an OrderDict with keys representing levels and values + representing logits in [batch_size, height, width, num_anchors]. + + box_outputs: an OrderDict with keys representing levels and values + representing box regression targets in [batch_size, height, width, num_anchors * 4]. 
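(Illustrative numbers: for a (512, 512) input at level 3 with 3 scales and 3 aspect ratios, height = width = 64 and num_anchors * 4 = 36.)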
+ + num_levels (int): number of feature levels + + num_classes (int): number of output classes + """ + + batch_size = cls_outputs[0].shape[0] + cls_outputs_all = torch.cat([ + cls_outputs[level].permute(0, 2, 3, 1).reshape([batch_size, -1, num_classes]) + for level in range(num_levels)], 1) + + box_outputs_all = torch.cat([ + box_outputs[level].permute(0, 2, 3, 1).reshape([batch_size, -1, 4]) + for level in range(num_levels)], 1) + + _, cls_topk_indices_all = torch.topk(cls_outputs_all.reshape(batch_size, -1), dim=1, k=max_detection_points) + indices_all = cls_topk_indices_all // num_classes + classes_all = cls_topk_indices_all % num_classes + + box_outputs_all_after_topk = torch.gather( + box_outputs_all, 1, indices_all.unsqueeze(2).expand(-1, -1, 4)) + + cls_outputs_all_after_topk = torch.gather( + cls_outputs_all, 1, indices_all.unsqueeze(2).expand(-1, -1, num_classes)) + cls_outputs_all_after_topk = torch.gather( + cls_outputs_all_after_topk, 2, classes_all.unsqueeze(2)) + + return cls_outputs_all_after_topk, box_outputs_all_after_topk, indices_all, classes_all + + + +def _batch_detection( + batch_size: int, class_out, box_out, anchor_boxes, indices, classes, + img_scale: Optional[torch.Tensor] = None, + img_size: Optional[torch.Tensor] = None, + max_det_per_image: int = 100, + soft_nms: bool = False, +): + batch_detections = [] + # FIXME we may be able to do this as a batch with some tensor reshaping/indexing, PR welcome + for i in range(batch_size): + img_scale_i = None if img_scale is None else img_scale[i] + img_size_i = None if img_size is None else img_size[i] + detections = generate_detections( + class_out[i], box_out[i], anchor_boxes, indices[i], classes[i], + img_scale_i, img_size_i, max_det_per_image=max_det_per_image, soft_nms=soft_nms) + batch_detections.append(detections) + return torch.stack(batch_detections, dim=0) + + +class DetBenchPredict(nn.Module): + def __init__(self, config): + super(DetBenchPredict, self).__init__() + self.config=config + self.num_levels = config.num_levels + self.num_classes = config.num_classes + self.anchors = Anchors.from_config(config) + self.max_detection_points = config.max_detection_points + self.max_det_per_image = config.max_det_per_image + self.soft_nms = config.soft_nms + + def forward(self, x, class_out,box_out,img_info: Optional[Dict[str, torch.Tensor]] = None): + class_out, box_out, indices, classes = _post_process( + class_out, box_out, num_levels=self.num_levels, num_classes=self.num_classes, + max_detection_points=self.max_detection_points) + + if img_info is None: + img_scale, img_size = None, None + else: + img_scale, img_size = img_info['img_scale'], img_info['img_size'] + return _batch_detection( + x.shape[0], class_out, box_out, self.anchors.boxes, indices, classes, + img_scale, img_size, max_det_per_image=self.max_det_per_image, soft_nms=self.soft_nms + ) + + + + + +def unwrap_bench(model): + # Unwrap a model in support bench so that various other fns can access the weights and attribs of the + # underlying model directly + if hasattr(model, 'module'): # unwrap DDP or EMA + return unwrap_bench(model.module) + elif hasattr(model, 'model'): # unwrap Bench -> model + return unwrap_bench(model.model) + else: + return model diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/config/__init__.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/config/__init__.py new file mode 100644 index 0000000000..0dd74ec181 --- /dev/null +++ 
b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/config/__init__.py @@ -0,0 +1,4 @@ +from .config_utils import set_config_readonly, set_config_writeable +from .fpn_config import get_fpn_config +from .model_config import get_efficientdet_config, default_detection_model_configs +from .train_config import default_detection_train_config diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/config/config_utils.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/config/config_utils.py new file mode 100644 index 0000000000..f367cccab7 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/config/config_utils.py @@ -0,0 +1,9 @@ +from omegaconf import OmegaConf + + +def set_config_readonly(conf): + OmegaConf.set_readonly(conf, True) + + +def set_config_writeable(conf): + OmegaConf.set_readonly(conf, False) diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/config/fpn_config.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/config/fpn_config.py new file mode 100644 index 0000000000..e12ed18923 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/config/fpn_config.py @@ -0,0 +1,184 @@ +import itertools + +from omegaconf import OmegaConf + + +def bifpn_config(min_level, max_level, weight_method=None): + """BiFPN config. + Adapted from https://github.com/google/automl/blob/56815c9986ffd4b508fe1d68508e268d129715c1/efficientdet/keras/fpn_configs.py + """ + p = OmegaConf.create() + weight_method = weight_method or 'fastattn' + + num_levels = max_level - min_level + 1 + node_ids = {min_level + i: [i] for i in range(num_levels)} + + level_last_id = lambda level: node_ids[level][-1] + level_all_ids = lambda level: node_ids[level] + id_cnt = itertools.count(num_levels) + + p.nodes = [] + for i in range(max_level - 1, min_level - 1, -1): + # top-down path. + p.nodes.append({ + 'reduction': 1 << i, + 'inputs_offsets': [level_last_id(i), level_last_id(i + 1)], + 'weight_method': weight_method, + }) + node_ids[i].append(next(id_cnt)) + + for i in range(min_level + 1, max_level + 1): + # bottom-up path. + p.nodes.append({ + 'reduction': 1 << i, + 'inputs_offsets': level_all_ids(i) + [level_last_id(i - 1)], + 'weight_method': weight_method, + }) + node_ids[i].append(next(id_cnt)) + return p + + +def panfpn_config(min_level, max_level, weight_method=None): + """PAN FPN config. + + This defines FPN layout from Path Aggregation Networks as an alternate to + BiFPN, it does not implement the full PAN spec. + + Paper: https://arxiv.org/abs/1803.01534 + """ + p = OmegaConf.create() + weight_method = weight_method or 'fastattn' + + num_levels = max_level - min_level + 1 + node_ids = {min_level + i: [i] for i in range(num_levels)} + level_last_id = lambda level: node_ids[level][-1] + id_cnt = itertools.count(num_levels) + + p.nodes = [] + for i in range(max_level, min_level - 1, -1): + # top-down path. + offsets = [level_last_id(i), level_last_id(i + 1)] if i != max_level else [level_last_id(i)] + p.nodes.append({ + 'reduction': 1 << i, + 'inputs_offsets': offsets, + 'weight_method': weight_method, + }) + node_ids[i].append(next(id_cnt)) + + for i in range(min_level, max_level + 1): + # bottom-up path. 
+ offsets = [level_last_id(i), level_last_id(i - 1)] if i != min_level else [level_last_id(i)] + p.nodes.append({ + 'reduction': 1 << i, + 'inputs_offsets': offsets, + 'weight_method': weight_method, + }) + node_ids[i].append(next(id_cnt)) + + return p + + +def qufpn_config(min_level, max_level, weight_method=None): + """A dynamic quad fpn config that can adapt to different min/max levels. + + It extends the idea of BiFPN, and has four paths: + (up_down -> bottom_up) + (bottom_up -> up_down). + + Paper: https://ieeexplore.ieee.org/document/9225379 + Ref code: From contribution to TF EfficientDet + https://github.com/google/automl/blob/eb74c6739382e9444817d2ad97c4582dbe9a9020/efficientdet/keras/fpn_configs.py + """ + p = OmegaConf.create() + weight_method = weight_method or 'fastattn' + quad_method = 'fastattn' + num_levels = max_level - min_level + 1 + node_ids = {min_level + i: [i] for i in range(num_levels)} + level_last_id = lambda level: node_ids[level][-1] + level_all_ids = lambda level: node_ids[level] + level_first_id = lambda level: node_ids[level][0] + id_cnt = itertools.count(num_levels) + + p.nodes = [] + for i in range(max_level - 1, min_level - 1, -1): + # top-down path 1. + p.nodes.append({ + 'reduction': 1 << i, + 'inputs_offsets': [level_last_id(i), level_last_id(i + 1)], + 'weight_method': weight_method + }) + node_ids[i].append(next(id_cnt)) + node_ids[max_level].append(node_ids[max_level][-1]) + + for i in range(min_level + 1, max_level): + # bottom-up path 2. + p.nodes.append({ + 'reduction': 1 << i, + 'inputs_offsets': level_all_ids(i) + [level_last_id(i - 1)], + 'weight_method': weight_method + }) + node_ids[i].append(next(id_cnt)) + + i = max_level + p.nodes.append({ + 'reduction': 1 << i, + 'inputs_offsets': [level_first_id(i)] + [level_last_id(i - 1)], + 'weight_method': weight_method + }) + node_ids[i].append(next(id_cnt)) + node_ids[min_level].append(node_ids[min_level][-1]) + + for i in range(min_level + 1, max_level + 1, 1): + # bottom-up path 3. + p.nodes.append({ + 'reduction': 1 << i, + 'inputs_offsets': [ + level_first_id(i), level_last_id(i - 1) if i != min_level + 1 else level_first_id(i - 1)], + 'weight_method': weight_method + }) + node_ids[i].append(next(id_cnt)) + node_ids[min_level].append(node_ids[min_level][-1]) + + for i in range(max_level - 1, min_level, -1): + # top-down path 4. + p.nodes.append({ + 'reduction': 1 << i, + 'inputs_offsets': [node_ids[i][0]] + [node_ids[i][-1]] + [level_last_id(i + 1)], + 'weight_method': weight_method + }) + node_ids[i].append(next(id_cnt)) + i = min_level + p.nodes.append({ + 'reduction': 1 << i, + 'inputs_offsets': [node_ids[i][0]] + [level_last_id(i + 1)], + 'weight_method': weight_method + }) + node_ids[i].append(next(id_cnt)) + node_ids[max_level].append(node_ids[max_level][-1]) + + # NOTE: the order of the quad path is reversed from the original, my code expects the output of + # each FPN repeat to be same as input from backbone, in order of increasing reductions + for i in range(min_level, max_level + 1): + # quad-add path. 
+ p.nodes.append({ + 'reduction': 1 << i, + 'inputs_offsets': [node_ids[i][2], node_ids[i][4]], + 'weight_method': quad_method + }) + node_ids[i].append(next(id_cnt)) + + return p + + +def get_fpn_config(fpn_name, min_level=3, max_level=7): + if not fpn_name: + fpn_name = 'bifpn_fa' + name_to_config = { + 'bifpn_sum': bifpn_config(min_level=min_level, max_level=max_level, weight_method='sum'), + 'bifpn_attn': bifpn_config(min_level=min_level, max_level=max_level, weight_method='attn'), + 'bifpn_fa': bifpn_config(min_level=min_level, max_level=max_level, weight_method='fastattn'), + 'pan_sum': panfpn_config(min_level=min_level, max_level=max_level, weight_method='sum'), + 'pan_fa': panfpn_config(min_level=min_level, max_level=max_level, weight_method='fastattn'), + 'qufpn_sum': qufpn_config(min_level=min_level, max_level=max_level, weight_method='sum'), + 'qufpn_fa': qufpn_config(min_level=min_level, max_level=max_level, weight_method='fastattn'), + } + return name_to_config[fpn_name] diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/config/model_config.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/config/model_config.py new file mode 100644 index 0000000000..598d38d5a5 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/config/model_config.py @@ -0,0 +1,116 @@ +"""EfficientDet Configurations + +Adapted from official impl at https://github.com/google/automl/tree/master/efficientdet + +TODO use a different config system (OmegaConfig -> Hydra?), separate model from train specific hparams +""" + +from omegaconf import OmegaConf +from copy import deepcopy + + +def default_detection_model_configs(): + """Returns a default detection configs.""" + h = OmegaConf.create() + # model name. + h.name = 'tf_efficientdet_d1' + h.backbone_name = 'tf_efficientnet_b1' + h.backbone_args = None # FIXME sort out kwargs vs config for backbone creation + h.backbone_indices = None + # model specific, input preprocessing parameters + h.image_size = (640, 640) + # dataset specific head parameters + h.num_classes = 90 + # feature + anchor config + h.min_level = 3 + h.max_level = 7 + h.num_levels = h.max_level - h.min_level + 1 + h.num_scales = 3 + h.aspect_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)] + # ratio w/h: 2.0 means w=1.4, h=0.7. Can be computed with k-mean per dataset. + # aspect ratios can be specified as below too, pairs will be calc as sqrt(val), 1/sqrt(val) + #h.aspect_ratios = [1.0, 2.0, 0.5] + h.anchor_scale = 4.0 + # FPN and head config + h.pad_type = 'same' # original TF models require an equivalent of Tensorflow 'SAME' padding + h.act_type = 'swish' + h.norm_layer = None # defaults to batch norm when None + h.norm_kwargs = dict(eps=.001, momentum=.01) + h.box_class_repeats = 3 + h.fpn_cell_repeats = 3 + h.fpn_channels = 88 + h.separable_conv = True + h.apply_resample_bn = True + h.conv_after_downsample = False + h.conv_bn_relu_pattern = False + h.use_native_resize_op = False + h.downsample_type = 'max' + h.upsample_type = 'nearest' + h.redundant_bias = True # original TF models have back to back bias + BN layers, not necessary! + h.head_bn_level_first = False # change order of BN in head repeat list of lists, True for torchscript compat + h.head_act_type = None # activation for heads, same as act_type if None + + h.fpn_name = None + h.fpn_config = None + h.fpn_drop_path_rate = 0. # No stochastic depth in default. 
NOTE not currently used, unstable training + + # classification loss (used by train bench) + h.alpha = 0.25 + h.gamma = 1.5 + h.label_smoothing = 0. # only supported if legacy_focal == False, haven't produced great results + h.legacy_focal = False # use legacy focal loss (less stable, lower memory use in some cases) + h.jit_loss = False # torchscript jit for loss fn speed improvement, can impact stability and/or increase mem usage + + # localization loss (used by train bench) + h.delta = 0.1 + h.box_loss_weight = 50.0 + + # nms + h.soft_nms = False # use soft-nms, this is incredibly slow + h.max_detection_points = 5000 # max detections for post process, input to NMS + h.max_det_per_image = 100 # max detections per image limit, output of NMS + return h + + +efficientdet_model_param_dict = dict( + # Models with PyTorch friendly padding and my PyTorch pretrained backbones, training TBD + # Models ported from Tensorflow with pretrained backbones ported from Tensorflow + tf_efficientdet_d0=dict( + name='tf_efficientdet_d0', + backbone_name='tf_efficientnet_b0', + image_size=(512, 512), + fpn_channels=64, + fpn_cell_repeats=3, + box_class_repeats=3, + backbone_args=dict(drop_path_rate=0.2), + url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d0_34-f153e0cf.pth', + ), + tf_efficientdet_d7=dict( + name='tf_efficientdet_d7', + backbone_name='tf_efficientnet_b6', + image_size=(1536, 1536), + fpn_channels=384, + fpn_cell_repeats=8, + box_class_repeats=5, + anchor_scale=5.0, + fpn_name='bifpn_sum', # Use unweighted sum for training stability. + backbone_args=dict(drop_path_rate=0.2), + url='https://github.com/rwightman/efficientdet-pytorch/releases/download/v0.1/tf_efficientdet_d7_53-6d1d7a95.pth' + ), + + + + + + +) + + +def get_efficientdet_config(model_name='tf_efficientdet_d1'): + """Get the default config for EfficientDet based on model name.""" + h = default_detection_model_configs() + h.update(efficientdet_model_param_dict[model_name]) + h.num_levels = h.max_level - h.min_level + 1 + h = deepcopy(h) # may be unnecessary, ensure no references to param dict values + # OmegaConf.set_struct(h, True) # FIXME good idea? + return h diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/config/train_config.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/config/train_config.py new file mode 100644 index 0000000000..88deab0a5f --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/config/train_config.py @@ -0,0 +1,34 @@ +from omegaconf import OmegaConf + + +def default_detection_train_config(): + # FIXME currently using args for train config, will revisit, perhaps move to Hydra + h = OmegaConf.create() + + # dataset + h.skip_crowd_during_training = True + + # augmentation + h.input_rand_hflip = True + h.train_scale_min = 0.1 + h.train_scale_max = 2.0 + h.autoaugment_policy = None + + # optimization + h.momentum = 0.9 + h.learning_rate = 0.08 + h.lr_warmup_init = 0.008 + h.lr_warmup_epoch = 1.0 + h.first_lr_drop_epoch = 200.0 + h.second_lr_drop_epoch = 250.0 + h.clip_gradients_norm = 10.0 + h.num_epochs = 300 + + # regularization l2 loss. 
+ h.weight_decay = 4e-5 + + h.lr_decay_method = 'cosine' + h.moving_average_decay = 0.9998 + h.ckpt_var_scope = None + + return h diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/__init__.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/__init__.py new file mode 100644 index 0000000000..fcc1ac5915 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/__init__.py @@ -0,0 +1,6 @@ +from .dataset_factory import create_dataset +from .dataset import DetectionDatset, SkipSubset +from .input_config import resolve_input_config +from .loader import create_loader +from .parsers import create_parser +from .transforms import * diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/dataset.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/dataset.py new file mode 100644 index 0000000000..0087393eed --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/dataset.py @@ -0,0 +1,97 @@ +""" Detection dataset + +Hacked together by Ross Wightman +""" +import torch.utils.data as data +import numpy as np + +from PIL import Image +from .parsers import create_parser + + +class DetectionDatset(data.Dataset): + """`Object Detection Dataset. Use with parsers for COCO, VOC, and OpenImages. + Args: + parser (string, Parser): + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.ToTensor`` + + """ + + def __init__(self, data_dir, parser=None, parser_kwargs=None, transform=None): + super(DetectionDatset, self).__init__() + parser_kwargs = parser_kwargs or {} + self.data_dir = data_dir + if isinstance(parser, str): + self._parser = create_parser(parser, **parser_kwargs) + else: + assert parser is not None and len(parser.img_ids) + self._parser = parser + self._transform = transform + + def __getitem__(self, index): + """ + Args: + index (int): Index + Returns: + tuple: Tuple (image, annotations (target)). + """ + img_info = self._parser.img_infos[index] + target = dict(img_idx=index, img_size=(img_info['width'], img_info['height'])) + if self._parser.has_labels: + ann = self._parser.get_ann_info(index) + target.update(ann) + + img_path = self.data_dir / img_info['file_name'] + img = Image.open(img_path).convert('RGB') + + if self.transform is not None: + img, target = self.transform(img, target) + return img, target + + def __len__(self): + return len(self._parser.img_ids) + + @property + def parser(self): + return self._parser + + @property + def transform(self): + return self._transform + + @transform.setter + def transform(self, t): + self._transform = t + + +class SkipSubset(data.Dataset): + r""" + Subset of a dataset at specified indices. 
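For example, SkipSubset(dataset, n=10) keeps every 10th sample, e.g. for a quicker evaluation pass.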
+ + Arguments: + dataset (Dataset): The whole Dataset + n (int): skip rate (select every nth) + """ + def __init__(self, dataset, n=2): + self.dataset = dataset + assert n >= 1 + self.indices = np.arange(len(dataset))[::n] + + def __getitem__(self, idx): + return self.dataset[self.indices[idx]] + + def __len__(self): + return len(self.indices) + + @property + def parser(self): + return self.dataset.parser + + @property + def transform(self): + return self.dataset.transform + + @transform.setter + def transform(self, t): + self.dataset.transform = t diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/dataset_config.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/dataset_config.py new file mode 100644 index 0000000000..6c1a1d38d4 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/dataset_config.py @@ -0,0 +1,179 @@ +""" COCO, VOC, OpenImages dataset configurations + +Copyright 2020 Ross Wightman +""" +import os +from dataclasses import dataclass, field +from typing import Dict + + +@dataclass +class CocoCfg: + variant: str = None + parser: str = 'coco' + num_classes: int = 80 + splits: Dict[str, dict] = None + + +@dataclass +class Coco2017Cfg(CocoCfg): + variant: str = '2017' + splits: Dict[str, dict] = field(default_factory=lambda: dict( + train=dict(ann_filename='annotations/instances_train2017.json', img_dir='train2017', has_labels=True), + val=dict(ann_filename='annotations/instances_val2017.json', img_dir='val2017', has_labels=True), + test=dict(ann_filename='annotations/image_info_test2017.json', img_dir='test2017', has_labels=False), + testdev=dict(ann_filename='annotations/image_info_test-dev2017.json', img_dir='test2017', has_labels=False), + )) + + +@dataclass +class Coco2014Cfg(CocoCfg): + variant: str = '2014' + splits: Dict[str, dict] = field(default_factory=lambda: dict( + train=dict(ann_filename='annotations/instances_train2014.json', img_dir='train2014', has_labels=True), + val=dict(ann_filename='annotations/instances_val2014.json', img_dir='val2014', has_labels=True), + test=dict(ann_filename='', img_dir='test2014', has_labels=False), + )) + + +@dataclass +class VocCfg: + variant: str = None + parser: str = 'voc' + num_classes: int = 80 + img_filename: str = '%s.jpg' + splits: Dict[str, dict] = None + + +@dataclass +class Voc2007Cfg(VocCfg): + variant: str = '2007' + splits: Dict[str, dict] = field(default_factory=lambda: dict( + train=dict( + split_filename='VOC2007/ImageSets/Main/train.txt', + ann_filename='VOC2007/Annotations/%s.xml', + img_dir='VOC2007/JPEGImages', ), + val=dict( + split_filename='VOC2007/ImageSets/Main/val.txt', + ann_filename='VOC2007/Annotations/%s.xml', + img_dir='VOC2007/JPEGImages'), + #test=dict(img_dir='JPEGImages') + )) + + +@dataclass +class Voc2012Cfg(VocCfg): + variant: str = '2012' + splits: Dict[str, dict] = field(default_factory=lambda: dict( + train=dict( + split_filename='VOC2012/ImageSets/Main/train.txt', + ann_filename='VOC2012/Annotations/%s.xml', + img_dir='VOC2012/JPEGImages'), + val=dict( + split_filename='VOC2012/ImageSets/Main/val.txt', + ann_filename='VOC2012/Annotations/%s.xml', + img_dir='VOC2012/JPEGImages'), + #test=dict(img_dir='JPEGImages', split_file=None) + )) + + +@dataclass +class Voc0712Cfg(VocCfg): + variant: str = '0712' + splits: Dict[str, dict] = field(default_factory=lambda: dict( + train=dict( + split_filename=['VOC2007/ImageSets/Main/trainval.txt', 
'VOC2012/ImageSets/Main/trainval.txt'], + ann_filename=['VOC2007/Annotations/%s.xml', 'VOC2012/Annotations/%s.xml'], + img_dir=['VOC2007/JPEGImages', 'VOC2012/JPEGImages']), + val=dict( + split_filename='VOC2007/ImageSets/Main/test.txt', + ann_filename='VOC2007/Annotations/%s.xml', + img_dir='VOC2007/JPEGImages'), + #test=dict(img_dir='JPEGImages', split_file=None) + )) + + + +@dataclass +class OpenImagesCfg: + variant: str = None + parser: str = 'openimages' + num_classes: int = None + img_filename = '%s.jpg' + splits: Dict[str, dict] = None + + +@dataclass +class OpenImagesObjCfg(OpenImagesCfg): + num_classes: int = 601 + categories_map: str = 'annotations/class-descriptions-boxable.csv' + + +@dataclass +class OpenImagesSegCfg(OpenImagesCfg): + num_classes: int = 350 + categories_map: str = 'annotations/classes-segmentation.txt' + + +@dataclass +class OpenImagesObjV5Cfg(OpenImagesObjCfg): + splits: Dict[str, dict] = field(default_factory=lambda: dict( + train=dict( + img_dir='train', img_info='annotations/train-info.csv', has_labels=True, prefix_levels=1, + ann_bbox='annotations/train-annotations-bbox.csv', + ann_img_label='annotations/train-annotations-human-imagelabels-boxable.csv', + ), + val=dict( + img_dir='validation', img_info='annotations/validation-info.csv', has_labels=True, prefix_levels=0, + ann_bbox='annotations/validation-annotations-bbox.csv', + ann_img_label='annotations/validation-annotations-human-imagelabels-boxable.csv', + ), + test=dict( + img_dir='test', img_info='', has_labels=True, prefix_levels=0, + ann_bbox='annotations/test-annotations-bbox.csv', + ann_img_label='annotations/test-annotations-human-imagelabels-boxable.csv', + ) + )) + + +@dataclass +class OpenImagesObjChallenge2019Cfg(OpenImagesObjCfg): + num_classes: int = 500 + categories_map: str = 'annotations/challenge-2019/challenge-2019-classes-description-500.csv' + splits: Dict[str, dict] = field(default_factory=lambda: dict( + train=dict( + img_dir='train', img_info='annotations/train-info.csv', has_labels=True, prefix_levels=1, + ann_bbox='annotations/challenge-2019/challenge-2019-train-detection-bbox.csv', + ann_img_label='annotations/challenge-2019/challenge-2019-train-detection-human-imagelabels.csv', + ), + val=dict( + img_dir='validation', img_info='annotations/validation-info.csv', has_labels=True, prefix_levels=0, + ann_bbox='annotations/challenge-2019/challenge-2019-validation-detection-bbox.csv', + ann_img_label='annotations/challenge-2019/challenge-2019-validation-detection-human-imagelabels.csv', + ), + test=dict( + img_dir='challenge2019', img_info='annotations/challenge-2019/challenge2019-info', prefix_levels=0, + has_labels=False, ann_bbox='', ann_img_label='', + ) + )) + + +@dataclass +class OpenImagesSegV5Cfg(OpenImagesSegCfg): + num_classes: int = 300 + splits: Dict[str, dict] = field(default_factory=lambda: dict( + train=dict(), + val=dict(), + test=dict() + )) + + +@dataclass +class OpenImagesSegChallenge2019Cfg(OpenImagesSegCfg): + num_classes: int = 300 + ann_class_map: str = 'annotations/challenge-2019/challenge-2019-classes-description-segmentable.csv' + splits: Dict[str, dict] = field(default_factory=lambda: dict( + train=dict(), + val=dict(), + test=dict() + )) \ No newline at end of file diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/dataset_factory.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/dataset_factory.py new file mode 100644 index 0000000000..1cffa9e3e4 --- /dev/null +++ 
b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/dataset_factory.py @@ -0,0 +1,103 @@ +""" Dataset factory + +Copyright 2020 Ross Wightman +""" +import os +from collections import OrderedDict +from pathlib import Path + +from .dataset_config import * +from .parsers import * +from .dataset import DetectionDatset +from .parsers import create_parser + + +def create_dataset(name, root, splits=('train', 'val')): + if isinstance(splits, str): + splits = (splits,) + name = name.lower() + root = Path(root) + dataset_cls = DetectionDatset + + datasets = OrderedDict() + if name.startswith('coco'): + if 'coco2014' in name: + dataset_cfg = Coco2014Cfg() + else: + dataset_cfg = Coco2017Cfg() + for s in splits: + if s not in dataset_cfg.splits: + raise RuntimeError(f'{s} split not found in config') + split_cfg = dataset_cfg.splits[s] + ann_file = root / split_cfg['ann_filename'] + parser_cfg = CocoParserCfg( + ann_filename=ann_file, + has_labels=split_cfg['has_labels'] + ) + print(root / Path(split_cfg['img_dir'])) + datasets[s] = dataset_cls( + data_dir=root / Path(split_cfg['img_dir']), + parser=create_parser(dataset_cfg.parser, cfg=parser_cfg), + ) + + + elif name.startswith('voc'): + if 'voc0712' in name: + dataset_cfg = Voc0712Cfg() + elif 'voc2007' in name: + dataset_cfg = Voc2007Cfg() + else: + dataset_cfg = Voc2012Cfg() + for s in splits: + if s not in dataset_cfg.splits: + raise RuntimeError(f'{s} split not found in config') + split_cfg = dataset_cfg.splits[s] + if isinstance(split_cfg['split_filename'], (tuple, list)): + assert len(split_cfg['split_filename']) == len(split_cfg['ann_filename']) + parser = None + for sf, af, id in zip( + split_cfg['split_filename'], split_cfg['ann_filename'], split_cfg['img_dir']): + parser_cfg = VocParserCfg( + split_filename=root / sf, + ann_filename=os.path.join(root, af), + img_filename=os.path.join(id, dataset_cfg.img_filename)) + if parser is None: + parser = create_parser(dataset_cfg.parser, cfg=parser_cfg) + else: + other_parser = create_parser(dataset_cfg.parser, cfg=parser_cfg) + parser.merge(other=other_parser) + else: + parser_cfg = VocParserCfg( + split_filename=root / split_cfg['split_filename'], + ann_filename=os.path.join(root, split_cfg['ann_filename']), + img_filename=os.path.join(split_cfg['img_dir'], dataset_cfg.img_filename), + ) + parser = create_parser(dataset_cfg.parser, cfg=parser_cfg) + datasets[s] = dataset_cls(data_dir=root, parser=parser) + elif name.startswith('openimages'): + if 'challenge2019' in name: + dataset_cfg = OpenImagesObjChallenge2019Cfg() + else: + dataset_cfg = OpenImagesObjV5Cfg() + for s in splits: + if s not in dataset_cfg.splits: + raise RuntimeError(f'{s} split not found in config') + split_cfg = dataset_cfg.splits[s] + parser_cfg = OpenImagesParserCfg( + categories_filename=root / dataset_cfg.categories_map, + img_info_filename=root / split_cfg['img_info'], + bbox_filename=root / split_cfg['ann_bbox'], + img_label_filename=root / split_cfg['ann_img_label'], + img_filename=dataset_cfg.img_filename, + prefix_levels=split_cfg['prefix_levels'], + has_labels=split_cfg['has_labels'], + ) + datasets[s] = dataset_cls( + data_dir=root / Path(split_cfg['img_dir']), + parser=create_parser(dataset_cfg.parser, cfg=parser_cfg) + ) + else: + assert False, f'Unknown dataset parser ({name})' + + datasets = list(datasets.values()) + return datasets if len(datasets) > 1 else datasets[0] diff --git 
a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/input_config.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/input_config.py new file mode 100644 index 0000000000..0ee13e4be2 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/input_config.py @@ -0,0 +1,70 @@ +from .transforms import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD + + +def resolve_input_config(args, model_config=None, model=None): + if not isinstance(args, dict): + args = vars(args) + input_config = {} + if not model_config and model is not None and hasattr(model, 'config'): + model_config = model.config + + # Resolve input/image size + in_chans = 3 + # if 'chans' in args and args['chans'] is not None: + # in_chans = args['chans'] + + input_size = (in_chans, 512, 512) + # if 'input_size' in args and args['input_size'] is not None: + # assert isinstance(args['input_size'], (tuple, list)) + # assert len(args['input_size']) == 3 + # input_size = tuple(args['input_size']) + # in_chans = input_size[0] # input_size overrides in_chans + # elif 'img_size' in args and args['img_size'] is not None: + # assert isinstance(args['img_size'], int) + # input_size = (in_chans, args['img_size'], args['img_size']) + if 'input_size' in model_config: + input_size = tuple(model_config['input_size']) + elif 'image_size' in model_config: + input_size = (in_chans,) + tuple(model_config['image_size']) + assert isinstance(input_size, tuple) and len(input_size) == 3 + input_config['input_size'] = input_size + + # resolve interpolation method + input_config['interpolation'] = 'bicubic' + if 'interpolation' in args and args['interpolation']: + input_config['interpolation'] = args['interpolation'] + elif 'interpolation' in model_config: + input_config['interpolation'] = model_config['interpolation'] + + # resolve dataset + model mean for normalization + input_config['mean'] = IMAGENET_DEFAULT_MEAN + if 'mean' in args and args['mean'] is not None: + mean = tuple(args['mean']) + if len(mean) == 1: + mean = tuple(list(mean) * in_chans) + else: + assert len(mean) == in_chans + input_config['mean'] = mean + elif 'mean' in model_config: + input_config['mean'] = model_config['mean'] + + # resolve dataset + model std deviation for normalization + input_config['std'] = IMAGENET_DEFAULT_STD + if 'std' in args and args['std'] is not None: + std = tuple(args['std']) + if len(std) == 1: + std = tuple(list(std) * in_chans) + else: + assert len(std) == in_chans + input_config['std'] = std + elif 'std' in model_config: + input_config['std'] = model_config['std'] + + # resolve letterbox fill color + input_config['fill_color'] = 'mean' + if 'fill_color' in args and args['fill_color'] is not None: + input_config['fill_color'] = args['fill_color'] + elif 'fill_color' in model_config: + input_config['fill_color'] = model_config['fill_color'] + + return input_config diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/loader.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/loader.py new file mode 100644 index 0000000000..77b0dc9ae7 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/loader.py @@ -0,0 +1,213 @@ +""" Object detection loader/collate + +Hacked together by / Copyright 2020 Ross Wightman +""" +import torch.utils.data +from .transforms import * +from .random_erasing import RandomErasing +from 
effdet.anchors import AnchorLabeler +from timm.data.distributed_sampler import OrderedDistributedSampler +import os + +MAX_NUM_INSTANCES = 100 + + +class DetectionFastCollate: + """ A detection specific, optimized collate function w/ a bit of state. + + Optionally performs anchor labelling. Doing this here offloads some work from the + GPU and the main training process thread and increases the load on the dataloader + threads. + + """ + def __init__( + self, + instance_keys=None, + instance_shapes=None, + instance_fill=-1, + max_instances=MAX_NUM_INSTANCES, + anchor_labeler=None, + ): + instance_keys = instance_keys or {'bbox', 'bbox_ignore', 'cls'} + instance_shapes = instance_shapes or dict( + bbox=(max_instances, 4), bbox_ignore=(max_instances, 4), cls=(max_instances,)) + self.instance_info = {k: dict(fill=instance_fill, shape=instance_shapes[k]) for k in instance_keys} + self.max_instances = max_instances + self.anchor_labeler = anchor_labeler + + def __call__(self, batch): + batch_size = len(batch) + target = dict() + labeler_outputs = dict() + img_tensor = torch.zeros((batch_size, *batch[0][0].shape), dtype=torch.uint8) + for i in range(batch_size): + img_tensor[i] += torch.from_numpy(batch[i][0]) + labeler_inputs = {} + for tk, tv in batch[i][1].items(): + instance_info = self.instance_info.get(tk, None) + if instance_info is not None: + # target tensor is associated with a detection instance + tv = torch.from_numpy(tv).to(dtype=torch.float32) + if self.anchor_labeler is None: + if i == 0: + shape = (batch_size,) + instance_info['shape'] + target_tensor = torch.full(shape, instance_info['fill'], dtype=torch.float32) + target[tk] = target_tensor + else: + target_tensor = target[tk] + num_elem = min(tv.shape[0], self.max_instances) + target_tensor[i, 0:num_elem] = tv[0:num_elem] + else: + # no need to pass gt tensors through when labeler in use + if tk in ('bbox', 'cls'): + labeler_inputs[tk] = tv + else: + # target tensor is an image-level annotation / metadata + if i == 0: + # first batch elem, create destination tensors + if isinstance(tv, (tuple, list)): + # per batch elem sequence + shape = (batch_size, len(tv)) + dtype = torch.float32 if isinstance(tv[0], (float, np.floating)) else torch.int32 + else: + # per batch elem scalar + shape = batch_size, + dtype = torch.float32 if isinstance(tv, (float, np.floating)) else torch.int64 + target_tensor = torch.zeros(shape, dtype=dtype) + target[tk] = target_tensor + else: + target_tensor = target[tk] + target_tensor[i] = torch.tensor(tv, dtype=target_tensor.dtype) + + if self.anchor_labeler is not None: + cls_targets, box_targets, num_positives = self.anchor_labeler.label_anchors( + labeler_inputs['bbox'], labeler_inputs['cls'], filter_valid=False) + if i == 0: + # first batch elem, create destination tensors, separate key per level + for j, (ct, bt) in enumerate(zip(cls_targets, box_targets)): + labeler_outputs[f'label_cls_{j}'] = torch.zeros( + (batch_size,) + ct.shape, dtype=torch.int64) + labeler_outputs[f'label_bbox_{j}'] = torch.zeros( + (batch_size,) + bt.shape, dtype=torch.float32) + labeler_outputs['label_num_positives'] = torch.zeros(batch_size) + for j, (ct, bt) in enumerate(zip(cls_targets, box_targets)): + labeler_outputs[f'label_cls_{j}'][i] = ct + labeler_outputs[f'label_bbox_{j}'][i] = bt + labeler_outputs['label_num_positives'][i] = num_positives + if labeler_outputs: + target.update(labeler_outputs) + + return img_tensor, target + + +class PrefetchLoader: + + def __init__(self, + loader, + mean=IMAGENET_DEFAULT_MEAN, + 
std=IMAGENET_DEFAULT_STD, + re_prob=0., + re_mode='pixel', + re_count=1, + ): + self.loader = loader + self.mean = torch.tensor([x * 255 for x in mean]).view(1, 3, 1, 1) + self.std = torch.tensor([x * 255 for x in std]).view(1, 3, 1, 1) + if re_prob > 0.: + self.random_erasing = RandomErasing(probability=re_prob, mode=re_mode, max_count=re_count) + else: + self.random_erasing = None + + def __iter__(self): + + first = True + + for next_input, next_target in self.loader: + + next_input = next_input.float().sub_(self.mean).div_(self.std) + next_target = {k: v for k, v in next_target.items()} + if self.random_erasing is not None: + next_input = self.random_erasing(next_input, next_target) + + if not first: + yield input, target + else: + first = False + + + input = next_input + target = next_target + + yield input, target + + def __len__(self): + return len(self.loader) + + @property + def sampler(self): + return self.loader.sampler + + @property + def dataset(self): + return self.loader.dataset + + +def create_loader( + dataset, + input_size, + batch_size, + is_training=False, + use_prefetcher=True, + re_prob=0., + re_mode='pixel', + re_count=1, + interpolation='bilinear', + fill_color='mean', + mean=IMAGENET_DEFAULT_MEAN, + std=IMAGENET_DEFAULT_STD, + num_workers=1, + distributed=False, + pin_mem=False, + anchor_labeler=None, + transform_fn=None, + collate_fn=None, +): + if isinstance(input_size, tuple): + img_size = input_size[-2:] + else: + img_size = input_size + + if transform_fn is not None: + # transform_fn should accept inputs (img, annotations) from the dataset and return a tuple + # of img, annotations for the data loader collate function. + # The valid types of img and annotations depend on the dataset and collate abstractions used. + # The default dataset outputs PIL Image and dict of numpy ndarrays or python scalar annotations. 
+ # The fast collate fn accepts ONLY numpy uint8 images and annotations dicts of ndarrays and python scalars + transform = transform_fn + else: + transform = transforms_coco_eval( + img_size, + interpolation=interpolation, + use_prefetcher=use_prefetcher, + fill_color=fill_color, + mean=mean, + std=std) + dataset.transform = transform + + sampler = None + + collate_fn = collate_fn or DetectionFastCollate(anchor_labeler=anchor_labeler) + print(anchor_labeler) + loader = torch.utils.data.DataLoader( + dataset, + batch_size=batch_size, + shuffle=sampler is None and is_training, + num_workers=num_workers, + sampler=sampler, + pin_memory=pin_mem, + collate_fn=collate_fn, + ) + if use_prefetcher: + loader = PrefetchLoader(loader, mean=mean, std=std) + + return loader diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/parsers/__init__.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/parsers/__init__.py new file mode 100644 index 0000000000..bea708cb8c --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/parsers/__init__.py @@ -0,0 +1,2 @@ +from .parser_config import OpenImagesParserCfg, CocoParserCfg, VocParserCfg +from .parser_factory import create_parser diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/parsers/parser.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/parsers/parser.py new file mode 100644 index 0000000000..b593280520 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/parsers/parser.py @@ -0,0 +1,82 @@ +from numbers import Integral +from typing import List, Union, Dict, Any + + +class Parser: + """ Parser base class. + + The attributes listed below make up a public interface common to all parsers. They can be accessed directly + once the dataset is constructed and annotations are populated. + + Attributes: + + cat_names (list[str]): + list of category (class) names, with background class at position 0. 
+ cat_ids (list[union[str, int]): + list of dataset specific, unique integer or string category ids, does not include background + cat_id_to_label (dict): + map from category id to integer 1-indexed class label + + img_ids (list): + list of dataset specific, unique image ids corresponding to valid samples in dataset + img_ids_invalid (list): + list of image ids corresponding to invalid images, not used as samples + img_infos (list[dict]): + image info, list of info dicts with filename, width, height for each image sample + """ + def __init__( + self, + bbox_yxyx: bool = False, + has_labels: bool = True, + include_masks: bool = False, + include_bboxes_ignore: bool = False, + ignore_empty_gt: bool = False, + min_img_size: int = 32, + ): + """ + Args: + yxyx (bool): output coords in yxyx format, otherwise xyxy + has_labels (bool): dataset has labels (for training validation, False usually for test sets) + include_masks (bool): include segmentation masks in target output (not supported yet for any dataset) + include_bboxes_ignore (bool): include ignored bbox in target output + ignore_empty_gt (bool): ignore images with no ground truth (no negative images) + min_img_size (bool): ignore images with width or height smaller than this number + sub_sample (int): sample every N images from the dataset + """ + # parser config, determines how dataset parsed and validated + self.yxyx = bbox_yxyx + self.has_labels = has_labels + self.include_masks = include_masks + self.include_bboxes_ignore = include_bboxes_ignore + self.ignore_empty_gt = ignore_empty_gt + self.min_img_size = min_img_size + self.label_offset = 1 + + # Category (class) metadata. Populated by _load_annotations() + self.cat_names: List[str] = [] + self.cat_ids: List[Union[str, Integral]] = [] + self.cat_id_to_label: Dict[Union[str, Integral], Integral] = dict() + + # Image metadata. 
Populated by _load_annotations() + self.img_ids: List[Union[str, Integral]] = [] + self.img_ids_invalid: List[Union[str, Integral]] = [] + self.img_infos: List[Dict[str, Any]] = [] + + @property + def cat_dicts(self): + """return category names and labels in format compatible with TF Models Evaluator + list[dict(name=, id=)] + """ + return [ + dict( + name=name, + id=cat_id if not self.cat_id_to_label else self.cat_id_to_label[cat_id] + ) for name, cat_id in zip(self.cat_names, self.cat_ids)] + + @property + def max_label(self): + if self.cat_id_to_label: + return max(self.cat_id_to_label.values()) + else: + assert len(self.cat_ids) and isinstance(self.cat_ids[0], Integral) + return max(self.cat_ids) diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/parsers/parser_coco.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/parsers/parser_coco.py new file mode 100644 index 0000000000..58bc2495c3 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/parsers/parser_coco.py @@ -0,0 +1,93 @@ +""" COCO dataset parser + +Copyright 2020 Ross Wightman +""" +import numpy as np +from pycocotools.coco import COCO +from .parser import Parser +from .parser_config import CocoParserCfg + + +class CocoParser(Parser): + + def __init__(self, cfg: CocoParserCfg): + super().__init__( + bbox_yxyx=cfg.bbox_yxyx, + has_labels=cfg.has_labels, + include_masks=cfg.include_masks, + include_bboxes_ignore=cfg.include_bboxes_ignore, + ignore_empty_gt=cfg.has_labels and cfg.ignore_empty_gt, + min_img_size=cfg.min_img_size + ) + self.cat_ids_as_labels = True # this is the default for original TF EfficientDet models + self.coco = None + self._load_annotations(cfg.ann_filename) + + def get_ann_info(self, idx): + img_id = self.img_ids[idx] + return self._parse_img_ann(img_id) + + def _load_annotations(self, ann_file): + assert self.coco is None + self.coco = COCO(ann_file) + self.cat_ids = self.coco.getCatIds() + self.cat_names = [c['name'] for c in self.coco.loadCats(ids=self.cat_ids)] + if not self.cat_ids_as_labels: + self.cat_id_to_label = {cat_id: i + self.label_offset for i, cat_id in enumerate(self.cat_ids)} + img_ids_with_ann = set(_['image_id'] for _ in self.coco.anns.values()) + for img_id in sorted(self.coco.imgs.keys()): + info = self.coco.loadImgs([img_id])[0] + if (min(info['width'], info['height']) < self.min_img_size or + (self.ignore_empty_gt and img_id not in img_ids_with_ann)): + self.img_ids_invalid.append(img_id) + continue + self.img_ids.append(img_id) + self.img_infos.append(info) + + def _parse_img_ann(self, img_id): + ann_ids = self.coco.getAnnIds(imgIds=[img_id]) + ann_info = self.coco.loadAnns(ann_ids) + bboxes = [] + bboxes_ignore = [] + cls = [] + + for i, ann in enumerate(ann_info): + if ann.get('ignore', False): + continue + x1, y1, w, h = ann['bbox'] + if self.include_masks and ann['area'] <= 0: + continue + if w < 1 or h < 1: + continue + + if self.yxyx: + bbox = [y1, x1, y1 + h, x1 + w] + else: + bbox = [x1, y1, x1 + w, y1 + h] + + if ann.get('iscrowd', False): + if self.include_bboxes_ignore: + bboxes_ignore.append(bbox) + else: + bboxes.append(bbox) + cls.append(self.cat_id_to_label[ann['category_id']] if self.cat_id_to_label else ann['category_id']) + + if bboxes: + bboxes = np.array(bboxes, ndmin=2, dtype=np.float32) + cls = np.array(cls, dtype=np.int64) + else: + bboxes = np.zeros((0, 4), dtype=np.float32) + cls = np.array([], dtype=np.int64) + + if 
self.include_bboxes_ignore: + if bboxes_ignore: + bboxes_ignore = np.array(bboxes_ignore, ndmin=2, dtype=np.float32) + else: + bboxes_ignore = np.zeros((0, 4), dtype=np.float32) + + ann = dict(bbox=bboxes, cls=cls) + + if self.include_bboxes_ignore: + ann['bbox_ignore'] = bboxes_ignore + + return ann diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/parsers/parser_config.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/parsers/parser_config.py new file mode 100644 index 0000000000..8537d3e1b1 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/parsers/parser_config.py @@ -0,0 +1,49 @@ +""" Dataset parser configs + +Copyright 2020 Ross Wightman +""" +from dataclasses import dataclass + +__all__ = ['CocoParserCfg', 'OpenImagesParserCfg', 'VocParserCfg'] + + +@dataclass +class CocoParserCfg: + ann_filename: str # absolute path + include_masks: bool = False + include_bboxes_ignore: bool = False + has_labels: bool = True + bbox_yxyx: bool = True + min_img_size: int = 32 + ignore_empty_gt: bool = False + + +@dataclass +class VocParserCfg: + split_filename: str + ann_filename: str + img_filename: str = '%.jpg' + keep_difficult: bool = True + classes: list = None + add_background: bool = True + has_labels: bool = True + bbox_yxyx: bool = True + min_img_size: int = 32 + ignore_empty_gt: bool = False + + +@dataclass +class OpenImagesParserCfg: + categories_filename: str + img_info_filename: str + bbox_filename: str + img_label_filename: str = '' + masks_filename: str = '' + img_filename: str = '%s.jpg' # relative to dataset img_dir + task: str = 'obj' + prefix_levels: int = 1 + add_background: bool = True + has_labels: bool = True + bbox_yxyx: bool = True + min_img_size: int = 32 + ignore_empty_gt: bool = False diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/parsers/parser_factory.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/parsers/parser_factory.py new file mode 100644 index 0000000000..9dcd46a740 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/parsers/parser_factory.py @@ -0,0 +1,19 @@ +""" Parser factory + +Copyright 2020 Ross Wightman +""" +from .parser_coco import CocoParser +from .parser_voc import VocParser +from .parser_open_images import OpenImagesParser + + +def create_parser(name, **kwargs): + if name == 'coco': + parser = CocoParser(**kwargs) + elif name == 'voc': + parser = VocParser(**kwargs) + elif name == 'openimages': + parser = OpenImagesParser(**kwargs) + else: + assert False, f'Unknown dataset parser ({name})' + return parser diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/parsers/parser_open_images.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/parsers/parser_open_images.py new file mode 100644 index 0000000000..3c201ac2df --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/parsers/parser_open_images.py @@ -0,0 +1,211 @@ +""" OpenImages dataset parser + +Copyright 2020 Ross Wightman +""" +import numpy as np +import os +import logging + +from .parser import Parser +from .parser_config import OpenImagesParserCfg + +_logger = logging.getLogger(__name__) + + +class OpenImagesParser(Parser): + + def __init__(self, cfg: OpenImagesParserCfg): + 
super().__init__( + bbox_yxyx=cfg.bbox_yxyx, + has_labels=cfg.has_labels, + include_masks=False, # FIXME to support someday + include_bboxes_ignore=False, + ignore_empty_gt=cfg.has_labels and cfg.ignore_empty_gt, + min_img_size=cfg.min_img_size + ) + self.img_prefix_levels = cfg.prefix_levels + self.mask_prefix_levels = 1 + self._anns = None # access via get_ann_info() + self._img_to_ann = None + self._load_annotations( + categories_filename=cfg.categories_filename, + img_info_filename=cfg.img_info_filename, + img_filename=cfg.img_filename, + masks_filename=cfg.masks_filename, + bbox_filename=cfg.bbox_filename + ) + + def _load_annotations( + self, + categories_filename: str, + img_info_filename: str, + img_filename: str, + masks_filename: str, + bbox_filename: str, + ): + import pandas as pd # For now, blow up on pandas req only when trying to load open images anno + + _logger.info('Loading categories...') + classes_df = pd.read_csv(categories_filename, header=None) + self.cat_ids = classes_df[0].tolist() + self.cat_names = classes_df[1].tolist() + self.cat_id_to_label = {c: i + self.label_offset for i, c in enumerate(self.cat_ids)} + + def _img_filename(img_id): + # build image filenames that are relative to img_dir + filename = img_filename % img_id + if self.img_prefix_levels: + levels = [c for c in img_id[:self.img_prefix_levels]] + filename = os.path.join(*levels, filename) + return filename + + def _mask_filename(mask_path): + # FIXME finish + if self.mask_prefix_levels: + levels = [c for c in mask_path[:self.mask_prefix_levels]] + mask_path = os.path.join(*levels, mask_path) + return mask_path + + def _load_img_info(csv_file, select_img_ids=None): + _logger.info('Read img_info csv...') + img_info_df = pd.read_csv(csv_file, index_col='id') + + _logger.info('Filter images...') + if select_img_ids is not None: + img_info_df = img_info_df.loc[select_img_ids] + img_info_df = img_info_df[ + (img_info_df['width'] >= self.min_img_size) & (img_info_df['height'] >= self.min_img_size)] + + _logger.info('Mapping ids...') + img_info_df['img_id'] = img_info_df.index + img_info_df['file_name'] = img_info_df.index.map(lambda x: _img_filename(x)) + img_info_df = img_info_df[['img_id', 'file_name', 'width', 'height']] + img_sizes = img_info_df[['width', 'height']].values + self.img_infos = img_info_df.to_dict('records') + self.img_ids = img_info_df.index.values.tolist() + img_id_to_idx = {img_id: idx for idx, img_id in enumerate(self.img_ids)} + return img_sizes, img_id_to_idx + + if self.include_masks and self.has_labels: + masks_df = pd.read_csv(masks_filename) + + # NOTE currently using dataset masks anno ImageIDs to form valid img_ids from the dataset + anno_img_ids = sorted(masks_df['ImageID'].unique()) + img_sizes, img_id_to_idx = _load_img_info(img_info_filename, select_img_ids=anno_img_ids) + + masks_df['ImageIdx'] = masks_df['ImageID'].map(img_id_to_idx) + if np.issubdtype(masks_df.ImageIdx.dtype, np.floating): + masks_df = masks_df.dropna(axis='rows') + masks_df['ImageIdx'] = masks_df.ImageIdx.astype(np.int32) + masks_df.sort_values('ImageIdx', inplace=True) + ann_img_idx = masks_df['ImageIdx'].values + img_sizes = img_sizes[ann_img_idx] + masks_df['BoxXMin'] = masks_df['BoxXMin'] * img_sizes[:, 0] + masks_df['BoxXMax'] = masks_df['BoxXMax'] * img_sizes[:, 0] + masks_df['BoxYMin'] = masks_df['BoxYMin'] * img_sizes[:, 1] + masks_df['BoxYMax'] = masks_df['BoxYMax'] * img_sizes[:, 1] + masks_df['LabelIdx'] = masks_df['LabelName'].map(self.cat_id_to_label) + # FIXME remap mask filename with 
_mask_filename + + self._anns = dict( + bbox=masks_df[['BoxXMin', 'BoxYMin', 'BoxXMax', 'BoxYMax']].values.astype(np.float32), + label=masks_df[['LabelIdx']].values.astype(np.int32), + mask_path=masks_df[['MaskPath']].values + ) + _, ri, rc = np.unique(ann_img_idx, return_index=True, return_counts=True) + self._img_to_ann = list(zip(ri, rc)) # index, count tuples + elif self.has_labels: + _logger.info('Loading bbox...') + bbox_df = pd.read_csv(bbox_filename) + + # NOTE currently using dataset box anno ImageIDs to form valid img_ids from the larger dataset. + # FIXME use *imagelabels.csv or imagelabels-boxable.csv for negative examples (without box?) + anno_img_ids = sorted(bbox_df['ImageID'].unique()) + img_sizes, img_id_to_idx = _load_img_info(img_info_filename, select_img_ids=anno_img_ids) + + _logger.info('Process bbox...') + bbox_df['ImageIdx'] = bbox_df['ImageID'].map(img_id_to_idx) + if np.issubdtype(bbox_df.ImageIdx.dtype, np.floating): + bbox_df = bbox_df.dropna(axis='rows') + bbox_df['ImageIdx'] = bbox_df.ImageIdx.astype(np.int32) + bbox_df.sort_values('ImageIdx', inplace=True) + ann_img_idx = bbox_df['ImageIdx'].values + img_sizes = img_sizes[ann_img_idx] + bbox_df['XMin'] = bbox_df['XMin'] * img_sizes[:, 0] + bbox_df['XMax'] = bbox_df['XMax'] * img_sizes[:, 0] + bbox_df['YMin'] = bbox_df['YMin'] * img_sizes[:, 1] + bbox_df['YMax'] = bbox_df['YMax'] * img_sizes[:, 1] + bbox_df['LabelIdx'] = bbox_df['LabelName'].map(self.cat_id_to_label).astype(np.int32) + + self._anns = dict( + bbox=bbox_df[['XMin', 'YMin', 'XMax', 'YMax']].values.astype(np.float32), + label=bbox_df[['LabelIdx', 'IsGroupOf']].values.astype(np.int32), + ) + _, ri, rc = np.unique(ann_img_idx, return_index=True, return_counts=True) + self._img_to_ann = list(zip(ri, rc)) # index, count tuples + else: + _load_img_info(img_info_filename) + + _logger.info('Annotations loaded!') + + def get_ann_info(self, idx): + if not self.has_labels: + return dict() + start_idx, num_ann = self._img_to_ann[idx] + ann_keys = tuple(self._anns.keys()) + ann_values = tuple(self._anns[k][start_idx:start_idx + num_ann] for k in ann_keys) + return self._parse_ann_info(idx, ann_keys, ann_values) + + def _parse_ann_info(self, img_idx, ann_keys, ann_values): + """ + """ + gt_bboxes = [] + gt_labels = [] + gt_bboxes_ignore = [] + if self.include_masks: + assert 'mask_path' in ann_keys + gt_masks = [] + + for ann in zip(*ann_values): + ann = dict(zip(ann_keys, ann)) + x1, y1, x2, y2 = ann['bbox'] + if x2 - x1 < 1 or y2 - y1 < 1: + continue + label = ann['label'][0] + iscrowd = False + if len(ann['label']) > 1: + iscrowd = ann['label'][1] + if self.yxyx: + bbox = np.array([y1, x1, y2, x2], dtype=np.float32) + else: + bbox = ann['bbox'] + if iscrowd: + gt_bboxes_ignore.append(bbox) + else: + gt_bboxes.append(bbox) + gt_labels.append(label) + # if self.include_masks: + # img_info = self.img_infos[img_idx] + # mask_img = SegmentationMask(ann['mask_filename'], img_info['width'], img_info['height']) + # gt_masks.append(mask_img) + + if gt_bboxes: + gt_bboxes = np.array(gt_bboxes, ndmin=2, dtype=np.float32) + gt_labels = np.array(gt_labels, dtype=np.int64) + else: + gt_bboxes = np.zeros((0, 4), dtype=np.float32) + gt_labels = np.array([], dtype=np.int64) + + if self.include_bboxes_ignore: + if gt_bboxes_ignore: + gt_bboxes_ignore = np.array(gt_bboxes_ignore, ndmin=2, dtype=np.float32) + else: + gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32) + + ann = dict(bbox=gt_bboxes, cls=gt_labels) + + if self.include_bboxes_ignore: + 
ann.update(dict(bbox_ignore=gt_bboxes_ignore, cls_ignore=np.array([], dtype=np.int64))) + if self.include_masks: + ann['masks'] = gt_masks + return ann diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/parsers/parser_voc.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/parsers/parser_voc.py new file mode 100644 index 0000000000..554d43315d --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/parsers/parser_voc.py @@ -0,0 +1,148 @@ +""" Pascal VOC dataset parser + +Copyright 2020 Ross Wightman +""" +import os +import xml.etree.ElementTree as ET +from collections import defaultdict +import numpy as np + +from .parser import Parser +from .parser_config import VocParserCfg + + +class VocParser(Parser): + + DEFAULT_CLASSES = ( + 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', + 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', + 'sheep', 'sofa', 'train', 'tvmonitor') + + def __init__(self, cfg: VocParserCfg): + super().__init__( + bbox_yxyx=cfg.bbox_yxyx, + has_labels=cfg.has_labels, + include_masks=False, # FIXME to support someday + include_bboxes_ignore=False, + ignore_empty_gt=cfg.has_labels and cfg.ignore_empty_gt, + min_img_size=cfg.min_img_size + ) + self.correct_bbox = 1 + self.keep_difficult = cfg.keep_difficult + + self.anns = None + self.img_id_to_idx = {} + self._load_annotations( + split_filename=cfg.split_filename, + img_filename=cfg.img_filename, + ann_filename=cfg.ann_filename, + classes=cfg.classes, + ) + + def _load_annotations( + self, + split_filename: str, + img_filename: str, + ann_filename: str, + classes=None, + ): + classes = classes or self.DEFAULT_CLASSES + self.cat_names = list(classes) + self.cat_ids = self.cat_names + self.cat_id_to_label = {cat: i + self.label_offset for i, cat in enumerate(self.cat_ids)} + + self.anns = [] + + with open(split_filename) as f: + ids = f.readlines() + for img_id in ids: + img_id = img_id.strip("\n") + filename = img_filename % img_id + xml_path = ann_filename % img_id + tree = ET.parse(xml_path) + root = tree.getroot() + size = root.find('size') + width = int(size.find('width').text) + height = int(size.find('height').text) + if min(width, height) < self.min_img_size: + continue + + anns = [] + for obj_idx, obj in enumerate(root.findall('object')): + name = obj.find('name').text + label = self.cat_id_to_label[name] + difficult = int(obj.find('difficult').text) + bnd_box = obj.find('bndbox') + bbox = [ + int(bnd_box.find('xmin').text), + int(bnd_box.find('ymin').text), + int(bnd_box.find('xmax').text), + int(bnd_box.find('ymax').text) + ] + anns.append(dict(label=label, bbox=bbox, difficult=difficult)) + + if not self.ignore_empty_gt or len(anns): + self.anns.append(anns) + self.img_infos.append(dict(id=img_id, file_name=filename, width=width, height=height)) + self.img_ids.append(img_id) + else: + self.img_ids_invalid.append(img_id) + + def merge(self, other): + assert len(self.cat_ids) == len(other.cat_ids) + self.img_ids.extend(other.img_ids) + self.img_infos.extend(other.img_infos) + self.anns.extend(other.anns) + + def get_ann_info(self, idx): + return self._parse_ann_info(self.anns[idx]) + + def _parse_ann_info(self, ann_info): + bboxes = [] + labels = [] + bboxes_ignore = [] + labels_ignore = [] + for ann in ann_info: + ignore = False + x1, y1, x2, y2 = ann['bbox'] + label = ann['label'] + w = x2 - x1 + h = y2 - y1 + if w < 1 or 
h < 1: + ignore = True + if self.yxyx: + bbox = [y1, x1, y2, x2] + else: + bbox = ann['bbox'] + if ignore or (ann['difficult'] and not self.keep_difficult): + bboxes_ignore.append(bbox) + labels_ignore.append(label) + else: + bboxes.append(bbox) + labels.append(label) + + if not bboxes: + bboxes = np.zeros((0, 4), dtype=np.float32) + labels = np.zeros((0, ), dtype=np.float32) + else: + bboxes = np.array(bboxes, ndmin=2, dtype=np.float32) - self.correct_bbox + labels = np.array(labels, dtype=np.float32) + + if self.include_bboxes_ignore: + if not bboxes_ignore: + bboxes_ignore = np.zeros((0, 4), dtype=np.float32) + labels_ignore = np.zeros((0, ), dtype=np.float32) + else: + bboxes_ignore = np.array(bboxes_ignore, ndmin=2, dtype=np.float32) - self.correct_bbox + labels_ignore = np.array(labels_ignore, dtype=np.float32) + + ann = dict( + bbox=bboxes.astype(np.float32), + cls=labels.astype(np.int64)) + + if self.include_bboxes_ignore: + ann.update(dict( + bbox_ignore=bboxes_ignore.astype(np.float32), + cls_ignore=labels_ignore.astype(np.int64))) + return ann + diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/random_erasing.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/random_erasing.py new file mode 100644 index 0000000000..ded751ecf0 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/random_erasing.py @@ -0,0 +1,94 @@ +""" Multi-Scale RandomErasing + +Copyright 2020 Ross Wightman +""" +import random +import math +import torch + + +def _get_pixels(per_pixel, rand_color, patch_size, dtype=torch.float32, device='cuda'): + # NOTE I've seen CUDA illegal memory access errors being caused by the normal_() + # paths, flip the order so normal is run on CPU if this becomes a problem + # Issue has been fixed in master https://github.com/pytorch/pytorch/issues/19508 + if per_pixel: + return torch.empty(patch_size, dtype=dtype, device=device).normal_() + elif rand_color: + return torch.empty((patch_size[0], 1, 1), dtype=dtype, device=device).normal_() + else: + return torch.zeros((patch_size[0], 1, 1), dtype=dtype, device=device) + + +class RandomErasing: + """ Randomly selects a rectangle region in an image and erases its pixels. + 'Random Erasing Data Augmentation' by Zhong et al. + See https://arxiv.org/pdf/1708.04896.pdf + + This variant of RandomErasing is tweaked for multi-scale obj detection training. + Args: + probability: Probability that the Random Erasing operation will be performed. + min_area: Minimum percentage of erased area wrt input image area. + max_area: Maximum percentage of erased area wrt input image area. + min_aspect: Minimum aspect ratio of erased area. + mode: pixel color mode, one of 'const', 'rand', or 'pixel' + 'const' - erase block is constant color of 0 for all channels + 'rand' - erase block is same per-channel random (normal) color + 'pixel' - erase block is per-pixel random (normal) color + max_count: maximum number of erasing blocks per image, area per box is scaled by count. + per-image count is randomly chosen between 1 and this value. 
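+        max_aspect: Maximum aspect ratio of erased area, defaults to 1/min_aspect when unset.
+        min_count: lower bound for the per-image block count (1 by default).
+        num_splits: when > 1, the first batch_size // num_splits samples of each batch are
+            left un-erased (the clean split).
+        device: device on which random fill values are generated, 'cuda' by default.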
+ """ + + def __init__( + self, + probability=0.5, min_area=0.02, max_area=1/4, min_aspect=0.3, max_aspect=None, + mode='const', min_count=1, max_count=None, num_splits=0, device='cuda'): + self.probability = probability + self.min_area = min_area + self.max_area = max_area + max_aspect = max_aspect or 1 / min_aspect + self.log_aspect_ratio = (math.log(min_aspect), math.log(max_aspect)) + self.min_count = min_count + self.max_count = max_count or min_count + self.num_splits = num_splits + mode = mode.lower() + self.rand_color = False + self.per_pixel = False + if mode == 'rand': + self.rand_color = True # per block random normal + elif mode == 'pixel': + self.per_pixel = True # per pixel random normal + else: + assert not mode or mode == 'const' + self.device = device + + def _erase(self, img, chan, img_h, img_w, dtype): + if random.random() > self.probability: + return + area = img_h * img_w + count = self.min_count if self.min_count == self.max_count else \ + random.randint(self.min_count, self.max_count) + for _ in range(count): + for attempt in range(10): + target_area = random.uniform(self.min_area, self.max_area) * area / count + aspect_ratio = math.exp(random.uniform(*self.log_aspect_ratio)) + h = int(round(math.sqrt(target_area * aspect_ratio))) + w = int(round(math.sqrt(target_area / aspect_ratio))) + if w < img_w and h < img_h: + top = random.randint(0, img_h - h) + left = random.randint(0, img_w - w) + img[:, top:top + h, left:left + w] = _get_pixels( + self.per_pixel, self.rand_color, (chan, h, w), + dtype=dtype, device=self.device) + break + + def __call__(self, input, target): + batch_size, chan, input_h, input_w = input.shape + img_scales = target['img_scale'] + img_size = (target['img_size'] / img_scales.unsqueeze(1)).int() + img_size[:, 0] = img_size[:, 0].clamp(max=input_w) + img_size[:, 1] = img_size[:, 1].clamp(max=input_h) + # skip first slice of batch if num_splits is set (for clean portion of samples) + batch_start = batch_size // self.num_splits if self.num_splits > 1 else 0 + for i in range(batch_start, batch_size): + self._erase(input[i], chan, img_size[i, 1], img_size[i, 0], input.dtype) + return input diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/transforms.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/transforms.py new file mode 100644 index 0000000000..13f4d618a5 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/data/transforms.py @@ -0,0 +1,153 @@ +""" COCO transforms (quick and dirty) + +Hacked together by Ross Wightman +""" +from PIL import Image +import numpy as np +import torch + +IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406) +IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225) +IMAGENET_INCEPTION_MEAN = (0.5, 0.5, 0.5) +IMAGENET_INCEPTION_STD = (0.5, 0.5, 0.5) + + +class ImageToNumpy: + + def __call__(self, pil_img, annotations: dict): + np_img = np.array(pil_img, dtype=np.uint8) + if np_img.ndim < 3: + np_img = np.expand_dims(np_img, axis=-1) + np_img = np.moveaxis(np_img, 2, 0) # HWC to CHW + return np_img, annotations + + +class ImageToTensor: + + def __init__(self, dtype=torch.float32): + self.dtype = dtype + + def __call__(self, pil_img, annotations: dict): + np_img = np.array(pil_img, dtype=np.uint8) + if np_img.ndim < 3: + np_img = np.expand_dims(np_img, axis=-1) + np_img = np.moveaxis(np_img, 2, 0) # HWC to CHW + return torch.from_numpy(np_img).to(dtype=self.dtype), annotations + + +def _pil_interp(method): 
+ if method == 'bicubic': + return Image.BICUBIC + elif method == 'lanczos': + return Image.LANCZOS + elif method == 'hamming': + return Image.HAMMING + else: + # default bilinear, do we want to allow nearest? + return Image.BILINEAR + + +_RANDOM_INTERPOLATION = (Image.BILINEAR, Image.BICUBIC) + + +def clip_boxes_(boxes, img_size): + height, width = img_size + clip_upper = np.array([height, width] * 2, dtype=boxes.dtype) + np.clip(boxes, 0, clip_upper, out=boxes) + + +def clip_boxes(boxes, img_size): + clipped_boxes = boxes.copy() + clip_boxes_(clipped_boxes, img_size) + return clipped_boxes + + +def _size_tuple(size): + if isinstance(size, int): + return size, size + else: + assert len(size) == 2 + return size + + +class ResizePad: + + def __init__(self, target_size: int, interpolation: str = 'bilinear', fill_color: tuple = (0, 0, 0)): + self.target_size = _size_tuple(target_size) + self.interpolation = interpolation + self.fill_color = fill_color + + def __call__(self, img, anno: dict): + width, height = img.size + + img_scale_y = self.target_size[0] / height + img_scale_x = self.target_size[1] / width + img_scale = min(img_scale_y, img_scale_x) + scaled_h = int(height * img_scale) + scaled_w = int(width * img_scale) + + new_img = Image.new("RGB", (self.target_size[1], self.target_size[0]), color=self.fill_color) + interp_method = _pil_interp(self.interpolation) + img = img.resize((scaled_w, scaled_h), interp_method) + new_img.paste(img) # pastes at 0,0 (upper-left corner) + + if 'bbox' in anno: + bbox = anno['bbox'] + bbox[:, :4] *= img_scale + bbox_bound = (min(scaled_h, self.target_size[0]), min(scaled_w, self.target_size[1])) + clip_boxes_(bbox, bbox_bound) # crop to bounds of target image or letter-box, whichever is smaller + valid_indices = (bbox[:, :2] < bbox[:, 2:4]).all(axis=1) + anno['bbox'] = bbox[valid_indices, :] + anno['cls'] = anno['cls'][valid_indices] + + anno['img_scale'] = 1. 
/ img_scale # back to original + + return new_img, anno + + +def resolve_fill_color(fill_color, img_mean=IMAGENET_DEFAULT_MEAN): + if isinstance(fill_color, tuple): + assert len(fill_color) == 3 + fill_color = fill_color + else: + try: + int_color = int(fill_color) + fill_color = (int_color,) * 3 + except ValueError: + assert fill_color == 'mean' + fill_color = tuple([int(round(255 * x)) for x in img_mean]) + return fill_color + + +class Compose: + + def __init__(self, transforms: list): + self.transforms = transforms + + def __call__(self, img, annotations: dict): + for t in self.transforms: + img, annotations = t(img, annotations) + return img, annotations + + +def transforms_coco_eval( + img_size=224, + interpolation='bilinear', + use_prefetcher=False, + fill_color='mean', + mean=IMAGENET_DEFAULT_MEAN, + std=IMAGENET_DEFAULT_STD): + + fill_color = resolve_fill_color(fill_color, mean) + + image_tfl = [ + ResizePad( + target_size=img_size, interpolation=interpolation, fill_color=fill_color), + ImageToNumpy(), + ] + + assert use_prefetcher, "Only supporting prefetcher usage right now" + + image_tf = Compose(image_tfl) + return image_tf + diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/distributed.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/distributed.py new file mode 100644 index 0000000000..63f024eb5c --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/distributed.py @@ -0,0 +1,308 @@ +""" PyTorch distributed helpers + +Some of this lifted from Detectron2 with other fns added by myself. Some of the Detectron2 fns +were intended for use with GLOO PG. I am using NCCL here with default PG so not everything will work +as is -RW +""" +import functools +import logging +import numpy as np +import pickle +import torch +import torch.distributed as dist + +_LOCAL_PROCESS_GROUP = None +""" +A torch process group which only includes processes that on the same machine as the current process. +This variable is set when processes are spawned by `launch()` in "engine/launch.py". +""" + + +def get_world_size() -> int: + if not dist.is_available(): + return 1 + if not dist.is_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank() -> int: + if not dist.is_available(): + return 0 + if not dist.is_initialized(): + return 0 + return dist.get_rank() + + +def get_local_rank() -> int: + """ + Returns: + The rank of the current process within the local (per-machine) process group. + """ + if not dist.is_available(): + return 0 + if not dist.is_initialized(): + return 0 + assert _LOCAL_PROCESS_GROUP is not None + return dist.get_rank(group=_LOCAL_PROCESS_GROUP) + + +def get_local_size() -> int: + """ + Returns: + The size of the per-machine process group, + i.e. the number of processes per machine. + """ + if not dist.is_available(): + return 1 + if not dist.is_initialized(): + return 1 + return dist.get_world_size(group=_LOCAL_PROCESS_GROUP) + + +def is_main_process() -> bool: + return get_rank() == 0 + + +def synchronize(): + """ + Helper function to synchronize (barrier) among all processes when + using distributed training + """ + if not dist.is_available(): + return + if not dist.is_initialized(): + return + world_size = dist.get_world_size() + if world_size == 1: + return + dist.barrier() + + +@functools.lru_cache() +def _get_global_gloo_group(): + """ + Return a process group based on gloo backend, containing all the ranks + The result is cached. 
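+    Used as the default group by all_gather() and gather() below, so arbitrary picklable
+    objects can be serialized on CPU even when the default process group backend is NCCL.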
+ """ + if dist.get_backend() == "nccl": + return dist.new_group(backend="gloo") + else: + return dist.group.WORLD + + +def _serialize_to_tensor(data, group): + backend = dist.get_backend(group) + assert backend in ["gloo", "nccl"] + device = torch.device("cpu" if backend == "gloo" else "cuda") + + buffer = pickle.dumps(data) + if len(buffer) > 1024 ** 3: + logger = logging.getLogger(__name__) + logger.warning( + "Rank {} trying to all-gather {:.2f} GB of data on device {}".format( + get_rank(), len(buffer) / (1024 ** 3), device + ) + ) + storage = torch.ByteStorage.from_buffer(buffer) + tensor = torch.ByteTensor(storage).to(device=device) + return tensor + + +def _pad_to_largest_tensor(tensor, group): + """ + Returns: + list[int]: size of the tensor, on each rank + Tensor: padded tensor that has the max size + """ + world_size = dist.get_world_size(group=group) + assert ( + world_size >= 1 + ), "comm.gather/all_gather must be called from ranks within the given group!" + local_size = torch.tensor([tensor.numel()], dtype=torch.int64, device=tensor.device) + size_list = [ + torch.zeros([1], dtype=torch.int64, device=tensor.device) for _ in range(world_size) + ] + dist.all_gather(size_list, local_size, group=group) + size_list = [int(size.item()) for size in size_list] + + max_size = max(size_list) + + # we pad the tensor because torch all_gather does not support + # gathering tensors of different shapes + if local_size != max_size: + padding = torch.zeros((max_size - local_size,), dtype=torch.uint8, device=tensor.device) + tensor = torch.cat((tensor, padding), dim=0) + return size_list, tensor + + +def all_gather(data, group=None): + """ + Run all_gather on arbitrary picklable data (not necessarily tensors). + Args: + data: any picklable object + group: a torch process group. By default, will use a group which + contains all ranks on gloo backend. + Returns: + list[data]: list of data gathered from each rank + """ + if get_world_size() == 1: + return [data] + if group is None: + group = _get_global_gloo_group() + if dist.get_world_size(group) == 1: + return [data] + + tensor = _serialize_to_tensor(data, group) + + size_list, tensor = _pad_to_largest_tensor(tensor, group) + max_size = max(size_list) + + # receiving Tensor from all ranks + tensor_list = [torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) for _ in size_list] + dist.all_gather(tensor_list, tensor, group=group) + + data_list = [] + for size, tensor in zip(size_list, tensor_list): + buffer = tensor.cpu().numpy().tobytes()[:size] + data_list.append(pickle.loads(buffer)) + + return data_list + + +def gather(data, dst=0, group=None): + """ + Run gather on arbitrary picklable data (not necessarily tensors). + Args: + data: any picklable object + dst (int): destination rank + group: a torch process group. By default, will use a group which + contains all ranks on gloo backend. + Returns: + list[data]: on dst, a list of data gathered from each rank. Otherwise, + an empty list. 
+ """ + if get_world_size() == 1: + return [data] + if group is None: + group = _get_global_gloo_group() + if dist.get_world_size(group=group) == 1: + return [data] + rank = dist.get_rank(group=group) + + tensor = _serialize_to_tensor(data, group) + size_list, tensor = _pad_to_largest_tensor(tensor, group) + + # receiving Tensor from all ranks + if rank == dst: + max_size = max(size_list) + tensor_list = [torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) for _ in size_list] + dist.gather(tensor, tensor_list, dst=dst, group=group) + + data_list = [] + for size, tensor in zip(size_list, tensor_list): + buffer = tensor.cpu().numpy().tobytes()[:size] + data_list.append(pickle.loads(buffer)) + return data_list + else: + dist.gather(tensor, [], dst=dst, group=group) + return [] + + +def shared_random_seed(): + """ + Returns: + int: a random number that is the same across all workers. + If workers need a shared RNG, they can use this shared seed to + create one. + All workers must call this function, otherwise it will deadlock. + """ + ints = np.random.randint(2 ** 31) + all_ints = all_gather(ints) + return all_ints[0] + + +def reduce_dict(input_dict, average=True): + """ + Reduce the values in the dictionary from all processes so that process with rank + 0 has the reduced results. + Args: + input_dict (dict): inputs to be reduced. All the values must be scalar CUDA Tensor. + average (bool): whether to do average or sum + Returns: + a dict with the same keys as input_dict, after reduction. + """ + world_size = get_world_size() + if world_size < 2: + return input_dict + with torch.no_grad(): + names = [] + values = [] + # sort the keys so that they are consistent across processes + for k in sorted(input_dict.keys()): + names.append(k) + values.append(input_dict[k]) + values = torch.stack(values, dim=0) + dist.reduce(values, dst=0) + if dist.get_rank() == 0 and average: + # only main process gets accumulated, so only divide by + # world_size in this case + values /= world_size + reduced_dict = {k: v for k, v in zip(names, values)} + return reduced_dict + + +def all_gather_container(container, group=None, cat_dim=0): + group = group or dist.group.WORLD + world_size = dist.get_world_size(group) + + def _do_gather(tensor): + tensor_list = [torch.empty_like(tensor) for _ in range(world_size)] + dist.all_gather(tensor_list, tensor, group=group) + return torch.cat(tensor_list, dim=cat_dim) + + if isinstance(container, dict): + gathered = dict() + for k, v in container.items(): + v = _do_gather(v) + gathered[k] = v + return gathered + elif isinstance(container, (list, tuple)): + gathered = [_do_gather(v) for v in container] + if isinstance(container, tuple): + gathered = tuple(gathered) + return gathered + else: + # if not a dict, list, tuple, expect a singular tensor + assert isinstance(container, torch.Tensor) + return _do_gather(container) + + +def gather_container(container, dst, group=None, cat_dim=0): + group = group or dist.group.WORLD + world_size = dist.get_world_size(group) + this_rank = dist.get_rank(group) + + def _do_gather(tensor): + if this_rank == dst: + tensor_list = [torch.empty_like(tensor) for _ in range(world_size)] + else: + tensor_list = None + dist.gather(tensor, tensor_list, dst=dst, group=group) + return torch.cat(tensor_list, dim=cat_dim) + + if isinstance(container, dict): + gathered = dict() + for k, v in container.items(): + v = _do_gather(v) + gathered[k] = v + return gathered + elif isinstance(container, (list, tuple)): + gathered = [_do_gather(v) for v in 
container] + if isinstance(container, tuple): + gathered = tuple(gathered) + return gathered + else: + # if not a dict, list, tuple, expect a singular tensor + assert isinstance(container, torch.Tensor) + return _do_gather(container) diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/efficientdet.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/efficientdet.py new file mode 100644 index 0000000000..cf7a1b6dcc --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/efficientdet.py @@ -0,0 +1,620 @@ +""" PyTorch EfficientDet model + +Based on official Tensorflow version at: https://github.com/google/automl/tree/master/efficientdet +Paper: https://arxiv.org/abs/1911.09070 + +Hacked together by Ross Wightman +""" +import torch +import torch.nn as nn +import torch.nn.functional as F +import logging +import math +from collections import OrderedDict +from typing import List, Callable, Optional, Union, Tuple +from functools import partial + + +from timm import create_model +from timm.models.layers import create_conv2d, create_pool2d, Swish, get_act_layer +from .config import get_fpn_config, set_config_writeable, set_config_readonly + +_DEBUG = False + +_ACT_LAYER = Swish + + +class SequentialList(nn.Sequential): + """ This module exists to work around torchscript typing issues list -> list""" + def __init__(self, *args): + super(SequentialList, self).__init__(*args) + + def forward(self, x: List[torch.Tensor]) -> List[torch.Tensor]: + for module in self: + x = module(x) + return x + + +class ConvBnAct2d(nn.Module): + def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, padding='', bias=False, + norm_layer=nn.BatchNorm2d, act_layer=_ACT_LAYER): + super(ConvBnAct2d, self).__init__() + self.conv = create_conv2d( + in_channels, out_channels, kernel_size, stride=stride, dilation=dilation, padding=padding, bias=bias) + self.bn = None if norm_layer is None else norm_layer(out_channels) + self.act = None if act_layer is None else act_layer(inplace=True) + + def forward(self, x): + x = self.conv(x) + if self.bn is not None: + x = self.bn(x) + if self.act is not None: + x = self.act(x) + return x + + +class SeparableConv2d(nn.Module): + """ Separable Conv + """ + def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1, padding='', bias=False, + channel_multiplier=1.0, pw_kernel_size=1, norm_layer=nn.BatchNorm2d, act_layer=_ACT_LAYER): + super(SeparableConv2d, self).__init__() + self.conv_dw = create_conv2d( + in_channels, int(in_channels * channel_multiplier), kernel_size, + stride=stride, dilation=dilation, padding=padding, depthwise=True) + + self.conv_pw = create_conv2d( + int(in_channels * channel_multiplier), out_channels, pw_kernel_size, padding=padding, bias=bias) + + self.bn = None if norm_layer is None else norm_layer(out_channels) + self.act = None if act_layer is None else act_layer(inplace=True) + + def forward(self, x): + x = self.conv_dw(x) + x = self.conv_pw(x) + if self.bn is not None: + x = self.bn(x) + if self.act is not None: + x = self.act(x) + return x + + +class Interpolate2d(nn.Module): + r"""Resamples a 2d Image + + The input data is assumed to be of the form + `minibatch x channels x [optional depth] x [optional height] x width`. + Hence, for spatial inputs, we expect a 4D Tensor and for volumetric inputs, we expect a 5D Tensor. 
+ + The algorithms available for upsampling are nearest neighbor and linear, + bilinear, bicubic and trilinear for 3D, 4D and 5D input Tensor, + respectively. + + One can either give a :attr:`scale_factor` or the target output :attr:`size` to + calculate the output size. (You cannot give both, as it is ambiguous) + + Args: + size (int or Tuple[int] or Tuple[int, int] or Tuple[int, int, int], optional): + output spatial sizes + scale_factor (float or Tuple[float] or Tuple[float, float] or Tuple[float, float, float], optional): + multiplier for spatial size. Has to match input size if it is a tuple. + mode (str, optional): the upsampling algorithm: one of ``'nearest'``, + ``'linear'``, ``'bilinear'``, ``'bicubic'`` and ``'trilinear'``. + Default: ``'nearest'`` + align_corners (bool, optional): if ``True``, the corner pixels of the input + and output tensors are aligned, and thus preserving the values at + those pixels. This only has effect when :attr:`mode` is + ``'linear'``, ``'bilinear'``, or ``'trilinear'``. Default: ``False`` + """ + __constants__ = ['size', 'scale_factor', 'mode', 'align_corners', 'name'] + name: str + size: Optional[Union[int, Tuple[int, int]]] + scale_factor: Optional[Union[float, Tuple[float, float]]] + mode: str + align_corners: Optional[bool] + + def __init__(self, + size: Optional[Union[int, Tuple[int, int]]] = None, + scale_factor: Optional[Union[float, Tuple[float, float]]] = None, + mode: str = 'nearest', + align_corners: bool = False) -> None: + super(Interpolate2d, self).__init__() + self.name = type(self).__name__ + self.size = size + if isinstance(scale_factor, tuple): + self.scale_factor = tuple(float(factor) for factor in scale_factor) + else: + self.scale_factor = float(scale_factor) if scale_factor else None + self.mode = mode + self.align_corners = None if mode == 'nearest' else align_corners + + def forward(self, input: torch.Tensor) -> torch.Tensor: + return F.interpolate( + input, self.size, self.scale_factor, self.mode, self.align_corners, recompute_scale_factor=False) + + +class ResampleFeatureMap(nn.Sequential): + + def __init__( + self, in_channels, out_channels, reduction_ratio=1., pad_type='', downsample=None, upsample=None, + norm_layer=nn.BatchNorm2d, apply_bn=False, conv_after_downsample=False, redundant_bias=False): + super(ResampleFeatureMap, self).__init__() + downsample = downsample or 'max' + upsample = upsample or 'nearest' + self.in_channels = in_channels + self.out_channels = out_channels + self.reduction_ratio = reduction_ratio + self.conv_after_downsample = conv_after_downsample + + conv = None + if in_channels != out_channels: + conv = ConvBnAct2d( + in_channels, out_channels, kernel_size=1, padding=pad_type, + norm_layer=norm_layer if apply_bn else None, + bias=not apply_bn or redundant_bias, act_layer=None) + + if reduction_ratio > 1: + if conv is not None and not self.conv_after_downsample: + self.add_module('conv', conv) + if downsample in ('max', 'avg'): + stride_size = int(reduction_ratio) + downsample = create_pool2d( + downsample, kernel_size=stride_size + 1, stride=stride_size, padding=pad_type) + else: + downsample = Interpolate2d(scale_factor=1./reduction_ratio, mode=downsample) + self.add_module('downsample', downsample) + if conv is not None and self.conv_after_downsample: + self.add_module('conv', conv) + else: + if conv is not None: + self.add_module('conv', conv) + if reduction_ratio < 1: + scale = int(1 // reduction_ratio) + self.add_module('upsample', Interpolate2d(scale_factor=scale, mode=upsample)) + + # def 
forward(self, x): + # # here for debugging only + # assert x.shape[1] == self.in_channels + # if self.reduction_ratio > 1: + # if hasattr(self, 'conv') and not self.conv_after_downsample: + # x = self.conv(x) + # x = self.downsample(x) + # if hasattr(self, 'conv') and self.conv_after_downsample: + # x = self.conv(x) + # else: + # if hasattr(self, 'conv'): + # x = self.conv(x) + # if self.reduction_ratio < 1: + # x = self.upsample(x) + # return x + + +class FpnCombine(nn.Module): + def __init__(self, feature_info, fpn_config, fpn_channels, inputs_offsets, target_reduction, pad_type='', + downsample=None, upsample=None, norm_layer=nn.BatchNorm2d, apply_resample_bn=False, + conv_after_downsample=False, redundant_bias=False, weight_method='attn'): + super(FpnCombine, self).__init__() + self.inputs_offsets = inputs_offsets + self.weight_method = weight_method + + self.resample = nn.ModuleDict() + for idx, offset in enumerate(inputs_offsets): + in_channels = fpn_channels + if offset < len(feature_info): + in_channels = feature_info[offset]['num_chs'] + input_reduction = feature_info[offset]['reduction'] + else: + node_idx = offset - len(feature_info) + input_reduction = fpn_config.nodes[node_idx]['reduction'] + reduction_ratio = target_reduction / input_reduction + self.resample[str(offset)] = ResampleFeatureMap( + in_channels, fpn_channels, reduction_ratio=reduction_ratio, pad_type=pad_type, + downsample=downsample, upsample=upsample, norm_layer=norm_layer, apply_bn=apply_resample_bn, + conv_after_downsample=conv_after_downsample, redundant_bias=redundant_bias) + + if weight_method == 'attn' or weight_method == 'fastattn': + self.edge_weights = nn.Parameter(torch.ones(len(inputs_offsets)), requires_grad=True) # WSM + else: + self.edge_weights = None + + def forward(self, x: List[torch.Tensor]): + dtype = x[0].dtype + nodes = [] + for offset, resample in zip(self.inputs_offsets, self.resample.values()): + input_node = x[offset] + input_node = resample(input_node) + nodes.append(input_node) + + if self.weight_method == 'attn': + normalized_weights = torch.softmax(self.edge_weights.to(dtype=dtype), dim=0) + out = torch.stack(nodes, dim=-1) * normalized_weights + elif self.weight_method == 'fastattn': + edge_weights = nn.functional.relu(self.edge_weights.to(dtype=dtype)) + weights_sum = torch.sum(edge_weights) + out = torch.stack( + [(nodes[i] * edge_weights[i]) / (weights_sum + 0.0001) for i in range(len(nodes))], dim=-1) + elif self.weight_method == 'sum': + out = torch.stack(nodes, dim=-1) + else: + raise ValueError('unknown weight_method {}'.format(self.weight_method)) + out = torch.sum(out, dim=-1) + return out + + +class Fnode(nn.Module): + """ A simple wrapper used in place of nn.Sequential for torchscript typing + Handles input type List[Tensor] -> output type Tensor + """ + def __init__(self, combine: nn.Module, after_combine: nn.Module): + super(Fnode, self).__init__() + self.combine = combine + self.after_combine = after_combine + + def forward(self, x: List[torch.Tensor]) -> torch.Tensor: + return self.after_combine(self.combine(x)) + + +class BiFpnLayer(nn.Module): + def __init__(self, feature_info, fpn_config, fpn_channels, num_levels=5, pad_type='', + downsample=None, upsample=None, norm_layer=nn.BatchNorm2d, act_layer=_ACT_LAYER, + apply_resample_bn=False, conv_after_downsample=True, conv_bn_relu_pattern=False, + separable_conv=True, redundant_bias=False): + super(BiFpnLayer, self).__init__() + self.num_levels = num_levels + self.conv_bn_relu_pattern = False + + self.feature_info = 
[] + self.fnode = nn.ModuleList() + for i, fnode_cfg in enumerate(fpn_config.nodes): + logging.debug('fnode {} : {}'.format(i, fnode_cfg)) + reduction = fnode_cfg['reduction'] + combine = FpnCombine( + feature_info, fpn_config, fpn_channels, tuple(fnode_cfg['inputs_offsets']), + target_reduction=reduction, pad_type=pad_type, downsample=downsample, upsample=upsample, + norm_layer=norm_layer, apply_resample_bn=apply_resample_bn, conv_after_downsample=conv_after_downsample, + redundant_bias=redundant_bias, weight_method=fnode_cfg['weight_method']) + + after_combine = nn.Sequential() + conv_kwargs = dict( + in_channels=fpn_channels, out_channels=fpn_channels, kernel_size=3, padding=pad_type, + bias=False, norm_layer=norm_layer, act_layer=act_layer) + if not conv_bn_relu_pattern: + conv_kwargs['bias'] = redundant_bias + conv_kwargs['act_layer'] = None + after_combine.add_module('act', act_layer(inplace=True)) + after_combine.add_module( + 'conv', SeparableConv2d(**conv_kwargs) if separable_conv else ConvBnAct2d(**conv_kwargs)) + + self.fnode.append(Fnode(combine=combine, after_combine=after_combine)) + self.feature_info.append(dict(num_chs=fpn_channels, reduction=reduction)) + + self.feature_info = self.feature_info[-num_levels::] + + def forward(self, x: List[torch.Tensor]): + for fn in self.fnode: + x.append(fn(x)) + return x[-self.num_levels::] + + +class BiFpn(nn.Module): + + def __init__(self, config, feature_info): + super(BiFpn, self).__init__() + self.num_levels = config.num_levels + norm_layer = config.norm_layer or nn.BatchNorm2d + if config.norm_kwargs: + norm_layer = partial(norm_layer, **config.norm_kwargs) + act_layer = get_act_layer(config.act_type) or _ACT_LAYER + fpn_config = config.fpn_config or get_fpn_config( + config.fpn_name, min_level=config.min_level, max_level=config.max_level) + + self.resample = nn.ModuleDict() + for level in range(config.num_levels): + if level < len(feature_info): + in_chs = feature_info[level]['num_chs'] + reduction = feature_info[level]['reduction'] + else: + # Adds a coarser level by downsampling the last feature map + reduction_ratio = 2 + self.resample[str(level)] = ResampleFeatureMap( + in_channels=in_chs, + out_channels=config.fpn_channels, + pad_type=config.pad_type, + downsample=config.downsample_type, + upsample=config.upsample_type, + norm_layer=norm_layer, + reduction_ratio=reduction_ratio, + apply_bn=config.apply_resample_bn, + conv_after_downsample=config.conv_after_downsample, + redundant_bias=config.redundant_bias, + ) + in_chs = config.fpn_channels + reduction = int(reduction * reduction_ratio) + feature_info.append(dict(num_chs=in_chs, reduction=reduction)) + + self.cell = SequentialList() + for rep in range(config.fpn_cell_repeats): + logging.debug('building cell {}'.format(rep)) + fpn_layer = BiFpnLayer( + feature_info=feature_info, + fpn_config=fpn_config, + fpn_channels=config.fpn_channels, + num_levels=config.num_levels, + pad_type=config.pad_type, + downsample=config.downsample_type, + upsample=config.upsample_type, + norm_layer=norm_layer, + act_layer=act_layer, + separable_conv=config.separable_conv, + apply_resample_bn=config.apply_resample_bn, + conv_after_downsample=config.conv_after_downsample, + conv_bn_relu_pattern=config.conv_bn_relu_pattern, + redundant_bias=config.redundant_bias, + ) + self.cell.add_module(str(rep), fpn_layer) + feature_info = fpn_layer.feature_info + + def forward(self, x: List[torch.Tensor]): + for resample in self.resample.values(): + x.append(resample(x[-1])) + x = self.cell(x) + return x + + 
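
The 'fastattn' branch in FpnCombine above normalizes the learned per-edge weights with a ReLU and a sum rather than a softmax, intended as a cheaper alternative at inference time. A minimal standalone sketch of that fusion, assuming two feature maps already resampled to a common shape (all tensor shapes and names below are illustrative, not taken from the patch):

    import torch
    import torch.nn as nn

    # Two input edges, already resampled to a common (N, C, H, W) as FpnCombine guarantees.
    nodes = [torch.randn(1, 64, 32, 32), torch.randn(1, 64, 32, 32)]
    edge_weights = nn.Parameter(torch.ones(len(nodes)))       # one learnable scalar per edge

    w = nn.functional.relu(edge_weights)                      # keep weights non-negative
    stacked = torch.stack(nodes, dim=-1)                      # (1, 64, 32, 32, 2)
    fused = (stacked * (w / (w.sum() + 0.0001))).sum(dim=-1)  # weighted sum over edges
    print(fused.shape)                                        # torch.Size([1, 64, 32, 32])

The softmax-based 'attn' variant replaces the ReLU/sum normalization with torch.softmax over the same edge_weights.
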
+class HeadNet(nn.Module): + + def __init__(self, config, num_outputs): + super(HeadNet, self).__init__() + self.num_levels = config.num_levels + self.bn_level_first = getattr(config, 'head_bn_level_first', False) + norm_layer = config.norm_layer or nn.BatchNorm2d + if config.norm_kwargs: + norm_layer = partial(norm_layer, **config.norm_kwargs) + act_type = config.head_act_type if getattr(config, 'head_act_type', None) else config.act_type + act_layer = get_act_layer(act_type) or _ACT_LAYER + + # Build convolution repeats + conv_fn = SeparableConv2d if config.separable_conv else ConvBnAct2d + conv_kwargs = dict( + in_channels=config.fpn_channels, out_channels=config.fpn_channels, kernel_size=3, + padding=config.pad_type, bias=config.redundant_bias, act_layer=None, norm_layer=None) + self.conv_rep = nn.ModuleList([conv_fn(**conv_kwargs) for _ in range(config.box_class_repeats)]) + + # Build batchnorm repeats. There is a unique batchnorm per feature level for each repeat. + # This can be organized with repeats first or feature levels first in module lists; the original models + # and weights were set up with repeats first, while levels first is required for efficient torchscript usage. + self.bn_rep = nn.ModuleList() + if self.bn_level_first: + for _ in range(self.num_levels): + self.bn_rep.append(nn.ModuleList([ + norm_layer(config.fpn_channels) for _ in range(config.box_class_repeats)])) + else: + for _ in range(config.box_class_repeats): + self.bn_rep.append(nn.ModuleList([ + nn.Sequential(OrderedDict([('bn', norm_layer(config.fpn_channels))])) + for _ in range(self.num_levels)])) + + self.act = act_layer(inplace=True) + + # Prediction (output) layer. Has bias with special init reqs, see init fn. + num_anchors = len(config.aspect_ratios) * config.num_scales + predict_kwargs = dict( + in_channels=config.fpn_channels, out_channels=num_outputs * num_anchors, kernel_size=3, + padding=config.pad_type, bias=True, norm_layer=None, act_layer=None) + self.predict = conv_fn(**predict_kwargs) + + @torch.jit.ignore() + def toggle_bn_level_first(self): + """ Toggle the batchnorm layers between feature level first vs repeat first access pattern + Limitations in torchscript require feature levels to be iterated over first. + + This function can be used to allow loading weights in the original order, and then toggle before + jit scripting the model. + """ + with torch.no_grad(): + new_bn_rep = nn.ModuleList() + for i in range(len(self.bn_rep[0])): + bn_first = nn.ModuleList() + for r in self.bn_rep.children(): + m = r[i] + # NOTE original rep first model def has extra Sequential container with 'bn', this was + # flattened in the level first definition.
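+                    # The loop below transposes the nesting: self.bn_rep[a][b] becomes new_bn_rep[b][a],
+                    # wrapping or unwrapping the Sequential 'bn' container as needed.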
+ bn_first.append(m[0] if isinstance(m, nn.Sequential) else nn.Sequential(OrderedDict([('bn', m)]))) + new_bn_rep.append(bn_first) + self.bn_level_first = not self.bn_level_first + self.bn_rep = new_bn_rep + + @torch.jit.ignore() + def _forward(self, x: List[torch.Tensor]) -> List[torch.Tensor]: + outputs = [] + for level in range(self.num_levels): + x_level = x[level] + for conv, bn in zip(self.conv_rep, self.bn_rep): + x_level = conv(x_level) + x_level = bn[level](x_level) # this is not allowed in torchscript + x_level = self.act(x_level) + outputs.append(self.predict(x_level)) + return outputs + + def _forward_level_first(self, x: List[torch.Tensor]) -> List[torch.Tensor]: + outputs = [] + for level, bn_rep in enumerate(self.bn_rep): # iterating over first bn dim first makes TS happy + x_level = x[level] + for conv, bn in zip(self.conv_rep, bn_rep): + x_level = conv(x_level) + x_level = bn(x_level) + x_level = self.act(x_level) + outputs.append(self.predict(x_level)) + return outputs + + def forward(self, x: List[torch.Tensor]) -> List[torch.Tensor]: + if self.bn_level_first: + return self._forward_level_first(x) + else: + return self._forward(x) + + +def _init_weight(m, n='', ): + """ Weight initialization as per Tensorflow official implementations. + """ + + def _fan_in_out(w, groups=1): + dimensions = w.dim() + if dimensions < 2: + raise ValueError("Fan in and fan out can not be computed for tensor with fewer than 2 dimensions") + num_input_fmaps = w.size(1) + num_output_fmaps = w.size(0) + receptive_field_size = 1 + if w.dim() > 2: + receptive_field_size = w[0][0].numel() + fan_in = num_input_fmaps * receptive_field_size + fan_out = num_output_fmaps * receptive_field_size + fan_out //= groups + return fan_in, fan_out + + def _glorot_uniform(w, gain=1, groups=1): + fan_in, fan_out = _fan_in_out(w, groups) + gain /= max(1., (fan_in + fan_out) / 2.) # fan avg + limit = math.sqrt(3.0 * gain) + w.data.uniform_(-limit, limit) + + def _variance_scaling(w, gain=1, groups=1): + fan_in, fan_out = _fan_in_out(w, groups) + gain /= max(1., fan_in) # fan in + # gain /= max(1., (fan_in + fan_out) / 2.) # fan + + # should it be normal or trunc normal? using normal for now since no good trunc in PT + # constant taken from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.) + # std = math.sqrt(gain) / .87962566103423978 + # w.data.trunc_normal(std=std) + std = math.sqrt(gain) + w.data.normal_(std=std) + + if isinstance(m, SeparableConv2d): + if 'box_net' in n or 'class_net' in n: + _variance_scaling(m.conv_dw.weight, groups=m.conv_dw.groups) + _variance_scaling(m.conv_pw.weight) + if m.conv_pw.bias is not None: + if 'class_net.predict' in n: + m.conv_pw.bias.data.fill_(-math.log((1 - 0.01) / 0.01)) + else: + m.conv_pw.bias.data.zero_() + else: + _glorot_uniform(m.conv_dw.weight, groups=m.conv_dw.groups) + _glorot_uniform(m.conv_pw.weight) + if m.conv_pw.bias is not None: + m.conv_pw.bias.data.zero_() + elif isinstance(m, ConvBnAct2d): + if 'box_net' in n or 'class_net' in n: + m.conv.weight.data.normal_(std=.01) + if m.conv.bias is not None: + if 'class_net.predict' in n: + m.conv.bias.data.fill_(-math.log((1 - 0.01) / 0.01)) + else: + m.conv.bias.data.zero_() + else: + _glorot_uniform(m.conv.weight) + if m.conv.bias is not None: + m.conv.bias.data.zero_() + elif isinstance(m, nn.BatchNorm2d): + # looks like all bn init the same? 
+ m.weight.data.fill_(1.0) + m.bias.data.zero_() + + +def _init_weight_alt(m, n='', ): + """ Weight initialization alternative, based on EfficientNet backbone init w/ class bias addition + NOTE: this will likely be removed after some experimentation + """ + if isinstance(m, nn.Conv2d): + fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + fan_out //= m.groups + m.weight.data.normal_(0, math.sqrt(2.0 / fan_out)) + if m.bias is not None: + if 'class_net.predict' in n: + m.bias.data.fill_(-math.log((1 - 0.01) / 0.01)) + else: + m.bias.data.zero_() + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1.0) + m.bias.data.zero_() + + +def get_feature_info(backbone): + if isinstance(backbone.feature_info, Callable): + # old accessor for timm versions <= 0.1.30, efficientnet and mobilenetv3 and related nets only + feature_info = [dict(num_chs=f['num_chs'], reduction=f['reduction']) + for i, f in enumerate(backbone.feature_info())] + else: + # new feature info accessor, timm >= 0.2, all models supported + feature_info = backbone.feature_info.get_dicts(keys=['num_chs', 'reduction']) + return feature_info + + +class EfficientDet(nn.Module): + + def __init__(self, config, pretrained_backbone=True, alternate_init=False): + super(EfficientDet, self).__init__() + self.config = config + set_config_readonly(self.config) + self.backbone = create_model( + config.backbone_name, features_only=True, + out_indices=self.config.backbone_indices or (2, 3, 4), + pretrained=pretrained_backbone, **config.backbone_args) + feature_info = get_feature_info(self.backbone) + self.fpn = BiFpn(self.config, feature_info) + self.class_net = HeadNet(self.config, num_outputs=self.config.num_classes) + self.box_net = HeadNet(self.config, num_outputs=4) + + for n, m in self.named_modules(): + if 'backbone' not in n: + if alternate_init: + _init_weight_alt(m, n) + else: + _init_weight(m, n) + + @torch.jit.ignore() + def reset_head(self, num_classes=None, aspect_ratios=None, num_scales=None, alternate_init=False): + reset_class_head = False + reset_box_head = False + set_config_writeable(self.config) + if num_classes is not None: + reset_class_head = True + self.config.num_classes = num_classes + if aspect_ratios is not None: + reset_box_head = True + self.config.aspect_ratios = aspect_ratios + if num_scales is not None: + reset_box_head = True + self.config.num_scales = num_scales + set_config_readonly(self.config) + + if reset_class_head: + self.class_net = HeadNet(self.config, num_outputs=self.config.num_classes) + for n, m in self.class_net.named_modules(prefix='class_net'): + if alternate_init: + _init_weight_alt(m, n) + else: + _init_weight(m, n) + + if reset_box_head: + self.box_net = HeadNet(self.config, num_outputs=4) + for n, m in self.box_net.named_modules(prefix='box_net'): + if alternate_init: + _init_weight_alt(m, n) + else: + _init_weight(m, n) + + @torch.jit.ignore() + def toggle_head_bn_level_first(self): + """ Toggle the head batchnorm layers between being accessed with feature_level first vs repeat first + """ + self.class_net.toggle_bn_level_first() + self.box_net.toggle_bn_level_first() + + def forward(self, x): + x = self.backbone(x) + x = self.fpn(x) + x_class = self.class_net(x) + x_box = self.box_net(x) + return x_class, x_box diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/README.md b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/README.md new file mode 100644 index 0000000000..3546caa990 ---
/dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/README.md @@ -0,0 +1,7 @@ +# Tensorflow Models Evaluation + +The code in this folder has been extracted and adapted from evaluation/evaluator code at https://github.com/tensorflow/models/tree/master/research/object_detection/utils + +Original code is licensed Apache 2.0, Copyright Google Inc. +https://github.com/tensorflow/models/blob/master/LICENSE + \ No newline at end of file diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/__init__.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/detection_evaluator.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/detection_evaluator.py new file mode 100644 index 0000000000..60e4ba8983 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/detection_evaluator.py @@ -0,0 +1,590 @@ +from abc import ABCMeta +from abc import abstractmethod +#import collections +import logging +import unicodedata +import numpy as np + +from .fields import InputDataFields, DetectionResultFields +from .object_detection_evaluation import ObjectDetectionEvaluation + + +def create_category_index(categories): + """Creates dictionary of COCO compatible categories keyed by category id. + Args: + categories: a list of dicts, each of which has the following keys: + 'id': (required) an integer id uniquely identifying this category. + 'name': (required) string representing category name e.g., 'cat', 'dog', 'pizza'. + Returns: + category_index: a dict containing the same entries as categories, but keyed + by the 'id' field of each category. + """ + category_index = {} + for cat in categories: + category_index[cat['id']] = cat + return category_index + + +class DetectionEvaluator(metaclass=ABCMeta): + """Interface for object detection evaluation classes. + Example usage of the Evaluator: + ------------------------------ + evaluator = DetectionEvaluator(categories) + # Detections and groundtruth for image 1. + evaluator.add_single_ground_truth_image_info(...) + evaluator.add_single_detected_image_info(...) + # Detections and groundtruth for image 2. + evaluator.add_single_ground_truth_image_info(...) + evaluator.add_single_detected_image_info(...) + metrics_dict = evaluator.evaluate() + """ + + def __init__(self, categories): + """Constructor. + Args: + categories: A list of dicts, each of which has the following keys - + 'id': (required) an integer id uniquely identifying this category. + 'name': (required) string representing category name e.g., 'cat', 'dog'. + """ + self._categories = categories + + def observe_result_dict_for_single_example(self, eval_dict): + """Observes an evaluation result dict for a single example. + When executing eagerly, once all observations have been observed by this + method you can use `.evaluate()` to get the final metrics. + When using `tf.estimator.Estimator` for evaluation this function is used by + `get_estimator_eval_metric_ops()` to construct the metric update op. + Args: + eval_dict: A dictionary that holds tensors for evaluating an object + detection model, returned from + eval_util.result_dict_for_single_example().
+ Returns: + None when executing eagerly, or an update_op that can be used to update + the eval metrics in `tf.estimator.EstimatorSpec`. + """ + raise NotImplementedError('Not implemented for this evaluator!') + + @abstractmethod + def add_single_ground_truth_image_info(self, image_id, gt_dict): + """Adds groundtruth for a single image to be used for evaluation. + Args: + image_id: A unique string/integer identifier for the image. + gt_dict: A dictionary of groundtruth numpy arrays required for evaluations. + """ + pass + + @abstractmethod + def add_single_detected_image_info(self, image_id, detections_dict): + """Adds detections for a single image to be used for evaluation. + Args: + image_id: A unique string/integer identifier for the image. + detections_dict: A dictionary of detection numpy arrays required for evaluation. + """ + pass + + @abstractmethod + def evaluate(self): + """Evaluates detections and returns a dictionary of metrics.""" + pass + + @abstractmethod + def clear(self): + """Clears the state to prepare for a fresh evaluation.""" + pass + + +class ObjectDetectionEvaluator(DetectionEvaluator): + """A class to evaluate detections.""" + + def __init__(self, + categories, + matching_iou_threshold=0.5, + recall_lower_bound=0.0, + recall_upper_bound=1.0, + evaluate_corlocs=False, + evaluate_precision_recall=False, + metric_prefix=None, + use_weighted_mean_ap=False, + evaluate_masks=False, + group_of_weight=0.0): + """Constructor. + Args: + categories: A list of dicts, each of which has the following keys - + 'id': (required) an integer id uniquely identifying this category. + 'name': (required) string representing category name e.g., 'cat', 'dog'. + matching_iou_threshold: IOU threshold to use for matching groundtruth boxes to detection boxes. + recall_lower_bound: lower bound of recall operating area. + recall_upper_bound: upper bound of recall operating area. + evaluate_corlocs: (optional) boolean which determines if corloc scores are to be returned or not. + evaluate_precision_recall: (optional) boolean which determines if + precision and recall values are to be returned or not. + metric_prefix: (optional) string prefix for metric name; if None, no prefix is used. + use_weighted_mean_ap: (optional) boolean which determines if the mean + average precision is computed directly from the scores and tp_fp_labels of all classes. + evaluate_masks: If False, evaluation will be performed based on boxes. If + True, mask evaluation will be performed instead. + group_of_weight: Weight of group-of boxes. If set to 0, detections of the + correct class within a group-of box are ignored. If weight is > 0, then + if at least one detection falls within a group-of box with + matching_iou_threshold, weight group_of_weight is added to true + positives. Consequently, if no detection falls within a group-of box, + weight group_of_weight is added to false negatives. + Raises: + ValueError: If the category ids are not 1-indexed.
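+        Example (an illustrative construction; the category values are hypothetical):
+            evaluator = ObjectDetectionEvaluator(
+                categories=[{'id': 1, 'name': 'cat'}, {'id': 2, 'name': 'dog'}],
+                matching_iou_threshold=0.5)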
+ """ + super(ObjectDetectionEvaluator, self).__init__(categories) + self._num_classes = max([cat['id'] for cat in categories]) + if min(cat['id'] for cat in categories) < 1: + raise ValueError('Classes should be 1-indexed.') + self._matching_iou_threshold = matching_iou_threshold + self._recall_lower_bound = recall_lower_bound + self._recall_upper_bound = recall_upper_bound + self._use_weighted_mean_ap = use_weighted_mean_ap + self._label_id_offset = 1 + self._evaluate_masks = evaluate_masks + self._group_of_weight = group_of_weight + self._evaluation = ObjectDetectionEvaluation( + num_gt_classes=self._num_classes, + matching_iou_threshold=self._matching_iou_threshold, + recall_lower_bound=self._recall_lower_bound, + recall_upper_bound=self._recall_upper_bound, + use_weighted_mean_ap=self._use_weighted_mean_ap, + label_id_offset=self._label_id_offset, + group_of_weight=self._group_of_weight) + self._image_ids = set([]) + self._evaluate_corlocs = evaluate_corlocs + self._evaluate_precision_recall = evaluate_precision_recall + self._metric_prefix = (metric_prefix + '_') if metric_prefix else '' + self._build_metric_names() + + def _build_metric_names(self): + """Builds a list with metric names.""" + if self._recall_lower_bound > 0.0 or self._recall_upper_bound < 1.0: + self._metric_names = [ + self._metric_prefix + 'Precision/mAP@{}IOU@[{:.1f},{:.1f}]Recall'.format( + self._matching_iou_threshold, self._recall_lower_bound, self._recall_upper_bound) + ] + else: + self._metric_names = [ + self._metric_prefix + 'Precision/mAP@{}IOU'.format(self._matching_iou_threshold) + ] + if self._evaluate_corlocs: + self._metric_names.append( + self._metric_prefix + 'Precision/meanCorLoc@{}IOU'.format(self._matching_iou_threshold)) + + category_index = create_category_index(self._categories) + for idx in range(self._num_classes): + if idx + self._label_id_offset in category_index: + category_name = category_index[idx + self._label_id_offset]['name'] + category_name = unicodedata.normalize('NFKD', category_name) + self._metric_names.append( + self._metric_prefix + 'PerformanceByCategory/AP@{}IOU/{}'.format( + self._matching_iou_threshold, category_name)) + if self._evaluate_corlocs: + self._metric_names.append( + self._metric_prefix + 'PerformanceByCategory/CorLoc@{}IOU/{}'.format( + self._matching_iou_threshold, category_name)) + + def add_single_ground_truth_image_info(self, image_id, gt_dict): + """Adds groundtruth for a single image to be used for evaluation. + Args: + image_id: A unique string/integer identifier for the image. + gt_dict: A dictionary containing - + InputDataFields.gt_boxes: float32 numpy array + of shape [num_boxes, 4] containing `num_boxes` groundtruth boxes of + the format [ymin, xmin, ymax, xmax] in absolute image coordinates. + InputDataFields.gt_classes: integer numpy array + of shape [num_boxes] containing 1-indexed groundtruth classes for the boxes. + InputDataFields.gt_difficult: Optional length M numpy boolean array + denoting whether a ground truth box is a difficult instance or not. + This field is optional to support the case that no boxes are difficult. + InputDataFields.gt_instance_masks: Optional numpy array of shape + [num_boxes, height, width] with values in {0, 1}. + Raises: + ValueError: On adding groundtruth for an image more than once. Will also + raise error if instance masks are not in groundtruth dictionary. 
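+        Example (illustrative; the box and class values are hypothetical):
+            gt_dict = {
+                InputDataFields.gt_boxes: np.array([[10., 10., 50., 60.]], dtype=np.float32),
+                InputDataFields.gt_classes: np.array([1]),
+            }
+            evaluator.add_single_ground_truth_image_info('img_0', gt_dict)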
+ """ + if image_id in self._image_ids: + return + + gt_classes = gt_dict[InputDataFields.gt_classes] - self._label_id_offset + # Use the difficult flag from gt_dict only when the key is present and the array is + # non-empty (an empty array is still accepted when the image has no groundtruth boxes); + # otherwise fall back to None. + if (InputDataFields.gt_difficult in gt_dict and + (gt_dict[InputDataFields.gt_difficult].size or not gt_classes.size)): + gt_difficult = gt_dict[InputDataFields.gt_difficult] + else: + gt_difficult = None + # FIXME disable difficult flag warning, will support flag eventually + # if not len(self._image_ids) % 1000: + # logging.warning('image %s does not have groundtruth difficult flag specified', image_id) + gt_masks = None + if self._evaluate_masks: + if InputDataFields.gt_instance_masks not in gt_dict: + raise ValueError('Instance masks not in groundtruth dictionary.') + gt_masks = gt_dict[InputDataFields.gt_instance_masks] + self._evaluation.add_single_ground_truth_image_info( + image_key=image_id, + gt_boxes=gt_dict[InputDataFields.gt_boxes], + gt_class_labels=gt_classes, + gt_is_difficult_list=gt_difficult, + gt_masks=gt_masks) + self._image_ids.update([image_id]) + + def add_single_detected_image_info(self, image_id, detections_dict): + """Adds detections for a single image to be used for evaluation. + Args: + image_id: A unique string/integer identifier for the image. + detections_dict: A dictionary containing - + DetectionResultFields.detection_boxes: float32 numpy + array of shape [num_boxes, 4] containing `num_boxes` detection boxes + of the format [ymin, xmin, ymax, xmax] in absolute image coordinates. + DetectionResultFields.detection_scores: float32 numpy + array of shape [num_boxes] containing detection scores for the boxes. + DetectionResultFields.detection_classes: integer numpy + array of shape [num_boxes] containing 1-indexed detection classes for the boxes. + DetectionResultFields.detection_masks: uint8 numpy array + of shape [num_boxes, height, width] containing `num_boxes` masks of + values ranging between 0 and 1. + Raises: + ValueError: If detection masks are not in detections dictionary. + """ + detection_classes = detections_dict[DetectionResultFields.detection_classes] - self._label_id_offset + detection_masks = None + if self._evaluate_masks: + if DetectionResultFields.detection_masks not in detections_dict: + raise ValueError('Detection masks not in detections dictionary.') + detection_masks = detections_dict[DetectionResultFields.detection_masks] + self._evaluation.add_single_detected_image_info( + image_key=image_id, + detected_boxes=detections_dict[DetectionResultFields.detection_boxes], + detected_scores=detections_dict[DetectionResultFields.detection_scores], + detected_class_labels=detection_classes, + detected_masks=detection_masks) + + def evaluate(self): + """Compute evaluation result. + Returns: + A dictionary of metrics with the following fields - + 1. summary_metrics: + '<metric_prefix>_Precision/mAP@<matching_iou_threshold>IOU': mean + average precision at the specified IOU threshold. + 2. per_category_ap: category specific results with keys of the form + '<metric_prefix>_PerformanceByCategory/ + mAP@<matching_iou_threshold>IOU/category'.
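+        Example of returned keys (illustrative, assuming metric_prefix='PascalBoxes',
+            an IOU threshold of 0.5 and a single 'cat' category):
+            {'PascalBoxes_Precision/mAP@0.5IOU': 0.7,
+             'PascalBoxes_PerformanceByCategory/AP@0.5IOU/cat': 0.7}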
+ """ + metrics = self._evaluation.evaluate() + pascal_metrics = {self._metric_names[0]: metrics['mean_ap']} + if self._evaluate_corlocs: + pascal_metrics[self._metric_names[1]] = metrics['mean_corloc'] + category_index = create_category_index(self._categories) + for idx in range(metrics['per_class_ap'].size): + if idx + self._label_id_offset in category_index: + category_name = category_index[idx + self._label_id_offset]['name'] + category_name = unicodedata.normalize('NFKD', category_name) + display_name = self._metric_prefix + 'PerformanceByCategory/AP@{}IOU/{}'.format( + self._matching_iou_threshold, category_name) + pascal_metrics[display_name] = metrics['per_class_ap'][idx] + + # Optionally add precision and recall values + if self._evaluate_precision_recall: + display_name = self._metric_prefix + 'PerformanceByCategory/Precision@{}IOU/{}'.format( + self._matching_iou_threshold, category_name) + pascal_metrics[display_name] = metrics['per_class_precision'][idx] + display_name = self._metric_prefix + 'PerformanceByCategory/Recall@{}IOU/{}'.format( + self._matching_iou_threshold, category_name) + pascal_metrics[display_name] = metrics['per_class_recall'][idx] + + # Optionally add CorLoc metrics. + if self._evaluate_corlocs: + display_name = self._metric_prefix + 'PerformanceByCategory/CorLoc@{}IOU/{}'.format( + self._matching_iou_threshold, category_name) + pascal_metrics[display_name] = metrics['per_class_corloc'][idx] + + return pascal_metrics + + def clear(self): + """Clears the state to prepare for a fresh evaluation.""" + self._evaluation = ObjectDetectionEvaluation( + num_gt_classes=self._num_classes, + matching_iou_threshold=self._matching_iou_threshold, + use_weighted_mean_ap=self._use_weighted_mean_ap, + label_id_offset=self._label_id_offset) + self._image_ids.clear() + + +class PascalDetectionEvaluator(ObjectDetectionEvaluator): + """A class to evaluate detections using PASCAL metrics.""" + + def __init__(self, categories, matching_iou_threshold=0.5): + super(PascalDetectionEvaluator, self).__init__( + categories, + matching_iou_threshold=matching_iou_threshold, + evaluate_corlocs=False, + metric_prefix='PascalBoxes', + use_weighted_mean_ap=False) + + +class WeightedPascalDetectionEvaluator(ObjectDetectionEvaluator): + """A class to evaluate detections using weighted PASCAL metrics. + Weighted PASCAL metrics compute the mean average precision as the average + precision given the scores and tp_fp_labels of all classes. In comparison, + PASCAL metrics compute the mean average precision as the mean of the + per-class average precisions. + This definition is very similar to the mean of the per-class average + precisions weighted by class frequency. However, they are typically not the + same as the average precision is not a linear function of the scores and + tp_fp_labels.
+ """ + + def __init__(self, categories, matching_iou_threshold=0.5): + super(WeightedPascalDetectionEvaluator, self).__init__( + categories, + matching_iou_threshold=matching_iou_threshold, + evaluate_corlocs=False, + metric_prefix='WeightedPascalBoxes', + use_weighted_mean_ap=True) + + +class PrecisionAtRecallDetectionEvaluator(ObjectDetectionEvaluator): + """A class to evaluate detections using precision@recall metrics.""" + + def __init__(self, + categories, + matching_iou_threshold=0.5, + recall_lower_bound=0., + recall_upper_bound=1.0): + super(PrecisionAtRecallDetectionEvaluator, self).__init__( + categories, + matching_iou_threshold=matching_iou_threshold, + recall_lower_bound=recall_lower_bound, + recall_upper_bound=recall_upper_bound, + evaluate_corlocs=False, + metric_prefix='PrecisionAtRecallBoxes', + use_weighted_mean_ap=False) + + +class OpenImagesDetectionEvaluator(ObjectDetectionEvaluator): + """A class to evaluate detections using Open Images V2 metrics. + Open Images V2 introduces the group_of type of bounding boxes, and this metric + handles those boxes appropriately. + """ + + def __init__(self, + categories, + matching_iou_threshold=0.5, + evaluate_masks=False, + evaluate_corlocs=False, + metric_prefix='OpenImagesV5', + group_of_weight=0.0): + """Constructor. + Args: + categories: A list of dicts, each of which has the following keys - + 'id': (required) an integer id uniquely identifying this category. + 'name': (required) string representing category name e.g., 'cat', 'dog'. + matching_iou_threshold: IOU threshold to use for matching groundtruth + boxes to detection boxes. + evaluate_masks: if True, evaluator evaluates masks. + evaluate_corlocs: if True, additionally evaluates and returns CorLoc. + metric_prefix: Prefix name of the metric. + group_of_weight: Weight of the group-of bounding box. If set to 0 (default + for Open Images V2 detection protocol), detections of the correct class + within a group-of box are ignored. If weight is > 0, then if at least + one detection falls within a group-of box with matching_iou_threshold, + weight group_of_weight is added to true positives. Consequently, if no + detection falls within a group-of box, weight group_of_weight is added + to false negatives. + """ + + super(OpenImagesDetectionEvaluator, self).__init__( + categories, + matching_iou_threshold, + evaluate_corlocs, + metric_prefix=metric_prefix, + group_of_weight=group_of_weight, + evaluate_masks=evaluate_masks) + + def add_single_ground_truth_image_info(self, image_id, gt_dict): + """Adds groundtruth for a single image to be used for evaluation. + Args: + image_id: A unique string/integer identifier for the image. + gt_dict: A dictionary containing - + InputDataFields.gt_boxes: float32 numpy array + of shape [num_boxes, 4] containing `num_boxes` groundtruth boxes of + the format [ymin, xmin, ymax, xmax] in absolute image coordinates. + InputDataFields.gt_classes: integer numpy array + of shape [num_boxes] containing 1-indexed groundtruth classes for the boxes. + InputDataFields.gt_group_of: Optional length M + numpy boolean array denoting whether a groundtruth box contains a group of instances. + Raises: + ValueError: On adding groundtruth for an image more than once.
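+        Example (illustrative; values are hypothetical, gt_group_of marks a box
+            containing several instances of class 1):
+            gt_dict = {
+                InputDataFields.gt_boxes: np.array([[10., 10., 80., 90.]], dtype=np.float32),
+                InputDataFields.gt_classes: np.array([1]),
+                InputDataFields.gt_group_of: np.array([True]),
+            }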
+ """ + if image_id in self._image_ids: + return + + gt_classes = (gt_dict[InputDataFields.gt_classes] - self._label_id_offset) + # Use the group_of flag from gt_dict only when the key is present and the array is + # non-empty (an empty array is still accepted when the image has no groundtruth boxes); + # otherwise fall back to None. + if (InputDataFields.gt_group_of in gt_dict and + (gt_dict[InputDataFields.gt_group_of].size or not gt_classes.size)): + gt_group_of = gt_dict[InputDataFields.gt_group_of] + else: + gt_group_of = None + # FIXME disable warning for now, will add group_of flag eventually + # if not len(self._image_ids) % 1000: + # logging.warning('image %s does not have groundtruth group_of flag specified', image_id) + if self._evaluate_masks: + gt_masks = gt_dict[InputDataFields.gt_instance_masks] + else: + gt_masks = None + + self._evaluation.add_single_ground_truth_image_info( + image_id, + gt_dict[InputDataFields.gt_boxes], + gt_classes, + gt_is_difficult_list=None, + gt_is_group_of_list=gt_group_of, + gt_masks=gt_masks) + self._image_ids.update([image_id]) + + +class OpenImagesChallengeEvaluator(OpenImagesDetectionEvaluator): + """A class that implements Open Images Challenge metrics. + Both Detection and Instance Segmentation evaluation metrics are implemented. + The Open Images Challenge Detection metric has two major changes in comparison + with the Open Images V2 detection metric: + - a custom weight might be specified for detecting an object contained in a group-of box. + - verified image-level labels should be explicitly provided for evaluation: if an + image has neither a positive nor a negative image-level label for class c, all detections of + this class on this image will be ignored. + + The Open Images Challenge Instance Segmentation metric allows measuring model + performance in case of incomplete annotations: some instances are + annotated only at box level and some only at image level. In addition, + image-level labels are taken into account as in the detection metric. + + Open Images Challenge Detection metric default parameters: + evaluate_masks = False + group_of_weight = 1.0 + + Open Images Challenge Instance Segmentation metric default parameters: + evaluate_masks = True + (group_of_weight will not matter) + """ + + def __init__( + self, + categories, + evaluate_masks=False, + matching_iou_threshold=0.5, + evaluate_corlocs=False, + group_of_weight=1.0): + """Constructor. + Args: + categories: A list of dicts, each of which has the following keys - + 'id': (required) an integer id uniquely identifying this category. + 'name': (required) string representing category name e.g., 'cat', 'dog'. + evaluate_masks: set to true for instance segmentation metric and to false + for detection metric. + matching_iou_threshold: IOU threshold to use for matching groundtruth + boxes to detection boxes. + evaluate_corlocs: if True, additionally evaluates and returns CorLoc. + group_of_weight: Weight of group-of boxes. If set to 0, detections of the + correct class within a group-of box are ignored. If weight is > 0, then + if at least one detection falls within a group-of box with + matching_iou_threshold, weight group_of_weight is added to true + positives. Consequently, if no detection falls within a group-of box, + weight group_of_weight is added to false negatives.
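+        Example (illustrative): the detection challenge variant corresponds to
+            OpenImagesChallengeEvaluator(categories, evaluate_masks=False, group_of_weight=1.0),
+            while the instance segmentation variant passes evaluate_masks=True.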
+ """ + if not evaluate_masks: + metrics_prefix = 'OpenImagesDetectionChallenge' + else: + metrics_prefix = 'OpenImagesInstanceSegmentationChallenge' + + super(OpenImagesChallengeEvaluator, self).__init__( + categories, + matching_iou_threshold, + evaluate_masks=evaluate_masks, + evaluate_corlocs=evaluate_corlocs, + group_of_weight=group_of_weight, + metric_prefix=metrics_prefix) + + self._evaluatable_labels = {} + + def add_single_ground_truth_image_info(self, image_id, gt_dict): + """Adds groundtruth for a single image to be used for evaluation. + Args: + image_id: A unique string/integer identifier for the image. + gt_dict: A dictionary containing - + InputDataFields.gt_boxes: float32 numpy array of shape [num_boxes, 4] + containing `num_boxes` groundtruth boxes of the format [ymin, xmin, ymax, xmax] + in absolute image coordinates. + InputDataFields.gt_classes: integer numpy array of shape [num_boxes] + containing 1-indexed groundtruth classes for the boxes. + InputDataFields.gt_image_classes: integer 1D + numpy array containing all classes for which labels are verified. + InputDataFields.gt_group_of: Optional length M + numpy boolean array denoting whether a groundtruth box contains a group of instances. + Raises: + ValueError: On adding groundtruth for an image more than once. + """ + super(OpenImagesChallengeEvaluator, + self).add_single_ground_truth_image_info(image_id, gt_dict) + input_fields = InputDataFields + gt_classes = gt_dict[input_fields.gt_classes] - self._label_id_offset + image_classes = np.array([], dtype=int) + if input_fields.gt_image_classes in gt_dict: + image_classes = gt_dict[input_fields.gt_image_classes] + elif input_fields.gt_labeled_classes in gt_dict: + image_classes = gt_dict[input_fields.gt_labeled_classes] + image_classes -= self._label_id_offset + self._evaluatable_labels[image_id] = np.unique( + np.concatenate((image_classes, gt_classes))) + + def add_single_detected_image_info(self, image_id, detections_dict): + """Adds detections for a single image to be used for evaluation. + Args: + image_id: A unique string/integer identifier for the image. + detections_dict: A dictionary containing - + DetectionResultFields.detection_boxes: float32 numpy + array of shape [num_boxes, 4] containing `num_boxes` detection boxes + of the format [ymin, xmin, ymax, xmax] in absolute image coordinates. + DetectionResultFields.detection_scores: float32 numpy + array of shape [num_boxes] containing detection scores for the boxes. + DetectionResultFields.detection_classes: integer numpy + array of shape [num_boxes] containing 1-indexed detection classes for + the boxes. + Raises: + ValueError: If detection masks are not in detections dictionary. + """ + if image_id not in self._image_ids: + # Groundtruth is expected to be added before the corresponding detections; if it + # is missing, register the image with no evaluatable labels so its detections are ignored.
+ self._image_ids.update([image_id]) + self._evaluatable_labels[image_id] = np.array([]) + + detection_classes = detections_dict[DetectionResultFields.detection_classes] - self._label_id_offset + allowed_classes = np.where(np.isin(detection_classes, self._evaluatable_labels[image_id])) + detection_classes = detection_classes[allowed_classes] + detected_boxes = detections_dict[DetectionResultFields.detection_boxes][allowed_classes] + detected_scores = detections_dict[DetectionResultFields.detection_scores][allowed_classes] + + if self._evaluate_masks: + detection_masks = detections_dict[DetectionResultFields.detection_masks][allowed_classes] + else: + detection_masks = None + self._evaluation.add_single_detected_image_info( + image_key=image_id, + detected_boxes=detected_boxes, + detected_scores=detected_scores, + detected_class_labels=detection_classes, + detected_masks=detection_masks) + + def clear(self): + """Clears stored data.""" + + super(OpenImagesChallengeEvaluator, self).clear() + self._evaluatable_labels.clear() + diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/fields.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/fields.py new file mode 100644 index 0000000000..d029b77dc5 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/fields.py @@ -0,0 +1,105 @@ + +class InputDataFields(object): + """Names for the input tensors. + Holds the standard data field names to use for identifying input tensors. This + should be used by the decoder to identify keys for the returned tensor_dict + containing input tensors. And it should be used by the model to identify the + tensors it needs. + Attributes: + image: image. + image_additional_channels: additional channels. + key: unique key corresponding to image. + filename: original filename of the dataset (without common path). + gt_image_classes: image-level class labels. + gt_image_confidences: image-level class confidences. + gt_labeled_classes: image-level annotation that indicates the + classes for which an image has been labeled. + gt_boxes: coordinates of the ground truth boxes in the image. + gt_classes: box-level class labels. + gt_confidences: box-level class confidences. The shape should be + the same as the shape of gt_classes. + gt_label_types: box-level label types (e.g. explicit negative). + gt_is_crowd: [DEPRECATED, use gt_group_of instead] + is the groundtruth a single object or a crowd. + gt_area: area of a groundtruth segment. + gt_difficult: is a `difficult` object + gt_group_of: is a `group_of` objects, e.g. multiple objects of the + same class, forming a connected group, where instances are heavily + occluding each other. + gt_instance_masks: ground truth instance masks. + gt_instance_boundaries: ground truth instance boundaries. + gt_instance_classes: instance mask-level class labels. + gt_label_weights: groundtruth label weights. + gt_weights: groundtruth weight factor for bounding boxes. 
+ image_height: height of images, used to decode + image_width: width of images, used to decode + """ + image = 'image' + key = 'image_id' + filename = 'filename' + gt_boxes = 'bbox' + gt_classes = 'cls' + gt_confidences = 'confidences' + gt_label_types = 'label_types' + gt_image_classes = 'img_cls' + gt_image_confidences = 'img_confidences' + gt_labeled_classes = 'labeled_cls' + gt_is_crowd = 'is_crowd' + gt_area = 'area' + gt_difficult = 'difficult' + gt_group_of = 'group_of' + gt_instance_masks = 'instance_masks' + gt_instance_boundaries = 'instance_boundaries' + gt_instance_classes = 'instance_classes' + image_height = 'img_height' + image_width = 'img_width' + image_size = 'img_size' + + +class DetectionResultFields(object): + """Naming conventions for storing the output of the detector. + Attributes: + source_id: source of the original image. + key: unique key corresponding to image. + detection_boxes: coordinates of the detection boxes in the image. + detection_scores: detection scores for the detection boxes in the image. + detection_multiclass_scores: class score distribution (including background) + for detection boxes in the image. + detection_classes: detection-level class labels. + detection_masks: contains a segmentation mask for each detection box. + """ + + key = 'image_id' + detection_boxes = 'bbox' + detection_scores = 'score' + detection_classes = 'cls' + detection_masks = 'masks' + + +class BoxListFields(object): + """Naming conventions for BoxLists. + Attributes: + boxes: bounding box coordinates. + classes: classes per bounding box. + scores: scores per bounding box. + weights: sample weights per bounding box. + objectness: objectness score per bounding box. + masks: masks per bounding box. + boundaries: boundaries per bounding box. + keypoints: keypoints per bounding box. + keypoint_heatmaps: keypoint heatmaps per bounding box. + is_crowd: is_crowd annotation per bounding box. + """ + boxes = 'boxes' + classes = 'classes' + scores = 'scores' + weights = 'weights' + confidences = 'confidences' + objectness = 'objectness' + masks = 'masks' + boundaries = 'boundaries' + keypoints = 'keypoints' + keypoint_visibilities = 'keypoint_visibilities' + keypoint_heatmaps = 'keypoint_heatmaps' + is_crowd = 'is_crowd' + group_of = 'group_of' diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/metrics.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/metrics.py new file mode 100644 index 0000000000..01a73ef028 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/metrics.py @@ -0,0 +1,148 @@ +import numpy as np + + +def compute_precision_recall(scores, labels, num_gt): + """Compute precision and recall. + Args: + scores: A float numpy array representing detection scores + labels: A float numpy array representing weighted true/false positive labels + num_gt: Number of ground truth instances + Raises: + ValueError: if the input is not of the correct format + Returns: + precision: Fraction of positive instances over detected ones. This value is + None if no ground truth labels are present. + recall: Fraction of detected positive instances over all positive instances. + This value is None if no ground truth labels are present.
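+    Example (illustrative):
+        scores = np.array([0.9, 0.7, 0.3])
+        labels = np.array([1., 0., 1.])
+        precision, recall = compute_precision_recall(scores, labels, num_gt=2)
+        # precision -> [1., 0.5, 2/3], recall -> [0.5, 0.5, 1.]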
+ """ + if not isinstance(labels, np.ndarray) or len(labels.shape) != 1: + raise ValueError("labels must be single dimension numpy array") + + if labels.dtype != np.float64 and labels.dtype != np.bool_: + raise ValueError("labels type must be either bool or float") + + if not isinstance(scores, np.ndarray) or len(scores.shape) != 1: + raise ValueError("scores must be single dimension numpy array") + + if num_gt < np.sum(labels): + raise ValueError("Number of true positives must not exceed num_gt.") + + if len(scores) != len(labels): + raise ValueError("scores and labels must be of the same size.") + + if num_gt == 0: + return None, None + + sorted_indices = np.argsort(scores) + sorted_indices = sorted_indices[::-1] + true_positive_labels = labels[sorted_indices] + false_positive_labels = (true_positive_labels <= 0).astype(float) + cum_true_positives = np.cumsum(true_positive_labels) + cum_false_positives = np.cumsum(false_positive_labels) + precision = cum_true_positives.astype(float) / (cum_true_positives + cum_false_positives) + recall = cum_true_positives.astype(float) / num_gt + return precision, recall + + +def compute_average_precision(precision, recall): + """Compute Average Precision according to the definition in VOCdevkit. + Precision is modified to ensure that it does not decrease as recall + decreases. + Args: + precision: A float [N, 1] numpy array of precisions + recall: A float [N, 1] numpy array of recalls + Raises: + ValueError: if the input is not of the correct format + Returns: + average_precision: The area under the precision recall curve. NaN if + precision and recall are None. + """ + if precision is None: + if recall is not None: + raise ValueError("If precision is None, recall must also be None") + return np.nan + + if not isinstance(precision, np.ndarray) or not isinstance(recall, np.ndarray): + raise ValueError("precision and recall must be numpy array") + if precision.dtype != np.float64 or recall.dtype != np.float64: + raise ValueError("input must be float numpy array.") + if len(precision) != len(recall): + raise ValueError("precision and recall must be of the same size.") + if not precision.size: + return 0.0 + if np.amin(precision) < 0 or np.amax(precision) > 1: + raise ValueError("Precision must be in the range of [0, 1].") + if np.amin(recall) < 0 or np.amax(recall) > 1: + raise ValueError("recall must be in the range of [0, 1].") + if not all(recall[i] <= recall[i + 1] for i in range(len(recall) - 1)): + raise ValueError("recall must be a non-decreasing array") + + recall = np.concatenate([[0], recall, [1]]) + precision = np.concatenate([[0], precision, [0]]) + + # Preprocess precision to be a non-decreasing array + for i in range(len(precision) - 2, -1, -1): + precision[i] = np.maximum(precision[i], precision[i + 1]) + + indices = np.where(recall[1:] != recall[:-1])[0] + 1 + average_precision = np.sum((recall[indices] - recall[indices - 1]) * precision[indices]) + return average_precision + + +def compute_cor_loc(num_gt_imgs_per_class, num_images_correctly_detected_per_class): + """Compute CorLoc according to the definition in the following paper. + https://www.robots.ox.ac.uk/~vgg/rg/papers/deselaers-eccv10.pdf + Returns nans if there are no ground truth images for a class.
+ Args: + num_gt_imgs_per_class: 1D array, representing number of images containing + at least one object instance of a particular class + num_images_correctly_detected_per_class: 1D array, representing number of + images in which at least one object instance of a particular class was correctly detected + Returns: + corloc_per_class: A float numpy array representing the corloc score of each class + """ + return np.where( + num_gt_imgs_per_class == 0, np.nan, + num_images_correctly_detected_per_class / num_gt_imgs_per_class) + + +def compute_median_rank_at_k(tp_fp_list, k): + """Computes MedianRank@k, considering only the top k highest-scoring detections. + Args: + tp_fp_list: a list of numpy arrays; each numpy array corresponds to all + detections on a single image, where the detections are sorted by score in + descending order. Further, each numpy array element can have boolean or + float values. True positive elements have either value >0.0 or True; + any other value is considered false positive. + k: number of top-scoring proposals to take. + Returns: + median_rank: median rank of all true positive proposals among top k by score. + """ + ranks = [] + for i in range(len(tp_fp_list)): + ranks.append(np.where(tp_fp_list[i][0:min(k, tp_fp_list[i].shape[0])] > 0)[0]) + concatenated_ranks = np.concatenate(ranks) + return np.median(concatenated_ranks) + + +def compute_recall_at_k(tp_fp_list, num_gt, k): + """Computes Recall@k, considering only the top k highest-scoring detections. + Args: + tp_fp_list: a list of numpy arrays; each numpy array corresponds to all + detections on a single image, where the detections are sorted by score in + descending order. Further, each numpy array element can have boolean or + float values. True positive elements have either value >0.0 or True; + any other value is considered false positive. + num_gt: number of groundtruth annotations. + k: number of top-scoring proposals to take. + Returns: + recall: recall evaluated on the top k by score detections. + """ + + tp_fp_eval = [] + for i in range(len(tp_fp_list)): + tp_fp_eval.append(tp_fp_list[i][0:min(k, tp_fp_list[i].shape[0])]) + + tp_fp_eval = np.concatenate(tp_fp_eval) + + return np.sum(tp_fp_eval) / num_gt diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/np_box_list.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/np_box_list.py new file mode 100644 index 0000000000..60e3b44c32 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/np_box_list.py @@ -0,0 +1,696 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Bounding Box List operations for Numpy BoxLists.
+ +Example box operations that are supported: + * Areas: compute bounding box areas + * IOU: pairwise intersection-over-union scores +""" +import numpy as np + + +class BoxList(object): + """Box collection. + BoxList represents a list of bounding boxes as numpy array, where each + bounding box is represented as a row of 4 numbers, + [y_min, x_min, y_max, x_max]. It is assumed that all bounding boxes within a + given list correspond to a single image. + Optionally, users can add additional related fields (such as + objectness/classification scores). + """ + + def __init__(self, data): + """Constructs box collection. + Args: + data: a numpy array of shape [N, 4] representing box coordinates + Raises: + ValueError: if bbox data is not a numpy array + ValueError: if invalid dimensions for bbox data + """ + if not isinstance(data, np.ndarray): + raise ValueError('data must be a numpy array.') + if len(data.shape) != 2 or data.shape[1] != 4: + raise ValueError('Invalid dimensions for box data.') + if data.dtype != np.float32 and data.dtype != np.float64: + raise ValueError('Invalid data type for box data: float is required.') + if not self._is_valid_boxes(data): + raise ValueError('Invalid box data. data must be a numpy array of ' + 'N*[y_min, x_min, y_max, x_max]') + self.data = {'boxes': data} + + def num_boxes(self): + """Return number of boxes held in collections.""" + return self.data['boxes'].shape[0] + + def get_extra_fields(self): + """Return all non-box fields.""" + return [k for k in self.data.keys() if k != 'boxes'] + + def has_field(self, field): + return field in self.data + + def add_field(self, field, field_data): + """Add data to a specified field. + Args: + field: a string parameter used to specify a related field to be accessed. + field_data: a numpy array of [N, ...] representing the data associated + with the field. + Raises: + ValueError: if the field already exists or the dimension of the field + data does not match the number of boxes. + """ + if self.has_field(field): + raise ValueError('Field ' + field + ' already exists') + if len(field_data.shape) < 1 or field_data.shape[0] != self.num_boxes(): + raise ValueError('Invalid dimensions for field data') + self.data[field] = field_data + + def get(self): + """Convenience function for accessing box coordinates. + Returns: + a numpy array of shape [N, 4] representing box corners + """ + return self.get_field('boxes') + + def get_field(self, field): + """Accesses data associated with the specified field in the box collection. + Args: + field: a string parameter used to specify a related field to be accessed. + Returns: + a numpy 1-d array representing data of an associated field + Raises: + ValueError: if invalid field + """ + if not self.has_field(field): + raise ValueError('field {} does not exist'.format(field)) + return self.data[field] + + def get_coordinates(self): + """Get corner coordinates of boxes. + Returns: + a list of 4 1-d numpy arrays [y_min, x_min, y_max, x_max] + """ + box_coordinates = self.get() + y_min = box_coordinates[:, 0] + x_min = box_coordinates[:, 1] + y_max = box_coordinates[:, 2] + x_max = box_coordinates[:, 3] + return [y_min, x_min, y_max, x_max] + + def _is_valid_boxes(self, data): + """Check whether data fulfills the format of N*[ymin, xmin, ymax, xmax]. + Args: + data: a numpy array of shape [N, 4] representing box coordinates + Returns: + a boolean indicating whether all ymax of boxes are equal or greater than + ymin, and all xmax of boxes are equal or greater than xmin.
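+        Example (illustrative):
+            self._is_valid_boxes(np.array([[0., 0., 1., 1.]]))  # True
+            self._is_valid_boxes(np.array([[1., 0., 0., 1.]]))  # False, y_min > y_max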
+ """ + if data.shape[0] > 0: + for i in range(data.shape[0]): + if data[i, 0] > data[i, 2] or data[i, 1] > data[i, 3]: + return False + return True + + +def area(boxes): + """Computes area of boxes. + + Args: + boxes: Numpy array with shape [N, 4] holding N boxes + + Returns: + a numpy array with shape [N*1] representing box areas + """ + return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) + + +def intersection(boxes1, boxes2): + """Compute pairwise intersection areas between boxes. + + Args: + boxes1: a numpy array with shape [N, 4] holding N boxes + boxes2: a numpy array with shape [M, 4] holding M boxes + + Returns: + a numpy array with shape [N*M] representing pairwise intersection area + """ + [y_min1, x_min1, y_max1, x_max1] = np.split(boxes1, 4, axis=1) + [y_min2, x_min2, y_max2, x_max2] = np.split(boxes2, 4, axis=1) + + all_pairs_min_ymax = np.minimum(y_max1, np.transpose(y_max2)) + all_pairs_max_ymin = np.maximum(y_min1, np.transpose(y_min2)) + intersect_heights = np.maximum(np.zeros(all_pairs_max_ymin.shape), all_pairs_min_ymax - all_pairs_max_ymin) + all_pairs_min_xmax = np.minimum(x_max1, np.transpose(x_max2)) + all_pairs_max_xmin = np.maximum(x_min1, np.transpose(x_min2)) + intersect_widths = np.maximum(np.zeros(all_pairs_max_xmin.shape), all_pairs_min_xmax - all_pairs_max_xmin) + return intersect_heights * intersect_widths + + +def iou(boxes1, boxes2): + """Computes pairwise intersection-over-union between box collections. + + Args: + boxes1: a numpy array with shape [N, 4] holding N boxes. + boxes2: a numpy array with shape [M, 4] holding M boxes. + + Returns: + a numpy array with shape [N, M] representing pairwise iou scores. + """ + intersect = intersection(boxes1, boxes2) + area1 = area(boxes1) + area2 = area(boxes2) + union = np.expand_dims(area1, axis=1) + np.expand_dims(area2, axis=0) - intersect + return intersect / union + + +def ioa(boxes1, boxes2): + """Computes pairwise intersection-over-area between box collections. + + Intersection-over-area (ioa) between two boxes box1 and box2 is defined as + their intersection area over box2's area. Note that ioa is not symmetric, + that is, IOA(box1, box2) != IOA(box2, box1). + + Args: + boxes1: a numpy array with shape [N, 4] holding N boxes. + boxes2: a numpy array with shape [M, 4] holding M boxes. + + Returns: + a numpy array with shape [N, M] representing pairwise ioa scores. + """ + intersect = intersection(boxes1, boxes2) + areas = np.expand_dims(area(boxes2), axis=0) + return intersect / areas + + +class SortOrder(object): + """Enum class for sort order. + + Attributes: + ascend: ascend order. + descend: descend order. + """ + ASCEND = 1 + DESCEND = 2 + + +def area_boxlist(boxlist): + """Computes area of boxes. + + Args: + boxlist: BoxList holding N boxes + + Returns: + a numpy array with shape [N*1] representing box areas + """ + y_min, x_min, y_max, x_max = boxlist.get_coordinates() + return (y_max - y_min) * (x_max - x_min) + + +def intersection_boxlist(boxlist1, boxlist2): + """Compute pairwise intersection areas between boxes. + + Args: + boxlist1: BoxList holding N boxes + boxlist2: BoxList holding M boxes + + Returns: + a numpy array with shape [N*M] representing pairwise intersection area + """ + return intersection(boxlist1.get(), boxlist2.get()) + + +def iou_boxlist(boxlist1, boxlist2): + """Computes pairwise intersection-over-union between box collections.
+
+    Args:
+        boxlist1: BoxList holding N boxes
+        boxlist2: BoxList holding M boxes
+
+    Returns:
+        a numpy array with shape [N, M] representing pairwise iou scores.
+    """
+    return iou(boxlist1.get(), boxlist2.get())
+
+
+def ioa_boxlist(boxlist1, boxlist2):
+    """Computes pairwise intersection-over-area between box collections.
+
+    Intersection-over-area (ioa) between two boxes box1 and box2 is defined as
+    their intersection area over box2's area. Note that ioa is not symmetric,
+    that is, IOA(box1, box2) != IOA(box2, box1).
+
+    Args:
+        boxlist1: BoxList holding N boxes
+        boxlist2: BoxList holding M boxes
+
+    Returns:
+        a numpy array with shape [N, M] representing pairwise ioa scores.
+    """
+    return ioa(boxlist1.get(), boxlist2.get())
+
+
+def gather_boxlist(boxlist, indices, fields=None):
+    """Gather boxes from BoxList according to indices and return new BoxList.
+
+    By default, gather returns boxes corresponding to the input index list, as
+    well as all additional fields stored in the boxlist (indexing into the
+    first dimension). However one can optionally only gather from a
+    subset of fields.
+
+    Args:
+        boxlist: BoxList holding N boxes
+        indices: a 1-d numpy array of type int_
+        fields: (optional) list of fields to also gather from. If None (default),
+            all fields are gathered from. Pass an empty fields list to only gather the box coordinates.
+
+    Returns:
+        subboxlist: a BoxList corresponding to the subset of the input BoxList specified by indices
+
+    Raises:
+        ValueError: if specified field is not contained in boxlist or if the indices are not of type int_
+    """
+    if indices.size:
+        if np.amax(indices) >= boxlist.num_boxes() or np.amin(indices) < 0:
+            raise ValueError('indices are out of valid range.')
+    subboxlist = BoxList(boxlist.get()[indices, :])
+    if fields is None:
+        fields = boxlist.get_extra_fields()
+    for field in fields:
+        extra_field_data = boxlist.get_field(field)
+        subboxlist.add_field(field, extra_field_data[indices, ...])
+    return subboxlist
+
+
+def sort_by_field_boxlist(boxlist, field, order=SortOrder.DESCEND):
+    """Sort boxes and associated fields according to a scalar field.
+
+    A common use case is reordering the boxes according to descending scores.
+
+    Args:
+        boxlist: BoxList holding N boxes.
+        field: A BoxList field for sorting and reordering the BoxList.
+        order: (Optional) 'descend' or 'ascend'. Default is descend.
+
+    Returns:
+        sorted_boxlist: A sorted BoxList with the field in the specified order.
+
+    Raises:
+        ValueError: if specified field does not exist or is not of single dimension.
+        ValueError: if the order is not either descend or ascend.
+    """
+    if not boxlist.has_field(field):
+        raise ValueError('Field ' + field + ' does not exist')
+    if len(boxlist.get_field(field).shape) != 1:
+        raise ValueError('Field ' + field + ' should be single dimension.')
+    if order != SortOrder.DESCEND and order != SortOrder.ASCEND:
+        raise ValueError('Invalid sort order')
+
+    field_to_sort = boxlist.get_field(field)
+    sorted_indices = np.argsort(field_to_sort)
+    if order == SortOrder.DESCEND:
+        sorted_indices = sorted_indices[::-1]
+    return gather_boxlist(boxlist, sorted_indices)
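+# A quick sketch of single-class NMS usage (illustrative comment only; the
+# boxes, scores and thresholds below are made up):
+#
+#   boxlist = BoxList(np.array([[0., 0., 1., 1.],
+#                               [0., 0., 1., 1.],
+#                               [0., 2., 1., 3.]], dtype=np.float32))
+#   boxlist.add_field('scores', np.array([0.9, 0.8, 0.7], dtype=np.float32))
+#   kept = non_max_suppression(boxlist, max_output_size=100, iou_threshold=0.5)
+#   # The second box duplicates the first (IOU 1.0 > 0.5), so 'kept' holds
+#   # boxes 0 and 2, sorted by score.
+
+
+def non_max_suppression(boxlist, max_output_size=10000, iou_threshold=1.0, score_threshold=-10.0):
+    """Non maximum suppression.
+
+    This op greedily selects a subset of detection bounding boxes, pruning
+    away boxes that have high IOU (intersection over union) overlap (> thresh)
+    with already selected boxes.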
In each iteration, the detected bounding box with
+    highest score in the available pool is selected.
+
+    Args:
+        boxlist: BoxList holding N boxes. Must contain a 'scores' field
+            representing detection scores. All scores belong to the same class.
+        max_output_size: maximum number of retained boxes
+        iou_threshold: intersection over union threshold.
+        score_threshold: minimum score threshold. Remove the boxes with scores less than
+            this value. Default value is set to -10. A very low threshold to pass pretty
+            much all the boxes, unless the user sets a different score threshold.
+
+    Returns:
+        a BoxList holding M boxes where M <= max_output_size
+    Raises:
+        ValueError: if 'scores' field does not exist
+        ValueError: if threshold is not in [0, 1]
+        ValueError: if max_output_size < 0
+    """
+    if not boxlist.has_field('scores'):
+        raise ValueError('Field scores does not exist')
+    if iou_threshold < 0. or iou_threshold > 1.0:
+        raise ValueError('IOU threshold must be in [0, 1]')
+    if max_output_size < 0:
+        raise ValueError('max_output_size must be non-negative.')
+
+    boxlist = filter_scores_greater_than(boxlist, score_threshold)
+    if boxlist.num_boxes() == 0:
+        return boxlist
+
+    boxlist = sort_by_field_boxlist(boxlist, 'scores')
+
+    # Prevent further computation if NMS is disabled.
+    if iou_threshold == 1.0:
+        if boxlist.num_boxes() > max_output_size:
+            selected_indices = np.arange(max_output_size)
+            return gather_boxlist(boxlist, selected_indices)
+        else:
+            return boxlist
+
+    boxes = boxlist.get()
+    num_boxes = boxlist.num_boxes()
+    # is_index_valid is True only for all remaining valid boxes.
+    is_index_valid = np.full(num_boxes, 1, dtype=bool)
+    selected_indices = []
+    num_output = 0
+    for i in range(num_boxes):
+        if num_output < max_output_size:
+            if is_index_valid[i]:
+                num_output += 1
+                selected_indices.append(i)
+                is_index_valid[i] = False
+                valid_indices = np.where(is_index_valid)[0]
+                if valid_indices.size == 0:
+                    break
+
+                intersect_over_union = iou(np.expand_dims(boxes[i, :], axis=0), boxes[valid_indices, :])
+                intersect_over_union = np.squeeze(intersect_over_union, axis=0)
+                is_index_valid[valid_indices] = np.logical_and(
+                    is_index_valid[valid_indices],
+                    intersect_over_union <= iou_threshold)
+    return gather_boxlist(boxlist, np.array(selected_indices))
+
+
+def multi_class_non_max_suppression(boxlist, score_thresh, iou_thresh, max_output_size):
+    """Multi-class version of non maximum suppression.
+
+    This op greedily selects a subset of detection bounding boxes, pruning
+    away boxes that have high IOU (intersection over union) overlap (> thresh)
+    with already selected boxes. It operates independently for each class for
+    which scores are provided (via the scores field of the input box_list),
+    pruning boxes with score less than a provided threshold prior to
+    applying NMS.
+
+    Args:
+        boxlist: BoxList holding N boxes. Must contain a 'scores' field
+            representing detection scores. This scores field is a tensor that can
+            be 1 dimensional (in the case of a single class) or 2-dimensional, in
+            which case we assume that it takes the shape [num_boxes, num_classes].
+            We further assume that this rank is known statically and that
+            scores.shape[1] is also known (i.e., the number of classes is fixed
+            and known at graph construction time).
+        score_thresh: scalar threshold for score (low scoring boxes are removed).
+        iou_thresh: scalar threshold for IOU (boxes that have high IOU overlap
+            with previously selected boxes are removed).
+ max_output_size: maximum number of retained boxes per class. + + Returns: + a BoxList holding M boxes with a rank-1 scores field representing + corresponding scores for each box with scores sorted in decreasing order + and a rank-1 classes field representing a class label for each box. + Raises: + ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have + a valid scores field. + """ + if not 0 <= iou_thresh <= 1.0: + raise ValueError('thresh must be between 0 and 1') + if not isinstance(boxlist, BoxList): + raise ValueError('boxlist must be a BoxList') + if not boxlist.has_field('scores'): + raise ValueError('input boxlist must have \'scores\' field') + scores = boxlist.get_field('scores') + if len(scores.shape) == 1: + scores = np.reshape(scores, [-1, 1]) + elif len(scores.shape) == 2: + if scores.shape[1] is None: + raise ValueError('scores field must have statically defined second dimension') + else: + raise ValueError('scores field must be of rank 1 or 2') + num_boxes = boxlist.num_boxes() + num_scores = scores.shape[0] + num_classes = scores.shape[1] + + if num_boxes != num_scores: + raise ValueError('Incorrect scores field length: actual vs expected.') + + selected_boxes_list = [] + for class_idx in range(num_classes): + boxlist_and_class_scores = BoxList(boxlist.get()) + class_scores = np.reshape(scores[0:num_scores, class_idx], [-1]) + boxlist_and_class_scores.add_field('scores', class_scores) + boxlist_filt = filter_scores_greater_than(boxlist_and_class_scores, score_thresh) + nms_result = non_max_suppression( + boxlist_filt, max_output_size=max_output_size, iou_threshold=iou_thresh, score_threshold=score_thresh) + nms_result.add_field('classes', np.zeros_like(nms_result.get_field('scores')) + class_idx) + selected_boxes_list.append(nms_result) + selected_boxes = concatenate_boxlist(selected_boxes_list) + sorted_boxes = sort_by_field_boxlist(selected_boxes, 'scores') + return sorted_boxes + + +def scale(boxlist, y_scale, x_scale): + """Scale box coordinates in x and y dimensions. + + Args: + boxlist: BoxList holding N boxes + y_scale: float + x_scale: float + + Returns: + boxlist: BoxList holding N boxes + """ + y_min, x_min, y_max, x_max = np.array_split(boxlist.get(), 4, axis=1) + y_min = y_scale * y_min + y_max = y_scale * y_max + x_min = x_scale * x_min + x_max = x_scale * x_max + scaled_boxlist = BoxList(np.hstack([y_min, x_min, y_max, x_max])) + + fields = boxlist.get_extra_fields() + for field in fields: + extra_field_data = boxlist.get_field(field) + scaled_boxlist.add_field(field, extra_field_data) + + return scaled_boxlist + + +def clip_to_window(boxlist, window, filter_nonoverlapping=True): + """Clip bounding boxes to a window. + + This op clips input bounding boxes (represented by bounding box + corners) to a window, optionally filtering out boxes that do not + overlap at all with the window. + + Args: + boxlist: BoxList holding M_in boxes + window: a numpy array of shape [4] representing the [y_min, x_min, y_max, x_max] + window to which the op should clip boxes. + filter_nonoverlapping: whether to filter out boxes that do not overlap at all with the window. 
+ + Returns: + a BoxList holding M_out boxes where M_out <= M_in + """ + y_min, x_min, y_max, x_max = np.array_split(boxlist.get(), 4, axis=1) + win_y_min = window[0] + win_x_min = window[1] + win_y_max = window[2] + win_x_max = window[3] + y_min_clipped = np.fmax(np.fmin(y_min, win_y_max), win_y_min) + y_max_clipped = np.fmax(np.fmin(y_max, win_y_max), win_y_min) + x_min_clipped = np.fmax(np.fmin(x_min, win_x_max), win_x_min) + x_max_clipped = np.fmax(np.fmin(x_max, win_x_max), win_x_min) + clipped = BoxList(np.hstack([y_min_clipped, x_min_clipped, y_max_clipped, x_max_clipped])) + clipped = _copy_extra_fields(clipped, boxlist) + if filter_nonoverlapping: + areas = area(clipped) + nonzero_area_indices = np.reshape(np.nonzero(np.greater(areas, 0.0)), [-1]).astype(np.int32) + clipped = gather_boxlist(clipped, nonzero_area_indices) + return clipped + + +def prune_non_overlapping_boxes(boxlist1, boxlist2, minoverlap=0.0): + """Prunes the boxes in boxlist1 that overlap less than thresh with boxlist2. + + For each box in boxlist1, we want its IOA to be more than minoverlap with + at least one of the boxes in boxlist2. If it does not, we remove it. + + Args: + boxlist1: BoxList holding N boxes. + boxlist2: BoxList holding M boxes. + minoverlap: Minimum required overlap between boxes, to count them as overlapping. + + Returns: + A pruned boxlist with size [N', 4]. + """ + intersection_over_area = ioa(boxlist2, boxlist1) # [M, N] tensor + intersection_over_area = np.amax(intersection_over_area, axis=0) # [N] tensor + keep_bool = np.greater_equal(intersection_over_area, np.array(minoverlap)) + keep_inds = np.nonzero(keep_bool)[0] + new_boxlist1 = gather_boxlist(boxlist1, keep_inds) + return new_boxlist1 + + +def prune_outside_window(boxlist, window): + """Prunes bounding boxes that fall outside a given window. + + This function prunes bounding boxes that even partially fall outside the given + window. See also ClipToWindow which only prunes bounding boxes that fall + completely outside the window, and clips any bounding boxes that partially + overflow. + + Args: + boxlist: a BoxList holding M_in boxes. + window: a numpy array of size 4, representing [ymin, xmin, ymax, xmax] of the window. + + Returns: + pruned_corners: a tensor with shape [M_out, 4] where M_out <= M_in. + valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes in the input tensor. + """ + + y_min, x_min, y_max, x_max = np.array_split(boxlist.get(), 4, axis=1) + win_y_min = window[0] + win_x_min = window[1] + win_y_max = window[2] + win_x_max = window[3] + coordinate_violations = np.hstack([ + np.less(y_min, win_y_min), np.less(x_min, win_x_min), + np.greater(y_max, win_y_max), np.greater(x_max, win_x_max)]) + valid_indices = np.reshape(np.where(np.logical_not(np.max(coordinate_violations, axis=1))), [-1]) + return gather_boxlist(boxlist, valid_indices), valid_indices + + +def concatenate_boxlist(boxlists, fields=None): + """Concatenate list of BoxLists. + + This op concatenates a list of input BoxLists into a larger BoxList. It also + handles concatenation of BoxList fields as long as the field tensor shapes + are equal except for the first dimension. + + Args: + boxlists: list of BoxList objects + fields: optional list of fields to also concatenate. By default, all + fields from the first BoxList in the list are included in the concatenation. 
+ + Returns: + a BoxList with number of boxes equal to + sum([boxlist.num_boxes() for boxlist in BoxList]) + Raises: + ValueError: if boxlists is invalid (i.e., is not a list, is empty, or + contains non BoxList objects), or if requested fields are not contained in all boxlists + """ + if not isinstance(boxlists, list): + raise ValueError('boxlists should be a list') + if not boxlists: + raise ValueError('boxlists should have nonzero length') + for boxlist in boxlists: + if not isinstance(boxlist, BoxList): + raise ValueError('all elements of boxlists should be BoxList objects') + concatenated = BoxList(np.vstack([boxlist.get() for boxlist in boxlists])) + if fields is None: + fields = boxlists[0].get_extra_fields() + for field in fields: + first_field_shape = boxlists[0].get_field(field).shape + first_field_shape = first_field_shape[1:] + for boxlist in boxlists: + if not boxlist.has_field(field): + raise ValueError('boxlist must contain all requested fields') + field_shape = boxlist.get_field(field).shape + field_shape = field_shape[1:] + if field_shape != first_field_shape: + raise ValueError('field %s must have same shape for all boxlists ' + 'except for the 0th dimension.' % field) + concatenated_field = np.concatenate([boxlist.get_field(field) for boxlist in boxlists], axis=0) + concatenated.add_field(field, concatenated_field) + return concatenated + + +def filter_scores_greater_than(boxlist, thresh): + """Filter to keep only boxes with score exceeding a given threshold. + + This op keeps the collection of boxes whose corresponding scores are + greater than the input threshold. + + Args: + boxlist: BoxList holding N boxes. Must contain a 'scores' field representing detection scores. + thresh: scalar threshold + + Returns: + a BoxList holding M boxes where M <= N + + Raises: + ValueError: if boxlist not a BoxList object or if it does not have a scores field + """ + if not isinstance(boxlist, BoxList): + raise ValueError('boxlist must be a BoxList') + if not boxlist.has_field('scores'): + raise ValueError('input boxlist must have \'scores\' field') + scores = boxlist.get_field('scores') + if len(scores.shape) > 2: + raise ValueError('Scores should have rank 1 or 2') + if len(scores.shape) == 2 and scores.shape[1] != 1: + raise ValueError('Scores should have rank 1 or have shape ' + 'consistent with [None, 1]') + high_score_indices = np.reshape(np.where(np.greater(scores, thresh)), [-1]).astype(np.int32) + return gather_boxlist(boxlist, high_score_indices) + + +def change_coordinate_frame(boxlist, window): + """Change coordinate frame of the boxlist to be relative to window's frame. + + Given a window of the form [ymin, xmin, ymax, xmax], + changes bounding box coordinates from boxlist to be relative to this window + (e.g., the min corner maps to (0,0) and the max corner maps to (1,1)). + + An example use case is data augmentation: where we are given groundtruth + boxes (boxlist) and would like to randomly crop the image to some + window (window). In this case we need to change the coordinate frame of + each groundtruth box to be relative to this new window. + + Args: + boxlist: A BoxList object holding N boxes. + window: a size 4 1-D numpy array. + + Returns: + Returns a BoxList object with N boxes. 
+ """ + win_height = window[2] - window[0] + win_width = window[3] - window[1] + boxlist_new = scale( + BoxList(boxlist.get() - [window[0], window[1], window[0], window[1]]), 1.0 / win_height, 1.0 / win_width) + _copy_extra_fields(boxlist_new, boxlist) + + return boxlist_new + + +def _copy_extra_fields(boxlist_to_copy_to, boxlist_to_copy_from): + """Copies the extra fields of boxlist_to_copy_from to boxlist_to_copy_to. + + Args: + boxlist_to_copy_to: BoxList to which extra fields are copied. + boxlist_to_copy_from: BoxList from which fields are copied. + + Returns: + boxlist_to_copy_to with extra fields. + """ + for field in boxlist_to_copy_from.get_extra_fields(): + boxlist_to_copy_to.add_field(field, boxlist_to_copy_from.get_field(field)) + return boxlist_to_copy_to + + +def _update_valid_indices_by_removing_high_iou_boxes( + selected_indices, is_index_valid, intersect_over_union, threshold): + max_iou = np.max(intersect_over_union[:, selected_indices], axis=1) + return np.logical_and(is_index_valid, max_iou <= threshold) diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/np_mask_list.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/np_mask_list.py new file mode 100644 index 0000000000..22cdb8770f --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/np_mask_list.py @@ -0,0 +1,478 @@ +import numpy as np +from .np_box_list import * + +EPSILON = 1e-7 + + +class MaskList(BoxList): + """Convenience wrapper for BoxList with masks. + + BoxMaskList extends the np_box_list.BoxList to contain masks as well. + In particular, its constructor receives both boxes and masks. Note that the + masks correspond to the full image. + """ + + def __init__(self, box_data, mask_data): + """Constructs box collection. + + Args: + box_data: a numpy array of shape [N, 4] representing box coordinates + mask_data: a numpy array of shape [N, height, width] representing masks + with values are in {0,1}. The masks correspond to the full + image. The height and the width will be equal to image height and width. + + Raises: + ValueError: if bbox data is not a numpy array + ValueError: if invalid dimensions for bbox data + ValueError: if mask data is not a numpy array + ValueError: if invalid dimension for mask data + """ + super(MaskList, self).__init__(box_data) + if not isinstance(mask_data, np.ndarray): + raise ValueError('Mask data must be a numpy array.') + if len(mask_data.shape) != 3: + raise ValueError('Invalid dimensions for mask data.') + if mask_data.dtype != np.uint8: + raise ValueError('Invalid data type for mask data: uint8 is required.') + if mask_data.shape[0] != box_data.shape[0]: + raise ValueError('There should be the same number of boxes and masks.') + self.data['masks'] = mask_data + + def get_masks(self): + """Convenience function for accessing masks. + + Returns: + a numpy array of shape [N, height, width] representing masks + """ + return self.get_field('masks') + + +def boxlist_to_masklist(boxlist): + """Converts a BoxList containing 'masks' into a BoxMaskList. + + Args: + boxlist: An np_box_list.BoxList object. + + Returns: + An BoxMaskList object. + + Raises: + ValueError: If boxlist does not contain `masks` as a field. 
+    """
+    if not boxlist.has_field('masks'):
+        raise ValueError('boxlist does not contain mask field.')
+    masklist = MaskList(box_data=boxlist.get(), mask_data=boxlist.get_field('masks'))
+    extra_fields = boxlist.get_extra_fields()
+    for key in extra_fields:
+        if key != 'masks':
+            masklist.data[key] = boxlist.get_field(key)
+    return masklist
+
+
+def area_mask(masks):
+    """Computes area of masks.
+
+    Args:
+        masks: Numpy array with shape [N, height, width] holding N masks. Masks
+            values are of type np.uint8 and values are in {0,1}.
+
+    Returns:
+        a numpy array with shape [N*1] representing mask areas.
+
+    Raises:
+        ValueError: If masks.dtype is not np.uint8
+    """
+    if masks.dtype != np.uint8:
+        raise ValueError('Masks type should be np.uint8')
+    return np.sum(masks, axis=(1, 2), dtype=np.float32)
+
+
+def intersection_mask(masks1, masks2):
+    """Compute pairwise intersection areas between masks.
+
+    Args:
+        masks1: a numpy array with shape [N, height, width] holding N masks. Masks
+            values are of type np.uint8 and values are in {0,1}.
+        masks2: a numpy array with shape [M, height, width] holding M masks. Masks
+            values are of type np.uint8 and values are in {0,1}.
+
+    Returns:
+        a numpy array with shape [N*M] representing pairwise intersection area.
+
+    Raises:
+        ValueError: If masks1 and masks2 are not of type np.uint8.
+    """
+    if masks1.dtype != np.uint8 or masks2.dtype != np.uint8:
+        raise ValueError('masks1 and masks2 should be of type np.uint8')
+    n = masks1.shape[0]
+    m = masks2.shape[0]
+    answer = np.zeros([n, m], dtype=np.float32)
+    for i in np.arange(n):
+        for j in np.arange(m):
+            answer[i, j] = np.sum(np.minimum(masks1[i], masks2[j]), dtype=np.float32)
+    return answer
+
+
+def iou_mask(masks1, masks2):
+    """Computes pairwise intersection-over-union between mask collections.
+
+    Args:
+        masks1: a numpy array with shape [N, height, width] holding N masks. Masks
+            values are of type np.uint8 and values are in {0,1}.
+        masks2: a numpy array with shape [M, height, width] holding M masks. Masks
+            values are of type np.uint8 and values are in {0,1}.
+
+    Returns:
+        a numpy array with shape [N, M] representing pairwise iou scores.
+
+    Raises:
+        ValueError: If masks1 and masks2 are not of type np.uint8.
+    """
+    if masks1.dtype != np.uint8 or masks2.dtype != np.uint8:
+        raise ValueError('masks1 and masks2 should be of type np.uint8')
+    # Use the mask-level helpers here; the box-level intersection()/area()
+    # imported from np_box_list expect [N, 4] box arrays, not masks.
+    intersect = intersection_mask(masks1, masks2)
+    area1 = area_mask(masks1)
+    area2 = area_mask(masks2)
+    union = np.expand_dims(area1, axis=1) + np.expand_dims(area2, axis=0) - intersect
+    return intersect / np.maximum(union, EPSILON)
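+# A small sanity-check sketch (illustrative comment only): two 2x2 masks that
+# share a single pixel give IOU = 1 / (2 + 2 - 1) = 1/3.
+#
+#   m1 = np.array([[[1, 1], [0, 0]]], dtype=np.uint8)
+#   m2 = np.array([[[0, 1], [0, 1]]], dtype=np.uint8)
+#   print(iou_mask(m1, m2))  # ~0.333
+
+
+def ioa_mask(masks1, masks2):
+    """Computes pairwise intersection-over-area between mask collections.
+
+    Intersection-over-area (ioa) between two masks, mask1 and mask2 is defined as
+    their intersection area over mask2's area. Note that ioa is not symmetric,
+    that is, IOA(mask1, mask2) != IOA(mask2, mask1).
+
+    Args:
+        masks1: a numpy array with shape [N, height, width] holding N masks. Masks
+            values are of type np.uint8 and values are in {0,1}.
+        masks2: a numpy array with shape [M, height, width] holding M masks. Masks
+            values are of type np.uint8 and values are in {0,1}.
+
+    Returns:
+        a numpy array with shape [N, M] representing pairwise ioa scores.
+
+    Raises:
+        ValueError: If masks1 and masks2 are not of type np.uint8.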
+    """
+    if masks1.dtype != np.uint8 or masks2.dtype != np.uint8:
+        raise ValueError('masks1 and masks2 should be of type np.uint8')
+    # As in iou_mask, use the mask-level helpers rather than the box-level
+    # intersection()/area() imported from np_box_list.
+    intersect = intersection_mask(masks1, masks2)
+    areas = np.expand_dims(area_mask(masks2), axis=0)
+    return intersect / (areas + EPSILON)
+
+
+def area_masklist(masklist):
+    """Computes area of masks.
+
+    Args:
+        masklist: BoxMaskList holding N boxes and masks
+
+    Returns:
+        a numpy array with shape [N*1] representing mask areas
+    """
+    return area_mask(masklist.get_masks())
+
+
+def intersection_masklist(masklist1, masklist2):
+    """Compute pairwise intersection areas between masks.
+
+    Args:
+        masklist1: BoxMaskList holding N boxes and masks
+        masklist2: BoxMaskList holding M boxes and masks
+
+    Returns:
+        a numpy array with shape [N*M] representing pairwise intersection area
+    """
+    return intersection_mask(masklist1.get_masks(), masklist2.get_masks())
+
+
+def iou_masklist(masklist1, masklist2):
+    """Computes pairwise intersection-over-union between box and mask collections.
+
+    Args:
+        masklist1: BoxMaskList holding N boxes and masks
+        masklist2: BoxMaskList holding M boxes and masks
+
+    Returns:
+        a numpy array with shape [N, M] representing pairwise iou scores.
+    """
+    return iou_mask(masklist1.get_masks(), masklist2.get_masks())
+
+
+def ioa_masklist(masklist1, masklist2):
+    """Computes pairwise intersection-over-area between box and mask collections.
+
+    Intersection-over-area (ioa) between two masks mask1 and mask2 is defined as
+    their intersection area over mask2's area. Note that ioa is not symmetric,
+    that is, IOA(mask1, mask2) != IOA(mask2, mask1).
+
+    Args:
+        masklist1: BoxMaskList holding N boxes and masks
+        masklist2: BoxMaskList holding M boxes and masks
+
+    Returns:
+        a numpy array with shape [N, M] representing pairwise ioa scores.
+    """
+    return ioa_mask(masklist1.get_masks(), masklist2.get_masks())
+
+
+def gather_masklist(masklist, indices, fields=None):
+    """Gather boxes from BoxMaskList according to indices.
+
+    By default, gather returns boxes corresponding to the input index list, as
+    well as all additional fields stored in the masklist (indexing into the
+    first dimension). However one can optionally only gather from a
+    subset of fields.
+
+    Args:
+        masklist: BoxMaskList holding N boxes
+        indices: a 1-d numpy array of type int_
+        fields: (optional) list of fields to also gather from. If None (default), all fields
+            are gathered from. Pass an empty fields list to only gather the box coordinates.
+
+    Returns:
+        submasklist: a BoxMaskList corresponding to the subset of the input masklist specified by indices
+
+    Raises:
+        ValueError: if specified field is not contained in masklist or if the indices are not of type int_
+    """
+    if fields is not None:
+        if 'masks' not in fields:
+            fields.append('masks')
+    return boxlist_to_masklist(gather_boxlist(boxlist=masklist, indices=indices, fields=fields))
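+# Gathering keeps boxes, masks and any extra fields in sync (illustrative
+# comment only; shapes and values below are made up):
+#
+#   boxes = np.array([[0., 0., 1., 1.], [0., 0., 2., 2.]], dtype=np.float32)
+#   masks = np.zeros((2, 4, 4), dtype=np.uint8)
+#   masklist = MaskList(boxes, masks)
+#   masklist.add_field('scores', np.array([0.2, 0.8], dtype=np.float32))
+#   top = gather_masklist(masklist, np.array([1]))
+#   assert top.num_boxes() == 1 and top.get_masks().shape == (1, 4, 4)
+
+
+def sort_by_field_masklist(masklist, field, order=SortOrder.DESCEND):
+    """Sort boxes and associated fields according to a scalar field.
+
+    A common use case is reordering the boxes according to descending scores.
+
+    Args:
+        masklist: BoxMaskList holding N boxes.
+        field: A BoxMaskList field for sorting and reordering the BoxMaskList.
+        order: (Optional) 'descend' or 'ascend'. Default is descend.
+
+    Returns:
+        sorted_masklist: A sorted BoxMaskList with the field in the specified order.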
+    """
+    return boxlist_to_masklist(sort_by_field_boxlist(boxlist=masklist, field=field, order=order))
+
+
+def non_max_suppression_mask(masklist, max_output_size=10000, iou_threshold=1.0, score_threshold=-10.0):
+    """Non maximum suppression.
+
+    This op greedily selects a subset of detection bounding boxes, pruning
+    away boxes that have high IOU (intersection over union) overlap (> thresh)
+    with already selected boxes. In each iteration, the detected bounding box with
+    highest score in the available pool is selected.
+
+    Args:
+        masklist: BoxMaskList holding N boxes. Must contain a 'scores' field representing
+            detection scores. All scores belong to the same class.
+        max_output_size: maximum number of retained boxes
+        iou_threshold: intersection over union threshold.
+        score_threshold: minimum score threshold. Remove the boxes with scores
+            less than this value. Default value is set to -10. A very
+            low threshold to pass pretty much all the boxes, unless
+            the user sets a different score threshold.
+
+    Returns:
+        a BoxMaskList holding M boxes where M <= max_output_size
+
+    Raises:
+        ValueError: if 'scores' field does not exist
+        ValueError: if threshold is not in [0, 1]
+        ValueError: if max_output_size < 0
+    """
+    if not masklist.has_field('scores'):
+        raise ValueError('Field scores does not exist')
+    if iou_threshold < 0. or iou_threshold > 1.0:
+        raise ValueError('IOU threshold must be in [0, 1]')
+    if max_output_size < 0:
+        raise ValueError('max_output_size must be non-negative.')
+
+    # Use the masklist-aware helpers here: the plain filter_scores_greater_than
+    # and sort_by_field_boxlist return a BoxList, which has no get_masks().
+    masklist = filter_scores_greater_than_masklist(masklist, score_threshold)
+    if masklist.num_boxes() == 0:
+        return masklist
+
+    masklist = sort_by_field_masklist(masklist, 'scores')
+
+    # Prevent further computation if NMS is disabled.
+    if iou_threshold == 1.0:
+        if masklist.num_boxes() > max_output_size:
+            selected_indices = np.arange(max_output_size)
+            return gather_masklist(masklist, selected_indices)
+        else:
+            return masklist
+
+    masks = masklist.get_masks()
+    num_masks = masklist.num_boxes()
+
+    # is_index_valid is True only for all remaining valid boxes.
+    is_index_valid = np.full(num_masks, 1, dtype=bool)
+    selected_indices = []
+    num_output = 0
+    for i in range(num_masks):
+        if num_output < max_output_size:
+            if is_index_valid[i]:
+                num_output += 1
+                selected_indices.append(i)
+                is_index_valid[i] = False
+                valid_indices = np.where(is_index_valid)[0]
+                if valid_indices.size == 0:
+                    break
+
+                intersect_over_union = iou_mask(np.expand_dims(masks[i], axis=0), masks[valid_indices])
+                intersect_over_union = np.squeeze(intersect_over_union, axis=0)
+                is_index_valid[valid_indices] = np.logical_and(
+                    is_index_valid[valid_indices],
+                    intersect_over_union <= iou_threshold)
+    return gather_masklist(masklist, np.array(selected_indices))
+
+
+def multi_class_non_max_suppression_mask(masklist, score_thresh, iou_thresh, max_output_size):
+    """Multi-class version of non maximum suppression.
+
+    This op greedily selects a subset of detection bounding boxes, pruning away boxes that have
+    high IOU (intersection over union) overlap (> thresh) with already selected boxes. It
+    operates independently for each class for which scores are provided (via the scores field
+    of the input box_list), pruning boxes with score less than a provided threshold prior to
+    applying NMS.
+
+    Args:
+        masklist: BoxMaskList holding N boxes. Must contain a 'scores' field representing detection
+            scores.
This scores field is a tensor that can be 1 dimensional (in the case of a + single class) or 2-dimensional, in which case we assume that it takes the shape + [num_boxes, num_classes]. We further assume that this rank is known statically and + that scores.shape[1] is also known (i.e., the number of classes is fixed and known + at graph construction time). + score_thresh: scalar threshold for score (low scoring boxes are removed). + iou_thresh: scalar threshold for IOU (boxes that that high IOU overlap with previously + selected boxes are removed). + max_output_size: maximum number of retained boxes per class. + + Returns: + a masklist holding M boxes with a rank-1 scores field representing + corresponding scores for each box with scores sorted in decreasing order + and a rank-1 classes field representing a class label for each box. + Raises: + ValueError: if iou_thresh is not in [0, 1] or if input masklist does not have a valid scores field. + """ + if not 0 <= iou_thresh <= 1.0: + raise ValueError('thresh must be between 0 and 1') + if not isinstance(masklist, MaskList): + raise ValueError('masklist must be a masklist') + if not masklist.has_field('scores'): + raise ValueError('input masklist must have \'scores\' field') + scores = masklist.get_field('scores') + if len(scores.shape) == 1: + scores = np.reshape(scores, [-1, 1]) + elif len(scores.shape) == 2: + if scores.shape[1] is None: + raise ValueError('scores field must have statically defined second dimension') + else: + raise ValueError('scores field must be of rank 1 or 2') + + num_boxes = masklist.num_boxes() + num_scores = scores.shape[0] + num_classes = scores.shape[1] + + if num_boxes != num_scores: + raise ValueError('Incorrect scores field length: actual vs expected.') + + selected_boxes_list = [] + for class_idx in range(num_classes): + masklist_and_class_scores = MaskList(box_data=masklist.get(), mask_data=masklist.get_masks()) + class_scores = np.reshape(scores[0:num_scores, class_idx], [-1]) + masklist_and_class_scores.add_field('scores', class_scores) + masklist_filt = filter_scores_greater_than(masklist_and_class_scores, score_thresh) + nms_result = non_max_suppression( + masklist_filt, + max_output_size=max_output_size, + iou_threshold=iou_thresh, + score_threshold=score_thresh) + nms_result.add_field('classes', np.zeros_like(nms_result.get_field('scores')) + class_idx) + selected_boxes_list.append(nms_result) + selected_boxes = concatenate_boxlist(selected_boxes_list) + sorted_boxes = sort_by_field_boxlist(selected_boxes, 'scores') + return boxlist_to_masklist(boxlist=sorted_boxes) + + +def prune_non_overlapping_masklist(masklist1, masklist2, minoverlap=0.0): + """Prunes the boxes in list1 that overlap less than thresh with list2. + + For each mask in masklist1, we want its IOA to be more than minoverlap + with at least one of the masks in masklist2. If it does not, we remove + it. If the masks are not full size image, we do the pruning based on boxes. + + Args: + masklist1: BoxMaskList holding N boxes and masks. + masklist2: BoxMaskList holding M boxes and masks. + minoverlap: Minimum required overlap between boxes, to count them as overlapping. + + Returns: + A pruned masklist with size [N', 4]. 
+ """ + intersection_over_area = ioa_masklist(masklist2, masklist1) # [M, N] tensor + intersection_over_area = np.amax(intersection_over_area, axis=0) # [N] tensor + keep_bool = np.greater_equal(intersection_over_area, np.array(minoverlap)) + keep_inds = np.nonzero(keep_bool)[0] + new_masklist1 = gather_masklist(masklist1, keep_inds) + return new_masklist1 + + +def concatenate_masklist(masklists, fields=None): + """Concatenate list of masklists. + + This op concatenates a list of input masklists into a larger + masklist. It also + handles concatenation of masklist fields as long as the field tensor + shapes are equal except for the first dimension. + + Args: + masklists: list of BoxMaskList objects + fields: optional list of fields to also concatenate. By default, all + fields from the first BoxMaskList in the list are included in the concatenation. + + Returns: + a masklist with number of boxes equal to sum([masklist.num_boxes() for masklist in masklist]) + Raises: + ValueError: if masklists is invalid (i.e., is not a list, is empty, or contains non + masklist objects), or if requested fields are not contained in all masklists + """ + if fields is not None: + if 'masks' not in fields: + fields.append('masks') + return boxlist_to_masklist(concatenate_boxlist(boxlists=masklists, fields=fields)) + + +def filter_scores_greater_than_masklist(masklist, thresh): + """Filter to keep only boxes and masks with score exceeding a given threshold. + + This op keeps the collection of boxes and masks whose corresponding scores are + greater than the input threshold. + + Args: + masklist: BoxMaskList holding N boxes and masks. Must contain a + 'scores' field representing detection scores. + thresh: scalar threshold + + Returns: + a BoxMaskList holding M boxes and masks where M <= N + + Raises: + ValueError: if masklist not a BoxMaskList object or if it does not have a scores field + """ + if not isinstance(masklist, MaskList): + raise ValueError('masklist must be a BoxMaskList') + if not masklist.has_field('scores'): + raise ValueError('input masklist must have \'scores\' field') + scores = masklist.get_field('scores') + if len(scores.shape) > 2: + raise ValueError('Scores should have rank 1 or 2') + if len(scores.shape) == 2 and scores.shape[1] != 1: + raise ValueError('Scores should have rank 1 or have shape consistent with [None, 1]') + high_score_indices = np.reshape(np.where(np.greater(scores, thresh)), [-1]).astype(np.int32) + return gather_masklist(masklist, high_score_indices) diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/object_detection_evaluation.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/object_detection_evaluation.py new file mode 100644 index 0000000000..ee9211196f --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/object_detection_evaluation.py @@ -0,0 +1,273 @@ +import logging + +import numpy as np + +from effdet.evaluation.metrics import compute_precision_recall, compute_average_precision, compute_cor_loc +from effdet.evaluation.per_image_evaluation import PerImageEvaluation + + +class ObjectDetectionEvaluation: + """Internal implementation of Pascal object detection metrics.""" + + def __init__(self, + num_gt_classes, + matching_iou_threshold=0.5, + nms_iou_threshold=1.0, + nms_max_output_boxes=10000, + recall_lower_bound=0.0, + recall_upper_bound=1.0, + use_weighted_mean_ap=False, + label_id_offset=0, + 
group_of_weight=0.0, + per_image_eval_class=PerImageEvaluation): + """Constructor. + Args: + num_gt_classes: Number of ground-truth classes. + matching_iou_threshold: IOU threshold used for matching detected boxes to ground-truth boxes. + nms_iou_threshold: IOU threshold used for non-maximum suppression. + nms_max_output_boxes: Maximum number of boxes returned by non-maximum suppression. + recall_lower_bound: lower bound of recall operating area + recall_upper_bound: upper bound of recall operating area + use_weighted_mean_ap: (optional) boolean which determines if the mean + average precision is computed directly from the scores and tp_fp_labels of all classes. + label_id_offset: The label id offset. + group_of_weight: Weight of group-of boxes.If set to 0, detections of the + correct class within a group-of box are ignored. If weight is > 0, then + if at least one detection falls within a group-of box with + matching_iou_threshold, weight group_of_weight is added to true + positives. Consequently, if no detection falls within a group-of box, + weight group_of_weight is added to false negatives. + per_image_eval_class: The class that contains functions for computing per image metrics. + Raises: + ValueError: if num_gt_classes is smaller than 1. + """ + if num_gt_classes < 1: + raise ValueError('Need at least 1 groundtruth class for evaluation.') + + self.per_image_eval = per_image_eval_class( + num_gt_classes=num_gt_classes, + matching_iou_threshold=matching_iou_threshold, + nms_iou_threshold=nms_iou_threshold, + nms_max_output_boxes=nms_max_output_boxes, + group_of_weight=group_of_weight) + self.recall_lower_bound = recall_lower_bound + self.recall_upper_bound = recall_upper_bound + self.group_of_weight = group_of_weight + self.num_class = num_gt_classes + self.use_weighted_mean_ap = use_weighted_mean_ap + self.label_id_offset = label_id_offset + + self.gt_boxes = {} + self.gt_class_labels = {} + self.gt_masks = {} + self.gt_is_difficult_list = {} + self.gt_is_group_of_list = {} + self.num_gt_instances_per_class = np.zeros(self.num_class, dtype=float) + self.num_gt_imgs_per_class = np.zeros(self.num_class, dtype=int) + + self._initialize_detections() + + def _initialize_detections(self): + """Initializes internal data structures.""" + self.detection_keys = set() + self.scores_per_class = [[] for _ in range(self.num_class)] + self.tp_fp_labels_per_class = [[] for _ in range(self.num_class)] + self.num_images_correctly_detected_per_class = np.zeros(self.num_class) + self.average_precision_per_class = np.empty(self.num_class, dtype=float) + self.average_precision_per_class.fill(np.nan) + self.precisions_per_class = [np.nan] * self.num_class + self.recalls_per_class = [np.nan] * self.num_class + self.sum_tp_class = [np.nan] * self.num_class + + self.corloc_per_class = np.ones(self.num_class, dtype=float) + + def clear_detections(self): + self._initialize_detections() + + def add_single_ground_truth_image_info( + self, image_key, gt_boxes, gt_class_labels, + gt_is_difficult_list=None, gt_is_group_of_list=None, gt_masks=None): + """Adds groundtruth for a single image to be used for evaluation. + Args: + image_key: A unique string/integer identifier for the image. + gt_boxes: float32 numpy array of shape [num_boxes, 4] containing + `num_boxes` groundtruth boxes of the format [ymin, xmin, ymax, xmax] in absolute image coordinates. + gt_class_labels: integer numpy array of shape [num_boxes] + containing 0-indexed groundtruth classes for the boxes. 
+            gt_is_difficult_list: A length M numpy boolean array denoting
+                whether a ground truth box is a difficult instance or not. To support
+                the case that no boxes are difficult, it is by default set as None.
+            gt_is_group_of_list: A length M numpy boolean array denoting
+                whether a ground truth box is a group-of box or not. To support the case
+                that no boxes are groups-of, it is by default set as None.
+            gt_masks: uint8 numpy array of shape [num_boxes, height, width]
+                containing `num_boxes` groundtruth masks. The mask values range from 0 to 1.
+        """
+        if image_key in self.gt_boxes:
+            logging.warning('image %s has already been added to the ground truth database.', image_key)
+            return
+
+        self.gt_boxes[image_key] = gt_boxes
+        self.gt_class_labels[image_key] = gt_class_labels
+        self.gt_masks[image_key] = gt_masks
+        if gt_is_difficult_list is None:
+            num_boxes = gt_boxes.shape[0]
+            gt_is_difficult_list = np.zeros(num_boxes, dtype=bool)
+        gt_is_difficult_list = gt_is_difficult_list.astype(dtype=bool)
+        self.gt_is_difficult_list[image_key] = gt_is_difficult_list
+        if gt_is_group_of_list is None:
+            num_boxes = gt_boxes.shape[0]
+            gt_is_group_of_list = np.zeros(num_boxes, dtype=bool)
+        if gt_masks is None:
+            num_boxes = gt_boxes.shape[0]
+            mask_presence_indicator = np.zeros(num_boxes, dtype=bool)
+        else:
+            mask_presence_indicator = (np.sum(gt_masks, axis=(1, 2)) == 0).astype(dtype=bool)
+
+        gt_is_group_of_list = gt_is_group_of_list.astype(dtype=bool)
+        self.gt_is_group_of_list[image_key] = gt_is_group_of_list
+
+        # ignore boxes without masks
+        masked_gt_is_difficult_list = gt_is_difficult_list | mask_presence_indicator
+        for class_index in range(self.num_class):
+            num_gt_instances = np.sum(
+                gt_class_labels[~masked_gt_is_difficult_list & ~gt_is_group_of_list] == class_index)
+            num_groupof_gt_instances = self.group_of_weight * np.sum(
+                gt_class_labels[gt_is_group_of_list & ~masked_gt_is_difficult_list] == class_index)
+            self.num_gt_instances_per_class[class_index] += num_gt_instances + num_groupof_gt_instances
+            if np.any(gt_class_labels == class_index):
+                self.num_gt_imgs_per_class[class_index] += 1
+
+    def add_single_detected_image_info(
+            self, image_key, detected_boxes, detected_scores, detected_class_labels, detected_masks=None):
+        """Adds detections for a single image to be used for evaluation.
+        Args:
+            image_key: A unique string/integer identifier for the image.
+            detected_boxes: float32 numpy array of shape [num_boxes, 4] containing
+                `num_boxes` detection boxes of the format [ymin, xmin, ymax, xmax] in
+                absolute image coordinates.
+            detected_scores: float32 numpy array of shape [num_boxes] containing
+                detection scores for the boxes.
+            detected_class_labels: integer numpy array of shape [num_boxes] containing
+                0-indexed detection classes for the boxes.
+            detected_masks: np.uint8 numpy array of shape [num_boxes, height, width]
+                containing `num_boxes` detection masks with values ranging between 0 and 1.
+        Raises:
+            ValueError: if the number of boxes, scores and class labels differ in length.
+        """
+        if len(detected_boxes) != len(detected_scores) or len(detected_boxes) != len(detected_class_labels):
+            raise ValueError(
+                'detected_boxes, detected_scores and detected_class_labels '
+                'should all have same lengths. Got [%d, %d, %d]' % (
+                    len(detected_boxes), len(detected_scores), len(detected_class_labels)))
+
+        if image_key in self.detection_keys:
+            logging.warning('image %s has already been added to the detection result database', image_key)
+            return
+
+        self.detection_keys.add(image_key)
+        if image_key in self.gt_boxes:
+            gt_boxes = self.gt_boxes[image_key]
+            gt_class_labels = self.gt_class_labels[image_key]
+            # Masks are popped instead of looked up. The reason is that we do not want
+            # to keep all masks in memory which can cause memory overflow.
+            gt_masks = self.gt_masks.pop(image_key)
+            gt_is_difficult_list = self.gt_is_difficult_list[image_key]
+            gt_is_group_of_list = self.gt_is_group_of_list[image_key]
+        else:
+            gt_boxes = np.empty(shape=[0, 4], dtype=float)
+            gt_class_labels = np.array([], dtype=int)
+            if detected_masks is None:
+                gt_masks = None
+            else:
+                gt_masks = np.empty(shape=[0, 1, 1], dtype=float)
+            gt_is_difficult_list = np.array([], dtype=bool)
+            gt_is_group_of_list = np.array([], dtype=bool)
+        scores, tp_fp_labels, is_class_correctly_detected_in_image = \
+            self.per_image_eval.compute_object_detection_metrics(
+                detected_boxes=detected_boxes,
+                detected_scores=detected_scores,
+                detected_class_labels=detected_class_labels,
+                gt_boxes=gt_boxes,
+                gt_class_labels=gt_class_labels,
+                gt_is_difficult_list=gt_is_difficult_list,
+                gt_is_group_of_list=gt_is_group_of_list,
+                detected_masks=detected_masks,
+                gt_masks=gt_masks)
+
+        for i in range(self.num_class):
+            if scores[i].shape[0] > 0:
+                self.scores_per_class[i].append(scores[i])
+                self.tp_fp_labels_per_class[i].append(tp_fp_labels[i])
+        self.num_images_correctly_detected_per_class += is_class_correctly_detected_in_image
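+    # Hypothetical end-to-end usage (comment sketch only; the image key, boxes,
+    # labels and scores below are made-up illustration values):
+    #
+    #   evaluator = ObjectDetectionEvaluation(num_gt_classes=3)
+    #   evaluator.add_single_ground_truth_image_info(
+    #       'img0',
+    #       gt_boxes=np.array([[10., 10., 50., 50.]], dtype=np.float32),
+    #       gt_class_labels=np.array([0], dtype=int))
+    #   evaluator.add_single_detected_image_info(
+    #       'img0',
+    #       detected_boxes=np.array([[12., 11., 51., 49.]], dtype=np.float32),
+    #       detected_scores=np.array([0.8], dtype=np.float32),
+    #       detected_class_labels=np.array([0], dtype=int))
+    #   metrics = evaluator.evaluate()  # dict with per_class_ap, mean_ap, ...
+
+    def evaluate(self):
+        """Compute evaluation result.
+        Returns:
+            A dict with the following fields -
+            average_precision: float numpy array of average precision for each class.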
+ mean_ap: mean average precision of all classes, float scalar + precisions: List of precisions, each precision is a float numpy array + recalls: List of recalls, each recall is a float numpy array + corloc: numpy float array + mean_corloc: Mean CorLoc score for each class, float scalar + """ + if (self.num_gt_instances_per_class == 0).any(): + logging.warning( + 'The following classes have no ground truth examples: %s', + np.squeeze(np.argwhere(self.num_gt_instances_per_class == 0)) + self.label_id_offset) + + if self.use_weighted_mean_ap: + all_scores = np.array([], dtype=float) + all_tp_fp_labels = np.array([], dtype=bool) + for class_index in range(self.num_class): + if self.num_gt_instances_per_class[class_index] == 0: + continue + if not self.scores_per_class[class_index]: + scores = np.array([], dtype=float) + tp_fp_labels = np.array([], dtype=float) + else: + scores = np.concatenate(self.scores_per_class[class_index]) + tp_fp_labels = np.concatenate(self.tp_fp_labels_per_class[class_index]) + if self.use_weighted_mean_ap: + all_scores = np.append(all_scores, scores) + all_tp_fp_labels = np.append(all_tp_fp_labels, tp_fp_labels) + precision, recall = compute_precision_recall( + scores, tp_fp_labels, self.num_gt_instances_per_class[class_index]) + recall_within_bound_indices = [ + index for index, value in enumerate(recall) if + value >= self.recall_lower_bound and value <= self.recall_upper_bound + ] + recall_within_bound = recall[recall_within_bound_indices] + precision_within_bound = precision[recall_within_bound_indices] + + self.precisions_per_class[class_index] = precision_within_bound + self.recalls_per_class[class_index] = recall_within_bound + self.sum_tp_class[class_index] = tp_fp_labels.sum() + average_precision = compute_average_precision(precision_within_bound, recall_within_bound) + self.average_precision_per_class[class_index] = average_precision + logging.debug('average_precision: %f', average_precision) + + self.corloc_per_class = compute_cor_loc( + self.num_gt_imgs_per_class, self.num_images_correctly_detected_per_class) + + if self.use_weighted_mean_ap: + num_gt_instances = np.sum(self.num_gt_instances_per_class) + precision, recall = compute_precision_recall(all_scores, all_tp_fp_labels, num_gt_instances) + recall_within_bound_indices = [ + index for index, value in enumerate(recall) if + value >= self.recall_lower_bound and value <= self.recall_upper_bound + ] + recall_within_bound = recall[recall_within_bound_indices] + precision_within_bound = precision[recall_within_bound_indices] + mean_ap = compute_average_precision(precision_within_bound, recall_within_bound) + else: + mean_ap = np.nanmean(self.average_precision_per_class) + mean_corloc = np.nanmean(self.corloc_per_class) + + return dict( + per_class_ap=self.average_precision_per_class, mean_ap=mean_ap, + per_class_precision=self.precisions_per_class, + per_class_recall=self.recalls_per_class, + per_class_corlocs=self.corloc_per_class, mean_corloc=mean_corloc) diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/per_image_evaluation.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/per_image_evaluation.py new file mode 100644 index 0000000000..e904027c52 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluation/per_image_evaluation.py @@ -0,0 +1,538 @@ +from .np_mask_list import * +from .metrics import * + + +class PerImageEvaluation: + """Evaluate 
detection result of a single image."""
+
+    def __init__(self,
+                 num_gt_classes,
+                 matching_iou_threshold=0.5,
+                 nms_iou_threshold=0.3,
+                 nms_max_output_boxes=50,
+                 group_of_weight=0.0):
+        """Initializes PerImageEvaluation with evaluation parameters.
+        Args:
+            num_gt_classes: Number of ground truth object classes
+            matching_iou_threshold: A ratio of area intersection to union, which is
+                the threshold to consider whether a detection is true positive or not
+            nms_iou_threshold: IOU threshold used in Non Maximum Suppression.
+            nms_max_output_boxes: Number of maximum output boxes in NMS.
+            group_of_weight: Weight of the group-of boxes.
+        """
+        self.matching_iou_threshold = matching_iou_threshold
+        self.nms_iou_threshold = nms_iou_threshold
+        self.nms_max_output_boxes = nms_max_output_boxes
+        self.num_gt_classes = num_gt_classes
+        self.group_of_weight = group_of_weight
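+    # Hypothetical per-image call (comment sketch only; det_* and gt_* stand in
+    # for arrays of the shapes documented below):
+    #
+    #   evaluator = PerImageEvaluation(num_gt_classes=2)
+    #   scores, tp_fp, corloc = evaluator.compute_object_detection_metrics(
+    #       detected_boxes=det_boxes, detected_scores=det_scores,
+    #       detected_class_labels=det_labels, gt_boxes=gt_boxes,
+    #       gt_class_labels=gt_labels,
+    #       gt_is_difficult_list=np.zeros(len(gt_boxes), dtype=bool),
+    #       gt_is_group_of_list=np.zeros(len(gt_boxes), dtype=bool))
+
+    def compute_object_detection_metrics(
+            self, detected_boxes, detected_scores, detected_class_labels,
+            gt_boxes, gt_class_labels, gt_is_difficult_list, gt_is_group_of_list,
+            detected_masks=None, gt_masks=None):
+        """Evaluates detections as being tp, fp or weighted from a single image.
+        The evaluation is done in two stages:
+        1. All detections are matched to non group-of boxes; true positives are
+           determined and detections matched to difficult boxes are ignored.
+        2. Detections that are determined as false positives are matched against
+           group-of boxes and weighted if matched.
+        Args:
+            detected_boxes: A float numpy array of shape [N, 4], representing N
+                regions of detected object regions. Each row is of the format [y_min, x_min, y_max, x_max]
+            detected_scores: A float numpy array of shape [N, 1], representing the
+                confidence scores of the detected N object instances.
+            detected_class_labels: An integer numpy array of shape [N, 1], representing
+                the class labels of the detected N object instances.
+            gt_boxes: A float numpy array of shape [M, 4], representing M
+                regions of object instances in ground truth
+            gt_class_labels: An integer numpy array of shape [M, 1],
+                representing M class labels of object instances in ground truth
+            gt_is_difficult_list: A boolean numpy array of length M denoting
+                whether a ground truth box is a difficult instance or not
+            gt_is_group_of_list: A boolean numpy array of length M denoting
+                whether a ground truth box has group-of tag
+            detected_masks: (optional) A uint8 numpy array of shape [N, height,
+                width]. If not None, the metrics will be computed based on masks.
+            gt_masks: (optional) A uint8 numpy array of shape [M, height,
+                width]. Can have empty masks, i.e. where all values are 0.
+        Returns:
+            scores: A list of C float numpy arrays. Each numpy array is of
+                shape [K, 1], representing K scores detected with object class label c
+            tp_fp_labels: A list of C boolean numpy arrays.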
Each numpy array
+                is of shape [K, 1], representing K True/False positive labels of
+                object instances detected with class label c
+            is_class_correctly_detected_in_image: a numpy integer array of
+                shape [C, 1], indicating whether the corresponding class has at least
+                one instance being correctly detected in the image
+        """
+        detected_boxes, detected_scores, detected_class_labels, detected_masks = (
+            self._remove_invalid_boxes(detected_boxes, detected_scores, detected_class_labels, detected_masks))
+
+        scores, tp_fp_labels = self._compute_tp_fp(
+            detected_boxes=detected_boxes,
+            detected_scores=detected_scores,
+            detected_class_labels=detected_class_labels,
+            gt_boxes=gt_boxes,
+            gt_class_labels=gt_class_labels,
+            gt_is_difficult_list=gt_is_difficult_list,
+            gt_is_group_of_list=gt_is_group_of_list,
+            detected_masks=detected_masks,
+            gt_masks=gt_masks)
+
+        is_class_correctly_detected_in_image = self._compute_cor_loc(
+            detected_boxes=detected_boxes,
+            detected_scores=detected_scores,
+            detected_class_labels=detected_class_labels,
+            gt_boxes=gt_boxes,
+            gt_class_labels=gt_class_labels,
+            detected_masks=detected_masks,
+            gt_masks=gt_masks)
+
+        return scores, tp_fp_labels, is_class_correctly_detected_in_image
+
+    def _compute_cor_loc(
+            self, detected_boxes, detected_scores, detected_class_labels,
+            gt_boxes, gt_class_labels, detected_masks=None, gt_masks=None):
+        """Compute CorLoc score for object detection result.
+        Args:
+            detected_boxes: A float numpy array of shape [N, 4], representing N
+                regions of detected object regions. Each row is of the format [y_min, x_min, y_max, x_max]
+            detected_scores: A float numpy array of shape [N, 1], representing the
+                confidence scores of the detected N object instances.
+            detected_class_labels: An integer numpy array of shape [N, 1], representing
+                the class labels of the detected N object instances.
+            gt_boxes: A float numpy array of shape [M, 4], representing M
+                regions of object instances in ground truth
+            gt_class_labels: An integer numpy array of shape [M, 1],
+                representing M class labels of object instances in ground truth
+            detected_masks: (optional) A uint8 numpy array of shape [N, height, width].
+                If not None, the scores will be computed based on masks.
+            gt_masks: (optional) A uint8 numpy array of shape [M, height, width].
+        Returns:
+            is_class_correctly_detected_in_image: a numpy integer array of
+                shape [C, 1], indicating whether the corresponding class has at least
+                one instance being correctly detected in the image
+        Raises:
+            ValueError: If detected masks is not None but groundtruth masks are None,
+                or the other way around.
+ """ + if (detected_masks is not None and gt_masks is None) or ( + detected_masks is None and gt_masks is not None): + raise ValueError( + 'If `detected_masks` is provided, then `gt_masks` should also be provided.') + + is_class_correctly_detected_in_image = np.zeros( + self.num_gt_classes, dtype=int) + for i in range(self.num_gt_classes): + (gt_boxes_at_ith_class, gt_masks_at_ith_class, + detected_boxes_at_ith_class, detected_scores_at_ith_class, + detected_masks_at_ith_class) = self._get_ith_class_arrays( + detected_boxes, detected_scores, detected_masks, + detected_class_labels, gt_boxes, gt_masks, + gt_class_labels, i) + is_class_correctly_detected_in_image[i] = ( + self._compute_is_class_correctly_detected_in_image( + detected_boxes=detected_boxes_at_ith_class, + detected_scores=detected_scores_at_ith_class, + gt_boxes=gt_boxes_at_ith_class, + detected_masks=detected_masks_at_ith_class, + gt_masks=gt_masks_at_ith_class)) + + return is_class_correctly_detected_in_image + + def _compute_is_class_correctly_detected_in_image( + self, detected_boxes, detected_scores, gt_boxes, detected_masks=None, gt_masks=None): + """Compute CorLoc score for a single class. + Args: + detected_boxes: A numpy array of shape [N, 4] representing detected box coordinates + detected_scores: A 1-d numpy array of length N representing classification score + gt_boxes: A numpy array of shape [M, 4] representing ground truth box coordinates + detected_masks: (optional) A np.uint8 numpy array of shape [N, height, width]. + If not None, the scores will be computed based on masks. + gt_masks: (optional) A np.uint8 numpy array of shape [M, height, width]. + Returns: + is_class_correctly_detected_in_image: An integer 1 or 0 denoting whether a + class is correctly detected in the image or not + """ + if detected_boxes.size > 0: + if gt_boxes.size > 0: + max_score_id = np.argmax(detected_scores) + mask_mode = False + if detected_masks is not None and gt_masks is not None: + mask_mode = True + if mask_mode: + detected_boxlist = MaskList( + box_data=np.expand_dims(detected_boxes[max_score_id], axis=0), + mask_data=np.expand_dims(detected_masks[max_score_id], axis=0)) + gt_boxlist = MaskList(box_data=gt_boxes, mask_data=gt_masks) + iou = iou_masklist(detected_boxlist, gt_boxlist) + else: + detected_boxlist = BoxList(np.expand_dims(detected_boxes[max_score_id, :], axis=0)) + gt_boxlist = BoxList(gt_boxes) + iou = iou_boxlist(detected_boxlist, gt_boxlist) + if np.max(iou) >= self.matching_iou_threshold: + return 1 + return 0 + + def _compute_tp_fp( + self, detected_boxes, detected_scores, detected_class_labels, + gt_boxes, gt_class_labels, gt_is_difficult_list, gt_is_group_of_list, detected_masks=None, gt_masks=None): + """Labels true/false positives of detections of an image across all classes. + Args: + detected_boxes: A float numpy array of shape [N, 4], representing N + regions of detected object regions. Each row is of the format [y_min, x_min, y_max, x_max] + detected_scores: A float numpy array of shape [N, 1], representing the + confidence scores of the detected N object instances. + detected_class_labels: A integer numpy array of shape [N, 1], representing + the class labels of the detected N object instances. 
+ gt_boxes: A float numpy array of shape [M, 4], representing M + regions of object instances in ground truth + gt_class_labels: An integer numpy array of shape [M, 1], + representing M class labels of object instances in ground truth + gt_is_difficult_list: A boolean numpy array of length M denoting + whether a ground truth box is a difficult instance or not + gt_is_group_of_list: A boolean numpy array of length M denoting + whether a ground truth box has group-of tag + detected_masks: (optional) A np.uint8 numpy array of shape [N, height, + width]. If not None, the scores will be computed based on masks. + gt_masks: (optional) A np.uint8 numpy array of shape [M, height, width]. + Returns: + result_scores: A list of float numpy arrays. Each numpy array is of + shape [K, 1], representing K scores detected with object class label c + result_tp_fp_labels: A list of boolean numpy array. Each numpy array is of + shape [K, 1], representing K True/False positive label of object + instances detected with class label c + Raises: + ValueError: If detected masks is not None but groundtruth masks are None, + or the other way around. + """ + if detected_masks is not None and gt_masks is None: + raise ValueError( + 'Detected masks is available but groundtruth masks is not.') + if detected_masks is None and gt_masks is not None: + raise ValueError( + 'Groundtruth masks is available but detected masks is not.') + + result_scores = [] + result_tp_fp_labels = [] + for i in range(self.num_gt_classes): + gt_is_difficult_list_at_ith_class = ( + gt_is_difficult_list[gt_class_labels == i]) + gt_is_group_of_list_at_ith_class = ( + gt_is_group_of_list[gt_class_labels == i]) + (gt_boxes_at_ith_class, gt_masks_at_ith_class, + detected_boxes_at_ith_class, detected_scores_at_ith_class, + detected_masks_at_ith_class) = self._get_ith_class_arrays( + detected_boxes, detected_scores, detected_masks, + detected_class_labels, gt_boxes, gt_masks, + gt_class_labels, i) + scores, tp_fp_labels = self._compute_tp_fp_for_single_class( + detected_boxes=detected_boxes_at_ith_class, + detected_scores=detected_scores_at_ith_class, + gt_boxes=gt_boxes_at_ith_class, + gt_is_difficult_list=gt_is_difficult_list_at_ith_class, + gt_is_group_of_list=gt_is_group_of_list_at_ith_class, + detected_masks=detected_masks_at_ith_class, + gt_masks=gt_masks_at_ith_class) + result_scores.append(scores) + result_tp_fp_labels.append(tp_fp_labels) + return result_scores, result_tp_fp_labels + + def _get_overlaps_and_scores_mask_mode( + self, detected_boxes, detected_scores, detected_masks, + gt_boxes, gt_masks, gt_is_group_of_list): + """Computes overlaps and scores between detected and groudntruth masks. + Args: + detected_boxes: A numpy array of shape [N, 4] representing detected box coordinates + detected_scores: A 1-d numpy array of length N representing classification score + detected_masks: A uint8 numpy array of shape [N, height, width]. If not + None, the scores will be computed based on masks. + gt_boxes: A numpy array of shape [M, 4] representing ground truth box coordinates + gt_masks: A uint8 numpy array of shape [M, height, width]. + gt_is_group_of_list: A boolean numpy array of length M denoting + whether a ground truth box has group-of tag. If a groundtruth box is + group-of box, every detection matching this box is ignored. + Returns: + iou: A float numpy array of size [num_detected_boxes, num_gt_boxes]. If + gt_non_group_of_boxlist.num_boxes() == 0 it will be None. + ioa: A float numpy array of size [num_detected_boxes, num_gt_boxes]. 
If + gt_group_of_boxlist.num_boxes() == 0 it will be None. + scores: The score of the detected boxlist. + num_boxes: Number of non-maximum suppressed detected boxes. + """ + detected_boxlist = MaskList(box_data=detected_boxes, mask_data=detected_masks) + detected_boxlist.add_field('scores', detected_scores) + detected_boxlist = non_max_suppression(detected_boxlist, self.nms_max_output_boxes, self.nms_iou_threshold) + gt_non_group_of_boxlist = MaskList( + box_data=gt_boxes[~gt_is_group_of_list], mask_data=gt_masks[~gt_is_group_of_list]) + gt_group_of_boxlist = MaskList( + box_data=gt_boxes[gt_is_group_of_list], mask_data=gt_masks[gt_is_group_of_list]) + iou_b = iou_masklist(detected_boxlist, gt_non_group_of_boxlist) + ioa_b = np.transpose(ioa_masklist(gt_group_of_boxlist, detected_boxlist)) + scores = detected_boxlist.get_field('scores') + num_boxes = detected_boxlist.num_boxes() + return iou_b, ioa_b, scores, num_boxes + + def _get_overlaps_and_scores_box_mode( + self, detected_boxes, detected_scores, gt_boxes, gt_is_group_of_list): + """Computes overlaps and scores between detected and groudntruth boxes. + Args: + detected_boxes: A numpy array of shape [N, 4] representing detected box coordinates + detected_scores: A 1-d numpy array of length N representing classification score + gt_boxes: A numpy array of shape [M, 4] representing ground truth box coordinates + gt_is_group_of_list: A boolean numpy array of length M denoting + whether a ground truth box has group-of tag. If a groundtruth box is + group-of box, every detection matching this box is ignored. + Returns: + iou: A float numpy array of size [num_detected_boxes, num_gt_boxes]. If + gt_non_group_of_boxlist.num_boxes() == 0 it will be None. + ioa: A float numpy array of size [num_detected_boxes, num_gt_boxes]. If + gt_group_of_boxlist.num_boxes() == 0 it will be None. + scores: The score of the detected boxlist. + num_boxes: Number of non-maximum suppressed detected boxes. + """ + detected_boxlist = BoxList(detected_boxes) + detected_boxlist.add_field('scores', detected_scores) + detected_boxlist = non_max_suppression(detected_boxlist, self.nms_max_output_boxes, self.nms_iou_threshold) + gt_non_group_of_boxlist = BoxList(gt_boxes[~gt_is_group_of_list]) + gt_group_of_boxlist = BoxList(gt_boxes[gt_is_group_of_list]) + iou_b = iou_boxlist(detected_boxlist, gt_non_group_of_boxlist) + ioa_b = np.transpose(ioa_boxlist(gt_group_of_boxlist, detected_boxlist)) + scores = detected_boxlist.get_field('scores') + num_boxes = detected_boxlist.num_boxes() + return iou_b, ioa_b, scores, num_boxes + + def _compute_tp_fp_for_single_class( + self, detected_boxes, detected_scores, gt_boxes, + gt_is_difficult_list, gt_is_group_of_list, detected_masks=None, gt_masks=None): + """Labels boxes detected with the same class from the same image as tp/fp. + Args: + detected_boxes: A numpy array of shape [N, 4] representing detected box coordinates + detected_scores: A 1-d numpy array of length N representing classification score + gt_boxes: A numpy array of shape [M, 4] representing ground truth box coordinates + gt_is_difficult_list: A boolean numpy array of length M denoting + whether a ground truth box is a difficult instance or not. If a + groundtruth box is difficult, every detection matching this box is ignored. + gt_is_group_of_list: A boolean numpy array of length M denoting + whether a ground truth box has group-of tag. If a groundtruth box is + group-of box, every detection matching this box is ignored. 
+ detected_masks: (optional) A uint8 numpy array of shape [N, height, + width]. If not None, the scores will be computed based on masks. + gt_masks: (optional) A uint8 numpy array of shape [M, height, width]. + Returns: + Two arrays of the same size, containing all boxes that were evaluated as + being true positives or false positives; if a box matched to a difficult + box or to a group-of box, it is ignored. + scores: A numpy array representing the detection scores. + tp_fp_labels: a boolean numpy array indicating whether a detection is a true positive. + """ + if detected_boxes.size == 0: + return np.array([], dtype=float), np.array([], dtype=bool) + + mask_mode = False + if detected_masks is not None and gt_masks is not None: + mask_mode = True + + iou_b = np.ndarray([0, 0]) + ioa_b = np.ndarray([0, 0]) + iou_m = np.ndarray([0, 0]) + ioa_m = np.ndarray([0, 0]) + if mask_mode: + # For Instance Segmentation Evaluation on Open Images V5, not all boxed + # instances have corresponding segmentation annotations. Those boxes that + # dont have segmentation annotations are represented as empty masks in + # gt_masks nd array. + mask_presence_indicator = (np.sum(gt_masks, axis=(1, 2)) > 0) + + iou_m, ioa_m, scores, num_detected_boxes = self._get_overlaps_and_scores_mask_mode( + detected_boxes=detected_boxes, + detected_scores=detected_scores, + detected_masks=detected_masks, + gt_boxes=gt_boxes[mask_presence_indicator, :], + gt_masks=gt_masks[mask_presence_indicator, :], + gt_is_group_of_list=gt_is_group_of_list[mask_presence_indicator]) + + if sum(mask_presence_indicator) < len(mask_presence_indicator): + # Not all masks are present - some masks are empty + iou_b, ioa_b, _, num_detected_boxes = self._get_overlaps_and_scores_box_mode( + detected_boxes=detected_boxes, + detected_scores=detected_scores, + gt_boxes=gt_boxes[~mask_presence_indicator, :], + gt_is_group_of_list=gt_is_group_of_list[~mask_presence_indicator]) + num_detected_boxes = detected_boxes.shape[0] + else: + mask_presence_indicator = np.zeros(gt_is_group_of_list.shape, dtype=bool) + iou_b, ioa_b, scores, num_detected_boxes = self._get_overlaps_and_scores_box_mode( + detected_boxes=detected_boxes, + detected_scores=detected_scores, + gt_boxes=gt_boxes, + gt_is_group_of_list=gt_is_group_of_list) + + if gt_boxes.size == 0: + return scores, np.zeros(num_detected_boxes, dtype=bool) + + tp_fp_labels = np.zeros(num_detected_boxes, dtype=bool) + is_matched_to_box = np.zeros(num_detected_boxes, dtype=bool) + is_matched_to_difficult = np.zeros(num_detected_boxes, dtype=bool) + is_matched_to_group_of = np.zeros(num_detected_boxes, dtype=bool) + + def compute_match_iou(iou_matrix, gt_nongroup_of_is_difficult_list, is_box): + """Computes TP/FP for non group-of box matching. + The function updates the following local variables: + tp_fp_labels - if a box is matched to group-of + is_matched_to_difficult - the detections that were processed at this are + matched to difficult box. + is_matched_to_box - the detections that were processed at this stage are marked as is_box. + Args: + iou_matrix: intersection-over-union matrix [num_gt_boxes]x[num_det_boxes]. + gt_nongroup_of_is_difficult_list: boolean that specifies if gt box is difficult. + is_box: boolean that specifies if currently boxes or masks are processed. 
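+            Note:
+                tp_fp_labels is also updated here: a detection whose best-overlapping
+                groundtruth is non-difficult, not yet claimed by another detection, and
+                overlaps it with IoU >= matching_iou_threshold is marked True (a true
+                positive); each groundtruth box can be claimed by at most one detection.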
+ """ + max_overlap_gt_ids = np.argmax(iou_matrix, axis=1) + is_gt_detected = np.zeros(iou_matrix.shape[1], dtype=bool) + for i in range(num_detected_boxes): + gt_id = max_overlap_gt_ids[i] + is_evaluatable = ( + not tp_fp_labels[i] and + not is_matched_to_difficult[i] and + iou_matrix[i, gt_id] >= self.matching_iou_threshold and + not is_matched_to_group_of[i]) + if is_evaluatable: + if not gt_nongroup_of_is_difficult_list[gt_id]: + if not is_gt_detected[gt_id]: + tp_fp_labels[i] = True + is_gt_detected[gt_id] = True + is_matched_to_box[i] = is_box + else: + is_matched_to_difficult[i] = True + + def compute_match_ioa(ioa_matrix, is_box): + """Computes TP/FP for group-of box matching. + The function updates the following local variables: + is_matched_to_group_of - if a box is matched to group-of + is_matched_to_box - the detections that were processed at this stage are marked as is_box. + Args: + ioa_matrix: intersection-over-area matrix [num_gt_boxes]x[num_det_boxes]. + is_box: boolean that specifies if currently boxes or masks are processed. + Returns: + scores_group_of: of detections matched to group-of boxes[num_groupof_matched]. + tp_fp_labels_group_of: boolean array of size [num_groupof_matched], all values are True. + """ + scores_group_of = np.zeros(ioa_matrix.shape[1], dtype=float) + tp_fp_labels_group_of = self.group_of_weight * np.ones(ioa_matrix.shape[1], dtype=float) + max_overlap_group_of_gt_ids = np.argmax(ioa_matrix, axis=1) + for i in range(num_detected_boxes): + gt_id = max_overlap_group_of_gt_ids[i] + is_evaluatable = ( + not tp_fp_labels[i] and + not is_matched_to_difficult[i] and + ioa_matrix[i, gt_id] >= self.matching_iou_threshold and + not is_matched_to_group_of[i]) + if is_evaluatable: + is_matched_to_group_of[i] = True + is_matched_to_box[i] = is_box + scores_group_of[gt_id] = max(scores_group_of[gt_id], scores[i]) + selector = np.where((scores_group_of > 0) & (tp_fp_labels_group_of > 0)) + scores_group_of = scores_group_of[selector] + tp_fp_labels_group_of = tp_fp_labels_group_of[selector] + + return scores_group_of, tp_fp_labels_group_of + + # The evaluation is done in two stages: + # 1. Evaluate all objects that actually have instance level masks. + # 2. Evaluate all objects that are not already evaluated as boxes. + if iou_m.shape[1] > 0: + gt_is_difficult_mask_list = gt_is_difficult_list[mask_presence_indicator] + gt_is_group_of_mask_list = gt_is_group_of_list[mask_presence_indicator] + compute_match_iou(iou_m, gt_is_difficult_mask_list[~gt_is_group_of_mask_list], is_box=False) + + scores_mask_group_of = np.ndarray([0], dtype=float) + tp_fp_labels_mask_group_of = np.ndarray([0], dtype=float) + if ioa_m.shape[1] > 0: + scores_mask_group_of, tp_fp_labels_mask_group_of = compute_match_ioa(ioa_m, is_box=False) + + # Tp-fp evaluation for non-group of boxes (if any). + if iou_b.shape[1] > 0: + gt_is_difficult_box_list = gt_is_difficult_list[~mask_presence_indicator] + gt_is_group_of_box_list = gt_is_group_of_list[~mask_presence_indicator] + compute_match_iou(iou_b, gt_is_difficult_box_list[~gt_is_group_of_box_list], is_box=True) + + scores_box_group_of = np.ndarray([0], dtype=float) + tp_fp_labels_box_group_of = np.ndarray([0], dtype=float) + if ioa_b.shape[1] > 0: + scores_box_group_of, tp_fp_labels_box_group_of = compute_match_ioa(ioa_b, is_box=True) + + if mask_mode: + # Note: here crowds are treated as ignore regions. 
+ valid_entries = (~is_matched_to_difficult & ~is_matched_to_group_of & ~is_matched_to_box) + return np.concatenate((scores[valid_entries], scores_mask_group_of)),\ + np.concatenate((tp_fp_labels[valid_entries].astype(float), tp_fp_labels_mask_group_of)) + else: + valid_entries = (~is_matched_to_difficult & ~is_matched_to_group_of) + return np.concatenate((scores[valid_entries], scores_box_group_of)),\ + np.concatenate((tp_fp_labels[valid_entries].astype(float), tp_fp_labels_box_group_of)) + + def _get_ith_class_arrays( + self, detected_boxes, detected_scores, detected_masks, detected_class_labels, + gt_boxes, gt_masks, gt_class_labels, class_index): + """Returns numpy arrays belonging to class with index `class_index`. + Args: + detected_boxes: A numpy array containing detected boxes. + detected_scores: A numpy array containing detected scores. + detected_masks: A numpy array containing detected masks. + detected_class_labels: A numpy array containing detected class labels. + gt_boxes: A numpy array containing groundtruth boxes. + gt_masks: A numpy array containing groundtruth masks. + gt_class_labels: A numpy array containing groundtruth class labels. + class_index: An integer index. + Returns: + gt_boxes_at_ith_class: A numpy array containing groundtruth boxes labeled as ith class. + gt_masks_at_ith_class: A numpy array containing groundtruth masks labeled as ith class. + detected_boxes_at_ith_class: A numpy array containing detected boxes corresponding to the ith class. + detected_scores_at_ith_class: A numpy array containing detected scores corresponding to the ith class. + detected_masks_at_ith_class: A numpy array containing detected masks corresponding to the ith class. + """ + selected_groundtruth = (gt_class_labels == class_index) + gt_boxes_at_ith_class = gt_boxes[selected_groundtruth] + if gt_masks is not None: + gt_masks_at_ith_class = gt_masks[selected_groundtruth] + else: + gt_masks_at_ith_class = None + selected_detections = (detected_class_labels == class_index) + detected_boxes_at_ith_class = detected_boxes[selected_detections] + detected_scores_at_ith_class = detected_scores[selected_detections] + if detected_masks is not None: + detected_masks_at_ith_class = detected_masks[selected_detections] + else: + detected_masks_at_ith_class = None + return (gt_boxes_at_ith_class, gt_masks_at_ith_class, + detected_boxes_at_ith_class, detected_scores_at_ith_class, + detected_masks_at_ith_class) + + def _remove_invalid_boxes( + self, detected_boxes, detected_scores, detected_class_labels, detected_masks=None): + """Removes entries with invalid boxes. + A box is invalid if either its xmax is smaller than its xmin, or its ymax is smaller than its ymin. + Args: + detected_boxes: A float numpy array of size [num_boxes, 4] containing box + coordinates in [ymin, xmin, ymax, xmax] format. + detected_scores: A float numpy array of size [num_boxes]. + detected_class_labels: A int32 numpy array of size [num_boxes]. + detected_masks: A uint8 numpy array of size [num_boxes, height, width]. + Returns: + valid_detected_boxes: A float numpy array of size [num_valid_boxes, 4] + containing box coordinates in [ymin, xmin, ymax, xmax] format. + valid_detected_scores: A float numpy array of size [num_valid_boxes]. + valid_detected_class_labels: A int32 numpy array of size [num_valid_boxes]. + valid_detected_masks: A uint8 numpy array of size [num_valid_boxes, height, width]. 
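+        Example (illustrative, hypothetical values):
+            With detected_boxes = [[0.1, 0.1, 0.5, 0.5], [0.6, 0.2, 0.4, 0.8]], the second
+            box has y_max < y_min, so only the first box (and the matching entries of the
+            score, label and mask arrays) is kept.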
+ """ + valid_indices = np.logical_and( + detected_boxes[:, 0] < detected_boxes[:, 2], detected_boxes[:, 1] < detected_boxes[:, 3]) + detected_boxes = detected_boxes[valid_indices] + detected_scores = detected_scores[valid_indices] + detected_class_labels = detected_class_labels[valid_indices] + if detected_masks is not None: + detected_masks = detected_masks[valid_indices] + return [detected_boxes, detected_scores, detected_class_labels, detected_masks] + + diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluator.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluator.py new file mode 100644 index 0000000000..b923655688 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/evaluator.py @@ -0,0 +1,184 @@ +import torch +import torch.distributed as dist +import abc +import json +import logging +import time +import numpy as np + +from .distributed import synchronize, is_main_process, all_gather_container +from pycocotools.cocoeval import COCOeval + +# FIXME experimenting with speedups for OpenImages eval, it's slow +#import pyximport; py_importer, pyx_importer = pyximport.install(pyimport=True) +import effdet.evaluation.detection_evaluator as tfm_eval +#pyximport.uninstall(py_importer, pyx_importer) + +_logger = logging.getLogger(__name__) + + +__all__ = ['CocoEvaluator', 'PascalEvaluator', 'OpenImagesEvaluator', 'create_evaluator'] + + +class Evaluator: + + def __init__(self, distributed=False, pred_yxyx=False): + self.distributed = distributed + self.distributed_device = None + self.pred_yxyx = pred_yxyx + self.img_indices = [] + self.predictions = [] + + def add_predictions(self, detections, target): + if self.distributed: + if self.distributed_device is None: + # cache for use later to broadcast end metric + self.distributed_device = detections.device + synchronize() + detections = all_gather_container(detections) + img_indices = all_gather_container(target['img_idx']) + if not is_main_process(): + return + else: + img_indices = target['img_idx'] + + detections = detections.cpu().numpy() + img_indices = img_indices.cpu().numpy() + for img_idx, img_dets in zip(img_indices, detections): + self.img_indices.append(img_idx) + self.predictions.append(img_dets) + + def _coco_predictions(self): + # generate coco-style predictions + coco_predictions = [] + coco_ids = [] + for img_idx, img_dets in zip(self.img_indices, self.predictions): + img_id = self._dataset.img_ids[img_idx] + coco_ids.append(img_id) + if self.pred_yxyx: + # to xyxy + img_dets[:, 0:4] = img_dets[:, [1, 0, 3, 2]] + # to xywh + img_dets[:, 2] -= img_dets[:, 0] + img_dets[:, 3] -= img_dets[:, 1] + for det in img_dets: + score = float(det[4]) + if score < .001: # stop when below this threshold, scores in descending order + break + coco_det = dict( + image_id=int(img_id), + bbox=det[0:4].tolist(), + score=score, + category_id=int(det[5])) + coco_predictions.append(coco_det) + return coco_predictions, coco_ids + + @abc.abstractmethod + def evaluate(self): + pass + + def save(self, result_file): + # save results in coco style, override to save in a alternate form + if not self.distributed or dist.get_rank() == 0: + assert len(self.predictions) + coco_predictions, coco_ids = self._coco_predictions() + json.dump(coco_predictions, open(result_file, 'w'), indent=4) + + +class CocoEvaluator(Evaluator): + + def __init__(self, dataset, distributed=False, pred_yxyx=False): + 
super().__init__(distributed=distributed, pred_yxyx=pred_yxyx) + self._dataset = dataset.parser + self.coco_api = dataset.parser.coco + + def reset(self): + self.img_indices = [] + self.predictions = [] + + def evaluate(self): + if not self.distributed or dist.get_rank() == 0: + assert len(self.predictions) + coco_predictions, coco_ids = self._coco_predictions() + json.dump(coco_predictions, open('./temp.json', 'w'), indent=4) + results = self.coco_api.loadRes('./temp.json') + coco_eval = COCOeval(self.coco_api, results, 'bbox') + coco_eval.params.imgIds = coco_ids # score only ids we've used + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + metric = coco_eval.stats[0] # mAP 0.5-0.95 + if self.distributed: + dist.broadcast(torch.tensor(metric, device=self.distributed_device), 0) + else: + metric = torch.tensor(0, device=self.distributed_device) + dist.broadcast(metric, 0) + metric = metric.item() + self.reset() + return metric + + +class TfmEvaluator(Evaluator): + """ Tensorflow Models Evaluator Wrapper """ + def __init__( + self, dataset, distributed=False, pred_yxyx=False, evaluator_cls=tfm_eval.ObjectDetectionEvaluator): + super().__init__(distributed=distributed, pred_yxyx=pred_yxyx) + self._evaluator = evaluator_cls(categories=dataset.parser.cat_dicts) + self._eval_metric_name = self._evaluator._metric_names[0] + self._dataset = dataset.parser + + def reset(self): + self._evaluator.clear() + self.img_indices = [] + self.predictions = [] + + def evaluate(self): + if not self.distributed or dist.get_rank() == 0: + for img_idx, img_dets in zip(self.img_indices, self.predictions): + gt = self._dataset.get_ann_info(img_idx) + self._evaluator.add_single_ground_truth_image_info(img_idx, gt) + + bbox = img_dets[:, 0:4] if self.pred_yxyx else img_dets[:, [1, 0, 3, 2]] + det = dict(bbox=bbox, score=img_dets[:, 4], cls=img_dets[:, 5]) + self._evaluator.add_single_detected_image_info(img_idx, det) + + metrics = self._evaluator.evaluate() + _logger.info('Metrics:') + for k, v in metrics.items(): + _logger.info(f'{k}: {v}') + map_metric = metrics[self._eval_metric_name] + if self.distributed: + dist.broadcast(torch.tensor(map_metric, device=self.distributed_device), 0) + else: + map_metric = torch.tensor(0, device=self.distributed_device) + wait = dist.broadcast(map_metric, 0, async_op=True) + while not wait.is_completed(): + # wait without spinning the cpu @ 100%, no need for low latency here + time.sleep(0.5) + map_metric = map_metric.item() + self.reset() + return map_metric + + +class PascalEvaluator(TfmEvaluator): + + def __init__(self, dataset, distributed=False, pred_yxyx=False): + super().__init__( + dataset, distributed=distributed, pred_yxyx=pred_yxyx, evaluator_cls=tfm_eval.PascalDetectionEvaluator) + + +class OpenImagesEvaluator(TfmEvaluator): + + def __init__(self, dataset, distributed=False, pred_yxyx=False): + super().__init__( + dataset, distributed=distributed, pred_yxyx=pred_yxyx, evaluator_cls=tfm_eval.OpenImagesDetectionEvaluator) + + +def create_evaluator(name, dataset, distributed=False, pred_yxyx=False): + # FIXME support OpenImages Challenge2019 metric w/ image level label consideration + if 'coco' in name: + return CocoEvaluator(dataset, distributed=distributed, pred_yxyx=pred_yxyx) + elif 'openimages' in name: + return OpenImagesEvaluator(dataset, distributed=distributed, pred_yxyx=pred_yxyx) + else: + return PascalEvaluator(dataset, distributed=distributed, pred_yxyx=pred_yxyx) diff --git 
a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/factory.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/factory.py
new file mode 100644
index 0000000000..1e1db0764f
--- /dev/null
+++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/factory.py
@@ -0,0 +1,56 @@
+from .efficientdet import EfficientDet, HeadNet
+from .bench import DetBenchPredict
+from .config import get_efficientdet_config
+from .helpers import load_pretrained, load_checkpoint
+
+
+def create_model(
+        model_name, bench_task='', num_classes=None, pretrained=False,
+        checkpoint_path='', checkpoint_ema=False, **kwargs):
+
+    config = get_efficientdet_config(model_name)
+    return create_model_from_config(
+        config, bench_task=bench_task, num_classes=num_classes, pretrained=pretrained,
+        checkpoint_path=checkpoint_path, checkpoint_ema=checkpoint_ema, **kwargs)
+
+
+def create_model_from_config(
+        config, bench_task='', num_classes=None, pretrained=False,
+        checkpoint_path='', checkpoint_ema=False, **kwargs):
+
+    pretrained_backbone = kwargs.pop('pretrained_backbone', True)
+    if pretrained or checkpoint_path:
+        pretrained_backbone = False  # no point in loading backbone weights
+
+    # Config overrides, override some config values via kwargs.
+    overrides = (
+        'redundant_bias', 'label_smoothing', 'legacy_focal', 'jit_loss', 'soft_nms', 'max_det_per_image', 'image_size')
+    for ov in overrides:
+        value = kwargs.pop(ov, None)
+        if value is not None:
+            setattr(config, ov, value)
+
+    labeler = kwargs.pop('bench_labeler', False)
+
+    # create the base model
+    model = EfficientDet(config, pretrained_backbone=pretrained_backbone, **kwargs)
+
+    # pretrained weights are always spec'd for original config, load them before we change the model
+    if pretrained:
+        load_pretrained(model, config.url)
+
+    # reset model head if num_classes doesn't match configs
+    if num_classes is not None and num_classes != config.num_classes:
+        model.reset_head(num_classes=num_classes)
+
+    # load an argument specified training checkpoint
+    if checkpoint_path:
+        load_checkpoint(model, checkpoint_path, use_ema=checkpoint_ema)
+
+    # wrap model in task specific training/prediction bench if set
+    if bench_task == 'train':
+        from .bench import DetBenchTrain  # imported lazily so prediction-only use does not require it
+        model = DetBenchTrain(model, create_labeler=labeler)
+    elif bench_task == 'predict':
+        model = DetBenchPredict(model)
+    return model
diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/helpers.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/helpers.py
new file mode 100644
index 0000000000..597e65df2e
--- /dev/null
+++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/helpers.py
@@ -0,0 +1,22 @@
+import torch
+import os
+import logging
+from collections import OrderedDict
+
+from timm.models import load_checkpoint
+
+try:
+    from torch.hub import load_state_dict_from_url
+except ImportError:
+    from torch.utils.model_zoo import load_url as load_state_dict_from_url
+
+
+def load_pretrained(model, url, filter_fn=None, strict=True):
+    if not url:
+        logging.warning("Pretrained model URL is empty, using random initialization.
" + "Did you intend to use a `tf_` variant of the model?") + return + state_dict = load_state_dict_from_url(url, progress=False, map_location='cpu') + if filter_fn is not None: + state_dict = filter_fn(state_dict) + model.load_state_dict(state_dict, strict=strict) diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/loss.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/loss.py new file mode 100644 index 0000000000..078b7a17b0 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/loss.py @@ -0,0 +1,259 @@ +""" EfficientDet Focal, Huber/Smooth L1 loss fns w/ jit support + +Based on loss fn in Google's automl EfficientDet repository (Apache 2.0 license). +https://github.com/google/automl/tree/master/efficientdet + +Copyright 2020 Ross Wightman +""" +import torch +import torch.nn as nn +import torch.nn.functional as F + +from typing import Optional, List, Tuple + + +def focal_loss_legacy(logits, targets, alpha: float, gamma: float, normalizer): + """Compute the focal loss between `logits` and the golden `target` values. + + 'Legacy focal loss matches the loss used in the official Tensorflow impl for initial + model releases and some time after that. It eventually transitioned to the 'New' loss + defined below. + + Focal loss = -(1-pt)^gamma * log(pt) + where pt is the probability of being classified to the true class. + + Args: + logits: A float32 tensor of size [batch, height_in, width_in, num_predictions]. + + targets: A float32 tensor of size [batch, height_in, width_in, num_predictions]. + + alpha: A float32 scalar multiplying alpha to the loss from positive examples + and (1-alpha) to the loss from negative examples. + + gamma: A float32 scalar modulating loss from hard and easy examples. + + normalizer: A float32 scalar normalizes the total loss from all examples. + + Returns: + loss: A float32 scalar representing normalized total loss. + """ + positive_label_mask = targets == 1.0 + cross_entropy = F.binary_cross_entropy_with_logits(logits, targets.to(logits.dtype), reduction='none') + neg_logits = -1.0 * logits + modulator = torch.exp(gamma * targets * neg_logits - gamma * torch.log1p(torch.exp(neg_logits))) + + loss = modulator * cross_entropy + weighted_loss = torch.where(positive_label_mask, alpha * loss, (1.0 - alpha) * loss) + return weighted_loss / normalizer + + +def new_focal_loss(logits, targets, alpha: float, gamma: float, normalizer, label_smoothing: float = 0.01): + """Compute the focal loss between `logits` and the golden `target` values. + + 'New' is not the best descriptor, but this focal loss impl matches recent versions of + the official Tensorflow impl of EfficientDet. It has support for label smoothing, however + it is a bit slower, doesn't jit optimize well, and uses more memory. + + Focal loss = -(1-pt)^gamma * log(pt) + where pt is the probability of being classified to the true class. + Args: + logits: A float32 tensor of size [batch, height_in, width_in, num_predictions]. + targets: A float32 tensor of size [batch, height_in, width_in, num_predictions]. + alpha: A float32 scalar multiplying alpha to the loss from positive examples + and (1-alpha) to the loss from negative examples. + gamma: A float32 scalar modulating loss from hard and easy examples. + normalizer: Divide loss by this value. + label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. + Returns: + loss: A float32 scalar representing normalized total loss. 
+ """ + # compute focal loss multipliers before label smoothing, such that it will not blow up the loss. + pred_prob = logits.sigmoid() + targets = targets.to(logits.dtype) + onem_targets = 1. - targets + p_t = (targets * pred_prob) + (onem_targets * (1. - pred_prob)) + alpha_factor = targets * alpha + onem_targets * (1. - alpha) + modulating_factor = (1. - p_t) ** gamma + + # apply label smoothing for cross_entropy for each entry. + if label_smoothing > 0.: + targets = targets * (1. - label_smoothing) + .5 * label_smoothing + ce = F.binary_cross_entropy_with_logits(logits, targets, reduction='none') + + # compute the final loss and return + return (1 / normalizer) * alpha_factor * modulating_factor * ce + + +def huber_loss( + input, target, delta: float = 1., weights: Optional[torch.Tensor] = None, size_average: bool = True): + """ + """ + err = input - target + abs_err = err.abs() + quadratic = torch.clamp(abs_err, max=delta) + linear = abs_err - quadratic + loss = 0.5 * quadratic.pow(2) + delta * linear + if weights is not None: + loss *= weights + if size_average: + return loss.mean() + else: + return loss.sum() + + +def smooth_l1_loss( + input, target, beta: float = 1. / 9, weights: Optional[torch.Tensor] = None, size_average: bool = True): + """ + very similar to the smooth_l1_loss from pytorch, but with the extra beta parameter + """ + if beta < 1e-5: + # if beta == 0, then torch.where will result in nan gradients when + # the chain rule is applied due to pytorch implementation details + # (the False branch "0.5 * n ** 2 / 0" has an incoming gradient of + # zeros, rather than "no gradient"). To avoid this issue, we define + # small values of beta to be exactly l1 loss. + loss = torch.abs(input - target) + else: + err = torch.abs(input - target) + loss = torch.where(err < beta, 0.5 * err.pow(2) / beta, err - 0.5 * beta) + if weights is not None: + loss *= weights + if size_average: + return loss.mean() + else: + return loss.sum() + + +def _box_loss(box_outputs, box_targets, num_positives, delta: float = 0.1): + """Computes box regression loss.""" + # delta is typically around the mean value of regression target. + # for instances, the regression targets of 512x512 input with 6 anchors on + # P3-P7 pyramid is about [0.1, 0.1, 0.2, 0.2]. + normalizer = num_positives * 4.0 + mask = box_targets != 0.0 + box_loss = huber_loss(box_outputs, box_targets, weights=mask, delta=delta, size_average=False) + return box_loss / normalizer + + +def one_hot(x, num_classes: int): + # NOTE: PyTorch one-hot does not handle -ve entries (no hot) like Tensorflow, so mask them out + x_non_neg = (x >= 0).unsqueeze(-1) + onehot = torch.zeros(x.shape + (num_classes,), device=x.device, dtype=torch.float32) + return onehot.scatter(-1, x.unsqueeze(-1) * x_non_neg, 1) * x_non_neg + + +def loss_fn( + cls_outputs: List[torch.Tensor], + box_outputs: List[torch.Tensor], + cls_targets: List[torch.Tensor], + box_targets: List[torch.Tensor], + num_positives: torch.Tensor, + num_classes: int, + alpha: float, + gamma: float, + delta: float, + box_loss_weight: float, + label_smoothing: float = 0., + legacy_focal: bool = False, + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """Computes total detection loss. + Computes total detection loss including box and class loss from all levels. + Args: + cls_outputs: a List with values representing logits in [batch_size, height, width, num_anchors]. 
+ at each feature level (index) + + box_outputs: a List with values representing box regression targets in + [batch_size, height, width, num_anchors * 4] at each feature level (index) + + cls_targets: groundtruth class targets. + + box_targets: groundtrusth box targets. + + num_positives: num positive grountruth anchors + + Returns: + total_loss: an integer tensor representing total loss reducing from class and box losses from all levels. + + cls_loss: an integer tensor representing total class loss. + + box_loss: an integer tensor representing total box regression loss. + """ + # Sum all positives in a batch for normalization and avoid zero + # num_positives_sum, which would lead to inf loss during training + num_positives_sum = (num_positives.sum() + 1.0).float() + levels = len(cls_outputs) + + cls_losses = [] + box_losses = [] + for l in range(levels): + cls_targets_at_level = cls_targets[l] + box_targets_at_level = box_targets[l] + + # Onehot encoding for classification labels. + cls_targets_at_level_oh = one_hot(cls_targets_at_level, num_classes) + + bs, height, width, _, _ = cls_targets_at_level_oh.shape + cls_targets_at_level_oh = cls_targets_at_level_oh.view(bs, height, width, -1) + cls_outputs_at_level = cls_outputs[l].permute(0, 2, 3, 1).float() + if legacy_focal: + cls_loss = focal_loss_legacy( + cls_outputs_at_level, cls_targets_at_level_oh, + alpha=alpha, gamma=gamma, normalizer=num_positives_sum) + else: + cls_loss = new_focal_loss( + cls_outputs_at_level, cls_targets_at_level_oh, + alpha=alpha, gamma=gamma, normalizer=num_positives_sum, label_smoothing=label_smoothing) + cls_loss = cls_loss.view(bs, height, width, -1, num_classes) + cls_loss = cls_loss * (cls_targets_at_level != -2).unsqueeze(-1) + cls_losses.append(cls_loss.sum()) # FIXME reference code added a clamp here at some point ...clamp(0, 2)) + + box_losses.append(_box_loss( + box_outputs[l].permute(0, 2, 3, 1).float(), + box_targets_at_level, + num_positives_sum, + delta=delta)) + + # Sum per level losses to total loss. + cls_loss = torch.sum(torch.stack(cls_losses, dim=-1), dim=-1) + box_loss = torch.sum(torch.stack(box_losses, dim=-1), dim=-1) + total_loss = cls_loss + box_loss_weight * box_loss + return total_loss, cls_loss, box_loss + + +loss_jit = torch.jit.script(loss_fn) + + +class DetectionLoss(nn.Module): + + __constants__ = ['num_classes'] + + def __init__(self, config): + super(DetectionLoss, self).__init__() + self.config = config + self.num_classes = config.num_classes + self.alpha = config.alpha + self.gamma = config.gamma + self.delta = config.delta + self.box_loss_weight = config.box_loss_weight + self.label_smoothing = config.label_smoothing + self.legacy_focal = config.legacy_focal + self.use_jit = config.jit_loss + + def forward( + self, + cls_outputs: List[torch.Tensor], + box_outputs: List[torch.Tensor], + cls_targets: List[torch.Tensor], + box_targets: List[torch.Tensor], + num_positives: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + + l_fn = loss_fn + if not torch.jit.is_scripting() and self.use_jit: + # This branch only active if parent / bench itself isn't being scripted + # NOTE: I haven't figured out what to do here wrt to tracing, is it an issue? 
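+            # `loss_jit` is the torch.jit.script-compiled version of `loss_fn` defined above;
+            # the plain Python function is used whenever this module is itself being scripted.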
+ l_fn = loss_jit + + return l_fn( + cls_outputs, box_outputs, cls_targets, box_targets, num_positives, + num_classes=self.num_classes, alpha=self.alpha, gamma=self.gamma, delta=self.delta, + box_loss_weight=self.box_loss_weight, label_smoothing=self.label_smoothing, legacy_focal=self.legacy_focal) diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/object_detection/README.md b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/object_detection/README.md new file mode 100644 index 0000000000..c2ed390201 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/object_detection/README.md @@ -0,0 +1,3 @@ +# Tensorflow Object Detection + +All of this code is adapted/ported/copied from https://github.com/google/automl/tree/552d0facd14f4fe9205a67fb13ecb5690a4d1c94/efficientdet/object_detection \ No newline at end of file diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/object_detection/__init__.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/object_detection/__init__.py new file mode 100644 index 0000000000..5679660c5c --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/object_detection/__init__.py @@ -0,0 +1,22 @@ +# Copyright 2020 Google Research. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# Object detection data loaders and libraries are mostly based on RetinaNet: +# https://github.com/tensorflow/tpu/tree/master/models/official/retinanet +from .argmax_matcher import ArgMaxMatcher +from .box_coder import FasterRcnnBoxCoder +from .box_list import BoxList +from .matcher import Match +from .region_similarity_calculator import IouSimilarity +from .target_assigner import TargetAssigner diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/object_detection/argmax_matcher.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/object_detection/argmax_matcher.py new file mode 100644 index 0000000000..3b98b7a90f --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/object_detection/argmax_matcher.py @@ -0,0 +1,174 @@ +# Copyright 2020 Google Research. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Argmax matcher implementation. + +This class takes a similarity matrix and matches columns to rows based on the +maximum value per column. One can specify matched_thresholds and +to prevent columns from matching to rows (generally resulting in a negative +training example) and unmatched_theshold to ignore the match (generally +resulting in neither a positive or negative training example). + +This matcher is used in Fast(er)-RCNN. + +Note: matchers are used in TargetAssigners. There is a create_target_assigner +factory function for popular implementations. +""" +import torch +from .matcher import Match +from typing import Optional + + +def one_hot_bool(x, num_classes: int): + # for improved perf over PyTorch builtin one_hot, scatter to bool + onehot = torch.zeros(x.size(0), num_classes, device=x.device, dtype=torch.bool) + return onehot.scatter_(1, x.unsqueeze(1), 1) + + +@torch.jit.script +class ArgMaxMatcher(object): # cannot inherit with torchscript + """Matcher based on highest value. + + This class computes matches from a similarity matrix. Each column is matched + to a single row. + + To support object detection target assignment this class enables setting both + matched_threshold (upper threshold) and unmatched_threshold (lower thresholds) + defining three categories of similarity which define whether examples are + positive, negative, or ignored: + (1) similarity >= matched_threshold: Highest similarity. Matched/Positive! + (2) matched_threshold > similarity >= unmatched_threshold: Medium similarity. + Depending on negatives_lower_than_unmatched, this is either + Unmatched/Negative OR Ignore. + (3) unmatched_threshold > similarity: Lowest similarity. Depending on flag + negatives_lower_than_unmatched, either Unmatched/Negative OR Ignore. + For ignored matches this class sets the values in the Match object to -2. + """ + + def __init__(self, + matched_threshold: float, + unmatched_threshold: Optional[float] = None, + negatives_lower_than_unmatched: bool = True, + force_match_for_each_row: bool = False): + """Construct ArgMaxMatcher. + + Args: + matched_threshold: Threshold for positive matches. Positive if + sim >= matched_threshold, where sim is the maximum value of the + similarity matrix for a given column. Set to None for no threshold. + unmatched_threshold: Threshold for negative matches. Negative if + sim < unmatched_threshold. Defaults to matched_threshold + when set to None. + negatives_lower_than_unmatched: Boolean which defaults to True. If True + then negative matches are the ones below the unmatched_threshold, + whereas ignored matches are in between the matched and unmatched + threshold. If False, then negative matches are in between the matched + and unmatched threshold, and everything lower than unmatched is ignored. + force_match_for_each_row: If True, ensures that each row is matched to + at least one column (which is not guaranteed otherwise if the + matched_threshold is high). Defaults to False. See + argmax_matcher_test.testMatcherForceMatch() for an example. + + Raises: + ValueError: if unmatched_threshold is set but matched_threshold is not set + or if unmatched_threshold > matched_threshold. + """ + if (matched_threshold is None) and (unmatched_threshold is not None): + raise ValueError('Need to also define matched_threshold when unmatched_threshold is defined') + self._matched_threshold = matched_threshold + self._unmatched_threshold: float = 0. 
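+        # The explicit `float` annotation with a placeholder value is presumably here so that
+        # TorchScript (the class is decorated with @torch.jit.script) infers a concrete,
+        # non-Optional type for the attribute before the conditional assignment below.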
+ if unmatched_threshold is None: + self._unmatched_threshold = matched_threshold + else: + if unmatched_threshold > matched_threshold: + raise ValueError('unmatched_threshold needs to be smaller or equal to matched_threshold') + self._unmatched_threshold = unmatched_threshold + if not negatives_lower_than_unmatched: + if self._unmatched_threshold == self._matched_threshold: + raise ValueError('When negatives are in between matched and unmatched thresholds, these ' + 'cannot be of equal value. matched: %s, unmatched: %s', + self._matched_threshold, self._unmatched_threshold) + self._force_match_for_each_row = force_match_for_each_row + self._negatives_lower_than_unmatched = negatives_lower_than_unmatched + + def _match_when_rows_are_empty(self, similarity_matrix): + """Performs matching when the rows of similarity matrix are empty. + + When the rows are empty, all detections are false positives. So we return + a tensor of -1's to indicate that the columns do not match to any rows. + + Returns: + matches: int32 tensor indicating the row each column matches to. + """ + return -1 * torch.ones(similarity_matrix.shape[1], dtype=torch.long, device=similarity_matrix.device) + + def _match_when_rows_are_non_empty(self, similarity_matrix): + """Performs matching when the rows of similarity matrix are non empty. + + Returns: + matches: int32 tensor indicating the row each column matches to. + """ + # Matches for each column + matched_vals, matches = torch.max(similarity_matrix, 0) + + # Deal with matched and unmatched threshold + if self._matched_threshold is not None: + # Get logical indices of ignored and unmatched columns as tf.int64 + below_unmatched_threshold = self._unmatched_threshold > matched_vals + between_thresholds = (matched_vals >= self._unmatched_threshold) & \ + (self._matched_threshold > matched_vals) + + if self._negatives_lower_than_unmatched: + matches = self._set_values_using_indicator(matches, below_unmatched_threshold, -1) + matches = self._set_values_using_indicator(matches, between_thresholds, -2) + else: + matches = self._set_values_using_indicator(matches, below_unmatched_threshold, -2) + matches = self._set_values_using_indicator(matches, between_thresholds, -1) + + if self._force_match_for_each_row: + force_match_column_ids = torch.argmax(similarity_matrix, 1) + force_match_column_indicators = one_hot_bool(force_match_column_ids, similarity_matrix.shape[1]) + force_match_column_mask, force_match_row_ids = torch.max(force_match_column_indicators, 0) + final_matches = torch.where(force_match_column_mask, force_match_row_ids, matches) + return final_matches + else: + return matches + + def match(self, similarity_matrix): + """Tries to match each column of the similarity matrix to a row. + + Args: + similarity_matrix: tensor of shape [N, M] representing any similarity metric. + + Returns: + Match object with corresponding matches for each of M columns. + """ + if similarity_matrix.shape[0] == 0: + return Match(self._match_when_rows_are_empty(similarity_matrix)) + else: + return Match(self._match_when_rows_are_non_empty(similarity_matrix)) + + def _set_values_using_indicator(self, x, indicator, val: int): + """Set the indicated fields of x to val. + + Args: + x: tensor. + indicator: boolean with same shape as x. + val: scalar with value to set. + + Returns: + modified tensor. 
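+        Example (illustrative, hypothetical values):
+            With x = tensor([5, 6, 7]), indicator = tensor([True, False, True]) and val = -1,
+            the result is tensor([-1, 6, -1]).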
+ """ + indicator = indicator.to(dtype=x.dtype) + return x * (1 - indicator) + val * indicator diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/object_detection/box_coder.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/object_detection/box_coder.py new file mode 100644 index 0000000000..cfdccd76f4 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/object_detection/box_coder.py @@ -0,0 +1,172 @@ +# Copyright 2020 Google Research. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Base box coder. + +Box coders convert between coordinate frames, namely image-centric +(with (0,0) on the top left of image) and anchor-centric (with (0,0) being +defined by a specific anchor). + +Users of a BoxCoder can call two methods: + encode: which encodes a box with respect to a given anchor + (or rather, a tensor of boxes wrt a corresponding tensor of anchors) and + decode: which inverts this encoding with a decode operation. +In both cases, the arguments are assumed to be in 1-1 correspondence already; +it is not the job of a BoxCoder to perform matching. +""" +import torch +from typing import List, Optional +from .box_list import BoxList + +# Box coder types. +FASTER_RCNN = 'faster_rcnn' +KEYPOINT = 'keypoint' +MEAN_STDDEV = 'mean_stddev' +SQUARE = 'square' + + +"""Faster RCNN box coder. + +Faster RCNN box coder follows the coding schema described below: + ty = (y - ya) / ha + tx = (x - xa) / wa + th = log(h / ha) + tw = log(w / wa) + where x, y, w, h denote the box's center coordinates, width and height + respectively. Similarly, xa, ya, wa, ha denote the anchor's center + coordinates, width and height. tx, ty, tw and th denote the anchor-encoded + center, width and height respectively. + + See http://arxiv.org/abs/1506.01497 for details. +""" + + +EPS = 1e-8 + + +#@torch.jit.script +class FasterRcnnBoxCoder(object): + """Faster RCNN box coder.""" + + def __init__(self, scale_factors: Optional[List[float]] = None, eps: float = EPS): + """Constructor for FasterRcnnBoxCoder. + + Args: + scale_factors: List of 4 positive scalars to scale ty, tx, th and tw. + If set to None, does not perform scaling. For Faster RCNN, + the open-source implementation recommends using [10.0, 10.0, 5.0, 5.0]. + """ + self._scale_factors = scale_factors + if scale_factors is not None: + assert len(scale_factors) == 4 + for scalar in scale_factors: + assert scalar > 0 + self.eps = eps + + #@property + def code_size(self): + return 4 + + def encode(self, boxes: BoxList, anchors: BoxList): + """Encode a box collection with respect to anchor collection. + + Args: + boxes: BoxList holding N boxes to be encoded. + anchors: BoxList of anchors. + + Returns: + a tensor representing N anchor-encoded boxes of the format [ty, tx, th, tw]. + """ + # Convert anchors to the center coordinate representation. 
+ ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes() + ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes() + # Avoid NaN in division and log below. + ha += self.eps + wa += self.eps + h += self.eps + w += self.eps + + tx = (xcenter - xcenter_a) / wa + ty = (ycenter - ycenter_a) / ha + tw = torch.log(w / wa) + th = torch.log(h / ha) + # Scales location targets as used in paper for joint training. + if self._scale_factors is not None: + ty *= self._scale_factors[0] + tx *= self._scale_factors[1] + th *= self._scale_factors[2] + tw *= self._scale_factors[3] + return torch.stack([ty, tx, th, tw]).t() + + def decode(self, rel_codes, anchors: BoxList): + """Decode relative codes to boxes. + + Args: + rel_codes: a tensor representing N anchor-encoded boxes. + anchors: BoxList of anchors. + + Returns: + boxes: BoxList holding N bounding boxes. + """ + ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes() + + ty, tx, th, tw = rel_codes.t().unbind() + if self._scale_factors is not None: + ty /= self._scale_factors[0] + tx /= self._scale_factors[1] + th /= self._scale_factors[2] + tw /= self._scale_factors[3] + w = torch.exp(tw) * wa + h = torch.exp(th) * ha + ycenter = ty * ha + ycenter_a + xcenter = tx * wa + xcenter_a + ymin = ycenter - h / 2. + xmin = xcenter - w / 2. + ymax = ycenter + h / 2. + xmax = xcenter + w / 2. + return BoxList(torch.stack([ymin, xmin, ymax, xmax]).t()) + + +def batch_decode(encoded_boxes, box_coder: FasterRcnnBoxCoder, anchors: BoxList): + """Decode a batch of encoded boxes. + + This op takes a batch of encoded bounding boxes and transforms + them to a batch of bounding boxes specified by their corners in + the order of [y_min, x_min, y_max, x_max]. + + Args: + encoded_boxes: a float32 tensor of shape [batch_size, num_anchors, + code_size] representing the location of the objects. + box_coder: a BoxCoder object. + anchors: a BoxList of anchors used to encode `encoded_boxes`. + + Returns: + decoded_boxes: a float32 tensor of shape [batch_size, num_anchors, coder_size] + representing the corners of the objects in the order of [y_min, x_min, y_max, x_max]. + + Raises: + ValueError: if batch sizes of the inputs are inconsistent, or if + the number of anchors inferred from encoded_boxes and anchors are inconsistent. + """ + assert len(encoded_boxes.shape) == 3 + if encoded_boxes.shape[1] != anchors.num_boxes(): + raise ValueError('The number of anchors inferred from encoded_boxes' + ' and anchors are inconsistent: shape[1] of encoded_boxes' + ' %s should be equal to the number of anchors: %s.' % + (encoded_boxes.shape[1], anchors.num_boxes())) + + decoded_boxes = torch.stack([ + box_coder.decode(boxes, anchors).boxes for boxes in encoded_boxes.unbind() + ]) + return decoded_boxes diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/object_detection/box_list.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/object_detection/box_list.py new file mode 100644 index 0000000000..09b77f3d7a --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/object_detection/box_list.py @@ -0,0 +1,197 @@ +# Copyright 2020 Google Research. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Bounding Box List definition. + +BoxList represents a list of bounding boxes as tensorflow +tensors, where each bounding box is represented as a row of 4 numbers, +[y_min, x_min, y_max, x_max]. It is assumed that all bounding boxes +within a given list correspond to a single image. See also +box_list.py for common box related operations (such as area, iou, etc). + +Optionally, users can add additional related fields (such as weights). +We assume the following things to be true about fields: +* they correspond to boxes in the box_list along the 0th dimension +* they have inferable rank at graph construction time +* all dimensions except for possibly the 0th can be inferred + (i.e., not None) at graph construction time. + +Some other notes: + * Following tensorflow conventions, we use height, width ordering, + and correspondingly, y,x (or ymin, xmin, ymax, xmax) ordering + * Tensors are always provided as (flat) [N, 4] tensors. +""" +import torch +from typing import Optional, List, Dict + + +@torch.jit.script +class BoxList(object): + """Box collection.""" + data: Dict[str, torch.Tensor] + + def __init__(self, boxes): + """Constructs box collection. + + Args: + boxes: a tensor of shape [N, 4] representing box corners + + Raises: + ValueError: if invalid dimensions for bbox data or if bbox data is not in float32 format. + """ + if len(boxes.shape) != 2 or boxes.shape[-1] != 4: + raise ValueError('Invalid dimensions for box data.') + if boxes.dtype != torch.float32: + raise ValueError('Invalid tensor type: should be tf.float32') + self.data = {'boxes': boxes} + + def num_boxes(self): + """Returns number of boxes held in collection. + + Returns: + a tensor representing the number of boxes held in the collection. + """ + return self.data['boxes'].shape[0] + + def get_all_fields(self): + """Returns all fields.""" + return self.data.keys() + + def get_extra_fields(self): + """Returns all non-box fields (i.e., everything not named 'boxes').""" + # return [k for k in self.data.keys() if k != 'boxes'] # FIXME torscript doesn't support comprehensions yet + extra: List[str] = [] + for k in self.data.keys(): + if k != 'boxes': + extra.append(k) + return extra + + def add_field(self, field: str, field_data: torch.Tensor): + """Add field to box list. + + This method can be used to add related box data such as weights/labels, etc. + + Args: + field: a string key to access the data via `get` + field_data: a tensor containing the data to store in the BoxList + """ + self.data[field] = field_data + + def has_field(self, field: str): + return field in self.data + + #@property # FIXME for torchscript compat + def boxes(self): + """Convenience function for accessing box coordinates. + + Returns: + a tensor with shape [N, 4] representing box coordinates. + """ + return self.get_field('boxes') + + #@boxes.setter # FIXME for torchscript compat + def set_boxes(self, boxes): + """Convenience function for setting box coordinates. 
+ + Args: + boxes: a tensor of shape [N, 4] representing box corners + + Raises: + ValueError: if invalid dimensions for bbox data + """ + if len(boxes.shape) != 2 or boxes.shape[-1] != 4: + raise ValueError('Invalid dimensions for box data.') + self.data['boxes'] = boxes + + def get_field(self, field: str): + """Accesses a box collection and associated fields. + + This function returns specified field with object; if no field is specified, + it returns the box coordinates. + + Args: + field: this optional string parameter can be used to specify a related field to be accessed. + + Returns: + a tensor representing the box collection or an associated field. + + Raises: + ValueError: if invalid field + """ + if not self.has_field(field): + raise ValueError(f'field {field} does not exist') + return self.data[field] + + def set_field(self, field: str, value: torch.Tensor): + """Sets the value of a field. + + Updates the field of a box_list with a given value. + + Args: + field: (string) name of the field to set value. + value: the value to assign to the field. + + Raises: + ValueError: if the box_list does not have specified field. + """ + if not self.has_field(field): + raise ValueError(f'field {field} does not exist') + self.data[field] = value + + def get_center_coordinates_and_sizes(self): + """Computes the center coordinates, height and width of the boxes. + + Returns: + a list of 4 1-D tensors [ycenter, xcenter, height, width]. + """ + box_corners = self.boxes() + ymin, xmin, ymax, xmax = box_corners.t().unbind() + width = xmax - xmin + height = ymax - ymin + ycenter = ymin + height / 2. + xcenter = xmin + width / 2. + return [ycenter, xcenter, height, width] + + def transpose_coordinates(self): + """Transpose the coordinate representation in a boxlist. + + """ + y_min, x_min, y_max, x_max = self.boxes().chunk(4, dim=1) + self.set_boxes(torch.cat([x_min, y_min, x_max, y_max], 1)) + + def as_tensor_dict(self, fields: Optional[List[str]] = None): + """Retrieves specified fields as a dictionary of tensors. + + Args: + fields: (optional) list of fields to return in the dictionary. + If None (default), all fields are returned. + + Returns: + tensor_dict: A dictionary of tensors specified by fields. + + Raises: + ValueError: if specified field is not contained in boxlist. + """ + tensor_dict = {} + if fields is None: + fields = self.get_all_fields() + for field in fields: + if not self.has_field(field): + raise ValueError('boxlist must contain all specified fields') + tensor_dict[field] = self.get_field(field) + return tensor_dict + + #@property + def device(self): + return self.data['boxes'].device diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/object_detection/matcher.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/object_detection/matcher.py new file mode 100644 index 0000000000..22aaab118d --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/object_detection/matcher.py @@ -0,0 +1,179 @@ +# Copyright 2020 Google Research. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Matcher interface and Match class. + +This module defines the Matcher interface and the Match object. The job of the +matcher is to match row and column indices based on the similarity matrix and +other optional parameters. Each column is matched to at most one row. There +are three possibilities for the matching: + +1) match: A column matches a row. +2) no_match: A column does not match any row. +3) ignore: A column that is neither 'match' nor no_match. + +The ignore case is regularly encountered in object detection: when an anchor has +a relatively small overlap with a ground-truth box, one neither wants to +consider this box a positive example (match) nor a negative example (no match). + +The Match class is used to store the match results and it provides simple apis +to query the results. +""" +import torch + + +@torch.jit.script +class Match(object): + """Class to store results from the matcher. + + This class is used to store the results from the matcher. It provides + convenient methods to query the matching results. + """ + + def __init__(self, match_results: torch.Tensor): + """Constructs a Match object. + + Args: + match_results: Integer tensor of shape [N] with (1) match_results[i]>=0, + meaning that column i is matched with row match_results[i]. + (2) match_results[i]=-1, meaning that column i is not matched. + (3) match_results[i]=-2, meaning that column i is ignored. + + Raises: + ValueError: if match_results does not have rank 1 or is not an integer int32 scalar tensor + """ + if len(match_results.shape) != 1: + raise ValueError('match_results should have rank 1') + if match_results.dtype not in (torch.int32, torch.int64): + raise ValueError('match_results should be an int32 or int64 scalar tensor') + self.match_results = match_results + + def matched_column_indices(self): + """Returns column indices that match to some row. + + The indices returned by this op are always sorted in increasing order. + + Returns: + column_indices: int32 tensor of shape [K] with column indices. + """ + return torch.nonzero(self.match_results > -1).flatten().long() + + def matched_column_indicator(self): + """Returns column indices that are matched. + + Returns: + column_indices: int32 tensor of shape [K] with column indices. + """ + return self.match_results >= 0 + + def num_matched_columns(self): + """Returns number (int32 scalar tensor) of matched columns.""" + return self.matched_column_indices().numel() + + def unmatched_column_indices(self): + """Returns column indices that do not match any row. + + The indices returned by this op are always sorted in increasing order. + + Returns: + column_indices: int32 tensor of shape [K] with column indices. + """ + return torch.nonzero(self.match_results == -1).flatten().long() + + def unmatched_column_indicator(self): + """Returns column indices that are unmatched. + + Returns: + column_indices: int32 tensor of shape [K] with column indices. 
+ """ + return self.match_results == -1 + + def num_unmatched_columns(self): + """Returns number (int32 scalar tensor) of unmatched columns.""" + return self.unmatched_column_indices().numel() + + def ignored_column_indices(self): + """Returns column indices that are ignored (neither Matched nor Unmatched). + + The indices returned by this op are always sorted in increasing order. + + Returns: + column_indices: int32 tensor of shape [K] with column indices. + """ + return torch.nonzero(self.ignored_column_indicator()).flatten().long() + + def ignored_column_indicator(self): + """Returns boolean column indicator where True means the column is ignored. + + Returns: + column_indicator: boolean vector which is True for all ignored column indices. + """ + return self.match_results == -2 + + def num_ignored_columns(self): + """Returns number (int32 scalar tensor) of matched columns.""" + return self.ignored_column_indices().numel() + + def unmatched_or_ignored_column_indices(self): + """Returns column indices that are unmatched or ignored. + + The indices returned by this op are always sorted in increasing order. + + Returns: + column_indices: int32 tensor of shape [K] with column indices. + """ + return torch.nonzero(0 > self.match_results).flatten().long() + + def matched_row_indices(self): + """Returns row indices that match some column. + + The indices returned by this op are ordered so as to be in correspondence with the output of + matched_column_indicator(). For example if self.matched_column_indicator() is [0,2], + and self.matched_row_indices() is [7, 3], then we know that column 0 was matched to row 7 and + column 2 was matched to row 3. + + Returns: + row_indices: int32 tensor of shape [K] with row indices. + """ + return torch.gather(self.match_results, 0, self.matched_column_indices()).flatten().long() + + def gather_based_on_match(self, input_tensor, unmatched_value, ignored_value): + """Gathers elements from `input_tensor` based on match results. + + For columns that are matched to a row, gathered_tensor[col] is set to input_tensor[match_results[col]]. + For columns that are unmatched, gathered_tensor[col] is set to unmatched_value. Finally, for columns that + are ignored gathered_tensor[col] is set to ignored_value. + + Note that the input_tensor.shape[1:] must match with unmatched_value.shape + and ignored_value.shape + + Args: + input_tensor: Tensor to gather values from. + unmatched_value: Constant tensor or python scalar value for unmatched columns. + ignored_value: Constant tensor or python scalar for ignored columns. + + Returns: + gathered_tensor: A tensor containing values gathered from input_tensor. + The shape of the gathered tensor is [match_results.shape[0]] + input_tensor.shape[1:]. 
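+
+        Example: with match_results = [-2, -1, 3] and scalar unmatched_value and
+            ignored_value, the gathered tensor is
+            [ignored_value, unmatched_value, input_tensor[3]], since the three
+            columns are ignored, unmatched, and matched to row 3 respectively.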
+ """ + if isinstance(ignored_value, torch.Tensor): + input_tensor = torch.cat([ignored_value, unmatched_value, input_tensor], dim=0) + else: + # scalars + input_tensor = torch.cat([ + torch.tensor([ignored_value, unmatched_value], dtype=input_tensor.dtype, device=input_tensor.device), + input_tensor], dim=0) + gather_indices = torch.clamp(self.match_results + 2, min=0) + gathered_tensor = torch.index_select(input_tensor, 0, gather_indices) + return gathered_tensor diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/object_detection/region_similarity_calculator.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/object_detection/region_similarity_calculator.py new file mode 100644 index 0000000000..f6945bc757 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/object_detection/region_similarity_calculator.py @@ -0,0 +1,101 @@ +# Copyright 2020 Google Research. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Region Similarity Calculators for BoxLists. + +Region Similarity Calculators compare a pairwise measure of similarity +between the boxes in two BoxLists. +""" +import torch +from .box_list import BoxList + + +def area(boxlist: BoxList): + """Computes area of boxes. + + Args: + boxlist: BoxList holding N boxes + + Returns: + a tensor with shape [N] representing box areas. + """ + y_min, x_min, y_max, x_max = boxlist.boxes().chunk(4, dim=1) + out = (y_max - y_min).squeeze(1) * (x_max - x_min).squeeze(1) + return out + + +def intersection(boxlist1: BoxList, boxlist2: BoxList): + """Compute pairwise intersection areas between boxes. + + Args: + boxlist1: BoxList holding N boxes + boxlist2: BoxList holding M boxes + + Returns: + a tensor with shape [N, M] representing pairwise intersections + """ + y_min1, x_min1, y_max1, x_max1 = boxlist1.boxes().chunk(4, dim=1) + y_min2, x_min2, y_max2, x_max2 = boxlist2.boxes().chunk(4, dim=1) + all_pairs_min_ymax = torch.min(y_max1, y_max2.t()) + all_pairs_max_ymin = torch.max(y_min1, y_min2.t()) + intersect_heights = torch.clamp(all_pairs_min_ymax - all_pairs_max_ymin, min=0) + all_pairs_min_xmax = torch.min(x_max1, x_max2.t()) + all_pairs_max_xmin = torch.max(x_min1, x_min2.t()) + intersect_widths = torch.clamp(all_pairs_min_xmax - all_pairs_max_xmin, min=0) + return intersect_heights * intersect_widths + + +def iou(boxlist1: BoxList, boxlist2: BoxList): + """Computes pairwise intersection-over-union between box collections. + + Args: + boxlist1: BoxList holding N boxes + boxlist2: BoxList holding M boxes + + Returns: + a tensor with shape [N, M] representing pairwise iou scores. 
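+
+    Note: pairs with zero intersection are given an IoU of 0 directly, so
+        degenerate (zero-area) box pairs do not produce NaN scores.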
+ """ + intersections = intersection(boxlist1, boxlist2) + areas1 = area(boxlist1) + areas2 = area(boxlist2) + unions = areas1.unsqueeze(1) + areas2.unsqueeze(0) - intersections + return torch.where(intersections == 0.0, torch.zeros_like(intersections), intersections / unions) + + +@torch.jit.script +class IouSimilarity(object): + """Class to compute similarity based on Intersection over Union (IOU) metric. + + This class computes pairwise similarity between two BoxLists based on IOU. + """ + def __init__(self): + pass + + def compare(self, boxlist1: BoxList, boxlist2: BoxList): + """Computes matrix of pairwise similarity between BoxLists. + + This op (to be overridden) computes a measure of pairwise similarity between + the boxes in the given BoxLists. Higher values indicate more similarity. + + Note that this method simply measures similarity and does not explicitly + perform a matching. + + Args: + boxlist1: BoxList holding N boxes. + boxlist2: BoxList holding M boxes. + + Returns: + a (float32) tensor of shape [N, M] with pairwise similarity score. + """ + return iou(boxlist1, boxlist2) diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/object_detection/target_assigner.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/object_detection/target_assigner.py new file mode 100644 index 0000000000..6b97a4e728 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/object_detection/target_assigner.py @@ -0,0 +1,266 @@ +# Copyright 2020 Google Research. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Base target assigner module. + +The job of a TargetAssigner is, for a given set of anchors (bounding boxes) and +groundtruth detections (bounding boxes), to assign classification and regression +targets to each anchor as well as weights to each anchor (specifying, e.g., +which anchors should not contribute to training loss). + +It assigns classification/regression targets by performing the following steps: +1) Computing pairwise similarity between anchors and groundtruth boxes using a + provided RegionSimilarity Calculator +2) Computing a matching based on the similarity matrix using a provided Matcher +3) Assigning regression targets based on the matching and a provided BoxCoder +4) Assigning classification targets based on the matching and groundtruth labels + +Note that TargetAssigners only operate on detections from a single +image at a time, so any logic for applying a TargetAssigner to multiple +images must be handled externally. +""" +import torch +from typing import Optional + +from . 
import box_list +from .region_similarity_calculator import IouSimilarity +from .argmax_matcher import ArgMaxMatcher +from .matcher import Match +from .box_list import BoxList +from .box_coder import FasterRcnnBoxCoder + +KEYPOINTS_FIELD_NAME = 'keypoints' + + +#@torch.jit.script +class TargetAssigner(object): + """Target assigner to compute classification and regression targets.""" + + def __init__(self, similarity_calc: IouSimilarity, matcher: ArgMaxMatcher, box_coder: FasterRcnnBoxCoder, + negative_class_weight: float = 1.0, unmatched_cls_target: Optional[float] = None, + keypoints_field_name: str = KEYPOINTS_FIELD_NAME): + """Construct Object Detection Target Assigner. + + Args: + similarity_calc: a RegionSimilarityCalculator + + matcher: Matcher used to match groundtruth to anchors. + + box_coder: BoxCoder used to encode matching groundtruth boxes with respect to anchors. + + negative_class_weight: classification weight to be associated to negative + anchors (default: 1.0). The weight must be in [0., 1.]. + + unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k] + which is consistent with the classification target for each + anchor (and can be empty for scalar targets). This shape must thus be + compatible with the groundtruth labels that are passed to the "assign" + function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]). + If set to None, unmatched_cls_target is set to be [0] for each anchor. + + Raises: + ValueError: if similarity_calc is not a RegionSimilarityCalculator or + if matcher is not a Matcher or if box_coder is not a BoxCoder + """ + self._similarity_calc = similarity_calc + self._matcher = matcher + self._box_coder = box_coder + self._negative_class_weight = negative_class_weight + if unmatched_cls_target is not None: + self._unmatched_cls_target = unmatched_cls_target + else: + self._unmatched_cls_target = 0. + self._keypoints_field_name = keypoints_field_name + + def assign(self, anchors: BoxList, groundtruth_boxes: BoxList, groundtruth_labels=None, groundtruth_weights=None): + """Assign classification and regression targets to each anchor. + + For a given set of anchors and groundtruth detections, match anchors + to groundtruth_boxes and assign classification and regression targets to + each anchor as well as weights based on the resulting match (specifying, + e.g., which anchors should not contribute to training loss). + + Anchors that are not matched to anything are given a classification target + of self._unmatched_cls_target which can be specified via the constructor. + + Args: + anchors: a BoxList representing N anchors + + groundtruth_boxes: a BoxList representing M groundtruth boxes + + groundtruth_labels: a tensor of shape [M, d_1, ... d_k] + with labels for each of the ground_truth boxes. The subshape + [d_1, ... d_k] can be empty (corresponding to scalar inputs). When set + to None, groundtruth_labels assumes a binary problem where all + ground_truth boxes get a positive label (of 1). + + groundtruth_weights: a float tensor of shape [M] indicating the weight to + assign to all anchors match to a particular groundtruth box. The weights + must be in [0., 1.]. If None, all weights are set to 1. + + **params: Additional keyword arguments for specific implementations of the Matcher. + + Returns: + cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k], + where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels + which has shape [num_gt_boxes, d_1, d_2, ... d_k]. 
+ + cls_weights: a float32 tensor with shape [num_anchors] + + reg_targets: a float32 tensor with shape [num_anchors, box_code_dimension] + + reg_weights: a float32 tensor with shape [num_anchors] + + match: a matcher.Match object encoding the match between anchors and groundtruth boxes, + with rows corresponding to groundtruth boxes and columns corresponding to anchors. + + Raises: + ValueError: if anchors or groundtruth_boxes are not of type box_list.BoxList + """ + if not isinstance(anchors, box_list.BoxList): + raise ValueError('anchors must be an BoxList') + if not isinstance(groundtruth_boxes, box_list.BoxList): + raise ValueError('groundtruth_boxes must be an BoxList') + + # device = anchors.device() + # if groundtruth_labels is None: + # groundtruth_labels = torch.ones(groundtruth_boxes.num_boxes(), device=device).unsqueeze(0) + # groundtruth_labels = groundtruth_labels.unsqueeze(-1) + # if groundtruth_weights is None: + # num_gt_boxes = groundtruth_boxes.num_boxes() + # if not num_gt_boxes: + # num_gt_boxes = groundtruth_boxes.num_boxes() + # groundtruth_weights = torch.ones([num_gt_boxes], device=device) + + match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes, anchors) + match = self._matcher.match(match_quality_matrix) + reg_targets = self._create_regression_targets(anchors, groundtruth_boxes, match) + cls_targets = self._create_classification_targets(groundtruth_labels, match) + #reg_weights = self._create_regression_weights(match, groundtruth_weights) + #cls_weights = self._create_classification_weights(match, groundtruth_weights) + + return cls_targets, reg_targets, match + + def _create_regression_targets(self, anchors: BoxList, groundtruth_boxes: BoxList, match: Match): + """Returns a regression target for each anchor. + + Args: + anchors: a BoxList representing N anchors + + groundtruth_boxes: a BoxList representing M groundtruth_boxes + + match: a matcher.Match object + + Returns: + reg_targets: a float32 tensor with shape [N, box_code_dimension] + """ + device = anchors.device() + zero_box = torch.zeros((1, 4), device=device) + matched_gt_boxes = match.gather_based_on_match( + groundtruth_boxes.boxes(), unmatched_value=zero_box, ignored_value=zero_box) + matched_gt_boxlist = box_list.BoxList(matched_gt_boxes) + if groundtruth_boxes.has_field(self._keypoints_field_name): + groundtruth_keypoints = groundtruth_boxes.get_field(self._keypoints_field_name) + zero_kp = torch.zeros((1,) + groundtruth_keypoints.shape[1:], device=device) + matched_keypoints = match.gather_based_on_match( + groundtruth_keypoints, unmatched_value=zero_kp, ignored_value=zero_kp) + matched_gt_boxlist.add_field(self._keypoints_field_name, matched_keypoints) + matched_reg_targets = self._box_coder.encode(matched_gt_boxlist, anchors) + + unmatched_ignored_reg_targets = self._default_regression_target(device).repeat(match.match_results.shape[0], 1) + + matched_anchors_mask = match.matched_column_indicator() + reg_targets = torch.where(matched_anchors_mask.unsqueeze(1), matched_reg_targets, unmatched_ignored_reg_targets) + return reg_targets + + def _default_regression_target(self, device: torch.device): + """Returns the default target for anchors to regress to. + + Default regression targets are set to zero (though in this implementation what + these targets are set to should not matter as the regression weight of any box + set to regress to the default target is zero). 
+ + Returns: + default_target: a float32 tensor with shape [1, box_code_dimension] + """ + return torch.zeros(1, self._box_coder.code_size(), device=device) + + def _create_classification_targets(self, groundtruth_labels, match: Match): + """Create classification targets for each anchor. + + Assign a classification target of for each anchor to the matching + groundtruth label that is provided by match. Anchors that are not matched + to anything are given the target self._unmatched_cls_target + + Args: + groundtruth_labels: a tensor of shape [num_gt_boxes, d_1, ... d_k] + with labels for each of the ground_truth boxes. The subshape + [d_1, ... d_k] can be empty (corresponding to scalar labels). + match: a matcher.Match object that provides a matching between anchors + and groundtruth boxes. + + Returns: + a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k], where the + subshape [d_1, ..., d_k] is compatible with groundtruth_labels which has + shape [num_gt_boxes, d_1, d_2, ... d_k]. + """ + return match.gather_based_on_match( + groundtruth_labels, + unmatched_value=self._unmatched_cls_target, ignored_value=self._unmatched_cls_target) + + def _create_regression_weights(self, match: Match, groundtruth_weights): + """Set regression weight for each anchor. + + Only positive anchors are set to contribute to the regression loss, so this + method returns a weight of 1 for every positive anchor and 0 for every + negative anchor. + + Args: + match: a matcher.Match object that provides a matching between anchors and groundtruth boxes. + groundtruth_weights: a float tensor of shape [M] indicating the weight to + assign to all anchors match to a particular groundtruth box. + + Returns: + a float32 tensor with shape [num_anchors] representing regression weights. + """ + return match.gather_based_on_match(groundtruth_weights, ignored_value=0., unmatched_value=0.) + + def _create_classification_weights(self, match: Match, groundtruth_weights): + """Create classification weights for each anchor. + + Positive (matched) anchors are associated with a weight of + positive_class_weight and negative (unmatched) anchors are associated with + a weight of negative_class_weight. When anchors are ignored, weights are set + to zero. By default, both positive/negative weights are set to 1.0, + but they can be adjusted to handle class imbalance (which is almost always + the case in object detection). + + Args: + match: a matcher.Match object that provides a matching between anchors and groundtruth boxes. + groundtruth_weights: a float tensor of shape [M] indicating the weight to + assign to all anchors match to a particular groundtruth box. + + Returns: + a float32 tensor with shape [num_anchors] representing classification weights. + """ + return match.gather_based_on_match( + groundtruth_weights, ignored_value=0., unmatched_value=self._negative_class_weight) + + def box_coder(self): + """Get BoxCoder of this TargetAssigner. + + Returns: + BoxCoder object. 
+ """ + return self._box_coder diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/soft_nms.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/soft_nms.py new file mode 100644 index 0000000000..fff0158e3e --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/soft_nms.py @@ -0,0 +1,170 @@ +""" PyTorch Soft-NMS + +This code was adapted from a PR for detectron2 submitted by https://github.com/alekseynp +https://github.com/facebookresearch/detectron2/pull/1183/files + +Detectron2 is licensed Apache 2.0, Copyright Facebook Inc. +""" +import torch +from typing import List + + +def pairwise_iou(boxes1, boxes2) -> torch.Tensor: + """ + Given two lists of boxes of size N and M, + compute the IoU (intersection over union) + between __all__ N x M pairs of boxes. + The box order must be (xmin, ymin, xmax, ymax). + Args: + boxes1,boxes2 (Boxes): two `Boxes`. Contains N & M boxes, respectively. + Returns: + Tensor: IoU, sized [N,M]. + """ + area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1]) # [N,] + area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1]) # [M,] + + width_height = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) - torch.max( + boxes1[:, None, :2], boxes2[:, :2] + ) # [N,M,2] + + width_height.clamp_(min=0) # [N,M,2] + inter = width_height.prod(dim=2) # [N,M] + + # handle empty boxes + iou = torch.where( + inter > 0, + inter / (area1[:, None] + area2 - inter), + torch.zeros(1, dtype=inter.dtype, device=inter.device), + ) + return iou + + +def soft_nms( + boxes, + scores, + method_gaussian: bool = True, + sigma: float = 0.5, + iou_threshold: float = .5, + score_threshold: float = 0.005 +): + """ + Soft non-max suppression algorithm. + + Implementation of [Soft-NMS -- Improving Object Detection With One Line of Codec] + (https://arxiv.org/abs/1704.04503) + + Args: + boxes_remain (Tensor[N, ?]): + boxes where NMS will be performed + if Boxes, in (x1, y1, x2, y2) format + if RotatedBoxes, in (x_ctr, y_ctr, width, height, angle_degrees) format + scores_remain (Tensor[N]): + scores for each one of the boxes + method_gaussian (bool): use gaussian method if True, otherwise linear + sigma (float): + parameter for Gaussian penalty function + iou_threshold (float): + iou threshold for applying linear decay. Nt from the paper + re-used as threshold for standard "hard" nms + score_threshold (float): + boxes with scores below this threshold are pruned at each iteration. + Dramatically reduces computation time. 
Authors use values in [10e-4, 10e-2] + + Returns: + tuple(Tensor, Tensor): + [0]: int64 tensor with the indices of the elements that have been kept + by Soft NMS, sorted in decreasing order of scores + [1]: float tensor with the re-scored scores of the elements that were kept + """ + device = boxes.device + boxes_remain = boxes.clone() + scores_remain = scores.clone() + num_elem = scores_remain.size()[0] + idxs = torch.arange(num_elem) + idxs_out = torch.zeros(num_elem, dtype=torch.int64, device=device) + scores_out = torch.zeros(num_elem, dtype=torch.float32, device=device) + count: int = 0 + + while scores_remain.numel() > 0: + top_idx = torch.argmax(scores_remain) + idxs_out[count] = idxs[top_idx] + scores_out[count] = scores_remain[top_idx] + count += 1 + + top_box = boxes_remain[top_idx] + ious = pairwise_iou(top_box.unsqueeze(0), boxes_remain)[0] + + if method_gaussian: + decay = torch.exp(-torch.pow(ious, 2) / sigma) + else: + decay = torch.ones_like(ious) + decay_mask = ious > iou_threshold + decay[decay_mask] = 1 - ious[decay_mask] + + scores_remain *= decay + keep = scores_remain > score_threshold + keep[top_idx] = torch.tensor(False, device=device) + + boxes_remain = boxes_remain[keep] + scores_remain = scores_remain[keep] + idxs = idxs[keep] + + return idxs_out[:count], scores_out[:count] + + +def batched_soft_nms( + boxes, scores, idxs, + method_gaussian: bool = True, + sigma: float = 0.5, + iou_threshold: float = .5, + score_threshold: float = 0.001): + + """ + Performs soft non-maximum suppression in a batched fashion. + + Each index value correspond to a category, and NMS + will not be applied between elements of different categories. + + Args: + boxes (Tensor[N, 4]): + boxes where NMS will be performed. They + are expected to be in (x1, y1, x2, y2) format + scores (Tensor[N]): + scores for each one of the boxes + idxs (Tensor[N]): + indices of the categories for each one of the boxes. + method (str): + one of ['gaussian', 'linear', 'hard'] + see paper for details. users encouraged not to use "hard", as this is the + same nms available elsewhere in detectron2 + sigma (float): + parameter for Gaussian penalty function + iou_threshold (float): + iou threshold for applying linear decay. Nt from the paper + re-used as threshold for standard "hard" nms + score_threshold (float): + boxes with scores below this threshold are pruned at each iteration. + Dramatically reduces computation time. Authors use values in [10e-4, 10e-2] + Returns: + tuple(Tensor, Tensor): + [0]: int64 tensor with the indices of the elements that have been kept + by Soft NMS, sorted in decreasing order of scores + [1]: float tensor with the re-scored scores of the elements that were kept + """ + if boxes.numel() == 0: + return ( + torch.empty((0,), dtype=torch.int64, device=boxes.device), + torch.empty((0,), dtype=torch.float32, device=scores.device), + ) + # strategy: in order to perform NMS independently per class. + # we add an offset to all the boxes. 
The offset is dependent + # only on the class idx, and is large enough so that boxes + # from different classes do not overlap + max_coordinate = boxes.max() + offsets = idxs.to(boxes) * (max_coordinate + 1) + boxes_for_nms = boxes + offsets[:, None] + return soft_nms( + boxes_for_nms, scores, method_gaussian=method_gaussian, sigma=sigma, + iou_threshold=iou_threshold, score_threshold=score_threshold + ) + diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/version.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/version.py new file mode 100644 index 0000000000..a6587aeab9 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/effdet/version.py @@ -0,0 +1 @@ +__version__ = '0.2.4' diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/requirements-sotabench.txt b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/requirements-sotabench.txt new file mode 100644 index 0000000000..b2b0234892 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/requirements-sotabench.txt @@ -0,0 +1,16 @@ +torch==1.4.0 +timm==0.3.2 +torchvision==0.5 +pycocotools>=2.0.0 +pyyaml + +# better to manually install pillow-simd +Pillow + +# conflict between pycocotools and numpy 1.18.+ +numpy<=1.17.5 + +# pycocotools needs this but doesn't have dep +matplotlib + +omegaconf>=2.0 \ No newline at end of file diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/requirements.txt b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/requirements.txt new file mode 100644 index 0000000000..017bb6ece5 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/requirements.txt @@ -0,0 +1,10 @@ +torch>=1.4.0 +timm>=0.3.2 +torchvision +pyyaml +numpy + +# an update version that fixes some bugs, incl issues with numpy >= 1.18 +pycocotools>=2.0.2 + +omegaconf>=2.0 diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/setup.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/setup.py new file mode 100644 index 0000000000..264169456e --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/setup.py @@ -0,0 +1,47 @@ +""" Setup +""" +from setuptools import setup, find_packages +from codecs import open +from os import path + +here = path.abspath(path.dirname(__file__)) + +# Get the long description from the README file +with open(path.join(here, 'README.md'), encoding='utf-8') as f: + long_description = f.read() + +exec(open('effdet/version.py').read()) +setup( + name='effdet', + version=__version__, + description='EfficientDet for PyTorch', + long_description=long_description, + long_description_content_type='text/markdown', + url='https://github.com/rwightman/efficientdet-pytorch', + author='Ross Wightman', + author_email='hello@rwightman.com', + classifiers=[ + # How mature is this project? 
Common values are + # 3 - Alpha + # 4 - Beta + # 5 - Production/Stable + 'Development Status :: 3 - Alpha', + 'Intended Audience :: Education', + 'Intended Audience :: Science/Research', + 'License :: OSI Approved :: Apache Software License', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Topic :: Scientific/Engineering', + 'Topic :: Scientific/Engineering :: Artificial Intelligence', + 'Topic :: Software Development', + 'Topic :: Software Development :: Libraries', + 'Topic :: Software Development :: Libraries :: Python Modules', + ], + + # Note that this is a string of words separated by whitespace, not a list. + keywords='pytorch pretrained efficientdet efficientnet bifpn object detection', + packages=find_packages(exclude=['data']), + install_requires=['torch >= 1.4', 'torchvision', 'timm >= 0.3.2', 'pycocotools>=2.0.2', 'omegaconf>=2.0'], + python_requires='>=3.6', +) diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/sotabench.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/sotabench.py new file mode 100644 index 0000000000..f078a8d63f --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/sotabench.py @@ -0,0 +1,148 @@ +import os +import tqdm +import torch +try: + from apex import amp + has_amp = True +except ImportError: + has_amp = False +from sotabencheval.object_detection import COCOEvaluator +from sotabencheval.utils import is_server, extract_archive +from effdet import create_model, create_loader, create_dataset + +NUM_GPU = 1 +BATCH_SIZE = (128 if has_amp else 64) * NUM_GPU +ANNO_SET = 'val2017' + +if is_server(): + DATA_ROOT = './.data/vision/coco' + image_dir_zip = os.path.join('./.data/vision/coco', f'{ANNO_SET}.zip') + extract_archive(from_path=image_dir_zip, to_path='./.data/vision/coco') +else: + # local settings + DATA_ROOT = '' + + +def _bs(b=64): + b *= NUM_GPU + if has_amp: + b *= 2 + return b + + +def _entry(model_name, paper_model_name, paper_arxiv_id, batch_size=BATCH_SIZE, model_desc=None): + return dict( + model_name=model_name, + model_description=model_desc, + paper_model_name=paper_model_name, + paper_arxiv_id=paper_arxiv_id, + batch_size=batch_size) + +# NOTE For any original PyTorch models, I'll remove from this list when you add to sotabench to +# avoid overlap and confusion. Please contact me. 
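+# Each entry maps a local effdet model name to the paper model name and arxiv id
+# reported to sotabench, with a per-model batch size from _bs() (scaled by
+# NUM_GPU and doubled when Apex AMP is available).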
+model_list = [ + + ## Weights trained by myself or others in PyTorch + _entry('resdet50', 'ResDet50', '1911.09070', batch_size=_bs(72), + model_desc='Trained in PyTorch with https://github.com/rwightman/efficientdet-pytorch'), + _entry('tf_efficientdet_lite0', 'EfficientDet-Lite0', '1911.09070', batch_size=_bs(128), + model_desc='Trained in PyTorch with https://github.com/rwightman/efficientdet-pytorch'), + _entry('efficientdet_d0', 'EfficientDet-D0', '1911.09070', batch_size=_bs(112), + model_desc='Trained in PyTorch with https://github.com/rwightman/efficientdet-pytorch'), + _entry('efficientdet_d1', 'EfficientDet-D1', '1911.09070', batch_size=_bs(72), + model_desc='Trained in PyTorch with https://github.com/rwightman/efficientdet-pytorch'), + + ## Weights ported by myself from other frameworks + _entry('tf_efficientdet_d0', 'EfficientDet-D0', '1911.09070', batch_size=_bs(112), + model_desc='Ported from official Google AI Tensorflow weights'), + _entry('tf_efficientdet_d1', 'EfficientDet-D1', '1911.09070', batch_size=_bs(72), + model_desc='Ported from official Google AI Tensorflow weights'), + _entry('tf_efficientdet_d2', 'EfficientDet-D2', '1911.09070', batch_size=_bs(48), + model_desc='Ported from official Google AI Tensorflow weights'), + _entry('tf_efficientdet_d3', 'EfficientDet-D3', '1911.09070', batch_size=_bs(32), + model_desc='Ported from official Google AI Tensorflow weights'), + _entry('tf_efficientdet_d4', 'EfficientDet-D4', '1911.09070', batch_size=_bs(16), + model_desc='Ported from official Google AI Tensorflow weights'), + _entry('tf_efficientdet_d5', 'EfficientDet-D5', '1911.09070', batch_size=_bs(12), + model_desc='Ported from official Google AI Tensorflow weights'), + _entry('tf_efficientdet_d6', 'EfficientDet-D6', '1911.09070', batch_size=_bs(8), + model_desc='Ported from official Google AI Tensorflow weights'), + _entry('tf_efficientdet_d7', 'EfficientDet-D7', '1911.09070', batch_size=_bs(4), + model_desc='Ported from official Google AI Tensorflow weights'), + # _entry('tf_efficientdet_d7x', 'EfficientDet-D7X', '1911.09070', batch_size=_bs(4), + # model_desc='Ported from official Google AI Tensorflow weights'), +] + + +def eval_model(model_name, paper_model_name, paper_arxiv_id, batch_size=64, model_description=''): + + # create model + bench = create_model( + model_name, + bench_task='predict', + pretrained=True, + ) + bench.eval() + input_size = bench.config.image_size + + param_count = sum([m.numel() for m in bench.parameters()]) + print('Model %s created, param count: %d' % (model_name, param_count)) + + bench = bench.cuda() + if has_amp: + print('Using AMP mixed precision.') + bench = amp.initialize(bench, opt_level='O1') + else: + print('AMP not installed, running network in FP32.') + + evaluator = COCOEvaluator( + root=DATA_ROOT, + model_name=paper_model_name, + model_description=model_description, + paper_arxiv_id=paper_arxiv_id) + + dataset = create_dataset('coco', DATA_ROOT, splits='val') + + loader = create_loader( + dataset, + input_size=input_size, + batch_size=batch_size, + use_prefetcher=True, + fill_color='mean', + num_workers=4, + pin_mem=True) + + iterator = tqdm.tqdm(loader, desc="Evaluation", mininterval=5) + sample_count = 0 + evaluator.reset_time() + with torch.no_grad(): + for i, (input, target) in enumerate(iterator): + output = bench(input, target) + output = output.cpu() + results = [] + for index, sample in enumerate(output): + image_id = int(dataset.parser.img_ids[sample_count]) + sample[:, 2] -= sample[:, 0] + sample[:, 3] -= sample[:, 1] 
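+                # boxes are now in COCO xywh format; build one result dict per
+                # detection, stopping once scores drop below the 0.001 cutoff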
+ for det in sample: + score = float(det[4]) + if score < .001: # stop when below this threshold, scores in descending order + break + coco_det = dict( + image_id=image_id, + bbox=det[0:4].tolist(), + score=score, + category_id=int(det[5])) + results.append(coco_det) + sample_count += 1 + evaluator.add(results) + + if evaluator.cache_exists: + break + + evaluator.save() + + +for m in model_list: + eval_model(**m) + torch.cuda.empty_cache() diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/sotabench_setup.sh b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/sotabench_setup.sh new file mode 100644 index 0000000000..72a56596cf --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/sotabench_setup.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +source /workspace/venv/bin/activate + +pip install -r requirements-sotabench.txt + +apt-get git +git clone https://github.com/NVIDIA/apex +cd apex +pip install -v --no-cache-dir ./ diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/train.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/train.py new file mode 100644 index 0000000000..73208c55bd --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/train.py @@ -0,0 +1,656 @@ +#!/usr/bin/env python +""" EfficientDet Training Script + +This script was started from an early version of the PyTorch ImageNet example +(https://github.com/pytorch/examples/tree/master/imagenet) + +NVIDIA CUDA specific speedups adopted from NVIDIA Apex examples +(https://github.com/NVIDIA/apex/tree/master/examples/imagenet) + +Hacked together by Ross Wightman (https://github.com/rwightman) +""" +import os +import argparse +import time +import yaml +import logging +from collections import OrderedDict +from contextlib import suppress +from datetime import datetime + +import torch +import torchvision.utils +from torch.nn.parallel import DistributedDataParallel as NativeDDP +try: + from apex import amp + from apex.parallel import DistributedDataParallel as ApexDDP + from apex.parallel import convert_syncbn_model + has_apex = True +except ImportError: + has_apex = False + +has_native_amp = False +try: + if getattr(torch.cuda.amp, 'autocast') is not None: + has_native_amp = True +except AttributeError: + pass + +from effdet import create_model, unwrap_bench, create_loader, create_dataset, create_evaluator +from effdet.data import resolve_input_config, SkipSubset +from effdet.anchors import Anchors, AnchorLabeler +from timm.models import resume_checkpoint, load_checkpoint +from timm.models.layers import set_layer_config +from timm.utils import * +from timm.optim import create_optimizer +from timm.scheduler import create_scheduler + +torch.backends.cudnn.benchmark = True + + +# The first arg parser parses out only the --config argument, this argument is used to +# load a yaml file containing key-values that override the defaults for the main parser below +config_parser = parser = argparse.ArgumentParser(description='Training Config', add_help=False) +parser.add_argument('-c', '--config', default='', type=str, metavar='FILE', + help='YAML config file specifying default arguments') + + +parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') +# Dataset / Model parameters +parser.add_argument('root', metavar='DIR', + help='path to dataset') +parser.add_argument('--dataset', default='coco', type=str, metavar='DATASET', + 
help='Name of model to train (default: "coco"') +parser.add_argument('--model', default='tf_efficientdet_d1', type=str, metavar='MODEL', + help='Name of model to train (default: "tf_efficientdet_d1"') +add_bool_arg(parser, 'redundant-bias', default=None, help='override model config for redundant bias') +add_bool_arg(parser, 'soft-nms', default=None, help='override model config for soft-nms') +parser.add_argument('--val-skip', type=int, default=0, metavar='N', + help='Skip every N validation samples.') +parser.add_argument('--num-classes', type=int, default=None, metavar='N', + help='Override num_classes in model config if set. For fine-tuning from pretrained.') +parser.add_argument('--pretrained', action='store_true', default=False, + help='Start with pretrained version of specified network (if avail)') +parser.add_argument('--no-pretrained-backbone', action='store_true', default=False, + help='Do not start with pretrained backbone weights, fully random.') +parser.add_argument('--initial-checkpoint', default='', type=str, metavar='PATH', + help='Initialize model from this checkpoint (default: none)') +parser.add_argument('--resume', default='', type=str, metavar='PATH', + help='Resume full model and optimizer state from checkpoint (default: none)') +parser.add_argument('--no-resume-opt', action='store_true', default=False, + help='prevent resume of optimizer state when resuming model') +parser.add_argument('--mean', type=float, nargs='+', default=None, metavar='MEAN', + help='Override mean pixel value of dataset') +parser.add_argument('--std', type=float, nargs='+', default=None, metavar='STD', + help='Override std deviation of of dataset') +parser.add_argument('--interpolation', default='', type=str, metavar='NAME', + help='Image resize interpolation type (overrides model)') +parser.add_argument('--fill-color', default=None, type=str, metavar='NAME', + help='Image augmentation fill (background) color ("mean" or int)') +parser.add_argument('-b', '--batch-size', type=int, default=32, metavar='N', + help='input batch size for training (default: 32)') +parser.add_argument('--clip-grad', type=float, default=10.0, metavar='NORM', + help='Clip gradient norm (default: 10.0)') + +# Optimizer parameters +parser.add_argument('--opt', default='momentum', type=str, metavar='OPTIMIZER', + help='Optimizer (default: "momentum"') +parser.add_argument('--opt-eps', default=1e-3, type=float, metavar='EPSILON', + help='Optimizer Epsilon (default: 1e-3)') +parser.add_argument('--momentum', type=float, default=0.9, metavar='M', + help='SGD momentum (default: 0.9)') +parser.add_argument('--weight-decay', type=float, default=4e-5, + help='weight decay (default: 0.00004)') + +# Learning rate schedule parameters +parser.add_argument('--sched', default='cosine', type=str, metavar='SCHEDULER', + help='LR scheduler (default: "step"') +parser.add_argument('--lr', type=float, default=0.01, metavar='LR', + help='learning rate (default: 0.01)') +parser.add_argument('--lr-noise', type=float, nargs='+', default=None, metavar='pct, pct', + help='learning rate noise on/off epoch percentages') +parser.add_argument('--lr-noise-pct', type=float, default=0.67, metavar='PERCENT', + help='learning rate noise limit percent (default: 0.67)') +parser.add_argument('--lr-noise-std', type=float, default=1.0, metavar='STDDEV', + help='learning rate noise std-dev (default: 1.0)') +parser.add_argument('--lr-cycle-mul', type=float, default=1.0, metavar='MULT', + help='learning rate cycle len multiplier (default: 1.0)') 
+parser.add_argument('--lr-cycle-limit', type=int, default=1, metavar='N', + help='learning rate cycle limit') +parser.add_argument('--warmup-lr', type=float, default=0.0001, metavar='LR', + help='warmup learning rate (default: 0.0001)') +parser.add_argument('--min-lr', type=float, default=1e-5, metavar='LR', + help='lower lr bound for cyclic schedulers that hit 0 (1e-5)') +parser.add_argument('--epochs', type=int, default=300, metavar='N', + help='number of epochs to train (default: 2)') +parser.add_argument('--start-epoch', default=None, type=int, metavar='N', + help='manual epoch number (useful on restarts)') +parser.add_argument('--decay-epochs', type=float, default=30, metavar='N', + help='epoch interval to decay LR') +parser.add_argument('--warmup-epochs', type=int, default=5, metavar='N', + help='epochs to warmup LR, if scheduler supports') +parser.add_argument('--cooldown-epochs', type=int, default=10, metavar='N', + help='epochs to cooldown LR at min_lr, after cyclic schedule ends') +parser.add_argument('--patience-epochs', type=int, default=10, metavar='N', + help='patience epochs for Plateau LR scheduler (default: 10') +parser.add_argument('--decay-rate', '--dr', type=float, default=0.1, metavar='RATE', + help='LR decay rate (default: 0.1)') + +# Augmentation parameters +parser.add_argument('--color-jitter', type=float, default=0.4, metavar='PCT', + help='Color jitter factor (default: 0.4)') +parser.add_argument('--aa', type=str, default=None, metavar='NAME', + help='Use AutoAugment policy. "v0" or "original". (default: None)'), +parser.add_argument('--reprob', type=float, default=0., metavar='PCT', + help='Random erase prob (default: 0.)') +parser.add_argument('--remode', type=str, default='pixel', + help='Random erase mode (default: "pixel")') +parser.add_argument('--recount', type=int, default=1, + help='Random erase count (default: 1)') +parser.add_argument('--train-interpolation', type=str, default='random', + help='Training interpolation (random, bilinear, bicubic default: "random")') + +# loss +parser.add_argument('--smoothing', type=float, default=None, help='override model config label smoothing') +add_bool_arg(parser, 'jit-loss', default=None, help='override model config for torchscript jit loss fn') +add_bool_arg(parser, 'legacy-focal', default=None, help='override model config to use legacy focal loss') + +# Model Exponential Moving Average +parser.add_argument('--model-ema', action='store_true', default=False, + help='Enable tracking moving average of model weights') +parser.add_argument('--model-ema-decay', type=float, default=0.9998, + help='decay factor for model weights moving average (default: 0.9998)') + +# Misc +parser.add_argument('--sync-bn', action='store_true', + help='Enable NVIDIA Apex or Torch synchronized BatchNorm.') +parser.add_argument('--dist-bn', type=str, default='', + help='Distribute BatchNorm stats between nodes after each epoch ("broadcast", "reduce", or "")') +parser.add_argument('--seed', type=int, default=42, metavar='S', + help='random seed (default: 42)') +parser.add_argument('--log-interval', type=int, default=50, metavar='N', + help='how many batches to wait before logging training status') +parser.add_argument('--recovery-interval', type=int, default=0, metavar='N', + help='how many batches to wait before writing recovery checkpoint') +parser.add_argument('-j', '--workers', type=int, default=4, metavar='N', + help='how many training processes to use (default: 1)') +parser.add_argument('--save-images', action='store_true', 
default=False, + help='save images of input bathes every log interval for debugging') +parser.add_argument('--amp', action='store_true', default=False, + help='use NVIDIA Apex AMP or Native AMP for mixed precision training') +parser.add_argument('--apex-amp', action='store_true', default=False, + help='Use NVIDIA Apex AMP mixed precision') +parser.add_argument('--native-amp', action='store_true', default=False, + help='Use Native Torch AMP mixed precision') +parser.add_argument('--channels-last', action='store_true', default=False, + help='Use channels_last memory layout') +parser.add_argument('--pin-mem', action='store_true', default=False, + help='Pin CPU memory in DataLoader for more efficient (sometimes) transfer to GPU.') +parser.add_argument('--no-prefetcher', action='store_true', default=False, + help='disable fast prefetcher') +parser.add_argument('--torchscript', dest='torchscript', action='store_true', + help='convert model torchscript for inference') +add_bool_arg(parser, 'bench-labeler', default=False, + help='label targets in model bench, increases GPU load at expense of loader processes') +parser.add_argument('--output', default='', type=str, metavar='PATH', + help='path to output folder (default: none, current dir)') +parser.add_argument('--eval-metric', default='map', type=str, metavar='EVAL_METRIC', + help='Best metric (default: "map"') +parser.add_argument('--tta', type=int, default=0, metavar='N', + help='Test/inference time augmentation (oversampling) factor. 0=None (default: 0)') +parser.add_argument("--local_rank", default=0, type=int) + + +def _parse_args(): + # Do we have a config file to parse? + args_config, remaining = config_parser.parse_known_args() + if args_config.config: + with open(args_config.config, 'r') as f: + cfg = yaml.safe_load(f) + parser.set_defaults(**cfg) + + # The main arg parser parses the rest of the args, the usual + # defaults will have been overridden if config file specified. + args = parser.parse_args(remaining) + + # Cache the args as a text string to save them in the output dir later + args_text = yaml.safe_dump(args.__dict__, default_flow_style=False) + return args, args_text + + +def main(): + setup_default_logging() + args, args_text = _parse_args() + + args.pretrained_backbone = not args.no_pretrained_backbone + args.prefetcher = not args.no_prefetcher + args.distributed = False + if 'WORLD_SIZE' in os.environ: + args.distributed = int(os.environ['WORLD_SIZE']) > 1 + args.device = 'cuda:0' + args.world_size = 1 + args.rank = 0 # global rank + if args.distributed: + args.device = 'cuda:%d' % args.local_rank + torch.cuda.set_device(args.local_rank) + torch.distributed.init_process_group(backend='nccl', init_method='env://') + args.world_size = torch.distributed.get_world_size() + args.rank = torch.distributed.get_rank() + assert args.rank >= 0 + + if args.distributed: + logging.info('Training in distributed mode with multiple processes, 1 GPU per process. Process %d, total %d.' + % (args.rank, args.world_size)) + else: + logging.info('Training with a single process on 1 GPU.') + + use_amp = None + if args.amp: + # for backwards compat, `--amp` arg tries apex before native amp + if has_apex: + args.apex_amp = True + elif has_native_amp: + args.native_amp = True + else: + logging.warning("Neither APEX or native Torch AMP is available, using float32. " + "Install NVIDA apex or upgrade to PyTorch 1.6.") + + if args.apex_amp: + if has_apex: + use_amp = 'apex' + else: + logging.warning("APEX AMP not available, using float32. 
Install NVIDA apex") + elif args.native_amp: + if has_native_amp: + use_amp = 'native' + else: + logging.warning("Native AMP not available, using float32. Upgrade to PyTorch 1.6.") + + torch.manual_seed(args.seed + args.rank) + + with set_layer_config(scriptable=args.torchscript): + model = create_model( + args.model, + bench_task='train', + num_classes=args.num_classes, + pretrained=args.pretrained, + pretrained_backbone=args.pretrained_backbone, + redundant_bias=args.redundant_bias, + label_smoothing=args.smoothing, + legacy_focal=args.legacy_focal, + jit_loss=args.jit_loss, + soft_nms=args.soft_nms, + bench_labeler=args.bench_labeler, + checkpoint_path=args.initial_checkpoint, + ) + model_config = model.config # grab before we obscure with DP/DDP wrappers + + if args.local_rank == 0: + logging.info('Model %s created, param count: %d' % (args.model, sum([m.numel() for m in model.parameters()]))) + + model.cuda() + if args.channels_last: + model = model.to(memory_format=torch.channels_last) + + if args.distributed and args.sync_bn: + if has_apex and use_amp != 'native': + model = convert_syncbn_model(model) + else: + model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) + if args.local_rank == 0: + logging.info( + 'Converted model to use Synchronized BatchNorm. WARNING: You may have issues if using ' + 'zero initialized BN layers (enabled by default for ResNets) while sync-bn enabled.') + + if args.torchscript: + assert not use_amp == 'apex', 'Cannot use APEX AMP with torchscripted model, force native amp with `--native-amp` flag' + assert not args.sync_bn, 'Cannot use SyncBatchNorm with torchscripted model. Use `--dist-bn reduce` instead of `--sync-bn`' + model = torch.jit.script(model) + + optimizer = create_optimizer(args, model) + + amp_autocast = suppress # do nothing + loss_scaler = None + if use_amp == 'apex': + model, optimizer = amp.initialize(model, optimizer, opt_level='O1') + loss_scaler = ApexScaler() + if args.local_rank == 0: + logging.info('Using NVIDIA APEX AMP. Training in mixed precision.') + elif use_amp == 'native': + amp_autocast = torch.cuda.amp.autocast + loss_scaler = NativeScaler() + if args.local_rank == 0: + logging.info('Using native Torch AMP. Training in mixed precision.') + else: + if args.local_rank == 0: + logging.info('AMP not enabled. Training in float32.') + + # optionally resume from a checkpoint + resume_epoch = None + if args.resume: + resume_epoch = resume_checkpoint( + unwrap_bench(model), args.resume, + optimizer=None if args.no_resume_opt else optimizer, + loss_scaler=None if args.no_resume_opt else loss_scaler, + log_info=args.local_rank == 0) + + model_ema = None + if args.model_ema: + # Important to create EMA model after cuda(), DP wrapper, and AMP but before SyncBN and DDP wrapper + model_ema = ModelEmaV2(model, decay=args.model_ema_decay) + if args.resume: + load_checkpoint(unwrap_bench(model_ema), args.resume, use_ema=True) + + if args.distributed: + if has_apex and use_amp != 'native': + if args.local_rank == 0: + logging.info("Using apex DistributedDataParallel.") + model = ApexDDP(model, delay_allreduce=True) + else: + if args.local_rank == 0: + logging.info("Using torch DistributedDataParallel.") + model = NativeDDP(model, device_ids=[args.device]) + # NOTE: EMA model does not need to be wrapped by DDP... + if model_ema is not None and not args.resume: + # ...but it is a good idea to sync EMA copy of weights + # NOTE: ModelEma init could be moved after DDP wrapper if using PyTorch DDP, not Apex. 
+ model_ema.set(model) + + lr_scheduler, num_epochs = create_scheduler(args, optimizer) + start_epoch = 0 + if args.start_epoch is not None: + # a specified start_epoch will always override the resume epoch + start_epoch = args.start_epoch + elif resume_epoch is not None: + start_epoch = resume_epoch + if lr_scheduler is not None and start_epoch > 0: + lr_scheduler.step(start_epoch) + + if args.local_rank == 0: + logging.info('Scheduled epochs: {}'.format(num_epochs)) + + loader_train, loader_eval, evaluator = create_datasets_and_loaders(args, model_config) + + if model_config.num_classes < loader_train.dataset.parser.max_label: + logging.error( + f'Model {model_config.num_classes} has fewer classes than dataset {loader_train.dataset.parser.max_label}.') + exit(1) + if model_config.num_classes > loader_train.dataset.parser.max_label: + logging.warning( + f'Model {model_config.num_classes} has more classes than dataset {loader_train.dataset.parser.max_label}.') + + eval_metric = args.eval_metric + best_metric = None + best_epoch = None + saver = None + output_dir = '' + if args.local_rank == 0: + output_base = args.output if args.output else './output' + exp_name = '-'.join([ + datetime.now().strftime("%Y%m%d-%H%M%S"), + args.model + ]) + output_dir = get_outdir(output_base, 'train', exp_name) + decreasing = True if eval_metric == 'loss' else False + saver = CheckpointSaver( + model, optimizer, args=args, model_ema=model_ema, amp_scaler=loss_scaler, + checkpoint_dir=output_dir, decreasing=decreasing, unwrap_fn=unwrap_bench) + with open(os.path.join(output_dir, 'args.yaml'), 'w') as f: + f.write(args_text) + + try: + for epoch in range(start_epoch, num_epochs): + if args.distributed: + loader_train.sampler.set_epoch(epoch) + + train_metrics = train_epoch( + epoch, model, loader_train, optimizer, args, + lr_scheduler=lr_scheduler, saver=saver, output_dir=output_dir, + amp_autocast=amp_autocast, loss_scaler=loss_scaler, model_ema=model_ema) + + if args.distributed and args.dist_bn in ('broadcast', 'reduce'): + if args.local_rank == 0: + logging.info("Distributing BatchNorm running means and vars") + distribute_bn(model, args.world_size, args.dist_bn == 'reduce') + + # the overhead of evaluating with coco style datasets is fairly high, so just ema or non, not both + if model_ema is not None: + if args.distributed and args.dist_bn in ('broadcast', 'reduce'): + distribute_bn(model_ema, args.world_size, args.dist_bn == 'reduce') + + eval_metrics = validate(model_ema.module, loader_eval, args, evaluator, log_suffix=' (EMA)') + else: + eval_metrics = validate(model, loader_eval, args, evaluator) + + if lr_scheduler is not None: + # step LR for next epoch + lr_scheduler.step(epoch + 1, eval_metrics[eval_metric]) + + if saver is not None: + update_summary( + epoch, train_metrics, eval_metrics, os.path.join(output_dir, 'summary.csv'), + write_header=best_metric is None) + + # save proper checkpoint with eval metric + best_metric, best_epoch = saver.save_checkpoint(epoch=epoch, metric=eval_metrics[eval_metric]) + + except KeyboardInterrupt: + pass + if best_metric is not None: + logging.info('*** Best metric: {0} (epoch {1})'.format(best_metric, best_epoch)) + + +def create_datasets_and_loaders( + args, + model_config, + transform_train_fn=None, + transform_eval_fn=None, + collate_fn=None, +): + """ Setup datasets, transforms, loaders, evaluator. 
+ + Args: + args: Command line args / config for training + model_config: Model specific configuration dict / struct + transform_train_fn: Override default image + annotation transforms (see note in loaders.py) + transform_eval_fn: Override default image + annotation transforms (see note in loaders.py) + collate_fn: Override default fast collate function + + Returns: + Train loader, validation loader, evaluator + """ + input_config = resolve_input_config(args, model_config=model_config) + + dataset_train, dataset_eval = create_dataset(args.dataset, args.root) + + # setup labeler in loader/collate_fn if not enabled in the model bench + labeler = None + if not args.bench_labeler: + labeler = AnchorLabeler( + Anchors.from_config(model_config), model_config.num_classes, match_threshold=0.5) + + loader_train = create_loader( + dataset_train, + input_size=input_config['input_size'], + batch_size=args.batch_size, + is_training=True, + use_prefetcher=args.prefetcher, + re_prob=args.reprob, + re_mode=args.remode, + re_count=args.recount, + # color_jitter=args.color_jitter, + # auto_augment=args.aa, + interpolation=args.train_interpolation or input_config['interpolation'], + fill_color=input_config['fill_color'], + mean=input_config['mean'], + std=input_config['std'], + num_workers=args.workers, + distributed=args.distributed, + pin_mem=args.pin_mem, + anchor_labeler=labeler, + transform_fn=transform_train_fn, + collate_fn=collate_fn, + ) + + if args.val_skip > 1: + dataset_eval = SkipSubset(dataset_eval, args.val_skip) + loader_eval = create_loader( + dataset_eval, + input_size=input_config['input_size'], + batch_size=args.batch_size, + is_training=False, + use_prefetcher=args.prefetcher, + interpolation=input_config['interpolation'], + fill_color=input_config['fill_color'], + mean=input_config['mean'], + std=input_config['std'], + num_workers=args.workers, + distributed=args.distributed, + pin_mem=args.pin_mem, + anchor_labeler=labeler, + transform_fn=transform_eval_fn, + collate_fn=collate_fn, + ) + + evaluator = create_evaluator(args.dataset, loader_eval.dataset, distributed=args.distributed, pred_yxyx=False) + + return loader_train, loader_eval, evaluator + + +def train_epoch( + epoch, model, loader, optimizer, args, + lr_scheduler=None, saver=None, output_dir='', amp_autocast=suppress, loss_scaler=None, model_ema=None): + + batch_time_m = AverageMeter() + data_time_m = AverageMeter() + losses_m = AverageMeter() + + model.train() + + end = time.time() + last_idx = len(loader) - 1 + num_updates = epoch * len(loader) + for batch_idx, (input, target) in enumerate(loader): + last_batch = batch_idx == last_idx + data_time_m.update(time.time() - end) + + if args.channels_last: + input = input.contiguous(memory_format=torch.channels_last) + + with amp_autocast(): + output = model(input, target) + loss = output['loss'] + + if not args.distributed: + losses_m.update(loss.item(), input.size(0)) + + optimizer.zero_grad() + if loss_scaler is not None: + loss_scaler(loss, optimizer, clip_grad=args.clip_grad, parameters=model.parameters()) + else: + loss.backward() + if args.clip_grad: + torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip_grad) + optimizer.step() + + torch.cuda.synchronize() + if model_ema is not None: + model_ema.update(model) + num_updates += 1 + + batch_time_m.update(time.time() - end) + if last_batch or batch_idx % args.log_interval == 0: + lrl = [param_group['lr'] for param_group in optimizer.param_groups] + lr = sum(lrl) / len(lrl) + + if args.distributed: + reduced_loss = 
reduce_tensor(loss.data, args.world_size) + losses_m.update(reduced_loss.item(), input.size(0)) + + if args.local_rank == 0: + logging.info( + 'Train: {} [{:>4d}/{} ({:>3.0f}%)] ' + 'Loss: {loss.val:>9.6f} ({loss.avg:>6.4f}) ' + 'Time: {batch_time.val:.3f}s, {rate:>7.2f}/s ' + '({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s) ' + 'LR: {lr:.3e} ' + 'Data: {data_time.val:.3f} ({data_time.avg:.3f})'.format( + epoch, + batch_idx, len(loader), + 100. * batch_idx / last_idx, + loss=losses_m, + batch_time=batch_time_m, + rate=input.size(0) * args.world_size / batch_time_m.val, + rate_avg=input.size(0) * args.world_size / batch_time_m.avg, + lr=lr, + data_time=data_time_m)) + + if args.save_images and output_dir: + torchvision.utils.save_image( + input, + os.path.join(output_dir, 'train-batch-%d.jpg' % batch_idx), + padding=0, + normalize=True) + + if saver is not None and args.recovery_interval and ( + last_batch or (batch_idx + 1) % args.recovery_interval == 0): + saver.save_recovery(epoch, batch_idx=batch_idx) + + if lr_scheduler is not None: + lr_scheduler.step_update(num_updates=num_updates, metric=losses_m.avg) + + end = time.time() + # end for + + if hasattr(optimizer, 'sync_lookahead'): + optimizer.sync_lookahead() + + return OrderedDict([('loss', losses_m.avg)]) + + +def validate(model, loader, args, evaluator=None, log_suffix=''): + batch_time_m = AverageMeter() + losses_m = AverageMeter() + + model.eval() + + end = time.time() + last_idx = len(loader) - 1 + with torch.no_grad(): + for batch_idx, (input, target) in enumerate(loader): + last_batch = batch_idx == last_idx + + output = model(input, target) + loss = output['loss'] + + if evaluator is not None: + evaluator.add_predictions(output['detections'], target) + + if args.distributed: + reduced_loss = reduce_tensor(loss.data, args.world_size) + else: + reduced_loss = loss.data + + torch.cuda.synchronize() + + losses_m.update(reduced_loss.item(), input.size(0)) + + batch_time_m.update(time.time() - end) + end = time.time() + if args.local_rank == 0 and (last_batch or batch_idx % args.log_interval == 0): + log_name = 'Test' + log_suffix + logging.info( + '{0}: [{1:>4d}/{2}] ' + 'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) ' + 'Loss: {loss.val:>7.4f} ({loss.avg:>6.4f}) '.format( + log_name, batch_idx, last_idx, batch_time=batch_time_m, loss=losses_m)) + + metrics = OrderedDict([('loss', losses_m.avg)]) + if evaluator is not None: + metrics['map'] = evaluator.evaluate() + + return metrics + + +if __name__ == '__main__': + main() diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/validate.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/validate.py new file mode 100644 index 0000000000..e3dc0efe06 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/efficientdet-pytorch/validate.py @@ -0,0 +1,198 @@ +#!/usr/bin/env python +""" COCO validation script + +Hacked together by Ross Wightman (https://github.com/rwightman) +""" +import argparse +import time +import torch +import torch.nn.parallel +from contextlib import suppress + +from effdet import create_model, create_evaluator, create_dataset, create_loader +from effdet.data import resolve_input_config +from timm.utils import AverageMeter, setup_default_logging +from timm.models.layers import set_layer_config + +has_apex = False +try: + from apex import amp + has_apex = True +except ImportError: + pass + +has_native_amp = False +try: + if getattr(torch.cuda.amp, 'autocast') is not None: + has_native_amp = 
True
+except AttributeError:
+    pass
+
+torch.backends.cudnn.benchmark = True
+
+
+def add_bool_arg(parser, name, default=False, help=''):  # FIXME move to utils
+    dest_name = name.replace('-', '_')
+    group = parser.add_mutually_exclusive_group(required=False)
+    group.add_argument('--' + name, dest=dest_name, action='store_true', help=help)
+    group.add_argument('--no-' + name, dest=dest_name, action='store_false', help=help)
+    parser.set_defaults(**{dest_name: default})
+
+
+parser = argparse.ArgumentParser(description='PyTorch COCO Validation')
+parser.add_argument('root', metavar='DIR',
+                    help='path to dataset root')
+parser.add_argument('--dataset', default='coco', type=str, metavar='DATASET',
+                    help='Name of dataset (default: "coco")')
+parser.add_argument('--split', default='val',
+                    help='validation split')
+parser.add_argument('--model', '-m', metavar='MODEL', default='tf_efficientdet_d1',
+                    help='model architecture (default: tf_efficientdet_d1)')
+add_bool_arg(parser, 'redundant-bias', default=None,
+             help='override model config for redundant bias layers')
+add_bool_arg(parser, 'soft-nms', default=None, help='override model config for soft-nms')
+parser.add_argument('--num-classes', type=int, default=None, metavar='N',
+                    help='Override num_classes in model config if set. For fine-tuning from pretrained.')
+parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
+                    help='number of data loading workers (default: 4)')
+parser.add_argument('-b', '--batch-size', default=128, type=int,
+                    metavar='N', help='mini-batch size (default: 128)')
+parser.add_argument('--img-size', default=None, type=int,
+                    metavar='N', help='Input image dimension, uses model default if empty')
+parser.add_argument('--mean', type=float, nargs='+', default=None, metavar='MEAN',
+                    help='Override mean pixel value of dataset')
+parser.add_argument('--std', type=float, nargs='+', default=None, metavar='STD',
+                    help='Override std deviation of dataset')
+parser.add_argument('--interpolation', default='bilinear', type=str, metavar='NAME',
+                    help='Image resize interpolation type (overrides model)')
+parser.add_argument('--fill-color', default=None, type=str, metavar='NAME',
+                    help='Image augmentation fill (background) color ("mean" or int)')
+parser.add_argument('--log-freq', default=10, type=int,
+                    metavar='N', help='batch logging frequency (default: 10)')
+parser.add_argument('--checkpoint', default='', type=str, metavar='PATH',
+                    help='path to latest checkpoint (default: none)')
+parser.add_argument('--pretrained', dest='pretrained', action='store_true',
+                    help='use pre-trained model')
+parser.add_argument('--num-gpu', type=int, default=1,
+                    help='Number of GPUs to use')
+parser.add_argument('--no-prefetcher', action='store_true', default=False,
+                    help='disable fast prefetcher')
+parser.add_argument('--pin-mem', action='store_true', default=False,
+                    help='Pin CPU memory in DataLoader for more efficient (sometimes) transfer to GPU.')
+parser.add_argument('--use-ema', dest='use_ema', action='store_true',
+                    help='use ema version of weights if present')
+parser.add_argument('--amp', action='store_true', default=False,
+                    help='Use AMP mixed precision. 
Defaults to Apex, fallback to native Torch AMP.') +parser.add_argument('--apex-amp', action='store_true', default=False, + help='Use NVIDIA Apex AMP mixed precision') +parser.add_argument('--native-amp', action='store_true', default=False, + help='Use Native Torch AMP mixed precision') +parser.add_argument('--torchscript', dest='torchscript', action='store_true', + help='convert model torchscript for inference') +parser.add_argument('--results', default='./results.json', type=str, metavar='FILENAME', + help='JSON filename for evaluation results') + + +def validate(args): + setup_default_logging() + + if args.amp: + if has_apex: + args.apex_amp = True + elif has_native_amp: + args.native_amp = True + assert not args.apex_amp or not args.native_amp, "Only one AMP mode should be set." + args.pretrained = args.pretrained or not args.checkpoint # might as well try to validate something + args.prefetcher = not args.no_prefetcher + + # create model + with set_layer_config(scriptable=args.torchscript): + extra_args = {} + if args.img_size is not None: + extra_args = dict(image_size=(args.img_size, args.img_size)) + bench = create_model( + args.model, + bench_task='predict', + num_classes=args.num_classes, + pretrained=args.pretrained, + redundant_bias=args.redundant_bias, + soft_nms=args.soft_nms, + checkpoint_path=args.checkpoint, + checkpoint_ema=args.use_ema, + **extra_args, + ) + model_config = bench.config + + param_count = sum([m.numel() for m in bench.parameters()]) + print('Model %s created, param count: %d' % (args.model, param_count)) + + bench = bench.cuda() + + amp_autocast = suppress + if args.apex_amp: + bench = amp.initialize(bench, opt_level='O1') + print('Using NVIDIA APEX AMP. Validating in mixed precision.') + elif args.native_amp: + amp_autocast = torch.cuda.amp.autocast + print('Using native Torch AMP. Validating in mixed precision.') + else: + print('AMP not enabled. Validating in float32.') + + if args.num_gpu > 1: + bench = torch.nn.DataParallel(bench, device_ids=list(range(args.num_gpu))) + + dataset = create_dataset(args.dataset, args.root, args.split) + input_config = resolve_input_config(args, model_config) + loader = create_loader( + dataset, + input_size=input_config['input_size'], + batch_size=args.batch_size, + use_prefetcher=args.prefetcher, + interpolation=input_config['interpolation'], + fill_color=input_config['fill_color'], + mean=input_config['mean'], + std=input_config['std'], + num_workers=args.workers, + pin_mem=args.pin_mem) + + evaluator = create_evaluator(args.dataset, dataset, pred_yxyx=False) + bench.eval() + batch_time = AverageMeter() + end = time.time() + last_idx = len(loader) - 1 + with torch.no_grad(): + for i, (input, target) in enumerate(loader): + with amp_autocast(): + output = bench(input, img_info=target) + evaluator.add_predictions(output, target) + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + if i % args.log_freq == 0 or i == last_idx: + print( + 'Test: [{0:>4d}/{1}] ' + 'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s) ' + .format( + i, len(loader), batch_time=batch_time, + rate_avg=input.size(0) / batch_time.avg) + ) + + mean_ap = 0. 
+    if dataset.parser.has_labels:
+        mean_ap = evaluator.evaluate()
+    else:
+        evaluator.save(args.results)
+
+    return mean_ap
+
+
+def main():
+    args = parser.parse_args()
+    validate(args)
+
+
+if __name__ == '__main__':
+    main()
+
diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/export.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/export.py
new file mode 100644
index 0000000000..b435e9050c
--- /dev/null
+++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/export.py
@@ -0,0 +1,42 @@
+import sys
+sys.path.append(r'./efficientdet-pytorch')
+import torch
+from effdet.config import get_efficientdet_config
+from effdet.efficientdet import EfficientDet
+import argparse
+# import torch_aie
+# from torch_aie import _enums
+
+parser = argparse.ArgumentParser(description='pth to torchscript')
+parser.add_argument('--batch_size', type=int, default=1,
+                    help='batch size 1/4/8/16/32')
+parser.add_argument('--checkpoint', type=str, default='./d0.pth',
+                    help='pytorch checkpoint path')
+parser.add_argument('--ts_save_path', type=str, default='d0.ts',
+                    help='export ts model path')
+
+if __name__ == '__main__':
+    args = parser.parse_args()
+    config = get_efficientdet_config(model_name='tf_efficientdet_d0')
+    model = EfficientDet(config=config, pretrained_backbone=False)
+    model_path = args.checkpoint
+    model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
+    model.eval()
+
+    # example input used only for tracing the model to TorchScript
+    input_data = torch.ones(1, 3, 512, 512)
+    ts_model = torch.jit.trace(model, input_data)
+    ts_model.save(args.ts_save_path)
+    print(f'EfficientDet torch script model saved to {args.ts_save_path}')
+
+    # torch_aie.set_device(0)
+    # print("start compile")
+    # torchaie_model = torch_aie.compile(
+    #     ts_model,
+    #     inputs=[torch_aie.Input(input_data.shape)],
+    #     precision_policy=_enums.PrecisionPolicy.FP32,
+    #     # allow_tensor_replace_int=True,
+    #     soc_version='Ascend310P3',
+    #     optimization_level=0
+    # )
+    # print("end compile")
+    # torchaie_model.eval()
\ No newline at end of file
diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/perf.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/perf.py
new file mode 100644
index 0000000000..b01670febb
--- /dev/null
+++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/perf.py
@@ -0,0 +1,93 @@
+import argparse
+import time
+from tqdm import tqdm
+
+import torch
+import numpy as np
+
+import torch_aie
+from torch_aie import _enums
+from ais_bench.infer.interface import InferSession  # needed for --mode om (ais_bench inference tool)
+
+INPUT_WIDTH = 512
+INPUT_HEIGHT = 512
+
+def parse_args():
+    args = argparse.ArgumentParser(description="A program that operates in 'om' or 'ts' mode.")
+    args.add_argument("--mode", choices=["om", "ts"], required=True, help="Specify the mode ('om' or 'ts').")
+    args.add_argument('--om_path', help='EfficientDet om file path', type=str,
+                      default='/onnx/mobilenetv1/mobilenet-v1_bs1.om'
+                      )
+    args.add_argument('--ts_path', help='EfficientDet ts file path', type=str,
+                      default='/onnx/efficientdet/d0.ts'
+                      )
+    args.add_argument("--batch_size", type=int, default=4, help="batch size.")
+    return args.parse_args()
+
+if __name__ == '__main__':
+    infer_times = 100
+    om_cost = 0
+    pt_cost = 0
+    opts = parse_args()
+    OM_PATH = opts.om_path
+    TS_PATH = opts.ts_path
+    BATCH_SIZE = opts.batch_size
+
+    if opts.mode == "om":
+        om_model = InferSession(0, OM_PATH)
+        for _ in tqdm(range(0, infer_times)):
+            dummy_input = np.random.randn(1, 3, INPUT_WIDTH, INPUT_HEIGHT).astype(np.uint8)
+            start = time.time()
+            output = om_model.infer([dummy_input], 'static', custom_sizes=90000000)
+            cost = time.time() - start
+            om_cost
+= cost + + if opts.mode == "ts": + ts_model = torch.jit.load(TS_PATH) + + input_info = [torch_aie.Input((BATCH_SIZE, 3, INPUT_WIDTH, INPUT_HEIGHT))] + + torch_aie.set_device(0) + print("start compile") + torchaie_model = torch_aie.compile( + ts_model, + inputs=input_info, + precision_policy=_enums.PrecisionPolicy.FP32, + soc_version='Ascend310P3', + optimization_level=0 + ) + print("end compile") + torchaie_model.eval() + + dummy_input = np.random.randn(BATCH_SIZE, 3, INPUT_WIDTH, INPUT_HEIGHT).astype(np.float32) + input_tensor = torch.Tensor(dummy_input) + input_tensor = input_tensor.to("npu:0") + + loops = 100 + warm_ctr = 10 + + default_stream = torch_aie.npu.default_stream() + time_cost = 0 + + while warm_ctr: + _ = torchaie_model(input_tensor) + default_stream.synchronize() + warm_ctr -= 1 + + for i in range(loops): + t0 = time.time() + _ = torchaie_model(input_tensor) + default_stream.synchronize() + t1 = time.time() + time_cost += (t1 - t0) + print(i) + + print(f"fps: {loops} * {BATCH_SIZE} / {time_cost : .3f} samples/s") + print("torch_aie fps: ", loops * BATCH_SIZE / time_cost) + from datetime import datetime + current_time = datetime.now() + formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S") + print("Current Time:", formatted_time) + + + + diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/requirements.txt b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/requirements.txt new file mode 100644 index 0000000000..56f2f883ab --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/requirements.txt @@ -0,0 +1,67 @@ +absl-py==2.0.0 +antlr4-python3-runtime==4.9.3 +attrs==23.1.0 +certifi==2023.11.17 +charset-normalizer==3.3.2 +cloudpickle==3.0.0 +cmake==3.27.7 +coloredlogs==15.0.1 +contourpy==1.2.0 +custom-passes-reduce==0.0.0 +cycler==0.12.1 +decorator==5.1.1 +filelock==3.13.1 +flatbuffers==23.5.26 +fonttools==4.45.0 +fsspec==2023.10.0 +huggingface-hub==0.19.4 +humanfriendly==10.0 +idna==3.4 +importlib-resources==6.1.1 +Jinja2==3.1.2 +kiwisolver==1.4.5 +lit==17.0.5 +MarkupSafe==2.1.3 +matplotlib==3.8.2 +mpmath==1.3.0 +networkx==3.2.1 +numpy==1.26.2 +nvidia-cublas-cu11==11.10.3.66 +nvidia-cuda-cupti-cu11==11.7.101 +nvidia-cuda-nvrtc-cu11==11.7.99 +nvidia-cuda-runtime-cu11==11.7.99 +nvidia-cudnn-cu11==8.5.0.96 +nvidia-cufft-cu11==10.9.0.58 +nvidia-curand-cu11==10.2.10.91 +nvidia-cusolver-cu11==11.4.0.1 +nvidia-cusparse-cu11==11.7.4.91 +nvidia-nccl-cu11==2.14.3 +nvidia-nvtx-cu11==11.7.91 +omegaconf==2.3.0 +onnx==1.15.0 +onnx-simplifier==0.3.6 +onnxoptimizer==0.3.13 +onnxruntime==1.16.3 +packaging==23.2 +Pillow==10.1.0 +protobuf==4.25.1 +psutil==5.9.6 +pyascendie==0.0.0 +pycocotools==2.0.7 +pyparsing==3.1.1 +python-dateutil==2.8.2 +PyYAML==6.0.1 +requests==2.31.0 +scipy==1.11.4 +six==1.16.0 +sympy==1.12 +synr==0.5.0 +timm==0.6.11 +torch==2.0.1 +torchvision==0.15.2 +tornado==6.3.3 +tqdm==4.66.1 +triton==2.0.0 +typing_extensions==4.8.0 +urllib3==2.1.0 +zipp==3.17.0 diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/run.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/run.py new file mode 100644 index 0000000000..724f0b9a36 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/run.py @@ -0,0 +1,152 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +sys.path.append(r'./efficientdet-pytorch') +import numpy as np +import time +import os +import argparse +import torch +from effdet import create_evaluator, create_dataset, create_loader, create_model +from effdet.data import resolve_input_config +from timm.utils import * +from timm.models.layers import set_layer_config +from effdet.bench import DetBenchPredict +from effdet.config import get_efficientdet_config +from tqdm import tqdm + +import torch_aie +from torch_aie import _enums + +parser = argparse.ArgumentParser(description='PyTorch ImageNet Validation') +parser.add_argument('--root', default='/home/ascend/coco2017', type=str, metavar='DIR', + help='path to dataset root') +# parser.add_argument('--omfile', default='./result', type=str, +# help='om inference bin file save path') +parser.add_argument("--ts_model_path", default="/onnx/efficientdet/d0.ts") +parser.add_argument("--input_bin_folder_path", default="./bin_save") +parser.add_argument('--dataset', default='coco', type=str, metavar='DATASET', + help='Name of dataset (default: "coco"') +parser.add_argument('--split', default='val', + help='validation split') +parser.add_argument('--model', '-m', metavar='MODEL', default='tf_efficientdet_d0', + help='model architecture (default: tf_efficientdet_d1)') +parser.add_argument('-b', '--batch-size', default=1, type=int, + metavar='N', help='mini-batch size (default: 128)') +parser.add_argument('--img-size', default=None, type=int, + metavar='N', help='Input image dimension, uses model default if empty') + +if __name__ == '__main__': + args = parser.parse_args() + setup_default_logging() + config = get_efficientdet_config(model_name='tf_efficientdet_d0') + with set_layer_config(scriptable=False): + extra_args = {} + bench = DetBenchPredict(config) + + dataset = create_dataset(args.dataset, args.root, args.split) + model_config = bench.config + param_count = sum([m.numel() for m in bench.parameters()]) + input_config = resolve_input_config(args, model_config) + loader = create_loader( + dataset, + input_size=input_config['input_size'], + batch_size=1, + use_prefetcher=True, + interpolation=input_config['interpolation'], + fill_color=input_config['fill_color'], + mean=input_config['mean'], + std=input_config['std'], + num_workers=4, + pin_mem=True, + ) + evaluator = create_evaluator(args.dataset, dataset, pred_yxyx=False) + bench.eval() + batch_time = AverageMeter() + end = time.time() + last_idx = len(loader) - 1 + + # om_data = args.omfile + # om_files = list(os.listdir(om_data)) + # files = list(set([file.split('_')[0] for file in om_files])) + # files.sort() + + ts_model_path = args.ts_model_path + ts_model = torch.jit.load(ts_model_path) + + input_info = [torch_aie.Input((1, 3, 512, 512))] + torch_aie.set_device(0) + print("start compile") + torchaie_model = torch_aie.compile( + ts_model, + inputs=input_info, + precision_policy=_enums.PrecisionPolicy.FP32, + soc_version='Ascend310P3' + ) + print("end compile") + torchaie_model.eval() + + bin_files = list(os.listdir(args.input_bin_folder_path)) + bin_files.sort() + mean_ap = 0. 
# test + + box_list = [i for i in range(5, 10)] + class_list = [i for i in range(0, 5)] + with torch.no_grad(): + for (i, (input, target)), file in zip(enumerate(loader), tqdm(bin_files)): + box_out, class_out = [], [] + path = os.path.join(args.input_bin_folder_path, file) # test + print(path) # test + input_np_arr = np.fromfile(os.path.join(args.input_bin_folder_path, file), dtype=np.float32).reshape((1, 3, 512, 512)) + input_tensor = torch.tensor(input_np_arr, dtype=torch.float32) + input_tensor = input_tensor.to("npu:0") + # class_preds, box_preds = ts_model.forward(input_tensor) # REVISE + class_preds, box_preds = torchaie_model.forward(input_tensor) # REVISE + for j in range(5): + # box_preds[i] = box_preds[i].to("cpu") + # class_preds[i] = class_preds[i].to("cpu") + box_data = box_preds[j].to("cpu") + class_data = class_preds[j].to("cpu") + box_out.append(box_data) + class_out.append(class_data) + + # size = 128 + # box_out, class_out = [], [] + # for box, class_ in zip(box_list, class_list): + # size /= 2 + # box_file = om_data + '/' + str(file) + "_"+ str(box) + '.bin' + # class_file = om_data + '/' + str(file) + "_"+ str(class_) + '.bin' + # box_data = np.fromfile(box_file, dtype=np.float32) + # class_data = np.fromfile(class_file, dtype=np.float32) + # box_data.shape = 1, 36, int(size), int(size) + # class_data.shape = 1, 810, int(size), int(size) + # box_data = torch.from_numpy(box_data) + # class_data = torch.from_numpy(class_data) + # box_out.append(box_data) + # class_out.append(class_data) + + output = bench(x=input, class_out=class_out, box_out=box_out, img_info=target) + evaluator.add_predictions(output, target) + print(i) + + # if dataset.parser.has_labels: + # mean_ap = evaluator.evaluate() + # print(mean_ap) + batch_time.update(time.time() - end) + end = time.time() + mean_ap = 0. + if dataset.parser.has_labels: + mean_ap = evaluator.evaluate() + print(mean_ap) \ No newline at end of file -- Gitee From 93d2248b90177be29008629d1834470388649d9f Mon Sep 17 00:00:00 2001 From: Guanzhong Chen Date: Tue, 28 Nov 2023 15:35:53 +0800 Subject: [PATCH 2/2] 1 --- AscendIE/TorchAIE/built-in/cv/detection/efficientdet/perf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/perf.py b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/perf.py index b01670febb..aeae260b18 100644 --- a/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/perf.py +++ b/AscendIE/TorchAIE/built-in/cv/detection/efficientdet/perf.py @@ -51,14 +51,14 @@ if __name__ == '__main__': torchaie_model = torch_aie.compile( ts_model, inputs=input_info, - precision_policy=_enums.PrecisionPolicy.FP32, + precision_policy=_enums.PrecisionPolicy.FP16, soc_version='Ascend310P3', optimization_level=0 ) print("end compile") torchaie_model.eval() - dummy_input = np.random.randn(BATCH_SIZE, 3, INPUT_WIDTH, INPUT_HEIGHT).astype(np.float32) + dummy_input = np.zeros((BATCH_SIZE, 3, INPUT_WIDTH, INPUT_HEIGHT)) input_tensor = torch.Tensor(dummy_input) input_tensor = input_tensor.to("npu:0") -- Gitee