diff --git a/community/cv/ShipWise/.gitignore b/community/cv/ShipWise/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..958108fe86836bde34187d7cd3b7b91f8c0cfdc5
--- /dev/null
+++ b/community/cv/ShipWise/.gitignore
@@ -0,0 +1,102 @@
+# MindSpore
+*.ir
+kernel_meta/
+somas_meta/
+trace_code_graph_*
+
+# Cmake files
+CMakeFiles/
+cmake_install.cmake
+CMakeCache.txt
+Makefile
+cmake-build-debug
+
+# Dynamic libraries
+*.so
+*.so.*
+*.dylib
+
+# Static libraries
+*.la
+*.lai
+*.a
+*.lib
+
+# Protocol buffers
+*_pb2.py
+*.pb.h
+*.pb.cc
+*.pb
+*_grpc.py
+
+# Object files
+*.o
+
+# Editor
+.vscode
+.idea/
+
+# Cquery
+.cquery_cached_index/
+compile_commands.json
+
+# Ctags and cscope
+tags
+TAGS
+CTAGS
+GTAGS
+GRTAGS
+GSYMS
+GPATH
+cscope.*
+
+# Python files
+*__pycache__*
+.pytest_cache
+
+# Mac files
+*.DS_Store
+
+# Test results
+test_temp_summary_event_file/
+*.dot
+*.dat
+*.svg
+*.perf
+*.info
+*.ckpt
+*.shp
+*.pkl
+*.pb
+.clangd
+
+# lite opencl compile file
+*.cl.inc
+
+# Custom
+build/
+configs/
+data/
+demo/
+deploy/
+docs/
+examples/
+mindinsight/
+mindyolo.egg-info/
+requirements/
+runs/
+runs_infer/
+tests/
+tutorials/
+datasets
+benchmark_results.md
+CONTRIBUTING.md
+GETTING_STARTED.md
+GETTING_STARTED_CN.md
+LICENSE.md
+ma-pre-start.sh
+mkdocs.yml
+package.sh
+setup.py
+test.py
+
diff --git a/community/cv/ShipWise/README.md b/community/cv/ShipWise/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..c4d9c999cf13ba79a4583ebb9a06c53e5ff1a54c
--- /dev/null
+++ b/community/cv/ShipWise/README.md
@@ -0,0 +1,282 @@
+# Contents
+
+- [1. ShipWiseNet Description](#1-shipwisenet-description)
+- [2. Model Architecture](#2-model-architecture)
+- [3. Datasets](#3-datasets)
+    - [3.1 Pre-training Dataset](#31-pre-training-dataset)
+    - [3.2 Project Task Dataset](#32-project-task-dataset)
+    - [3.3 Data Processing](#33-data-processing)
+- [4. Quick Start](#4-quick-start)
+    - [4.1 Model Training](#41-model-training)
+    - [4.2 Model Inference](#42-model-inference)
+- [5. Script Description](#5-script-description)
+    - [5.1 Scripts and Sample Code](#51-scripts-and-sample-code)
+    - [5.2 Script Parameters](#52-script-parameters)
+- [6. Model Description](#6-model-description)
+    - [6.1 Evaluation Performance](#61-evaluation-performance)
+    - [6.2 Inference Performance](#62-inference-performance)
+- [7. Project Showcase](#7-project-showcase)
+
+# 1. ShipWiseNet Description
+
+ShipWiseNet is an efficient, lightweight object detection network designed specifically for detecting ships and other
+maritime targets, adapted to complex ocean environments and real-time requirements. Built on a modern deep learning
+architecture combined with targeted optimization techniques, the model balances high accuracy against low compute
+requirements, making it suitable for resource-constrained scenarios such as embedded devices and unmanned systems.
+ShipWiseNet delivers fast, robust detection and can accurately identify and localize ship targets even under severe
+weather, large illumination changes, or cluttered backgrounds.
+
+# 2. Model Architecture
+
+The architecture of ShipWiseNet focuses on the specific needs of maritime target detection. The model consists of a
+feature extraction module, a detection head, and an optimization module, which together enable precise and efficient
+ship detection:
+
+1. Feature extraction module: a multi-layer convolutional neural network (CNN) optimized to capture ship edge features
+   and shape details, ensuring that useful information is extracted even against complex backgrounds. The module
+   represents features at multiple scales, which improves the model's adaptability.
+2. Detection head: the detection layers use an adaptive anchor-based strategy, so the model can flexibly handle targets
+   of different sizes and at different distances. The head itself is lightweight, further reducing computational
+   complexity.
+3. Optimization module: dedicated regularization and loss-function optimization strategies let ShipWiseNet shrink the
+   model size while preserving detection accuracy. The module also includes refined background suppression, which
+   filters out irrelevant information and improves robustness in cluttered maritime scenes.
+
+This architecture allows ShipWiseNet to run smoothly on low-compute devices while retaining high detection accuracy,
+making it well suited to real-time ship detection in maritime environments.
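+
+At a high level, the three modules compose as backbone → head, as is typical for one-stage detectors. The sketch below
+is illustrative only; the class and attribute names are assumptions for exposition, not the actual ShipWiseNet
+implementation:
+
+```python
+import mindspore.nn as nn
+
+
+class ShipWiseNetSketch(nn.Cell):
+    """Illustrative backbone + detection-head composition (not the real implementation)."""
+
+    def __init__(self, backbone: nn.Cell, head: nn.Cell):
+        super().__init__()
+        self.backbone = backbone  # multi-scale CNN feature extractor
+        self.head = head          # lightweight adaptive-anchor detection head
+
+    def construct(self, x):
+        features = self.backbone(x)  # feature maps at several strides
+        return self.head(features)   # per-scale box/class predictions
+```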
+
+# 3. Datasets
+
+## 3.1 Pre-training Dataset
+
+The COCO 2017 dataset is a large, general-purpose dataset widely used for object detection, segmentation, and keypoint
+detection, covering diverse scenes and object categories. With more than 200,000 images across 80 object classes,
+COCO 2017 provides ShipWiseNet with rich pre-training data and a strong general detection foundation.
+
+- Dataset scale: COCO 2017 contains roughly 118,000 training images and 5,000 validation images spanning a wide
+  variety of natural scenes, helping the model learn rich feature representations during pre-training.
+- Object diversity: the objects in the dataset vary widely in category and pose, giving ShipWiseNet broad visual
+  priors before ship detection and improving its generalization ability.
+- Annotations: COCO 2017 provides precise annotations that support bounding boxes, segmentation, and other tasks, so
+  the model stays robust on multi-scale, multi-pose detection.
+
+Pre-training on COCO 2017 strengthens ShipWiseNet's base feature extraction capability, helping it adapt faster to
+maritime target detection when fine-tuned on specialized datasets such as HRSC2016.
+
+## 3.2 Project Task Dataset
+
+The HRSC2016 dataset is a high-resolution remote-sensing image dataset dedicated to ship detection, widely used for
+maritime target detection and recognition. It contains ship targets at many scales, angles, and poses, closely
+simulating the complexity of real maritime scenes, which makes it an ideal choice for training and evaluating
+ShipWiseNet.
+
+1. Dataset scale: HRSC2016 contains more than 1,000 high-resolution images covering many ship types and complex
+   backgrounds, suitable for improving the model's adaptability across maritime environments.
+2. Image characteristics: the images are rich in detail and varied in viewing angle, showing ship shapes from
+   different perspectives, which helps the model detect accurately under different pitch angles and cluttered
+   backgrounds.
+3. Annotations: every image comes with precise ship location labels, including outline and pose information in a
+   rotated-box annotation format, supporting detection at arbitrary orientations.
+
+With HRSC2016, ShipWiseNet learns the varied visual characteristics of ships at sea and adapts better to complex
+backgrounds and ship diversity, giving it stronger generalization and robustness in real maritime environments.
+
+## 3.3 Data Processing
+
+Before use, the data must be preprocessed to match the model's input requirements. Preprocessing mainly covers image
+reading, data augmentation, and annotation parsing. The script below converts HRSC2016 XML annotations into
+YOLO-format txt labels:
+
+```python
+import os
+import xml.etree.ElementTree as ET
+
+
+def xml_to_txt(xml_file_dir, txt_file_dir):
+    """Convert HRSC2016 XML annotations to normalized YOLO txt labels."""
+    os.makedirs(txt_file_dir, exist_ok=True)
+    all_class_ids = set()
+    for xml_file in os.listdir(xml_file_dir):
+        if not xml_file.endswith('.xml') or xml_file == 'annotation_fmt.xml':
+            continue
+        xml_file_path = os.path.join(xml_file_dir, xml_file)
+        tree = ET.parse(xml_file_path)
+        root = tree.getroot()
+        img_id = root.find('Img_ID').text
+        img_width = int(root.find('Img_SizeWidth').text)
+        img_height = int(root.find('Img_SizeHeight').text)
+        txt_file_path = os.path.join(txt_file_dir, f"{img_id}.txt")
+        objs = root.findall('.//HRSC_Object')
+        if len(objs) == 0:
+            continue
+        with open(txt_file_path, 'w') as txt_file:
+            for obj in objs:
+                # convert_cls_id: project-specific mapping from the HRSC Class_ID
+                # to a contiguous YOLO class index (defined elsewhere in this script)
+                class_id = convert_cls_id(obj.find('Class_ID').text)
+                all_class_ids.add(class_id)
+                box_xmin = int(obj.find('box_xmin').text)
+                box_ymin = int(obj.find('box_ymin').text)
+                box_xmax = int(obj.find('box_xmax').text)
+                box_ymax = int(obj.find('box_ymax').text)
+                x_center = ((box_xmin + box_xmax) / 2) / img_width
+                y_center = ((box_ymin + box_ymax) / 2) / img_height
+                box_width = (box_xmax - box_xmin) / img_width
+                box_height = (box_ymax - box_ymin) / img_height
+                txt_file.write(f"{class_id} {x_center} {y_center} {box_width} {box_height}\n")
+    print(f"Total class ids: {len(all_class_ids)}")
+    print([str(class_id) for class_id in all_class_ids])
+```
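+
+A typical invocation might look like the following; the directory names are hypothetical and should be adjusted to
+the local HRSC2016 layout:
+
+```python
+# Hypothetical paths -- adjust to wherever HRSC2016 is stored locally.
+xml_to_txt(
+    xml_file_dir="datasets/HRSC2016/Annotations",
+    txt_file_dir="datasets/HRSC2016/labels",
+)
+```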
+
+# 4. Quick Start
+
+After installing MindSpore from the official website, you can follow the steps below for training and evaluation:
+
+## 4.1 Model Training
+
+```bash
+# Train the model
+python train.py --config ./workspace/configs/ship-wise/ship-wise-s.yaml --log_interval 52
+```
+
+```txt
+2024-09-16 17:20:40,329 [INFO] Epoch 146/500, Step 52/422, imgsize (640, 640), loss: 3.8198, lbox: 0.7182, lcls: 1.4714, dfl: 1.6302, cur_lr: 0.007129000034183264
+2024-09-16 17:20:40,329 [INFO] Epoch 146/500, Step 52/422, step time: 993.04 ms
+2024-09-16 17:21:31,956 [INFO] Epoch 146/500, Step 104/422, imgsize (640, 640), loss: 4.2556, lbox: 1.0156, lcls: 1.9761, dfl: 1.2639, cur_lr: 0.007129000034183264
+2024-09-16 17:21:31,957 [INFO] Epoch 146/500, Step 104/422, step time: 992.82 ms
+2024-09-16 17:22:23,579 [INFO] Epoch 146/500, Step 156/422, imgsize (640, 640), loss: 3.3041, lbox: 0.7597, lcls: 1.2315, dfl: 1.3129, cur_lr: 0.007129000034183264
+2024-09-16 17:22:23,579 [INFO] Epoch 146/500, Step 156/422, step time: 992.73 ms
+2024-09-16 17:23:15,262 [INFO] Epoch 146/500, Step 208/422, imgsize (640, 640), loss: 3.7350, lbox: 0.8932, lcls: 1.7161, dfl: 1.1257, cur_lr: 0.007129000034183264
+2024-09-16 17:23:15,263 [INFO] Epoch 146/500, Step 208/422, step time: 993.93 ms
+2024-09-16 17:24:06,943 [INFO] Epoch 146/500, Step 260/422, imgsize (640, 640), loss: 3.1018, lbox: 0.7251, lcls: 1.1993, dfl: 1.1774, cur_lr: 0.007129000034183264
+2024-09-16 17:24:06,944 [INFO] Epoch 146/500, Step 260/422, step time: 993.86 ms
+2024-09-16 17:24:58,402 [INFO] Epoch 146/500, Step 312/422, imgsize (640, 640), loss: 3.4070, lbox: 0.8956, lcls: 1.3462, dfl: 1.1653, cur_lr: 0.007129000034183264
+2024-09-16 17:24:58,403 [INFO] Epoch 146/500, Step 312/422, step time: 989.59 ms
+2024-09-16 17:25:49,862 [INFO] Epoch 146/500, Step 364/422, imgsize (640, 640), loss: 2.2349, lbox: 0.5543, lcls: 0.7309, dfl: 0.9497, cur_lr: 0.007129000034183264
+2024-09-16 17:25:49,863 [INFO] Epoch 146/500, Step 364/422, step time: 989.62 ms
+2024-09-16 17:26:41,257 [INFO] Epoch 146/500, Step 416/422, imgsize (640, 640), loss: 2.7312, lbox: 0.5390, lcls: 1.0165, dfl: 1.1757, cur_lr: 0.007129000034183264
+2024-09-16 17:26:41,258 [INFO] Epoch 146/500, Step 416/422, step time: 988.38 ms
+2024-09-16 17:26:47,405 [INFO] Saving model to ./runs\2024.09.15-22.56.30\weights\ship-wise-s-146_422.ckpt
+2024-09-16 17:26:47,405 [INFO] Epoch 146/500, epoch time: 6.98 min.
+```
+
+## 4.2 Model Inference
+
+```bash
+python predict.py --config=./workspace/configs/ship-wise/ship-wise-s.yaml --weight=./runs/2024.09.15-22.56.30/weights/ship-wise-s-153_422.ckpt --image_path=H:\Library\Datasets\HRSC\HRSC2016_dataset\HRSC2016\FullDataSet-YOLO-Split\test\100000630.bmp
+```
+
+```txt
+2024-09-16 18:33:22,989 [INFO] number of network params, total: 11.166471M, trainable: 11.14642M
+2024-09-16 18:33:23,172 [INFO] Load checkpoint from [runs/2024.09.15-22.56.30/weights/ship-wise-s-153_422.ckpt] success.
+2024-09-16 18:33:25,788 [INFO] Predict result is: {'category_id': [18], 'bbox': [[699.768, 529.886, 216.182, 189.988]], 'score': [0.76474]}
+2024-09-16 18:33:25,788 [INFO] Speed: 2594.7/1.8/2596.4 ms inference/NMS/total per 640x640 image at batch-size 1;
+2024-09-16 18:33:25,788 [INFO] Detect a image success.
+2024-09-16 18:33:25,797 [INFO] Infer completed.
+```
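+
+The printed result dict follows the log above (`category_id`, `bbox`, `score`). Assuming the boxes use the COCO-style
+`[x_min, y_min, width, height]` convention and OpenCV is available, a small helper like the following can visualize a
+prediction; it is a sketch, not part of predict.py:
+
+```python
+import cv2
+
+
+def draw_prediction(image_path, result, out_path="prediction_vis.jpg", color=(0, 255, 0)):
+    """Draw COCO-style [x, y, w, h] boxes from a predict.py-style result dict."""
+    img = cv2.imread(image_path)
+    for cat, (x, y, w, h), score in zip(result["category_id"], result["bbox"], result["score"]):
+        pt1, pt2 = (int(x), int(y)), (int(x + w), int(y + h))
+        cv2.rectangle(img, pt1, pt2, color, 2)
+        cv2.putText(img, f"{cat}: {score:.2f}", (pt1[0], pt1[1] - 5),
+                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
+    cv2.imwrite(out_path, img)
+```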
+
+# 5. Script Description
+
+## 5.1 Scripts and Sample Code
+
+```text
+├── root_directory                       // project root
+│   ├── predict.py                       // main prediction script
+│   ├── train.py                         // main training script
+│   ├── __init__.py                      // init script
+│
+├── configs                              // configuration files
+│   ├── dataset                          // dataset configuration files
+│   │   ├── HRSC2016.yaml                // configuration for the HRSC2016 dataset
+│   │
+│   └── ship-wise                        // ShipWiseNet model configurations
+│       ├── hyp.scratch.high.yaml        // hyper-parameters for the high-accuracy mode
+│       ├── hyp.scratch.low.yaml         // hyper-parameters for the low-accuracy mode
+│       ├── ship-wise-base.yaml          // ShipWiseNet base model configuration
+│       ├── ship-wise-l.yaml             // ShipWiseNet large model configuration
+│       ├── ship-wise-s.yaml             // ShipWiseNet small model configuration
+│
+├── datasets                             // dataset folder
+│   └── HRSC2016                         // HRSC2016 dataset directory
+│       ├── test.txt                     // test set file list
+│       ├── train.cache.npy              // training set cache file
+│       ├── train.txt                    // training set file list
+│       ├── val.txt                      // validation set file list
+│
+├── flask                                // Flask project for model serving
+│   ├── index.py                         // main entry file
+│   ├── __init__.py                      // init script
+│   │
+│   ├── model                            // model-related files
+│   │   ├── yolov8.py                    // YOLOv8 model implementation
+│   │   ├── __init__.py                  // init script
+│   │   └── __pycache__                  // Python bytecode cache directory
+│   │       ├── yolov8.cpython-38.pyc    // YOLOv8 bytecode cache
+│   │       └── __init__.cpython-38.pyc  // init script bytecode cache
+│   │
+│   └── __pycache__                      // Python bytecode cache directory
+│
+└── script                               // script folder
+    ├── train.md                         // training documentation
    ├── __init__.py                      // init script
+    │
+    ├── dataset_tools                    // dataset tools folder
+    │   ├── __init__.py                  // init script
+    │   │
+    │   └── HRSC                         // HRSC dataset tools
+    │       ├── __init__.py              // init script
+    │       ├── 切分数据集.py             // dataset split script
+    │       └── 转换数据集为YOLO格式.py    // dataset-to-YOLO-format conversion script
+    │
+    └── __pycache__                      // Python bytecode cache directory
+```
+
+## 5.2 Script Parameters
+
+```text
+Main parameters in train.py:
+
+optional arguments:
+
+  --device_target        Device on which the code runs. Default: Ascend
+  --data_dir             Training dataset directory
+  --per_batch_size       Batch size for training. Default: 32 (1p), 16 (Ascend 8p) or 32 (GPU 8p)
+  --resume_yolov5        CKPT file of YOLOv5 used for fine-tuning. Default: ""
+  --lr_scheduler         Learning rate scheduler. Options: exponential or cosine_annealing.
+                         Default: cosine_annealing
+  --lr                   Learning rate. Default: 0.01 (1p), 0.02 (Ascend 8p) or 0.025 (GPU 8p)
+  --lr_epochs            Epochs at which the learning rate changes, separated by commas (,). Default: '220,250'
+  --lr_gamma             Decay factor of the exponential lr_scheduler. Default: 0.1
+  --eta_min              eta_min in the cosine_annealing scheduler. Default: 0
+  --t_max                T-max in the cosine_annealing scheduler. Default: 300 (8p)
+  --max_epoch            Maximum number of training epochs. Default: 300 (8p)
+  --warmup_epochs        Total warm-up epochs. Default: 20 (8p)
+  --weight_decay         Weight decay factor. Default: 0.0005
+  --momentum             Momentum. Default: 0.9
+  --loss_scale           Static loss scale. Default: 64
+  --label_smooth         Whether to use label smoothing in CE. Default: 0
+  --label_smooth_factor  Smoothing strength of the original one-hot encoding. Default: 0.1
+  --log_interval         Logging interval in steps. Default: 100
+  --ckpt_path            Location to save CKPT files. Default: outputs/
+  --is_distributed       Whether to run distributed training, 1 for yes, 0 for no. Default: 0
+  --rank                 Local rank for distributed training. Default: 0
+  --group_size           Global size of devices. Default: 1
+  --need_profiler        Whether to use the profiler, 0 for no, 1 for yes. Default: 0
+  --training_shape       Fixed training shape. Default: ""
+  --resize_rate          Resize rate for multi-scale training. Default: 10
+  --bind_cpu             Whether to bind CPU during distributed training. Default: True
+  --device_num           Number of devices per server. Default: 8
+```
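+
+For intuition, the cosine_annealing option listed above follows the standard cosine annealing schedule. The sketch
+below uses the default values from the list (lr=0.01, eta_min=0, t_max=300); it is a generic illustration of the
+formula, not code taken from train.py:
+
+```python
+import math
+
+
+def cosine_annealing_lr(epoch, lr_init=0.01, eta_min=0.0, t_max=300):
+    """Standard cosine annealing: decays lr_init down to eta_min over t_max epochs."""
+    return eta_min + (lr_init - eta_min) * (1 + math.cos(math.pi * epoch / t_max)) / 2
+
+
+# e.g. cosine_annealing_lr(0) == 0.01 and cosine_annealing_lr(300) == 0.0
+```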
+
+# 6. Model Description
+
+## 6.1 Evaluation Performance
+
+| Parameter             | ShipWise-s                                                       |
+|-----------------------|------------------------------------------------------------------|
+| Resource              | CPU @ 5.10 GHz, 14 cores; 32 GB RAM                              |
+| Upload date           | 26/10/2024                                                       |
+| MindSpore version     | 2.2.14                                                           |
+| Dataset               | 1,680 images                                                     |
+| Training parameters   | epoch=300, batch_size=2, lr=0.01, momentum=0.937, warmup_epoch=3 |
+| Optimizer             | Momentum                                                         |
+| Loss function         | YOLOv8Loss                                                       |
+| Outputs               | Boxes and labels                                                 |
+| Loss                  | 2.6635                                                           |
+| Total time            | 19 h 20 min 58 s                                                 |
+| Fine-tuned checkpoint | 42.6 MB (.ckpt file)                                             |
+
+## 6.2 Inference Performance
+
+| Parameter             | ShipWise-s                            |
+|-----------------------|---------------------------------------|
+| Resource              | CPU @ 5.10 GHz, 14 cores; 32 GB RAM   |
+| Upload date           | 26/10/2024                            |
+| MindSpore version     | 2.2.14                                |
+| Dataset               | 1,680 images                          |
+| batch_size            | 1                                     |
+| Outputs               | Box coordinates and scores, plus probabilities |
+| Accuracy              | mAP >= 82.6% (shape=640)              |
+| Fine-tuned checkpoint | 42.6 MB (.ckpt file)                  |
+
+# 7. Project Showcase
+
+Home page
+
+![Home page](./assets/pic-1.png)
+
+Data visualization dashboard
+
+![Data visualization dashboard](./assets/pic-2.png)
\ No newline at end of file
diff --git a/community/cv/ShipWise/assets/pic-1.png b/community/cv/ShipWise/assets/pic-1.png
new file mode 100644
index 0000000000000000000000000000000000000000..baa800c3af63ced712170aa288348bc281a09c32
Binary files /dev/null and b/community/cv/ShipWise/assets/pic-1.png differ
diff --git a/community/cv/ShipWise/assets/pic-2.png b/community/cv/ShipWise/assets/pic-2.png
new file mode 100644
index 0000000000000000000000000000000000000000..dda605ccf235116e14b3035755a508cd99f7e378
Binary files /dev/null and b/community/cv/ShipWise/assets/pic-2.png differ
diff --git a/community/cv/ShipWise/mindyolo/__init__.py b/community/cv/ShipWise/mindyolo/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..3ac0e002036597328a58a34b8a2c9d7793a03564
--- /dev/null
+++ b/community/cv/ShipWise/mindyolo/__init__.py
@@ -0,0 +1,12 @@
+"""mindyolo init"""
+from . import data, models, optim, utils
+from .data import *
+from .models import *
+from .optim import *
+from .utils import *
+from .version import __version__
+
+__all__ = []
+__all__.extend(data.__all__)
+__all__.extend(models.__all__)
+__all__.extend(optim.__all__)
diff --git a/community/cv/ShipWise/mindyolo/csrc/__init__.py b/community/cv/ShipWise/mindyolo/csrc/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..180424bb011d0347456fc329776a173635b60b8f
--- /dev/null
+++ b/community/cv/ShipWise/mindyolo/csrc/__init__.py
@@ -0,0 +1,3 @@
+from .fast_coco_eval import COCOeval_fast
+
+__all__ = ['COCOeval_fast']
diff --git a/community/cv/ShipWise/mindyolo/csrc/build.sh b/community/cv/ShipWise/mindyolo/csrc/build.sh
new file mode 100644
index 0000000000000000000000000000000000000000..69fe1e4d95da111a536c8e9b66698185ab902416
--- /dev/null
+++ b/community/cv/ShipWise/mindyolo/csrc/build.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+# Build dynamic library
+python setup.py build_ext --inplace && echo "Build fast_coco_eval successfully."
diff --git a/community/cv/ShipWise/mindyolo/csrc/fast_coco_eval/__init__.py b/community/cv/ShipWise/mindyolo/csrc/fast_coco_eval/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..cbb55e37a25d74ca00711aa5eb61dc8291813d3e
--- /dev/null
+++ b/community/cv/ShipWise/mindyolo/csrc/fast_coco_eval/__init__.py
@@ -0,0 +1,3 @@
+from .fast_coco_eval_api import COCOeval_fast
+
+__all__ = ['COCOeval_fast']
diff --git a/community/cv/ShipWise/mindyolo/csrc/fast_coco_eval/cocoeval/cocoeval.cpp b/community/cv/ShipWise/mindyolo/csrc/fast_coco_eval/cocoeval/cocoeval.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..880b89956ed12e5b9694a4f0ba78086370385ea7
--- /dev/null
+++ b/community/cv/ShipWise/mindyolo/csrc/fast_coco_eval/cocoeval/cocoeval.cpp
@@ -0,0 +1,504 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+// This file was copied from project facebookresearch/detectron2
+// The file link is https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/csrc/cocoeval/cocoeval.cpp
+#include "cocoeval.h"
+#include <time.h>
+#include <algorithm>
+#include <cstdint>
+#include <numeric>
+
+using namespace pybind11::literals;
+
+namespace COCOeval {
+
+// Sort detections from highest score to lowest, such that
+// detection_instances[detection_sorted_indices[t]] >=
+// detection_instances[detection_sorted_indices[t+1]]. Use stable_sort to match
+// original COCO API
+void SortInstancesByDetectionScore(
+    const std::vector<InstanceAnnotation>& detection_instances,
+    std::vector<uint64_t>* detection_sorted_indices) {
+  detection_sorted_indices->resize(detection_instances.size());
+  std::iota(
+      detection_sorted_indices->begin(), detection_sorted_indices->end(), 0);
+  std::stable_sort(
+      detection_sorted_indices->begin(),
+      detection_sorted_indices->end(),
+      [&detection_instances](size_t j1, size_t j2) {
+        return detection_instances[j1].score > detection_instances[j2].score;
+      });
+}
+
+// Partition the ground truth objects based on whether or not to ignore them
+// based on area
+void SortInstancesByIgnore(
+    const std::array<double, 2>& area_range,
+    const std::vector<InstanceAnnotation>& ground_truth_instances,
+    std::vector<uint64_t>* ground_truth_sorted_indices,
+    std::vector<bool>* ignores) {
+  ignores->clear();
+  ignores->reserve(ground_truth_instances.size());
+  for (auto o : ground_truth_instances) {
+    ignores->push_back(
+        o.ignore || o.area < area_range[0] || o.area > area_range[1]);
+  }
+
+  ground_truth_sorted_indices->resize(ground_truth_instances.size());
+  std::iota(
+      ground_truth_sorted_indices->begin(),
+      ground_truth_sorted_indices->end(),
+      0);
+  std::stable_sort(
+      ground_truth_sorted_indices->begin(),
+      ground_truth_sorted_indices->end(),
+      [&ignores](size_t j1, size_t j2) {
+        return (int)(*ignores)[j1] < (int)(*ignores)[j2];
+      });
+}
+
+// For each IOU threshold, greedily match each detected instance to a ground
+// truth instance (if possible) and store the results
+void MatchDetectionsToGroundTruth(
+    const std::vector<InstanceAnnotation>& detection_instances,
+    const std::vector<uint64_t>& detection_sorted_indices,
+    const std::vector<InstanceAnnotation>& ground_truth_instances,
+    const std::vector<uint64_t>& ground_truth_sorted_indices,
+    const std::vector<bool>& ignores,
+    const std::vector<std::vector<double>>& ious,
+    const std::vector<double>& iou_thresholds,
+    const std::array<double, 2>& area_range,
+    ImageEvaluation* results) {
+  // Initialize memory to store return data matches and ignore
+  const int num_iou_thresholds = iou_thresholds.size();
+  const int num_ground_truth = ground_truth_sorted_indices.size();
+  const int num_detections = detection_sorted_indices.size();
+  std::vector<uint64_t> ground_truth_matches(
num_iou_thresholds * num_ground_truth, 0); + std::vector& detection_matches = results->detection_matches; + std::vector& detection_ignores = results->detection_ignores; + std::vector& ground_truth_ignores = results->ground_truth_ignores; + detection_matches.resize(num_iou_thresholds * num_detections, 0); + detection_ignores.resize(num_iou_thresholds * num_detections, false); + ground_truth_ignores.resize(num_ground_truth); + for (auto g = 0; g < num_ground_truth; ++g) { + ground_truth_ignores[g] = ignores[ground_truth_sorted_indices[g]]; + } + + for (auto t = 0; t < num_iou_thresholds; ++t) { + for (auto d = 0; d < num_detections; ++d) { + // information about best match so far (match=-1 -> unmatched) + double best_iou = std::min(iou_thresholds[t], 1 - 1e-10); + int match = -1; + for (auto g = 0; g < num_ground_truth; ++g) { + // if this ground truth instance is already matched and not a + // crowd, it cannot be matched to another detection + if (ground_truth_matches[t * num_ground_truth + g] > 0 && + !ground_truth_instances[ground_truth_sorted_indices[g]].is_crowd) { + continue; + } + + // if detected instance matched to a regular ground truth + // instance, we can break on the first ground truth instance + // tagged as ignore (because they are sorted by the ignore tag) + if (match >= 0 && !ground_truth_ignores[match] && + ground_truth_ignores[g]) { + break; + } + + // if IOU overlap is the best so far, store the match appropriately + if (ious[d][ground_truth_sorted_indices[g]] >= best_iou) { + best_iou = ious[d][ground_truth_sorted_indices[g]]; + match = g; + } + } + // if match was made, store id of match for both detection and + // ground truth + if (match >= 0) { + detection_ignores[t * num_detections + d] = ground_truth_ignores[match]; + detection_matches[t * num_detections + d] = + ground_truth_instances[ground_truth_sorted_indices[match]].id; + ground_truth_matches[t * num_ground_truth + match] = + detection_instances[detection_sorted_indices[d]].id; + } + + // set unmatched detections outside of area range to ignore + const InstanceAnnotation& detection = + detection_instances[detection_sorted_indices[d]]; + detection_ignores[t * num_detections + d] = + detection_ignores[t * num_detections + d] || + (detection_matches[t * num_detections + d] == 0 && + (detection.area < area_range[0] || detection.area > area_range[1])); + } + } + + // store detection score results + results->detection_scores.resize(detection_sorted_indices.size()); + for (size_t d = 0; d < detection_sorted_indices.size(); ++d) { + results->detection_scores[d] = + detection_instances[detection_sorted_indices[d]].score; + } +} + +std::vector EvaluateImages( + const std::vector>& area_ranges, + int max_detections, + const std::vector& iou_thresholds, + const ImageCategoryInstances>& image_category_ious, + const ImageCategoryInstances& + image_category_ground_truth_instances, + const ImageCategoryInstances& + image_category_detection_instances) { + const int num_area_ranges = area_ranges.size(); + const int num_images = image_category_ground_truth_instances.size(); + const int num_categories = + image_category_ious.size() > 0 ? image_category_ious[0].size() : 0; + std::vector detection_sorted_indices; + std::vector ground_truth_sorted_indices; + std::vector ignores; + std::vector results_all( + num_images * num_area_ranges * num_categories); + + // Store results for each image, category, and area range combination. 
Results + // for each IOU threshold are packed into the same ImageEvaluation object + for (auto i = 0; i < num_images; ++i) { + for (auto c = 0; c < num_categories; ++c) { + const std::vector& ground_truth_instances = + image_category_ground_truth_instances[i][c]; + const std::vector& detection_instances = + image_category_detection_instances[i][c]; + + SortInstancesByDetectionScore( + detection_instances, &detection_sorted_indices); + if ((int)detection_sorted_indices.size() > max_detections) { + detection_sorted_indices.resize(max_detections); + } + + for (size_t a = 0; a < area_ranges.size(); ++a) { + SortInstancesByIgnore( + area_ranges[a], + ground_truth_instances, + &ground_truth_sorted_indices, + &ignores); + + MatchDetectionsToGroundTruth( + detection_instances, + detection_sorted_indices, + ground_truth_instances, + ground_truth_sorted_indices, + ignores, + image_category_ious[i][c], + iou_thresholds, + area_ranges[a], + &results_all + [c * num_area_ranges * num_images + a * num_images + i]); + } + } + } + + return results_all; +} + +// Convert a python list to a vector +template +std::vector list_to_vec(const py::list& l) { + std::vector v(py::len(l)); + for (int i = 0; i < (int)py::len(l); ++i) { + v[i] = l[i].cast(); + } + return v; +} + +// Helper function to Accumulate() +// Considers the evaluation results applicable to a particular category, area +// range, and max_detections parameter setting, which begin at +// evaluations[evaluation_index]. Extracts a sorted list of length n of all +// applicable detection instances concatenated across all images in the dataset, +// which are represented by the outputs evaluation_indices, detection_scores, +// image_detection_indices, and detection_sorted_indices--all of which are +// length n. evaluation_indices[i] stores the applicable index into +// evaluations[] for instance i, which has detection score detection_score[i], +// and is the image_detection_indices[i]'th of the list of detections +// for the image containing i. 
detection_sorted_indices[] defines a sorted +// permutation of the 3 other outputs +int BuildSortedDetectionList( + const std::vector& evaluations, + const int64_t evaluation_index, + const int64_t num_images, + const int max_detections, + std::vector* evaluation_indices, + std::vector* detection_scores, + std::vector* detection_sorted_indices, + std::vector* image_detection_indices) { + assert(evaluations.size() >= evaluation_index + num_images); + + // Extract a list of object instances of the applicable category, area + // range, and max detections requirements such that they can be sorted + image_detection_indices->clear(); + evaluation_indices->clear(); + detection_scores->clear(); + image_detection_indices->reserve(num_images * max_detections); + evaluation_indices->reserve(num_images * max_detections); + detection_scores->reserve(num_images * max_detections); + int num_valid_ground_truth = 0; + for (auto i = 0; i < num_images; ++i) { + const ImageEvaluation& evaluation = evaluations[evaluation_index + i]; + + for (int d = 0; + d < (int)evaluation.detection_scores.size() && d < max_detections; + ++d) { // detected instances + evaluation_indices->push_back(evaluation_index + i); + image_detection_indices->push_back(d); + detection_scores->push_back(evaluation.detection_scores[d]); + } + for (auto ground_truth_ignore : evaluation.ground_truth_ignores) { + if (!ground_truth_ignore) { + ++num_valid_ground_truth; + } + } + } + + // Sort detections by decreasing score, using stable sort to match + // python implementation + detection_sorted_indices->resize(detection_scores->size()); + std::iota( + detection_sorted_indices->begin(), detection_sorted_indices->end(), 0); + std::stable_sort( + detection_sorted_indices->begin(), + detection_sorted_indices->end(), + [&detection_scores](size_t j1, size_t j2) { + return (*detection_scores)[j1] > (*detection_scores)[j2]; + }); + + return num_valid_ground_truth; +} + +// Helper function to Accumulate() +// Compute a precision recall curve given a sorted list of detected instances +// encoded in evaluations, evaluation_indices, detection_scores, +// detection_sorted_indices, image_detection_indices (see +// BuildSortedDetectionList()). Using vectors precisions and recalls +// and temporary storage, output the results into precisions_out, recalls_out, +// and scores_out, which are large buffers containing many precion/recall curves +// for all possible parameter settings, with precisions_out_index and +// recalls_out_index defining the applicable indices to store results. 
+void ComputePrecisionRecallCurve( + const int64_t precisions_out_index, + const int64_t precisions_out_stride, + const int64_t recalls_out_index, + const std::vector& recall_thresholds, + const int iou_threshold_index, + const int num_iou_thresholds, + const int num_valid_ground_truth, + const std::vector& evaluations, + const std::vector& evaluation_indices, + const std::vector& detection_scores, + const std::vector& detection_sorted_indices, + const std::vector& image_detection_indices, + std::vector* precisions, + std::vector* recalls, + std::vector* precisions_out, + std::vector* scores_out, + std::vector* recalls_out) { + assert(recalls_out->size() > recalls_out_index); + + // Compute precision/recall for each instance in the sorted list of detections + int64_t true_positives_sum = 0, false_positives_sum = 0; + precisions->clear(); + recalls->clear(); + precisions->reserve(detection_sorted_indices.size()); + recalls->reserve(detection_sorted_indices.size()); + assert(!evaluations.empty() || detection_sorted_indices.empty()); + for (auto detection_sorted_index : detection_sorted_indices) { + const ImageEvaluation& evaluation = + evaluations[evaluation_indices[detection_sorted_index]]; + const auto num_detections = + evaluation.detection_matches.size() / num_iou_thresholds; + const auto detection_index = iou_threshold_index * num_detections + + image_detection_indices[detection_sorted_index]; + assert(evaluation.detection_matches.size() > detection_index); + assert(evaluation.detection_ignores.size() > detection_index); + const int64_t detection_match = + evaluation.detection_matches[detection_index]; + const bool detection_ignores = + evaluation.detection_ignores[detection_index]; + const auto true_positive = detection_match > 0 && !detection_ignores; + const auto false_positive = detection_match == 0 && !detection_ignores; + if (true_positive) { + ++true_positives_sum; + } + if (false_positive) { + ++false_positives_sum; + } + + const double recall = + static_cast(true_positives_sum) / num_valid_ground_truth; + recalls->push_back(recall); + const int64_t num_valid_detections = + true_positives_sum + false_positives_sum; + const double precision = num_valid_detections > 0 + ? static_cast(true_positives_sum) / num_valid_detections + : 0.0; + precisions->push_back(precision); + } + + (*recalls_out)[recalls_out_index] = !recalls->empty() ? 
recalls->back() : 0; + + for (int64_t i = static_cast(precisions->size()) - 1; i > 0; --i) { + if ((*precisions)[i] > (*precisions)[i - 1]) { + (*precisions)[i - 1] = (*precisions)[i]; + } + } + + // Sample the per instance precision/recall list at each recall threshold + for (size_t r = 0; r < recall_thresholds.size(); ++r) { + // first index in recalls >= recall_thresholds[r] + std::vector::iterator low = std::lower_bound( + recalls->begin(), recalls->end(), recall_thresholds[r]); + size_t precisions_index = low - recalls->begin(); + + const auto results_ind = precisions_out_index + r * precisions_out_stride; + assert(results_ind < precisions_out->size()); + assert(results_ind < scores_out->size()); + if (precisions_index < precisions->size()) { + (*precisions_out)[results_ind] = (*precisions)[precisions_index]; + (*scores_out)[results_ind] = + detection_scores[detection_sorted_indices[precisions_index]]; + } else { + (*precisions_out)[results_ind] = 0; + (*scores_out)[results_ind] = 0; + } + } +} +py::dict Accumulate( + const py::object& params, + const std::vector& evaluations) { + const std::vector recall_thresholds = + list_to_vec(params.attr("recThrs")); + const std::vector max_detections = + list_to_vec(params.attr("maxDets")); + const int num_iou_thresholds = py::len(params.attr("iouThrs")); + const int num_recall_thresholds = py::len(params.attr("recThrs")); + const int num_categories = params.attr("useCats").cast() == 1 + ? py::len(params.attr("catIds")) + : 1; + const int num_area_ranges = py::len(params.attr("areaRng")); + const int num_max_detections = py::len(params.attr("maxDets")); + const int num_images = py::len(params.attr("imgIds")); + + std::vector precisions_out( + num_iou_thresholds * num_recall_thresholds * num_categories * + num_area_ranges * num_max_detections, + -1); + std::vector recalls_out( + num_iou_thresholds * num_categories * num_area_ranges * + num_max_detections, + -1); + std::vector scores_out( + num_iou_thresholds * num_recall_thresholds * num_categories * + num_area_ranges * num_max_detections, + -1); + + // Consider the list of all detected instances in the entire dataset in one + // large list. evaluation_indices, detection_scores, + // image_detection_indices, and detection_sorted_indices all have the same + // length as this list, such that each entry corresponds to one detected + // instance + std::vector evaluation_indices; // indices into evaluations[] + std::vector detection_scores; // detection scores of each instance + std::vector detection_sorted_indices; // sorted indices of all + // instances in the dataset + std::vector + image_detection_indices; // indices into the list of detected instances in + // the same image as each instance + std::vector precisions, recalls; + + for (auto c = 0; c < num_categories; ++c) { + for (auto a = 0; a < num_area_ranges; ++a) { + for (auto m = 0; m < num_max_detections; ++m) { + // The COCO PythonAPI assumes evaluations[] (the return value of + // COCOeval::EvaluateImages() is one long list storing results for each + // combination of category, area range, and image id, with categories in + // the outermost loop and images in the innermost loop. 
+        const int64_t evaluations_index =
+            c * num_area_ranges * num_images + a * num_images;
+        int num_valid_ground_truth = BuildSortedDetectionList(
+            evaluations,
+            evaluations_index,
+            num_images,
+            max_detections[m],
+            &evaluation_indices,
+            &detection_scores,
+            &detection_sorted_indices,
+            &image_detection_indices);
+
+        if (num_valid_ground_truth == 0) {
+          continue;
+        }
+
+        for (auto t = 0; t < num_iou_thresholds; ++t) {
+          // recalls_out is a flattened vector representing a
+          // num_iou_thresholds X num_categories X num_area_ranges X
+          // num_max_detections matrix
+          const int64_t recalls_out_index =
+              t * num_categories * num_area_ranges * num_max_detections +
+              c * num_area_ranges * num_max_detections +
+              a * num_max_detections + m;
+
+          // precisions_out and scores_out are flattened vectors
+          // representing a num_iou_thresholds X num_recall_thresholds X
+          // num_categories X num_area_ranges X num_max_detections matrix
+          const int64_t precisions_out_stride =
+              num_categories * num_area_ranges * num_max_detections;
+          const int64_t precisions_out_index = t * num_recall_thresholds *
+                  num_categories * num_area_ranges * num_max_detections +
+              c * num_area_ranges * num_max_detections +
+              a * num_max_detections + m;
+
+          ComputePrecisionRecallCurve(
+              precisions_out_index,
+              precisions_out_stride,
+              recalls_out_index,
+              recall_thresholds,
+              t,
+              num_iou_thresholds,
+              num_valid_ground_truth,
+              evaluations,
+              evaluation_indices,
+              detection_scores,
+              detection_sorted_indices,
+              image_detection_indices,
+              &precisions,
+              &recalls,
+              &precisions_out,
+              &scores_out,
+              &recalls_out);
+        }
+      }
+    }
+  }
+
+  time_t rawtime;
+  struct tm local_time;
+  std::array<char, 200> buffer;
+  time(&rawtime);
+#ifdef _WIN32
+  localtime_s(&local_time, &rawtime);
+#else
+  localtime_r(&rawtime, &local_time);
+#endif
+  strftime(
+      buffer.data(), 200, "%Y-%m-%d %H:%M:%S", &local_time);
+  return py::dict(
+      "params"_a = params,
+      "counts"_a = std::vector<int64_t>({num_iou_thresholds,
+                                         num_recall_thresholds,
+                                         num_categories,
+                                         num_area_ranges,
+                                         num_max_detections}),
+      "date"_a = buffer,
+      "precision"_a = precisions_out,
+      "recall"_a = recalls_out,
+      "scores"_a = scores_out);
+}
+
+}  // namespace COCOeval
diff --git a/community/cv/ShipWise/mindyolo/csrc/fast_coco_eval/cocoeval/cocoeval.h b/community/cv/ShipWise/mindyolo/csrc/fast_coco_eval/cocoeval/cocoeval.h
new file mode 100644
index 0000000000000000000000000000000000000000..1febb409edc8bd8b5d67dc85c34fe3e8372d94ff
--- /dev/null
+++ b/community/cv/ShipWise/mindyolo/csrc/fast_coco_eval/cocoeval/cocoeval.h
@@ -0,0 +1,100 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+// This file was copied from project facebookresearch/detectron2
+// The file link is https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/csrc/cocoeval/cocoeval.h
+#pragma once
+
+#include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+#include <pybind11/stl_bind.h>
+#include <vector>
+
+namespace py = pybind11;
+
+namespace COCOeval {
+
+// Annotation data for a single object instance in an image
+struct InstanceAnnotation {
+  InstanceAnnotation(
+      uint64_t id,
+      double score,
+      double area,
+      bool is_crowd,
+      bool ignore)
+      : id{id}, score{score}, area{area}, is_crowd{is_crowd}, ignore{ignore} {}
+  uint64_t id;
+  double score = 0.;
+  double area = 0.;
+  bool is_crowd = false;
+  bool ignore = false;
+};
+
+// Stores intermediate results for evaluating detection results for a single
+// image that has D detected instances and G ground truth instances. This
+// stores matches between detected and ground truth instances
+struct ImageEvaluation {
+  // For each of the D detected instances, the id of the matched ground truth
+  // instance, or 0 if unmatched
+  std::vector<uint64_t> detection_matches;
+
+  // The detection score of each of the D detected instances
+  std::vector<double> detection_scores;
+
+  // Marks whether or not each of G instances was ignored from evaluation (e.g.,
+  // because it's outside area_range)
+  std::vector<bool> ground_truth_ignores;
+
+  // Marks whether or not each of D instances was ignored from evaluation (e.g.,
+  // because it's outside aRng)
+  std::vector<bool> detection_ignores;
+};
+
+template <class T>
+using ImageCategoryInstances = std::vector<std::vector<std::vector<T>>>;
+
+// C++ implementation of COCO API cocoeval.py::COCOeval.evaluateImg(). For each
+// combination of image, category, area range settings, and IOU thresholds to
+// evaluate, it matches detected instances to ground truth instances and stores
+// the results into a vector of ImageEvaluation results, which will be
+// interpreted by the COCOeval::Accumulate() function to produce precision-recall
+// curves. The parameters of nested vectors have the following semantics:
+//   image_category_ious[i][c][d][g] is the intersection over union of the d'th
+//     detected instance and g'th ground truth instance of
+//     category category_ids[c] in image image_ids[i]
+//   image_category_ground_truth_instances[i][c] is a vector of ground truth
+//     instances in image image_ids[i] of category category_ids[c]
+//   image_category_detection_instances[i][c] is a vector of detected
+//     instances in image image_ids[i] of category category_ids[c]
+std::vector<ImageEvaluation> EvaluateImages(
+    const std::vector<std::array<double, 2>>& area_ranges, // vector of 2-tuples
+    int max_detections,
+    const std::vector<double>& iou_thresholds,
+    const ImageCategoryInstances<std::vector<double>>& image_category_ious,
+    const ImageCategoryInstances<InstanceAnnotation>&
+        image_category_ground_truth_instances,
+    const ImageCategoryInstances<InstanceAnnotation>&
+        image_category_detection_instances);
+
+// C++ implementation of COCOeval.accumulate(), which generates precision
+// recall curves for each set of category, IOU threshold, detection area range,
+// and max number of detections parameters.
+// It is assumed that the parameter
+// evaluations is the return value of the function COCOeval::EvaluateImages(),
+// which was called with the same parameter settings params
+py::dict Accumulate(
+    const py::object& params,
+    const std::vector<ImageEvaluation>& evaluations);
+
+}  // namespace COCOeval
+
+PYBIND11_MODULE(fast_coco_eval, m)
+{
+  m.def("COCOevalAccumulate", &COCOeval::Accumulate, "COCOeval::Accumulate");
+  m.def(
+      "COCOevalEvaluateImages",
+      &COCOeval::EvaluateImages,
+      "COCOeval::EvaluateImages");
+  pybind11::class_<COCOeval::InstanceAnnotation>(m, "InstanceAnnotation")
+      .def(pybind11::init<uint64_t, double, double, bool, bool>());
+  pybind11::class_<COCOeval::ImageEvaluation>(m, "ImageEvaluation")
+      .def(pybind11::init<>());
+}
diff --git a/community/cv/ShipWise/mindyolo/csrc/fast_coco_eval/fast_coco_eval.cp38-win_amd64.pyd b/community/cv/ShipWise/mindyolo/csrc/fast_coco_eval/fast_coco_eval.cp38-win_amd64.pyd
new file mode 100644
index 0000000000000000000000000000000000000000..62e4aeeb5f3767910f1f7c33a6cc74babd48d2b6
Binary files /dev/null and b/community/cv/ShipWise/mindyolo/csrc/fast_coco_eval/fast_coco_eval.cp38-win_amd64.pyd differ
diff --git a/community/cv/ShipWise/mindyolo/csrc/fast_coco_eval/fast_coco_eval_api.py b/community/cv/ShipWise/mindyolo/csrc/fast_coco_eval/fast_coco_eval_api.py
new file mode 100644
index 0000000000000000000000000000000000000000..7be4d742c450752f0d9b9fd7d4ea79fae148c3f8
--- /dev/null
+++ b/community/cv/ShipWise/mindyolo/csrc/fast_coco_eval/fast_coco_eval_api.py
@@ -0,0 +1,149 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# This file is modified from
+# https://github.com/facebookresearch/detectron2/blob/master/detectron2/evaluation/fast_eval_api.py
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import copy
+import time
+
+import numpy as np
+from pycocotools.cocoeval import COCOeval
+
+from . import fast_coco_eval
+
+
+class COCOeval_fast(COCOeval):
+    """
+    This is a slightly modified version of the original COCO API, where the functions evaluateImg()
+    and accumulate() are implemented in C++ to speed up evaluation
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.module = fast_coco_eval
+
+    def evaluate(self):
+        """
+        Run per-image evaluation on given images and store results in self._evalImgs_cpp, a
+        datastructure that isn't readable from Python but is used by a C++ implementation of
+        accumulate(). Unlike the original COCO PythonAPI, we don't populate the datastructure
+        self.evalImgs because this datastructure is a computational bottleneck.
+        :return: None
+        """
+        tic = time.time()
+
+        print("Running per image evaluation...")
+        p = self.params
+        # add backward compatibility if useSegm is specified in params
+        if p.useSegm is not None:
+            p.iouType = "segm" if p.useSegm == 1 else "bbox"
+            print(
+                "useSegm (deprecated) is not None.
Running {} evaluation".format( + p.iouType + ) + ) + print("Evaluate annotation type *{}*".format(p.iouType)) + p.imgIds = list(np.unique(p.imgIds)) + if p.useCats: + p.catIds = list(np.unique(p.catIds)) + p.maxDets = sorted(p.maxDets) + self.params = p + + self._prepare() + + # loop through images, area range, max detection number + catIds = p.catIds if p.useCats else [-1] + + if p.iouType == "segm" or p.iouType == "bbox": + computeIoU = self.computeIoU + elif p.iouType == "keypoints": + computeIoU = self.computeOks + self.ious = { + (imgId, catId): computeIoU(imgId, catId) + for imgId in p.imgIds + for catId in catIds + } + + maxDet = p.maxDets[-1] + + # <<<< Beginning of code differences with original COCO API + def convert_instances_to_cpp(instances, is_det=False): + # Convert annotations for a list of instances in an image to a format that's fast + # to access in C++ + instances_cpp = [] + for instance in instances: + instance_cpp = self.module.InstanceAnnotation( + int(instance["id"]), + instance["score"] if is_det else instance.get("score", 0.0), + instance["area"], + bool(instance.get("iscrowd", 0)), + bool(instance.get("ignore", 0)), + ) + instances_cpp.append(instance_cpp) + return instances_cpp + + # Convert GT annotations, detections, and IOUs to a format that's fast to access in C++ + ground_truth_instances = [ + [convert_instances_to_cpp(self._gts[imgId, catId]) for catId in p.catIds] + for imgId in p.imgIds + ] + detected_instances = [ + [ + convert_instances_to_cpp(self._dts[imgId, catId], is_det=True) + for catId in p.catIds + ] + for imgId in p.imgIds + ] + ious = [[self.ious[imgId, catId] for catId in catIds] for imgId in p.imgIds] + + if not p.useCats: + # For each image, flatten per-category lists into a single list + ground_truth_instances = [ + [[o for c in i for o in c]] for i in ground_truth_instances + ] + detected_instances = [ + [[o for c in i for o in c]] for i in detected_instances + ] + + # Call C++ implementation of self.evaluateImgs() + self._evalImgs_cpp = self.module.COCOevalEvaluateImages( + p.areaRng, + maxDet, + p.iouThrs, + ious, + ground_truth_instances, + detected_instances, + ) + self._evalImgs = None + + self._paramsEval = copy.deepcopy(self.params) + toc = time.time() + print("COCOeval_opt.evaluate() finished in {:0.2f} seconds.".format(toc - tic)) + # >>>> End of code differences with original COCO API + + def accumulate(self): + """ + Accumulate per image evaluation results and store the result in self.eval. 
Does not + support changing parameter settings from those used by self.evaluate() + """ + print("Accumulating evaluation results...") + tic = time.time() + if not hasattr(self, "_evalImgs_cpp"): + print("Please run evaluate() first") + + self.eval = self.module.COCOevalAccumulate(self._paramsEval, self._evalImgs_cpp) + + # recall is num_iou_thresholds X num_categories X num_area_ranges X num_max_detections + self.eval["recall"] = np.array(self.eval["recall"]).reshape( + self.eval["counts"][:1] + self.eval["counts"][2:] + ) + + # precision and scores are num_iou_thresholds X num_recall_thresholds X num_categories X + # num_area_ranges X num_max_detections + self.eval["precision"] = np.array(self.eval["precision"]).reshape( + self.eval["counts"] + ) + self.eval["scores"] = np.array(self.eval["scores"]).reshape(self.eval["counts"]) + toc = time.time() + print( + "COCOeval_opt.accumulate() finished in {:0.2f} seconds.".format(toc - tic) + ) diff --git a/community/cv/ShipWise/mindyolo/models/__init__.py b/community/cv/ShipWise/mindyolo/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..683aaa6978e08cc1ad5568e0106462600689a8f7 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/__init__.py @@ -0,0 +1,33 @@ +from . import (heads, initializer, layers, losses, model_factory, yolov3, + yolov4, yolov5, yolov7, yolov8) + +from . import shipwise + +__all__ = [] + +__all__.extend(heads.__all__) +__all__.extend(layers.__all__) +__all__.extend(losses.__all__) +__all__.extend(yolov8.__all__) +__all__.extend(yolov7.__all__) +__all__.extend(yolov5.__all__) +__all__.extend(yolov4.__all__) +__all__.extend(yolov3.__all__) +__all__.extend(initializer.__all__) +__all__.extend(model_factory.__all__) +__all__.extend(shipwise.__all__) + +# fixme: since yolov7 is used as both the file and function name, we need to import * after __all__ + +from .heads import * +from .initializer import * +from .layers import * +from .losses import * +from .model_factory import * +from .yolov3 import * +from .yolov4 import * +from .yolov5 import * +from .yolov7 import * +from .yolov8 import * +from .yolox import * +from .shipwise import * diff --git a/community/cv/ShipWise/mindyolo/models/heads/__init__.py b/community/cv/ShipWise/mindyolo/models/heads/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..593e3df833d2e4179022f94b5bbef650bf25b6c3 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/heads/__init__.py @@ -0,0 +1,17 @@ +"""layers init""" +from .yolov3_head import * +from .yolov4_head import * +from .yolov5_head import * +from .yolov7_head import * +from .yolov8_head import * +from .yolox_head import * + + +__all__ = [ + "YOLOv3Head", + "YOLOv4Head", + "YOLOv5Head", + "YOLOv7Head", "YOLOv7AuxHead", + "YOLOv8Head", "YOLOv8SegHead", + "YOLOXHead" +] diff --git a/community/cv/ShipWise/mindyolo/models/heads/yolov3_head.py b/community/cv/ShipWise/mindyolo/models/heads/yolov3_head.py new file mode 100644 index 0000000000000000000000000000000000000000..e28e5ef795d132ab3e1e3b6a75ca981036eb6813 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/heads/yolov3_head.py @@ -0,0 +1,92 @@ +import math +import numpy as np + +import mindspore as ms +import mindspore.numpy as mnp +from mindspore import Parameter, Tensor, nn, ops + +from mindyolo.utils import logger +from ..layers.utils import meshgrid + + +class YOLOv3Head(nn.Cell): + """ + YOLOv3 Detect Head, convert the output result to a prediction box based on the anchor point. 
+ """ + + def __init__(self, nc=80, anchors=(), stride=(), ch=()): # detection layer + super(YOLOv3Head, self).__init__() + + assert isinstance(anchors, (tuple, list)) and len(anchors) > 0 + assert isinstance(stride, (tuple, list)) and len(stride) > 0 + assert isinstance(ch, (tuple, list)) and len(ch) > 0 + + self.nc = nc # number of classes + self.no = nc + 5 # number of outputs per anchor + self.nl = len(anchors) # number of detection layers + self.na = len(anchors[0]) // 2 # number of anchors + + # anchor preprocess + anchors = np.array(anchors) + stride = np.array(stride) + anchors, anchor_grid = self._check_anchor_order( + anchors=anchors.reshape((self.nl, -1, 2)), + anchor_grid=anchors.reshape((self.nl, 1, -1, 1, 1, 2)), + stride=stride, + ) + anchors = anchors / stride.reshape((-1, 1, 1)) + + self.stride = Parameter(Tensor(stride, ms.int32), requires_grad=False) + self.anchors = Parameter(Tensor(anchors, ms.float32), requires_grad=False) # shape(nl,na,2) + self.anchor_grid = Parameter(Tensor(anchor_grid, ms.float32), requires_grad=False) # shape(nl,1,na,1,1,2) + + self.m = nn.CellList( + [nn.Conv2d(x, self.no * self.na, 1, pad_mode="valid", has_bias=True) for x in ch] + ) # output conv + + def construct(self, x): + z = () # inference output + outs = () + for i in range(self.nl): + out = self.m[i](x[i]) # conv + bs, _, ny, nx = out.shape # (bs,255,20,20) + out = out.view(bs, self.na, self.no, ny, nx).transpose((0, 1, 3, 4, 2)) # (bs,3,20,20,85) + outs += (out,) + + if not self.training: # inference + grid_tensor = self._make_grid(nx, ny, out.dtype) + + y = ops.Sigmoid()(out) + y[..., 0:2] = (y[..., 0:2] * 2.0 - 0.5 + grid_tensor) * self.stride[i] # xy + y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh + z += (y.view(bs, -1, self.no),) + + return outs if self.training else (ops.concat(z, 1), outs) + + def initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency + # https://arxiv.org/abs/1708.02002 section 3.3 + m = self + for mi, s in zip(m.m, m.stride): # from + s = s.asnumpy() + b = mi.bias.view(m.na, -1).asnumpy() # conv.bias(255) to (3,85) + b[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) + b[:, 5:] += math.log(0.6 / (m.nc - 0.999999)) if cf is None else np.log(cf / cf.sum()) # cls + mi.bias = ops.assign(mi.bias, Tensor(b, ms.float32).view(-1)) + + @staticmethod + def _make_grid(nx=20, ny=20, dtype=ms.float32): + # FIXME: Not supported on a specific model of machine + xv, yv = meshgrid((mnp.arange(nx), mnp.arange(ny))) + return ops.cast(ops.stack((xv, yv), 2).view((1, 1, ny, nx, 2)), dtype) + + @staticmethod + def _check_anchor_order(anchors, anchor_grid, stride): + # Check anchor order against stride order for YOLO Detect() module m, and correct if necessary + a = np.prod(anchor_grid, -1).reshape((-1,)) # anchor area + da = a[-1] - a[0] # delta a + ds = stride[-1] - stride[0] # delta s + if np.sign(da) != np.sign(ds): # same order + logger.warning("Reversing anchor order") + anchors = anchors[::-1, ...] + anchor_grid = anchor_grid[::-1, ...] 
+ return anchors, anchor_grid diff --git a/community/cv/ShipWise/mindyolo/models/heads/yolov4_head.py b/community/cv/ShipWise/mindyolo/models/heads/yolov4_head.py new file mode 100644 index 0000000000000000000000000000000000000000..06a46f0fdc28543f5b5a48e55d281c8f17f03fa6 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/heads/yolov4_head.py @@ -0,0 +1,121 @@ +import mindspore as ms +from mindspore import Tensor, nn, ops + + +class YOLOv4Head(nn.Cell): + """ + YOLOv4 Detect Head, convert the output result to a prediction box based on the anchor point. + """ + + def __init__(self, nc=80, anchors=(), ch=()): # detection layer + super(YOLOv4Head, self).__init__() + + assert isinstance(anchors, (tuple, list)) and len(anchors) > 0 + assert isinstance(ch, (tuple, list)) and len(ch) > 0 + + self.nc = nc # number of classes + self.no = nc + 5 # number of outputs per anchor + self.nl = 3 # number of detection layers + self.na = len(anchors) // 3 # number of anchors + + self.m = nn.CellList( + [nn.Conv2d(x, self.no * self.na, 1, pad_mode="valid", has_bias=True) for x in ch] + ) # output conv + + # prediction on the default anchor boxes + self.detect_1 = DetectionBlock("l", anchors, self.no) + self.detect_2 = DetectionBlock("m", anchors, self.no) + self.detect_3 = DetectionBlock("s", anchors, self.no) + + def construct(self, x): + big_object_output = self.m[0](x[0]) + medium_object_output = self.m[1](x[1]) + small_object_output = self.m[2](x[2]) + bs = small_object_output.shape[0] + output_big = self.detect_1(big_object_output) + output_me = self.detect_2(medium_object_output) + output_small = self.detect_3(small_object_output) + if not self.training: + big = output_big.view(bs, -1, self.no) + me = output_me.view(bs, -1, self.no) + small = output_small.view(bs, -1, self.no) + return ops.concat((big, me, small), 1), (output_big, output_me, output_small) + + return output_big, output_me, output_small + + +class DetectionBlock(nn.Cell): + """ + YOLOv4 detection Network. It will finally output the detection result. 
+ """ + + def __init__(self, scale, anchor_scales, no): + super(DetectionBlock, self).__init__() + if scale == "s": + idx = (6, 7, 8) + self.scale_x_y = 1.2 + self.offset_x_y = 0.1 + self.stride = 8 + elif scale == "m": + idx = (3, 4, 5) + self.scale_x_y = 1.1 + self.offset_x_y = 0.05 + self.stride = 16 + elif scale == "l": + idx = (0, 1, 2) + self.scale_x_y = 1.05 + self.offset_x_y = 0.025 + self.stride = 32 + else: + raise KeyError("Invalid scale value for DetectionBlock") + self.anchors = Tensor([anchor_scales[i] for i in idx], ms.float32) + self.num_anchors_per_scale = 3 + self.num_attrib = no + + self.sigmoid = ops.Sigmoid() + + def construct(self, x): + """construct method""" + num_batch = x.shape[0] + grid_size = x.shape[2:4] + input_shape = [size * self.stride for size in grid_size] + input_shape = Tensor(tuple(input_shape[::-1]), ms.float32) + + # Reshape and transpose the feature to [n, grid_size[0], grid_size[1], 3, num_attrib] + prediction = x.view(num_batch, self.num_anchors_per_scale, self.num_attrib, grid_size[0], grid_size[1]) + prediction = prediction.transpose((0, 3, 4, 1, 2)) + + range_x = range(grid_size[1]) + range_y = range(grid_size[0]) + grid_x = ops.cast(ops.tuple_to_array(range_x), ms.float32) + grid_y = ops.cast(ops.tuple_to_array(range_y), ms.float32) + # Tensor of shape [grid_size[0], grid_size[1], 1, 1] representing the coordinate of x/y axis for each grid + # [batch, gridx, gridy, 1, 1] + grid_x = ops.tile(grid_x.view(1, 1, -1, 1, 1), (1, grid_size[0], 1, 1, 1)) + grid_y = ops.tile(grid_y.view(1, -1, 1, 1, 1), (1, 1, grid_size[1], 1, 1)) + # Shape is [grid_size[0], grid_size[1], 1, 2] + grid = ops.concat((grid_x, grid_y), -1) + + box_xy = prediction[:, :, :, :, :2] + box_wh = prediction[:, :, :, :, 2:4] + box_confidence = prediction[:, :, :, :, 4:5] + box_probs = prediction[:, :, :, :, 5:] + + # gridsize1 is x + # gridsize0 is y + box_xy = (self.scale_x_y * self.sigmoid(box_xy) - self.offset_x_y + grid) / ops.cast( + ops.tuple_to_array((grid_size[1], grid_size[0])), ms.float32 + ) + # box_wh is w->h + box_wh = ops.exp(box_wh) * self.anchors / input_shape + box_confidence = self.sigmoid(box_confidence) + box_probs = self.sigmoid(box_probs) + + if self.training: + return prediction, box_xy, box_wh + box_xy *= input_shape + box_wh *= input_shape + return ops.concat((box_xy.astype(ms.float32), + box_wh.astype(ms.float32), + box_confidence.astype(ms.float32), + box_probs.astype(ms.float32)), -1) diff --git a/community/cv/ShipWise/mindyolo/models/heads/yolov5_head.py b/community/cv/ShipWise/mindyolo/models/heads/yolov5_head.py new file mode 100644 index 0000000000000000000000000000000000000000..183e503ca318cec1854d82887228b1c5e0b0fcf5 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/heads/yolov5_head.py @@ -0,0 +1,105 @@ +import math +import numpy as np + +import mindspore as ms +import mindspore.numpy as mnp +from mindspore import Parameter, Tensor, nn, ops + +from mindyolo.utils import logger +from ..layers.utils import meshgrid + + +class YOLOv5Head(nn.Cell): + def __init__(self, nc=80, anchors=(), stride=(), ch=()): # detection layer + super(YOLOv5Head, self).__init__() + + assert isinstance(anchors, (tuple, list)) and len(anchors) > 0 + assert isinstance(stride, (tuple, list)) and len(stride) > 0 + assert isinstance(ch, (tuple, list)) and len(ch) > 0 + + self.nc = nc # number of classes + self.no = nc + 5 # number of outputs per anchor + self.nl = len(anchors) # number of detection layers + self.na = len(anchors[0]) // 2 # number of anchors + + # 
anchor preprocess + anchors = np.array(anchors) + stride = np.array(stride) + anchors, anchor_grid = self._check_anchor_order( + anchors=anchors.reshape((self.nl, -1, 2)), + anchor_grid=anchors.reshape((self.nl, 1, -1, 1, 1, 2)), + stride=stride, + ) + anchors = anchors / stride.reshape((-1, 1, 1)) + + self.stride = Parameter(Tensor(stride, ms.int32), requires_grad=False) + self.anchors = Parameter(Tensor(anchors, ms.float32), requires_grad=False) # shape(nl,na,2) + self.anchor_grid = Parameter(Tensor(anchor_grid, ms.float32), requires_grad=False) # shape(nl,1,na,1,1,2) + + self.m = nn.CellList( + [nn.Conv2d(x, self.no * self.na, 1, pad_mode="valid", has_bias=True) for x in ch] + ) # output conv + + def construct(self, x): + z = () # inference output + outs = () + for i in range(self.nl): + out = self.m[i](x[i]) # conv + bs, _, ny, nx = out.shape # (bs,255,20,20) + out = ops.Transpose()(out.view(bs, self.na, self.no, ny, nx), (0, 1, 3, 4, 2)) # (bs,3,20,20,85) + out = out + outs += (out,) + + if not self.training: # inference + grid_tensor = self._make_grid(nx, ny, out.dtype) + + y = ops.Sigmoid()(out) + y[..., 0:2] = (y[..., 0:2] * 2.0 - 0.5 + grid_tensor) * self.stride[i] # xy + y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh + z += (y.view(bs, -1, self.no),) + + # return outs + return outs if self.training else (ops.concat(z, 1), outs) + + def initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency + # https://arxiv.org/abs/1708.02002 section 3.3 + m = self + for mi, s in zip(m.m, m.stride): # from + s = s.asnumpy() + b = mi.bias.view(m.na, -1).asnumpy() # conv.bias(255) to (3,85) + b[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) + b[:, 5:] += math.log(0.6 / (m.nc - 0.999999)) if cf is None else np.log(cf / cf.sum()) # cls + mi.bias = ops.assign(mi.bias, Tensor(b, ms.float32).view(-1)) + + @staticmethod + def _make_grid(nx=20, ny=20, dtype=ms.float32): + # FIXME: Not supported on a specific model of machine + xv, yv = meshgrid((mnp.arange(nx), mnp.arange(ny))) + return ops.cast(ops.stack((xv, yv), 2).view((1, 1, ny, nx, 2)), dtype) + + @staticmethod + def _check_anchor_order(anchors, anchor_grid, stride): + # Check anchor order against stride order for YOLO Detect() module m, and correct if necessary + a = np.prod(anchor_grid, -1).reshape((-1,)) # anchor area + da = a[-1] - a[0] # delta a + ds = stride[-1] - stride[0] # delta s + if np.sign(da) != np.sign(ds): # same order + logger.warning("Reversing anchor order") + anchors = anchors[::-1, ...] + anchor_grid = anchor_grid[::-1, ...] 
+ return anchors, anchor_grid + + def convert(self, z): + z = ops.concat(z, 1) + box = z[:, :, :4] + conf = z[:, :, 4:5] + score = z[:, :, 5:] + score *= conf + convert_matrix = get_convert_matrix() + box = ops.matmul(box, convert_matrix) + return (box, score) + + +@ops.constexpr(reuse_result=True) +def get_convert_matrix(): + return Tensor(np.array([[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]]), dtype=ms.float32) diff --git a/community/cv/ShipWise/mindyolo/models/heads/yolov7_head.py b/community/cv/ShipWise/mindyolo/models/heads/yolov7_head.py new file mode 100644 index 0000000000000000000000000000000000000000..9c60cde9b0bb0e7fd847911ad1c4b015383ca676 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/heads/yolov7_head.py @@ -0,0 +1,207 @@ +import math +import numpy as np + +import mindspore as ms +import mindspore.numpy as mnp +from mindspore import Parameter, Tensor, nn, ops + +from mindyolo.utils import logger +from ..layers.implicit import ImplicitA, ImplicitM +from ..layers.utils import meshgrid + + +class YOLOv7Head(nn.Cell): + """ + YOLOv7 Detect Head, convert the output result to a prediction box based on the anchor point. + """ + + def __init__(self, nc=80, anchors=(), stride=(), ch=()): # detection layer + super(YOLOv7Head, self).__init__() + + assert isinstance(anchors, (tuple, list)) and len(anchors) > 0 + assert isinstance(stride, (tuple, list)) and len(stride) > 0 + assert isinstance(ch, (tuple, list)) and len(ch) > 0 + + self.nc = nc # number of classes + self.no = nc + 5 # number of outputs per anchor + self.nl = len(anchors) # number of detection layers + self.na = len(anchors[0]) // 2 # number of anchors + + # anchor preprocess + anchors = np.array(anchors) + stride = np.array(stride) + anchors, anchor_grid = self._check_anchor_order( + anchors=anchors.reshape((self.nl, -1, 2)), + anchor_grid=anchors.reshape((self.nl, 1, -1, 1, 1, 2)), + stride=stride, + ) + anchors = anchors / stride.reshape((-1, 1, 1)) + + self.stride = Parameter(Tensor(stride, ms.int32), requires_grad=False) + self.anchors = Parameter(Tensor(anchors, ms.float32), requires_grad=False) # shape(nl,na,2) + self.anchor_grid = Parameter(Tensor(anchor_grid, ms.float32), requires_grad=False) # shape(nl,1,na,1,1,2) + self.convert_matrix = Parameter( + Tensor(np.array([[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]]), dtype=ms.float32), + requires_grad=False, + ) + + self.m = nn.CellList( + [nn.Conv2d(x, self.no * self.na, 1, pad_mode="valid", has_bias=True) for x in ch] + ) # output conv + + self.ia = nn.CellList([ImplicitA(x) for x in ch]) + self.im = nn.CellList([ImplicitM(self.no * self.na) for _ in ch]) + + def construct(self, x): + z = () # inference output + outs = () + for i in range(self.nl): + out = self.m[i](self.ia[i](x[i])) # conv + out = self.im[i](out) + bs, _, ny, nx = out.shape # (bs,255,20,20) + out = out.view(bs, self.na, self.no, ny, nx).transpose((0, 1, 3, 4, 2)) # (bs,3,20,20,85) + outs += (out,) + + if not self.training: # inference + grid_tensor = self._make_grid(nx, ny, out.dtype) + + # y = ops.sigmoid(out) + y = ops.Sigmoid()(out) + y[..., 0:2] = (y[..., 0:2] * 2.0 - 0.5 + grid_tensor) * self.stride[i] # xy + y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh + z += (y.view(bs, -1, self.no),) + + return outs if self.training else (ops.concat(z, 1), outs) + + def initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency + # https://arxiv.org/abs/1708.02002 section 3.3 + m = self + for mi, s in 
zip(m.m, m.stride): # from + s = s.asnumpy() + b = mi.bias.view(m.na, -1).asnumpy() # conv.bias(255) to (3,85) + b[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) + b[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else np.log(cf / cf.sum()) # cls + mi.bias = ops.assign(mi.bias, Tensor(b, ms.float32).view(-1)) + + @staticmethod + def _make_grid(nx=20, ny=20, dtype=ms.float32): + # FIXME: Not supported on a specific model of machine + xv, yv = meshgrid((mnp.arange(nx), mnp.arange(ny))) + return ops.cast(ops.stack((xv, yv), 2).view((1, 1, ny, nx, 2)), dtype) + + @staticmethod + def _check_anchor_order(anchors, anchor_grid, stride): + # Check anchor order against stride order for YOLO Detect() module m, and correct if necessary + a = np.prod(anchor_grid, -1).reshape((-1,)) # anchor area + da = a[-1] - a[0] # delta a + ds = stride[-1] - stride[0] # delta s + if np.sign(da) != np.sign(ds): # same order + logger.warning("Reversing anchor order") + anchors = anchors[::-1, ...] + anchor_grid = anchor_grid[::-1, ...] + return anchors, anchor_grid + + +class YOLOv7AuxHead(nn.Cell): + """ + YOLOv7 Detect Aux Head, convert the output result to a prediction box based on the anchor point. + """ + + def __init__(self, nc=80, anchors=(), stride=(), ch=()): # detection layer + super(YOLOv7AuxHead, self).__init__() + + assert isinstance(anchors, (tuple, list)) and len(anchors) > 0 + assert isinstance(stride, (tuple, list)) and len(stride) > 0 + assert isinstance(ch, (tuple, list)) and len(ch) > 0 + + self.nc = nc # number of classes + self.no = nc + 5 # number of outputs per anchor + self.nl = len(anchors) # number of detection layers + self.na = len(anchors[0]) // 2 # number of anchors + + # anchor preprocess + anchors = np.array(anchors) + stride = np.array(stride) + anchors, anchor_grid = self._check_anchor_order( + anchors=anchors.reshape((self.nl, -1, 2)), + anchor_grid=anchors.reshape((self.nl, 1, -1, 1, 1, 2)), + stride=stride, + ) + anchors /= stride.reshape((-1, 1, 1)) + + self.stride = Parameter(Tensor(stride, ms.int32), requires_grad=False) + self.anchors = Parameter(Tensor(anchors, ms.float32), requires_grad=False) # shape(nl,na,2) + self.anchor_grid = Parameter(Tensor(anchor_grid, ms.float32), requires_grad=False) # shape(nl,1,na,1,1,2) + self.convert_matrix = Parameter( + Tensor([[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], dtype=ms.float32), + requires_grad=False, + ) + + self.m = nn.CellList( + [nn.Conv2d(x, self.no * self.na, 1, pad_mode="valid", has_bias=True) for x in ch[: self.nl]] + ) # output conv + self.m2 = nn.CellList( + [nn.Conv2d(x, self.no * self.na, 1, pad_mode="valid", has_bias=True) for x in ch[self.nl :]] + ) # output conv + + self.ia = nn.CellList([ImplicitA(x) for x in ch[: self.nl]]) + self.im = nn.CellList([ImplicitM(self.no * self.na) for _ in ch[: self.nl]]) + + def construct(self, x): + z = () # inference output + outs_1 = () + outs_2 = () + for i in range(self.nl): + out1 = self.m[i](self.ia[i](x[i])) # conv + out1 = self.im[i](out1) + bs, _, ny, nx = out1.shape # x(bs,255,20,20) to x(bs,3,20,20,85) + out1 = ops.Transpose()(out1.view(bs, self.na, self.no, ny, nx), (0, 1, 3, 4, 2)) + outs_1 += (out1,) + + out2 = self.m2[i](x[i + self.nl]) + out2 = ops.Transpose()(out2.view(bs, self.na, self.no, ny, nx), (0, 1, 3, 4, 2)) + outs_2 += (out2,) + + if not self.training: # inference + grid_tensor = self._make_grid(nx, ny, out1.dtype) + + # y = ops.sigmoid(out1) + y = ops.Sigmoid()(out1) + y[..., 0:2] = (y[..., 0:2] * 2.0 - 
0.5 + grid_tensor) * self.stride[i] # xy + y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh + z += (y.view(bs, -1, self.no),) + outs = outs_1 + outs_2 + return outs if self.training else (ops.concat(z, 1), outs_1) + + def _initialize_aux_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency + # https://arxiv.org/abs/1708.02002 section 3.3 + m = self + for mi, mi2, s in zip(m.m, m.m2, m.stride): # from + s = s.asnumpy() + + b = mi.bias.view(m.na, -1).asnumpy() # conv.bias(255) to (3,85) + b[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) + b[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else np.log(cf / cf.sum()) # cls + mi.bias = ops.assign(mi.bias, Tensor(b, ms.float32).view(-1)) + + b2 = mi2.bias.view(m.na, -1).asnumpy() # conv.bias(255) to (3,85) + b2[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) + b2[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else np.log(cf / cf.sum()) # cls + mi2.bias = ops.assign(mi2.bias, Tensor(b2, ms.float32).view(-1)) + + @staticmethod + def _make_grid(nx=20, ny=20, dtype=ms.float32): + xv, yv = meshgrid((mnp.arange(nx), mnp.arange(ny))) + return ops.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).astype(dtype) + + @staticmethod + def _check_anchor_order(anchors, anchor_grid, stride): + # Check anchor order against stride order for YOLO Detect() module m, and correct if necessary + a = np.prod(anchor_grid, -1).reshape((-1,)) # anchor area + da = a[-1] - a[0] # delta a + ds = stride[-1] - stride[0] # delta s + if np.sign(da) != np.sign(ds): # same order + logger.warning("Reversing anchor order") + anchors = anchors[::-1, ...] + anchor_grid = anchor_grid[::-1, ...] + return anchors, anchor_grid diff --git a/community/cv/ShipWise/mindyolo/models/heads/yolov8_head.py b/community/cv/ShipWise/mindyolo/models/heads/yolov8_head.py new file mode 100644 index 0000000000000000000000000000000000000000..c821409fc5c85f1f59e4668437b35465d1576e3b --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/heads/yolov8_head.py @@ -0,0 +1,155 @@ +import math +import numpy as np + +import mindspore as ms +import mindspore.numpy as mnp +from mindspore import Parameter, Tensor, nn, ops + +from ..layers import DFL, ConvNormAct, Identity +from ..layers.utils import meshgrid + + +class YOLOv8Head(nn.Cell): + # YOLOv8 Detect head for detection models + def __init__(self, nc=80, reg_max=16, stride=(), ch=(), sync_bn=False): # detection layer + super().__init__() + # self.dynamic = False # force grid reconstruction + + assert isinstance(stride, (tuple, list)) and len(stride) > 0 + assert isinstance(ch, (tuple, list)) and len(ch) > 0 + + self.nc = nc # number of classes + self.nl = len(ch) # number of detection layers + self.reg_max = reg_max # DFL channels (ch[0] // 16 to scale 4/8/12/16/20 for n/s/m/l/x) + self.no = nc + self.reg_max * 4 # number of outputs per anchor + self.stride = Parameter(Tensor(stride, ms.int32), requires_grad=False) + + c2, c3 = max((16, ch[0] // 4, self.reg_max * 4)), max(ch[0], self.nc) # channels + self.cv2 = nn.CellList( + [ + nn.SequentialCell( + [ + ConvNormAct(x, c2, 3, sync_bn=sync_bn), + ConvNormAct(c2, c2, 3, sync_bn=sync_bn), + nn.Conv2d(c2, 4 * self.reg_max, 1, has_bias=True), + ] + ) + for x in ch + ] + ) + self.cv3 = nn.CellList( + [ + nn.SequentialCell( + [ + ConvNormAct(x, c3, 3, sync_bn=sync_bn), + ConvNormAct(c3, c3, 3, sync_bn=sync_bn), + nn.Conv2d(c3, self.nc, 1, has_bias=True), + ] + ) + for x in ch + ] + ) + self.dfl = DFL(self.reg_max) if 
self.reg_max > 1 else Identity() + + def construct(self, x): + shape = x[0].shape # BCHW + out = () + for i in range(self.nl): + out += (ops.concat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1),) + + p = None + if not self.training: + _anchors, _strides = self.make_anchors(out, self.stride, 0.5) + _anchors, _strides = _anchors.swapaxes(0, 1), _strides.swapaxes(0, 1) + _x = () + for i in range(len(out)): + _x += (out[i].view(shape[0], self.no, -1),) + _x = ops.concat(_x, 2) + box, cls = _x[:, : self.reg_max * 4, :], _x[:, self.reg_max * 4 : self.reg_max * 4 + self.nc, :] + # box, cls = ops.concat([xi.view(shape[0], self.no, -1) for xi in x], 2).split((self.reg_max * 4, self.nc), 1) + dbox = self.dist2bbox(self.dfl(box), ops.expand_dims(_anchors, 0), xywh=True, axis=1) * _strides + p = ops.concat((dbox, ops.Sigmoid()(cls)), 1) + p = ops.transpose(p, (0, 2, 1)) # (bs, no-84, nbox) -> (bs, nbox, no-84) + + return out if self.training else (p, out) + + @staticmethod + def make_anchors(feats, strides, grid_cell_offset=0.5): + """Generate anchors from features.""" + anchor_points, stride_tensor = (), () + dtype = feats[0].dtype + for i, stride in enumerate(strides): + _, _, h, w = feats[i].shape + sx = mnp.arange(w, dtype=dtype) + grid_cell_offset # shift x + sy = mnp.arange(h, dtype=dtype) + grid_cell_offset # shift y + # FIXME: Not supported on a specific model of machine + sy, sx = meshgrid((sy, sx), indexing="ij") + anchor_points += (ops.stack((sx, sy), -1).view(-1, 2),) + stride_tensor += (ops.ones((h * w, 1), dtype) * stride,) + return ops.concat(anchor_points), ops.concat(stride_tensor) + + @staticmethod + def dist2bbox(distance, anchor_points, xywh=True, axis=-1): + """Transform distance(ltrb) to box(xywh or xyxy).""" + lt, rb = ops.split(distance, split_size_or_sections=2, axis=axis) + x1y1 = anchor_points - lt + x2y2 = anchor_points + rb + if xywh: + c_xy = (x1y1 + x2y2) / 2 + wh = x2y2 - x1y1 + return ops.concat((c_xy, wh), axis) # xywh bbox + return ops.concat((x1y1, x2y2), axis) # xyxy bbox + + def initialize_biases(self): + # Initialize Detect() biases, WARNING: requires stride availability + m = self + for a, b, s in zip(m.cv2, m.cv3, m.stride): # from + s = s.asnumpy() + a[-1].bias = ops.assign(a[-1].bias, Tensor(np.ones(a[-1].bias.shape), ms.float32)) + b_np = b[-1].bias.data.asnumpy() + b_np[: m.nc] = math.log(5 / m.nc / (640 / int(s)) ** 2) + b[-1].bias = ops.assign(b[-1].bias, Tensor(b_np, ms.float32)) + + +class YOLOv8SegHead(YOLOv8Head): + """YOLOv8 Segment head for segmentation models.""" + + def __init__(self, nc=80, reg_max=16, nm=32, npr=256, stride=(), ch=()): + """Initialize the YOLO model attributes such as the number of masks, prototypes, and the convolution layers.""" + super().__init__(nc, reg_max, stride, ch) + self.nm = nm # number of masks + self.npr = npr # number of protos + self.proto = Proto(ch[0], self.npr, self.nm) # protos + self.detect = YOLOv8Head.construct + + c4 = max(ch[0] // 4, self.nm) + self.cv4 = nn.CellList([nn.SequentialCell(ConvNormAct(x, c4, 3), ConvNormAct(c4, c4, 3), nn.Conv2d(c4, self.nm, 1, has_bias=True)) for x in ch]) + + def construct(self, x): + """Return model outputs and mask coefficients if training, otherwise return outputs and mask coefficients.""" + p = self.proto(x[0]) # mask protos + bs = p.shape[0] # batch size + + mc = ops.cat([self.cv4[i](x[i]).view(bs, self.nm, -1) for i in range(self.nl)], 2) # mask coefficients + x = self.detect(self, x) # x: out if self.training else (p, out) + if self.training: + return x, mc, p + + mc = 
ops.transpose(mc, (0, 2, 1)) # (bs, 32, nbox) -> (bs, nbox, 32) + # cat: (bs, nbox, no-84), (bs, nbox, 32) -> (bs, nbox, 84+32) + return ops.cat([x[0], mc], 2), (x[1], mc, p) + + +class Proto(nn.Cell): + """YOLOv8 mask Proto module for segmentation models.""" + + def __init__(self, c1, c_=256, c2=32): # ch_in, number of protos, number of masks + super().__init__() + self.cv1 = ConvNormAct(c1, c_, k=3) + self.upsample = nn.Conv2dTranspose(c_, c_, 2, 2, padding=0, has_bias=True) # nn.Upsample(scale_factor=2, mode='nearest') + self.cv2 = ConvNormAct(c_, c_, k=3) + self.cv3 = ConvNormAct(c_, c2) + + def construct(self, x): + """Performs a forward pass through layers using an upsampled input image.""" + return self.cv3(self.cv2(self.upsample(self.cv1(x)))) diff --git a/community/cv/ShipWise/mindyolo/models/heads/yolox_head.py b/community/cv/ShipWise/mindyolo/models/heads/yolox_head.py new file mode 100644 index 0000000000000000000000000000000000000000..a74598a9b5ee84488d3590b87e88915db9328610 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/heads/yolox_head.py @@ -0,0 +1,127 @@ +import math + +import mindspore as ms +from mindspore import Tensor, nn, ops +from mindspore import numpy as mnp +from mindspore.common import initializer as init + +from mindyolo.models.layers.conv import ConvNormAct, DWConvNormAct +from ..layers.utils import meshgrid + + +class YOLOXHead(nn.Cell): + def __init__( + self, + nc=80, + stride=(8, 16, 32), + ch=(256, 512, 1024), + is_standard_backbone=True, + act=True, + depth_wise=False, + sync_bn=False, + ): + """ + YOlOx head + Args: + is_standard_backbone: whether the predecessor backbone is a standard one or darknet53. default, True + """ + super().__init__() + assert isinstance(stride, (tuple, list)) and len(stride) > 0 + assert isinstance(ch, (tuple, list)) and len(ch) > 0 + + self.nc = nc + self.nl = len(ch) + self.no = nc + 4 + 1 + self.stride = Tensor(stride, ms.int32) + + self.stems = nn.CellList() # len = num_layer + self.cls_convs = nn.CellList() + self.reg_convs = nn.CellList() + self.cls_preds = nn.CellList() + self.reg_preds = nn.CellList() + self.obj_preds = nn.CellList() + + hidden_ch = ch[2] // 4 if is_standard_backbone else 256 + HeadCNA = DWConvNormAct if depth_wise else ConvNormAct + for i in range(self.nl): # three kind of resolution, 80, 40, 20 + self.stems.append(ConvNormAct(ch[i], hidden_ch, 1, act=act, sync_bn=sync_bn)) + self.cls_convs.append( + nn.SequentialCell( + [ + HeadCNA(hidden_ch, hidden_ch, 3, act=act, sync_bn=sync_bn), + HeadCNA(hidden_ch, hidden_ch, 3, act=act, sync_bn=sync_bn), + ] + ) + ) + self.reg_convs.append( + nn.SequentialCell( + [ + HeadCNA(hidden_ch, hidden_ch, 3, act=act, sync_bn=sync_bn), + HeadCNA(hidden_ch, hidden_ch, 3, act=act, sync_bn=sync_bn), + ] + ) + ) + self.cls_preds.append(nn.Conv2d(hidden_ch, self.nc, 1, pad_mode="pad", has_bias=True)) + self.reg_preds.append(nn.Conv2d(hidden_ch, 4, 1, pad_mode="pad", has_bias=True)) + self.obj_preds.append(nn.Conv2d(hidden_ch, 1, 1, pad_mode="pad", has_bias=True)) + + def construct(self, feat_list): + assert isinstance(feat_list, (tuple, list)) and len(feat_list) == self.nl + outputs = [] + for i in range(self.nl): # 80, 40, 20 + # Get head features + x = self.stems[i](feat_list[i]) + + cls_feat = self.cls_convs[i](x) + cls_output = self.cls_preds[i](cls_feat) + + reg_feat = self.reg_convs[i](x) + reg_output = self.reg_preds[i](reg_feat) + obj_output = self.obj_preds[i](reg_feat) + + # Convert to origin image scale (640) + output = ( + ops.concat([reg_output, 
obj_output, cls_output], 1) + if self.training + else ops.concat([reg_output, ops.sigmoid(obj_output), ops.sigmoid(cls_output)], 1) + ) + output = self.convert_to_origin_scale(output, stride=self.stride[i]) + outputs.append(output) + outputs_cat = ops.concat(outputs, 1) + return outputs_cat if self.training else (outputs_cat, 1) + + def initialize_biases(self, prior_prob=1e-2): + for i in range(self.nl): # 80, 40, 20 + for cell in [self.cls_preds[i], self.obj_preds[i]]: + cell.bias.set_data( + init.initializer(-math.log((1 - prior_prob) / prior_prob), cell.bias.shape, cell.bias.dtype) + ) + + def convert_to_origin_scale(self, output, stride): + """map to origin image scale for each fpn""" + batch_size = ops.shape(output)[0] + grid_size = ops.shape(output)[2:4] + stride = ops.cast(stride, output.dtype) + + # reshape predictions + output = ops.transpose(output, (0, 2, 3, 1)) # (bs,85,80,80)-->(bs, 80, 80, 85) + output = ops.reshape(output, (batch_size, 1 * grid_size[0] * grid_size[1], -1)) # bs, 6400, 85 + + # make grid + grid = self._make_grid(nx=grid_size[1], ny=grid_size[0], dtype=output.dtype) # (1,1,80,80,2) + grid = ops.reshape(grid, (1, -1, 2)) # grid(1, 6400, 2) + + # feature map scale to origin scale + output_xy = output[..., :2] + output_xy = (output_xy + grid) * stride + output_wh = output[..., 2:4] + output_wh = ops.exp(output_wh) * stride + output_other = output[..., 4:] + output_t = ops.concat([output_xy, output_wh, output_other], -1) + return output_t # bs, 6400, 85 + + @staticmethod + def _make_grid(nx=20, ny=20, dtype=ms.float32): + # FIXME: Not supported on a specific model of machine + xv, yv = meshgrid((mnp.arange(nx), mnp.arange(ny))) + return ops.cast(ops.stack((xv, yv), 2).view((1, 1, ny, nx, 2)), dtype) diff --git a/community/cv/ShipWise/mindyolo/models/initializer.py b/community/cv/ShipWise/mindyolo/models/initializer.py new file mode 100644 index 0000000000000000000000000000000000000000..39f42f3503cc15112b47367027a7c63c7287ae25 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/initializer.py @@ -0,0 +1,45 @@ +import math + +from mindspore import nn +from mindspore.common import initializer as init + +__all__ = ["initialize_defult"] + + +def initialize_defult(model): + for _, cell in model.cells_and_names(): + if isinstance(cell, nn.Conv2d): + cell.weight.set_data( + init.initializer(init.HeUniform(negative_slope=math.sqrt(5)), cell.weight.shape, cell.weight.dtype) + ) + if cell.bias is not None: + fan_in, _ = _calculate_fan_in_and_fan_out(cell.weight.shape) + bound = 1 / math.sqrt(fan_in) + cell.bias.set_data(init.initializer(init.Uniform(bound), cell.bias.shape, cell.bias.dtype)) + elif isinstance(cell, nn.Dense): + cell.weight.set_data( + init.initializer(init.HeUniform(negative_slope=math.sqrt(5)), cell.weight.shape, cell.weight.dtype) + ) + if cell.bias is not None: + fan_in, _ = _calculate_fan_in_and_fan_out(cell.weight.shape) + bound = 1 / math.sqrt(fan_in) + cell.bias.set_data(init.initializer(init.Uniform(bound), cell.bias.shape, cell.bias.dtype)) + + +def _calculate_fan_in_and_fan_out(shape): + dimensions = len(shape) + if dimensions < 2: + raise ValueError("Fan in and fan out can not be computed for tensor with fewer than 2 dimensions") + + num_input_fmaps = shape[1] + num_output_fmaps = shape[0] + receptive_field_size = 1 + if dimensions > 2: + # math.prod is not always available, accumulate the product manually + # we could use functools.reduce but that is not supported by TorchScript + for s in shape[2:]: + receptive_field_size *= s + fan_in = 
num_input_fmaps * receptive_field_size + fan_out = num_output_fmaps * receptive_field_size + + return fan_in, fan_out diff --git a/community/cv/ShipWise/mindyolo/models/layers/__init__.py b/community/cv/ShipWise/mindyolo/models/layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a86b9485a3abc0a7b5190bd683f2569cb18a788c --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/layers/__init__.py @@ -0,0 +1,37 @@ +"""layers init""" +from .activation import * +from .bottleneck import * +from .common import * +from .conv import * +from .implicit import * +from .pool import * +from .spp import * +from .upsample import * + +__all__ = [ + "Swish", + "Shortcut", + "Concat", + "ReOrg", + "Identity", + "DFL", + "ConvNormAct", + "RepConv", + "DownC", + "Focus", + "Bottleneck", + "C3", + "C2f", + "DWConvNormAct", + "DWBottleneck", + "DWC3", + "ImplicitA", + "ImplicitM", + "MP", + "SP", + "MaxPool2d", + "SPPCSPC", + "SPPF", + "Upsample", + "Residualblock", +] diff --git a/community/cv/ShipWise/mindyolo/models/layers/activation.py b/community/cv/ShipWise/mindyolo/models/layers/activation.py new file mode 100644 index 0000000000000000000000000000000000000000..59e051bf92bc607576baa9ed7da0260f0bd348f8 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/layers/activation.py @@ -0,0 +1,17 @@ +""" +Custom activation operators. +""" +from mindspore import nn, ops + + +class Swish(nn.Cell): + """ + Swish activation function: x * sigmoid(βx). If beta equals 1, you can use nn.SiLU instead. + """ + + def __init__(self, beta=1.0): + super().__init__() + self.beta = beta + + def construct(self, x): + return x * ops.sigmoid(self.beta * x) diff --git a/community/cv/ShipWise/mindyolo/models/layers/bottleneck.py b/community/cv/ShipWise/mindyolo/models/layers/bottleneck.py new file mode 100644 index 0000000000000000000000000000000000000000..4bd9bb81d1830d944bb6efdc6f729994dc259d41 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/layers/bottleneck.py @@ -0,0 +1,138 @@ +from mindspore import nn, ops + +from .conv import ConvNormAct, DWConvNormAct + + +class Bottleneck(nn.Cell): + # Standard bottleneck + def __init__( + self, c1, c2, shortcut=True, k=(1, 3), g=(1, 1), e=0.5, act=True, momentum=0.97, eps=1e-3, sync_bn=False + ): # ch_in, ch_out, shortcut, kernels, groups, expand + super().__init__() + c_ = int(c2 * e) # hidden channels + self.conv1 = ConvNormAct(c1, c_, k[0], 1, g=g[0], act=act, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.conv2 = ConvNormAct(c_, c2, k[1], 1, g=g[1], act=act, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.add = shortcut and c1 == c2 + + def construct(self, x): + if self.add: + out = x + self.conv2(self.conv1(x)) + else: + out = self.conv2(self.conv1(x)) + return out + + +class Residualblock(nn.Cell): + def __init__( + self, c1, c2, k=(1, 3), g=(1, 1), act=True, momentum=0.97, eps=1e-3, sync_bn=False + ): # ch_in, ch_out, kernels, groups, expand + super().__init__() + self.conv1 = ConvNormAct(c1, c2, k[0], 1, g=g[0], act=act, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.conv2 = ConvNormAct(c2, c2, k[1], 1, g=g[1], act=act, momentum=momentum, eps=eps, sync_bn=sync_bn) + + def construct(self, x): + out = x + self.conv2(self.conv1(x)) + return out + + +class C3(nn.Cell): + # CSP Bottleneck with 3 convolutions + def __init__(self, c1, c2, n=1, shortcut=True, e=0.5, momentum=0.97, eps=1e-3, sync_bn=False): + super(C3, self).__init__() + c_ = int(c2 * e) # hidden channels + self.conv1 = ConvNormAct(c1, c_, 1, 1, 
momentum=momentum, eps=eps, sync_bn=sync_bn) + self.conv2 = ConvNormAct(c1, c_, 1, 1, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.conv3 = ConvNormAct(2 * c_, c2, 1, momentum=momentum, eps=eps, sync_bn=sync_bn) # act=FReLU(c2) + self.m = nn.SequentialCell( + [ + Bottleneck(c_, c_, shortcut, k=(1, 3), e=1.0, momentum=momentum, eps=eps, sync_bn=sync_bn) + for _ in range(n) + ] + ) + self.concat = ops.Concat(axis=1) + + def construct(self, x): + c1 = self.conv1(x) + c2 = self.m(c1) + c3 = self.conv2(x) + c4 = self.concat((c2, c3)) + c5 = self.conv3(c4) + + return c5 + + +class C2f(nn.Cell): + # CSP Bottleneck with 2 convolutions + def __init__( + self, c1, c2, n=1, shortcut=False, g=1, e=0.5, momentum=0.97, eps=1e-3, sync_bn=False + ): # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__() + _c = int(c2 * e) # hidden channels + self.cv1 = ConvNormAct(c1, 2 * _c, 1, 1, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.cv2 = ConvNormAct( + (2 + n) * _c, c2, 1, momentum=momentum, eps=eps, sync_bn=sync_bn + ) # optional act=FReLU(c2) + self.m = nn.CellList( + [ + Bottleneck(_c, _c, shortcut, k=(3, 3), g=(1, g), e=1.0, momentum=momentum, eps=eps, sync_bn=sync_bn) + for _ in range(n) + ] + ) + + def construct(self, x): + y = () + x = self.cv1(x) + _c = x.shape[1] // 2 + x_tuple = ops.split(x, axis=1, split_size_or_sections=_c) + y += x_tuple + for i in range(len(self.m)): + m = self.m[i] + out = m(y[-1]) + y += (out,) + + return self.cv2(ops.concat(y, axis=1)) + + +class DWBottleneck(nn.Cell): + # depthwise bottleneck used in yolox nano scale + def __init__( + self, c1, c2, shortcut=True, k=(1, 3), e=0.5, act=True, momentum=0.97, eps=1e-3, sync_bn=False + ): # ch_in, ch_out, shortcut, groups, kernels, expand + super().__init__() + c_ = int(c2 * e) # hidden channels + self.conv1 = ConvNormAct(c1, c_, k[0], 1, act=True, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.conv2 = DWConvNormAct(c_, c2, k[1], 1, act=True, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.add = shortcut and c1 == c2 + + def construct(self, x): + if self.add: + out = x + self.conv2(self.conv1(x)) + else: + out = self.conv2(self.conv1(x)) + return out + + +class DWC3(nn.Cell): + # depthwise DwC3 used in yolox nano scale, similar as C3 + def __init__(self, c1, c2, n=1, shortcut=True, e=0.5, momentum=0.97, eps=1e-3, sync_bn=False): + super(DWC3, self).__init__() + c_ = int(c2 * e) # hidden channels + self.conv1 = ConvNormAct(c1, c_, 1, 1, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.conv2 = ConvNormAct(c1, c_, 1, 1, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.conv3 = ConvNormAct(2 * c_, c2, 1, momentum=momentum, eps=eps, sync_bn=sync_bn) # act=FReLU(c2) + self.m = nn.SequentialCell( + [ + DWBottleneck(c_, c_, shortcut, k=(1, 3), e=1.0, momentum=momentum, eps=eps, sync_bn=sync_bn) + for _ in range(n) + ] + ) + self.concat = ops.Concat(axis=1) + + def construct(self, x): + c1 = self.conv1(x) + c2 = self.m(c1) + c3 = self.conv2(x) + c4 = self.concat((c2, c3)) + c5 = self.conv3(c4) + + return c5 diff --git a/community/cv/ShipWise/mindyolo/models/layers/common.py b/community/cv/ShipWise/mindyolo/models/layers/common.py new file mode 100644 index 0000000000000000000000000000000000000000..5c45f02d62efbe2cdf6d48fbaee7a8aebbee39f2 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/layers/common.py @@ -0,0 +1,73 @@ +import numpy as np + +import mindspore as ms +from mindspore import Tensor, nn, ops + + +class Shortcut(nn.Cell): + """ + Shortcut layer. 
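+    Adds the two inputs element-wise when given a pair (x1, x2) as a tuple or
+    list; any other input is returned unchanged.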
+ """ + + def construct(self, x): + if isinstance(x, (tuple, list)) and len(x) == 2: + return x[0] + x[1] + return x + + +class Concat(nn.Cell): + """ + Connect tensor in the specified axis. + """ + + def __init__(self, axis=1): + super(Concat, self).__init__() + self.axis = axis + + def construct(self, x): + return ops.concat(x, self.axis) + + +class ReOrg(nn.Cell): + """ + Reorganize the input Tensor (b, c, w, h) into a new shape (b, 4c, w/2, h/2). + """ + + def __init__(self): + super(ReOrg, self).__init__() + + def construct(self, x): + # in: (b,c,w,h) -> out: (b,4c,w/2,h/2) + x1 = x[:, :, ::2, ::2] + x2 = x[:, :, 1::2, ::2] + x3 = x[:, :, ::2, 1::2] + x4 = x[:, :, 1::2, 1::2] + out = ops.concat((x1, x2, x3, x4), 1) + return out + + +class Identity(nn.Cell): + def construct(self, x): + return x + + +class DFL(nn.Cell): + # Integral module of Distribution Focal Loss (DFL) proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391 + def __init__(self, c1=16): + super().__init__() + self.conv = nn.Conv2d(c1, 1, 1, has_bias=False) + self.conv.weight.requires_grad = False + self.c1 = c1 + self.softmax = ops.Softmax(axis=1) + + def construct(self, x): + b, c, a = x.shape # batch, channels, anchors + x = self.softmax(x.view(b, 4, self.c1, a).swapaxes(2, 1)) + x = self.conv(x) + x = x.view(b, 4, a) + return x + + def initialize_conv_weight(self): + self.conv.weight = ops.assign( + self.conv.weight, Tensor(np.arange(self.c1).reshape((1, self.c1, 1, 1)), dtype=ms.float32) + ) diff --git a/community/cv/ShipWise/mindyolo/models/layers/conv.py b/community/cv/ShipWise/mindyolo/models/layers/conv.py new file mode 100644 index 0000000000000000000000000000000000000000..ff801ec39b0d17ce49e010db0ac77a058a182bd7 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/layers/conv.py @@ -0,0 +1,168 @@ +from mindspore import nn, ops + +from .common import Identity +from .utils import autopad + + +class ConvNormAct(nn.Cell): + """Conv2d + BN + Act + + Args: + c1 (int): In channels, the channel number of the input tensor of the Conv2d layer. + c2 (int): Out channels, the channel number of the output tensor of the Conv2d layer. + k (Union[int, tuple[int]]): Kernel size, Specifies the height and width of the 2D convolution kernel. + The data type is an integer or a tuple of two integers. An integer represents the height + and width of the convolution kernel. A tuple of two integers represents the height + and width of the convolution kernel respectively. Default: 1. + s (Union[int, tuple[int]]): Stride, the movement stride of the 2D convolution kernel. + The data type is an integer or a tuple of two integers. An integer represents the movement step size + in both height and width directions. A tuple of two integers represents the movement step size in the height + and width directions respectively. Default: 1. + p (Union[None, int, tuple[int]]): Padding, the number of padding on the height and width directions of the input. + The data type is None or an integer or a tuple of four integers. If `padding` is an None, then padding with autopad. + If `padding` is an integer, then the top, bottom, left, and right padding are all equal to `padding`. + If `padding` is a tuple of 4 integers, then the top, bottom, left, and right padding + is equal to `padding[0]`, `padding[1]`, `padding[2]`, and `padding[3]` respectively. + The value should be greater than or equal to 0. Default: None. + g (int): Group, Splits filter into groups, `c1` and `c2` must be + divisible by `group`. 
If the group is equal to `c1` and `c2`, + this 2D convolution layer also can be called 2D depthwise convolution layer. Default: 1. + d (Union[int, tuple[int]]): Dilation, Dilation size of 2D convolution kernel. + The data type is an integer or a tuple of two integers. If :math:`k > 1`, the kernel is sampled + every `k` elements. The value of `k` on the height and width directions is in range of [1, H] + and [1, W] respectively. Default: 1. + act (Union[bool, nn.Cell]): Activation. The data type is bool or nn.Cell. If `act` is True, + then the activation function uses nn.SiLU. If `act` is False, do not use activation function. + If 'act' is nn.Cell, use the object of this cell as the activation function. Default: True. + sync_bn (bool): Whether the BN layer use nn.SyncBatchNorm. Default: False. + """ + + def __init__( + self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True, momentum=0.97, eps=1e-3, sync_bn=False + ): # ch_in, ch_out, kernel, stride, padding, groups + super(ConvNormAct, self).__init__() + self.conv = nn.Conv2d( + c1, c2, k, s, pad_mode="pad", padding=autopad(k, p, d), group=g, dilation=d, has_bias=False + ) + + if sync_bn: + self.bn = nn.SyncBatchNorm(c2, momentum=momentum, eps=eps) + else: + self.bn = nn.BatchNorm2d(c2, momentum=momentum, eps=eps) + self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Cell) else Identity) + + def construct(self, x): + return self.act(self.bn(self.conv(x))) + + +class RepConv(nn.Cell): + """Represented convolution, https://arxiv.org/abs/2101.03697 + + Args: + c1 (int): In channels, the channel number of the input tensor of the Conv2d layer. + c2 (int): Out channels, the channel number of the output tensor of the Conv2d layer. + k (Union[int, tuple[int]]): Kernel size, Specifies the height and width of the 2D convolution kernel. + The data type is an integer or a tuple of two integers. An integer represents the height + and width of the convolution kernel. A tuple of two integers represents the height + and width of the convolution kernel respectively. Default: 1. + s (Union[int, tuple[int]]): Stride, the movement stride of the 2D convolution kernel. + The data type is an integer or a tuple of two integers. An integer represents the movement step size + in both height and width directions. A tuple of two integers represents the movement step size in the height + and width directions respectively. Default: 1. + p (Union[None, int, tuple[int]]): Padding, the number of padding on the height and width directions of the input. + The data type is None or an integer or a tuple of four integers. If `padding` is an None, then padding with autopad. + If `padding` is an integer, then the top, bottom, left, and right padding are all equal to `padding`. + If `padding` is a tuple of 4 integers, then the top, bottom, left, and right padding + is equal to `padding[0]`, `padding[1]`, `padding[2]`, and `padding[3]` respectively. + The value should be greater than or equal to 0. Default: None. + g (int): Group, Splits filter into groups, `c1` and `c2` must be + divisible by `group`. If the group is equal to `c1` and `c2`, + this 2D convolution layer also can be called 2D depthwise convolution layer. Default: 1. + act (Union[bool, nn.Cell]): Activation. The data type is bool or nn.Cell. If `act` is True, + then the activation function uses nn.SiLU. If `act` is False, do not use activation function. + If 'act' is nn.Cell, use the object of this cell as the activation function. Default: True. + sync_bn (bool): Whether the BN layer use nn.SyncBatchNorm. 
Default: False. + """ + + def __init__(self, c1, c2, k=3, s=1, p=None, g=1, act=True, momentum=0.97, eps=1e-3, sync_bn=False): + super(RepConv, self).__init__() + + self.groups = g + self.in_channels = c1 + self.out_channels = c2 + + assert k == 3 + assert autopad(k, p) == 1 + + padding_11 = autopad(k, p) - k // 2 + + self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Cell) else Identity) + + if sync_bn: + BatchNorm = nn.SyncBatchNorm + else: + BatchNorm = nn.BatchNorm2d + + self.rbr_identity = BatchNorm(num_features=c1, momentum=(1 - 0.03), eps=1e-3) if c2 == c1 and s == 1 else None + self.rbr_dense = nn.SequentialCell( + [ + nn.Conv2d(c1, c2, k, s, pad_mode="pad", padding=autopad(k, p), group=g, has_bias=False), + BatchNorm(num_features=c2, momentum=momentum, eps=eps), + ] + ) + self.rbr_1x1 = nn.SequentialCell( + nn.Conv2d(c1, c2, 1, s, pad_mode="pad", padding=padding_11, group=g, has_bias=False), + BatchNorm(num_features=c2, momentum=momentum, eps=eps), + ) + + def construct(self, inputs): + if self.rbr_identity is None: + id_out = 0.0 + else: + id_out = self.rbr_identity(inputs) + + return self.act(self.rbr_dense(inputs) + self.rbr_1x1(inputs) + id_out) + + def fuse(self): + # TODO: The reparameterization function will be developed in subsequent versions + pass + + +class DownC(nn.Cell): + # Spatial pyramid pooling layer used in YOLOv3-SPP + def __init__(self, c1, c2, n=1, k=2, momentum=0.97, eps=1e-3, sync_bn=False): + super(DownC, self).__init__() + c_ = c1 # hidden channels + self.cv1 = ConvNormAct(c1, c_, 1, 1, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.cv2 = ConvNormAct(c_, c2 // 2, 3, k, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.cv3 = ConvNormAct(c1, c2 // 2, 1, 1, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.mp = nn.MaxPool2d(kernel_size=k, stride=k) + + def construct(self, x): + return ops.concat((self.cv2(self.cv1(x)), self.cv3(self.mp(x))), axis=1) + + +class Focus(nn.Cell): + # Focus wh information into c-space + def __init__( + self, c1, c2, k=1, s=1, act=True, momentum=0.97, eps=1e-3, sync_bn=False + ): # ch_in, ch_out, kernel, stride, padding, groups + super(Focus, self).__init__() + self.conv = ConvNormAct(c1 * 4, c2, k, s, act=act, momentum=momentum, eps=eps, sync_bn=sync_bn) + + def construct(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) + return self.conv(ops.concat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)) + + +class DWConvNormAct(nn.Cell): + """Conv2d + BN + Act, depthwise ConvNormAct used in yolox nano scale, an approach to reduce parameter number""" + + def __init__( + self, c1, c2, k=1, s=1, p=None, d=1, act=True, momentum=0.97, eps=1e-3, sync_bn=False + ): # ch_in, ch_out, kernel, stride, padding, groups + super(DWConvNormAct, self).__init__() + self.dconv = ConvNormAct(c1, c1, k, s, p, g=c1, d=d, act=act, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.pconv = ConvNormAct(c1, c2, k=1, s=1, p=p, g=1, d=d, act=act, momentum=momentum, eps=eps, sync_bn=sync_bn) + + def construct(self, x): + return self.pconv(self.dconv(x)) diff --git a/community/cv/ShipWise/mindyolo/models/layers/implicit.py b/community/cv/ShipWise/mindyolo/models/layers/implicit.py new file mode 100644 index 0000000000000000000000000000000000000000..dde91d81a488a6b975a6ee8697c1073bb877e189 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/layers/implicit.py @@ -0,0 +1,42 @@ +import numpy as np + +import mindspore as ms +from mindspore import Parameter, Tensor, nn + + +class ImplicitA(nn.Cell): + """ + 
https://arxiv.org/pdf/2105.04206v1.pdf. Implicit knowledge in YOLOR combined with convolution + feature map in addition and multiplication manner: Implicit knowledge in YOLOR can be simplified to a vector by + pre-computing at the inference stage. This vector can be combined with the bias and weight of the previous or + subsequent convolutional layer. + """ + + def __init__(self, channel, mean=0.0, std=0.02): + super(ImplicitA, self).__init__() + self.channel = channel + self.mean = mean + self.std = std + self.implicit = Parameter(Tensor(np.random.normal(self.mean, self.std, (1, channel, 1, 1)), ms.float32)) + + def construct(self, x): + return self.implicit + x + + +class ImplicitM(nn.Cell): + """ + https://arxiv.org/pdf/2105.04206v1.pdf. Implicit knowledge in YOLOR combined with convolution + feature map in addition and multiplication manner: Implicit knowledge in YOLOR can be simplified to a vector by + pre-computing at the inference stage. This vector can be combined with the bias and weight of the previous or + subsequent convolutional layer. + """ + + def __init__(self, channel, mean=0.0, std=0.02): + super(ImplicitM, self).__init__() + self.channel = channel + self.mean = mean + self.std = std + self.implicit = Parameter(Tensor(np.random.normal(self.mean, self.std, (1, channel, 1, 1)), ms.float32)) + + def construct(self, x): + return self.implicit * x diff --git a/community/cv/ShipWise/mindyolo/models/layers/pool.py b/community/cv/ShipWise/mindyolo/models/layers/pool.py new file mode 100644 index 0000000000000000000000000000000000000000..342fcff5409b28c4c850a7c838e850441bb19092 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/layers/pool.py @@ -0,0 +1,44 @@ +from mindspore import nn + + +class MP(nn.Cell): + """ + Use the same step size and kernel size for maxpool. + """ + + def __init__(self, k=2): + super(MP, self).__init__() + self.m = nn.MaxPool2d(kernel_size=k, stride=k) + + def construct(self, x): + return self.m(x) + + +class SP(nn.Cell): + """ + Use autopad for maxpool. + """ + + def __init__(self, k=3, s=1): + super(SP, self).__init__() + self.m = MaxPool2d(kernel_size=k, stride=s, padding=k // 2) + + def construct(self, x): + return self.m(x) + + +class MaxPool2d(nn.Cell): + """ + Maxpool with pad. + """ + + def __init__(self, kernel_size, stride, padding=0): + super(MaxPool2d, self).__init__() + assert isinstance(padding, int) + self.pad = nn.Pad(paddings=((0, 0), (0, 0), (padding, padding), (padding, padding))) + self.pool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride) + + def construct(self, x): + x = self.pad(x) + x = self.pool(x) + return x diff --git a/community/cv/ShipWise/mindyolo/models/layers/spp.py b/community/cv/ShipWise/mindyolo/models/layers/spp.py new file mode 100644 index 0000000000000000000000000000000000000000..4d7351e983b1bd9bc4cd40dd8dcc7ccca0117d87 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/layers/spp.py @@ -0,0 +1,57 @@ +from mindspore import nn, ops + +from .conv import ConvNormAct +from .pool import MaxPool2d + + +class SPPCSPC(nn.Cell): + """ + CSPNet, https://arxiv.org/pdf/1911.11929v1.pdf. The main purpose of designing CSPNet is to enable + this architecture to achieve a richer gradient combination while reducing the amount of computation. This aim + is achieved by partitioning feature map of the base layer into two parts and then merging them through a proposed + cross-stage hierarchy. Our main concept is to make the gradient flow propagate through different network paths + by splitting the gradient flow. 
In this way, we have confirmed that the propagated gradient information can + have a large correlation difference by switching concatenation and transition steps. + """ + + def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5, k=(5, 9, 13), momentum=0.97, eps=1e-3, sync_bn=False): + super(SPPCSPC, self).__init__() + c_ = int(2 * c2 * e) # hidden channels + self.cv1 = ConvNormAct(c1, c_, 1, 1, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.cv2 = ConvNormAct(c1, c_, 1, 1, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.cv3 = ConvNormAct(c_, c_, 3, 1, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.cv4 = ConvNormAct(c_, c_, 1, 1, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.m = nn.CellList([MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) + self.cv5 = ConvNormAct(4 * c_, c_, 1, 1, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.cv6 = ConvNormAct(c_, c_, 3, 1, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.cv7 = ConvNormAct(2 * c_, c2, 1, 1, momentum=momentum, eps=eps, sync_bn=sync_bn) + + def construct(self, x): + x1 = self.cv4(self.cv3(self.cv1(x))) + m_tuple = (x1,) + for i in range(len(self.m)): + m_tuple += (self.m[i](x1),) + y1 = self.cv6(self.cv5(ops.Concat(axis=1)(m_tuple))) + y2 = self.cv2(x) + return self.cv7(ops.Concat(axis=1)((y1, y2))) + + +class SPPF(nn.Cell): + # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher + def __init__( + self, c1, c2, k=5, act=True, momentum=0.97, eps=1e-3, sync_bn=False + ): # equivalent to SPP(k=(5, 9, 13)) + super(SPPF, self).__init__() + c_ = c1 // 2 # hidden channels + self.conv1 = ConvNormAct(c1, c_, 1, 1, act=act, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.conv2 = ConvNormAct(c_ * 4, c2, 1, 1, act=act, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.concat = ops.Concat(axis=1) + self.m = nn.MaxPool2d(kernel_size=k, stride=1, pad_mode="same") + + def construct(self, x): + x = self.conv1(x) + y1 = self.m(x) + y2 = self.m(y1) + y3 = self.m(y2) + y = self.conv2(self.concat((x, y1, y2, y3))) + return y diff --git a/community/cv/ShipWise/mindyolo/models/layers/upsample.py b/community/cv/ShipWise/mindyolo/models/layers/upsample.py new file mode 100644 index 0000000000000000000000000000000000000000..96f3e2c8d8f41fdd45eacc27fbd6dd5dafe74421 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/layers/upsample.py @@ -0,0 +1,29 @@ +from mindspore import nn, ops + + +class Upsample(nn.Cell): + """ + Using the interpolate method specified by `mode` resize the input tensor. + + Args: + scales (tuple[float], optional): a tuple of float. Describe the scale along each dimension. + Its length is the same as that of shape of `x`. The numbers in `scales` must all be positive. Only one of + `scales` and `sizes` can be specified. + sizes (tuple[int], optional): a tuple of int, describes the shape of the output tensor. The numbers in `sizes` + must all be positive. Only one of `scales` and `sizes` can be specified. If `sizes` is specified, then set + `scales` to 'None' in this operator's input list. It is 1 int elements :math:`(new\_width,)` when `mode` + is "linear". It is 2 int elements :math:`(new\_height, new\_width)` when `mode` is "bilinear". + mode (string): The method used to interpolate: 'linear' | 'bilinear'. Default is 'linear'. 
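+
+    Example (illustrative only; `x` is assumed to be an NCHW tensor):
+        >>> up = Upsample(scales=2, mode="nearest")
+        >>> y = up(x)  # (b, c, h, w) -> (b, c, 2*h, 2*w)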
+    """
+
+    def __init__(self, sizes=None, scales=None, mode="nearest"):
+        super(Upsample, self).__init__()
+        self.sizes = sizes
+        self.scales = scales
+        self.mode = mode
+
+    def construct(self, x):
+        if self.mode == "nearest" and self.scales:
+            return ops.ResizeNearestNeighbor((x.shape[-2] * self.scales, x.shape[-1] * self.scales))(x)
+        else:
+            return ops.interpolate(x, sizes=self.sizes, scales=self.scales, mode=self.mode)
diff --git a/community/cv/ShipWise/mindyolo/models/layers/utils.py b/community/cv/ShipWise/mindyolo/models/layers/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..b7a29bcedb0bffa5558275998e6f67257bd136aa
--- /dev/null
+++ b/community/cv/ShipWise/mindyolo/models/layers/utils.py
@@ -0,0 +1,106 @@
+import math
+from typing import Tuple
+
+from mindspore import Tensor, ops
+
+
+def make_divisible(x, divisor):
+    # Returns the smallest multiple of divisor that is >= x
+    return math.ceil(x / divisor) * divisor
+
+
+def autopad(k, p=None, d=1):  # kernel, padding, dilation
+    # Pad to 'same' shape outputs
+    if d > 1:
+        k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k]  # actual kernel-size
+    if p is None:
+        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
+    if isinstance(p, list):
+        assert len(p) == 2
+        p = (p[0], p[0], p[1], p[1])
+    return p
+
+
+# ------------------------box operation starts--------------------------
+def meshgrid(inputs, indexing="xy"):
+    # An alternative implementation of ops.meshgrid; only supports inputs of length 2.
+    # The meshgrid op is not supported on some device models, so this tiled
+    # fallback is adopted; it will be updated later.
+    x, y = inputs
+    nx, ny = x.shape[0], y.shape[0]
+    xv, yv = None, None
+    if indexing == "xy":
+        xv = ops.tile(x.view(1, -1), (ny, 1))
+        yv = ops.tile(y.view(-1, 1), (1, nx))
+    elif indexing == "ij":
+        xv = ops.tile(x.view(-1, 1), (1, ny))
+        yv = ops.tile(y.view(1, -1), (nx, 1))
+
+    return xv, yv
+
+
+def box_cxcywh_to_xyxy(bbox) -> Tensor:
+    """Convert bbox coordinates from (cx, cy, w, h) to (x1, y1, x2, y2)
+
+    Args:
+        bbox (Tensor): Shape (n, 4) for bboxes.
+
+    Returns:
+        Tensor: Converted bboxes.
+    """
+    cx, cy, w, h = ops.unstack(bbox, axis=-1)
+    new_bbox = tuple([(cx - 0.5 * w), (cy - 0.5 * h), (cx + 0.5 * w), (cy + 0.5 * h)])
+    return ops.stack(new_bbox, axis=-1)
+
+
+def box_xyxy_to_cxcywh(bbox) -> Tensor:
+    """Convert bbox coordinates from (x1, y1, x2, y2) to (cx, cy, w, h)
+
+    Args:
+        bbox (Tensor): Shape (n, 4) for bboxes.
+
+    Returns:
+        Tensor: Converted bboxes.
+    """
+    x0, y0, x1, y1 = ops.unstack(bbox, axis=-1)
+    new_bbox = tuple([(x0 + x1) / 2, (y0 + y1) / 2, (x1 - x0), (y1 - y0)])
+    return ops.stack(new_bbox, axis=-1)
+
+
+def box_scale(boxes, scale, scale_reciprocal=False) -> Tensor:
+    """
+    Scale the box with horizontal and vertical scaling factors
+
+    Args:
+        boxes (Tensor[N, 4] or [bs, N, 4]): boxes are specified by their (x1, y1, x2, y2) coordinates
+        scale (Tuple[2]): scale factors for x and y coordinates
+        scale_reciprocal (bool): if True, divide by the scale factors instead of multiplying. Default: False.
+    """
+    assert len(boxes.shape) in [2, 3]
+    scale_x, scale_y = scale
+    if scale_reciprocal:
+        scale_x, scale_y = 1.0 / scale_x, 1.0 / scale_y
+    new_scale = Tensor([scale_x, scale_y, scale_x, scale_y])  # (4,), broadcast over the box dimension
+    boxes *= new_scale
+    return boxes
+
+
+def box_clip(boxes, clip_size: Tuple[int, int]) -> Tensor:
+    """
+    Clip the boxes by limiting x coordinates to the range [0, width]
+    and y coordinates to the range [0, height].
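+    The clipped coordinates are stacked into a new tensor that is returned; the
+    input `boxes` tensor itself is not modified.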
+ + Args: + boxes (Tensor[N, 4]): boxes are specified by their (x1, y1, x2, y2) coordinates + clip_size (height, width): The clipping box's size. + """ + h, w = clip_size + x1 = boxes[..., 0].clip(0, w) + y1 = boxes[..., 1].clip(0, h) + x2 = boxes[..., 2].clip(0, w) + y2 = boxes[..., 3].clip(0, h) + boxes = ops.stack((x1, y1, x2, y2), axis=-1) + return boxes + + +# ------------------------box operation ends-------------------------- diff --git a/community/cv/ShipWise/mindyolo/models/losses/__init__.py b/community/cv/ShipWise/mindyolo/models/losses/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b54930a120e46df30d6ea4d2387daf53618030db --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/losses/__init__.py @@ -0,0 +1,17 @@ +from . import (loss_factory, yolov3_loss, yolov4_loss, yolov5_loss, + yolov7_loss, yolov8_loss) +from .loss_factory import * +from .yolov3_loss import * +from .yolov4_loss import * +from .yolov5_loss import * +from .yolov7_loss import * +from .yolov8_loss import * +from .yolox_loss import * + +__all__ = [] +__all__.extend(yolov3_loss.__all__) +__all__.extend(yolov4_loss.__all__) +__all__.extend(yolov5_loss.__all__) +__all__.extend(yolov7_loss.__all__) +__all__.extend(yolov8_loss.__all__) +__all__.extend(loss_factory.__all__) diff --git a/community/cv/ShipWise/mindyolo/models/losses/focal_loss.py b/community/cv/ShipWise/mindyolo/models/losses/focal_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..371b5dddee7285420e09e30a830d47543041680b --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/losses/focal_loss.py @@ -0,0 +1,98 @@ +import mindspore as ms +from mindspore import nn, ops + + +def smooth_BCE(eps=0.1): + """ + Return positive, negative label smoothing BCE targets, + https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441 + """ + return 1.0 - 0.5 * eps, 0.5 * eps + + +class FocalLoss(nn.Cell): + """ + Focal Loss for Dense Object Detection, https://arxiv.org/pdf/1708.02002v2.pdf + + Args: + bce_weight (Tensor, optional): A rescaling weight applied to the loss of each batch element for BCEWithLogitsLoss. + If not None, it can be broadcast to a tensor with shape of `logits`, + data type must be float16 or float32. Default: None. + bce_pos_weight (Tensor, optional): A weight of positive examples for BCEWithLogitsLoss. Must be a vector with length equal to the + number of classes. If not None, it must be broadcast to a tensor with shape of `logits`, data type + must be float16 or float32. Default: None. + gamma: A modulating factor (1 − pt)^gamma to the cross entropy loss, with tunable focusing. Default: 1.5 + alpha: An alpha-balanced variant of the focal loss. Default: 0.25 + reduction (str): Type of reduction to be applied to loss. The optional values are 'mean', 'sum', and 'none'. + If 'none', do not perform reduction. Default: 'mean'. 
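+
+        Example (illustrative; shapes are assumed, targets are 0/1 tensors):
+            >>> loss_fn = FocalLoss(gamma=1.5, alpha=0.25, reduction="mean")
+            >>> loss = loss_fn(pred_logits, targets)  # element-wise focal BCE, then mean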
+ """ + + def __init__(self, bce_weight=None, bce_pos_weight=None, gamma=1.5, alpha=0.25, reduction="mean"): + super(FocalLoss, self).__init__() + self.loss_fcn = nn.BCEWithLogitsLoss(weight=bce_weight, pos_weight=bce_pos_weight, reduction="none") + self.gamma = gamma + self.alpha = alpha + self.reduction = reduction # default mean + assert self.loss_fcn.reduction == "none" # required to apply FL to each element + + def construct(self, pred, true, mask=None): + ori_dtype = pred.dtype + loss = self.loss_fcn(pred.astype(ms.float32), true.astype(ms.float32)) + + # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py + pred_prob = ops.sigmoid(pred) # prob from logits + p_t = true * pred_prob + (1 - true) * (1 - pred_prob) + alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) + modulating_factor = (1.0 - p_t) ** self.gamma + loss *= alpha_factor * modulating_factor + + if mask is not None: + loss *= mask + + if self.reduction == "mean": + if mask is not None: + return (loss.sum() / mask.astype(loss.dtype).sum().clip(1, None)).astype(ori_dtype) + return loss.mean().astype(ori_dtype) + elif self.reduction == "sum": + return loss.sum().astype(ori_dtype) + else: # 'none' + return loss.astype(ori_dtype) + + +class BCEWithLogitsLoss(nn.Cell): + def __init__(self, bce_weight=None, bce_pos_weight=None, reduction="mean"): + """ + Adds sigmoid activation function to input logits, and uses the given logits to compute binary cross entropy + between the logits and the labels. + + Args: + bce_weight (Tensor, optional): A rescaling weight applied to the loss of each batch element. + If not None, it can be broadcast to a tensor with shape of `logits`, + data type must be float16 or float32. Default: None. + bce_pos_weight (Tensor, optional): A weight of positive examples. Must be a vector with length equal to the + number of classes. If not None, it must be broadcast to a tensor with shape of `logits`, data type + must be float16 or float32. Default: None. + reduction (str): Type of reduction to be applied to loss. The optional values are 'mean', 'sum', and 'none'. + If 'none', do not perform reduction. Default: 'mean'. + """ + + super(BCEWithLogitsLoss, self).__init__() + self.loss_fcn = nn.BCEWithLogitsLoss(weight=bce_weight, pos_weight=bce_pos_weight, reduction="none") + self.reduction = reduction # default mean + assert self.loss_fcn.reduction == "none" # required to apply FL to each element + + def construct(self, pred, true, mask=None): + ori_dtype = pred.dtype + loss = self.loss_fcn(pred.astype(ms.float32), true.astype(ms.float32)) + + if mask is not None: + loss *= mask + + if self.reduction == "mean": + if mask is not None: + return (loss.sum() / mask.astype(loss.dtype).sum().clip(1, None)).astype(ori_dtype) + return loss.mean().astype(ori_dtype) + elif self.reduction == "sum": + return loss.sum().astype(ori_dtype) + else: # 'none' + return loss.astype(ori_dtype) diff --git a/community/cv/ShipWise/mindyolo/models/losses/iou_loss.py b/community/cv/ShipWise/mindyolo/models/losses/iou_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..f3227778b6a84dbef221f83bf393b2ae99cdb632 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/losses/iou_loss.py @@ -0,0 +1,152 @@ +import math + +import mindspore as ms +from mindspore import Tensor, ops + +from mindyolo.models.layers.utils import box_cxcywh_to_xyxy + +PI = Tensor(math.pi, ms.float32) +EPS = 1e-7 + + +def box_area(box): + """ + Return area of boxes. 
+ Both sets of boxes are expected to be in (x1, y1, x2, y2) format. + Arguments: + box (Tensor[N, 4]) + Returns: + area (Tensor[N,]) + """ + return (box[:, 2] - box[:, 0]) * (box[:, 3] - box[:, 1]) + + +def batch_box_area(box): + """ + Return area of batch boxes. + Both sets of boxes are expected to be in (x1, y1, x2, y2) format. + Arguments: + box (Tensor[B, N, 4]) + Returns: + area (Tensor[B, N]) + """ + return (box[:, :, 2] - box[:, :, 0]) * (box[:, :, 3] - box[:, :, 1]) + + +def box_iou(box1, box2): + # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py + """ + Return intersection-over-union (Jaccard index) of boxes. + Both sets of boxes are expected to be in (x1, y1, x2, y2) format. + Arguments: + box1 (Tensor[N, 4]) + box2 (Tensor[M, 4]) + Returns: + iou (Tensor[N, M]): the NxM matrix containing the pairwise + IoU values for every element in boxes1 and boxes2 + """ + + area1 = box_area(box1) + area2 = box_area(box2) + + expand_size_1 = box2.shape[0] + expand_size_2 = box1.shape[0] + + box1 = ops.tile(ops.expand_dims(box1, 1), (1, expand_size_1, 1)) + box2 = ops.tile(ops.expand_dims(box2, 0), (expand_size_2, 1, 1)) + + # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) + # inter = ops.minimum(box1[:, None, 2:], box2[None, :, 2:]) - ops.maximum(box1[:, None, :2], box2[None, :, :2]) + inter = ops.minimum(box1[..., 2:], box2[..., 2:]) - ops.maximum(box1[..., :2], box2[..., :2]) + inter = inter.clip(0.0, None) + inter = inter[:, :, 0] * inter[:, :, 1] + return inter / (area1[:, None] + area2[None, :] - inter).clip(EPS, None) # iou = inter / (area1 + area2 - inter) + + +def batch_box_iou(batch_box1, batch_box2, xywh=False): + """ + Return intersection-over-union (Jaccard index) of boxes. + Both sets of boxes are expected to be in (x1, y1, x2, y2) format. + Arguments: + box1 (Tensor[B, N, 4]) + box2 (Tensor[B, M, 4]) + Returns: + iou (Tensor[B, N, M]): the NxM matrix containing the pairwise + IoU values for every element in boxes1 and boxes2 + """ + if xywh: + batch_box1 = box_cxcywh_to_xyxy(batch_box1) + batch_box2 = box_cxcywh_to_xyxy(batch_box2) + + area1 = batch_box_area(batch_box1) + area2 = batch_box_area(batch_box2) + + expand_size_1 = batch_box2.shape[1] + expand_size_2 = batch_box1.shape[1] + batch_box1 = ops.tile(ops.expand_dims(batch_box1, 2), (1, 1, expand_size_1, 1)) + batch_box2 = ops.tile(ops.expand_dims(batch_box2, 1), (1, expand_size_2, 1, 1)) + + # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) + inter = ops.minimum(batch_box1[..., 2:], batch_box2[..., 2:]) - ops.maximum( + batch_box1[..., :2], batch_box2[..., :2] + ) + inter = inter.clip(0.0, None) + inter = inter[:, :, :, 0] * inter[:, :, :, 1] + return inter / (area1[:, :, None] + area2[:, None, :] - inter).clip( + EPS, None + ) # iou = inter / (area1 + area2 - inter) + + +def bbox_iou(box1, box2, xywh=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7): + """ + Return intersection-over-union (IoU) of boxes. + Arguments: + box1 (Tensor[N, 4]) or (Tensor[bs, N, 4]) + box2 (Tensor[N, 4]) or (Tensor[bs, N, 4]) + xywh (bool): Whether the box format is (x_center, y_center, w, h) or (x1, y1, x2, y2). Default: True. + GIoU (bool): Whether to use GIoU. Default: False. + DIoU (bool): Whether to use DIoU. Default: False. + CIoU (bool): Whether to use CIoU. Default: False. 
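+        eps (float): Small constant used to avoid division by zero. Default: 1e-7.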
+ Returns: + iou (Tensor[N,]): the IoU values for every element in boxes1 and boxes2 + """ + + # Get the coordinates of bounding boxes + if xywh: # transform from xywh to xyxy + x1, y1, w1, h1 = ops.split(box1, split_size_or_sections=1, axis=-1) + x2, y2, w2, h2 = ops.split(box2, split_size_or_sections=1, axis=-1) + w1_, h1_, w2_, h2_ = w1 / 2, h1 / 2, w2 / 2, h2 / 2 + b1_x1, b1_x2, b1_y1, b1_y2 = x1 - w1_, x1 + w1_, y1 - h1_, y1 + h1_ + b2_x1, b2_x2, b2_y1, b2_y2 = x2 - w2_, x2 + w2_, y2 - h2_, y2 + h2_ + else: # x1, y1, x2, y2 = box1 + b1_x1, b1_y1, b1_x2, b1_y2 = ops.split(box1, split_size_or_sections=1, axis=-1) + b2_x1, b2_y1, b2_x2, b2_y2 = ops.split(box2, split_size_or_sections=1, axis=-1) + + # Intersection area + inter = (ops.minimum(b1_x2, b2_x2) - ops.maximum(b1_x1, b2_x1)).clip(0., None) * \ + (ops.minimum(b1_y2, b2_y2) - ops.maximum(b1_y1, b2_y1)).clip(0., None) + + # Union Area + w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps + w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps + union = w1 * h1 + w2 * h2 - inter + eps + + # IoU + iou = inter / union + + if CIoU or DIoU or GIoU: + cw = ops.maximum(b1_x2, b2_x2) - ops.minimum(b1_x1, b2_x1) # convex (smallest enclosing box) width + ch = ops.maximum(b1_y2, b2_y2) - ops.minimum(b1_y1, b2_y1) # convex height + if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 + c2 = cw**2 + ch**2 + eps # convex diagonal squared + rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center dist ** 2 + if CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 + # v = (4 / get_pi(iou.dtype) ** 2) * ops.pow(ops.atan(w2 / (h2 + eps)) - ops.atan(w1 / (h1 + eps)), 2) + v = (4 / PI.astype(iou.dtype) ** 2) * ops.pow(ops.atan(w2 / (h2 + eps)) - ops.atan(w1 / (h1 + eps)), 2) + alpha = v / (v - iou + (1 + eps)) + alpha = ops.stop_gradient(alpha) + return iou - (rho2 / c2 + v * alpha) # CIoU + return iou - rho2 / c2 # DIoU + c_area = cw * ch + eps # convex area + return iou - (c_area - union) / c_area # GIoU https://arxiv.org/pdf/1902.09630.pdf + return iou # IoU diff --git a/community/cv/ShipWise/mindyolo/models/losses/loss_factory.py b/community/cv/ShipWise/mindyolo/models/losses/loss_factory.py new file mode 100644 index 0000000000000000000000000000000000000000..79d0dd93e377e98dee5bdef291669c76431b7694 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/losses/loss_factory.py @@ -0,0 +1,15 @@ +from mindyolo.models.registry import is_model, model_entrypoint + +__all__ = ["create_loss"] + + +def create_loss(name: str, **kwargs): + kwargs = {k: v for k, v in kwargs.items() if v is not None} + + if not is_model(name): + raise RuntimeError(f"Unknown loss module {name}") + + create_fn = model_entrypoint(name) + loss_fn = create_fn(**kwargs) + + return loss_fn diff --git a/community/cv/ShipWise/mindyolo/models/losses/yolov3_loss.py b/community/cv/ShipWise/mindyolo/models/losses/yolov3_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..516f06bc1cf7224d302810bddd5ba1647f50d8fd --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/losses/yolov3_loss.py @@ -0,0 +1,249 @@ +import numpy as np + +import mindspore as ms +import mindspore.numpy as mnp +from mindspore import Tensor, nn, ops + +from mindyolo.models.registry import register_model +from .focal_loss import BCEWithLogitsLoss, FocalLoss, smooth_BCE +from .iou_loss import batch_box_iou, bbox_iou + +CLIP_VALUE = 1000.0 +EPS = 1e-7 + +__all__ = ["YOLOv3Loss"] + + +@register_model +class 
YOLOv3Loss(nn.Cell): + def __init__( + self, box, obj, cls, anchor_t, label_smoothing, fl_gamma, cls_pw, obj_pw, anchors, stride, nc, **kwargs + ): + super(YOLOv3Loss, self).__init__() + self.hyp_box = box + self.hyp_obj = obj + self.hyp_cls = cls + self.hyp_anchor_t = anchor_t + self.nc = nc # number of classes + self.na = len(anchors[0]) // 2 # number of anchors + self.nl = len(anchors) # number of layers + + stride = np.array(stride) + anchors = np.array(anchors).reshape((self.nl, -1, 2)) + anchors = anchors / stride.reshape((-1, 1, 1)) + self.stride = Tensor(stride, ms.int32) + self.anchors = Tensor(anchors, ms.float32) # shape(nl,na,2) + + # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 + self.cp, self.cn = smooth_BCE(eps=label_smoothing) # positive, negative BCE targets + # Focal loss + g = fl_gamma # focal loss gamma + if g > 0: + BCEcls, BCEobj = FocalLoss(bce_pos_weight=Tensor([cls_pw], ms.float32), gamma=g), FocalLoss( + bce_pos_weight=Tensor([obj_pw], ms.float32), gamma=g + ) + else: + # Define criteria + BCEcls = BCEWithLogitsLoss(bce_pos_weight=Tensor(np.array([cls_pw]), ms.float32)) + BCEobj = BCEWithLogitsLoss(bce_pos_weight=Tensor(np.array([obj_pw]), ms.float32)) + + _balance = {3: [4.0, 1.0, 0.4]}.get(self.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 + self.balance = ms.Parameter(Tensor(_balance, ms.float32), requires_grad=False) + self.BCEcls, self.BCEobj, self.gr = BCEcls, BCEobj, 1.0 + + self._off = Tensor( + [ + [0, 0], + [1, 0], + [0, 1], + [-1, 0], + [0, -1], # j,k,l,m + ], + dtype=ms.float32, + ) + + self.loss_item_name = ["loss", "lbox", "lobj", "lcls"] # branch name returned by lossitem for print + + def construct(self, p, targets, imgs): + lcls, lbox, lobj = 0.0, 0.0, 0.0 + tcls, tbox, indices, anchors, tmasks = self.build_targets( + p, targets + ) # class, box, (image, anchor, gridj, gridi), anchors, mask + tcls, tbox, indices, anchors, tmasks = ( + ops.stop_gradient(tcls), + ops.stop_gradient(tbox), + ops.stop_gradient(indices), + ops.stop_gradient(anchors), + ops.stop_gradient(tmasks), + ) + + # Losses + for layer_index, pi in enumerate(p): # layer index, layer predictions + tmask = tmasks[layer_index] + b, a, gj, gi = ops.split(indices[layer_index] * tmask[None, :], split_size_or_sections=1, axis=0) # image, anchor, gridy, gridx + b, a, gj, gi = b.view(-1), a.view(-1), gj.view(-1), gi.view(-1) + tobj = ops.zeros(pi.shape[:4], pi.dtype) # target obj + + n = b.shape[0] # number of targets + if n: + _meta_pred = pi[b, a, gj, gi] # gather from (bs,na,h,w,nc) + pxy, pwh, _, pcls = _meta_pred[:, :2], _meta_pred[:, 2:4], _meta_pred[:, 4:5], _meta_pred[:, 5:] + + # Regression + pxy = ops.Sigmoid()(pxy) * 2 - 0.5 + pwh = (ops.Sigmoid()(pwh) * 2) ** 2 * anchors[layer_index] + pbox = ops.concat((pxy, pwh), 1) # predicted box + iou = bbox_iou(pbox, tbox[layer_index], CIoU=True).squeeze() # iou(prediction, target) + # iou = iou * tmask + # lbox += ((1.0 - iou) * tmask).mean() # iou loss + lbox += (((1.0 - iou) * tmask).sum() / tmask.astype(iou.dtype).sum().clip(1, None)).astype(iou.dtype) + + # Objectness + iou = ops.stop_gradient(iou).clip(0, None).astype(pi.dtype) + # tobj[b, a, gj, gi] = iou * tmask # iou ratio + tobj[b, a, gj, gi] = ( + (1.0 - self.gr) + self.gr * ops.stop_gradient(iou).clip(0, None) + ) * tmask # iou ratio + + # Classification + if self.nc > 1: # cls loss (only if multiple classes) + t = ops.fill(pcls.dtype, pcls.shape, self.cn) # targets + + t[mnp.arange(n), tcls[layer_index]] = self.cp + lcls += self.BCEcls(pcls, t, 
ops.tile(tmask[:, None], (1, t.shape[-1]))) # BCE + + obji = self.BCEobj(pi[..., 4], tobj) + lobj += obji * self.balance[layer_index] # obj loss + + lbox *= self.hyp_box + lobj *= self.hyp_obj + lcls *= self.hyp_cls + bs = p[0].shape[0] # batch size + + loss = lbox + lobj + lcls + + return loss * bs, ops.stop_gradient(ops.stack((loss, lbox, lobj, lcls))) + + def build_targets(self, p, targets): + # Build targets for compute_loss(), input targets(image,class,x,y,w,h) + targets = targets.view(-1, 6) + mask_t = targets[:, 1] >= 0 + na, nt = self.na, targets.shape[0] # number of anchors, targets + tcls, tbox, indices, anch, tmasks = (), (), (), (), () + gain = ops.ones(7, ms.int32) # normalized to gridspace gain + ai = ops.tile(mnp.arange(na).view(-1, 1), (1, nt)) # shape: (na, nt) + ai = ops.cast(ai, targets.dtype) + targets = ops.concat( + (ops.tile(targets, (na, 1, 1)), ai[:, :, None]), 2 + ) # append anchor indices # shape: (na, nt, 7) + + g = 0.5 # bias + off = ops.cast(self._off, targets.dtype) * g # offsets + + for i in range(self.nl): + anchors, shape = self.anchors[i], p[i].shape + gain[2:6] = get_tensor(shape, targets.dtype)[[3, 2, 3, 2]] # xyxy gain + + # Match targets to anchors + t = targets * gain # shape(na,nt,7) # xywhn -> xywh + # Matches + # if nt: + r = t[..., 4:6] / anchors[:, None] # wh ratio + j = ops.maximum(r, 1 / r).max(2) < self.hyp_anchor_t # compare + + # t = t[j] # filter + mask_m_t = ops.logical_and(j, mask_t[None, :]).view(-1) + t = t.view(-1, 7) + + # Offsets + gxy = t[:, 2:4] # grid xy + gxi = gain[[2, 3]] - gxy # inverse + jk = ops.logical_and((gxy % 1 < g), (gxy > 1)) + lm = ops.logical_and((gxi % 1 < g), (gxi > 1)) + j, k = jk[:, 0], jk[:, 1] + l, m = lm[:, 0], lm[:, 1] + + # # original + # j = ops.stack((ops.ones_like(j), j, k, l, m)) # shape: (5, *) + # t = ops.tile(t, (5, 1, 1)) # shape(5, *, 7) + # mask_m_t = (ops.cast(j, ms.int32) * ops.cast(mask_m_t[None, :], ms.int32)).view(-1) + # t = t.view(-1, 7) + # offsets = (ops.zeros_like(gxy)[None, :, :] + off[:, None, :]) #(1,*,2) + (5,1,2) -> (5,*,2) + # offsets = offsets.view(-1, 2) + + # faster, + tag1, tag2 = ops.identity(j), ops.identity(k) + tag1, tag2 = ops.tile(tag1[:, None], (1, 2)), ops.tile(tag2[:, None], (1, 2)) + j_l = ops.logical_or(j, l).astype(ms.int32) + k_m = ops.logical_or(k, m).astype(ms.int32) + center = ops.ones_like(j_l) + j = ops.stack((center, j_l, k_m)) + t = ops.tile(t, (3, 1, 1)) # shape(5, *, 7) + t = t.view(-1, 7) + mask_m_t = (ops.cast(j, ms.int32) * ops.cast(mask_m_t[None, :], ms.int32)).view(-1) + offsets = ops.zeros_like(gxy)[None, :, :] + off[:, None, :] # (1,*,2) + (5,1,2) -> (5,na*nt,2) + offsets_new = ops.zeros((3,) + offsets.shape[1:], offsets.dtype) + # offsets_new[0, :, :] = offsets[0, :, :] + offsets_new[1:2, :, :] = ops.select(tag1.astype(ms.bool_), offsets[1, :, :], offsets[3, :, :]) + offsets_new[2:3, :, :] = ops.select(tag2.astype(ms.bool_), offsets[2, :, :], offsets[4, :, :]) + offsets = offsets_new + offsets = offsets.view(-1, 2) + + # Define + b, c, gxy, gwh, a = ( + ops.cast(t[:, 0], ms.int32), + ops.cast(t[:, 1], ms.int32), + t[:, 2:4], + t[:, 4:6], + ops.cast(t[:, 6], ms.int32), + ) # (image, class), grid xy, grid wh, anchors + gij = ops.cast(gxy - offsets, ms.int32) + gij = gij[:] + gi, gj = gij[:, 0], gij[:, 1] # grid indices + gi = gi.clip(0, shape[3] - 1) + gj = gj.clip(0, shape[2] - 1) + + # Append + indices += (ops.stack((b, a, gj, gi), 0),) # image, anchor, grid + tbox += (ops.concat((gxy - gij, gwh), 1),) # box + anch += (anchors[a],) # anchors + 
tcls += (c,) # class + tmasks += (mask_m_t,) + + return ( + ops.stack(tcls), + ops.stack(tbox), + ops.stack(indices), + ops.stack(anch), + ops.stack(tmasks), + ) # class, box, (image, anchor, gridj, gridi), anchors, mask + + +def xywh2xyxy(x): + # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right + y = ops.Identity()(x) + y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x + y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y + y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x + y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y + return y + + +@ops.constexpr +def get_tensor(x, dtype=ms.float32): + return Tensor(x, dtype) + + +if __name__ == "__main__": + from mindyolo.models.losses.loss_factory import create_loss + from mindyolo.utils.config import parse_config + + cfg = parse_config() + loss_fn = create_loss( + name="YOLOv7Loss", + **cfg.loss, + anchors=cfg.network.get("anchors", None), + stride=cfg.network.get("stride", None), + nc=cfg.data.get("nc", None), + ) + print(f"loss_fn is {loss_fn}") diff --git a/community/cv/ShipWise/mindyolo/models/losses/yolov4_loss.py b/community/cv/ShipWise/mindyolo/models/losses/yolov4_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..51abf2cb3073eb01bf47531bf29598c44a22bd9a --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/losses/yolov4_loss.py @@ -0,0 +1,278 @@ +import numpy as np + +import mindspore as ms +import mindspore.numpy as mnp +from mindspore import Tensor, nn, ops + +from mindyolo.models.registry import register_model +from .focal_loss import BCEWithLogitsLoss, smooth_BCE +from .iou_loss import bbox_iou + +CLIP_VALUE = 1000.0 +EPS = 1e-7 + +__all__ = ["YOLOv4Loss"] + + +class ConfidenceLoss(nn.Cell): + """Loss for confidence.""" + + def __init__(self): + super(ConfidenceLoss, self).__init__() + self.cross_entropy = ops.SigmoidCrossEntropyWithLogits() + self.reduce_sum = ops.ReduceSum() + + def construct(self, object_mask, predict_confidence, ignore_mask): + confidence_loss = self.cross_entropy(predict_confidence, object_mask) + confidence_loss = object_mask * confidence_loss + (1 - object_mask) * confidence_loss * ignore_mask + confidence_loss = self.reduce_sum(confidence_loss, ()) + return confidence_loss + + +@register_model +class YOLOv4Loss(nn.Cell): + def __init__(self, box, obj, cls, label_smoothing, ignore_threshold, iou_threshold, anchors, nc, **kwargs): + super(YOLOv4Loss, self).__init__() + self.ignore_threshold = ignore_threshold + self.iou = Iou() + self.iou_threshold = iou_threshold + self.hyp_box = box + self.hyp_obj = obj + self.hyp_cls = cls + self.nc = nc # number of classes + + anchors = np.array(anchors) + self.na = anchors.shape[0] # number of anchors + self.nl = 3 # number of layers + + self.anchors = Tensor(anchors, ms.float32) # shape(na,2) + + # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 + self.cp, self.cn = smooth_BCE(eps=label_smoothing) # positive, negative BCE targets + + self.BCEobj = ConfidenceLoss() + self.BCEcls = BCEWithLogitsLoss(reduction="sum") + + self.loss_item_name = ["loss", "lbox", "lobj", "lcls"] # branch name returned by lossitem for print + + self.concat = ops.Concat(axis=-1) + self.reduce_max = ops.ReduceMax(keep_dims=False) + + def construct(self, p, targets, imgs): + image_shape = imgs.shape + gain = get_tensor(image_shape, targets.dtype)[[3, 2]] + ori_targets = targets.copy() + lcls, lbox, lobj = 0.0, 0.0, 0.0 + tcls, tbox, indices, anchors, tmasks = self.build_targets( + p, targets, imgs + ) # class, box, 
(image, anchor, gridj, gridi), anchors, mask + tcls, tbox, indices, anchors, tmasks = ( + ops.stop_gradient(tcls), + ops.stop_gradient(tbox), + ops.stop_gradient(indices), + ops.stop_gradient(anchors), + ops.stop_gradient(tmasks), + ) + + # Losses + for layer_index, yolo_out in enumerate(p): # layer index, layer predictions + pi = yolo_out[0] + tmask = tmasks[layer_index] + b, a, gj, gi = ops.split(indices[layer_index] * tmask[None, :], split_size_or_sections=1, axis=0) # image, anchor, gridy, gridx + b, a, gj, gi = b.view(-1), a.view(-1), gj.view(-1), gi.view(-1) + + pi_shape = pi.shape + y_true = ops.zeros((pi_shape[0], pi_shape[1], pi_shape[2], pi_shape[3], 1), pi.dtype) + y_true[b, gj, gi, a][:, 0] = 1.0 + + n = b.shape[0] # number of targets + if n: + pxy = yolo_out[1][b, gj, gi, a] + pwh = yolo_out[2][b, gj, gi, a] + _meta_pred = pi[b, gj, gi, a] # gather from (bs,na,h,w,nc) + pcls = _meta_pred[:, 5:] + + # Regression + pbox = ops.concat((pxy, pwh), 1) # predicted box + iou = bbox_iou(pbox, tbox, GIoU=True).squeeze() # iou(prediction, target) + # iou = iou * tmask + # lbox += ((1.0 - iou) * tmask).mean() # iou loss + box_loss_scale = 2 - tbox[:, 2] * tbox[:, 3] / gain[0] / gain[1] + lbox += (((1.0 - iou) * tmask * box_loss_scale).sum()).astype(iou.dtype) + + # Classification + if self.nc > 1: # cls loss (only if multiple classes) + t = ops.fill(pcls.dtype, pcls.shape, self.cn) # targets + + t[mnp.arange(n), tcls] = self.cp + lcls += self.BCEcls(pcls, t, ops.tile(tmask[:, None], (1, t.shape[-1]))) # BCE + + gt_box = ori_targets[:, :, 2:] + pred_boxes = self.concat((yolo_out[1], yolo_out[2])) + gt_shape = ops.Shape()(gt_box) + gt_box = ops.Reshape()(gt_box, (gt_shape[0], 1, 1, 1, gt_shape[1], gt_shape[2])) + iou = self.iou(ops.ExpandDims()(pred_boxes, -2), gt_box) + best_iou = self.reduce_max(iou, -1) + ignore_mask = best_iou < self.ignore_threshold + ignore_mask = ops.Cast()(ignore_mask, ms.float32) + ignore_mask = ops.ExpandDims()(ignore_mask, -1) + ignore_mask = ops.stop_gradient(ignore_mask) + object_mask = y_true[:, :, :, :, 0:1] + lobj += self.BCEobj(object_mask, pi[:, :, :, :, 4:5], ignore_mask) # obj loss + + lbox *= self.hyp_box + lobj *= self.hyp_obj + lcls *= self.hyp_cls + bs = p[0][0].shape[0] # batch size + + loss = lbox + lobj + lcls + + # ops.stack doesn't support type ms.float16 under ascend ms2.0, + # refer to issue #154 (https://github.com/mindspore-lab/mindyolo/issues/154) + return loss / bs / 8, ops.stop_gradient(ops.stack( + (loss.astype(ms.float32) / bs, + lbox.astype(ms.float32) / bs, + lobj.astype(ms.float32) / bs, + lcls.astype(ms.float32) / bs) + )) + + def build_targets(self, p, targets, imgs): + # Build targets for compute_loss(), input targets(image,class,x,y,w,h) + image_shape = imgs.shape + targets = targets.view(-1, 6) + mask_t = targets[:, 1] >= 0 + na, nt = self.na, targets.shape[0] # number of anchors, targets + indices, anch, tmasks = (), (), () + gain_wh = ops.ones(7, ms.int32) # normalized to gridspace gain + ai = ops.tile(mnp.arange(na).view(-1, 1), (1, nt)) # shape: (na, nt) + ai = ops.cast(ai, targets.dtype) + targets_9_anchors = ops.concat( + (ops.tile(targets, (na, 1, 1)), ai[:, :, None]), 2 + ) # append anchor indices # shape: (na, nt, 7) + + gain_wh[4:6] = get_tensor(image_shape, targets_9_anchors.dtype)[[3, 2]] # xyxy gain + + # Match targets to anchors + t_wh = targets_9_anchors * gain_wh + # Matches + gt_box = ops.zeros((na, nt, 4), ms.float32) + gt_box[..., 2:] = t_wh[..., 4:6] + + anchor_shapes = ops.zeros((na, 1, 4), ms.float32) + 
anchor_shapes[..., 2:] = ops.ExpandDims()(self.anchors, 1) + anch_ious = bbox_iou(gt_box, anchor_shapes).squeeze() + + j = anch_ious == anch_ious.max(axis=0) + l = anch_ious > self.iou_threshold + + j_l = ops.logical_or(j, l).astype(ms.int32).reshape((self.nl, -1, nt)) + + anchor_scales = self.anchors.reshape((self.nl, -1, 2)) + ai = ops.tile(mnp.arange(na // self.nl).view(-1, 1), (1, nt)) # shape: (na, nt) + ai = ops.cast(ai, targets.dtype) + targets_3_anchors = ops.concat((ops.tile(targets, (na // self.nl, 1, 1)), ai[:, :, None]), 2) + for i in range(self.nl): + anchors, shape = anchor_scales[i], p[i][0].shape + gain_xy = ops.ones(7, ms.int32) # normalized to gridspace gain + gain_xy[2:4] = get_tensor(shape, targets_3_anchors.dtype)[[2, 1]] # xyxy gain + + t = targets_3_anchors * gain_xy + mask_m_t = (j_l[i] * ops.cast(mask_t[None, :], ms.int32)).view(-1) + t = t.view(-1, 7) + + # Define + b, gxy, a = ( + ops.cast(t[:, 0], ms.int32), + t[:, 2:4], + ops.cast(t[:, 6], ms.int32), + ) # (image, class), grid xy, grid wh, anchors + gij = ops.cast(gxy, ms.int32) + gij = gij[:] + gi, gj = gij[:, 0], gij[:, 1] # grid indices + gi = gi.clip(0, shape[2] - 1) + gj = gj.clip(0, shape[1] - 1) + + # Append + indices += (ops.stack((b, a, gj, gi), 0),) # image, anchor, grid + anch += (anchors[a],) # anchors + tmasks += (mask_m_t,) + + targets_3_anchors = targets_3_anchors.view(-1, 7) + tcls = ops.cast(targets_3_anchors[:, 1], ms.int32) # class + tbox = targets_3_anchors[:, 2:6] # box + + return ( + tcls, + tbox, + ops.stack(indices), + ops.stack(anch), + ops.stack(tmasks), + ) # class, box, (image, anchor, gridj, gridi), anchors, mask + + +def xywh2xyxy(x): + # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right + y = ops.Identity()(x) + y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x + y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y + y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x + y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y + return y + + +@ops.constexpr +def get_tensor(x, dtype=ms.float32): + return Tensor(x, dtype) + + +class Iou(nn.Cell): + """Calculate the iou of boxes""" + + def __init__(self): + super(Iou, self).__init__() + self.min = ops.Minimum() + self.max = ops.Maximum() + + def construct(self, box1, box2): + """ + box1: pred_box [batch, gx, gy, anchors, 1, 4] ->4: [x_center, y_center, w, h] + box2: gt_box [batch, 1, 1, 1, maxbox, 4] + convert to topLeft and rightDown + """ + box1_xy = box1[:, :, :, :, :, :2] + box1_wh = box1[:, :, :, :, :, 2:4] + box1_mins = box1_xy - box1_wh / ops.scalar_to_tensor(2.0) # topLeft + box1_maxs = box1_xy + box1_wh / ops.scalar_to_tensor(2.0) # rightDown + + box2_xy = box2[:, :, :, :, :, :2] + box2_wh = box2[:, :, :, :, :, 2:4] + box2_mins = box2_xy - box2_wh / ops.scalar_to_tensor(2.0) + box2_maxs = box2_xy + box2_wh / ops.scalar_to_tensor(2.0) + + intersect_mins = self.max(box1_mins, box2_mins) + intersect_maxs = self.min(box1_maxs, box2_maxs) + intersect_wh = self.max(intersect_maxs - intersect_mins, ops.scalar_to_tensor(0.0)) + # P.squeeze: for effiecient slice + intersect_area = ops.Squeeze(-1)(intersect_wh[:, :, :, :, :, 0:1]) * ops.Squeeze(-1)( + intersect_wh[:, :, :, :, :, 1:2] + ) + box1_area = ops.Squeeze(-1)(box1_wh[:, :, :, :, :, 0:1]) * ops.Squeeze(-1)(box1_wh[:, :, :, :, :, 1:2]) + box2_area = ops.Squeeze(-1)(box2_wh[:, :, :, :, :, 0:1]) * ops.Squeeze(-1)(box2_wh[:, :, :, :, :, 1:2]) + iou = intersect_area / (box1_area + box2_area - intersect_area) + # iou : [batch, gx, gy, anchors, maxboxes] + return iou 
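+
+# A rough shape walk-through for the Iou cell above (illustrative note, not used
+# by the training code): with batch=2, a 13x13 grid, 3 anchors and maxbox=50,
+#     box1 (pred): (2, 13, 13, 3, 1, 4)    box2 (gt): (2, 1, 1, 1, 50, 4)
+# broadcast together, so the intersect/area tensors come out as (2, 13, 13, 3, 50)
+# and the returned iou holds one value per (grid cell, anchor, gt box) pair.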
+ + +if __name__ == "__main__": + from mindyolo.models.losses.loss_factory import create_loss + from mindyolo.utils.config import parse_config + + cfg = parse_config() + loss_fn = create_loss( + name="YOLOv7Loss", + **cfg.loss, + anchors=cfg.network.get("anchors", None), + stride=cfg.network.get("stride", None), + nc=cfg.data.get("nc", None), + ) + print(f"loss_fn is {loss_fn}") diff --git a/community/cv/ShipWise/mindyolo/models/losses/yolov5_loss.py b/community/cv/ShipWise/mindyolo/models/losses/yolov5_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..890cefef5f50e9626899932918f04b6766919e07 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/losses/yolov5_loss.py @@ -0,0 +1,231 @@ +import numpy as np + +import mindspore as ms +import mindspore.numpy as mnp +from mindspore import Parameter, Tensor, nn, ops + +from mindyolo.models.registry import register_model +from .focal_loss import BCEWithLogitsLoss, FocalLoss, smooth_BCE +from .iou_loss import bbox_iou + +__all__ = ["YOLOv5Loss"] + + +@register_model +class YOLOv5Loss(nn.Cell): + # Compute losses + def __init__( + self, box, obj, cls, anchor_t, label_smoothing, fl_gamma, cls_pw, obj_pw, anchors, stride, nc, **kwargs + ): + super(YOLOv5Loss, self).__init__() + + self.sort_obj_iou = False + self.hyp_anchor_t = anchor_t + self.hyp_box = box + self.hyp_obj = obj + self.hyp_cls = cls + self.nc = nc # number of classes + self.na = len(anchors[0]) // 2 # number of anchors + self.nl = len(anchors) # number of layers + stride = np.array(stride) + anchors = np.array(anchors).reshape((self.nl, -1, 2)) + anchors = anchors / stride.reshape((-1, 1, 1)) + self.stride = Tensor(stride, ms.int32) + self.anchors = Tensor(anchors, ms.float32) # shape(nl,na,2) + + # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 + self.cp, self.cn = smooth_BCE(eps=label_smoothing) # positive, negative BCE targets + + # Focal loss + g = fl_gamma # focal loss gamma + if g > 0: + BCEcls, BCEobj = FocalLoss(bce_pos_weight=Tensor([cls_pw], ms.float32), gamma=g), FocalLoss( + bce_pos_weight=Tensor([obj_pw], ms.float32), gamma=g + ) + else: + # Define criteria + BCEcls = BCEWithLogitsLoss(bce_pos_weight=Tensor(np.array([cls_pw]), ms.float32)) + BCEobj = BCEWithLogitsLoss(bce_pos_weight=Tensor(np.array([obj_pw]), ms.float32)) + + _balance = {3: [4.0, 1.0, 0.4]}.get(self.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 + self.balance = Parameter(Tensor(_balance, ms.float32), requires_grad=False) + self.BCEcls, self.BCEobj, self.gr = BCEcls, BCEobj, 1.0 + + self._off = Tensor( + [ + [0, 0], + [1, 0], + [0, 1], + [-1, 0], + [0, -1], # j,k,l,m + # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm + ], + dtype=ms.float32, + ) + + self.loss_item_name = ["loss", "lbox", "lobj", "lcls"] # branch name returned by loss for print + + def scatter_index_tensor(self, x, index): + x_tmp = ops.transpose(x.reshape((-1, x.shape[-1])), (1, 0)) + res = x_tmp[index].reshape(x.shape[:-1]) + return res + + def construct(self, p, targets, imgs): # predictions, targets + lcls, lbox, lobj = 0.0, 0.0, 0.0 + + tcls, tbox, indices, anchors, tmasks = self.build_targets( + p, targets + ) # class, box, (image, anchor, gridj, gridi), anchors, mask + tcls, tbox, indices, anchors, tmasks = ( + ops.stop_gradient(tcls), + ops.stop_gradient(tbox), + ops.stop_gradient(indices), + ops.stop_gradient(anchors), + ops.stop_gradient(tmasks), + ) + + # Losses + for layer_index, pi in enumerate(p): # layer index, layer predictions + pi = ops.cast(pi, ms.float32) + tmask = 
tmasks[layer_index] + b, a, gj, gi = ops.split(indices[layer_index] * tmask[None, :], split_size_or_sections=1, axis=0) # image, anchor, gridy, gridx + b, a, gj, gi = b.view(-1), a.view(-1), gj.view(-1), gi.view(-1) + tobj = ops.zeros(pi.shape[:4], pi.dtype) # target obj + + n = b.shape[0] # number of targets + if n: + _meta_pred = pi[b, a, gj, gi] # gather from (bs,na,h,w,nc) + pxy, pwh, _, pcls = _meta_pred[:, :2], _meta_pred[:, 2:4], _meta_pred[:, 4:5], _meta_pred[:, 5:] + + # Regression + pxy = ops.Sigmoid()(pxy) * 2 - 0.5 + pwh = (ops.Sigmoid()(pwh) * 2) ** 2 * anchors[layer_index] + pbox = ops.concat((pxy, pwh), 1) # predicted box + iou = bbox_iou(pbox, tbox[layer_index], CIoU=True).squeeze() # iou(prediction, target) + lbox += ((1.0 - iou) * tmask).sum() / tmask.astype(iou.dtype).sum() # iou loss + + # Objectness + iou = ops.stop_gradient(iou).clip(0, None) + if self.sort_obj_iou: + _, j = ops.sort(iou) + b, a, gj, gi, iou, tmask = b[j], a[j], gj[j], gi[j], iou[j], tmask[j] + if self.gr < 1: + iou = (1.0 - self.gr) + self.gr * iou + tobj[b, a, gj, gi] = ops.stop_gradient(iou) * tmask # iou ratio + + # Classification + if self.nc > 1: # cls loss (only if multiple classes) + t = ops.fill(pcls.dtype, pcls.shape, self.cn) # targets + + t[mnp.arange(n), tcls[layer_index]] = self.cp + lcls += self.BCEcls(pcls, t, ops.tile(tmask[:, None], (1, t.shape[-1]))) # BCE + + # obji = self.BCEobj(pi[..., 4], tobj) + obji = self.BCEobj(self.scatter_index_tensor(pi, 4), tobj) + lobj += obji * self.balance[layer_index] # obj loss + + lbox *= self.hyp_box + lobj *= self.hyp_obj + lcls *= self.hyp_cls + bs = p[0].shape[0] # batch size + + loss = lbox + lobj + lcls + loss_item = ops.stop_gradient(ops.stack((loss, lbox, lobj, lcls))) + return loss * bs, loss_item + + def build_targets(self, p, targets): + # Build targets for compute_loss(), input targets(image,class,x,y,w,h) + targets = targets.view(-1, 6) + mask_t = targets[:, 1] >= 0 + na, nt = self.na, targets.shape[0] # number of anchors, targets + tcls, tbox, indices, anch, tmasks = (), (), (), (), () + gain = ops.ones(7, ms.int32) # normalized to gridspace gain + ai = ops.tile(mnp.arange(na).view(-1, 1), (1, nt)) # shape: (na, nt) + ai = ops.cast(ai, targets.dtype) + targets = ops.concat( + (ops.tile(targets, (na, 1, 1)), ai[:, :, None]), 2 + ) # append anchor indices # shape: (na, nt, 7) + + g = 0.5 # bias + off = ops.cast(self._off, targets.dtype) * g # offsets + + for i in range(self.nl): + anchors, shape = self.anchors[i], p[i].shape + gain[2:6] = get_tensor(shape, targets.dtype)[[3, 2, 3, 2]] # xyxy gain + + # Match targets to anchors + t = targets * gain # shape(na,nt,7) # xywhn -> xywh + # Matches + r = t[..., 4:6] / anchors[:, None] # wh ratio + j = ops.maximum(r, 1 / r).max(2) < self.hyp_anchor_t # compare + + mask_m_t = ops.logical_and(j, mask_t[None, :]).view(-1) + t = t.view(-1, 7) + + # Offsets + gxy = t[:, 2:4] # grid xy + gxi = gain[[2, 3]] - gxy # inverse + jk = ops.logical_and((gxy % 1 < g), (gxy > 1)) # .astype(ms.int32) + lm = ops.logical_and((gxi % 1 < g), (gxi > 1)) # .astype(ms.int32) + j, k = jk[:, 0], jk[:, 1] + l, m = lm[:, 0], lm[:, 1] + + # Original + # j = ops.stack((ops.ones_like(j), j, k, l, m)) # shape: (5, *) + # t = ops.tile(t, (5, 1, 1)) # shape(5, *, 7) + # t = t.view(-1, 7) + # mask_m_t = (ops.cast(j, ms.int32) * ops.cast(mask_m_t[None, :], ms.int32)).view(-1) + # # t = t.repeat((5, 1, 1))[j] + # offsets = (ops.zeros_like(gxy)[None, :, :] + off[:, None, :]) #(1,*,2) + (5,1,2) -> (5,*,2) + # offsets = 
offsets.view(-1, 2) + + # Faster + tag1, tag2 = ops.identity(j), ops.identity(k) + tag1, tag2 = ops.tile(tag1[:, None], (1, 2)), ops.tile(tag2[:, None], (1, 2)) + j_l = ops.logical_or(j, l).astype(ms.int32) + k_m = ops.logical_or(k, m).astype(ms.int32) + center = ops.ones_like(j_l) + j = ops.stack((center, j_l, k_m)) + t = ops.tile(t, (3, 1, 1)) # shape(5, *, 7) + t = t.view(-1, 7) + mask_m_t = (ops.cast(j, ms.int32) * ops.cast(mask_m_t[None, :], ms.int32)).view(-1) + offsets = ops.zeros_like(gxy)[None, :, :] + off[:, None, :] # (1,*,2) + (5,1,2) -> (5,na*nt,2) + offsets_new = ops.zeros((3,) + offsets.shape[1:], offsets.dtype) + offsets_new[1:2, :, :] = ops.select(tag1.astype(ms.bool_), offsets[1, :, :], offsets[3, :, :]) + offsets_new[2:3, :, :] = ops.select(tag2.astype(ms.bool_), offsets[2, :, :], offsets[4, :, :]) + offsets = offsets_new + offsets = offsets.view(-1, 2) + + # Define + b, c, gxy, gwh, a = ( + ops.cast(t[:, 0], ms.int32), + ops.cast(t[:, 1], ms.int32), + t[:, 2:4], + t[:, 4:6], + ops.cast(t[:, 6], ms.int32), + ) # (image, class), grid xy, grid wh, anchors + gij = ops.cast(gxy - offsets, ms.int32) + gij = gij[:] + gi, gj = gij[:, 0], gij[:, 1] # grid indices + gi = gi.clip(0, shape[3] - 1) + gj = gj.clip(0, shape[2] - 1) + + # Append + indices += (ops.stack((b, a, gj, gi), 0),) # image, anchor, grid + tbox += (ops.concat((gxy - gij, gwh), 1),) # box + anch += (anchors[a],) # anchors + tcls += (c,) # class + tmasks += (mask_m_t,) + + return ( + ops.stack(tcls), + ops.stack(tbox), + ops.stack(indices), + ops.stack(anch), + ops.stack(tmasks), + ) # class, box, (image, anchor, gridj, gridi), anchors, mask + + +@ops.constexpr +def get_tensor(x, dtype=ms.float32): + return Tensor(x, dtype) diff --git a/community/cv/ShipWise/mindyolo/models/losses/yolov7_loss.py b/community/cv/ShipWise/mindyolo/models/losses/yolov7_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..46258369fc387ea9a6d962afb909cab54d801799 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/losses/yolov7_loss.py @@ -0,0 +1,1018 @@ +import numpy as np + +import mindspore as ms +import mindspore.numpy as mnp +from mindspore import Parameter, Tensor, nn, ops + +from mindyolo.models.registry import register_model +from .focal_loss import BCEWithLogitsLoss, FocalLoss, smooth_BCE +from .iou_loss import batch_box_iou, bbox_iou + +CLIP_VALUE = 1000.0 +EPS = 1e-7 + +__all__ = ["YOLOv7Loss", "YOLOv7AuxLoss"] + + +@register_model +class YOLOv7Loss(nn.Cell): + def __init__( + self, box, obj, cls, anchor_t, label_smoothing, fl_gamma, cls_pw, obj_pw, anchors, stride, nc, **kwargs + ): + super(YOLOv7Loss, self).__init__() + self.hyp_box = box + self.hyp_obj = obj + self.hyp_cls = cls + self.hyp_anchor_t = anchor_t + self.nc = nc # number of classes + self.na = len(anchors[0]) // 2 # number of anchors + self.nl = len(anchors) # number of layers + + stride = np.array(stride) + anchors = np.array(anchors).reshape((self.nl, -1, 2)) + anchors = anchors / stride.reshape((-1, 1, 1)) + self.stride = Tensor(stride, ms.int32) + self.anchors = Tensor(anchors, ms.float32) # shape(nl,na,2) + + # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 + self.cp, self.cn = smooth_BCE(eps=label_smoothing) # positive, negative BCE targets + # Focal loss + g = fl_gamma # focal loss gamma + if g > 0: + BCEcls, BCEobj = FocalLoss(bce_pos_weight=Tensor([cls_pw], ms.float32), gamma=g), FocalLoss( + bce_pos_weight=Tensor([obj_pw], ms.float32), gamma=g + ) + else: + # Define criteria + BCEcls = 
BCEWithLogitsLoss(bce_pos_weight=Tensor(np.array([cls_pw]), ms.float32)) + BCEobj = BCEWithLogitsLoss(bce_pos_weight=Tensor(np.array([obj_pw]), ms.float32)) + + _balance = {3: [4.0, 1.0, 0.4]}.get(self.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 + self.balance = ms.Parameter(Tensor(_balance, ms.float32), requires_grad=False) + self.BCEcls, self.BCEobj, self.gr = BCEcls, BCEobj, 1.0 + + self._off = Tensor( + [ + [0, 0], + [1, 0], + [0, 1], + [-1, 0], + [0, -1], # j,k,l,m + ], + dtype=ms.float32, + ) + + self.loss_item_name = ["loss", "lbox", "lobj", "lcls"] # branch name returned by lossitem for print + + def construct(self, p, targets, imgs): + lcls, lbox, lobj = 0.0, 0.0, 0.0 + bs, as_, gjs, gis, targets, anchors, tmasks = self.build_targets(p, targets, imgs) # bs: (nl, bs*5*na*gt_max) + bs, as_, gjs, gis, targets, anchors, tmasks = ( + ops.stop_gradient(bs), + ops.stop_gradient(as_), + ops.stop_gradient(gjs), + ops.stop_gradient(gis), + ops.stop_gradient(targets), + ops.stop_gradient(anchors), + ops.stop_gradient(tmasks), + ) + + pre_gen_gains = () + for pp in p: + pre_gen_gains += (get_tensor(pp.shape, targets.dtype)[[3, 2, 3, 2]],) + + # Losses + # for i, pi in enumerate(p): # layer index, layer predictions + for i in range(self.nl): # layer index + pi = p[i] # layer predictions + b, a, gj, gi, tmask = bs[i], as_[i], gjs[i], gis[i], tmasks[i] # image, anchor, gridy, gridx, tmask + tobj = ops.zeros_like(pi[..., 0]) # target obj + + n = b.shape[0] # number of targets + ps = pi[b, a, gj, gi] # prediction subset corresponding to targets + + # Regression + grid = ops.stack([gi, gj], axis=1) + pxy = ops.Sigmoid()(ps[:, :2]) * 2.0 - 0.5 + pwh = (ops.Sigmoid()(ps[:, 2:4]) * 2) ** 2 * anchors[i] + pbox = ops.concat((pxy, pwh), 1) # predicted box + selected_tbox = targets[i][:, 2:6] * pre_gen_gains[i] + selected_tbox[:, :2] -= grid + iou = bbox_iou(pbox, selected_tbox, xywh=True, CIoU=True).view(-1) + lbox += ((1.0 - iou) * tmask).sum() / tmask.astype(iou.dtype).sum().clip(1, None) # iou loss + + # Objectness + tobj[b, a, gj, gi] = ((1.0 - self.gr) + self.gr * ops.stop_gradient(iou).clip(0, None)) * tmask # iou ratio + + # Classification + selected_tcls = ops.cast(targets[i][:, 1], ms.int32) + if self.nc > 1: # cls loss (only if multiple classes) + t = ops.ones_like(ps[:, 5:]) * self.cn # targets + t[mnp.arange(n, dtype=ms.int32), selected_tcls] = self.cp + lcls += self.BCEcls(ps[:, 5:], t, ops.tile(tmask[:, None], (1, t.shape[1]))) # BCE + + obji = self.BCEobj(pi[..., 4], tobj) + lobj += obji * self.balance[i] # obj loss + + lbox *= self.hyp_box + lobj *= self.hyp_obj + lcls *= self.hyp_cls + bs = p[0].shape[0] # batch size + + loss = lbox + lobj + lcls + return loss * bs, ops.stop_gradient(ops.stack((loss, lbox, lobj, lcls))) + + def build_targets(self, p, targets, imgs): + indices, anch, tmasks = self.find_3_positive(p, targets) + + na, n_gt_max = self.na, targets.shape[1] + nl, batch_size, img_size = len(p), p[0].shape[0], imgs[0].shape[1] + + this_target = targets.view(-1, 6) + + txywh = this_target[:, 2:6] * img_size + txyxy = xywh2xyxy(txywh) + txyxy = txyxy.view(batch_size, n_gt_max, 4) + this_target = this_target.view(batch_size, n_gt_max, 6) + this_mask = this_target[:, :, 1] >= 0 # (bs, gt_max) + + pxyxys = () + p_cls = () + p_obj = () + all_b = () + all_a = () + all_gj = () + all_gi = () + all_anch = () + all_tmasks = () + + # for i, pi in enumerate(p): + for i in range(self.nl): + pi = p[i] + _this_indices = indices[i].view(4, 3 * na, batch_size, n_gt_max).transpose(0, 2, 1, 
3).view(4, -1) + _this_anch = anch[i].view(3 * na, batch_size, n_gt_max * 2).transpose(1, 0, 2).view(-1, 2) + _this_mask = tmasks[i].view(3 * na, batch_size, n_gt_max).transpose(1, 0, 2).view(-1) + + _this_indices *= _this_mask[None, :] + _this_anch *= _this_mask[:, None] + + b, a, gj, gi = ops.split(_this_indices, split_size_or_sections=1, axis=0) + b, a, gj, gi = b.view(-1), a.view(-1), gj.view(-1), gi.view(-1) + + fg_pred = pi[b, a, gj, gi] + p_obj += (fg_pred[:, 4:5].view(batch_size, 3 * na * n_gt_max, 1),) + p_cls += (fg_pred[:, 5:].view(batch_size, 3 * na * n_gt_max, -1),) + + grid = ops.stack((gi, gj), axis=1) + pxy = (ops.Sigmoid()(fg_pred[:, :2]) * 2.0 - 0.5 + grid) * self.stride[i] # / 8. + pwh = (ops.Sigmoid()(fg_pred[:, 2:4]) * 2) ** 2 * _this_anch * self.stride[i] # / 8. + pxywh = ops.concat((pxy, pwh), axis=-1) + pxyxy = xywh2xyxy(pxywh) + + b, a, gj, gi, pxyxy, _this_anch, _this_mask = ( + b.view(batch_size, -1), + a.view(batch_size, -1), + gj.view(batch_size, -1), + gi.view(batch_size, -1), + pxyxy.view(batch_size, -1, 4), + _this_anch.view(batch_size, -1, 2), + _this_mask.view(batch_size, -1), + ) + all_b += (b,) + all_a += (a,) + all_gj += (gj,) + all_gi += (gi,) + pxyxys += (pxyxy,) + all_anch += (_this_anch,) + all_tmasks += (_this_mask,) + + pxyxys = ops.concat(pxyxys, axis=1) # nl * (bs, 5*na*gt_max, 4) -> cat -> (bs, c, 4) # nt = bs * gt_max + p_obj = ops.concat(p_obj, axis=1) + p_cls = ops.concat(p_cls, axis=1) # nl * (bs, 5*na*gt_max, 80) -> (bs, nl*5*na*gt_max, 80) + all_b = ops.concat(all_b, axis=1) # nl * (bs, 5*na*gt_max) -> (bs, nl*5*na*gt_max) + all_a = ops.concat(all_a, axis=1) + all_gj = ops.concat(all_gj, axis=1) + all_gi = ops.concat(all_gi, axis=1) + all_anch = ops.concat(all_anch, axis=1) + all_tmasks = ops.concat(all_tmasks, axis=1) # (bs, nl*5*na*gt_max) + + this_mask = all_tmasks[:, None, :] * this_mask[:, :, None] # (bs, gt_max, nl*5*na*gt_max,) + + # (bs, gt_max, 4), (bs, nl*5*na*gt_max, 4) -> (bs, gt_max, nl*5*na*gt_max) + pair_wise_iou = batch_box_iou(txyxy, pxyxys) * this_mask # (bs, gt_max, nl*5*na*gt_max,) + pair_wise_iou_loss = -ops.log(pair_wise_iou + EPS) + + v, _ = ops.top_k(pair_wise_iou, 10) # (bs, gt_max, 10) + dynamic_ks = ops.cast(v.sum(-1).clip(1, 10), ms.int32) # (bs, gt_max) + + # (bs, gt_max, 80) + gt_cls_per_image = ops.one_hot( + indices=ops.cast(this_target[:, :, 1], ms.int32), + depth=self.nc, + on_value=ops.ones(1, p_cls.dtype), + off_value=ops.zeros(1, p_cls.dtype), + ) + # (bs, gt_max, nl*5*na*gt_max, 80) + gt_cls_per_image = ops.tile( + ops.expand_dims(ops.cast(gt_cls_per_image, p_cls.dtype), 2), (1, 1, pxyxys.shape[1], 1) + ) + + cls_preds_ = ops.sqrt(ops.Sigmoid()(p_cls) * ops.Sigmoid()(p_obj)) + cls_preds_ = ops.tile( + ops.expand_dims(cls_preds_, 1), (1, n_gt_max, 1, 1) + ) # (bs, nl*5*na*gt_max, 80) -> (bs, gt_max, nl*5*na*gt_max, 80) + y = cls_preds_ + + pair_wise_cls_loss = ops.binary_cross_entropy_with_logits( + ops.log(y / (1 - y) + EPS), + gt_cls_per_image, + ops.ones(1, cls_preds_.dtype), + ops.ones(1, cls_preds_.dtype), + reduction="none", + ).sum( + -1 + ) # (bs, gt_max, nl*5*na*gt_max) + + cost = pair_wise_cls_loss + 3.0 * pair_wise_iou_loss + cost = cost * this_mask + cost += CLIP_VALUE * (1.0 - ops.cast(this_mask, cost.dtype)) + + sort_cost, sort_idx = ops.top_k(-cost, 10, sorted=True) # (bs, gt_max, 10) + sort_cost = -sort_cost + pos_idx = ops.stack((mnp.arange(batch_size * n_gt_max, dtype=ms.int32), dynamic_ks.view(-1) - 1), -1) + pos_v = ops.gather_nd(sort_cost.view(batch_size * n_gt_max, 10), 
pos_idx).view(batch_size, n_gt_max) + matching_matrix = ops.cast(cost <= pos_v[:, :, None], ms.int32) * this_mask + + # delete reduplicate match label, one anchor only match one gt + cost_argmin = mnp.argmin(cost, axis=1) # (bs, nl*5*na*gt_max) + anchor_matching_gt_mask = ops.one_hot( + cost_argmin, n_gt_max, ops.ones(1, ms.float16), ops.zeros(1, ms.float16), axis=-1 + ).transpose( + 0, 2, 1 + ) # (bs, gt_max, nl*5*na*gt_max) + matching_matrix = matching_matrix * ops.cast(anchor_matching_gt_mask, matching_matrix.dtype) + + fg_mask_inboxes = ( + matching_matrix.astype(ms.float16).sum(1) > 0.0 + ) # (bs, gt_max, nl*5*na*gt_max) -> (bs, nl*5*na*gt_max) + all_tmasks = all_tmasks * ops.cast(fg_mask_inboxes, ms.int32) # (bs, nl*5*na*gt_max) + matched_gt_inds = matching_matrix.argmax(1).astype(ms.int32) # (bs, gt_max, nl*5*na*gt_max) -> (bs, nl*5*na*gt_max) + matched_bs_inds = ops.tile( + mnp.arange(batch_size, dtype=ms.int32)[:, None], (1, matching_matrix.shape[2]) + ) # (bs, nl*5*na*gt_max) + matched_inds = ops.stack((matched_bs_inds.view(-1), matched_gt_inds.view(-1)), 1) # (bs*nl*5*na*gt_max, 2) + matched_inds *= all_tmasks.view(-1)[:, None] + this_target = ops.gather_nd(this_target, matched_inds) # (bs*nl*5*na*gt_max, 6) + # this_target = this_target.view(-1, 6)[matched_gt_inds.view(-1,)] # (bs*nl*5*na*gt_max, 6) + + # (bs, nl*5*na*gt_max,) -> (bs, nl, 5*na*gt_max) -> (nl, bs*5*na*gt_max) + matching_tmasks = all_tmasks.view(batch_size, nl, -1).transpose(1, 0, 2).view(nl, -1) + matching_bs = all_b.view(batch_size, nl, -1).transpose(1, 0, 2).view(nl, -1) * matching_tmasks + matching_as = all_a.view(batch_size, nl, -1).transpose(1, 0, 2).view(nl, -1) * matching_tmasks + matching_gjs = all_gj.view(batch_size, nl, -1).transpose(1, 0, 2).view(nl, -1) * matching_tmasks + matching_gis = all_gi.view(batch_size, nl, -1).transpose(1, 0, 2).view(nl, -1) * matching_tmasks + matching_targets = ( + this_target.view(batch_size, nl, -1, 6).transpose(1, 0, 2, 3).view(nl, -1, 6) * matching_tmasks[..., None] + ) + matching_anchs = ( + all_anch.view(batch_size, nl, -1, 2).transpose(1, 0, 2, 3).view(nl, -1, 2) * matching_tmasks[..., None] + ) + + return matching_bs, matching_as, matching_gjs, matching_gis, matching_targets, matching_anchs, matching_tmasks + + def find_3_positive(self, p, targets): + # Build targets for compute_loss(), input targets(image,class,x,y,w,h) + targets = targets.view(-1, 6) # (bs, gt_max, 6) -> (bs*gt_max, 6) + mask_t = targets[:, 1] >= 0 # (bs*gt_max,) + na, nt = self.na, targets.shape[0] # number of anchors, targets + indices, anch, tmasks = (), (), () + gain = ops.ones(7, ms.int32) # normalized to gridspace gain + ai = ops.tile(mnp.arange(na, dtype=targets.dtype).view(na, 1), (1, nt)) # shape: (na, nt) + targets = ops.concat((ops.tile(targets, (na, 1, 1)), ai[:, :, None]), 2) # append anchor indices # (na, nt, 7) + + g = 0.5 # bias + off = ops.cast(self._off, targets.dtype) * g # offsets + + for i in range(self.nl): + anchors, shape = self.anchors[i], p[i].shape + gain[2:6] = get_tensor(shape, targets.dtype)[[3, 2, 3, 2]] # xyxy gain # [W, H, W, H] + + # Match targets to anchors + t = targets * gain # (na, nt, 7) + # Matches + r = t[:, :, 4:6] / anchors[:, None, :] # wh ratio + j = ops.maximum(r, 1.0 / r).max(2) < self.hyp_anchor_t # compare # (na, nt) + + mask_m_t = ops.logical_and(j, mask_t[None, :]).view(-1) + t = t.view(-1, 7) # (na*nt, 7) + + # Offsets + gxy = t[:, 2:4] # grid xy + gxi = gain[[2, 3]] - gxy # inverse + jk = ops.logical_and((gxy % 1.0 < g), (gxy > 1.0)) + lm = 
ops.logical_and((gxi % 1.0 < g), (gxi > 1.0)) + j, k = jk[:, 0], jk[:, 1] + l, m = lm[:, 0], lm[:, 1] + + # original + # j = ops.stack((ops.ones_like(j), j, k, l, m)) # shape: (5, *) + # t = ops.tile(t, (5, 1, 1)) # shape(5, *, 7) + # t = t.view(-1, 7) + # mask_m_t = (ops.cast(j, ms.int32) * ops.cast(mask_m_t[None, :], ms.int32)).view(-1) + # # t = t.repeat((5, 1, 1))[j] + # offsets = (ops.zeros_like(gxy)[None, :, :] + off[:, None, :]) # (1,*,2) + (5,1,2) -> (5,na*nt,2) + # offsets = offsets.view(-1, 2) # (5*na*nt, 2) + # # offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j] + + # Faster + tag1, tag2 = ops.tile(j[:, None], (1, 2)), ops.tile(k[:, None], (1, 2)) + j_l = ops.logical_or(j, l).astype(ms.int32) + k_m = ops.logical_or(k, m).astype(ms.int32) + center = ops.ones_like(j_l) + j = ops.stack((center, j_l, k_m)) + mask_m_t = (ops.cast(j, ms.int32) * ops.cast(mask_m_t[None, :], ms.int32)).view(-1) + t = ops.tile(t, (3, 1, 1)) # shape(5, *, 7) + t = t.view(-1, 7) + offsets = ops.zeros_like(gxy)[None, :, :] + off[:, None, :] # (1,*,2) + (5,1,2) -> (5,na*nt,2) + offsets_new = ops.zeros((3,) + offsets.shape[1:], offsets.dtype) + offsets_new[1, :, :] = ops.select(tag1.astype(ms.bool_), offsets[1, ...], offsets[3, ...]) + offsets_new[2, :, :] = ops.select(tag2.astype(ms.bool_), offsets[2, ...], offsets[4, ...]) + offsets = offsets_new + offsets = offsets.view(-1, 2) + + # Define + b, c, gxy, gwh, a = ( + ops.cast(t[:, 0], ms.int32), + ops.cast(t[:, 1], ms.int32), + t[:, 2:4], + t[:, 4:6], + ops.cast(t[:, 6], ms.int32), + ) # (image, class), grid xy, grid wh, anchors # b: (5*na*nt,), gxy: (5*na*nt, 2) + gij = ops.cast(gxy - offsets, ms.int32) + gi, gj = gij[:, 0], gij[:, 1] # grid indices + gi = gi.clip(0, shape[3] - 1) + gj = gj.clip(0, shape[2] - 1) + + # Append + indices += (ops.stack((b, a, gj, gi), 0),) # image, anchor, grid + anch += (anchors[a],) # anchors + tmasks += (mask_m_t,) + + return indices, anch, tmasks + + +@register_model +class YOLOv7AuxLoss(nn.Cell): + def __init__( + self, box, obj, cls, anchor_t, label_smoothing, fl_gamma, cls_pw, obj_pw, anchors, stride, nc, **kwargs + ): + super(YOLOv7AuxLoss, self).__init__() + self.hyp_box = box + self.hyp_obj = obj + self.hyp_cls = cls + self.hyp_anchor_t = anchor_t + self.nc = nc # number of classes + self.na = len(anchors[0]) // 2 # number of anchors + self.nl = len(anchors) # number of layers + + # modify weight of box/obj/cls when aux_loss + self.hyp_box *= 3.0 / self.nl + self.hyp_cls *= self.nc / 80.0 * 3.0 / self.nl + self.hyp_obj *= 2.0**2 * 3.0 / self.nl + + stride = np.array(stride) + anchors = np.array(anchors).reshape((self.nl, -1, 2)) + anchors = anchors / stride.reshape((-1, 1, 1)) + self.stride = Tensor(stride, ms.int32) + self.anchors = Tensor(anchors, ms.float32) # shape(nl,na,2) + + # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 + self.cp, self.cn = smooth_BCE(eps=label_smoothing) # positive, negative BCE targets + # Focal loss + g = fl_gamma # focal loss gamma + if g > 0: + BCEcls, BCEobj = FocalLoss(bce_pos_weight=Tensor([cls_pw], ms.float32), gamma=g), FocalLoss( + bce_pos_weight=Tensor([obj_pw], ms.float32), gamma=g + ) + else: + # Define criteria + BCEcls = BCEWithLogitsLoss(bce_pos_weight=Tensor(np.array([cls_pw]), ms.float32)) + BCEobj = BCEWithLogitsLoss(bce_pos_weight=Tensor(np.array([obj_pw]), ms.float32)) + + _balance = {3: [4.0, 1.0, 0.4]}.get(self.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 + self.balance = ms.Parameter(Tensor(_balance, ms.float32), requires_grad=False) + 
self.BCEcls, self.BCEobj, self.gr = BCEcls, BCEobj, 1.0 + + self._off = Tensor( + [ + [0, 0], + [1, 0], + [0, 1], + [-1, 0], + [0, -1], # j,k,l,m + # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm + ], + dtype=ms.float32, + ) + + self.loss_item_name = ["loss", "lbox", "lobj", "lcls"] # branch name returned by loss for print + + def construct(self, p, targets, imgs): + lcls, lbox, lobj = 0.0, 0.0, 0.0 + targets_ori = targets + bs, as_, gjs, gis, targets, anchors, tmasks = self.build_targets( + p[: self.nl], targets_ori, imgs + ) # bs: (nl, bs*3*na*gt_max) + bs_aux, as_aux_, gjs_aux, gis_aux, targets_aux, anchors_aux, tmasks_aux = self.build_targets_2( + p[: self.nl], targets_ori, imgs + ) # bs: (nl, bs*5*na*gt_max) + + bs, as_, gjs, gis, targets, anchors, tmasks = ( + ops.stop_gradient(bs), + ops.stop_gradient(as_), + ops.stop_gradient(gjs), + ops.stop_gradient(gis), + ops.stop_gradient(targets), + ops.stop_gradient(anchors), + ops.stop_gradient(tmasks), + ) + bs_aux, as_aux_, gjs_aux, gis_aux, targets_aux, anchors_aux, tmasks_aux = ( + ops.stop_gradient(bs_aux), + ops.stop_gradient(as_aux_), + ops.stop_gradient(gjs_aux), + ops.stop_gradient(gis_aux), + ops.stop_gradient(targets_aux), + ops.stop_gradient(anchors_aux), + ops.stop_gradient(tmasks_aux), + ) + + pre_gen_gains = () + # pre_gen_gains_aux = () + for pp in p[: self.nl]: + pre_gen_gains += (get_tensor(pp.shape, targets.dtype)[[3, 2, 3, 2]],) + # pre_gen_gains_aux += (get_tensor(pp.shape, targets.dtype)[[3, 2, 3, 2]],) + + # Losses + for i in range(self.nl): # layer index + pi = p[i] # layer predictions + pi_aux = p[i + self.nl] + b, a, gj, gi, tmask = bs[i], as_[i], gjs[i], gis[i], tmasks[i] # image, anchor, gridy, gridx, tmask + b_aux, a_aux, gj_aux, gi_aux, tmask_aux = bs_aux[i], as_aux_[i], gjs_aux[i], gis_aux[i], tmasks_aux[i] + tobj = ops.zeros_like(pi[..., 0]) # target obj + tobj_aux = ops.zeros_like(pi_aux[..., 0]) # target obj + + # 1. Branch1, Compute main branch loss + n = b.shape[0] # number of targets + ps = pi[b, a, gj, gi] # prediction subset corresponding to targets + # 1.1. Regression + grid = ops.stack([gi, gj], axis=1) + pxy = ops.Sigmoid()(ps[:, :2]) * 2.0 - 0.5 + pwh = (ops.Sigmoid()(ps[:, 2:4]) * 2) ** 2 * anchors[i] + pbox = ops.concat((pxy, pwh), 1) # predicted box + selected_tbox = targets[i][:, 2:6] * pre_gen_gains[i] + selected_tbox[:, :2] -= grid + iou = bbox_iou(pbox, selected_tbox, xywh=True, CIoU=True).view(-1) + lbox += ((1.0 - iou) * tmask).sum() / tmask.astype(iou.dtype).sum().clip(1, None) # iou loss + # 1.2. Objectness + tobj[b, a, gj, gi] = ((1.0 - self.gr) + self.gr * ops.stop_gradient(iou).clip(0, None)) * tmask # iou ratio + obji = self.BCEobj(pi[..., 4], tobj) + lobj += obji * self.balance[i] # obj loss + # 1.3. Classification + selected_tcls = ops.cast(targets[i][:, 1], ms.int32) + if self.nc > 1: # cls loss (only if multiple classes) + t = ops.ones_like(ps[:, 5:]) * self.cn # targets + t[mnp.arange(n, dtype=ms.int32), selected_tcls] = self.cp + lcls += self.BCEcls(ps[:, 5:], t, ops.tile(tmask[:, None], (1, t.shape[1]))) # BCE + + # 2. Branch2, Compute Aux branch loss + n_aux = b_aux.shape[0] # number of targets + ps_aux = pi[b_aux, a_aux, gj_aux, gi_aux] # prediction subset corresponding to targets + # 2.1. 
Regression + grid_aux = ops.stack([gi_aux, gj_aux], axis=1) + pxy_aux = ops.Sigmoid()(ps_aux[:, :2]) * 2.0 - 0.5 + pwh_aux = (ops.Sigmoid()(ps_aux[:, 2:4]) * 2) ** 2 * anchors_aux[i] + pbox_aux = ops.concat((pxy_aux, pwh_aux), 1) # predicted box + selected_tbox_aux = targets_aux[i][:, 2:6] * pre_gen_gains[i] + selected_tbox_aux[:, :2] -= grid_aux + iou_aux = bbox_iou(pbox_aux, selected_tbox_aux, xywh=True, CIoU=True).view(-1) + lbox += ( + 0.25 * ((1.0 - iou_aux) * tmask_aux).sum() / tmask_aux.astype(iou_aux.dtype).sum().clip(1, None) + ) # iou loss + # 1.2. Objectness + tobj_aux[b_aux, a_aux, gj_aux, gi_aux] = ( + (1.0 - self.gr) + self.gr * ops.stop_gradient(iou_aux).clip(0, None) + ) * tmask_aux # iou ratio + obji_aux = self.BCEobj(pi_aux[..., 4], tobj_aux) + lobj += 0.25 * obji_aux * self.balance[i] # obj loss + # 1.3. Classification + selected_tcls_aux = ops.cast(targets_aux[i][:, 1], ms.int32) + if self.nc > 1: # cls loss (only if multiple classes) + t_aux = ops.ones_like(ps_aux[:, 5:]) * self.cn # targets + t_aux[mnp.arange(n_aux, dtype=ms.int32), selected_tcls_aux] = self.cp + lcls += 0.25 * self.BCEcls( + ps_aux[:, 5:], t_aux, ops.tile(tmask_aux[:, None], (1, t_aux.shape[1])) + ) # BCE + + lbox *= self.hyp_box + lobj *= self.hyp_obj + lcls *= self.hyp_cls + bs = p[0].shape[0] # batch size + + loss = lbox + lobj + lcls + return loss * bs, ops.stop_gradient(ops.stack((loss, lbox, lobj, lcls))) + + def build_targets(self, p, targets, imgs): + indices, anch, tmasks = self.find_3_positive(p, targets) + + na, n_gt_max = self.na, targets.shape[1] + nl, batch_size, img_size = len(p), p[0].shape[0], imgs[0].shape[1] + + this_target = targets.view(-1, 6) + + txywh = this_target[:, 2:6] * img_size + txyxy = xywh2xyxy(txywh) + txyxy = txyxy.view(batch_size, n_gt_max, 4) + this_target = this_target.view(batch_size, n_gt_max, 6) + this_mask = this_target[:, :, 1] >= 0 # (bs, gt_max) + + pxyxys = () + p_cls = () + p_obj = () + all_b = () + all_a = () + all_gj = () + all_gi = () + all_anch = () + all_tmasks = () + + # for i, pi in enumerate(p): + for i in range(self.nl): + pi = p[i] + _this_indices = indices[i].view(4, 3 * na, batch_size, n_gt_max).transpose(0, 2, 1, 3).view(4, -1) + _this_anch = anch[i].view(3 * na, batch_size, n_gt_max * 2).transpose(1, 0, 2).view(-1, 2) + _this_mask = tmasks[i].view(3 * na, batch_size, n_gt_max).transpose(1, 0, 2).view(-1) + + _this_indices *= _this_mask[None, :] + _this_anch *= _this_mask[:, None] + + b, a, gj, gi = ops.split(_this_indices, split_size_or_sections=1, axis=0) + b, a, gj, gi = b.view(-1), a.view(-1), gj.view(-1), gi.view(-1) + + fg_pred = pi[b, a, gj, gi] + p_obj += (fg_pred[:, 4:5].view(batch_size, 3 * na * n_gt_max, 1),) + p_cls += (fg_pred[:, 5:].view(batch_size, 3 * na * n_gt_max, -1),) + + grid = ops.stack((gi, gj), axis=1) + pxy = (ops.Sigmoid()(fg_pred[:, :2]) * 2.0 - 0.5 + grid) * self.stride[i] # / 8. + pwh = (ops.Sigmoid()(fg_pred[:, 2:4]) * 2) ** 2 * _this_anch * self.stride[i] # / 8. 
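+            # Note: pxy/pwh above decode raw logits back to image-scale boxes (grid
+            # offset plus stride for xy; squared sigmoid times the stride-normalized
+            # anchor, rescaled by stride, for wh), so candidate boxes and ground truth
+            # are compared in the same pixel units by the OTA-style matching below.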
+ pxywh = ops.concat((pxy, pwh), axis=-1) + pxyxy = xywh2xyxy(pxywh) + + b, a, gj, gi, pxyxy, _this_anch, _this_mask = ( + b.view(batch_size, -1), + a.view(batch_size, -1), + gj.view(batch_size, -1), + gi.view(batch_size, -1), + pxyxy.view(batch_size, -1, 4), + _this_anch.view(batch_size, -1, 2), + _this_mask.view(batch_size, -1), + ) + all_b += (b,) + all_a += (a,) + all_gj += (gj,) + all_gi += (gi,) + pxyxys += (pxyxy,) + all_anch += (_this_anch,) + all_tmasks += (_this_mask,) + + pxyxys = ops.concat(pxyxys, axis=1) # nl * (bs, 5*na*gt_max, 4) -> cat -> (bs, c, 4) # nt = bs * gt_max + p_obj = ops.concat(p_obj, axis=1) + p_cls = ops.concat(p_cls, axis=1) # nl * (bs, 5*na*gt_max, 80) -> (bs, nl*5*na*gt_max, 80) + all_b = ops.concat(all_b, axis=1) # nl * (bs, 5*na*gt_max) -> (bs, nl*5*na*gt_max) + all_a = ops.concat(all_a, axis=1) + all_gj = ops.concat(all_gj, axis=1) + all_gi = ops.concat(all_gi, axis=1) + all_anch = ops.concat(all_anch, axis=1) + all_tmasks = ops.concat(all_tmasks, axis=1) # (bs, nl*5*na*gt_max) + + this_mask = all_tmasks[:, None, :] * this_mask[:, :, None] # (bs, gt_max, nl*5*na*gt_max,) + + # (bs, gt_max, 4), (bs, nl*5*na*gt_max, 4) -> (bs, gt_max, nl*5*na*gt_max) + pair_wise_iou = batch_box_iou(txyxy, pxyxys) * this_mask # (bs, gt_max, nl*5*na*gt_max,) + pair_wise_iou_loss = -ops.log(pair_wise_iou + EPS) + + # Top 20 iou sum for aux, default 10 + v, _ = ops.top_k(pair_wise_iou, 20) # (bs, gt_max, 20) + dynamic_ks = ops.cast(v.sum(-1).clip(1, 20), ms.int32) # (bs, gt_max) + + # (bs, gt_max, 80) + gt_cls_per_image = ops.one_hot( + indices=ops.cast(this_target[:, :, 1], ms.int32), + depth=self.nc, + on_value=ops.ones(1, p_cls.dtype), + off_value=ops.zeros(1, p_cls.dtype), + ) + # (bs, gt_max, nl*5*na*gt_max, 80) + gt_cls_per_image = ops.tile( + ops.expand_dims(ops.cast(gt_cls_per_image, p_cls.dtype), 2), (1, 1, pxyxys.shape[1], 1) + ) + + cls_preds_ = ops.sqrt(ops.Sigmoid()(p_cls) * ops.Sigmoid()(p_obj)) + cls_preds_ = ops.tile( + ops.expand_dims(cls_preds_, 1), (1, n_gt_max, 1, 1) + ) # (bs, nl*5*na*gt_max, 80) -> (bs, gt_max, nl*5*na*gt_max, 80) + y = cls_preds_ + + pair_wise_cls_loss = ops.binary_cross_entropy_with_logits( + ops.log(y / (1 - y) + EPS), + gt_cls_per_image, + ops.ones(1, cls_preds_.dtype), + ops.ones(1, cls_preds_.dtype), + reduction="none", + ).sum( + -1 + ) # (bs, gt_max, nl*5*na*gt_max) + + cost = pair_wise_cls_loss + 3.0 * pair_wise_iou_loss + cost = cost * this_mask + cost += CLIP_VALUE * (1.0 - ops.cast(this_mask, cost.dtype)) + + sort_cost, sort_idx = ops.top_k(-cost, 20, sorted=True) # (bs, gt_max, 20) + sort_cost = -sort_cost + pos_idx = ops.stack((mnp.arange(batch_size * n_gt_max, dtype=ms.int32), dynamic_ks.view(-1) - 1), -1) + pos_v = ops.gather_nd(sort_cost.view(batch_size * n_gt_max, 20), pos_idx).view(batch_size, n_gt_max) + matching_matrix = ops.cast(cost <= pos_v[:, :, None], ms.int32) * this_mask + + # delete reduplicate match label, one anchor only match one gt + cost_argmin = mnp.argmin(cost, axis=1) # (bs, nl*5*na*gt_max) + anchor_matching_gt_mask = ops.one_hot( + cost_argmin, n_gt_max, ops.ones(1, ms.float16), ops.zeros(1, ms.float16), axis=-1 + ).transpose( + 0, 2, 1 + ) # (bs, gt_max, nl*5*na*gt_max) + matching_matrix = matching_matrix * ops.cast(anchor_matching_gt_mask, matching_matrix.dtype) + + fg_mask_inboxes = ( + matching_matrix.astype(ms.float16).sum(1) > 0.0 + ) # (bs, gt_max, nl*5*na*gt_max) -> (bs, nl*5*na*gt_max) + all_tmasks = all_tmasks * ops.cast(fg_mask_inboxes, ms.int32) # (bs, nl*5*na*gt_max) + matched_gt_inds = 
matching_matrix.argmax(1).astype(ms.int32) # (bs, gt_max, nl*5*na*gt_max) -> (bs, nl*5*na*gt_max) + matched_bs_inds = ops.tile( + mnp.arange(batch_size, dtype=ms.int32)[:, None], (1, matching_matrix.shape[2]) + ) # (bs, nl*5*na*gt_max) + matched_inds = ops.stack((matched_bs_inds.view(-1), matched_gt_inds.view(-1)), 1) # (bs*nl*5*na*gt_max, 2) + matched_inds *= all_tmasks.view(-1)[:, None] + this_target = ops.gather_nd(this_target, matched_inds) # (bs*nl*5*na*gt_max, 6) + # this_target = this_target.view(-1, 6)[matched_gt_inds.view(-1,)] # (bs*nl*5*na*gt_max, 6) + + # (bs, nl*5*na*gt_max,) -> (bs, nl, 5*na*gt_max) -> (nl, bs*5*na*gt_max) + matching_tmasks = all_tmasks.view(batch_size, nl, -1).transpose(1, 0, 2).view(nl, -1) + matching_bs = all_b.view(batch_size, nl, -1).transpose(1, 0, 2).view(nl, -1) * matching_tmasks + matching_as = all_a.view(batch_size, nl, -1).transpose(1, 0, 2).view(nl, -1) * matching_tmasks + matching_gjs = all_gj.view(batch_size, nl, -1).transpose(1, 0, 2).view(nl, -1) * matching_tmasks + matching_gis = all_gi.view(batch_size, nl, -1).transpose(1, 0, 2).view(nl, -1) * matching_tmasks + matching_targets = ( + this_target.view(batch_size, nl, -1, 6).transpose(1, 0, 2, 3).view(nl, -1, 6) * matching_tmasks[..., None] + ) + matching_anchs = ( + all_anch.view(batch_size, nl, -1, 2).transpose(1, 0, 2, 3).view(nl, -1, 2) * matching_tmasks[..., None] + ) + + return matching_bs, matching_as, matching_gjs, matching_gis, matching_targets, matching_anchs, matching_tmasks + + def build_targets_2(self, p, targets, imgs): + indices, anch, tmasks = self.find_5_positive(p, targets) + + na, n_gt_max = self.na, targets.shape[1] + nl, batch_size, img_size = len(p), p[0].shape[0], imgs[0].shape[1] + + this_target = targets.view(-1, 6) + + txywh = this_target[:, 2:6] * img_size + txyxy = xywh2xyxy(txywh) + txyxy = txyxy.view(batch_size, n_gt_max, 4) + this_target = this_target.view(batch_size, n_gt_max, 6) + this_mask = this_target[:, :, 1] >= 0 # (bs, gt_max) + + pxyxys = () + p_cls = () + p_obj = () + all_b = () + all_a = () + all_gj = () + all_gi = () + all_anch = () + all_tmasks = () + + # for i, pi in enumerate(p): + for i in range(self.nl): + pi = p[i] + _this_indices = indices[i].view(4, 5 * na, batch_size, n_gt_max).transpose(0, 2, 1, 3).view(4, -1) + _this_anch = anch[i].view(5 * na, batch_size, n_gt_max * 2).transpose(1, 0, 2).view(-1, 2) + _this_mask = tmasks[i].view(5 * na, batch_size, n_gt_max).transpose(1, 0, 2).view(-1) + + _this_indices *= _this_mask[None, :] + _this_anch *= _this_mask[:, None] + + b, a, gj, gi = ops.split(_this_indices, split_size_or_sections=1, axis=0) + b, a, gj, gi = b.view(-1), a.view(-1), gj.view(-1), gi.view(-1) + + fg_pred = pi[b, a, gj, gi] + p_obj += (fg_pred[:, 4:5].view(batch_size, 5 * na * n_gt_max, 1),) + p_cls += (fg_pred[:, 5:].view(batch_size, 5 * na * n_gt_max, -1),) + + grid = ops.stack((gi, gj), axis=1) + pxy = (ops.Sigmoid()(fg_pred[:, :2]) * 2.0 - 0.5 + grid) * self.stride[i] # / 8. + pwh = (ops.Sigmoid()(fg_pred[:, 2:4]) * 2) ** 2 * _this_anch * self.stride[i] # / 8. 
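+            # Same decoding as in build_targets above; build_targets_2 differs only in
+            # drawing its candidates from find_5_positive (g = 1.0, a wider offset
+            # neighborhood), which feeds the auxiliary heads with looser matches.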
+ pxywh = ops.concat((pxy, pwh), axis=-1) + pxyxy = xywh2xyxy(pxywh) + + b, a, gj, gi, pxyxy, _this_anch, _this_mask = ( + b.view(batch_size, -1), + a.view(batch_size, -1), + gj.view(batch_size, -1), + gi.view(batch_size, -1), + pxyxy.view(batch_size, -1, 4), + _this_anch.view(batch_size, -1, 2), + _this_mask.view(batch_size, -1), + ) + all_b += (b,) + all_a += (a,) + all_gj += (gj,) + all_gi += (gi,) + pxyxys += (pxyxy,) + all_anch += (_this_anch,) + all_tmasks += (_this_mask,) + + pxyxys = ops.concat(pxyxys, axis=1) # nl * (bs, 5*na*gt_max, 4) -> cat -> (bs, c, 4) # nt = bs * gt_max + p_obj = ops.concat(p_obj, axis=1) + p_cls = ops.concat(p_cls, axis=1) # nl * (bs, 5*na*gt_max, 80) -> (bs, nl*5*na*gt_max, 80) + all_b = ops.concat(all_b, axis=1) # nl * (bs, 5*na*gt_max) -> (bs, nl*5*na*gt_max) + all_a = ops.concat(all_a, axis=1) + all_gj = ops.concat(all_gj, axis=1) + all_gi = ops.concat(all_gi, axis=1) + all_anch = ops.concat(all_anch, axis=1) + all_tmasks = ops.concat(all_tmasks, axis=1) # (bs, nl*5*na*gt_max) + + this_mask = all_tmasks[:, None, :] * this_mask[:, :, None] # (bs, gt_max, nl*5*na*gt_max,) + + # (bs, gt_max, 4), (bs, nl*5*na*gt_max, 4) -> (bs, gt_max, nl*5*na*gt_max) + pair_wise_iou = batch_box_iou(txyxy, pxyxys) * this_mask # (bs, gt_max, nl*5*na*gt_max,) + pair_wise_iou_loss = -ops.log(pair_wise_iou + EPS) + + # Top 20 iou sum for aux, default 10 + v, _ = ops.top_k(pair_wise_iou, 20) # (bs, gt_max, 20) + dynamic_ks = ops.cast(v.sum(-1).clip(1, 20), ms.int32) # (bs, gt_max) + + # (bs, gt_max, 80) + gt_cls_per_image = ops.one_hot( + indices=ops.cast(this_target[:, :, 1], ms.int32), + depth=self.nc, + on_value=ops.ones(1, p_cls.dtype), + off_value=ops.zeros(1, p_cls.dtype), + ) + # (bs, gt_max, nl*5*na*gt_max, 80) + gt_cls_per_image = ops.tile( + ops.expand_dims(ops.cast(gt_cls_per_image, p_cls.dtype), 2), (1, 1, pxyxys.shape[1], 1) + ) + + cls_preds_ = ops.sqrt(ops.Sigmoid()(p_cls) * ops.Sigmoid()(p_obj)) + cls_preds_ = ops.tile( + ops.expand_dims(cls_preds_, 1), (1, n_gt_max, 1, 1) + ) # (bs, nl*5*na*gt_max, 80) -> (bs, gt_max, nl*5*na*gt_max, 80) + y = cls_preds_ + + pair_wise_cls_loss = ops.binary_cross_entropy_with_logits( + ops.log(y / (1 - y) + EPS), + gt_cls_per_image, + ops.ones(1, cls_preds_.dtype), + ops.ones(1, cls_preds_.dtype), + reduction="none", + ).sum( + -1 + ) # (bs, gt_max, nl*5*na*gt_max) + + cost = pair_wise_cls_loss + 3.0 * pair_wise_iou_loss + cost = cost * this_mask + cost += CLIP_VALUE * (1.0 - ops.cast(this_mask, cost.dtype)) + + sort_cost, sort_idx = ops.top_k(-cost, 20, sorted=True) # (bs, gt_max, 20) + sort_cost = -sort_cost + pos_idx = ops.stack((mnp.arange(batch_size * n_gt_max, dtype=ms.int32), dynamic_ks.view(-1) - 1), -1) + pos_v = ops.gather_nd(sort_cost.view(batch_size * n_gt_max, 20), pos_idx).view(batch_size, n_gt_max) + matching_matrix = ops.cast(cost <= pos_v[:, :, None], ms.int32) * this_mask + + # delete reduplicate match label, one anchor only match one gt + cost_argmin = mnp.argmin(cost, axis=1) # (bs, nl*5*na*gt_max) + anchor_matching_gt_mask = ops.one_hot( + cost_argmin, n_gt_max, ops.ones(1, ms.float16), ops.zeros(1, ms.float16), axis=-1 + ).transpose( + 0, 2, 1 + ) # (bs, gt_max, nl*5*na*gt_max) + matching_matrix = matching_matrix * ops.cast(anchor_matching_gt_mask, matching_matrix.dtype) + + fg_mask_inboxes = ( + matching_matrix.astype(ms.float16).sum(1) > 0.0 + ) # (bs, gt_max, nl*5*na*gt_max) -> (bs, nl*5*na*gt_max) + all_tmasks = all_tmasks * ops.cast(fg_mask_inboxes, ms.int32) # (bs, nl*5*na*gt_max) + matched_gt_inds = 
matching_matrix.argmax(1).astype(ms.int32) # (bs, gt_max, nl*5*na*gt_max) -> (bs, nl*5*na*gt_max) + matched_bs_inds = ops.tile( + mnp.arange(batch_size, dtype=ms.int32)[:, None], (1, matching_matrix.shape[2]) + ) # (bs, nl*5*na*gt_max) + matched_inds = ops.stack((matched_bs_inds.view(-1), matched_gt_inds.view(-1)), 1) # (bs*nl*5*na*gt_max, 2) + matched_inds *= all_tmasks.view(-1)[:, None] + this_target = ops.gather_nd(this_target, matched_inds) # (bs*nl*5*na*gt_max, 6) + # this_target = this_target.view(-1, 6)[matched_gt_inds.view(-1,)] # (bs*nl*5*na*gt_max, 6) + + # (bs, nl*5*na*gt_max,) -> (bs, nl, 5*na*gt_max) -> (nl, bs*5*na*gt_max) + matching_tmasks = all_tmasks.view(batch_size, nl, -1).transpose(1, 0, 2).view(nl, -1) + matching_bs = all_b.view(batch_size, nl, -1).transpose(1, 0, 2).view(nl, -1) * matching_tmasks + matching_as = all_a.view(batch_size, nl, -1).transpose(1, 0, 2).view(nl, -1) * matching_tmasks + matching_gjs = all_gj.view(batch_size, nl, -1).transpose(1, 0, 2).view(nl, -1) * matching_tmasks + matching_gis = all_gi.view(batch_size, nl, -1).transpose(1, 0, 2).view(nl, -1) * matching_tmasks + matching_targets = ( + this_target.view(batch_size, nl, -1, 6).transpose(1, 0, 2, 3).view(nl, -1, 6) * matching_tmasks[..., None] + ) + matching_anchs = ( + all_anch.view(batch_size, nl, -1, 2).transpose(1, 0, 2, 3).view(nl, -1, 2) * matching_tmasks[..., None] + ) + + return matching_bs, matching_as, matching_gjs, matching_gis, matching_targets, matching_anchs, matching_tmasks + + def find_3_positive(self, p, targets): + # Build targets for compute_loss(), input targets(image,class,x,y,w,h) + targets = targets.view(-1, 6) # (bs, gt_max, 6) -> (bs*gt_max, 6) + mask_t = targets[:, 1] >= 0 # (bs*gt_max,) + na, nt = self.na, targets.shape[0] # number of anchors, targets + indices, anch, tmasks = (), (), () + gain = ops.ones(7, ms.int32) # normalized to gridspace gain + ai = ops.tile(mnp.arange(na, dtype=targets.dtype).view(na, 1), (1, nt)) # shape: (na, nt) + targets = ops.concat((ops.tile(targets, (na, 1, 1)), ai[:, :, None]), 2) # append anchor indices # (na, nt, 7) + + g = 0.5 # bias + off = ops.cast(self._off, targets.dtype) * g # offsets + + for i in range(self.nl): + anchors, shape = self.anchors[i], p[i].shape + gain[2:6] = get_tensor(shape, targets.dtype)[[3, 2, 3, 2]] # xyxy gain # [W, H, W, H] + + # Match targets to anchors + t = targets * gain # (na, nt, 7) + # Matches + # if nt: + r = t[:, :, 4:6] / anchors[:, None, :] # wh ratio + j = ops.maximum(r, 1.0 / r).max(2) < self.hyp_anchor_t # compare # (na, nt) + + # t = t[j] # filter + mask_m_t = ops.logical_and(j, mask_t[None, :]).view(-1) + t = t.view(-1, 7) # (na*nt, 7) + + # Offsets + gxy = t[:, 2:4] # grid xy + gxi = gain[[2, 3]] - gxy # inverse + jk = ops.logical_and((gxy % 1.0 < g), (gxy > 1.0)) + lm = ops.logical_and((gxi % 1.0 < g), (gxi > 1.0)) + j, k = jk[:, 0], jk[:, 1] + l, m = lm[:, 0], lm[:, 1] + + # original + # j = ops.stack((ops.ones_like(j), j, k, l, m)) # shape: (5, *) + # t = ops.tile(t, (5, 1, 1)) # shape(5, *, 7) + # t = t.view(-1, 7) + # mask_m_t = (ops.cast(j, ms.int32) * ops.cast(mask_m_t[None, :], ms.int32)).view(-1) + # # t = t.repeat((5, 1, 1))[j] + # offsets = (ops.zeros_like(gxy)[None, :, :] + off[:, None, :]) # (1,*,2) + (5,1,2) -> (5,na*nt,2) + # offsets = offsets.view(-1, 2) # (5*na*nt, 2) + # # offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j] + + # Faster + tag1, tag2 = ops.tile(j[:, None], (1, 2)), ops.tile(k[:, None], (1, 2)) + j_l = ops.logical_or(j, l).astype(ms.int32) + k_m = 
ops.logical_or(k, m).astype(ms.int32) + center = ops.ones_like(j_l) + j = ops.stack((center, j_l, k_m)) + mask_m_t = (ops.cast(j, ms.int32) * ops.cast(mask_m_t[None, :], ms.int32)).view(-1) + t = ops.tile(t, (3, 1, 1)) # shape(5, *, 7) + t = t.view(-1, 7) + offsets = ops.zeros_like(gxy)[None, :, :] + off[:, None, :] # (1,*,2) + (5,1,2) -> (5,na*nt,2) + offsets_new = ops.zeros((3,) + offsets.shape[1:], offsets.dtype) + # offsets_new[0, :, :] = offsets[0, :, :] + offsets_new[1, :, :] = ops.select(tag1.astype(ms.bool_), offsets[1, ...], offsets[3, ...]) + offsets_new[2, :, :] = ops.select(tag2.astype(ms.bool_), offsets[2, ...], offsets[4, ...]) + offsets = offsets_new + offsets = offsets.view(-1, 2) + + # Define + b, c, gxy, gwh, a = ( + ops.cast(t[:, 0], ms.int32), + ops.cast(t[:, 1], ms.int32), + t[:, 2:4], + t[:, 4:6], + ops.cast(t[:, 6], ms.int32), + ) # (image, class), grid xy, grid wh, anchors # b: (5*na*nt,), gxy: (5*na*nt, 2) + # gij = gxy - offsets + gij = ops.cast(gxy - offsets, ms.int32) + gi, gj = gij[:, 0], gij[:, 1] # grid indices + gi = gi.clip(0, shape[3] - 1) + gj = gj.clip(0, shape[2] - 1) + + # Append + indices += (ops.stack((b, a, gj, gi), 0),) # image, anchor, grid + anch += (anchors[a],) # anchors + tmasks += (mask_m_t,) + + return indices, anch, tmasks + + def find_5_positive(self, p, targets): + # Build targets for compute_loss(), input targets(image,class,x,y,w,h) + targets = targets.view(-1, 6) # (bs, gt_max, 6) -> (bs*gt_max, 6) + mask_t = targets[:, 1] >= 0 # (bs*gt_max,) + na, nt = self.na, targets.shape[0] # number of anchors, targets + indices, anch, tmasks = (), (), () + gain = ops.ones(7, ms.int32) # normalized to gridspace gain + ai = ops.tile(mnp.arange(na, dtype=targets.dtype).view(na, 1), (1, nt)) # shape: (na, nt) + targets = ops.concat((ops.tile(targets, (na, 1, 1)), ai[:, :, None]), 2) # append anchor indices # (na, nt, 7) + + g = 1.0 # bias + off = ops.cast(self._off, targets.dtype) * g # offsets + + for i in range(self.nl): + anchors, shape = self.anchors[i], p[i].shape + gain[2:6] = get_tensor(shape, targets.dtype)[[3, 2, 3, 2]] # xyxy gain # [W, H, W, H] + + # Match targets to anchors + t = targets * gain # (na, nt, 7) + # Matches + r = t[:, :, 4:6] / anchors[:, None, :] # wh ratio + j = ops.maximum(r, 1.0 / r).max(2) < self.hyp_anchor_t # compare # (na, nt) + + # t = t[j] # filter + mask_m_t = ops.logical_and(j, mask_t[None, :]).view(-1) # filter + t = t.view(-1, 7) # (na*nt, 7) + + # Offsets + gxy = t[:, 2:4] # grid xy + gxi = gain[[2, 3]] - gxy # inverse + jk = ops.logical_and((gxy % 1.0 < g), (gxy > 1.0)).astype(ms.int32) + lm = ops.logical_and((gxi % 1.0 < g), (gxi > 1.0)).astype(ms.int32) + j, k = jk[:, 0], jk[:, 1] + l, m = lm[:, 0], lm[:, 1] + + # original + j = ops.stack((ops.ones_like(j), j, k, l, m)) # shape: (5, *) + t = ops.tile(t, (5, 1, 1)) # shape(5, *, 7) + t = t.view(-1, 7) + mask_m_t = (ops.cast(j, ms.int32) * ops.cast(mask_m_t[None, :], ms.int32)).view(-1) + offsets = ops.zeros_like(gxy)[None, :, :] + off[:, None, :] # (1,*,2) + (5,1,2) -> (5,na*nt,2) + offsets = offsets.view(-1, 2) # (5*na*nt, 2) + + # Define + b, c, gxy, gwh, a = ( + ops.cast(t[:, 0], ms.int32), + ops.cast(t[:, 1], ms.int32), + t[:, 2:4], + t[:, 4:6], + ops.cast(t[:, 6], ms.int32), + ) # (image, class), grid xy, grid wh, anchors # b: (5*na*nt,), gxy: (5*na*nt, 2) + # gij = gxy - offsets + gij = ops.cast(gxy - offsets, ms.int32) + gi, gj = gij[:, 0], gij[:, 1] # grid indices + gi = gi.clip(0, shape[3] - 1) + gj = gj.clip(0, shape[2] - 1) + + # Append + indices 
+= (ops.stack((b, a, gj, gi), 0),) # image, anchor, grid + anch += (anchors[a],) # anchors + tmasks += (mask_m_t,) + + return indices, anch, tmasks + + +def xywh2xyxy(x): + # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right + y = ops.Identity()(x) + y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x + y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y + y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x + y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y + return y + + +@ops.constexpr +def get_tensor(x, dtype=ms.float32): + return Tensor(x, dtype) + + +if __name__ == "__main__": + from mindyolo.models.losses.loss_factory import create_loss + from mindyolo.utils.config import parse_config + + cfg = parse_config() + loss_fn = create_loss( + name="YOLOv7Loss", + **cfg.loss, + anchors=cfg.network.get("anchors", None), + stride=cfg.network.get("stride", None), + nc=cfg.data.get("nc", None), + ) + print(f"loss_fn is {loss_fn}") diff --git a/community/cv/ShipWise/mindyolo/models/losses/yolov8_loss.py b/community/cv/ShipWise/mindyolo/models/losses/yolov8_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..6e85efe626ca855881612757d10fe4e95844cf6b --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/losses/yolov8_loss.py @@ -0,0 +1,595 @@ +import mindspore as ms +import mindspore.numpy as mnp +from mindspore import Tensor, nn, ops + +from mindyolo.models.registry import register_model + +from .iou_loss import bbox_iou + +CLIP_VALUE = 1000.0 +EPS = 1e-7 + +__all__ = ["YOLOv8Loss", "YOLOv8SegLoss"] + + +@register_model +class YOLOv8Loss(nn.Cell): + def __init__(self, box, cls, dfl, stride, nc, reg_max=16, **kwargs): + super(YOLOv8Loss, self).__init__() + + self.bce = nn.BCEWithLogitsLoss(reduction="none") + self.hyp_box = box + self.hyp_cls = cls + self.hyp_dfl = dfl + self.stride = stride # model strides + self.nc = nc # number of classes + self.no = nc + reg_max * 4 + self.reg_max = reg_max + + self.use_dfl = reg_max > 1 + self.assigner = TaskAlignedAssigner(topk=10, num_classes=self.nc, alpha=0.5, beta=6.0) + self.bbox_loss = BboxLoss(reg_max, use_dfl=self.use_dfl) + self.proj = mnp.arange(reg_max) + + # ops + self.sigmoid = ops.Sigmoid() + + # branch name returned by lossitem for print + self.loss_item_name = ["loss", "lbox", "lcls", "dfl"] + + def construct(self, feats, targets, imgs): + """YOLOv8 Loss + Args: + feats: list of tensor, feats[i] shape: (bs, nc+reg_max*4, hi, wi) + targets: [image_idx,cls,x,y,w,h], shape: (bs, gt_max, 6) + """ + loss = ops.zeros(3, ms.float32) # box, cls, dfl + batch_size = feats[0].shape[0] + _x = () + for xi in feats: + _x += (xi.view(batch_size, self.no, -1),) + _x = ops.concat(_x, 2) + pred_distri, pred_scores = _x[:, : self.reg_max * 4, :], _x[:, -self.nc :, :] # (bs, nc, h*w) + pred_distri, pred_scores = pred_distri.transpose((0, 2, 1)), pred_scores.transpose((0, 2, 1)) + + dtype = pred_scores.dtype + imgsz = get_tensor(feats[0].shape[2:], dtype) * self.stride[0] # image size (h,w) + anchor_points, stride_tensor = self.make_anchors(feats, self.stride, 0.5) + + # targets + targets, mask_gt = self.preprocess(targets, scale_tensor=imgsz[[1, 0, 1, 0]]) + gt_labels, gt_bboxes = targets[:, :, :1], targets[:, :, 1:5] # cls, xyxy + + # pboxes + pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, shape: (b, h*w, 4) + + _, target_bboxes, target_scores, fg_mask, _ = self.assigner( + self.sigmoid(pred_scores), + (pred_bboxes * stride_tensor).astype(gt_bboxes.dtype), + anchor_points * stride_tensor, 
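+ # added note: decoded boxes and anchor points are rescaled by stride above,
+ # so the assigner matches everything in input-image pixel coordinates,
+ # the same scale as gt_bboxes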
+ gt_labels, + gt_bboxes, + mask_gt, + ) + # stop gradient + target_bboxes, target_scores, fg_mask = ( + ops.stop_gradient(target_bboxes), + ops.stop_gradient(target_scores), + ops.stop_gradient(fg_mask), + ) + + target_bboxes /= stride_tensor + + target_scores_sum = ops.maximum(target_scores.sum(), 1) + + # cls loss + # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum # VFL way + loss[1] = self.bce(pred_scores, ops.cast(target_scores, dtype)).sum() / target_scores_sum # BCE + + # bbox loss + # if fg_mask.sum(): + loss[0], loss[2] = self.bbox_loss( + pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask + ) + + loss[0] *= self.hyp_box # box gain + loss[1] *= self.hyp_cls # cls gain + loss[2] *= self.hyp_dfl # dfl gain + + return loss.sum() * batch_size, ops.stop_gradient( + ops.concat((loss.sum(keepdims=True), loss)) + ) # loss(box, cls, dfl) + + def bbox_decode(self, anchor_points, pred_dist): + if self.use_dfl: + b, a, c = pred_dist.shape # batch, anchors, channels + pred_dist = pred_dist.view(b, a, 4, c // 4) + # pred_dist = ops.softmax(pred_dist, axis=3) # ms version >= 1.9.0 + pred_dist = ops.Softmax(axis=3)(pred_dist) # ms version <= 1.8.1 + # (batch, anchors, 4, reg_max) @ (reg_max,) -> (batch, anchors, 4) + _dtype = pred_dist.dtype + pred_dist = ops.matmul(pred_dist.astype(ms.float16), self.proj.astype(ms.float16)).astype(_dtype) + return self.dist2bbox(pred_dist, anchor_points, xywh=False) + + def preprocess(self, targets, scale_tensor): + """preprocess gt boxes + + Args: + targets: [image_idx,cls,x,y,w,h], shape: (bs, gt_max, 6) + scale_tensor: (4,) + Return: + out: [cls,x,y,x,y], shape: (bs, gt_max, 5) + mask_gt: (bs, gt_max) + """ + mask_gt = targets[:, :, 1] >= 0 # (bs, gt_max) + out = targets[:, :, 1:] * mask_gt[:, :, None] # [cls,x,y,w,h], shape: (bs, gt_max, 5) + out[..., 1:5] = xywh2xyxy(out[..., 1:5] * scale_tensor) + return out, mask_gt + + @staticmethod + def dist2bbox(distance, anchor_points, xywh=True, axis=-1): + """Transform distance(ltrb) to box(xywh or xyxy).""" + lt, rb = ops.split(distance, split_size_or_sections=2, axis=axis) + x1y1 = anchor_points - lt + x2y2 = anchor_points + rb + if xywh: + c_xy = (x1y1 + x2y2) / 2 + wh = x2y2 - x1y1 + return ops.concat((c_xy, wh), axis) # xywh bbox + return ops.concat((x1y1, x2y2), axis) # xyxy bbox + + @staticmethod + def make_anchors(feats, strides, grid_cell_offset=0.5): + """Generate anchors from features.""" + anchor_points, stride_tensor = (), () + dtype = feats[0].dtype + for i, stride in enumerate(strides): + _, _, h, w = feats[i].shape + sx = mnp.arange(w, dtype=dtype) + grid_cell_offset # shift x + sy = mnp.arange(h, dtype=dtype) + grid_cell_offset # shift y + sy, sx = ops.meshgrid(sy, sx, indexing="ij") + anchor_points += (ops.stack((sx, sy), -1).view(-1, 2),) + stride_tensor += (ops.ones((h * w, 1), dtype) * stride,) + return ops.concat(anchor_points), ops.concat(stride_tensor) + + +@register_model +class YOLOv8SegLoss(YOLOv8Loss): + def __init__(self, box, cls, dfl, stride, nc, reg_max=16, nm=32, overlap=True, max_object_num=600, **kwargs): + super(YOLOv8SegLoss, self).__init__(box, cls, dfl, stride, nc, reg_max) + + self.overlap = overlap + self.nm = nm + self.max_object_num = max_object_num + + # branch name returned by lossitem for print + self.loss_item_name = ["loss", "lbox", "lseg", "lcls", "dfl"] + + def construct(self, preds, target_box, target_seg): + """YOLOv8 Loss + Args: + feats: list of tensor, feats[i] shape: 
(bs, nc+reg_max*4, hi, wi) + targets: [image_idx,cls,x,y,w,h], shape: (bs, gt_max, 6) + """ + loss = ops.zeros(4, ms.float32) # box, cls, dfl, mask + # (bs, nc+reg_max*4, hi, wi), (bs, k, hi*wi), (bs, k, 138, 138); k = 32; + feats, pred_masks, proto = preds # x, mc, p; + batch_size, _, mask_h, mask_w = proto.shape # batch size, number of masks, mask height, mask width + + _x = () + for xi in feats: + _x += (xi.view(batch_size, self.no, -1),) + _x = ops.concat(_x, 2) + pred_distri, pred_scores = _x[:, :self.reg_max * 4, :], _x[:, -self.nc:, :] # (bs, nc, h*w) + + # b, grids, .. + pred_scores = pred_scores.transpose(0, 2, 1) # (bs, h*w, nc) + pred_distri = pred_distri.transpose(0, 2, 1) # (bs, h*w, regmax * 4) + pred_masks = pred_masks.transpose(0, 2, 1) # (bs, h*w, k) + + dtype = pred_scores.dtype + imgsz = get_tensor(feats[0].shape[2:], dtype) * self.stride[0] # image size (h,w) + anchor_points, stride_tensor = self.make_anchors(feats, self.stride, 0.5) + + # targets + target_box, mask_gt = self.preprocess(target_box, scale_tensor=imgsz[[1, 0, 1, 0]]) + gt_labels, gt_bboxes = target_box[:, :, :1], target_box[:, :, 1:5] # cls, xyxy + + # pboxes + pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, shape: (b, h*w, 4) + + _, target_bboxes, target_scores, fg_mask, target_gt_idx = self.assigner( + self.sigmoid(pred_scores), + (pred_bboxes * stride_tensor).astype(gt_bboxes.dtype), + anchor_points * stride_tensor, + gt_labels, + gt_bboxes, + mask_gt, + ) + + # stop gradient + target_bboxes, target_scores, fg_mask, target_gt_idx = ( + ops.stop_gradient(target_bboxes), + ops.stop_gradient(target_scores), + ops.stop_gradient(fg_mask), + ops.stop_gradient(target_gt_idx) + ) + + target_scores_sum = ops.maximum(target_scores.sum(), 1) + + # cls loss + loss[2] = self.bce(pred_scores, ops.cast(target_scores, dtype)).sum() / target_scores_sum # BCE + + # bbox loss + loss[0], loss[3] = self.bbox_loss( + pred_distri, pred_bboxes, anchor_points, target_bboxes / stride_tensor, target_scores, target_scores_sum, fg_mask + ) + + # FIXME: mask target reshape, dynamic shape feature required. 
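+ # Added note: the commented block below is the full-batch mask downsample,
+ # disabled here because it needs dynamic shapes. Instead, the per-image loop
+ # further down keeps every shape static for graph mode by selecting at most
+ # `max_object_num` foreground anchors per image with top-k before computing
+ # the mask loss.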
+ # masks = target_seg # (b, 1, mask_h, mask_w) if overlap else (bs, N, mask_h, mask_w) + # if tuple(masks.shape[-2:]) != (mask_h, mask_w): # downsample + # masks = ops.interpolate(ops.expand_dims(masks, 0), size=(mask_h, mask_w), mode="nearest")[0] + + for i in range(batch_size): + _fg_mask, _fg_mask_index = ops.topk(fg_mask[i].astype(ms.float16), self.max_object_num) + _mask = target_seg[i] # (mask_h, mask_w) if overlap else (n_gt, mask_h, mask_w) + _mask_idx = target_gt_idx[i] # (b, N) -> (N,) + _mask_idx = ops.gather(_mask_idx, _fg_mask_index, axis=0) # (max_object_num,) + + if self.overlap: + _cond = _mask[None, :, :] == (_mask_idx[:, None, None] + 1) + gt_mask = ops.where( + _cond, + ops.ones(_cond.shape, pred_masks.dtype), + ops.zeros(_cond.shape, pred_masks.dtype) + ) + else: + gt_mask = _mask[_mask_idx] # (n_gt, mask_h, mask_w) -> (N, mask_h, mask_w)/(max_object_num, mask_h, mask_w) + + xyxyn = target_bboxes[i] / imgsz[[1, 0, 1, 0]] + marea = xyxy2xywh(xyxyn)[:, 2:].prod(1) + mxyxy = xyxyn * get_tensor((mask_w, mask_h, mask_w, mask_h), xyxyn.dtype) + + _loss_1 = self.single_mask_loss( + gt_mask, pred_masks[i], proto[i], mxyxy, marea, _fg_mask, _fg_mask_index + ) + loss[1] += _loss_1 + + loss[0] *= self.hyp_box # box gain + loss[1] *= self.hyp_box / batch_size # seg gain + loss[2] *= self.hyp_cls # cls gain + loss[3] *= self.hyp_dfl # dfl gain + + return loss.sum() * batch_size, ops.stop_gradient( + ops.concat((loss.sum(keepdims=True), loss)) + ) # loss, lbox, lseg, lcls, ldfl + + def single_mask_loss(self, gt_mask, pred, proto, xyxy, area, _fg_mask, _fg_mask_index): + """Mask loss for one image.""" + pred = ops.gather(pred, _fg_mask_index, axis=0) + xyxy = ops.gather(xyxy, _fg_mask_index, axis=0) + area = ops.gather(area, _fg_mask_index, axis=0) + + _dtype = pred.dtype + pred_mask = ops.matmul( + pred.astype(ms.float16), + proto.astype(ms.float16).view(self.nm, -1) + ).view(-1, *proto.shape[1:]).astype(_dtype) # (n, 32) @ (32,80,80) -> (n,80,80) + + loss = ops.binary_cross_entropy_with_logits( + pred_mask, gt_mask, reduction='none', + weight=ops.ones(1, pred_mask.dtype), + pos_weight=ops.ones(1, pred_mask.dtype) + ) + + single_loss = (self.crop_mask(loss, xyxy).mean(axis=(1, 2)) / ops.clip(area, min=1e-4)) + single_loss *= _fg_mask + + num_seg = ops.clip(_fg_mask.sum(), min=1.0) + + return single_loss.sum() / num_seg + + @staticmethod + def crop_mask(masks, boxes): + """ + It takes a mask and a bounding box, and returns a mask that is cropped to the bounding box + + Args: + masks (Tensor): [h, w, n] tensor of masks + boxes (Tensor): [n, 4] tensor of bbox coordinates in relative point form + + Returns: + (Tensor): The masks are being cropped to the bounding box. 
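+ Note (added): as invoked from single_mask_loss, masks actually arrive as
+ (n, h, w) and boxes are in mask-pixel coordinates (normalized xyxy scaled
+ by (mask_w, mask_h)), which is what the unpacking below assumes.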
+ """ + n, h, w = masks.shape + x1, y1, x2, y2 = ops.chunk(boxes[:, :, None], 4, 1) # x1 shape(n,1,1) + r = ops.arange(w, dtype=x1.dtype)[None, None, :] # rows shape(1,1,w) + c = ops.arange(h, dtype=x1.dtype)[None, :, None] # cols shape(1,h,1) + + return masks * ops.logical_and( + ops.logical_and((r >= x1), (r < x2)), + ops.logical_and((c >= y1), (c < y2)) + ).astype(x1.dtype) + + +class BboxLoss(nn.Cell): + def __init__(self, reg_max, use_dfl=False): + super().__init__() + self.reg_max = reg_max + self.use_dfl = use_dfl + + def construct( + self, pred_dist, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask + ): + """ + Args: + pred_dist: (bs, N, reg_max * 4) + pred_bboxes: (bs, N, 4) + anchor_points: (N, 2) + target_bboxes: (bs, N, 4) + target_scores: (bs, N, num_classes) + target_scores_sum: (1,) + fg_mask: (bs, N) + """ + # IoU loss + weight = target_scores.sum(-1).expand_dims(-1) # (bs, N, num_classes) -> (bs, N) -> (bs, N, 1) + iou = bbox_iou(pred_bboxes, target_bboxes, xywh=False, CIoU=True) + loss_iou = ((1.0 - iou) * weight * fg_mask.expand_dims(2)).sum() / target_scores_sum + + # DFL loss + if self.use_dfl: + target_ltrb = self.bbox2dist(anchor_points, target_bboxes, self.reg_max - 1) + loss_dfl = self._df_loss(pred_dist.view(-1, self.reg_max), target_ltrb) * weight * fg_mask[:, :, None] + loss_dfl = loss_dfl.sum() / target_scores_sum + else: + loss_dfl = ops.zeros(1, ms.float32) + + return loss_iou, loss_dfl + + @staticmethod + def bbox2dist(anchor_points, bbox, reg_max): + """Transform bbox(xyxy) to dist(ltrb).""" + x1y1, x2y2 = ops.split(bbox, split_size_or_sections=2, axis=-1) + return ops.concat((anchor_points - x1y1, x2y2 - anchor_points), -1).clip(0, reg_max - 0.01) # dist (lt, rb) + + @staticmethod + def _df_loss(pred_dist, target): + # Return sum of left and right DFL losses + # Distribution Focal Loss (DFL) proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391 + """ + Args: + pred_dist: (bs*N*4, reg_max) + target: (bs, N, 4) + fg_mask: (bs, N) + Return: + loss: (bs, N, 1) + """ + tl = ops.cast(target, ms.int32) # target left + tr = tl + 1 # target right + wl = tr - target # weight left + wr = 1 - wl # weight right + + loss = ( + ops.cross_entropy(pred_dist, tl.view(-1), reduction="none").view(tl.shape) * wl + + ops.cross_entropy(pred_dist, tr.view(-1), reduction="none").view(tl.shape) * wr + ).mean(-1, keep_dims=True) + + return loss + + +class TaskAlignedAssigner(nn.Cell): + def __init__(self, topk=13, num_classes=80, alpha=1.0, beta=6.0, eps=1e-9): + super().__init__() + self.topk = topk + self.num_classes = num_classes + self.bg_idx = num_classes + self.alpha = alpha + self.beta = beta + self.eps = eps + + def construct(self, pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt): + """This code referenced to + https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/assigner/tal_assigner.py + + Args: + pd_scores: (bs, N, num_classes) + pd_bboxes: (bs, N, 4) + anc_points: (N, 2) + gt_labels: (bs, n_gt, 1) + gt_bboxes: (bs, n_gt, 4) + mask_gt: (bs, n_gt) + Returns: + target_labels: (bs, N) + target_bboxes: (bs, N, 4) + target_scores: (bs, N, num_classes) + fg_mask: (bs, N) + target_gt_idx: (bs, N) + """ + bs, n_gt, _ = gt_labels.shape + mask_pos, align_metric, overlaps = self.get_pos_mask( + pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, mask_gt + ) + + target_gt_idx, fg_mask, mask_pos = self.select_highest_overlaps(mask_pos, overlaps, n_gt) + + # assigned target + target_labels, 
target_bboxes, target_scores = self.get_targets(gt_labels, gt_bboxes, target_gt_idx, fg_mask) + + # normalize + align_metric *= mask_pos + pos_align_metrics = align_metric.max(axis=-1, keepdims=True) # (b, n_gt) + pos_overlaps = (overlaps * mask_pos).max(axis=-1, keepdims=True) # (b, n_gt) + norm_align_metric = (align_metric * pos_overlaps / (pos_align_metrics + self.eps)).max(-2).expand_dims(-1) + target_scores = target_scores * norm_align_metric + + return target_labels, target_bboxes, target_scores, ops.cast(fg_mask, ms.bool_), target_gt_idx + + def get_pos_mask(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, mask_gt): + align_metric, overlaps = self.get_box_metrics(pd_scores, pd_bboxes, gt_labels, gt_bboxes) # (b, n_gt, N) + mask_in_gts = self.select_candidates_in_gts(anc_points, gt_bboxes, mask_gt) # (b, n_gt, N) + mask_topk = self.select_topk_candidates( + align_metric * mask_in_gts, topk_mask=ops.cast(ops.tile(mask_gt[..., None], (1, 1, self.topk)), ms.bool_) + ) # (b, n_gt, h*w) + mask_pos = mask_topk * mask_in_gts * mask_gt[:, :, None] # (b, n_gt, N) + + return mask_pos, align_metric, overlaps + + def select_topk_candidates(self, metrics, topk_mask=None): + """ + Args: + metrics: (b, n_gt, N). + topk_mask: (b, n_gt, topk) or None + Returns: + mask: (b, n_gt, N) + """ + + num_anchors = metrics.shape[-1] # N + topk_metrics, topk_idxs = ops.top_k(metrics, self.topk) # (b, n_gt, topk) + if topk_mask is None: + topk_mask = ops.tile(topk_metrics.max(-1, keepdims=True) > self.eps, (1, 1, self.topk)) # (b, n_gt, topk) + topk_idxs = mnp.where(topk_mask, topk_idxs, ops.zeros_like(topk_idxs)) # (b, n_gt, topk) + is_in_topk = ops.one_hot(topk_idxs, num_anchors, ops.ones(1, ms.float32), ops.zeros(1, ms.float32)).sum( + -2 + ) # (b, n_gt, topk, N) -> (b, n_gt, N) + # filter invalid bboxes + is_in_topk = mnp.where(is_in_topk > 1, ops.zeros(1, ms.float32), is_in_topk) + is_in_topk = ops.cast(is_in_topk, metrics.dtype) + + return is_in_topk + + def get_box_metrics(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes): + bs, n_gt, _ = gt_labels.shape + + ind0 = ops.tile(mnp.arange(bs, dtype=ms.int32).view(-1, 1), (1, n_gt)).view(-1, 1) # (b*n_gt, 1) + ind1 = ops.cast(gt_labels, ms.int32).squeeze(-1).view(-1, 1) # (b*n_gt, 1) + bbox_scores = ops.gather_nd( + pd_scores.transpose((0, 2, 1)), ops.concat((ind0, ind1), axis=1) + ) # (b, N, 80)->(b, 80, N)->(b*n_gt, N) + bbox_scores = bbox_scores.view(bs, n_gt, -1) + + # (b, n_gt, 1, 4), (b, 1, N, 4) -> (b, n_gt, N) + overlaps = ( + bbox_iou(gt_bboxes.expand_dims(2), pd_bboxes.expand_dims(1), xywh=False, CIoU=True).squeeze(3).clip(0, None) + ) + align_metric = bbox_scores.pow(self.alpha) * overlaps.pow(self.beta) + return align_metric, overlaps + + def get_targets(self, gt_labels, gt_bboxes, target_gt_idx, fg_mask): + """ + Args: + gt_labels: (b, n_gt, 1) + gt_bboxes: (b, n_gt, 4) + target_gt_idx: (b, N) + fg_mask: (b, N) + """ + + # assigned target labels + bs, n_gt, _ = gt_labels.shape + batch_ind = mnp.arange(bs)[:, None] # (b, 1) + target_gt_idx = target_gt_idx + batch_ind * n_gt # (b, N) + target_labels = ops.cast(gt_labels, ms.int32).flatten()[target_gt_idx] # (b, N) + + # assigned target boxes + target_bboxes = gt_bboxes.view(-1, 4)[target_gt_idx] # (b, n_gt, 4) -> (b * n_gt, 4) -> (b, N) + + # assigned target scores + target_labels.clip(0, None) + target_scores = ops.one_hot( + target_labels, self.num_classes, on_value=ops.ones(1, ms.int32), off_value=ops.zeros(1, ms.int32) + ) # (b, N, 80) + fg_scores_mask = ops.tile(fg_mask[:, :, 
None], (1, 1, self.num_classes)) # (b, N) -> (b, N, 80) + target_scores = mnp.where(fg_scores_mask > 0, target_scores, ops.zeros(1, ms.int32)) + + return target_labels, target_bboxes, target_scores + + @staticmethod + def select_candidates_in_gts(xy_centers, gt_bboxes, mask_gt=None, eps=1e-9): + """select the positive anchor center in gt + + Args: + xy_centers: (N, 2) + gt_bboxes: (bs, n_gt, 4) + mask_gt: (bs, n_gt) or None + Return: + select: shape(bs, n_gt, N) + """ + n_anchors = xy_centers.shape[0] + bs, n_boxes, _ = gt_bboxes.shape + x, y = ops.split(xy_centers.view(1, -1, 2), split_size_or_sections=1, axis=-1) # (1, N, 2) -> (1, N, 1) + left, top, right, bottom = ops.split( + gt_bboxes.view(-1, 1, 4), split_size_or_sections=1, axis=-1 + ) # (bs, n_gt, 4)->(bs*n_gt, 1, 4)->(bs*n_gt, 1, 1) + select = ops.logical_and( + ops.logical_and((x - left) > eps, (y - top) > eps), ops.logical_and((right - x) > eps, (bottom - y) > eps) + ).view( + bs, n_boxes, n_anchors + ) # (bs, n_gt, N) + + if mask_gt is not None: + select = ops.cast(select, ms.float32) * ops.cast(mask_gt[..., None], ms.float32) + + return select + + @staticmethod + def select_highest_overlaps(mask_pos, overlaps, n_gt): + """if an anchor box is assigned to multiple gts, + the one with the highest iou will be selected. + + Args: + mask_pos: (b, n_gt, N) + overlaps: (b, n_gt, N) + Return: + target_gt_idx: (b, N) + fg_mask: (b, N) + mask_pos: (b, n_gt, N) + """ + + fg_mask = mask_pos.sum(-2) # (b, n_gt, N) -> (b, N) + + # if fg_mask.max() > 1: # one anchor is assigned to multiple gt_bboxes + mask_multi_gts = ops.tile(ops.expand_dims(fg_mask > 1, 1), (1, n_gt, 1)) # (b, n_gt, N) + max_overlaps_idx = overlaps.argmax(1) # (b, n_gt, N) -> (b, N) + is_max_overlaps = ops.one_hot( + max_overlaps_idx, n_gt, on_value=ops.ones(1, ms.int32), off_value=ops.zeros(1, ms.int32) + ) # (b, N, n_gt) + is_max_overlaps = ops.cast( + ops.transpose(is_max_overlaps, (0, 2, 1)), overlaps.dtype + ) # (b, N, n_gt) -> (b, n_gt, N) + mask_pos = mnp.where(mask_multi_gts, is_max_overlaps, mask_pos) + fg_mask = mask_pos.sum(-2) + + # find each grid serve which gt(index) + target_gt_idx = mask_pos.argmax(-2) # (b, h*w) + return target_gt_idx, fg_mask, mask_pos + + +def xywh2xyxy(x): + # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right + y = ops.Identity()(x) + y[..., 0] = x[..., 0] - x[..., 2] / 2 # top left x + y[..., 1] = x[..., 1] - x[..., 3] / 2 # top left y + y[..., 2] = x[..., 0] + x[..., 2] / 2 # bottom right x + y[..., 3] = x[..., 1] + x[..., 3] / 2 # bottom right y + return y + + +def xyxy2xywh(x): + """ + Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height) format. + + Args: + x (Tensor): The input bounding box coordinates in (x1, y1, x2, y2) format. + Returns: + y (Tensor): The bounding box coordinates in (x, y, width, height) format. 
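+ Example (added): (x1, y1, x2, y2) = (10, 10, 30, 50) becomes
+ (x, y, w, h) = (20, 30, 20, 40).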
+ """ + y = ops.Identity()(x) + y[..., 0] = (x[..., 0] + x[..., 2]) / 2 # x center + y[..., 1] = (x[..., 1] + x[..., 3]) / 2 # y center + y[..., 2] = x[..., 2] - x[..., 0] # width + y[..., 3] = x[..., 3] - x[..., 1] # height + return y + + +@ops.constexpr +def get_tensor(x, dtype=ms.float32): + return Tensor(x, dtype) diff --git a/community/cv/ShipWise/mindyolo/models/losses/yolox_loss.py b/community/cv/ShipWise/mindyolo/models/losses/yolox_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..2946bfe2ba1cba7a590cb17c44a26d624b0b83d4 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/losses/yolox_loss.py @@ -0,0 +1,306 @@ +import numpy as np + +import mindspore as ms +import mindspore.numpy as mnp +from mindspore import Tensor, nn, ops + +from mindyolo.models.layers.utils import box_clip, box_cxcywh_to_xyxy, box_scale, box_xyxy_to_cxcywh +from mindyolo.models.losses.iou_loss import batch_box_iou, bbox_iou +from mindyolo.models.registry import register_model + +__all__ = ["YOLOXLoss"] + + +@register_model +class YOLOXLoss(nn.Cell): + """yolox with loss cell""" + + def __init__( + self, + nc=80, + input_size=(640, 640), + num_candidate_ota=10, + strides=(8, 16, 32), + use_l1=False, + use_summary=False, + **kwargs + ): + super(YOLOXLoss, self).__init__() + self.n_candidate_k = num_candidate_ota + self.on_value = Tensor(1.0, ms.float32) + self.off_value = Tensor(0.0, ms.float32) + self.num_class = nc + + self.unsqueeze = ops.ExpandDims() + self.reshape = ops.Reshape() + self.one_hot = ops.OneHot() + self.zeros = ops.ZerosLike() + self.sort_ascending = ops.Sort(descending=False) + self.batch_matmul_trans_a = ops.BatchMatMul(transpose_a=True) + self.bce_loss = nn.BCEWithLogitsLoss(reduction="none") + self.l1_loss = nn.L1Loss(reduction="none") + + self.strides = strides + self.input_size = input_size + self.grids = [(input_size[0] // _stride) * (input_size[1] // _stride) for _stride in strides] + self.num_total_anchor = sum(self.grids) + self.anchor_center_pos, self.anchor_strides = self._get_anchor_center_and_stride(norm=False) + + self.use_l1 = use_l1 + self.use_summary = use_summary + self.summary = ops.ScalarSummary() + self.assign = ops.Assign() + + self.loss_item_name = ["loss", "lbox", "lobj", "lcls", "lboxl1"] # branch name returned by lossitem for print + + def _get_anchor_center_and_stride(self, norm=False): + """ + creat a table for all layer of anchors(grids), the value is the pixel position of the grid center and its stride. 
+ The coordinate of the value is relative to the input img + Returns: + anchor_center_pos (Tensor[num_total_anchor, 2]): pixel position of the grid center + anchor_strides (Tensor[num_total_anchor,]): anchor strides + """ + + anchor_strides_list = [] + for s, g in zip(self.strides, self.grids): + layer_stride = ops.ones((g,), ms.float32) * float(s) + anchor_strides_list.append(layer_stride) + anchor_strides = ops.concat(anchor_strides_list) + # (num_total_anchor, 2) + anchor_strides = ops.stack([anchor_strides, anchor_strides], axis=1) + + anchor_center_pos_list = [] + for stride in self.strides: + size_x = self.input_size[0] // stride + size_y = self.input_size[1] // stride + grid_x, grid_y = ops.meshgrid(mnp.arange(size_x), mnp.arange(size_y)) + grids = ops.stack((grid_x, grid_y), 2).reshape(-1, 2) + anchor_center_pos_list.append(grids) + + # (num_total_anchor, 2) + anchor_center_pos = ops.concat(anchor_center_pos_list, 0) + + # to the scale of input img + anchor_center_pos = (anchor_center_pos + 0.5) * anchor_strides + + if norm: + anchor_center_pos[..., 0] /= self.input_size[0] + anchor_center_pos[..., 1] /= self.input_size[1] + + anchor_strides[..., 0] /= self.input_size[0] + anchor_strides[..., 1] /= self.input_size[1] + + return anchor_center_pos, anchor_strides + + def in_box(self, anchors, boxes): + splitted_diff1 = anchors - boxes[..., :2] + splitted_diff2 = boxes[..., 2:] - anchors + temp1 = ops.logical_and(splitted_diff1[..., 0] > 0.0, splitted_diff1[..., 1] > 0.0) + temp2 = ops.logical_and(splitted_diff2[..., 0] > 0.0, splitted_diff2[..., 1] > 0.0) + in_mask = ops.logical_and(temp1, temp2) + + return in_mask + + def _get_foreground(self, gt_boxes, gt_valid_mask, center_radius=1.5): + """ + get the mask of foreground anchor point, + ref: simOTA, link + Args: + gt_boxes (Tensor[bs, num_gt_max, 4]): gt box in [x1,y1, x2, y2] format, normed + gt_valid_mask (Tensor[bs, num_gt_max]) : gt box valid mask, indicates valid if true + num_valid_gt (int): num of valid gt boxes + center_radius (float): radius threshold to judge whether an anchor is an inlier of the gt center. + The unit is pixel in the feature map scale. + Returns: + fg_mask (Tensor(bs, num_total_anchor)): mask to indicate whether an anchor falls in any gt box + in_center_box_mask (Tensor(bs, num_gt_max, num_total_anchor)): mask to indicate whether an anchor + falls both in a specific gt box and the core box with radius center_radius + + """ + bs, num_gt_max, _ = gt_boxes.shape + + gt_box_xyxy = gt_boxes + gt_box_center = 0.5 * (gt_box_xyxy[..., :2] + gt_box_xyxy[..., 2:]) + # 1. Gt box mask + # (bs, num_gt_max, num_total_anchor) + in_box_mask = self.in_box(self.anchor_center_pos, gt_box_xyxy.expand_dims(2)) + # fg_mask = in_box_mask.any(1) + + # 2. Gt core box mask + # (bs, num_gt_max, num_total_anchor, 4) + gt_core_box_xyxy = ops.concat( + [ + gt_box_center[:, :, None, :] - center_radius * self.anchor_strides, + gt_box_center[:, :, None, :] + center_radius * self.anchor_strides, + ], + axis=-1, + ) + # (bs, num_gt_max, num_total_anchor) + in_center_mask = self.in_box(self.anchor_center_pos, gt_core_box_xyxy) + in_center_box_mask = ops.logical_and(in_box_mask, in_center_mask) + + # 3. 
Fill padding pos with false (bs, num_gt_max, num_total_anchor) + expanded_gt_valid_mask = ops.repeat_elements( + gt_valid_mask[:, :, None].astype(ms.int32), rep=self.num_total_anchor, axis=2 + ).astype(ms.bool_) + in_center_box_mask = ops.logical_and(expanded_gt_valid_mask, in_center_box_mask) + pre_fg_mask = ops.logical_and(expanded_gt_valid_mask, in_box_mask.any(1, keep_dims=True)) + return in_center_box_mask, pre_fg_mask + + def construct(self, preds, targets, imgs=None): + """ + forward with loss return + Args: + preds (Tensor[bs, num_total_anchor, 85]): + targets (Tensor[bs, num_gt_max, 6]): 0: batch_id, 1: label, 2-6: box + """ + gt_valid_mask = targets[..., 1] >= 0 # defalut class column + gt_box_xyxy = box_cxcywh_to_xyxy(targets[:, :, 2:]) # (batch_size, gt_max, 4) in [xyxy] format + # reverse norm + gt_box_xyxy_raw = box_clip(box_scale(gt_box_xyxy, self.input_size), self.input_size) + # to cxcywh format + bbox_true = box_xyxy_to_cxcywh(gt_box_xyxy_raw) + is_inbox_and_incenter, pre_fg_mask = self._get_foreground(gt_box_xyxy_raw, gt_valid_mask) + + batch_size = preds.shape[0] + gt_max = targets.shape[1] + outputs = preds # batch_size, 8400, 85 + total_num_anchors = outputs.shape[1] + bbox_preds = outputs[:, :, :4] # batch_size, num_total_anchor, 4 + + obj_preds = outputs[:, :, 4:5] # batch_size, num_total_anchor, 1 + cls_preds = outputs[:, :, 5:] # (batch_size, num_total_anchor, num_class) + + # process label + gt_classes = ops.cast(targets[:, :, 1:2].squeeze(-1), ms.int32) + pair_wise_ious = batch_box_iou(bbox_true, bbox_preds, xywh=True) # (batch_size, gt_max, 8400) + pair_wise_ious = pair_wise_ious * pre_fg_mask + pair_wise_iou_loss = -ops.log(pair_wise_ious + 1e-8) * pre_fg_mask + gt_classes_ = self.one_hot(gt_classes, self.num_class, self.on_value, self.off_value) + # (bs, num_gt_max, num_class) -> (bs, num_gt_max, num_total_anchor, num_class) + gt_classes_expaned = ops.repeat_elements(self.unsqueeze(gt_classes_, 2), rep=total_num_anchors, axis=2) + gt_classes_expaned = ops.stop_gradient(gt_classes_expaned) + cls_preds_ = ops.sigmoid(ops.repeat_elements(self.unsqueeze(cls_preds, 1), rep=gt_max, axis=1)) * ops.sigmoid( + ops.repeat_elements(self.unsqueeze(obj_preds, 1), rep=gt_max, axis=1) + ) + # (bs, num_gt_max, num_total_anchor, num_class) -> (bs, num_gt_max, num_total_anchor) + pair_wise_cls_loss = ops.reduce_sum( + ops.binary_cross_entropy(ops.sqrt(cls_preds_), gt_classes_expaned, None, reduction="none"), -1 + ) + + pair_wise_cls_loss = pair_wise_cls_loss * pre_fg_mask + cost = pair_wise_cls_loss + 3.0 * pair_wise_iou_loss + punishment_cost = 1000.0 * (1.0 - ops.cast(is_inbox_and_incenter, ms.float32)) + cost = ops.cast(cost + punishment_cost, ms.float16) + # dynamic k matching + ious_in_boxes_matrix = pair_wise_ious # (batch_size, gt_max, 8400) + ious_in_boxes_matrix = ops.cast(pre_fg_mask * ious_in_boxes_matrix, ms.float16) + topk_ious, _ = ops.top_k(ious_in_boxes_matrix, self.n_candidate_k, sorted=True) + + dynamic_ks = ops.reduce_sum(topk_ious, 2).astype(ms.int32).clip(min=1, max=total_num_anchors - 1) + + # (1, batch_size * gt_max, 2) + batch_iter = Tensor(np.arange(0, batch_size * gt_max), ms.int32) + dynamic_ks_indices = ops.stack((batch_iter, dynamic_ks.reshape((-1,))), axis=1) + + dynamic_ks_indices = ops.stop_gradient(dynamic_ks_indices) + + values, _ = ops.top_k(-cost, self.n_candidate_k, sorted=True) # b_s , 50, 8400 + values = ops.reshape(-values, (-1, self.n_candidate_k)) + max_neg_score = self.unsqueeze(ops.gather_nd(values, 
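+ # added note: this gathers, per gt, the (dynamic_k+1)-th lowest matching cost
+ # as a strict threshold, so roughly dynamic_k anchors qualify as positives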
dynamic_ks_indices).reshape(batch_size, -1), 2) + # positive sample for each gt + pos_mask = ops.cast(cost < max_neg_score, ms.float32) # (batch_size, gt_num, 8400) + pos_mask = pre_fg_mask * pos_mask + # ----dynamic_k---- END----------------------------------------------------------------------------------------- + + # pick the one with the lower cost if a sample is positive for more than one gt + cost_t = cost * pos_mask + (1.0 - pos_mask) * 2000.0 + min_index = ops.argmin(cost_t, axis=1) + ret_posk = ops.transpose(ops.one_hot(min_index, gt_max, self.on_value, self.off_value), (0, 2, 1)) + pos_mask = pos_mask * ret_posk + pos_mask = ops.stop_gradient(pos_mask) + # AA problem--------------END ---------------------------------------------------------------------------------- + + # calculate target --------------------------------------------------------------------------------------------- + # Cast precision + pos_mask = ops.cast(pos_mask, ms.float16) + bbox_true = ops.cast(bbox_true, ms.float16) + gt_classes_ = ops.cast(gt_classes_, ms.float16) + + reg_target = self.batch_matmul_trans_a(pos_mask, bbox_true) # (batch_size, 8400, 4) + pred_ious_this_matching = self.unsqueeze(ops.reduce_sum((ious_in_boxes_matrix * pos_mask), 1), -1) + cls_target = self.batch_matmul_trans_a(pos_mask, gt_classes_) + + cls_target = cls_target * pred_ious_this_matching + obj_target = ops.reduce_max(pos_mask, 1) # (batch_size, 8400) + + # calculate l1_target + reg_target = ops.stop_gradient(reg_target) + cls_target = ops.stop_gradient(cls_target) + obj_target = ops.stop_gradient(obj_target) + bbox_preds = ops.cast(bbox_preds, ms.float32) + reg_target = ops.cast(reg_target, ms.float32) + obj_preds = ops.cast(obj_preds, ms.float32) + obj_target = ops.cast(obj_target, ms.float32) + cls_preds = ops.cast(cls_preds, ms.float32) + cls_target = ops.cast(cls_target, ms.float32) + loss_l1 = 0.0 + if self.use_l1: + l1_target = self.get_l1_format(reg_target) + l1_preds = self.get_l1_format(bbox_preds) + l1_target = ops.stop_gradient(l1_target) + l1_target = ops.cast(l1_target, ms.float32) + l1_preds = ops.cast(l1_preds, ms.float32) + loss_l1 = ops.reduce_sum(self.l1_loss(l1_preds, l1_target), -1) * obj_target + loss_l1 = ops.reduce_sum(loss_l1) + # calculate target -----------END------------------------------------------------------------------------------- + iou = bbox_iou(bbox_preds.reshape(-1, 4), reg_target.reshape(-1, 4), xywh=True).reshape(batch_size, -1) + loss_iou = (1 - iou * iou) * obj_target # (bs, num_total_anchor) + loss_iou = ops.reduce_sum(loss_iou) + + loss_obj = self.bce_loss(ops.reshape(obj_preds, (-1, 1)), ops.reshape(obj_target, (-1, 1))) + loss_obj = ops.reduce_sum(loss_obj) + + loss_cls = ops.reduce_sum(self.bce_loss(cls_preds, cls_target), -1) * obj_target + loss_cls = ops.reduce_sum(loss_cls) + + num_fg_mask = ops.reduce_sum(obj_target) == 0 + num_fg = (num_fg_mask == 0) * ops.reduce_sum(obj_target) + 1.0 * num_fg_mask + + loss_iou = 5 * loss_iou / num_fg + loss_cls = loss_cls / num_fg + loss_obj = loss_obj / num_fg + loss_l1 = loss_l1 / num_fg + loss_all = loss_iou + loss_cls + loss_obj + loss_l1 + + if self.use_summary: + self.summary("loss", loss_all) + self.summary("num_fg", num_fg) + self.summary("loss_iou", loss_iou) + self.summary("loss_cls", loss_cls) + self.summary("loss_obj", loss_obj) + self.summary("loss_l1", loss_l1) + + return loss_all, ops.stop_gradient(ops.stack((loss_all, loss_iou, loss_obj, loss_cls, loss_l1))) + + def get_l1_format_single(self, reg_target, stride, eps): + 
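+ # added note: the auxiliary L1 branch regresses raw grid offsets, so targets
+ # are re-encoded per level as xy / stride and log(wh / stride); e.g. a
+ # 64-px-wide box on the stride-8 level encodes its width as log(8) ~= 2.08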
"""calculate L1 loss related""" + reg_target = reg_target / stride + reg_target_xy = reg_target[:, :, :2] + reg_target_wh = reg_target[:, :, 2:] + reg_target_wh = ops.log(reg_target_wh + eps) + return ops.concat((reg_target_xy, reg_target_wh), -1) + + def get_l1_format(self, reg_target, eps=1e-8): + """calculate L1 loss related""" + reg_target_l = reg_target[:, 0 : self.grids[0], :] # (bs, 6400, 4) + reg_target_m = reg_target[:, self.grids[0] : self.grids[1] + self.grids[0], :] # (bs, 1600, 4) + reg_target_s = reg_target[:, -self.grids[2] :, :] # (bs, 400, 4) + + reg_target_l = self.get_l1_format_single(reg_target_l, self.strides[0], eps) + reg_target_m = self.get_l1_format_single(reg_target_m, self.strides[1], eps) + reg_target_s = self.get_l1_format_single(reg_target_s, self.strides[2], eps) + + l1_target = ops.concat([reg_target_l, reg_target_m, reg_target_s], axis=1) + return l1_target diff --git a/community/cv/ShipWise/mindyolo/models/model_factory.py b/community/cv/ShipWise/mindyolo/models/model_factory.py new file mode 100644 index 0000000000000000000000000000000000000000..5d7c57511d05a565e6085e663def1488a5e3f40b --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/model_factory.py @@ -0,0 +1,214 @@ +import math +import os +from copy import deepcopy + +from mindspore import load_checkpoint, load_param_into_net, nn, ops + +from mindyolo.utils import logger +from .heads import * +from .layers import * +from .registry import is_model, model_entrypoint +from .initializer import initialize_defult + +__all__ = ["create_model", "build_model_from_cfg"] + + +def create_model( + model_name: str, + model_cfg: dict = None, + in_channels: int = 3, + num_classes: int = 80, + checkpoint_path: str = "", + **kwargs, +): + model_args = dict(cfg=model_cfg, num_classes=num_classes, in_channels=in_channels) + kwargs = {k: v for k, v in kwargs.items() if v is not None} + + if not is_model(model_name): + raise RuntimeError(f"Unknown model {model_name}") + + create_fn = model_entrypoint(model_name) + model = create_fn(**model_args, **kwargs) + + if checkpoint_path: + assert os.path.isfile(checkpoint_path) and checkpoint_path.endswith( + ".ckpt" + ), f"[{checkpoint_path}] not a ckpt file." + checkpoint_param = load_checkpoint(checkpoint_path) + load_param_into_net(model, checkpoint_param) + logger.info(f"Load checkpoint from [{checkpoint_path}] success.") + + return model + + +# Tools: build model from yaml cfg +def build_model_from_cfg(**kwargs): + return Model(**kwargs) + + +class Model(nn.Cell): + def __init__(self, model_cfg, in_channels=3, num_classes=80, sync_bn=False): + super(Model, self).__init__() + self.model, self.save, self.layers_param = parse_model( + deepcopy(model_cfg), ch=[in_channels], nc=num_classes, sync_bn=sync_bn + ) + # Recompute + if hasattr(model_cfg, "recompute") and model_cfg.recompute and model_cfg.recompute_layers > 0: + for i in range(model_cfg.recompute_layers): + self.model[i].recompute() + logger.info( + f"Turn on recompute, and the results of the first {model_cfg.recompute_layers} layers " + f"will be recomputed." 
+ ) + initialize_defult(self) + + def construct(self, x): + y, dt = (), () # outputs + for i in range(len(self.model)): + m = self.model[i] + iol, f, _, _ = self.layers_param[i] # iol: index of layers + + if not (isinstance(f, int) and f == -1): # if not from previous layer + if isinstance(f, int): + x = y[f] + else: + _x = () + for j in f: + if j == -1: + _x += (x,) + else: + _x += (y[j],) + x = _x + + x = m(x) # run + + y += (x if iol in self.save else None,) # save output + return x + + @staticmethod + @ops.constexpr + def _get_h_w_list(ratio, gs, hw): + return tuple([math.ceil(x * ratio / gs) * gs for x in hw]) + + +def parse_model(d, ch, nc, sync_bn=False): # model_dict, input_channels(3) + _SYNC_BN = sync_bn + if _SYNC_BN: + logger.info("Parse model with Sync BN.") + verbose = d.get("verbose_log", False) + if verbose: + logger.info("") + logger.info("network structure are as follows") + logger.info("%3s%18s%3s%10s %-60s%-40s" % ("", "from", "n", "params", "module", "arguments")) + anchors, reg_max, max_channels = d.get("anchors", None), d.get("reg_max", None), d.get("max_channels", None) + stride, gd, gw = d.stride, d.depth_multiple, d.width_multiple + nc, na = ( + nc, + (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors, + ) # number of classes, number of anchors + + layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out + layers_param = [] + num_total_param, num_train_param = 0, 0 + for i, (f, n, m, args) in enumerate(d.backbone + d.head): # from, number, module, args + kwargs = {} + m = eval(m) if isinstance(m, str) else m # eval strings + + _args = [] + for j, a in enumerate(args): + if isinstance(a, str) and "=" in a: + _index = a.find("=") + k, v = a[:_index], a[_index + 1 :] + try: + v = eval(v) + except: + logger.warning(f"Parse Model, args: {k}={v}, keep str type") + kwargs[k] = v + else: + try: + a = eval(a) if isinstance(a, str) else a + except: + logger.warning(f"Parse Model, args: {a}, keep str type") + _args += [ + a, + ] + args = _args + + n = max(round(n * gd), 1) if n > 1 else n # depth gain + if m in ( + nn.Conv2d, + ConvNormAct, + RepConv, + DownC, + SPPCSPC, + SPPF, + C3, + C2f, + Bottleneck, + Residualblock, + Focus, + DWConvNormAct, + DWBottleneck, + DWC3, + ): + c1, c2 = ch[f], args[0] + if max_channels: + c2 = min(c2, max_channels) + c2 = math.ceil(c2 * gw / 8) * 8 + + args = [c1, c2, *args[1:]] + if m in ( + ConvNormAct, + RepConv, + DownC, + SPPCSPC, + SPPF, + C3, + C2f, + Bottleneck, + Residualblock, + DWConvNormAct, + DWBottleneck, + DWC3, + ): + kwargs["sync_bn"] = sync_bn + if m in (DownC, SPPCSPC, C3, C2f, DWC3): + args.insert(2, n) # number of repeats + n = 1 + elif m in (nn.BatchNorm2d, nn.SyncBatchNorm): + args = [ch[f]] + elif m in (Concat,): + c2 = sum([ch[x] for x in f]) + elif m is Shortcut: + c2 = ch[f[0]] + elif m in (YOLOv7Head, YOLOv7AuxHead, YOLOv5Head, YOLOv4Head, YOLOv3Head): + args.append([ch[x] for x in f]) + if isinstance(args[1], int): # number of anchors + args[1] = [list(range(args[1] * 2))] * len(f) + elif m in (YOLOv8Head, YOLOv8SegHead, YOLOXHead): # head of anchor free + args.append([ch[x] for x in f]) + if m in (YOLOv8SegHead,): + args[3] = math.ceil(min(args[3], max_channels) * gw / 8) * 8 + elif m is ReOrg: + c2 = ch[f] * 4 + else: + c2 = ch[f] + + m_ = nn.SequentialCell([m(*args, **kwargs) for _ in range(n)]) if n > 1 else m(*args, **kwargs) + + t = str(m) # module type + np = sum([x.size for x in m_.get_parameters()]) # number params + np_trainable = sum([x.size for x in m_.trainable_params()]) # number 
trainable params + num_total_param += np + num_train_param += np_trainable + m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params + layers_param.append((i, f, t, np)) + if verbose: + logger.info("%3s%18s%3s%10.0f %-60s%-40s" % (i, f, n, np, t, args + [kwargs] if kwargs else args)) # print + save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist + layers.append(m_) + if i == 0: + ch = [] + ch.append(c2) + logger.info(f"number of network params, total: {num_total_param / 1e6}M, trainable: {num_train_param / 1e6}M") + return nn.CellList(layers), sorted(save), layers_param diff --git a/community/cv/ShipWise/mindyolo/models/registry.py b/community/cv/ShipWise/mindyolo/models/registry.py new file mode 100644 index 0000000000000000000000000000000000000000..08e369a2706c275a70bc2c636c9eaf8fcbf28c65 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/registry.py @@ -0,0 +1,106 @@ +"""model registry and list""" +import fnmatch +import sys +from collections import defaultdict + +__all__ = ["list_models", "is_model", "model_entrypoint", "list_modules", "is_model_in_modules", "is_model_pretrained"] + +_module_to_models = defaultdict(set) +_model_to_module = {} +_model_entrypoints = {} +_model_has_pretrained = set() + + +def register_model(fn): + # lookup containing module + mod = sys.modules[fn.__module__] + module_name_split = fn.__module__.split(".") + module_name = module_name_split[-1] if len(module_name_split) else "" + + # add model to __all__ in module + model_name = fn.__name__ + if hasattr(mod, "__all__"): + mod.__all__.append(model_name) + else: + mod.__all__ = [model_name] + + # add entries to registry dict/sets + _model_entrypoints[model_name] = fn + _model_to_module[model_name] = module_name + _module_to_models[module_name].add(model_name) + has_pretrained = False + if hasattr(mod, "default_cfgs") and model_name in mod.default_cfgs: + cfg = mod.default_cfgs[model_name] + has_pretrained = "url" in cfg and cfg["url"] + if has_pretrained: + _model_has_pretrained.add(model_name) + return fn + + +def list_models(filter="", module="", pretrained=False, exclude_filters=""): + if module: + all_models = list(_module_to_models[module]) + else: + all_models = _model_entrypoints.keys() + + if filter: + models = [] + include_filters = filter if isinstance(filter, (tuple, list)) else [filter] + for f in include_filters: + include_models = fnmatch.filter(all_models, f) # include these models + if include_models: + models = set(models).union(include_models) + else: + models = all_models + + if exclude_filters: + if not isinstance(exclude_filters, (tuple, list)): + exclude_filters = [exclude_filters] + for xf in exclude_filters: + exclude_models = fnmatch.filter(models, xf) # exclude these models + if exclude_models: + models = set(models).difference(exclude_models) + + if pretrained: + models = _model_has_pretrained.intersection(models) + + models = sorted(list(models)) + + return models + + +def is_model(model_name): + """ + Check if a model name exists + """ + return model_name in _model_entrypoints + + +def model_entrypoint(model_name): + """ + Fetch a model entrypoint for specified model name + """ + return _model_entrypoints[model_name] + + +def list_modules(): + """ + Return list of module names that contain models / model entrypoints + """ + modules = _module_to_models.keys() + return list(sorted(modules)) + + +def is_model_in_modules(model_name, module_names): + """ + Check if a model exists within a subset of 
modules + Args: + model_name (str) - name of model to check + module_names (tuple, list, set) - names of modules to search in + """ + assert isinstance(module_names, (tuple, list, set)) + return any(model_name in _module_to_models[n] for n in module_names) + + +def is_model_pretrained(model_name): + return model_name in _model_has_pretrained diff --git a/community/cv/ShipWise/mindyolo/models/shipwise.py b/community/cv/ShipWise/mindyolo/models/shipwise.py new file mode 100644 index 0000000000000000000000000000000000000000..23e9e474cc546f99c80cd8d301f145d1ef4a7559 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/shipwise.py @@ -0,0 +1,104 @@ +import numpy as np + +import mindspore as ms +from mindspore import Tensor, nn + +from mindyolo.models.heads.yolov8_head import YOLOv8Head +from mindyolo.models.model_factory import build_model_from_cfg +from mindyolo.models.registry import register_model + +__all__ = ["ShipWise", "shipwise"] + + +def _cfg(url="", **kwargs): + return {"url": url, **kwargs} + + +default_cfgs = {"shipwise": _cfg(url="")} + + +class SEBlock(nn.Cell): + """Squeeze-and-Excitation Block for channel-wise attention.""" + + def __init__(self, channels, reduction=16): + super(SEBlock, self).__init__() + self.pool = nn.AdaptiveAvgPool2D(1) + self.fc = nn.SequentialCell( + nn.Dense(channels, channels // reduction, has_bias=False), + nn.ReLU(), + nn.Dense(channels // reduction, channels, has_bias=False), + nn.Sigmoid() + ) + + def construct(self, x): + b, c, _, _ = x.shape + y = self.pool(x).view(b, c) + y = self.fc(y).view(b, c, 1, 1) + return x * y + + +class ShipWise(nn.Cell): + def __init__(self, cfg, in_channels=3, num_classes=None, sync_bn=False): + super(ShipWise, self).__init__() + self.cfg = cfg + self.stride = Tensor(np.array(cfg.stride), ms.int32) + self.stride_max = int(max(self.cfg.stride)) + ch, nc = in_channels, num_classes + + self.nc = nc # override yaml value + + # Build the base model + self.model = build_model_from_cfg( + model_cfg=cfg, in_channels=ch, num_classes=nc, sync_bn=sync_bn + ) + + # Insert SEBlock into the model without changing input/output interface + self.insert_se_block() + + self.names = [str(i) for i in range(nc)] # default names + + self.initialize_weights() + + def construct(self, x): + return self.model(x) + + def insert_se_block(self): + """Insert SEBlock into the model's backbone without altering the input/output interface.""" + # Assuming the backbone is a SequentialCell + backbone = self.model.model[0] + if isinstance(backbone, nn.SequentialCell): + # Insert SEBlock after the last layer of the backbone + layers = list(backbone.cells()) + backbone_out_channels = layers[-1].out_channels + se_block = SEBlock(channels=backbone_out_channels) + + # Reconstruct the backbone with SEBlock + new_backbone = nn.SequentialCell(*layers, se_block) + self.model.model[0] = new_backbone + + def initialize_weights(self): + # Initialize the weights of SEBlock if present + backbone = self.model.model[0] + if isinstance(backbone, nn.SequentialCell): + for m in backbone.cells(): + if isinstance(m, SEBlock): + for layer in m.fc.cells(): + if isinstance(layer, nn.Dense): + ms.common.initializer.initializer( + ms.common.initializer.XavierUniform(), layer.weight.shape, layer.weight.dtype + ) + + # Reset parameters for Detect Head + m = self.model.model[-1] + if isinstance(m, YOLOv8Head): + m.initialize_biases() + m.dfl.initialize_conv_weight() + + +@register_model +def shipwise(cfg, in_channels=3, num_classes=None, **kwargs) -> ShipWise: + """Get ShipWise 
model.""" + model = ShipWise(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs) + return model + +# TODO: Preset pre-training model for ShipWise diff --git a/community/cv/ShipWise/mindyolo/models/yolov3.py b/community/cv/ShipWise/mindyolo/models/yolov3.py new file mode 100644 index 0000000000000000000000000000000000000000..9f87d2feeeda55923bcdc684e0500e1c06d94841 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/yolov3.py @@ -0,0 +1,66 @@ +import numpy as np + +import mindspore as ms +from mindspore import Tensor, nn + +from mindyolo.models.heads.yolov3_head import YOLOv3Head +from mindyolo.models.model_factory import build_model_from_cfg +from mindyolo.models.registry import register_model + +__all__ = ["YOLOv3", "yolov3"] + + +def _cfg(url="", **kwargs): + return {"url": url, **kwargs} + + +default_cfgs = {"yolov3": _cfg(url="")} + + +class YOLOv3(nn.Cell): + def __init__(self, cfg, in_channels=3, num_classes=None, sync_bn=False): + super(YOLOv3, self).__init__() + self.cfg = cfg + self.stride = Tensor(np.array(cfg.stride), ms.int32) + self.stride_max = int(max(self.cfg.stride)) + ch, nc = in_channels, num_classes + + self.nc = nc # override yaml value + self.model = build_model_from_cfg(model_cfg=cfg, in_channels=ch, num_classes=nc, sync_bn=sync_bn) + self.names = [str(i) for i in range(nc)] # default names + + self.initialize_weights() + + def construct(self, x): + return self.model(x) + + def initialize_weights(self): + # reset parameter for Detect Head + m = self.model.model[-1] + if isinstance(m, YOLOv3Head): + m.initialize_biases() + + +@register_model +def yolov3(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOv3: + """Get yolov3 model.""" + model = YOLOv3(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs) + return model + + +if __name__ == "__main__": + from mindyolo.models.model_factory import create_model + from mindyolo.utils.config import load_config, Config + + cfg, _, _ = load_config('../../configs/yolov3/yolov3.yaml') + cfg = Config(cfg) + network = create_model( + model_name=cfg.network.model_name, + model_cfg=cfg.network, + num_classes=cfg.data.nc, + sync_bn=cfg.sync_bn if hasattr(cfg, "sync_bn") else False, + ) + x = Tensor(np.random.randn(1, 3, 640, 640), ms.float32) + out = network(x) + out = out[0] if isinstance(out, (list, tuple)) else out + print(f"Output shape is {[o.shape for o in out]}") diff --git a/community/cv/ShipWise/mindyolo/models/yolov4.py b/community/cv/ShipWise/mindyolo/models/yolov4.py new file mode 100644 index 0000000000000000000000000000000000000000..08748c5f6d29c2b8593bc7d2c3432d22eca45205 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/yolov4.py @@ -0,0 +1,55 @@ +import numpy as np + +import mindspore as ms +from mindspore import Tensor, nn + +from mindyolo.models.model_factory import build_model_from_cfg +from mindyolo.models.registry import register_model + +__all__ = ["YOLOv4", "yolov4"] + + +def _cfg(url="", **kwargs): + return {"url": url, **kwargs} + + +default_cfgs = {"yolov4": _cfg(url="")} + + +class YOLOv4(nn.Cell): + def __init__(self, cfg, in_channels=3, num_classes=None, sync_bn=False): + super(YOLOv4, self).__init__() + self.cfg = cfg + ch, nc = in_channels, num_classes + + self.nc = nc # override yaml value + self.model = build_model_from_cfg(model_cfg=cfg, in_channels=ch, num_classes=nc, sync_bn=sync_bn) + self.names = [str(i) for i in range(nc)] # default names + + def construct(self, x): + return self.model(x) + + +@register_model +def yolov4(cfg, in_channels=3, 
num_classes=None, **kwargs) -> YOLOv4: + """Get yolov4 model.""" + model = YOLOv4(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs) + return model + + +if __name__ == "__main__": + from mindyolo.models.model_factory import create_model + from mindyolo.utils.config import load_config, Config + + cfg, _, _ = load_config('../../configs/yolov4/yolov4.yaml') + cfg = Config(cfg) + network = create_model( + model_name=cfg.network.model_name, + model_cfg=cfg.network, + num_classes=cfg.data.nc, + sync_bn=cfg.sync_bn if hasattr(cfg, "sync_bn") else False, + ) + x = Tensor(np.random.randn(1, 3, 640, 640), ms.float32) + out = network(x) + out = out[0] if isinstance(out, (list, tuple)) else out + print(f"Output shape is {[o.shape for o in out]}") diff --git a/community/cv/ShipWise/mindyolo/models/yolov5.py b/community/cv/ShipWise/mindyolo/models/yolov5.py new file mode 100644 index 0000000000000000000000000000000000000000..cb8931e8a64f703dec75d616b000e364464bc99d --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/yolov5.py @@ -0,0 +1,69 @@ +import numpy as np + +import mindspore as ms +from mindspore import Tensor, nn + +from mindyolo.models.heads.yolov5_head import YOLOv5Head +from mindyolo.models.model_factory import build_model_from_cfg +from mindyolo.models.registry import register_model + +__all__ = ["YOLOv5", "yolov5"] + + +def _cfg(url="", **kwargs): + return {"url": url, **kwargs} + + +default_cfgs = {"yolov5": _cfg(url="")} + + +class YOLOv5(nn.Cell): + def __init__(self, cfg, in_channels=3, num_classes=None, sync_bn=False): + super(YOLOv5, self).__init__() + self.cfg = cfg + self.stride = Tensor(np.array(cfg.stride), ms.int32) + self.stride_max = int(max(self.cfg.stride)) + ch, nc = in_channels, num_classes + + self.nc = nc # override yaml value + self.model = build_model_from_cfg(model_cfg=cfg, in_channels=ch, num_classes=nc, sync_bn=sync_bn) + self.names = [str(i) for i in range(nc)] # default names + + self.initialize_weights() + + def construct(self, x): + return self.model(x) + + def initialize_weights(self): + # reset parameter for Detect Head + m = self.model.model[-1] + if isinstance(m, YOLOv5Head): + m.initialize_biases() + + +@register_model +def yolov5(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOv5: + """Get yolov5 model.""" + model = YOLOv5(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs) + return model + + +# TODO: Preset pre-training model for yolov5-n/s/m + + +if __name__ == "__main__": + from mindyolo.models.model_factory import create_model + from mindyolo.utils.config import load_config, Config + + cfg, _, _ = load_config('../../configs/yolov5/yolov5s.yaml') + cfg = Config(cfg) + network = create_model( + model_name=cfg.network.model_name, + model_cfg=cfg.network, + num_classes=cfg.data.nc, + sync_bn=cfg.sync_bn if hasattr(cfg, "sync_bn") else False, + ) + x = Tensor(np.random.randn(1, 3, 640, 640), ms.float32) + out = network(x) + out = out[0] if isinstance(out, (list, tuple)) else out + print(f"Output shape is {[o.shape for o in out]}") diff --git a/community/cv/ShipWise/mindyolo/models/yolov7.py b/community/cv/ShipWise/mindyolo/models/yolov7.py new file mode 100644 index 0000000000000000000000000000000000000000..a0b100aa934eb3f9782f1f6a914e88752ee30206 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/yolov7.py @@ -0,0 +1,71 @@ +import numpy as np + +import mindspore as ms +from mindspore import Tensor, nn + +from mindyolo.models.heads.yolov7_head import YOLOv7AuxHead, YOLOv7Head +from 
mindyolo.models.model_factory import build_model_from_cfg +from .registry import register_model + +__all__ = ["YOLOv7", "yolov7"] + + +def _cfg(url="", **kwargs): + return {"url": url, **kwargs} + + +default_cfgs = {"yolov7": _cfg(url="")} + + +class YOLOv7(nn.Cell): + def __init__(self, cfg, in_channels=3, num_classes=None, sync_bn=False): + super(YOLOv7, self).__init__() + self.cfg = cfg + self.stride = Tensor(np.array(cfg.stride), ms.int32) + self.stride_max = int(max(self.cfg.stride)) + ch, nc = in_channels, num_classes + + self.nc = nc # override yaml value + self.model = build_model_from_cfg(model_cfg=cfg, in_channels=ch, num_classes=nc, sync_bn=sync_bn) + self.names = [str(i) for i in range(nc)] # default names + + self.initialize_weights() + + def construct(self, x): + return self.model(x) + + def initialize_weights(self): + # reset parameter for Detect Head + m = self.model.model[-1] + if isinstance(m, YOLOv7Head): + m.initialize_biases() + if isinstance(m, YOLOv7AuxHead): + m.initialize_aux_biases() + + +@register_model +def yolov7(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOv7: + """Get yolov7 model.""" + model = YOLOv7(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs) + return model + + +# TODO: Preset pre-training model for yolov7-tiny/l/x + + +if __name__ == "__main__": + from mindyolo.models.model_factory import create_model + from mindyolo.utils.config import load_config, Config + + cfg, _, _ = load_config('../../configs/yolov7/yolov7-tiny.yaml') + cfg = Config(cfg) + network = create_model( + model_name=cfg.network.model_name, + model_cfg=cfg.network, + num_classes=cfg.data.nc, + sync_bn=cfg.sync_bn if hasattr(cfg, "sync_bn") else False, + ) + x = Tensor(np.random.randn(1, 3, 640, 640), ms.float32) + out = network(x) + out = out[0] if isinstance(out, (list, tuple)) else out + print(f"Output shape is {[o.shape for o in out]}") diff --git a/community/cv/ShipWise/mindyolo/models/yolov8.py b/community/cv/ShipWise/mindyolo/models/yolov8.py new file mode 100644 index 0000000000000000000000000000000000000000..f67301f35cb892a00d55d807ccd65fd61f69a309 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/yolov8.py @@ -0,0 +1,70 @@ +import numpy as np + +import mindspore as ms +from mindspore import Tensor, nn + +from mindyolo.models.heads.yolov8_head import YOLOv8Head +from mindyolo.models.model_factory import build_model_from_cfg +from mindyolo.models.registry import register_model + +__all__ = ["YOLOv8", "yolov8"] + + +def _cfg(url="", **kwargs): + return {"url": url, **kwargs} + + +default_cfgs = {"yolov8": _cfg(url="")} + + +class YOLOv8(nn.Cell): + def __init__(self, cfg, in_channels=3, num_classes=None, sync_bn=False): + super(YOLOv8, self).__init__() + self.cfg = cfg + self.stride = Tensor(np.array(cfg.stride), ms.int32) + self.stride_max = int(max(self.cfg.stride)) + ch, nc = in_channels, num_classes + + self.nc = nc # override yaml value + self.model = build_model_from_cfg(model_cfg=cfg, in_channels=ch, num_classes=nc, sync_bn=sync_bn) + self.names = [str(i) for i in range(nc)] # default names + + self.initialize_weights() + + def construct(self, x): + return self.model(x) + + def initialize_weights(self): + # reset parameter for Detect Head + m = self.model.model[-1] + if isinstance(m, YOLOv8Head): + m.initialize_biases() + m.dfl.initialize_conv_weight() + + +@register_model +def yolov8(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOv8: + """Get yolov8 model.""" + model = YOLOv8(cfg=cfg, in_channels=in_channels, 
num_classes=num_classes, **kwargs) + return model + + +# TODO: Preset pre-training model for yolov8-n + + +if __name__ == "__main__": + from mindyolo.models.model_factory import create_model + from mindyolo.utils.config import load_config, Config + + cfg, _, _ = load_config('../../configs/yolov8/yolov8s.yaml') + cfg = Config(cfg) + network = create_model( + model_name=cfg.network.model_name, + model_cfg=cfg.network, + num_classes=cfg.data.nc, + sync_bn=cfg.sync_bn if hasattr(cfg, "sync_bn") else False, + ) + x = Tensor(np.random.randn(1, 3, 640, 640), ms.float32) + out = network(x) + out = out[0] if isinstance(out, (list, tuple)) else out + print(f"Output shape is {[o.shape for o in out]}") diff --git a/community/cv/ShipWise/mindyolo/models/yolox.py b/community/cv/ShipWise/mindyolo/models/yolox.py new file mode 100644 index 0000000000000000000000000000000000000000..7dde87051704b3ce8e715b1e0a85a9b26ced63ea --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/yolox.py @@ -0,0 +1,65 @@ +import numpy as np + +import mindspore as ms +from mindspore import Tensor, nn + +from mindyolo.models.registry import register_model +from mindyolo.models.heads import YOLOXHead +from mindyolo.models.model_factory import build_model_from_cfg + +__all__ = ["YOLOX", "yolox"] + + +def _cfg(url="", **kwargs): + return {"url": url, **kwargs} + +default_cfgs = {"yolox": _cfg(url="")} + + +class YOLOX(nn.Cell): + """connect yolox backbone and head""" + + def __init__(self, cfg, in_channels=3, num_classes=80, sync_bn=False): + super(YOLOX, self).__init__() + self.cfg = cfg + self.stride = Tensor(np.array(cfg.stride), ms.int32) + ch, nc = in_channels, num_classes + self.nc = nc + self.model = build_model_from_cfg(model_cfg=cfg, in_channels=ch, num_classes=nc, sync_bn=sync_bn) + self.names = [str(i) for i in range(nc)] + + self.initialize_weights() + + def construct(self, x): + return self.model(x) + + def initialize_weights(self): + # reset parameter for Detect Head + m = self.model.model[-1] + assert isinstance(m, YOLOXHead) + m.initialize_biases() + + +@register_model +def yolox(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOX: + """Get yolox model.""" + model = YOLOX(cfg, in_channels=in_channels, num_classes=num_classes, **kwargs) + return model + + +if __name__ == "__main__": + from mindyolo.models.model_factory import create_model + from mindyolo.utils.config import load_config, Config + + cfg, _, _ = load_config('../../configs/yolox/yolox-s.yaml') + cfg = Config(cfg) + network = create_model( + model_name=cfg.network.model_name, + model_cfg=cfg.network, + num_classes=cfg.data.nc, + sync_bn=cfg.sync_bn if hasattr(cfg, "sync_bn") else False, + ) + x = Tensor(np.random.randn(1, 3, 640, 640), ms.float32) + out = network(x) + out = out[0] if isinstance(out, (list, tuple)) else out + print(f"Output shape is {[o.shape for o in out]}") diff --git a/community/cv/ShipWise/mindyolo/optim/__init__.py b/community/cv/ShipWise/mindyolo/optim/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..40b4f1f50b603cf275474e39ae136bd571d21f15 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/optim/__init__.py @@ -0,0 +1,11 @@ +from . 
import ema, group_params, optim_factory, scheduler +from .ema import * +from .group_params import * +from .optim_factory import * +from .scheduler import * + +__all__ = [] +__all__.extend(ema.__all__) +__all__.extend(group_params.__all__) +__all__.extend(scheduler.__all__) +__all__.extend(optim_factory.__all__) diff --git a/community/cv/ShipWise/mindyolo/optim/ema.py b/community/cv/ShipWise/mindyolo/optim/ema.py new file mode 100644 index 0000000000000000000000000000000000000000..479df62a84157d5a10138f2008d84f8b73598d91 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/optim/ema.py @@ -0,0 +1,51 @@ +import mindspore as ms +from mindspore import Parameter, Tensor, nn, ops + +__all__ = ["EMA"] + + +class EMA(nn.Cell): + """Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models + Keep a moving average of everything in the model state_dict (parameters and buffers). + This is intended to allow functionality like + https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage + A smoothed version of the weight is necessary for some training schemes to perform well. + """ + + def __init__(self, model, ema_model, decay=0.9999, updates=0): + super(EMA, self).__init__() + # Create EMA + self.ema = ema_model + self.ema.set_train(False) + self.weight = ms.ParameterTuple(list(model.get_parameters())) + self.ema_weight = ms.ParameterTuple(list(ema_model.get_parameters())) + self.updates = Parameter(Tensor(updates, ms.float32), requires_grad=False) # number of EMA updates + self.decay_value = decay + self.assign = ops.Assign() + self.hyper_map = ops.HyperMap() + + def decay(self, x): + # decay exponential ramp (to help early epochs) + return self.decay_value * (1 - ops.exp(ops.neg(x) / 2000)) + + @ms.jit + def update(self): + # Update EMA parameters + def update_param(d, ema_v, weight): + if weight.dtype == ms.int32: + return self.assign(ema_v, weight) + else: + tep_v = ema_v * d + return self.assign(ema_v, weight * (1.0 - d) + tep_v) + + ops.assign_add(self.updates, 1) + d = self.decay(self.updates) + success = self.hyper_map(ops.partial(update_param, d), self.ema_weight, self.weight) + + return success + + @ms.jit + def clone_from_model(self): + ops.assign_add(self.updates, 1) + success = self.hyper_map(ops.assign, self.ema_weight, self.weight) + return success diff --git a/community/cv/ShipWise/mindyolo/optim/group_params.py b/community/cv/ShipWise/mindyolo/optim/group_params.py new file mode 100644 index 0000000000000000000000000000000000000000..2c78edd2f96ada066dee30e38e8858dbcdf2df84 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/optim/group_params.py @@ -0,0 +1,284 @@ +import numpy as np + +from .scheduler import cosine_decay_lr, linear_lr + +__all__ = ["create_group_param"] + + +def create_group_param(params, gp_weight_decay=0.0, **kwargs): + """ + Create group parameters for optimizer. + + Args: + params: Network parameters + gp_weight_decay: Weight decay. 
Default: 0.0 + **kwargs: Others + """ + if "group_param" in kwargs: + gp_strategy = kwargs["group_param"] + if gp_strategy == "filter_bias_and_bn": + return filter_bias_and_bn(params, gp_weight_decay) + elif gp_strategy == "yolov8": + return group_param_yolov8(params, weight_decay=gp_weight_decay, **kwargs) + elif gp_strategy == "yolov7": + return group_param_yolov7(params, weight_decay=gp_weight_decay, **kwargs) + elif gp_strategy == "yolov5": + return group_param_yolov5(params, weight_decay=gp_weight_decay, **kwargs) + elif gp_strategy == "yolov4": + return group_param_yolov4(params, weight_decay=gp_weight_decay, **kwargs) + elif gp_strategy == "yolov3": + return group_param_yolov3(params, weight_decay=gp_weight_decay, **kwargs) + else: + raise NotImplementedError + else: + return params + + +def filter_bias_and_bn(params, weight_decay): + no_decay_params, decay_params = _group_param_common2(params) + + return [ + {"params": decay_params, "weight_decay": weight_decay}, + {"params": no_decay_params}, + ] + + +def group_param_yolov3( + params, + weight_decay, + start_factor, + end_factor, + lr_init, + warmup_bias_lr, + warmup_epochs, + min_warmup_step, + accumulate, + epochs, + steps_per_epoch, + total_batch_size, + **kwargs +): + # old: # weight, gamma, bias/beta + # new: # bias/beta, weight, others + pg0, pg1, pg2 = _group_param_common3(params) + + lr_pg0, lr_pg1, lr_pg2 = [], [], [] + lrs = cosine_decay_lr(start_factor, end_factor, lr_init, steps_per_epoch, epochs) + + warmup_steps = max(round(warmup_epochs * steps_per_epoch), min_warmup_step) + xi = [0, warmup_steps] + for i in range(epochs * steps_per_epoch): + _lr = lrs[i] + if i < warmup_steps: + lr_pg0.append(np.interp(i, xi, [warmup_bias_lr, _lr])) + lr_pg1.append(np.interp(i, xi, [0.0, _lr])) + lr_pg2.append(np.interp(i, xi, [0.0, _lr])) + else: + lr_pg0.append(_lr) + lr_pg1.append(_lr) + lr_pg2.append(_lr) + + nbs = 64 + weight_decay *= total_batch_size * accumulate / nbs # scale weight_decay + group_params = [ + {"params": pg0, "lr": lr_pg0}, + {"params": pg1, "lr": lr_pg1, "weight_decay": weight_decay}, + {"params": pg2, "lr": lr_pg2}, + ] + return group_params + + +def group_param_yolov4( + params, + weight_decay, + start_factor, + end_factor, + lr_init, + warmup_epochs, + min_warmup_step, + accumulate, + epochs, + steps_per_epoch, + total_batch_size, + **kwargs +): + pg0, pg1 = _group_param_common2(params) # bias/beta/gamma, others + + lr_pg0, lr_pg1 = [], [] + lrs = cosine_decay_lr(start_factor, end_factor, lr_init, steps_per_epoch, epochs) + + warmup_steps = max(round(warmup_epochs * steps_per_epoch), min_warmup_step) + + xi = [0, warmup_steps] + for i in range(epochs * steps_per_epoch): + _lr = lrs[i] + if i < warmup_steps: + lr_pg0.append(np.interp(i, xi, [0.0, lr_init])) + lr_pg1.append(np.interp(i, xi, [0.0, lr_init])) + + else: + lr_pg0.append(_lr) + lr_pg1.append(_lr) + + group_params = [{"params": pg0, "lr": lr_pg0}, {"params": pg1, "lr": lr_pg1, "weight_decay": weight_decay}] + return group_params + + +def group_param_yolov5( + params, + weight_decay, + start_factor, + end_factor, + lr_init, + warmup_bias_lr, + warmup_epochs, + min_warmup_step, + accumulate, + epochs, + steps_per_epoch, + total_batch_size, + **kwargs +): + # old: # weight, gamma, bias/beta + # new: # bias/beta, weight, others + pg0, pg1, pg2 = _group_param_common3(params) + + lr_pg0, lr_pg1, lr_pg2 = [], [], [] + lrs = linear_lr(start_factor, end_factor, lr_init, steps_per_epoch, epochs) + + warmup_steps = max(round(warmup_epochs * 
steps_per_epoch), min_warmup_step) + xi = [0, warmup_steps] + for i in range(epochs * steps_per_epoch): + _lr = lrs[i] + if i < warmup_steps: + lr_pg0.append(np.interp(i, xi, [warmup_bias_lr, _lr])) + lr_pg1.append(np.interp(i, xi, [0.0, _lr])) + lr_pg2.append(np.interp(i, xi, [0.0, _lr])) + else: + lr_pg0.append(_lr) + lr_pg1.append(_lr) + lr_pg2.append(_lr) + + nbs = 64 + weight_decay *= total_batch_size * accumulate / nbs # scale weight_decay + group_params = [ + {"params": pg0, "lr": lr_pg0}, + {"params": pg1, "lr": lr_pg1, "weight_decay": weight_decay}, + {"params": pg2, "lr": lr_pg2}, + ] + return group_params + + +def group_param_yolov7( + params, + weight_decay, + start_factor, + end_factor, + lr_init, + warmup_bias_lr, + warmup_epochs, + min_warmup_step, + accumulate, + epochs, + steps_per_epoch, + total_batch_size, + **kwargs +): + pg0, pg1, pg2 = _group_param_common3(params) # bias/beta, weight, others + + lr_pg0, lr_pg1, lr_pg2 = [], [], [] + lrs = cosine_decay_lr(start_factor, end_factor, lr_init, steps_per_epoch, epochs) + + warmup_steps = max(round(warmup_epochs * steps_per_epoch), min_warmup_step) + warmup_bias_steps_first = min(max(round(3 * steps_per_epoch), min_warmup_step), warmup_steps) + warmup_bias_lr_first = np.interp(warmup_bias_steps_first, [0, warmup_steps], [0.0, lr_init]) + xi = [0, warmup_steps] + for i in range(epochs * steps_per_epoch): + _lr = lrs[i] + if i < warmup_steps: + lr_pg0.append( + np.interp(i, [0, warmup_bias_steps_first, warmup_steps], [warmup_bias_lr, warmup_bias_lr_first, _lr]) + ) + lr_pg1.append(np.interp(i, xi, [0.0, _lr])) + lr_pg2.append(np.interp(i, xi, [0.0, _lr])) + + else: + lr_pg0.append(_lr) + lr_pg1.append(_lr) + lr_pg2.append(_lr) + + nbs = 64 + weight_decay *= total_batch_size * accumulate / nbs # scale weight_decay + group_params = [ + {"params": pg0, "lr": lr_pg0}, + {"params": pg1, "lr": lr_pg1, "weight_decay": weight_decay}, + {"params": pg2, "lr": lr_pg2}, + ] + return group_params + + +def group_param_yolov8( + params, + weight_decay, + start_factor, + end_factor, + lr_init, + warmup_bias_lr, + warmup_epochs, + min_warmup_step, + accumulate, + epochs, + steps_per_epoch, + total_batch_size, + **kwargs +): + pg0, pg1, pg2 = _group_param_common3(params) # bias/beta, weight, others + + lr_pg0, lr_pg1, lr_pg2 = [], [], [] + lrs = linear_lr(start_factor, end_factor, lr_init, steps_per_epoch, epochs) + + warmup_steps = max(round(warmup_epochs * steps_per_epoch), min_warmup_step) + xi = [0, warmup_steps] + for i in range(epochs * steps_per_epoch): + _lr = lrs[i] + if i < warmup_steps: + lr_pg0.append(np.interp(i, xi, [warmup_bias_lr, _lr])) + lr_pg1.append(np.interp(i, xi, [0.0, _lr])) + lr_pg2.append(np.interp(i, xi, [0.0, _lr])) + else: + lr_pg0.append(_lr) + lr_pg1.append(_lr) + lr_pg2.append(_lr) + + nbs = 64 + weight_decay *= total_batch_size * accumulate / nbs # scale weight_decay + group_params = [ + {"params": pg0, "lr": lr_pg0}, + {"params": pg1, "lr": lr_pg1, "weight_decay": weight_decay}, + {"params": pg2, "lr": lr_pg2}, + ] + return group_params + + +def _group_param_common2(params): + pg0, pg1 = [], [] # optimizer parameter groups + for p in params: + if "bias" in p.name or "beta" in p.name or "gamma" in p.name: + pg0.append(p) + else: + pg1.append(p) + + return pg0, pg1 # bias/beta/gamma, others + + +def _group_param_common3(params): + pg0, pg1, pg2 = [], [], [] # optimizer parameter groups + for p in params: + if "bias" in p.name or "beta" in p.name: + pg0.append(p) + elif "weight" in p.name: + pg1.append(p) + else: 
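+            # remaining parameters, matching neither bias/beta nor weight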
+ pg2.append(p) + + return pg0, pg1, pg2 # bias/beta, weight, others diff --git a/community/cv/ShipWise/mindyolo/optim/optim_factory.py b/community/cv/ShipWise/mindyolo/optim/optim_factory.py new file mode 100644 index 0000000000000000000000000000000000000000..8e1d39a33b7b98542d289b9e149c05e0a659c406 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/optim/optim_factory.py @@ -0,0 +1,64 @@ +""" optim factory """ +import os +from typing import Optional + +from mindspore import load_checkpoint, load_param_into_net, nn + +__all__ = ["create_optimizer"] + + +def create_optimizer( + params, + optimizer: str = "momentum", + lr: Optional[float] = 1e-3, + weight_decay: float = 0, + momentum: float = 0.9, + nesterov: bool = False, + loss_scale: float = 1.0, + checkpoint_path: str = "", + **kwargs, +): + r"""Creates optimizer by name. + + Args: + params: network parameters. + optim: optimizer name like 'sgd', 'nesterov', 'momentum'. + lr: learning rate, float or lr scheduler. Fixed and dynamic learning rate are supported. Default: 1e-3. + weight_decay: weight decay factor. Default: 0. + momentum: momentum if the optimizer supports. Default: 0.9. + nesterov: Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients. Default: False. + loss_scale: A floating point value for the loss scale, which must be larger than 0.0. Default: 1.0. + checkpoint_path: Optimizer weight path. Default: ''. + + Returns: + Optimizer object + """ + + optim = optimizer.lower() + + if optim == "sgd": + optimizer = nn.SGD( + params=params, + learning_rate=lr, + momentum=momentum, + weight_decay=weight_decay, + nesterov=nesterov, + loss_scale=loss_scale, + ) + elif optim in ["momentum", "nesterov"]: + optimizer = nn.Momentum( + params=params, + learning_rate=lr, + momentum=momentum, + weight_decay=weight_decay, + use_nesterov=nesterov, + loss_scale=loss_scale, + ) + else: + raise ValueError(f"Invalid optimizer: {optim}") + + if checkpoint_path.endswith(".ckpt") and os.path.isfile(checkpoint_path): + param_dict = load_checkpoint(checkpoint_path, filter_prefix="learning_rate") + load_param_into_net(optimizer, param_dict) + + return optimizer diff --git a/community/cv/ShipWise/mindyolo/optim/scheduler.py b/community/cv/ShipWise/mindyolo/optim/scheduler.py new file mode 100644 index 0000000000000000000000000000000000000000..5fe074b137b357a7e434ac3815df589e7bba254d --- /dev/null +++ b/community/cv/ShipWise/mindyolo/optim/scheduler.py @@ -0,0 +1,237 @@ +import math +import numpy as np + +__all__ = ["create_lr_scheduler", "create_warmup_momentum_scheduler"] + + +def create_lr_scheduler(lr_init, lr_scheduler=None, by_epoch=True, **kwargs): + """ + Create lr scheduler for optimizer. + + Args: + lr_init: Initial learning rate + lr_scheduler: LR scheduler name like 'linear', 'cos'. + by_epoch: learning rate updated by epoch if true, else updated by iteration. 
Default true + **kwargs: Others + """ + + if lr_scheduler: + assert isinstance(lr_scheduler, str), f"lr_scheduler should be a string, but got {type(lr_scheduler)}" + if lr_scheduler == "yolox": + return create_yolox_lr_scheduler(lr_init=lr_init, by_epoch=by_epoch, **kwargs) + else: + return lr_init + + +def create_yolox_lr_scheduler( + start_factor, end_factor, lr_init, steps_per_epoch, warmup_epochs, epochs, by_epoch, cooldown_epochs=0, **kwargs +): + assert epochs - warmup_epochs - cooldown_epochs > 0, f"the sum of warmup({warmup_epochs}) and " \ + f"cooldown{cooldown_epochs} epoch should " \ + f"be less than total epoch{epochs}" + # quadratic + lrs_qua = quadratic_lr(0.01, start_factor, lr_init, steps_per_epoch, epochs=warmup_epochs, by_epoch=by_epoch) + + # cosine + cosine_epochs = epochs - warmup_epochs - cooldown_epochs + lrs_cos = cosine_decay_lr( + start_factor, end_factor, lr_init, steps_per_epoch, epochs=cosine_epochs, by_epoch=by_epoch + ) + + # constant + lrs_col = [] + if cooldown_epochs > 0: + cool_down_lr = lr_init * end_factor + lrs_col = [cool_down_lr] * cooldown_epochs * steps_per_epoch + + lrs = lrs_qua + lrs_cos + lrs_col + return lrs + + +def quadratic_lr(start_factor, end_factor, lr_init, steps_per_epoch, epochs, by_epoch=True, t_max=None, **kwargs): + if t_max is None: + t_max = epochs if by_epoch else steps_per_epoch * epochs + lrs = [] + start_lr = lr_init * start_factor + end_lr = lr_init * end_factor + for i in range(steps_per_epoch * epochs): + epoch_idx = i // steps_per_epoch + index = epoch_idx if by_epoch else i + multiplier = min(index, t_max) / t_max + multiplier = pow(multiplier, 2) + lrs.append(start_lr + multiplier * (end_lr - start_lr)) + return lrs + + +def create_warmup_momentum_scheduler( + steps_per_epoch, momentum=None, warmup_momentum=None, warmup_epochs=None, min_warmup_step=None, **kwargs +): + """ + Create warmup momentum scheduler. + + Args: + steps_per_epoch: Number of steps in each epoch. + momentum (float, optional): Hyperparameter of type float, means momentum for the moving average. + It must be at least 0.0. Default: None. + warmup_momentum (float, optional): Hyperparameter of type float, means warmup momentum for the moving average. + It must be at least 0.0. Default: None. + warmup_epochs: Number of epochs for warmup. + min_warmup_step: Minimum number of steps for warmup. + **kwargs: Others + """ + + if warmup_momentum: + warmup_steps = max(round(warmup_epochs * steps_per_epoch), min_warmup_step) + return linear_momentum(warmup_momentum, momentum, warmup_steps) + else: + return None + + +def linear_momentum(start, end, total_steps): + """ + Args: + start: Starting value. + end: Ending value. + total_steps: Number of total step. + + Returns: + momentum_list: A list with length total_steps. + """ + + momentum_list = [] + for i in range(total_steps): + momentum_list.append(np.interp(i, [0, total_steps], [start, end])) + + return momentum_list + + +def linear_lr(start_factor, end_factor, lr_init, steps_per_epoch, epochs, t_max=None, **kwargs): + """ + Args: + start_factor: Starting factor. + end_factor: Ending factor. + lr_init: Initial learning rate. + steps_per_epoch: Total number of steps per epoch. + epochs: Total number of epochs trained. + t_max: The maximum number of epochs where lr changes. Default: None. 
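+        **kwargs: Others
+
+    Note:
+        The multiplier is computed from the epoch index, so all steps within one epoch share the
+        same learning rate value.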
+ + Examples: + >>> lrs = linear_lr(0.1, 0.01, 0.2, 100, 5) + >>> print(f"lrs len: {len(lrs)}") + >>> print(f"lrs per epoch: {[lrs[i] for i in range(len(lrs)) if ((i + 1) % 100 == 0)]}") + lrs len: 500 + lrs: [0.02, 0.0155, 0.011, 0.0065, 0.002] + """ + + if t_max is None: + t_max = epochs + lrs = [] + start_lr = lr_init * start_factor + end_lr = lr_init * end_factor + for i in range(steps_per_epoch * epochs): + epoch_idx = i // steps_per_epoch + multiplier = min(epoch_idx, t_max) / t_max + lrs.append(start_lr + multiplier * (end_lr - start_lr)) + return lrs + + +def cosine_decay_lr(start_factor, end_factor, lr_init, steps_per_epoch, epochs, by_epoch=True, t_max=None, **kwargs): + """ + Args: + start_factor: Starting factor. + end_factor: Ending factor. + lr_init: Initial learning rate. + steps_per_epoch: Total number of steps per epoch. + epochs: Total number of epochs trained. + t_max: The maximum number of epochs where lr changes. Default: None. + + Examples: + >>> lrs = cosine_decay_lr(0.1, 0.01, 0.2, 100, 5) + >>> print(f"lrs len: {len(lrs)}") + >>> print(f"lrs: {[lrs[i] for i in range(len(lrs)) if ((i + 1) % 100 == 0)]}") + lrs len: 500 + lrs: [0.02, 0.0173, 0.011, 0.0046, 0.002] + """ + + if t_max is None: + t_max = epochs if by_epoch else steps_per_epoch * epochs + lrs = [] + start_lr = lr_init * start_factor + end_lr = lr_init * end_factor + delta = 0.5 * (start_lr - end_lr) + for i in range(steps_per_epoch * epochs): + epoch_idx = i // steps_per_epoch + index = epoch_idx if by_epoch else i + multiplier = min(index, t_max) / t_max + lrs.append(end_lr + delta * (1.0 + math.cos(math.pi * multiplier))) + return lrs + + +def cosine_decay_lr_with_linear_warmup( + warmup_epochs, + warmup_lrs, + start_factor, + end_factor, + lr_init, + steps_per_epoch, + epochs, + min_warmup_step=1000, + t_max=None, + **kwargs, +): + """ + Args: + warmup_epochs (Union[int, tuple[int]]): The warmup epochs of the lr scheduler. + The data type is an integer or a tuple of integers. An integer represents the warmup epoch size. + A tuple of integers represents the warmup epochs interpolation nodes. Like: [0, 12, 24] or 24. + warmup_lrs (Union[int, tuple[float]]): The warmup lr of the lr scheduler. + The data type is a float or a tuple of float(The last element can be None). + A float represents the start warmup lr. + A tuple of float represents the warmup lrs interpolation nodes. Like: [0.01, 0.1, 'None'] or [0.01, 0.1] or 0.01. + start_factor: Starting factor. + end_factor: Ending factor. + lr_init: Initial learning rate. + steps_per_epoch: Total number of steps per epoch. + epochs: Total number of epochs trained. + min_warmup_step (int): Minimum warm-up steps. Default: 1000. + t_max: The maximum number of epochs where lr changes. Default: None. 
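+        **kwargs: Others
+
+    Note:
+        Within the warmup span, the learning rate is linearly interpolated (via np.interp) between
+        the warmup_lrs nodes placed at the warmup_epochs boundaries, with the cosine-decayed value
+        of the current step as the final node.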
+
+    Examples:
+        >>> lrs = cosine_decay_lr_with_linear_warmup([0, 3], [0.0001, None], 0.1, 0.01, 0.2, 100, 5, min_warmup_step=1)
+        >>> print(f"lrs len: {len(lrs)}")
+        >>> print(f"lrs every epoch: {[lrs[i] for i in range(len(lrs)) if ((i + 1) % 100 == 0)]}")
+        lrs len: 500
+        lrs every epoch: [0.0066, 0.0115, 0.0109, 0.0046, 0.002]
+    """
+
+    if isinstance(warmup_epochs, (int, float)):
+        warmup_epochs = [0, int(warmup_epochs)]
+    elif isinstance(warmup_epochs, (list, tuple)):
+        warmup_epochs = list(warmup_epochs)
+    else:
+        raise ValueError(f"Invalid warmup_epochs: {warmup_epochs}")
+
+    if isinstance(warmup_lrs, float):
+        warmup_lrs = [
+            warmup_lrs,
+        ]
+    elif isinstance(warmup_lrs, (list, tuple)):
+        warmup_lrs = list(warmup_lrs)
+        if warmup_lrs[-1] in ("None", "none", None):
+            warmup_lrs = warmup_lrs[:-1]
+    else:
+        raise ValueError(f"Invalid warmup_lrs: {warmup_lrs}")
+
+    assert (
+        len(warmup_epochs) == len(warmup_lrs) + 1
+    ), "LRScheduler: The length of 'warmup_epochs' and 'warmup_lrs' is inconsistent"
+
+    # t_max must be passed by keyword; a positional argument would land in the by_epoch slot
+    lrs = cosine_decay_lr(start_factor, end_factor, lr_init, steps_per_epoch, epochs, t_max=t_max)
+    warmup_steps = [min(i * steps_per_epoch, len(lrs)) for i in warmup_epochs]
+    warmup_steps[-1] = max(warmup_steps[-1], min(len(lrs), min_warmup_step))
+
+    for i in range(warmup_steps[-1]):
+        _lr = lrs[i]
+        lrs[i] = np.interp(i, warmup_steps, warmup_lrs + [_lr,])
+
+    return lrs
diff --git a/community/cv/ShipWise/mindyolo/utils/__init__.py b/community/cv/ShipWise/mindyolo/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..16de95da931a58fae14338d3eb50cbc0837c5022
--- /dev/null
+++ b/community/cv/ShipWise/mindyolo/utils/__init__.py
@@ -0,0 +1,7 @@
+"""Utility Tools"""
+from .checkpoint_manager import *
+from .config import *
+from .logger import *
+from .metrics import *
+from .modelarts import *
+from .utils import *
diff --git a/community/cv/ShipWise/mindyolo/utils/callback.py b/community/cv/ShipWise/mindyolo/utils/callback.py
new file mode 100644
index 0000000000000000000000000000000000000000..df7c61ded3ba790e9a138053c52250f5bda4d8e1
--- /dev/null
+++ b/community/cv/ShipWise/mindyolo/utils/callback.py
@@ -0,0 +1,381 @@
+import math
+import os
+import sys
+import time
+from typing import Union, Tuple, List
+
+import numpy as np
+from mindspore import Profiler, SummaryRecord, Tensor
+from mindyolo.utils.modelarts import sync_data
+from mindyolo.utils import CheckpointManager, logger
+from mindyolo.utils.registry import Registry
+from mindyolo.utils.train_step_factory import create_train_step_fn
+
+CALLBACK_REGISTRY = Registry("callback")
+
+
+def create_callback(arg_callback):
+    def _create_callback_worker(name, **kwargs):
+        cb_cls = CALLBACK_REGISTRY.get(name)
+        instance = cb_cls(**kwargs)
+        return instance
+
+    assert isinstance(arg_callback, (tuple, list)), f'expected callback to be a list or tuple, ' \
+                                                    f'but got {type(arg_callback)} instead'
+    for i, cb in enumerate(arg_callback):
+        assert isinstance(cb, dict) and 'name' in cb, f'callback[{i}] is not a dict or does not contain key [name]'
+
+    logger.info(CALLBACK_REGISTRY)
+
+    return [_create_callback_worker(**kw) for kw in arg_callback]
+
+
+class RunContext:
+    """
+    Hold and manage information about the running state of the model
+    Args:
+        epoch_num (int): total epoch number in the training process
+        steps_per_epoch (int): total steps of one epoch
+        trainer (Trainer): trainer class that performs the training process
+        test_fn (Function): test function that can evaluate the training model
+        enable_modelarts (bool): whether to enable modelarts; usually true when running on the cloud
+        ckpt_save_dir (str): checkpoint saving directory
+        train_url (str): training URL; usually used on the cloud when not empty
+
+    """
+
+    def __init__(
+        self,
+        epoch_num=0,
+        steps_per_epoch=0,
+        total_steps=0,
+        trainer=None,
+        test_fn=None,
+        enable_modelarts=False,
+        ckpt_save_dir="",
+        save_dir="",
+        train_url="",
+        overflow_still_update=False,
+        ms_jit=True,
+        rank_size=8,
+    ):
+
+        self.epoch_num = epoch_num
+        self.steps_per_epoch = steps_per_epoch
+        self.total_steps = total_steps
+        self.trainer = trainer
+        self.test_fn = test_fn
+        self.ckpt_save_dir = ckpt_save_dir
+        self.save_dir = save_dir
+        self.enable_modelarts = enable_modelarts
+        self.train_url = train_url
+        self.overflow_still_update = overflow_still_update
+        self.ms_jit = ms_jit
+        self.rank_size = rank_size
+
+        # the first index starts with 1 rather than 0
+        self.cur_epoch_index = 0
+        self.cur_step_index = 0
+        self.loss = []
+        self.lr = 0
+
+
+class BaseCallback:
+    """
+    Base class of callback. Applied in the Train function, it can take actions at 6 different stages of the
+    training process.
+
+    """
+
+    def __init__(self):
+        pass
+
+    def __repr__(self):
+        members = vars(self)
+        mem_str = ", ".join([f"{k}={v}" for k, v in members.items()])
+        fmt_str = self.__class__.__name__ + f"({mem_str})"
+        return fmt_str
+
+    def on_train_begin(self, run_context: RunContext):
+        """hooks to run on the beginning of training process"""
+        pass
+
+    def on_train_end(self, run_context: RunContext):
+        """hooks to run on the end of training process"""
+        pass
+
+    def on_train_epoch_begin(self, run_context: RunContext):
+        """hooks to run on the beginning of a training epoch"""
+        pass
+
+    def on_train_epoch_end(self, run_context: RunContext):
+        """hooks to run on the end of a training epoch"""
+        pass
+
+    def on_train_step_begin(self, run_context: RunContext):
+        """hooks to run on the beginning of a training step"""
+        pass
+
+    def on_train_step_end(self, run_context: RunContext):
+        """hooks to run on the end of a training step"""
+        pass
+
+
+@CALLBACK_REGISTRY.registry_module()
+class YoloxSwitchTrain(BaseCallback):
+    """
+    Switch train hook applied in the yolox model. Yolox uses a two-stage training strategy: compared with the 1st
+    stage, the 2nd stage has no mosaic data augmentation and adds an L1 loss item. Reference: url
+
+    Args:
+        switch_epoch_num (int): index of epoch to switch stage. This value equals the epoch number of the first stage.
+        is_switch_loss (bool): whether to switch loss
+        is_switch_data_aug (bool): whether to switch data augmentation
+
+    """
+
+    def __init__(self, switch_epoch_num=285, is_switch_loss=True, is_switch_data_aug=False, **kwargs):
+        super().__init__()
+        self.switch_epoch_num = switch_epoch_num
+        self.switch_epoch_index = switch_epoch_num + 1
+        self.is_switch_loss = is_switch_loss
+        self.is_switch_data_aug = is_switch_data_aug
+
+    def on_train_step_begin(self, run_context: RunContext):
+        pass
+
+    def on_train_epoch_begin(self, run_context: RunContext):
+        cur_epoch_index = run_context.cur_epoch_index
+        trainer = run_context.trainer
+        loss_ratio = run_context.rank_size
+        overflow_still_update = run_context.overflow_still_update
+        ms_jit = run_context.ms_jit
+
+        # switch loss
+        if self.is_switch_loss and cur_epoch_index == self.switch_epoch_index:
+            logger.info(f"\nAdding L1 loss starts from epoch {self.switch_epoch_index}. Graph recompiling\n")
+            trainer.loss_fn.use_l1 = True
+            trainer.train_step_fn = create_train_step_fn(task='detect',
+                                                         network=trainer.network,
+                                                         loss_fn=trainer.loss_fn,
+                                                         optimizer=trainer.optimizer,
+                                                         loss_ratio=loss_ratio,
+                                                         scaler=trainer.scaler,
+                                                         reducer=trainer.reducer,
+                                                         ema=trainer.ema,
+                                                         overflow_still_update=overflow_still_update,
+                                                         ms_jit=ms_jit)
+
+        # switch data_aug, not implemented here
+        if self.is_switch_data_aug:
+            raise ValueError(
+                "Currently switch_data_aug should be implemented using a multi-stage training pipeline. "
+                "Refer to train_transforms for more information. Keep the is_switch_data_aug flag False."
+            )
+
+
+@CALLBACK_REGISTRY.registry_module()
+class EvalWhileTrain(BaseCallback):
+    """
+    Callback of evaluation while training. It mainly does two things: evaluates the model at the requested epochs
+    and uploads checkpoint files to the cloud. Piecewise evaluation with a different interval in each piece is
+    supported.
+    Args:
+        stage_epochs (Union(List, Tuple, int)): For list or tuple type, piecewise mode is on and each element
+            indicates the epoch number in its piece. For int type, single piece mode is on and the value indicates
+            the max possible epoch index where the model will be evaluated. The default (positive infinity) means
+            no stage switch
+        stage_intervals (Union(List, Tuple, int)): Of the same type and length as stage_epochs; each element is the
+            evaluation interval of the corresponding piece. Default 1
+        eval_last_epoch (bool): whether to evaluate the last epoch of each piece. Default True
+        isolated_epochs (Union(List, Tuple, int, None)): extra epochs at which to evaluate, for flexibility.
+            Default None.
+        keep_checkpoint_max (int): the maximum number of checkpoints to keep on disk. Default 10.
+
+    Example:
+        Case 1: evaluate a single stage
+        >>> hook = EvalWhileTrain(stage_intervals=5)
+        The above hook will evaluate the model with an interval of 5, and the final epoch will be evaluated by
+        default.
+
+        Case 2: evaluate multiple stages
+        >>> hook = EvalWhileTrain(stage_epochs=[285, 15], stage_intervals=[25, 5], isolated_epochs=[3, 213])
+        The above hook will evaluate the model in two stages. In the 1st stage, 285 epochs are evaluated with an
+        interval of 25, while in the 2nd stage, 15 epochs are evaluated with an interval of 5. Meanwhile, the model
+        is evaluated at epochs 3 and 213 as specified by isolated_epochs. The final epochs of the two stages,
+        namely 285 and 300, will be evaluated by default.
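+
+    Note:
+        Checkpoints saved by this callback are managed with a top_k policy (see CheckpointManager),
+        so only the keep_checkpoint_max highest-accuracy checkpoints are kept on disk.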
+ """ + + def __init__( + self, + stage_epochs: Union[List, Tuple, int] = sys.maxsize, + stage_intervals: Union[List, Tuple, int] = 1, + eval_last_epoch=True, + isolated_epochs: Union[List, Tuple, int, None] = None, + keep_checkpoint_max=10, + ): + super().__init__() + assert isinstance(stage_intervals, (list, tuple, int)) + assert isinstance(stage_epochs, (list, tuple, int)) + + # cast interval list in case of 1 stage + if isinstance(stage_intervals, int) or isinstance(stage_epochs, int): + assert isinstance(stage_intervals, int) and isinstance( + stage_epochs, int + ), f"stage_intervals and stage_epochs must be int at the same time" + stage_intervals = [stage_intervals] + stage_epochs = [stage_epochs] + + # cast isolated_epochs to list + if isolated_epochs is not None: + assert isinstance(isolated_epochs, (list, tuple, int)) + if isinstance(isolated_epochs, int): + isolated_epochs = [isolated_epochs] + else: + isolated_epochs = [] + + assert len(stage_intervals) == len(stage_epochs) + self.stage_intervals = stage_intervals + self.stage_epochs = stage_epochs # for log + self.stage_cum_epochs = np.cumsum(stage_epochs) + self.eval_last_epoch = eval_last_epoch + self.isolated_epochs = isolated_epochs + self.keep_checkpoint_max = keep_checkpoint_max + self.manager_best = CheckpointManager(ckpt_save_policy="top_k") + self.ckpt_filelist_best = [] + + def on_train_epoch_end(self, run_context: RunContext): + cur_epoch_index = run_context.cur_epoch_index + epochs = run_context.epoch_num + # reset to total epoch if exceed + for i in range(len(self.stage_cum_epochs)): + if self.stage_cum_epochs[i] > epochs: + self.stage_cum_epochs[i] = epochs + + stage = np.searchsorted(self.stage_cum_epochs, cur_epoch_index, side="left") + # in case of cur_epoch_index greater than total epoch that need evaluation + if stage == len(self.stage_intervals): + return + + offset = self.stage_cum_epochs[stage - 1] if stage > 0 else 0 + interval_cond = (cur_epoch_index - offset) % self.stage_intervals[stage] == 0 + last_cond = self.eval_last_epoch and (cur_epoch_index == self.stage_cum_epochs[stage]) + isolated_cond = any(cur_epoch_index == e for e in self.isolated_epochs) + if interval_cond or last_cond or isolated_cond: + self._run_eval(run_context) + + def on_train_end(self, run_context: RunContext): + enable_modelarts = run_context.enable_modelarts + train_url = run_context.train_url + if enable_modelarts and self.ckpt_filelist_best: + ckpt_filelist_best = [s[0] for s in self.ckpt_filelist_best] + for p in ckpt_filelist_best: + sync_data(p, train_url + "/weights/" + p.split("/")[-1]) + + def _run_eval(self, run_context: RunContext): + s_eval_time = time.time() + + trainer = run_context.trainer + test_fn = run_context.test_fn + cur_epoch = run_context.cur_epoch_index + epochs = run_context.epoch_num + ckpt_save_dir = run_context.ckpt_save_dir + + eval_network = trainer.ema.ema if trainer.ema else trainer.network + _train_status = eval_network.training + eval_network.set_train(False) + accuracy = test_fn(network=eval_network, cur_epoch=f'{cur_epoch:03d}') + accuracy = accuracy[0] if isinstance(accuracy, (list, tuple)) else accuracy + eval_network.set_train(_train_status) + + save_path_best = os.path.join( + ckpt_save_dir, + f"best_{trainer.model_name}-{cur_epoch}_{trainer.steps_per_epoch}" f"_acc{accuracy:.3f}.ckpt", + ) + + if trainer.main_device: + self.ckpt_filelist_best = self.manager_best.save_ckpoint( + eval_network, num_ckpt=self.keep_checkpoint_max, metric=accuracy, save_path=save_path_best + ) + best_path, 
best_accu = self.ckpt_filelist_best[0]
+            logger.info(
+                f"Epoch {cur_epoch}/{epochs}, eval accuracy: {accuracy:.3f}, "
+                f"run_eval time: {(time.time() - s_eval_time):.3f} s."
+            )
+            logger.info(f"best accuracy: {best_accu:.3f}, saved at: {best_path}")
+
+
+@CALLBACK_REGISTRY.registry_module()
+class SummaryCallback(BaseCallback):
+    """
+    Callback that collects summary data at training time.
+    """
+
+    def __init__(self):
+        super().__init__()
+
+    def on_train_begin(self, run_context: RunContext):
+        """hooks to run on the beginning of training process"""
+        self.summary_dir = os.path.join(run_context.save_dir, "summary")
+        self.summary_record = SummaryRecord(self.summary_dir)
+
+    def on_train_end(self, run_context: RunContext):
+        """hooks to run on the end of training process"""
+        self.summary_record.close()
+        if run_context.enable_modelarts:
+            for p in os.listdir(self.summary_dir):
+                summary_file_path = os.path.join(self.summary_dir, p)
+                sync_data(summary_file_path, run_context.train_url + "/summary/" + summary_file_path.split("/")[-1])
+
+    def on_train_epoch_end(self, run_context: RunContext):
+        """hooks to run on the end of a training epoch"""
+        trainer = run_context.trainer
+        if trainer.data_sink:
+            for i in range(len(run_context.loss)):
+                self.summary_record.add_value("scalar", f"{trainer.loss_item_name[i]}", run_context.loss[i])
+            self.summary_record.add_value("scalar", "cur_lr", Tensor(run_context.lr))
+            self.summary_record.record(run_context.cur_epoch_index)
+            self.summary_record.flush()
+
+    def on_train_step_end(self, run_context: RunContext):
+        """hooks to run on the end of a training step"""
+        trainer = run_context.trainer
+        if run_context.cur_step_index % trainer.log_interval == 0:
+            for i in range(len(run_context.loss)):
+                self.summary_record.add_value("scalar", f"{trainer.loss_item_name[i]}", run_context.loss[i])
+            self.summary_record.add_value("scalar", "cur_lr", Tensor(run_context.lr))
+            self.summary_record.record(run_context.cur_step_index)
+            self.summary_record.flush()
+
+
+@CALLBACK_REGISTRY.registry_module()
+class ProfilerCallback(BaseCallback):
+    """
+    Callback that collects profiler data at training time.
+
+    Example:
+        Case 1: in non-data-sink mode, collects performance data within the specified step interval.
+        Case 2: in data-sink mode, collects performance data for the covering epoch interval.
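+
+        A minimal usage sketch (profiler_step_num=10 is an arbitrary choice):
+        >>> callbacks = [ProfilerCallback(profiler_step_num=10)]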
+ """ + + def __init__(self, profiler_step_num): + super().__init__() + self.profiler_step_num = profiler_step_num + + def on_train_begin(self, run_context: RunContext): + """hooks to run on the beginning of training process""" + self.prof_dir = os.path.join(run_context.save_dir, "profiling_data") + self.prof = Profiler(output_path=self.prof_dir) + + def on_train_epoch_end(self, run_context: RunContext): + """hooks to run on the beginning of a training epoch""" + if run_context.cur_epoch_index == math.ceil(self.profiler_step_num/run_context.steps_per_epoch): + self.prof.stop() + self.prof.analyse() + + def on_train_step_end(self, run_context: RunContext): + """hooks to run on the beginning of a training step""" + if run_context.cur_step_index == self.profiler_step_num: + self.prof.stop() + self.prof.analyse() + + def on_train_end(self, run_context: RunContext): + if run_context.enable_modelarts: + for p in os.listdir(self.prof_dir): + prof_file_path = os.path.join(self.prof_dir, p) + sync_data(prof_file_path, run_context.train_url + "/profiling_data/" + prof_file_path.split("/")[-1]) diff --git a/community/cv/ShipWise/mindyolo/utils/checkpoint_manager.py b/community/cv/ShipWise/mindyolo/utils/checkpoint_manager.py new file mode 100644 index 0000000000000000000000000000000000000000..2138f68b05ea7fbd00a0c7182d69ca92f84cab2a --- /dev/null +++ b/community/cv/ShipWise/mindyolo/utils/checkpoint_manager.py @@ -0,0 +1,123 @@ +"""checkpoint manager """ +import os +import stat +import numpy as np + +import mindspore as ms +from mindspore import Tensor + +from mindyolo.utils import logger + +__all__ = ["CheckpointManager"] + + +class CheckpointManager: + """ + Manage checkpoint files according to ckpt_save_policy of checkpoint. + Args: + ckpt_save_policy (str): Checkpoint saving strategy. The optional values is None, "top_k" or "latest_k". + None means to save each checkpoint, top_k means to save K checkpoints with the highest accuracy, + and latest_k means saving the latest K checkpoint. Default: None. 
+ """ + + def __init__(self, ckpt_save_policy=None): + self._ckpoint_filelist = [] + self.ckpt_save_policy = ckpt_save_policy + + @property + def ckpoint_filelist(self): + """Get all the related checkpoint files managed here.""" + return self._ckpoint_filelist + + @property + def ckpoint_num(self): + """Get the number of the related checkpoint files managed here.""" + return len(self._ckpoint_filelist) + + def update_ckpoint_filelist(self, directory, prefix): + """Update the checkpoint file list.""" + self._ckpoint_filelist = [] + files = os.listdir(directory) + for filename in files: + if os.path.splitext(filename)[-1] == ".ckpt" and filename.startswith(prefix + "-"): + mid_name = filename[len(prefix) : -5] + flag = not (True in [char.isalpha() for char in mid_name]) + if flag: + self._ckpoint_filelist.append(os.path.join(directory, filename)) + + def remove_ckpoint_file(self, file_name): + """Remove the specified checkpoint file from this checkpoint manager and also from the directory.""" + try: + os.chmod(file_name, stat.S_IWRITE) + os.remove(file_name) + except OSError: + logger.warning("OSError, failed to remove the older ckpt file %s.", file_name) + except ValueError: + logger.warning("ValueError, failed to remove the older ckpt file %s.", file_name) + + def remove_oldest_ckpoint_file(self): + """Remove the oldest checkpoint file from this checkpoint manager and also from the directory.""" + ckpoint_files = sorted(self._ckpoint_filelist, key=os.path.getmtime) + self.remove_ckpoint_file(ckpoint_files[0]) + self._ckpoint_filelist.remove(ckpoint_files[0]) + + def keep_one_ckpoint_per_minutes(self, minutes, cur_time): + """Only keep the latest one ckpt file per minutes, remove other files generated in [last_time, cur_time].""" + del_list = [] + oldest_file = "" + oldest_time = cur_time + for ck_file in self._ckpoint_filelist: + modify_time = os.path.getmtime(ck_file) + if cur_time - modify_time < 60 * minutes: + del_list.append(ck_file) + + if modify_time < oldest_time: + oldest_time = modify_time + oldest_file = ck_file + + for mv_file in del_list: + if mv_file == oldest_file: + continue + self.remove_ckpoint_file(mv_file) + + def top_K_checkpoint(self, network, K=10, metric=None, save_path=""): + """Save and return Top K checkpoint address and accuracy.""" + last_file = self._ckpoint_filelist[-1] if self._ckpoint_filelist else None + if isinstance(metric, Tensor): + metric = metric.asnumpy() + if self.ckpoint_num < K or np.greater(metric, last_file[1]): + if self.ckpoint_num >= K: + delete = K - 1 + if delete < 0 or self.ckpoint_num <= delete: + return + to_delete = self._ckpoint_filelist[delete:] + for d in to_delete: + self.remove_ckpoint_file(d[0]) + self._ckpoint_filelist = self._ckpoint_filelist[:delete] + ms.save_checkpoint(network, save_path, async_save=True) + self._ckpoint_filelist.append((save_path, float(metric))) + self._ckpoint_filelist = sorted(self._ckpoint_filelist, key=lambda x: x[1], reverse=True) + + def latest_K_checkpoint(self, network, K=10, save_path=""): + """Save latest K checkpoint.""" + if K and 0 < K <= self.ckpoint_num: + self.remove_oldest_ckpoint_file() + ms.save_checkpoint(network, save_path, async_save=True) + self._ckpoint_filelist.append(save_path) + + def save_ckpoint(self, network, num_ckpt=10, metric=None, save_path=""): + """Save checkpoint according to different save strategy.""" + if self.ckpt_save_policy is None: + ms.save_checkpoint(network, save_path, async_save=True) + elif self.ckpt_save_policy == "top_k": + if metric is None: + raise 
ValueError(f"The expected 'metric' is not None, but got: {metric}.") + self.top_K_checkpoint(network, K=num_ckpt, metric=metric, save_path=save_path) + return self._ckpoint_filelist + elif self.ckpt_save_policy == "latest_k": + self.latest_K_checkpoint(network, K=num_ckpt, save_path=save_path) + return self._ckpoint_filelist + else: + raise ValueError( + f"The expected 'ckpt_save_policy' is None, top_k or latest_k," f"but got: {self.ckpt_save_policy}." + ) diff --git a/community/cv/ShipWise/mindyolo/utils/config.py b/community/cv/ShipWise/mindyolo/utils/config.py new file mode 100644 index 0000000000000000000000000000000000000000..0b35a893d6b78c937a2c4d468ac451918eadeaa0 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/utils/config.py @@ -0,0 +1,150 @@ +import argparse +import collections +import os +from copy import deepcopy +import yaml + +try: + collectionsAbc = collections.abc +except AttributeError: + collectionsAbc = collections + +__all__ = ["parse_args"] + + +def parse_args(parser): + parser_config = argparse.ArgumentParser(description="Config", add_help=False) + parser_config.add_argument( + "-c", "--config", type=str, default="", help="YAML config file specifying default arguments." + ) + + args_config, remaining = parser_config.parse_known_args() + + # Do we have a config file to parse? + if args_config.config: + cfg, _, _ = load_config(args_config.config) + cfg = Config(cfg) + parser.set_defaults(**cfg) + parser.set_defaults(config=args_config.config) + + # The main arg parser parses the rest of the args, the usual + # defaults will have been overridden if config file specified. + args = parser.parse_args(remaining) + + return Config(vars(args)) + + +def load_config(file_path): + BASE = "__BASE__" + assert os.path.splitext(file_path)[-1] in [".yaml", ".yml"], f"[{file_path}] not yaml format." + cfg_default, cfg_helper, cfg_choices = _parse_yaml(file_path) + + # NOTE: cfgs outside have higher priority than cfgs in _BASE_ + if BASE in cfg_default: + all_base_cfg_default = {} + all_base_cfg_helper = {} + all_base_cfg_choices = {} + base_yamls = list(cfg_default[BASE]) + for base_yaml in base_yamls: + if base_yaml.startswith("~"): + base_yaml = os.path.expanduser(base_yaml) + if not base_yaml.startswith("/"): + base_yaml = os.path.join(os.path.dirname(file_path), base_yaml) + + base_cfg_default, base_cfg_helper, base_cfg_choices = load_config(base_yaml) + all_base_cfg_default = _merge_config(base_cfg_default, all_base_cfg_default) + all_base_cfg_helper = _merge_config(base_cfg_helper, all_base_cfg_helper) + all_base_cfg_choices = _merge_config(base_cfg_choices, all_base_cfg_choices) + + del cfg_default[BASE] + return ( + _merge_config(cfg_default, all_base_cfg_default), + _merge_config(cfg_helper, all_base_cfg_helper), + _merge_config(cfg_choices, all_base_cfg_choices), + ) + + return cfg_default, cfg_helper, cfg_choices + + +def _parse_yaml(yaml_path): + """ + Parse the yaml config file. + + Args: + yaml_path: Path to the yaml config. 
+ """ + with open(yaml_path, "r") as fin: + try: + cfgs = yaml.load_all(fin.read(), Loader=yaml.FullLoader) + cfgs = [x for x in cfgs] + if len(cfgs) == 1: + cfg = cfgs[0] + cfg_helper = {} + cfg_choices = {} + elif len(cfgs) == 2: + cfg, cfg_helper = cfgs + cfg_choices = {} + elif len(cfgs) == 3: + cfg, cfg_helper, cfg_choices = cfgs + else: + raise ValueError("At most 3 docs (config, description for help, choices) are supported in config yaml") + except: + raise ValueError("Failed to parse yaml") + return cfg, cfg_helper, cfg_choices + + +def _merge_config(config, base): + """Merge config""" + new = deepcopy(base) + for k, v in config.items(): + if k in new and isinstance(new[k], dict) and isinstance(config[k], collectionsAbc.Mapping): + new[k] = _merge_config(config[k], new[k]) + else: + new[k] = config[k] + return new + + +class Config(dict): + """ + Configuration namespace. Convert dictionary to members. + """ + + def __init__(self, cfg_dict): + super(Config, self).__init__() + for k, v in cfg_dict.items(): + setattr(self, k, Config(v) if isinstance(v, dict) else v) + + def __setattr__(self, name, value): + self[name] = value + self.__dict__.update({name: value}) + + def __getattr__(self, name): + if name in self: + return self[name] + else: + raise AttributeError(name) + + def __str__(self): + return config_format_func(self) + + def __repr__(self): + return self.__str__() + + +def config_format_func(config, prefix=""): + """ + Args: + config: dict-like object + Returns: + formatted str + """ + msg = "" + if prefix: + prefix += "." + + for k, v in config.__dict__.items(): + if isinstance(v, Config): + msg += config_format_func(v, prefix=str(k)) + else: + msg += format(prefix + str(k), "<40") + format(str(v), "<") + "\n" + return msg diff --git a/community/cv/ShipWise/mindyolo/utils/convert_weight_cspdarknet53.py b/community/cv/ShipWise/mindyolo/utils/convert_weight_cspdarknet53.py new file mode 100644 index 0000000000000000000000000000000000000000..663fc3616e73cd4cfd1f7d8cc5549a7f28aaff5f --- /dev/null +++ b/community/cv/ShipWise/mindyolo/utils/convert_weight_cspdarknet53.py @@ -0,0 +1,167 @@ +import mindspore as ms + +convert_dict = { + "feature_map.backbone.conv0.1": "model.model.0.bn", + "feature_map.backbone.conv1.1": "model.model.1.bn", + "feature_map.backbone.conv2.1": "model.model.2.bn", + "feature_map.backbone.conv3.1": "model.model.3.conv1.bn", + "feature_map.backbone.conv4.1": "model.model.3.conv2.bn", + "feature_map.backbone.conv5.1": "model.model.4.bn", + "feature_map.backbone.conv6.1": "model.model.5.bn", + "feature_map.backbone.conv7.1": "model.model.7.bn", + "feature_map.backbone.conv8.1": "model.model.8.bn", + "feature_map.backbone.conv9.1": "model.model.9.bn", + "feature_map.backbone.layer2.0.conv1.1": "model.model.10.0.conv1.bn", + "feature_map.backbone.layer2.0.conv2.1": "model.model.10.0.conv2.bn", + "feature_map.backbone.layer2.1.conv1.1": "model.model.10.1.conv1.bn", + "feature_map.backbone.layer2.1.conv2.1": "model.model.10.1.conv2.bn", + "feature_map.backbone.conv10.1": "model.model.11.bn", + "feature_map.backbone.conv11.1": "model.model.12.bn", + "feature_map.backbone.conv12.1": "model.model.14.bn", + "feature_map.backbone.conv13.1": "model.model.15.bn", + "feature_map.backbone.conv14.1": "model.model.16.bn", + "feature_map.backbone.layer3.0.conv1.1": "model.model.17.0.conv1.bn", + "feature_map.backbone.layer3.1.conv1.1": "model.model.17.1.conv1.bn", + "feature_map.backbone.layer3.2.conv1.1": "model.model.17.2.conv1.bn", + 
"feature_map.backbone.layer3.3.conv1.1": "model.model.17.3.conv1.bn", + "feature_map.backbone.layer3.4.conv1.1": "model.model.17.4.conv1.bn", + "feature_map.backbone.layer3.5.conv1.1": "model.model.17.5.conv1.bn", + "feature_map.backbone.layer3.6.conv1.1": "model.model.17.6.conv1.bn", + "feature_map.backbone.layer3.7.conv1.1": "model.model.17.7.conv1.bn", + "feature_map.backbone.layer3.0.conv2.1": "model.model.17.0.conv2.bn", + "feature_map.backbone.layer3.1.conv2.1": "model.model.17.1.conv2.bn", + "feature_map.backbone.layer3.2.conv2.1": "model.model.17.2.conv2.bn", + "feature_map.backbone.layer3.3.conv2.1": "model.model.17.3.conv2.bn", + "feature_map.backbone.layer3.4.conv2.1": "model.model.17.4.conv2.bn", + "feature_map.backbone.layer3.5.conv2.1": "model.model.17.5.conv2.bn", + "feature_map.backbone.layer3.6.conv2.1": "model.model.17.6.conv2.bn", + "feature_map.backbone.layer3.7.conv2.1": "model.model.17.7.conv2.bn", + "feature_map.backbone.conv15.1": "model.model.18.bn", + "feature_map.backbone.conv16.1": "model.model.19.bn", + "feature_map.backbone.conv17.1": "model.model.21.bn", + "feature_map.backbone.conv18.1": "model.model.22.bn", + "feature_map.backbone.conv19.1": "model.model.23.bn", + "feature_map.backbone.layer4.0.conv1.1": "model.model.24.0.conv1.bn", + "feature_map.backbone.layer4.1.conv1.1": "model.model.24.1.conv1.bn", + "feature_map.backbone.layer4.2.conv1.1": "model.model.24.2.conv1.bn", + "feature_map.backbone.layer4.3.conv1.1": "model.model.24.3.conv1.bn", + "feature_map.backbone.layer4.4.conv1.1": "model.model.24.4.conv1.bn", + "feature_map.backbone.layer4.5.conv1.1": "model.model.24.5.conv1.bn", + "feature_map.backbone.layer4.6.conv1.1": "model.model.24.6.conv1.bn", + "feature_map.backbone.layer4.7.conv1.1": "model.model.24.7.conv1.bn", + "feature_map.backbone.layer4.0.conv2.1": "model.model.24.0.conv2.bn", + "feature_map.backbone.layer4.1.conv2.1": "model.model.24.1.conv2.bn", + "feature_map.backbone.layer4.2.conv2.1": "model.model.24.2.conv2.bn", + "feature_map.backbone.layer4.3.conv2.1": "model.model.24.3.conv2.bn", + "feature_map.backbone.layer4.4.conv2.1": "model.model.24.4.conv2.bn", + "feature_map.backbone.layer4.5.conv2.1": "model.model.24.5.conv2.bn", + "feature_map.backbone.layer4.6.conv2.1": "model.model.24.6.conv2.bn", + "feature_map.backbone.layer4.7.conv2.1": "model.model.24.7.conv2.bn", + "feature_map.backbone.conv20.1": "model.model.25.bn", + "feature_map.backbone.conv21.1": "model.model.26.bn", + "feature_map.backbone.conv22.1": "model.model.28.bn", + "feature_map.backbone.conv23.1": "model.model.29.bn", + "feature_map.backbone.conv24.1": "model.model.30.bn", + "feature_map.backbone.layer5.0.conv1.1": "model.model.31.0.conv1.bn", + "feature_map.backbone.layer5.1.conv1.1": "model.model.31.1.conv1.bn", + "feature_map.backbone.layer5.2.conv1.1": "model.model.31.2.conv1.bn", + "feature_map.backbone.layer5.3.conv1.1": "model.model.31.3.conv1.bn", + "feature_map.backbone.layer5.0.conv2.1": "model.model.31.0.conv2.bn", + "feature_map.backbone.layer5.1.conv2.1": "model.model.31.1.conv2.bn", + "feature_map.backbone.layer5.2.conv2.1": "model.model.31.2.conv2.bn", + "feature_map.backbone.layer5.3.conv2.1": "model.model.31.3.conv2.bn", + "feature_map.backbone.conv25.1": "model.model.32.bn", + "feature_map.backbone.conv26.1": "model.model.33.bn", + "feature_map.backbone.conv27.1": "model.model.35.bn", + "feature_map.backbone.conv0.0": "model.model.0.conv", + "feature_map.backbone.conv1.0": "model.model.1.conv", + "feature_map.backbone.conv2.0": 
"model.model.2.conv", + "feature_map.backbone.conv3.0": "model.model.3.conv1.conv", + "feature_map.backbone.conv4.0": "model.model.3.conv2.conv", + "feature_map.backbone.conv5.0": "model.model.4.conv", + "feature_map.backbone.conv6.0": "model.model.5.conv", + "feature_map.backbone.conv7.0": "model.model.7.conv", + "feature_map.backbone.conv8.0": "model.model.8.conv", + "feature_map.backbone.conv9.0": "model.model.9.conv", + "feature_map.backbone.layer2.0.conv1.0": "model.model.10.0.conv1.conv", + "feature_map.backbone.layer2.0.conv2.0": "model.model.10.0.conv2.conv", + "feature_map.backbone.layer2.1.conv1.0": "model.model.10.1.conv1.conv", + "feature_map.backbone.layer2.1.conv2.0": "model.model.10.1.conv2.conv", + "feature_map.backbone.conv10.0": "model.model.11.conv", + "feature_map.backbone.conv11.0": "model.model.12.conv", + "feature_map.backbone.conv12.0": "model.model.14.conv", + "feature_map.backbone.conv13.0": "model.model.15.conv", + "feature_map.backbone.conv14.0": "model.model.16.conv", + "feature_map.backbone.layer3.0.conv1.0": "model.model.17.0.conv1.conv", + "feature_map.backbone.layer3.1.conv1.0": "model.model.17.1.conv1.conv", + "feature_map.backbone.layer3.2.conv1.0": "model.model.17.2.conv1.conv", + "feature_map.backbone.layer3.3.conv1.0": "model.model.17.3.conv1.conv", + "feature_map.backbone.layer3.4.conv1.0": "model.model.17.4.conv1.conv", + "feature_map.backbone.layer3.5.conv1.0": "model.model.17.5.conv1.conv", + "feature_map.backbone.layer3.6.conv1.0": "model.model.17.6.conv1.conv", + "feature_map.backbone.layer3.7.conv1.0": "model.model.17.7.conv1.conv", + "feature_map.backbone.layer3.0.conv2.0": "model.model.17.0.conv2.conv", + "feature_map.backbone.layer3.1.conv2.0": "model.model.17.1.conv2.conv", + "feature_map.backbone.layer3.2.conv2.0": "model.model.17.2.conv2.conv", + "feature_map.backbone.layer3.3.conv2.0": "model.model.17.3.conv2.conv", + "feature_map.backbone.layer3.4.conv2.0": "model.model.17.4.conv2.conv", + "feature_map.backbone.layer3.5.conv2.0": "model.model.17.5.conv2.conv", + "feature_map.backbone.layer3.6.conv2.0": "model.model.17.6.conv2.conv", + "feature_map.backbone.layer3.7.conv2.0": "model.model.17.7.conv2.conv", + "feature_map.backbone.conv15.0": "model.model.18.conv", + "feature_map.backbone.conv16.0": "model.model.19.conv", + "feature_map.backbone.conv17.0": "model.model.21.conv", + "feature_map.backbone.conv18.0": "model.model.22.conv", + "feature_map.backbone.conv19.0": "model.model.23.conv", + "feature_map.backbone.layer4.0.conv1.0": "model.model.24.0.conv1.conv", + "feature_map.backbone.layer4.1.conv1.0": "model.model.24.1.conv1.conv", + "feature_map.backbone.layer4.2.conv1.0": "model.model.24.2.conv1.conv", + "feature_map.backbone.layer4.3.conv1.0": "model.model.24.3.conv1.conv", + "feature_map.backbone.layer4.4.conv1.0": "model.model.24.4.conv1.conv", + "feature_map.backbone.layer4.5.conv1.0": "model.model.24.5.conv1.conv", + "feature_map.backbone.layer4.6.conv1.0": "model.model.24.6.conv1.conv", + "feature_map.backbone.layer4.7.conv1.0": "model.model.24.7.conv1.conv", + "feature_map.backbone.layer4.0.conv2.0": "model.model.24.0.conv2.conv", + "feature_map.backbone.layer4.1.conv2.0": "model.model.24.1.conv2.conv", + "feature_map.backbone.layer4.2.conv2.0": "model.model.24.2.conv2.conv", + "feature_map.backbone.layer4.3.conv2.0": "model.model.24.3.conv2.conv", + "feature_map.backbone.layer4.4.conv2.0": "model.model.24.4.conv2.conv", + "feature_map.backbone.layer4.5.conv2.0": "model.model.24.5.conv2.conv", + 
"feature_map.backbone.layer4.6.conv2.0": "model.model.24.6.conv2.conv", + "feature_map.backbone.layer4.7.conv2.0": "model.model.24.7.conv2.conv", + "feature_map.backbone.conv20.0": "model.model.25.conv", + "feature_map.backbone.conv21.0": "model.model.26.conv", + "feature_map.backbone.conv22.0": "model.model.28.conv", + "feature_map.backbone.conv23.0": "model.model.29.conv", + "feature_map.backbone.conv24.0": "model.model.30.conv", + "feature_map.backbone.layer5.0.conv1.0": "model.model.31.0.conv1.conv", + "feature_map.backbone.layer5.1.conv1.0": "model.model.31.1.conv1.conv", + "feature_map.backbone.layer5.2.conv1.0": "model.model.31.2.conv1.conv", + "feature_map.backbone.layer5.3.conv1.0": "model.model.31.3.conv1.conv", + "feature_map.backbone.layer5.0.conv2.0": "model.model.31.0.conv2.conv", + "feature_map.backbone.layer5.1.conv2.0": "model.model.31.1.conv2.conv", + "feature_map.backbone.layer5.2.conv2.0": "model.model.31.2.conv2.conv", + "feature_map.backbone.layer5.3.conv2.0": "model.model.31.3.conv2.conv", + "feature_map.backbone.conv25.0": "model.model.32.conv", + "feature_map.backbone.conv26.0": "model.model.33.conv", + "feature_map.backbone.conv27.0": "model.model.35.conv", +} + + +def convert_weight(ori_weight, new_weight): + new_ckpt = [] + param_dict = ms.load_checkpoint(ori_weight) + for k, v in param_dict.items(): + if "feature_map.backbone" in k: + for key, val in convert_dict.items(): + if key in k: + k = k.replace(key, val) + new_ckpt.append({"name": k, "data": v}) + ms.save_checkpoint(new_ckpt, new_weight) + + +if __name__ == "__main__": + convert_weight( + "./cspdarknet53_ascend_v120_imagenet2012_official_cv_bs64_top1acc7854_top5acc9428.ckpt", + "./yolov4_backbone.ckpt", + ) diff --git a/community/cv/ShipWise/mindyolo/utils/convert_weight_darknet53.py b/community/cv/ShipWise/mindyolo/utils/convert_weight_darknet53.py new file mode 100644 index 0000000000000000000000000000000000000000..9233995d9138a05c930d31b19d9047ae32b0d996 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/utils/convert_weight_darknet53.py @@ -0,0 +1,66 @@ +import os +import sys + +import numpy as np + +import mindspore as ms +from config import parse_args + +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))) +from mindyolo.models import create_model + + +def _load_weight(weights_file): + """Loads pre-trained weights.""" + if not os.path.isfile(weights_file): + raise ValueError(f'"{weights_file}" is not a valid weight file.') + with open(weights_file, "rb") as fp: + np.fromfile(fp, dtype=np.int32, count=5) + return np.fromfile(fp, dtype=np.float32) + + +def convert_weight(cfg, weights_file="./darknet53.conv.74", output_file="./yolov3_backbone.ckpt"): + """Convert weight to mindspore ckpt.""" + net = create_model(model_name=cfg.network.model_name, model_cfg=cfg.network) + params = net.get_parameters() + params = [p for p in params] + weights = _load_weight(weights_file) + index = 0 + param_list = [] + weights_num = len(weights) + for i in range(0, len(params), 5): + weight = params[i] + mean = params[i + 1] + var = params[i + 2] + gamma = params[i + 3] + beta = params[i + 4] + beta_data = weights[index : index + beta.size].reshape(beta.shape) + index += beta.size + gamma_data = weights[index : index + gamma.size].reshape(gamma.shape) + index += gamma.size + mean_data = weights[index : index + mean.size].reshape(mean.shape) + index += mean.size + var_data = weights[index : index + var.size].reshape(var.shape) + index += var.size + weight_data = weights[index : index + 
weight.size].reshape(weight.shape) + index += weight.size + + param_list.append( + {"name": weight.name, "type": weight.dtype, "shape": weight.shape, "data": ms.Tensor(weight_data)} + ) + param_list.append({"name": mean.name, "type": mean.dtype, "shape": mean.shape, "data": ms.Tensor(mean_data)}) + param_list.append({"name": var.name, "type": var.dtype, "shape": var.shape, "data": ms.Tensor(var_data)}) + param_list.append( + {"name": gamma.name, "type": gamma.dtype, "shape": gamma.shape, "data": ms.Tensor(gamma_data)} + ) + param_list.append({"name": beta.name, "type": beta.dtype, "shape": beta.shape, "data": ms.Tensor(beta_data)}) + + if index >= weights_num: + break + + ms.save_checkpoint(param_list, output_file) + + +if __name__ == "__main__": + args = parse_args() + convert_weight(args) diff --git a/community/cv/ShipWise/mindyolo/utils/logger.py b/community/cv/ShipWise/mindyolo/utils/logger.py new file mode 100644 index 0000000000000000000000000000000000000000..a0457a2eb7e466e23ae2bc126a64534be62b00e6 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/utils/logger.py @@ -0,0 +1,182 @@ +"""Custom Logger.""" +import logging +import os +import sys +from datetime import datetime + +__all__ = ["get_logger"] + +GLOBAL_LOGGER = None + + +class CustomStreamHandler(logging.StreamHandler): + def __init__(self, stream=None): + super().__init__(stream) + + def emit(self, record): + # to start with logger header at every newline + # use __str__ to enable record.msg to be non-str object + messages = record.msg.__str__().split("\n") + for msg in messages: + record.msg = msg + super(CustomStreamHandler, self).emit(record) + + +class Logger(logging.Logger): + """ + Logger classes and functions, support print information on console and files. + + Args: + logger_name(str): The name of Logger. In most cases, it can be the name of the network. + """ + + def __init__(self, logger_name="MindYOLO"): + super(Logger, self).__init__(logger_name) + self.log_level = "INFO" + self.rank_id = _get_rank_id() + self.device_per_servers = 8 + self.formatter = logging.Formatter("%(asctime)s [%(levelname)s] %(message)s") + + def write(self, msg): + """ + write method to simulate Stream class + """ + if msg and not msg.isspace(): # skip line with white spaces + self.info(msg) + + def flush(self): + """ + write method to simulate Stream class + """ + pass + + +def setup_logging(logger_name="MindYOLO", log_level="INFO", rank_id=None, device_per_servers=8): + """Setup logging file.""" + logger = get_logger() + logger.name = logger_name + logger.log_level = log_level + if rank_id is not None: + logger.rank_id = rank_id + logger.device_per_servers = device_per_servers + + if logger.log_level not in ["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"]: + raise ValueError( + f"Not support log_level: {logger.log_level}, " + f"the log_level should be in ['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG']" + ) + + # In the distributed scenario, only one card is printed on the console. 
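+    # e.g. with device_per_servers=8, only local rank 0 of each server (global ranks 0, 8, 16, ...)
+    # attaches a stdout handler; the other ranks still write to per-rank files via setup_logging_file()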
+ if logger.rank_id % logger.device_per_servers == 0: + console = CustomStreamHandler(sys.stdout) + console.setLevel(logger.log_level) + console.setFormatter(logger.formatter) + logger.addHandler(console) + + +def setup_logging_file(log_dir="./logs"): + """Setup logging file.""" + logger = get_logger() + if not os.path.exists(log_dir): + os.makedirs(log_dir, exist_ok=True) + + # Generate a file stream based on the log generation time and rank_id + log_name = f"{logger.name}_{datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S')}_rank_{logger.rank_id}.log" + log_path = os.path.join(log_dir, log_name) + file_handler = logging.FileHandler(log_path) + file_handler.setLevel(logger.log_level) + file_handler.setFormatter(logger.formatter) + logger.addHandler(file_handler) + + +def print_args(args): + """Print hyper-parameter""" + get_logger().info("Args:") + args_dict = vars(args) + for key in args_dict.keys(): + get_logger().info("--> %s: %s", key, args_dict[key]) + get_logger().info("") + + +def important_info(msg, *args, **kwargs): + """For information that needs to be focused on, add special printing format.""" + line_width = 2 + important_msg = "\n" + important_msg += ("*" * 70 + "\n") * line_width + important_msg += ("*" * line_width + "\n") * 2 + important_msg += "*" * line_width + " " * 8 + msg + "\n" + important_msg += ("*" * line_width + "\n") * 2 + important_msg += ("*" * 70 + "\n") * line_width + get_logger().info(important_msg, *args, **kwargs) + + +def info(msg, *args, **kwargs): + """ + Log a message with severity 'INFO' on the MindYOLO logger. + + Examples: + >>> from mindyolo import logger + >>> logger.setup_logging(logger_name="MindYOLO", log_level="INFO", rank_id=0, device_per_servers=8) + >>> logger.setup_logging_file(log_dir="./logs") + >>> logger.info("test info") + """ + get_logger().info(msg, *args, **kwargs) + + +def debug(msg, *args, **kwargs): + """Log a message with severity 'DEBUG' on the MindYOLO logger.""" + get_logger().debug(msg, *args, **kwargs) + + +def error(msg, *args, **kwargs): + """Log a message with severity 'ERROR' on the MindYOLO logger.""" + get_logger().error(msg, *args, **kwargs) + + +def warning(msg, *args, **kwargs): + """Log a message with severity 'WARNING' on the MindYOLO logger.""" + get_logger().warning(msg, *args, **kwargs) + + +def critical(msg, *args, **kwargs): + """Log a message with severity 'CRITICAL' on the MindYOLO logger.""" + get_logger().critical(msg, *args, **kwargs) + + +def get_level(): + """ + Get the logger level. + + Returns: + str, the Log level includes 'CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'. + """ + # level and glog level mapping dictionary + + return get_logger().log_level + + +def _get_rank_id(): + """Get rank id.""" + rank_id = os.getenv("RANK_ID") + gpu_rank_id = os.getenv("OMPI_COMM_WORLD_RANK") + rank = "0" + if rank_id and gpu_rank_id and rank_id != gpu_rank_id: + print( + f"Environment variables RANK_ID and OMPI_COMM_WORLD_RANK set by different values, RANK_ID={rank_id}, " + f"OMPI_COMM_WORLD_RANK={gpu_rank_id}. 
We will use RANK_ID to get rank id by default.", + flush=True, + ) + if rank_id: + rank = rank_id + elif gpu_rank_id: + rank = gpu_rank_id + return int(rank) + + +def get_logger(): + """Get logger instance.""" + global GLOBAL_LOGGER + if GLOBAL_LOGGER: + return GLOBAL_LOGGER + GLOBAL_LOGGER = Logger() + return GLOBAL_LOGGER diff --git a/community/cv/ShipWise/mindyolo/utils/metrics.py b/community/cv/ShipWise/mindyolo/utils/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..0f6d1733f38d4ca8a21dfd835d6f82c582b0c21b --- /dev/null +++ b/community/cv/ShipWise/mindyolo/utils/metrics.py @@ -0,0 +1,355 @@ +import time +import cv2 +import numpy as np + +import mindspore as ms +from mindspore import ops, Tensor + +__all__ = ["non_max_suppression", "scale_coords", "xyxy2xywh", "xywh2xyxy"] + + +def non_max_suppression( + prediction, + mask_coefficient=None, + conf_thres=0.25, + iou_thres=0.45, + conf_free=False, + classes=None, + agnostic=False, + multi_label=False, + time_limit=20.0, +): + """Runs Non-Maximum Suppression (NMS) on inference results + + Args: + prediction (ndarray): Prediction. If conf_free is False, prediction on (bs, N, 5+nc) ndarray each point, + the last dimension meaning [center_x, center_y, width, height, conf, cls0, ...]; If conf_free is True, + prediction on (bs, N, 4+nc) ndarray each point, the last dimension meaning [center_x, center_y, width, height, cls0, ...]. + conf_free (bool): Whether the prediction result include conf. + + Returns: + list of detections, on (n,6) ndarray per image, the last dimension meaning [xyxy, conf, cls]. + """ + + if not conf_free: + nc = prediction.shape[2] - 5 # number of classes + xc = prediction[..., 4] > conf_thres # candidates + else: + nc = prediction.shape[2] - 4 # number of classes + xc = prediction[..., 4:].max(-1) > conf_thres # candidates + prediction = np.concatenate( + (prediction[..., :4], prediction[..., 4:].max(-1, keepdims=True), prediction[..., 4:]), axis=-1 + ) + + nm = 0 + if mask_coefficient is not None: + assert mask_coefficient.shape[:2] == prediction.shape[:2], \ + f"mask_coefficient shape {mask_coefficient.shape[:2]} and " \ + f"prediction.shape {prediction.shape[:2]} are not equal." + nm = mask_coefficient.shape[2] + prediction = np.concatenate((prediction, mask_coefficient), axis=-1) + + # Settings + min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height + max_det = 300 # maximum number of detections per image + max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() + time_limit = time_limit if time_limit > 0 else 1e3 # seconds to quit after + redundant = True # require redundant detections + multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) + merge = False # use merge-NMS + + t = time.time() + output = [np.zeros((0, 6+nm))] * prediction.shape[0] + for xi, x in enumerate(prediction): # image index, image inference + # Apply constraints + # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height + x = x[xc[xi]] # confidence + + # If none remain process next image + if not x.shape[0]: + continue + + # Scale class with conf + if not conf_free: + if nc == 1: + x[:, 5:5+nc] = x[:, 4:5] # signle cls no need to multiplicate. 
+            else:
+                x[:, 5:5+nc] *= x[:, 4:5] # conf = obj_conf * cls_conf
+
+        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
+        box = xywh2xyxy(x[:, :4])
+
+        # Detections matrix nx6 (xyxy, conf, cls)
+        if multi_label:
+            i, j = (x[:, 5:5+nc] > conf_thres).nonzero()
+            x = np.concatenate((box[i], x[i, j + 5, None], j[:, None].astype(np.float32)), 1) if nm == 0 else \
+                np.concatenate((box[i], x[i, j + 5, None], j[:, None].astype(np.float32), x[i, -nm:]), 1)
+        else: # best class only
+            conf = x[:, 5:5+nc].max(1, keepdims=True) # get maximum conf
+            j = np.argmax(x[:, 5:5+nc], axis=1, keepdims=True) # get maximum index
+            x = np.concatenate((box, conf, j.astype(np.float32)), 1)[conf.flatten() > conf_thres] if nm == 0 else \
+                np.concatenate((box, conf, j.astype(np.float32), x[:, -nm:]), 1)[conf.flatten() > conf_thres]
+
+
+        # Filter by class
+        if classes is not None:
+            x = x[(x[:, 5:6] == np.array(classes)).any(1)]
+
+        # Check shape
+        n = x.shape[0] # number of boxes
+        if not n: # no boxes
+            continue
+        elif n > max_nms: # excess boxes
+            x = x[x[:, 4].argsort()[-max_nms:]] # sort by confidence
+
+        # Batched NMS
+        c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
+        boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
+
+        i = _nms(boxes, scores, iou_thres) # NMS per sample
+
+        if i.shape[0] > max_det: # limit detections
+            i = i[:max_det]
+        if merge and (1 < n < 3e3): # Merge NMS (boxes merged using weighted mean)
+            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
+            iou = _box_iou(boxes[i], boxes) > iou_thres # iou matrix # (N, M)
+            weights = iou * scores[None] # box weights
+            # (N, M) @ (M, 4) / (N, 1)
+            x[i, :4] = np.matmul(weights, x[:, :4]) / weights.sum(1, keepdims=True) # merged boxes
+            if redundant:
+                i = i[iou.sum(1) > 1] # require redundancy
+
+        output[xi] = x[i]
+        if (time.time() - t) > time_limit:
+            print(
+                f"WARNING: Batch NMS time limit {time_limit}s exceeded; processed "
+                f"{xi + 1}/{prediction.shape[0]} samples of this batch."
+ ) + break # time limit exceeded + + return output + + +def scale_coords(img1_shape, coords, img0_shape, ratio=None, pad=None): + # Rescale coords (xyxy) from img1_shape to img0_shape + + if ratio is None: # calculate from img0_shape + ratio = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # ratio = old / new + else: + ratio = ratio[0] + + if pad is None: + padh, padw = (img1_shape[0] - img0_shape[0] * ratio) / 2, (img1_shape[1] - img0_shape[1] * ratio) / 2 + else: + padh, padw = pad[:] + + coords[:, [0, 2]] -= padw # x padding + coords[:, [1, 3]] -= padh # y padding + coords[:, [0, 2]] /= ratio # x rescale + coords[:, [1, 3]] /= ratio # y rescale + coords = _clip_coords(coords, img0_shape) + return coords + + +def _clip_coords(boxes, img_shape): + # Clip bounding xyxy bounding boxes to image shape (height, width) + boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, img_shape[1]) # x1, x2 + boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, img_shape[0]) # y1, y2 + return boxes + + +def _nms(xyxys, scores, threshold): + """Calculate NMS""" + s_time = time.time() + x1 = xyxys[:, 0] + y1 = xyxys[:, 1] + x2 = xyxys[:, 2] + y2 = xyxys[:, 3] + scores = scores + # areas = (x2 - x1 + 1) * (y2 - y1 + 1) + areas = (x2 - x1) * (y2 - y1) + order = scores.argsort()[::-1] + reserved_boxes = [] + while order.size > 0: + i = order[0] + reserved_boxes.append(i) + max_x1 = np.maximum(x1[i], x1[order[1:]]) + max_y1 = np.maximum(y1[i], y1[order[1:]]) + min_x2 = np.minimum(x2[i], x2[order[1:]]) + min_y2 = np.minimum(y2[i], y2[order[1:]]) + + # intersect_w = np.maximum(0.0, min_x2 - max_x1 + 1) + # intersect_h = np.maximum(0.0, min_y2 - max_y1 + 1) + intersect_w = np.maximum(0.0, min_x2 - max_x1) + intersect_h = np.maximum(0.0, min_y2 - max_y1) + intersect_area = intersect_w * intersect_h + + ovr = intersect_area / (areas[i] + areas[order[1:]] - intersect_area + 1e-6) + indexes = np.where(ovr <= threshold)[0] + order = order[indexes + 1] + return np.array(reserved_boxes) + + +def _box_iou(box1, box2): + # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py + """ + Return intersection-over-union (Jaccard index) of boxes. + Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 
+    Arguments:
+        box1 ([N, 4])
+        box2 ([M, 4])
+    Returns:
+        iou ([N, M]): the NxM matrix containing the pairwise
+            IoU values for every element in boxes1 and boxes2
+    """
+
+    def box_area(box):
+        # box = 4xn
+        return (box[2] - box[0]) * (box[3] - box[1])
+
+    area1 = box_area(box1.T)
+    area2 = box_area(box2.T)
+
+    # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
+    inter = (
+        (np.minimum(box1[:, None, 2:], box2[:, 2:]) - np.maximum(box1[:, None, :2], box2[:, :2])).clip(0, None).prod(2)
+    )
+    return inter / (area1[:, None] + area2 - inter) # iou = inter / (area1 + area2 - inter)
+
+
+def xywh2xyxy(x):
+    # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
+    y = np.copy(x)
+    y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
+    y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
+    y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
+    y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
+    return y
+
+
+def xyxy2xywh(x):
+    # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right
+    y = np.copy(x)
+    y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center
+    y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center
+    y[:, 2] = x[:, 2] - x[:, 0] # width
+    y[:, 3] = x[:, 3] - x[:, 1] # height
+    return y
+
+
+#------------------------for segment------------------------
+
+def scale_image(masks, img0_shape, pad=None):
+    """
+    Takes a mask, and resizes it to the original image size
+    Args:
+        masks (numpy.ndarray): resized and padded masks/images, [h, w, num]/[h, w, 3].
+        img0_shape (tuple): the original image shape
+        pad (tuple): (pad_h, pad_w) padding applied to the resized image; computed from the two shapes if None.
+    Returns:
+        masks (numpy.ndarray): The masks that are being returned.
+    """
+
+    # Rescale coordinates (xyxy) from img1_shape to img0_shape
+    img1_shape = masks.shape
+    if (np.array(img1_shape[:2]) == np.array(img0_shape[:2])).all():
+        return masks
+
+    if pad is None:
+        ratio = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # ratio = old / new
+        pad = (img1_shape[0] - img0_shape[0] * ratio) / 2, (img1_shape[1] - img0_shape[1] * ratio) / 2
+
+    top, left = int(pad[0]), int(pad[1]) # y, x
+    bottom, right = int(img1_shape[0] - pad[0]), int(img1_shape[1] - pad[1])
+
+    if len(masks.shape) < 2:
+        raise ValueError(f'masks should have 2 or 3 dimensions, but got {len(masks.shape)}')
+    masks = masks[top:bottom, left:right]
+    masks = cv2.resize(masks, dsize=(img0_shape[1], img0_shape[0]), interpolation=cv2.INTER_LINEAR)
+    # masks = ops.interpolate(Tensor(masks, dtype=ms.float32)[None], shape, mode='bilinear', align_corners=False)[0].asnumpy() # CHW
+    if len(masks.shape) == 2:
+        masks = masks[:, :, None]
+
+    return masks
+
+
+def crop_mask(masks, boxes):
+    """
+    It takes a mask and a bounding box, and returns a mask that is cropped to the bounding box
+    Args:
+        masks (numpy.ndarray): [n, h, w] array of masks
+        boxes (numpy.ndarray): [n, 4] array of bbox coordinates in the same pixel coordinates as the masks
+    Returns:
+        (numpy.ndarray): The masks are being cropped to the bounding box.
+    """
+    n, h, w = masks.shape
+    x1, y1, x2, y2 = np.split(boxes[:, :, None], 4, 1) # x1 shape(n,1,1)
+    r = np.arange(w, dtype=x1.dtype)[None, None, :] # rows shape(1,1,w)
+    c = np.arange(h, dtype=x1.dtype)[None, :, None] # cols shape(1,h,1)
+
+    return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))
+
+
+def process_mask_upsample(protos, masks_in, bboxes, shape):
+    """
+    It takes the output of the mask head, and applies the mask to the bounding boxes. This produces masks of higher
+    quality but is slower.
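+    (Mask logits are computed at prototype resolution, bilinearly upsampled to the
+    input image size, and only then cropped to each box and thresholded at 0.5.)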
+    Args:
+        protos (numpy.ndarray): [mask_dim, mask_h, mask_w]
+        masks_in (numpy.ndarray): [n, mask_dim], n is number of masks after nms
+        bboxes (numpy.ndarray): [n, 4], n is number of masks after nms
+        shape (tuple): the size of the input image (h,w)
+    Returns:
+        (numpy.ndarray): The upsampled masks.
+    """
+    assert len(shape) == 2, f"The length of the shape is {len(shape)}, expected to be 2."
+    c, mh, mw = protos.shape # CHW
+    masks = sigmoid((np.matmul(masks_in, protos.reshape(c, -1)))).reshape(-1, mh, mw)
+
+    # interpolate bilinear
+    # (n, mh, mw) -> (mh, mw, n) -> (*shape, n) -> (n, *shape)
+    # masks = cv2.resize(masks.transpose(1, 2, 0), dsize=shape, interpolation=cv2.INTER_LINEAR).transpose(2, 0, 1)
+    masks = ops.interpolate(Tensor(masks, dtype=ms.float32)[None], shape, mode='bilinear', align_corners=False)[0].asnumpy() # CHW
+
+    masks = crop_mask(masks, bboxes) # CHW
+    return masks > 0.5
+
+
+def process_mask(protos, masks_in, bboxes, shape, upsample=False):
+    """
+    Apply masks to bounding boxes using the output of the mask head.
+
+    Args:
+        protos (numpy.ndarray): An array of shape [mask_dim, mask_h, mask_w].
+        masks_in (numpy.ndarray): An array of shape [n, mask_dim], where n is the number of masks after NMS.
+        bboxes (numpy.ndarray): An array of shape [n, 4], where n is the number of masks after NMS.
+        shape (tuple): A tuple of integers representing the size of the input image in the format (h, w).
+        upsample (bool): A flag to indicate whether to upsample the mask to the original image size. Default is False.
+
+    Returns:
+        (numpy.ndarray): A binary mask array of shape [n, h, w], where n is the number of masks after NMS, and h and w
+            are the height and width of the input image. The mask is applied to the bounding boxes.
+    """
+
+    assert len(shape) == 2, f"The length of the shape is {len(shape)}, expected to be 2."
+    c, mh, mw = protos.shape # CHW
+    ih, iw = shape
+    masks = sigmoid(np.matmul(masks_in, protos.reshape(c, -1))).reshape(-1, mh, mw) # CHW
+
+    downsampled_bboxes = np.copy(bboxes)
+    downsampled_bboxes[:, 0] *= mw / iw
+    downsampled_bboxes[:, 2] *= mw / iw
+    downsampled_bboxes[:, 3] *= mh / ih
+    downsampled_bboxes[:, 1] *= mh / ih
+
+    masks = crop_mask(masks, downsampled_bboxes) # CHW
+    if upsample:
+        # masks = cv2.resize(masks.transpose(1, 2, 0), dsize=shape, interpolation=cv2.INTER_LINEAR).transpose(2, 0, 1)
+        masks = ops.interpolate(Tensor(masks, dtype=ms.float32)[None], shape, mode='bilinear', align_corners=False)[0].asnumpy() # CHW
+    return masks > 0.5
+
+
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))
+
+#----------------------------------------------------------
diff --git a/community/cv/ShipWise/mindyolo/utils/modelarts.py b/community/cv/ShipWise/mindyolo/utils/modelarts.py
new file mode 100644
index 0000000000000000000000000000000000000000..9da6975fc552b1a7d6b3c116aeb1314e24a10a27
--- /dev/null
+++ b/community/cv/ShipWise/mindyolo/utils/modelarts.py
@@ -0,0 +1,53 @@
+import os
+
+_global_sync_count = 0
+
+__all__ = ["sync_data"]
+
+
+def get_device_id():
+    device_id = os.getenv("DEVICE_ID", "0")
+    return int(device_id)
+
+
+def get_device_num():
+    device_num = os.getenv("RANK_SIZE", "1")
+    return int(device_num)
+
+
+def get_rank_id():
+    global_rank_id = os.getenv("RANK_ID", "0")
+    return int(global_rank_id)
+
+
+def sync_data(from_path, to_path):
+    """
+    Download data from remote obs to a local directory if the first url is a remote url and the second is a local
+    path; upload data from the local directory to remote obs in the opposite case.
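+    Only one device per server performs the copy; the other ranks wait on a shared lock file.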
+ """ + import time + + import moxing as mox + + global _global_sync_count + sync_lock = "/tmp/copy_sync.lock" + str(_global_sync_count) + _global_sync_count += 1 + + # Each server contains 8 devices as most. + if get_device_id() % min(get_device_num(), 8) == 0 and not os.path.exists(sync_lock): + print("from path: ", from_path) + print("to path: ", to_path) + mox.file.copy_parallel(from_path, to_path) + print("===finish data synchronization===") + try: + os.mknod(sync_lock) + except IOError: + pass + print("===save flag===") + + while True: + if os.path.exists(sync_lock): + break + time.sleep(1) + + print("Finish sync data from {} to {}.".format(from_path, to_path)) diff --git a/community/cv/ShipWise/mindyolo/utils/poly.py b/community/cv/ShipWise/mindyolo/utils/poly.py new file mode 100644 index 0000000000000000000000000000000000000000..e5ec9e3911f2208be5cdf33370473d14b6721d4d --- /dev/null +++ b/community/cv/ShipWise/mindyolo/utils/poly.py @@ -0,0 +1,58 @@ +import cv2 +import numpy as np + +from mindyolo.data.utils import xywhn2xyxy + + +def show_img_with_bbox(data_dict, classes): + """ + Image and bboxes visualization. If input multiple images, apply on the first image only. + Args: + record: related data of images + classes: all categories of the whole dataset + + Returns: an image with detection boxes and categories + """ + img, labels = data_dict["images"][0], data_dict["labels"][0] + img = img.transpose(1, 2, 0)[:, :, ::-1] * 255.0 + img = np.ascontiguousarray(img, dtype=np.uint8) + labels = labels[labels[:, 1] > 0] # filter invalid label + category_ids = labels[:, 1] + bboxes = labels[:, 2:] + + categories = [classes[int(category_id)] for category_id in category_ids] + bboxes = xywhn2xyxy(bboxes[category_ids >= 0]) + for bbox, category in zip(bboxes, categories): + bbox = bbox.astype(np.int32) + categories_size = cv2.getTextSize(category + "0", cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0] + color = ((np.random.random((3,)) * 0.6 + 0.4) * 255).astype(np.uint8) + color = np.array(color).astype(np.int32).tolist() + + if bbox[1] - categories_size[1] - 3 < 0: + cv2.rectangle( + img, + (bbox[0], bbox[1] + 2), + (bbox[0] + categories_size[0], bbox[1] + categories_size[1] + 3), + color=color, + thickness=-1, + ) + cv2.putText( + img, + category, + (bbox[0], bbox[1] + categories_size[1] + 3), + cv2.FONT_HERSHEY_SIMPLEX, + 0.5, + (0, 0, 0), + thickness=1, + ) + else: + cv2.rectangle( + img, + (bbox[0], bbox[1] - categories_size[1] - 3), + (bbox[0] + categories_size[0], bbox[1] - 3), + color, + thickness=-1, + ) + cv2.putText(img, category, (bbox[0], bbox[1] - 3), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), thickness=1) + cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, thickness=2) + return img diff --git a/community/cv/ShipWise/mindyolo/utils/registry.py b/community/cv/ShipWise/mindyolo/utils/registry.py new file mode 100644 index 0000000000000000000000000000000000000000..5516e6d5f170f7b6939e4d7c4d9aaac07d206910 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/utils/registry.py @@ -0,0 +1,88 @@ +import inspect +import os + + +class Registry: + """ + a registry that maps string to class + """ + + def __init__(self, name): + """ + Args: + name (str): registry name + """ + self._name = name + self._module_dict = dict() + + def __len__(self): + return len(self._module_dict) + + def __contains__(self, key): + return self.get(key) is not None + + def __repr__(self): + format_str = self.__class__.__name__ + f"(name={self._name}, total={len(self._module_dict)})\n" + class2path = lambda c: 
os.path.sep.join(c.__module__.split('.')) + '.py'
+        format_str += ''.join(
+            [f"  ({i}): {k} in {class2path(v)}\n" for i, (k, v) in enumerate(self._module_dict.items())]
+        )
+        return format_str
+
+    @property
+    def name(self):
+        # registry name cannot be changed from outside
+        return self._name
+
+    @property
+    def module_dict(self):
+        # module dict cannot be changed from outside
+        return self._module_dict
+
+    def get(self, key):
+        """query the registry record"""
+        return self._module_dict.get(key, None)
+
+    def registry_module(self, module_name=None):
+        """
+        Register a module. A record will be added to 'self._module_dict', whose key is the class name (by default) or
+        the specified name, and value is the class itself.
+        It is used as a decorator.
+
+        Example:
+            >>> network = Registry('network')
+            >>> # case1: default module name
+            >>> @network.registry_module()
+            >>> class ResNet():
+            >>>     pass
+            >>> resnet = network.get('ResNet')
+            >>>
+            >>> # case2: customized module name
+            >>> @network.registry_module('yolov3')
+            >>> class YOLOv3():
+            >>>     pass
+            >>> yolov3 = network.get('yolov3')
+        """
+        if module_name is not None:
+            assert isinstance(module_name, str), f"module_name should be a str but got {type(module_name)} instead"
+
+        # use as a decorator
+        def _registry(cls):
+            return self._registry_module(module_class=cls, module_name=module_name)
+
+        return _registry
+
+    def _registry_module(self, module_class, module_name=None):
+        """
+        main worker of registry
+        """
+        assert inspect.isclass(
+            module_class
+        ), f"module to register should be a class but got {type(module_class)} instead"
+        if module_name is None:
+            module_name = module_class.__name__
+        if module_name in self:
+            raise KeyError(f"{module_name} is already registered in {self._name}")
+        self._module_dict[module_name] = module_class
+
+        return module_class
diff --git a/community/cv/ShipWise/mindyolo/utils/train_step_factory.py b/community/cv/ShipWise/mindyolo/utils/train_step_factory.py
new file mode 100644
index 0000000000000000000000000000000000000000..b7eec98b2391233b79fe2aa156d9e47250205956
--- /dev/null
+++ b/community/cv/ShipWise/mindyolo/utils/train_step_factory.py
@@ -0,0 +1,120 @@
+import mindspore as ms
+from mindspore import context, nn, ops
+
+__all__ = ["create_train_step_fn", "get_gradreducer", "get_loss_scaler"]
+
+
+def get_gradreducer(is_parallel, parameters):
+    if is_parallel:
+        mean = context.get_auto_parallel_context("gradients_mean")
+        degree = context.get_auto_parallel_context("device_num")
+        grad_reducer = nn.DistributedGradReducer(parameters, mean, degree)
+    else:
+        grad_reducer = ops.functional.identity
+
+    return grad_reducer
+
+
+def get_loss_scaler(ms_loss_scaler="static", scale_value=1024, scale_factor=2, scale_window=2000):
+    if ms_loss_scaler == "dynamic":
+        from mindspore.amp import DynamicLossScaler
+
+        loss_scaler = DynamicLossScaler(scale_value=scale_value, scale_factor=scale_factor, scale_window=scale_window)
+    elif ms_loss_scaler == "static":
+        from mindspore.amp import StaticLossScaler
+
+        loss_scaler = StaticLossScaler(scale_value=scale_value)
+    elif ms_loss_scaler in ("none", "None"):
+        from mindspore.amp import StaticLossScaler
+
+        loss_scaler = StaticLossScaler(1.0)
+    else:
+        raise NotImplementedError(f"Not support ms_loss_scaler: {ms_loss_scaler}")
+
+    return loss_scaler
+
+
+def create_train_step_fn(task, network, loss_fn, optimizer, loss_ratio, scaler, reducer,
+                         ema=None, overflow_still_update=False, ms_jit=False, clip_grad=False, clip_grad_value=10.):
+    from mindspore.amp import all_finite
+
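+    # all_finite checks every gradient tensor for inf/nan so that overflowed steps
+    # can be dropped (or still applied when overflow_still_update=True)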
+ use_ema = True if ema else False + + if task == "detect": + + def forward_func(x, label): + pred = network(x) + loss, loss_items = loss_fn(pred, label, x) + loss *= loss_ratio + return scaler.scale(loss), ops.stop_gradient(loss_items) + + grad_fn = ops.value_and_grad(forward_func, grad_position=None, weights=optimizer.parameters, has_aux=True) + + def train_step_func(x, label, optimizer_update=True): + (loss, loss_items), grads = grad_fn(x, label) + grads = reducer(grads) + unscaled_grads = scaler.unscale(grads) + grads_finite = all_finite(unscaled_grads) + + if clip_grad: + unscaled_grads = ops.clip_by_global_norm(unscaled_grads, clip_norm=clip_grad_value) + + if optimizer_update: + if grads_finite: + loss = ops.depend(loss, optimizer(unscaled_grads)) + if use_ema: + loss = ops.depend(loss, ema.update()) + else: + if overflow_still_update: + loss = ops.depend(loss, optimizer(unscaled_grads)) + if use_ema: + loss = ops.depend(loss, ema.update()) + + return scaler.unscale(loss), loss_items, unscaled_grads, grads_finite + + @ms.jit + def jit_warpper(*args): + return train_step_func(*args) + + return train_step_func if not ms_jit else jit_warpper + + elif task == "segment": + + def forward_func(x, label, seg): + pred = network(x) + loss, loss_items = loss_fn(pred, label, seg) + loss *= loss_ratio + return scaler.scale(loss), ops.stop_gradient(loss_items) + + grad_fn = ops.value_and_grad(forward_func, grad_position=None, weights=optimizer.parameters, has_aux=True) + + def train_step_func(x, label, seg, optimizer_update=True): + (loss, loss_items), grads = grad_fn(x, label, seg) + grads = reducer(grads) + unscaled_grads = scaler.unscale(grads) + grads_finite = all_finite(unscaled_grads) + + if clip_grad: + unscaled_grads = ops.clip_by_global_norm(unscaled_grads, clip_norm=clip_grad_value) + + if optimizer_update: + if grads_finite: + loss = ops.depend(loss, optimizer(unscaled_grads)) + if use_ema: + loss = ops.depend(loss, ema.update()) + else: + if overflow_still_update: + loss = ops.depend(loss, optimizer(unscaled_grads)) + if use_ema: + loss = ops.depend(loss, ema.update()) + + return scaler.unscale(loss), loss_items, unscaled_grads, grads_finite + + @ms.jit + def jit_warpper(*args): + return train_step_func(*args) + + return train_step_func if not ms_jit else jit_warpper + + else: + raise NotImplementedError \ No newline at end of file diff --git a/community/cv/ShipWise/mindyolo/utils/trainer_factory.py b/community/cv/ShipWise/mindyolo/utils/trainer_factory.py new file mode 100644 index 0000000000000000000000000000000000000000..b7e1f221bb88887a09322735a01190d945ba2c54 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/utils/trainer_factory.py @@ -0,0 +1,518 @@ +import math +import os +import time +import types +from typing import Union, List + +import mindspore as ms +from mindspore import Tensor, nn, ops + +from mindyolo.utils import logger +from mindyolo.utils.callback import BaseCallback, EvalWhileTrain, RunContext +from mindyolo.utils.checkpoint_manager import CheckpointManager +from mindyolo.utils.modelarts import sync_data + +__all__ = [ + "create_trainer", +] + + +def create_trainer( + model_name: str, + train_step_fn: types.FunctionType, + scaler, + network: nn.Cell, + loss_fn: nn.Cell, + ema: nn.Cell, + optimizer: nn.Cell, + dataloader: ms.dataset.Dataset, + steps_per_epoch: int, + callback: List[BaseCallback], + reducer, + data_sink, + profiler +): + return Trainer( + model_name=model_name, + train_step_fn=train_step_fn, + scaler=scaler, + network=network, + loss_fn=loss_fn, + 
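+        # ema may be None; the Trainer only uses it when truthy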
ema=ema, + optimizer=optimizer, + dataloader=dataloader, + steps_per_epoch=steps_per_epoch, + callback=callback, + reducer=reducer, + data_sink=data_sink, + profiler=profiler + ) + + +class Trainer: + def __init__( + self, + model_name, + train_step_fn, + scaler, + network, + loss_fn, + ema, + optimizer, + dataloader, + steps_per_epoch, + callback, + reducer, + data_sink, + profiler + ): + self.model_name = model_name + self.train_step_fn = train_step_fn + self.scaler = scaler + self.dataloader = dataloader + self.network = network # for save checkpoint + self.loss_fn = loss_fn + self.ema = ema # for save checkpoint and ema + self.optimizer = optimizer # for save checkpoint + self.global_step = 0 + self.steps_per_epoch = steps_per_epoch + self.callback = callback + self.reducer = reducer + self.data_sink = data_sink + self.profiler = profiler + + def train( + self, + epochs: int, + main_device: bool, + warmup_step: int = 0, + warmup_momentum: Union[list, None] = None, + accumulate: int = 1, + overflow_still_update: bool = False, + keep_checkpoint_max: int = 10, + log_interval: int = 1, + loss_item_name: list = [], + save_dir: str = "", + enable_modelarts: bool = False, + train_url: str = "", + run_eval: bool = False, + test_fn: types.FunctionType = None, + ms_jit: bool = True, + rank_size: int = 8, + profiler_step_num: int = 1 + ): + # Attr + self.epochs = epochs + self.main_device = main_device + self.log_interval = log_interval + self.overflow_still_update = overflow_still_update + self.loss_item_name = loss_item_name + self.profiler_step_num = profiler_step_num + + # Directories + ckpt_save_dir = os.path.join(save_dir, "weights") + if main_device: + os.makedirs(ckpt_save_dir, exist_ok=True) # save checkpoint path + + # to be compatible with old interface + has_eval_mask = list(isinstance(c, EvalWhileTrain) for c in self.callback) + if run_eval and not any(has_eval_mask): + self.callback.append(EvalWhileTrain()) + if not run_eval and any(has_eval_mask): + ind = has_eval_mask.index(True) + self.callback.pop(ind) + + # Grad Accumulate + self.accumulate_cur_step = 0 + self.accumulate_grads = None + self.accumulate = accumulate + self.accumulate_grads_fn = self._get_accumulate_grads_fn() + + # Set Checkpoint Manager + manager = CheckpointManager(ckpt_save_policy="latest_k") + manager_ema = CheckpointManager(ckpt_save_policy="latest_k") if self.ema else None + + loader = self.dataloader.create_dict_iterator(output_numpy=False, num_epochs=1) + s_step_time = time.time() + s_epoch_time = time.time() + run_context = RunContext( + epoch_num=epochs, + steps_per_epoch=self.steps_per_epoch, + total_steps=self.dataloader.dataset_size, + trainer=self, + test_fn=test_fn, + enable_modelarts=enable_modelarts, + ckpt_save_dir=ckpt_save_dir, + save_dir=save_dir, + train_url=train_url, + overflow_still_update=overflow_still_update, + ms_jit=ms_jit, + rank_size=rank_size, + ) + self._on_train_begin(run_context) + for i, data in enumerate(loader): + cur_epoch = (i // self.steps_per_epoch) + 1 + cur_step = (i % self.steps_per_epoch) + 1 + run_context.cur_epoch_index = cur_epoch + run_context.cur_step_index = cur_step + + if cur_step == 1: + self._on_train_epoch_begin(run_context) + self.global_step += 1 + if self.global_step < warmup_step: + if warmup_momentum and isinstance(self.optimizer, (nn.SGD, nn.Momentum)): + dtype = self.optimizer.momentum.dtype + self.optimizer.momentum = Tensor(warmup_momentum[i], dtype) + + imgs, labels = data["images"], data["labels"] + segments = None if 'masks' not in data else 
data["masks"] + self._on_train_step_begin(run_context) + run_context.loss, run_context.lr = self.train_step(imgs, labels, segments, + cur_step=cur_step,cur_epoch=cur_epoch) + self._on_train_step_end(run_context) + + # train log + if cur_step % self.log_interval == 0: + logger.info( + f"Epoch {cur_epoch}/{epochs}, Step {cur_step}/{self.steps_per_epoch}, " + f"step time: {(time.time() - s_step_time) * 1000 / self.log_interval:.2f} ms" + ) + s_step_time = time.time() + + # save checkpoint per epoch on main device + if self.main_device and (i + 1) % self.steps_per_epoch == 0: + # Save Checkpoint + ms.save_checkpoint( + self.optimizer, os.path.join(ckpt_save_dir, f"optim_{self.model_name}.ckpt"), async_save=True + ) + save_path = os.path.join(ckpt_save_dir, f"{self.model_name}-{cur_epoch}_{self.steps_per_epoch}.ckpt") + manager.save_ckpoint(self.network, num_ckpt=keep_checkpoint_max, save_path=save_path) + if self.ema: + save_path_ema = os.path.join( + ckpt_save_dir, f"EMA_{self.model_name}-{cur_epoch}_{self.steps_per_epoch}.ckpt" + ) + manager_ema.save_ckpoint(self.ema.ema, num_ckpt=keep_checkpoint_max, save_path=save_path_ema) + logger.info(f"Saving model to {save_path}") + + if enable_modelarts: + sync_data(save_path, train_url + "/weights/" + save_path.split("/")[-1]) + if self.ema: + sync_data(save_path_ema, train_url + "/weights/" + save_path_ema.split("/")[-1]) + + logger.info(f"Epoch {cur_epoch}/{epochs}, epoch time: {(time.time() - s_epoch_time) / 60:.2f} min.") + s_step_time = time.time() + s_epoch_time = time.time() + if self.profiler and self.profiler_step_num == cur_step: + break + if cur_step == self.steps_per_epoch: + self._on_train_epoch_end(run_context) + + self._on_train_end(run_context) + logger.info("End Train.") + + def train_with_datasink( + self, + task: str, + epochs: int, + main_device: bool, + warmup_epoch: int = 0, + warmup_momentum: Union[list, None] = None, + keep_checkpoint_max: int = 10, + log_interval: int = 1, + loss_item_name: list = [], + save_dir: str = "", + enable_modelarts: bool = False, + train_url: str = "", + run_eval: bool = False, + test_fn: types.FunctionType = None, + overflow_still_update: bool = False, + ms_jit: bool = True, + rank_size: int = 8, + profiler_step_num: int = 1 + ): + # Modify dataset columns name for data sink mode, because dataloader could not send string data to device. 
+ if task == "detect": + loader = self.dataloader.project(["images", "labels"]) + elif task == "segment": + loader = self.dataloader.project(["images", "labels", "masks"]) + else: + raise NotImplementedError + + # to be compatible with old interface + has_eval_mask = list(isinstance(c, EvalWhileTrain) for c in self.callback) + if run_eval and not any(has_eval_mask): + self.callback.append(EvalWhileTrain()) + if not run_eval and any(has_eval_mask): + ind = has_eval_mask.index(True) + self.callback.pop(ind) + + # Change warmup_momentum, list of step -> list of epoch + warmup_momentum = ( + [warmup_momentum[_i * self.steps_per_epoch] for _i in range(warmup_epoch)] + + [warmup_momentum[-1], ] * (epochs - warmup_epoch) if warmup_momentum else None + ) + + # Build train epoch func with sink process + train_epoch_fn = ms.train.data_sink( + fn=self.train_step_fn, + dataset=loader, + sink_size=self.steps_per_epoch, + jit_config=ms.JitConfig() + ) + + # Attr + self.epochs = epochs + self.main_device = main_device + self.log_interval = log_interval + self.loss_item_name = loss_item_name + self.profiler_step_num = profiler_step_num + + # Directories + ckpt_save_dir = os.path.join(save_dir, "weights") + + if main_device: + os.makedirs(ckpt_save_dir, exist_ok=True) # save checkpoint path + + # Set Checkpoint Manager + manager = CheckpointManager(ckpt_save_policy="latest_k") + manager_ema = CheckpointManager(ckpt_save_policy="latest_k") if self.ema else None + + run_context = RunContext( + epoch_num=epochs, + steps_per_epoch=self.steps_per_epoch, + total_steps=self.dataloader.dataset_size, + trainer=self, + test_fn=test_fn, + enable_modelarts=enable_modelarts, + ckpt_save_dir=ckpt_save_dir, + save_dir=save_dir, + train_url=train_url, + overflow_still_update=overflow_still_update, + ms_jit=ms_jit, + rank_size=rank_size, + ) + + s_epoch_time = time.time() + self._on_train_begin(run_context) + for epoch in range(epochs): + cur_epoch = epoch + 1 + self.global_step += self.steps_per_epoch + run_context.cur_epoch_index = cur_epoch + if epoch == 0: + logger.warning("In the data sink mode, log output will only occur once each epoch is completed.") + logger.warning( + "The first epoch will be compiled for the graph, which may take a long time; " + "You can come back later :)." 
+ ) + + if warmup_momentum and isinstance(self.optimizer, (nn.SGD, nn.Momentum)): + dtype = self.optimizer.momentum.dtype + self.optimizer.momentum = Tensor(warmup_momentum[epoch], dtype) + + # train one epoch with datasink + self._on_train_epoch_begin(run_context) + _, loss_item, _, _ = train_epoch_fn() + + # print loss and lr + log_string = f"Epoch {cur_epoch}/{epochs}, Step {self.steps_per_epoch}/{self.steps_per_epoch}" + if len(self.loss_item_name) < len(loss_item): + self.loss_item_name += [f"loss_item{i}" for i in range(len(loss_item) - len(self.loss_item_name))] + for i in range(len(loss_item)): + log_string += f", {self.loss_item_name[i]}: {loss_item[i].asnumpy():.4f}" + if self.optimizer.dynamic_lr: + if self.optimizer.is_group_lr: + lr_cell = self.optimizer.learning_rate[0] + cur_lr = lr_cell(Tensor(self.global_step, ms.int32)).asnumpy().item() + else: + cur_lr = self.optimizer.learning_rate(Tensor(self.global_step, ms.int32)).asnumpy().item() + else: + cur_lr = self.optimizer.learning_rate.asnumpy().item() + log_string += f", cur_lr: {cur_lr}" + logger.info(log_string) + run_context.loss, run_context.lr = loss_item, cur_lr + self._on_train_epoch_end(run_context) + + # save checkpoint per epoch on main device + if self.main_device: + # Save Checkpoint + ms.save_checkpoint( + self.optimizer, os.path.join(ckpt_save_dir, f"optim_{self.model_name}.ckpt"), async_save=True + ) + save_path = os.path.join(ckpt_save_dir, f"{self.model_name}-{cur_epoch}_{self.steps_per_epoch}.ckpt") + manager.save_ckpoint(self.network, num_ckpt=keep_checkpoint_max, save_path=save_path) + if self.ema: + save_path_ema = os.path.join( + ckpt_save_dir, f"EMA_{self.model_name}-{cur_epoch}_{self.steps_per_epoch}.ckpt" + ) + manager_ema.save_ckpoint(self.ema.ema, num_ckpt=keep_checkpoint_max, save_path=save_path_ema) + logger.info(f"Saving model to {save_path}") + + if enable_modelarts: + sync_data(save_path, train_url + "/weights/" + save_path.split("/")[-1]) + if self.ema: + sync_data(save_path_ema, train_url + "/weights/" + save_path_ema.split("/")[-1]) + + logger.info(f"Epoch {cur_epoch}/{epochs}, epoch time: {(time.time() - s_epoch_time) / 60:.2f} min.") + s_epoch_time = time.time() + + if self.profiler and math.ceil(self.profiler_step_num/self.steps_per_epoch) == cur_epoch: + break + self._on_train_end(run_context) + logger.info("End Train.") + + def train_step(self, imgs, labels, segments=None, cur_step=0, cur_epoch=0): + if self.accumulate == 1: + if segments is None: + loss, loss_item, _, grads_finite = self.train_step_fn(imgs, labels, True) + else: + loss, loss_item, _, grads_finite = self.train_step_fn(imgs, labels, segments, True) + self.scaler.adjust(grads_finite) + if not grads_finite and (cur_step % self.log_interval == 0): + if self.overflow_still_update: + logger.warning(f"overflow, still update, loss scale adjust to {self.scaler.scale_value.asnumpy()}") + else: + logger.warning(f"overflow, drop step, loss scale adjust to {self.scaler.scale_value.asnumpy()}") + else: + if segments is None: + loss, loss_item, grads, grads_finite = self.train_step_fn(imgs, labels, False) + else: + loss, loss_item, grads, grads_finite = self.train_step_fn(imgs, labels, segments, False) + self.scaler.adjust(grads_finite) + if grads_finite or self.overflow_still_update: + self.accumulate_cur_step += 1 + if self.accumulate_grads: + self.accumulate_grads = self.accumulate_grads_fn( + self.accumulate_grads, grads + ) # update self.accumulate_grads + else: + self.accumulate_grads = grads + + if self.accumulate_cur_step % 
self.accumulate == 0:
+                    self.optimizer(self.accumulate_grads)
+                    if self.ema:
+                        self.ema.update()
+                    logger.info(
+                        f"Epoch {cur_epoch}/{self.epochs}, Step {cur_step}/{self.steps_per_epoch}, "
+                        f"accumulate: {self.accumulate}, optimizer updated after a full accumulation cycle."
+                    )
+                    from mindspore.amp import all_finite
+
+                    if not all_finite(self.accumulate_grads):
+                        logger.warning("overflow, still update.")
+                    # reset accumulate
+                    self.accumulate_grads, self.accumulate_cur_step = None, 0
+            else:
+                logger.warning(
+                    f"Epoch {cur_epoch}/{self.epochs}, Step {cur_step}/{self.steps_per_epoch}, "
+                    f"accumulate: {self.accumulate}, gradients overflowed on this step, dropped. "
+                    f"Loss scale adjust to {self.scaler.scale_value.asnumpy()}"
+                )
+
+        # train log
+        cur_lr = 0
+        if cur_step % self.log_interval == 0:
+            log_string = (
+                f"Epoch {cur_epoch}/{self.epochs}, Step {cur_step}/{self.steps_per_epoch}, imgsize {imgs.shape[2:]}"
+            )
+            # print loss
+            if len(self.loss_item_name) < len(loss_item):
+                self.loss_item_name += [f"loss_item{i}" for i in range(len(loss_item) - len(self.loss_item_name))]
+            for i in range(len(loss_item)):
+                log_string += f", {self.loss_item_name[i]}: {loss_item[i].asnumpy():.4f}"
+
+            # print lr
+            if self.optimizer.dynamic_lr:
+                if self.optimizer.is_group_lr:
+                    lr_cell = self.optimizer.learning_rate[0]
+                    cur_lr = lr_cell(Tensor(self.global_step, ms.int32)).asnumpy().item()
+                else:
+                    cur_lr = self.optimizer.learning_rate(Tensor(self.global_step, ms.int32)).asnumpy().item()
+            else:
+                cur_lr = self.optimizer.learning_rate.asnumpy().item()
+            log_string += f", cur_lr: {cur_lr}"
+            logger.info(log_string)
+        return loss_item, cur_lr
+
+    def _get_accumulate_grads_fn(self):
+        hyper_map = ops.HyperMap()
+
+        def accu_fn(g1, g2):
+            g1 = g1 + g2
+            return g1
+
+        def accumulate_grads_fn(accumulate_grads, grads):
+            success = hyper_map(accu_fn, accumulate_grads, grads)
+            return success
+
+        return accumulate_grads_fn
+
+    def _get_transform_stage(self, cur_epoch, stage_epochs=[]):
+        _cur_stage = 0
+        for _i in range(len(stage_epochs)):
+            if cur_epoch <= stage_epochs[_i]:
+                _cur_stage = _i
+            else:
+                break
+        return _cur_stage
+
+    def _on_train_begin(self, run_context: RunContext):
+        """hooks to run on the beginning of training process"""
+
+        # check callback type validation
+        callback = self.callback
+        if callback is None:
+            callback = []
+        assert isinstance(callback, (tuple, list)), (
+            f"expect callback to be a list or tuple, " f"but got {type(callback)} instead"
+        )
+        for cb in callback:
+            assert isinstance(cb, BaseCallback), (
+                f"expect callback element to be subclass of BaseCallback, " f"but got {type(cb)} instead"
+            )
+        # log callback base info
+        logger.info(f"got {len(callback)} active callbacks as follows:")
+        for cb in self.callback:
+            logger.info(cb)
+
+        # check range of log interval
+        if self.log_interval > self.steps_per_epoch:
+            logger.warning(
+                f"log interval should be less than total steps of one epoch, "
+                f"but got {self.log_interval} > {self.steps_per_epoch}, set log_interval to steps_per_epoch "
+                f"{self.steps_per_epoch}"
+            )
+            self.log_interval = self.steps_per_epoch
+
+        # throw warning of long time cost
+        logger.warning(
+            "The first epoch will be compiled for the graph, which may take a long time; " "You can come back later :)."
+ ) + + # execute customized callback + for cb in self.callback: + cb.on_train_begin(run_context) + + def _on_train_end(self, run_context: RunContext): + """hooks to run on the end of training process""" + for cb in self.callback: + cb.on_train_end(run_context) + + def _on_train_epoch_begin(self, run_context: RunContext): + """hooks to run on the beginning of a training epoch""" + for cb in self.callback: + cb.on_train_epoch_begin(run_context) + + def _on_train_epoch_end(self, run_context: RunContext): + """hooks to run on the end of a training epoch""" + for cb in self.callback: + cb.on_train_epoch_end(run_context) + + def _on_train_step_begin(self, run_context: RunContext): + """hooks to run on the beginning of a training step""" + for cb in self.callback: + cb.on_train_step_begin(run_context) + + def _on_train_step_end(self, run_context: RunContext): + """hooks to run on the end of a training step""" + for cb in self.callback: + cb.on_train_step_end(run_context) diff --git a/community/cv/ShipWise/mindyolo/utils/utils.py b/community/cv/ShipWise/mindyolo/utils/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..c364c94d10c17facf0cf5ed95b645297746b27b6 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/utils/utils.py @@ -0,0 +1,223 @@ +import os +import random +import yaml +import cv2 +from datetime import datetime +import numpy as np + +import mindspore as ms +from mindspore import ops, Tensor, nn +from mindspore.communication.management import get_group_size, get_rank, init +from mindspore import ParallelMode + +from mindyolo.utils import logger + + +def set_seed(seed=2): + np.random.seed(seed) + random.seed(seed) + ms.set_seed(seed) + + +def set_default(args): + # Set Context + ms.set_context(mode=args.ms_mode, device_target=args.device_target, max_call_depth=2000) + # if args.ms_mode == 0: + # ms.set_context(jit_config={"jit_level": "O2"}) + if args.device_target == "Ascend": + device_id = int(os.getenv("DEVICE_ID", 0)) + ms.set_context(device_id=device_id) + elif args.device_target == "GPU" and args.ms_enable_graph_kernel: + ms.set_context(enable_graph_kernel=True) + # Set Parallel + if args.is_parallel: + init() + args.rank, args.rank_size, parallel_mode = get_rank(), get_group_size(), ParallelMode.DATA_PARALLEL + ms.set_auto_parallel_context(device_num=args.rank_size, parallel_mode=parallel_mode, gradients_mean=True) + else: + args.rank, args.rank_size = 0, 1 + # Set Default + args.total_batch_size = args.per_batch_size * args.rank_size + args.sync_bn = args.sync_bn and ms.get_context("device_target") == "Ascend" and args.rank_size > 1 + args.accumulate = max(1, np.round(args.nbs / args.total_batch_size)) if args.auto_accumulate else args.accumulate + # optimizer + args.optimizer.warmup_epochs = args.optimizer.get("warmup_epochs", 0) + args.optimizer.min_warmup_step = args.optimizer.get("min_warmup_step", 0) + args.optimizer.epochs = args.epochs + args.optimizer.nbs = args.nbs + args.optimizer.accumulate = args.accumulate + args.optimizer.total_batch_size = args.total_batch_size + # data + cv2.setNumThreads(args.opencv_threads_num) # Set the number of threads for opencv. 
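+    # data: resolve class count/names; single-class mode collapses to one "item" class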
+ args.data.nc = 1 if args.single_cls else int(args.data.nc) # number of classes + args.data.names = ["item"] if args.single_cls and len(args.names) != 1 else args.data.names # class names + assert len(args.data.names) == args.data.nc, "%g names found for nc=%g dataset in %s" % ( + len(args.data.names), + args.data.nc, + args.config, + ) + # Directories and Save run settings + time = get_broadcast_datetime(rank_size=args.rank_size) + args.save_dir = os.path.join( + args.save_dir, f'{time[0]:04d}.{time[1]:02d}.{time[2]:02d}-{time[3]:02d}.{time[4]:02d}.{time[5]:02d}') + os.makedirs(args.save_dir, exist_ok=True) + if args.rank % args.rank_size == 0: + with open(os.path.join(args.save_dir, "cfg.yaml"), "w") as f: + yaml.dump(vars(args), f, sort_keys=False) + + # callback + args.callback = args.get('callback', []) + + # Set Logger + logger.setup_logging( + logger_name="MindYOLO", log_level=args.log_level, rank_id=args.rank, device_per_servers=args.rank_size + ) + logger.setup_logging_file(log_dir=os.path.join(args.save_dir, "logs")) + + # Modelarts: Copy data, from the s3 bucket to the computing node; Reset dataset dir. + if args.enable_modelarts: + from mindyolo.utils.modelarts import sync_data + + os.makedirs(args.data_dir, exist_ok=True) + sync_data(args.data_url, args.data_dir) + sync_data(args.save_dir, args.train_url) + if args.ckpt_url: + sync_data(args.ckpt_url, args.ckpt_dir) # pretrain ckpt + # args.data.dataset_dir = os.path.join(args.data_dir, args.data.dataset_dir) + args.data.train_set = os.path.join(args.data_dir, args.data.train_set) + args.data.val_set = os.path.join(args.data_dir, args.data.val_set) + args.data.test_set = os.path.join(args.data_dir, args.data.test_set) + args.weight = args.ckpt_dir if args.ckpt_dir else "" + args.ema_weight = os.path.join(args.ckpt_dir, args.ema_weight) if args.ema_weight else "" + + +def drop_inconsistent_shape_parameters(model, param_dict): + updated_param_dict = dict() + + # TODO: hard code + param_dict = {k.replace('ema.', ''): v for k, v in param_dict.items()} + + for param in model.get_parameters(): + name = param.name + if name in param_dict: + if param_dict[name].shape == param.shape: + updated_param_dict[name] = param_dict[name] + else: + logger.warning( + f"Dropping checkpoint parameter `{name}` with shape `{param_dict[name].shape}`, " + f"which is inconsistent with cell shape `{param.shape}`" + ) + else: + logger.warning(f"Cannot find checkpoint parameter `{name}`.") + return updated_param_dict + + +def load_pretrain(network, weight, ema=None, ema_weight=None, strict=True): + if weight.endswith(".ckpt"): + param_dict = ms.load_checkpoint(weight) + if not strict: + param_dict = drop_inconsistent_shape_parameters(network, param_dict) + ms.load_param_into_net(network, param_dict) + logger.info(f'Pretrain model load from "{weight}" success.') + if ema: + if ema_weight.endswith(".ckpt"): + param_dict_ema = ms.load_checkpoint(ema_weight) + if not strict: + param_dict_ema = drop_inconsistent_shape_parameters(ema.ema, param_dict_ema) + ms.load_param_into_net(ema.ema, param_dict_ema) + logger.info(f'Ema pretrain model load from "{ema_weight}" success.') + else: + ema.clone_from_model() + logger.info("ema_weight not exist, default pretrain weight is currently used.") + + +def freeze_layers(network, freeze=[]): + if len(freeze) > 0: + freeze = [f"model.{x}." 
for x in freeze] # parameter names to freeze (full or partial) + for n, p in network.parameters_and_names(): + if any(x in n for x in freeze): + logger.info("freezing %s" % n) + p.requires_grad = False + + +def draw_result(img_path, result_dict, data_names, is_coco_dataset=True, save_path="./detect_results"): + import random + import cv2 + from mindyolo.data import COCO80_TO_COCO91_CLASS + + os.makedirs(save_path, exist_ok=True) + save_result_path = os.path.join(save_path, img_path.split("/")[-1]) + im = cv2.imread(img_path) + category_id, bbox, score = result_dict["category_id"], result_dict["bbox"], result_dict["score"] + seg = result_dict.get("segmentation", None) + mask = None if seg is None else np.zeros_like(im, dtype=np.float32) + for i in range(len(bbox)): + # draw box + x_l, y_t, w, h = bbox[i][:] + x_r, y_b = x_l + w, y_t + h + x_l, y_t, x_r, y_b = int(x_l), int(y_t), int(x_r), int(y_b) + _color = [random.randint(0, 255) for _ in range(3)] + cv2.rectangle(im, (x_l, y_t), (x_r, y_b), tuple(_color), 2) + if seg: + _color_seg = np.array([random.randint(0, 255) for _ in range(3)], np.float32) + mask += seg[i][:, :, None] * _color_seg[None, None, :] + + # draw label + if is_coco_dataset: + class_name_index = COCO80_TO_COCO91_CLASS.index(category_id[i]) + else: + class_name_index = category_id[i] + class_name = data_names[class_name_index] # args.data.names[class_name_index] + text = f"{class_name}: {score[i]}" + (text_w, text_h), baseline = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2) + cv2.rectangle(im, (x_l, y_t - text_h - baseline), (x_l + text_w, y_t), tuple(_color), -1) + cv2.putText(im, text, (x_l, y_t - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2) + + # save results + if seg: + im = (0.7 * im + 0.3 * mask).astype(np.uint8) + cv2.imwrite(save_result_path, im) + + +def get_broadcast_datetime(rank_size=1, root_rank=0): + time = datetime.now() + time_list = [time.year, time.month, time.day, time.hour, time.minute, time.second, time.microsecond] + if rank_size <=1: + return time_list + + # only broadcast in distribution mode + x = broadcast((Tensor(time_list, dtype=ms.int32),), root_rank) + x = x[0].asnumpy().tolist() + return x + +@ms.jit +def broadcast(x, root_rank): + return ops.Broadcast(root_rank=root_rank)(x) + +class AllReduce(nn.Cell): + """ + a wrapper class to make ops.AllReduce become a Cell. This is a workaround for sync_wait + """ + def __init__(self): + super(AllReduce, self).__init__() + self.all_reduce = ops.AllReduce(op=ops.ReduceOp.SUM) + + def construct(self, x): + return self.all_reduce(x) + + +class Synchronizer: + def __init__(self, rank_size=1): + # this init method should be run only once + self.all_reduce = AllReduce() + self.rank_size = rank_size + + def __call__(self): + if self.rank_size <= 1: + return + sync = Tensor(np.array([1]).astype(np.int32)) + sync = self.all_reduce(sync) + sync = sync.asnumpy()[0] + if sync != self.rank_size: + raise ValueError(f'Sync value {sync} is not equal to rank size {self.rank_size}.' 
+ f' There might be wrong with devices') diff --git a/community/cv/ShipWise/mindyolo/version.py b/community/cv/ShipWise/mindyolo/version.py new file mode 100644 index 0000000000000000000000000000000000000000..aea8b22a3c73772753b9708e70cae90d45345f02 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/version.py @@ -0,0 +1,2 @@ +"""version init""" +__version__ = "0.4.0-dev" diff --git a/community/cv/ShipWise/requirements.txt b/community/cv/ShipWise/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..f5202961583d74cdb2747836179c9109e0d6e13c --- /dev/null +++ b/community/cv/ShipWise/requirements.txt @@ -0,0 +1,16 @@ +# MindYOLO requirements +# Usage: pip install -r requirements.txt + +# Setup +pybind11>=2.10.4 + +# Base +numpy>=1.17.0 +PyYAML>=5.3 +tqdm +opencv-python>=4.7.0.68 +opencv-python-headless>=4.7.0.68 + +# Extras +albumentations>=1.0.3 +pycocotools>=2.0.2 diff --git a/community/cv/ShipWise/workspace/__init__.py b/community/cv/ShipWise/workspace/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..cb9cad7f36ac9db05a61ea2fba14c87a9b8f0dd0 --- /dev/null +++ b/community/cv/ShipWise/workspace/__init__.py @@ -0,0 +1,11 @@ +# -*- coding: utf-8 -*- +# @Time : 2024-09-22 11:41 +# @Author : Jiang Liu + + +def main(): + pass + + +if __name__ == '__main__': + main() diff --git a/community/cv/ShipWise/workspace/configs/dataset/HRSC2016.yaml b/community/cv/ShipWise/workspace/configs/dataset/HRSC2016.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0be2c002421a512255e2af97c3067f13063cd36f --- /dev/null +++ b/community/cv/ShipWise/workspace/configs/dataset/HRSC2016.yaml @@ -0,0 +1,14 @@ +data: + dataset_name: HRSC2016 + + train_set: ./workspace/datasets/HRSC2016/train.txt + val_set: ./workspace/datasets/HRSC2016/val.txt + test_set: ./workspace/datasets/HRSC2016/test.txt + + nc: 28 + + # class names + names: [ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27' ] + + train_transforms: [ ] + test_transforms: [ ] \ No newline at end of file diff --git a/community/cv/ShipWise/workspace/configs/ship-wise/hyp.scratch.high.yaml b/community/cv/ShipWise/workspace/configs/ship-wise/hyp.scratch.high.yaml new file mode 100644 index 0000000000000000000000000000000000000000..706cb08df1e106489b035d138c0d2ae32060e4b6 --- /dev/null +++ b/community/cv/ShipWise/workspace/configs/ship-wise/hyp.scratch.high.yaml @@ -0,0 +1,67 @@ +optimizer: + optimizer: momentum + lr_init: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) + momentum: 0.937 # SGD momentum/Adam beta1 + nesterov: True # update gradients with NAG(Nesterov Accelerated Gradient) algorithm + loss_scale: 1.0 # loss scale for optimizer + warmup_epochs: 3 # warmup epochs (fractions ok) + warmup_momentum: 0.8 # warmup initial momentum + warmup_bias_lr: 0.1 # warmup initial bias lr + min_warmup_step: 1000 # minimum warmup step + group_param: yolov8 # group param strategy + gp_weight_decay: 0.0005 # group param weight decay 5e-4 + start_factor: 1.0 + end_factor: 0.01 + +loss: + name: YOLOv8Loss + box: 7.5 # box loss gain + cls: 0.5 # cls loss gain + dfl: 1.5 # dfl loss gain + reg_max: 16 + +data: + num_parallel_workers: 1 + + # multi-stage data augment + train_transforms: { + stage_epochs: [ 490, 10 ], + trans_list: [ + [ + { func_name: mosaic, prob: 1.0 }, + { func_name: copy_paste, prob: 0.3 }, + {func_name: resample_segments}, + { func_name: random_perspective, prob: 1.0, 
degrees: 0.0, translate: 0.1, scale: 0.9, shear: 0.0 }, + { func_name: mixup, alpha: 32.0, beta: 32.0, prob: 0.15, pre_transform: [ + { func_name: mosaic, prob: 1.0 }, + { func_name: copy_paste, prob: 0.3 }, + { func_name: resample_segments }, + { func_name: random_perspective, prob: 1.0, degrees: 0.0, translate: 0.1, scale: 0.9, shear: 0.0 }, ] + }, + {func_name: albumentations}, + {func_name: hsv_augment, prob: 1.0, hgain: 0.015, sgain: 0.7, vgain: 0.4}, + {func_name: fliplr, prob: 0.5}, + {func_name: label_norm, xyxy2xywh_: True}, + {func_name: label_pad, padding_size: 160, padding_value: -1}, + {func_name: image_norm, scale: 255.}, + {func_name: image_transpose, bgr2rgb: True, hwc2chw: True} + ], + [ + {func_name: letterbox, scaleup: True}, + {func_name: resample_segments}, + {func_name: random_perspective, prob: 1.0, degrees: 0.0, translate: 0.1, scale: 0.9, shear: 0.0}, + {func_name: albumentations}, + {func_name: hsv_augment, prob: 1.0, hgain: 0.015, sgain: 0.7, vgain: 0.4}, + {func_name: fliplr, prob: 0.5}, + {func_name: label_norm, xyxy2xywh_: True}, + {func_name: label_pad, padding_size: 160, padding_value: -1}, + {func_name: image_norm, scale: 255.}, + {func_name: image_transpose, bgr2rgb: True, hwc2chw: True} + ]] + } + + test_transforms: [ + {func_name: letterbox, scaleup: False, only_image: True}, + {func_name: image_norm, scale: 255.}, + {func_name: image_transpose, bgr2rgb: True, hwc2chw: True} + ] diff --git a/community/cv/ShipWise/workspace/configs/ship-wise/hyp.scratch.low.yaml b/community/cv/ShipWise/workspace/configs/ship-wise/hyp.scratch.low.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c3a82698feaf4d9c2f747500a73f39a27d957b46 --- /dev/null +++ b/community/cv/ShipWise/workspace/configs/ship-wise/hyp.scratch.low.yaml @@ -0,0 +1,62 @@ + + +optimizer: + optimizer: momentum + lr_init: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) + momentum: 0.937 # SGD momentum/Adam beta1 + nesterov: True # update gradients with NAG(Nesterov Accelerated Gradient) algorithm + loss_scale: 1.0 # loss scale for optimizer + warmup_epochs: 3 # warmup epochs (fractions ok) + warmup_momentum: 0.8 # warmup initial momentum + warmup_bias_lr: 0.1 # warmup initial bias lr + min_warmup_step: 1000 # minimum warmup step + group_param: yolov8 # group param strategy + gp_weight_decay: 0.0005 # group param weight decay 5e-4 + start_factor: 1.0 + end_factor: 0.01 + +loss: + name: YOLOv8Loss + box: 7.5 # box loss gain + cls: 0.5 # cls loss gain + dfl: 1.5 # dfl loss gain + reg_max: 16 + +data: + num_parallel_workers: 1 + + # multi-stage data augment + train_transforms: { + stage_epochs: [ 490, 10 ], + trans_list: [ + [ + { func_name: mosaic, prob: 1.0 }, + { func_name: resample_segments }, + { func_name: random_perspective, prob: 1.0, degrees: 0.0, translate: 0.1, scale: 0.5, shear: 0.0 }, + {func_name: albumentations}, + {func_name: hsv_augment, prob: 1.0, hgain: 0.015, sgain: 0.7, vgain: 0.4}, + {func_name: fliplr, prob: 0.5}, + {func_name: label_norm, xyxy2xywh_: True}, + {func_name: label_pad, padding_size: 160, padding_value: -1}, + {func_name: image_norm, scale: 255.}, + {func_name: image_transpose, bgr2rgb: True, hwc2chw: True} + ], + [ + {func_name: letterbox, scaleup: True}, + {func_name: resample_segments}, + {func_name: random_perspective, prob: 1.0, degrees: 0.0, translate: 0.1, scale: 0.5, shear: 0.0}, + {func_name: albumentations}, + {func_name: hsv_augment, prob: 1.0, hgain: 0.015, sgain: 0.7, vgain: 0.4}, + {func_name: fliplr, prob: 0.5}, + {func_name: 
label_norm, xyxy2xywh_: True}, + {func_name: label_pad, padding_size: 160, padding_value: -1}, + {func_name: image_norm, scale: 255.}, + {func_name: image_transpose, bgr2rgb: True, hwc2chw: True} + ]] + } + + test_transforms: [ + {func_name: letterbox, scaleup: False, only_image: True}, + {func_name: image_norm, scale: 255.}, + {func_name: image_transpose, bgr2rgb: True, hwc2chw: True} + ] diff --git a/community/cv/ShipWise/workspace/configs/ship-wise/ship-wise-base.yaml b/community/cv/ShipWise/workspace/configs/ship-wise/ship-wise-base.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5738013cec0f89ffe555f6569a17a8877bd72deb --- /dev/null +++ b/community/cv/ShipWise/workspace/configs/ship-wise/ship-wise-base.yaml @@ -0,0 +1,45 @@ +epochs: 500 +per_batch_size: 2 +img_size: 640 +iou_thres: 0.7 +conf_free: True +sync_bn: True +opencv_threads_num: 0 + +network: + model_name: shipwise + nc: 28 + reg_max: 16 + + stride: [ 8, 16, 32 ] + + backbone: + - [ -1, 1, ConvNormAct, [ 64, 3, 2 ] ] + - [ -1, 1, ConvNormAct, [ 128, 3, 2 ] ] + - [ -1, 3, C2f, [ 128, True ] ] + - [ -1, 1, ConvNormAct, [ 256, 3, 2 ] ] + - [ -1, 6, C2f, [ 256, True ] ] + - [ -1, 1, ConvNormAct, [ 512, 3, 2 ] ] + - [ -1, 6, C2f, [ 512, True ] ] + - [ -1, 1, ConvNormAct, [ 1024, 3, 2 ] ] + - [ -1, 3, C2f, [ 1024, True ] ] + - [ -1, 1, SPPF, [ 1024, 5 ] ] + + head: + - [ -1, 1, Upsample, [ None, 2, 'nearest' ] ] + - [ [ -1, 6 ], 1, Concat, [ 1 ] ] + - [ -1, 3, C2f, [ 512 ] ] + + - [ -1, 1, Upsample, [ None, 2, 'nearest' ] ] + - [ [ -1, 4 ], 1, Concat, [ 1 ] ] + - [ -1, 3, C2f, [ 256 ] ] + + - [ -1, 1, ConvNormAct, [ 256, 3, 2 ] ] + - [ [ -1, 12 ], 1, Concat, [ 1 ] ] + - [ -1, 3, C2f, [ 512 ] ] + + - [ -1, 1, ConvNormAct, [ 512, 3, 2 ] ] + - [ [ -1, 9 ], 1, Concat, [ 1 ] ] + - [ -1, 3, C2f, [ 1024 ] ] + + - [ [ 15, 18, 21 ], 1, YOLOv8Head, [ nc, reg_max, stride ] ] diff --git a/community/cv/ShipWise/workspace/configs/ship-wise/ship-wise-l.yaml b/community/cv/ShipWise/workspace/configs/ship-wise/ship-wise-l.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3481f481d42fd7047f481384f4bc8db9bc38bc6e --- /dev/null +++ b/community/cv/ShipWise/workspace/configs/ship-wise/ship-wise-l.yaml @@ -0,0 +1,11 @@ +__BASE__: [ + '../dataset/HRSC2016.yaml', + './hyp.scratch.high.yaml', + './ship-wise-base.yaml' +] + +overflow_still_update: False +network: + depth_multiple: 1.00 # scales module repeats + width_multiple: 1.00 # scales convolution channels + max_channels: 512 diff --git a/community/cv/ShipWise/workspace/configs/ship-wise/ship-wise-s.yaml b/community/cv/ShipWise/workspace/configs/ship-wise/ship-wise-s.yaml new file mode 100644 index 0000000000000000000000000000000000000000..79a63d1446514f1479ba869c54890168a70f344b --- /dev/null +++ b/community/cv/ShipWise/workspace/configs/ship-wise/ship-wise-s.yaml @@ -0,0 +1,11 @@ +__BASE__: [ + '../dataset/HRSC2016.yaml', + './hyp.scratch.low.yaml', + './ship-wise-base.yaml' +] + +overflow_still_update: False +network: + depth_multiple: 0.33 # scales module repeats + width_multiple: 0.50 # scales convolution channels + max_channels: 1024 diff --git a/community/cv/ShipWise/workspace/flask/__init__.py b/community/cv/ShipWise/workspace/flask/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..39ecbcf98674cd90193d5fa44139d4ed406cfbad --- /dev/null +++ b/community/cv/ShipWise/workspace/flask/__init__.py @@ -0,0 +1,11 @@ +# -*- coding: utf-8 -*- +# @Time : 2024-09-16 19:25 +# @Author : Jiang Liu + + +def main(): + pass + + +if 
__name__ == '__main__':
+    main()
diff --git a/community/cv/ShipWise/workspace/flask/index.py b/community/cv/ShipWise/workspace/flask/index.py
new file mode 100644
index 0000000000000000000000000000000000000000..308a9bb089dd97d7d5188bf87bbd026c391ce007
--- /dev/null
+++ b/community/cv/ShipWise/workspace/flask/index.py
@@ -0,0 +1,38 @@
+from io import BytesIO
+
+import cv2
+import numpy as np
+from flask import Flask, request, jsonify
+
+from workspace.flask.model.ship_wise import init, infer
+
+app = Flask(__name__)
+
+# Load the model once at application startup
+user_config = {
+    "config": r"H:\Workspace\DeepLearning\mindyolo-summer-ospp/workspace/configs/ship-wise/ship-wise-s.yaml",
+    "weight": r"H:\Workspace\DeepLearning\mindyolo-summer-ospp/runs/2024.09.15-22.56.30/weights/ship-wise-s-153_422.ckpt",
+    "save_result": False,
+    "device_target": "CPU",
+}
+args, network = init(user_config)
+
+
+@app.route('/detect', methods=['POST'])
+def detect():
+    """
+    Detect objects in an image.
+    Input: an image file
+    Output: { "bbox": [[698.248,524.238,217.65,196.28]], "category_id": [18], "score": [0.82683] }
+    """
+    file = request.files['image']
+    in_memory_file = BytesIO()
+    file.save(in_memory_file)
+    # np.fromstring is deprecated for binary input; np.frombuffer reads the raw bytes directly.
+    data = np.frombuffer(in_memory_file.getvalue(), dtype=np.uint8)
+    image = cv2.imdecode(data, cv2.IMREAD_COLOR)
+    result = infer(args, network, image)
+    return jsonify(result)
+
+
+if __name__ == '__main__':
+    app.run(debug=True, port=8080)
diff --git a/community/cv/ShipWise/workspace/flask/model/__init__.py b/community/cv/ShipWise/workspace/flask/model/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..3badb5891f4a91820e90535413c4ff8f85686fb1
--- /dev/null
+++ b/community/cv/ShipWise/workspace/flask/model/__init__.py
@@ -0,0 +1,11 @@
+# -*- coding: utf-8 -*-
+# @Time : 2024-09-16 19:57
+# @Author : Jiang Liu
+
+
+def main():
+    pass
+
+
+if __name__ == '__main__':
+    main()
diff --git a/community/cv/ShipWise/workspace/flask/model/ship_wise.py b/community/cv/ShipWise/workspace/flask/model/ship_wise.py
new file mode 100644
index 0000000000000000000000000000000000000000..6c4c1f686c352eefe8b5ddd9ba0e58af6145796f
--- /dev/null
+++ b/community/cv/ShipWise/workspace/flask/model/ship_wise.py
@@ -0,0 +1,78 @@
+# Note: demo/ is excluded by this repo's .gitignore, so these helpers are imported from
+# workspace/predict.py, which defines the same get_parser_infer/set_default_infer/detect.
+from workspace.predict import get_parser_infer, set_default_infer, detect
+from mindyolo.utils.config import load_config, Config
+import os
+import mindspore as ms
+from mindyolo.models import create_model
+from mindyolo.utils.utils import draw_result, set_seed
+
+
+class NetworkSingleton:
+    _instance = None
+    _args = None
+
+    def __new__(cls, args):
+        if cls._instance is None:
+            cls._instance = super(NetworkSingleton, cls).__new__(cls)
+            cls._instance.init_network(args)
+            cls._args = args
+        return cls._instance
+
+    def init_network(self, args):
+        set_seed(args.seed)
+        set_default_infer(args)
+        self.network = create_model(
+            model_name=args.network.model_name,
+            model_cfg=args.network,
+            num_classes=args.data.nc,
+            sync_bn=False,
+            checkpoint_path=args.weight,
+        )
+        self.network.set_train(False)
+        ms.amp.auto_mixed_precision(self.network, amp_level=args.ms_amp_level)
+
+    def get_network(self):
+        return self.network
+
+    def get_args(self):
+        return self._args
+
+
+def infer(args, network, img):
+    is_coco_dataset = "coco" in args.data.dataset_name
+    # The default task is detection
+    result_dict = detect(
+        network=network,
+        img=img,
+        conf_thres=args.conf_thres,
+        iou_thres=args.iou_thres,
+        conf_free=args.conf_free,
+        nms_time_limit=args.nms_time_limit,
+        img_size=args.img_size,
+        stride=max(max(args.network.stride), 32),
+        num_class=args.data.nc,
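+        # stride is clamped to at least 32 so that letterbox padding in detect() aligns with the
+        # coarsest feature-map stride of the network (args.network.stride is [8, 16, 32] here).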
+        is_coco_dataset=is_coco_dataset,
+    )
+    if args.save_result:
+        save_path = os.path.join(args.save_dir, "detect_results")
+        draw_result(args.image_path, result_dict, args.data.names, is_coco_dataset=is_coco_dataset,
+                    save_path=save_path)
+    return result_dict
+
+
+def init(user_config=None):
+    parser = get_parser_infer()
+    test_img_path = r"H:\Library\Datasets\HRSC\HRSC2016_dataset\HRSC2016\FullDataSet-YOLO-Split\test\100000630.bmp"
+    if user_config is None:
+        user_config = {
+            "config": "./workspace/configs/ship-wise/ship-wise-s.yaml",
+            "weight": "./runs/2024.09.15-22.56.30/weights/ship-wise-s-153_422.ckpt",
+            "device_target": "CPU",
+        }
+    cfg, _, _ = load_config(user_config["config"])
+    cfg = Config(cfg)
+    parser.set_defaults(**cfg)
+    parser.set_defaults(**user_config)
+    args = parser.parse_args()
+    args = Config(vars(args))
+    network = NetworkSingleton(args).get_network()
+    return args, network
diff --git a/community/cv/ShipWise/workspace/predict.py b/community/cv/ShipWise/workspace/predict.py
new file mode 100644
index 0000000000000000000000000000000000000000..499a6d7cfff82c4f2ebaeca20d151fabe101fab4
--- /dev/null
+++ b/community/cv/ShipWise/workspace/predict.py
@@ -0,0 +1,346 @@
+import argparse
+import ast
+import math
+import os
+import sys
+import time
+import cv2
+import numpy as np
+import yaml
+from datetime import datetime
+
+import mindspore as ms
+from mindspore import Tensor, nn
+
+from mindyolo.data import COCO80_TO_COCO91_CLASS
+from mindyolo.models import create_model
+from mindyolo.utils import logger
+from mindyolo.utils.config import parse_args
+from mindyolo.utils.metrics import non_max_suppression, scale_coords, xyxy2xywh, process_mask_upsample, scale_image
+from mindyolo.utils.utils import draw_result, set_seed
+
+
+def get_parser_infer(parents=None):
+    parser = argparse.ArgumentParser(description="Infer", parents=[parents] if parents else [])
+    parser.add_argument("--task", type=str, default="detect", choices=["detect", "segment"])
+    parser.add_argument("--device_target", type=str, default="Ascend", help="device target, Ascend/GPU/CPU")
+    parser.add_argument("--ms_mode", type=int, default=0, help="run mode, graph(0)/pynative(1)")
+    parser.add_argument("--ms_amp_level", type=str, default="O0", help="amp level, O0/O1/O2")
+    parser.add_argument(
+        "--ms_enable_graph_kernel", type=ast.literal_eval, default=False, help="use enable_graph_kernel or not"
+    )
+    parser.add_argument("--weight", type=str, default="yolov7_300.ckpt", help="model.ckpt path(s)")
+    parser.add_argument("--img_size", type=int, default=640, help="inference size (pixels)")
+    parser.add_argument(
+        "--single_cls", type=ast.literal_eval, default=False, help="train multi-class data as single-class"
+    )
+    parser.add_argument("--nms_time_limit", type=float, default=60.0, help="time limit for NMS")
+    parser.add_argument("--conf_thres", type=float, default=0.25, help="object confidence threshold")
+    parser.add_argument("--iou_thres", type=float, default=0.65, help="IOU threshold for NMS")
+    parser.add_argument(
+        "--conf_free", type=ast.literal_eval, default=False, help="Whether the prediction result includes conf"
+    )
+    parser.add_argument("--seed", type=int, default=2, help="set global seed")
+    parser.add_argument("--log_level", type=str, default="INFO", help="log level to print")
+    parser.add_argument("--save_dir", type=str, default="./runs_infer", help="save dir")
+
+    parser.add_argument("--image_path", type=str, help="path to image")
+    parser.add_argument("--save_result", type=ast.literal_eval, default=True,
help="whether save the inference result") + + return parser + + +def is_yolov7(args): + if "yolov7" not in args.config: + pass + else: + ms.set_context(ascend_config={"precision_mode": "allow_fp32_to_fp16"}) + + +def set_default_infer(args): + # Set Context + ms.set_context(mode=args.ms_mode, device_target=args.device_target, max_call_depth=2000) + # MaxPool2d does not support dtype=fp32, ops's bug. Needed to be updated when ops's demand is done. + is_yolov7(args) + # if args.ms_mode == 0: + # ms.set_context(jit_config={"jit_level": "O2"}) + if args.device_target == "Ascend": + ms.set_context(device_id=int(os.getenv("DEVICE_ID", 0))) + elif args.device_target == "GPU" and args.ms_enable_graph_kernel: + ms.set_context(enable_graph_kernel=True) + args.rank, args.rank_size = 0, 1 + # Set Data + args.data.nc = 1 if args.single_cls else int(args.data.nc) # number of classes + args.data.names = ["item"] if args.single_cls and len(args.names) != 1 else args.data.names # class names + assert len(args.data.names) == args.data.nc, "%g names found for nc=%g dataset in %s" % ( + len(args.data.names), + args.data.nc, + args.config, + ) + # Directories and Save run settings + platform = sys.platform + if platform == "win32": + args.save_dir = os.path.join(args.save_dir, datetime.now().strftime("%Y.%m.%d-%H.%M.%S")) + else: + args.save_dir = os.path.join(args.save_dir, datetime.now().strftime("%Y.%m.%d-%H:%M:%S")) + os.makedirs(args.save_dir, exist_ok=True) + if args.rank % args.rank_size == 0: + with open(os.path.join(args.save_dir, "cfg.yaml"), "w") as f: + yaml.dump(vars(args), f, sort_keys=False) + # Set Logger + logger.setup_logging(logger_name="MindYOLO", log_level="INFO", rank_id=args.rank, device_per_servers=args.rank_size) + logger.setup_logging_file(log_dir=os.path.join(args.save_dir, "logs")) + + +def detect( + network: nn.Cell, + img: np.ndarray, + conf_thres: float = 0.25, + iou_thres: float = 0.65, + conf_free: bool = False, + nms_time_limit: float = 60.0, + img_size: int = 640, + stride: int = 32, + num_class: int = 80, + is_coco_dataset: bool = True, +): + # Resize + h_ori, w_ori = img.shape[:2] # orig hw + r = img_size / max(h_ori, w_ori) # resize image to img_size + if r != 1: # always resize down, only resize up if training with augmentation + interp = cv2.INTER_AREA if r < 1 else cv2.INTER_LINEAR + img = cv2.resize(img, (int(w_ori * r), int(h_ori * r)), interpolation=interp) + h, w = img.shape[:2] + if h < img_size or w < img_size: + new_h, new_w = math.ceil(h / stride) * stride, math.ceil(w / stride) * stride + dh, dw = (new_h - h) / 2, (new_w - w) / 2 + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + img = cv2.copyMakeBorder( + img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114) + ) # add border + + # Transpose Norm + img = img[:, :, ::-1].transpose(2, 0, 1) / 255.0 + imgs_tensor = Tensor(img[None], ms.float32) + + # Run infer + _t = time.time() + out = network(imgs_tensor) # inference and training outputs + out = out[0] if isinstance(out, (tuple, list)) else out + infer_times = time.time() - _t + + # Run NMS + t = time.time() + out = out.asnumpy() + out = non_max_suppression( + out, + conf_thres=conf_thres, + iou_thres=iou_thres, + conf_free=conf_free, + multi_label=True, + time_limit=nms_time_limit, + ) + nms_times = time.time() - t + + result_dict = {"category_id": [], "bbox": [], "score": []} + total_category_ids, total_bboxes, total_scores = [], [], [] + for si, pred in enumerate(out): + 
if len(pred) == 0: + continue + + # Predictions + predn = np.copy(pred) + scale_coords(img.shape[1:], predn[:, :4], (h_ori, w_ori)) # native-space pred + + box = xyxy2xywh(predn[:, :4]) # xywh + box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner + category_ids, bboxes, scores = [], [], [] + for p, b in zip(pred.tolist(), box.tolist()): + category_ids.append(COCO80_TO_COCO91_CLASS[int(p[5])] if is_coco_dataset else int(p[5])) + bboxes.append([round(x, 3) for x in b]) + scores.append(round(p[4], 5)) + + total_category_ids.extend(category_ids) + total_bboxes.extend(bboxes) + total_scores.extend(scores) + + result_dict["category_id"].extend(total_category_ids) + result_dict["bbox"].extend(total_bboxes) + result_dict["score"].extend(total_scores) + + t = tuple(x * 1e3 for x in (infer_times, nms_times, infer_times + nms_times)) + (img_size, img_size, 1) # tuple + logger.info(f"Predict result is: {result_dict}") + logger.info(f"Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g;" % t) + logger.info(f"Detect a image success.") + + return result_dict + + +def segment( + network: nn.Cell, + img: np.ndarray, + conf_thres: float = 0.25, + iou_thres: float = 0.65, + conf_free: bool = False, + nms_time_limit: float = 60.0, + img_size: int = 640, + stride: int = 32, + num_class: int = 80, + is_coco_dataset: bool = True, +): + # Resize + h_ori, w_ori = img.shape[:2] # orig hw + r = img_size / max(h_ori, w_ori) # resize image to img_size + if r != 1: # always resize down, only resize up if training with augmentation + interp = cv2.INTER_AREA if r < 1 else cv2.INTER_LINEAR + img = cv2.resize(img, (int(w_ori * r), int(h_ori * r)), interpolation=interp) + h, w = img.shape[:2] + if h < img_size or w < img_size: + new_h, new_w = math.ceil(h / stride) * stride, math.ceil(w / stride) * stride + dh, dw = (new_h - h) / 2, (new_w - w) / 2 + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + img = cv2.copyMakeBorder( + img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114) + ) # add border + + # Transpose Norm + img = img[:, :, ::-1].transpose(2, 0, 1) / 255.0 + imgs_tensor = Tensor(img[None], ms.float32) + + # Run infer + _t = time.time() + out, (_, _, prototypes) = network(imgs_tensor) # inference and training outputs + infer_times = time.time() - _t + + # Run NMS + t = time.time() + _c = num_class + 4 if conf_free else num_class + 5 + out = out.asnumpy() + bboxes, mask_coefficient = out[:, :, :_c], out[:, :, _c:] + out = non_max_suppression( + bboxes, + mask_coefficient, + conf_thres=conf_thres, + iou_thres=iou_thres, + conf_free=conf_free, + multi_label=True, + time_limit=nms_time_limit, + ) + nms_times = time.time() - t + + prototypes = prototypes.asnumpy() + + result_dict = {"category_id": [], "bbox": [], "score": [], "segmentation": []} + total_category_ids, total_bboxes, total_scores, total_seg = [], [], [], [] + for si, (pred, proto) in enumerate(zip(out, prototypes)): + if len(pred) == 0: + continue + + # Predictions + pred_masks = process_mask_upsample(proto, pred[:, 6:], pred[:, :4], shape=imgs_tensor[si].shape[1:]) + pred_masks = pred_masks.astype(np.float32) + pred_masks = scale_image((pred_masks.transpose(1, 2, 0)), (h_ori, w_ori)) + predn = np.copy(pred) + scale_coords(img.shape[1:], predn[:, :4], (h_ori, w_ori)) # native-space pred + + box = xyxy2xywh(predn[:, :4]) # xywh + box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner + category_ids, bboxes, scores, segs = [], [], 
[], [] + for ii, (p, b) in enumerate(zip(pred.tolist(), box.tolist())): + category_ids.append(COCO80_TO_COCO91_CLASS[int(p[5])] if is_coco_dataset else int(p[5])) + bboxes.append([round(x, 3) for x in b]) + scores.append(round(p[4], 5)) + segs.append(pred_masks[:, :, ii]) + + total_category_ids.extend(category_ids) + total_bboxes.extend(bboxes) + total_scores.extend(scores) + total_seg.extend(segs) + + result_dict["category_id"].extend(total_category_ids) + result_dict["bbox"].extend(total_bboxes) + result_dict["score"].extend(total_scores) + result_dict["segmentation"].extend(total_seg) + + t = tuple(x * 1e3 for x in (infer_times, nms_times, infer_times + nms_times)) + (img_size, img_size, 1) # tuple + logger.info(f"Predict result is:") + for k, v in result_dict.items(): + if k == "segmentation": + logger.info(f"{k} shape: {v[0].shape}") + else: + logger.info(f"{k}: {v}") + logger.info(f"Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g;" % t) + logger.info(f"Detect a image success.") + + return result_dict + + +def infer(args): + # Init + set_seed(args.seed) + set_default_infer(args) + + # Create Network + network = create_model( + model_name=args.network.model_name, + model_cfg=args.network, + num_classes=args.data.nc, + sync_bn=False, + checkpoint_path=args.weight, + ) + network.set_train(False) + ms.amp.auto_mixed_precision(network, amp_level=args.ms_amp_level) + + # Load Image + if isinstance(args.image_path, str) and os.path.isfile(args.image_path): + import cv2 + img = cv2.imread(args.image_path) + else: + raise ValueError("Detect: input image file not available.") + + # Detect + is_coco_dataset = "coco" in args.data.dataset_name + if args.task == "detect": + result_dict = detect( + network=network, + img=img, + conf_thres=args.conf_thres, + iou_thres=args.iou_thres, + conf_free=args.conf_free, + nms_time_limit=args.nms_time_limit, + img_size=args.img_size, + stride=max(max(args.network.stride), 32), + num_class=args.data.nc, + is_coco_dataset=is_coco_dataset, + ) + if args.save_result: + save_path = os.path.join(args.save_dir, "detect_results") + draw_result(args.image_path, result_dict, args.data.names, is_coco_dataset=is_coco_dataset, + save_path=save_path) + elif args.task == "segment": + result_dict = segment( + network=network, + img=img, + conf_thres=args.conf_thres, + iou_thres=args.iou_thres, + conf_free=args.conf_free, + nms_time_limit=args.nms_time_limit, + img_size=args.img_size, + stride=max(max(args.network.stride), 32), + num_class=args.data.nc, + is_coco_dataset=is_coco_dataset, + ) + if args.save_result: + save_path = os.path.join(args.save_dir, "segment_results") + draw_result(args.image_path, result_dict, args.data.names, is_coco_dataset=is_coco_dataset, + save_path=save_path) + + logger.info("Infer completed.") + + +if __name__ == "__main__": + parser = get_parser_infer() + args = parse_args(parser) + infer(args) diff --git a/community/cv/ShipWise/workspace/script/__init__.py b/community/cv/ShipWise/workspace/script/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a2a7aa3e85270eba5dce50ecf653b6684c16c14f --- /dev/null +++ b/community/cv/ShipWise/workspace/script/__init__.py @@ -0,0 +1,11 @@ +# -*- coding: utf-8 -*- +# @Time : 2024-09-10 17:12 +# @Author : Jiang Liu + + +def main(): + pass + + +if __name__ == '__main__': + main() diff --git a/community/cv/ShipWise/workspace/script/dataset_tools/HRSC/__init__.py b/community/cv/ShipWise/workspace/script/dataset_tools/HRSC/__init__.py new file mode 100644 
index 0000000000000000000000000000000000000000..0c6f3caf0b26f4e6e3c40ba3d7e7d3feb40922f8 --- /dev/null +++ b/community/cv/ShipWise/workspace/script/dataset_tools/HRSC/__init__.py @@ -0,0 +1,11 @@ +# -*- coding: utf-8 -*- +# @Time : 2024-09-15 13:06 +# @Author : Jiang Liu + + +def main(): + pass + + +if __name__ == '__main__': + main() diff --git "a/community/cv/ShipWise/workspace/script/dataset_tools/HRSC/\345\210\207\345\210\206\346\225\260\346\215\256\351\233\206.py" "b/community/cv/ShipWise/workspace/script/dataset_tools/HRSC/\345\210\207\345\210\206\346\225\260\346\215\256\351\233\206.py" new file mode 100644 index 0000000000000000000000000000000000000000..ec1c2fe61556e815eca74e9d592b0a45f937f71a --- /dev/null +++ "b/community/cv/ShipWise/workspace/script/dataset_tools/HRSC/\345\210\207\345\210\206\346\225\260\346\215\256\351\233\206.py" @@ -0,0 +1,66 @@ +import os +import random +from shutil import copyfile + + +def split_dataset(img_dir, annotation_dir, + train_dir, val_dir, test_dir, + train_set_file_path, val_set_file_path, test_set_file_path, + train_ratio=0.8, val_ratio=0.1): + os.makedirs(train_dir, exist_ok=True) + os.makedirs(val_dir, exist_ok=True) + os.makedirs(test_dir, exist_ok=True) + + img_files = os.listdir(img_dir) + annotation_files = os.listdir(annotation_dir) + + # 过滤无标注的图片 + img_files = [img_file for img_file in img_files if img_file.replace('.bmp', '.txt') in annotation_files] + + random.shuffle(img_files) + + num_files = len(img_files) + num_train_files = int(num_files * train_ratio) + num_val_files = int(num_files * val_ratio) + num_test_files = num_files - num_train_files - num_val_files + + # 将划分的图片路径写入文件 + train_set = img_files[:num_train_files] + val_set = img_files[num_train_files:num_train_files + num_val_files] + test_set = img_files[num_train_files + num_val_files:] + for (set_file_path, set_dir, set_files) in zip([train_set_file_path, val_set_file_path, test_set_file_path], + [train_dir, val_dir, test_dir], + [train_set, val_set, test_set]): + with open(set_file_path, 'w') as file: + for set_file in set_files: + file.write(os.path.join(set_dir, set_file) + '\n') + + for i, img_file in enumerate(img_files): + annotation_file = img_file.replace('.bmp', '.txt') + if i < num_train_files: + copyfile(os.path.join(img_dir, img_file), os.path.join(train_dir, img_file)) + copyfile(os.path.join(annotation_dir, annotation_file), os.path.join(train_dir, annotation_file)) + elif i < num_train_files + num_val_files: + copyfile(os.path.join(img_dir, img_file), os.path.join(val_dir, img_file)) + copyfile(os.path.join(annotation_dir, annotation_file), os.path.join(val_dir, annotation_file)) + else: + copyfile(os.path.join(img_dir, img_file), os.path.join(test_dir, img_file)) + copyfile(os.path.join(annotation_dir, annotation_file), os.path.join(test_dir, annotation_file)) + + +def main(): + configs = { + 'img_dir': r"H:\Library\Datasets\HRSC\HRSC2016_dataset\HRSC2016\FullDataSet\AllImages", + 'annotation_dir': r"H:\Library\Datasets\HRSC\HRSC2016_dataset\HRSC2016\FullDataSet-YOLO\Annotations", + 'train_dir': r"H:\Library\Datasets\HRSC\HRSC2016_dataset\HRSC2016\FullDataSet-YOLO-Split\train", + 'val_dir': r"H:\Library\Datasets\HRSC\HRSC2016_dataset\HRSC2016\FullDataSet-YOLO-Split\validation", + 'test_dir': r"H:\Library\Datasets\HRSC\HRSC2016_dataset\HRSC2016\FullDataSet-YOLO-Split\test", + 'train_set_file_path': r"H:\Library\Datasets\HRSC\HRSC2016_dataset\HRSC2016\FullDataSet-YOLO-Split\train.txt", + 'val_set_file_path': 
r"H:\Library\Datasets\HRSC\HRSC2016_dataset\HRSC2016\FullDataSet-YOLO-Split\val.txt", + 'test_set_file_path': r"H:\Library\Datasets\HRSC\HRSC2016_dataset\HRSC2016\FullDataSet-YOLO-Split\test.txt", + } + split_dataset(**configs) + + +if __name__ == '__main__': + main() diff --git "a/community/cv/ShipWise/workspace/script/dataset_tools/HRSC/\350\275\254\346\215\242\346\225\260\346\215\256\351\233\206\344\270\272YOLO\346\240\274\345\274\217.py" "b/community/cv/ShipWise/workspace/script/dataset_tools/HRSC/\350\275\254\346\215\242\346\225\260\346\215\256\351\233\206\344\270\272YOLO\346\240\274\345\274\217.py" new file mode 100644 index 0000000000000000000000000000000000000000..f61a015eb4f8fa1c06bdfd820c0fea52a3f5e217 --- /dev/null +++ "b/community/cv/ShipWise/workspace/script/dataset_tools/HRSC/\350\275\254\346\215\242\346\225\260\346\215\256\351\233\206\344\270\272YOLO\346\240\274\345\274\217.py" @@ -0,0 +1,79 @@ +import xml.etree.ElementTree as ET +import os + + +def convert_cls_id(cls_id): + mapping = { + '100000001': 0, + '100000002': 1, + '100000003': 2, + '100000004': 3, + '100000005': 4, + '100000006': 5, + '100000007': 6, + '100000008': 7, + '100000009': 8, + '100000010': 9, + '100000011': 10, + '100000012': 11, + '100000013': 12, + '100000015': 13, + '100000016': 14, + '100000017': 15, + '100000018': 16, + '100000019': 17, + '100000020': 18, + '100000022': 19, + '100000024': 20, + '100000025': 21, + '100000026': 22, + '100000027': 23, + '100000028': 24, + '100000029': 25, + '100000030': 26, + '100000032': 27, + } + return mapping[cls_id] + + +def xml_to_txt(xml_file_dir, txt_file_dir): + os.makedirs(txt_file_dir, exist_ok=True) + all_class_ids = set() + for xml_file in os.listdir(xml_file_dir): + if not xml_file.endswith('.xml') or xml_file == 'annotation_fmt.xml': + continue + xml_file_path = os.path.join(xml_file_dir, xml_file) + tree = ET.parse(xml_file_path) + root = tree.getroot() + img_id = root.find('Img_ID').text + img_width = int(root.find('Img_SizeWidth').text) + img_height = int(root.find('Img_SizeHeight').text) + txt_file_path = os.path.join(txt_file_dir, f"{img_id}.txt") + objs = root.findall('.//HRSC_Object') + if len(objs) == 0: + continue + with open(txt_file_path, 'w') as txt_file: + for obj in objs: + class_id = convert_cls_id(obj.find('Class_ID').text) + all_class_ids.add(class_id) + box_xmin = int(obj.find('box_xmin').text) + box_ymin = int(obj.find('box_ymin').text) + box_xmax = int(obj.find('box_xmax').text) + box_ymax = int(obj.find('box_ymax').text) + x_center = ((box_xmin + box_xmax) / 2) / img_width + y_center = ((box_ymin + box_ymax) / 2) / img_height + box_width = (box_xmax - box_xmin) / img_width + box_height = (box_ymax - box_ymin) / img_height + txt_file.write(f"{class_id} {x_center} {y_center} {box_width} {box_height}\n") + print(f"Total class ids: {len(all_class_ids)}") + print([str(class_id) for class_id in all_class_ids]) + + +def main(): + xml_file_dir = r"H:\Library\Datasets\HRSC\HRSC2016_dataset\HRSC2016\FullDataSet\Annotations" + txt_file_dir = r"H:\Library\Datasets\HRSC\HRSC2016_dataset\HRSC2016\FullDataSet-YOLO\Annotations" + xml_to_txt(xml_file_dir, txt_file_dir) + + +if __name__ == '__main__': + main() diff --git a/community/cv/ShipWise/workspace/script/dataset_tools/__init__.py b/community/cv/ShipWise/workspace/script/dataset_tools/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c20f18bb3615ba9bf3bad6a26c03b0b76507ee7c --- /dev/null +++ b/community/cv/ShipWise/workspace/script/dataset_tools/__init__.py @@ -0,0 
+1,11 @@ +# -*- coding: utf-8 -*- +# @Time : 2024-09-15 13:05 +# @Author : Jiang Liu + + +def main(): + pass + + +if __name__ == '__main__': + main() diff --git a/community/cv/ShipWise/workspace/train.py b/community/cv/ShipWise/workspace/train.py new file mode 100644 index 0000000000000000000000000000000000000000..cecce9956565deaadcfc58b4d418920af88842f8 --- /dev/null +++ b/community/cv/ShipWise/workspace/train.py @@ -0,0 +1,320 @@ +import argparse +import ast +import os +from functools import partial + +import mindspore as ms + +from mindyolo.data import COCODataset, create_loader +from mindyolo.models import create_loss, create_model +from mindyolo.optim import (EMA, create_group_param, create_lr_scheduler, + create_optimizer, create_warmup_momentum_scheduler) +from mindyolo.utils import logger +from mindyolo.utils.config import parse_args +from mindyolo.utils.train_step_factory import get_gradreducer, get_loss_scaler, create_train_step_fn +from mindyolo.utils.trainer_factory import create_trainer +from mindyolo.utils.callback import create_callback +from mindyolo.utils.utils import (freeze_layers, load_pretrain, set_default, + set_seed, Synchronizer) + + +def get_parser_train(parents=None): + parser = argparse.ArgumentParser(description="Train", parents=[parents] if parents else []) + parser.add_argument("--task", type=str, default="detect", choices=["detect", "segment"]) + parser.add_argument("--device_target", type=str, default="Ascend", help="device target, Ascend/GPU/CPU") + parser.add_argument("--save_dir", type=str, default="./runs", help="save dir") + parser.add_argument("--log_level", type=str, default="INFO", help="log level to print") + parser.add_argument("--is_parallel", type=ast.literal_eval, default=False, help="Distribute train or not") + parser.add_argument("--ms_mode", type=int, default=0, + help="Running in GRAPH_MODE(0) or PYNATIVE_MODE(1) (default=0)") + parser.add_argument("--ms_amp_level", type=str, default="O0", help="amp level, O0/O1/O2/O3") + parser.add_argument("--keep_loss_fp32", type=ast.literal_eval, default=True, + help="Whether to maintain loss using fp32/O0-level calculation") + parser.add_argument("--ms_loss_scaler", type=str, default="static", help="train loss scaler, static/dynamic/none") + parser.add_argument("--ms_loss_scaler_value", type=float, default=1024.0, help="static loss scale value") + parser.add_argument("--ms_jit", type=ast.literal_eval, default=True, help="use jit or not") + parser.add_argument("--ms_enable_graph_kernel", type=ast.literal_eval, default=False, + help="use enable_graph_kernel or not") + parser.add_argument("--ms_datasink", type=ast.literal_eval, default=False, help="Train with datasink.") + parser.add_argument("--overflow_still_update", type=ast.literal_eval, default=True, help="overflow still update") + parser.add_argument("--clip_grad", type=ast.literal_eval, default=False) + parser.add_argument("--clip_grad_value", type=float, default=10.0) + parser.add_argument("--ema", type=ast.literal_eval, default=True, help="ema") + parser.add_argument("--weight", type=str, default="", help="initial weight path") + parser.add_argument("--ema_weight", type=str, default="", help="initial ema weight path") + parser.add_argument("--freeze", type=list, default=[], help="Freeze layers: backbone of yolov7=50, first3=0 1 2") + parser.add_argument("--epochs", type=int, default=300, help="total train epochs") + parser.add_argument("--per_batch_size", type=int, default=32, help="per batch size for each device") + 
parser.add_argument("--img_size", type=list, default=640, help="train image sizes") + parser.add_argument("--nbs", type=list, default=64, help="nbs") + parser.add_argument("--accumulate", type=int, default=1, + help="grad accumulate step, recommended when batch-size is less than 64") + parser.add_argument("--auto_accumulate", type=ast.literal_eval, default=False, help="auto accumulate") + parser.add_argument("--log_interval", type=int, default=100, help="log interval") + parser.add_argument("--single_cls", type=ast.literal_eval, default=False, + help="train multi-class data as single-class") + parser.add_argument("--sync_bn", type=ast.literal_eval, default=False, + help="use SyncBatchNorm, only available in DDP mode") + parser.add_argument("--keep_checkpoint_max", type=int, default=100) + parser.add_argument("--run_eval", type=ast.literal_eval, default=False, help="Whether to run eval during training") + parser.add_argument("--conf_thres", type=float, default=0.001, help="object confidence threshold for run_eval") + parser.add_argument("--iou_thres", type=float, default=0.65, help="IOU threshold for NMS for run_eval") + parser.add_argument("--conf_free", type=ast.literal_eval, default=False, + help="Whether the prediction result include conf") + parser.add_argument("--rect", type=ast.literal_eval, default=False, help="rectangular training") + parser.add_argument("--nms_time_limit", type=float, default=20.0, help="time limit for NMS") + parser.add_argument("--recompute", type=ast.literal_eval, default=False, help="Recompute") + parser.add_argument("--recompute_layers", type=int, default=0) + parser.add_argument("--seed", type=int, default=2, help="set global seed") + parser.add_argument("--summary", type=ast.literal_eval, default=True, help="collect train loss scaler or not") + parser.add_argument("--profiler", type=ast.literal_eval, default=False, help="collect profiling data or not") + parser.add_argument("--profiler_step_num", type=int, default=1, help="collect profiler data for how many steps.") + parser.add_argument("--opencv_threads_num", type=int, default=2, help="set the number of threads for opencv") + parser.add_argument("--strict_load", type=ast.literal_eval, default=True, help="strictly load the pretrain model") + + # args for ModelArts + parser.add_argument("--enable_modelarts", type=ast.literal_eval, default=False, help="enable modelarts") + parser.add_argument("--data_url", type=str, default="", help="ModelArts: obs path to dataset folder") + parser.add_argument("--ckpt_url", type=str, default="", help="ModelArts: obs path to pretrain model checkpoint file") + parser.add_argument("--multi_data_url", type=str, default="", help="ModelArts: list of obs paths to multi-dataset folders") + parser.add_argument("--pretrain_url", type=str, default="", help="ModelArts: list of obs paths to multi-pretrain model files") + parser.add_argument("--train_url", type=str, default="", help="ModelArts: obs path to output folder") + parser.add_argument("--data_dir", type=str, default="/cache/data/", + help="ModelArts: local device path to dataset folder") + parser.add_argument("--ckpt_dir", type=str, default="/cache/pretrain_ckpt/", + help="ModelArts: local device path to checkpoint folder") + return parser + + +def train(args): + # Set Default + set_seed(args.seed) + set_default(args) + main_device = args.rank % args.rank_size == 0 + + logger.info(f"parse_args:\n{args}") + logger.info("Please check the above information for the configurations") + + # Create Network + args.network.recompute = 
args.recompute + args.network.recompute_layers = args.recompute_layers + network = create_model( + model_name=args.network.model_name, + model_cfg=args.network, + num_classes=args.data.nc, + sync_bn=args.sync_bn, + ) + + if args.ema: + ema_network = create_model( + model_name=args.network.model_name, + model_cfg=args.network, + num_classes=args.data.nc, + ) + ema = EMA(network, ema_network) + else: + ema = None + load_pretrain(network, args.weight, ema, args.ema_weight, args.strict_load) # load pretrain + freeze_layers(network, args.freeze) # freeze Layers + ms.amp.auto_mixed_precision(network, amp_level=args.ms_amp_level) + if ema: + ms.amp.auto_mixed_precision(ema.ema, amp_level=args.ms_amp_level) + + # Create Dataloaders + transforms = args.data.train_transforms + stage_dataloaders = [] + stage_epochs = [args.epochs,] if not isinstance(transforms, dict) else transforms['stage_epochs'] + stage_transforms = [transforms,] if not isinstance(transforms, dict) else transforms['trans_list'] + assert len(stage_epochs) == len(stage_transforms), "The length of transforms and stage_epochs is not equal." + assert sum(stage_epochs) == args.epochs, f"Stage epochs [{sum(stage_epochs)}] not equal args.epochs [{args.epochs}]" + for stage in range(len(stage_epochs)): + _dataset = COCODataset( + dataset_path=args.data.train_set, + img_size=args.img_size, + transforms_dict=stage_transforms[stage], + is_training=True, + augment=True, + rect=args.rect, + single_cls=args.single_cls, + batch_size=args.total_batch_size, + stride=max(args.network.stride), + return_segments=(args.task == "segment") + ) + _dataloader = create_loader( + dataset=_dataset, + batch_collate_fn=_dataset.train_collate_fn, + column_names_getitem=_dataset.column_names_getitem, + column_names_collate=_dataset.column_names_collate, + batch_size=args.per_batch_size, + epoch_size=stage_epochs[stage], + rank=args.rank, + rank_size=args.rank_size, + shuffle=True, + drop_remainder=True, + num_parallel_workers=args.data.num_parallel_workers, + python_multiprocessing=True, + ) + stage_dataloaders.append(_dataloader) + dataloader = stage_dataloaders[0] if len(stage_dataloaders) == 1 else ms.dataset.ConcatDataset(stage_dataloaders) + steps_per_epoch = dataloader.get_dataset_size() // args.epochs + + if args.run_eval: + from test import test + eval_dataset = COCODataset( + dataset_path=args.data.val_set, + img_size=args.img_size, + transforms_dict=args.data.test_transforms, + is_training=False, + augment=False, + rect=args.rect, + single_cls=args.single_cls, + batch_size=args.per_batch_size, + stride=max(args.network.stride), + ) + eval_dataloader = create_loader( + dataset=eval_dataset, + batch_collate_fn=eval_dataset.test_collate_fn, + column_names_getitem=eval_dataset.column_names_getitem, + column_names_collate=eval_dataset.column_names_collate, + batch_size=args.per_batch_size, + epoch_size=1, + rank=args.rank, + rank_size=args.rank_size, + shuffle=False, + drop_remainder=False, + num_parallel_workers=1, + python_multiprocessing=True, + ) + else: + eval_dataset, eval_dataloader = None, None + + # Create Loss + loss_fn = create_loss( + **args.loss, anchors=args.network.get("anchors", 1), stride=args.network.stride, nc=args.data.nc + ) + ms.amp.auto_mixed_precision(loss_fn, amp_level="O0" if args.keep_loss_fp32 else args.ms_amp_level) + + # Create Optimizer + args.optimizer.steps_per_epoch = steps_per_epoch + lr = create_lr_scheduler(**args.optimizer) + params = create_group_param(params=network.trainable_params(), **args.optimizer) + optimizer = 
create_optimizer(params=params, lr=lr, **args.optimizer) + warmup_momentum = create_warmup_momentum_scheduler(**args.optimizer) + + # Create train_step_fn + reducer = get_gradreducer(args.is_parallel, optimizer.parameters) + scaler = get_loss_scaler(args.ms_loss_scaler, scale_value=args.ms_loss_scaler_value) + train_step_fn = create_train_step_fn( + task=args.task, + network=network, + loss_fn=loss_fn, + optimizer=optimizer, + loss_ratio=args.rank_size, + scaler=scaler, + reducer=reducer, + ema=ema, + overflow_still_update=args.overflow_still_update, + ms_jit=args.ms_jit, + clip_grad=args.clip_grad, + clip_grad_value=args.clip_grad_value + ) + + # Create callbacks + if args.summary: + args.callback.append({"name": "SummaryCallback"}) + if args.profiler: + args.callback.append({"name": "ProfilerCallback", "profiler_step_num": args.profiler_step_num}) + callback_fns = create_callback(args.callback) + + # Create test function for run eval while train + if args.run_eval: + is_coco_dataset = "coco" in args.data.dataset_name + test_fn = partial( + test, + task=args.task, + dataloader=eval_dataloader, + anno_json_path=os.path.join( + args.data.val_set[: -len(args.data.val_set.split("/")[-1])], "annotations/instances_val2017.json" + ), + conf_thres=args.conf_thres, + iou_thres=args.iou_thres, + conf_free=args.conf_free, + num_class=args.data.nc, + nms_time_limit=args.nms_time_limit, + is_coco_dataset=is_coco_dataset, + imgIds=None if not is_coco_dataset else eval_dataset.imgIds, + per_batch_size=args.per_batch_size, + rank=args.rank, + rank_size=args.rank_size, + save_dir=args.save_dir, + synchronizer=Synchronizer(args.rank_size) if args.rank_size > 1 else None, + ) + else: + test_fn = None + + # Create Trainer + network.set_train(True) + optimizer.set_train(True) + model_name = os.path.basename(args.config)[:-5] # delete ".yaml" + trainer = create_trainer( + model_name=model_name, + train_step_fn=train_step_fn, + scaler=scaler, + dataloader=dataloader, + steps_per_epoch=steps_per_epoch, + network=network, + loss_fn=loss_fn, + ema=ema, + optimizer=optimizer, + callback=callback_fns, + reducer=reducer, + data_sink=args.ms_datasink, + profiler=args.profiler + ) + if not args.ms_datasink: + trainer.train( + epochs=args.epochs, + main_device=main_device, + warmup_step=max(round(args.optimizer.warmup_epochs * steps_per_epoch), args.optimizer.min_warmup_step), + warmup_momentum=warmup_momentum, + accumulate=args.accumulate, + overflow_still_update=args.overflow_still_update, + keep_checkpoint_max=args.keep_checkpoint_max, + log_interval=args.log_interval, + loss_item_name=[] if not hasattr(loss_fn, "loss_item_name") else loss_fn.loss_item_name, + save_dir=args.save_dir, + enable_modelarts=args.enable_modelarts, + train_url=args.train_url, + run_eval=args.run_eval, + test_fn=test_fn, + rank_size=args.rank_size, + ms_jit=args.ms_jit, + profiler_step_num=args.profiler_step_num + ) + else: + logger.warning("DataSink is an experimental interface under development.") + logger.warning("Train with data sink mode.") + assert args.accumulate == 1, "datasink mode not support grad accumulate." 
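+        # In data-sink mode, batches are fed through the device queue and whole epochs run on the
+        # device, so per-step host-side control such as gradient accumulation is unavailable,
+        # which is why accumulate must be 1 here.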
+ trainer.train_with_datasink( + task=args.task, + epochs=args.epochs, + main_device=main_device, + warmup_epoch=max(args.optimizer.warmup_epochs, args.optimizer.min_warmup_step // steps_per_epoch), + warmup_momentum=warmup_momentum, + keep_checkpoint_max=args.keep_checkpoint_max, + log_interval=args.log_interval, + loss_item_name=[] if not hasattr(loss_fn, "loss_item_name") else loss_fn.loss_item_name, + save_dir=args.save_dir, + enable_modelarts=args.enable_modelarts, + train_url=args.train_url, + run_eval=args.run_eval, + test_fn=test_fn, + profiler_step_num=args.profiler_step_num + ) + logger.info("Training completed.") + + +if __name__ == "__main__": + parser = get_parser_train() + args = parse_args(parser) + train(args)
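
With the patch applied, the Flask service in `workspace/flask/index.py` can be exercised end to end. Below is a minimal client sketch (not part of the patch): it assumes the service is running locally on port 8080, as configured in `app.run(debug=True, port=8080)`, that the `requests` package is installed, and that `test.bmp` is any image on disk; the response shape follows the endpoint's docstring.

```python
# Minimal client sketch for the /detect endpoint (assumes the Flask service above is running
# on 127.0.0.1:8080 and that `requests` is installed; the image filename is illustrative).
import requests

with open("test.bmp", "rb") as f:
    resp = requests.post("http://127.0.0.1:8080/detect", files={"image": f})
resp.raise_for_status()

result = resp.json()
# Expected shape, per the endpoint docstring:
# {"bbox": [[698.248, 524.238, 217.65, 196.28]], "category_id": [18], "score": [0.82683]}
for box, cls_id, score in zip(result["bbox"], result["category_id"], result["score"]):
    print(f"class={cls_id} score={score:.3f} bbox(xywh)={box}")
```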