diff --git a/community/cv/ShipWise/.gitignore b/community/cv/ShipWise/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..958108fe86836bde34187d7cd3b7b91f8c0cfdc5
--- /dev/null
+++ b/community/cv/ShipWise/.gitignore
@@ -0,0 +1,102 @@
+# MindSpore
+*.ir
+kernel_meta/
+somas_meta/
+trace_code_graph_*
+
+# Cmake files
+CMakeFiles/
+cmake_install.cmake
+CMakeCache.txt
+Makefile
+cmake-build-debug
+
+# Dynamic libraries
+*.so
+*.so.*
+*.dylib
+
+# Static libraries
+*.la
+*.lai
+*.a
+*.lib
+
+# Protocol buffers
+*_pb2.py
+*.pb.h
+*.pb.cc
+*.pb
+*_grpc.py
+
+# Object files
+*.o
+
+# Editor
+.vscode
+.idea/
+
+# Cquery
+.cquery_cached_index/
+compile_commands.json
+
+# Ctags and cscope
+tags
+TAGS
+CTAGS
+GTAGS
+GRTAGS
+GSYMS
+GPATH
+cscope.*
+
+# Python files
+*__pycache__*
+.pytest_cache
+
+# Mac files
+*.DS_Store
+
+# Test results
+test_temp_summary_event_file/
+*.dot
+*.dat
+*.svg
+*.perf
+*.info
+*.ckpt
+*.shp
+*.pkl
+*.pb
+.clangd
+
+# lite opencl compile file
+*.cl.inc
+
+# Custom
+build/
+configs/
+data/
+demo/
+deploy/
+docs/
+examples/
+mindinsight/
+mindyolo.egg-info/
+requirements/
+runs/
+runs_infer/
+tests/
+tutorials/
+datasets
+benchmark_results.md
+CONTRIBUTING.md
+GETTING_STARTED.md
+GETTING_STARTED_CN.md
+LICENSE.md
+ma-pre-start.sh
+mkdocs.yml
+package.sh
+setup.py
+test.py
+
diff --git a/community/cv/ShipWise/README.md b/community/cv/ShipWise/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..c4d9c999cf13ba79a4583ebb9a06c53e5ff1a54c
--- /dev/null
+++ b/community/cv/ShipWise/README.md
@@ -0,0 +1,282 @@
+# Contents
+
+- [1. ShipWiseNet Description](#1-shipwisenet-description)
+- [2. Model Architecture](#2-model-architecture)
+- [3. Datasets](#3-datasets)
+    - [3.1 Pre-training Dataset](#31-pre-training-dataset)
+    - [3.2 Project Task Dataset](#32-project-task-dataset)
+    - [3.3 Data Processing](#33-data-processing)
+- [4. Quick Start](#4-quick-start)
+    - [4.1 Model Training](#41-model-training)
+    - [4.2 Model Inference](#42-model-inference)
+- [5. Script Description](#5-script-description)
+    - [5.1 Scripts and Sample Code](#51-scripts-and-sample-code)
+    - [5.2 Script Parameters](#52-script-parameters)
+- [6. Model Description](#6-model-description)
+    - [6.1 Evaluation Performance](#61-evaluation-performance)
+    - [6.2 Inference Performance](#62-inference-performance)
+- [7. Project Showcase](#7-project-showcase)
+
+# 1. ShipWiseNet Description
+
+ShipWiseNet is an efficient, lightweight object detection network designed specifically for detecting ships and other
+maritime targets, adapted to complex ocean environments and real-time requirements. Built on a modern deep learning
+architecture combined with targeted optimization techniques, the model balances high accuracy against low compute
+requirements, making it suitable for resource-constrained scenarios such as embedded devices and unmanned systems.
+ShipWiseNet delivers fast, robust detection and can accurately identify and localize ship targets even under severe
+weather, large illumination changes, or cluttered backgrounds.
+
+# 2. Model Architecture
+
+The architecture of ShipWiseNet focuses on the specific needs of maritime target detection. The model consists of a
+feature extraction module, a detection head, and an optimization module, which together enable precise and efficient
+ship detection:
+
+1. Feature extraction module: a multi-layer convolutional neural network (CNN) optimized to capture ship edge features
+   and shape details, ensuring that useful information is extracted even against complex backgrounds. The module
+   represents features at multiple scales, which improves the model's adaptability.
+2. Detection head: the detection layers use an adaptive anchor-based strategy, so the model can flexibly handle targets
+   of different sizes and at different distances. The head itself is lightweight, further reducing computational
+   complexity.
+3. Optimization module: dedicated regularization and loss-function optimization strategies let ShipWiseNet shrink the
+   model size while preserving detection accuracy. The module also includes refined background suppression, which
+   filters out irrelevant information and improves robustness in cluttered maritime scenes.
+
+This architecture allows ShipWiseNet to run smoothly on low-compute devices while retaining high detection accuracy,
+making it well suited to real-time ship detection in maritime environments.
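+
+At a high level, the three modules compose as backbone → head, as is typical for one-stage detectors. The sketch below
+is illustrative only; the class and attribute names are assumptions for exposition, not the actual ShipWiseNet
+implementation:
+
+```python
+import mindspore.nn as nn
+
+
+class ShipWiseNetSketch(nn.Cell):
+    """Illustrative backbone + detection-head composition (not the real implementation)."""
+
+    def __init__(self, backbone: nn.Cell, head: nn.Cell):
+        super().__init__()
+        self.backbone = backbone  # multi-scale CNN feature extractor
+        self.head = head          # lightweight adaptive-anchor detection head
+
+    def construct(self, x):
+        features = self.backbone(x)  # feature maps at several strides
+        return self.head(features)   # per-scale box/class predictions
+```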
+
+# 3. Datasets
+
+## 3.1 Pre-training Dataset
+
+The COCO 2017 dataset is a large, general-purpose dataset widely used for object detection, segmentation, and keypoint
+detection, covering diverse scenes and object categories. With more than 200,000 images across 80 object classes,
+COCO 2017 provides ShipWiseNet with rich pre-training data and a strong general detection foundation.
+
+- Dataset scale: COCO 2017 contains roughly 118,000 training images and 5,000 validation images spanning a wide
+  variety of natural scenes, helping the model learn rich feature representations during pre-training.
+- Object diversity: the objects in the dataset vary widely in category and pose, giving ShipWiseNet broad visual
+  priors before ship detection and improving its generalization ability.
+- Annotations: COCO 2017 provides precise annotations that support bounding boxes, segmentation, and other tasks, so
+  the model stays robust on multi-scale, multi-pose detection.
+
+Pre-training on COCO 2017 strengthens ShipWiseNet's base feature extraction capability, helping it adapt faster to
+maritime target detection when fine-tuned on specialized datasets such as HRSC2016.
+
+## 3.2 Project Task Dataset
+
+The HRSC2016 dataset is a high-resolution remote-sensing image dataset dedicated to ship detection, widely used for
+maritime target detection and recognition. It contains ship targets at many scales, angles, and poses, closely
+simulating the complexity of real maritime scenes, which makes it an ideal choice for training and evaluating
+ShipWiseNet.
+
+1. Dataset scale: HRSC2016 contains more than 1,000 high-resolution images covering many ship types and complex
+   backgrounds, suitable for improving the model's adaptability across maritime environments.
+2. Image characteristics: the images are rich in detail and varied in viewing angle, showing ship shapes from
+   different perspectives, which helps the model detect accurately under different pitch angles and cluttered
+   backgrounds.
+3. Annotations: every image comes with precise ship location labels, including outline and pose information in a
+   rotated-box annotation format, supporting detection at arbitrary orientations.
+
+With HRSC2016, ShipWiseNet learns the varied visual characteristics of ships at sea and adapts better to complex
+backgrounds and ship diversity, giving it stronger generalization and robustness in real maritime environments.
+
+## 3.3 Data Processing
+
+Before use, the data must be preprocessed to match the model's input requirements. Preprocessing mainly covers image
+reading, data augmentation, and annotation parsing. The script below converts HRSC2016 XML annotations into
+YOLO-format txt labels:
+
+```python
+import os
+import xml.etree.ElementTree as ET
+
+
+def xml_to_txt(xml_file_dir, txt_file_dir):
+    """Convert HRSC2016 XML annotations to normalized YOLO txt labels."""
+    os.makedirs(txt_file_dir, exist_ok=True)
+    all_class_ids = set()
+    for xml_file in os.listdir(xml_file_dir):
+        if not xml_file.endswith('.xml') or xml_file == 'annotation_fmt.xml':
+            continue
+        xml_file_path = os.path.join(xml_file_dir, xml_file)
+        tree = ET.parse(xml_file_path)
+        root = tree.getroot()
+        img_id = root.find('Img_ID').text
+        img_width = int(root.find('Img_SizeWidth').text)
+        img_height = int(root.find('Img_SizeHeight').text)
+        txt_file_path = os.path.join(txt_file_dir, f"{img_id}.txt")
+        objs = root.findall('.//HRSC_Object')
+        if len(objs) == 0:
+            continue
+        with open(txt_file_path, 'w') as txt_file:
+            for obj in objs:
+                # convert_cls_id: project-specific mapping from the HRSC Class_ID
+                # to a contiguous YOLO class index (defined elsewhere in this script)
+                class_id = convert_cls_id(obj.find('Class_ID').text)
+                all_class_ids.add(class_id)
+                box_xmin = int(obj.find('box_xmin').text)
+                box_ymin = int(obj.find('box_ymin').text)
+                box_xmax = int(obj.find('box_xmax').text)
+                box_ymax = int(obj.find('box_ymax').text)
+                x_center = ((box_xmin + box_xmax) / 2) / img_width
+                y_center = ((box_ymin + box_ymax) / 2) / img_height
+                box_width = (box_xmax - box_xmin) / img_width
+                box_height = (box_ymax - box_ymin) / img_height
+                txt_file.write(f"{class_id} {x_center} {y_center} {box_width} {box_height}\n")
+    print(f"Total class ids: {len(all_class_ids)}")
+    print([str(class_id) for class_id in all_class_ids])
+```
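+
+A typical invocation might look like the following; the directory names are hypothetical and should be adjusted to
+the local HRSC2016 layout:
+
+```python
+# Hypothetical paths -- adjust to wherever HRSC2016 is stored locally.
+xml_to_txt(
+    xml_file_dir="datasets/HRSC2016/Annotations",
+    txt_file_dir="datasets/HRSC2016/labels",
+)
+```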
+
+# 4. Quick Start
+
+After installing MindSpore from the official website, you can follow the steps below for training and evaluation:
+
+## 4.1 Model Training
+
+```bash
+# Train the model
+python train.py --config ./workspace/configs/ship-wise/ship-wise-s.yaml --log_interval 52
+```
+
+```txt
+2024-09-16 17:20:40,329 [INFO] Epoch 146/500, Step 52/422, imgsize (640, 640), loss: 3.8198, lbox: 0.7182, lcls: 1.4714, dfl: 1.6302, cur_lr: 0.007129000034183264
+2024-09-16 17:20:40,329 [INFO] Epoch 146/500, Step 52/422, step time: 993.04 ms
+2024-09-16 17:21:31,956 [INFO] Epoch 146/500, Step 104/422, imgsize (640, 640), loss: 4.2556, lbox: 1.0156, lcls: 1.9761, dfl: 1.2639, cur_lr: 0.007129000034183264
+2024-09-16 17:21:31,957 [INFO] Epoch 146/500, Step 104/422, step time: 992.82 ms
+2024-09-16 17:22:23,579 [INFO] Epoch 146/500, Step 156/422, imgsize (640, 640), loss: 3.3041, lbox: 0.7597, lcls: 1.2315, dfl: 1.3129, cur_lr: 0.007129000034183264
+2024-09-16 17:22:23,579 [INFO] Epoch 146/500, Step 156/422, step time: 992.73 ms
+2024-09-16 17:23:15,262 [INFO] Epoch 146/500, Step 208/422, imgsize (640, 640), loss: 3.7350, lbox: 0.8932, lcls: 1.7161, dfl: 1.1257, cur_lr: 0.007129000034183264
+2024-09-16 17:23:15,263 [INFO] Epoch 146/500, Step 208/422, step time: 993.93 ms
+2024-09-16 17:24:06,943 [INFO] Epoch 146/500, Step 260/422, imgsize (640, 640), loss: 3.1018, lbox: 0.7251, lcls: 1.1993, dfl: 1.1774, cur_lr: 0.007129000034183264
+2024-09-16 17:24:06,944 [INFO] Epoch 146/500, Step 260/422, step time: 993.86 ms
+2024-09-16 17:24:58,402 [INFO] Epoch 146/500, Step 312/422, imgsize (640, 640), loss: 3.4070, lbox: 0.8956, lcls: 1.3462, dfl: 1.1653, cur_lr: 0.007129000034183264
+2024-09-16 17:24:58,403 [INFO] Epoch 146/500, Step 312/422, step time: 989.59 ms
+2024-09-16 17:25:49,862 [INFO] Epoch 146/500, Step 364/422, imgsize (640, 640), loss: 2.2349, lbox: 0.5543, lcls: 0.7309, dfl: 0.9497, cur_lr: 0.007129000034183264
+2024-09-16 17:25:49,863 [INFO] Epoch 146/500, Step 364/422, step time: 989.62 ms
+2024-09-16 17:26:41,257 [INFO] Epoch 146/500, Step 416/422, imgsize (640, 640), loss: 2.7312, lbox: 0.5390, lcls: 1.0165, dfl: 1.1757, cur_lr: 0.007129000034183264
+2024-09-16 17:26:41,258 [INFO] Epoch 146/500, Step 416/422, step time: 988.38 ms
+2024-09-16 17:26:47,405 [INFO] Saving model to ./runs\2024.09.15-22.56.30\weights\ship-wise-s-146_422.ckpt
+2024-09-16 17:26:47,405 [INFO] Epoch 146/500, epoch time: 6.98 min.
+```
+
+## 4.2 Model Inference
+
+```bash
+python predict.py --config=./workspace/configs/ship-wise/ship-wise-s.yaml --weight=./runs/2024.09.15-22.56.30/weights/ship-wise-s-153_422.ckpt --image_path=H:\Library\Datasets\HRSC\HRSC2016_dataset\HRSC2016\FullDataSet-YOLO-Split\test\100000630.bmp
+```
+
+```txt
+2024-09-16 18:33:22,989 [INFO] number of network params, total: 11.166471M, trainable: 11.14642M
+2024-09-16 18:33:23,172 [INFO] Load checkpoint from [runs/2024.09.15-22.56.30/weights/ship-wise-s-153_422.ckpt] success.
+2024-09-16 18:33:25,788 [INFO] Predict result is: {'category_id': [18], 'bbox': [[699.768, 529.886, 216.182, 189.988]], 'score': [0.76474]}
+2024-09-16 18:33:25,788 [INFO] Speed: 2594.7/1.8/2596.4 ms inference/NMS/total per 640x640 image at batch-size 1;
+2024-09-16 18:33:25,788 [INFO] Detect a image success.
+2024-09-16 18:33:25,797 [INFO] Infer completed.
+```
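+
+The printed result dict follows the log above (`category_id`, `bbox`, `score`). Assuming the boxes use the COCO-style
+`[x_min, y_min, width, height]` convention and OpenCV is available, a small helper like the following can visualize a
+prediction; it is a sketch, not part of predict.py:
+
+```python
+import cv2
+
+
+def draw_prediction(image_path, result, out_path="prediction_vis.jpg", color=(0, 255, 0)):
+    """Draw COCO-style [x, y, w, h] boxes from a predict.py-style result dict."""
+    img = cv2.imread(image_path)
+    for cat, (x, y, w, h), score in zip(result["category_id"], result["bbox"], result["score"]):
+        pt1, pt2 = (int(x), int(y)), (int(x + w), int(y + h))
+        cv2.rectangle(img, pt1, pt2, color, 2)
+        cv2.putText(img, f"{cat}: {score:.2f}", (pt1[0], pt1[1] - 5),
+                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
+    cv2.imwrite(out_path, img)
+```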
+
+# 5. Script Description
+
+## 5.1 Scripts and Sample Code
+
+```text
+├── root_directory                       // project root
+│   ├── predict.py                       // main prediction script
+│   ├── train.py                         // main training script
+│   ├── __init__.py                      // init script
+│
+├── configs                              // configuration files
+│   ├── dataset                          // dataset configuration files
+│   │   ├── HRSC2016.yaml                // configuration for the HRSC2016 dataset
+│   │
+│   └── ship-wise                        // ShipWiseNet model configurations
+│       ├── hyp.scratch.high.yaml        // hyper-parameters for the high-accuracy mode
+│       ├── hyp.scratch.low.yaml         // hyper-parameters for the low-accuracy mode
+│       ├── ship-wise-base.yaml          // ShipWiseNet base model configuration
+│       ├── ship-wise-l.yaml             // ShipWiseNet large model configuration
+│       ├── ship-wise-s.yaml             // ShipWiseNet small model configuration
+│
+├── datasets                             // dataset folder
+│   └── HRSC2016                         // HRSC2016 dataset directory
+│       ├── test.txt                     // test set file list
+│       ├── train.cache.npy              // training set cache file
+│       ├── train.txt                    // training set file list
+│       ├── val.txt                      // validation set file list
+│
+├── flask                                // Flask project for model serving
+│   ├── index.py                         // main entry file
+│   ├── __init__.py                      // init script
+│   │
+│   ├── model                            // model-related files
+│   │   ├── yolov8.py                    // YOLOv8 model implementation
+│   │   ├── __init__.py                  // init script
+│   │   └── __pycache__                  // Python bytecode cache directory
+│   │       ├── yolov8.cpython-38.pyc    // YOLOv8 bytecode cache
+│   │       └── __init__.cpython-38.pyc  // init script bytecode cache
+│   │
+│   └── __pycache__                      // Python bytecode cache directory
+│
+└── script                               // script folder
+    ├── train.md                         // training documentation
    ├── __init__.py                      // init script
+    │
+    ├── dataset_tools                    // dataset tools folder
+    │   ├── __init__.py                  // init script
+    │   │
+    │   └── HRSC                         // HRSC dataset tools
+    │       ├── __init__.py              // init script
+    │       ├── 切分数据集.py             // dataset split script
+    │       └── 转换数据集为YOLO格式.py    // dataset-to-YOLO-format conversion script
+    │
+    └── __pycache__                      // Python bytecode cache directory
+```
+
+## 5.2 Script Parameters
+
+```text
+Main parameters in train.py:
+
+optional arguments:
+
+  --device_target        Device on which the code runs. Default: Ascend
+  --data_dir             Training dataset directory
+  --per_batch_size       Batch size for training. Default: 32 (1p), 16 (Ascend 8p) or 32 (GPU 8p)
+  --resume_yolov5        CKPT file of YOLOv5 used for fine-tuning. Default: ""
+  --lr_scheduler         Learning rate scheduler. Options: exponential or cosine_annealing.
+                         Default: cosine_annealing
+  --lr                   Learning rate. Default: 0.01 (1p), 0.02 (Ascend 8p) or 0.025 (GPU 8p)
+  --lr_epochs            Epochs at which the learning rate changes, separated by commas (,). Default: '220,250'
+  --lr_gamma             Decay factor of the exponential lr_scheduler. Default: 0.1
+  --eta_min              eta_min in the cosine_annealing scheduler. Default: 0
+  --t_max                T-max in the cosine_annealing scheduler. Default: 300 (8p)
+  --max_epoch            Maximum number of training epochs. Default: 300 (8p)
+  --warmup_epochs        Total warm-up epochs. Default: 20 (8p)
+  --weight_decay         Weight decay factor. Default: 0.0005
+  --momentum             Momentum. Default: 0.9
+  --loss_scale           Static loss scale. Default: 64
+  --label_smooth         Whether to use label smoothing in CE. Default: 0
+  --label_smooth_factor  Smoothing strength of the original one-hot encoding. Default: 0.1
+  --log_interval         Logging interval in steps. Default: 100
+  --ckpt_path            Location to save CKPT files. Default: outputs/
+  --is_distributed       Whether to run distributed training, 1 for yes, 0 for no. Default: 0
+  --rank                 Local rank for distributed training. Default: 0
+  --group_size           Global size of devices. Default: 1
+  --need_profiler        Whether to use the profiler, 0 for no, 1 for yes. Default: 0
+  --training_shape       Fixed training shape. Default: ""
+  --resize_rate          Resize rate for multi-scale training. Default: 10
+  --bind_cpu             Whether to bind CPU during distributed training. Default: True
+  --device_num           Number of devices per server. Default: 8
+```
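+
+For intuition, the cosine_annealing option listed above follows the standard cosine annealing schedule. The sketch
+below uses the default values from the list (lr=0.01, eta_min=0, t_max=300); it is a generic illustration of the
+formula, not code taken from train.py:
+
+```python
+import math
+
+
+def cosine_annealing_lr(epoch, lr_init=0.01, eta_min=0.0, t_max=300):
+    """Standard cosine annealing: decays lr_init down to eta_min over t_max epochs."""
+    return eta_min + (lr_init - eta_min) * (1 + math.cos(math.pi * epoch / t_max)) / 2
+
+
+# e.g. cosine_annealing_lr(0) == 0.01 and cosine_annealing_lr(300) == 0.0
+```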
+
+# 6. Model Description
+
+## 6.1 Evaluation Performance
+
+| Parameter             | ShipWise-s                                                       |
+|-----------------------|------------------------------------------------------------------|
+| Resource              | CPU @ 5.10 GHz, 14 cores; 32 GB RAM                              |
+| Upload date           | 26/10/2024                                                       |
+| MindSpore version     | 2.2.14                                                           |
+| Dataset               | 1,680 images                                                     |
+| Training parameters   | epoch=300, batch_size=2, lr=0.01, momentum=0.937, warmup_epoch=3 |
+| Optimizer             | Momentum                                                         |
+| Loss function         | YOLOv8Loss                                                       |
+| Outputs               | Boxes and labels                                                 |
+| Loss                  | 2.6635                                                           |
+| Total time            | 19 h 20 min 58 s                                                 |
+| Fine-tuned checkpoint | 42.6 MB (.ckpt file)                                             |
+
+## 6.2 Inference Performance
+
+| Parameter             | ShipWise-s                            |
+|-----------------------|---------------------------------------|
+| Resource              | CPU @ 5.10 GHz, 14 cores; 32 GB RAM   |
+| Upload date           | 26/10/2024                            |
+| MindSpore version     | 2.2.14                                |
+| Dataset               | 1,680 images                          |
+| batch_size            | 1                                     |
+| Outputs               | Box coordinates and scores, plus probabilities |
+| Accuracy              | mAP >= 82.6% (shape=640)              |
+| Fine-tuned checkpoint | 42.6 MB (.ckpt file)                  |
+
+# 7. Project Showcase
+
+Home page
+
+![Home page](./assets/pic-1.png)
+
+Data visualization dashboard
+
+![Data visualization dashboard](./assets/pic-2.png)
\ No newline at end of file
diff --git a/community/cv/ShipWise/assets/pic-1.png b/community/cv/ShipWise/assets/pic-1.png
new file mode 100644
index 0000000000000000000000000000000000000000..baa800c3af63ced712170aa288348bc281a09c32
Binary files /dev/null and b/community/cv/ShipWise/assets/pic-1.png differ
diff --git a/community/cv/ShipWise/assets/pic-2.png b/community/cv/ShipWise/assets/pic-2.png
new file mode 100644
index 0000000000000000000000000000000000000000..dda605ccf235116e14b3035755a508cd99f7e378
Binary files /dev/null and b/community/cv/ShipWise/assets/pic-2.png differ
diff --git a/community/cv/ShipWise/mindyolo/__init__.py b/community/cv/ShipWise/mindyolo/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..3ac0e002036597328a58a34b8a2c9d7793a03564
--- /dev/null
+++ b/community/cv/ShipWise/mindyolo/__init__.py
@@ -0,0 +1,12 @@
+"""mindyolo init"""
+from . import data, models, optim, utils
+from .data import *
+from .models import *
+from .optim import *
+from .utils import *
+from .version import __version__
+
+__all__ = []
+__all__.extend(data.__all__)
+__all__.extend(models.__all__)
+__all__.extend(optim.__all__)
diff --git a/community/cv/ShipWise/mindyolo/csrc/__init__.py b/community/cv/ShipWise/mindyolo/csrc/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..180424bb011d0347456fc329776a173635b60b8f
--- /dev/null
+++ b/community/cv/ShipWise/mindyolo/csrc/__init__.py
@@ -0,0 +1,3 @@
+from .fast_coco_eval import COCOeval_fast
+
+__all__ = ['COCOeval_fast']
diff --git a/community/cv/ShipWise/mindyolo/csrc/build.sh b/community/cv/ShipWise/mindyolo/csrc/build.sh
new file mode 100644
index 0000000000000000000000000000000000000000..69fe1e4d95da111a536c8e9b66698185ab902416
--- /dev/null
+++ b/community/cv/ShipWise/mindyolo/csrc/build.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+# Build dynamic library
+python setup.py build_ext --inplace && echo "Build fast_coco_eval successfully."
diff --git a/community/cv/ShipWise/mindyolo/csrc/fast_coco_eval/__init__.py b/community/cv/ShipWise/mindyolo/csrc/fast_coco_eval/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..cbb55e37a25d74ca00711aa5eb61dc8291813d3e
--- /dev/null
+++ b/community/cv/ShipWise/mindyolo/csrc/fast_coco_eval/__init__.py
@@ -0,0 +1,3 @@
+from .fast_coco_eval_api import COCOeval_fast
+
+__all__ = ['COCOeval_fast']
diff --git a/community/cv/ShipWise/mindyolo/csrc/fast_coco_eval/cocoeval/cocoeval.cpp b/community/cv/ShipWise/mindyolo/csrc/fast_coco_eval/cocoeval/cocoeval.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..880b89956ed12e5b9694a4f0ba78086370385ea7
--- /dev/null
+++ b/community/cv/ShipWise/mindyolo/csrc/fast_coco_eval/cocoeval/cocoeval.cpp
@@ -0,0 +1,504 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+// This file was copied from project facebookresearch/detectron2
+// The file link is https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/csrc/cocoeval/cocoeval.cpp
+#include "cocoeval.h"
+#include <time.h>
+#include <algorithm>
+#include <cstdint>
+#include <numeric>
+
+using namespace pybind11::literals;
+
+namespace COCOeval {
+
+// Sort detections from highest score to lowest, such that
+// detection_instances[detection_sorted_indices[t]] >=
+// detection_instances[detection_sorted_indices[t+1]]. Use stable_sort to match
+// original COCO API
+void SortInstancesByDetectionScore(
+    const std::vector<InstanceAnnotation>& detection_instances,
+    std::vector<uint64_t>* detection_sorted_indices) {
+  detection_sorted_indices->resize(detection_instances.size());
+  std::iota(
+      detection_sorted_indices->begin(), detection_sorted_indices->end(), 0);
+  std::stable_sort(
+      detection_sorted_indices->begin(),
+      detection_sorted_indices->end(),
+      [&detection_instances](size_t j1, size_t j2) {
+        return detection_instances[j1].score > detection_instances[j2].score;
+      });
+}
+
+// Partition the ground truth objects based on whether or not to ignore them
+// based on area
+void SortInstancesByIgnore(
+    const std::array<double, 2>& area_range,
+    const std::vector<InstanceAnnotation>& ground_truth_instances,
+    std::vector<uint64_t>* ground_truth_sorted_indices,
+    std::vector<bool>* ignores) {
+  ignores->clear();
+  ignores->reserve(ground_truth_instances.size());
+  for (auto o : ground_truth_instances) {
+    ignores->push_back(
+        o.ignore || o.area < area_range[0] || o.area > area_range[1]);
+  }
+
+  ground_truth_sorted_indices->resize(ground_truth_instances.size());
+  std::iota(
+      ground_truth_sorted_indices->begin(),
+      ground_truth_sorted_indices->end(),
+      0);
+  std::stable_sort(
+      ground_truth_sorted_indices->begin(),
+      ground_truth_sorted_indices->end(),
+      [&ignores](size_t j1, size_t j2) {
+        return (int)(*ignores)[j1] < (int)(*ignores)[j2];
+      });
+}
+
+// For each IOU threshold, greedily match each detected instance to a ground
+// truth instance (if possible) and store the results
+void MatchDetectionsToGroundTruth(
+    const std::vector<InstanceAnnotation>& detection_instances,
+    const std::vector<uint64_t>& detection_sorted_indices,
+    const std::vector<InstanceAnnotation>& ground_truth_instances,
+    const std::vector<uint64_t>& ground_truth_sorted_indices,
+    const std::vector<bool>& ignores,
+    const std::vector<std::vector<double>>& ious,
+    const std::vector<double>& iou_thresholds,
+    const std::array<double, 2>& area_range,
+    ImageEvaluation* results) {
+  // Initialize memory to store return data matches and ignore
+  const int num_iou_thresholds = iou_thresholds.size();
+  const int num_ground_truth = ground_truth_sorted_indices.size();
+  const int num_detections = detection_sorted_indices.size();
+  std::vector<uint64_t> ground_truth_matches(
num_iou_thresholds * num_ground_truth, 0); + std::vector& detection_matches = results->detection_matches; + std::vector& detection_ignores = results->detection_ignores; + std::vector& ground_truth_ignores = results->ground_truth_ignores; + detection_matches.resize(num_iou_thresholds * num_detections, 0); + detection_ignores.resize(num_iou_thresholds * num_detections, false); + ground_truth_ignores.resize(num_ground_truth); + for (auto g = 0; g < num_ground_truth; ++g) { + ground_truth_ignores[g] = ignores[ground_truth_sorted_indices[g]]; + } + + for (auto t = 0; t < num_iou_thresholds; ++t) { + for (auto d = 0; d < num_detections; ++d) { + // information about best match so far (match=-1 -> unmatched) + double best_iou = std::min(iou_thresholds[t], 1 - 1e-10); + int match = -1; + for (auto g = 0; g < num_ground_truth; ++g) { + // if this ground truth instance is already matched and not a + // crowd, it cannot be matched to another detection + if (ground_truth_matches[t * num_ground_truth + g] > 0 && + !ground_truth_instances[ground_truth_sorted_indices[g]].is_crowd) { + continue; + } + + // if detected instance matched to a regular ground truth + // instance, we can break on the first ground truth instance + // tagged as ignore (because they are sorted by the ignore tag) + if (match >= 0 && !ground_truth_ignores[match] && + ground_truth_ignores[g]) { + break; + } + + // if IOU overlap is the best so far, store the match appropriately + if (ious[d][ground_truth_sorted_indices[g]] >= best_iou) { + best_iou = ious[d][ground_truth_sorted_indices[g]]; + match = g; + } + } + // if match was made, store id of match for both detection and + // ground truth + if (match >= 0) { + detection_ignores[t * num_detections + d] = ground_truth_ignores[match]; + detection_matches[t * num_detections + d] = + ground_truth_instances[ground_truth_sorted_indices[match]].id; + ground_truth_matches[t * num_ground_truth + match] = + detection_instances[detection_sorted_indices[d]].id; + } + + // set unmatched detections outside of area range to ignore + const InstanceAnnotation& detection = + detection_instances[detection_sorted_indices[d]]; + detection_ignores[t * num_detections + d] = + detection_ignores[t * num_detections + d] || + (detection_matches[t * num_detections + d] == 0 && + (detection.area < area_range[0] || detection.area > area_range[1])); + } + } + + // store detection score results + results->detection_scores.resize(detection_sorted_indices.size()); + for (size_t d = 0; d < detection_sorted_indices.size(); ++d) { + results->detection_scores[d] = + detection_instances[detection_sorted_indices[d]].score; + } +} + +std::vector EvaluateImages( + const std::vector>& area_ranges, + int max_detections, + const std::vector& iou_thresholds, + const ImageCategoryInstances>& image_category_ious, + const ImageCategoryInstances& + image_category_ground_truth_instances, + const ImageCategoryInstances& + image_category_detection_instances) { + const int num_area_ranges = area_ranges.size(); + const int num_images = image_category_ground_truth_instances.size(); + const int num_categories = + image_category_ious.size() > 0 ? image_category_ious[0].size() : 0; + std::vector detection_sorted_indices; + std::vector ground_truth_sorted_indices; + std::vector ignores; + std::vector results_all( + num_images * num_area_ranges * num_categories); + + // Store results for each image, category, and area range combination. 
Results + // for each IOU threshold are packed into the same ImageEvaluation object + for (auto i = 0; i < num_images; ++i) { + for (auto c = 0; c < num_categories; ++c) { + const std::vector& ground_truth_instances = + image_category_ground_truth_instances[i][c]; + const std::vector& detection_instances = + image_category_detection_instances[i][c]; + + SortInstancesByDetectionScore( + detection_instances, &detection_sorted_indices); + if ((int)detection_sorted_indices.size() > max_detections) { + detection_sorted_indices.resize(max_detections); + } + + for (size_t a = 0; a < area_ranges.size(); ++a) { + SortInstancesByIgnore( + area_ranges[a], + ground_truth_instances, + &ground_truth_sorted_indices, + &ignores); + + MatchDetectionsToGroundTruth( + detection_instances, + detection_sorted_indices, + ground_truth_instances, + ground_truth_sorted_indices, + ignores, + image_category_ious[i][c], + iou_thresholds, + area_ranges[a], + &results_all + [c * num_area_ranges * num_images + a * num_images + i]); + } + } + } + + return results_all; +} + +// Convert a python list to a vector +template +std::vector list_to_vec(const py::list& l) { + std::vector v(py::len(l)); + for (int i = 0; i < (int)py::len(l); ++i) { + v[i] = l[i].cast(); + } + return v; +} + +// Helper function to Accumulate() +// Considers the evaluation results applicable to a particular category, area +// range, and max_detections parameter setting, which begin at +// evaluations[evaluation_index]. Extracts a sorted list of length n of all +// applicable detection instances concatenated across all images in the dataset, +// which are represented by the outputs evaluation_indices, detection_scores, +// image_detection_indices, and detection_sorted_indices--all of which are +// length n. evaluation_indices[i] stores the applicable index into +// evaluations[] for instance i, which has detection score detection_score[i], +// and is the image_detection_indices[i]'th of the list of detections +// for the image containing i. 
detection_sorted_indices[] defines a sorted +// permutation of the 3 other outputs +int BuildSortedDetectionList( + const std::vector& evaluations, + const int64_t evaluation_index, + const int64_t num_images, + const int max_detections, + std::vector* evaluation_indices, + std::vector* detection_scores, + std::vector* detection_sorted_indices, + std::vector* image_detection_indices) { + assert(evaluations.size() >= evaluation_index + num_images); + + // Extract a list of object instances of the applicable category, area + // range, and max detections requirements such that they can be sorted + image_detection_indices->clear(); + evaluation_indices->clear(); + detection_scores->clear(); + image_detection_indices->reserve(num_images * max_detections); + evaluation_indices->reserve(num_images * max_detections); + detection_scores->reserve(num_images * max_detections); + int num_valid_ground_truth = 0; + for (auto i = 0; i < num_images; ++i) { + const ImageEvaluation& evaluation = evaluations[evaluation_index + i]; + + for (int d = 0; + d < (int)evaluation.detection_scores.size() && d < max_detections; + ++d) { // detected instances + evaluation_indices->push_back(evaluation_index + i); + image_detection_indices->push_back(d); + detection_scores->push_back(evaluation.detection_scores[d]); + } + for (auto ground_truth_ignore : evaluation.ground_truth_ignores) { + if (!ground_truth_ignore) { + ++num_valid_ground_truth; + } + } + } + + // Sort detections by decreasing score, using stable sort to match + // python implementation + detection_sorted_indices->resize(detection_scores->size()); + std::iota( + detection_sorted_indices->begin(), detection_sorted_indices->end(), 0); + std::stable_sort( + detection_sorted_indices->begin(), + detection_sorted_indices->end(), + [&detection_scores](size_t j1, size_t j2) { + return (*detection_scores)[j1] > (*detection_scores)[j2]; + }); + + return num_valid_ground_truth; +} + +// Helper function to Accumulate() +// Compute a precision recall curve given a sorted list of detected instances +// encoded in evaluations, evaluation_indices, detection_scores, +// detection_sorted_indices, image_detection_indices (see +// BuildSortedDetectionList()). Using vectors precisions and recalls +// and temporary storage, output the results into precisions_out, recalls_out, +// and scores_out, which are large buffers containing many precion/recall curves +// for all possible parameter settings, with precisions_out_index and +// recalls_out_index defining the applicable indices to store results. 
+void ComputePrecisionRecallCurve( + const int64_t precisions_out_index, + const int64_t precisions_out_stride, + const int64_t recalls_out_index, + const std::vector& recall_thresholds, + const int iou_threshold_index, + const int num_iou_thresholds, + const int num_valid_ground_truth, + const std::vector& evaluations, + const std::vector& evaluation_indices, + const std::vector& detection_scores, + const std::vector& detection_sorted_indices, + const std::vector& image_detection_indices, + std::vector* precisions, + std::vector* recalls, + std::vector* precisions_out, + std::vector* scores_out, + std::vector* recalls_out) { + assert(recalls_out->size() > recalls_out_index); + + // Compute precision/recall for each instance in the sorted list of detections + int64_t true_positives_sum = 0, false_positives_sum = 0; + precisions->clear(); + recalls->clear(); + precisions->reserve(detection_sorted_indices.size()); + recalls->reserve(detection_sorted_indices.size()); + assert(!evaluations.empty() || detection_sorted_indices.empty()); + for (auto detection_sorted_index : detection_sorted_indices) { + const ImageEvaluation& evaluation = + evaluations[evaluation_indices[detection_sorted_index]]; + const auto num_detections = + evaluation.detection_matches.size() / num_iou_thresholds; + const auto detection_index = iou_threshold_index * num_detections + + image_detection_indices[detection_sorted_index]; + assert(evaluation.detection_matches.size() > detection_index); + assert(evaluation.detection_ignores.size() > detection_index); + const int64_t detection_match = + evaluation.detection_matches[detection_index]; + const bool detection_ignores = + evaluation.detection_ignores[detection_index]; + const auto true_positive = detection_match > 0 && !detection_ignores; + const auto false_positive = detection_match == 0 && !detection_ignores; + if (true_positive) { + ++true_positives_sum; + } + if (false_positive) { + ++false_positives_sum; + } + + const double recall = + static_cast(true_positives_sum) / num_valid_ground_truth; + recalls->push_back(recall); + const int64_t num_valid_detections = + true_positives_sum + false_positives_sum; + const double precision = num_valid_detections > 0 + ? static_cast(true_positives_sum) / num_valid_detections + : 0.0; + precisions->push_back(precision); + } + + (*recalls_out)[recalls_out_index] = !recalls->empty() ? 
recalls->back() : 0; + + for (int64_t i = static_cast(precisions->size()) - 1; i > 0; --i) { + if ((*precisions)[i] > (*precisions)[i - 1]) { + (*precisions)[i - 1] = (*precisions)[i]; + } + } + + // Sample the per instance precision/recall list at each recall threshold + for (size_t r = 0; r < recall_thresholds.size(); ++r) { + // first index in recalls >= recall_thresholds[r] + std::vector::iterator low = std::lower_bound( + recalls->begin(), recalls->end(), recall_thresholds[r]); + size_t precisions_index = low - recalls->begin(); + + const auto results_ind = precisions_out_index + r * precisions_out_stride; + assert(results_ind < precisions_out->size()); + assert(results_ind < scores_out->size()); + if (precisions_index < precisions->size()) { + (*precisions_out)[results_ind] = (*precisions)[precisions_index]; + (*scores_out)[results_ind] = + detection_scores[detection_sorted_indices[precisions_index]]; + } else { + (*precisions_out)[results_ind] = 0; + (*scores_out)[results_ind] = 0; + } + } +} +py::dict Accumulate( + const py::object& params, + const std::vector& evaluations) { + const std::vector recall_thresholds = + list_to_vec(params.attr("recThrs")); + const std::vector max_detections = + list_to_vec(params.attr("maxDets")); + const int num_iou_thresholds = py::len(params.attr("iouThrs")); + const int num_recall_thresholds = py::len(params.attr("recThrs")); + const int num_categories = params.attr("useCats").cast() == 1 + ? py::len(params.attr("catIds")) + : 1; + const int num_area_ranges = py::len(params.attr("areaRng")); + const int num_max_detections = py::len(params.attr("maxDets")); + const int num_images = py::len(params.attr("imgIds")); + + std::vector precisions_out( + num_iou_thresholds * num_recall_thresholds * num_categories * + num_area_ranges * num_max_detections, + -1); + std::vector recalls_out( + num_iou_thresholds * num_categories * num_area_ranges * + num_max_detections, + -1); + std::vector scores_out( + num_iou_thresholds * num_recall_thresholds * num_categories * + num_area_ranges * num_max_detections, + -1); + + // Consider the list of all detected instances in the entire dataset in one + // large list. evaluation_indices, detection_scores, + // image_detection_indices, and detection_sorted_indices all have the same + // length as this list, such that each entry corresponds to one detected + // instance + std::vector evaluation_indices; // indices into evaluations[] + std::vector detection_scores; // detection scores of each instance + std::vector detection_sorted_indices; // sorted indices of all + // instances in the dataset + std::vector + image_detection_indices; // indices into the list of detected instances in + // the same image as each instance + std::vector precisions, recalls; + + for (auto c = 0; c < num_categories; ++c) { + for (auto a = 0; a < num_area_ranges; ++a) { + for (auto m = 0; m < num_max_detections; ++m) { + // The COCO PythonAPI assumes evaluations[] (the return value of + // COCOeval::EvaluateImages() is one long list storing results for each + // combination of category, area range, and image id, with categories in + // the outermost loop and images in the innermost loop. 
+        const int64_t evaluations_index =
+            c * num_area_ranges * num_images + a * num_images;
+        int num_valid_ground_truth = BuildSortedDetectionList(
+            evaluations,
+            evaluations_index,
+            num_images,
+            max_detections[m],
+            &evaluation_indices,
+            &detection_scores,
+            &detection_sorted_indices,
+            &image_detection_indices);
+
+        if (num_valid_ground_truth == 0) {
+          continue;
+        }
+
+        for (auto t = 0; t < num_iou_thresholds; ++t) {
+          // recalls_out is a flattened vector representing a
+          // num_iou_thresholds X num_categories X num_area_ranges X
+          // num_max_detections matrix
+          const int64_t recalls_out_index =
+              t * num_categories * num_area_ranges * num_max_detections +
+              c * num_area_ranges * num_max_detections +
+              a * num_max_detections + m;
+
+          // precisions_out and scores_out are flattened vectors
+          // representing a num_iou_thresholds X num_recall_thresholds X
+          // num_categories X num_area_ranges X num_max_detections matrix
+          const int64_t precisions_out_stride =
+              num_categories * num_area_ranges * num_max_detections;
+          const int64_t precisions_out_index = t * num_recall_thresholds *
+                  num_categories * num_area_ranges * num_max_detections +
+              c * num_area_ranges * num_max_detections +
+              a * num_max_detections + m;
+
+          ComputePrecisionRecallCurve(
+              precisions_out_index,
+              precisions_out_stride,
+              recalls_out_index,
+              recall_thresholds,
+              t,
+              num_iou_thresholds,
+              num_valid_ground_truth,
+              evaluations,
+              evaluation_indices,
+              detection_scores,
+              detection_sorted_indices,
+              image_detection_indices,
+              &precisions,
+              &recalls,
+              &precisions_out,
+              &scores_out,
+              &recalls_out);
+        }
+      }
+    }
+  }
+
+  time_t rawtime;
+  struct tm local_time;
+  std::array<char, 200> buffer;
+  time(&rawtime);
+#ifdef _WIN32
+  localtime_s(&local_time, &rawtime);
+#else
+  localtime_r(&rawtime, &local_time);
+#endif
+  strftime(
+      buffer.data(), 200, "%Y-%m-%d %H:%M:%S", &local_time);
+  return py::dict(
+      "params"_a = params,
+      "counts"_a = std::vector<int64_t>({num_iou_thresholds,
+                                         num_recall_thresholds,
+                                         num_categories,
+                                         num_area_ranges,
+                                         num_max_detections}),
+      "date"_a = buffer,
+      "precision"_a = precisions_out,
+      "recall"_a = recalls_out,
+      "scores"_a = scores_out);
+}
+
+}  // namespace COCOeval
diff --git a/community/cv/ShipWise/mindyolo/csrc/fast_coco_eval/cocoeval/cocoeval.h b/community/cv/ShipWise/mindyolo/csrc/fast_coco_eval/cocoeval/cocoeval.h
new file mode 100644
index 0000000000000000000000000000000000000000..1febb409edc8bd8b5d67dc85c34fe3e8372d94ff
--- /dev/null
+++ b/community/cv/ShipWise/mindyolo/csrc/fast_coco_eval/cocoeval/cocoeval.h
@@ -0,0 +1,100 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+// This file was copied from project facebookresearch/detectron2
+// The file link is https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/csrc/cocoeval/cocoeval.h
+#pragma once
+
+#include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+#include <pybind11/stl_bind.h>
+#include <vector>
+
+namespace py = pybind11;
+
+namespace COCOeval {
+
+// Annotation data for a single object instance in an image
+struct InstanceAnnotation {
+  InstanceAnnotation(
+      uint64_t id,
+      double score,
+      double area,
+      bool is_crowd,
+      bool ignore)
+      : id{id}, score{score}, area{area}, is_crowd{is_crowd}, ignore{ignore} {}
+  uint64_t id;
+  double score = 0.;
+  double area = 0.;
+  bool is_crowd = false;
+  bool ignore = false;
+};
+
+// Stores intermediate results for evaluating detection results for a single
+// image that has D detected instances and G ground truth instances. This
+// stores matches between detected and ground truth instances
+struct ImageEvaluation {
+  // For each of the D detected instances, the id of the matched ground truth
+  // instance, or 0 if unmatched
+  std::vector<uint64_t> detection_matches;
+
+  // The detection score of each of the D detected instances
+  std::vector<double> detection_scores;
+
+  // Marks whether or not each of G instances was ignored from evaluation (e.g.,
+  // because it's outside area_range)
+  std::vector<bool> ground_truth_ignores;
+
+  // Marks whether or not each of D instances was ignored from evaluation (e.g.,
+  // because it's outside aRng)
+  std::vector<bool> detection_ignores;
+};
+
+template <class T>
+using ImageCategoryInstances = std::vector<std::vector<std::vector<T>>>;
+
+// C++ implementation of COCO API cocoeval.py::COCOeval.evaluateImg(). For each
+// combination of image, category, area range settings, and IOU thresholds to
+// evaluate, it matches detected instances to ground truth instances and stores
+// the results into a vector of ImageEvaluation results, which will be
+// interpreted by the COCOeval::Accumulate() function to produce precision-recall
+// curves. The parameters of nested vectors have the following semantics:
+//   image_category_ious[i][c][d][g] is the intersection over union of the d'th
+//     detected instance and g'th ground truth instance of
+//     category category_ids[c] in image image_ids[i]
+//   image_category_ground_truth_instances[i][c] is a vector of ground truth
+//     instances in image image_ids[i] of category category_ids[c]
+//   image_category_detection_instances[i][c] is a vector of detected
+//     instances in image image_ids[i] of category category_ids[c]
+std::vector<ImageEvaluation> EvaluateImages(
+    const std::vector<std::array<double, 2>>& area_ranges, // vector of 2-tuples
+    int max_detections,
+    const std::vector<double>& iou_thresholds,
+    const ImageCategoryInstances<std::vector<double>>& image_category_ious,
+    const ImageCategoryInstances<InstanceAnnotation>&
+        image_category_ground_truth_instances,
+    const ImageCategoryInstances<InstanceAnnotation>&
+        image_category_detection_instances);
+
+// C++ implementation of COCOeval.accumulate(), which generates precision
+// recall curves for each set of category, IOU threshold, detection area range,
+// and max number of detections parameters.
+// It is assumed that the parameter
+// evaluations is the return value of the function COCOeval::EvaluateImages(),
+// which was called with the same parameter settings params
+py::dict Accumulate(
+    const py::object& params,
+    const std::vector<ImageEvaluation>& evaluations);
+
+}  // namespace COCOeval
+
+PYBIND11_MODULE(fast_coco_eval, m)
+{
+  m.def("COCOevalAccumulate", &COCOeval::Accumulate, "COCOeval::Accumulate");
+  m.def(
+      "COCOevalEvaluateImages",
+      &COCOeval::EvaluateImages,
+      "COCOeval::EvaluateImages");
+  pybind11::class_<COCOeval::InstanceAnnotation>(m, "InstanceAnnotation")
+      .def(pybind11::init<uint64_t, double, double, bool, bool>());
+  pybind11::class_<COCOeval::ImageEvaluation>(m, "ImageEvaluation")
+      .def(pybind11::init<>());
+}
diff --git a/community/cv/ShipWise/mindyolo/csrc/fast_coco_eval/fast_coco_eval.cp38-win_amd64.pyd b/community/cv/ShipWise/mindyolo/csrc/fast_coco_eval/fast_coco_eval.cp38-win_amd64.pyd
new file mode 100644
index 0000000000000000000000000000000000000000..62e4aeeb5f3767910f1f7c33a6cc74babd48d2b6
Binary files /dev/null and b/community/cv/ShipWise/mindyolo/csrc/fast_coco_eval/fast_coco_eval.cp38-win_amd64.pyd differ
diff --git a/community/cv/ShipWise/mindyolo/csrc/fast_coco_eval/fast_coco_eval_api.py b/community/cv/ShipWise/mindyolo/csrc/fast_coco_eval/fast_coco_eval_api.py
new file mode 100644
index 0000000000000000000000000000000000000000..7be4d742c450752f0d9b9fd7d4ea79fae148c3f8
--- /dev/null
+++ b/community/cv/ShipWise/mindyolo/csrc/fast_coco_eval/fast_coco_eval_api.py
@@ -0,0 +1,149 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# This file is modified from
+# https://github.com/facebookresearch/detectron2/blob/master/detectron2/evaluation/fast_eval_api.py
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import copy
+import time
+
+import numpy as np
+from pycocotools.cocoeval import COCOeval
+
+from . import fast_coco_eval
+
+
+class COCOeval_fast(COCOeval):
+    """
+    This is a slightly modified version of the original COCO API, where the functions evaluateImg()
+    and accumulate() are implemented in C++ to speed up evaluation
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.module = fast_coco_eval
+
+    def evaluate(self):
+        """
+        Run per-image evaluation on given images and store results in self._evalImgs_cpp, a
+        datastructure that isn't readable from Python but is used by a C++ implementation of
+        accumulate(). Unlike the original COCO PythonAPI, we don't populate the datastructure
+        self.evalImgs because this datastructure is a computational bottleneck.
+        :return: None
+        """
+        tic = time.time()
+
+        print("Running per image evaluation...")
+        p = self.params
+        # add backward compatibility if useSegm is specified in params
+        if p.useSegm is not None:
+            p.iouType = "segm" if p.useSegm == 1 else "bbox"
+            print(
+                "useSegm (deprecated) is not None.
Running {} evaluation".format( + p.iouType + ) + ) + print("Evaluate annotation type *{}*".format(p.iouType)) + p.imgIds = list(np.unique(p.imgIds)) + if p.useCats: + p.catIds = list(np.unique(p.catIds)) + p.maxDets = sorted(p.maxDets) + self.params = p + + self._prepare() + + # loop through images, area range, max detection number + catIds = p.catIds if p.useCats else [-1] + + if p.iouType == "segm" or p.iouType == "bbox": + computeIoU = self.computeIoU + elif p.iouType == "keypoints": + computeIoU = self.computeOks + self.ious = { + (imgId, catId): computeIoU(imgId, catId) + for imgId in p.imgIds + for catId in catIds + } + + maxDet = p.maxDets[-1] + + # <<<< Beginning of code differences with original COCO API + def convert_instances_to_cpp(instances, is_det=False): + # Convert annotations for a list of instances in an image to a format that's fast + # to access in C++ + instances_cpp = [] + for instance in instances: + instance_cpp = self.module.InstanceAnnotation( + int(instance["id"]), + instance["score"] if is_det else instance.get("score", 0.0), + instance["area"], + bool(instance.get("iscrowd", 0)), + bool(instance.get("ignore", 0)), + ) + instances_cpp.append(instance_cpp) + return instances_cpp + + # Convert GT annotations, detections, and IOUs to a format that's fast to access in C++ + ground_truth_instances = [ + [convert_instances_to_cpp(self._gts[imgId, catId]) for catId in p.catIds] + for imgId in p.imgIds + ] + detected_instances = [ + [ + convert_instances_to_cpp(self._dts[imgId, catId], is_det=True) + for catId in p.catIds + ] + for imgId in p.imgIds + ] + ious = [[self.ious[imgId, catId] for catId in catIds] for imgId in p.imgIds] + + if not p.useCats: + # For each image, flatten per-category lists into a single list + ground_truth_instances = [ + [[o for c in i for o in c]] for i in ground_truth_instances + ] + detected_instances = [ + [[o for c in i for o in c]] for i in detected_instances + ] + + # Call C++ implementation of self.evaluateImgs() + self._evalImgs_cpp = self.module.COCOevalEvaluateImages( + p.areaRng, + maxDet, + p.iouThrs, + ious, + ground_truth_instances, + detected_instances, + ) + self._evalImgs = None + + self._paramsEval = copy.deepcopy(self.params) + toc = time.time() + print("COCOeval_opt.evaluate() finished in {:0.2f} seconds.".format(toc - tic)) + # >>>> End of code differences with original COCO API + + def accumulate(self): + """ + Accumulate per image evaluation results and store the result in self.eval. 
Does not + support changing parameter settings from those used by self.evaluate() + """ + print("Accumulating evaluation results...") + tic = time.time() + if not hasattr(self, "_evalImgs_cpp"): + print("Please run evaluate() first") + + self.eval = self.module.COCOevalAccumulate(self._paramsEval, self._evalImgs_cpp) + + # recall is num_iou_thresholds X num_categories X num_area_ranges X num_max_detections + self.eval["recall"] = np.array(self.eval["recall"]).reshape( + self.eval["counts"][:1] + self.eval["counts"][2:] + ) + + # precision and scores are num_iou_thresholds X num_recall_thresholds X num_categories X + # num_area_ranges X num_max_detections + self.eval["precision"] = np.array(self.eval["precision"]).reshape( + self.eval["counts"] + ) + self.eval["scores"] = np.array(self.eval["scores"]).reshape(self.eval["counts"]) + toc = time.time() + print( + "COCOeval_opt.accumulate() finished in {:0.2f} seconds.".format(toc - tic) + ) diff --git a/community/cv/ShipWise/mindyolo/models/__init__.py b/community/cv/ShipWise/mindyolo/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..683aaa6978e08cc1ad5568e0106462600689a8f7 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/__init__.py @@ -0,0 +1,33 @@ +from . import (heads, initializer, layers, losses, model_factory, yolov3, + yolov4, yolov5, yolov7, yolov8) + +from . import shipwise + +__all__ = [] + +__all__.extend(heads.__all__) +__all__.extend(layers.__all__) +__all__.extend(losses.__all__) +__all__.extend(yolov8.__all__) +__all__.extend(yolov7.__all__) +__all__.extend(yolov5.__all__) +__all__.extend(yolov4.__all__) +__all__.extend(yolov3.__all__) +__all__.extend(initializer.__all__) +__all__.extend(model_factory.__all__) +__all__.extend(shipwise.__all__) + +# fixme: since yolov7 is used as both the file and function name, we need to import * after __all__ + +from .heads import * +from .initializer import * +from .layers import * +from .losses import * +from .model_factory import * +from .yolov3 import * +from .yolov4 import * +from .yolov5 import * +from .yolov7 import * +from .yolov8 import * +from .yolox import * +from .shipwise import * diff --git a/community/cv/ShipWise/mindyolo/models/heads/__init__.py b/community/cv/ShipWise/mindyolo/models/heads/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..593e3df833d2e4179022f94b5bbef650bf25b6c3 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/heads/__init__.py @@ -0,0 +1,17 @@ +"""layers init""" +from .yolov3_head import * +from .yolov4_head import * +from .yolov5_head import * +from .yolov7_head import * +from .yolov8_head import * +from .yolox_head import * + + +__all__ = [ + "YOLOv3Head", + "YOLOv4Head", + "YOLOv5Head", + "YOLOv7Head", "YOLOv7AuxHead", + "YOLOv8Head", "YOLOv8SegHead", + "YOLOXHead" +] diff --git a/community/cv/ShipWise/mindyolo/models/heads/yolov3_head.py b/community/cv/ShipWise/mindyolo/models/heads/yolov3_head.py new file mode 100644 index 0000000000000000000000000000000000000000..e28e5ef795d132ab3e1e3b6a75ca981036eb6813 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/heads/yolov3_head.py @@ -0,0 +1,92 @@ +import math +import numpy as np + +import mindspore as ms +import mindspore.numpy as mnp +from mindspore import Parameter, Tensor, nn, ops + +from mindyolo.utils import logger +from ..layers.utils import meshgrid + + +class YOLOv3Head(nn.Cell): + """ + YOLOv3 Detect Head, convert the output result to a prediction box based on the anchor point. 
+ """ + + def __init__(self, nc=80, anchors=(), stride=(), ch=()): # detection layer + super(YOLOv3Head, self).__init__() + + assert isinstance(anchors, (tuple, list)) and len(anchors) > 0 + assert isinstance(stride, (tuple, list)) and len(stride) > 0 + assert isinstance(ch, (tuple, list)) and len(ch) > 0 + + self.nc = nc # number of classes + self.no = nc + 5 # number of outputs per anchor + self.nl = len(anchors) # number of detection layers + self.na = len(anchors[0]) // 2 # number of anchors + + # anchor preprocess + anchors = np.array(anchors) + stride = np.array(stride) + anchors, anchor_grid = self._check_anchor_order( + anchors=anchors.reshape((self.nl, -1, 2)), + anchor_grid=anchors.reshape((self.nl, 1, -1, 1, 1, 2)), + stride=stride, + ) + anchors = anchors / stride.reshape((-1, 1, 1)) + + self.stride = Parameter(Tensor(stride, ms.int32), requires_grad=False) + self.anchors = Parameter(Tensor(anchors, ms.float32), requires_grad=False) # shape(nl,na,2) + self.anchor_grid = Parameter(Tensor(anchor_grid, ms.float32), requires_grad=False) # shape(nl,1,na,1,1,2) + + self.m = nn.CellList( + [nn.Conv2d(x, self.no * self.na, 1, pad_mode="valid", has_bias=True) for x in ch] + ) # output conv + + def construct(self, x): + z = () # inference output + outs = () + for i in range(self.nl): + out = self.m[i](x[i]) # conv + bs, _, ny, nx = out.shape # (bs,255,20,20) + out = out.view(bs, self.na, self.no, ny, nx).transpose((0, 1, 3, 4, 2)) # (bs,3,20,20,85) + outs += (out,) + + if not self.training: # inference + grid_tensor = self._make_grid(nx, ny, out.dtype) + + y = ops.Sigmoid()(out) + y[..., 0:2] = (y[..., 0:2] * 2.0 - 0.5 + grid_tensor) * self.stride[i] # xy + y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh + z += (y.view(bs, -1, self.no),) + + return outs if self.training else (ops.concat(z, 1), outs) + + def initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency + # https://arxiv.org/abs/1708.02002 section 3.3 + m = self + for mi, s in zip(m.m, m.stride): # from + s = s.asnumpy() + b = mi.bias.view(m.na, -1).asnumpy() # conv.bias(255) to (3,85) + b[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) + b[:, 5:] += math.log(0.6 / (m.nc - 0.999999)) if cf is None else np.log(cf / cf.sum()) # cls + mi.bias = ops.assign(mi.bias, Tensor(b, ms.float32).view(-1)) + + @staticmethod + def _make_grid(nx=20, ny=20, dtype=ms.float32): + # FIXME: Not supported on a specific model of machine + xv, yv = meshgrid((mnp.arange(nx), mnp.arange(ny))) + return ops.cast(ops.stack((xv, yv), 2).view((1, 1, ny, nx, 2)), dtype) + + @staticmethod + def _check_anchor_order(anchors, anchor_grid, stride): + # Check anchor order against stride order for YOLO Detect() module m, and correct if necessary + a = np.prod(anchor_grid, -1).reshape((-1,)) # anchor area + da = a[-1] - a[0] # delta a + ds = stride[-1] - stride[0] # delta s + if np.sign(da) != np.sign(ds): # same order + logger.warning("Reversing anchor order") + anchors = anchors[::-1, ...] + anchor_grid = anchor_grid[::-1, ...] 
+ return anchors, anchor_grid diff --git a/community/cv/ShipWise/mindyolo/models/heads/yolov4_head.py b/community/cv/ShipWise/mindyolo/models/heads/yolov4_head.py new file mode 100644 index 0000000000000000000000000000000000000000..06a46f0fdc28543f5b5a48e55d281c8f17f03fa6 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/heads/yolov4_head.py @@ -0,0 +1,121 @@ +import mindspore as ms +from mindspore import Tensor, nn, ops + + +class YOLOv4Head(nn.Cell): + """ + YOLOv4 Detect Head, convert the output result to a prediction box based on the anchor point. + """ + + def __init__(self, nc=80, anchors=(), ch=()): # detection layer + super(YOLOv4Head, self).__init__() + + assert isinstance(anchors, (tuple, list)) and len(anchors) > 0 + assert isinstance(ch, (tuple, list)) and len(ch) > 0 + + self.nc = nc # number of classes + self.no = nc + 5 # number of outputs per anchor + self.nl = 3 # number of detection layers + self.na = len(anchors) // 3 # number of anchors + + self.m = nn.CellList( + [nn.Conv2d(x, self.no * self.na, 1, pad_mode="valid", has_bias=True) for x in ch] + ) # output conv + + # prediction on the default anchor boxes + self.detect_1 = DetectionBlock("l", anchors, self.no) + self.detect_2 = DetectionBlock("m", anchors, self.no) + self.detect_3 = DetectionBlock("s", anchors, self.no) + + def construct(self, x): + big_object_output = self.m[0](x[0]) + medium_object_output = self.m[1](x[1]) + small_object_output = self.m[2](x[2]) + bs = small_object_output.shape[0] + output_big = self.detect_1(big_object_output) + output_me = self.detect_2(medium_object_output) + output_small = self.detect_3(small_object_output) + if not self.training: + big = output_big.view(bs, -1, self.no) + me = output_me.view(bs, -1, self.no) + small = output_small.view(bs, -1, self.no) + return ops.concat((big, me, small), 1), (output_big, output_me, output_small) + + return output_big, output_me, output_small + + +class DetectionBlock(nn.Cell): + """ + YOLOv4 detection Network. It will finally output the detection result. 
+ """ + + def __init__(self, scale, anchor_scales, no): + super(DetectionBlock, self).__init__() + if scale == "s": + idx = (6, 7, 8) + self.scale_x_y = 1.2 + self.offset_x_y = 0.1 + self.stride = 8 + elif scale == "m": + idx = (3, 4, 5) + self.scale_x_y = 1.1 + self.offset_x_y = 0.05 + self.stride = 16 + elif scale == "l": + idx = (0, 1, 2) + self.scale_x_y = 1.05 + self.offset_x_y = 0.025 + self.stride = 32 + else: + raise KeyError("Invalid scale value for DetectionBlock") + self.anchors = Tensor([anchor_scales[i] for i in idx], ms.float32) + self.num_anchors_per_scale = 3 + self.num_attrib = no + + self.sigmoid = ops.Sigmoid() + + def construct(self, x): + """construct method""" + num_batch = x.shape[0] + grid_size = x.shape[2:4] + input_shape = [size * self.stride for size in grid_size] + input_shape = Tensor(tuple(input_shape[::-1]), ms.float32) + + # Reshape and transpose the feature to [n, grid_size[0], grid_size[1], 3, num_attrib] + prediction = x.view(num_batch, self.num_anchors_per_scale, self.num_attrib, grid_size[0], grid_size[1]) + prediction = prediction.transpose((0, 3, 4, 1, 2)) + + range_x = range(grid_size[1]) + range_y = range(grid_size[0]) + grid_x = ops.cast(ops.tuple_to_array(range_x), ms.float32) + grid_y = ops.cast(ops.tuple_to_array(range_y), ms.float32) + # Tensor of shape [grid_size[0], grid_size[1], 1, 1] representing the coordinate of x/y axis for each grid + # [batch, gridx, gridy, 1, 1] + grid_x = ops.tile(grid_x.view(1, 1, -1, 1, 1), (1, grid_size[0], 1, 1, 1)) + grid_y = ops.tile(grid_y.view(1, -1, 1, 1, 1), (1, 1, grid_size[1], 1, 1)) + # Shape is [grid_size[0], grid_size[1], 1, 2] + grid = ops.concat((grid_x, grid_y), -1) + + box_xy = prediction[:, :, :, :, :2] + box_wh = prediction[:, :, :, :, 2:4] + box_confidence = prediction[:, :, :, :, 4:5] + box_probs = prediction[:, :, :, :, 5:] + + # gridsize1 is x + # gridsize0 is y + box_xy = (self.scale_x_y * self.sigmoid(box_xy) - self.offset_x_y + grid) / ops.cast( + ops.tuple_to_array((grid_size[1], grid_size[0])), ms.float32 + ) + # box_wh is w->h + box_wh = ops.exp(box_wh) * self.anchors / input_shape + box_confidence = self.sigmoid(box_confidence) + box_probs = self.sigmoid(box_probs) + + if self.training: + return prediction, box_xy, box_wh + box_xy *= input_shape + box_wh *= input_shape + return ops.concat((box_xy.astype(ms.float32), + box_wh.astype(ms.float32), + box_confidence.astype(ms.float32), + box_probs.astype(ms.float32)), -1) diff --git a/community/cv/ShipWise/mindyolo/models/heads/yolov5_head.py b/community/cv/ShipWise/mindyolo/models/heads/yolov5_head.py new file mode 100644 index 0000000000000000000000000000000000000000..183e503ca318cec1854d82887228b1c5e0b0fcf5 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/heads/yolov5_head.py @@ -0,0 +1,105 @@ +import math +import numpy as np + +import mindspore as ms +import mindspore.numpy as mnp +from mindspore import Parameter, Tensor, nn, ops + +from mindyolo.utils import logger +from ..layers.utils import meshgrid + + +class YOLOv5Head(nn.Cell): + def __init__(self, nc=80, anchors=(), stride=(), ch=()): # detection layer + super(YOLOv5Head, self).__init__() + + assert isinstance(anchors, (tuple, list)) and len(anchors) > 0 + assert isinstance(stride, (tuple, list)) and len(stride) > 0 + assert isinstance(ch, (tuple, list)) and len(ch) > 0 + + self.nc = nc # number of classes + self.no = nc + 5 # number of outputs per anchor + self.nl = len(anchors) # number of detection layers + self.na = len(anchors[0]) // 2 # number of anchors + + # 
anchor preprocess + anchors = np.array(anchors) + stride = np.array(stride) + anchors, anchor_grid = self._check_anchor_order( + anchors=anchors.reshape((self.nl, -1, 2)), + anchor_grid=anchors.reshape((self.nl, 1, -1, 1, 1, 2)), + stride=stride, + ) + anchors = anchors / stride.reshape((-1, 1, 1)) + + self.stride = Parameter(Tensor(stride, ms.int32), requires_grad=False) + self.anchors = Parameter(Tensor(anchors, ms.float32), requires_grad=False) # shape(nl,na,2) + self.anchor_grid = Parameter(Tensor(anchor_grid, ms.float32), requires_grad=False) # shape(nl,1,na,1,1,2) + + self.m = nn.CellList( + [nn.Conv2d(x, self.no * self.na, 1, pad_mode="valid", has_bias=True) for x in ch] + ) # output conv + + def construct(self, x): + z = () # inference output + outs = () + for i in range(self.nl): + out = self.m[i](x[i]) # conv + bs, _, ny, nx = out.shape # (bs,255,20,20) + out = ops.Transpose()(out.view(bs, self.na, self.no, ny, nx), (0, 1, 3, 4, 2)) # (bs,3,20,20,85) + out = out + outs += (out,) + + if not self.training: # inference + grid_tensor = self._make_grid(nx, ny, out.dtype) + + y = ops.Sigmoid()(out) + y[..., 0:2] = (y[..., 0:2] * 2.0 - 0.5 + grid_tensor) * self.stride[i] # xy + y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh + z += (y.view(bs, -1, self.no),) + + # return outs + return outs if self.training else (ops.concat(z, 1), outs) + + def initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency + # https://arxiv.org/abs/1708.02002 section 3.3 + m = self + for mi, s in zip(m.m, m.stride): # from + s = s.asnumpy() + b = mi.bias.view(m.na, -1).asnumpy() # conv.bias(255) to (3,85) + b[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) + b[:, 5:] += math.log(0.6 / (m.nc - 0.999999)) if cf is None else np.log(cf / cf.sum()) # cls + mi.bias = ops.assign(mi.bias, Tensor(b, ms.float32).view(-1)) + + @staticmethod + def _make_grid(nx=20, ny=20, dtype=ms.float32): + # FIXME: Not supported on a specific model of machine + xv, yv = meshgrid((mnp.arange(nx), mnp.arange(ny))) + return ops.cast(ops.stack((xv, yv), 2).view((1, 1, ny, nx, 2)), dtype) + + @staticmethod + def _check_anchor_order(anchors, anchor_grid, stride): + # Check anchor order against stride order for YOLO Detect() module m, and correct if necessary + a = np.prod(anchor_grid, -1).reshape((-1,)) # anchor area + da = a[-1] - a[0] # delta a + ds = stride[-1] - stride[0] # delta s + if np.sign(da) != np.sign(ds): # same order + logger.warning("Reversing anchor order") + anchors = anchors[::-1, ...] + anchor_grid = anchor_grid[::-1, ...] 
+ return anchors, anchor_grid + + def convert(self, z): + z = ops.concat(z, 1) + box = z[:, :, :4] + conf = z[:, :, 4:5] + score = z[:, :, 5:] + score *= conf + convert_matrix = get_convert_matrix() + box = ops.matmul(box, convert_matrix) + return (box, score) + + +@ops.constexpr(reuse_result=True) +def get_convert_matrix(): + return Tensor(np.array([[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]]), dtype=ms.float32) diff --git a/community/cv/ShipWise/mindyolo/models/heads/yolov7_head.py b/community/cv/ShipWise/mindyolo/models/heads/yolov7_head.py new file mode 100644 index 0000000000000000000000000000000000000000..9c60cde9b0bb0e7fd847911ad1c4b015383ca676 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/heads/yolov7_head.py @@ -0,0 +1,207 @@ +import math +import numpy as np + +import mindspore as ms +import mindspore.numpy as mnp +from mindspore import Parameter, Tensor, nn, ops + +from mindyolo.utils import logger +from ..layers.implicit import ImplicitA, ImplicitM +from ..layers.utils import meshgrid + + +class YOLOv7Head(nn.Cell): + """ + YOLOv7 Detect Head, convert the output result to a prediction box based on the anchor point. + """ + + def __init__(self, nc=80, anchors=(), stride=(), ch=()): # detection layer + super(YOLOv7Head, self).__init__() + + assert isinstance(anchors, (tuple, list)) and len(anchors) > 0 + assert isinstance(stride, (tuple, list)) and len(stride) > 0 + assert isinstance(ch, (tuple, list)) and len(ch) > 0 + + self.nc = nc # number of classes + self.no = nc + 5 # number of outputs per anchor + self.nl = len(anchors) # number of detection layers + self.na = len(anchors[0]) // 2 # number of anchors + + # anchor preprocess + anchors = np.array(anchors) + stride = np.array(stride) + anchors, anchor_grid = self._check_anchor_order( + anchors=anchors.reshape((self.nl, -1, 2)), + anchor_grid=anchors.reshape((self.nl, 1, -1, 1, 1, 2)), + stride=stride, + ) + anchors = anchors / stride.reshape((-1, 1, 1)) + + self.stride = Parameter(Tensor(stride, ms.int32), requires_grad=False) + self.anchors = Parameter(Tensor(anchors, ms.float32), requires_grad=False) # shape(nl,na,2) + self.anchor_grid = Parameter(Tensor(anchor_grid, ms.float32), requires_grad=False) # shape(nl,1,na,1,1,2) + self.convert_matrix = Parameter( + Tensor(np.array([[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]]), dtype=ms.float32), + requires_grad=False, + ) + + self.m = nn.CellList( + [nn.Conv2d(x, self.no * self.na, 1, pad_mode="valid", has_bias=True) for x in ch] + ) # output conv + + self.ia = nn.CellList([ImplicitA(x) for x in ch]) + self.im = nn.CellList([ImplicitM(self.no * self.na) for _ in ch]) + + def construct(self, x): + z = () # inference output + outs = () + for i in range(self.nl): + out = self.m[i](self.ia[i](x[i])) # conv + out = self.im[i](out) + bs, _, ny, nx = out.shape # (bs,255,20,20) + out = out.view(bs, self.na, self.no, ny, nx).transpose((0, 1, 3, 4, 2)) # (bs,3,20,20,85) + outs += (out,) + + if not self.training: # inference + grid_tensor = self._make_grid(nx, ny, out.dtype) + + # y = ops.sigmoid(out) + y = ops.Sigmoid()(out) + y[..., 0:2] = (y[..., 0:2] * 2.0 - 0.5 + grid_tensor) * self.stride[i] # xy + y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh + z += (y.view(bs, -1, self.no),) + + return outs if self.training else (ops.concat(z, 1), outs) + + def initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency + # https://arxiv.org/abs/1708.02002 section 3.3 + m = self + for mi, s in 
zip(m.m, m.stride): # from + s = s.asnumpy() + b = mi.bias.view(m.na, -1).asnumpy() # conv.bias(255) to (3,85) + b[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) + b[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else np.log(cf / cf.sum()) # cls + mi.bias = ops.assign(mi.bias, Tensor(b, ms.float32).view(-1)) + + @staticmethod + def _make_grid(nx=20, ny=20, dtype=ms.float32): + # FIXME: Not supported on a specific model of machine + xv, yv = meshgrid((mnp.arange(nx), mnp.arange(ny))) + return ops.cast(ops.stack((xv, yv), 2).view((1, 1, ny, nx, 2)), dtype) + + @staticmethod + def _check_anchor_order(anchors, anchor_grid, stride): + # Check anchor order against stride order for YOLO Detect() module m, and correct if necessary + a = np.prod(anchor_grid, -1).reshape((-1,)) # anchor area + da = a[-1] - a[0] # delta a + ds = stride[-1] - stride[0] # delta s + if np.sign(da) != np.sign(ds): # same order + logger.warning("Reversing anchor order") + anchors = anchors[::-1, ...] + anchor_grid = anchor_grid[::-1, ...] + return anchors, anchor_grid + + +class YOLOv7AuxHead(nn.Cell): + """ + YOLOv7 Detect Aux Head, convert the output result to a prediction box based on the anchor point. + """ + + def __init__(self, nc=80, anchors=(), stride=(), ch=()): # detection layer + super(YOLOv7AuxHead, self).__init__() + + assert isinstance(anchors, (tuple, list)) and len(anchors) > 0 + assert isinstance(stride, (tuple, list)) and len(stride) > 0 + assert isinstance(ch, (tuple, list)) and len(ch) > 0 + + self.nc = nc # number of classes + self.no = nc + 5 # number of outputs per anchor + self.nl = len(anchors) # number of detection layers + self.na = len(anchors[0]) // 2 # number of anchors + + # anchor preprocess + anchors = np.array(anchors) + stride = np.array(stride) + anchors, anchor_grid = self._check_anchor_order( + anchors=anchors.reshape((self.nl, -1, 2)), + anchor_grid=anchors.reshape((self.nl, 1, -1, 1, 1, 2)), + stride=stride, + ) + anchors /= stride.reshape((-1, 1, 1)) + + self.stride = Parameter(Tensor(stride, ms.int32), requires_grad=False) + self.anchors = Parameter(Tensor(anchors, ms.float32), requires_grad=False) # shape(nl,na,2) + self.anchor_grid = Parameter(Tensor(anchor_grid, ms.float32), requires_grad=False) # shape(nl,1,na,1,1,2) + self.convert_matrix = Parameter( + Tensor([[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], dtype=ms.float32), + requires_grad=False, + ) + + self.m = nn.CellList( + [nn.Conv2d(x, self.no * self.na, 1, pad_mode="valid", has_bias=True) for x in ch[: self.nl]] + ) # output conv + self.m2 = nn.CellList( + [nn.Conv2d(x, self.no * self.na, 1, pad_mode="valid", has_bias=True) for x in ch[self.nl :]] + ) # output conv + + self.ia = nn.CellList([ImplicitA(x) for x in ch[: self.nl]]) + self.im = nn.CellList([ImplicitM(self.no * self.na) for _ in ch[: self.nl]]) + + def construct(self, x): + z = () # inference output + outs_1 = () + outs_2 = () + for i in range(self.nl): + out1 = self.m[i](self.ia[i](x[i])) # conv + out1 = self.im[i](out1) + bs, _, ny, nx = out1.shape # x(bs,255,20,20) to x(bs,3,20,20,85) + out1 = ops.Transpose()(out1.view(bs, self.na, self.no, ny, nx), (0, 1, 3, 4, 2)) + outs_1 += (out1,) + + out2 = self.m2[i](x[i + self.nl]) + out2 = ops.Transpose()(out2.view(bs, self.na, self.no, ny, nx), (0, 1, 3, 4, 2)) + outs_2 += (out2,) + + if not self.training: # inference + grid_tensor = self._make_grid(nx, ny, out1.dtype) + + # y = ops.sigmoid(out1) + y = ops.Sigmoid()(out1) + y[..., 0:2] = (y[..., 0:2] * 2.0 - 
0.5 + grid_tensor) * self.stride[i] # xy + y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh + z += (y.view(bs, -1, self.no),) + outs = outs_1 + outs_2 + return outs if self.training else (ops.concat(z, 1), outs_1) + + def _initialize_aux_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency + # https://arxiv.org/abs/1708.02002 section 3.3 + m = self + for mi, mi2, s in zip(m.m, m.m2, m.stride): # from + s = s.asnumpy() + + b = mi.bias.view(m.na, -1).asnumpy() # conv.bias(255) to (3,85) + b[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) + b[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else np.log(cf / cf.sum()) # cls + mi.bias = ops.assign(mi.bias, Tensor(b, ms.float32).view(-1)) + + b2 = mi2.bias.view(m.na, -1).asnumpy() # conv.bias(255) to (3,85) + b2[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) + b2[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else np.log(cf / cf.sum()) # cls + mi2.bias = ops.assign(mi2.bias, Tensor(b2, ms.float32).view(-1)) + + @staticmethod + def _make_grid(nx=20, ny=20, dtype=ms.float32): + xv, yv = meshgrid((mnp.arange(nx), mnp.arange(ny))) + return ops.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).astype(dtype) + + @staticmethod + def _check_anchor_order(anchors, anchor_grid, stride): + # Check anchor order against stride order for YOLO Detect() module m, and correct if necessary + a = np.prod(anchor_grid, -1).reshape((-1,)) # anchor area + da = a[-1] - a[0] # delta a + ds = stride[-1] - stride[0] # delta s + if np.sign(da) != np.sign(ds): # same order + logger.warning("Reversing anchor order") + anchors = anchors[::-1, ...] + anchor_grid = anchor_grid[::-1, ...] + return anchors, anchor_grid diff --git a/community/cv/ShipWise/mindyolo/models/heads/yolov8_head.py b/community/cv/ShipWise/mindyolo/models/heads/yolov8_head.py new file mode 100644 index 0000000000000000000000000000000000000000..c821409fc5c85f1f59e4668437b35465d1576e3b --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/heads/yolov8_head.py @@ -0,0 +1,155 @@ +import math +import numpy as np + +import mindspore as ms +import mindspore.numpy as mnp +from mindspore import Parameter, Tensor, nn, ops + +from ..layers import DFL, ConvNormAct, Identity +from ..layers.utils import meshgrid + + +class YOLOv8Head(nn.Cell): + # YOLOv8 Detect head for detection models + def __init__(self, nc=80, reg_max=16, stride=(), ch=(), sync_bn=False): # detection layer + super().__init__() + # self.dynamic = False # force grid reconstruction + + assert isinstance(stride, (tuple, list)) and len(stride) > 0 + assert isinstance(ch, (tuple, list)) and len(ch) > 0 + + self.nc = nc # number of classes + self.nl = len(ch) # number of detection layers + self.reg_max = reg_max # DFL channels (ch[0] // 16 to scale 4/8/12/16/20 for n/s/m/l/x) + self.no = nc + self.reg_max * 4 # number of outputs per anchor + self.stride = Parameter(Tensor(stride, ms.int32), requires_grad=False) + + c2, c3 = max((16, ch[0] // 4, self.reg_max * 4)), max(ch[0], self.nc) # channels + self.cv2 = nn.CellList( + [ + nn.SequentialCell( + [ + ConvNormAct(x, c2, 3, sync_bn=sync_bn), + ConvNormAct(c2, c2, 3, sync_bn=sync_bn), + nn.Conv2d(c2, 4 * self.reg_max, 1, has_bias=True), + ] + ) + for x in ch + ] + ) + self.cv3 = nn.CellList( + [ + nn.SequentialCell( + [ + ConvNormAct(x, c3, 3, sync_bn=sync_bn), + ConvNormAct(c3, c3, 3, sync_bn=sync_bn), + nn.Conv2d(c3, self.nc, 1, has_bias=True), + ] + ) + for x in ch + ] + ) + self.dfl = DFL(self.reg_max) if 
self.reg_max > 1 else Identity() + + def construct(self, x): + shape = x[0].shape # BCHW + out = () + for i in range(self.nl): + out += (ops.concat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1),) + + p = None + if not self.training: + _anchors, _strides = self.make_anchors(out, self.stride, 0.5) + _anchors, _strides = _anchors.swapaxes(0, 1), _strides.swapaxes(0, 1) + _x = () + for i in range(len(out)): + _x += (out[i].view(shape[0], self.no, -1),) + _x = ops.concat(_x, 2) + box, cls = _x[:, : self.reg_max * 4, :], _x[:, self.reg_max * 4 : self.reg_max * 4 + self.nc, :] + # box, cls = ops.concat([xi.view(shape[0], self.no, -1) for xi in x], 2).split((self.reg_max * 4, self.nc), 1) + dbox = self.dist2bbox(self.dfl(box), ops.expand_dims(_anchors, 0), xywh=True, axis=1) * _strides + p = ops.concat((dbox, ops.Sigmoid()(cls)), 1) + p = ops.transpose(p, (0, 2, 1)) # (bs, no-84, nbox) -> (bs, nbox, no-84) + + return out if self.training else (p, out) + + @staticmethod + def make_anchors(feats, strides, grid_cell_offset=0.5): + """Generate anchors from features.""" + anchor_points, stride_tensor = (), () + dtype = feats[0].dtype + for i, stride in enumerate(strides): + _, _, h, w = feats[i].shape + sx = mnp.arange(w, dtype=dtype) + grid_cell_offset # shift x + sy = mnp.arange(h, dtype=dtype) + grid_cell_offset # shift y + # FIXME: Not supported on a specific model of machine + sy, sx = meshgrid((sy, sx), indexing="ij") + anchor_points += (ops.stack((sx, sy), -1).view(-1, 2),) + stride_tensor += (ops.ones((h * w, 1), dtype) * stride,) + return ops.concat(anchor_points), ops.concat(stride_tensor) + + @staticmethod + def dist2bbox(distance, anchor_points, xywh=True, axis=-1): + """Transform distance(ltrb) to box(xywh or xyxy).""" + lt, rb = ops.split(distance, split_size_or_sections=2, axis=axis) + x1y1 = anchor_points - lt + x2y2 = anchor_points + rb + if xywh: + c_xy = (x1y1 + x2y2) / 2 + wh = x2y2 - x1y1 + return ops.concat((c_xy, wh), axis) # xywh bbox + return ops.concat((x1y1, x2y2), axis) # xyxy bbox + + def initialize_biases(self): + # Initialize Detect() biases, WARNING: requires stride availability + m = self + for a, b, s in zip(m.cv2, m.cv3, m.stride): # from + s = s.asnumpy() + a[-1].bias = ops.assign(a[-1].bias, Tensor(np.ones(a[-1].bias.shape), ms.float32)) + b_np = b[-1].bias.data.asnumpy() + b_np[: m.nc] = math.log(5 / m.nc / (640 / int(s)) ** 2) + b[-1].bias = ops.assign(b[-1].bias, Tensor(b_np, ms.float32)) + + +class YOLOv8SegHead(YOLOv8Head): + """YOLOv8 Segment head for segmentation models.""" + + def __init__(self, nc=80, reg_max=16, nm=32, npr=256, stride=(), ch=()): + """Initialize the YOLO model attributes such as the number of masks, prototypes, and the convolution layers.""" + super().__init__(nc, reg_max, stride, ch) + self.nm = nm # number of masks + self.npr = npr # number of protos + self.proto = Proto(ch[0], self.npr, self.nm) # protos + self.detect = YOLOv8Head.construct + + c4 = max(ch[0] // 4, self.nm) + self.cv4 = nn.CellList([nn.SequentialCell(ConvNormAct(x, c4, 3), ConvNormAct(c4, c4, 3), nn.Conv2d(c4, self.nm, 1, has_bias=True)) for x in ch]) + + def construct(self, x): + """Return model outputs and mask coefficients if training, otherwise return outputs and mask coefficients.""" + p = self.proto(x[0]) # mask protos + bs = p.shape[0] # batch size + + mc = ops.cat([self.cv4[i](x[i]).view(bs, self.nm, -1) for i in range(self.nl)], 2) # mask coefficients + x = self.detect(self, x) # x: out if self.training else (p, out) + if self.training: + return x, mc, p + + mc = 
ops.transpose(mc, (0, 2, 1)) # (bs, 32, nbox) -> (bs, nbox, 32) + # cat: (bs, nbox, no-84), (bs, nbox, 32) -> (bs, nbox, 84+32) + return ops.cat([x[0], mc], 2), (x[1], mc, p) + + +class Proto(nn.Cell): + """YOLOv8 mask Proto module for segmentation models.""" + + def __init__(self, c1, c_=256, c2=32): # ch_in, number of protos, number of masks + super().__init__() + self.cv1 = ConvNormAct(c1, c_, k=3) + self.upsample = nn.Conv2dTranspose(c_, c_, 2, 2, padding=0, has_bias=True) # nn.Upsample(scale_factor=2, mode='nearest') + self.cv2 = ConvNormAct(c_, c_, k=3) + self.cv3 = ConvNormAct(c_, c2) + + def construct(self, x): + """Performs a forward pass through layers using an upsampled input image.""" + return self.cv3(self.cv2(self.upsample(self.cv1(x)))) diff --git a/community/cv/ShipWise/mindyolo/models/heads/yolox_head.py b/community/cv/ShipWise/mindyolo/models/heads/yolox_head.py new file mode 100644 index 0000000000000000000000000000000000000000..a74598a9b5ee84488d3590b87e88915db9328610 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/heads/yolox_head.py @@ -0,0 +1,127 @@ +import math + +import mindspore as ms +from mindspore import Tensor, nn, ops +from mindspore import numpy as mnp +from mindspore.common import initializer as init + +from mindyolo.models.layers.conv import ConvNormAct, DWConvNormAct +from ..layers.utils import meshgrid + + +class YOLOXHead(nn.Cell): + def __init__( + self, + nc=80, + stride=(8, 16, 32), + ch=(256, 512, 1024), + is_standard_backbone=True, + act=True, + depth_wise=False, + sync_bn=False, + ): + """ + YOlOx head + Args: + is_standard_backbone: whether the predecessor backbone is a standard one or darknet53. default, True + """ + super().__init__() + assert isinstance(stride, (tuple, list)) and len(stride) > 0 + assert isinstance(ch, (tuple, list)) and len(ch) > 0 + + self.nc = nc + self.nl = len(ch) + self.no = nc + 4 + 1 + self.stride = Tensor(stride, ms.int32) + + self.stems = nn.CellList() # len = num_layer + self.cls_convs = nn.CellList() + self.reg_convs = nn.CellList() + self.cls_preds = nn.CellList() + self.reg_preds = nn.CellList() + self.obj_preds = nn.CellList() + + hidden_ch = ch[2] // 4 if is_standard_backbone else 256 + HeadCNA = DWConvNormAct if depth_wise else ConvNormAct + for i in range(self.nl): # three kind of resolution, 80, 40, 20 + self.stems.append(ConvNormAct(ch[i], hidden_ch, 1, act=act, sync_bn=sync_bn)) + self.cls_convs.append( + nn.SequentialCell( + [ + HeadCNA(hidden_ch, hidden_ch, 3, act=act, sync_bn=sync_bn), + HeadCNA(hidden_ch, hidden_ch, 3, act=act, sync_bn=sync_bn), + ] + ) + ) + self.reg_convs.append( + nn.SequentialCell( + [ + HeadCNA(hidden_ch, hidden_ch, 3, act=act, sync_bn=sync_bn), + HeadCNA(hidden_ch, hidden_ch, 3, act=act, sync_bn=sync_bn), + ] + ) + ) + self.cls_preds.append(nn.Conv2d(hidden_ch, self.nc, 1, pad_mode="pad", has_bias=True)) + self.reg_preds.append(nn.Conv2d(hidden_ch, 4, 1, pad_mode="pad", has_bias=True)) + self.obj_preds.append(nn.Conv2d(hidden_ch, 1, 1, pad_mode="pad", has_bias=True)) + + def construct(self, feat_list): + assert isinstance(feat_list, (tuple, list)) and len(feat_list) == self.nl + outputs = [] + for i in range(self.nl): # 80, 40, 20 + # Get head features + x = self.stems[i](feat_list[i]) + + cls_feat = self.cls_convs[i](x) + cls_output = self.cls_preds[i](cls_feat) + + reg_feat = self.reg_convs[i](x) + reg_output = self.reg_preds[i](reg_feat) + obj_output = self.obj_preds[i](reg_feat) + + # Convert to origin image scale (640) + output = ( + ops.concat([reg_output, 
obj_output, cls_output], 1) + if self.training + else ops.concat([reg_output, ops.sigmoid(obj_output), ops.sigmoid(cls_output)], 1) + ) + output = self.convert_to_origin_scale(output, stride=self.stride[i]) + outputs.append(output) + outputs_cat = ops.concat(outputs, 1) + return outputs_cat if self.training else (outputs_cat, 1) + + def initialize_biases(self, prior_prob=1e-2): + for i in range(self.nl): # 80, 40, 20 + for cell in [self.cls_preds[i], self.obj_preds[i]]: + cell.bias.set_data( + init.initializer(-math.log((1 - prior_prob) / prior_prob), cell.bias.shape, cell.bias.dtype) + ) + + def convert_to_origin_scale(self, output, stride): + """map to origin image scale for each fpn""" + batch_size = ops.shape(output)[0] + grid_size = ops.shape(output)[2:4] + stride = ops.cast(stride, output.dtype) + + # reshape predictions + output = ops.transpose(output, (0, 2, 3, 1)) # (bs,85,80,80)-->(bs, 80, 80, 85) + output = ops.reshape(output, (batch_size, 1 * grid_size[0] * grid_size[1], -1)) # bs, 6400, 85 + + # make grid + grid = self._make_grid(nx=grid_size[1], ny=grid_size[0], dtype=output.dtype) # (1,1,80,80,2) + grid = ops.reshape(grid, (1, -1, 2)) # grid(1, 6400, 2) + + # feature map scale to origin scale + output_xy = output[..., :2] + output_xy = (output_xy + grid) * stride + output_wh = output[..., 2:4] + output_wh = ops.exp(output_wh) * stride + output_other = output[..., 4:] + output_t = ops.concat([output_xy, output_wh, output_other], -1) + return output_t # bs, 6400, 85 + + @staticmethod + def _make_grid(nx=20, ny=20, dtype=ms.float32): + # FIXME: Not supported on a specific model of machine + xv, yv = meshgrid((mnp.arange(nx), mnp.arange(ny))) + return ops.cast(ops.stack((xv, yv), 2).view((1, 1, ny, nx, 2)), dtype) diff --git a/community/cv/ShipWise/mindyolo/models/initializer.py b/community/cv/ShipWise/mindyolo/models/initializer.py new file mode 100644 index 0000000000000000000000000000000000000000..39f42f3503cc15112b47367027a7c63c7287ae25 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/initializer.py @@ -0,0 +1,45 @@ +import math + +from mindspore import nn +from mindspore.common import initializer as init + +__all__ = ["initialize_defult"] + + +def initialize_defult(model): + for _, cell in model.cells_and_names(): + if isinstance(cell, nn.Conv2d): + cell.weight.set_data( + init.initializer(init.HeUniform(negative_slope=math.sqrt(5)), cell.weight.shape, cell.weight.dtype) + ) + if cell.bias is not None: + fan_in, _ = _calculate_fan_in_and_fan_out(cell.weight.shape) + bound = 1 / math.sqrt(fan_in) + cell.bias.set_data(init.initializer(init.Uniform(bound), cell.bias.shape, cell.bias.dtype)) + elif isinstance(cell, nn.Dense): + cell.weight.set_data( + init.initializer(init.HeUniform(negative_slope=math.sqrt(5)), cell.weight.shape, cell.weight.dtype) + ) + if cell.bias is not None: + fan_in, _ = _calculate_fan_in_and_fan_out(cell.weight.shape) + bound = 1 / math.sqrt(fan_in) + cell.bias.set_data(init.initializer(init.Uniform(bound), cell.bias.shape, cell.bias.dtype)) + + +def _calculate_fan_in_and_fan_out(shape): + dimensions = len(shape) + if dimensions < 2: + raise ValueError("Fan in and fan out can not be computed for tensor with fewer than 2 dimensions") + + num_input_fmaps = shape[1] + num_output_fmaps = shape[0] + receptive_field_size = 1 + if dimensions > 2: + # math.prod is not always available, accumulate the product manually + # we could use functools.reduce but that is not supported by TorchScript + for s in shape[2:]: + receptive_field_size *= s + fan_in = 
num_input_fmaps * receptive_field_size + fan_out = num_output_fmaps * receptive_field_size + + return fan_in, fan_out diff --git a/community/cv/ShipWise/mindyolo/models/layers/__init__.py b/community/cv/ShipWise/mindyolo/models/layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a86b9485a3abc0a7b5190bd683f2569cb18a788c --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/layers/__init__.py @@ -0,0 +1,37 @@ +"""layers init""" +from .activation import * +from .bottleneck import * +from .common import * +from .conv import * +from .implicit import * +from .pool import * +from .spp import * +from .upsample import * + +__all__ = [ + "Swish", + "Shortcut", + "Concat", + "ReOrg", + "Identity", + "DFL", + "ConvNormAct", + "RepConv", + "DownC", + "Focus", + "Bottleneck", + "C3", + "C2f", + "DWConvNormAct", + "DWBottleneck", + "DWC3", + "ImplicitA", + "ImplicitM", + "MP", + "SP", + "MaxPool2d", + "SPPCSPC", + "SPPF", + "Upsample", + "Residualblock", +] diff --git a/community/cv/ShipWise/mindyolo/models/layers/activation.py b/community/cv/ShipWise/mindyolo/models/layers/activation.py new file mode 100644 index 0000000000000000000000000000000000000000..59e051bf92bc607576baa9ed7da0260f0bd348f8 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/layers/activation.py @@ -0,0 +1,17 @@ +""" +Custom activation operators. +""" +from mindspore import nn, ops + + +class Swish(nn.Cell): + """ + Swish activation function: x * sigmoid(βx). If beta equals 1, you can use nn.SiLU instead. + """ + + def __init__(self, beta=1.0): + super().__init__() + self.beta = beta + + def construct(self, x): + return x * ops.sigmoid(self.beta * x) diff --git a/community/cv/ShipWise/mindyolo/models/layers/bottleneck.py b/community/cv/ShipWise/mindyolo/models/layers/bottleneck.py new file mode 100644 index 0000000000000000000000000000000000000000..4bd9bb81d1830d944bb6efdc6f729994dc259d41 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/layers/bottleneck.py @@ -0,0 +1,138 @@ +from mindspore import nn, ops + +from .conv import ConvNormAct, DWConvNormAct + + +class Bottleneck(nn.Cell): + # Standard bottleneck + def __init__( + self, c1, c2, shortcut=True, k=(1, 3), g=(1, 1), e=0.5, act=True, momentum=0.97, eps=1e-3, sync_bn=False + ): # ch_in, ch_out, shortcut, kernels, groups, expand + super().__init__() + c_ = int(c2 * e) # hidden channels + self.conv1 = ConvNormAct(c1, c_, k[0], 1, g=g[0], act=act, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.conv2 = ConvNormAct(c_, c2, k[1], 1, g=g[1], act=act, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.add = shortcut and c1 == c2 + + def construct(self, x): + if self.add: + out = x + self.conv2(self.conv1(x)) + else: + out = self.conv2(self.conv1(x)) + return out + + +class Residualblock(nn.Cell): + def __init__( + self, c1, c2, k=(1, 3), g=(1, 1), act=True, momentum=0.97, eps=1e-3, sync_bn=False + ): # ch_in, ch_out, kernels, groups, expand + super().__init__() + self.conv1 = ConvNormAct(c1, c2, k[0], 1, g=g[0], act=act, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.conv2 = ConvNormAct(c2, c2, k[1], 1, g=g[1], act=act, momentum=momentum, eps=eps, sync_bn=sync_bn) + + def construct(self, x): + out = x + self.conv2(self.conv1(x)) + return out + + +class C3(nn.Cell): + # CSP Bottleneck with 3 convolutions + def __init__(self, c1, c2, n=1, shortcut=True, e=0.5, momentum=0.97, eps=1e-3, sync_bn=False): + super(C3, self).__init__() + c_ = int(c2 * e) # hidden channels + self.conv1 = ConvNormAct(c1, c_, 1, 1, 
momentum=momentum, eps=eps, sync_bn=sync_bn) + self.conv2 = ConvNormAct(c1, c_, 1, 1, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.conv3 = ConvNormAct(2 * c_, c2, 1, momentum=momentum, eps=eps, sync_bn=sync_bn) # act=FReLU(c2) + self.m = nn.SequentialCell( + [ + Bottleneck(c_, c_, shortcut, k=(1, 3), e=1.0, momentum=momentum, eps=eps, sync_bn=sync_bn) + for _ in range(n) + ] + ) + self.concat = ops.Concat(axis=1) + + def construct(self, x): + c1 = self.conv1(x) + c2 = self.m(c1) + c3 = self.conv2(x) + c4 = self.concat((c2, c3)) + c5 = self.conv3(c4) + + return c5 + + +class C2f(nn.Cell): + # CSP Bottleneck with 2 convolutions + def __init__( + self, c1, c2, n=1, shortcut=False, g=1, e=0.5, momentum=0.97, eps=1e-3, sync_bn=False + ): # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__() + _c = int(c2 * e) # hidden channels + self.cv1 = ConvNormAct(c1, 2 * _c, 1, 1, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.cv2 = ConvNormAct( + (2 + n) * _c, c2, 1, momentum=momentum, eps=eps, sync_bn=sync_bn + ) # optional act=FReLU(c2) + self.m = nn.CellList( + [ + Bottleneck(_c, _c, shortcut, k=(3, 3), g=(1, g), e=1.0, momentum=momentum, eps=eps, sync_bn=sync_bn) + for _ in range(n) + ] + ) + + def construct(self, x): + y = () + x = self.cv1(x) + _c = x.shape[1] // 2 + x_tuple = ops.split(x, axis=1, split_size_or_sections=_c) + y += x_tuple + for i in range(len(self.m)): + m = self.m[i] + out = m(y[-1]) + y += (out,) + + return self.cv2(ops.concat(y, axis=1)) + + +class DWBottleneck(nn.Cell): + # depthwise bottleneck used in yolox nano scale + def __init__( + self, c1, c2, shortcut=True, k=(1, 3), e=0.5, act=True, momentum=0.97, eps=1e-3, sync_bn=False + ): # ch_in, ch_out, shortcut, groups, kernels, expand + super().__init__() + c_ = int(c2 * e) # hidden channels + self.conv1 = ConvNormAct(c1, c_, k[0], 1, act=True, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.conv2 = DWConvNormAct(c_, c2, k[1], 1, act=True, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.add = shortcut and c1 == c2 + + def construct(self, x): + if self.add: + out = x + self.conv2(self.conv1(x)) + else: + out = self.conv2(self.conv1(x)) + return out + + +class DWC3(nn.Cell): + # depthwise DwC3 used in yolox nano scale, similar as C3 + def __init__(self, c1, c2, n=1, shortcut=True, e=0.5, momentum=0.97, eps=1e-3, sync_bn=False): + super(DWC3, self).__init__() + c_ = int(c2 * e) # hidden channels + self.conv1 = ConvNormAct(c1, c_, 1, 1, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.conv2 = ConvNormAct(c1, c_, 1, 1, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.conv3 = ConvNormAct(2 * c_, c2, 1, momentum=momentum, eps=eps, sync_bn=sync_bn) # act=FReLU(c2) + self.m = nn.SequentialCell( + [ + DWBottleneck(c_, c_, shortcut, k=(1, 3), e=1.0, momentum=momentum, eps=eps, sync_bn=sync_bn) + for _ in range(n) + ] + ) + self.concat = ops.Concat(axis=1) + + def construct(self, x): + c1 = self.conv1(x) + c2 = self.m(c1) + c3 = self.conv2(x) + c4 = self.concat((c2, c3)) + c5 = self.conv3(c4) + + return c5 diff --git a/community/cv/ShipWise/mindyolo/models/layers/common.py b/community/cv/ShipWise/mindyolo/models/layers/common.py new file mode 100644 index 0000000000000000000000000000000000000000..5c45f02d62efbe2cdf6d48fbaee7a8aebbee39f2 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/layers/common.py @@ -0,0 +1,73 @@ +import numpy as np + +import mindspore as ms +from mindspore import Tensor, nn, ops + + +class Shortcut(nn.Cell): + """ + Shortcut layer. 
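+    Adds the two inputs element-wise when given a pair (x1, x2) as a tuple or
+    list; any other input is returned unchanged.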
+ """ + + def construct(self, x): + if isinstance(x, (tuple, list)) and len(x) == 2: + return x[0] + x[1] + return x + + +class Concat(nn.Cell): + """ + Connect tensor in the specified axis. + """ + + def __init__(self, axis=1): + super(Concat, self).__init__() + self.axis = axis + + def construct(self, x): + return ops.concat(x, self.axis) + + +class ReOrg(nn.Cell): + """ + Reorganize the input Tensor (b, c, w, h) into a new shape (b, 4c, w/2, h/2). + """ + + def __init__(self): + super(ReOrg, self).__init__() + + def construct(self, x): + # in: (b,c,w,h) -> out: (b,4c,w/2,h/2) + x1 = x[:, :, ::2, ::2] + x2 = x[:, :, 1::2, ::2] + x3 = x[:, :, ::2, 1::2] + x4 = x[:, :, 1::2, 1::2] + out = ops.concat((x1, x2, x3, x4), 1) + return out + + +class Identity(nn.Cell): + def construct(self, x): + return x + + +class DFL(nn.Cell): + # Integral module of Distribution Focal Loss (DFL) proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391 + def __init__(self, c1=16): + super().__init__() + self.conv = nn.Conv2d(c1, 1, 1, has_bias=False) + self.conv.weight.requires_grad = False + self.c1 = c1 + self.softmax = ops.Softmax(axis=1) + + def construct(self, x): + b, c, a = x.shape # batch, channels, anchors + x = self.softmax(x.view(b, 4, self.c1, a).swapaxes(2, 1)) + x = self.conv(x) + x = x.view(b, 4, a) + return x + + def initialize_conv_weight(self): + self.conv.weight = ops.assign( + self.conv.weight, Tensor(np.arange(self.c1).reshape((1, self.c1, 1, 1)), dtype=ms.float32) + ) diff --git a/community/cv/ShipWise/mindyolo/models/layers/conv.py b/community/cv/ShipWise/mindyolo/models/layers/conv.py new file mode 100644 index 0000000000000000000000000000000000000000..ff801ec39b0d17ce49e010db0ac77a058a182bd7 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/layers/conv.py @@ -0,0 +1,168 @@ +from mindspore import nn, ops + +from .common import Identity +from .utils import autopad + + +class ConvNormAct(nn.Cell): + """Conv2d + BN + Act + + Args: + c1 (int): In channels, the channel number of the input tensor of the Conv2d layer. + c2 (int): Out channels, the channel number of the output tensor of the Conv2d layer. + k (Union[int, tuple[int]]): Kernel size, Specifies the height and width of the 2D convolution kernel. + The data type is an integer or a tuple of two integers. An integer represents the height + and width of the convolution kernel. A tuple of two integers represents the height + and width of the convolution kernel respectively. Default: 1. + s (Union[int, tuple[int]]): Stride, the movement stride of the 2D convolution kernel. + The data type is an integer or a tuple of two integers. An integer represents the movement step size + in both height and width directions. A tuple of two integers represents the movement step size in the height + and width directions respectively. Default: 1. + p (Union[None, int, tuple[int]]): Padding, the number of padding on the height and width directions of the input. + The data type is None or an integer or a tuple of four integers. If `padding` is an None, then padding with autopad. + If `padding` is an integer, then the top, bottom, left, and right padding are all equal to `padding`. + If `padding` is a tuple of 4 integers, then the top, bottom, left, and right padding + is equal to `padding[0]`, `padding[1]`, `padding[2]`, and `padding[3]` respectively. + The value should be greater than or equal to 0. Default: None. + g (int): Group, Splits filter into groups, `c1` and `c2` must be + divisible by `group`. 
If the group is equal to `c1` and `c2`, + this 2D convolution layer also can be called 2D depthwise convolution layer. Default: 1. + d (Union[int, tuple[int]]): Dilation, Dilation size of 2D convolution kernel. + The data type is an integer or a tuple of two integers. If :math:`k > 1`, the kernel is sampled + every `k` elements. The value of `k` on the height and width directions is in range of [1, H] + and [1, W] respectively. Default: 1. + act (Union[bool, nn.Cell]): Activation. The data type is bool or nn.Cell. If `act` is True, + then the activation function uses nn.SiLU. If `act` is False, do not use activation function. + If 'act' is nn.Cell, use the object of this cell as the activation function. Default: True. + sync_bn (bool): Whether the BN layer use nn.SyncBatchNorm. Default: False. + """ + + def __init__( + self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True, momentum=0.97, eps=1e-3, sync_bn=False + ): # ch_in, ch_out, kernel, stride, padding, groups + super(ConvNormAct, self).__init__() + self.conv = nn.Conv2d( + c1, c2, k, s, pad_mode="pad", padding=autopad(k, p, d), group=g, dilation=d, has_bias=False + ) + + if sync_bn: + self.bn = nn.SyncBatchNorm(c2, momentum=momentum, eps=eps) + else: + self.bn = nn.BatchNorm2d(c2, momentum=momentum, eps=eps) + self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Cell) else Identity) + + def construct(self, x): + return self.act(self.bn(self.conv(x))) + + +class RepConv(nn.Cell): + """Represented convolution, https://arxiv.org/abs/2101.03697 + + Args: + c1 (int): In channels, the channel number of the input tensor of the Conv2d layer. + c2 (int): Out channels, the channel number of the output tensor of the Conv2d layer. + k (Union[int, tuple[int]]): Kernel size, Specifies the height and width of the 2D convolution kernel. + The data type is an integer or a tuple of two integers. An integer represents the height + and width of the convolution kernel. A tuple of two integers represents the height + and width of the convolution kernel respectively. Default: 1. + s (Union[int, tuple[int]]): Stride, the movement stride of the 2D convolution kernel. + The data type is an integer or a tuple of two integers. An integer represents the movement step size + in both height and width directions. A tuple of two integers represents the movement step size in the height + and width directions respectively. Default: 1. + p (Union[None, int, tuple[int]]): Padding, the number of padding on the height and width directions of the input. + The data type is None or an integer or a tuple of four integers. If `padding` is an None, then padding with autopad. + If `padding` is an integer, then the top, bottom, left, and right padding are all equal to `padding`. + If `padding` is a tuple of 4 integers, then the top, bottom, left, and right padding + is equal to `padding[0]`, `padding[1]`, `padding[2]`, and `padding[3]` respectively. + The value should be greater than or equal to 0. Default: None. + g (int): Group, Splits filter into groups, `c1` and `c2` must be + divisible by `group`. If the group is equal to `c1` and `c2`, + this 2D convolution layer also can be called 2D depthwise convolution layer. Default: 1. + act (Union[bool, nn.Cell]): Activation. The data type is bool or nn.Cell. If `act` is True, + then the activation function uses nn.SiLU. If `act` is False, do not use activation function. + If 'act' is nn.Cell, use the object of this cell as the activation function. Default: True. + sync_bn (bool): Whether the BN layer use nn.SyncBatchNorm. 
Default: False. + """ + + def __init__(self, c1, c2, k=3, s=1, p=None, g=1, act=True, momentum=0.97, eps=1e-3, sync_bn=False): + super(RepConv, self).__init__() + + self.groups = g + self.in_channels = c1 + self.out_channels = c2 + + assert k == 3 + assert autopad(k, p) == 1 + + padding_11 = autopad(k, p) - k // 2 + + self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Cell) else Identity) + + if sync_bn: + BatchNorm = nn.SyncBatchNorm + else: + BatchNorm = nn.BatchNorm2d + + self.rbr_identity = BatchNorm(num_features=c1, momentum=(1 - 0.03), eps=1e-3) if c2 == c1 and s == 1 else None + self.rbr_dense = nn.SequentialCell( + [ + nn.Conv2d(c1, c2, k, s, pad_mode="pad", padding=autopad(k, p), group=g, has_bias=False), + BatchNorm(num_features=c2, momentum=momentum, eps=eps), + ] + ) + self.rbr_1x1 = nn.SequentialCell( + nn.Conv2d(c1, c2, 1, s, pad_mode="pad", padding=padding_11, group=g, has_bias=False), + BatchNorm(num_features=c2, momentum=momentum, eps=eps), + ) + + def construct(self, inputs): + if self.rbr_identity is None: + id_out = 0.0 + else: + id_out = self.rbr_identity(inputs) + + return self.act(self.rbr_dense(inputs) + self.rbr_1x1(inputs) + id_out) + + def fuse(self): + # TODO: The reparameterization function will be developed in subsequent versions + pass + + +class DownC(nn.Cell): + # Spatial pyramid pooling layer used in YOLOv3-SPP + def __init__(self, c1, c2, n=1, k=2, momentum=0.97, eps=1e-3, sync_bn=False): + super(DownC, self).__init__() + c_ = c1 # hidden channels + self.cv1 = ConvNormAct(c1, c_, 1, 1, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.cv2 = ConvNormAct(c_, c2 // 2, 3, k, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.cv3 = ConvNormAct(c1, c2 // 2, 1, 1, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.mp = nn.MaxPool2d(kernel_size=k, stride=k) + + def construct(self, x): + return ops.concat((self.cv2(self.cv1(x)), self.cv3(self.mp(x))), axis=1) + + +class Focus(nn.Cell): + # Focus wh information into c-space + def __init__( + self, c1, c2, k=1, s=1, act=True, momentum=0.97, eps=1e-3, sync_bn=False + ): # ch_in, ch_out, kernel, stride, padding, groups + super(Focus, self).__init__() + self.conv = ConvNormAct(c1 * 4, c2, k, s, act=act, momentum=momentum, eps=eps, sync_bn=sync_bn) + + def construct(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) + return self.conv(ops.concat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)) + + +class DWConvNormAct(nn.Cell): + """Conv2d + BN + Act, depthwise ConvNormAct used in yolox nano scale, an approach to reduce parameter number""" + + def __init__( + self, c1, c2, k=1, s=1, p=None, d=1, act=True, momentum=0.97, eps=1e-3, sync_bn=False + ): # ch_in, ch_out, kernel, stride, padding, groups + super(DWConvNormAct, self).__init__() + self.dconv = ConvNormAct(c1, c1, k, s, p, g=c1, d=d, act=act, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.pconv = ConvNormAct(c1, c2, k=1, s=1, p=p, g=1, d=d, act=act, momentum=momentum, eps=eps, sync_bn=sync_bn) + + def construct(self, x): + return self.pconv(self.dconv(x)) diff --git a/community/cv/ShipWise/mindyolo/models/layers/implicit.py b/community/cv/ShipWise/mindyolo/models/layers/implicit.py new file mode 100644 index 0000000000000000000000000000000000000000..dde91d81a488a6b975a6ee8697c1073bb877e189 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/layers/implicit.py @@ -0,0 +1,42 @@ +import numpy as np + +import mindspore as ms +from mindspore import Parameter, Tensor, nn + + +class ImplicitA(nn.Cell): + """ + 
https://arxiv.org/pdf/2105.04206v1.pdf. Implicit knowledge in YOLOR combined with convolution + feature map in addition and multiplication manner: Implicit knowledge in YOLOR can be simplified to a vector by + pre-computing at the inference stage. This vector can be combined with the bias and weight of the previous or + subsequent convolutional layer. + """ + + def __init__(self, channel, mean=0.0, std=0.02): + super(ImplicitA, self).__init__() + self.channel = channel + self.mean = mean + self.std = std + self.implicit = Parameter(Tensor(np.random.normal(self.mean, self.std, (1, channel, 1, 1)), ms.float32)) + + def construct(self, x): + return self.implicit + x + + +class ImplicitM(nn.Cell): + """ + https://arxiv.org/pdf/2105.04206v1.pdf. Implicit knowledge in YOLOR combined with convolution + feature map in addition and multiplication manner: Implicit knowledge in YOLOR can be simplified to a vector by + pre-computing at the inference stage. This vector can be combined with the bias and weight of the previous or + subsequent convolutional layer. + """ + + def __init__(self, channel, mean=0.0, std=0.02): + super(ImplicitM, self).__init__() + self.channel = channel + self.mean = mean + self.std = std + self.implicit = Parameter(Tensor(np.random.normal(self.mean, self.std, (1, channel, 1, 1)), ms.float32)) + + def construct(self, x): + return self.implicit * x diff --git a/community/cv/ShipWise/mindyolo/models/layers/pool.py b/community/cv/ShipWise/mindyolo/models/layers/pool.py new file mode 100644 index 0000000000000000000000000000000000000000..342fcff5409b28c4c850a7c838e850441bb19092 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/layers/pool.py @@ -0,0 +1,44 @@ +from mindspore import nn + + +class MP(nn.Cell): + """ + Use the same step size and kernel size for maxpool. + """ + + def __init__(self, k=2): + super(MP, self).__init__() + self.m = nn.MaxPool2d(kernel_size=k, stride=k) + + def construct(self, x): + return self.m(x) + + +class SP(nn.Cell): + """ + Use autopad for maxpool. + """ + + def __init__(self, k=3, s=1): + super(SP, self).__init__() + self.m = MaxPool2d(kernel_size=k, stride=s, padding=k // 2) + + def construct(self, x): + return self.m(x) + + +class MaxPool2d(nn.Cell): + """ + Maxpool with pad. + """ + + def __init__(self, kernel_size, stride, padding=0): + super(MaxPool2d, self).__init__() + assert isinstance(padding, int) + self.pad = nn.Pad(paddings=((0, 0), (0, 0), (padding, padding), (padding, padding))) + self.pool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride) + + def construct(self, x): + x = self.pad(x) + x = self.pool(x) + return x diff --git a/community/cv/ShipWise/mindyolo/models/layers/spp.py b/community/cv/ShipWise/mindyolo/models/layers/spp.py new file mode 100644 index 0000000000000000000000000000000000000000..4d7351e983b1bd9bc4cd40dd8dcc7ccca0117d87 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/layers/spp.py @@ -0,0 +1,57 @@ +from mindspore import nn, ops + +from .conv import ConvNormAct +from .pool import MaxPool2d + + +class SPPCSPC(nn.Cell): + """ + CSPNet, https://arxiv.org/pdf/1911.11929v1.pdf. The main purpose of designing CSPNet is to enable + this architecture to achieve a richer gradient combination while reducing the amount of computation. This aim + is achieved by partitioning feature map of the base layer into two parts and then merging them through a proposed + cross-stage hierarchy. Our main concept is to make the gradient flow propagate through different network paths + by splitting the gradient flow. 
In this way, we have confirmed that the propagated gradient information can + have a large correlation difference by switching concatenation and transition steps. + """ + + def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5, k=(5, 9, 13), momentum=0.97, eps=1e-3, sync_bn=False): + super(SPPCSPC, self).__init__() + c_ = int(2 * c2 * e) # hidden channels + self.cv1 = ConvNormAct(c1, c_, 1, 1, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.cv2 = ConvNormAct(c1, c_, 1, 1, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.cv3 = ConvNormAct(c_, c_, 3, 1, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.cv4 = ConvNormAct(c_, c_, 1, 1, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.m = nn.CellList([MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) + self.cv5 = ConvNormAct(4 * c_, c_, 1, 1, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.cv6 = ConvNormAct(c_, c_, 3, 1, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.cv7 = ConvNormAct(2 * c_, c2, 1, 1, momentum=momentum, eps=eps, sync_bn=sync_bn) + + def construct(self, x): + x1 = self.cv4(self.cv3(self.cv1(x))) + m_tuple = (x1,) + for i in range(len(self.m)): + m_tuple += (self.m[i](x1),) + y1 = self.cv6(self.cv5(ops.Concat(axis=1)(m_tuple))) + y2 = self.cv2(x) + return self.cv7(ops.Concat(axis=1)((y1, y2))) + + +class SPPF(nn.Cell): + # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher + def __init__( + self, c1, c2, k=5, act=True, momentum=0.97, eps=1e-3, sync_bn=False + ): # equivalent to SPP(k=(5, 9, 13)) + super(SPPF, self).__init__() + c_ = c1 // 2 # hidden channels + self.conv1 = ConvNormAct(c1, c_, 1, 1, act=act, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.conv2 = ConvNormAct(c_ * 4, c2, 1, 1, act=act, momentum=momentum, eps=eps, sync_bn=sync_bn) + self.concat = ops.Concat(axis=1) + self.m = nn.MaxPool2d(kernel_size=k, stride=1, pad_mode="same") + + def construct(self, x): + x = self.conv1(x) + y1 = self.m(x) + y2 = self.m(y1) + y3 = self.m(y2) + y = self.conv2(self.concat((x, y1, y2, y3))) + return y diff --git a/community/cv/ShipWise/mindyolo/models/layers/upsample.py b/community/cv/ShipWise/mindyolo/models/layers/upsample.py new file mode 100644 index 0000000000000000000000000000000000000000..96f3e2c8d8f41fdd45eacc27fbd6dd5dafe74421 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/layers/upsample.py @@ -0,0 +1,29 @@ +from mindspore import nn, ops + + +class Upsample(nn.Cell): + """ + Using the interpolate method specified by `mode` resize the input tensor. + + Args: + scales (tuple[float], optional): a tuple of float. Describe the scale along each dimension. + Its length is the same as that of shape of `x`. The numbers in `scales` must all be positive. Only one of + `scales` and `sizes` can be specified. + sizes (tuple[int], optional): a tuple of int, describes the shape of the output tensor. The numbers in `sizes` + must all be positive. Only one of `scales` and `sizes` can be specified. If `sizes` is specified, then set + `scales` to 'None' in this operator's input list. It is 1 int elements :math:`(new\_width,)` when `mode` + is "linear". It is 2 int elements :math:`(new\_height, new\_width)` when `mode` is "bilinear". + mode (string): The method used to interpolate: 'linear' | 'bilinear'. Default is 'linear'. 
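+
+    Example (illustrative only; `x` is assumed to be an NCHW tensor):
+        >>> up = Upsample(scales=2, mode="nearest")
+        >>> y = up(x)  # (b, c, h, w) -> (b, c, 2*h, 2*w)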
+    """
+
+    def __init__(self, sizes=None, scales=None, mode="nearest"):
+        super(Upsample, self).__init__()
+        self.sizes = sizes
+        self.scales = scales
+        self.mode = mode
+
+    def construct(self, x):
+        if self.mode == "nearest" and self.scales:
+            return ops.ResizeNearestNeighbor((x.shape[-2] * self.scales, x.shape[-1] * self.scales))(x)
+        else:
+            return ops.interpolate(x, sizes=self.sizes, scales=self.scales, mode=self.mode)
diff --git a/community/cv/ShipWise/mindyolo/models/layers/utils.py b/community/cv/ShipWise/mindyolo/models/layers/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..b7a29bcedb0bffa5558275998e6f67257bd136aa
--- /dev/null
+++ b/community/cv/ShipWise/mindyolo/models/layers/utils.py
@@ -0,0 +1,106 @@
+import math
+from typing import Tuple
+
+from mindspore import Tensor, ops
+
+
+def make_divisible(x, divisor):
+    # Returns the smallest multiple of divisor that is >= x
+    return math.ceil(x / divisor) * divisor
+
+
+def autopad(k, p=None, d=1):  # kernel, padding, dilation
+    # Pad to 'same' shape outputs
+    if d > 1:
+        k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k]  # actual kernel-size
+    if p is None:
+        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
+    if isinstance(p, list):
+        assert len(p) == 2
+        p = (p[0], p[0], p[1], p[1])
+    return p
+
+
+# ------------------------box operation starts--------------------------
+def meshgrid(inputs, indexing="xy"):
+    # An alternative implementation of ops.meshgrid; only supports inputs of length 2.
+    # The meshgrid op is not supported on some device models, so this tiled
+    # fallback is adopted; it will be updated later.
+    x, y = inputs
+    nx, ny = x.shape[0], y.shape[0]
+    xv, yv = None, None
+    if indexing == "xy":
+        xv = ops.tile(x.view(1, -1), (ny, 1))
+        yv = ops.tile(y.view(-1, 1), (1, nx))
+    elif indexing == "ij":
+        xv = ops.tile(x.view(-1, 1), (1, ny))
+        yv = ops.tile(y.view(1, -1), (nx, 1))
+
+    return xv, yv
+
+
+def box_cxcywh_to_xyxy(bbox) -> Tensor:
+    """Convert bbox coordinates from (cx, cy, w, h) to (x1, y1, x2, y2)
+
+    Args:
+        bbox (Tensor): Shape (n, 4) for bboxes.
+
+    Returns:
+        Tensor: Converted bboxes.
+    """
+    cx, cy, w, h = ops.unstack(bbox, axis=-1)
+    new_bbox = tuple([(cx - 0.5 * w), (cy - 0.5 * h), (cx + 0.5 * w), (cy + 0.5 * h)])
+    return ops.stack(new_bbox, axis=-1)
+
+
+def box_xyxy_to_cxcywh(bbox) -> Tensor:
+    """Convert bbox coordinates from (x1, y1, x2, y2) to (cx, cy, w, h)
+
+    Args:
+        bbox (Tensor): Shape (n, 4) for bboxes.
+
+    Returns:
+        Tensor: Converted bboxes.
+    """
+    x0, y0, x1, y1 = ops.unstack(bbox, axis=-1)
+    new_bbox = tuple([(x0 + x1) / 2, (y0 + y1) / 2, (x1 - x0), (y1 - y0)])
+    return ops.stack(new_bbox, axis=-1)
+
+
+def box_scale(boxes, scale, scale_reciprocal=False) -> Tensor:
+    """
+    Scale the box with horizontal and vertical scaling factors
+
+    Args:
+        boxes (Tensor[N, 4] or [bs, N, 4]): boxes are specified by their (x1, y1, x2, y2) coordinates
+        scale (Tuple[2]): scale factors for x and y coordinates
+        scale_reciprocal (bool): if True, divide by the scale factors instead of multiplying. Default: False.
+    """
+    assert len(boxes.shape) in [2, 3]
+    scale_x, scale_y = scale
+    if scale_reciprocal:
+        scale_x, scale_y = 1.0 / scale_x, 1.0 / scale_y
+    new_scale = Tensor([scale_x, scale_y, scale_x, scale_y])  # (4,), broadcast over the box dimension
+    boxes *= new_scale
+    return boxes
+
+
+def box_clip(boxes, clip_size: Tuple[int, int]) -> Tensor:
+    """
+    Clip the boxes by limiting x coordinates to the range [0, width]
+    and y coordinates to the range [0, height].
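+    The clipped coordinates are stacked into a new tensor that is returned; the
+    input `boxes` tensor itself is not modified.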
+ + Args: + boxes (Tensor[N, 4]): boxes are specified by their (x1, y1, x2, y2) coordinates + clip_size (height, width): The clipping box's size. + """ + h, w = clip_size + x1 = boxes[..., 0].clip(0, w) + y1 = boxes[..., 1].clip(0, h) + x2 = boxes[..., 2].clip(0, w) + y2 = boxes[..., 3].clip(0, h) + boxes = ops.stack((x1, y1, x2, y2), axis=-1) + return boxes + + +# ------------------------box operation ends-------------------------- diff --git a/community/cv/ShipWise/mindyolo/models/losses/__init__.py b/community/cv/ShipWise/mindyolo/models/losses/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b54930a120e46df30d6ea4d2387daf53618030db --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/losses/__init__.py @@ -0,0 +1,17 @@ +from . import (loss_factory, yolov3_loss, yolov4_loss, yolov5_loss, + yolov7_loss, yolov8_loss) +from .loss_factory import * +from .yolov3_loss import * +from .yolov4_loss import * +from .yolov5_loss import * +from .yolov7_loss import * +from .yolov8_loss import * +from .yolox_loss import * + +__all__ = [] +__all__.extend(yolov3_loss.__all__) +__all__.extend(yolov4_loss.__all__) +__all__.extend(yolov5_loss.__all__) +__all__.extend(yolov7_loss.__all__) +__all__.extend(yolov8_loss.__all__) +__all__.extend(loss_factory.__all__) diff --git a/community/cv/ShipWise/mindyolo/models/losses/focal_loss.py b/community/cv/ShipWise/mindyolo/models/losses/focal_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..371b5dddee7285420e09e30a830d47543041680b --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/losses/focal_loss.py @@ -0,0 +1,98 @@ +import mindspore as ms +from mindspore import nn, ops + + +def smooth_BCE(eps=0.1): + """ + Return positive, negative label smoothing BCE targets, + https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441 + """ + return 1.0 - 0.5 * eps, 0.5 * eps + + +class FocalLoss(nn.Cell): + """ + Focal Loss for Dense Object Detection, https://arxiv.org/pdf/1708.02002v2.pdf + + Args: + bce_weight (Tensor, optional): A rescaling weight applied to the loss of each batch element for BCEWithLogitsLoss. + If not None, it can be broadcast to a tensor with shape of `logits`, + data type must be float16 or float32. Default: None. + bce_pos_weight (Tensor, optional): A weight of positive examples for BCEWithLogitsLoss. Must be a vector with length equal to the + number of classes. If not None, it must be broadcast to a tensor with shape of `logits`, data type + must be float16 or float32. Default: None. + gamma: A modulating factor (1 − pt)^gamma to the cross entropy loss, with tunable focusing. Default: 1.5 + alpha: An alpha-balanced variant of the focal loss. Default: 0.25 + reduction (str): Type of reduction to be applied to loss. The optional values are 'mean', 'sum', and 'none'. + If 'none', do not perform reduction. Default: 'mean'. 
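+
+        Example (illustrative; shapes are assumed, targets are 0/1 tensors):
+            >>> loss_fn = FocalLoss(gamma=1.5, alpha=0.25, reduction="mean")
+            >>> loss = loss_fn(pred_logits, targets)  # element-wise focal BCE, then mean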
+ """ + + def __init__(self, bce_weight=None, bce_pos_weight=None, gamma=1.5, alpha=0.25, reduction="mean"): + super(FocalLoss, self).__init__() + self.loss_fcn = nn.BCEWithLogitsLoss(weight=bce_weight, pos_weight=bce_pos_weight, reduction="none") + self.gamma = gamma + self.alpha = alpha + self.reduction = reduction # default mean + assert self.loss_fcn.reduction == "none" # required to apply FL to each element + + def construct(self, pred, true, mask=None): + ori_dtype = pred.dtype + loss = self.loss_fcn(pred.astype(ms.float32), true.astype(ms.float32)) + + # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py + pred_prob = ops.sigmoid(pred) # prob from logits + p_t = true * pred_prob + (1 - true) * (1 - pred_prob) + alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) + modulating_factor = (1.0 - p_t) ** self.gamma + loss *= alpha_factor * modulating_factor + + if mask is not None: + loss *= mask + + if self.reduction == "mean": + if mask is not None: + return (loss.sum() / mask.astype(loss.dtype).sum().clip(1, None)).astype(ori_dtype) + return loss.mean().astype(ori_dtype) + elif self.reduction == "sum": + return loss.sum().astype(ori_dtype) + else: # 'none' + return loss.astype(ori_dtype) + + +class BCEWithLogitsLoss(nn.Cell): + def __init__(self, bce_weight=None, bce_pos_weight=None, reduction="mean"): + """ + Adds sigmoid activation function to input logits, and uses the given logits to compute binary cross entropy + between the logits and the labels. + + Args: + bce_weight (Tensor, optional): A rescaling weight applied to the loss of each batch element. + If not None, it can be broadcast to a tensor with shape of `logits`, + data type must be float16 or float32. Default: None. + bce_pos_weight (Tensor, optional): A weight of positive examples. Must be a vector with length equal to the + number of classes. If not None, it must be broadcast to a tensor with shape of `logits`, data type + must be float16 or float32. Default: None. + reduction (str): Type of reduction to be applied to loss. The optional values are 'mean', 'sum', and 'none'. + If 'none', do not perform reduction. Default: 'mean'. + """ + + super(BCEWithLogitsLoss, self).__init__() + self.loss_fcn = nn.BCEWithLogitsLoss(weight=bce_weight, pos_weight=bce_pos_weight, reduction="none") + self.reduction = reduction # default mean + assert self.loss_fcn.reduction == "none" # required to apply FL to each element + + def construct(self, pred, true, mask=None): + ori_dtype = pred.dtype + loss = self.loss_fcn(pred.astype(ms.float32), true.astype(ms.float32)) + + if mask is not None: + loss *= mask + + if self.reduction == "mean": + if mask is not None: + return (loss.sum() / mask.astype(loss.dtype).sum().clip(1, None)).astype(ori_dtype) + return loss.mean().astype(ori_dtype) + elif self.reduction == "sum": + return loss.sum().astype(ori_dtype) + else: # 'none' + return loss.astype(ori_dtype) diff --git a/community/cv/ShipWise/mindyolo/models/losses/iou_loss.py b/community/cv/ShipWise/mindyolo/models/losses/iou_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..f3227778b6a84dbef221f83bf393b2ae99cdb632 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/losses/iou_loss.py @@ -0,0 +1,152 @@ +import math + +import mindspore as ms +from mindspore import Tensor, ops + +from mindyolo.models.layers.utils import box_cxcywh_to_xyxy + +PI = Tensor(math.pi, ms.float32) +EPS = 1e-7 + + +def box_area(box): + """ + Return area of boxes. 
+ Both sets of boxes are expected to be in (x1, y1, x2, y2) format. + Arguments: + box (Tensor[N, 4]) + Returns: + area (Tensor[N,]) + """ + return (box[:, 2] - box[:, 0]) * (box[:, 3] - box[:, 1]) + + +def batch_box_area(box): + """ + Return area of batch boxes. + Both sets of boxes are expected to be in (x1, y1, x2, y2) format. + Arguments: + box (Tensor[B, N, 4]) + Returns: + area (Tensor[B, N]) + """ + return (box[:, :, 2] - box[:, :, 0]) * (box[:, :, 3] - box[:, :, 1]) + + +def box_iou(box1, box2): + # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py + """ + Return intersection-over-union (Jaccard index) of boxes. + Both sets of boxes are expected to be in (x1, y1, x2, y2) format. + Arguments: + box1 (Tensor[N, 4]) + box2 (Tensor[M, 4]) + Returns: + iou (Tensor[N, M]): the NxM matrix containing the pairwise + IoU values for every element in boxes1 and boxes2 + """ + + area1 = box_area(box1) + area2 = box_area(box2) + + expand_size_1 = box2.shape[0] + expand_size_2 = box1.shape[0] + + box1 = ops.tile(ops.expand_dims(box1, 1), (1, expand_size_1, 1)) + box2 = ops.tile(ops.expand_dims(box2, 0), (expand_size_2, 1, 1)) + + # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) + # inter = ops.minimum(box1[:, None, 2:], box2[None, :, 2:]) - ops.maximum(box1[:, None, :2], box2[None, :, :2]) + inter = ops.minimum(box1[..., 2:], box2[..., 2:]) - ops.maximum(box1[..., :2], box2[..., :2]) + inter = inter.clip(0.0, None) + inter = inter[:, :, 0] * inter[:, :, 1] + return inter / (area1[:, None] + area2[None, :] - inter).clip(EPS, None) # iou = inter / (area1 + area2 - inter) + + +def batch_box_iou(batch_box1, batch_box2, xywh=False): + """ + Return intersection-over-union (Jaccard index) of boxes. + Both sets of boxes are expected to be in (x1, y1, x2, y2) format. + Arguments: + box1 (Tensor[B, N, 4]) + box2 (Tensor[B, M, 4]) + Returns: + iou (Tensor[B, N, M]): the NxM matrix containing the pairwise + IoU values for every element in boxes1 and boxes2 + """ + if xywh: + batch_box1 = box_cxcywh_to_xyxy(batch_box1) + batch_box2 = box_cxcywh_to_xyxy(batch_box2) + + area1 = batch_box_area(batch_box1) + area2 = batch_box_area(batch_box2) + + expand_size_1 = batch_box2.shape[1] + expand_size_2 = batch_box1.shape[1] + batch_box1 = ops.tile(ops.expand_dims(batch_box1, 2), (1, 1, expand_size_1, 1)) + batch_box2 = ops.tile(ops.expand_dims(batch_box2, 1), (1, expand_size_2, 1, 1)) + + # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) + inter = ops.minimum(batch_box1[..., 2:], batch_box2[..., 2:]) - ops.maximum( + batch_box1[..., :2], batch_box2[..., :2] + ) + inter = inter.clip(0.0, None) + inter = inter[:, :, :, 0] * inter[:, :, :, 1] + return inter / (area1[:, :, None] + area2[:, None, :] - inter).clip( + EPS, None + ) # iou = inter / (area1 + area2 - inter) + + +def bbox_iou(box1, box2, xywh=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7): + """ + Return intersection-over-union (IoU) of boxes. + Arguments: + box1 (Tensor[N, 4]) or (Tensor[bs, N, 4]) + box2 (Tensor[N, 4]) or (Tensor[bs, N, 4]) + xywh (bool): Whether the box format is (x_center, y_center, w, h) or (x1, y1, x2, y2). Default: True. + GIoU (bool): Whether to use GIoU. Default: False. + DIoU (bool): Whether to use DIoU. Default: False. + CIoU (bool): Whether to use CIoU. Default: False. 
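+        eps (float): Small constant used to avoid division by zero. Default: 1e-7.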
+ Returns: + iou (Tensor[N,]): the IoU values for every element in boxes1 and boxes2 + """ + + # Get the coordinates of bounding boxes + if xywh: # transform from xywh to xyxy + x1, y1, w1, h1 = ops.split(box1, split_size_or_sections=1, axis=-1) + x2, y2, w2, h2 = ops.split(box2, split_size_or_sections=1, axis=-1) + w1_, h1_, w2_, h2_ = w1 / 2, h1 / 2, w2 / 2, h2 / 2 + b1_x1, b1_x2, b1_y1, b1_y2 = x1 - w1_, x1 + w1_, y1 - h1_, y1 + h1_ + b2_x1, b2_x2, b2_y1, b2_y2 = x2 - w2_, x2 + w2_, y2 - h2_, y2 + h2_ + else: # x1, y1, x2, y2 = box1 + b1_x1, b1_y1, b1_x2, b1_y2 = ops.split(box1, split_size_or_sections=1, axis=-1) + b2_x1, b2_y1, b2_x2, b2_y2 = ops.split(box2, split_size_or_sections=1, axis=-1) + + # Intersection area + inter = (ops.minimum(b1_x2, b2_x2) - ops.maximum(b1_x1, b2_x1)).clip(0., None) * \ + (ops.minimum(b1_y2, b2_y2) - ops.maximum(b1_y1, b2_y1)).clip(0., None) + + # Union Area + w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps + w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps + union = w1 * h1 + w2 * h2 - inter + eps + + # IoU + iou = inter / union + + if CIoU or DIoU or GIoU: + cw = ops.maximum(b1_x2, b2_x2) - ops.minimum(b1_x1, b2_x1) # convex (smallest enclosing box) width + ch = ops.maximum(b1_y2, b2_y2) - ops.minimum(b1_y1, b2_y1) # convex height + if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 + c2 = cw**2 + ch**2 + eps # convex diagonal squared + rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center dist ** 2 + if CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 + # v = (4 / get_pi(iou.dtype) ** 2) * ops.pow(ops.atan(w2 / (h2 + eps)) - ops.atan(w1 / (h1 + eps)), 2) + v = (4 / PI.astype(iou.dtype) ** 2) * ops.pow(ops.atan(w2 / (h2 + eps)) - ops.atan(w1 / (h1 + eps)), 2) + alpha = v / (v - iou + (1 + eps)) + alpha = ops.stop_gradient(alpha) + return iou - (rho2 / c2 + v * alpha) # CIoU + return iou - rho2 / c2 # DIoU + c_area = cw * ch + eps # convex area + return iou - (c_area - union) / c_area # GIoU https://arxiv.org/pdf/1902.09630.pdf + return iou # IoU diff --git a/community/cv/ShipWise/mindyolo/models/losses/loss_factory.py b/community/cv/ShipWise/mindyolo/models/losses/loss_factory.py new file mode 100644 index 0000000000000000000000000000000000000000..79d0dd93e377e98dee5bdef291669c76431b7694 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/losses/loss_factory.py @@ -0,0 +1,15 @@ +from mindyolo.models.registry import is_model, model_entrypoint + +__all__ = ["create_loss"] + + +def create_loss(name: str, **kwargs): + kwargs = {k: v for k, v in kwargs.items() if v is not None} + + if not is_model(name): + raise RuntimeError(f"Unknown loss module {name}") + + create_fn = model_entrypoint(name) + loss_fn = create_fn(**kwargs) + + return loss_fn diff --git a/community/cv/ShipWise/mindyolo/models/losses/yolov3_loss.py b/community/cv/ShipWise/mindyolo/models/losses/yolov3_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..516f06bc1cf7224d302810bddd5ba1647f50d8fd --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/losses/yolov3_loss.py @@ -0,0 +1,249 @@ +import numpy as np + +import mindspore as ms +import mindspore.numpy as mnp +from mindspore import Tensor, nn, ops + +from mindyolo.models.registry import register_model +from .focal_loss import BCEWithLogitsLoss, FocalLoss, smooth_BCE +from .iou_loss import batch_box_iou, bbox_iou + +CLIP_VALUE = 1000.0 +EPS = 1e-7 + +__all__ = ["YOLOv3Loss"] + + +@register_model +class 
YOLOv3Loss(nn.Cell): + def __init__( + self, box, obj, cls, anchor_t, label_smoothing, fl_gamma, cls_pw, obj_pw, anchors, stride, nc, **kwargs + ): + super(YOLOv3Loss, self).__init__() + self.hyp_box = box + self.hyp_obj = obj + self.hyp_cls = cls + self.hyp_anchor_t = anchor_t + self.nc = nc # number of classes + self.na = len(anchors[0]) // 2 # number of anchors + self.nl = len(anchors) # number of layers + + stride = np.array(stride) + anchors = np.array(anchors).reshape((self.nl, -1, 2)) + anchors = anchors / stride.reshape((-1, 1, 1)) + self.stride = Tensor(stride, ms.int32) + self.anchors = Tensor(anchors, ms.float32) # shape(nl,na,2) + + # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 + self.cp, self.cn = smooth_BCE(eps=label_smoothing) # positive, negative BCE targets + # Focal loss + g = fl_gamma # focal loss gamma + if g > 0: + BCEcls, BCEobj = FocalLoss(bce_pos_weight=Tensor([cls_pw], ms.float32), gamma=g), FocalLoss( + bce_pos_weight=Tensor([obj_pw], ms.float32), gamma=g + ) + else: + # Define criteria + BCEcls = BCEWithLogitsLoss(bce_pos_weight=Tensor(np.array([cls_pw]), ms.float32)) + BCEobj = BCEWithLogitsLoss(bce_pos_weight=Tensor(np.array([obj_pw]), ms.float32)) + + _balance = {3: [4.0, 1.0, 0.4]}.get(self.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 + self.balance = ms.Parameter(Tensor(_balance, ms.float32), requires_grad=False) + self.BCEcls, self.BCEobj, self.gr = BCEcls, BCEobj, 1.0 + + self._off = Tensor( + [ + [0, 0], + [1, 0], + [0, 1], + [-1, 0], + [0, -1], # j,k,l,m + ], + dtype=ms.float32, + ) + + self.loss_item_name = ["loss", "lbox", "lobj", "lcls"] # branch name returned by lossitem for print + + def construct(self, p, targets, imgs): + lcls, lbox, lobj = 0.0, 0.0, 0.0 + tcls, tbox, indices, anchors, tmasks = self.build_targets( + p, targets + ) # class, box, (image, anchor, gridj, gridi), anchors, mask + tcls, tbox, indices, anchors, tmasks = ( + ops.stop_gradient(tcls), + ops.stop_gradient(tbox), + ops.stop_gradient(indices), + ops.stop_gradient(anchors), + ops.stop_gradient(tmasks), + ) + + # Losses + for layer_index, pi in enumerate(p): # layer index, layer predictions + tmask = tmasks[layer_index] + b, a, gj, gi = ops.split(indices[layer_index] * tmask[None, :], split_size_or_sections=1, axis=0) # image, anchor, gridy, gridx + b, a, gj, gi = b.view(-1), a.view(-1), gj.view(-1), gi.view(-1) + tobj = ops.zeros(pi.shape[:4], pi.dtype) # target obj + + n = b.shape[0] # number of targets + if n: + _meta_pred = pi[b, a, gj, gi] # gather from (bs,na,h,w,nc) + pxy, pwh, _, pcls = _meta_pred[:, :2], _meta_pred[:, 2:4], _meta_pred[:, 4:5], _meta_pred[:, 5:] + + # Regression + pxy = ops.Sigmoid()(pxy) * 2 - 0.5 + pwh = (ops.Sigmoid()(pwh) * 2) ** 2 * anchors[layer_index] + pbox = ops.concat((pxy, pwh), 1) # predicted box + iou = bbox_iou(pbox, tbox[layer_index], CIoU=True).squeeze() # iou(prediction, target) + # iou = iou * tmask + # lbox += ((1.0 - iou) * tmask).mean() # iou loss + lbox += (((1.0 - iou) * tmask).sum() / tmask.astype(iou.dtype).sum().clip(1, None)).astype(iou.dtype) + + # Objectness + iou = ops.stop_gradient(iou).clip(0, None).astype(pi.dtype) + # tobj[b, a, gj, gi] = iou * tmask # iou ratio + tobj[b, a, gj, gi] = ( + (1.0 - self.gr) + self.gr * ops.stop_gradient(iou).clip(0, None) + ) * tmask # iou ratio + + # Classification + if self.nc > 1: # cls loss (only if multiple classes) + t = ops.fill(pcls.dtype, pcls.shape, self.cn) # targets + + t[mnp.arange(n), tcls[layer_index]] = self.cp + lcls += self.BCEcls(pcls, t, 
ops.tile(tmask[:, None], (1, t.shape[-1]))) # BCE + + obji = self.BCEobj(pi[..., 4], tobj) + lobj += obji * self.balance[layer_index] # obj loss + + lbox *= self.hyp_box + lobj *= self.hyp_obj + lcls *= self.hyp_cls + bs = p[0].shape[0] # batch size + + loss = lbox + lobj + lcls + + return loss * bs, ops.stop_gradient(ops.stack((loss, lbox, lobj, lcls))) + + def build_targets(self, p, targets): + # Build targets for compute_loss(), input targets(image,class,x,y,w,h) + targets = targets.view(-1, 6) + mask_t = targets[:, 1] >= 0 + na, nt = self.na, targets.shape[0] # number of anchors, targets + tcls, tbox, indices, anch, tmasks = (), (), (), (), () + gain = ops.ones(7, ms.int32) # normalized to gridspace gain + ai = ops.tile(mnp.arange(na).view(-1, 1), (1, nt)) # shape: (na, nt) + ai = ops.cast(ai, targets.dtype) + targets = ops.concat( + (ops.tile(targets, (na, 1, 1)), ai[:, :, None]), 2 + ) # append anchor indices # shape: (na, nt, 7) + + g = 0.5 # bias + off = ops.cast(self._off, targets.dtype) * g # offsets + + for i in range(self.nl): + anchors, shape = self.anchors[i], p[i].shape + gain[2:6] = get_tensor(shape, targets.dtype)[[3, 2, 3, 2]] # xyxy gain + + # Match targets to anchors + t = targets * gain # shape(na,nt,7) # xywhn -> xywh + # Matches + # if nt: + r = t[..., 4:6] / anchors[:, None] # wh ratio + j = ops.maximum(r, 1 / r).max(2) < self.hyp_anchor_t # compare + + # t = t[j] # filter + mask_m_t = ops.logical_and(j, mask_t[None, :]).view(-1) + t = t.view(-1, 7) + + # Offsets + gxy = t[:, 2:4] # grid xy + gxi = gain[[2, 3]] - gxy # inverse + jk = ops.logical_and((gxy % 1 < g), (gxy > 1)) + lm = ops.logical_and((gxi % 1 < g), (gxi > 1)) + j, k = jk[:, 0], jk[:, 1] + l, m = lm[:, 0], lm[:, 1] + + # # original + # j = ops.stack((ops.ones_like(j), j, k, l, m)) # shape: (5, *) + # t = ops.tile(t, (5, 1, 1)) # shape(5, *, 7) + # mask_m_t = (ops.cast(j, ms.int32) * ops.cast(mask_m_t[None, :], ms.int32)).view(-1) + # t = t.view(-1, 7) + # offsets = (ops.zeros_like(gxy)[None, :, :] + off[:, None, :]) #(1,*,2) + (5,1,2) -> (5,*,2) + # offsets = offsets.view(-1, 2) + + # faster, + tag1, tag2 = ops.identity(j), ops.identity(k) + tag1, tag2 = ops.tile(tag1[:, None], (1, 2)), ops.tile(tag2[:, None], (1, 2)) + j_l = ops.logical_or(j, l).astype(ms.int32) + k_m = ops.logical_or(k, m).astype(ms.int32) + center = ops.ones_like(j_l) + j = ops.stack((center, j_l, k_m)) + t = ops.tile(t, (3, 1, 1)) # shape(5, *, 7) + t = t.view(-1, 7) + mask_m_t = (ops.cast(j, ms.int32) * ops.cast(mask_m_t[None, :], ms.int32)).view(-1) + offsets = ops.zeros_like(gxy)[None, :, :] + off[:, None, :] # (1,*,2) + (5,1,2) -> (5,na*nt,2) + offsets_new = ops.zeros((3,) + offsets.shape[1:], offsets.dtype) + # offsets_new[0, :, :] = offsets[0, :, :] + offsets_new[1:2, :, :] = ops.select(tag1.astype(ms.bool_), offsets[1, :, :], offsets[3, :, :]) + offsets_new[2:3, :, :] = ops.select(tag2.astype(ms.bool_), offsets[2, :, :], offsets[4, :, :]) + offsets = offsets_new + offsets = offsets.view(-1, 2) + + # Define + b, c, gxy, gwh, a = ( + ops.cast(t[:, 0], ms.int32), + ops.cast(t[:, 1], ms.int32), + t[:, 2:4], + t[:, 4:6], + ops.cast(t[:, 6], ms.int32), + ) # (image, class), grid xy, grid wh, anchors + gij = ops.cast(gxy - offsets, ms.int32) + gij = gij[:] + gi, gj = gij[:, 0], gij[:, 1] # grid indices + gi = gi.clip(0, shape[3] - 1) + gj = gj.clip(0, shape[2] - 1) + + # Append + indices += (ops.stack((b, a, gj, gi), 0),) # image, anchor, grid + tbox += (ops.concat((gxy - gij, gwh), 1),) # box + anch += (anchors[a],) # anchors + 
tcls += (c,) # class + tmasks += (mask_m_t,) + + return ( + ops.stack(tcls), + ops.stack(tbox), + ops.stack(indices), + ops.stack(anch), + ops.stack(tmasks), + ) # class, box, (image, anchor, gridj, gridi), anchors, mask + + +def xywh2xyxy(x): + # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right + y = ops.Identity()(x) + y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x + y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y + y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x + y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y + return y + + +@ops.constexpr +def get_tensor(x, dtype=ms.float32): + return Tensor(x, dtype) + + +if __name__ == "__main__": + from mindyolo.models.losses.loss_factory import create_loss + from mindyolo.utils.config import parse_config + + cfg = parse_config() + loss_fn = create_loss( + name="YOLOv7Loss", + **cfg.loss, + anchors=cfg.network.get("anchors", None), + stride=cfg.network.get("stride", None), + nc=cfg.data.get("nc", None), + ) + print(f"loss_fn is {loss_fn}") diff --git a/community/cv/ShipWise/mindyolo/models/losses/yolov4_loss.py b/community/cv/ShipWise/mindyolo/models/losses/yolov4_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..51abf2cb3073eb01bf47531bf29598c44a22bd9a --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/losses/yolov4_loss.py @@ -0,0 +1,278 @@ +import numpy as np + +import mindspore as ms +import mindspore.numpy as mnp +from mindspore import Tensor, nn, ops + +from mindyolo.models.registry import register_model +from .focal_loss import BCEWithLogitsLoss, smooth_BCE +from .iou_loss import bbox_iou + +CLIP_VALUE = 1000.0 +EPS = 1e-7 + +__all__ = ["YOLOv4Loss"] + + +class ConfidenceLoss(nn.Cell): + """Loss for confidence.""" + + def __init__(self): + super(ConfidenceLoss, self).__init__() + self.cross_entropy = ops.SigmoidCrossEntropyWithLogits() + self.reduce_sum = ops.ReduceSum() + + def construct(self, object_mask, predict_confidence, ignore_mask): + confidence_loss = self.cross_entropy(predict_confidence, object_mask) + confidence_loss = object_mask * confidence_loss + (1 - object_mask) * confidence_loss * ignore_mask + confidence_loss = self.reduce_sum(confidence_loss, ()) + return confidence_loss + + +@register_model +class YOLOv4Loss(nn.Cell): + def __init__(self, box, obj, cls, label_smoothing, ignore_threshold, iou_threshold, anchors, nc, **kwargs): + super(YOLOv4Loss, self).__init__() + self.ignore_threshold = ignore_threshold + self.iou = Iou() + self.iou_threshold = iou_threshold + self.hyp_box = box + self.hyp_obj = obj + self.hyp_cls = cls + self.nc = nc # number of classes + + anchors = np.array(anchors) + self.na = anchors.shape[0] # number of anchors + self.nl = 3 # number of layers + + self.anchors = Tensor(anchors, ms.float32) # shape(na,2) + + # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 + self.cp, self.cn = smooth_BCE(eps=label_smoothing) # positive, negative BCE targets + + self.BCEobj = ConfidenceLoss() + self.BCEcls = BCEWithLogitsLoss(reduction="sum") + + self.loss_item_name = ["loss", "lbox", "lobj", "lcls"] # branch name returned by lossitem for print + + self.concat = ops.Concat(axis=-1) + self.reduce_max = ops.ReduceMax(keep_dims=False) + + def construct(self, p, targets, imgs): + image_shape = imgs.shape + gain = get_tensor(image_shape, targets.dtype)[[3, 2]] + ori_targets = targets.copy() + lcls, lbox, lobj = 0.0, 0.0, 0.0 + tcls, tbox, indices, anchors, tmasks = self.build_targets( + p, targets, imgs + ) # class, box, 
(image, anchor, gridj, gridi), anchors, mask + tcls, tbox, indices, anchors, tmasks = ( + ops.stop_gradient(tcls), + ops.stop_gradient(tbox), + ops.stop_gradient(indices), + ops.stop_gradient(anchors), + ops.stop_gradient(tmasks), + ) + + # Losses + for layer_index, yolo_out in enumerate(p): # layer index, layer predictions + pi = yolo_out[0] + tmask = tmasks[layer_index] + b, a, gj, gi = ops.split(indices[layer_index] * tmask[None, :], split_size_or_sections=1, axis=0) # image, anchor, gridy, gridx + b, a, gj, gi = b.view(-1), a.view(-1), gj.view(-1), gi.view(-1) + + pi_shape = pi.shape + y_true = ops.zeros((pi_shape[0], pi_shape[1], pi_shape[2], pi_shape[3], 1), pi.dtype) + y_true[b, gj, gi, a][:, 0] = 1.0 + + n = b.shape[0] # number of targets + if n: + pxy = yolo_out[1][b, gj, gi, a] + pwh = yolo_out[2][b, gj, gi, a] + _meta_pred = pi[b, gj, gi, a] # gather from (bs,na,h,w,nc) + pcls = _meta_pred[:, 5:] + + # Regression + pbox = ops.concat((pxy, pwh), 1) # predicted box + iou = bbox_iou(pbox, tbox, GIoU=True).squeeze() # iou(prediction, target) + # iou = iou * tmask + # lbox += ((1.0 - iou) * tmask).mean() # iou loss + box_loss_scale = 2 - tbox[:, 2] * tbox[:, 3] / gain[0] / gain[1] + lbox += (((1.0 - iou) * tmask * box_loss_scale).sum()).astype(iou.dtype) + + # Classification + if self.nc > 1: # cls loss (only if multiple classes) + t = ops.fill(pcls.dtype, pcls.shape, self.cn) # targets + + t[mnp.arange(n), tcls] = self.cp + lcls += self.BCEcls(pcls, t, ops.tile(tmask[:, None], (1, t.shape[-1]))) # BCE + + gt_box = ori_targets[:, :, 2:] + pred_boxes = self.concat((yolo_out[1], yolo_out[2])) + gt_shape = ops.Shape()(gt_box) + gt_box = ops.Reshape()(gt_box, (gt_shape[0], 1, 1, 1, gt_shape[1], gt_shape[2])) + iou = self.iou(ops.ExpandDims()(pred_boxes, -2), gt_box) + best_iou = self.reduce_max(iou, -1) + ignore_mask = best_iou < self.ignore_threshold + ignore_mask = ops.Cast()(ignore_mask, ms.float32) + ignore_mask = ops.ExpandDims()(ignore_mask, -1) + ignore_mask = ops.stop_gradient(ignore_mask) + object_mask = y_true[:, :, :, :, 0:1] + lobj += self.BCEobj(object_mask, pi[:, :, :, :, 4:5], ignore_mask) # obj loss + + lbox *= self.hyp_box + lobj *= self.hyp_obj + lcls *= self.hyp_cls + bs = p[0][0].shape[0] # batch size + + loss = lbox + lobj + lcls + + # ops.stack doesn't support type ms.float16 under ascend ms2.0, + # refer to issue #154 (https://github.com/mindspore-lab/mindyolo/issues/154) + return loss / bs / 8, ops.stop_gradient(ops.stack( + (loss.astype(ms.float32) / bs, + lbox.astype(ms.float32) / bs, + lobj.astype(ms.float32) / bs, + lcls.astype(ms.float32) / bs) + )) + + def build_targets(self, p, targets, imgs): + # Build targets for compute_loss(), input targets(image,class,x,y,w,h) + image_shape = imgs.shape + targets = targets.view(-1, 6) + mask_t = targets[:, 1] >= 0 + na, nt = self.na, targets.shape[0] # number of anchors, targets + indices, anch, tmasks = (), (), () + gain_wh = ops.ones(7, ms.int32) # normalized to gridspace gain + ai = ops.tile(mnp.arange(na).view(-1, 1), (1, nt)) # shape: (na, nt) + ai = ops.cast(ai, targets.dtype) + targets_9_anchors = ops.concat( + (ops.tile(targets, (na, 1, 1)), ai[:, :, None]), 2 + ) # append anchor indices # shape: (na, nt, 7) + + gain_wh[4:6] = get_tensor(image_shape, targets_9_anchors.dtype)[[3, 2]] # xyxy gain + + # Match targets to anchors + t_wh = targets_9_anchors * gain_wh + # Matches + gt_box = ops.zeros((na, nt, 4), ms.float32) + gt_box[..., 2:] = t_wh[..., 4:6] + + anchor_shapes = ops.zeros((na, 1, 4), ms.float32) + 
anchor_shapes[..., 2:] = ops.ExpandDims()(self.anchors, 1) + anch_ious = bbox_iou(gt_box, anchor_shapes).squeeze() + + j = anch_ious == anch_ious.max(axis=0) + l = anch_ious > self.iou_threshold + + j_l = ops.logical_or(j, l).astype(ms.int32).reshape((self.nl, -1, nt)) + + anchor_scales = self.anchors.reshape((self.nl, -1, 2)) + ai = ops.tile(mnp.arange(na // self.nl).view(-1, 1), (1, nt)) # shape: (na, nt) + ai = ops.cast(ai, targets.dtype) + targets_3_anchors = ops.concat((ops.tile(targets, (na // self.nl, 1, 1)), ai[:, :, None]), 2) + for i in range(self.nl): + anchors, shape = anchor_scales[i], p[i][0].shape + gain_xy = ops.ones(7, ms.int32) # normalized to gridspace gain + gain_xy[2:4] = get_tensor(shape, targets_3_anchors.dtype)[[2, 1]] # xyxy gain + + t = targets_3_anchors * gain_xy + mask_m_t = (j_l[i] * ops.cast(mask_t[None, :], ms.int32)).view(-1) + t = t.view(-1, 7) + + # Define + b, gxy, a = ( + ops.cast(t[:, 0], ms.int32), + t[:, 2:4], + ops.cast(t[:, 6], ms.int32), + ) # (image, class), grid xy, grid wh, anchors + gij = ops.cast(gxy, ms.int32) + gij = gij[:] + gi, gj = gij[:, 0], gij[:, 1] # grid indices + gi = gi.clip(0, shape[2] - 1) + gj = gj.clip(0, shape[1] - 1) + + # Append + indices += (ops.stack((b, a, gj, gi), 0),) # image, anchor, grid + anch += (anchors[a],) # anchors + tmasks += (mask_m_t,) + + targets_3_anchors = targets_3_anchors.view(-1, 7) + tcls = ops.cast(targets_3_anchors[:, 1], ms.int32) # class + tbox = targets_3_anchors[:, 2:6] # box + + return ( + tcls, + tbox, + ops.stack(indices), + ops.stack(anch), + ops.stack(tmasks), + ) # class, box, (image, anchor, gridj, gridi), anchors, mask + + +def xywh2xyxy(x): + # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right + y = ops.Identity()(x) + y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x + y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y + y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x + y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y + return y + + +@ops.constexpr +def get_tensor(x, dtype=ms.float32): + return Tensor(x, dtype) + + +class Iou(nn.Cell): + """Calculate the iou of boxes""" + + def __init__(self): + super(Iou, self).__init__() + self.min = ops.Minimum() + self.max = ops.Maximum() + + def construct(self, box1, box2): + """ + box1: pred_box [batch, gx, gy, anchors, 1, 4] ->4: [x_center, y_center, w, h] + box2: gt_box [batch, 1, 1, 1, maxbox, 4] + convert to topLeft and rightDown + """ + box1_xy = box1[:, :, :, :, :, :2] + box1_wh = box1[:, :, :, :, :, 2:4] + box1_mins = box1_xy - box1_wh / ops.scalar_to_tensor(2.0) # topLeft + box1_maxs = box1_xy + box1_wh / ops.scalar_to_tensor(2.0) # rightDown + + box2_xy = box2[:, :, :, :, :, :2] + box2_wh = box2[:, :, :, :, :, 2:4] + box2_mins = box2_xy - box2_wh / ops.scalar_to_tensor(2.0) + box2_maxs = box2_xy + box2_wh / ops.scalar_to_tensor(2.0) + + intersect_mins = self.max(box1_mins, box2_mins) + intersect_maxs = self.min(box1_maxs, box2_maxs) + intersect_wh = self.max(intersect_maxs - intersect_mins, ops.scalar_to_tensor(0.0)) + # P.squeeze: for effiecient slice + intersect_area = ops.Squeeze(-1)(intersect_wh[:, :, :, :, :, 0:1]) * ops.Squeeze(-1)( + intersect_wh[:, :, :, :, :, 1:2] + ) + box1_area = ops.Squeeze(-1)(box1_wh[:, :, :, :, :, 0:1]) * ops.Squeeze(-1)(box1_wh[:, :, :, :, :, 1:2]) + box2_area = ops.Squeeze(-1)(box2_wh[:, :, :, :, :, 0:1]) * ops.Squeeze(-1)(box2_wh[:, :, :, :, :, 1:2]) + iou = intersect_area / (box1_area + box2_area - intersect_area) + # iou : [batch, gx, gy, anchors, maxboxes] + return iou 
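+
+# A rough shape walk-through for the Iou cell above (illustrative note, not used
+# by the training code): with batch=2, a 13x13 grid, 3 anchors and maxbox=50,
+#     box1 (pred): (2, 13, 13, 3, 1, 4)    box2 (gt): (2, 1, 1, 1, 50, 4)
+# broadcast together, so the intersect/area tensors come out as (2, 13, 13, 3, 50)
+# and the returned iou holds one value per (grid cell, anchor, gt box) pair.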
+ + +if __name__ == "__main__": + from mindyolo.models.losses.loss_factory import create_loss + from mindyolo.utils.config import parse_config + + cfg = parse_config() + loss_fn = create_loss( + name="YOLOv7Loss", + **cfg.loss, + anchors=cfg.network.get("anchors", None), + stride=cfg.network.get("stride", None), + nc=cfg.data.get("nc", None), + ) + print(f"loss_fn is {loss_fn}") diff --git a/community/cv/ShipWise/mindyolo/models/losses/yolov5_loss.py b/community/cv/ShipWise/mindyolo/models/losses/yolov5_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..890cefef5f50e9626899932918f04b6766919e07 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/losses/yolov5_loss.py @@ -0,0 +1,231 @@ +import numpy as np + +import mindspore as ms +import mindspore.numpy as mnp +from mindspore import Parameter, Tensor, nn, ops + +from mindyolo.models.registry import register_model +from .focal_loss import BCEWithLogitsLoss, FocalLoss, smooth_BCE +from .iou_loss import bbox_iou + +__all__ = ["YOLOv5Loss"] + + +@register_model +class YOLOv5Loss(nn.Cell): + # Compute losses + def __init__( + self, box, obj, cls, anchor_t, label_smoothing, fl_gamma, cls_pw, obj_pw, anchors, stride, nc, **kwargs + ): + super(YOLOv5Loss, self).__init__() + + self.sort_obj_iou = False + self.hyp_anchor_t = anchor_t + self.hyp_box = box + self.hyp_obj = obj + self.hyp_cls = cls + self.nc = nc # number of classes + self.na = len(anchors[0]) // 2 # number of anchors + self.nl = len(anchors) # number of layers + stride = np.array(stride) + anchors = np.array(anchors).reshape((self.nl, -1, 2)) + anchors = anchors / stride.reshape((-1, 1, 1)) + self.stride = Tensor(stride, ms.int32) + self.anchors = Tensor(anchors, ms.float32) # shape(nl,na,2) + + # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 + self.cp, self.cn = smooth_BCE(eps=label_smoothing) # positive, negative BCE targets + + # Focal loss + g = fl_gamma # focal loss gamma + if g > 0: + BCEcls, BCEobj = FocalLoss(bce_pos_weight=Tensor([cls_pw], ms.float32), gamma=g), FocalLoss( + bce_pos_weight=Tensor([obj_pw], ms.float32), gamma=g + ) + else: + # Define criteria + BCEcls = BCEWithLogitsLoss(bce_pos_weight=Tensor(np.array([cls_pw]), ms.float32)) + BCEobj = BCEWithLogitsLoss(bce_pos_weight=Tensor(np.array([obj_pw]), ms.float32)) + + _balance = {3: [4.0, 1.0, 0.4]}.get(self.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 + self.balance = Parameter(Tensor(_balance, ms.float32), requires_grad=False) + self.BCEcls, self.BCEobj, self.gr = BCEcls, BCEobj, 1.0 + + self._off = Tensor( + [ + [0, 0], + [1, 0], + [0, 1], + [-1, 0], + [0, -1], # j,k,l,m + # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm + ], + dtype=ms.float32, + ) + + self.loss_item_name = ["loss", "lbox", "lobj", "lcls"] # branch name returned by loss for print + + def scatter_index_tensor(self, x, index): + x_tmp = ops.transpose(x.reshape((-1, x.shape[-1])), (1, 0)) + res = x_tmp[index].reshape(x.shape[:-1]) + return res + + def construct(self, p, targets, imgs): # predictions, targets + lcls, lbox, lobj = 0.0, 0.0, 0.0 + + tcls, tbox, indices, anchors, tmasks = self.build_targets( + p, targets + ) # class, box, (image, anchor, gridj, gridi), anchors, mask + tcls, tbox, indices, anchors, tmasks = ( + ops.stop_gradient(tcls), + ops.stop_gradient(tbox), + ops.stop_gradient(indices), + ops.stop_gradient(anchors), + ops.stop_gradient(tmasks), + ) + + # Losses + for layer_index, pi in enumerate(p): # layer index, layer predictions + pi = ops.cast(pi, ms.float32) + tmask = 
tmasks[layer_index] + b, a, gj, gi = ops.split(indices[layer_index] * tmask[None, :], split_size_or_sections=1, axis=0) # image, anchor, gridy, gridx + b, a, gj, gi = b.view(-1), a.view(-1), gj.view(-1), gi.view(-1) + tobj = ops.zeros(pi.shape[:4], pi.dtype) # target obj + + n = b.shape[0] # number of targets + if n: + _meta_pred = pi[b, a, gj, gi] # gather from (bs,na,h,w,nc) + pxy, pwh, _, pcls = _meta_pred[:, :2], _meta_pred[:, 2:4], _meta_pred[:, 4:5], _meta_pred[:, 5:] + + # Regression + pxy = ops.Sigmoid()(pxy) * 2 - 0.5 + pwh = (ops.Sigmoid()(pwh) * 2) ** 2 * anchors[layer_index] + pbox = ops.concat((pxy, pwh), 1) # predicted box + iou = bbox_iou(pbox, tbox[layer_index], CIoU=True).squeeze() # iou(prediction, target) + lbox += ((1.0 - iou) * tmask).sum() / tmask.astype(iou.dtype).sum() # iou loss + + # Objectness + iou = ops.stop_gradient(iou).clip(0, None) + if self.sort_obj_iou: + _, j = ops.sort(iou) + b, a, gj, gi, iou, tmask = b[j], a[j], gj[j], gi[j], iou[j], tmask[j] + if self.gr < 1: + iou = (1.0 - self.gr) + self.gr * iou + tobj[b, a, gj, gi] = ops.stop_gradient(iou) * tmask # iou ratio + + # Classification + if self.nc > 1: # cls loss (only if multiple classes) + t = ops.fill(pcls.dtype, pcls.shape, self.cn) # targets + + t[mnp.arange(n), tcls[layer_index]] = self.cp + lcls += self.BCEcls(pcls, t, ops.tile(tmask[:, None], (1, t.shape[-1]))) # BCE + + # obji = self.BCEobj(pi[..., 4], tobj) + obji = self.BCEobj(self.scatter_index_tensor(pi, 4), tobj) + lobj += obji * self.balance[layer_index] # obj loss + + lbox *= self.hyp_box + lobj *= self.hyp_obj + lcls *= self.hyp_cls + bs = p[0].shape[0] # batch size + + loss = lbox + lobj + lcls + loss_item = ops.stop_gradient(ops.stack((loss, lbox, lobj, lcls))) + return loss * bs, loss_item + + def build_targets(self, p, targets): + # Build targets for compute_loss(), input targets(image,class,x,y,w,h) + targets = targets.view(-1, 6) + mask_t = targets[:, 1] >= 0 + na, nt = self.na, targets.shape[0] # number of anchors, targets + tcls, tbox, indices, anch, tmasks = (), (), (), (), () + gain = ops.ones(7, ms.int32) # normalized to gridspace gain + ai = ops.tile(mnp.arange(na).view(-1, 1), (1, nt)) # shape: (na, nt) + ai = ops.cast(ai, targets.dtype) + targets = ops.concat( + (ops.tile(targets, (na, 1, 1)), ai[:, :, None]), 2 + ) # append anchor indices # shape: (na, nt, 7) + + g = 0.5 # bias + off = ops.cast(self._off, targets.dtype) * g # offsets + + for i in range(self.nl): + anchors, shape = self.anchors[i], p[i].shape + gain[2:6] = get_tensor(shape, targets.dtype)[[3, 2, 3, 2]] # xyxy gain + + # Match targets to anchors + t = targets * gain # shape(na,nt,7) # xywhn -> xywh + # Matches + r = t[..., 4:6] / anchors[:, None] # wh ratio + j = ops.maximum(r, 1 / r).max(2) < self.hyp_anchor_t # compare + + mask_m_t = ops.logical_and(j, mask_t[None, :]).view(-1) + t = t.view(-1, 7) + + # Offsets + gxy = t[:, 2:4] # grid xy + gxi = gain[[2, 3]] - gxy # inverse + jk = ops.logical_and((gxy % 1 < g), (gxy > 1)) # .astype(ms.int32) + lm = ops.logical_and((gxi % 1 < g), (gxi > 1)) # .astype(ms.int32) + j, k = jk[:, 0], jk[:, 1] + l, m = lm[:, 0], lm[:, 1] + + # Original + # j = ops.stack((ops.ones_like(j), j, k, l, m)) # shape: (5, *) + # t = ops.tile(t, (5, 1, 1)) # shape(5, *, 7) + # t = t.view(-1, 7) + # mask_m_t = (ops.cast(j, ms.int32) * ops.cast(mask_m_t[None, :], ms.int32)).view(-1) + # # t = t.repeat((5, 1, 1))[j] + # offsets = (ops.zeros_like(gxy)[None, :, :] + off[:, None, :]) #(1,*,2) + (5,1,2) -> (5,*,2) + # offsets = 
offsets.view(-1, 2) + + # Faster + tag1, tag2 = ops.identity(j), ops.identity(k) + tag1, tag2 = ops.tile(tag1[:, None], (1, 2)), ops.tile(tag2[:, None], (1, 2)) + j_l = ops.logical_or(j, l).astype(ms.int32) + k_m = ops.logical_or(k, m).astype(ms.int32) + center = ops.ones_like(j_l) + j = ops.stack((center, j_l, k_m)) + t = ops.tile(t, (3, 1, 1)) # shape(5, *, 7) + t = t.view(-1, 7) + mask_m_t = (ops.cast(j, ms.int32) * ops.cast(mask_m_t[None, :], ms.int32)).view(-1) + offsets = ops.zeros_like(gxy)[None, :, :] + off[:, None, :] # (1,*,2) + (5,1,2) -> (5,na*nt,2) + offsets_new = ops.zeros((3,) + offsets.shape[1:], offsets.dtype) + offsets_new[1:2, :, :] = ops.select(tag1.astype(ms.bool_), offsets[1, :, :], offsets[3, :, :]) + offsets_new[2:3, :, :] = ops.select(tag2.astype(ms.bool_), offsets[2, :, :], offsets[4, :, :]) + offsets = offsets_new + offsets = offsets.view(-1, 2) + + # Define + b, c, gxy, gwh, a = ( + ops.cast(t[:, 0], ms.int32), + ops.cast(t[:, 1], ms.int32), + t[:, 2:4], + t[:, 4:6], + ops.cast(t[:, 6], ms.int32), + ) # (image, class), grid xy, grid wh, anchors + gij = ops.cast(gxy - offsets, ms.int32) + gij = gij[:] + gi, gj = gij[:, 0], gij[:, 1] # grid indices + gi = gi.clip(0, shape[3] - 1) + gj = gj.clip(0, shape[2] - 1) + + # Append + indices += (ops.stack((b, a, gj, gi), 0),) # image, anchor, grid + tbox += (ops.concat((gxy - gij, gwh), 1),) # box + anch += (anchors[a],) # anchors + tcls += (c,) # class + tmasks += (mask_m_t,) + + return ( + ops.stack(tcls), + ops.stack(tbox), + ops.stack(indices), + ops.stack(anch), + ops.stack(tmasks), + ) # class, box, (image, anchor, gridj, gridi), anchors, mask + + +@ops.constexpr +def get_tensor(x, dtype=ms.float32): + return Tensor(x, dtype) diff --git a/community/cv/ShipWise/mindyolo/models/losses/yolov7_loss.py b/community/cv/ShipWise/mindyolo/models/losses/yolov7_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..46258369fc387ea9a6d962afb909cab54d801799 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/losses/yolov7_loss.py @@ -0,0 +1,1018 @@ +import numpy as np + +import mindspore as ms +import mindspore.numpy as mnp +from mindspore import Parameter, Tensor, nn, ops + +from mindyolo.models.registry import register_model +from .focal_loss import BCEWithLogitsLoss, FocalLoss, smooth_BCE +from .iou_loss import batch_box_iou, bbox_iou + +CLIP_VALUE = 1000.0 +EPS = 1e-7 + +__all__ = ["YOLOv7Loss", "YOLOv7AuxLoss"] + + +@register_model +class YOLOv7Loss(nn.Cell): + def __init__( + self, box, obj, cls, anchor_t, label_smoothing, fl_gamma, cls_pw, obj_pw, anchors, stride, nc, **kwargs + ): + super(YOLOv7Loss, self).__init__() + self.hyp_box = box + self.hyp_obj = obj + self.hyp_cls = cls + self.hyp_anchor_t = anchor_t + self.nc = nc # number of classes + self.na = len(anchors[0]) // 2 # number of anchors + self.nl = len(anchors) # number of layers + + stride = np.array(stride) + anchors = np.array(anchors).reshape((self.nl, -1, 2)) + anchors = anchors / stride.reshape((-1, 1, 1)) + self.stride = Tensor(stride, ms.int32) + self.anchors = Tensor(anchors, ms.float32) # shape(nl,na,2) + + # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 + self.cp, self.cn = smooth_BCE(eps=label_smoothing) # positive, negative BCE targets + # Focal loss + g = fl_gamma # focal loss gamma + if g > 0: + BCEcls, BCEobj = FocalLoss(bce_pos_weight=Tensor([cls_pw], ms.float32), gamma=g), FocalLoss( + bce_pos_weight=Tensor([obj_pw], ms.float32), gamma=g + ) + else: + # Define criteria + BCEcls = 
BCEWithLogitsLoss(bce_pos_weight=Tensor(np.array([cls_pw]), ms.float32)) + BCEobj = BCEWithLogitsLoss(bce_pos_weight=Tensor(np.array([obj_pw]), ms.float32)) + + _balance = {3: [4.0, 1.0, 0.4]}.get(self.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 + self.balance = ms.Parameter(Tensor(_balance, ms.float32), requires_grad=False) + self.BCEcls, self.BCEobj, self.gr = BCEcls, BCEobj, 1.0 + + self._off = Tensor( + [ + [0, 0], + [1, 0], + [0, 1], + [-1, 0], + [0, -1], # j,k,l,m + ], + dtype=ms.float32, + ) + + self.loss_item_name = ["loss", "lbox", "lobj", "lcls"] # branch name returned by lossitem for print + + def construct(self, p, targets, imgs): + lcls, lbox, lobj = 0.0, 0.0, 0.0 + bs, as_, gjs, gis, targets, anchors, tmasks = self.build_targets(p, targets, imgs) # bs: (nl, bs*5*na*gt_max) + bs, as_, gjs, gis, targets, anchors, tmasks = ( + ops.stop_gradient(bs), + ops.stop_gradient(as_), + ops.stop_gradient(gjs), + ops.stop_gradient(gis), + ops.stop_gradient(targets), + ops.stop_gradient(anchors), + ops.stop_gradient(tmasks), + ) + + pre_gen_gains = () + for pp in p: + pre_gen_gains += (get_tensor(pp.shape, targets.dtype)[[3, 2, 3, 2]],) + + # Losses + # for i, pi in enumerate(p): # layer index, layer predictions + for i in range(self.nl): # layer index + pi = p[i] # layer predictions + b, a, gj, gi, tmask = bs[i], as_[i], gjs[i], gis[i], tmasks[i] # image, anchor, gridy, gridx, tmask + tobj = ops.zeros_like(pi[..., 0]) # target obj + + n = b.shape[0] # number of targets + ps = pi[b, a, gj, gi] # prediction subset corresponding to targets + + # Regression + grid = ops.stack([gi, gj], axis=1) + pxy = ops.Sigmoid()(ps[:, :2]) * 2.0 - 0.5 + pwh = (ops.Sigmoid()(ps[:, 2:4]) * 2) ** 2 * anchors[i] + pbox = ops.concat((pxy, pwh), 1) # predicted box + selected_tbox = targets[i][:, 2:6] * pre_gen_gains[i] + selected_tbox[:, :2] -= grid + iou = bbox_iou(pbox, selected_tbox, xywh=True, CIoU=True).view(-1) + lbox += ((1.0 - iou) * tmask).sum() / tmask.astype(iou.dtype).sum().clip(1, None) # iou loss + + # Objectness + tobj[b, a, gj, gi] = ((1.0 - self.gr) + self.gr * ops.stop_gradient(iou).clip(0, None)) * tmask # iou ratio + + # Classification + selected_tcls = ops.cast(targets[i][:, 1], ms.int32) + if self.nc > 1: # cls loss (only if multiple classes) + t = ops.ones_like(ps[:, 5:]) * self.cn # targets + t[mnp.arange(n, dtype=ms.int32), selected_tcls] = self.cp + lcls += self.BCEcls(ps[:, 5:], t, ops.tile(tmask[:, None], (1, t.shape[1]))) # BCE + + obji = self.BCEobj(pi[..., 4], tobj) + lobj += obji * self.balance[i] # obj loss + + lbox *= self.hyp_box + lobj *= self.hyp_obj + lcls *= self.hyp_cls + bs = p[0].shape[0] # batch size + + loss = lbox + lobj + lcls + return loss * bs, ops.stop_gradient(ops.stack((loss, lbox, lobj, lcls))) + + def build_targets(self, p, targets, imgs): + indices, anch, tmasks = self.find_3_positive(p, targets) + + na, n_gt_max = self.na, targets.shape[1] + nl, batch_size, img_size = len(p), p[0].shape[0], imgs[0].shape[1] + + this_target = targets.view(-1, 6) + + txywh = this_target[:, 2:6] * img_size + txyxy = xywh2xyxy(txywh) + txyxy = txyxy.view(batch_size, n_gt_max, 4) + this_target = this_target.view(batch_size, n_gt_max, 6) + this_mask = this_target[:, :, 1] >= 0 # (bs, gt_max) + + pxyxys = () + p_cls = () + p_obj = () + all_b = () + all_a = () + all_gj = () + all_gi = () + all_anch = () + all_tmasks = () + + # for i, pi in enumerate(p): + for i in range(self.nl): + pi = p[i] + _this_indices = indices[i].view(4, 3 * na, batch_size, n_gt_max).transpose(0, 2, 1, 
3).view(4, -1) + _this_anch = anch[i].view(3 * na, batch_size, n_gt_max * 2).transpose(1, 0, 2).view(-1, 2) + _this_mask = tmasks[i].view(3 * na, batch_size, n_gt_max).transpose(1, 0, 2).view(-1) + + _this_indices *= _this_mask[None, :] + _this_anch *= _this_mask[:, None] + + b, a, gj, gi = ops.split(_this_indices, split_size_or_sections=1, axis=0) + b, a, gj, gi = b.view(-1), a.view(-1), gj.view(-1), gi.view(-1) + + fg_pred = pi[b, a, gj, gi] + p_obj += (fg_pred[:, 4:5].view(batch_size, 3 * na * n_gt_max, 1),) + p_cls += (fg_pred[:, 5:].view(batch_size, 3 * na * n_gt_max, -1),) + + grid = ops.stack((gi, gj), axis=1) + pxy = (ops.Sigmoid()(fg_pred[:, :2]) * 2.0 - 0.5 + grid) * self.stride[i] # / 8. + pwh = (ops.Sigmoid()(fg_pred[:, 2:4]) * 2) ** 2 * _this_anch * self.stride[i] # / 8. + pxywh = ops.concat((pxy, pwh), axis=-1) + pxyxy = xywh2xyxy(pxywh) + + b, a, gj, gi, pxyxy, _this_anch, _this_mask = ( + b.view(batch_size, -1), + a.view(batch_size, -1), + gj.view(batch_size, -1), + gi.view(batch_size, -1), + pxyxy.view(batch_size, -1, 4), + _this_anch.view(batch_size, -1, 2), + _this_mask.view(batch_size, -1), + ) + all_b += (b,) + all_a += (a,) + all_gj += (gj,) + all_gi += (gi,) + pxyxys += (pxyxy,) + all_anch += (_this_anch,) + all_tmasks += (_this_mask,) + + pxyxys = ops.concat(pxyxys, axis=1) # nl * (bs, 5*na*gt_max, 4) -> cat -> (bs, c, 4) # nt = bs * gt_max + p_obj = ops.concat(p_obj, axis=1) + p_cls = ops.concat(p_cls, axis=1) # nl * (bs, 5*na*gt_max, 80) -> (bs, nl*5*na*gt_max, 80) + all_b = ops.concat(all_b, axis=1) # nl * (bs, 5*na*gt_max) -> (bs, nl*5*na*gt_max) + all_a = ops.concat(all_a, axis=1) + all_gj = ops.concat(all_gj, axis=1) + all_gi = ops.concat(all_gi, axis=1) + all_anch = ops.concat(all_anch, axis=1) + all_tmasks = ops.concat(all_tmasks, axis=1) # (bs, nl*5*na*gt_max) + + this_mask = all_tmasks[:, None, :] * this_mask[:, :, None] # (bs, gt_max, nl*5*na*gt_max,) + + # (bs, gt_max, 4), (bs, nl*5*na*gt_max, 4) -> (bs, gt_max, nl*5*na*gt_max) + pair_wise_iou = batch_box_iou(txyxy, pxyxys) * this_mask # (bs, gt_max, nl*5*na*gt_max,) + pair_wise_iou_loss = -ops.log(pair_wise_iou + EPS) + + v, _ = ops.top_k(pair_wise_iou, 10) # (bs, gt_max, 10) + dynamic_ks = ops.cast(v.sum(-1).clip(1, 10), ms.int32) # (bs, gt_max) + + # (bs, gt_max, 80) + gt_cls_per_image = ops.one_hot( + indices=ops.cast(this_target[:, :, 1], ms.int32), + depth=self.nc, + on_value=ops.ones(1, p_cls.dtype), + off_value=ops.zeros(1, p_cls.dtype), + ) + # (bs, gt_max, nl*5*na*gt_max, 80) + gt_cls_per_image = ops.tile( + ops.expand_dims(ops.cast(gt_cls_per_image, p_cls.dtype), 2), (1, 1, pxyxys.shape[1], 1) + ) + + cls_preds_ = ops.sqrt(ops.Sigmoid()(p_cls) * ops.Sigmoid()(p_obj)) + cls_preds_ = ops.tile( + ops.expand_dims(cls_preds_, 1), (1, n_gt_max, 1, 1) + ) # (bs, nl*5*na*gt_max, 80) -> (bs, gt_max, nl*5*na*gt_max, 80) + y = cls_preds_ + + pair_wise_cls_loss = ops.binary_cross_entropy_with_logits( + ops.log(y / (1 - y) + EPS), + gt_cls_per_image, + ops.ones(1, cls_preds_.dtype), + ops.ones(1, cls_preds_.dtype), + reduction="none", + ).sum( + -1 + ) # (bs, gt_max, nl*5*na*gt_max) + + cost = pair_wise_cls_loss + 3.0 * pair_wise_iou_loss + cost = cost * this_mask + cost += CLIP_VALUE * (1.0 - ops.cast(this_mask, cost.dtype)) + + sort_cost, sort_idx = ops.top_k(-cost, 10, sorted=True) # (bs, gt_max, 10) + sort_cost = -sort_cost + pos_idx = ops.stack((mnp.arange(batch_size * n_gt_max, dtype=ms.int32), dynamic_ks.view(-1) - 1), -1) + pos_v = ops.gather_nd(sort_cost.view(batch_size * n_gt_max, 10), 
pos_idx).view(batch_size, n_gt_max) + matching_matrix = ops.cast(cost <= pos_v[:, :, None], ms.int32) * this_mask + + # delete reduplicate match label, one anchor only match one gt + cost_argmin = mnp.argmin(cost, axis=1) # (bs, nl*5*na*gt_max) + anchor_matching_gt_mask = ops.one_hot( + cost_argmin, n_gt_max, ops.ones(1, ms.float16), ops.zeros(1, ms.float16), axis=-1 + ).transpose( + 0, 2, 1 + ) # (bs, gt_max, nl*5*na*gt_max) + matching_matrix = matching_matrix * ops.cast(anchor_matching_gt_mask, matching_matrix.dtype) + + fg_mask_inboxes = ( + matching_matrix.astype(ms.float16).sum(1) > 0.0 + ) # (bs, gt_max, nl*5*na*gt_max) -> (bs, nl*5*na*gt_max) + all_tmasks = all_tmasks * ops.cast(fg_mask_inboxes, ms.int32) # (bs, nl*5*na*gt_max) + matched_gt_inds = matching_matrix.argmax(1).astype(ms.int32) # (bs, gt_max, nl*5*na*gt_max) -> (bs, nl*5*na*gt_max) + matched_bs_inds = ops.tile( + mnp.arange(batch_size, dtype=ms.int32)[:, None], (1, matching_matrix.shape[2]) + ) # (bs, nl*5*na*gt_max) + matched_inds = ops.stack((matched_bs_inds.view(-1), matched_gt_inds.view(-1)), 1) # (bs*nl*5*na*gt_max, 2) + matched_inds *= all_tmasks.view(-1)[:, None] + this_target = ops.gather_nd(this_target, matched_inds) # (bs*nl*5*na*gt_max, 6) + # this_target = this_target.view(-1, 6)[matched_gt_inds.view(-1,)] # (bs*nl*5*na*gt_max, 6) + + # (bs, nl*5*na*gt_max,) -> (bs, nl, 5*na*gt_max) -> (nl, bs*5*na*gt_max) + matching_tmasks = all_tmasks.view(batch_size, nl, -1).transpose(1, 0, 2).view(nl, -1) + matching_bs = all_b.view(batch_size, nl, -1).transpose(1, 0, 2).view(nl, -1) * matching_tmasks + matching_as = all_a.view(batch_size, nl, -1).transpose(1, 0, 2).view(nl, -1) * matching_tmasks + matching_gjs = all_gj.view(batch_size, nl, -1).transpose(1, 0, 2).view(nl, -1) * matching_tmasks + matching_gis = all_gi.view(batch_size, nl, -1).transpose(1, 0, 2).view(nl, -1) * matching_tmasks + matching_targets = ( + this_target.view(batch_size, nl, -1, 6).transpose(1, 0, 2, 3).view(nl, -1, 6) * matching_tmasks[..., None] + ) + matching_anchs = ( + all_anch.view(batch_size, nl, -1, 2).transpose(1, 0, 2, 3).view(nl, -1, 2) * matching_tmasks[..., None] + ) + + return matching_bs, matching_as, matching_gjs, matching_gis, matching_targets, matching_anchs, matching_tmasks + + def find_3_positive(self, p, targets): + # Build targets for compute_loss(), input targets(image,class,x,y,w,h) + targets = targets.view(-1, 6) # (bs, gt_max, 6) -> (bs*gt_max, 6) + mask_t = targets[:, 1] >= 0 # (bs*gt_max,) + na, nt = self.na, targets.shape[0] # number of anchors, targets + indices, anch, tmasks = (), (), () + gain = ops.ones(7, ms.int32) # normalized to gridspace gain + ai = ops.tile(mnp.arange(na, dtype=targets.dtype).view(na, 1), (1, nt)) # shape: (na, nt) + targets = ops.concat((ops.tile(targets, (na, 1, 1)), ai[:, :, None]), 2) # append anchor indices # (na, nt, 7) + + g = 0.5 # bias + off = ops.cast(self._off, targets.dtype) * g # offsets + + for i in range(self.nl): + anchors, shape = self.anchors[i], p[i].shape + gain[2:6] = get_tensor(shape, targets.dtype)[[3, 2, 3, 2]] # xyxy gain # [W, H, W, H] + + # Match targets to anchors + t = targets * gain # (na, nt, 7) + # Matches + r = t[:, :, 4:6] / anchors[:, None, :] # wh ratio + j = ops.maximum(r, 1.0 / r).max(2) < self.hyp_anchor_t # compare # (na, nt) + + mask_m_t = ops.logical_and(j, mask_t[None, :]).view(-1) + t = t.view(-1, 7) # (na*nt, 7) + + # Offsets + gxy = t[:, 2:4] # grid xy + gxi = gain[[2, 3]] - gxy # inverse + jk = ops.logical_and((gxy % 1.0 < g), (gxy > 1.0)) + lm = 
ops.logical_and((gxi % 1.0 < g), (gxi > 1.0)) + j, k = jk[:, 0], jk[:, 1] + l, m = lm[:, 0], lm[:, 1] + + # original + # j = ops.stack((ops.ones_like(j), j, k, l, m)) # shape: (5, *) + # t = ops.tile(t, (5, 1, 1)) # shape(5, *, 7) + # t = t.view(-1, 7) + # mask_m_t = (ops.cast(j, ms.int32) * ops.cast(mask_m_t[None, :], ms.int32)).view(-1) + # # t = t.repeat((5, 1, 1))[j] + # offsets = (ops.zeros_like(gxy)[None, :, :] + off[:, None, :]) # (1,*,2) + (5,1,2) -> (5,na*nt,2) + # offsets = offsets.view(-1, 2) # (5*na*nt, 2) + # # offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j] + + # Faster + tag1, tag2 = ops.tile(j[:, None], (1, 2)), ops.tile(k[:, None], (1, 2)) + j_l = ops.logical_or(j, l).astype(ms.int32) + k_m = ops.logical_or(k, m).astype(ms.int32) + center = ops.ones_like(j_l) + j = ops.stack((center, j_l, k_m)) + mask_m_t = (ops.cast(j, ms.int32) * ops.cast(mask_m_t[None, :], ms.int32)).view(-1) + t = ops.tile(t, (3, 1, 1)) # shape(5, *, 7) + t = t.view(-1, 7) + offsets = ops.zeros_like(gxy)[None, :, :] + off[:, None, :] # (1,*,2) + (5,1,2) -> (5,na*nt,2) + offsets_new = ops.zeros((3,) + offsets.shape[1:], offsets.dtype) + offsets_new[1, :, :] = ops.select(tag1.astype(ms.bool_), offsets[1, ...], offsets[3, ...]) + offsets_new[2, :, :] = ops.select(tag2.astype(ms.bool_), offsets[2, ...], offsets[4, ...]) + offsets = offsets_new + offsets = offsets.view(-1, 2) + + # Define + b, c, gxy, gwh, a = ( + ops.cast(t[:, 0], ms.int32), + ops.cast(t[:, 1], ms.int32), + t[:, 2:4], + t[:, 4:6], + ops.cast(t[:, 6], ms.int32), + ) # (image, class), grid xy, grid wh, anchors # b: (5*na*nt,), gxy: (5*na*nt, 2) + gij = ops.cast(gxy - offsets, ms.int32) + gi, gj = gij[:, 0], gij[:, 1] # grid indices + gi = gi.clip(0, shape[3] - 1) + gj = gj.clip(0, shape[2] - 1) + + # Append + indices += (ops.stack((b, a, gj, gi), 0),) # image, anchor, grid + anch += (anchors[a],) # anchors + tmasks += (mask_m_t,) + + return indices, anch, tmasks + + +@register_model +class YOLOv7AuxLoss(nn.Cell): + def __init__( + self, box, obj, cls, anchor_t, label_smoothing, fl_gamma, cls_pw, obj_pw, anchors, stride, nc, **kwargs + ): + super(YOLOv7AuxLoss, self).__init__() + self.hyp_box = box + self.hyp_obj = obj + self.hyp_cls = cls + self.hyp_anchor_t = anchor_t + self.nc = nc # number of classes + self.na = len(anchors[0]) // 2 # number of anchors + self.nl = len(anchors) # number of layers + + # modify weight of box/obj/cls when aux_loss + self.hyp_box *= 3.0 / self.nl + self.hyp_cls *= self.nc / 80.0 * 3.0 / self.nl + self.hyp_obj *= 2.0**2 * 3.0 / self.nl + + stride = np.array(stride) + anchors = np.array(anchors).reshape((self.nl, -1, 2)) + anchors = anchors / stride.reshape((-1, 1, 1)) + self.stride = Tensor(stride, ms.int32) + self.anchors = Tensor(anchors, ms.float32) # shape(nl,na,2) + + # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 + self.cp, self.cn = smooth_BCE(eps=label_smoothing) # positive, negative BCE targets + # Focal loss + g = fl_gamma # focal loss gamma + if g > 0: + BCEcls, BCEobj = FocalLoss(bce_pos_weight=Tensor([cls_pw], ms.float32), gamma=g), FocalLoss( + bce_pos_weight=Tensor([obj_pw], ms.float32), gamma=g + ) + else: + # Define criteria + BCEcls = BCEWithLogitsLoss(bce_pos_weight=Tensor(np.array([cls_pw]), ms.float32)) + BCEobj = BCEWithLogitsLoss(bce_pos_weight=Tensor(np.array([obj_pw]), ms.float32)) + + _balance = {3: [4.0, 1.0, 0.4]}.get(self.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 + self.balance = ms.Parameter(Tensor(_balance, ms.float32), requires_grad=False) + 
self.BCEcls, self.BCEobj, self.gr = BCEcls, BCEobj, 1.0 + + self._off = Tensor( + [ + [0, 0], + [1, 0], + [0, 1], + [-1, 0], + [0, -1], # j,k,l,m + # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm + ], + dtype=ms.float32, + ) + + self.loss_item_name = ["loss", "lbox", "lobj", "lcls"] # branch name returned by loss for print + + def construct(self, p, targets, imgs): + lcls, lbox, lobj = 0.0, 0.0, 0.0 + targets_ori = targets + bs, as_, gjs, gis, targets, anchors, tmasks = self.build_targets( + p[: self.nl], targets_ori, imgs + ) # bs: (nl, bs*3*na*gt_max) + bs_aux, as_aux_, gjs_aux, gis_aux, targets_aux, anchors_aux, tmasks_aux = self.build_targets_2( + p[: self.nl], targets_ori, imgs + ) # bs: (nl, bs*5*na*gt_max) + + bs, as_, gjs, gis, targets, anchors, tmasks = ( + ops.stop_gradient(bs), + ops.stop_gradient(as_), + ops.stop_gradient(gjs), + ops.stop_gradient(gis), + ops.stop_gradient(targets), + ops.stop_gradient(anchors), + ops.stop_gradient(tmasks), + ) + bs_aux, as_aux_, gjs_aux, gis_aux, targets_aux, anchors_aux, tmasks_aux = ( + ops.stop_gradient(bs_aux), + ops.stop_gradient(as_aux_), + ops.stop_gradient(gjs_aux), + ops.stop_gradient(gis_aux), + ops.stop_gradient(targets_aux), + ops.stop_gradient(anchors_aux), + ops.stop_gradient(tmasks_aux), + ) + + pre_gen_gains = () + # pre_gen_gains_aux = () + for pp in p[: self.nl]: + pre_gen_gains += (get_tensor(pp.shape, targets.dtype)[[3, 2, 3, 2]],) + # pre_gen_gains_aux += (get_tensor(pp.shape, targets.dtype)[[3, 2, 3, 2]],) + + # Losses + for i in range(self.nl): # layer index + pi = p[i] # layer predictions + pi_aux = p[i + self.nl] + b, a, gj, gi, tmask = bs[i], as_[i], gjs[i], gis[i], tmasks[i] # image, anchor, gridy, gridx, tmask + b_aux, a_aux, gj_aux, gi_aux, tmask_aux = bs_aux[i], as_aux_[i], gjs_aux[i], gis_aux[i], tmasks_aux[i] + tobj = ops.zeros_like(pi[..., 0]) # target obj + tobj_aux = ops.zeros_like(pi_aux[..., 0]) # target obj + + # 1. Branch1, Compute main branch loss + n = b.shape[0] # number of targets + ps = pi[b, a, gj, gi] # prediction subset corresponding to targets + # 1.1. Regression + grid = ops.stack([gi, gj], axis=1) + pxy = ops.Sigmoid()(ps[:, :2]) * 2.0 - 0.5 + pwh = (ops.Sigmoid()(ps[:, 2:4]) * 2) ** 2 * anchors[i] + pbox = ops.concat((pxy, pwh), 1) # predicted box + selected_tbox = targets[i][:, 2:6] * pre_gen_gains[i] + selected_tbox[:, :2] -= grid + iou = bbox_iou(pbox, selected_tbox, xywh=True, CIoU=True).view(-1) + lbox += ((1.0 - iou) * tmask).sum() / tmask.astype(iou.dtype).sum().clip(1, None) # iou loss + # 1.2. Objectness + tobj[b, a, gj, gi] = ((1.0 - self.gr) + self.gr * ops.stop_gradient(iou).clip(0, None)) * tmask # iou ratio + obji = self.BCEobj(pi[..., 4], tobj) + lobj += obji * self.balance[i] # obj loss + # 1.3. Classification + selected_tcls = ops.cast(targets[i][:, 1], ms.int32) + if self.nc > 1: # cls loss (only if multiple classes) + t = ops.ones_like(ps[:, 5:]) * self.cn # targets + t[mnp.arange(n, dtype=ms.int32), selected_tcls] = self.cp + lcls += self.BCEcls(ps[:, 5:], t, ops.tile(tmask[:, None], (1, t.shape[1]))) # BCE + + # 2. Branch2, Compute Aux branch loss + n_aux = b_aux.shape[0] # number of targets + ps_aux = pi[b_aux, a_aux, gj_aux, gi_aux] # prediction subset corresponding to targets + # 2.1. 
Regression + grid_aux = ops.stack([gi_aux, gj_aux], axis=1) + pxy_aux = ops.Sigmoid()(ps_aux[:, :2]) * 2.0 - 0.5 + pwh_aux = (ops.Sigmoid()(ps_aux[:, 2:4]) * 2) ** 2 * anchors_aux[i] + pbox_aux = ops.concat((pxy_aux, pwh_aux), 1) # predicted box + selected_tbox_aux = targets_aux[i][:, 2:6] * pre_gen_gains[i] + selected_tbox_aux[:, :2] -= grid_aux + iou_aux = bbox_iou(pbox_aux, selected_tbox_aux, xywh=True, CIoU=True).view(-1) + lbox += ( + 0.25 * ((1.0 - iou_aux) * tmask_aux).sum() / tmask_aux.astype(iou_aux.dtype).sum().clip(1, None) + ) # iou loss + # 1.2. Objectness + tobj_aux[b_aux, a_aux, gj_aux, gi_aux] = ( + (1.0 - self.gr) + self.gr * ops.stop_gradient(iou_aux).clip(0, None) + ) * tmask_aux # iou ratio + obji_aux = self.BCEobj(pi_aux[..., 4], tobj_aux) + lobj += 0.25 * obji_aux * self.balance[i] # obj loss + # 1.3. Classification + selected_tcls_aux = ops.cast(targets_aux[i][:, 1], ms.int32) + if self.nc > 1: # cls loss (only if multiple classes) + t_aux = ops.ones_like(ps_aux[:, 5:]) * self.cn # targets + t_aux[mnp.arange(n_aux, dtype=ms.int32), selected_tcls_aux] = self.cp + lcls += 0.25 * self.BCEcls( + ps_aux[:, 5:], t_aux, ops.tile(tmask_aux[:, None], (1, t_aux.shape[1])) + ) # BCE + + lbox *= self.hyp_box + lobj *= self.hyp_obj + lcls *= self.hyp_cls + bs = p[0].shape[0] # batch size + + loss = lbox + lobj + lcls + return loss * bs, ops.stop_gradient(ops.stack((loss, lbox, lobj, lcls))) + + def build_targets(self, p, targets, imgs): + indices, anch, tmasks = self.find_3_positive(p, targets) + + na, n_gt_max = self.na, targets.shape[1] + nl, batch_size, img_size = len(p), p[0].shape[0], imgs[0].shape[1] + + this_target = targets.view(-1, 6) + + txywh = this_target[:, 2:6] * img_size + txyxy = xywh2xyxy(txywh) + txyxy = txyxy.view(batch_size, n_gt_max, 4) + this_target = this_target.view(batch_size, n_gt_max, 6) + this_mask = this_target[:, :, 1] >= 0 # (bs, gt_max) + + pxyxys = () + p_cls = () + p_obj = () + all_b = () + all_a = () + all_gj = () + all_gi = () + all_anch = () + all_tmasks = () + + # for i, pi in enumerate(p): + for i in range(self.nl): + pi = p[i] + _this_indices = indices[i].view(4, 3 * na, batch_size, n_gt_max).transpose(0, 2, 1, 3).view(4, -1) + _this_anch = anch[i].view(3 * na, batch_size, n_gt_max * 2).transpose(1, 0, 2).view(-1, 2) + _this_mask = tmasks[i].view(3 * na, batch_size, n_gt_max).transpose(1, 0, 2).view(-1) + + _this_indices *= _this_mask[None, :] + _this_anch *= _this_mask[:, None] + + b, a, gj, gi = ops.split(_this_indices, split_size_or_sections=1, axis=0) + b, a, gj, gi = b.view(-1), a.view(-1), gj.view(-1), gi.view(-1) + + fg_pred = pi[b, a, gj, gi] + p_obj += (fg_pred[:, 4:5].view(batch_size, 3 * na * n_gt_max, 1),) + p_cls += (fg_pred[:, 5:].view(batch_size, 3 * na * n_gt_max, -1),) + + grid = ops.stack((gi, gj), axis=1) + pxy = (ops.Sigmoid()(fg_pred[:, :2]) * 2.0 - 0.5 + grid) * self.stride[i] # / 8. + pwh = (ops.Sigmoid()(fg_pred[:, 2:4]) * 2) ** 2 * _this_anch * self.stride[i] # / 8. 
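+            # Note: pxy/pwh above decode raw logits back to image-scale boxes (grid
+            # offset plus stride for xy; squared sigmoid times the stride-normalized
+            # anchor, rescaled by stride, for wh), so candidate boxes and ground truth
+            # are compared in the same pixel units by the OTA-style matching below.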
+ pxywh = ops.concat((pxy, pwh), axis=-1) + pxyxy = xywh2xyxy(pxywh) + + b, a, gj, gi, pxyxy, _this_anch, _this_mask = ( + b.view(batch_size, -1), + a.view(batch_size, -1), + gj.view(batch_size, -1), + gi.view(batch_size, -1), + pxyxy.view(batch_size, -1, 4), + _this_anch.view(batch_size, -1, 2), + _this_mask.view(batch_size, -1), + ) + all_b += (b,) + all_a += (a,) + all_gj += (gj,) + all_gi += (gi,) + pxyxys += (pxyxy,) + all_anch += (_this_anch,) + all_tmasks += (_this_mask,) + + pxyxys = ops.concat(pxyxys, axis=1) # nl * (bs, 5*na*gt_max, 4) -> cat -> (bs, c, 4) # nt = bs * gt_max + p_obj = ops.concat(p_obj, axis=1) + p_cls = ops.concat(p_cls, axis=1) # nl * (bs, 5*na*gt_max, 80) -> (bs, nl*5*na*gt_max, 80) + all_b = ops.concat(all_b, axis=1) # nl * (bs, 5*na*gt_max) -> (bs, nl*5*na*gt_max) + all_a = ops.concat(all_a, axis=1) + all_gj = ops.concat(all_gj, axis=1) + all_gi = ops.concat(all_gi, axis=1) + all_anch = ops.concat(all_anch, axis=1) + all_tmasks = ops.concat(all_tmasks, axis=1) # (bs, nl*5*na*gt_max) + + this_mask = all_tmasks[:, None, :] * this_mask[:, :, None] # (bs, gt_max, nl*5*na*gt_max,) + + # (bs, gt_max, 4), (bs, nl*5*na*gt_max, 4) -> (bs, gt_max, nl*5*na*gt_max) + pair_wise_iou = batch_box_iou(txyxy, pxyxys) * this_mask # (bs, gt_max, nl*5*na*gt_max,) + pair_wise_iou_loss = -ops.log(pair_wise_iou + EPS) + + # Top 20 iou sum for aux, default 10 + v, _ = ops.top_k(pair_wise_iou, 20) # (bs, gt_max, 20) + dynamic_ks = ops.cast(v.sum(-1).clip(1, 20), ms.int32) # (bs, gt_max) + + # (bs, gt_max, 80) + gt_cls_per_image = ops.one_hot( + indices=ops.cast(this_target[:, :, 1], ms.int32), + depth=self.nc, + on_value=ops.ones(1, p_cls.dtype), + off_value=ops.zeros(1, p_cls.dtype), + ) + # (bs, gt_max, nl*5*na*gt_max, 80) + gt_cls_per_image = ops.tile( + ops.expand_dims(ops.cast(gt_cls_per_image, p_cls.dtype), 2), (1, 1, pxyxys.shape[1], 1) + ) + + cls_preds_ = ops.sqrt(ops.Sigmoid()(p_cls) * ops.Sigmoid()(p_obj)) + cls_preds_ = ops.tile( + ops.expand_dims(cls_preds_, 1), (1, n_gt_max, 1, 1) + ) # (bs, nl*5*na*gt_max, 80) -> (bs, gt_max, nl*5*na*gt_max, 80) + y = cls_preds_ + + pair_wise_cls_loss = ops.binary_cross_entropy_with_logits( + ops.log(y / (1 - y) + EPS), + gt_cls_per_image, + ops.ones(1, cls_preds_.dtype), + ops.ones(1, cls_preds_.dtype), + reduction="none", + ).sum( + -1 + ) # (bs, gt_max, nl*5*na*gt_max) + + cost = pair_wise_cls_loss + 3.0 * pair_wise_iou_loss + cost = cost * this_mask + cost += CLIP_VALUE * (1.0 - ops.cast(this_mask, cost.dtype)) + + sort_cost, sort_idx = ops.top_k(-cost, 20, sorted=True) # (bs, gt_max, 20) + sort_cost = -sort_cost + pos_idx = ops.stack((mnp.arange(batch_size * n_gt_max, dtype=ms.int32), dynamic_ks.view(-1) - 1), -1) + pos_v = ops.gather_nd(sort_cost.view(batch_size * n_gt_max, 20), pos_idx).view(batch_size, n_gt_max) + matching_matrix = ops.cast(cost <= pos_v[:, :, None], ms.int32) * this_mask + + # delete reduplicate match label, one anchor only match one gt + cost_argmin = mnp.argmin(cost, axis=1) # (bs, nl*5*na*gt_max) + anchor_matching_gt_mask = ops.one_hot( + cost_argmin, n_gt_max, ops.ones(1, ms.float16), ops.zeros(1, ms.float16), axis=-1 + ).transpose( + 0, 2, 1 + ) # (bs, gt_max, nl*5*na*gt_max) + matching_matrix = matching_matrix * ops.cast(anchor_matching_gt_mask, matching_matrix.dtype) + + fg_mask_inboxes = ( + matching_matrix.astype(ms.float16).sum(1) > 0.0 + ) # (bs, gt_max, nl*5*na*gt_max) -> (bs, nl*5*na*gt_max) + all_tmasks = all_tmasks * ops.cast(fg_mask_inboxes, ms.int32) # (bs, nl*5*na*gt_max) + matched_gt_inds = 
matching_matrix.argmax(1).astype(ms.int32) # (bs, gt_max, nl*5*na*gt_max) -> (bs, nl*5*na*gt_max) + matched_bs_inds = ops.tile( + mnp.arange(batch_size, dtype=ms.int32)[:, None], (1, matching_matrix.shape[2]) + ) # (bs, nl*5*na*gt_max) + matched_inds = ops.stack((matched_bs_inds.view(-1), matched_gt_inds.view(-1)), 1) # (bs*nl*5*na*gt_max, 2) + matched_inds *= all_tmasks.view(-1)[:, None] + this_target = ops.gather_nd(this_target, matched_inds) # (bs*nl*5*na*gt_max, 6) + # this_target = this_target.view(-1, 6)[matched_gt_inds.view(-1,)] # (bs*nl*5*na*gt_max, 6) + + # (bs, nl*5*na*gt_max,) -> (bs, nl, 5*na*gt_max) -> (nl, bs*5*na*gt_max) + matching_tmasks = all_tmasks.view(batch_size, nl, -1).transpose(1, 0, 2).view(nl, -1) + matching_bs = all_b.view(batch_size, nl, -1).transpose(1, 0, 2).view(nl, -1) * matching_tmasks + matching_as = all_a.view(batch_size, nl, -1).transpose(1, 0, 2).view(nl, -1) * matching_tmasks + matching_gjs = all_gj.view(batch_size, nl, -1).transpose(1, 0, 2).view(nl, -1) * matching_tmasks + matching_gis = all_gi.view(batch_size, nl, -1).transpose(1, 0, 2).view(nl, -1) * matching_tmasks + matching_targets = ( + this_target.view(batch_size, nl, -1, 6).transpose(1, 0, 2, 3).view(nl, -1, 6) * matching_tmasks[..., None] + ) + matching_anchs = ( + all_anch.view(batch_size, nl, -1, 2).transpose(1, 0, 2, 3).view(nl, -1, 2) * matching_tmasks[..., None] + ) + + return matching_bs, matching_as, matching_gjs, matching_gis, matching_targets, matching_anchs, matching_tmasks + + def build_targets_2(self, p, targets, imgs): + indices, anch, tmasks = self.find_5_positive(p, targets) + + na, n_gt_max = self.na, targets.shape[1] + nl, batch_size, img_size = len(p), p[0].shape[0], imgs[0].shape[1] + + this_target = targets.view(-1, 6) + + txywh = this_target[:, 2:6] * img_size + txyxy = xywh2xyxy(txywh) + txyxy = txyxy.view(batch_size, n_gt_max, 4) + this_target = this_target.view(batch_size, n_gt_max, 6) + this_mask = this_target[:, :, 1] >= 0 # (bs, gt_max) + + pxyxys = () + p_cls = () + p_obj = () + all_b = () + all_a = () + all_gj = () + all_gi = () + all_anch = () + all_tmasks = () + + # for i, pi in enumerate(p): + for i in range(self.nl): + pi = p[i] + _this_indices = indices[i].view(4, 5 * na, batch_size, n_gt_max).transpose(0, 2, 1, 3).view(4, -1) + _this_anch = anch[i].view(5 * na, batch_size, n_gt_max * 2).transpose(1, 0, 2).view(-1, 2) + _this_mask = tmasks[i].view(5 * na, batch_size, n_gt_max).transpose(1, 0, 2).view(-1) + + _this_indices *= _this_mask[None, :] + _this_anch *= _this_mask[:, None] + + b, a, gj, gi = ops.split(_this_indices, split_size_or_sections=1, axis=0) + b, a, gj, gi = b.view(-1), a.view(-1), gj.view(-1), gi.view(-1) + + fg_pred = pi[b, a, gj, gi] + p_obj += (fg_pred[:, 4:5].view(batch_size, 5 * na * n_gt_max, 1),) + p_cls += (fg_pred[:, 5:].view(batch_size, 5 * na * n_gt_max, -1),) + + grid = ops.stack((gi, gj), axis=1) + pxy = (ops.Sigmoid()(fg_pred[:, :2]) * 2.0 - 0.5 + grid) * self.stride[i] # / 8. + pwh = (ops.Sigmoid()(fg_pred[:, 2:4]) * 2) ** 2 * _this_anch * self.stride[i] # / 8. 
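+            # Same decoding as in build_targets above; build_targets_2 differs only in
+            # drawing its candidates from find_5_positive (g = 1.0, a wider offset
+            # neighborhood), which feeds the auxiliary heads with looser matches.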
+ pxywh = ops.concat((pxy, pwh), axis=-1) + pxyxy = xywh2xyxy(pxywh) + + b, a, gj, gi, pxyxy, _this_anch, _this_mask = ( + b.view(batch_size, -1), + a.view(batch_size, -1), + gj.view(batch_size, -1), + gi.view(batch_size, -1), + pxyxy.view(batch_size, -1, 4), + _this_anch.view(batch_size, -1, 2), + _this_mask.view(batch_size, -1), + ) + all_b += (b,) + all_a += (a,) + all_gj += (gj,) + all_gi += (gi,) + pxyxys += (pxyxy,) + all_anch += (_this_anch,) + all_tmasks += (_this_mask,) + + pxyxys = ops.concat(pxyxys, axis=1) # nl * (bs, 5*na*gt_max, 4) -> cat -> (bs, c, 4) # nt = bs * gt_max + p_obj = ops.concat(p_obj, axis=1) + p_cls = ops.concat(p_cls, axis=1) # nl * (bs, 5*na*gt_max, 80) -> (bs, nl*5*na*gt_max, 80) + all_b = ops.concat(all_b, axis=1) # nl * (bs, 5*na*gt_max) -> (bs, nl*5*na*gt_max) + all_a = ops.concat(all_a, axis=1) + all_gj = ops.concat(all_gj, axis=1) + all_gi = ops.concat(all_gi, axis=1) + all_anch = ops.concat(all_anch, axis=1) + all_tmasks = ops.concat(all_tmasks, axis=1) # (bs, nl*5*na*gt_max) + + this_mask = all_tmasks[:, None, :] * this_mask[:, :, None] # (bs, gt_max, nl*5*na*gt_max,) + + # (bs, gt_max, 4), (bs, nl*5*na*gt_max, 4) -> (bs, gt_max, nl*5*na*gt_max) + pair_wise_iou = batch_box_iou(txyxy, pxyxys) * this_mask # (bs, gt_max, nl*5*na*gt_max,) + pair_wise_iou_loss = -ops.log(pair_wise_iou + EPS) + + # Top 20 iou sum for aux, default 10 + v, _ = ops.top_k(pair_wise_iou, 20) # (bs, gt_max, 20) + dynamic_ks = ops.cast(v.sum(-1).clip(1, 20), ms.int32) # (bs, gt_max) + + # (bs, gt_max, 80) + gt_cls_per_image = ops.one_hot( + indices=ops.cast(this_target[:, :, 1], ms.int32), + depth=self.nc, + on_value=ops.ones(1, p_cls.dtype), + off_value=ops.zeros(1, p_cls.dtype), + ) + # (bs, gt_max, nl*5*na*gt_max, 80) + gt_cls_per_image = ops.tile( + ops.expand_dims(ops.cast(gt_cls_per_image, p_cls.dtype), 2), (1, 1, pxyxys.shape[1], 1) + ) + + cls_preds_ = ops.sqrt(ops.Sigmoid()(p_cls) * ops.Sigmoid()(p_obj)) + cls_preds_ = ops.tile( + ops.expand_dims(cls_preds_, 1), (1, n_gt_max, 1, 1) + ) # (bs, nl*5*na*gt_max, 80) -> (bs, gt_max, nl*5*na*gt_max, 80) + y = cls_preds_ + + pair_wise_cls_loss = ops.binary_cross_entropy_with_logits( + ops.log(y / (1 - y) + EPS), + gt_cls_per_image, + ops.ones(1, cls_preds_.dtype), + ops.ones(1, cls_preds_.dtype), + reduction="none", + ).sum( + -1 + ) # (bs, gt_max, nl*5*na*gt_max) + + cost = pair_wise_cls_loss + 3.0 * pair_wise_iou_loss + cost = cost * this_mask + cost += CLIP_VALUE * (1.0 - ops.cast(this_mask, cost.dtype)) + + sort_cost, sort_idx = ops.top_k(-cost, 20, sorted=True) # (bs, gt_max, 20) + sort_cost = -sort_cost + pos_idx = ops.stack((mnp.arange(batch_size * n_gt_max, dtype=ms.int32), dynamic_ks.view(-1) - 1), -1) + pos_v = ops.gather_nd(sort_cost.view(batch_size * n_gt_max, 20), pos_idx).view(batch_size, n_gt_max) + matching_matrix = ops.cast(cost <= pos_v[:, :, None], ms.int32) * this_mask + + # delete reduplicate match label, one anchor only match one gt + cost_argmin = mnp.argmin(cost, axis=1) # (bs, nl*5*na*gt_max) + anchor_matching_gt_mask = ops.one_hot( + cost_argmin, n_gt_max, ops.ones(1, ms.float16), ops.zeros(1, ms.float16), axis=-1 + ).transpose( + 0, 2, 1 + ) # (bs, gt_max, nl*5*na*gt_max) + matching_matrix = matching_matrix * ops.cast(anchor_matching_gt_mask, matching_matrix.dtype) + + fg_mask_inboxes = ( + matching_matrix.astype(ms.float16).sum(1) > 0.0 + ) # (bs, gt_max, nl*5*na*gt_max) -> (bs, nl*5*na*gt_max) + all_tmasks = all_tmasks * ops.cast(fg_mask_inboxes, ms.int32) # (bs, nl*5*na*gt_max) + matched_gt_inds = 
matching_matrix.argmax(1).astype(ms.int32) # (bs, gt_max, nl*5*na*gt_max) -> (bs, nl*5*na*gt_max) + matched_bs_inds = ops.tile( + mnp.arange(batch_size, dtype=ms.int32)[:, None], (1, matching_matrix.shape[2]) + ) # (bs, nl*5*na*gt_max) + matched_inds = ops.stack((matched_bs_inds.view(-1), matched_gt_inds.view(-1)), 1) # (bs*nl*5*na*gt_max, 2) + matched_inds *= all_tmasks.view(-1)[:, None] + this_target = ops.gather_nd(this_target, matched_inds) # (bs*nl*5*na*gt_max, 6) + # this_target = this_target.view(-1, 6)[matched_gt_inds.view(-1,)] # (bs*nl*5*na*gt_max, 6) + + # (bs, nl*5*na*gt_max,) -> (bs, nl, 5*na*gt_max) -> (nl, bs*5*na*gt_max) + matching_tmasks = all_tmasks.view(batch_size, nl, -1).transpose(1, 0, 2).view(nl, -1) + matching_bs = all_b.view(batch_size, nl, -1).transpose(1, 0, 2).view(nl, -1) * matching_tmasks + matching_as = all_a.view(batch_size, nl, -1).transpose(1, 0, 2).view(nl, -1) * matching_tmasks + matching_gjs = all_gj.view(batch_size, nl, -1).transpose(1, 0, 2).view(nl, -1) * matching_tmasks + matching_gis = all_gi.view(batch_size, nl, -1).transpose(1, 0, 2).view(nl, -1) * matching_tmasks + matching_targets = ( + this_target.view(batch_size, nl, -1, 6).transpose(1, 0, 2, 3).view(nl, -1, 6) * matching_tmasks[..., None] + ) + matching_anchs = ( + all_anch.view(batch_size, nl, -1, 2).transpose(1, 0, 2, 3).view(nl, -1, 2) * matching_tmasks[..., None] + ) + + return matching_bs, matching_as, matching_gjs, matching_gis, matching_targets, matching_anchs, matching_tmasks + + def find_3_positive(self, p, targets): + # Build targets for compute_loss(), input targets(image,class,x,y,w,h) + targets = targets.view(-1, 6) # (bs, gt_max, 6) -> (bs*gt_max, 6) + mask_t = targets[:, 1] >= 0 # (bs*gt_max,) + na, nt = self.na, targets.shape[0] # number of anchors, targets + indices, anch, tmasks = (), (), () + gain = ops.ones(7, ms.int32) # normalized to gridspace gain + ai = ops.tile(mnp.arange(na, dtype=targets.dtype).view(na, 1), (1, nt)) # shape: (na, nt) + targets = ops.concat((ops.tile(targets, (na, 1, 1)), ai[:, :, None]), 2) # append anchor indices # (na, nt, 7) + + g = 0.5 # bias + off = ops.cast(self._off, targets.dtype) * g # offsets + + for i in range(self.nl): + anchors, shape = self.anchors[i], p[i].shape + gain[2:6] = get_tensor(shape, targets.dtype)[[3, 2, 3, 2]] # xyxy gain # [W, H, W, H] + + # Match targets to anchors + t = targets * gain # (na, nt, 7) + # Matches + # if nt: + r = t[:, :, 4:6] / anchors[:, None, :] # wh ratio + j = ops.maximum(r, 1.0 / r).max(2) < self.hyp_anchor_t # compare # (na, nt) + + # t = t[j] # filter + mask_m_t = ops.logical_and(j, mask_t[None, :]).view(-1) + t = t.view(-1, 7) # (na*nt, 7) + + # Offsets + gxy = t[:, 2:4] # grid xy + gxi = gain[[2, 3]] - gxy # inverse + jk = ops.logical_and((gxy % 1.0 < g), (gxy > 1.0)) + lm = ops.logical_and((gxi % 1.0 < g), (gxi > 1.0)) + j, k = jk[:, 0], jk[:, 1] + l, m = lm[:, 0], lm[:, 1] + + # original + # j = ops.stack((ops.ones_like(j), j, k, l, m)) # shape: (5, *) + # t = ops.tile(t, (5, 1, 1)) # shape(5, *, 7) + # t = t.view(-1, 7) + # mask_m_t = (ops.cast(j, ms.int32) * ops.cast(mask_m_t[None, :], ms.int32)).view(-1) + # # t = t.repeat((5, 1, 1))[j] + # offsets = (ops.zeros_like(gxy)[None, :, :] + off[:, None, :]) # (1,*,2) + (5,1,2) -> (5,na*nt,2) + # offsets = offsets.view(-1, 2) # (5*na*nt, 2) + # # offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j] + + # Faster + tag1, tag2 = ops.tile(j[:, None], (1, 2)), ops.tile(k[:, None], (1, 2)) + j_l = ops.logical_or(j, l).astype(ms.int32) + k_m = 
ops.logical_or(k, m).astype(ms.int32) + center = ops.ones_like(j_l) + j = ops.stack((center, j_l, k_m)) + mask_m_t = (ops.cast(j, ms.int32) * ops.cast(mask_m_t[None, :], ms.int32)).view(-1) + t = ops.tile(t, (3, 1, 1)) # shape(5, *, 7) + t = t.view(-1, 7) + offsets = ops.zeros_like(gxy)[None, :, :] + off[:, None, :] # (1,*,2) + (5,1,2) -> (5,na*nt,2) + offsets_new = ops.zeros((3,) + offsets.shape[1:], offsets.dtype) + # offsets_new[0, :, :] = offsets[0, :, :] + offsets_new[1, :, :] = ops.select(tag1.astype(ms.bool_), offsets[1, ...], offsets[3, ...]) + offsets_new[2, :, :] = ops.select(tag2.astype(ms.bool_), offsets[2, ...], offsets[4, ...]) + offsets = offsets_new + offsets = offsets.view(-1, 2) + + # Define + b, c, gxy, gwh, a = ( + ops.cast(t[:, 0], ms.int32), + ops.cast(t[:, 1], ms.int32), + t[:, 2:4], + t[:, 4:6], + ops.cast(t[:, 6], ms.int32), + ) # (image, class), grid xy, grid wh, anchors # b: (5*na*nt,), gxy: (5*na*nt, 2) + # gij = gxy - offsets + gij = ops.cast(gxy - offsets, ms.int32) + gi, gj = gij[:, 0], gij[:, 1] # grid indices + gi = gi.clip(0, shape[3] - 1) + gj = gj.clip(0, shape[2] - 1) + + # Append + indices += (ops.stack((b, a, gj, gi), 0),) # image, anchor, grid + anch += (anchors[a],) # anchors + tmasks += (mask_m_t,) + + return indices, anch, tmasks + + def find_5_positive(self, p, targets): + # Build targets for compute_loss(), input targets(image,class,x,y,w,h) + targets = targets.view(-1, 6) # (bs, gt_max, 6) -> (bs*gt_max, 6) + mask_t = targets[:, 1] >= 0 # (bs*gt_max,) + na, nt = self.na, targets.shape[0] # number of anchors, targets + indices, anch, tmasks = (), (), () + gain = ops.ones(7, ms.int32) # normalized to gridspace gain + ai = ops.tile(mnp.arange(na, dtype=targets.dtype).view(na, 1), (1, nt)) # shape: (na, nt) + targets = ops.concat((ops.tile(targets, (na, 1, 1)), ai[:, :, None]), 2) # append anchor indices # (na, nt, 7) + + g = 1.0 # bias + off = ops.cast(self._off, targets.dtype) * g # offsets + + for i in range(self.nl): + anchors, shape = self.anchors[i], p[i].shape + gain[2:6] = get_tensor(shape, targets.dtype)[[3, 2, 3, 2]] # xyxy gain # [W, H, W, H] + + # Match targets to anchors + t = targets * gain # (na, nt, 7) + # Matches + r = t[:, :, 4:6] / anchors[:, None, :] # wh ratio + j = ops.maximum(r, 1.0 / r).max(2) < self.hyp_anchor_t # compare # (na, nt) + + # t = t[j] # filter + mask_m_t = ops.logical_and(j, mask_t[None, :]).view(-1) # filter + t = t.view(-1, 7) # (na*nt, 7) + + # Offsets + gxy = t[:, 2:4] # grid xy + gxi = gain[[2, 3]] - gxy # inverse + jk = ops.logical_and((gxy % 1.0 < g), (gxy > 1.0)).astype(ms.int32) + lm = ops.logical_and((gxi % 1.0 < g), (gxi > 1.0)).astype(ms.int32) + j, k = jk[:, 0], jk[:, 1] + l, m = lm[:, 0], lm[:, 1] + + # original + j = ops.stack((ops.ones_like(j), j, k, l, m)) # shape: (5, *) + t = ops.tile(t, (5, 1, 1)) # shape(5, *, 7) + t = t.view(-1, 7) + mask_m_t = (ops.cast(j, ms.int32) * ops.cast(mask_m_t[None, :], ms.int32)).view(-1) + offsets = ops.zeros_like(gxy)[None, :, :] + off[:, None, :] # (1,*,2) + (5,1,2) -> (5,na*nt,2) + offsets = offsets.view(-1, 2) # (5*na*nt, 2) + + # Define + b, c, gxy, gwh, a = ( + ops.cast(t[:, 0], ms.int32), + ops.cast(t[:, 1], ms.int32), + t[:, 2:4], + t[:, 4:6], + ops.cast(t[:, 6], ms.int32), + ) # (image, class), grid xy, grid wh, anchors # b: (5*na*nt,), gxy: (5*na*nt, 2) + # gij = gxy - offsets + gij = ops.cast(gxy - offsets, ms.int32) + gi, gj = gij[:, 0], gij[:, 1] # grid indices + gi = gi.clip(0, shape[3] - 1) + gj = gj.clip(0, shape[2] - 1) + + # Append + indices 
+= (ops.stack((b, a, gj, gi), 0),) # image, anchor, grid + anch += (anchors[a],) # anchors + tmasks += (mask_m_t,) + + return indices, anch, tmasks + + +def xywh2xyxy(x): + # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right + y = ops.Identity()(x) + y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x + y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y + y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x + y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y + return y + + +@ops.constexpr +def get_tensor(x, dtype=ms.float32): + return Tensor(x, dtype) + + +if __name__ == "__main__": + from mindyolo.models.losses.loss_factory import create_loss + from mindyolo.utils.config import parse_config + + cfg = parse_config() + loss_fn = create_loss( + name="YOLOv7Loss", + **cfg.loss, + anchors=cfg.network.get("anchors", None), + stride=cfg.network.get("stride", None), + nc=cfg.data.get("nc", None), + ) + print(f"loss_fn is {loss_fn}") diff --git a/community/cv/ShipWise/mindyolo/models/losses/yolov8_loss.py b/community/cv/ShipWise/mindyolo/models/losses/yolov8_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..6e85efe626ca855881612757d10fe4e95844cf6b --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/losses/yolov8_loss.py @@ -0,0 +1,595 @@ +import mindspore as ms +import mindspore.numpy as mnp +from mindspore import Tensor, nn, ops + +from mindyolo.models.registry import register_model + +from .iou_loss import bbox_iou + +CLIP_VALUE = 1000.0 +EPS = 1e-7 + +__all__ = ["YOLOv8Loss", "YOLOv8SegLoss"] + + +@register_model +class YOLOv8Loss(nn.Cell): + def __init__(self, box, cls, dfl, stride, nc, reg_max=16, **kwargs): + super(YOLOv8Loss, self).__init__() + + self.bce = nn.BCEWithLogitsLoss(reduction="none") + self.hyp_box = box + self.hyp_cls = cls + self.hyp_dfl = dfl + self.stride = stride # model strides + self.nc = nc # number of classes + self.no = nc + reg_max * 4 + self.reg_max = reg_max + + self.use_dfl = reg_max > 1 + self.assigner = TaskAlignedAssigner(topk=10, num_classes=self.nc, alpha=0.5, beta=6.0) + self.bbox_loss = BboxLoss(reg_max, use_dfl=self.use_dfl) + self.proj = mnp.arange(reg_max) + + # ops + self.sigmoid = ops.Sigmoid() + + # branch name returned by lossitem for print + self.loss_item_name = ["loss", "lbox", "lcls", "dfl"] + + def construct(self, feats, targets, imgs): + """YOLOv8 Loss + Args: + feats: list of tensor, feats[i] shape: (bs, nc+reg_max*4, hi, wi) + targets: [image_idx,cls,x,y,w,h], shape: (bs, gt_max, 6) + """ + loss = ops.zeros(3, ms.float32) # box, cls, dfl + batch_size = feats[0].shape[0] + _x = () + for xi in feats: + _x += (xi.view(batch_size, self.no, -1),) + _x = ops.concat(_x, 2) + pred_distri, pred_scores = _x[:, : self.reg_max * 4, :], _x[:, -self.nc :, :] # (bs, nc, h*w) + pred_distri, pred_scores = pred_distri.transpose((0, 2, 1)), pred_scores.transpose((0, 2, 1)) + + dtype = pred_scores.dtype + imgsz = get_tensor(feats[0].shape[2:], dtype) * self.stride[0] # image size (h,w) + anchor_points, stride_tensor = self.make_anchors(feats, self.stride, 0.5) + + # targets + targets, mask_gt = self.preprocess(targets, scale_tensor=imgsz[[1, 0, 1, 0]]) + gt_labels, gt_bboxes = targets[:, :, :1], targets[:, :, 1:5] # cls, xyxy + + # pboxes + pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, shape: (b, h*w, 4) + + _, target_bboxes, target_scores, fg_mask, _ = self.assigner( + self.sigmoid(pred_scores), + (pred_bboxes * stride_tensor).astype(gt_bboxes.dtype), + anchor_points * stride_tensor, 
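+ # added note: decoded boxes and anchor points are rescaled by stride above,
+ # so the assigner matches everything in input-image pixel coordinates,
+ # the same scale as gt_bboxes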
+ gt_labels, + gt_bboxes, + mask_gt, + ) + # stop gradient + target_bboxes, target_scores, fg_mask = ( + ops.stop_gradient(target_bboxes), + ops.stop_gradient(target_scores), + ops.stop_gradient(fg_mask), + ) + + target_bboxes /= stride_tensor + + target_scores_sum = ops.maximum(target_scores.sum(), 1) + + # cls loss + # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum # VFL way + loss[1] = self.bce(pred_scores, ops.cast(target_scores, dtype)).sum() / target_scores_sum # BCE + + # bbox loss + # if fg_mask.sum(): + loss[0], loss[2] = self.bbox_loss( + pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask + ) + + loss[0] *= self.hyp_box # box gain + loss[1] *= self.hyp_cls # cls gain + loss[2] *= self.hyp_dfl # dfl gain + + return loss.sum() * batch_size, ops.stop_gradient( + ops.concat((loss.sum(keepdims=True), loss)) + ) # loss(box, cls, dfl) + + def bbox_decode(self, anchor_points, pred_dist): + if self.use_dfl: + b, a, c = pred_dist.shape # batch, anchors, channels + pred_dist = pred_dist.view(b, a, 4, c // 4) + # pred_dist = ops.softmax(pred_dist, axis=3) # ms version >= 1.9.0 + pred_dist = ops.Softmax(axis=3)(pred_dist) # ms version <= 1.8.1 + # (batch, anchors, 4, reg_max) @ (reg_max,) -> (batch, anchors, 4) + _dtype = pred_dist.dtype + pred_dist = ops.matmul(pred_dist.astype(ms.float16), self.proj.astype(ms.float16)).astype(_dtype) + return self.dist2bbox(pred_dist, anchor_points, xywh=False) + + def preprocess(self, targets, scale_tensor): + """preprocess gt boxes + + Args: + targets: [image_idx,cls,x,y,w,h], shape: (bs, gt_max, 6) + scale_tensor: (4,) + Return: + out: [cls,x,y,x,y], shape: (bs, gt_max, 5) + mask_gt: (bs, gt_max) + """ + mask_gt = targets[:, :, 1] >= 0 # (bs, gt_max) + out = targets[:, :, 1:] * mask_gt[:, :, None] # [cls,x,y,w,h], shape: (bs, gt_max, 5) + out[..., 1:5] = xywh2xyxy(out[..., 1:5] * scale_tensor) + return out, mask_gt + + @staticmethod + def dist2bbox(distance, anchor_points, xywh=True, axis=-1): + """Transform distance(ltrb) to box(xywh or xyxy).""" + lt, rb = ops.split(distance, split_size_or_sections=2, axis=axis) + x1y1 = anchor_points - lt + x2y2 = anchor_points + rb + if xywh: + c_xy = (x1y1 + x2y2) / 2 + wh = x2y2 - x1y1 + return ops.concat((c_xy, wh), axis) # xywh bbox + return ops.concat((x1y1, x2y2), axis) # xyxy bbox + + @staticmethod + def make_anchors(feats, strides, grid_cell_offset=0.5): + """Generate anchors from features.""" + anchor_points, stride_tensor = (), () + dtype = feats[0].dtype + for i, stride in enumerate(strides): + _, _, h, w = feats[i].shape + sx = mnp.arange(w, dtype=dtype) + grid_cell_offset # shift x + sy = mnp.arange(h, dtype=dtype) + grid_cell_offset # shift y + sy, sx = ops.meshgrid(sy, sx, indexing="ij") + anchor_points += (ops.stack((sx, sy), -1).view(-1, 2),) + stride_tensor += (ops.ones((h * w, 1), dtype) * stride,) + return ops.concat(anchor_points), ops.concat(stride_tensor) + + +@register_model +class YOLOv8SegLoss(YOLOv8Loss): + def __init__(self, box, cls, dfl, stride, nc, reg_max=16, nm=32, overlap=True, max_object_num=600, **kwargs): + super(YOLOv8SegLoss, self).__init__(box, cls, dfl, stride, nc, reg_max) + + self.overlap = overlap + self.nm = nm + self.max_object_num = max_object_num + + # branch name returned by lossitem for print + self.loss_item_name = ["loss", "lbox", "lseg", "lcls", "dfl"] + + def construct(self, preds, target_box, target_seg): + """YOLOv8 Loss + Args: + feats: list of tensor, feats[i] shape: 
(bs, nc+reg_max*4, hi, wi) + targets: [image_idx,cls,x,y,w,h], shape: (bs, gt_max, 6) + """ + loss = ops.zeros(4, ms.float32) # box, cls, dfl, mask + # (bs, nc+reg_max*4, hi, wi), (bs, k, hi*wi), (bs, k, 138, 138); k = 32; + feats, pred_masks, proto = preds # x, mc, p; + batch_size, _, mask_h, mask_w = proto.shape # batch size, number of masks, mask height, mask width + + _x = () + for xi in feats: + _x += (xi.view(batch_size, self.no, -1),) + _x = ops.concat(_x, 2) + pred_distri, pred_scores = _x[:, :self.reg_max * 4, :], _x[:, -self.nc:, :] # (bs, nc, h*w) + + # b, grids, .. + pred_scores = pred_scores.transpose(0, 2, 1) # (bs, h*w, nc) + pred_distri = pred_distri.transpose(0, 2, 1) # (bs, h*w, regmax * 4) + pred_masks = pred_masks.transpose(0, 2, 1) # (bs, h*w, k) + + dtype = pred_scores.dtype + imgsz = get_tensor(feats[0].shape[2:], dtype) * self.stride[0] # image size (h,w) + anchor_points, stride_tensor = self.make_anchors(feats, self.stride, 0.5) + + # targets + target_box, mask_gt = self.preprocess(target_box, scale_tensor=imgsz[[1, 0, 1, 0]]) + gt_labels, gt_bboxes = target_box[:, :, :1], target_box[:, :, 1:5] # cls, xyxy + + # pboxes + pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, shape: (b, h*w, 4) + + _, target_bboxes, target_scores, fg_mask, target_gt_idx = self.assigner( + self.sigmoid(pred_scores), + (pred_bboxes * stride_tensor).astype(gt_bboxes.dtype), + anchor_points * stride_tensor, + gt_labels, + gt_bboxes, + mask_gt, + ) + + # stop gradient + target_bboxes, target_scores, fg_mask, target_gt_idx = ( + ops.stop_gradient(target_bboxes), + ops.stop_gradient(target_scores), + ops.stop_gradient(fg_mask), + ops.stop_gradient(target_gt_idx) + ) + + target_scores_sum = ops.maximum(target_scores.sum(), 1) + + # cls loss + loss[2] = self.bce(pred_scores, ops.cast(target_scores, dtype)).sum() / target_scores_sum # BCE + + # bbox loss + loss[0], loss[3] = self.bbox_loss( + pred_distri, pred_bboxes, anchor_points, target_bboxes / stride_tensor, target_scores, target_scores_sum, fg_mask + ) + + # FIXME: mask target reshape, dynamic shape feature required. 
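+ # Added note: the commented block below is the full-batch mask downsample,
+ # disabled here because it needs dynamic shapes. Instead, the per-image loop
+ # further down keeps every shape static for graph mode by selecting at most
+ # `max_object_num` foreground anchors per image with top-k before computing
+ # the mask loss.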
+ # masks = target_seg # (b, 1, mask_h, mask_w) if overlap else (bs, N, mask_h, mask_w) + # if tuple(masks.shape[-2:]) != (mask_h, mask_w): # downsample + # masks = ops.interpolate(ops.expand_dims(masks, 0), size=(mask_h, mask_w), mode="nearest")[0] + + for i in range(batch_size): + _fg_mask, _fg_mask_index = ops.topk(fg_mask[i].astype(ms.float16), self.max_object_num) + _mask = target_seg[i] # (mask_h, mask_w) if overlap else (n_gt, mask_h, mask_w) + _mask_idx = target_gt_idx[i] # (b, N) -> (N,) + _mask_idx = ops.gather(_mask_idx, _fg_mask_index, axis=0) # (max_object_num,) + + if self.overlap: + _cond = _mask[None, :, :] == (_mask_idx[:, None, None] + 1) + gt_mask = ops.where( + _cond, + ops.ones(_cond.shape, pred_masks.dtype), + ops.zeros(_cond.shape, pred_masks.dtype) + ) + else: + gt_mask = _mask[_mask_idx] # (n_gt, mask_h, mask_w) -> (N, mask_h, mask_w)/(max_object_num, mask_h, mask_w) + + xyxyn = target_bboxes[i] / imgsz[[1, 0, 1, 0]] + marea = xyxy2xywh(xyxyn)[:, 2:].prod(1) + mxyxy = xyxyn * get_tensor((mask_w, mask_h, mask_w, mask_h), xyxyn.dtype) + + _loss_1 = self.single_mask_loss( + gt_mask, pred_masks[i], proto[i], mxyxy, marea, _fg_mask, _fg_mask_index + ) + loss[1] += _loss_1 + + loss[0] *= self.hyp_box # box gain + loss[1] *= self.hyp_box / batch_size # seg gain + loss[2] *= self.hyp_cls # cls gain + loss[3] *= self.hyp_dfl # dfl gain + + return loss.sum() * batch_size, ops.stop_gradient( + ops.concat((loss.sum(keepdims=True), loss)) + ) # loss, lbox, lseg, lcls, ldfl + + def single_mask_loss(self, gt_mask, pred, proto, xyxy, area, _fg_mask, _fg_mask_index): + """Mask loss for one image.""" + pred = ops.gather(pred, _fg_mask_index, axis=0) + xyxy = ops.gather(xyxy, _fg_mask_index, axis=0) + area = ops.gather(area, _fg_mask_index, axis=0) + + _dtype = pred.dtype + pred_mask = ops.matmul( + pred.astype(ms.float16), + proto.astype(ms.float16).view(self.nm, -1) + ).view(-1, *proto.shape[1:]).astype(_dtype) # (n, 32) @ (32,80,80) -> (n,80,80) + + loss = ops.binary_cross_entropy_with_logits( + pred_mask, gt_mask, reduction='none', + weight=ops.ones(1, pred_mask.dtype), + pos_weight=ops.ones(1, pred_mask.dtype) + ) + + single_loss = (self.crop_mask(loss, xyxy).mean(axis=(1, 2)) / ops.clip(area, min=1e-4)) + single_loss *= _fg_mask + + num_seg = ops.clip(_fg_mask.sum(), min=1.0) + + return single_loss.sum() / num_seg + + @staticmethod + def crop_mask(masks, boxes): + """ + It takes a mask and a bounding box, and returns a mask that is cropped to the bounding box + + Args: + masks (Tensor): [h, w, n] tensor of masks + boxes (Tensor): [n, 4] tensor of bbox coordinates in relative point form + + Returns: + (Tensor): The masks are being cropped to the bounding box. 
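+ Note (added): as invoked from single_mask_loss, masks actually arrive as
+ (n, h, w) and boxes are in mask-pixel coordinates (normalized xyxy scaled
+ by (mask_w, mask_h)), which is what the unpacking below assumes.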
+ """ + n, h, w = masks.shape + x1, y1, x2, y2 = ops.chunk(boxes[:, :, None], 4, 1) # x1 shape(n,1,1) + r = ops.arange(w, dtype=x1.dtype)[None, None, :] # rows shape(1,1,w) + c = ops.arange(h, dtype=x1.dtype)[None, :, None] # cols shape(1,h,1) + + return masks * ops.logical_and( + ops.logical_and((r >= x1), (r < x2)), + ops.logical_and((c >= y1), (c < y2)) + ).astype(x1.dtype) + + +class BboxLoss(nn.Cell): + def __init__(self, reg_max, use_dfl=False): + super().__init__() + self.reg_max = reg_max + self.use_dfl = use_dfl + + def construct( + self, pred_dist, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask + ): + """ + Args: + pred_dist: (bs, N, reg_max * 4) + pred_bboxes: (bs, N, 4) + anchor_points: (N, 2) + target_bboxes: (bs, N, 4) + target_scores: (bs, N, num_classes) + target_scores_sum: (1,) + fg_mask: (bs, N) + """ + # IoU loss + weight = target_scores.sum(-1).expand_dims(-1) # (bs, N, num_classes) -> (bs, N) -> (bs, N, 1) + iou = bbox_iou(pred_bboxes, target_bboxes, xywh=False, CIoU=True) + loss_iou = ((1.0 - iou) * weight * fg_mask.expand_dims(2)).sum() / target_scores_sum + + # DFL loss + if self.use_dfl: + target_ltrb = self.bbox2dist(anchor_points, target_bboxes, self.reg_max - 1) + loss_dfl = self._df_loss(pred_dist.view(-1, self.reg_max), target_ltrb) * weight * fg_mask[:, :, None] + loss_dfl = loss_dfl.sum() / target_scores_sum + else: + loss_dfl = ops.zeros(1, ms.float32) + + return loss_iou, loss_dfl + + @staticmethod + def bbox2dist(anchor_points, bbox, reg_max): + """Transform bbox(xyxy) to dist(ltrb).""" + x1y1, x2y2 = ops.split(bbox, split_size_or_sections=2, axis=-1) + return ops.concat((anchor_points - x1y1, x2y2 - anchor_points), -1).clip(0, reg_max - 0.01) # dist (lt, rb) + + @staticmethod + def _df_loss(pred_dist, target): + # Return sum of left and right DFL losses + # Distribution Focal Loss (DFL) proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391 + """ + Args: + pred_dist: (bs*N*4, reg_max) + target: (bs, N, 4) + fg_mask: (bs, N) + Return: + loss: (bs, N, 1) + """ + tl = ops.cast(target, ms.int32) # target left + tr = tl + 1 # target right + wl = tr - target # weight left + wr = 1 - wl # weight right + + loss = ( + ops.cross_entropy(pred_dist, tl.view(-1), reduction="none").view(tl.shape) * wl + + ops.cross_entropy(pred_dist, tr.view(-1), reduction="none").view(tl.shape) * wr + ).mean(-1, keep_dims=True) + + return loss + + +class TaskAlignedAssigner(nn.Cell): + def __init__(self, topk=13, num_classes=80, alpha=1.0, beta=6.0, eps=1e-9): + super().__init__() + self.topk = topk + self.num_classes = num_classes + self.bg_idx = num_classes + self.alpha = alpha + self.beta = beta + self.eps = eps + + def construct(self, pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt): + """This code referenced to + https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/assigner/tal_assigner.py + + Args: + pd_scores: (bs, N, num_classes) + pd_bboxes: (bs, N, 4) + anc_points: (N, 2) + gt_labels: (bs, n_gt, 1) + gt_bboxes: (bs, n_gt, 4) + mask_gt: (bs, n_gt) + Returns: + target_labels: (bs, N) + target_bboxes: (bs, N, 4) + target_scores: (bs, N, num_classes) + fg_mask: (bs, N) + target_gt_idx: (bs, N) + """ + bs, n_gt, _ = gt_labels.shape + mask_pos, align_metric, overlaps = self.get_pos_mask( + pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, mask_gt + ) + + target_gt_idx, fg_mask, mask_pos = self.select_highest_overlaps(mask_pos, overlaps, n_gt) + + # assigned target + target_labels, 
target_bboxes, target_scores = self.get_targets(gt_labels, gt_bboxes, target_gt_idx, fg_mask) + + # normalize + align_metric *= mask_pos + pos_align_metrics = align_metric.max(axis=-1, keepdims=True) # (b, n_gt) + pos_overlaps = (overlaps * mask_pos).max(axis=-1, keepdims=True) # (b, n_gt) + norm_align_metric = (align_metric * pos_overlaps / (pos_align_metrics + self.eps)).max(-2).expand_dims(-1) + target_scores = target_scores * norm_align_metric + + return target_labels, target_bboxes, target_scores, ops.cast(fg_mask, ms.bool_), target_gt_idx + + def get_pos_mask(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, mask_gt): + align_metric, overlaps = self.get_box_metrics(pd_scores, pd_bboxes, gt_labels, gt_bboxes) # (b, n_gt, N) + mask_in_gts = self.select_candidates_in_gts(anc_points, gt_bboxes, mask_gt) # (b, n_gt, N) + mask_topk = self.select_topk_candidates( + align_metric * mask_in_gts, topk_mask=ops.cast(ops.tile(mask_gt[..., None], (1, 1, self.topk)), ms.bool_) + ) # (b, n_gt, h*w) + mask_pos = mask_topk * mask_in_gts * mask_gt[:, :, None] # (b, n_gt, N) + + return mask_pos, align_metric, overlaps + + def select_topk_candidates(self, metrics, topk_mask=None): + """ + Args: + metrics: (b, n_gt, N). + topk_mask: (b, n_gt, topk) or None + Returns: + mask: (b, n_gt, N) + """ + + num_anchors = metrics.shape[-1] # N + topk_metrics, topk_idxs = ops.top_k(metrics, self.topk) # (b, n_gt, topk) + if topk_mask is None: + topk_mask = ops.tile(topk_metrics.max(-1, keepdims=True) > self.eps, (1, 1, self.topk)) # (b, n_gt, topk) + topk_idxs = mnp.where(topk_mask, topk_idxs, ops.zeros_like(topk_idxs)) # (b, n_gt, topk) + is_in_topk = ops.one_hot(topk_idxs, num_anchors, ops.ones(1, ms.float32), ops.zeros(1, ms.float32)).sum( + -2 + ) # (b, n_gt, topk, N) -> (b, n_gt, N) + # filter invalid bboxes + is_in_topk = mnp.where(is_in_topk > 1, ops.zeros(1, ms.float32), is_in_topk) + is_in_topk = ops.cast(is_in_topk, metrics.dtype) + + return is_in_topk + + def get_box_metrics(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes): + bs, n_gt, _ = gt_labels.shape + + ind0 = ops.tile(mnp.arange(bs, dtype=ms.int32).view(-1, 1), (1, n_gt)).view(-1, 1) # (b*n_gt, 1) + ind1 = ops.cast(gt_labels, ms.int32).squeeze(-1).view(-1, 1) # (b*n_gt, 1) + bbox_scores = ops.gather_nd( + pd_scores.transpose((0, 2, 1)), ops.concat((ind0, ind1), axis=1) + ) # (b, N, 80)->(b, 80, N)->(b*n_gt, N) + bbox_scores = bbox_scores.view(bs, n_gt, -1) + + # (b, n_gt, 1, 4), (b, 1, N, 4) -> (b, n_gt, N) + overlaps = ( + bbox_iou(gt_bboxes.expand_dims(2), pd_bboxes.expand_dims(1), xywh=False, CIoU=True).squeeze(3).clip(0, None) + ) + align_metric = bbox_scores.pow(self.alpha) * overlaps.pow(self.beta) + return align_metric, overlaps + + def get_targets(self, gt_labels, gt_bboxes, target_gt_idx, fg_mask): + """ + Args: + gt_labels: (b, n_gt, 1) + gt_bboxes: (b, n_gt, 4) + target_gt_idx: (b, N) + fg_mask: (b, N) + """ + + # assigned target labels + bs, n_gt, _ = gt_labels.shape + batch_ind = mnp.arange(bs)[:, None] # (b, 1) + target_gt_idx = target_gt_idx + batch_ind * n_gt # (b, N) + target_labels = ops.cast(gt_labels, ms.int32).flatten()[target_gt_idx] # (b, N) + + # assigned target boxes + target_bboxes = gt_bboxes.view(-1, 4)[target_gt_idx] # (b, n_gt, 4) -> (b * n_gt, 4) -> (b, N) + + # assigned target scores + target_labels.clip(0, None) + target_scores = ops.one_hot( + target_labels, self.num_classes, on_value=ops.ones(1, ms.int32), off_value=ops.zeros(1, ms.int32) + ) # (b, N, 80) + fg_scores_mask = ops.tile(fg_mask[:, :, 
None], (1, 1, self.num_classes)) # (b, N) -> (b, N, 80) + target_scores = mnp.where(fg_scores_mask > 0, target_scores, ops.zeros(1, ms.int32)) + + return target_labels, target_bboxes, target_scores + + @staticmethod + def select_candidates_in_gts(xy_centers, gt_bboxes, mask_gt=None, eps=1e-9): + """select the positive anchor center in gt + + Args: + xy_centers: (N, 2) + gt_bboxes: (bs, n_gt, 4) + mask_gt: (bs, n_gt) or None + Return: + select: shape(bs, n_gt, N) + """ + n_anchors = xy_centers.shape[0] + bs, n_boxes, _ = gt_bboxes.shape + x, y = ops.split(xy_centers.view(1, -1, 2), split_size_or_sections=1, axis=-1) # (1, N, 2) -> (1, N, 1) + left, top, right, bottom = ops.split( + gt_bboxes.view(-1, 1, 4), split_size_or_sections=1, axis=-1 + ) # (bs, n_gt, 4)->(bs*n_gt, 1, 4)->(bs*n_gt, 1, 1) + select = ops.logical_and( + ops.logical_and((x - left) > eps, (y - top) > eps), ops.logical_and((right - x) > eps, (bottom - y) > eps) + ).view( + bs, n_boxes, n_anchors + ) # (bs, n_gt, N) + + if mask_gt is not None: + select = ops.cast(select, ms.float32) * ops.cast(mask_gt[..., None], ms.float32) + + return select + + @staticmethod + def select_highest_overlaps(mask_pos, overlaps, n_gt): + """if an anchor box is assigned to multiple gts, + the one with the highest iou will be selected. + + Args: + mask_pos: (b, n_gt, N) + overlaps: (b, n_gt, N) + Return: + target_gt_idx: (b, N) + fg_mask: (b, N) + mask_pos: (b, n_gt, N) + """ + + fg_mask = mask_pos.sum(-2) # (b, n_gt, N) -> (b, N) + + # if fg_mask.max() > 1: # one anchor is assigned to multiple gt_bboxes + mask_multi_gts = ops.tile(ops.expand_dims(fg_mask > 1, 1), (1, n_gt, 1)) # (b, n_gt, N) + max_overlaps_idx = overlaps.argmax(1) # (b, n_gt, N) -> (b, N) + is_max_overlaps = ops.one_hot( + max_overlaps_idx, n_gt, on_value=ops.ones(1, ms.int32), off_value=ops.zeros(1, ms.int32) + ) # (b, N, n_gt) + is_max_overlaps = ops.cast( + ops.transpose(is_max_overlaps, (0, 2, 1)), overlaps.dtype + ) # (b, N, n_gt) -> (b, n_gt, N) + mask_pos = mnp.where(mask_multi_gts, is_max_overlaps, mask_pos) + fg_mask = mask_pos.sum(-2) + + # find each grid serve which gt(index) + target_gt_idx = mask_pos.argmax(-2) # (b, h*w) + return target_gt_idx, fg_mask, mask_pos + + +def xywh2xyxy(x): + # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right + y = ops.Identity()(x) + y[..., 0] = x[..., 0] - x[..., 2] / 2 # top left x + y[..., 1] = x[..., 1] - x[..., 3] / 2 # top left y + y[..., 2] = x[..., 0] + x[..., 2] / 2 # bottom right x + y[..., 3] = x[..., 1] + x[..., 3] / 2 # bottom right y + return y + + +def xyxy2xywh(x): + """ + Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height) format. + + Args: + x (Tensor): The input bounding box coordinates in (x1, y1, x2, y2) format. + Returns: + y (Tensor): The bounding box coordinates in (x, y, width, height) format. 
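+ Example (added): (x1, y1, x2, y2) = (10, 10, 30, 50) becomes
+ (x, y, w, h) = (20, 30, 20, 40).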
+ """ + y = ops.Identity()(x) + y[..., 0] = (x[..., 0] + x[..., 2]) / 2 # x center + y[..., 1] = (x[..., 1] + x[..., 3]) / 2 # y center + y[..., 2] = x[..., 2] - x[..., 0] # width + y[..., 3] = x[..., 3] - x[..., 1] # height + return y + + +@ops.constexpr +def get_tensor(x, dtype=ms.float32): + return Tensor(x, dtype) diff --git a/community/cv/ShipWise/mindyolo/models/losses/yolox_loss.py b/community/cv/ShipWise/mindyolo/models/losses/yolox_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..2946bfe2ba1cba7a590cb17c44a26d624b0b83d4 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/losses/yolox_loss.py @@ -0,0 +1,306 @@ +import numpy as np + +import mindspore as ms +import mindspore.numpy as mnp +from mindspore import Tensor, nn, ops + +from mindyolo.models.layers.utils import box_clip, box_cxcywh_to_xyxy, box_scale, box_xyxy_to_cxcywh +from mindyolo.models.losses.iou_loss import batch_box_iou, bbox_iou +from mindyolo.models.registry import register_model + +__all__ = ["YOLOXLoss"] + + +@register_model +class YOLOXLoss(nn.Cell): + """yolox with loss cell""" + + def __init__( + self, + nc=80, + input_size=(640, 640), + num_candidate_ota=10, + strides=(8, 16, 32), + use_l1=False, + use_summary=False, + **kwargs + ): + super(YOLOXLoss, self).__init__() + self.n_candidate_k = num_candidate_ota + self.on_value = Tensor(1.0, ms.float32) + self.off_value = Tensor(0.0, ms.float32) + self.num_class = nc + + self.unsqueeze = ops.ExpandDims() + self.reshape = ops.Reshape() + self.one_hot = ops.OneHot() + self.zeros = ops.ZerosLike() + self.sort_ascending = ops.Sort(descending=False) + self.batch_matmul_trans_a = ops.BatchMatMul(transpose_a=True) + self.bce_loss = nn.BCEWithLogitsLoss(reduction="none") + self.l1_loss = nn.L1Loss(reduction="none") + + self.strides = strides + self.input_size = input_size + self.grids = [(input_size[0] // _stride) * (input_size[1] // _stride) for _stride in strides] + self.num_total_anchor = sum(self.grids) + self.anchor_center_pos, self.anchor_strides = self._get_anchor_center_and_stride(norm=False) + + self.use_l1 = use_l1 + self.use_summary = use_summary + self.summary = ops.ScalarSummary() + self.assign = ops.Assign() + + self.loss_item_name = ["loss", "lbox", "lobj", "lcls", "lboxl1"] # branch name returned by lossitem for print + + def _get_anchor_center_and_stride(self, norm=False): + """ + creat a table for all layer of anchors(grids), the value is the pixel position of the grid center and its stride. 
+ The coordinate of the value is relative to the input img + Returns: + anchor_center_pos (Tensor[num_total_anchor, 2]): pixel position of the grid center + anchor_strides (Tensor[num_total_anchor,]): anchor strides + """ + + anchor_strides_list = [] + for s, g in zip(self.strides, self.grids): + layer_stride = ops.ones((g,), ms.float32) * float(s) + anchor_strides_list.append(layer_stride) + anchor_strides = ops.concat(anchor_strides_list) + # (num_total_anchor, 2) + anchor_strides = ops.stack([anchor_strides, anchor_strides], axis=1) + + anchor_center_pos_list = [] + for stride in self.strides: + size_x = self.input_size[0] // stride + size_y = self.input_size[1] // stride + grid_x, grid_y = ops.meshgrid(mnp.arange(size_x), mnp.arange(size_y)) + grids = ops.stack((grid_x, grid_y), 2).reshape(-1, 2) + anchor_center_pos_list.append(grids) + + # (num_total_anchor, 2) + anchor_center_pos = ops.concat(anchor_center_pos_list, 0) + + # to the scale of input img + anchor_center_pos = (anchor_center_pos + 0.5) * anchor_strides + + if norm: + anchor_center_pos[..., 0] /= self.input_size[0] + anchor_center_pos[..., 1] /= self.input_size[1] + + anchor_strides[..., 0] /= self.input_size[0] + anchor_strides[..., 1] /= self.input_size[1] + + return anchor_center_pos, anchor_strides + + def in_box(self, anchors, boxes): + splitted_diff1 = anchors - boxes[..., :2] + splitted_diff2 = boxes[..., 2:] - anchors + temp1 = ops.logical_and(splitted_diff1[..., 0] > 0.0, splitted_diff1[..., 1] > 0.0) + temp2 = ops.logical_and(splitted_diff2[..., 0] > 0.0, splitted_diff2[..., 1] > 0.0) + in_mask = ops.logical_and(temp1, temp2) + + return in_mask + + def _get_foreground(self, gt_boxes, gt_valid_mask, center_radius=1.5): + """ + get the mask of foreground anchor point, + ref: simOTA, link + Args: + gt_boxes (Tensor[bs, num_gt_max, 4]): gt box in [x1,y1, x2, y2] format, normed + gt_valid_mask (Tensor[bs, num_gt_max]) : gt box valid mask, indicates valid if true + num_valid_gt (int): num of valid gt boxes + center_radius (float): radius threshold to judge whether an anchor is an inlier of the gt center. + The unit is pixel in the feature map scale. + Returns: + fg_mask (Tensor(bs, num_total_anchor)): mask to indicate whether an anchor falls in any gt box + in_center_box_mask (Tensor(bs, num_gt_max, num_total_anchor)): mask to indicate whether an anchor + falls both in a specific gt box and the core box with radius center_radius + + """ + bs, num_gt_max, _ = gt_boxes.shape + + gt_box_xyxy = gt_boxes + gt_box_center = 0.5 * (gt_box_xyxy[..., :2] + gt_box_xyxy[..., 2:]) + # 1. Gt box mask + # (bs, num_gt_max, num_total_anchor) + in_box_mask = self.in_box(self.anchor_center_pos, gt_box_xyxy.expand_dims(2)) + # fg_mask = in_box_mask.any(1) + + # 2. Gt core box mask + # (bs, num_gt_max, num_total_anchor, 4) + gt_core_box_xyxy = ops.concat( + [ + gt_box_center[:, :, None, :] - center_radius * self.anchor_strides, + gt_box_center[:, :, None, :] + center_radius * self.anchor_strides, + ], + axis=-1, + ) + # (bs, num_gt_max, num_total_anchor) + in_center_mask = self.in_box(self.anchor_center_pos, gt_core_box_xyxy) + in_center_box_mask = ops.logical_and(in_box_mask, in_center_mask) + + # 3. 
Fill padding pos with false (bs, num_gt_max, num_total_anchor) + expanded_gt_valid_mask = ops.repeat_elements( + gt_valid_mask[:, :, None].astype(ms.int32), rep=self.num_total_anchor, axis=2 + ).astype(ms.bool_) + in_center_box_mask = ops.logical_and(expanded_gt_valid_mask, in_center_box_mask) + pre_fg_mask = ops.logical_and(expanded_gt_valid_mask, in_box_mask.any(1, keep_dims=True)) + return in_center_box_mask, pre_fg_mask + + def construct(self, preds, targets, imgs=None): + """ + forward with loss return + Args: + preds (Tensor[bs, num_total_anchor, 85]): + targets (Tensor[bs, num_gt_max, 6]): 0: batch_id, 1: label, 2-6: box + """ + gt_valid_mask = targets[..., 1] >= 0 # defalut class column + gt_box_xyxy = box_cxcywh_to_xyxy(targets[:, :, 2:]) # (batch_size, gt_max, 4) in [xyxy] format + # reverse norm + gt_box_xyxy_raw = box_clip(box_scale(gt_box_xyxy, self.input_size), self.input_size) + # to cxcywh format + bbox_true = box_xyxy_to_cxcywh(gt_box_xyxy_raw) + is_inbox_and_incenter, pre_fg_mask = self._get_foreground(gt_box_xyxy_raw, gt_valid_mask) + + batch_size = preds.shape[0] + gt_max = targets.shape[1] + outputs = preds # batch_size, 8400, 85 + total_num_anchors = outputs.shape[1] + bbox_preds = outputs[:, :, :4] # batch_size, num_total_anchor, 4 + + obj_preds = outputs[:, :, 4:5] # batch_size, num_total_anchor, 1 + cls_preds = outputs[:, :, 5:] # (batch_size, num_total_anchor, num_class) + + # process label + gt_classes = ops.cast(targets[:, :, 1:2].squeeze(-1), ms.int32) + pair_wise_ious = batch_box_iou(bbox_true, bbox_preds, xywh=True) # (batch_size, gt_max, 8400) + pair_wise_ious = pair_wise_ious * pre_fg_mask + pair_wise_iou_loss = -ops.log(pair_wise_ious + 1e-8) * pre_fg_mask + gt_classes_ = self.one_hot(gt_classes, self.num_class, self.on_value, self.off_value) + # (bs, num_gt_max, num_class) -> (bs, num_gt_max, num_total_anchor, num_class) + gt_classes_expaned = ops.repeat_elements(self.unsqueeze(gt_classes_, 2), rep=total_num_anchors, axis=2) + gt_classes_expaned = ops.stop_gradient(gt_classes_expaned) + cls_preds_ = ops.sigmoid(ops.repeat_elements(self.unsqueeze(cls_preds, 1), rep=gt_max, axis=1)) * ops.sigmoid( + ops.repeat_elements(self.unsqueeze(obj_preds, 1), rep=gt_max, axis=1) + ) + # (bs, num_gt_max, num_total_anchor, num_class) -> (bs, num_gt_max, num_total_anchor) + pair_wise_cls_loss = ops.reduce_sum( + ops.binary_cross_entropy(ops.sqrt(cls_preds_), gt_classes_expaned, None, reduction="none"), -1 + ) + + pair_wise_cls_loss = pair_wise_cls_loss * pre_fg_mask + cost = pair_wise_cls_loss + 3.0 * pair_wise_iou_loss + punishment_cost = 1000.0 * (1.0 - ops.cast(is_inbox_and_incenter, ms.float32)) + cost = ops.cast(cost + punishment_cost, ms.float16) + # dynamic k matching + ious_in_boxes_matrix = pair_wise_ious # (batch_size, gt_max, 8400) + ious_in_boxes_matrix = ops.cast(pre_fg_mask * ious_in_boxes_matrix, ms.float16) + topk_ious, _ = ops.top_k(ious_in_boxes_matrix, self.n_candidate_k, sorted=True) + + dynamic_ks = ops.reduce_sum(topk_ious, 2).astype(ms.int32).clip(min=1, max=total_num_anchors - 1) + + # (1, batch_size * gt_max, 2) + batch_iter = Tensor(np.arange(0, batch_size * gt_max), ms.int32) + dynamic_ks_indices = ops.stack((batch_iter, dynamic_ks.reshape((-1,))), axis=1) + + dynamic_ks_indices = ops.stop_gradient(dynamic_ks_indices) + + values, _ = ops.top_k(-cost, self.n_candidate_k, sorted=True) # b_s , 50, 8400 + values = ops.reshape(-values, (-1, self.n_candidate_k)) + max_neg_score = self.unsqueeze(ops.gather_nd(values, 
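+ # added note: this gathers, per gt, the (dynamic_k+1)-th lowest matching cost
+ # as a strict threshold, so roughly dynamic_k anchors qualify as positives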
dynamic_ks_indices).reshape(batch_size, -1), 2) + # positive sample for each gt + pos_mask = ops.cast(cost < max_neg_score, ms.float32) # (batch_size, gt_num, 8400) + pos_mask = pre_fg_mask * pos_mask + # ----dynamic_k---- END----------------------------------------------------------------------------------------- + + # pick the one with the lower cost if a sample is positive for more than one gt + cost_t = cost * pos_mask + (1.0 - pos_mask) * 2000.0 + min_index = ops.argmin(cost_t, axis=1) + ret_posk = ops.transpose(ops.one_hot(min_index, gt_max, self.on_value, self.off_value), (0, 2, 1)) + pos_mask = pos_mask * ret_posk + pos_mask = ops.stop_gradient(pos_mask) + # AA problem--------------END ---------------------------------------------------------------------------------- + + # calculate target --------------------------------------------------------------------------------------------- + # Cast precision + pos_mask = ops.cast(pos_mask, ms.float16) + bbox_true = ops.cast(bbox_true, ms.float16) + gt_classes_ = ops.cast(gt_classes_, ms.float16) + + reg_target = self.batch_matmul_trans_a(pos_mask, bbox_true) # (batch_size, 8400, 4) + pred_ious_this_matching = self.unsqueeze(ops.reduce_sum((ious_in_boxes_matrix * pos_mask), 1), -1) + cls_target = self.batch_matmul_trans_a(pos_mask, gt_classes_) + + cls_target = cls_target * pred_ious_this_matching + obj_target = ops.reduce_max(pos_mask, 1) # (batch_size, 8400) + + # calculate l1_target + reg_target = ops.stop_gradient(reg_target) + cls_target = ops.stop_gradient(cls_target) + obj_target = ops.stop_gradient(obj_target) + bbox_preds = ops.cast(bbox_preds, ms.float32) + reg_target = ops.cast(reg_target, ms.float32) + obj_preds = ops.cast(obj_preds, ms.float32) + obj_target = ops.cast(obj_target, ms.float32) + cls_preds = ops.cast(cls_preds, ms.float32) + cls_target = ops.cast(cls_target, ms.float32) + loss_l1 = 0.0 + if self.use_l1: + l1_target = self.get_l1_format(reg_target) + l1_preds = self.get_l1_format(bbox_preds) + l1_target = ops.stop_gradient(l1_target) + l1_target = ops.cast(l1_target, ms.float32) + l1_preds = ops.cast(l1_preds, ms.float32) + loss_l1 = ops.reduce_sum(self.l1_loss(l1_preds, l1_target), -1) * obj_target + loss_l1 = ops.reduce_sum(loss_l1) + # calculate target -----------END------------------------------------------------------------------------------- + iou = bbox_iou(bbox_preds.reshape(-1, 4), reg_target.reshape(-1, 4), xywh=True).reshape(batch_size, -1) + loss_iou = (1 - iou * iou) * obj_target # (bs, num_total_anchor) + loss_iou = ops.reduce_sum(loss_iou) + + loss_obj = self.bce_loss(ops.reshape(obj_preds, (-1, 1)), ops.reshape(obj_target, (-1, 1))) + loss_obj = ops.reduce_sum(loss_obj) + + loss_cls = ops.reduce_sum(self.bce_loss(cls_preds, cls_target), -1) * obj_target + loss_cls = ops.reduce_sum(loss_cls) + + num_fg_mask = ops.reduce_sum(obj_target) == 0 + num_fg = (num_fg_mask == 0) * ops.reduce_sum(obj_target) + 1.0 * num_fg_mask + + loss_iou = 5 * loss_iou / num_fg + loss_cls = loss_cls / num_fg + loss_obj = loss_obj / num_fg + loss_l1 = loss_l1 / num_fg + loss_all = loss_iou + loss_cls + loss_obj + loss_l1 + + if self.use_summary: + self.summary("loss", loss_all) + self.summary("num_fg", num_fg) + self.summary("loss_iou", loss_iou) + self.summary("loss_cls", loss_cls) + self.summary("loss_obj", loss_obj) + self.summary("loss_l1", loss_l1) + + return loss_all, ops.stop_gradient(ops.stack((loss_all, loss_iou, loss_obj, loss_cls, loss_l1))) + + def get_l1_format_single(self, reg_target, stride, eps): + 
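+ # added note: the auxiliary L1 branch regresses raw grid offsets, so targets
+ # are re-encoded per level as xy / stride and log(wh / stride); e.g. a
+ # 64-px-wide box on the stride-8 level encodes its width as log(8) ~= 2.08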
"""calculate L1 loss related""" + reg_target = reg_target / stride + reg_target_xy = reg_target[:, :, :2] + reg_target_wh = reg_target[:, :, 2:] + reg_target_wh = ops.log(reg_target_wh + eps) + return ops.concat((reg_target_xy, reg_target_wh), -1) + + def get_l1_format(self, reg_target, eps=1e-8): + """calculate L1 loss related""" + reg_target_l = reg_target[:, 0 : self.grids[0], :] # (bs, 6400, 4) + reg_target_m = reg_target[:, self.grids[0] : self.grids[1] + self.grids[0], :] # (bs, 1600, 4) + reg_target_s = reg_target[:, -self.grids[2] :, :] # (bs, 400, 4) + + reg_target_l = self.get_l1_format_single(reg_target_l, self.strides[0], eps) + reg_target_m = self.get_l1_format_single(reg_target_m, self.strides[1], eps) + reg_target_s = self.get_l1_format_single(reg_target_s, self.strides[2], eps) + + l1_target = ops.concat([reg_target_l, reg_target_m, reg_target_s], axis=1) + return l1_target diff --git a/community/cv/ShipWise/mindyolo/models/model_factory.py b/community/cv/ShipWise/mindyolo/models/model_factory.py new file mode 100644 index 0000000000000000000000000000000000000000..5d7c57511d05a565e6085e663def1488a5e3f40b --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/model_factory.py @@ -0,0 +1,214 @@ +import math +import os +from copy import deepcopy + +from mindspore import load_checkpoint, load_param_into_net, nn, ops + +from mindyolo.utils import logger +from .heads import * +from .layers import * +from .registry import is_model, model_entrypoint +from .initializer import initialize_defult + +__all__ = ["create_model", "build_model_from_cfg"] + + +def create_model( + model_name: str, + model_cfg: dict = None, + in_channels: int = 3, + num_classes: int = 80, + checkpoint_path: str = "", + **kwargs, +): + model_args = dict(cfg=model_cfg, num_classes=num_classes, in_channels=in_channels) + kwargs = {k: v for k, v in kwargs.items() if v is not None} + + if not is_model(model_name): + raise RuntimeError(f"Unknown model {model_name}") + + create_fn = model_entrypoint(model_name) + model = create_fn(**model_args, **kwargs) + + if checkpoint_path: + assert os.path.isfile(checkpoint_path) and checkpoint_path.endswith( + ".ckpt" + ), f"[{checkpoint_path}] not a ckpt file." + checkpoint_param = load_checkpoint(checkpoint_path) + load_param_into_net(model, checkpoint_param) + logger.info(f"Load checkpoint from [{checkpoint_path}] success.") + + return model + + +# Tools: build model from yaml cfg +def build_model_from_cfg(**kwargs): + return Model(**kwargs) + + +class Model(nn.Cell): + def __init__(self, model_cfg, in_channels=3, num_classes=80, sync_bn=False): + super(Model, self).__init__() + self.model, self.save, self.layers_param = parse_model( + deepcopy(model_cfg), ch=[in_channels], nc=num_classes, sync_bn=sync_bn + ) + # Recompute + if hasattr(model_cfg, "recompute") and model_cfg.recompute and model_cfg.recompute_layers > 0: + for i in range(model_cfg.recompute_layers): + self.model[i].recompute() + logger.info( + f"Turn on recompute, and the results of the first {model_cfg.recompute_layers} layers " + f"will be recomputed." 
+ ) + initialize_defult(self) + + def construct(self, x): + y, dt = (), () # outputs + for i in range(len(self.model)): + m = self.model[i] + iol, f, _, _ = self.layers_param[i] # iol: index of layers + + if not (isinstance(f, int) and f == -1): # if not from previous layer + if isinstance(f, int): + x = y[f] + else: + _x = () + for j in f: + if j == -1: + _x += (x,) + else: + _x += (y[j],) + x = _x + + x = m(x) # run + + y += (x if iol in self.save else None,) # save output + return x + + @staticmethod + @ops.constexpr + def _get_h_w_list(ratio, gs, hw): + return tuple([math.ceil(x * ratio / gs) * gs for x in hw]) + + +def parse_model(d, ch, nc, sync_bn=False): # model_dict, input_channels(3) + _SYNC_BN = sync_bn + if _SYNC_BN: + logger.info("Parse model with Sync BN.") + verbose = d.get("verbose_log", False) + if verbose: + logger.info("") + logger.info("network structure are as follows") + logger.info("%3s%18s%3s%10s %-60s%-40s" % ("", "from", "n", "params", "module", "arguments")) + anchors, reg_max, max_channels = d.get("anchors", None), d.get("reg_max", None), d.get("max_channels", None) + stride, gd, gw = d.stride, d.depth_multiple, d.width_multiple + nc, na = ( + nc, + (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors, + ) # number of classes, number of anchors + + layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out + layers_param = [] + num_total_param, num_train_param = 0, 0 + for i, (f, n, m, args) in enumerate(d.backbone + d.head): # from, number, module, args + kwargs = {} + m = eval(m) if isinstance(m, str) else m # eval strings + + _args = [] + for j, a in enumerate(args): + if isinstance(a, str) and "=" in a: + _index = a.find("=") + k, v = a[:_index], a[_index + 1 :] + try: + v = eval(v) + except: + logger.warning(f"Parse Model, args: {k}={v}, keep str type") + kwargs[k] = v + else: + try: + a = eval(a) if isinstance(a, str) else a + except: + logger.warning(f"Parse Model, args: {a}, keep str type") + _args += [ + a, + ] + args = _args + + n = max(round(n * gd), 1) if n > 1 else n # depth gain + if m in ( + nn.Conv2d, + ConvNormAct, + RepConv, + DownC, + SPPCSPC, + SPPF, + C3, + C2f, + Bottleneck, + Residualblock, + Focus, + DWConvNormAct, + DWBottleneck, + DWC3, + ): + c1, c2 = ch[f], args[0] + if max_channels: + c2 = min(c2, max_channels) + c2 = math.ceil(c2 * gw / 8) * 8 + + args = [c1, c2, *args[1:]] + if m in ( + ConvNormAct, + RepConv, + DownC, + SPPCSPC, + SPPF, + C3, + C2f, + Bottleneck, + Residualblock, + DWConvNormAct, + DWBottleneck, + DWC3, + ): + kwargs["sync_bn"] = sync_bn + if m in (DownC, SPPCSPC, C3, C2f, DWC3): + args.insert(2, n) # number of repeats + n = 1 + elif m in (nn.BatchNorm2d, nn.SyncBatchNorm): + args = [ch[f]] + elif m in (Concat,): + c2 = sum([ch[x] for x in f]) + elif m is Shortcut: + c2 = ch[f[0]] + elif m in (YOLOv7Head, YOLOv7AuxHead, YOLOv5Head, YOLOv4Head, YOLOv3Head): + args.append([ch[x] for x in f]) + if isinstance(args[1], int): # number of anchors + args[1] = [list(range(args[1] * 2))] * len(f) + elif m in (YOLOv8Head, YOLOv8SegHead, YOLOXHead): # head of anchor free + args.append([ch[x] for x in f]) + if m in (YOLOv8SegHead,): + args[3] = math.ceil(min(args[3], max_channels) * gw / 8) * 8 + elif m is ReOrg: + c2 = ch[f] * 4 + else: + c2 = ch[f] + + m_ = nn.SequentialCell([m(*args, **kwargs) for _ in range(n)]) if n > 1 else m(*args, **kwargs) + + t = str(m) # module type + np = sum([x.size for x in m_.get_parameters()]) # number params + np_trainable = sum([x.size for x in m_.trainable_params()]) # number 
trainable params + num_total_param += np + num_train_param += np_trainable + m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params + layers_param.append((i, f, t, np)) + if verbose: + logger.info("%3s%18s%3s%10.0f %-60s%-40s" % (i, f, n, np, t, args + [kwargs] if kwargs else args)) # print + save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist + layers.append(m_) + if i == 0: + ch = [] + ch.append(c2) + logger.info(f"number of network params, total: {num_total_param / 1e6}M, trainable: {num_train_param / 1e6}M") + return nn.CellList(layers), sorted(save), layers_param diff --git a/community/cv/ShipWise/mindyolo/models/registry.py b/community/cv/ShipWise/mindyolo/models/registry.py new file mode 100644 index 0000000000000000000000000000000000000000..08e369a2706c275a70bc2c636c9eaf8fcbf28c65 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/registry.py @@ -0,0 +1,106 @@ +"""model registry and list""" +import fnmatch +import sys +from collections import defaultdict + +__all__ = ["list_models", "is_model", "model_entrypoint", "list_modules", "is_model_in_modules", "is_model_pretrained"] + +_module_to_models = defaultdict(set) +_model_to_module = {} +_model_entrypoints = {} +_model_has_pretrained = set() + + +def register_model(fn): + # lookup containing module + mod = sys.modules[fn.__module__] + module_name_split = fn.__module__.split(".") + module_name = module_name_split[-1] if len(module_name_split) else "" + + # add model to __all__ in module + model_name = fn.__name__ + if hasattr(mod, "__all__"): + mod.__all__.append(model_name) + else: + mod.__all__ = [model_name] + + # add entries to registry dict/sets + _model_entrypoints[model_name] = fn + _model_to_module[model_name] = module_name + _module_to_models[module_name].add(model_name) + has_pretrained = False + if hasattr(mod, "default_cfgs") and model_name in mod.default_cfgs: + cfg = mod.default_cfgs[model_name] + has_pretrained = "url" in cfg and cfg["url"] + if has_pretrained: + _model_has_pretrained.add(model_name) + return fn + + +def list_models(filter="", module="", pretrained=False, exclude_filters=""): + if module: + all_models = list(_module_to_models[module]) + else: + all_models = _model_entrypoints.keys() + + if filter: + models = [] + include_filters = filter if isinstance(filter, (tuple, list)) else [filter] + for f in include_filters: + include_models = fnmatch.filter(all_models, f) # include these models + if include_models: + models = set(models).union(include_models) + else: + models = all_models + + if exclude_filters: + if not isinstance(exclude_filters, (tuple, list)): + exclude_filters = [exclude_filters] + for xf in exclude_filters: + exclude_models = fnmatch.filter(models, xf) # exclude these models + if exclude_models: + models = set(models).difference(exclude_models) + + if pretrained: + models = _model_has_pretrained.intersection(models) + + models = sorted(list(models)) + + return models + + +def is_model(model_name): + """ + Check if a model name exists + """ + return model_name in _model_entrypoints + + +def model_entrypoint(model_name): + """ + Fetch a model entrypoint for specified model name + """ + return _model_entrypoints[model_name] + + +def list_modules(): + """ + Return list of module names that contain models / model entrypoints + """ + modules = _module_to_models.keys() + return list(sorted(modules)) + + +def is_model_in_modules(model_name, module_names): + """ + Check if a model exists within a subset of 
modules + Args: + model_name (str) - name of model to check + module_names (tuple, list, set) - names of modules to search in + """ + assert isinstance(module_names, (tuple, list, set)) + return any(model_name in _module_to_models[n] for n in module_names) + + +def is_model_pretrained(model_name): + return model_name in _model_has_pretrained diff --git a/community/cv/ShipWise/mindyolo/models/shipwise.py b/community/cv/ShipWise/mindyolo/models/shipwise.py new file mode 100644 index 0000000000000000000000000000000000000000..23e9e474cc546f99c80cd8d301f145d1ef4a7559 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/shipwise.py @@ -0,0 +1,104 @@ +import numpy as np + +import mindspore as ms +from mindspore import Tensor, nn + +from mindyolo.models.heads.yolov8_head import YOLOv8Head +from mindyolo.models.model_factory import build_model_from_cfg +from mindyolo.models.registry import register_model + +__all__ = ["ShipWise", "shipwise"] + + +def _cfg(url="", **kwargs): + return {"url": url, **kwargs} + + +default_cfgs = {"shipwise": _cfg(url="")} + + +class SEBlock(nn.Cell): + """Squeeze-and-Excitation Block for channel-wise attention.""" + + def __init__(self, channels, reduction=16): + super(SEBlock, self).__init__() + self.pool = nn.AdaptiveAvgPool2D(1) + self.fc = nn.SequentialCell( + nn.Dense(channels, channels // reduction, has_bias=False), + nn.ReLU(), + nn.Dense(channels // reduction, channels, has_bias=False), + nn.Sigmoid() + ) + + def construct(self, x): + b, c, _, _ = x.shape + y = self.pool(x).view(b, c) + y = self.fc(y).view(b, c, 1, 1) + return x * y + + +class ShipWise(nn.Cell): + def __init__(self, cfg, in_channels=3, num_classes=None, sync_bn=False): + super(ShipWise, self).__init__() + self.cfg = cfg + self.stride = Tensor(np.array(cfg.stride), ms.int32) + self.stride_max = int(max(self.cfg.stride)) + ch, nc = in_channels, num_classes + + self.nc = nc # override yaml value + + # Build the base model + self.model = build_model_from_cfg( + model_cfg=cfg, in_channels=ch, num_classes=nc, sync_bn=sync_bn + ) + + # Insert SEBlock into the model without changing input/output interface + self.insert_se_block() + + self.names = [str(i) for i in range(nc)] # default names + + self.initialize_weights() + + def construct(self, x): + return self.model(x) + + def insert_se_block(self): + """Insert SEBlock into the model's backbone without altering the input/output interface.""" + # Assuming the backbone is a SequentialCell + backbone = self.model.model[0] + if isinstance(backbone, nn.SequentialCell): + # Insert SEBlock after the last layer of the backbone + layers = list(backbone.cells()) + backbone_out_channels = layers[-1].out_channels + se_block = SEBlock(channels=backbone_out_channels) + + # Reconstruct the backbone with SEBlock + new_backbone = nn.SequentialCell(*layers, se_block) + self.model.model[0] = new_backbone + + def initialize_weights(self): + # Initialize the weights of SEBlock if present + backbone = self.model.model[0] + if isinstance(backbone, nn.SequentialCell): + for m in backbone.cells(): + if isinstance(m, SEBlock): + for layer in m.fc.cells(): + if isinstance(layer, nn.Dense): + ms.common.initializer.initializer( + ms.common.initializer.XavierUniform(), layer.weight.shape, layer.weight.dtype + ) + + # Reset parameters for Detect Head + m = self.model.model[-1] + if isinstance(m, YOLOv8Head): + m.initialize_biases() + m.dfl.initialize_conv_weight() + + +@register_model +def shipwise(cfg, in_channels=3, num_classes=None, **kwargs) -> ShipWise: + """Get ShipWise 
model.""" + model = ShipWise(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs) + return model + +# TODO: Preset pre-training model for ShipWise diff --git a/community/cv/ShipWise/mindyolo/models/yolov3.py b/community/cv/ShipWise/mindyolo/models/yolov3.py new file mode 100644 index 0000000000000000000000000000000000000000..9f87d2feeeda55923bcdc684e0500e1c06d94841 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/yolov3.py @@ -0,0 +1,66 @@ +import numpy as np + +import mindspore as ms +from mindspore import Tensor, nn + +from mindyolo.models.heads.yolov3_head import YOLOv3Head +from mindyolo.models.model_factory import build_model_from_cfg +from mindyolo.models.registry import register_model + +__all__ = ["YOLOv3", "yolov3"] + + +def _cfg(url="", **kwargs): + return {"url": url, **kwargs} + + +default_cfgs = {"yolov3": _cfg(url="")} + + +class YOLOv3(nn.Cell): + def __init__(self, cfg, in_channels=3, num_classes=None, sync_bn=False): + super(YOLOv3, self).__init__() + self.cfg = cfg + self.stride = Tensor(np.array(cfg.stride), ms.int32) + self.stride_max = int(max(self.cfg.stride)) + ch, nc = in_channels, num_classes + + self.nc = nc # override yaml value + self.model = build_model_from_cfg(model_cfg=cfg, in_channels=ch, num_classes=nc, sync_bn=sync_bn) + self.names = [str(i) for i in range(nc)] # default names + + self.initialize_weights() + + def construct(self, x): + return self.model(x) + + def initialize_weights(self): + # reset parameter for Detect Head + m = self.model.model[-1] + if isinstance(m, YOLOv3Head): + m.initialize_biases() + + +@register_model +def yolov3(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOv3: + """Get yolov3 model.""" + model = YOLOv3(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs) + return model + + +if __name__ == "__main__": + from mindyolo.models.model_factory import create_model + from mindyolo.utils.config import load_config, Config + + cfg, _, _ = load_config('../../configs/yolov3/yolov3.yaml') + cfg = Config(cfg) + network = create_model( + model_name=cfg.network.model_name, + model_cfg=cfg.network, + num_classes=cfg.data.nc, + sync_bn=cfg.sync_bn if hasattr(cfg, "sync_bn") else False, + ) + x = Tensor(np.random.randn(1, 3, 640, 640), ms.float32) + out = network(x) + out = out[0] if isinstance(out, (list, tuple)) else out + print(f"Output shape is {[o.shape for o in out]}") diff --git a/community/cv/ShipWise/mindyolo/models/yolov4.py b/community/cv/ShipWise/mindyolo/models/yolov4.py new file mode 100644 index 0000000000000000000000000000000000000000..08748c5f6d29c2b8593bc7d2c3432d22eca45205 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/yolov4.py @@ -0,0 +1,55 @@ +import numpy as np + +import mindspore as ms +from mindspore import Tensor, nn + +from mindyolo.models.model_factory import build_model_from_cfg +from mindyolo.models.registry import register_model + +__all__ = ["YOLOv4", "yolov4"] + + +def _cfg(url="", **kwargs): + return {"url": url, **kwargs} + + +default_cfgs = {"yolov4": _cfg(url="")} + + +class YOLOv4(nn.Cell): + def __init__(self, cfg, in_channels=3, num_classes=None, sync_bn=False): + super(YOLOv4, self).__init__() + self.cfg = cfg + ch, nc = in_channels, num_classes + + self.nc = nc # override yaml value + self.model = build_model_from_cfg(model_cfg=cfg, in_channels=ch, num_classes=nc, sync_bn=sync_bn) + self.names = [str(i) for i in range(nc)] # default names + + def construct(self, x): + return self.model(x) + + +@register_model +def yolov4(cfg, in_channels=3, 
num_classes=None, **kwargs) -> YOLOv4: + """Get yolov4 model.""" + model = YOLOv4(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs) + return model + + +if __name__ == "__main__": + from mindyolo.models.model_factory import create_model + from mindyolo.utils.config import load_config, Config + + cfg, _, _ = load_config('../../configs/yolov4/yolov4.yaml') + cfg = Config(cfg) + network = create_model( + model_name=cfg.network.model_name, + model_cfg=cfg.network, + num_classes=cfg.data.nc, + sync_bn=cfg.sync_bn if hasattr(cfg, "sync_bn") else False, + ) + x = Tensor(np.random.randn(1, 3, 640, 640), ms.float32) + out = network(x) + out = out[0] if isinstance(out, (list, tuple)) else out + print(f"Output shape is {[o.shape for o in out]}") diff --git a/community/cv/ShipWise/mindyolo/models/yolov5.py b/community/cv/ShipWise/mindyolo/models/yolov5.py new file mode 100644 index 0000000000000000000000000000000000000000..cb8931e8a64f703dec75d616b000e364464bc99d --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/yolov5.py @@ -0,0 +1,69 @@ +import numpy as np + +import mindspore as ms +from mindspore import Tensor, nn + +from mindyolo.models.heads.yolov5_head import YOLOv5Head +from mindyolo.models.model_factory import build_model_from_cfg +from mindyolo.models.registry import register_model + +__all__ = ["YOLOv5", "yolov5"] + + +def _cfg(url="", **kwargs): + return {"url": url, **kwargs} + + +default_cfgs = {"yolov5": _cfg(url="")} + + +class YOLOv5(nn.Cell): + def __init__(self, cfg, in_channels=3, num_classes=None, sync_bn=False): + super(YOLOv5, self).__init__() + self.cfg = cfg + self.stride = Tensor(np.array(cfg.stride), ms.int32) + self.stride_max = int(max(self.cfg.stride)) + ch, nc = in_channels, num_classes + + self.nc = nc # override yaml value + self.model = build_model_from_cfg(model_cfg=cfg, in_channels=ch, num_classes=nc, sync_bn=sync_bn) + self.names = [str(i) for i in range(nc)] # default names + + self.initialize_weights() + + def construct(self, x): + return self.model(x) + + def initialize_weights(self): + # reset parameter for Detect Head + m = self.model.model[-1] + if isinstance(m, YOLOv5Head): + m.initialize_biases() + + +@register_model +def yolov5(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOv5: + """Get yolov5 model.""" + model = YOLOv5(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs) + return model + + +# TODO: Preset pre-training model for yolov5-n/s/m + + +if __name__ == "__main__": + from mindyolo.models.model_factory import create_model + from mindyolo.utils.config import load_config, Config + + cfg, _, _ = load_config('../../configs/yolov5/yolov5s.yaml') + cfg = Config(cfg) + network = create_model( + model_name=cfg.network.model_name, + model_cfg=cfg.network, + num_classes=cfg.data.nc, + sync_bn=cfg.sync_bn if hasattr(cfg, "sync_bn") else False, + ) + x = Tensor(np.random.randn(1, 3, 640, 640), ms.float32) + out = network(x) + out = out[0] if isinstance(out, (list, tuple)) else out + print(f"Output shape is {[o.shape for o in out]}") diff --git a/community/cv/ShipWise/mindyolo/models/yolov7.py b/community/cv/ShipWise/mindyolo/models/yolov7.py new file mode 100644 index 0000000000000000000000000000000000000000..a0b100aa934eb3f9782f1f6a914e88752ee30206 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/yolov7.py @@ -0,0 +1,71 @@ +import numpy as np + +import mindspore as ms +from mindspore import Tensor, nn + +from mindyolo.models.heads.yolov7_head import YOLOv7AuxHead, YOLOv7Head +from 
mindyolo.models.model_factory import build_model_from_cfg +from .registry import register_model + +__all__ = ["YOLOv7", "yolov7"] + + +def _cfg(url="", **kwargs): + return {"url": url, **kwargs} + + +default_cfgs = {"yolov7": _cfg(url="")} + + +class YOLOv7(nn.Cell): + def __init__(self, cfg, in_channels=3, num_classes=None, sync_bn=False): + super(YOLOv7, self).__init__() + self.cfg = cfg + self.stride = Tensor(np.array(cfg.stride), ms.int32) + self.stride_max = int(max(self.cfg.stride)) + ch, nc = in_channels, num_classes + + self.nc = nc # override yaml value + self.model = build_model_from_cfg(model_cfg=cfg, in_channels=ch, num_classes=nc, sync_bn=sync_bn) + self.names = [str(i) for i in range(nc)] # default names + + self.initialize_weights() + + def construct(self, x): + return self.model(x) + + def initialize_weights(self): + # reset parameter for Detect Head + m = self.model.model[-1] + if isinstance(m, YOLOv7Head): + m.initialize_biases() + if isinstance(m, YOLOv7AuxHead): + m.initialize_aux_biases() + + +@register_model +def yolov7(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOv7: + """Get yolov7 model.""" + model = YOLOv7(cfg=cfg, in_channels=in_channels, num_classes=num_classes, **kwargs) + return model + + +# TODO: Preset pre-training model for yolov7-tiny/l/x + + +if __name__ == "__main__": + from mindyolo.models.model_factory import create_model + from mindyolo.utils.config import load_config, Config + + cfg, _, _ = load_config('../../configs/yolov7/yolov7-tiny.yaml') + cfg = Config(cfg) + network = create_model( + model_name=cfg.network.model_name, + model_cfg=cfg.network, + num_classes=cfg.data.nc, + sync_bn=cfg.sync_bn if hasattr(cfg, "sync_bn") else False, + ) + x = Tensor(np.random.randn(1, 3, 640, 640), ms.float32) + out = network(x) + out = out[0] if isinstance(out, (list, tuple)) else out + print(f"Output shape is {[o.shape for o in out]}") diff --git a/community/cv/ShipWise/mindyolo/models/yolov8.py b/community/cv/ShipWise/mindyolo/models/yolov8.py new file mode 100644 index 0000000000000000000000000000000000000000..f67301f35cb892a00d55d807ccd65fd61f69a309 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/yolov8.py @@ -0,0 +1,70 @@ +import numpy as np + +import mindspore as ms +from mindspore import Tensor, nn + +from mindyolo.models.heads.yolov8_head import YOLOv8Head +from mindyolo.models.model_factory import build_model_from_cfg +from mindyolo.models.registry import register_model + +__all__ = ["YOLOv8", "yolov8"] + + +def _cfg(url="", **kwargs): + return {"url": url, **kwargs} + + +default_cfgs = {"yolov8": _cfg(url="")} + + +class YOLOv8(nn.Cell): + def __init__(self, cfg, in_channels=3, num_classes=None, sync_bn=False): + super(YOLOv8, self).__init__() + self.cfg = cfg + self.stride = Tensor(np.array(cfg.stride), ms.int32) + self.stride_max = int(max(self.cfg.stride)) + ch, nc = in_channels, num_classes + + self.nc = nc # override yaml value + self.model = build_model_from_cfg(model_cfg=cfg, in_channels=ch, num_classes=nc, sync_bn=sync_bn) + self.names = [str(i) for i in range(nc)] # default names + + self.initialize_weights() + + def construct(self, x): + return self.model(x) + + def initialize_weights(self): + # reset parameter for Detect Head + m = self.model.model[-1] + if isinstance(m, YOLOv8Head): + m.initialize_biases() + m.dfl.initialize_conv_weight() + + +@register_model +def yolov8(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOv8: + """Get yolov8 model.""" + model = YOLOv8(cfg=cfg, in_channels=in_channels, 
num_classes=num_classes, **kwargs) + return model + + +# TODO: Preset pre-training model for yolov8-n + + +if __name__ == "__main__": + from mindyolo.models.model_factory import create_model + from mindyolo.utils.config import load_config, Config + + cfg, _, _ = load_config('../../configs/yolov8/yolov8s.yaml') + cfg = Config(cfg) + network = create_model( + model_name=cfg.network.model_name, + model_cfg=cfg.network, + num_classes=cfg.data.nc, + sync_bn=cfg.sync_bn if hasattr(cfg, "sync_bn") else False, + ) + x = Tensor(np.random.randn(1, 3, 640, 640), ms.float32) + out = network(x) + out = out[0] if isinstance(out, (list, tuple)) else out + print(f"Output shape is {[o.shape for o in out]}") diff --git a/community/cv/ShipWise/mindyolo/models/yolox.py b/community/cv/ShipWise/mindyolo/models/yolox.py new file mode 100644 index 0000000000000000000000000000000000000000..7dde87051704b3ce8e715b1e0a85a9b26ced63ea --- /dev/null +++ b/community/cv/ShipWise/mindyolo/models/yolox.py @@ -0,0 +1,65 @@ +import numpy as np + +import mindspore as ms +from mindspore import Tensor, nn + +from mindyolo.models.registry import register_model +from mindyolo.models.heads import YOLOXHead +from mindyolo.models.model_factory import build_model_from_cfg + +__all__ = ["YOLOX", "yolox"] + + +def _cfg(url="", **kwargs): + return {"url": url, **kwargs} + +default_cfgs = {"yolox": _cfg(url="")} + + +class YOLOX(nn.Cell): + """connect yolox backbone and head""" + + def __init__(self, cfg, in_channels=3, num_classes=80, sync_bn=False): + super(YOLOX, self).__init__() + self.cfg = cfg + self.stride = Tensor(np.array(cfg.stride), ms.int32) + ch, nc = in_channels, num_classes + self.nc = nc + self.model = build_model_from_cfg(model_cfg=cfg, in_channels=ch, num_classes=nc, sync_bn=sync_bn) + self.names = [str(i) for i in range(nc)] + + self.initialize_weights() + + def construct(self, x): + return self.model(x) + + def initialize_weights(self): + # reset parameter for Detect Head + m = self.model.model[-1] + assert isinstance(m, YOLOXHead) + m.initialize_biases() + + +@register_model +def yolox(cfg, in_channels=3, num_classes=None, **kwargs) -> YOLOX: + """Get yolox model.""" + model = YOLOX(cfg, in_channels=in_channels, num_classes=num_classes, **kwargs) + return model + + +if __name__ == "__main__": + from mindyolo.models.model_factory import create_model + from mindyolo.utils.config import load_config, Config + + cfg, _, _ = load_config('../../configs/yolox/yolox-s.yaml') + cfg = Config(cfg) + network = create_model( + model_name=cfg.network.model_name, + model_cfg=cfg.network, + num_classes=cfg.data.nc, + sync_bn=cfg.sync_bn if hasattr(cfg, "sync_bn") else False, + ) + x = Tensor(np.random.randn(1, 3, 640, 640), ms.float32) + out = network(x) + out = out[0] if isinstance(out, (list, tuple)) else out + print(f"Output shape is {[o.shape for o in out]}") diff --git a/community/cv/ShipWise/mindyolo/optim/__init__.py b/community/cv/ShipWise/mindyolo/optim/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..40b4f1f50b603cf275474e39ae136bd571d21f15 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/optim/__init__.py @@ -0,0 +1,11 @@ +from . 
import ema, group_params, optim_factory, scheduler +from .ema import * +from .group_params import * +from .optim_factory import * +from .scheduler import * + +__all__ = [] +__all__.extend(ema.__all__) +__all__.extend(group_params.__all__) +__all__.extend(scheduler.__all__) +__all__.extend(optim_factory.__all__) diff --git a/community/cv/ShipWise/mindyolo/optim/ema.py b/community/cv/ShipWise/mindyolo/optim/ema.py new file mode 100644 index 0000000000000000000000000000000000000000..479df62a84157d5a10138f2008d84f8b73598d91 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/optim/ema.py @@ -0,0 +1,51 @@ +import mindspore as ms +from mindspore import Parameter, Tensor, nn, ops + +__all__ = ["EMA"] + + +class EMA(nn.Cell): + """Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models + Keep a moving average of everything in the model state_dict (parameters and buffers). + This is intended to allow functionality like + https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage + A smoothed version of the weight is necessary for some training schemes to perform well. + """ + + def __init__(self, model, ema_model, decay=0.9999, updates=0): + super(EMA, self).__init__() + # Create EMA + self.ema = ema_model + self.ema.set_train(False) + self.weight = ms.ParameterTuple(list(model.get_parameters())) + self.ema_weight = ms.ParameterTuple(list(ema_model.get_parameters())) + self.updates = Parameter(Tensor(updates, ms.float32), requires_grad=False) # number of EMA updates + self.decay_value = decay + self.assign = ops.Assign() + self.hyper_map = ops.HyperMap() + + def decay(self, x): + # decay exponential ramp (to help early epochs) + return self.decay_value * (1 - ops.exp(ops.neg(x) / 2000)) + + @ms.jit + def update(self): + # Update EMA parameters + def update_param(d, ema_v, weight): + if weight.dtype == ms.int32: + return self.assign(ema_v, weight) + else: + tep_v = ema_v * d + return self.assign(ema_v, weight * (1.0 - d) + tep_v) + + ops.assign_add(self.updates, 1) + d = self.decay(self.updates) + success = self.hyper_map(ops.partial(update_param, d), self.ema_weight, self.weight) + + return success + + @ms.jit + def clone_from_model(self): + ops.assign_add(self.updates, 1) + success = self.hyper_map(ops.assign, self.ema_weight, self.weight) + return success diff --git a/community/cv/ShipWise/mindyolo/optim/group_params.py b/community/cv/ShipWise/mindyolo/optim/group_params.py new file mode 100644 index 0000000000000000000000000000000000000000..2c78edd2f96ada066dee30e38e8858dbcdf2df84 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/optim/group_params.py @@ -0,0 +1,284 @@ +import numpy as np + +from .scheduler import cosine_decay_lr, linear_lr + +__all__ = ["create_group_param"] + + +def create_group_param(params, gp_weight_decay=0.0, **kwargs): + """ + Create group parameters for optimizer. + + Args: + params: Network parameters + gp_weight_decay: Weight decay. 
Default: 0.0 + **kwargs: Others + """ + if "group_param" in kwargs: + gp_strategy = kwargs["group_param"] + if gp_strategy == "filter_bias_and_bn": + return filter_bias_and_bn(params, gp_weight_decay) + elif gp_strategy == "yolov8": + return group_param_yolov8(params, weight_decay=gp_weight_decay, **kwargs) + elif gp_strategy == "yolov7": + return group_param_yolov7(params, weight_decay=gp_weight_decay, **kwargs) + elif gp_strategy == "yolov5": + return group_param_yolov5(params, weight_decay=gp_weight_decay, **kwargs) + elif gp_strategy == "yolov4": + return group_param_yolov4(params, weight_decay=gp_weight_decay, **kwargs) + elif gp_strategy == "yolov3": + return group_param_yolov3(params, weight_decay=gp_weight_decay, **kwargs) + else: + raise NotImplementedError + else: + return params + + +def filter_bias_and_bn(params, weight_decay): + no_decay_params, decay_params = _group_param_common2(params) + + return [ + {"params": decay_params, "weight_decay": weight_decay}, + {"params": no_decay_params}, + ] + + +def group_param_yolov3( + params, + weight_decay, + start_factor, + end_factor, + lr_init, + warmup_bias_lr, + warmup_epochs, + min_warmup_step, + accumulate, + epochs, + steps_per_epoch, + total_batch_size, + **kwargs +): + # old: # weight, gamma, bias/beta + # new: # bias/beta, weight, others + pg0, pg1, pg2 = _group_param_common3(params) + + lr_pg0, lr_pg1, lr_pg2 = [], [], [] + lrs = cosine_decay_lr(start_factor, end_factor, lr_init, steps_per_epoch, epochs) + + warmup_steps = max(round(warmup_epochs * steps_per_epoch), min_warmup_step) + xi = [0, warmup_steps] + for i in range(epochs * steps_per_epoch): + _lr = lrs[i] + if i < warmup_steps: + lr_pg0.append(np.interp(i, xi, [warmup_bias_lr, _lr])) + lr_pg1.append(np.interp(i, xi, [0.0, _lr])) + lr_pg2.append(np.interp(i, xi, [0.0, _lr])) + else: + lr_pg0.append(_lr) + lr_pg1.append(_lr) + lr_pg2.append(_lr) + + nbs = 64 + weight_decay *= total_batch_size * accumulate / nbs # scale weight_decay + group_params = [ + {"params": pg0, "lr": lr_pg0}, + {"params": pg1, "lr": lr_pg1, "weight_decay": weight_decay}, + {"params": pg2, "lr": lr_pg2}, + ] + return group_params + + +def group_param_yolov4( + params, + weight_decay, + start_factor, + end_factor, + lr_init, + warmup_epochs, + min_warmup_step, + accumulate, + epochs, + steps_per_epoch, + total_batch_size, + **kwargs +): + pg0, pg1 = _group_param_common2(params) # bias/beta/gamma, others + + lr_pg0, lr_pg1 = [], [] + lrs = cosine_decay_lr(start_factor, end_factor, lr_init, steps_per_epoch, epochs) + + warmup_steps = max(round(warmup_epochs * steps_per_epoch), min_warmup_step) + + xi = [0, warmup_steps] + for i in range(epochs * steps_per_epoch): + _lr = lrs[i] + if i < warmup_steps: + lr_pg0.append(np.interp(i, xi, [0.0, lr_init])) + lr_pg1.append(np.interp(i, xi, [0.0, lr_init])) + + else: + lr_pg0.append(_lr) + lr_pg1.append(_lr) + + group_params = [{"params": pg0, "lr": lr_pg0}, {"params": pg1, "lr": lr_pg1, "weight_decay": weight_decay}] + return group_params + + +def group_param_yolov5( + params, + weight_decay, + start_factor, + end_factor, + lr_init, + warmup_bias_lr, + warmup_epochs, + min_warmup_step, + accumulate, + epochs, + steps_per_epoch, + total_batch_size, + **kwargs +): + # old: # weight, gamma, bias/beta + # new: # bias/beta, weight, others + pg0, pg1, pg2 = _group_param_common3(params) + + lr_pg0, lr_pg1, lr_pg2 = [], [], [] + lrs = linear_lr(start_factor, end_factor, lr_init, steps_per_epoch, epochs) + + warmup_steps = max(round(warmup_epochs * 
steps_per_epoch), min_warmup_step) + xi = [0, warmup_steps] + for i in range(epochs * steps_per_epoch): + _lr = lrs[i] + if i < warmup_steps: + lr_pg0.append(np.interp(i, xi, [warmup_bias_lr, _lr])) + lr_pg1.append(np.interp(i, xi, [0.0, _lr])) + lr_pg2.append(np.interp(i, xi, [0.0, _lr])) + else: + lr_pg0.append(_lr) + lr_pg1.append(_lr) + lr_pg2.append(_lr) + + nbs = 64 + weight_decay *= total_batch_size * accumulate / nbs # scale weight_decay + group_params = [ + {"params": pg0, "lr": lr_pg0}, + {"params": pg1, "lr": lr_pg1, "weight_decay": weight_decay}, + {"params": pg2, "lr": lr_pg2}, + ] + return group_params + + +def group_param_yolov7( + params, + weight_decay, + start_factor, + end_factor, + lr_init, + warmup_bias_lr, + warmup_epochs, + min_warmup_step, + accumulate, + epochs, + steps_per_epoch, + total_batch_size, + **kwargs +): + pg0, pg1, pg2 = _group_param_common3(params) # bias/beta, weight, others + + lr_pg0, lr_pg1, lr_pg2 = [], [], [] + lrs = cosine_decay_lr(start_factor, end_factor, lr_init, steps_per_epoch, epochs) + + warmup_steps = max(round(warmup_epochs * steps_per_epoch), min_warmup_step) + warmup_bias_steps_first = min(max(round(3 * steps_per_epoch), min_warmup_step), warmup_steps) + warmup_bias_lr_first = np.interp(warmup_bias_steps_first, [0, warmup_steps], [0.0, lr_init]) + xi = [0, warmup_steps] + for i in range(epochs * steps_per_epoch): + _lr = lrs[i] + if i < warmup_steps: + lr_pg0.append( + np.interp(i, [0, warmup_bias_steps_first, warmup_steps], [warmup_bias_lr, warmup_bias_lr_first, _lr]) + ) + lr_pg1.append(np.interp(i, xi, [0.0, _lr])) + lr_pg2.append(np.interp(i, xi, [0.0, _lr])) + + else: + lr_pg0.append(_lr) + lr_pg1.append(_lr) + lr_pg2.append(_lr) + + nbs = 64 + weight_decay *= total_batch_size * accumulate / nbs # scale weight_decay + group_params = [ + {"params": pg0, "lr": lr_pg0}, + {"params": pg1, "lr": lr_pg1, "weight_decay": weight_decay}, + {"params": pg2, "lr": lr_pg2}, + ] + return group_params + + +def group_param_yolov8( + params, + weight_decay, + start_factor, + end_factor, + lr_init, + warmup_bias_lr, + warmup_epochs, + min_warmup_step, + accumulate, + epochs, + steps_per_epoch, + total_batch_size, + **kwargs +): + pg0, pg1, pg2 = _group_param_common3(params) # bias/beta, weight, others + + lr_pg0, lr_pg1, lr_pg2 = [], [], [] + lrs = linear_lr(start_factor, end_factor, lr_init, steps_per_epoch, epochs) + + warmup_steps = max(round(warmup_epochs * steps_per_epoch), min_warmup_step) + xi = [0, warmup_steps] + for i in range(epochs * steps_per_epoch): + _lr = lrs[i] + if i < warmup_steps: + lr_pg0.append(np.interp(i, xi, [warmup_bias_lr, _lr])) + lr_pg1.append(np.interp(i, xi, [0.0, _lr])) + lr_pg2.append(np.interp(i, xi, [0.0, _lr])) + else: + lr_pg0.append(_lr) + lr_pg1.append(_lr) + lr_pg2.append(_lr) + + nbs = 64 + weight_decay *= total_batch_size * accumulate / nbs # scale weight_decay + group_params = [ + {"params": pg0, "lr": lr_pg0}, + {"params": pg1, "lr": lr_pg1, "weight_decay": weight_decay}, + {"params": pg2, "lr": lr_pg2}, + ] + return group_params + + +def _group_param_common2(params): + pg0, pg1 = [], [] # optimizer parameter groups + for p in params: + if "bias" in p.name or "beta" in p.name or "gamma" in p.name: + pg0.append(p) + else: + pg1.append(p) + + return pg0, pg1 # bias/beta/gamma, others + + +def _group_param_common3(params): + pg0, pg1, pg2 = [], [], [] # optimizer parameter groups + for p in params: + if "bias" in p.name or "beta" in p.name: + pg0.append(p) + elif "weight" in p.name: + pg1.append(p) + else: 
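+            # remaining parameters, matching neither bias/beta nor weight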
+ pg2.append(p) + + return pg0, pg1, pg2 # bias/beta, weight, others diff --git a/community/cv/ShipWise/mindyolo/optim/optim_factory.py b/community/cv/ShipWise/mindyolo/optim/optim_factory.py new file mode 100644 index 0000000000000000000000000000000000000000..8e1d39a33b7b98542d289b9e149c05e0a659c406 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/optim/optim_factory.py @@ -0,0 +1,64 @@ +""" optim factory """ +import os +from typing import Optional + +from mindspore import load_checkpoint, load_param_into_net, nn + +__all__ = ["create_optimizer"] + + +def create_optimizer( + params, + optimizer: str = "momentum", + lr: Optional[float] = 1e-3, + weight_decay: float = 0, + momentum: float = 0.9, + nesterov: bool = False, + loss_scale: float = 1.0, + checkpoint_path: str = "", + **kwargs, +): + r"""Creates optimizer by name. + + Args: + params: network parameters. + optim: optimizer name like 'sgd', 'nesterov', 'momentum'. + lr: learning rate, float or lr scheduler. Fixed and dynamic learning rate are supported. Default: 1e-3. + weight_decay: weight decay factor. Default: 0. + momentum: momentum if the optimizer supports. Default: 0.9. + nesterov: Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients. Default: False. + loss_scale: A floating point value for the loss scale, which must be larger than 0.0. Default: 1.0. + checkpoint_path: Optimizer weight path. Default: ''. + + Returns: + Optimizer object + """ + + optim = optimizer.lower() + + if optim == "sgd": + optimizer = nn.SGD( + params=params, + learning_rate=lr, + momentum=momentum, + weight_decay=weight_decay, + nesterov=nesterov, + loss_scale=loss_scale, + ) + elif optim in ["momentum", "nesterov"]: + optimizer = nn.Momentum( + params=params, + learning_rate=lr, + momentum=momentum, + weight_decay=weight_decay, + use_nesterov=nesterov, + loss_scale=loss_scale, + ) + else: + raise ValueError(f"Invalid optimizer: {optim}") + + if checkpoint_path.endswith(".ckpt") and os.path.isfile(checkpoint_path): + param_dict = load_checkpoint(checkpoint_path, filter_prefix="learning_rate") + load_param_into_net(optimizer, param_dict) + + return optimizer diff --git a/community/cv/ShipWise/mindyolo/optim/scheduler.py b/community/cv/ShipWise/mindyolo/optim/scheduler.py new file mode 100644 index 0000000000000000000000000000000000000000..5fe074b137b357a7e434ac3815df589e7bba254d --- /dev/null +++ b/community/cv/ShipWise/mindyolo/optim/scheduler.py @@ -0,0 +1,237 @@ +import math +import numpy as np + +__all__ = ["create_lr_scheduler", "create_warmup_momentum_scheduler"] + + +def create_lr_scheduler(lr_init, lr_scheduler=None, by_epoch=True, **kwargs): + """ + Create lr scheduler for optimizer. + + Args: + lr_init: Initial learning rate + lr_scheduler: LR scheduler name like 'linear', 'cos'. + by_epoch: learning rate updated by epoch if true, else updated by iteration. 
Default true + **kwargs: Others + """ + + if lr_scheduler: + assert isinstance(lr_scheduler, str), f"lr_scheduler should be a string, but got {type(lr_scheduler)}" + if lr_scheduler == "yolox": + return create_yolox_lr_scheduler(lr_init=lr_init, by_epoch=by_epoch, **kwargs) + else: + return lr_init + + +def create_yolox_lr_scheduler( + start_factor, end_factor, lr_init, steps_per_epoch, warmup_epochs, epochs, by_epoch, cooldown_epochs=0, **kwargs +): + assert epochs - warmup_epochs - cooldown_epochs > 0, f"the sum of warmup({warmup_epochs}) and " \ + f"cooldown{cooldown_epochs} epoch should " \ + f"be less than total epoch{epochs}" + # quadratic + lrs_qua = quadratic_lr(0.01, start_factor, lr_init, steps_per_epoch, epochs=warmup_epochs, by_epoch=by_epoch) + + # cosine + cosine_epochs = epochs - warmup_epochs - cooldown_epochs + lrs_cos = cosine_decay_lr( + start_factor, end_factor, lr_init, steps_per_epoch, epochs=cosine_epochs, by_epoch=by_epoch + ) + + # constant + lrs_col = [] + if cooldown_epochs > 0: + cool_down_lr = lr_init * end_factor + lrs_col = [cool_down_lr] * cooldown_epochs * steps_per_epoch + + lrs = lrs_qua + lrs_cos + lrs_col + return lrs + + +def quadratic_lr(start_factor, end_factor, lr_init, steps_per_epoch, epochs, by_epoch=True, t_max=None, **kwargs): + if t_max is None: + t_max = epochs if by_epoch else steps_per_epoch * epochs + lrs = [] + start_lr = lr_init * start_factor + end_lr = lr_init * end_factor + for i in range(steps_per_epoch * epochs): + epoch_idx = i // steps_per_epoch + index = epoch_idx if by_epoch else i + multiplier = min(index, t_max) / t_max + multiplier = pow(multiplier, 2) + lrs.append(start_lr + multiplier * (end_lr - start_lr)) + return lrs + + +def create_warmup_momentum_scheduler( + steps_per_epoch, momentum=None, warmup_momentum=None, warmup_epochs=None, min_warmup_step=None, **kwargs +): + """ + Create warmup momentum scheduler. + + Args: + steps_per_epoch: Number of steps in each epoch. + momentum (float, optional): Hyperparameter of type float, means momentum for the moving average. + It must be at least 0.0. Default: None. + warmup_momentum (float, optional): Hyperparameter of type float, means warmup momentum for the moving average. + It must be at least 0.0. Default: None. + warmup_epochs: Number of epochs for warmup. + min_warmup_step: Minimum number of steps for warmup. + **kwargs: Others + """ + + if warmup_momentum: + warmup_steps = max(round(warmup_epochs * steps_per_epoch), min_warmup_step) + return linear_momentum(warmup_momentum, momentum, warmup_steps) + else: + return None + + +def linear_momentum(start, end, total_steps): + """ + Args: + start: Starting value. + end: Ending value. + total_steps: Number of total step. + + Returns: + momentum_list: A list with length total_steps. + """ + + momentum_list = [] + for i in range(total_steps): + momentum_list.append(np.interp(i, [0, total_steps], [start, end])) + + return momentum_list + + +def linear_lr(start_factor, end_factor, lr_init, steps_per_epoch, epochs, t_max=None, **kwargs): + """ + Args: + start_factor: Starting factor. + end_factor: Ending factor. + lr_init: Initial learning rate. + steps_per_epoch: Total number of steps per epoch. + epochs: Total number of epochs trained. + t_max: The maximum number of epochs where lr changes. Default: None. 
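+        **kwargs: Others
+
+    Note:
+        The multiplier is computed from the epoch index, so all steps within one epoch share the
+        same learning rate value.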
+ + Examples: + >>> lrs = linear_lr(0.1, 0.01, 0.2, 100, 5) + >>> print(f"lrs len: {len(lrs)}") + >>> print(f"lrs per epoch: {[lrs[i] for i in range(len(lrs)) if ((i + 1) % 100 == 0)]}") + lrs len: 500 + lrs: [0.02, 0.0155, 0.011, 0.0065, 0.002] + """ + + if t_max is None: + t_max = epochs + lrs = [] + start_lr = lr_init * start_factor + end_lr = lr_init * end_factor + for i in range(steps_per_epoch * epochs): + epoch_idx = i // steps_per_epoch + multiplier = min(epoch_idx, t_max) / t_max + lrs.append(start_lr + multiplier * (end_lr - start_lr)) + return lrs + + +def cosine_decay_lr(start_factor, end_factor, lr_init, steps_per_epoch, epochs, by_epoch=True, t_max=None, **kwargs): + """ + Args: + start_factor: Starting factor. + end_factor: Ending factor. + lr_init: Initial learning rate. + steps_per_epoch: Total number of steps per epoch. + epochs: Total number of epochs trained. + t_max: The maximum number of epochs where lr changes. Default: None. + + Examples: + >>> lrs = cosine_decay_lr(0.1, 0.01, 0.2, 100, 5) + >>> print(f"lrs len: {len(lrs)}") + >>> print(f"lrs: {[lrs[i] for i in range(len(lrs)) if ((i + 1) % 100 == 0)]}") + lrs len: 500 + lrs: [0.02, 0.0173, 0.011, 0.0046, 0.002] + """ + + if t_max is None: + t_max = epochs if by_epoch else steps_per_epoch * epochs + lrs = [] + start_lr = lr_init * start_factor + end_lr = lr_init * end_factor + delta = 0.5 * (start_lr - end_lr) + for i in range(steps_per_epoch * epochs): + epoch_idx = i // steps_per_epoch + index = epoch_idx if by_epoch else i + multiplier = min(index, t_max) / t_max + lrs.append(end_lr + delta * (1.0 + math.cos(math.pi * multiplier))) + return lrs + + +def cosine_decay_lr_with_linear_warmup( + warmup_epochs, + warmup_lrs, + start_factor, + end_factor, + lr_init, + steps_per_epoch, + epochs, + min_warmup_step=1000, + t_max=None, + **kwargs, +): + """ + Args: + warmup_epochs (Union[int, tuple[int]]): The warmup epochs of the lr scheduler. + The data type is an integer or a tuple of integers. An integer represents the warmup epoch size. + A tuple of integers represents the warmup epochs interpolation nodes. Like: [0, 12, 24] or 24. + warmup_lrs (Union[int, tuple[float]]): The warmup lr of the lr scheduler. + The data type is a float or a tuple of float(The last element can be None). + A float represents the start warmup lr. + A tuple of float represents the warmup lrs interpolation nodes. Like: [0.01, 0.1, 'None'] or [0.01, 0.1] or 0.01. + start_factor: Starting factor. + end_factor: Ending factor. + lr_init: Initial learning rate. + steps_per_epoch: Total number of steps per epoch. + epochs: Total number of epochs trained. + min_warmup_step (int): Minimum warm-up steps. Default: 1000. + t_max: The maximum number of epochs where lr changes. Default: None. 
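+        **kwargs: Others
+
+    Note:
+        Within the warmup span, the learning rate is linearly interpolated (via np.interp) between
+        the warmup_lrs nodes placed at the warmup_epochs boundaries, with the cosine-decayed value
+        of the current step as the final node.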
+
+    Examples:
+        >>> lrs = cosine_decay_lr_with_linear_warmup([0, 3], [0.0001, None], 0.1, 0.01, 0.2, 100, 5, min_warmup_step=1)
+        >>> print(f"lrs len: {len(lrs)}")
+        >>> print(f"lrs every epoch: {[lrs[i] for i in range(len(lrs)) if ((i + 1) % 100 == 0)]}")
+        lrs len: 500
+        lrs every epoch: [0.0066, 0.0115, 0.0109, 0.0046, 0.002]
+    """
+
+    if isinstance(warmup_epochs, (int, float)):
+        warmup_epochs = [0, int(warmup_epochs)]
+    elif isinstance(warmup_epochs, (list, tuple)):
+        warmup_epochs = list(warmup_epochs)
+    else:
+        raise ValueError(f"Invalid warmup_epochs: {warmup_epochs}")
+
+    if isinstance(warmup_lrs, float):
+        warmup_lrs = [
+            warmup_lrs,
+        ]
+    elif isinstance(warmup_lrs, (list, tuple)):
+        warmup_lrs = list(warmup_lrs)
+        if warmup_lrs[-1] in ("None", "none", None):
+            warmup_lrs = warmup_lrs[:-1]
+    else:
+        raise ValueError(f"Invalid warmup_lrs: {warmup_lrs}")
+
+    assert (
+        len(warmup_epochs) == len(warmup_lrs) + 1
+    ), "LRScheduler: The length of 'warmup_epochs' and 'warmup_lrs' is inconsistent"
+
+    # t_max must be passed by keyword; a positional argument would land in the by_epoch slot
+    lrs = cosine_decay_lr(start_factor, end_factor, lr_init, steps_per_epoch, epochs, t_max=t_max)
+    warmup_steps = [min(i * steps_per_epoch, len(lrs)) for i in warmup_epochs]
+    warmup_steps[-1] = max(warmup_steps[-1], min(len(lrs), min_warmup_step))
+
+    for i in range(warmup_steps[-1]):
+        _lr = lrs[i]
+        lrs[i] = np.interp(i, warmup_steps, warmup_lrs + [_lr,])
+
+    return lrs
diff --git a/community/cv/ShipWise/mindyolo/utils/__init__.py b/community/cv/ShipWise/mindyolo/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..16de95da931a58fae14338d3eb50cbc0837c5022
--- /dev/null
+++ b/community/cv/ShipWise/mindyolo/utils/__init__.py
@@ -0,0 +1,7 @@
+"""Utility Tools"""
+from .checkpoint_manager import *
+from .config import *
+from .logger import *
+from .metrics import *
+from .modelarts import *
+from .utils import *
diff --git a/community/cv/ShipWise/mindyolo/utils/callback.py b/community/cv/ShipWise/mindyolo/utils/callback.py
new file mode 100644
index 0000000000000000000000000000000000000000..df7c61ded3ba790e9a138053c52250f5bda4d8e1
--- /dev/null
+++ b/community/cv/ShipWise/mindyolo/utils/callback.py
@@ -0,0 +1,381 @@
+import math
+import os
+import sys
+import time
+from typing import Union, Tuple, List
+
+import numpy as np
+from mindspore import Profiler, SummaryRecord, Tensor
+from mindyolo.utils.modelarts import sync_data
+from mindyolo.utils import CheckpointManager, logger
+from mindyolo.utils.registry import Registry
+from mindyolo.utils.train_step_factory import create_train_step_fn
+
+CALLBACK_REGISTRY = Registry("callback")
+
+
+def create_callback(arg_callback):
+    def _create_callback_worker(name, **kwargs):
+        cb_cls = CALLBACK_REGISTRY.get(name)
+        instance = cb_cls(**kwargs)
+        return instance
+
+    assert isinstance(arg_callback, (tuple, list)), f'expected callback to be a list or tuple, ' \
+                                                    f'but got {type(arg_callback)} instead'
+    for i, cb in enumerate(arg_callback):
+        assert isinstance(cb, dict) and 'name' in cb, f'callback[{i}] is not a dict or does not contain key [name]'
+
+    logger.info(CALLBACK_REGISTRY)
+
+    return [_create_callback_worker(**kw) for kw in arg_callback]
+
+
+class RunContext:
+    """
+    Hold and manage information about the running state of the model
+    Args:
+        epoch_num (int): total epoch number in the training process
+        steps_per_epoch (int): total steps of one epoch
+        trainer (Trainer): trainer class that performs the training process
+        test_fn (Function): test function that can evaluate the training model
+        enable_modelarts (bool): whether to enable modelarts; usually true when running on the cloud
+        ckpt_save_dir (str): checkpoint saving directory
+        train_url (str): training URL; usually used on the cloud when not empty
+
+    """
+
+    def __init__(
+        self,
+        epoch_num=0,
+        steps_per_epoch=0,
+        total_steps=0,
+        trainer=None,
+        test_fn=None,
+        enable_modelarts=False,
+        ckpt_save_dir="",
+        save_dir="",
+        train_url="",
+        overflow_still_update=False,
+        ms_jit=True,
+        rank_size=8,
+    ):
+
+        self.epoch_num = epoch_num
+        self.steps_per_epoch = steps_per_epoch
+        self.total_steps = total_steps
+        self.trainer = trainer
+        self.test_fn = test_fn
+        self.ckpt_save_dir = ckpt_save_dir
+        self.save_dir = save_dir
+        self.enable_modelarts = enable_modelarts
+        self.train_url = train_url
+        self.overflow_still_update = overflow_still_update
+        self.ms_jit = ms_jit
+        self.rank_size = rank_size
+
+        # the first index starts with 1 rather than 0
+        self.cur_epoch_index = 0
+        self.cur_step_index = 0
+        self.loss = []
+        self.lr = 0
+
+
+class BaseCallback:
+    """
+    Base class of callback. Applied in the Train function, it can take actions at 6 different stages of the
+    training process.
+
+    """
+
+    def __init__(self):
+        pass
+
+    def __repr__(self):
+        members = vars(self)
+        mem_str = ", ".join([f"{k}={v}" for k, v in members.items()])
+        fmt_str = self.__class__.__name__ + f"({mem_str})"
+        return fmt_str
+
+    def on_train_begin(self, run_context: RunContext):
+        """hooks to run on the beginning of training process"""
+        pass
+
+    def on_train_end(self, run_context: RunContext):
+        """hooks to run on the end of training process"""
+        pass
+
+    def on_train_epoch_begin(self, run_context: RunContext):
+        """hooks to run on the beginning of a training epoch"""
+        pass
+
+    def on_train_epoch_end(self, run_context: RunContext):
+        """hooks to run on the end of a training epoch"""
+        pass
+
+    def on_train_step_begin(self, run_context: RunContext):
+        """hooks to run on the beginning of a training step"""
+        pass
+
+    def on_train_step_end(self, run_context: RunContext):
+        """hooks to run on the end of a training step"""
+        pass
+
+
+@CALLBACK_REGISTRY.registry_module()
+class YoloxSwitchTrain(BaseCallback):
+    """
+    Switch train hook applied in the yolox model. Yolox uses a two-stage training strategy: compared with the 1st
+    stage, the 2nd stage has no mosaic data augmentation and adds an L1 loss item. Reference: url
+
+    Args:
+        switch_epoch_num (int): index of epoch to switch stage. This value equals the epoch number of the first stage.
+        is_switch_loss (bool): whether to switch loss
+        is_switch_data_aug (bool): whether to switch data augmentation
+
+    """
+
+    def __init__(self, switch_epoch_num=285, is_switch_loss=True, is_switch_data_aug=False, **kwargs):
+        super().__init__()
+        self.switch_epoch_num = switch_epoch_num
+        self.switch_epoch_index = switch_epoch_num + 1
+        self.is_switch_loss = is_switch_loss
+        self.is_switch_data_aug = is_switch_data_aug
+
+    def on_train_step_begin(self, run_context: RunContext):
+        pass
+
+    def on_train_epoch_begin(self, run_context: RunContext):
+        cur_epoch_index = run_context.cur_epoch_index
+        trainer = run_context.trainer
+        loss_ratio = run_context.rank_size
+        overflow_still_update = run_context.overflow_still_update
+        ms_jit = run_context.ms_jit
+
+        # switch loss
+        if self.is_switch_loss and cur_epoch_index == self.switch_epoch_index:
+            logger.info(f"\nAdding L1 loss starts from epoch {self.switch_epoch_index}. Graph recompiling\n")
+            trainer.loss_fn.use_l1 = True
+            trainer.train_step_fn = create_train_step_fn(task='detect',
+                                                         network=trainer.network,
+                                                         loss_fn=trainer.loss_fn,
+                                                         optimizer=trainer.optimizer,
+                                                         loss_ratio=loss_ratio,
+                                                         scaler=trainer.scaler,
+                                                         reducer=trainer.reducer,
+                                                         ema=trainer.ema,
+                                                         overflow_still_update=overflow_still_update,
+                                                         ms_jit=ms_jit)
+
+        # switch data_aug, not implemented here
+        if self.is_switch_data_aug:
+            raise ValueError(
+                "Currently switch_data_aug should be implemented using a multi-stage training pipeline. "
+                "Refer to train_transforms for more information. Keep the is_switch_data_aug flag False."
+            )
+
+
+@CALLBACK_REGISTRY.registry_module()
+class EvalWhileTrain(BaseCallback):
+    """
+    Callback of evaluation while training. It mainly does two things: evaluates the model at the requested epochs
+    and uploads checkpoint files to the cloud. Piecewise evaluation with a different interval in each piece is
+    supported.
+    Args:
+        stage_epochs (Union(List, Tuple, int)): For list or tuple type, piecewise mode is on and each element
+            indicates the epoch number in its piece. For int type, single piece mode is on and the value indicates
+            the max possible epoch index where the model will be evaluated. The default (positive infinity) means
+            no stage switch
+        stage_intervals (Union(List, Tuple, int)): Of the same type and length as stage_epochs; each element is the
+            evaluation interval of the corresponding piece. Default 1
+        eval_last_epoch (bool): whether to evaluate the last epoch of each piece. Default True
+        isolated_epochs (Union(List, Tuple, int, None)): extra epochs at which to evaluate, for flexibility.
+            Default None.
+        keep_checkpoint_max (int): the maximum number of checkpoints to keep on disk. Default 10.
+
+    Example:
+        Case 1: evaluate a single stage
+        >>> hook = EvalWhileTrain(stage_intervals=5)
+        The above hook will evaluate the model with an interval of 5, and the final epoch will be evaluated by
+        default.
+
+        Case 2: evaluate multiple stages
+        >>> hook = EvalWhileTrain(stage_epochs=[285, 15], stage_intervals=[25, 5], isolated_epochs=[3, 213])
+        The above hook will evaluate the model in two stages. In the 1st stage, 285 epochs are evaluated with an
+        interval of 25, while in the 2nd stage, 15 epochs are evaluated with an interval of 5. Meanwhile, the model
+        is evaluated at epochs 3 and 213 as specified by isolated_epochs. The final epochs of the two stages,
+        namely 285 and 300, will be evaluated by default.
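+
+    Note:
+        Checkpoints saved by this callback are managed with a top_k policy (see CheckpointManager),
+        so only the keep_checkpoint_max highest-accuracy checkpoints are kept on disk.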
+ """ + + def __init__( + self, + stage_epochs: Union[List, Tuple, int] = sys.maxsize, + stage_intervals: Union[List, Tuple, int] = 1, + eval_last_epoch=True, + isolated_epochs: Union[List, Tuple, int, None] = None, + keep_checkpoint_max=10, + ): + super().__init__() + assert isinstance(stage_intervals, (list, tuple, int)) + assert isinstance(stage_epochs, (list, tuple, int)) + + # cast interval list in case of 1 stage + if isinstance(stage_intervals, int) or isinstance(stage_epochs, int): + assert isinstance(stage_intervals, int) and isinstance( + stage_epochs, int + ), f"stage_intervals and stage_epochs must be int at the same time" + stage_intervals = [stage_intervals] + stage_epochs = [stage_epochs] + + # cast isolated_epochs to list + if isolated_epochs is not None: + assert isinstance(isolated_epochs, (list, tuple, int)) + if isinstance(isolated_epochs, int): + isolated_epochs = [isolated_epochs] + else: + isolated_epochs = [] + + assert len(stage_intervals) == len(stage_epochs) + self.stage_intervals = stage_intervals + self.stage_epochs = stage_epochs # for log + self.stage_cum_epochs = np.cumsum(stage_epochs) + self.eval_last_epoch = eval_last_epoch + self.isolated_epochs = isolated_epochs + self.keep_checkpoint_max = keep_checkpoint_max + self.manager_best = CheckpointManager(ckpt_save_policy="top_k") + self.ckpt_filelist_best = [] + + def on_train_epoch_end(self, run_context: RunContext): + cur_epoch_index = run_context.cur_epoch_index + epochs = run_context.epoch_num + # reset to total epoch if exceed + for i in range(len(self.stage_cum_epochs)): + if self.stage_cum_epochs[i] > epochs: + self.stage_cum_epochs[i] = epochs + + stage = np.searchsorted(self.stage_cum_epochs, cur_epoch_index, side="left") + # in case of cur_epoch_index greater than total epoch that need evaluation + if stage == len(self.stage_intervals): + return + + offset = self.stage_cum_epochs[stage - 1] if stage > 0 else 0 + interval_cond = (cur_epoch_index - offset) % self.stage_intervals[stage] == 0 + last_cond = self.eval_last_epoch and (cur_epoch_index == self.stage_cum_epochs[stage]) + isolated_cond = any(cur_epoch_index == e for e in self.isolated_epochs) + if interval_cond or last_cond or isolated_cond: + self._run_eval(run_context) + + def on_train_end(self, run_context: RunContext): + enable_modelarts = run_context.enable_modelarts + train_url = run_context.train_url + if enable_modelarts and self.ckpt_filelist_best: + ckpt_filelist_best = [s[0] for s in self.ckpt_filelist_best] + for p in ckpt_filelist_best: + sync_data(p, train_url + "/weights/" + p.split("/")[-1]) + + def _run_eval(self, run_context: RunContext): + s_eval_time = time.time() + + trainer = run_context.trainer + test_fn = run_context.test_fn + cur_epoch = run_context.cur_epoch_index + epochs = run_context.epoch_num + ckpt_save_dir = run_context.ckpt_save_dir + + eval_network = trainer.ema.ema if trainer.ema else trainer.network + _train_status = eval_network.training + eval_network.set_train(False) + accuracy = test_fn(network=eval_network, cur_epoch=f'{cur_epoch:03d}') + accuracy = accuracy[0] if isinstance(accuracy, (list, tuple)) else accuracy + eval_network.set_train(_train_status) + + save_path_best = os.path.join( + ckpt_save_dir, + f"best_{trainer.model_name}-{cur_epoch}_{trainer.steps_per_epoch}" f"_acc{accuracy:.3f}.ckpt", + ) + + if trainer.main_device: + self.ckpt_filelist_best = self.manager_best.save_ckpoint( + eval_network, num_ckpt=self.keep_checkpoint_max, metric=accuracy, save_path=save_path_best + ) + best_path, 
best_accu = self.ckpt_filelist_best[0]
+            logger.info(
+                f"Epoch {cur_epoch}/{epochs}, eval accuracy: {accuracy:.3f}, "
+                f"run_eval time: {(time.time() - s_eval_time):.3f} s."
+            )
+            logger.info(f"best accuracy: {best_accu:.3f}, saved at: {best_path}")
+
+
+@CALLBACK_REGISTRY.registry_module()
+class SummaryCallback(BaseCallback):
+    """
+    Callback that collects summary data at training time.
+    """
+
+    def __init__(self):
+        super().__init__()
+
+    def on_train_begin(self, run_context: RunContext):
+        """hooks to run on the beginning of training process"""
+        self.summary_dir = os.path.join(run_context.save_dir, "summary")
+        self.summary_record = SummaryRecord(self.summary_dir)
+
+    def on_train_end(self, run_context: RunContext):
+        """hooks to run on the end of training process"""
+        self.summary_record.close()
+        if run_context.enable_modelarts:
+            for p in os.listdir(self.summary_dir):
+                summary_file_path = os.path.join(self.summary_dir, p)
+                sync_data(summary_file_path, run_context.train_url + "/summary/" + summary_file_path.split("/")[-1])
+
+    def on_train_epoch_end(self, run_context: RunContext):
+        """hooks to run on the end of a training epoch"""
+        trainer = run_context.trainer
+        if trainer.data_sink:
+            for i in range(len(run_context.loss)):
+                self.summary_record.add_value("scalar", f"{trainer.loss_item_name[i]}", run_context.loss[i])
+            self.summary_record.add_value("scalar", "cur_lr", Tensor(run_context.lr))
+            self.summary_record.record(run_context.cur_epoch_index)
+            self.summary_record.flush()
+
+    def on_train_step_end(self, run_context: RunContext):
+        """hooks to run on the end of a training step"""
+        trainer = run_context.trainer
+        if run_context.cur_step_index % trainer.log_interval == 0:
+            for i in range(len(run_context.loss)):
+                self.summary_record.add_value("scalar", f"{trainer.loss_item_name[i]}", run_context.loss[i])
+            self.summary_record.add_value("scalar", "cur_lr", Tensor(run_context.lr))
+            self.summary_record.record(run_context.cur_step_index)
+            self.summary_record.flush()
+
+
+@CALLBACK_REGISTRY.registry_module()
+class ProfilerCallback(BaseCallback):
+    """
+    Callback that collects profiler data at training time.
+
+    Example:
+        Case 1: in non-data-sink mode, collects performance data within the specified step interval.
+        Case 2: in data-sink mode, collects performance data for the covering epoch interval.
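+
+        A minimal usage sketch (profiler_step_num=10 is an arbitrary choice):
+        >>> callbacks = [ProfilerCallback(profiler_step_num=10)]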
+ """ + + def __init__(self, profiler_step_num): + super().__init__() + self.profiler_step_num = profiler_step_num + + def on_train_begin(self, run_context: RunContext): + """hooks to run on the beginning of training process""" + self.prof_dir = os.path.join(run_context.save_dir, "profiling_data") + self.prof = Profiler(output_path=self.prof_dir) + + def on_train_epoch_end(self, run_context: RunContext): + """hooks to run on the beginning of a training epoch""" + if run_context.cur_epoch_index == math.ceil(self.profiler_step_num/run_context.steps_per_epoch): + self.prof.stop() + self.prof.analyse() + + def on_train_step_end(self, run_context: RunContext): + """hooks to run on the beginning of a training step""" + if run_context.cur_step_index == self.profiler_step_num: + self.prof.stop() + self.prof.analyse() + + def on_train_end(self, run_context: RunContext): + if run_context.enable_modelarts: + for p in os.listdir(self.prof_dir): + prof_file_path = os.path.join(self.prof_dir, p) + sync_data(prof_file_path, run_context.train_url + "/profiling_data/" + prof_file_path.split("/")[-1]) diff --git a/community/cv/ShipWise/mindyolo/utils/checkpoint_manager.py b/community/cv/ShipWise/mindyolo/utils/checkpoint_manager.py new file mode 100644 index 0000000000000000000000000000000000000000..2138f68b05ea7fbd00a0c7182d69ca92f84cab2a --- /dev/null +++ b/community/cv/ShipWise/mindyolo/utils/checkpoint_manager.py @@ -0,0 +1,123 @@ +"""checkpoint manager """ +import os +import stat +import numpy as np + +import mindspore as ms +from mindspore import Tensor + +from mindyolo.utils import logger + +__all__ = ["CheckpointManager"] + + +class CheckpointManager: + """ + Manage checkpoint files according to ckpt_save_policy of checkpoint. + Args: + ckpt_save_policy (str): Checkpoint saving strategy. The optional values is None, "top_k" or "latest_k". + None means to save each checkpoint, top_k means to save K checkpoints with the highest accuracy, + and latest_k means saving the latest K checkpoint. Default: None. 
+ """ + + def __init__(self, ckpt_save_policy=None): + self._ckpoint_filelist = [] + self.ckpt_save_policy = ckpt_save_policy + + @property + def ckpoint_filelist(self): + """Get all the related checkpoint files managed here.""" + return self._ckpoint_filelist + + @property + def ckpoint_num(self): + """Get the number of the related checkpoint files managed here.""" + return len(self._ckpoint_filelist) + + def update_ckpoint_filelist(self, directory, prefix): + """Update the checkpoint file list.""" + self._ckpoint_filelist = [] + files = os.listdir(directory) + for filename in files: + if os.path.splitext(filename)[-1] == ".ckpt" and filename.startswith(prefix + "-"): + mid_name = filename[len(prefix) : -5] + flag = not (True in [char.isalpha() for char in mid_name]) + if flag: + self._ckpoint_filelist.append(os.path.join(directory, filename)) + + def remove_ckpoint_file(self, file_name): + """Remove the specified checkpoint file from this checkpoint manager and also from the directory.""" + try: + os.chmod(file_name, stat.S_IWRITE) + os.remove(file_name) + except OSError: + logger.warning("OSError, failed to remove the older ckpt file %s.", file_name) + except ValueError: + logger.warning("ValueError, failed to remove the older ckpt file %s.", file_name) + + def remove_oldest_ckpoint_file(self): + """Remove the oldest checkpoint file from this checkpoint manager and also from the directory.""" + ckpoint_files = sorted(self._ckpoint_filelist, key=os.path.getmtime) + self.remove_ckpoint_file(ckpoint_files[0]) + self._ckpoint_filelist.remove(ckpoint_files[0]) + + def keep_one_ckpoint_per_minutes(self, minutes, cur_time): + """Only keep the latest one ckpt file per minutes, remove other files generated in [last_time, cur_time].""" + del_list = [] + oldest_file = "" + oldest_time = cur_time + for ck_file in self._ckpoint_filelist: + modify_time = os.path.getmtime(ck_file) + if cur_time - modify_time < 60 * minutes: + del_list.append(ck_file) + + if modify_time < oldest_time: + oldest_time = modify_time + oldest_file = ck_file + + for mv_file in del_list: + if mv_file == oldest_file: + continue + self.remove_ckpoint_file(mv_file) + + def top_K_checkpoint(self, network, K=10, metric=None, save_path=""): + """Save and return Top K checkpoint address and accuracy.""" + last_file = self._ckpoint_filelist[-1] if self._ckpoint_filelist else None + if isinstance(metric, Tensor): + metric = metric.asnumpy() + if self.ckpoint_num < K or np.greater(metric, last_file[1]): + if self.ckpoint_num >= K: + delete = K - 1 + if delete < 0 or self.ckpoint_num <= delete: + return + to_delete = self._ckpoint_filelist[delete:] + for d in to_delete: + self.remove_ckpoint_file(d[0]) + self._ckpoint_filelist = self._ckpoint_filelist[:delete] + ms.save_checkpoint(network, save_path, async_save=True) + self._ckpoint_filelist.append((save_path, float(metric))) + self._ckpoint_filelist = sorted(self._ckpoint_filelist, key=lambda x: x[1], reverse=True) + + def latest_K_checkpoint(self, network, K=10, save_path=""): + """Save latest K checkpoint.""" + if K and 0 < K <= self.ckpoint_num: + self.remove_oldest_ckpoint_file() + ms.save_checkpoint(network, save_path, async_save=True) + self._ckpoint_filelist.append(save_path) + + def save_ckpoint(self, network, num_ckpt=10, metric=None, save_path=""): + """Save checkpoint according to different save strategy.""" + if self.ckpt_save_policy is None: + ms.save_checkpoint(network, save_path, async_save=True) + elif self.ckpt_save_policy == "top_k": + if metric is None: + raise 
ValueError(f"The expected 'metric' is not None, but got: {metric}.") + self.top_K_checkpoint(network, K=num_ckpt, metric=metric, save_path=save_path) + return self._ckpoint_filelist + elif self.ckpt_save_policy == "latest_k": + self.latest_K_checkpoint(network, K=num_ckpt, save_path=save_path) + return self._ckpoint_filelist + else: + raise ValueError( + f"The expected 'ckpt_save_policy' is None, top_k or latest_k," f"but got: {self.ckpt_save_policy}." + ) diff --git a/community/cv/ShipWise/mindyolo/utils/config.py b/community/cv/ShipWise/mindyolo/utils/config.py new file mode 100644 index 0000000000000000000000000000000000000000..0b35a893d6b78c937a2c4d468ac451918eadeaa0 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/utils/config.py @@ -0,0 +1,150 @@ +import argparse +import collections +import os +from copy import deepcopy +import yaml + +try: + collectionsAbc = collections.abc +except AttributeError: + collectionsAbc = collections + +__all__ = ["parse_args"] + + +def parse_args(parser): + parser_config = argparse.ArgumentParser(description="Config", add_help=False) + parser_config.add_argument( + "-c", "--config", type=str, default="", help="YAML config file specifying default arguments." + ) + + args_config, remaining = parser_config.parse_known_args() + + # Do we have a config file to parse? + if args_config.config: + cfg, _, _ = load_config(args_config.config) + cfg = Config(cfg) + parser.set_defaults(**cfg) + parser.set_defaults(config=args_config.config) + + # The main arg parser parses the rest of the args, the usual + # defaults will have been overridden if config file specified. + args = parser.parse_args(remaining) + + return Config(vars(args)) + + +def load_config(file_path): + BASE = "__BASE__" + assert os.path.splitext(file_path)[-1] in [".yaml", ".yml"], f"[{file_path}] not yaml format." + cfg_default, cfg_helper, cfg_choices = _parse_yaml(file_path) + + # NOTE: cfgs outside have higher priority than cfgs in _BASE_ + if BASE in cfg_default: + all_base_cfg_default = {} + all_base_cfg_helper = {} + all_base_cfg_choices = {} + base_yamls = list(cfg_default[BASE]) + for base_yaml in base_yamls: + if base_yaml.startswith("~"): + base_yaml = os.path.expanduser(base_yaml) + if not base_yaml.startswith("/"): + base_yaml = os.path.join(os.path.dirname(file_path), base_yaml) + + base_cfg_default, base_cfg_helper, base_cfg_choices = load_config(base_yaml) + all_base_cfg_default = _merge_config(base_cfg_default, all_base_cfg_default) + all_base_cfg_helper = _merge_config(base_cfg_helper, all_base_cfg_helper) + all_base_cfg_choices = _merge_config(base_cfg_choices, all_base_cfg_choices) + + del cfg_default[BASE] + return ( + _merge_config(cfg_default, all_base_cfg_default), + _merge_config(cfg_helper, all_base_cfg_helper), + _merge_config(cfg_choices, all_base_cfg_choices), + ) + + return cfg_default, cfg_helper, cfg_choices + + +def _parse_yaml(yaml_path): + """ + Parse the yaml config file. + + Args: + yaml_path: Path to the yaml config. 
+ """ + with open(yaml_path, "r") as fin: + try: + cfgs = yaml.load_all(fin.read(), Loader=yaml.FullLoader) + cfgs = [x for x in cfgs] + if len(cfgs) == 1: + cfg = cfgs[0] + cfg_helper = {} + cfg_choices = {} + elif len(cfgs) == 2: + cfg, cfg_helper = cfgs + cfg_choices = {} + elif len(cfgs) == 3: + cfg, cfg_helper, cfg_choices = cfgs + else: + raise ValueError("At most 3 docs (config, description for help, choices) are supported in config yaml") + except: + raise ValueError("Failed to parse yaml") + return cfg, cfg_helper, cfg_choices + + +def _merge_config(config, base): + """Merge config""" + new = deepcopy(base) + for k, v in config.items(): + if k in new and isinstance(new[k], dict) and isinstance(config[k], collectionsAbc.Mapping): + new[k] = _merge_config(config[k], new[k]) + else: + new[k] = config[k] + return new + + +class Config(dict): + """ + Configuration namespace. Convert dictionary to members. + """ + + def __init__(self, cfg_dict): + super(Config, self).__init__() + for k, v in cfg_dict.items(): + setattr(self, k, Config(v) if isinstance(v, dict) else v) + + def __setattr__(self, name, value): + self[name] = value + self.__dict__.update({name: value}) + + def __getattr__(self, name): + if name in self: + return self[name] + else: + raise AttributeError(name) + + def __str__(self): + return config_format_func(self) + + def __repr__(self): + return self.__str__() + + +def config_format_func(config, prefix=""): + """ + Args: + config: dict-like object + Returns: + formatted str + """ + msg = "" + if prefix: + prefix += "." + + for k, v in config.__dict__.items(): + if isinstance(v, Config): + msg += config_format_func(v, prefix=str(k)) + else: + msg += format(prefix + str(k), "<40") + format(str(v), "<") + "\n" + return msg diff --git a/community/cv/ShipWise/mindyolo/utils/convert_weight_cspdarknet53.py b/community/cv/ShipWise/mindyolo/utils/convert_weight_cspdarknet53.py new file mode 100644 index 0000000000000000000000000000000000000000..663fc3616e73cd4cfd1f7d8cc5549a7f28aaff5f --- /dev/null +++ b/community/cv/ShipWise/mindyolo/utils/convert_weight_cspdarknet53.py @@ -0,0 +1,167 @@ +import mindspore as ms + +convert_dict = { + "feature_map.backbone.conv0.1": "model.model.0.bn", + "feature_map.backbone.conv1.1": "model.model.1.bn", + "feature_map.backbone.conv2.1": "model.model.2.bn", + "feature_map.backbone.conv3.1": "model.model.3.conv1.bn", + "feature_map.backbone.conv4.1": "model.model.3.conv2.bn", + "feature_map.backbone.conv5.1": "model.model.4.bn", + "feature_map.backbone.conv6.1": "model.model.5.bn", + "feature_map.backbone.conv7.1": "model.model.7.bn", + "feature_map.backbone.conv8.1": "model.model.8.bn", + "feature_map.backbone.conv9.1": "model.model.9.bn", + "feature_map.backbone.layer2.0.conv1.1": "model.model.10.0.conv1.bn", + "feature_map.backbone.layer2.0.conv2.1": "model.model.10.0.conv2.bn", + "feature_map.backbone.layer2.1.conv1.1": "model.model.10.1.conv1.bn", + "feature_map.backbone.layer2.1.conv2.1": "model.model.10.1.conv2.bn", + "feature_map.backbone.conv10.1": "model.model.11.bn", + "feature_map.backbone.conv11.1": "model.model.12.bn", + "feature_map.backbone.conv12.1": "model.model.14.bn", + "feature_map.backbone.conv13.1": "model.model.15.bn", + "feature_map.backbone.conv14.1": "model.model.16.bn", + "feature_map.backbone.layer3.0.conv1.1": "model.model.17.0.conv1.bn", + "feature_map.backbone.layer3.1.conv1.1": "model.model.17.1.conv1.bn", + "feature_map.backbone.layer3.2.conv1.1": "model.model.17.2.conv1.bn", + 
"feature_map.backbone.layer3.3.conv1.1": "model.model.17.3.conv1.bn", + "feature_map.backbone.layer3.4.conv1.1": "model.model.17.4.conv1.bn", + "feature_map.backbone.layer3.5.conv1.1": "model.model.17.5.conv1.bn", + "feature_map.backbone.layer3.6.conv1.1": "model.model.17.6.conv1.bn", + "feature_map.backbone.layer3.7.conv1.1": "model.model.17.7.conv1.bn", + "feature_map.backbone.layer3.0.conv2.1": "model.model.17.0.conv2.bn", + "feature_map.backbone.layer3.1.conv2.1": "model.model.17.1.conv2.bn", + "feature_map.backbone.layer3.2.conv2.1": "model.model.17.2.conv2.bn", + "feature_map.backbone.layer3.3.conv2.1": "model.model.17.3.conv2.bn", + "feature_map.backbone.layer3.4.conv2.1": "model.model.17.4.conv2.bn", + "feature_map.backbone.layer3.5.conv2.1": "model.model.17.5.conv2.bn", + "feature_map.backbone.layer3.6.conv2.1": "model.model.17.6.conv2.bn", + "feature_map.backbone.layer3.7.conv2.1": "model.model.17.7.conv2.bn", + "feature_map.backbone.conv15.1": "model.model.18.bn", + "feature_map.backbone.conv16.1": "model.model.19.bn", + "feature_map.backbone.conv17.1": "model.model.21.bn", + "feature_map.backbone.conv18.1": "model.model.22.bn", + "feature_map.backbone.conv19.1": "model.model.23.bn", + "feature_map.backbone.layer4.0.conv1.1": "model.model.24.0.conv1.bn", + "feature_map.backbone.layer4.1.conv1.1": "model.model.24.1.conv1.bn", + "feature_map.backbone.layer4.2.conv1.1": "model.model.24.2.conv1.bn", + "feature_map.backbone.layer4.3.conv1.1": "model.model.24.3.conv1.bn", + "feature_map.backbone.layer4.4.conv1.1": "model.model.24.4.conv1.bn", + "feature_map.backbone.layer4.5.conv1.1": "model.model.24.5.conv1.bn", + "feature_map.backbone.layer4.6.conv1.1": "model.model.24.6.conv1.bn", + "feature_map.backbone.layer4.7.conv1.1": "model.model.24.7.conv1.bn", + "feature_map.backbone.layer4.0.conv2.1": "model.model.24.0.conv2.bn", + "feature_map.backbone.layer4.1.conv2.1": "model.model.24.1.conv2.bn", + "feature_map.backbone.layer4.2.conv2.1": "model.model.24.2.conv2.bn", + "feature_map.backbone.layer4.3.conv2.1": "model.model.24.3.conv2.bn", + "feature_map.backbone.layer4.4.conv2.1": "model.model.24.4.conv2.bn", + "feature_map.backbone.layer4.5.conv2.1": "model.model.24.5.conv2.bn", + "feature_map.backbone.layer4.6.conv2.1": "model.model.24.6.conv2.bn", + "feature_map.backbone.layer4.7.conv2.1": "model.model.24.7.conv2.bn", + "feature_map.backbone.conv20.1": "model.model.25.bn", + "feature_map.backbone.conv21.1": "model.model.26.bn", + "feature_map.backbone.conv22.1": "model.model.28.bn", + "feature_map.backbone.conv23.1": "model.model.29.bn", + "feature_map.backbone.conv24.1": "model.model.30.bn", + "feature_map.backbone.layer5.0.conv1.1": "model.model.31.0.conv1.bn", + "feature_map.backbone.layer5.1.conv1.1": "model.model.31.1.conv1.bn", + "feature_map.backbone.layer5.2.conv1.1": "model.model.31.2.conv1.bn", + "feature_map.backbone.layer5.3.conv1.1": "model.model.31.3.conv1.bn", + "feature_map.backbone.layer5.0.conv2.1": "model.model.31.0.conv2.bn", + "feature_map.backbone.layer5.1.conv2.1": "model.model.31.1.conv2.bn", + "feature_map.backbone.layer5.2.conv2.1": "model.model.31.2.conv2.bn", + "feature_map.backbone.layer5.3.conv2.1": "model.model.31.3.conv2.bn", + "feature_map.backbone.conv25.1": "model.model.32.bn", + "feature_map.backbone.conv26.1": "model.model.33.bn", + "feature_map.backbone.conv27.1": "model.model.35.bn", + "feature_map.backbone.conv0.0": "model.model.0.conv", + "feature_map.backbone.conv1.0": "model.model.1.conv", + "feature_map.backbone.conv2.0": 
"model.model.2.conv", + "feature_map.backbone.conv3.0": "model.model.3.conv1.conv", + "feature_map.backbone.conv4.0": "model.model.3.conv2.conv", + "feature_map.backbone.conv5.0": "model.model.4.conv", + "feature_map.backbone.conv6.0": "model.model.5.conv", + "feature_map.backbone.conv7.0": "model.model.7.conv", + "feature_map.backbone.conv8.0": "model.model.8.conv", + "feature_map.backbone.conv9.0": "model.model.9.conv", + "feature_map.backbone.layer2.0.conv1.0": "model.model.10.0.conv1.conv", + "feature_map.backbone.layer2.0.conv2.0": "model.model.10.0.conv2.conv", + "feature_map.backbone.layer2.1.conv1.0": "model.model.10.1.conv1.conv", + "feature_map.backbone.layer2.1.conv2.0": "model.model.10.1.conv2.conv", + "feature_map.backbone.conv10.0": "model.model.11.conv", + "feature_map.backbone.conv11.0": "model.model.12.conv", + "feature_map.backbone.conv12.0": "model.model.14.conv", + "feature_map.backbone.conv13.0": "model.model.15.conv", + "feature_map.backbone.conv14.0": "model.model.16.conv", + "feature_map.backbone.layer3.0.conv1.0": "model.model.17.0.conv1.conv", + "feature_map.backbone.layer3.1.conv1.0": "model.model.17.1.conv1.conv", + "feature_map.backbone.layer3.2.conv1.0": "model.model.17.2.conv1.conv", + "feature_map.backbone.layer3.3.conv1.0": "model.model.17.3.conv1.conv", + "feature_map.backbone.layer3.4.conv1.0": "model.model.17.4.conv1.conv", + "feature_map.backbone.layer3.5.conv1.0": "model.model.17.5.conv1.conv", + "feature_map.backbone.layer3.6.conv1.0": "model.model.17.6.conv1.conv", + "feature_map.backbone.layer3.7.conv1.0": "model.model.17.7.conv1.conv", + "feature_map.backbone.layer3.0.conv2.0": "model.model.17.0.conv2.conv", + "feature_map.backbone.layer3.1.conv2.0": "model.model.17.1.conv2.conv", + "feature_map.backbone.layer3.2.conv2.0": "model.model.17.2.conv2.conv", + "feature_map.backbone.layer3.3.conv2.0": "model.model.17.3.conv2.conv", + "feature_map.backbone.layer3.4.conv2.0": "model.model.17.4.conv2.conv", + "feature_map.backbone.layer3.5.conv2.0": "model.model.17.5.conv2.conv", + "feature_map.backbone.layer3.6.conv2.0": "model.model.17.6.conv2.conv", + "feature_map.backbone.layer3.7.conv2.0": "model.model.17.7.conv2.conv", + "feature_map.backbone.conv15.0": "model.model.18.conv", + "feature_map.backbone.conv16.0": "model.model.19.conv", + "feature_map.backbone.conv17.0": "model.model.21.conv", + "feature_map.backbone.conv18.0": "model.model.22.conv", + "feature_map.backbone.conv19.0": "model.model.23.conv", + "feature_map.backbone.layer4.0.conv1.0": "model.model.24.0.conv1.conv", + "feature_map.backbone.layer4.1.conv1.0": "model.model.24.1.conv1.conv", + "feature_map.backbone.layer4.2.conv1.0": "model.model.24.2.conv1.conv", + "feature_map.backbone.layer4.3.conv1.0": "model.model.24.3.conv1.conv", + "feature_map.backbone.layer4.4.conv1.0": "model.model.24.4.conv1.conv", + "feature_map.backbone.layer4.5.conv1.0": "model.model.24.5.conv1.conv", + "feature_map.backbone.layer4.6.conv1.0": "model.model.24.6.conv1.conv", + "feature_map.backbone.layer4.7.conv1.0": "model.model.24.7.conv1.conv", + "feature_map.backbone.layer4.0.conv2.0": "model.model.24.0.conv2.conv", + "feature_map.backbone.layer4.1.conv2.0": "model.model.24.1.conv2.conv", + "feature_map.backbone.layer4.2.conv2.0": "model.model.24.2.conv2.conv", + "feature_map.backbone.layer4.3.conv2.0": "model.model.24.3.conv2.conv", + "feature_map.backbone.layer4.4.conv2.0": "model.model.24.4.conv2.conv", + "feature_map.backbone.layer4.5.conv2.0": "model.model.24.5.conv2.conv", + 
"feature_map.backbone.layer4.6.conv2.0": "model.model.24.6.conv2.conv", + "feature_map.backbone.layer4.7.conv2.0": "model.model.24.7.conv2.conv", + "feature_map.backbone.conv20.0": "model.model.25.conv", + "feature_map.backbone.conv21.0": "model.model.26.conv", + "feature_map.backbone.conv22.0": "model.model.28.conv", + "feature_map.backbone.conv23.0": "model.model.29.conv", + "feature_map.backbone.conv24.0": "model.model.30.conv", + "feature_map.backbone.layer5.0.conv1.0": "model.model.31.0.conv1.conv", + "feature_map.backbone.layer5.1.conv1.0": "model.model.31.1.conv1.conv", + "feature_map.backbone.layer5.2.conv1.0": "model.model.31.2.conv1.conv", + "feature_map.backbone.layer5.3.conv1.0": "model.model.31.3.conv1.conv", + "feature_map.backbone.layer5.0.conv2.0": "model.model.31.0.conv2.conv", + "feature_map.backbone.layer5.1.conv2.0": "model.model.31.1.conv2.conv", + "feature_map.backbone.layer5.2.conv2.0": "model.model.31.2.conv2.conv", + "feature_map.backbone.layer5.3.conv2.0": "model.model.31.3.conv2.conv", + "feature_map.backbone.conv25.0": "model.model.32.conv", + "feature_map.backbone.conv26.0": "model.model.33.conv", + "feature_map.backbone.conv27.0": "model.model.35.conv", +} + + +def convert_weight(ori_weight, new_weight): + new_ckpt = [] + param_dict = ms.load_checkpoint(ori_weight) + for k, v in param_dict.items(): + if "feature_map.backbone" in k: + for key, val in convert_dict.items(): + if key in k: + k = k.replace(key, val) + new_ckpt.append({"name": k, "data": v}) + ms.save_checkpoint(new_ckpt, new_weight) + + +if __name__ == "__main__": + convert_weight( + "./cspdarknet53_ascend_v120_imagenet2012_official_cv_bs64_top1acc7854_top5acc9428.ckpt", + "./yolov4_backbone.ckpt", + ) diff --git a/community/cv/ShipWise/mindyolo/utils/convert_weight_darknet53.py b/community/cv/ShipWise/mindyolo/utils/convert_weight_darknet53.py new file mode 100644 index 0000000000000000000000000000000000000000..9233995d9138a05c930d31b19d9047ae32b0d996 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/utils/convert_weight_darknet53.py @@ -0,0 +1,66 @@ +import os +import sys + +import numpy as np + +import mindspore as ms +from config import parse_args + +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))) +from mindyolo.models import create_model + + +def _load_weight(weights_file): + """Loads pre-trained weights.""" + if not os.path.isfile(weights_file): + raise ValueError(f'"{weights_file}" is not a valid weight file.') + with open(weights_file, "rb") as fp: + np.fromfile(fp, dtype=np.int32, count=5) + return np.fromfile(fp, dtype=np.float32) + + +def convert_weight(cfg, weights_file="./darknet53.conv.74", output_file="./yolov3_backbone.ckpt"): + """Convert weight to mindspore ckpt.""" + net = create_model(model_name=cfg.network.model_name, model_cfg=cfg.network) + params = net.get_parameters() + params = [p for p in params] + weights = _load_weight(weights_file) + index = 0 + param_list = [] + weights_num = len(weights) + for i in range(0, len(params), 5): + weight = params[i] + mean = params[i + 1] + var = params[i + 2] + gamma = params[i + 3] + beta = params[i + 4] + beta_data = weights[index : index + beta.size].reshape(beta.shape) + index += beta.size + gamma_data = weights[index : index + gamma.size].reshape(gamma.shape) + index += gamma.size + mean_data = weights[index : index + mean.size].reshape(mean.shape) + index += mean.size + var_data = weights[index : index + var.size].reshape(var.shape) + index += var.size + weight_data = weights[index : index + 
weight.size].reshape(weight.shape) + index += weight.size + + param_list.append( + {"name": weight.name, "type": weight.dtype, "shape": weight.shape, "data": ms.Tensor(weight_data)} + ) + param_list.append({"name": mean.name, "type": mean.dtype, "shape": mean.shape, "data": ms.Tensor(mean_data)}) + param_list.append({"name": var.name, "type": var.dtype, "shape": var.shape, "data": ms.Tensor(var_data)}) + param_list.append( + {"name": gamma.name, "type": gamma.dtype, "shape": gamma.shape, "data": ms.Tensor(gamma_data)} + ) + param_list.append({"name": beta.name, "type": beta.dtype, "shape": beta.shape, "data": ms.Tensor(beta_data)}) + + if index >= weights_num: + break + + ms.save_checkpoint(param_list, output_file) + + +if __name__ == "__main__": + args = parse_args() + convert_weight(args) diff --git a/community/cv/ShipWise/mindyolo/utils/logger.py b/community/cv/ShipWise/mindyolo/utils/logger.py new file mode 100644 index 0000000000000000000000000000000000000000..a0457a2eb7e466e23ae2bc126a64534be62b00e6 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/utils/logger.py @@ -0,0 +1,182 @@ +"""Custom Logger.""" +import logging +import os +import sys +from datetime import datetime + +__all__ = ["get_logger"] + +GLOBAL_LOGGER = None + + +class CustomStreamHandler(logging.StreamHandler): + def __init__(self, stream=None): + super().__init__(stream) + + def emit(self, record): + # to start with logger header at every newline + # use __str__ to enable record.msg to be non-str object + messages = record.msg.__str__().split("\n") + for msg in messages: + record.msg = msg + super(CustomStreamHandler, self).emit(record) + + +class Logger(logging.Logger): + """ + Logger classes and functions, support print information on console and files. + + Args: + logger_name(str): The name of Logger. In most cases, it can be the name of the network. + """ + + def __init__(self, logger_name="MindYOLO"): + super(Logger, self).__init__(logger_name) + self.log_level = "INFO" + self.rank_id = _get_rank_id() + self.device_per_servers = 8 + self.formatter = logging.Formatter("%(asctime)s [%(levelname)s] %(message)s") + + def write(self, msg): + """ + write method to simulate Stream class + """ + if msg and not msg.isspace(): # skip line with white spaces + self.info(msg) + + def flush(self): + """ + write method to simulate Stream class + """ + pass + + +def setup_logging(logger_name="MindYOLO", log_level="INFO", rank_id=None, device_per_servers=8): + """Setup logging file.""" + logger = get_logger() + logger.name = logger_name + logger.log_level = log_level + if rank_id is not None: + logger.rank_id = rank_id + logger.device_per_servers = device_per_servers + + if logger.log_level not in ["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"]: + raise ValueError( + f"Not support log_level: {logger.log_level}, " + f"the log_level should be in ['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG']" + ) + + # In the distributed scenario, only one card is printed on the console. 
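+    # e.g. with device_per_servers=8, only local rank 0 of each server (global ranks 0, 8, 16, ...)
+    # attaches a stdout handler; the other ranks still write to per-rank files via setup_logging_file()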
+ if logger.rank_id % logger.device_per_servers == 0: + console = CustomStreamHandler(sys.stdout) + console.setLevel(logger.log_level) + console.setFormatter(logger.formatter) + logger.addHandler(console) + + +def setup_logging_file(log_dir="./logs"): + """Setup logging file.""" + logger = get_logger() + if not os.path.exists(log_dir): + os.makedirs(log_dir, exist_ok=True) + + # Generate a file stream based on the log generation time and rank_id + log_name = f"{logger.name}_{datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S')}_rank_{logger.rank_id}.log" + log_path = os.path.join(log_dir, log_name) + file_handler = logging.FileHandler(log_path) + file_handler.setLevel(logger.log_level) + file_handler.setFormatter(logger.formatter) + logger.addHandler(file_handler) + + +def print_args(args): + """Print hyper-parameter""" + get_logger().info("Args:") + args_dict = vars(args) + for key in args_dict.keys(): + get_logger().info("--> %s: %s", key, args_dict[key]) + get_logger().info("") + + +def important_info(msg, *args, **kwargs): + """For information that needs to be focused on, add special printing format.""" + line_width = 2 + important_msg = "\n" + important_msg += ("*" * 70 + "\n") * line_width + important_msg += ("*" * line_width + "\n") * 2 + important_msg += "*" * line_width + " " * 8 + msg + "\n" + important_msg += ("*" * line_width + "\n") * 2 + important_msg += ("*" * 70 + "\n") * line_width + get_logger().info(important_msg, *args, **kwargs) + + +def info(msg, *args, **kwargs): + """ + Log a message with severity 'INFO' on the MindYOLO logger. + + Examples: + >>> from mindyolo import logger + >>> logger.setup_logging(logger_name="MindYOLO", log_level="INFO", rank_id=0, device_per_servers=8) + >>> logger.setup_logging_file(log_dir="./logs") + >>> logger.info("test info") + """ + get_logger().info(msg, *args, **kwargs) + + +def debug(msg, *args, **kwargs): + """Log a message with severity 'DEBUG' on the MindYOLO logger.""" + get_logger().debug(msg, *args, **kwargs) + + +def error(msg, *args, **kwargs): + """Log a message with severity 'ERROR' on the MindYOLO logger.""" + get_logger().error(msg, *args, **kwargs) + + +def warning(msg, *args, **kwargs): + """Log a message with severity 'WARNING' on the MindYOLO logger.""" + get_logger().warning(msg, *args, **kwargs) + + +def critical(msg, *args, **kwargs): + """Log a message with severity 'CRITICAL' on the MindYOLO logger.""" + get_logger().critical(msg, *args, **kwargs) + + +def get_level(): + """ + Get the logger level. + + Returns: + str, the Log level includes 'CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'. + """ + # level and glog level mapping dictionary + + return get_logger().log_level + + +def _get_rank_id(): + """Get rank id.""" + rank_id = os.getenv("RANK_ID") + gpu_rank_id = os.getenv("OMPI_COMM_WORLD_RANK") + rank = "0" + if rank_id and gpu_rank_id and rank_id != gpu_rank_id: + print( + f"Environment variables RANK_ID and OMPI_COMM_WORLD_RANK set by different values, RANK_ID={rank_id}, " + f"OMPI_COMM_WORLD_RANK={gpu_rank_id}. 
We will use RANK_ID to get rank id by default.", + flush=True, + ) + if rank_id: + rank = rank_id + elif gpu_rank_id: + rank = gpu_rank_id + return int(rank) + + +def get_logger(): + """Get logger instance.""" + global GLOBAL_LOGGER + if GLOBAL_LOGGER: + return GLOBAL_LOGGER + GLOBAL_LOGGER = Logger() + return GLOBAL_LOGGER diff --git a/community/cv/ShipWise/mindyolo/utils/metrics.py b/community/cv/ShipWise/mindyolo/utils/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..0f6d1733f38d4ca8a21dfd835d6f82c582b0c21b --- /dev/null +++ b/community/cv/ShipWise/mindyolo/utils/metrics.py @@ -0,0 +1,355 @@ +import time +import cv2 +import numpy as np + +import mindspore as ms +from mindspore import ops, Tensor + +__all__ = ["non_max_suppression", "scale_coords", "xyxy2xywh", "xywh2xyxy"] + + +def non_max_suppression( + prediction, + mask_coefficient=None, + conf_thres=0.25, + iou_thres=0.45, + conf_free=False, + classes=None, + agnostic=False, + multi_label=False, + time_limit=20.0, +): + """Runs Non-Maximum Suppression (NMS) on inference results + + Args: + prediction (ndarray): Prediction. If conf_free is False, prediction on (bs, N, 5+nc) ndarray each point, + the last dimension meaning [center_x, center_y, width, height, conf, cls0, ...]; If conf_free is True, + prediction on (bs, N, 4+nc) ndarray each point, the last dimension meaning [center_x, center_y, width, height, cls0, ...]. + conf_free (bool): Whether the prediction result include conf. + + Returns: + list of detections, on (n,6) ndarray per image, the last dimension meaning [xyxy, conf, cls]. + """ + + if not conf_free: + nc = prediction.shape[2] - 5 # number of classes + xc = prediction[..., 4] > conf_thres # candidates + else: + nc = prediction.shape[2] - 4 # number of classes + xc = prediction[..., 4:].max(-1) > conf_thres # candidates + prediction = np.concatenate( + (prediction[..., :4], prediction[..., 4:].max(-1, keepdims=True), prediction[..., 4:]), axis=-1 + ) + + nm = 0 + if mask_coefficient is not None: + assert mask_coefficient.shape[:2] == prediction.shape[:2], \ + f"mask_coefficient shape {mask_coefficient.shape[:2]} and " \ + f"prediction.shape {prediction.shape[:2]} are not equal." + nm = mask_coefficient.shape[2] + prediction = np.concatenate((prediction, mask_coefficient), axis=-1) + + # Settings + min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height + max_det = 300 # maximum number of detections per image + max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() + time_limit = time_limit if time_limit > 0 else 1e3 # seconds to quit after + redundant = True # require redundant detections + multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) + merge = False # use merge-NMS + + t = time.time() + output = [np.zeros((0, 6+nm))] * prediction.shape[0] + for xi, x in enumerate(prediction): # image index, image inference + # Apply constraints + # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height + x = x[xc[xi]] # confidence + + # If none remain process next image + if not x.shape[0]: + continue + + # Scale class with conf + if not conf_free: + if nc == 1: + x[:, 5:5+nc] = x[:, 4:5] # signle cls no need to multiplicate. 
+            else:
+                x[:, 5:5+nc] *= x[:, 4:5] # conf = obj_conf * cls_conf
+
+        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
+        box = xywh2xyxy(x[:, :4])
+
+        # Detections matrix nx6 (xyxy, conf, cls)
+        if multi_label:
+            i, j = (x[:, 5:5+nc] > conf_thres).nonzero()
+            x = np.concatenate((box[i], x[i, j + 5, None], j[:, None].astype(np.float32)), 1) if nm == 0 else \
+                np.concatenate((box[i], x[i, j + 5, None], j[:, None].astype(np.float32), x[i, -nm:]), 1)
+        else: # best class only
+            conf = x[:, 5:5+nc].max(1, keepdims=True) # get maximum conf
+            j = np.argmax(x[:, 5:5+nc], axis=1, keepdims=True) # get maximum index
+            x = np.concatenate((box, conf, j.astype(np.float32)), 1)[conf.flatten() > conf_thres] if nm == 0 else \
+                np.concatenate((box, conf, j.astype(np.float32), x[:, -nm:]), 1)[conf.flatten() > conf_thres]
+
+
+        # Filter by class
+        if classes is not None:
+            x = x[(x[:, 5:6] == np.array(classes)).any(1)]
+
+        # Check shape
+        n = x.shape[0] # number of boxes
+        if not n: # no boxes
+            continue
+        elif n > max_nms: # excess boxes
+            x = x[x[:, 4].argsort()[-max_nms:]] # sort by confidence
+
+        # Batched NMS
+        c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
+        boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
+
+        i = _nms(boxes, scores, iou_thres) # NMS per sample
+
+        if i.shape[0] > max_det: # limit detections
+            i = i[:max_det]
+        if merge and (1 < n < 3e3): # Merge NMS (boxes merged using weighted mean)
+            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
+            iou = _box_iou(boxes[i], boxes) > iou_thres # iou matrix # (N, M)
+            weights = iou * scores[None] # box weights
+            # (N, M) @ (M, 4) / (N, 1)
+            x[i, :4] = np.matmul(weights, x[:, :4]) / weights.sum(1, keepdims=True) # merged boxes
+            if redundant:
+                i = i[iou.sum(1) > 1] # require redundancy
+
+        output[xi] = x[i]
+        if (time.time() - t) > time_limit:
+            print(
+                f"WARNING: Batch NMS time limit {time_limit}s exceeded; processed "
+                f"{xi + 1}/{prediction.shape[0]} samples of this batch."
+ ) + break # time limit exceeded + + return output + + +def scale_coords(img1_shape, coords, img0_shape, ratio=None, pad=None): + # Rescale coords (xyxy) from img1_shape to img0_shape + + if ratio is None: # calculate from img0_shape + ratio = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # ratio = old / new + else: + ratio = ratio[0] + + if pad is None: + padh, padw = (img1_shape[0] - img0_shape[0] * ratio) / 2, (img1_shape[1] - img0_shape[1] * ratio) / 2 + else: + padh, padw = pad[:] + + coords[:, [0, 2]] -= padw # x padding + coords[:, [1, 3]] -= padh # y padding + coords[:, [0, 2]] /= ratio # x rescale + coords[:, [1, 3]] /= ratio # y rescale + coords = _clip_coords(coords, img0_shape) + return coords + + +def _clip_coords(boxes, img_shape): + # Clip bounding xyxy bounding boxes to image shape (height, width) + boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, img_shape[1]) # x1, x2 + boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, img_shape[0]) # y1, y2 + return boxes + + +def _nms(xyxys, scores, threshold): + """Calculate NMS""" + s_time = time.time() + x1 = xyxys[:, 0] + y1 = xyxys[:, 1] + x2 = xyxys[:, 2] + y2 = xyxys[:, 3] + scores = scores + # areas = (x2 - x1 + 1) * (y2 - y1 + 1) + areas = (x2 - x1) * (y2 - y1) + order = scores.argsort()[::-1] + reserved_boxes = [] + while order.size > 0: + i = order[0] + reserved_boxes.append(i) + max_x1 = np.maximum(x1[i], x1[order[1:]]) + max_y1 = np.maximum(y1[i], y1[order[1:]]) + min_x2 = np.minimum(x2[i], x2[order[1:]]) + min_y2 = np.minimum(y2[i], y2[order[1:]]) + + # intersect_w = np.maximum(0.0, min_x2 - max_x1 + 1) + # intersect_h = np.maximum(0.0, min_y2 - max_y1 + 1) + intersect_w = np.maximum(0.0, min_x2 - max_x1) + intersect_h = np.maximum(0.0, min_y2 - max_y1) + intersect_area = intersect_w * intersect_h + + ovr = intersect_area / (areas[i] + areas[order[1:]] - intersect_area + 1e-6) + indexes = np.where(ovr <= threshold)[0] + order = order[indexes + 1] + return np.array(reserved_boxes) + + +def _box_iou(box1, box2): + # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py + """ + Return intersection-over-union (Jaccard index) of boxes. + Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 
+    Arguments:
+        box1 ([N, 4])
+        box2 ([M, 4])
+    Returns:
+        iou ([N, M]): the NxM matrix containing the pairwise
+            IoU values for every element in boxes1 and boxes2
+    """
+
+    def box_area(box):
+        # box = 4xn
+        return (box[2] - box[0]) * (box[3] - box[1])
+
+    area1 = box_area(box1.T)
+    area2 = box_area(box2.T)
+
+    # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
+    inter = (
+        (np.minimum(box1[:, None, 2:], box2[:, 2:]) - np.maximum(box1[:, None, :2], box2[:, :2])).clip(0, None).prod(2)
+    )
+    return inter / (area1[:, None] + area2 - inter) # iou = inter / (area1 + area2 - inter)
+
+
+def xywh2xyxy(x):
+    # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
+    y = np.copy(x)
+    y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
+    y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
+    y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
+    y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
+    return y
+
+
+def xyxy2xywh(x):
+    # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right
+    y = np.copy(x)
+    y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center
+    y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center
+    y[:, 2] = x[:, 2] - x[:, 0] # width
+    y[:, 3] = x[:, 3] - x[:, 1] # height
+    return y
+
+
+#------------------------for segment------------------------
+
+def scale_image(masks, img0_shape, pad=None):
+    """
+    Takes a mask, and resizes it to the original image size
+    Args:
+        masks (numpy.ndarray): resized and padded masks/images, [h, w, num]/[h, w, 3].
+        img0_shape (tuple): the original image shape
+        pad (tuple): (pad_h, pad_w) padding applied to the resized image; computed from the two shapes if None.
+    Returns:
+        masks (numpy.ndarray): The masks that are being returned.
+    """
+
+    # Rescale coordinates (xyxy) from img1_shape to img0_shape
+    img1_shape = masks.shape
+    if (np.array(img1_shape[:2]) == np.array(img0_shape[:2])).all():
+        return masks
+
+    if pad is None:
+        ratio = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # ratio = old / new
+        pad = (img1_shape[0] - img0_shape[0] * ratio) / 2, (img1_shape[1] - img0_shape[1] * ratio) / 2
+
+    top, left = int(pad[0]), int(pad[1]) # y, x
+    bottom, right = int(img1_shape[0] - pad[0]), int(img1_shape[1] - pad[1])
+
+    if len(masks.shape) < 2:
+        raise ValueError(f'masks should have 2 or 3 dimensions, but got {len(masks.shape)}')
+    masks = masks[top:bottom, left:right]
+    masks = cv2.resize(masks, dsize=(img0_shape[1], img0_shape[0]), interpolation=cv2.INTER_LINEAR)
+    # masks = ops.interpolate(Tensor(masks, dtype=ms.float32)[None], shape, mode='bilinear', align_corners=False)[0].asnumpy() # CHW
+    if len(masks.shape) == 2:
+        masks = masks[:, :, None]
+
+    return masks
+
+
+def crop_mask(masks, boxes):
+    """
+    It takes a mask and a bounding box, and returns a mask that is cropped to the bounding box
+    Args:
+        masks (numpy.ndarray): [n, h, w] array of masks
+        boxes (numpy.ndarray): [n, 4] array of bbox coordinates in the same pixel coordinates as the masks
+    Returns:
+        (numpy.ndarray): The masks are being cropped to the bounding box.
+    """
+    n, h, w = masks.shape
+    x1, y1, x2, y2 = np.split(boxes[:, :, None], 4, 1) # x1 shape(n,1,1)
+    r = np.arange(w, dtype=x1.dtype)[None, None, :] # rows shape(1,1,w)
+    c = np.arange(h, dtype=x1.dtype)[None, :, None] # cols shape(1,h,1)
+
+    return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))
+
+
+def process_mask_upsample(protos, masks_in, bboxes, shape):
+    """
+    It takes the output of the mask head, and applies the mask to the bounding boxes. This produces masks of higher
+    quality but is slower.
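+    (Mask logits are computed at prototype resolution, bilinearly upsampled to the
+    input image size, and only then cropped to each box and thresholded at 0.5.)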
+    Args:
+        protos (numpy.ndarray): [mask_dim, mask_h, mask_w]
+        masks_in (numpy.ndarray): [n, mask_dim], n is number of masks after nms
+        bboxes (numpy.ndarray): [n, 4], n is number of masks after nms
+        shape (tuple): the size of the input image (h,w)
+    Returns:
+        (numpy.ndarray): The upsampled masks.
+    """
+    assert len(shape) == 2, f"The length of the shape is {len(shape)}, expected to be 2."
+    c, mh, mw = protos.shape # CHW
+    masks = sigmoid((np.matmul(masks_in, protos.reshape(c, -1)))).reshape(-1, mh, mw)
+
+    # interpolate bilinear
+    # (n, mh, mw) -> (mh, mw, n) -> (*shape, n) -> (n, *shape)
+    # masks = cv2.resize(masks.transpose(1, 2, 0), dsize=shape, interpolation=cv2.INTER_LINEAR).transpose(2, 0, 1)
+    masks = ops.interpolate(Tensor(masks, dtype=ms.float32)[None], shape, mode='bilinear', align_corners=False)[0].asnumpy() # CHW
+
+    masks = crop_mask(masks, bboxes) # CHW
+    return masks > 0.5
+
+
+def process_mask(protos, masks_in, bboxes, shape, upsample=False):
+    """
+    Apply masks to bounding boxes using the output of the mask head.
+
+    Args:
+        protos (numpy.ndarray): An array of shape [mask_dim, mask_h, mask_w].
+        masks_in (numpy.ndarray): An array of shape [n, mask_dim], where n is the number of masks after NMS.
+        bboxes (numpy.ndarray): An array of shape [n, 4], where n is the number of masks after NMS.
+        shape (tuple): A tuple of integers representing the size of the input image in the format (h, w).
+        upsample (bool): A flag to indicate whether to upsample the mask to the original image size. Default is False.
+
+    Returns:
+        (numpy.ndarray): A binary mask array of shape [n, h, w], where n is the number of masks after NMS, and h and w
+            are the height and width of the input image. The mask is applied to the bounding boxes.
+    """
+
+    assert len(shape) == 2, f"The length of the shape is {len(shape)}, expected to be 2."
+    c, mh, mw = protos.shape # CHW
+    ih, iw = shape
+    masks = sigmoid(np.matmul(masks_in, protos.reshape(c, -1))).reshape(-1, mh, mw) # CHW
+
+    downsampled_bboxes = np.copy(bboxes)
+    downsampled_bboxes[:, 0] *= mw / iw
+    downsampled_bboxes[:, 2] *= mw / iw
+    downsampled_bboxes[:, 3] *= mh / ih
+    downsampled_bboxes[:, 1] *= mh / ih
+
+    masks = crop_mask(masks, downsampled_bboxes) # CHW
+    if upsample:
+        # masks = cv2.resize(masks.transpose(1, 2, 0), dsize=shape, interpolation=cv2.INTER_LINEAR).transpose(2, 0, 1)
+        masks = ops.interpolate(Tensor(masks, dtype=ms.float32)[None], shape, mode='bilinear', align_corners=False)[0].asnumpy() # CHW
+    return masks > 0.5
+
+
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))
+
+#----------------------------------------------------------
diff --git a/community/cv/ShipWise/mindyolo/utils/modelarts.py b/community/cv/ShipWise/mindyolo/utils/modelarts.py
new file mode 100644
index 0000000000000000000000000000000000000000..9da6975fc552b1a7d6b3c116aeb1314e24a10a27
--- /dev/null
+++ b/community/cv/ShipWise/mindyolo/utils/modelarts.py
@@ -0,0 +1,53 @@
+import os
+
+_global_sync_count = 0
+
+__all__ = ["sync_data"]
+
+
+def get_device_id():
+    device_id = os.getenv("DEVICE_ID", "0")
+    return int(device_id)
+
+
+def get_device_num():
+    device_num = os.getenv("RANK_SIZE", "1")
+    return int(device_num)
+
+
+def get_rank_id():
+    global_rank_id = os.getenv("RANK_ID", "0")
+    return int(global_rank_id)
+
+
+def sync_data(from_path, to_path):
+    """
+    Download data from remote obs to a local directory if the first url is a remote url and the second is a local
+    path; upload data from the local directory to remote obs in the opposite case.
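+    Only one device per server performs the copy; the other ranks wait on a shared lock file.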
+ """ + import time + + import moxing as mox + + global _global_sync_count + sync_lock = "/tmp/copy_sync.lock" + str(_global_sync_count) + _global_sync_count += 1 + + # Each server contains 8 devices as most. + if get_device_id() % min(get_device_num(), 8) == 0 and not os.path.exists(sync_lock): + print("from path: ", from_path) + print("to path: ", to_path) + mox.file.copy_parallel(from_path, to_path) + print("===finish data synchronization===") + try: + os.mknod(sync_lock) + except IOError: + pass + print("===save flag===") + + while True: + if os.path.exists(sync_lock): + break + time.sleep(1) + + print("Finish sync data from {} to {}.".format(from_path, to_path)) diff --git a/community/cv/ShipWise/mindyolo/utils/poly.py b/community/cv/ShipWise/mindyolo/utils/poly.py new file mode 100644 index 0000000000000000000000000000000000000000..e5ec9e3911f2208be5cdf33370473d14b6721d4d --- /dev/null +++ b/community/cv/ShipWise/mindyolo/utils/poly.py @@ -0,0 +1,58 @@ +import cv2 +import numpy as np + +from mindyolo.data.utils import xywhn2xyxy + + +def show_img_with_bbox(data_dict, classes): + """ + Image and bboxes visualization. If input multiple images, apply on the first image only. + Args: + record: related data of images + classes: all categories of the whole dataset + + Returns: an image with detection boxes and categories + """ + img, labels = data_dict["images"][0], data_dict["labels"][0] + img = img.transpose(1, 2, 0)[:, :, ::-1] * 255.0 + img = np.ascontiguousarray(img, dtype=np.uint8) + labels = labels[labels[:, 1] > 0] # filter invalid label + category_ids = labels[:, 1] + bboxes = labels[:, 2:] + + categories = [classes[int(category_id)] for category_id in category_ids] + bboxes = xywhn2xyxy(bboxes[category_ids >= 0]) + for bbox, category in zip(bboxes, categories): + bbox = bbox.astype(np.int32) + categories_size = cv2.getTextSize(category + "0", cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0] + color = ((np.random.random((3,)) * 0.6 + 0.4) * 255).astype(np.uint8) + color = np.array(color).astype(np.int32).tolist() + + if bbox[1] - categories_size[1] - 3 < 0: + cv2.rectangle( + img, + (bbox[0], bbox[1] + 2), + (bbox[0] + categories_size[0], bbox[1] + categories_size[1] + 3), + color=color, + thickness=-1, + ) + cv2.putText( + img, + category, + (bbox[0], bbox[1] + categories_size[1] + 3), + cv2.FONT_HERSHEY_SIMPLEX, + 0.5, + (0, 0, 0), + thickness=1, + ) + else: + cv2.rectangle( + img, + (bbox[0], bbox[1] - categories_size[1] - 3), + (bbox[0] + categories_size[0], bbox[1] - 3), + color, + thickness=-1, + ) + cv2.putText(img, category, (bbox[0], bbox[1] - 3), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), thickness=1) + cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, thickness=2) + return img diff --git a/community/cv/ShipWise/mindyolo/utils/registry.py b/community/cv/ShipWise/mindyolo/utils/registry.py new file mode 100644 index 0000000000000000000000000000000000000000..5516e6d5f170f7b6939e4d7c4d9aaac07d206910 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/utils/registry.py @@ -0,0 +1,88 @@ +import inspect +import os + + +class Registry: + """ + a registry that maps string to class + """ + + def __init__(self, name): + """ + Args: + name (str): registry name + """ + self._name = name + self._module_dict = dict() + + def __len__(self): + return len(self._module_dict) + + def __contains__(self, key): + return self.get(key) is not None + + def __repr__(self): + format_str = self.__class__.__name__ + f"(name={self._name}, total={len(self._module_dict)})\n" + class2path = lambda c: 
os.path.sep.join(c.__module__.split('.')) + '.py'
+        format_str += ''.join(
+            [f"  ({i}): {k} in {class2path(v)}\n" for i, (k, v) in enumerate(self._module_dict.items())]
+        )
+        return format_str
+
+    @property
+    def name(self):
+        # registry name cannot be changed from outside
+        return self._name
+
+    @property
+    def module_dict(self):
+        # module dict cannot be changed from outside
+        return self._module_dict
+
+    def get(self, key):
+        """query the registry record"""
+        return self._module_dict.get(key, None)
+
+    def registry_module(self, module_name=None):
+        """
+        Register a module. A record will be added to 'self._module_dict', whose key is the class name (by default) or
+        the specified name, and value is the class itself.
+        It is used as a decorator.
+
+        Example:
+            >>> network = Registry('network')
+            >>> # case1: default module name
+            >>> @network.registry_module()
+            >>> class ResNet():
+            >>>     pass
+            >>> resnet = network.get('ResNet')
+            >>>
+            >>> # case2: customized module name
+            >>> @network.registry_module('yolov3')
+            >>> class YOLOv3():
+            >>>     pass
+            >>> yolov3 = network.get('yolov3')
+        """
+        if module_name is not None:
+            assert isinstance(module_name, str), f"module_name should be a str but got {type(module_name)} instead"
+
+        # use as a decorator
+        def _registry(cls):
+            return self._registry_module(module_class=cls, module_name=module_name)
+
+        return _registry
+
+    def _registry_module(self, module_class, module_name=None):
+        """
+        main worker of registry
+        """
+        assert inspect.isclass(
+            module_class
+        ), f"module to register should be a class but got {type(module_class)} instead"
+        if module_name is None:
+            module_name = module_class.__name__
+        if module_name in self:
+            raise KeyError(f"{module_name} is already registered in {self._name}")
+        self._module_dict[module_name] = module_class
+
+        return module_class
diff --git a/community/cv/ShipWise/mindyolo/utils/train_step_factory.py b/community/cv/ShipWise/mindyolo/utils/train_step_factory.py
new file mode 100644
index 0000000000000000000000000000000000000000..b7eec98b2391233b79fe2aa156d9e47250205956
--- /dev/null
+++ b/community/cv/ShipWise/mindyolo/utils/train_step_factory.py
@@ -0,0 +1,120 @@
+import mindspore as ms
+from mindspore import context, nn, ops
+
+__all__ = ["create_train_step_fn", "get_gradreducer", "get_loss_scaler"]
+
+
+def get_gradreducer(is_parallel, parameters):
+    if is_parallel:
+        mean = context.get_auto_parallel_context("gradients_mean")
+        degree = context.get_auto_parallel_context("device_num")
+        grad_reducer = nn.DistributedGradReducer(parameters, mean, degree)
+    else:
+        grad_reducer = ops.functional.identity
+
+    return grad_reducer
+
+
+def get_loss_scaler(ms_loss_scaler="static", scale_value=1024, scale_factor=2, scale_window=2000):
+    if ms_loss_scaler == "dynamic":
+        from mindspore.amp import DynamicLossScaler
+
+        loss_scaler = DynamicLossScaler(scale_value=scale_value, scale_factor=scale_factor, scale_window=scale_window)
+    elif ms_loss_scaler == "static":
+        from mindspore.amp import StaticLossScaler
+
+        loss_scaler = StaticLossScaler(scale_value=scale_value)
+    elif ms_loss_scaler in ("none", "None"):
+        from mindspore.amp import StaticLossScaler
+
+        loss_scaler = StaticLossScaler(1.0)
+    else:
+        raise NotImplementedError(f"Not support ms_loss_scaler: {ms_loss_scaler}")
+
+    return loss_scaler
+
+
+def create_train_step_fn(task, network, loss_fn, optimizer, loss_ratio, scaler, reducer,
+                         ema=None, overflow_still_update=False, ms_jit=False, clip_grad=False, clip_grad_value=10.):
+    from mindspore.amp import all_finite
+
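+    # all_finite checks every gradient tensor for inf/nan so that overflowed steps
+    # can be dropped (or still applied when overflow_still_update=True)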
+ use_ema = True if ema else False + + if task == "detect": + + def forward_func(x, label): + pred = network(x) + loss, loss_items = loss_fn(pred, label, x) + loss *= loss_ratio + return scaler.scale(loss), ops.stop_gradient(loss_items) + + grad_fn = ops.value_and_grad(forward_func, grad_position=None, weights=optimizer.parameters, has_aux=True) + + def train_step_func(x, label, optimizer_update=True): + (loss, loss_items), grads = grad_fn(x, label) + grads = reducer(grads) + unscaled_grads = scaler.unscale(grads) + grads_finite = all_finite(unscaled_grads) + + if clip_grad: + unscaled_grads = ops.clip_by_global_norm(unscaled_grads, clip_norm=clip_grad_value) + + if optimizer_update: + if grads_finite: + loss = ops.depend(loss, optimizer(unscaled_grads)) + if use_ema: + loss = ops.depend(loss, ema.update()) + else: + if overflow_still_update: + loss = ops.depend(loss, optimizer(unscaled_grads)) + if use_ema: + loss = ops.depend(loss, ema.update()) + + return scaler.unscale(loss), loss_items, unscaled_grads, grads_finite + + @ms.jit + def jit_warpper(*args): + return train_step_func(*args) + + return train_step_func if not ms_jit else jit_warpper + + elif task == "segment": + + def forward_func(x, label, seg): + pred = network(x) + loss, loss_items = loss_fn(pred, label, seg) + loss *= loss_ratio + return scaler.scale(loss), ops.stop_gradient(loss_items) + + grad_fn = ops.value_and_grad(forward_func, grad_position=None, weights=optimizer.parameters, has_aux=True) + + def train_step_func(x, label, seg, optimizer_update=True): + (loss, loss_items), grads = grad_fn(x, label, seg) + grads = reducer(grads) + unscaled_grads = scaler.unscale(grads) + grads_finite = all_finite(unscaled_grads) + + if clip_grad: + unscaled_grads = ops.clip_by_global_norm(unscaled_grads, clip_norm=clip_grad_value) + + if optimizer_update: + if grads_finite: + loss = ops.depend(loss, optimizer(unscaled_grads)) + if use_ema: + loss = ops.depend(loss, ema.update()) + else: + if overflow_still_update: + loss = ops.depend(loss, optimizer(unscaled_grads)) + if use_ema: + loss = ops.depend(loss, ema.update()) + + return scaler.unscale(loss), loss_items, unscaled_grads, grads_finite + + @ms.jit + def jit_warpper(*args): + return train_step_func(*args) + + return train_step_func if not ms_jit else jit_warpper + + else: + raise NotImplementedError \ No newline at end of file diff --git a/community/cv/ShipWise/mindyolo/utils/trainer_factory.py b/community/cv/ShipWise/mindyolo/utils/trainer_factory.py new file mode 100644 index 0000000000000000000000000000000000000000..b7e1f221bb88887a09322735a01190d945ba2c54 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/utils/trainer_factory.py @@ -0,0 +1,518 @@ +import math +import os +import time +import types +from typing import Union, List + +import mindspore as ms +from mindspore import Tensor, nn, ops + +from mindyolo.utils import logger +from mindyolo.utils.callback import BaseCallback, EvalWhileTrain, RunContext +from mindyolo.utils.checkpoint_manager import CheckpointManager +from mindyolo.utils.modelarts import sync_data + +__all__ = [ + "create_trainer", +] + + +def create_trainer( + model_name: str, + train_step_fn: types.FunctionType, + scaler, + network: nn.Cell, + loss_fn: nn.Cell, + ema: nn.Cell, + optimizer: nn.Cell, + dataloader: ms.dataset.Dataset, + steps_per_epoch: int, + callback: List[BaseCallback], + reducer, + data_sink, + profiler +): + return Trainer( + model_name=model_name, + train_step_fn=train_step_fn, + scaler=scaler, + network=network, + loss_fn=loss_fn, + 
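+        # ema may be None; the Trainer only uses it when truthy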
ema=ema, + optimizer=optimizer, + dataloader=dataloader, + steps_per_epoch=steps_per_epoch, + callback=callback, + reducer=reducer, + data_sink=data_sink, + profiler=profiler + ) + + +class Trainer: + def __init__( + self, + model_name, + train_step_fn, + scaler, + network, + loss_fn, + ema, + optimizer, + dataloader, + steps_per_epoch, + callback, + reducer, + data_sink, + profiler + ): + self.model_name = model_name + self.train_step_fn = train_step_fn + self.scaler = scaler + self.dataloader = dataloader + self.network = network # for save checkpoint + self.loss_fn = loss_fn + self.ema = ema # for save checkpoint and ema + self.optimizer = optimizer # for save checkpoint + self.global_step = 0 + self.steps_per_epoch = steps_per_epoch + self.callback = callback + self.reducer = reducer + self.data_sink = data_sink + self.profiler = profiler + + def train( + self, + epochs: int, + main_device: bool, + warmup_step: int = 0, + warmup_momentum: Union[list, None] = None, + accumulate: int = 1, + overflow_still_update: bool = False, + keep_checkpoint_max: int = 10, + log_interval: int = 1, + loss_item_name: list = [], + save_dir: str = "", + enable_modelarts: bool = False, + train_url: str = "", + run_eval: bool = False, + test_fn: types.FunctionType = None, + ms_jit: bool = True, + rank_size: int = 8, + profiler_step_num: int = 1 + ): + # Attr + self.epochs = epochs + self.main_device = main_device + self.log_interval = log_interval + self.overflow_still_update = overflow_still_update + self.loss_item_name = loss_item_name + self.profiler_step_num = profiler_step_num + + # Directories + ckpt_save_dir = os.path.join(save_dir, "weights") + if main_device: + os.makedirs(ckpt_save_dir, exist_ok=True) # save checkpoint path + + # to be compatible with old interface + has_eval_mask = list(isinstance(c, EvalWhileTrain) for c in self.callback) + if run_eval and not any(has_eval_mask): + self.callback.append(EvalWhileTrain()) + if not run_eval and any(has_eval_mask): + ind = has_eval_mask.index(True) + self.callback.pop(ind) + + # Grad Accumulate + self.accumulate_cur_step = 0 + self.accumulate_grads = None + self.accumulate = accumulate + self.accumulate_grads_fn = self._get_accumulate_grads_fn() + + # Set Checkpoint Manager + manager = CheckpointManager(ckpt_save_policy="latest_k") + manager_ema = CheckpointManager(ckpt_save_policy="latest_k") if self.ema else None + + loader = self.dataloader.create_dict_iterator(output_numpy=False, num_epochs=1) + s_step_time = time.time() + s_epoch_time = time.time() + run_context = RunContext( + epoch_num=epochs, + steps_per_epoch=self.steps_per_epoch, + total_steps=self.dataloader.dataset_size, + trainer=self, + test_fn=test_fn, + enable_modelarts=enable_modelarts, + ckpt_save_dir=ckpt_save_dir, + save_dir=save_dir, + train_url=train_url, + overflow_still_update=overflow_still_update, + ms_jit=ms_jit, + rank_size=rank_size, + ) + self._on_train_begin(run_context) + for i, data in enumerate(loader): + cur_epoch = (i // self.steps_per_epoch) + 1 + cur_step = (i % self.steps_per_epoch) + 1 + run_context.cur_epoch_index = cur_epoch + run_context.cur_step_index = cur_step + + if cur_step == 1: + self._on_train_epoch_begin(run_context) + self.global_step += 1 + if self.global_step < warmup_step: + if warmup_momentum and isinstance(self.optimizer, (nn.SGD, nn.Momentum)): + dtype = self.optimizer.momentum.dtype + self.optimizer.momentum = Tensor(warmup_momentum[i], dtype) + + imgs, labels = data["images"], data["labels"] + segments = None if 'masks' not in data else 
data["masks"] + self._on_train_step_begin(run_context) + run_context.loss, run_context.lr = self.train_step(imgs, labels, segments, + cur_step=cur_step,cur_epoch=cur_epoch) + self._on_train_step_end(run_context) + + # train log + if cur_step % self.log_interval == 0: + logger.info( + f"Epoch {cur_epoch}/{epochs}, Step {cur_step}/{self.steps_per_epoch}, " + f"step time: {(time.time() - s_step_time) * 1000 / self.log_interval:.2f} ms" + ) + s_step_time = time.time() + + # save checkpoint per epoch on main device + if self.main_device and (i + 1) % self.steps_per_epoch == 0: + # Save Checkpoint + ms.save_checkpoint( + self.optimizer, os.path.join(ckpt_save_dir, f"optim_{self.model_name}.ckpt"), async_save=True + ) + save_path = os.path.join(ckpt_save_dir, f"{self.model_name}-{cur_epoch}_{self.steps_per_epoch}.ckpt") + manager.save_ckpoint(self.network, num_ckpt=keep_checkpoint_max, save_path=save_path) + if self.ema: + save_path_ema = os.path.join( + ckpt_save_dir, f"EMA_{self.model_name}-{cur_epoch}_{self.steps_per_epoch}.ckpt" + ) + manager_ema.save_ckpoint(self.ema.ema, num_ckpt=keep_checkpoint_max, save_path=save_path_ema) + logger.info(f"Saving model to {save_path}") + + if enable_modelarts: + sync_data(save_path, train_url + "/weights/" + save_path.split("/")[-1]) + if self.ema: + sync_data(save_path_ema, train_url + "/weights/" + save_path_ema.split("/")[-1]) + + logger.info(f"Epoch {cur_epoch}/{epochs}, epoch time: {(time.time() - s_epoch_time) / 60:.2f} min.") + s_step_time = time.time() + s_epoch_time = time.time() + if self.profiler and self.profiler_step_num == cur_step: + break + if cur_step == self.steps_per_epoch: + self._on_train_epoch_end(run_context) + + self._on_train_end(run_context) + logger.info("End Train.") + + def train_with_datasink( + self, + task: str, + epochs: int, + main_device: bool, + warmup_epoch: int = 0, + warmup_momentum: Union[list, None] = None, + keep_checkpoint_max: int = 10, + log_interval: int = 1, + loss_item_name: list = [], + save_dir: str = "", + enable_modelarts: bool = False, + train_url: str = "", + run_eval: bool = False, + test_fn: types.FunctionType = None, + overflow_still_update: bool = False, + ms_jit: bool = True, + rank_size: int = 8, + profiler_step_num: int = 1 + ): + # Modify dataset columns name for data sink mode, because dataloader could not send string data to device. 
+ if task == "detect": + loader = self.dataloader.project(["images", "labels"]) + elif task == "segment": + loader = self.dataloader.project(["images", "labels", "masks"]) + else: + raise NotImplementedError + + # to be compatible with old interface + has_eval_mask = list(isinstance(c, EvalWhileTrain) for c in self.callback) + if run_eval and not any(has_eval_mask): + self.callback.append(EvalWhileTrain()) + if not run_eval and any(has_eval_mask): + ind = has_eval_mask.index(True) + self.callback.pop(ind) + + # Change warmup_momentum, list of step -> list of epoch + warmup_momentum = ( + [warmup_momentum[_i * self.steps_per_epoch] for _i in range(warmup_epoch)] + + [warmup_momentum[-1], ] * (epochs - warmup_epoch) if warmup_momentum else None + ) + + # Build train epoch func with sink process + train_epoch_fn = ms.train.data_sink( + fn=self.train_step_fn, + dataset=loader, + sink_size=self.steps_per_epoch, + jit_config=ms.JitConfig() + ) + + # Attr + self.epochs = epochs + self.main_device = main_device + self.log_interval = log_interval + self.loss_item_name = loss_item_name + self.profiler_step_num = profiler_step_num + + # Directories + ckpt_save_dir = os.path.join(save_dir, "weights") + + if main_device: + os.makedirs(ckpt_save_dir, exist_ok=True) # save checkpoint path + + # Set Checkpoint Manager + manager = CheckpointManager(ckpt_save_policy="latest_k") + manager_ema = CheckpointManager(ckpt_save_policy="latest_k") if self.ema else None + + run_context = RunContext( + epoch_num=epochs, + steps_per_epoch=self.steps_per_epoch, + total_steps=self.dataloader.dataset_size, + trainer=self, + test_fn=test_fn, + enable_modelarts=enable_modelarts, + ckpt_save_dir=ckpt_save_dir, + save_dir=save_dir, + train_url=train_url, + overflow_still_update=overflow_still_update, + ms_jit=ms_jit, + rank_size=rank_size, + ) + + s_epoch_time = time.time() + self._on_train_begin(run_context) + for epoch in range(epochs): + cur_epoch = epoch + 1 + self.global_step += self.steps_per_epoch + run_context.cur_epoch_index = cur_epoch + if epoch == 0: + logger.warning("In the data sink mode, log output will only occur once each epoch is completed.") + logger.warning( + "The first epoch will be compiled for the graph, which may take a long time; " + "You can come back later :)." 
+ ) + + if warmup_momentum and isinstance(self.optimizer, (nn.SGD, nn.Momentum)): + dtype = self.optimizer.momentum.dtype + self.optimizer.momentum = Tensor(warmup_momentum[epoch], dtype) + + # train one epoch with datasink + self._on_train_epoch_begin(run_context) + _, loss_item, _, _ = train_epoch_fn() + + # print loss and lr + log_string = f"Epoch {cur_epoch}/{epochs}, Step {self.steps_per_epoch}/{self.steps_per_epoch}" + if len(self.loss_item_name) < len(loss_item): + self.loss_item_name += [f"loss_item{i}" for i in range(len(loss_item) - len(self.loss_item_name))] + for i in range(len(loss_item)): + log_string += f", {self.loss_item_name[i]}: {loss_item[i].asnumpy():.4f}" + if self.optimizer.dynamic_lr: + if self.optimizer.is_group_lr: + lr_cell = self.optimizer.learning_rate[0] + cur_lr = lr_cell(Tensor(self.global_step, ms.int32)).asnumpy().item() + else: + cur_lr = self.optimizer.learning_rate(Tensor(self.global_step, ms.int32)).asnumpy().item() + else: + cur_lr = self.optimizer.learning_rate.asnumpy().item() + log_string += f", cur_lr: {cur_lr}" + logger.info(log_string) + run_context.loss, run_context.lr = loss_item, cur_lr + self._on_train_epoch_end(run_context) + + # save checkpoint per epoch on main device + if self.main_device: + # Save Checkpoint + ms.save_checkpoint( + self.optimizer, os.path.join(ckpt_save_dir, f"optim_{self.model_name}.ckpt"), async_save=True + ) + save_path = os.path.join(ckpt_save_dir, f"{self.model_name}-{cur_epoch}_{self.steps_per_epoch}.ckpt") + manager.save_ckpoint(self.network, num_ckpt=keep_checkpoint_max, save_path=save_path) + if self.ema: + save_path_ema = os.path.join( + ckpt_save_dir, f"EMA_{self.model_name}-{cur_epoch}_{self.steps_per_epoch}.ckpt" + ) + manager_ema.save_ckpoint(self.ema.ema, num_ckpt=keep_checkpoint_max, save_path=save_path_ema) + logger.info(f"Saving model to {save_path}") + + if enable_modelarts: + sync_data(save_path, train_url + "/weights/" + save_path.split("/")[-1]) + if self.ema: + sync_data(save_path_ema, train_url + "/weights/" + save_path_ema.split("/")[-1]) + + logger.info(f"Epoch {cur_epoch}/{epochs}, epoch time: {(time.time() - s_epoch_time) / 60:.2f} min.") + s_epoch_time = time.time() + + if self.profiler and math.ceil(self.profiler_step_num/self.steps_per_epoch) == cur_epoch: + break + self._on_train_end(run_context) + logger.info("End Train.") + + def train_step(self, imgs, labels, segments=None, cur_step=0, cur_epoch=0): + if self.accumulate == 1: + if segments is None: + loss, loss_item, _, grads_finite = self.train_step_fn(imgs, labels, True) + else: + loss, loss_item, _, grads_finite = self.train_step_fn(imgs, labels, segments, True) + self.scaler.adjust(grads_finite) + if not grads_finite and (cur_step % self.log_interval == 0): + if self.overflow_still_update: + logger.warning(f"overflow, still update, loss scale adjust to {self.scaler.scale_value.asnumpy()}") + else: + logger.warning(f"overflow, drop step, loss scale adjust to {self.scaler.scale_value.asnumpy()}") + else: + if segments is None: + loss, loss_item, grads, grads_finite = self.train_step_fn(imgs, labels, False) + else: + loss, loss_item, grads, grads_finite = self.train_step_fn(imgs, labels, segments, False) + self.scaler.adjust(grads_finite) + if grads_finite or self.overflow_still_update: + self.accumulate_cur_step += 1 + if self.accumulate_grads: + self.accumulate_grads = self.accumulate_grads_fn( + self.accumulate_grads, grads + ) # update self.accumulate_grads + else: + self.accumulate_grads = grads + + if self.accumulate_cur_step % 
self.accumulate == 0:
+                    self.optimizer(self.accumulate_grads)
+                    if self.ema:
+                        self.ema.update()
+                    logger.info(
+                        f"Epoch {cur_epoch}/{self.epochs}, Step {cur_step}/{self.steps_per_epoch}, "
+                        f"accumulate: {self.accumulate}, optimizer updated after a full accumulation cycle."
+                    )
+                    from mindspore.amp import all_finite
+
+                    if not all_finite(self.accumulate_grads):
+                        logger.warning("overflow, still update.")
+                    # reset accumulate
+                    self.accumulate_grads, self.accumulate_cur_step = None, 0
+            else:
+                logger.warning(
+                    f"Epoch {cur_epoch}/{self.epochs}, Step {cur_step}/{self.steps_per_epoch}, "
+                    f"accumulate: {self.accumulate}, gradients overflowed on this step, dropped. "
+                    f"Loss scale adjust to {self.scaler.scale_value.asnumpy()}"
+                )
+
+        # train log
+        cur_lr = 0
+        if cur_step % self.log_interval == 0:
+            log_string = (
+                f"Epoch {cur_epoch}/{self.epochs}, Step {cur_step}/{self.steps_per_epoch}, imgsize {imgs.shape[2:]}"
+            )
+            # print loss
+            if len(self.loss_item_name) < len(loss_item):
+                self.loss_item_name += [f"loss_item{i}" for i in range(len(loss_item) - len(self.loss_item_name))]
+            for i in range(len(loss_item)):
+                log_string += f", {self.loss_item_name[i]}: {loss_item[i].asnumpy():.4f}"
+
+            # print lr
+            if self.optimizer.dynamic_lr:
+                if self.optimizer.is_group_lr:
+                    lr_cell = self.optimizer.learning_rate[0]
+                    cur_lr = lr_cell(Tensor(self.global_step, ms.int32)).asnumpy().item()
+                else:
+                    cur_lr = self.optimizer.learning_rate(Tensor(self.global_step, ms.int32)).asnumpy().item()
+            else:
+                cur_lr = self.optimizer.learning_rate.asnumpy().item()
+            log_string += f", cur_lr: {cur_lr}"
+            logger.info(log_string)
+        return loss_item, cur_lr
+
+    def _get_accumulate_grads_fn(self):
+        hyper_map = ops.HyperMap()
+
+        def accu_fn(g1, g2):
+            g1 = g1 + g2
+            return g1
+
+        def accumulate_grads_fn(accumulate_grads, grads):
+            success = hyper_map(accu_fn, accumulate_grads, grads)
+            return success
+
+        return accumulate_grads_fn
+
+    def _get_transform_stage(self, cur_epoch, stage_epochs=[]):
+        _cur_stage = 0
+        for _i in range(len(stage_epochs)):
+            if cur_epoch <= stage_epochs[_i]:
+                _cur_stage = _i
+            else:
+                break
+        return _cur_stage
+
+    def _on_train_begin(self, run_context: RunContext):
+        """hooks to run on the beginning of training process"""
+
+        # check callback type validation
+        callback = self.callback
+        if callback is None:
+            callback = []
+        assert isinstance(callback, (tuple, list)), (
+            f"expect callback to be a list or tuple, " f"but got {type(callback)} instead"
+        )
+        for cb in callback:
+            assert isinstance(cb, BaseCallback), (
+                f"expect callback element to be subclass of BaseCallback, " f"but got {type(cb)} instead"
+            )
+        # log callback base info
+        logger.info(f"got {len(callback)} active callbacks as follows:")
+        for cb in self.callback:
+            logger.info(cb)
+
+        # check range of log interval
+        if self.log_interval > self.steps_per_epoch:
+            logger.warning(
+                f"log interval should be less than total steps of one epoch, "
+                f"but got {self.log_interval} > {self.steps_per_epoch}, set log_interval to steps_per_epoch "
+                f"{self.steps_per_epoch}"
+            )
+            self.log_interval = self.steps_per_epoch
+
+        # throw warning of long time cost
+        logger.warning(
+            "The first epoch will be compiled for the graph, which may take a long time; " "You can come back later :)."
+ ) + + # execute customized callback + for cb in self.callback: + cb.on_train_begin(run_context) + + def _on_train_end(self, run_context: RunContext): + """hooks to run on the end of training process""" + for cb in self.callback: + cb.on_train_end(run_context) + + def _on_train_epoch_begin(self, run_context: RunContext): + """hooks to run on the beginning of a training epoch""" + for cb in self.callback: + cb.on_train_epoch_begin(run_context) + + def _on_train_epoch_end(self, run_context: RunContext): + """hooks to run on the end of a training epoch""" + for cb in self.callback: + cb.on_train_epoch_end(run_context) + + def _on_train_step_begin(self, run_context: RunContext): + """hooks to run on the beginning of a training step""" + for cb in self.callback: + cb.on_train_step_begin(run_context) + + def _on_train_step_end(self, run_context: RunContext): + """hooks to run on the end of a training step""" + for cb in self.callback: + cb.on_train_step_end(run_context) diff --git a/community/cv/ShipWise/mindyolo/utils/utils.py b/community/cv/ShipWise/mindyolo/utils/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..c364c94d10c17facf0cf5ed95b645297746b27b6 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/utils/utils.py @@ -0,0 +1,223 @@ +import os +import random +import yaml +import cv2 +from datetime import datetime +import numpy as np + +import mindspore as ms +from mindspore import ops, Tensor, nn +from mindspore.communication.management import get_group_size, get_rank, init +from mindspore import ParallelMode + +from mindyolo.utils import logger + + +def set_seed(seed=2): + np.random.seed(seed) + random.seed(seed) + ms.set_seed(seed) + + +def set_default(args): + # Set Context + ms.set_context(mode=args.ms_mode, device_target=args.device_target, max_call_depth=2000) + # if args.ms_mode == 0: + # ms.set_context(jit_config={"jit_level": "O2"}) + if args.device_target == "Ascend": + device_id = int(os.getenv("DEVICE_ID", 0)) + ms.set_context(device_id=device_id) + elif args.device_target == "GPU" and args.ms_enable_graph_kernel: + ms.set_context(enable_graph_kernel=True) + # Set Parallel + if args.is_parallel: + init() + args.rank, args.rank_size, parallel_mode = get_rank(), get_group_size(), ParallelMode.DATA_PARALLEL + ms.set_auto_parallel_context(device_num=args.rank_size, parallel_mode=parallel_mode, gradients_mean=True) + else: + args.rank, args.rank_size = 0, 1 + # Set Default + args.total_batch_size = args.per_batch_size * args.rank_size + args.sync_bn = args.sync_bn and ms.get_context("device_target") == "Ascend" and args.rank_size > 1 + args.accumulate = max(1, np.round(args.nbs / args.total_batch_size)) if args.auto_accumulate else args.accumulate + # optimizer + args.optimizer.warmup_epochs = args.optimizer.get("warmup_epochs", 0) + args.optimizer.min_warmup_step = args.optimizer.get("min_warmup_step", 0) + args.optimizer.epochs = args.epochs + args.optimizer.nbs = args.nbs + args.optimizer.accumulate = args.accumulate + args.optimizer.total_batch_size = args.total_batch_size + # data + cv2.setNumThreads(args.opencv_threads_num) # Set the number of threads for opencv. 
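+    # data: resolve class count/names; single-class mode collapses to one "item" class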
+ args.data.nc = 1 if args.single_cls else int(args.data.nc) # number of classes + args.data.names = ["item"] if args.single_cls and len(args.names) != 1 else args.data.names # class names + assert len(args.data.names) == args.data.nc, "%g names found for nc=%g dataset in %s" % ( + len(args.data.names), + args.data.nc, + args.config, + ) + # Directories and Save run settings + time = get_broadcast_datetime(rank_size=args.rank_size) + args.save_dir = os.path.join( + args.save_dir, f'{time[0]:04d}.{time[1]:02d}.{time[2]:02d}-{time[3]:02d}.{time[4]:02d}.{time[5]:02d}') + os.makedirs(args.save_dir, exist_ok=True) + if args.rank % args.rank_size == 0: + with open(os.path.join(args.save_dir, "cfg.yaml"), "w") as f: + yaml.dump(vars(args), f, sort_keys=False) + + # callback + args.callback = args.get('callback', []) + + # Set Logger + logger.setup_logging( + logger_name="MindYOLO", log_level=args.log_level, rank_id=args.rank, device_per_servers=args.rank_size + ) + logger.setup_logging_file(log_dir=os.path.join(args.save_dir, "logs")) + + # Modelarts: Copy data, from the s3 bucket to the computing node; Reset dataset dir. + if args.enable_modelarts: + from mindyolo.utils.modelarts import sync_data + + os.makedirs(args.data_dir, exist_ok=True) + sync_data(args.data_url, args.data_dir) + sync_data(args.save_dir, args.train_url) + if args.ckpt_url: + sync_data(args.ckpt_url, args.ckpt_dir) # pretrain ckpt + # args.data.dataset_dir = os.path.join(args.data_dir, args.data.dataset_dir) + args.data.train_set = os.path.join(args.data_dir, args.data.train_set) + args.data.val_set = os.path.join(args.data_dir, args.data.val_set) + args.data.test_set = os.path.join(args.data_dir, args.data.test_set) + args.weight = args.ckpt_dir if args.ckpt_dir else "" + args.ema_weight = os.path.join(args.ckpt_dir, args.ema_weight) if args.ema_weight else "" + + +def drop_inconsistent_shape_parameters(model, param_dict): + updated_param_dict = dict() + + # TODO: hard code + param_dict = {k.replace('ema.', ''): v for k, v in param_dict.items()} + + for param in model.get_parameters(): + name = param.name + if name in param_dict: + if param_dict[name].shape == param.shape: + updated_param_dict[name] = param_dict[name] + else: + logger.warning( + f"Dropping checkpoint parameter `{name}` with shape `{param_dict[name].shape}`, " + f"which is inconsistent with cell shape `{param.shape}`" + ) + else: + logger.warning(f"Cannot find checkpoint parameter `{name}`.") + return updated_param_dict + + +def load_pretrain(network, weight, ema=None, ema_weight=None, strict=True): + if weight.endswith(".ckpt"): + param_dict = ms.load_checkpoint(weight) + if not strict: + param_dict = drop_inconsistent_shape_parameters(network, param_dict) + ms.load_param_into_net(network, param_dict) + logger.info(f'Pretrain model load from "{weight}" success.') + if ema: + if ema_weight.endswith(".ckpt"): + param_dict_ema = ms.load_checkpoint(ema_weight) + if not strict: + param_dict_ema = drop_inconsistent_shape_parameters(ema.ema, param_dict_ema) + ms.load_param_into_net(ema.ema, param_dict_ema) + logger.info(f'Ema pretrain model load from "{ema_weight}" success.') + else: + ema.clone_from_model() + logger.info("ema_weight not exist, default pretrain weight is currently used.") + + +def freeze_layers(network, freeze=[]): + if len(freeze) > 0: + freeze = [f"model.{x}." 
for x in freeze] # parameter names to freeze (full or partial) + for n, p in network.parameters_and_names(): + if any(x in n for x in freeze): + logger.info("freezing %s" % n) + p.requires_grad = False + + +def draw_result(img_path, result_dict, data_names, is_coco_dataset=True, save_path="./detect_results"): + import random + import cv2 + from mindyolo.data import COCO80_TO_COCO91_CLASS + + os.makedirs(save_path, exist_ok=True) + save_result_path = os.path.join(save_path, img_path.split("/")[-1]) + im = cv2.imread(img_path) + category_id, bbox, score = result_dict["category_id"], result_dict["bbox"], result_dict["score"] + seg = result_dict.get("segmentation", None) + mask = None if seg is None else np.zeros_like(im, dtype=np.float32) + for i in range(len(bbox)): + # draw box + x_l, y_t, w, h = bbox[i][:] + x_r, y_b = x_l + w, y_t + h + x_l, y_t, x_r, y_b = int(x_l), int(y_t), int(x_r), int(y_b) + _color = [random.randint(0, 255) for _ in range(3)] + cv2.rectangle(im, (x_l, y_t), (x_r, y_b), tuple(_color), 2) + if seg: + _color_seg = np.array([random.randint(0, 255) for _ in range(3)], np.float32) + mask += seg[i][:, :, None] * _color_seg[None, None, :] + + # draw label + if is_coco_dataset: + class_name_index = COCO80_TO_COCO91_CLASS.index(category_id[i]) + else: + class_name_index = category_id[i] + class_name = data_names[class_name_index] # args.data.names[class_name_index] + text = f"{class_name}: {score[i]}" + (text_w, text_h), baseline = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2) + cv2.rectangle(im, (x_l, y_t - text_h - baseline), (x_l + text_w, y_t), tuple(_color), -1) + cv2.putText(im, text, (x_l, y_t - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2) + + # save results + if seg: + im = (0.7 * im + 0.3 * mask).astype(np.uint8) + cv2.imwrite(save_result_path, im) + + +def get_broadcast_datetime(rank_size=1, root_rank=0): + time = datetime.now() + time_list = [time.year, time.month, time.day, time.hour, time.minute, time.second, time.microsecond] + if rank_size <=1: + return time_list + + # only broadcast in distribution mode + x = broadcast((Tensor(time_list, dtype=ms.int32),), root_rank) + x = x[0].asnumpy().tolist() + return x + +@ms.jit +def broadcast(x, root_rank): + return ops.Broadcast(root_rank=root_rank)(x) + +class AllReduce(nn.Cell): + """ + a wrapper class to make ops.AllReduce become a Cell. This is a workaround for sync_wait + """ + def __init__(self): + super(AllReduce, self).__init__() + self.all_reduce = ops.AllReduce(op=ops.ReduceOp.SUM) + + def construct(self, x): + return self.all_reduce(x) + + +class Synchronizer: + def __init__(self, rank_size=1): + # this init method should be run only once + self.all_reduce = AllReduce() + self.rank_size = rank_size + + def __call__(self): + if self.rank_size <= 1: + return + sync = Tensor(np.array([1]).astype(np.int32)) + sync = self.all_reduce(sync) + sync = sync.asnumpy()[0] + if sync != self.rank_size: + raise ValueError(f'Sync value {sync} is not equal to rank size {self.rank_size}.' 
+ f' There might be wrong with devices') diff --git a/community/cv/ShipWise/mindyolo/version.py b/community/cv/ShipWise/mindyolo/version.py new file mode 100644 index 0000000000000000000000000000000000000000..aea8b22a3c73772753b9708e70cae90d45345f02 --- /dev/null +++ b/community/cv/ShipWise/mindyolo/version.py @@ -0,0 +1,2 @@ +"""version init""" +__version__ = "0.4.0-dev" diff --git a/community/cv/ShipWise/requirements.txt b/community/cv/ShipWise/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..f5202961583d74cdb2747836179c9109e0d6e13c --- /dev/null +++ b/community/cv/ShipWise/requirements.txt @@ -0,0 +1,16 @@ +# MindYOLO requirements +# Usage: pip install -r requirements.txt + +# Setup +pybind11>=2.10.4 + +# Base +numpy>=1.17.0 +PyYAML>=5.3 +tqdm +opencv-python>=4.7.0.68 +opencv-python-headless>=4.7.0.68 + +# Extras +albumentations>=1.0.3 +pycocotools>=2.0.2 diff --git a/community/cv/ShipWise/workspace/__init__.py b/community/cv/ShipWise/workspace/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..cb9cad7f36ac9db05a61ea2fba14c87a9b8f0dd0 --- /dev/null +++ b/community/cv/ShipWise/workspace/__init__.py @@ -0,0 +1,11 @@ +# -*- coding: utf-8 -*- +# @Time : 2024-09-22 11:41 +# @Author : Jiang Liu + + +def main(): + pass + + +if __name__ == '__main__': + main() diff --git a/community/cv/ShipWise/workspace/configs/dataset/HRSC2016.yaml b/community/cv/ShipWise/workspace/configs/dataset/HRSC2016.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0be2c002421a512255e2af97c3067f13063cd36f --- /dev/null +++ b/community/cv/ShipWise/workspace/configs/dataset/HRSC2016.yaml @@ -0,0 +1,14 @@ +data: + dataset_name: HRSC2016 + + train_set: ./workspace/datasets/HRSC2016/train.txt + val_set: ./workspace/datasets/HRSC2016/val.txt + test_set: ./workspace/datasets/HRSC2016/test.txt + + nc: 28 + + # class names + names: [ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27' ] + + train_transforms: [ ] + test_transforms: [ ] \ No newline at end of file diff --git a/community/cv/ShipWise/workspace/configs/ship-wise/hyp.scratch.high.yaml b/community/cv/ShipWise/workspace/configs/ship-wise/hyp.scratch.high.yaml new file mode 100644 index 0000000000000000000000000000000000000000..706cb08df1e106489b035d138c0d2ae32060e4b6 --- /dev/null +++ b/community/cv/ShipWise/workspace/configs/ship-wise/hyp.scratch.high.yaml @@ -0,0 +1,67 @@ +optimizer: + optimizer: momentum + lr_init: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) + momentum: 0.937 # SGD momentum/Adam beta1 + nesterov: True # update gradients with NAG(Nesterov Accelerated Gradient) algorithm + loss_scale: 1.0 # loss scale for optimizer + warmup_epochs: 3 # warmup epochs (fractions ok) + warmup_momentum: 0.8 # warmup initial momentum + warmup_bias_lr: 0.1 # warmup initial bias lr + min_warmup_step: 1000 # minimum warmup step + group_param: yolov8 # group param strategy + gp_weight_decay: 0.0005 # group param weight decay 5e-4 + start_factor: 1.0 + end_factor: 0.01 + +loss: + name: YOLOv8Loss + box: 7.5 # box loss gain + cls: 0.5 # cls loss gain + dfl: 1.5 # dfl loss gain + reg_max: 16 + +data: + num_parallel_workers: 1 + + # multi-stage data augment + train_transforms: { + stage_epochs: [ 490, 10 ], + trans_list: [ + [ + { func_name: mosaic, prob: 1.0 }, + { func_name: copy_paste, prob: 0.3 }, + {func_name: resample_segments}, + { func_name: random_perspective, prob: 1.0, 
degrees: 0.0, translate: 0.1, scale: 0.9, shear: 0.0 }, + { func_name: mixup, alpha: 32.0, beta: 32.0, prob: 0.15, pre_transform: [ + { func_name: mosaic, prob: 1.0 }, + { func_name: copy_paste, prob: 0.3 }, + { func_name: resample_segments }, + { func_name: random_perspective, prob: 1.0, degrees: 0.0, translate: 0.1, scale: 0.9, shear: 0.0 }, ] + }, + {func_name: albumentations}, + {func_name: hsv_augment, prob: 1.0, hgain: 0.015, sgain: 0.7, vgain: 0.4}, + {func_name: fliplr, prob: 0.5}, + {func_name: label_norm, xyxy2xywh_: True}, + {func_name: label_pad, padding_size: 160, padding_value: -1}, + {func_name: image_norm, scale: 255.}, + {func_name: image_transpose, bgr2rgb: True, hwc2chw: True} + ], + [ + {func_name: letterbox, scaleup: True}, + {func_name: resample_segments}, + {func_name: random_perspective, prob: 1.0, degrees: 0.0, translate: 0.1, scale: 0.9, shear: 0.0}, + {func_name: albumentations}, + {func_name: hsv_augment, prob: 1.0, hgain: 0.015, sgain: 0.7, vgain: 0.4}, + {func_name: fliplr, prob: 0.5}, + {func_name: label_norm, xyxy2xywh_: True}, + {func_name: label_pad, padding_size: 160, padding_value: -1}, + {func_name: image_norm, scale: 255.}, + {func_name: image_transpose, bgr2rgb: True, hwc2chw: True} + ]] + } + + test_transforms: [ + {func_name: letterbox, scaleup: False, only_image: True}, + {func_name: image_norm, scale: 255.}, + {func_name: image_transpose, bgr2rgb: True, hwc2chw: True} + ] diff --git a/community/cv/ShipWise/workspace/configs/ship-wise/hyp.scratch.low.yaml b/community/cv/ShipWise/workspace/configs/ship-wise/hyp.scratch.low.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c3a82698feaf4d9c2f747500a73f39a27d957b46 --- /dev/null +++ b/community/cv/ShipWise/workspace/configs/ship-wise/hyp.scratch.low.yaml @@ -0,0 +1,62 @@ + + +optimizer: + optimizer: momentum + lr_init: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) + momentum: 0.937 # SGD momentum/Adam beta1 + nesterov: True # update gradients with NAG(Nesterov Accelerated Gradient) algorithm + loss_scale: 1.0 # loss scale for optimizer + warmup_epochs: 3 # warmup epochs (fractions ok) + warmup_momentum: 0.8 # warmup initial momentum + warmup_bias_lr: 0.1 # warmup initial bias lr + min_warmup_step: 1000 # minimum warmup step + group_param: yolov8 # group param strategy + gp_weight_decay: 0.0005 # group param weight decay 5e-4 + start_factor: 1.0 + end_factor: 0.01 + +loss: + name: YOLOv8Loss + box: 7.5 # box loss gain + cls: 0.5 # cls loss gain + dfl: 1.5 # dfl loss gain + reg_max: 16 + +data: + num_parallel_workers: 1 + + # multi-stage data augment + train_transforms: { + stage_epochs: [ 490, 10 ], + trans_list: [ + [ + { func_name: mosaic, prob: 1.0 }, + { func_name: resample_segments }, + { func_name: random_perspective, prob: 1.0, degrees: 0.0, translate: 0.1, scale: 0.5, shear: 0.0 }, + {func_name: albumentations}, + {func_name: hsv_augment, prob: 1.0, hgain: 0.015, sgain: 0.7, vgain: 0.4}, + {func_name: fliplr, prob: 0.5}, + {func_name: label_norm, xyxy2xywh_: True}, + {func_name: label_pad, padding_size: 160, padding_value: -1}, + {func_name: image_norm, scale: 255.}, + {func_name: image_transpose, bgr2rgb: True, hwc2chw: True} + ], + [ + {func_name: letterbox, scaleup: True}, + {func_name: resample_segments}, + {func_name: random_perspective, prob: 1.0, degrees: 0.0, translate: 0.1, scale: 0.5, shear: 0.0}, + {func_name: albumentations}, + {func_name: hsv_augment, prob: 1.0, hgain: 0.015, sgain: 0.7, vgain: 0.4}, + {func_name: fliplr, prob: 0.5}, + {func_name: 
label_norm, xyxy2xywh_: True}, + {func_name: label_pad, padding_size: 160, padding_value: -1}, + {func_name: image_norm, scale: 255.}, + {func_name: image_transpose, bgr2rgb: True, hwc2chw: True} + ]] + } + + test_transforms: [ + {func_name: letterbox, scaleup: False, only_image: True}, + {func_name: image_norm, scale: 255.}, + {func_name: image_transpose, bgr2rgb: True, hwc2chw: True} + ] diff --git a/community/cv/ShipWise/workspace/configs/ship-wise/ship-wise-base.yaml b/community/cv/ShipWise/workspace/configs/ship-wise/ship-wise-base.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5738013cec0f89ffe555f6569a17a8877bd72deb --- /dev/null +++ b/community/cv/ShipWise/workspace/configs/ship-wise/ship-wise-base.yaml @@ -0,0 +1,45 @@ +epochs: 500 +per_batch_size: 2 +img_size: 640 +iou_thres: 0.7 +conf_free: True +sync_bn: True +opencv_threads_num: 0 + +network: + model_name: shipwise + nc: 28 + reg_max: 16 + + stride: [ 8, 16, 32 ] + + backbone: + - [ -1, 1, ConvNormAct, [ 64, 3, 2 ] ] + - [ -1, 1, ConvNormAct, [ 128, 3, 2 ] ] + - [ -1, 3, C2f, [ 128, True ] ] + - [ -1, 1, ConvNormAct, [ 256, 3, 2 ] ] + - [ -1, 6, C2f, [ 256, True ] ] + - [ -1, 1, ConvNormAct, [ 512, 3, 2 ] ] + - [ -1, 6, C2f, [ 512, True ] ] + - [ -1, 1, ConvNormAct, [ 1024, 3, 2 ] ] + - [ -1, 3, C2f, [ 1024, True ] ] + - [ -1, 1, SPPF, [ 1024, 5 ] ] + + head: + - [ -1, 1, Upsample, [ None, 2, 'nearest' ] ] + - [ [ -1, 6 ], 1, Concat, [ 1 ] ] + - [ -1, 3, C2f, [ 512 ] ] + + - [ -1, 1, Upsample, [ None, 2, 'nearest' ] ] + - [ [ -1, 4 ], 1, Concat, [ 1 ] ] + - [ -1, 3, C2f, [ 256 ] ] + + - [ -1, 1, ConvNormAct, [ 256, 3, 2 ] ] + - [ [ -1, 12 ], 1, Concat, [ 1 ] ] + - [ -1, 3, C2f, [ 512 ] ] + + - [ -1, 1, ConvNormAct, [ 512, 3, 2 ] ] + - [ [ -1, 9 ], 1, Concat, [ 1 ] ] + - [ -1, 3, C2f, [ 1024 ] ] + + - [ [ 15, 18, 21 ], 1, YOLOv8Head, [ nc, reg_max, stride ] ] diff --git a/community/cv/ShipWise/workspace/configs/ship-wise/ship-wise-l.yaml b/community/cv/ShipWise/workspace/configs/ship-wise/ship-wise-l.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3481f481d42fd7047f481384f4bc8db9bc38bc6e --- /dev/null +++ b/community/cv/ShipWise/workspace/configs/ship-wise/ship-wise-l.yaml @@ -0,0 +1,11 @@ +__BASE__: [ + '../dataset/HRSC2016.yaml', + './hyp.scratch.high.yaml', + './ship-wise-base.yaml' +] + +overflow_still_update: False +network: + depth_multiple: 1.00 # scales module repeats + width_multiple: 1.00 # scales convolution channels + max_channels: 512 diff --git a/community/cv/ShipWise/workspace/configs/ship-wise/ship-wise-s.yaml b/community/cv/ShipWise/workspace/configs/ship-wise/ship-wise-s.yaml new file mode 100644 index 0000000000000000000000000000000000000000..79a63d1446514f1479ba869c54890168a70f344b --- /dev/null +++ b/community/cv/ShipWise/workspace/configs/ship-wise/ship-wise-s.yaml @@ -0,0 +1,11 @@ +__BASE__: [ + '../dataset/HRSC2016.yaml', + './hyp.scratch.low.yaml', + './ship-wise-base.yaml' +] + +overflow_still_update: False +network: + depth_multiple: 0.33 # scales module repeats + width_multiple: 0.50 # scales convolution channels + max_channels: 1024 diff --git a/community/cv/ShipWise/workspace/flask/__init__.py b/community/cv/ShipWise/workspace/flask/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..39ecbcf98674cd90193d5fa44139d4ed406cfbad --- /dev/null +++ b/community/cv/ShipWise/workspace/flask/__init__.py @@ -0,0 +1,11 @@ +# -*- coding: utf-8 -*- +# @Time : 2024-09-16 19:25 +# @Author : Jiang Liu + + +def main(): + pass + + +if 
__name__ == '__main__':
+    main()
diff --git a/community/cv/ShipWise/workspace/flask/index.py b/community/cv/ShipWise/workspace/flask/index.py
new file mode 100644
index 0000000000000000000000000000000000000000..308a9bb089dd97d7d5188bf87bbd026c391ce007
--- /dev/null
+++ b/community/cv/ShipWise/workspace/flask/index.py
@@ -0,0 +1,38 @@
+from io import BytesIO
+
+import cv2
+import numpy as np
+from flask import Flask, request, jsonify
+
+from workspace.flask.model.ship_wise import init, infer
+
+app = Flask(__name__)
+
+# Load the model once at application startup
+user_config = {
+    "config": r"H:\Workspace\DeepLearning\mindyolo-summer-ospp/workspace/configs/ship-wise/ship-wise-s.yaml",
+    "weight": r"H:\Workspace\DeepLearning\mindyolo-summer-ospp/runs/2024.09.15-22.56.30/weights/ship-wise-s-153_422.ckpt",
+    "save_result": False,
+    "device_target": "CPU",
+}
+args, network = init(user_config)
+
+
+@app.route('/detect', methods=['POST'])
+def detect():
+    """
+    Detect objects in an image.
+    Input: an image file
+    Output: { "bbox": [[698.248,524.238,217.65,196.28]], "category_id": [18], "score": [0.82683] }
+    """
+    file = request.files['image']
+    in_memory_file = BytesIO()
+    file.save(in_memory_file)
+    # np.fromstring is deprecated for binary input; np.frombuffer reads the raw bytes directly.
+    data = np.frombuffer(in_memory_file.getvalue(), dtype=np.uint8)
+    image = cv2.imdecode(data, cv2.IMREAD_COLOR)
+    result = infer(args, network, image)
+    return jsonify(result)
+
+
+if __name__ == '__main__':
+    app.run(debug=True, port=8080)
diff --git a/community/cv/ShipWise/workspace/flask/model/__init__.py b/community/cv/ShipWise/workspace/flask/model/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..3badb5891f4a91820e90535413c4ff8f85686fb1
--- /dev/null
+++ b/community/cv/ShipWise/workspace/flask/model/__init__.py
@@ -0,0 +1,11 @@
+# -*- coding: utf-8 -*-
+# @Time : 2024-09-16 19:57
+# @Author : Jiang Liu
+
+
+def main():
+    pass
+
+
+if __name__ == '__main__':
+    main()
diff --git a/community/cv/ShipWise/workspace/flask/model/ship_wise.py b/community/cv/ShipWise/workspace/flask/model/ship_wise.py
new file mode 100644
index 0000000000000000000000000000000000000000..6c4c1f686c352eefe8b5ddd9ba0e58af6145796f
--- /dev/null
+++ b/community/cv/ShipWise/workspace/flask/model/ship_wise.py
@@ -0,0 +1,78 @@
+# Note: demo/ is excluded by this repo's .gitignore, so these helpers are imported from
+# workspace/predict.py, which defines the same get_parser_infer/set_default_infer/detect.
+from workspace.predict import get_parser_infer, set_default_infer, detect
+from mindyolo.utils.config import load_config, Config
+import os
+import mindspore as ms
+from mindyolo.models import create_model
+from mindyolo.utils.utils import draw_result, set_seed
+
+
+class NetworkSingleton:
+    _instance = None
+    _args = None
+
+    def __new__(cls, args):
+        if cls._instance is None:
+            cls._instance = super(NetworkSingleton, cls).__new__(cls)
+            cls._instance.init_network(args)
+            cls._args = args
+        return cls._instance
+
+    def init_network(self, args):
+        set_seed(args.seed)
+        set_default_infer(args)
+        self.network = create_model(
+            model_name=args.network.model_name,
+            model_cfg=args.network,
+            num_classes=args.data.nc,
+            sync_bn=False,
+            checkpoint_path=args.weight,
+        )
+        self.network.set_train(False)
+        ms.amp.auto_mixed_precision(self.network, amp_level=args.ms_amp_level)
+
+    def get_network(self):
+        return self.network
+
+    def get_args(self):
+        return self._args
+
+
+def infer(args, network, img):
+    is_coco_dataset = "coco" in args.data.dataset_name
+    # The default task is detection
+    result_dict = detect(
+        network=network,
+        img=img,
+        conf_thres=args.conf_thres,
+        iou_thres=args.iou_thres,
+        conf_free=args.conf_free,
+        nms_time_limit=args.nms_time_limit,
+        img_size=args.img_size,
+        stride=max(max(args.network.stride), 32),
+        num_class=args.data.nc,
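+        # stride is clamped to at least 32 so that letterbox padding in detect() aligns with the
+        # coarsest feature-map stride of the network (args.network.stride is [8, 16, 32] here).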
+        is_coco_dataset=is_coco_dataset,
+    )
+    if args.save_result:
+        save_path = os.path.join(args.save_dir, "detect_results")
+        draw_result(args.image_path, result_dict, args.data.names, is_coco_dataset=is_coco_dataset,
+                    save_path=save_path)
+    return result_dict
+
+
+def init(user_config=None):
+    parser = get_parser_infer()
+    test_img_path = r"H:\Library\Datasets\HRSC\HRSC2016_dataset\HRSC2016\FullDataSet-YOLO-Split\test\100000630.bmp"
+    if user_config is None:
+        user_config = {
+            "config": "./workspace/configs/ship-wise/ship-wise-s.yaml",
+            "weight": "./runs/2024.09.15-22.56.30/weights/ship-wise-s-153_422.ckpt",
+            "device_target": "CPU",
+        }
+    cfg, _, _ = load_config(user_config["config"])
+    cfg = Config(cfg)
+    parser.set_defaults(**cfg)
+    parser.set_defaults(**user_config)
+    args = parser.parse_args()
+    args = Config(vars(args))
+    network = NetworkSingleton(args).get_network()
+    return args, network
diff --git a/community/cv/ShipWise/workspace/predict.py b/community/cv/ShipWise/workspace/predict.py
new file mode 100644
index 0000000000000000000000000000000000000000..499a6d7cfff82c4f2ebaeca20d151fabe101fab4
--- /dev/null
+++ b/community/cv/ShipWise/workspace/predict.py
@@ -0,0 +1,346 @@
+import argparse
+import ast
+import math
+import os
+import sys
+import time
+import cv2
+import numpy as np
+import yaml
+from datetime import datetime
+
+import mindspore as ms
+from mindspore import Tensor, nn
+
+from mindyolo.data import COCO80_TO_COCO91_CLASS
+from mindyolo.models import create_model
+from mindyolo.utils import logger
+from mindyolo.utils.config import parse_args
+from mindyolo.utils.metrics import non_max_suppression, scale_coords, xyxy2xywh, process_mask_upsample, scale_image
+from mindyolo.utils.utils import draw_result, set_seed
+
+
+def get_parser_infer(parents=None):
+    parser = argparse.ArgumentParser(description="Infer", parents=[parents] if parents else [])
+    parser.add_argument("--task", type=str, default="detect", choices=["detect", "segment"])
+    parser.add_argument("--device_target", type=str, default="Ascend", help="device target, Ascend/GPU/CPU")
+    parser.add_argument("--ms_mode", type=int, default=0, help="run mode, graph(0)/pynative(1)")
+    parser.add_argument("--ms_amp_level", type=str, default="O0", help="amp level, O0/O1/O2")
+    parser.add_argument(
+        "--ms_enable_graph_kernel", type=ast.literal_eval, default=False, help="use enable_graph_kernel or not"
+    )
+    parser.add_argument("--weight", type=str, default="yolov7_300.ckpt", help="model.ckpt path(s)")
+    parser.add_argument("--img_size", type=int, default=640, help="inference size (pixels)")
+    parser.add_argument(
+        "--single_cls", type=ast.literal_eval, default=False, help="train multi-class data as single-class"
+    )
+    parser.add_argument("--nms_time_limit", type=float, default=60.0, help="time limit for NMS")
+    parser.add_argument("--conf_thres", type=float, default=0.25, help="object confidence threshold")
+    parser.add_argument("--iou_thres", type=float, default=0.65, help="IOU threshold for NMS")
+    parser.add_argument(
+        "--conf_free", type=ast.literal_eval, default=False, help="Whether the prediction result includes conf"
+    )
+    parser.add_argument("--seed", type=int, default=2, help="set global seed")
+    parser.add_argument("--log_level", type=str, default="INFO", help="log level to print")
+    parser.add_argument("--save_dir", type=str, default="./runs_infer", help="save dir")
+
+    parser.add_argument("--image_path", type=str, help="path to image")
+    parser.add_argument("--save_result", type=ast.literal_eval, default=True,
help="whether save the inference result") + + return parser + + +def is_yolov7(args): + if "yolov7" not in args.config: + pass + else: + ms.set_context(ascend_config={"precision_mode": "allow_fp32_to_fp16"}) + + +def set_default_infer(args): + # Set Context + ms.set_context(mode=args.ms_mode, device_target=args.device_target, max_call_depth=2000) + # MaxPool2d does not support dtype=fp32, ops's bug. Needed to be updated when ops's demand is done. + is_yolov7(args) + # if args.ms_mode == 0: + # ms.set_context(jit_config={"jit_level": "O2"}) + if args.device_target == "Ascend": + ms.set_context(device_id=int(os.getenv("DEVICE_ID", 0))) + elif args.device_target == "GPU" and args.ms_enable_graph_kernel: + ms.set_context(enable_graph_kernel=True) + args.rank, args.rank_size = 0, 1 + # Set Data + args.data.nc = 1 if args.single_cls else int(args.data.nc) # number of classes + args.data.names = ["item"] if args.single_cls and len(args.names) != 1 else args.data.names # class names + assert len(args.data.names) == args.data.nc, "%g names found for nc=%g dataset in %s" % ( + len(args.data.names), + args.data.nc, + args.config, + ) + # Directories and Save run settings + platform = sys.platform + if platform == "win32": + args.save_dir = os.path.join(args.save_dir, datetime.now().strftime("%Y.%m.%d-%H.%M.%S")) + else: + args.save_dir = os.path.join(args.save_dir, datetime.now().strftime("%Y.%m.%d-%H:%M:%S")) + os.makedirs(args.save_dir, exist_ok=True) + if args.rank % args.rank_size == 0: + with open(os.path.join(args.save_dir, "cfg.yaml"), "w") as f: + yaml.dump(vars(args), f, sort_keys=False) + # Set Logger + logger.setup_logging(logger_name="MindYOLO", log_level="INFO", rank_id=args.rank, device_per_servers=args.rank_size) + logger.setup_logging_file(log_dir=os.path.join(args.save_dir, "logs")) + + +def detect( + network: nn.Cell, + img: np.ndarray, + conf_thres: float = 0.25, + iou_thres: float = 0.65, + conf_free: bool = False, + nms_time_limit: float = 60.0, + img_size: int = 640, + stride: int = 32, + num_class: int = 80, + is_coco_dataset: bool = True, +): + # Resize + h_ori, w_ori = img.shape[:2] # orig hw + r = img_size / max(h_ori, w_ori) # resize image to img_size + if r != 1: # always resize down, only resize up if training with augmentation + interp = cv2.INTER_AREA if r < 1 else cv2.INTER_LINEAR + img = cv2.resize(img, (int(w_ori * r), int(h_ori * r)), interpolation=interp) + h, w = img.shape[:2] + if h < img_size or w < img_size: + new_h, new_w = math.ceil(h / stride) * stride, math.ceil(w / stride) * stride + dh, dw = (new_h - h) / 2, (new_w - w) / 2 + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + img = cv2.copyMakeBorder( + img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114) + ) # add border + + # Transpose Norm + img = img[:, :, ::-1].transpose(2, 0, 1) / 255.0 + imgs_tensor = Tensor(img[None], ms.float32) + + # Run infer + _t = time.time() + out = network(imgs_tensor) # inference and training outputs + out = out[0] if isinstance(out, (tuple, list)) else out + infer_times = time.time() - _t + + # Run NMS + t = time.time() + out = out.asnumpy() + out = non_max_suppression( + out, + conf_thres=conf_thres, + iou_thres=iou_thres, + conf_free=conf_free, + multi_label=True, + time_limit=nms_time_limit, + ) + nms_times = time.time() - t + + result_dict = {"category_id": [], "bbox": [], "score": []} + total_category_ids, total_bboxes, total_scores = [], [], [] + for si, pred in enumerate(out): + 
if len(pred) == 0: + continue + + # Predictions + predn = np.copy(pred) + scale_coords(img.shape[1:], predn[:, :4], (h_ori, w_ori)) # native-space pred + + box = xyxy2xywh(predn[:, :4]) # xywh + box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner + category_ids, bboxes, scores = [], [], [] + for p, b in zip(pred.tolist(), box.tolist()): + category_ids.append(COCO80_TO_COCO91_CLASS[int(p[5])] if is_coco_dataset else int(p[5])) + bboxes.append([round(x, 3) for x in b]) + scores.append(round(p[4], 5)) + + total_category_ids.extend(category_ids) + total_bboxes.extend(bboxes) + total_scores.extend(scores) + + result_dict["category_id"].extend(total_category_ids) + result_dict["bbox"].extend(total_bboxes) + result_dict["score"].extend(total_scores) + + t = tuple(x * 1e3 for x in (infer_times, nms_times, infer_times + nms_times)) + (img_size, img_size, 1) # tuple + logger.info(f"Predict result is: {result_dict}") + logger.info(f"Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g;" % t) + logger.info(f"Detect a image success.") + + return result_dict + + +def segment( + network: nn.Cell, + img: np.ndarray, + conf_thres: float = 0.25, + iou_thres: float = 0.65, + conf_free: bool = False, + nms_time_limit: float = 60.0, + img_size: int = 640, + stride: int = 32, + num_class: int = 80, + is_coco_dataset: bool = True, +): + # Resize + h_ori, w_ori = img.shape[:2] # orig hw + r = img_size / max(h_ori, w_ori) # resize image to img_size + if r != 1: # always resize down, only resize up if training with augmentation + interp = cv2.INTER_AREA if r < 1 else cv2.INTER_LINEAR + img = cv2.resize(img, (int(w_ori * r), int(h_ori * r)), interpolation=interp) + h, w = img.shape[:2] + if h < img_size or w < img_size: + new_h, new_w = math.ceil(h / stride) * stride, math.ceil(w / stride) * stride + dh, dw = (new_h - h) / 2, (new_w - w) / 2 + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + img = cv2.copyMakeBorder( + img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114) + ) # add border + + # Transpose Norm + img = img[:, :, ::-1].transpose(2, 0, 1) / 255.0 + imgs_tensor = Tensor(img[None], ms.float32) + + # Run infer + _t = time.time() + out, (_, _, prototypes) = network(imgs_tensor) # inference and training outputs + infer_times = time.time() - _t + + # Run NMS + t = time.time() + _c = num_class + 4 if conf_free else num_class + 5 + out = out.asnumpy() + bboxes, mask_coefficient = out[:, :, :_c], out[:, :, _c:] + out = non_max_suppression( + bboxes, + mask_coefficient, + conf_thres=conf_thres, + iou_thres=iou_thres, + conf_free=conf_free, + multi_label=True, + time_limit=nms_time_limit, + ) + nms_times = time.time() - t + + prototypes = prototypes.asnumpy() + + result_dict = {"category_id": [], "bbox": [], "score": [], "segmentation": []} + total_category_ids, total_bboxes, total_scores, total_seg = [], [], [], [] + for si, (pred, proto) in enumerate(zip(out, prototypes)): + if len(pred) == 0: + continue + + # Predictions + pred_masks = process_mask_upsample(proto, pred[:, 6:], pred[:, :4], shape=imgs_tensor[si].shape[1:]) + pred_masks = pred_masks.astype(np.float32) + pred_masks = scale_image((pred_masks.transpose(1, 2, 0)), (h_ori, w_ori)) + predn = np.copy(pred) + scale_coords(img.shape[1:], predn[:, :4], (h_ori, w_ori)) # native-space pred + + box = xyxy2xywh(predn[:, :4]) # xywh + box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner + category_ids, bboxes, scores, segs = [], [], 
[], [] + for ii, (p, b) in enumerate(zip(pred.tolist(), box.tolist())): + category_ids.append(COCO80_TO_COCO91_CLASS[int(p[5])] if is_coco_dataset else int(p[5])) + bboxes.append([round(x, 3) for x in b]) + scores.append(round(p[4], 5)) + segs.append(pred_masks[:, :, ii]) + + total_category_ids.extend(category_ids) + total_bboxes.extend(bboxes) + total_scores.extend(scores) + total_seg.extend(segs) + + result_dict["category_id"].extend(total_category_ids) + result_dict["bbox"].extend(total_bboxes) + result_dict["score"].extend(total_scores) + result_dict["segmentation"].extend(total_seg) + + t = tuple(x * 1e3 for x in (infer_times, nms_times, infer_times + nms_times)) + (img_size, img_size, 1) # tuple + logger.info(f"Predict result is:") + for k, v in result_dict.items(): + if k == "segmentation": + logger.info(f"{k} shape: {v[0].shape}") + else: + logger.info(f"{k}: {v}") + logger.info(f"Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g;" % t) + logger.info(f"Detect a image success.") + + return result_dict + + +def infer(args): + # Init + set_seed(args.seed) + set_default_infer(args) + + # Create Network + network = create_model( + model_name=args.network.model_name, + model_cfg=args.network, + num_classes=args.data.nc, + sync_bn=False, + checkpoint_path=args.weight, + ) + network.set_train(False) + ms.amp.auto_mixed_precision(network, amp_level=args.ms_amp_level) + + # Load Image + if isinstance(args.image_path, str) and os.path.isfile(args.image_path): + import cv2 + img = cv2.imread(args.image_path) + else: + raise ValueError("Detect: input image file not available.") + + # Detect + is_coco_dataset = "coco" in args.data.dataset_name + if args.task == "detect": + result_dict = detect( + network=network, + img=img, + conf_thres=args.conf_thres, + iou_thres=args.iou_thres, + conf_free=args.conf_free, + nms_time_limit=args.nms_time_limit, + img_size=args.img_size, + stride=max(max(args.network.stride), 32), + num_class=args.data.nc, + is_coco_dataset=is_coco_dataset, + ) + if args.save_result: + save_path = os.path.join(args.save_dir, "detect_results") + draw_result(args.image_path, result_dict, args.data.names, is_coco_dataset=is_coco_dataset, + save_path=save_path) + elif args.task == "segment": + result_dict = segment( + network=network, + img=img, + conf_thres=args.conf_thres, + iou_thres=args.iou_thres, + conf_free=args.conf_free, + nms_time_limit=args.nms_time_limit, + img_size=args.img_size, + stride=max(max(args.network.stride), 32), + num_class=args.data.nc, + is_coco_dataset=is_coco_dataset, + ) + if args.save_result: + save_path = os.path.join(args.save_dir, "segment_results") + draw_result(args.image_path, result_dict, args.data.names, is_coco_dataset=is_coco_dataset, + save_path=save_path) + + logger.info("Infer completed.") + + +if __name__ == "__main__": + parser = get_parser_infer() + args = parse_args(parser) + infer(args) diff --git a/community/cv/ShipWise/workspace/script/__init__.py b/community/cv/ShipWise/workspace/script/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a2a7aa3e85270eba5dce50ecf653b6684c16c14f --- /dev/null +++ b/community/cv/ShipWise/workspace/script/__init__.py @@ -0,0 +1,11 @@ +# -*- coding: utf-8 -*- +# @Time : 2024-09-10 17:12 +# @Author : Jiang Liu + + +def main(): + pass + + +if __name__ == '__main__': + main() diff --git a/community/cv/ShipWise/workspace/script/dataset_tools/HRSC/__init__.py b/community/cv/ShipWise/workspace/script/dataset_tools/HRSC/__init__.py new file mode 100644 
index 0000000000000000000000000000000000000000..0c6f3caf0b26f4e6e3c40ba3d7e7d3feb40922f8 --- /dev/null +++ b/community/cv/ShipWise/workspace/script/dataset_tools/HRSC/__init__.py @@ -0,0 +1,11 @@ +# -*- coding: utf-8 -*- +# @Time : 2024-09-15 13:06 +# @Author : Jiang Liu + + +def main(): + pass + + +if __name__ == '__main__': + main() diff --git "a/community/cv/ShipWise/workspace/script/dataset_tools/HRSC/\345\210\207\345\210\206\346\225\260\346\215\256\351\233\206.py" "b/community/cv/ShipWise/workspace/script/dataset_tools/HRSC/\345\210\207\345\210\206\346\225\260\346\215\256\351\233\206.py" new file mode 100644 index 0000000000000000000000000000000000000000..ec1c2fe61556e815eca74e9d592b0a45f937f71a --- /dev/null +++ "b/community/cv/ShipWise/workspace/script/dataset_tools/HRSC/\345\210\207\345\210\206\346\225\260\346\215\256\351\233\206.py" @@ -0,0 +1,66 @@ +import os +import random +from shutil import copyfile + + +def split_dataset(img_dir, annotation_dir, + train_dir, val_dir, test_dir, + train_set_file_path, val_set_file_path, test_set_file_path, + train_ratio=0.8, val_ratio=0.1): + os.makedirs(train_dir, exist_ok=True) + os.makedirs(val_dir, exist_ok=True) + os.makedirs(test_dir, exist_ok=True) + + img_files = os.listdir(img_dir) + annotation_files = os.listdir(annotation_dir) + + # 过滤无标注的图片 + img_files = [img_file for img_file in img_files if img_file.replace('.bmp', '.txt') in annotation_files] + + random.shuffle(img_files) + + num_files = len(img_files) + num_train_files = int(num_files * train_ratio) + num_val_files = int(num_files * val_ratio) + num_test_files = num_files - num_train_files - num_val_files + + # 将划分的图片路径写入文件 + train_set = img_files[:num_train_files] + val_set = img_files[num_train_files:num_train_files + num_val_files] + test_set = img_files[num_train_files + num_val_files:] + for (set_file_path, set_dir, set_files) in zip([train_set_file_path, val_set_file_path, test_set_file_path], + [train_dir, val_dir, test_dir], + [train_set, val_set, test_set]): + with open(set_file_path, 'w') as file: + for set_file in set_files: + file.write(os.path.join(set_dir, set_file) + '\n') + + for i, img_file in enumerate(img_files): + annotation_file = img_file.replace('.bmp', '.txt') + if i < num_train_files: + copyfile(os.path.join(img_dir, img_file), os.path.join(train_dir, img_file)) + copyfile(os.path.join(annotation_dir, annotation_file), os.path.join(train_dir, annotation_file)) + elif i < num_train_files + num_val_files: + copyfile(os.path.join(img_dir, img_file), os.path.join(val_dir, img_file)) + copyfile(os.path.join(annotation_dir, annotation_file), os.path.join(val_dir, annotation_file)) + else: + copyfile(os.path.join(img_dir, img_file), os.path.join(test_dir, img_file)) + copyfile(os.path.join(annotation_dir, annotation_file), os.path.join(test_dir, annotation_file)) + + +def main(): + configs = { + 'img_dir': r"H:\Library\Datasets\HRSC\HRSC2016_dataset\HRSC2016\FullDataSet\AllImages", + 'annotation_dir': r"H:\Library\Datasets\HRSC\HRSC2016_dataset\HRSC2016\FullDataSet-YOLO\Annotations", + 'train_dir': r"H:\Library\Datasets\HRSC\HRSC2016_dataset\HRSC2016\FullDataSet-YOLO-Split\train", + 'val_dir': r"H:\Library\Datasets\HRSC\HRSC2016_dataset\HRSC2016\FullDataSet-YOLO-Split\validation", + 'test_dir': r"H:\Library\Datasets\HRSC\HRSC2016_dataset\HRSC2016\FullDataSet-YOLO-Split\test", + 'train_set_file_path': r"H:\Library\Datasets\HRSC\HRSC2016_dataset\HRSC2016\FullDataSet-YOLO-Split\train.txt", + 'val_set_file_path': 
r"H:\Library\Datasets\HRSC\HRSC2016_dataset\HRSC2016\FullDataSet-YOLO-Split\val.txt", + 'test_set_file_path': r"H:\Library\Datasets\HRSC\HRSC2016_dataset\HRSC2016\FullDataSet-YOLO-Split\test.txt", + } + split_dataset(**configs) + + +if __name__ == '__main__': + main() diff --git "a/community/cv/ShipWise/workspace/script/dataset_tools/HRSC/\350\275\254\346\215\242\346\225\260\346\215\256\351\233\206\344\270\272YOLO\346\240\274\345\274\217.py" "b/community/cv/ShipWise/workspace/script/dataset_tools/HRSC/\350\275\254\346\215\242\346\225\260\346\215\256\351\233\206\344\270\272YOLO\346\240\274\345\274\217.py" new file mode 100644 index 0000000000000000000000000000000000000000..f61a015eb4f8fa1c06bdfd820c0fea52a3f5e217 --- /dev/null +++ "b/community/cv/ShipWise/workspace/script/dataset_tools/HRSC/\350\275\254\346\215\242\346\225\260\346\215\256\351\233\206\344\270\272YOLO\346\240\274\345\274\217.py" @@ -0,0 +1,79 @@ +import xml.etree.ElementTree as ET +import os + + +def convert_cls_id(cls_id): + mapping = { + '100000001': 0, + '100000002': 1, + '100000003': 2, + '100000004': 3, + '100000005': 4, + '100000006': 5, + '100000007': 6, + '100000008': 7, + '100000009': 8, + '100000010': 9, + '100000011': 10, + '100000012': 11, + '100000013': 12, + '100000015': 13, + '100000016': 14, + '100000017': 15, + '100000018': 16, + '100000019': 17, + '100000020': 18, + '100000022': 19, + '100000024': 20, + '100000025': 21, + '100000026': 22, + '100000027': 23, + '100000028': 24, + '100000029': 25, + '100000030': 26, + '100000032': 27, + } + return mapping[cls_id] + + +def xml_to_txt(xml_file_dir, txt_file_dir): + os.makedirs(txt_file_dir, exist_ok=True) + all_class_ids = set() + for xml_file in os.listdir(xml_file_dir): + if not xml_file.endswith('.xml') or xml_file == 'annotation_fmt.xml': + continue + xml_file_path = os.path.join(xml_file_dir, xml_file) + tree = ET.parse(xml_file_path) + root = tree.getroot() + img_id = root.find('Img_ID').text + img_width = int(root.find('Img_SizeWidth').text) + img_height = int(root.find('Img_SizeHeight').text) + txt_file_path = os.path.join(txt_file_dir, f"{img_id}.txt") + objs = root.findall('.//HRSC_Object') + if len(objs) == 0: + continue + with open(txt_file_path, 'w') as txt_file: + for obj in objs: + class_id = convert_cls_id(obj.find('Class_ID').text) + all_class_ids.add(class_id) + box_xmin = int(obj.find('box_xmin').text) + box_ymin = int(obj.find('box_ymin').text) + box_xmax = int(obj.find('box_xmax').text) + box_ymax = int(obj.find('box_ymax').text) + x_center = ((box_xmin + box_xmax) / 2) / img_width + y_center = ((box_ymin + box_ymax) / 2) / img_height + box_width = (box_xmax - box_xmin) / img_width + box_height = (box_ymax - box_ymin) / img_height + txt_file.write(f"{class_id} {x_center} {y_center} {box_width} {box_height}\n") + print(f"Total class ids: {len(all_class_ids)}") + print([str(class_id) for class_id in all_class_ids]) + + +def main(): + xml_file_dir = r"H:\Library\Datasets\HRSC\HRSC2016_dataset\HRSC2016\FullDataSet\Annotations" + txt_file_dir = r"H:\Library\Datasets\HRSC\HRSC2016_dataset\HRSC2016\FullDataSet-YOLO\Annotations" + xml_to_txt(xml_file_dir, txt_file_dir) + + +if __name__ == '__main__': + main() diff --git a/community/cv/ShipWise/workspace/script/dataset_tools/__init__.py b/community/cv/ShipWise/workspace/script/dataset_tools/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c20f18bb3615ba9bf3bad6a26c03b0b76507ee7c --- /dev/null +++ b/community/cv/ShipWise/workspace/script/dataset_tools/__init__.py @@ -0,0 
+1,11 @@ +# -*- coding: utf-8 -*- +# @Time : 2024-09-15 13:05 +# @Author : Jiang Liu + + +def main(): + pass + + +if __name__ == '__main__': + main() diff --git a/community/cv/ShipWise/workspace/train.py b/community/cv/ShipWise/workspace/train.py new file mode 100644 index 0000000000000000000000000000000000000000..cecce9956565deaadcfc58b4d418920af88842f8 --- /dev/null +++ b/community/cv/ShipWise/workspace/train.py @@ -0,0 +1,320 @@ +import argparse +import ast +import os +from functools import partial + +import mindspore as ms + +from mindyolo.data import COCODataset, create_loader +from mindyolo.models import create_loss, create_model +from mindyolo.optim import (EMA, create_group_param, create_lr_scheduler, + create_optimizer, create_warmup_momentum_scheduler) +from mindyolo.utils import logger +from mindyolo.utils.config import parse_args +from mindyolo.utils.train_step_factory import get_gradreducer, get_loss_scaler, create_train_step_fn +from mindyolo.utils.trainer_factory import create_trainer +from mindyolo.utils.callback import create_callback +from mindyolo.utils.utils import (freeze_layers, load_pretrain, set_default, + set_seed, Synchronizer) + + +def get_parser_train(parents=None): + parser = argparse.ArgumentParser(description="Train", parents=[parents] if parents else []) + parser.add_argument("--task", type=str, default="detect", choices=["detect", "segment"]) + parser.add_argument("--device_target", type=str, default="Ascend", help="device target, Ascend/GPU/CPU") + parser.add_argument("--save_dir", type=str, default="./runs", help="save dir") + parser.add_argument("--log_level", type=str, default="INFO", help="log level to print") + parser.add_argument("--is_parallel", type=ast.literal_eval, default=False, help="Distribute train or not") + parser.add_argument("--ms_mode", type=int, default=0, + help="Running in GRAPH_MODE(0) or PYNATIVE_MODE(1) (default=0)") + parser.add_argument("--ms_amp_level", type=str, default="O0", help="amp level, O0/O1/O2/O3") + parser.add_argument("--keep_loss_fp32", type=ast.literal_eval, default=True, + help="Whether to maintain loss using fp32/O0-level calculation") + parser.add_argument("--ms_loss_scaler", type=str, default="static", help="train loss scaler, static/dynamic/none") + parser.add_argument("--ms_loss_scaler_value", type=float, default=1024.0, help="static loss scale value") + parser.add_argument("--ms_jit", type=ast.literal_eval, default=True, help="use jit or not") + parser.add_argument("--ms_enable_graph_kernel", type=ast.literal_eval, default=False, + help="use enable_graph_kernel or not") + parser.add_argument("--ms_datasink", type=ast.literal_eval, default=False, help="Train with datasink.") + parser.add_argument("--overflow_still_update", type=ast.literal_eval, default=True, help="overflow still update") + parser.add_argument("--clip_grad", type=ast.literal_eval, default=False) + parser.add_argument("--clip_grad_value", type=float, default=10.0) + parser.add_argument("--ema", type=ast.literal_eval, default=True, help="ema") + parser.add_argument("--weight", type=str, default="", help="initial weight path") + parser.add_argument("--ema_weight", type=str, default="", help="initial ema weight path") + parser.add_argument("--freeze", type=list, default=[], help="Freeze layers: backbone of yolov7=50, first3=0 1 2") + parser.add_argument("--epochs", type=int, default=300, help="total train epochs") + parser.add_argument("--per_batch_size", type=int, default=32, help="per batch size for each device") + 
parser.add_argument("--img_size", type=list, default=640, help="train image sizes") + parser.add_argument("--nbs", type=list, default=64, help="nbs") + parser.add_argument("--accumulate", type=int, default=1, + help="grad accumulate step, recommended when batch-size is less than 64") + parser.add_argument("--auto_accumulate", type=ast.literal_eval, default=False, help="auto accumulate") + parser.add_argument("--log_interval", type=int, default=100, help="log interval") + parser.add_argument("--single_cls", type=ast.literal_eval, default=False, + help="train multi-class data as single-class") + parser.add_argument("--sync_bn", type=ast.literal_eval, default=False, + help="use SyncBatchNorm, only available in DDP mode") + parser.add_argument("--keep_checkpoint_max", type=int, default=100) + parser.add_argument("--run_eval", type=ast.literal_eval, default=False, help="Whether to run eval during training") + parser.add_argument("--conf_thres", type=float, default=0.001, help="object confidence threshold for run_eval") + parser.add_argument("--iou_thres", type=float, default=0.65, help="IOU threshold for NMS for run_eval") + parser.add_argument("--conf_free", type=ast.literal_eval, default=False, + help="Whether the prediction result include conf") + parser.add_argument("--rect", type=ast.literal_eval, default=False, help="rectangular training") + parser.add_argument("--nms_time_limit", type=float, default=20.0, help="time limit for NMS") + parser.add_argument("--recompute", type=ast.literal_eval, default=False, help="Recompute") + parser.add_argument("--recompute_layers", type=int, default=0) + parser.add_argument("--seed", type=int, default=2, help="set global seed") + parser.add_argument("--summary", type=ast.literal_eval, default=True, help="collect train loss scaler or not") + parser.add_argument("--profiler", type=ast.literal_eval, default=False, help="collect profiling data or not") + parser.add_argument("--profiler_step_num", type=int, default=1, help="collect profiler data for how many steps.") + parser.add_argument("--opencv_threads_num", type=int, default=2, help="set the number of threads for opencv") + parser.add_argument("--strict_load", type=ast.literal_eval, default=True, help="strictly load the pretrain model") + + # args for ModelArts + parser.add_argument("--enable_modelarts", type=ast.literal_eval, default=False, help="enable modelarts") + parser.add_argument("--data_url", type=str, default="", help="ModelArts: obs path to dataset folder") + parser.add_argument("--ckpt_url", type=str, default="", help="ModelArts: obs path to pretrain model checkpoint file") + parser.add_argument("--multi_data_url", type=str, default="", help="ModelArts: list of obs paths to multi-dataset folders") + parser.add_argument("--pretrain_url", type=str, default="", help="ModelArts: list of obs paths to multi-pretrain model files") + parser.add_argument("--train_url", type=str, default="", help="ModelArts: obs path to output folder") + parser.add_argument("--data_dir", type=str, default="/cache/data/", + help="ModelArts: local device path to dataset folder") + parser.add_argument("--ckpt_dir", type=str, default="/cache/pretrain_ckpt/", + help="ModelArts: local device path to checkpoint folder") + return parser + + +def train(args): + # Set Default + set_seed(args.seed) + set_default(args) + main_device = args.rank % args.rank_size == 0 + + logger.info(f"parse_args:\n{args}") + logger.info("Please check the above information for the configurations") + + # Create Network + args.network.recompute = 
args.recompute + args.network.recompute_layers = args.recompute_layers + network = create_model( + model_name=args.network.model_name, + model_cfg=args.network, + num_classes=args.data.nc, + sync_bn=args.sync_bn, + ) + + if args.ema: + ema_network = create_model( + model_name=args.network.model_name, + model_cfg=args.network, + num_classes=args.data.nc, + ) + ema = EMA(network, ema_network) + else: + ema = None + load_pretrain(network, args.weight, ema, args.ema_weight, args.strict_load) # load pretrain + freeze_layers(network, args.freeze) # freeze Layers + ms.amp.auto_mixed_precision(network, amp_level=args.ms_amp_level) + if ema: + ms.amp.auto_mixed_precision(ema.ema, amp_level=args.ms_amp_level) + + # Create Dataloaders + transforms = args.data.train_transforms + stage_dataloaders = [] + stage_epochs = [args.epochs,] if not isinstance(transforms, dict) else transforms['stage_epochs'] + stage_transforms = [transforms,] if not isinstance(transforms, dict) else transforms['trans_list'] + assert len(stage_epochs) == len(stage_transforms), "The length of transforms and stage_epochs is not equal." + assert sum(stage_epochs) == args.epochs, f"Stage epochs [{sum(stage_epochs)}] not equal args.epochs [{args.epochs}]" + for stage in range(len(stage_epochs)): + _dataset = COCODataset( + dataset_path=args.data.train_set, + img_size=args.img_size, + transforms_dict=stage_transforms[stage], + is_training=True, + augment=True, + rect=args.rect, + single_cls=args.single_cls, + batch_size=args.total_batch_size, + stride=max(args.network.stride), + return_segments=(args.task == "segment") + ) + _dataloader = create_loader( + dataset=_dataset, + batch_collate_fn=_dataset.train_collate_fn, + column_names_getitem=_dataset.column_names_getitem, + column_names_collate=_dataset.column_names_collate, + batch_size=args.per_batch_size, + epoch_size=stage_epochs[stage], + rank=args.rank, + rank_size=args.rank_size, + shuffle=True, + drop_remainder=True, + num_parallel_workers=args.data.num_parallel_workers, + python_multiprocessing=True, + ) + stage_dataloaders.append(_dataloader) + dataloader = stage_dataloaders[0] if len(stage_dataloaders) == 1 else ms.dataset.ConcatDataset(stage_dataloaders) + steps_per_epoch = dataloader.get_dataset_size() // args.epochs + + if args.run_eval: + from test import test + eval_dataset = COCODataset( + dataset_path=args.data.val_set, + img_size=args.img_size, + transforms_dict=args.data.test_transforms, + is_training=False, + augment=False, + rect=args.rect, + single_cls=args.single_cls, + batch_size=args.per_batch_size, + stride=max(args.network.stride), + ) + eval_dataloader = create_loader( + dataset=eval_dataset, + batch_collate_fn=eval_dataset.test_collate_fn, + column_names_getitem=eval_dataset.column_names_getitem, + column_names_collate=eval_dataset.column_names_collate, + batch_size=args.per_batch_size, + epoch_size=1, + rank=args.rank, + rank_size=args.rank_size, + shuffle=False, + drop_remainder=False, + num_parallel_workers=1, + python_multiprocessing=True, + ) + else: + eval_dataset, eval_dataloader = None, None + + # Create Loss + loss_fn = create_loss( + **args.loss, anchors=args.network.get("anchors", 1), stride=args.network.stride, nc=args.data.nc + ) + ms.amp.auto_mixed_precision(loss_fn, amp_level="O0" if args.keep_loss_fp32 else args.ms_amp_level) + + # Create Optimizer + args.optimizer.steps_per_epoch = steps_per_epoch + lr = create_lr_scheduler(**args.optimizer) + params = create_group_param(params=network.trainable_params(), **args.optimizer) + optimizer = 
create_optimizer(params=params, lr=lr, **args.optimizer) + warmup_momentum = create_warmup_momentum_scheduler(**args.optimizer) + + # Create train_step_fn + reducer = get_gradreducer(args.is_parallel, optimizer.parameters) + scaler = get_loss_scaler(args.ms_loss_scaler, scale_value=args.ms_loss_scaler_value) + train_step_fn = create_train_step_fn( + task=args.task, + network=network, + loss_fn=loss_fn, + optimizer=optimizer, + loss_ratio=args.rank_size, + scaler=scaler, + reducer=reducer, + ema=ema, + overflow_still_update=args.overflow_still_update, + ms_jit=args.ms_jit, + clip_grad=args.clip_grad, + clip_grad_value=args.clip_grad_value + ) + + # Create callbacks + if args.summary: + args.callback.append({"name": "SummaryCallback"}) + if args.profiler: + args.callback.append({"name": "ProfilerCallback", "profiler_step_num": args.profiler_step_num}) + callback_fns = create_callback(args.callback) + + # Create test function for run eval while train + if args.run_eval: + is_coco_dataset = "coco" in args.data.dataset_name + test_fn = partial( + test, + task=args.task, + dataloader=eval_dataloader, + anno_json_path=os.path.join( + args.data.val_set[: -len(args.data.val_set.split("/")[-1])], "annotations/instances_val2017.json" + ), + conf_thres=args.conf_thres, + iou_thres=args.iou_thres, + conf_free=args.conf_free, + num_class=args.data.nc, + nms_time_limit=args.nms_time_limit, + is_coco_dataset=is_coco_dataset, + imgIds=None if not is_coco_dataset else eval_dataset.imgIds, + per_batch_size=args.per_batch_size, + rank=args.rank, + rank_size=args.rank_size, + save_dir=args.save_dir, + synchronizer=Synchronizer(args.rank_size) if args.rank_size > 1 else None, + ) + else: + test_fn = None + + # Create Trainer + network.set_train(True) + optimizer.set_train(True) + model_name = os.path.basename(args.config)[:-5] # delete ".yaml" + trainer = create_trainer( + model_name=model_name, + train_step_fn=train_step_fn, + scaler=scaler, + dataloader=dataloader, + steps_per_epoch=steps_per_epoch, + network=network, + loss_fn=loss_fn, + ema=ema, + optimizer=optimizer, + callback=callback_fns, + reducer=reducer, + data_sink=args.ms_datasink, + profiler=args.profiler + ) + if not args.ms_datasink: + trainer.train( + epochs=args.epochs, + main_device=main_device, + warmup_step=max(round(args.optimizer.warmup_epochs * steps_per_epoch), args.optimizer.min_warmup_step), + warmup_momentum=warmup_momentum, + accumulate=args.accumulate, + overflow_still_update=args.overflow_still_update, + keep_checkpoint_max=args.keep_checkpoint_max, + log_interval=args.log_interval, + loss_item_name=[] if not hasattr(loss_fn, "loss_item_name") else loss_fn.loss_item_name, + save_dir=args.save_dir, + enable_modelarts=args.enable_modelarts, + train_url=args.train_url, + run_eval=args.run_eval, + test_fn=test_fn, + rank_size=args.rank_size, + ms_jit=args.ms_jit, + profiler_step_num=args.profiler_step_num + ) + else: + logger.warning("DataSink is an experimental interface under development.") + logger.warning("Train with data sink mode.") + assert args.accumulate == 1, "datasink mode not support grad accumulate." 
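+        # In data-sink mode, batches are fed through the device queue and whole epochs run on the
+        # device, so per-step host-side control such as gradient accumulation is unavailable,
+        # which is why accumulate must be 1 here.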
+ trainer.train_with_datasink( + task=args.task, + epochs=args.epochs, + main_device=main_device, + warmup_epoch=max(args.optimizer.warmup_epochs, args.optimizer.min_warmup_step // steps_per_epoch), + warmup_momentum=warmup_momentum, + keep_checkpoint_max=args.keep_checkpoint_max, + log_interval=args.log_interval, + loss_item_name=[] if not hasattr(loss_fn, "loss_item_name") else loss_fn.loss_item_name, + save_dir=args.save_dir, + enable_modelarts=args.enable_modelarts, + train_url=args.train_url, + run_eval=args.run_eval, + test_fn=test_fn, + profiler_step_num=args.profiler_step_num + ) + logger.info("Training completed.") + + +if __name__ == "__main__": + parser = get_parser_train() + args = parse_args(parser) + train(args)
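
With the patch applied, the Flask service in `workspace/flask/index.py` can be exercised end to end. Below is a minimal client sketch (not part of the patch): it assumes the service is running locally on port 8080, as configured in `app.run(debug=True, port=8080)`, that the `requests` package is installed, and that `test.bmp` is any image on disk; the response shape follows the endpoint's docstring.

```python
# Minimal client sketch for the /detect endpoint (assumes the Flask service above is running
# on 127.0.0.1:8080 and that `requests` is installed; the image filename is illustrative).
import requests

with open("test.bmp", "rb") as f:
    resp = requests.post("http://127.0.0.1:8080/detect", files={"image": f})
resp.raise_for_status()

result = resp.json()
# Expected shape, per the endpoint docstring:
# {"bbox": [[698.248, 524.238, 217.65, 196.28]], "category_id": [18], "score": [0.82683]}
for box, cls_id, score in zip(result["bbox"], result["category_id"], result["score"]):
    print(f"class={cls_id} score={score:.3f} bbox(xywh)={box}")
```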