diff --git a/AscendIE/TorchAIE/built-in/cv/detection/Yolov8/README.md b/AscendIE/TorchAIE/built-in/cv/detection/Yolov8/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c1331cfe05e3d10da67249d29e09aea18bdd524f --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/Yolov8/README.md @@ -0,0 +1,236 @@ +# YOLOv8-推理指导 + + +- [概述](#ZH-CN_TOPIC_0000001172161501) + + - [输入输出数据](#section540883920406) + +- [推理环境准备](#ZH-CN_TOPIC_0000001126281702) + +- [快速上手](#ZH-CN_TOPIC_0000001126281700) + + - [获取源码](#section4622531142816) + + - [准备数据集](#section183221994411) + + - [模型推理](#section741711594517) + +- [模型推理性能&精度](#ZH-CN_TOPIC_0000001172201573) + +****** + +# 概述 +YOLO系列网络模型是最为经典的one-stage算法,也是目前工业领域使用最多的目标检测网络,YOLOv8网络模型是YOLO系列的最新版本,在继承了原有YOLO网络模型优点的基础上,具有更高的检测精度。 + +Ultralytics YOLOv8 是一款前沿、最先进(SOTA)的模型,基于先前 YOLO 版本的成功,引入了新功能和改进,进一步提升性能和灵活性。 + +YOLOv8 设计快速、准确且易于使用,使其成为各种物体检测与跟踪、实例分割、图像分类和姿态估计任务的绝佳选择。 + +本文旨在提供基于推理引擎的Yolov8参考样例,使用了coco2017数据集,并测试了昇腾310P3芯片上的推理精度供参考,模型性能仍在持续优化中。 + +- 参考实现: + + ``` + url=https://github.com/ultralytics/ultralytics + commit_id=7a7c8dc7b70cf4bc0be18763a6b66805974ecbe6 + model_name=yolov8 + ``` + +## 输入输出数据 + +- 输入数据 + + | 输入数据 | 数据类型 | 大小 | 数据排布格式 | + | -------- | -------- | ------------------------- | ------------ | + | images | RGB_FP32 | batchsize x 3 x 640 x 640 | NCHW | + +- 输出数据 + + | 输出数据 | 数据类型 | 大小 | 数据排布格式 | + | -------- | -------- | --------------------- | ------------ | + | output0 | FLOAT32 | batchsize x 84 x 8400 | ND | + + +# 推理环境准备 +- 该模型需要以下插件与驱动 + **表 1** 版本配套表 + +| 配套 | 版本 | 环境准备指导 | +| ------------------------------------------------------- |--------|---------------------------------------------------------------------------------------------------------------------------------------------| +| 固件与驱动 | 23.0.rc1 | [Pytorch框架推理环境准备](https://www.hiascend.com/document/detail/zh/ModelZoo/pytorchframework/pies) | +| CANN | 7.0.RC1.alpha003 | 
[推理应用开发学习文档](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/600alpha003/infacldevg/aclpythondevg/aclpythondevg_0000.html) | +| Python | 3.9.11 | - | +| PyTorch | 2.0.1 | - | +| torch_aie | 6.3rc2| - | +| 说明:Atlas 300I Duo 推理卡请以CANN版本选择实际固件与驱动版本。 | \ | \ | + + +# 快速上手 + +## 获取源码 + +1. 获取源码 + + ``` + git clone https://github.com/ultralytics/ultralytics + cd ultralytics + git reset --hard 7a7c8dc7b70cf4bc0be18763a6b66805974ecbe6 + pip3 install -e . + git apply ../dataloader.patch + cd .. + ``` + +2. 安装依赖 + + ``` + pip3 install -r requirements.txt + cd ultralytics + pip3 install -r requirements.txt + cd .. + ``` + + +## 准备数据集 + +1. 获取原始数据集。 + + 该模型使用 [coco2017 val数据集](https://cocodataset.org/#download) 进行精度评估。 + + labels[下载地址](https://gitee.com/link?target=https%3A%2F%2Fgithub.com%2Fultralytics%2Fyolov5%2Freleases%2Fdownload%2Fv1.0%2Fcoco2017labels.zip),验证集[下载地址](https://gitee.com/link?target=https%3A%2F%2Fimages.cocodataset.org%2Fzips%2Fval2017.zip)。 + + 文件结构如下: + + ``` + datasets + ├── coco + ├── annotations + └── instances_val2017.json + ├── images + ├── test2017 + ├── train2017 + ├── val2017 + ├── 000000000139.jpg + ├── 000000000285.jpg + ...... + └── 000000581781.jpg + ├── labels + └── val2017 + ├── LICENSE + ├── README.txt + ├── test-dev2017.txt + ├── train2017.txt + ├── val2017.txt + ``` + + +## 模型推理 + +1. 模型准备 + + 先获取模型权重文件,再将其导出为torchscript模型文件,然后用Torch-AIE进行编译。 + + 1. 获取权重文件 + + 在[链接](https://github.com/ultralytics/assets/releases/tag/v0.0.0)中找到所需版本下载,也可以使用下述命令下载: + + ``` + wget https://github.com/ultralytics/assets/releases/download/v0.0.0/${model}.pt + ``` + + - 参数说明: + - `${model}`:模型大小,可选`yolov8[n/s/m/l/x]` + + > **说明**:后续以 yolov8n 模型作为示例进行指导说明,请根据实际情况进行对应修改。 + + 2. 导出torchscript模型 + + 运行下述命令导出torchscript模型。 + + ``` + python3 export.py --weights="./yolov8n.pt" + ``` + + 获得 yolov8n.torchscript 文件。 + + + 3. 使用Torch-AIE进行模型编译 + + + 1. 执行如下命令。 + + ``` + python3 compile.py + ``` + + 运行成功后生成 yolov8n.aie.ts 模型文件。 + + +2. 开始推理验证。 + + 1. 
参数设置 + + 在 `ultralytics\ultralytics\yolo\cfg` 文件夹的 `default.yaml` 与 `ultralytics\ultralytics\yolo\data\datasets` 文件夹的 `coco.yaml` 中填入相关参数。 + + 请根据实际情况修改相关参数。 + + - default.yaml 参数说明: + - `model`:pt 权重文件,必须与导出的 torchscript 模型对应一致 + - `data`: 数据配置文件,此处以 coco.yaml 为例进行说明 + - `batch`:批处理数量大小,必须与编译后 aie 模型的 batchsize 相等 + - `project`:推理结果的总保存路径 + - `name`:每次推理结果的文件名称 + - ...... + + - coco.yaml 参数说明: + - `path`:coco 数据集存放路径 + - `train`:train2017.txt,训练数据集路径文本 + - `val`:val2017.txt,验证数据集路径文本 + - `test`:test-dev2017.txt,测试数据集路径文本 + + 2. 执行推理 & 精度验证 + + 运行 `torch_aie_infer.py` 推理模型,结果保存在`default.yaml`中`project`所设置路径,或者 `runs/detect/val[n]` 文件夹下的 `predictions.json`,精度计算结果通过打屏显示。 + + ``` + python3 torch_aie_infer.py --weight=yolov8n.pt --ts=yolov8n.aie.ts --device_id=0 --batch=8 + ``` + + - 命令参数说明: + - `--weight`:pt 权重文件所在路径 + - `--ts`:ts 模型所在路径 + - `--device_id`:使用芯片的序号 + - `--batch`:batchsize,批处理数量大小 + + 3. 性能测试 + + 运行以下命令: + + ``` + python3 infer_test.py + ``` + + +# 模型推理性能&精度 + +调用Torch-AIE接口推理计算,yolov8n性能&精度参考下列数据。 + + **表 1** 推理配置及性能 +| 芯片型号 | Batch Size | 数据集 | 性能 | +|:-----:|:------:|:----:|:----:| +| Ascend310P3| 8 | coco2017 | 477.63 fps| + + **表 2** 模型推理精度 +| 类型 | 配置 | 精度 | +|:-----:|:------:|:----:| +| Average Precision (AP)| @[ IoU=0.50:0.95 , area= all , maxDets=100 ] | 0.371 +| Average Precision (AP)| @[ IoU=0.50 , area= all , maxDets=100 ] | 0.522 +| Average Precision (AP)| @[ IoU=0.75 , area= all , maxDets=100 ] | 0.403 +| Average Precision (AP)| @[ IoU=0.50:0.95 , area= small , maxDets=100 ] | 0.184 +| Average Precision (AP)| @[ IoU=0.50:0.95 , area=medium , maxDets=100 ] | 0.407 +| Average Precision (AP)| @[ IoU=0.50:0.95 , area= large , maxDets=100 ] | 0.530 +| Average Recall (AR)| @[ IoU=0.50:0.95 , area= all , maxDets= 1 ] | 0.314 +| Average Recall (AR)| @[ IoU=0.50:0.95 , area= all , maxDets= 10 ] | 0.512 +| Average Recall (AR)| @[ IoU=0.50:0.95 , area= all , maxDets=100 ] | 0.553 +| Average Recall (AR)| @[ IoU=0.50:0.95 , area= small , maxDets=100 ] | 0.323 +| 
Average Recall (AR)| @[ IoU=0.50:0.95 , area=medium , maxDets=100 ] | 0.612 +| Average Recall (AR)| @[ IoU=0.50:0.95 , area= large , maxDets=100 ] | 0.730 \ No newline at end of file diff --git a/AscendIE/TorchAIE/built-in/cv/detection/Yolov8/compile.py b/AscendIE/TorchAIE/built-in/cv/detection/Yolov8/compile.py new file mode 100644 index 0000000000000000000000000000000000000000..e37af429ad08d9d04530fb865800cf122d9cfe90 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/Yolov8/compile.py @@ -0,0 +1,55 @@ +# # Copyright(C) 2023. Huawei Technologies Co.,Ltd. All rights reserved. +# # +# # Licensed under the Apache License, Version 2.0 (the "License"); +# # you may not use this file except in compliance with the License. +# # You may obtain a copy of the License at +# # +# # http://www.apache.org/licenses/LICENSE-2.0 +# # +# # Unless required by applicable law or agreed to in writing, software +# # distributed under the License is distributed on an "AS IS" BASIS, +# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# # See the License for the specific language governing permissions and +# # limitations under the License. 
def main(args):
    """Compile a TorchScript model with Torch-AIE and save the compiled module.

    Args:
        args: Parsed CLI namespace. ``model_path`` is the TorchScript file to
            load, ``batchsize`` is the leading dimension of the single
            ``(batchsize, 3, 640, 640)`` input, and ``output`` is where the
            compiled module is saved.
    """
    # Read every option first so a missing attribute fails before any work.
    source_path, batch, destination = args.model_path, args.batchsize, args.output

    scripted = torch.jit.load(source_path)
    scripted.eval()

    compile_options = dict(
        inputs=[torch_aie.Input((batch, 3, 640, 640))],
        precision_policy=_enums.PrecisionPolicy.FP16,
        allow_tensor_replace_int=False,
        min_block_size=1,
        soc_version="Ascend310P3",
        optimization_level=0,
    )
    torch_aie.compile(scripted, **compile_options).save(destination)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # (flag, keyword options) pairs, registered in the order shown by --help.
    for flag, options in (
        (
            "--model_path",
            dict(
                type=str,
                default="./yolov8n.torchscript",
                help="Path to the PyTorch model (.torchscript file)",
            ),
        ),
        (
            "--batchsize",
            dict(type=int, default=8, help="Batch size for input tensors"),
        ),
        (
            "--output",
            dict(
                type=str,
                default="yolov8n.aie.ts",
                help="Path to the output compiled file",
            ),
        ),
    ):
        parser.add_argument(flag, **options)
    args = parser.parse_args()
    main(args)
def main(args):
    """Export the YOLO weights named by ``args.weights`` to TorchScript.

    Args:
        args: Parsed CLI namespace; ``weights`` is the path handed to ``YOLO``.
    """
    # The value returned by export() is not needed here.
    YOLO(args.weights).export(format="torchscript")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    weights_option = dict(
        type=str, default="yolov8n.pt", help="Path to the weights file"
    )
    parser.add_argument("--weights", **weights_option)
    args = parser.parse_args()
    main(args)
def main(args):
    """Benchmark a compiled Torch-AIE model on random input and print its FPS.

    The model is run ``args.infer_times`` times on one random
    ``(batchsize, 3, 640, 640)`` tensor. The first iterations are treated as
    warm-up and excluded from the average.

    Args:
        args: Parsed CLI namespace with ``aie_model_path`` (compiled model to
            load), ``batchsize`` (leading input dimension) and ``infer_times``
            (total number of forward passes, warm-up included).

    Raises:
        ValueError: If ``args.infer_times`` does not exceed the warm-up count.
            No timing sample would be recorded and the average would divide
            by zero.
    """
    warmup_iters = 6  # leading iterations excluded from the average
    if args.infer_times <= warmup_iters:
        raise ValueError(
            f"infer_times must be greater than the {warmup_iters} warm-up "
            f"iterations, got {args.infer_times}"
        )

    aie_module = torch.jit.load(args.aie_model_path)
    aie_module.eval()

    torch_aie.set_device(0)
    # The input never changes, so move it to the device once instead of on
    # every iteration. The copy was outside the timed region anyway.
    npu_input = torch.randn(args.batchsize, 3, 640, 640).to("npu:0")

    infer_cost = []
    for i in range(args.infer_times):
        stream = torch_aie.npu.Stream("npu:0")
        with torch_aie.npu.stream(stream):
            # perf_counter is monotonic and has finer resolution than time.time.
            start_time = time.perf_counter()
            output = aie_module.forward(npu_input)
            stream.synchronize()
            cost = time.perf_counter() - start_time
        if i >= warmup_iters:
            infer_cost.append(cost)
        # Copy the result back to the host outside the timed region.
        output.to("cpu")

    avg_cost = sum(infer_cost) / len(infer_cost)
    fps = args.batchsize / avg_cost
    print(f"AIE avg cost: {avg_cost}, AIE fps:{ fps}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--aie_model_path",
        type=str,
        default="./yolov8n.aie.ts",
        help="Path to the AIE model file",
    )
    parser.add_argument(
        "--batchsize", type=int, default=8, help="batch size for inference"
    )
    parser.add_argument(
        "--infer_times", type=int, default=80, help="infer times for model test"
    )
    args = parser.parse_args()
    main(args)
b/AscendIE/TorchAIE/built-in/cv/detection/Yolov8/torch_aie_infer.py @@ -0,0 +1,519 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os +import glob +import time +import json +from pathlib import Path + +from tqdm import tqdm +import numpy as np +import cv2 +import torch +import torch_aie +from torch_aie import _enums +import torchvision +from torch.utils.data.dataset import Dataset +from ultralytics import YOLO + +from ultralytics.nn.tasks import attempt_load_one_weight +from ultralytics.yolo.cfg import get_cfg +from ultralytics.yolo.utils import DEFAULT_CFG, SETTINGS, RANK, TQDM_BAR_FORMAT, LOGGER, colorstr, ops +from ultralytics.yolo.utils.files import increment_path +from ultralytics.yolo.utils.metrics import ConfusionMatrix, DetMetrics, box_iou +from ultralytics.yolo.utils.ops import Profile, xywh2xyxy, xyxy2xywh +from ultralytics.yolo.utils.checks import check_requirements +from ultralytics.yolo.utils.plotting import output_to_target, plot_images +from ultralytics.yolo.data import build_dataloader +from ultralytics.yolo.data.utils import check_det_dataset +from ultralytics.yolo.data.dataloaders.v5loader import create_dataloader + + +def _reset_ckpt_args(args): + for arg in "augment", "verbose", "project", "name", "exist_ok", "resume", "batch", "epochs", "cache", \ + "save_json", "half", "v5loader", "device", "cfg", "save", "rect", "plots": + args.pop(arg, None) + + +def get_dataloader(args, 
def non_max_suppression(
    prediction,
    conf_thres=0.25,
    iou_thres=0.45,
    classes=None,
    agnostic=False,
    multi_label=False,
    labels=(),
    max_det=300,
    nm=0,  # number of masks
):
    """Filter raw detections by confidence and run per-image NMS on the CPU.

    Args:
        prediction: Raw model output, or a list/tuple whose first element is
            that output. It is indexed as
            ``(batch, 4 + num_classes + nm, num_candidates)``. Anything
            ``torch.as_tensor`` accepts is allowed.
        conf_thres: Minimum class confidence for a candidate, in ``[0, 1]``.
        iou_thres: IoU threshold passed to ``torchvision.ops.nms``, in ``[0, 1]``.
        classes: Optional class indices to keep; ``None`` keeps every class.
        agnostic: If true, boxes of different classes suppress each other.
        multi_label: If true (and there is more than one class), a box may be
            emitted once per class above ``conf_thres``.
        labels: Optional per-image a-priori labels (rows of ``cls, x, y, w, h``)
            appended to the candidates before NMS.
        max_det: Maximum number of detections kept per image.
        nm: Number of trailing mask coefficients per candidate.

    Returns:
        A list with one CPU tensor per image, each of shape ``(n, 6 + nm)``
        with columns ``x1, y1, x2, y2, confidence, class`` followed by the
        mask coefficients. Images without detections get an empty tensor.
        Images not reached before the time limit keep their empty tensor too.

    Raises:
        AssertionError: If ``conf_thres`` or ``iou_thres`` is outside ``[0, 1]``.
    """
    # Checks
    assert (
        0 <= conf_thres <= 1
    ), f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0"
    assert (
        0 <= iou_thres <= 1
    ), f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0"
    if isinstance(
        prediction, (list, tuple)
    ):  # YOLOv8 model in validation model, output = (inference_out, loss_out)
        prediction = prediction[0]  # select only inference output

    # NMS is done on the host. as_tensor avoids the extra copy (and the
    # UserWarning) that torch.tensor() produces for an existing tensor.
    # The result of .to() must be kept because it is not an in-place operation.
    prediction = torch.as_tensor(prediction).to("cpu")

    bs = prediction.shape[0]  # batch size
    nc = prediction.shape[1] - nm - 4  # number of classes
    mi = 4 + nc  # mask start index
    xc = prediction[:, 4:mi].amax(1) > conf_thres  # candidates

    # Settings
    max_wh = 7680  # (pixels) maximum box width and height
    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
    time_limit = 0.5 + 0.05 * bs  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
    merge = False  # use merge-NMS

    t = time.time()
    # One independent empty tensor per image, so no slot aliases another.
    output = [
        torch.zeros((0, 6 + nm), device=prediction.device) for _ in range(bs)
    ]
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        x = x.transpose(0, -1)[xc[xi]]  # confidence

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            lb = labels[xi]
            # Rows must be as wide as x (4 box + nc classes + nm masks),
            # otherwise the concatenation below fails.
            v = torch.zeros((len(lb), nc + nm + 4), device=x.device)
            v[:, :4] = lb[:, 1:5]  # box
            v[range(len(lb)), lb[:, 0].long() + 4] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Detections matrix nx6 (xyxy, conf, cls)
        box, cls, mask = x.split((4, nc, nm), 1)
        box = xywh2xyxy(box)  # (center_x, center_y, width, height) to (x1, y1, x2, y2)
        if multi_label:
            i, j = (cls > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, 4 + j, None], j[:, None].float(), mask[i]), 1)
        else:  # best class only
            conf, j = cls.max(1, keepdim=True)
            x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        x = x[
            x[:, 4].argsort(descending=True)[:max_nms]
        ]  # sort by confidence and remove excess boxes

        # Batched NMS: offsetting boxes by class keeps classes from suppressing
        # each other unless agnostic mode is requested.
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        i = i[:max_det]  # limit detections
        if merge and (1 < n < 3e3):  # Merge NMS (boxes merged using weighted mean)
            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(
                1, keepdim=True
            )  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            LOGGER.warning(f"WARNING ⚠️ NMS time limit {time_limit:.3f}s exceeded")
            break  # time limit exceeded

    return output
def update_metrics(args, preds, batch):
    """Accumulate per-image evaluation statistics for one batch of predictions.

    For every image the function appends a tuple to ``args.stats``,
    increments ``args.seen``, optionally feeds ``args.confusion_matrix``
    and, when ``args.save_json`` is set, forwards the rescaled predictions
    to ``pred_to_json``.

    Args:
        args: State object. Reads ``niou``, ``device``, ``plots``,
            ``single_cls`` and ``save_json``; mutates ``seen``, ``stats``
            and ``confusion_matrix``.
        preds: Per-image prediction tensors. Column 4 is read as the
            confidence and column 5 as the class.
        batch: Batch dictionary providing ``batch_idx``, ``cls``, ``bboxes``,
            ``ori_shape``, ``img``, ``ratio_pad`` and ``im_file``.
    """
    # Metrics
    for si, pred in enumerate(preds):
        # Select the ground-truth rows that belong to image ``si``.
        idx = batch["batch_idx"] == si
        cls = batch["cls"][idx]
        bbox = batch["bboxes"][idx]
        nl, npr = cls.shape[0], pred.shape[0]  # number of labels, predictions
        shape = batch["ori_shape"][si]
        correct_bboxes = torch.zeros(
            npr, args.niou, dtype=torch.bool, device=args.device
        )  # init
        args.seen += 1

        if npr == 0:
            # No predictions: record the labels (if any) with empty
            # confidence and predicted-class entries, then skip the image.
            if nl:
                args.stats.append(
                    (
                        correct_bboxes,
                        *torch.zeros((2, 0), device=args.device),
                        cls.squeeze(-1),
                    )
                )
                if args.plots:
                    args.confusion_matrix.process_batch(
                        detections=None, labels=cls.squeeze(-1)
                    )
            continue

        # Predictions
        if args.single_cls:
            pred[:, 5] = 0  # written in place, so the caller's tensor changes too
        predn = pred.clone()
        # NOTE(review): the return value is ignored, so scale_boxes presumably
        # rescales predn[:, :4] in place - confirm against ops.scale_boxes.
        ops.scale_boxes(
            batch["img"][si].shape[1:],
            predn[:, :4],
            shape,
            ratio_pad=batch["ratio_pad"][si],
        )  # native-space pred

        # Evaluate
        if nl:
            height, width = batch["img"].shape[2:]
            # The multiplication by (w, h, w, h) suggests the boxes are
            # stored normalized - TODO confirm against the dataloader.
            tbox = xywh2xyxy(bbox) * torch.tensor(
                (width, height, width, height), device=args.device
            )  # target boxes
            ops.scale_boxes(
                batch["img"][si].shape[1:],
                tbox,
                shape,
                ratio_pad=batch["ratio_pad"][si],
            )  # native-space labels
            labelsn = torch.cat((cls, tbox), 1)  # native-space labels
            correct_bboxes = _process_batch(args, predn, labelsn)
            if args.plots:
                args.confusion_matrix.process_batch(predn, labelsn)
        # Appended even when there are no labels; correct_bboxes is then all False.
        args.stats.append(
            (correct_bboxes, pred[:, 4], pred[:, 5], cls.squeeze(-1))
        )  # (conf, pcls, tcls)

        # Save
        if args.save_json:
            pred_to_json(args, predn, batch["im_file"][si])
def get_stats(args):
    """Turn the accumulated per-image statistics into final metrics.

    Concatenates each column of ``args.stats`` into one NumPy array and
    passes the columns to ``args.metrics.process`` when the first column
    contains any true value. Stores the per-class target counts in
    ``args.nt_per_class``.

    Args:
        args: State object providing ``stats`` (list of tuples of tensors),
            ``metrics`` (with ``process`` and ``results_dict``) and ``nc``.

    Returns:
        ``args.metrics.results_dict``.
    """
    stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*args.stats)]  # to numpy
    if not stats:
        # Nothing was accumulated, so there is no target column to count.
        # Report zero targets per class instead of failing on stats[-1].
        args.nt_per_class = np.zeros(args.nc, dtype=int)
        return args.metrics.results_dict

    if stats[0].any():
        args.metrics.process(*stats)
    # The last column holds the target classes.
    args.nt_per_class = np.bincount(
        stats[-1].astype(int), minlength=args.nc
    )  # number of targets per class

    return args.metrics.results_dict
def val(input_args):
    """Validate a compiled Torch-AIE detection model and report its metrics.

    Builds the validation configuration from the checkpoint at
    ``input_args.weight``, runs the compiled model at ``input_args.ts``
    over the validation dataloader, prints the metrics, and writes
    ``predictions.json`` into the run directory when predictions were
    collected.

    Args:
        input_args: Parsed CLI namespace.
            ``weight``: path of the ``.pt`` checkpoint.
            ``ts``: path of the compiled model.
            ``device_id``: index of the device to run on.
            ``batch``: optional dataloader batch size. When it is ``None``
            or absent, the value from the default configuration is used.
    """
    weights = input_args.weight
    pt_model, _ = attempt_load_one_weight(weights)
    task = pt_model.args["task"]
    overrides = pt_model.args.copy()
    _reset_ckpt_args(overrides)

    overrides["mode"] = "val"
    # Honour --batch when it is given. Previously the flag was parsed and
    # then ignored, so the dataloader always used the configured batch size.
    batch_size = getattr(input_args, "batch", None)
    if batch_size is not None:
        overrides["batch"] = int(batch_size)
    args = get_cfg(cfg=DEFAULT_CFG, overrides=overrides)
    args.task = task

    project = args.project or Path(SETTINGS["runs_dir"]) / args.task

    name = args.name or f"{args.mode}"
    args.save_dir = increment_path(
        Path(project) / name, exist_ok=args.exist_ok if RANK in {-1, 0} else True
    )
    args.save_dir.mkdir(parents=True, exist_ok=True)

    args.lb = []
    args.metrics = DetMetrics(save_dir=args.save_dir)
    args.iouv = torch.linspace(0.5, 0.95, 10)  # iou vector for mAP@0.5:0.95
    args.niou = args.iouv.numel()
    args.save_json = True
    args.device = "cpu"  # metrics and NMS are computed on the host
    args.training = False

    if isinstance(args.data, str) and args.data.endswith(".yaml"):
        args.data = check_det_dataset(args.data)

    args.dataloader = get_dataloader(args, args.data.get("val"), args.batch)

    # Use the requested device instead of a hard-coded 0. int() also accepts
    # the string form produced by an untyped CLI option.
    torch_aie.set_device(int(input_args.device_id))
    aie_model = torch.jit.load(input_args.ts)

    dt = Profile(), Profile(), Profile()
    n_batches = len(args.dataloader)
    desc = get_desc()
    bar = tqdm(args.dataloader, desc, n_batches, bar_format=TQDM_BAR_FORMAT)
    init_metrics(args, pt_model)
    for batch_i, batch in enumerate(bar):
        # preprocess
        with dt[0]:
            batch = preprocess(args, batch)

        # inference
        with dt[1]:
            preds = aie_model(batch["img"])

        # post-process predictions
        with dt[2]:
            preds = postprocess(args, preds)

        update_metrics(args, preds, batch)
        if args.plots and batch_i < 3:
            plot_val_samples(args, batch, batch_i)
            plot_predictions(args, batch, preds, batch_i)

    stats = get_stats(args)
    print_results(args)
    speed = tuple(
        x.t / len(args.dataloader.dataset) * 1e3 for x in dt
    )  # speeds per image
    args.logger.info(
        "Speed: %.1fms pre-process, %.1fms inference, %.1fms post-process per image"
        % speed
    )

    if args.save_json and args.jdict:
        with open(str(args.save_dir / "predictions.json"), "w") as f:
            args.logger.info(f"Saving {f.name}...")
            json.dump(args.jdict, f)  # flatten and save
        eval_json(args, stats)  # update stats


def main():
    """Parse the command line and run validation."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--weight", default="./yolov8n.pt", help="pt file path")
    parser.add_argument("--ts", default="./yolov8n.aie.ts", help="aie model path")
    parser.add_argument("--device_id", type=int, default=0, help="device id")
    # None keeps the configured batch size when the flag is omitted, which
    # is what happened before this flag was wired up.
    parser.add_argument(
        "--batch",
        type=int,
        default=None,
        help="batch size (defaults to the configured value)",
    )
    input_args = parser.parse_args()

    val(input_args)


if __name__ == "__main__":
    main()