diff --git a/Samples/DetectionRetrainingAndInfer/README.md b/Samples/DetectionRetrainingAndInfer/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f35ccfec1ff9cce6e1f6a266aed812c98ccbb5a6
--- /dev/null
+++ b/Samples/DetectionRetrainingAndInfer/README.md
@@ -0,0 +1,171 @@
+# Mask Detection: Retraining and Inference
+
+#### Sample Introduction
+
+Starting from a pretrained ssd-mobilenet model, this sample retrains the detector on a mask-wearing dataset to detect whether a mask is worn, covering the whole workflow from training to om-model inference.
+
+#### Sample Download
+
+The source code can be obtained in either of the following ways; choose one.
+
+- Command-line download (**takes longer, but the steps are simple**).
+
+  ```
+  # Log in to the developer board and run the following commands as the HwHiAiUser user to clone the repository.
+  cd ${HOME}
+  git clone https://gitee.com/ascend/EdgeAndRobotics.git
+  # Switch to the sample directory
+  cd EdgeAndRobotics/Samples/DetectionRetrainingAndInfer
+  ```
+
+- ZIP package download (**faster, but the steps are slightly more involved**).
+
+  ```
+  # 1. On the repository page, open the [Clone/Download] drop-down at the top right and select [Download ZIP].
+  # 2. Upload the ZIP package to the home directory of a normal user on the developer board, e.g. ${HOME}/EdgeAndRobotics-master.zip.
+  # 3. In the development environment, run the following commands to unpack the ZIP package.
+  cd ${HOME}
+  chmod +x EdgeAndRobotics-master.zip
+  unzip EdgeAndRobotics-master.zip
+  # 4. Switch to the sample directory
+  cd EdgeAndRobotics-master/Samples/DetectionRetrainingAndInfer
+  ```
+
+#### Preparation
+
+- The model in this sample works with PyTorch 2.1.0 and torchvision 0.16.0. Install PyTorch and the torch_npu plug-in as described in [Installing PyTorch](https://www.hiascend.com/document/detail/zh/canncommercial/700/envdeployment/instg/instg_0046.html).
+  ```
+  # Building torch_npu from source can be slow, so this sample provides a prebuilt torch_npu wheel for Python 3.9 and torch 2.1.
+  wget https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/wanzutao/torch_npu-2.1.0rc1-cp39-cp39-linux_aarch64.whl
+
+  # Install it with pip
+  pip3 install torch_npu-2.1.0rc1-cp39-cp39-linux_aarch64.whl
+  ```
+
+- The model also depends on a few other libraries (see the requirements.txt file in this sample directory). Install them with:
+
+  ```
+  pip3 install -r requirements.txt  # PyTorch 2.1
+  ```
+
+- Configure the environment variables required for offline inference.
+
+  ```
+  # Paths to the headers and libraries the sample is compiled against
+  export DDK_PATH=/usr/local/Ascend/ascend-toolkit/latest
+  export NPU_HOST_LIB=$DDK_PATH/runtime/lib64/stub
+  ```
+
+- Install the ACLLite library required for offline inference.
+
+  Install ACLLite as described in the [ACLLite repository](https://gitee.com/ascend/ACLLite).
+
+
+#### Model Training
+
+1. Log in to the developer board as the HwHiAiUser user and switch to the sample directory.
+2. Set environment variables to reduce the memory used by operator compilation.
+   ```
+   export TE_PARALLEL_COMPILER=1
+   export MAX_COMPILE_CORE_NUMBER=1
+   ```
+3. Prepare the dataset.
+   ```
+   cd dataset
+   wget https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/wanzutao/detection/mask.zip
+   unzip mask.zip
+   ```
+
+
+4. Preprocess the dataset and split it into a training set and a validation set.
+   ```
+   cd ..
+   python3 predata.py
+   ```
+5. Download the pretrained model.
+   ```
+   cd models
+   wget https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/wanzutao/detection/mobilenet-v1-ssd-mp-0_675.pth
+   ```
+6. Run the training script.
+
+   ```
+   cd ..
+   python3 main.py
+   ```
+   After training completes, the weight files are saved in the models directory and the training accuracy and performance are printed.
+
+   Training results for a single device with batch_size=8:
+   | NAME | Loss | FPS | Epochs | AMP_Type | Torch_Version |
+   | :----: | :---: | :---: | :----: | :------: | :-----------: |
+   | 1p-NPU | 1.8480 | 2 | 10 | O2 | 2.1 |
+
+
+#### Offline Inference
+
+1. Log in to the developer board as the HwHiAiUser user and switch to this sample directory.
+2. Export the ONNX model.
+   ```
+   python3 export.py
+   ```
+
+3. Get the test image.
+
+   ```
+   cd omInfer/data
+   wget https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/wanzutao/detection/mask.jpg
+   ```
+
+   **Note:** To use a different test image, prepare it yourself, place it in the omInfer/data directory, and change the image name in the code.
+
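+   (Optional) Before converting, you can sanity-check the exported ONNX file. This is a minimal check, assuming the `onnx` package from requirements.txt is installed and that export.py has already written `mobilenet-ssd.onnx` into the sample root directory; the printed names should match the `input_0` / `scores` / `boxes` names set in export.py.
+
+   ```
+   # run from omInfer/data; the exported model sits two levels up, in the sample root
+   python3 -c "import onnx; m = onnx.load('../../mobilenet-ssd.onnx'); onnx.checker.check_model(m); print([i.name for i in m.graph.input], [o.name for o in m.graph.output])"
+   ```
+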
+4. Obtain the mobilenet-ssd model (\*.onnx) exported from PyTorch and convert it into a model (\*.om) that the Ascend AI processor can run.
+   - When the device has less than **8 GB** of memory, you can set the following two environment variables to reduce the number of processes used during atc model conversion and lower the memory usage.
+   ```
+   export TE_PARALLEL_COMPILER=1
+   export MAX_COMPILE_CORE_NUMBER=1
+   ```
+   - For convenience, the model download and conversion commands are given here and can be copied and executed directly.
+   ```
+   # Copy the exported mobilenet-ssd.onnx model to the model directory
+   cd ../model
+   cp ../../mobilenet-ssd.onnx ./
+
+   # Get the AIPP configuration file
+   wget https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/wanzutao/detection/aipp.cfg
+
+   # Convert the model
+   atc --model=mobilenet-ssd.onnx --framework=5 --soc_version=Ascend310B4 --output=mobilenet-ssd --insert_op_conf=aipp.cfg
+   ```
+
+   The atc parameters used above are explained below; for detailed constraints see the [ATC Model Conversion Guide](https://hiascend.com/document/redirect/CannCommunityAtc).
+
+   - --model: path of the model file before conversion.
+   - --framework: original framework type. 5 means ONNX.
+   - --output: path of the converted model file. Note down where this om file is saved; it is needed later when developing the application.
+   - --insert\_op\_conf: path of the AIPP configuration file inserted for image preprocessing.
+   - --soc\_version: version of the Ascend AI processor.
+
+
+5. Build the sample source code.
+
+   Run the following commands to build the sample.
+
+   ```
+   cd ../scripts
+   bash sample_build.sh
+   ```
+
+6. Run the sample.
+
+   Run the following script:
+
+   ```
+   bash sample_run.sh
+   ```
+
+   After a successful run, the detection result image is generated in the omInfer/output directory.
+
+   ![detection result](omInfer/out_0.jpg)
+
+
+#### Related Operations
\ No newline at end of file
diff --git a/Samples/DetectionRetrainingAndInfer/dataset/.gitkeep b/Samples/DetectionRetrainingAndInfer/dataset/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/Samples/DetectionRetrainingAndInfer/export.py b/Samples/DetectionRetrainingAndInfer/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..d71b92fb9fdd431cf33c2b7d7b3e54160949c46d
--- /dev/null
+++ b/Samples/DetectionRetrainingAndInfer/export.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+# converts a saved PyTorch model to ONNX format
+import os
+import sys
+import argparse
+
+import torch.onnx
+
+from vision.ssd.mobilenetv1_ssd import create_mobilenetv1_ssd
+from vision.ssd.config import mobilenetv1_ssd_config
+
+# set the device
+device = torch.device('cpu')
+print(f"=> running on device {device}")
+
+# path of the trained checkpoint to export
+input = "models/best.pth"
+
+num_classes = 4
+resolution = 300
+net_name = 'ssd-mobilenet'
+# construct the network architecture
+print(f"=> creating network: {net_name}")
+print(f"=> num classes: {num_classes}")
+print(f"=> resolution: {resolution}x{resolution}")
+
+mobilenetv1_ssd_config.set_image_size(300)
+net = create_mobilenetv1_ssd(num_classes, is_test=True)
+# load the model checkpoint
+print(f"=> loading checkpoint: {input}")
+
+net.load(input)
+net.to(device)
+net.eval()
+
+# create example image data
+dummy_input = torch.randn(1, 3, resolution, resolution)
+output = 'mobilenet-ssd.onnx'
+
+# export to ONNX
+input_names = ['input_0']
+output_names = ['scores', 'boxes']
+
+print("=> exporting model to ONNX...")
+torch.onnx.export(net, dummy_input, output, verbose=True, input_names=input_names, output_names=output_names)
+print(f"model exported to: {output}")
diff --git a/Samples/DetectionRetrainingAndInfer/main.py b/Samples/DetectionRetrainingAndInfer/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..0f3cb0d6d1cf40c28e2bae9a4988be4a777d7cdc
--- /dev/null
+++ b/Samples/DetectionRetrainingAndInfer/main.py
@@ -0,0 +1,223 @@
+#!/usr/bin/env python3
+#
+# train an SSD detection model on Pascal VOC or Open Images datasets
+# https://github.com/dusty-nv/jetson-inference/blob/master/docs/pytorch-ssd.md
+#
+import os
+import sys
+import logging
+import datetime
+import torch
+import torch_npu
+from torch_npu.npu import amp
+
+from torch.utils.data import DataLoader
+from
torch.utils.tensorboard import SummaryWriter +from torch.optim.lr_scheduler import CosineAnnealingLR + +from vision.utils.misc import Timer +from vision.ssd.ssd import MatchPrior +from vision.ssd.mobilenetv1_ssd import create_mobilenetv1_ssd +from vision.dataset import VOCDataset +from vision.nn.multibox_loss import MultiboxLoss +from vision.ssd.config import mobilenetv1_ssd_config +from vision.ssd.data_preprocessing import TrainAugmentation, TestTransform + + +DEFAULT_PRETRAINED_MODEL='models/mobilenet-v1-ssd-mp-0_675.pth' + +logging.basicConfig(stream=sys.stdout, level=getattr(logging, "INFO", logging.INFO), + format='%(asctime)s - %(message)s', datefmt="%Y-%m-%d %H:%M:%S") +# make sure that the checkpoint output dir exists +checkpoint_folder = "models" +checkpoint_folder = os.path.expanduser(checkpoint_folder) +if not os.path.exists(checkpoint_folder): + os.mkdir(checkpoint_folder) +tensorboard = SummaryWriter(log_dir=os.path.join(checkpoint_folder, "tensorboard", f"{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}")) + +DEVICE = torch.device("npu:0") + +def train(loader, net, criterion, optimizer, device, scaler, debug_steps=100, epoch=-1): + net.train(True) + + train_loss = 0.0 + train_regression_loss = 0.0 + train_classification_loss = 0.0 + + running_loss = 0.0 + running_regression_loss = 0.0 + running_classification_loss = 0.0 + + num_batches = 0 + + for i, data in enumerate(loader): + images, boxes, labels = data + images = images.to(device) + boxes = boxes.to(device) + labels = labels.to(device) + + optimizer.zero_grad() + with amp.autocast(): + confidence, locations = net(images) + regression_loss, classification_loss = criterion(confidence, locations, labels, boxes) + loss = regression_loss + classification_loss + scaler.scale(loss).backward() + scaler.step(optimizer) + scaler.update() + + train_loss += loss.item() + train_regression_loss += regression_loss.item() + train_classification_loss += classification_loss.item() + + running_loss += loss.item() + running_regression_loss += regression_loss.item() + running_classification_loss += classification_loss.item() + + if i and i % debug_steps == 0: + avg_loss = running_loss / debug_steps + avg_reg_loss = running_regression_loss / debug_steps + avg_clf_loss = running_classification_loss / debug_steps + logging.info( + f"Epoch: {epoch}, Step: {i}/{len(loader)}, " + + f"Avg Loss: {avg_loss:.4f}, " + + f"Avg Regression Loss {avg_reg_loss:.4f}, " + + f"Avg Classification Loss: {avg_clf_loss:.4f}" + ) + running_loss = 0.0 + running_regression_loss = 0.0 + running_classification_loss = 0.0 + + num_batches += 1 + + train_loss /= num_batches + train_regression_loss /= num_batches + train_classification_loss /= num_batches + + logging.info( + f"Epoch: {epoch}, " + + f"Training Loss: {train_loss:.4f}, " + + f"Training Regression Loss {train_regression_loss:.4f}, " + + f"Training Classification Loss: {train_classification_loss:.4f}" + ) + + tensorboard.add_scalar('Loss/train', train_loss, epoch) + tensorboard.add_scalar('Regression Loss/train', train_regression_loss, epoch) + tensorboard.add_scalar('Classification Loss/train', train_classification_loss, epoch) + +def test(loader, net, criterion, device): + net.eval() + running_loss = 0.0 + running_regression_loss = 0.0 + running_classification_loss = 0.0 + num = 0 + for _, data in enumerate(loader): + images, boxes, labels = data + images = images.to(device) + boxes = boxes.to(device) + labels = labels.to(device) + num += 1 + with torch.no_grad(): + with amp.autocast(): + confidence, 
locations = net(images) + regression_loss, classification_loss = criterion(confidence, locations, labels, boxes) + loss = regression_loss + classification_loss + + running_loss += loss.item() + running_regression_loss += regression_loss.item() + running_classification_loss += classification_loss.item() + + return running_loss / num, running_regression_loss / num, running_classification_loss / num + +if __name__ == '__main__': + + timer = Timer() + create_net = create_mobilenetv1_ssd + config = mobilenetv1_ssd_config + config.set_image_size(300) + + # create data transforms for train/test/val + train_transform = TrainAugmentation(config.image_size, config.image_mean, config.image_std) + target_transform = MatchPrior(config.priors, config.center_variance, + config.size_variance, 0.5) + + test_transform = TestTransform(config.image_size, config.image_mean, config.image_std) + dataset_path = "dataset" + batch_size = 4 + num_workers = 3 + # load datasets (could be multiple) + logging.info("Prepare training datasets.") + train_dataset = VOCDataset(dataset_path, transform=train_transform, + target_transform=target_transform) + num_classes = len(train_dataset.class_names) + # create training dataset + logging.info("Train dataset size: {}".format(len(train_dataset))) + train_loader = DataLoader(train_dataset, batch_size, + num_workers=num_workers, + shuffle=True) + + # create validation dataset + val_dataset = VOCDataset(dataset_path, transform=test_transform, + target_transform=target_transform, is_test=True) + val_loader = DataLoader(val_dataset, batch_size, + num_workers=num_workers, + shuffle=False) + + # create the network + logging.info("Build network.") + net = create_net(num_classes) + last_epoch = -1 + + # load a previous model checkpoint (if requested) + timer.start("Load Model") + + logging.info(f"Init from pretrained SSD {DEFAULT_PRETRAINED_MODEL}") + + if not os.path.exists(DEFAULT_PRETRAINED_MODEL): + os.system(f"wget --quiet --show-progress --progress=bar:force:noscroll --no-check-certificate https://nvidia.box.com/shared/static/djf5w54rjvpqocsiztzaandq1m3avr7c.pth -O {DEFAULT_PRETRAINED_MODEL}") + + net.init_from_pretrained_ssd(DEFAULT_PRETRAINED_MODEL) + + logging.info(f'Took {timer.end("Load Model"):.2f} seconds to load the model.') + + # move the model to GPU + net.to(DEVICE) + + # define loss function and optimizer + criterion = MultiboxLoss(config.priors, iou_threshold=0.5, neg_pos_ratio=3, + center_variance=0.1, size_variance=0.2, device=DEVICE) + + optimizer = torch.optim.SGD(net.parameters(), lr=0.01, momentum=0.9, + weight_decay=5e-4) + scaler = amp.GradScaler() + logging.info("Uses CosineAnnealingLR scheduler.") + scheduler = CosineAnnealingLR(optimizer, 100, last_epoch=last_epoch) + + # train for the desired number of epochs + logging.info(f"Start training from epoch {last_epoch + 1}.") + num_epochs = 100 + best_loss = 10000 + model_path = os.path.join(checkpoint_folder, "best.pth") + for epoch in range(last_epoch + 1, num_epochs): + train(train_loader, net, criterion, optimizer, device=DEVICE, scaler=scaler, debug_steps=10, epoch=epoch) + scheduler.step() + val_loss, val_regression_loss, val_classification_loss = test(val_loader, net, criterion, DEVICE) + + logging.info( + f"Epoch: {epoch}, " + + f"Validation Loss: {val_loss:.4f}, " + + f"Validation Regression Loss {val_regression_loss:.4f}, " + + f"Validation Classification Loss: {val_classification_loss:.4f}" + ) + + tensorboard.add_scalar('Loss/val', val_loss, epoch) + tensorboard.add_scalar('Regression Loss/val', 
val_regression_loss, epoch) + tensorboard.add_scalar('Classification Loss/val', val_classification_loss, epoch) + + if val_loss < best_loss: + best_loss = val_loss + net.save(model_path) + logging.info(f"Saved model {model_path}") + + + logging.info("Task done, exiting program.") + tensorboard.close() \ No newline at end of file diff --git a/Samples/DetectionRetrainingAndInfer/models/.keep b/Samples/DetectionRetrainingAndInfer/models/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Samples/DetectionRetrainingAndInfer/omInfer/data/.keep b/Samples/DetectionRetrainingAndInfer/omInfer/data/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Samples/DetectionRetrainingAndInfer/omInfer/model/.keep b/Samples/DetectionRetrainingAndInfer/omInfer/model/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Samples/DetectionRetrainingAndInfer/omInfer/out_0.jpg b/Samples/DetectionRetrainingAndInfer/omInfer/out_0.jpg new file mode 100644 index 0000000000000000000000000000000000000000..03c559a549ea7a48b0dda1c756cc64f6536c345d Binary files /dev/null and b/Samples/DetectionRetrainingAndInfer/omInfer/out_0.jpg differ diff --git a/Samples/DetectionRetrainingAndInfer/omInfer/output/.keep b/Samples/DetectionRetrainingAndInfer/omInfer/output/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Samples/DetectionRetrainingAndInfer/omInfer/scripts/sample_build.sh b/Samples/DetectionRetrainingAndInfer/omInfer/scripts/sample_build.sh new file mode 100644 index 0000000000000000000000000000000000000000..d5837a40c294238f7f5d19c720efd49695b7989a --- /dev/null +++ b/Samples/DetectionRetrainingAndInfer/omInfer/scripts/sample_build.sh @@ -0,0 +1,40 @@ +#!/bin/bash +ScriptPath="$( cd "$(dirname "$BASH_SOURCE")" ; pwd -P )" + +function build() +{ + if [ -d ${ScriptPath}/../out ];then + rm -rf ${ScriptPath}/../out + fi + + if [ -d ${ScriptPath}/../build/intermediates/host ];then + rm -rf ${ScriptPath}/../build/intermediates/host + fi + + mkdir -p ${ScriptPath}/../build/intermediates/host + cd ${ScriptPath}/../build/intermediates/host + + cmake ../../../src -DCMAKE_CXX_COMPILER=g++ -DCMAKE_SKIP_RPATH=TRUE + if [ $? -ne 0 ];then + echo "[ERROR] cmake error, Please check your environment!" + return 1 + fi + make + if [ $? -ne 0 ];then + echo "[ERROR] build failed, Please check your environment!" + return 1 + fi + cd - > /dev/null +} + +function main() +{ + echo "[INFO] Sample preparation" + build + if [ $? -ne 0 ];then + return 1 + fi + echo "[INFO] Sample preparation is complete" +} +main + diff --git a/Samples/DetectionRetrainingAndInfer/omInfer/scripts/sample_run.sh b/Samples/DetectionRetrainingAndInfer/omInfer/scripts/sample_run.sh new file mode 100644 index 0000000000000000000000000000000000000000..2fe8dade67d7e7602a1a3f8b5eecad1fe4ed2a97 --- /dev/null +++ b/Samples/DetectionRetrainingAndInfer/omInfer/scripts/sample_run.sh @@ -0,0 +1,17 @@ +#!/bin/bash +ScriptPath="$( cd "$(dirname "$BASH_SOURCE")" ; pwd -P )" + +function main() +{ + echo "[INFO] The sample starts to run" + running_command="./main" + cd ${ScriptPath}/../out + ${running_command} + if [ $? 
-ne 0 ];then
+        echo "[ERROR] The program failed to run"
+    else
+        echo "[INFO] The program ran successfully"
+    fi
+}
+main
+
diff --git a/Samples/DetectionRetrainingAndInfer/omInfer/src/CMakeLists.txt b/Samples/DetectionRetrainingAndInfer/omInfer/src/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b448950e19f05a5055357d4898c509ee4e98e0dc
--- /dev/null
+++ b/Samples/DetectionRetrainingAndInfer/omInfer/src/CMakeLists.txt
@@ -0,0 +1,56 @@
+# Copyright (c) Huawei Technologies Co., Ltd. 2019. All rights reserved.
+
+cmake_minimum_required(VERSION 3.5.1)
+
+project(DetectionRetrainingAndInfer)
+
+add_compile_options(-std=c++11)
+
+add_definitions(-DENABLE_DVPP_INTERFACE)
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "../../../out")
+set(CMAKE_CXX_FLAGS_DEBUG "-fPIC -O0 -g -Wall")
+set(CMAKE_CXX_FLAGS_RELEASE "-fPIC -O2 -Wall")
+
+set(INC_PATH $ENV{DDK_PATH})
+if (NOT DEFINED ENV{DDK_PATH})
+    set(INC_PATH "/usr/local/Ascend/ascend-toolkit/latest")
+    message(STATUS "set default INC_PATH: ${INC_PATH}")
+else()
+    message(STATUS "set INC_PATH: ${INC_PATH}")
+endif ()
+
+set(LIB_PATH $ENV{NPU_HOST_LIB})
+if (NOT DEFINED ENV{NPU_HOST_LIB})
+    set(LIB_PATH "/usr/local/Ascend/ascend-toolkit/latest/runtime/lib64/stub")
+    message(STATUS "set default LIB_PATH: ${LIB_PATH}")
+else()
+    message(STATUS "set LIB_PATH: ${LIB_PATH}")
+endif ()
+
+find_package(OpenCV REQUIRED)
+find_path(AVCODEC_INCLUDE_DIR libavcodec/avcodec.h)
+find_library(AVCODEC_LIBRARY avcodec)
+
+include_directories(
+    ${OpenCV_INCLUDE_DIRS}
+    ${AVCODEC_INCLUDE_DIR}
+    ${INC_PATH}/runtime/include/
+    ./
+)
+
+link_directories(
+    ${OpenCV_LIB_DIRS}
+    ${AVCODEC_LIBRARY}
+    ${LIB_PATH}
+)
+
+add_executable(main
+        main.cpp)
+
+if(target STREQUAL "Simulator_Function")
+    target_link_libraries(main funcsim)
+else()
+    target_link_libraries(main ascendcl acl_dvpp stdc++ dl rt pthread acllite_dvpp_lite acllite_media acllite_om_execute acllite_common ${AVCODEC_LIBRARY} ${OpenCV_LIBS})
+endif()
+
+install(TARGETS main DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
diff --git a/Samples/DetectionRetrainingAndInfer/omInfer/src/main.cpp b/Samples/DetectionRetrainingAndInfer/omInfer/src/main.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f18a11a11a6c90d41cd1f1fc9659d0182ed2e3b7
--- /dev/null
+++ b/Samples/DetectionRetrainingAndInfer/omInfer/src/main.cpp
@@ -0,0 +1,137 @@
+#include <iostream>
+#include <string>
+#include <vector>
+#include <algorithm>
+#include "acllite_dvpp_lite/ImageProc.h"
+#include "acllite_om_execute/ModelProc.h"
+#include "opencv2/opencv.hpp"
+
+using namespace std;
+using namespace acllite;
+using namespace cv;
+
+typedef struct BoundBox {
+    float x;
+    float y;
+    float width;
+    float height;
+    float score;
+    int classIndex;
+} BoundBox;
+float iou(BoundBox box1, BoundBox box2)
+{
+    float xLeft = max(box1.x, box2.x);
+    float yTop = max(box1.y, box2.y);
+    float xRight = min(box1.x + box1.width, box2.x + box2.width);
+    float yBottom = min(box1.y + box1.height, box2.y + box2.height);
+    float width = max(0.0f, xRight - xLeft);
+    float height = max(0.0f, yBottom - yTop);
+    float area = width * height;
+    float iou = area / (box1.width * box1.height + box2.width * box2.height - area);
+    return iou;
+}
+bool sortScore(BoundBox box1, BoundBox box2)
+{
+    return box1.score > box2.score;
+}
+int main()
+{
+    vector<string> labels = { {"BACKGROUND"}, {"with_mask"}, {"mask_weared_incorrect"}, {"without_mask"} };
+    AclLiteResource aclResource;
+    bool ret = aclResource.Init();
+    CHECK_RET(ret, LOG_PRINT("[ERROR] InitACLResource failed."); return 1);
+
+    ImageProc imageProc;
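+    // Inference flow: load the om model, decode and resize the test image with DVPP,
+    // run the model, then filter the scores/boxes outputs per class (threshold + NMS)
+    // and draw the resulting detections onto the original image.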
+    ModelProc modelProc;
+    ret = modelProc.Load("../model/mobilenet-ssd.om");
+    CHECK_RET(ret, LOG_PRINT("[ERROR] load model mobilenet-ssd.om failed."); return 1);
+    string imagePath = "../data/mask.jpg";
+    ImageData src = imageProc.Read(imagePath);
+
+    CHECK_RET(src.size, LOG_PRINT("[ERROR] ImRead image failed."); return 1);
+    ImageData dst;
+    ImageSize dsize(300, 300);
+
+    imageProc.Resize(src, dst, dsize);
+    ret = modelProc.CreateInput(static_cast<void*>(dst.data.get()), dst.size);
+    CHECK_RET(ret, LOG_PRINT("[ERROR] Create model input failed."); return 1);
+    vector<InferenceOutput> inferOutputs;
+    ret = modelProc.Execute(inferOutputs);
+    CHECK_RET(ret, LOG_PRINT("[ERROR] model execute failed."); return 1);
+    uint32_t dataSize = inferOutputs[0].size;
+    uint32_t size = inferOutputs[1].size;
+    // get result from output data set
+    float* scores = static_cast<float*>(inferOutputs[0].data.get());
+    float* boxes = static_cast<float*>(inferOutputs[1].data.get());
+    if (scores == nullptr || boxes == nullptr) {
+        LOG_PRINT("get result from output data set failed.");
+        return 1;
+    }
+    size_t classNum = 4;
+    size_t boxes_nums = 3000;
+    size_t candidate_size = 200;
+    size_t top_k = 20;
+    float prob_threshold = 0.7;
+    float iou_threshold = 0.45;
+    const double fontScale = 0.5;
+    const uint32_t lineSolid = 2;
+    const uint32_t labelOffset = 11;
+    const cv::Scalar fontColor(0, 0, 255);
+    const vector<cv::Scalar> colors{
+        cv::Scalar(237, 149, 100), cv::Scalar(0, 215, 255),
+        cv::Scalar(50, 205, 50), cv::Scalar(139, 85, 26)};
+    cv::Mat srcImage = cv::imread(imagePath);
+    int width = srcImage.cols;
+    int height = srcImage.rows;
+    for (int index = 1; index < classNum; index++) {
+        vector<BoundBox> box_scores;
+        vector<BoundBox> result;
+        for (int j = 0; j < boxes_nums; ++j) {
+            if (scores[j * classNum + index] > prob_threshold) {
+                BoundBox box;
+                box.score = scores[j * classNum + index];
+                box.width = (boxes[4 * j + 2] - boxes[4 * j]) * width;
+                box.height = (boxes[4 * j + 3] - boxes[4 * j + 1]) * height;
+                box.x = boxes[4 * j] * width;
+                box.y = boxes[4 * j + 1] * height;
+                box.classIndex = index;
+                box_scores.push_back(box);
+            }
+        }
+        std::sort(box_scores.begin(), box_scores.end(), sortScore);
+        if (box_scores.size() > candidate_size) {
+            box_scores.erase(box_scores.begin() + candidate_size, box_scores.end());
+        }
+        int len = box_scores.size();
+        if (len > 0) {
+            for (int i = 0; i < box_scores.size(); i++) {
+                if (result.size() == top_k) break;
+                result.push_back(box_scores[i]);
+                for (int j = i + 1; j < box_scores.size(); j++) {
+                    float iou_t = iou(box_scores[i], box_scores[j]);
+                    if (iou_t > iou_threshold) {
+                        box_scores.erase(box_scores.begin() + j);
+                        j--;
+                    }
+                }
+            }
+        }
+        for (size_t i = 0; i < result.size(); ++i) {
+            cv::Point leftUpPoint, rightBottomPoint;
+            leftUpPoint.x = result[i].x;
+            leftUpPoint.y = result[i].y;
+            rightBottomPoint.x = result[i].x + result[i].width;
+            rightBottomPoint.y = result[i].y + result[i].height;
+            cv::rectangle(srcImage, leftUpPoint, rightBottomPoint, colors[i % colors.size()], lineSolid);
+            string className = labels[result[i].classIndex];
+            string markString = to_string(result[i].score) + ":" + className;
+            cv::putText(srcImage, markString, cv::Point(leftUpPoint.x, leftUpPoint.y + labelOffset),
+                        cv::FONT_HERSHEY_COMPLEX, fontScale, fontColor);
+        }
+
+    }
+    string savePath = "../output/out_0.jpg";
+    cv::imwrite(savePath, srcImage);
+    return 0;
+}
+
diff --git a/Samples/DetectionRetrainingAndInfer/predata.py b/Samples/DetectionRetrainingAndInfer/predata.py
new file mode 100644
index 0000000000000000000000000000000000000000..6b1030a1c592448611fd9cc218aca715bf7a2668
--- /dev/null
+++ b/Samples/DetectionRetrainingAndInfer/predata.py
@@ -0,0 +1,47 @@
+import os
+import shutil
+
+
+def prepare_data(main_xml_file, main_img_file, train_size, val_size):
+    # copy the first train_size samples into the train split
+    for i in range(0, train_size):
+        source_xml = main_xml_file + "/" + material[i] + ".xml"
+        source_img = main_img_file + "/" + material[i] + ".png"
+
+        train_destination_xml = "./dataset/mask/train/labels" + "/" + material[i] + ".xml"
+        train_destination_png = "./dataset/mask/train/images" + "/" + material[i] + ".png"
+
+        shutil.copy(source_xml, train_destination_xml)
+        shutil.copy(source_img, train_destination_png)
+    # copy the following val_size samples into the val split
+    for n in range(train_size, train_size + val_size):
+        source_xml = main_xml_file + "/" + material[n] + ".xml"
+        source_img = main_img_file + "/" + material[n] + ".png"
+
+        val_destination_xml = "./dataset/mask/val/labels" + "/" + material[n] + ".xml"
+        val_destination_png = "./dataset/mask/val/images" + "/" + material[n] + ".png"
+
+        shutil.copy(source_xml, val_destination_xml)
+        shutil.copy(source_img, val_destination_png)
+
+
+if __name__ == '__main__':
+    # collect sample names (file names without extension) from the images directory
+    material = []
+    for i in os.listdir("./dataset/images"):
+        material.append(i[:-4])
+    train_size = int(len(material) * 0.7)
+    val_size = int(len(material) * 0.3)
+    print("Number of images: ", len(material))
+    print("Train split size: ", train_size)
+    print("Val split size: ", val_size)
+    if not os.path.exists("./dataset/mask"):
+        os.mkdir('./dataset/mask')
+        os.mkdir('./dataset/mask/train')
+        os.mkdir('./dataset/mask/val')
+        os.mkdir('./dataset/mask/train/images')
+        os.mkdir('./dataset/mask/train/labels')
+        os.mkdir('./dataset/mask/val/images')
+        os.mkdir('./dataset/mask/val/labels')
+    prepare_data(main_xml_file="./dataset/annotations",
+                 main_img_file="./dataset/images",
+                 train_size=train_size,
+                 val_size=val_size)
diff --git a/Samples/DetectionRetrainingAndInfer/requirements.txt b/Samples/DetectionRetrainingAndInfer/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c70a80ecc442e0e3062fbbba0ad4439787dfd43a
--- /dev/null
+++ b/Samples/DetectionRetrainingAndInfer/requirements.txt
@@ -0,0 +1,5 @@
+onnx
+numpy
+opencv-python
+protobuf==3.20.2
+tensorboard
\ No newline at end of file
diff --git a/Samples/DetectionRetrainingAndInfer/vision/__init__.py b/Samples/DetectionRetrainingAndInfer/vision/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/Samples/DetectionRetrainingAndInfer/vision/dataset.py b/Samples/DetectionRetrainingAndInfer/vision/dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..e5cffe9d5dea1ab890b15527387d6195caf81201
--- /dev/null
+++ b/Samples/DetectionRetrainingAndInfer/vision/dataset.py
@@ -0,0 +1,160 @@
+#!/usr/bin/env python3
+import os
+import logging
+
+import torch
+import numpy as np
+import xml.etree.ElementTree as ET
+
+from PIL import Image
+
+
+class VOCDataset(torch.utils.data.Dataset):
+    """
+    Object detection dataset for Pascal VOC (http://host.robots.ox.ac.uk/pascal/VOC/)
+    """
+    def __init__(self, root, transform=None, target_transform=None, is_test=False, keep_difficult=False, label_file=None):
+        """
+        Dataset for VOC data.
+ + Parameters: + root (string) -- path to the VOC2007 or VOC2012 dataset, containing the following sub-directories: + Annotations, ImageSets, JPEGImages, SegmentationClass, SegmentationObject + + is_test (bool) -- if true, then use the data subset from `ImageSets/Main/test.txt` + if false, then use the data subset from `ImageSets/Main/trainval.txt` + if these files don't exist, then `ImageSets/Main/default.txt` will be used + """ + self.root = root + self.is_test = is_test + self.transform = transform + self.target_transform = target_transform + if not os.path.exists(os.path.join(self.root, 'mask')): + logging.info("No dataset, please prepare dataset") + # determine the image set file to use + if is_test: + self.image_sets_file = os.path.join(self.root, 'mask/val') + else: + self.image_sets_file = os.path.join(self.root, 'mask/train') + + + # read the image set ID's + self.ids = self._read_image_ids() + self.keep_difficult = keep_difficult + + self.class_names = ('BACKGROUND','with_mask', + 'mask_weared_incorrect','without_mask') + + self.class_dict = {class_name: i for i, class_name in enumerate(self.class_names)} + + def __getitem__(self, index): + image_id = self.ids[index] + boxes, labels, is_difficult = self._get_annotation(image_id) + + if not self.keep_difficult: + boxes = boxes[is_difficult == 0] + labels = labels[is_difficult == 0] + + if logging.root.level is logging.DEBUG: + logging.debug(f"voc_dataset image_id={image_id}" + ' \n boxes=' + str(boxes) + ' \n labels=' + str(labels)) + + image = self._read_image(image_id) + + if self.transform: + image, boxes, labels = self.transform(image, boxes, labels) + if self.target_transform: + boxes, labels = self.target_transform(boxes, labels) + + return image, boxes, labels + + def get_image(self, index): + image_id = self.ids[index] + image = self._read_image(image_id) + if self.transform: + image, _ = self.transform(image) + return image + + def get_annotation(self, index): + image_id = self.ids[index] + return image_id, self._get_annotation(image_id) + + def __len__(self): + return len(self.ids) + + def _read_image_ids(self): + ids = [] + for i in os.listdir(os.path.join(self.image_sets_file,"images")): + image_id = i[:-4] + + if self._get_num_annotations(image_id) > 0: + if self._find_image(image_id) is not None: + ids.append(image_id) + else: + print('warning - could not find image {:s} - ignoring from dataset'.format(image_id)) + else: + print('warning - image {:s} has no box/labels annotations, ignoring from dataset'.format(image_id)) + + return ids + + def _get_num_annotations(self, image_id): + annotation_file = os.path.join(self.image_sets_file, f'labels/{image_id}.xml') + objects = ET.parse(annotation_file).findall("object") + return len(objects) + + def _get_annotation(self, image_id): + annotation_file = os.path.join(self.image_sets_file, f'labels/{image_id}.xml') + objects = ET.parse(annotation_file).findall("object") + boxes = [] + labels = [] + is_difficult = [] + for object in objects: + class_name = object.find('name').text.strip() #.lower().strip() + # we're only concerned with clases in our list + if class_name in self.class_dict: + bbox = object.find('bndbox') + + # VOC dataset format follows Matlab, in which indexes start from 0 + x1 = float(bbox.find('xmin').text) - 1 + y1 = float(bbox.find('ymin').text) - 1 + x2 = float(bbox.find('xmax').text) - 1 + y2 = float(bbox.find('ymax').text) - 1 + boxes.append([x1, y1, x2, y2]) + + labels.append(self.class_dict[class_name]) + + # retrieve element + is_difficult_obj = 
object.find('difficult') + is_difficult_str = '0' + + if is_difficult_obj is not None: + is_difficult_str = object.find('difficult').text + + is_difficult.append(int(is_difficult_str) if is_difficult_str else 0) + else: + print(f"warning - image {image_id} has object with unknown class '{class_name}'") + + return (np.array(boxes, dtype=np.float32), + np.array(labels, dtype=np.int64), + np.array(is_difficult, dtype=np.uint8)) + + def _find_image(self, image_id): + image_file = os.path.join(self.image_sets_file, f'images/{image_id}.png') + if os.path.exists(image_file): + return image_file + return None + + def _read_image(self, image_id): + image_file = self._find_image(image_id) + + if image_file is None: + raise IOError(f"failed to find {image_file}") + + image = Image.open(image_file).convert('RGB') + + if image is None or image.size == 0: + raise IOError(f"invalid/corrupt image {image_file}") + + return np.asarray(image) + + + diff --git a/Samples/DetectionRetrainingAndInfer/vision/nn/__init__.py b/Samples/DetectionRetrainingAndInfer/vision/nn/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Samples/DetectionRetrainingAndInfer/vision/nn/mobilenet.py b/Samples/DetectionRetrainingAndInfer/vision/nn/mobilenet.py new file mode 100644 index 0000000000000000000000000000000000000000..98300df83ade171951a99c5e72b86eb5a4e18aa2 --- /dev/null +++ b/Samples/DetectionRetrainingAndInfer/vision/nn/mobilenet.py @@ -0,0 +1,52 @@ +# borrowed from "https://github.com/marvis/pytorch-mobilenet" + +import torch.nn as nn +import torch.nn.functional as F + + +class MobileNetV1(nn.Module): + def __init__(self, num_classes=1024): + super(MobileNetV1, self).__init__() + + def conv_bn(inp, oup, stride): + return nn.Sequential( + nn.Conv2d(inp, oup, 3, stride, 1, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU(inplace=True) + ) + + def conv_dw(inp, oup, stride): + return nn.Sequential( + nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), + nn.BatchNorm2d(inp), + nn.ReLU(inplace=True), + + nn.Conv2d(inp, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU(inplace=True), + ) + + self.model = nn.Sequential( + conv_bn(3, 32, 2), + conv_dw(32, 64, 1), + conv_dw(64, 128, 2), + conv_dw(128, 128, 1), + conv_dw(128, 256, 2), + conv_dw(256, 256, 1), + conv_dw(256, 512, 2), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 1024, 2), + conv_dw(1024, 1024, 1), + ) + self.fc = nn.Linear(1024, num_classes) + + def forward(self, x): + x = self.model(x) + x = F.avg_pool2d(x, 7) + x = x.view(-1, 1024) + x = self.fc(x) + return x \ No newline at end of file diff --git a/Samples/DetectionRetrainingAndInfer/vision/nn/multibox_loss.py b/Samples/DetectionRetrainingAndInfer/vision/nn/multibox_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..2351c7607b46dff4c35cee827bbf77bd04f9cc2a --- /dev/null +++ b/Samples/DetectionRetrainingAndInfer/vision/nn/multibox_loss.py @@ -0,0 +1,47 @@ +import torch.nn as nn +import torch.nn.functional as F +import torch + + +from ..utils import box_utils + + +class MultiboxLoss(nn.Module): + def __init__(self, priors, iou_threshold, neg_pos_ratio, + center_variance, size_variance, device): + """Implement SSD Multibox Loss. + + Basically, Multibox loss combines classification loss + and Smooth L1 regression loss. 
+ """ + super(MultiboxLoss, self).__init__() + self.iou_threshold = iou_threshold + self.neg_pos_ratio = neg_pos_ratio + self.center_variance = center_variance + self.size_variance = size_variance + self.priors = priors + self.priors.to(device) + + def forward(self, confidence, predicted_locations, labels, gt_locations): + """Compute classification loss and smooth l1 loss. + + Args: + confidence (batch_size, num_priors, num_classes): class predictions. + locations (batch_size, num_priors, 4): predicted locations. + labels (batch_size, num_priors): real labels of all the priors. + boxes (batch_size, num_priors, 4): real boxes corresponding all the priors. + """ + num_classes = confidence.size(2) + with torch.no_grad(): + # derived from cross_entropy=sum(log(p)) + loss = -F.log_softmax(confidence, dim=2)[:, :, 0] + mask = box_utils.hard_negative_mining(loss, labels, self.neg_pos_ratio) + + confidence = confidence[mask, :] + classification_loss = F.cross_entropy(confidence.reshape(-1, num_classes), labels[mask], size_average=False) + pos_mask = labels > 0 + predicted_locations = predicted_locations[pos_mask, :].reshape(-1, 4) + gt_locations = gt_locations[pos_mask, :].reshape(-1, 4) + smooth_l1_loss = F.smooth_l1_loss(predicted_locations, gt_locations, size_average=False) + num_pos = gt_locations.size(0) + return smooth_l1_loss/num_pos, classification_loss/num_pos diff --git a/Samples/DetectionRetrainingAndInfer/vision/nn/scaled_l2_norm.py b/Samples/DetectionRetrainingAndInfer/vision/nn/scaled_l2_norm.py new file mode 100644 index 0000000000000000000000000000000000000000..c1fd642e886cacf8bd409940e6bb18f4a9e0f501 --- /dev/null +++ b/Samples/DetectionRetrainingAndInfer/vision/nn/scaled_l2_norm.py @@ -0,0 +1,19 @@ +import torch.nn as nn +import torch +import torch.nn.functional as F + + +class ScaledL2Norm(nn.Module): + def __init__(self, in_channels, initial_scale): + super(ScaledL2Norm, self).__init__() + self.in_channels = in_channels + self.scale = nn.Parameter(torch.Tensor(in_channels)) + self.initial_scale = initial_scale + self.reset_parameters() + + def forward(self, x): + return (F.normalize(x, p=2, dim=1) + * self.scale.unsqueeze(0).unsqueeze(2).unsqueeze(3)) + + def reset_parameters(self): + self.scale.data.fill_(self.initial_scale) \ No newline at end of file diff --git a/Samples/DetectionRetrainingAndInfer/vision/ssd/__init__.py b/Samples/DetectionRetrainingAndInfer/vision/ssd/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Samples/DetectionRetrainingAndInfer/vision/ssd/config/__init__.py b/Samples/DetectionRetrainingAndInfer/vision/ssd/config/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Samples/DetectionRetrainingAndInfer/vision/ssd/config/mobilenetv1_ssd_config.py b/Samples/DetectionRetrainingAndInfer/vision/ssd/config/mobilenetv1_ssd_config.py new file mode 100644 index 0000000000000000000000000000000000000000..226767192218033a6b97dd6f0252f10783e53fae --- /dev/null +++ b/Samples/DetectionRetrainingAndInfer/vision/ssd/config/mobilenetv1_ssd_config.py @@ -0,0 +1,76 @@ +import numpy as np + +from vision.utils.box_utils import SSDSpec, SSDBoxSizes, generate_ssd_priors + +image_size = 300 +image_mean = np.array([127, 127, 127]) # RGB layout +image_std = 128.0 +iou_threshold = 0.45 +center_variance = 0.1 +size_variance = 0.2 + +specs = [ + SSDSpec(19, 16, SSDBoxSizes(60, 105), [2, 3]), + SSDSpec(10, 32, 
SSDBoxSizes(105, 150), [2, 3]), + SSDSpec(5, 64, SSDBoxSizes(150, 195), [2, 3]), + SSDSpec(3, 100, SSDBoxSizes(195, 240), [2, 3]), + SSDSpec(2, 150, SSDBoxSizes(240, 285), [2, 3]), + SSDSpec(1, 300, SSDBoxSizes(285, 330), [2, 3]) +] + +priors = generate_ssd_priors(specs, image_size) + + +def set_image_size(size=300, min_ratio=20, max_ratio=90): + global image_size + global specs + global priors + + from vision.ssd.mobilenetv1_ssd import create_mobilenetv1_ssd + + import torch + import math + import logging + + image_size = size + ssd = create_mobilenetv1_ssd(num_classes=3) # TODO does num_classes matter here? + x = torch.randn(1, 3, image_size, image_size) + + feature_maps = ssd(x, get_feature_map_size=True) + + steps = [ + math.ceil(image_size * 1.0 / feature_map) for feature_map in feature_maps + ] + step = int(math.floor((max_ratio - min_ratio) / (len(feature_maps) - 2))) + min_sizes = [] + max_sizes = [] + for ratio in range(min_ratio, max_ratio + 1, step): + min_sizes.append(image_size * ratio / 100.0) + max_sizes.append(image_size * (ratio + step) / 100.0) + min_sizes = [image_size * (min_ratio / 2) / 100.0] + min_sizes + max_sizes = [image_size * min_ratio / 100.0] + max_sizes + + # this update logic makes different boxes than the original for 300x300 (but better for power-of-two) + # for backwards-compatibility, keep the default 300x300 config if that's what's being called for + if image_size != 300: + specs = [] + + for i in range(len(feature_maps)): + specs.append( SSDSpec(feature_maps[i], steps[i], SSDBoxSizes(min_sizes[i], max_sizes[i]), [2, 3]) ) # ssd-mobilenet-* aspect ratio is [2,3] + + logging.info(f'model resolution {image_size}x{image_size}') + for spec in specs: + logging.info(str(spec)) + + priors = generate_ssd_priors(specs, image_size) + +#print(' ') +#print('SSD-Mobilenet-v1 priors:') +#print(priors.shape) +#print(priors) +#print(' ') + +#import torch +#torch.save(priors, 'mb1-ssd-priors.pt') + +#np.savetxt('mb1-ssd-priors.txt', priors.numpy()) diff --git a/Samples/DetectionRetrainingAndInfer/vision/ssd/data_preprocessing.py b/Samples/DetectionRetrainingAndInfer/vision/ssd/data_preprocessing.py new file mode 100644 index 0000000000000000000000000000000000000000..ca79fed8739042ed1dc09417512bb84d8686a4e9 --- /dev/null +++ b/Samples/DetectionRetrainingAndInfer/vision/ssd/data_preprocessing.py @@ -0,0 +1,62 @@ +from ..transforms.transforms import * + + +class TrainAugmentation: + def __init__(self, size, mean=0, std=1.0): + """ + Args: + size: the size the of final image. + mean: mean pixel value per channel. + """ + self.mean = mean + self.size = size + self.augment = Compose([ + ConvertFromInts(), + PhotometricDistort(), + Expand(self.mean), + RandomSampleCrop(), + RandomMirror(), + ToPercentCoords(), + Resize(self.size), + SubtractMeans(self.mean), + lambda img, boxes=None, labels=None: (img / std, boxes, labels), + ToTensor(), + ]) + + def __call__(self, img, boxes, labels): + """ + + Args: + img: the output of cv.imread in RGB layout. + boxes: boundding boxes in the form of (x1, y1, x2, y2). + labels: labels of boxes. 
+ """ + return self.augment(img, boxes, labels) + + +class TestTransform: + def __init__(self, size, mean=0.0, std=1.0): + self.transform = Compose([ + ToPercentCoords(), + Resize(size), + SubtractMeans(mean), + lambda img, boxes=None, labels=None: (img / std, boxes, labels), + ToTensor(), + ]) + + def __call__(self, image, boxes, labels): + return self.transform(image, boxes, labels) + + +class PredictionTransform: + def __init__(self, size, mean=0.0, std=1.0): + self.transform = Compose([ + Resize(size), + SubtractMeans(mean), + lambda img, boxes=None, labels=None: (img / std, boxes, labels), + ToTensor() + ]) + + def __call__(self, image): + image, _, _ = self.transform(image) + return image \ No newline at end of file diff --git a/Samples/DetectionRetrainingAndInfer/vision/ssd/mobilenetv1_ssd.py b/Samples/DetectionRetrainingAndInfer/vision/ssd/mobilenetv1_ssd.py new file mode 100644 index 0000000000000000000000000000000000000000..22e8a86dc4d0aa299b89e3b40cf5c715037e1d47 --- /dev/null +++ b/Samples/DetectionRetrainingAndInfer/vision/ssd/mobilenetv1_ssd.py @@ -0,0 +1,74 @@ +import torch +from torch.nn import Conv2d, Sequential, ModuleList, ReLU +from ..nn.mobilenet import MobileNetV1 + +from .ssd import SSD +from .predictor import Predictor +from .config import mobilenetv1_ssd_config as config + + +def create_mobilenetv1_ssd(num_classes, is_test=False): + base_net = MobileNetV1(1001).model # disable dropout layer + + source_layer_indexes = [ + 12, + 14, + ] + extras = ModuleList([ + Sequential( + Conv2d(in_channels=1024, out_channels=256, kernel_size=1), + ReLU(), + Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=2, padding=1), + ReLU() + ), + Sequential( + Conv2d(in_channels=512, out_channels=128, kernel_size=1), + ReLU(), + Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2, padding=1), + ReLU() + ), + Sequential( + Conv2d(in_channels=256, out_channels=128, kernel_size=1), + ReLU(), + Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2, padding=1), + ReLU() + ), + Sequential( + Conv2d(in_channels=256, out_channels=128, kernel_size=1), + ReLU(), + Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2, padding=1), + ReLU() + ) + ]) + + regression_headers = ModuleList([ + Conv2d(in_channels=512, out_channels=6 * 4, kernel_size=3, padding=1), + Conv2d(in_channels=1024, out_channels=6 * 4, kernel_size=3, padding=1), + Conv2d(in_channels=512, out_channels=6 * 4, kernel_size=3, padding=1), + Conv2d(in_channels=256, out_channels=6 * 4, kernel_size=3, padding=1), + Conv2d(in_channels=256, out_channels=6 * 4, kernel_size=3, padding=1), + Conv2d(in_channels=256, out_channels=6 * 4, kernel_size=3, padding=1), # TODO: change to kernel_size=1, padding=0? + ]) + + classification_headers = ModuleList([ + Conv2d(in_channels=512, out_channels=6 * num_classes, kernel_size=3, padding=1), + Conv2d(in_channels=1024, out_channels=6 * num_classes, kernel_size=3, padding=1), + Conv2d(in_channels=512, out_channels=6 * num_classes, kernel_size=3, padding=1), + Conv2d(in_channels=256, out_channels=6 * num_classes, kernel_size=3, padding=1), + Conv2d(in_channels=256, out_channels=6 * num_classes, kernel_size=3, padding=1), + Conv2d(in_channels=256, out_channels=6 * num_classes, kernel_size=3, padding=1), # TODO: change to kernel_size=1, padding=0? 
+ ]) + + return SSD(num_classes, base_net, source_layer_indexes, + extras, classification_headers, regression_headers, is_test=is_test, config=config) + + +def create_mobilenetv1_ssd_predictor(net, candidate_size=200, nms_method=None, sigma=0.5, device=None): + predictor = Predictor(net, config.image_size, config.image_mean, + config.image_std, + nms_method=nms_method, + iou_threshold=config.iou_threshold, + candidate_size=candidate_size, + sigma=sigma, + device=device) + return predictor diff --git a/Samples/DetectionRetrainingAndInfer/vision/ssd/predictor.py b/Samples/DetectionRetrainingAndInfer/vision/ssd/predictor.py new file mode 100644 index 0000000000000000000000000000000000000000..1afb5cb2d3cc3733d32bdb52ec6e61dad43499b6 --- /dev/null +++ b/Samples/DetectionRetrainingAndInfer/vision/ssd/predictor.py @@ -0,0 +1,82 @@ +import torch + +from ..utils import box_utils +from .data_preprocessing import PredictionTransform +from ..utils.misc import Timer + + +class Predictor: + def __init__(self, net, size, mean=0.0, std=1.0, nms_method=None, + iou_threshold=0.45, filter_threshold=0.01, candidate_size=200, sigma=0.5, device=None): + self.net = net + self.transform = PredictionTransform(size, mean, std) + self.iou_threshold = iou_threshold + self.filter_threshold = filter_threshold + self.candidate_size = candidate_size + self.nms_method = nms_method + self.sigma = sigma + + if device: + self.device = device + else: + self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + self.net.to(self.device) + self.timer = Timer() + + def predict(self, image, top_k=-1, prob_threshold=None): + cpu_device = torch.device("cpu") + height, width, _ = image.shape + + image = self.transform(image) + images = image.unsqueeze(0) + images = images.to(self.device) + + self.net.eval() + + with torch.no_grad(): + self.timer.start() + scores, boxes = self.net.forward(images) + #print("Inference time: ", self.timer.end()) + + boxes = boxes[0] + scores = scores[0] + + if not prob_threshold: + prob_threshold = self.filter_threshold + + # this version of nms is slower on GPU, so we move data to CPU. 
+ boxes = boxes.to(cpu_device) + scores = scores.to(cpu_device) + picked_box_probs = [] + picked_labels = [] + + for class_index in range(1, scores.size(1)): + probs = scores[:, class_index] + mask = probs > prob_threshold + probs = probs[mask] + + if probs.size(0) == 0: + continue + + subset_boxes = boxes[mask, :] + box_probs = torch.cat([subset_boxes, probs.reshape(-1, 1)], dim=1) + box_probs = box_utils.nms(box_probs, self.nms_method, + score_threshold=prob_threshold, + iou_threshold=self.iou_threshold, + sigma=self.sigma, + top_k=top_k, + candidate_size=self.candidate_size) + picked_box_probs.append(box_probs) + picked_labels.extend([class_index] * box_probs.size(0)) + + if not picked_box_probs: + return torch.tensor([]), torch.tensor([]), torch.tensor([]) + + picked_box_probs = torch.cat(picked_box_probs) + picked_box_probs[:, 0] *= width + picked_box_probs[:, 1] *= height + picked_box_probs[:, 2] *= width + picked_box_probs[:, 3] *= height + + return picked_box_probs[:, :4], torch.tensor(picked_labels), picked_box_probs[:, 4] diff --git a/Samples/DetectionRetrainingAndInfer/vision/ssd/ssd.py b/Samples/DetectionRetrainingAndInfer/vision/ssd/ssd.py new file mode 100644 index 0000000000000000000000000000000000000000..88bec1f9ca8754ffa92d6f478de0fb8f69b5fd8e --- /dev/null +++ b/Samples/DetectionRetrainingAndInfer/vision/ssd/ssd.py @@ -0,0 +1,177 @@ + +import torch +import torch.nn as nn +import torch.nn.functional as F + +import numpy as np + +from typing import List, Tuple +from collections import namedtuple + +from ..utils import box_utils + +GraphPath = namedtuple("GraphPath", ['s0', 'name', 's1']) + + +class SSD(nn.Module): + def __init__(self, num_classes: int, base_net: nn.ModuleList, source_layer_indexes: List[int], + extras: nn.ModuleList, classification_headers: nn.ModuleList, + regression_headers: nn.ModuleList, is_test=False, config=None, device=None): + """ + Compose a SSD model using the given components. 
+ """ + super(SSD, self).__init__() + + self.num_classes = num_classes + self.base_net = base_net + self.source_layer_indexes = source_layer_indexes + self.extras = extras + self.classification_headers = classification_headers + self.regression_headers = regression_headers + self.is_test = is_test + self.config = config + + # register layers in source_layer_indexes by adding them to a module list + self.source_layer_add_ons = nn.ModuleList([t[1] for t in source_layer_indexes + if isinstance(t, tuple) and not isinstance(t, GraphPath)]) + if device: + self.device = device + else: + self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + self.config = config + self.priors = config.priors.to(self.device) + + def forward(self, x: torch.Tensor, get_feature_map_size: bool=False) -> Tuple[torch.Tensor, torch.Tensor]: + confidences = [] + locations = [] + start_layer_index = 0 + header_index = 0 + if get_feature_map_size: + feature_maps = [] + for end_layer_index in self.source_layer_indexes: + if isinstance(end_layer_index, GraphPath): + path = end_layer_index + end_layer_index = end_layer_index.s0 + added_layer = None + elif isinstance(end_layer_index, tuple): + added_layer = end_layer_index[1] + end_layer_index = end_layer_index[0] + path = None + else: + added_layer = None + path = None + for layer in self.base_net[start_layer_index: end_layer_index]: + x = layer(x) + if added_layer: + y = added_layer(x) + else: + y = x + if path: + sub = getattr(self.base_net[end_layer_index], path.name) + for layer in sub[:path.s1]: + x = layer(x) + y = x + for layer in sub[path.s1:]: + x = layer(x) + end_layer_index += 1 + start_layer_index = end_layer_index + confidence, location = self.compute_header(header_index, y) + if get_feature_map_size: + feature_maps.append(y.shape[-1]) + header_index += 1 + confidences.append(confidence) + locations.append(location) + + for layer in self.base_net[end_layer_index:]: + x = layer(x) + + for layer in self.extras: + x = layer(x) + confidence, location = self.compute_header(header_index, x) + if get_feature_map_size: + feature_maps.append(x.shape[-1]) + header_index += 1 + confidences.append(confidence) + locations.append(location) + + if get_feature_map_size: + return feature_maps + + confidences = torch.cat(confidences, 1) + locations = torch.cat(locations, 1) + + if self.is_test: + confidences = F.softmax(confidences, dim=2) + boxes = box_utils.convert_locations_to_boxes( + locations, self.priors, self.config.center_variance, self.config.size_variance + ) + boxes = box_utils.center_form_to_corner_form(boxes) + return confidences, boxes + else: + return confidences, locations + + def compute_header(self, i, x): + confidence = self.classification_headers[i](x) + confidence = confidence.permute(0, 2, 3, 1).contiguous() + confidence = confidence.view(confidence.size(0), -1, self.num_classes) + + location = self.regression_headers[i](x) + location = location.permute(0, 2, 3, 1).contiguous() + location = location.view(location.size(0), -1, 4) + + return confidence, location + + def init_from_base_net(self, model): + self.base_net.load_state_dict(torch.load(model, map_location=lambda storage, loc: storage), strict=True) + self.source_layer_add_ons.apply(_xavier_init_) + self.extras.apply(_xavier_init_) + self.classification_headers.apply(_xavier_init_) + self.regression_headers.apply(_xavier_init_) + + def init_from_pretrained_ssd(self, model): + state_dict = torch.load(model, map_location=lambda storage, loc: storage) + state_dict = {k: v for k, v 
in state_dict.items() if not (k.startswith("classification_headers") or k.startswith("regression_headers"))} + model_dict = self.state_dict() + model_dict.update(state_dict) + self.load_state_dict(model_dict) + self.classification_headers.apply(_xavier_init_) + self.regression_headers.apply(_xavier_init_) + + def init(self): + self.base_net.apply(_xavier_init_) + self.source_layer_add_ons.apply(_xavier_init_) + self.extras.apply(_xavier_init_) + self.classification_headers.apply(_xavier_init_) + self.regression_headers.apply(_xavier_init_) + + def load(self, model): + self.load_state_dict(torch.load(model, map_location=lambda storage, loc: storage)) + + def save(self, model_path): + torch.save(self.state_dict(), model_path) + + +class MatchPrior(object): + def __init__(self, center_form_priors, center_variance, size_variance, iou_threshold): + self.center_form_priors = center_form_priors + self.corner_form_priors = box_utils.center_form_to_corner_form(center_form_priors) + self.center_variance = center_variance + self.size_variance = size_variance + self.iou_threshold = iou_threshold + + def __call__(self, gt_boxes, gt_labels): + if type(gt_boxes) is np.ndarray: + gt_boxes = torch.from_numpy(gt_boxes) + if type(gt_labels) is np.ndarray: + gt_labels = torch.from_numpy(gt_labels) + boxes, labels = box_utils.assign_priors(gt_boxes, gt_labels, + self.corner_form_priors, self.iou_threshold) + boxes = box_utils.corner_form_to_center_form(boxes) + locations = box_utils.convert_boxes_to_locations(boxes, self.center_form_priors, self.center_variance, self.size_variance) + return locations, labels + + +def _xavier_init_(m: nn.Module): + if isinstance(m, nn.Conv2d): + nn.init.xavier_uniform_(m.weight) diff --git a/Samples/DetectionRetrainingAndInfer/vision/transforms/__init__.py b/Samples/DetectionRetrainingAndInfer/vision/transforms/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Samples/DetectionRetrainingAndInfer/vision/transforms/transforms.py b/Samples/DetectionRetrainingAndInfer/vision/transforms/transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..753a6287d0ec45013bf5c5de353690efc4d6be23 --- /dev/null +++ b/Samples/DetectionRetrainingAndInfer/vision/transforms/transforms.py @@ -0,0 +1,409 @@ +# from https://github.com/amdegroot/ssd.pytorch + + +import torch +from torchvision import transforms +import cv2 +import numpy as np +import types +from numpy import random + + +def intersect(box_a, box_b): + max_xy = np.minimum(box_a[:, 2:], box_b[2:]) + min_xy = np.maximum(box_a[:, :2], box_b[:2]) + inter = np.clip((max_xy - min_xy), a_min=0, a_max=np.inf) + return inter[:, 0] * inter[:, 1] + + +def jaccard_numpy(box_a, box_b): + """Compute the jaccard overlap of two sets of boxes. The jaccard overlap + is simply the intersection over union of two boxes. + E.g.: + A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) + Args: + box_a: Multiple bounding boxes, Shape: [num_boxes,4] + box_b: Single bounding box, Shape: [4] + Return: + jaccard overlap: Shape: [box_a.shape[0], box_a.shape[1]] + """ + inter = intersect(box_a, box_b) + area_a = ((box_a[:, 2]-box_a[:, 0]) * + (box_a[:, 3]-box_a[:, 1])) # [A,B] + area_b = ((box_b[2]-box_b[0]) * + (box_b[3]-box_b[1])) # [A,B] + union = area_a + area_b - inter + return inter / union # [A,B] + + +class Compose(object): + """Composes several augmentations together. + Args: + transforms (List[Transform]): list of transforms to compose. 
+ Example: + >>> augmentations.Compose([ + >>> transforms.CenterCrop(10), + >>> transforms.ToTensor(), + >>> ]) + """ + + def __init__(self, transforms): + self.transforms = transforms + + def __call__(self, img, boxes=None, labels=None): + for t in self.transforms: + img, boxes, labels = t(img, boxes, labels) + return img, boxes, labels + + +class Lambda(object): + """Applies a lambda as a transform.""" + + def __init__(self, lambd): + assert isinstance(lambd, types.LambdaType) + self.lambd = lambd + + def __call__(self, img, boxes=None, labels=None): + return self.lambd(img, boxes, labels) + + +class ConvertFromInts(object): + def __call__(self, image, boxes=None, labels=None): + return image.astype(np.float32), boxes, labels + + +class SubtractMeans(object): + def __init__(self, mean): + self.mean = np.array(mean, dtype=np.float32) + + def __call__(self, image, boxes=None, labels=None): + image = image.astype(np.float32) + image -= self.mean + return image.astype(np.float32), boxes, labels + + +class ToAbsoluteCoords(object): + def __call__(self, image, boxes=None, labels=None): + height, width, channels = image.shape + boxes[:, 0] *= width + boxes[:, 2] *= width + boxes[:, 1] *= height + boxes[:, 3] *= height + + return image, boxes, labels + + +class ToPercentCoords(object): + def __call__(self, image, boxes=None, labels=None): + height, width, channels = image.shape + boxes[:, 0] /= width + boxes[:, 2] /= width + boxes[:, 1] /= height + boxes[:, 3] /= height + + return image, boxes, labels + + +class Resize(object): + def __init__(self, size=300): + self.size = size + + def __call__(self, image, boxes=None, labels=None): + image = cv2.resize(image, (self.size, + self.size)) + return image, boxes, labels + + +class RandomSaturation(object): + def __init__(self, lower=0.5, upper=1.5): + self.lower = lower + self.upper = upper + assert self.upper >= self.lower, "contrast upper must be >= lower." + assert self.lower >= 0, "contrast lower must be non-negative." 
+ + def __call__(self, image, boxes=None, labels=None): + if random.randint(2): + image[:, :, 1] *= random.uniform(self.lower, self.upper) + + return image, boxes, labels + + +class RandomHue(object): + def __init__(self, delta=18.0): + assert delta >= 0.0 and delta <= 360.0 + self.delta = delta + + def __call__(self, image, boxes=None, labels=None): + if random.randint(2): + image[:, :, 0] += random.uniform(-self.delta, self.delta) + image[:, :, 0][image[:, :, 0] > 360.0] -= 360.0 + image[:, :, 0][image[:, :, 0] < 0.0] += 360.0 + return image, boxes, labels + + +class RandomLightingNoise(object): + def __init__(self): + self.perms = ((0, 1, 2), (0, 2, 1), + (1, 0, 2), (1, 2, 0), + (2, 0, 1), (2, 1, 0)) + + def __call__(self, image, boxes=None, labels=None): + if random.randint(2): + swap = self.perms[random.randint(len(self.perms))] + shuffle = SwapChannels(swap) # shuffle channels + image = shuffle(image) + return image, boxes, labels + + +class ConvertColor(object): + def __init__(self, current, transform): + self.transform = transform + self.current = current + + def __call__(self, image, boxes=None, labels=None): + if self.current == 'BGR' and self.transform == 'HSV': + image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) + elif self.current == 'RGB' and self.transform == 'HSV': + image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV) + elif self.current == 'BGR' and self.transform == 'RGB': + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + elif self.current == 'HSV' and self.transform == 'BGR': + image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR) + elif self.current == 'HSV' and self.transform == "RGB": + image = cv2.cvtColor(image, cv2.COLOR_HSV2RGB) + else: + raise NotImplementedError + return image, boxes, labels + + +class RandomContrast(object): + def __init__(self, lower=0.5, upper=1.5): + self.lower = lower + self.upper = upper + assert self.upper >= self.lower, "contrast upper must be >= lower." + assert self.lower >= 0, "contrast lower must be non-negative." + + # expects float image + def __call__(self, image, boxes=None, labels=None): + if random.randint(2): + alpha = random.uniform(self.lower, self.upper) + image *= alpha + return image, boxes, labels + + +class RandomBrightness(object): + def __init__(self, delta=32): + assert delta >= 0.0 + assert delta <= 255.0 + self.delta = delta + + def __call__(self, image, boxes=None, labels=None): + if random.randint(2): + delta = random.uniform(-self.delta, self.delta) + image += delta + return image, boxes, labels + + +class ToCV2Image(object): + def __call__(self, tensor, boxes=None, labels=None): + return tensor.cpu().numpy().astype(np.float32).transpose((1, 2, 0)), boxes, labels + + +class ToTensor(object): + def __call__(self, cvimage, boxes=None, labels=None): + return torch.from_numpy(cvimage.astype(np.float32)).permute(2, 0, 1), boxes, labels + + +class RandomSampleCrop(object): + """Crop + Arguments: + img (Image): the image being input during training + boxes (Tensor): the original bounding boxes in pt form + labels (Tensor): the class labels for each bbox + mode (float tuple): the min and max jaccard overlaps + Return: + (img, boxes, classes) + img (Image): the cropped image + boxes (Tensor): the adjusted bounding boxes in pt form + labels (Tensor): the class labels for each bbox + """ + def __init__(self): + self.sample_options = ( + # using entire original input image + None, + # sample a patch s.t. 
MIN jaccard w/ obj in .1,.3,.4,.7,.9 + (0.1, None), + (0.3, None), + (0.7, None), + (0.9, None), + # randomly sample a patch + (None, None), + ) + + def __call__(self, image, boxes=None, labels=None): + height, width, _ = image.shape + while True: + # randomly choose a mode + #mode = random.choice(self.sample_options) # throws numpy deprecation warning + mode = self.sample_options[random.randint(len(self.sample_options))] + + if mode is None: + return image, boxes, labels + + min_iou, max_iou = mode + if min_iou is None: + min_iou = float('-inf') + if max_iou is None: + max_iou = float('inf') + + # max trails (50) + for _ in range(50): + current_image = image + + w = random.uniform(0.3 * width, width) + h = random.uniform(0.3 * height, height) + + # aspect ratio constraint b/t .5 & 2 + if h / w < 0.5 or h / w > 2: + continue + + left = random.uniform(width - w) + top = random.uniform(height - h) + + # convert to integer rect x1,y1,x2,y2 + rect = np.array([int(left), int(top), int(left+w), int(top+h)]) + + # calculate IoU (jaccard overlap) b/t the cropped and gt boxes + overlap = jaccard_numpy(boxes, rect) + + # is min and max overlap constraint satisfied? if not try again + if overlap.min() < min_iou and max_iou < overlap.max(): + continue + + # cut the crop from the image + current_image = current_image[rect[1]:rect[3], rect[0]:rect[2], + :] + + # keep overlap with gt box IF center in sampled patch + centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0 + + # mask in all gt boxes that above and to the left of centers + m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1]) + + # mask in all gt boxes that under and to the right of centers + m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1]) + + # mask in that both m1 and m2 are true + mask = m1 * m2 + + # have any valid boxes? try again if not + if not mask.any(): + continue + + # take only matching gt boxes + current_boxes = boxes[mask, :].copy() + + # take only matching gt labels + current_labels = labels[mask] + + # should we use the box left and top corner or the crop's + current_boxes[:, :2] = np.maximum(current_boxes[:, :2], + rect[:2]) + # adjust to crop (by substracting crop's left,top) + current_boxes[:, :2] -= rect[:2] + + current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:], + rect[2:]) + # adjust to crop (by substracting crop's left,top) + current_boxes[:, 2:] -= rect[:2] + + return current_image, current_boxes, current_labels + + +class Expand(object): + def __init__(self, mean): + self.mean = mean + + def __call__(self, image, boxes, labels): + if random.randint(2): + return image, boxes, labels + + height, width, depth = image.shape + ratio = random.uniform(1, 4) + left = random.uniform(0, width*ratio - width) + top = random.uniform(0, height*ratio - height) + + expand_image = np.zeros( + (int(height*ratio), int(width*ratio), depth), + dtype=image.dtype) + expand_image[:, :, :] = self.mean + expand_image[int(top):int(top + height), + int(left):int(left + width)] = image + image = expand_image + + boxes = boxes.copy() + boxes[:, :2] += (int(left), int(top)) + boxes[:, 2:] += (int(left), int(top)) + + return image, boxes, labels + + +class RandomMirror(object): + def __call__(self, image, boxes, classes): + _, width, _ = image.shape + if random.randint(2): + image = image[:, ::-1] + boxes = boxes.copy() + boxes[:, 0::2] = width - boxes[:, 2::-2] + return image, boxes, classes + + +class SwapChannels(object): + """Transforms a tensorized image by swapping the channels in the order + specified in the swap tuple. 
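+
+    Note: the swap is applied with fancy indexing on the last (channel) axis,
+    so the image must have channels in the last dimension (HWC), whether it is
+    a NumPy array or a tensor.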
+ Args: + swaps (int triple): final order of channels + eg: (2, 1, 0) + """ + + def __init__(self, swaps): + self.swaps = swaps + + def __call__(self, image): + """ + Args: + image (Tensor): image tensor to be transformed + Return: + a tensor with channels swapped according to swap + """ + # if torch.is_tensor(image): + # image = image.data.cpu().numpy() + # else: + # image = np.array(image) + image = image[:, :, self.swaps] + return image + + +class PhotometricDistort(object): + def __init__(self): + self.pd = [ + RandomContrast(), # RGB + ConvertColor(current="RGB", transform='HSV'), # HSV + RandomSaturation(), # HSV + RandomHue(), # HSV + ConvertColor(current='HSV', transform='RGB'), # RGB + RandomContrast() # RGB + ] + self.rand_brightness = RandomBrightness() + self.rand_light_noise = RandomLightingNoise() + + def __call__(self, image, boxes, labels): + im = image.copy() + im, boxes, labels = self.rand_brightness(im, boxes, labels) + if random.randint(2): + distort = Compose(self.pd[:-1]) + else: + distort = Compose(self.pd[1:]) + im, boxes, labels = distort(im, boxes, labels) + return self.rand_light_noise(im, boxes, labels) + diff --git a/Samples/DetectionRetrainingAndInfer/vision/utils/__init__.py b/Samples/DetectionRetrainingAndInfer/vision/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0789bdb3927dcd3ae7392783184d13652b1cc403 --- /dev/null +++ b/Samples/DetectionRetrainingAndInfer/vision/utils/__init__.py @@ -0,0 +1 @@ +from .misc import * diff --git a/Samples/DetectionRetrainingAndInfer/vision/utils/box_utils.py b/Samples/DetectionRetrainingAndInfer/vision/utils/box_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..42f2469554099ea5cd6c02ed7a5ec7c723af33b0 --- /dev/null +++ b/Samples/DetectionRetrainingAndInfer/vision/utils/box_utils.py @@ -0,0 +1,295 @@ +import collections +import torch +import itertools +from typing import List +import math + +SSDBoxSizes = collections.namedtuple('SSDBoxSizes', ['min', 'max']) + +SSDSpec = collections.namedtuple('SSDSpec', ['feature_map_size', 'shrinkage', 'box_sizes', 'aspect_ratios']) + + +def generate_ssd_priors(specs: List[SSDSpec], image_size, clamp=True) -> torch.Tensor: + """Generate SSD Prior Boxes. + + It returns the center, height and width of the priors. The values are relative to the image size + Args: + specs: SSDSpecs about the shapes of sizes of prior boxes. i.e. + specs = [ + SSDSpec(38, 8, SSDBoxSizes(30, 60), [2]), + SSDSpec(19, 16, SSDBoxSizes(60, 111), [2, 3]), + SSDSpec(10, 32, SSDBoxSizes(111, 162), [2, 3]), + SSDSpec(5, 64, SSDBoxSizes(162, 213), [2, 3]), + SSDSpec(3, 100, SSDBoxSizes(213, 264), [2]), + SSDSpec(1, 300, SSDBoxSizes(264, 315), [2]) + ] + image_size: image size. + clamp: if true, clamp the values to make fall between [0.0, 1.0] + Returns: + priors (num_priors, 4): The prior boxes represented as [[center_x, center_y, w, h]]. All the values + are relative to the image size. 
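+
+        With the specs listed above each feature-map cell yields
+        2 + 2 * len(aspect_ratios) priors, so the total prior count is
+        38*38*4 + 19*19*6 + 10*10*6 + 5*5*6 + 3*3*4 + 1*1*4 = 8732.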
+ """ + priors = [] + for spec in specs: + scale = image_size / spec.shrinkage + for j, i in itertools.product(range(spec.feature_map_size), repeat=2): + x_center = (i + 0.5) / scale + y_center = (j + 0.5) / scale + + # small sized square box + size = spec.box_sizes.min + h = w = size / image_size + priors.append([ + x_center, + y_center, + w, + h + ]) + + # big sized square box + size = math.sqrt(spec.box_sizes.max * spec.box_sizes.min) + h = w = size / image_size + priors.append([ + x_center, + y_center, + w, + h + ]) + + # change h/w ratio of the small sized box + size = spec.box_sizes.min + h = w = size / image_size + for ratio in spec.aspect_ratios: + ratio = math.sqrt(ratio) + priors.append([ + x_center, + y_center, + w * ratio, + h / ratio + ]) + priors.append([ + x_center, + y_center, + w / ratio, + h * ratio + ]) + + priors = torch.tensor(priors) + if clamp: + torch.clamp(priors, 0.0, 1.0, out=priors) + return priors + + +def convert_locations_to_boxes(locations, priors, center_variance, + size_variance): + """Convert regressional location results of SSD into boxes in the form of (center_x, center_y, h, w). + + The conversion: + $$predicted\_center * center_variance = \frac {real\_center - prior\_center} {prior\_hw}$$ + $$exp(predicted\_hw * size_variance) = \frac {real\_hw} {prior\_hw}$$ + We do it in the inverse direction here. + Args: + locations (batch_size, num_priors, 4): the regression output of SSD. It will contain the outputs as well. + priors (num_priors, 4) or (batch_size/1, num_priors, 4): prior boxes. + center_variance: a float used to change the scale of center. + size_variance: a float used to change of scale of size. + Returns: + boxes: priors: [[center_x, center_y, h, w]]. All the values + are relative to the image size. + """ + # priors can have one dimension less. + if priors.dim() + 1 == locations.dim(): + priors = priors.unsqueeze(0) + return torch.cat([ + locations[..., :2] * center_variance * priors[..., 2:] + priors[..., :2], + torch.exp(locations[..., 2:] * size_variance) * priors[..., 2:] + ], dim=locations.dim() - 1) + + +def convert_boxes_to_locations(center_form_boxes, center_form_priors, center_variance, size_variance): + # priors can have one dimension less + if center_form_priors.dim() + 1 == center_form_boxes.dim(): + center_form_priors = center_form_priors.unsqueeze(0) + return torch.cat([ + (center_form_boxes[..., :2] - center_form_priors[..., :2]) / center_form_priors[..., 2:] / center_variance, + torch.log(center_form_boxes[..., 2:] / center_form_priors[..., 2:]) / size_variance + ], dim=center_form_boxes.dim() - 1) + + +def area_of(left_top, right_bottom) -> torch.Tensor: + """Compute the areas of rectangles given two corners. + + Args: + left_top (N, 2): left top corner. + right_bottom (N, 2): right bottom corner. + + Returns: + area (N): return the area. + """ + hw = torch.clamp(right_bottom - left_top, min=0.0) + return hw[..., 0] * hw[..., 1] + + +def iou_of(boxes0, boxes1, eps=1e-5): + """Return intersection-over-union (Jaccard index) of boxes. + + Args: + boxes0 (N, 4): ground truth boxes. + boxes1 (N or 1, 4): predicted boxes. + eps: a small number to avoid 0 as denominator. + Returns: + iou (N): IoU values. 
+ """ + overlap_left_top = torch.max(boxes0[..., :2], boxes1[..., :2]) + overlap_right_bottom = torch.min(boxes0[..., 2:], boxes1[..., 2:]) + + overlap_area = area_of(overlap_left_top, overlap_right_bottom) + area0 = area_of(boxes0[..., :2], boxes0[..., 2:]) + area1 = area_of(boxes1[..., :2], boxes1[..., 2:]) + return overlap_area / (area0 + area1 - overlap_area + eps) + + +def assign_priors(gt_boxes, gt_labels, corner_form_priors, + iou_threshold): + """Assign ground truth boxes and targets to priors. + + Args: + gt_boxes (num_targets, 4): ground truth boxes. + gt_labels (num_targets): labels of targets. + priors (num_priors, 4): corner form priors + Returns: + boxes (num_priors, 4): real values for priors. + labels (num_priros): labels for priors. + """ + # size: num_priors x num_targets + ious = iou_of(gt_boxes.unsqueeze(0), corner_form_priors.unsqueeze(1)) + # size: num_priors + best_target_per_prior, best_target_per_prior_index = ious.max(1) + # size: num_targets + best_prior_per_target, best_prior_per_target_index = ious.max(0) + + for target_index, prior_index in enumerate(best_prior_per_target_index): + best_target_per_prior_index[prior_index] = target_index + # 2.0 is used to make sure every target has a prior assigned + best_target_per_prior.index_fill_(0, best_prior_per_target_index, 2) + # size: num_priors + labels = gt_labels[best_target_per_prior_index] + labels[best_target_per_prior < iou_threshold] = 0 # the backgournd id + boxes = gt_boxes[best_target_per_prior_index] + return boxes, labels + + +def hard_negative_mining(loss, labels, neg_pos_ratio): + """ + It used to suppress the presence of a large number of negative prediction. + It works on image level not batch level. + For any example/image, it keeps all the positive predictions and + cut the number of negative predictions to make sure the ratio + between the negative examples and positive examples is no more + the given ratio for an image. + + Args: + loss (N, num_priors): the loss for each example. + labels (N, num_priors): the labels. + neg_pos_ratio: the ratio between the negative examples and positive examples. + """ + pos_mask = labels > 0 + num_pos = pos_mask.long().sum(dim=1, keepdim=True) + num_neg = num_pos * neg_pos_ratio + + loss[pos_mask] = -math.inf + _, indexes = loss.sort(dim=1, descending=True) + _, orders = indexes.sort(dim=1) + neg_mask = orders < num_neg + return pos_mask | neg_mask + + +def center_form_to_corner_form(locations): + return torch.cat([locations[..., :2] - locations[..., 2:]/2, + locations[..., :2] + locations[..., 2:]/2], locations.dim() - 1) + + +def corner_form_to_center_form(boxes): + return torch.cat([ + (boxes[..., :2] + boxes[..., 2:]) / 2, + boxes[..., 2:] - boxes[..., :2] + ], boxes.dim() - 1) + + +def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200): + """ + + Args: + box_scores (N, 5): boxes in corner-form and probabilities. + iou_threshold: intersection over union threshold. + top_k: keep top_k results. If k <= 0, keep all the results. + candidate_size: only consider the candidates with the highest scores. 
+ Returns: + picked: a list of indexes of the kept boxes + """ + scores = box_scores[:, -1] + boxes = box_scores[:, :-1] + picked = [] + _, indexes = scores.sort(descending=True) + indexes = indexes[:candidate_size] + while len(indexes) > 0: + current = indexes[0] + picked.append(current.item()) + if 0 < top_k == len(picked) or len(indexes) == 1: + break + current_box = boxes[current, :] + indexes = indexes[1:] + rest_boxes = boxes[indexes, :] + iou = iou_of( + rest_boxes, + current_box.unsqueeze(0), + ) + indexes = indexes[iou <= iou_threshold] + + return box_scores[picked, :] + + +def nms(box_scores, nms_method=None, score_threshold=None, iou_threshold=None, + sigma=0.5, top_k=-1, candidate_size=200): + if nms_method == "soft": + return soft_nms(box_scores, score_threshold, sigma, top_k) + else: + return hard_nms(box_scores, iou_threshold, top_k, candidate_size=candidate_size) + + +def soft_nms(box_scores, score_threshold, sigma=0.5, top_k=-1): + """Soft NMS implementation. + + References: + https://arxiv.org/abs/1704.04503 + https://github.com/facebookresearch/Detectron/blob/master/detectron/utils/cython_nms.pyx + + Args: + box_scores (N, 5): boxes in corner-form and probabilities. + score_threshold: boxes with scores less than value are not considered. + sigma: the parameter in score re-computation. + scores[i] = scores[i] * exp(-(iou_i)^2 / simga) + top_k: keep top_k results. If k <= 0, keep all the results. + Returns: + picked_box_scores (K, 5): results of NMS. + """ + picked_box_scores = [] + while box_scores.size(0) > 0: + max_score_index = torch.argmax(box_scores[:, 4]) + cur_box_prob = torch.tensor(box_scores[max_score_index, :]) + picked_box_scores.append(cur_box_prob) + if len(picked_box_scores) == top_k > 0 or box_scores.size(0) == 1: + break + cur_box = cur_box_prob[:-1] + box_scores[max_score_index, :] = box_scores[-1, :] + box_scores = box_scores[:-1, :] + ious = iou_of(cur_box.unsqueeze(0), box_scores[:, :-1]) + box_scores[:, -1] = box_scores[:, -1] * torch.exp(-(ious * ious) / sigma) + box_scores = box_scores[box_scores[:, -1] > score_threshold, :] + if len(picked_box_scores) > 0: + return torch.stack(picked_box_scores) + else: + return torch.tensor([]) + + + diff --git a/Samples/DetectionRetrainingAndInfer/vision/utils/box_utils_numpy.py b/Samples/DetectionRetrainingAndInfer/vision/utils/box_utils_numpy.py new file mode 100644 index 0000000000000000000000000000000000000000..177456f2fb865516cfe5b22ff320c620a29109ca --- /dev/null +++ b/Samples/DetectionRetrainingAndInfer/vision/utils/box_utils_numpy.py @@ -0,0 +1,238 @@ +from .box_utils import SSDSpec + +from typing import List +import itertools +import math +import numpy as np + + +def generate_ssd_priors(specs: List[SSDSpec], image_size, clamp=True): + """Generate SSD Prior Boxes. + + It returns the center, height and width of the priors. The values are relative to the image size + Args: + specs: SSDSpecs about the shapes of sizes of prior boxes. i.e. + specs = [ + SSDSpec(38, 8, SSDBoxSizes(30, 60), [2]), + SSDSpec(19, 16, SSDBoxSizes(60, 111), [2, 3]), + SSDSpec(10, 32, SSDBoxSizes(111, 162), [2, 3]), + SSDSpec(5, 64, SSDBoxSizes(162, 213), [2, 3]), + SSDSpec(3, 100, SSDBoxSizes(213, 264), [2]), + SSDSpec(1, 300, SSDBoxSizes(264, 315), [2]) + ] + image_size: image size. + clamp: if true, clamp the values to make fall between [0.0, 1.0] + Returns: + priors (num_priors, 4): The prior boxes represented as [[center_x, center_y, w, h]]. All the values + are relative to the image size. 
+ """ + priors = [] + for spec in specs: + scale = image_size / spec.shrinkage + for j, i in itertools.product(range(spec.feature_map_size), repeat=2): + x_center = (i + 0.5) / scale + y_center = (j + 0.5) / scale + + # small sized square box + size = spec.box_sizes.min + h = w = size / image_size + priors.append([ + x_center, + y_center, + w, + h + ]) + + # big sized square box + size = math.sqrt(spec.box_sizes.max * spec.box_sizes.min) + h = w = size / image_size + priors.append([ + x_center, + y_center, + w, + h + ]) + + # change h/w ratio of the small sized box + size = spec.box_sizes.min + h = w = size / image_size + for ratio in spec.aspect_ratios: + ratio = math.sqrt(ratio) + priors.append([ + x_center, + y_center, + w * ratio, + h / ratio + ]) + priors.append([ + x_center, + y_center, + w / ratio, + h * ratio + ]) + + priors = np.array(priors, dtype=np.float32) + if clamp: + np.clip(priors, 0.0, 1.0, out=priors) + return priors + + +def convert_locations_to_boxes(locations, priors, center_variance, + size_variance): + """Convert regressional location results of SSD into boxes in the form of (center_x, center_y, h, w). + + The conversion: + $$predicted\_center * center_variance = \frac {real\_center - prior\_center} {prior\_hw}$$ + $$exp(predicted\_hw * size_variance) = \frac {real\_hw} {prior\_hw}$$ + We do it in the inverse direction here. + Args: + locations (batch_size, num_priors, 4): the regression output of SSD. It will contain the outputs as well. + priors (num_priors, 4) or (batch_size/1, num_priors, 4): prior boxes. + center_variance: a float used to change the scale of center. + size_variance: a float used to change of scale of size. + Returns: + boxes: priors: [[center_x, center_y, h, w]]. All the values + are relative to the image size. + """ + # priors can have one dimension less. + if len(priors.shape) + 1 == len(locations.shape): + priors = np.expand_dims(priors, 0) + return np.concatenate([ + locations[..., :2] * center_variance * priors[..., 2:] + priors[..., :2], + np.exp(locations[..., 2:] * size_variance) * priors[..., 2:] + ], axis=len(locations.shape) - 1) + + +def convert_boxes_to_locations(center_form_boxes, center_form_priors, center_variance, size_variance): + # priors can have one dimension less + if len(center_form_priors.shape) + 1 == len(center_form_boxes.shape): + center_form_priors = np.expand_dims(center_form_priors, 0) + return np.concatenate([ + (center_form_boxes[..., :2] - center_form_priors[..., :2]) / center_form_priors[..., 2:] / center_variance, + np.log(center_form_boxes[..., 2:] / center_form_priors[..., 2:]) / size_variance + ], axis=len(center_form_boxes.shape) - 1) + + +def area_of(left_top, right_bottom): + """Compute the areas of rectangles given two corners. + + Args: + left_top (N, 2): left top corner. + right_bottom (N, 2): right bottom corner. + + Returns: + area (N): return the area. + """ + hw = np.clip(right_bottom - left_top, 0.0, None) + return hw[..., 0] * hw[..., 1] + + +def iou_of(boxes0, boxes1, eps=1e-5): + """Return intersection-over-union (Jaccard index) of boxes. + + Args: + boxes0 (N, 4): ground truth boxes. + boxes1 (N or 1, 4): predicted boxes. + eps: a small number to avoid 0 as denominator. + Returns: + iou (N): IoU values. 
+ """ + overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2]) + overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:]) + + overlap_area = area_of(overlap_left_top, overlap_right_bottom) + area0 = area_of(boxes0[..., :2], boxes0[..., 2:]) + area1 = area_of(boxes1[..., :2], boxes1[..., 2:]) + return overlap_area / (area0 + area1 - overlap_area + eps) + + +def center_form_to_corner_form(locations): + return np.concatenate([locations[..., :2] - locations[..., 2:]/2, + locations[..., :2] + locations[..., 2:]/2], len(locations.shape) - 1) + + +def corner_form_to_center_form(boxes): + return np.concatenate([ + (boxes[..., :2] + boxes[..., 2:]) / 2, + boxes[..., 2:] - boxes[..., :2] + ], len(boxes.shape) - 1) + + +def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200): + """ + + Args: + box_scores (N, 5): boxes in corner-form and probabilities. + iou_threshold: intersection over union threshold. + top_k: keep top_k results. If k <= 0, keep all the results. + candidate_size: only consider the candidates with the highest scores. + Returns: + picked: a list of indexes of the kept boxes + """ + scores = box_scores[:, -1] + boxes = box_scores[:, :-1] + picked = [] + #_, indexes = scores.sort(descending=True) + indexes = np.argsort(scores) + #indexes = indexes[:candidate_size] + indexes = indexes[-candidate_size:] + while len(indexes) > 0: + #current = indexes[0] + current = indexes[-1] + picked.append(current) + if 0 < top_k == len(picked) or len(indexes) == 1: + break + current_box = boxes[current, :] + #indexes = indexes[1:] + indexes = indexes[:-1] + rest_boxes = boxes[indexes, :] + iou = iou_of( + rest_boxes, + np.expand_dims(current_box, axis=0), + ) + indexes = indexes[iou <= iou_threshold] + + return box_scores[picked, :] + + +# def nms(box_scores, nms_method=None, score_threshold=None, iou_threshold=None, +# sigma=0.5, top_k=-1, candidate_size=200): +# if nms_method == "soft": +# return soft_nms(box_scores, score_threshold, sigma, top_k) +# else: +# return hard_nms(box_scores, iou_threshold, top_k, candidate_size=candidate_size) + +# +# def soft_nms(box_scores, score_threshold, sigma=0.5, top_k=-1): +# """Soft NMS implementation. +# +# References: +# https://arxiv.org/abs/1704.04503 +# https://github.com/facebookresearch/Detectron/blob/master/detectron/utils/cython_nms.pyx +# +# Args: +# box_scores (N, 5): boxes in corner-form and probabilities. +# score_threshold: boxes with scores less than value are not considered. +# sigma: the parameter in score re-computation. +# scores[i] = scores[i] * exp(-(iou_i)^2 / simga) +# top_k: keep top_k results. If k <= 0, keep all the results. +# Returns: +# picked_box_scores (K, 5): results of NMS. 
+# """ +# picked_box_scores = [] +# while box_scores.size(0) > 0: +# max_score_index = torch.argmax(box_scores[:, 4]) +# cur_box_prob = torch.tensor(box_scores[max_score_index, :]) +# picked_box_scores.append(cur_box_prob) +# if len(picked_box_scores) == top_k > 0 or box_scores.size(0) == 1: +# break +# cur_box = cur_box_prob[:-1] +# box_scores[max_score_index, :] = box_scores[-1, :] +# box_scores = box_scores[:-1, :] +# ious = iou_of(cur_box.unsqueeze(0), box_scores[:, :-1]) +# box_scores[:, -1] = box_scores[:, -1] * torch.exp(-(ious * ious) / sigma) +# box_scores = box_scores[box_scores[:, -1] > score_threshold, :] +# if len(picked_box_scores) > 0: +# return torch.stack(picked_box_scores) +# else: +# return torch.tensor([]) diff --git a/Samples/DetectionRetrainingAndInfer/vision/utils/measurements.py b/Samples/DetectionRetrainingAndInfer/vision/utils/measurements.py new file mode 100644 index 0000000000000000000000000000000000000000..5cc590c1d9a94f64aab6b8e3ef4935fa3f2ba02d --- /dev/null +++ b/Samples/DetectionRetrainingAndInfer/vision/utils/measurements.py @@ -0,0 +1,32 @@ +import numpy as np + + +def compute_average_precision(precision, recall): + """ + It computes average precision based on the definition of Pascal Competition. It computes the under curve area + of precision and recall. Recall follows the normal definition. Precision is a variant. + pascal_precision[i] = typical_precision[i:].max() + """ + # identical but faster version of new_precision[i] = old_precision[i:].max() + precision = np.concatenate([[0.0], precision, [0.0]]) + for i in range(len(precision) - 1, 0, -1): + precision[i - 1] = np.maximum(precision[i - 1], precision[i]) + + # find the index where the value changes + recall = np.concatenate([[0.0], recall, [1.0]]) + changing_points = np.where(recall[1:] != recall[:-1])[0] + + # compute under curve area + areas = (recall[changing_points + 1] - recall[changing_points]) * precision[changing_points + 1] + return areas.sum() + + +def compute_voc2007_average_precision(precision, recall): + ap = 0. + for t in np.arange(0., 1.1, 0.1): + if np.sum(recall >= t) == 0: + p = 0 + else: + p = np.max(precision[recall >= t]) + ap = ap + p / 11. 
+ return ap diff --git a/Samples/DetectionRetrainingAndInfer/vision/utils/misc.py b/Samples/DetectionRetrainingAndInfer/vision/utils/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..e79545853dda8c6e583c487eb9a4a3120bfa896a --- /dev/null +++ b/Samples/DetectionRetrainingAndInfer/vision/utils/misc.py @@ -0,0 +1,45 @@ +import time +import torch + + +def str2bool(s): + return s.lower() in ('true', '1') + + +class Timer: + def __init__(self): + self.clock = {} + + def start(self, key="default"): + self.clock[key] = time.time() + + def end(self, key="default"): + if key not in self.clock: + raise Exception(f"{key} is not in the clock.") + interval = time.time() - self.clock[key] + del self.clock[key] + return interval + + +def save_checkpoint(epoch, net_state_dict, optimizer_state_dict, best_score, checkpoint_path, model_path): + torch.save({ + 'epoch': epoch, + 'model': net_state_dict, + 'optimizer': optimizer_state_dict, + 'best_score': best_score + }, checkpoint_path) + torch.save(net_state_dict, model_path) + + +def load_checkpoint(checkpoint_path): + return torch.load(checkpoint_path) + + +def freeze_net_layers(net): + for param in net.parameters(): + param.requires_grad = False + + +def store_labels(path, labels): + with open(path, "w") as f: + f.write("\n".join(labels)) diff --git a/Samples/DetectionRetrainingAndInfer/vision/utils/model_book.py b/Samples/DetectionRetrainingAndInfer/vision/utils/model_book.py new file mode 100644 index 0000000000000000000000000000000000000000..b1e9d17e9c3467e3261b5f4e962887f714b5bff0 --- /dev/null +++ b/Samples/DetectionRetrainingAndInfer/vision/utils/model_book.py @@ -0,0 +1,81 @@ +from collections import OrderedDict +import torch.nn as nn + + +class ModelBook: + """Maintain the mapping between modules and their paths. + + Example: + book = ModelBook(model_ft) + for p, m in book.conv2d_modules(): + print('path:', p, 'num of filters:', m.out_channels) + assert m is book.get_module(p) + """ + + def __init__(self, model): + self._model = model + self._modules = OrderedDict() + self._paths = OrderedDict() + path = [] + self._construct(self._model, path) + + def _construct(self, module, path): + if not module._modules: + return + for name, m in module._modules.items(): + cur_path = tuple(path + [name]) + self._paths[m] = cur_path + self._modules[cur_path] = m + self._construct(m, path + [name]) + + def conv2d_modules(self): + return self.modules(nn.Conv2d) + + def linear_modules(self): + return self.modules(nn.Linear) + + def modules(self, module_type=None): + for p, m in self._modules.items(): + if not module_type or isinstance(m, module_type): + yield p, m + + def num_of_conv2d_modules(self): + return self.num_of_modules(nn.Conv2d) + + def num_of_conv2d_filters(self): + """Return the sum of out_channels of all conv2d layers. + + Here we treat the sub weight with size of [in_channels, h, w] as a single filter. 
+ """ + num_filters = 0 + for _, m in self.conv2d_modules(): + num_filters += m.out_channels + return num_filters + + def num_of_linear_modules(self): + return self.num_of_modules(nn.Linear) + + def num_of_linear_filters(self): + num_filters = 0 + for _, m in self.linear_modules(): + num_filters += m.out_features + return num_filters + + def num_of_modules(self, module_type=None): + num = 0 + for p, m in self._modules.items(): + if not module_type or isinstance(m, module_type): + num += 1 + return num + + def get_module(self, path): + return self._modules.get(path) + + def get_path(self, module): + return self._paths.get(module) + + def update(self, path, module): + old_module = self._modules[path] + del self._paths[old_module] + self._paths[module] = path + self._modules[path] = module