diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/README.md b/models/cv/pose_estimation/lightweightopenpose/ixrt/README.md new file mode 100644 index 0000000000000000000000000000000000000000..cf25c1a887fd5ef8b5e788a57e0a214c31effc98 --- /dev/null +++ b/models/cv/pose_estimation/lightweightopenpose/ixrt/README.md @@ -0,0 +1,68 @@ +# Lightweight OpenPose + +## Description + +This work heavily optimizes the OpenPose approach to reach real-time inference on CPU with negligible accuracy drop. It detects a skeleton (which consists of keypoints and connections between them) to identify human poses for every person inside the image. The pose may contain up to 18 keypoints: ears, eyes, nose, neck, shoulders, elbows, wrists, hips, knees, and ankles. On the COCO 2017 Keypoint Detection validation set this code achieves 40% AP for single-scale inference (no flip or any post-processing done). + +## Setup + +### Install + +```bash +# Install libGL +## CentOS +yum install -y mesa-libGL +## Ubuntu +apt install -y libgl1-mesa-dev + +pip3 install onnx +pip3 install tqdm +pip3 install onnxsim +pip3 install simplejson +pip3 install opencv-python==4.6.0.66 +pip3 install mmcv==1.5.3 +pip3 install pycocotools +``` + +### Download +- dataset: http://cocodataset.org/#download +- checkpoints: https://download.01.org/opencv/openvino_training_extensions/models/human_pose_estimation/checkpoint_iter_370000.pth + +### Model Conversion + +```bash +# export onnx model +git clone https://github.com/Daniil-Osokin/lightweight-human-pose-estimation.pytorch.git +cd lightweight-human-pose-estimation.pytorch +mv scripts/convert_to_onnx.py . +python3 convert_to_onnx.py --checkpoint-path /Path/to/checkpoint_iter_370000.pth +cd .. +mkdir lightweight_openpose +onnxsim ./lightweight-human-pose-estimation.pytorch/human-pose-estimation.onnx ./lightweight_openpose/lightweight_openpose.onnx +``` + +## Inference + +```bash +export DATASETS_DIR=/Path/to/coco_pose/ +export CHECKPOINTS_DIR=/Path/to/lightweight_openpose/ +``` + +### FP16 + +```bash +# Accuracy +bash scripts/infer_lightweight_openpose_fp16_accuracy.sh +# Performance +bash scripts/infer_lightweight_openpose_fp16_performance.sh +``` + +## Results + +Model |BatchSize |Precision |FPS |IOU@0.5 |IOU@0.5:0.95 | ----------|-----------|----------|----------|----------|---------------| +Lightweight OpenPose | 1 | FP16 | 21030.833 | 0.660 | 0.401 | + +## Reference + +https://github.com/Daniil-Osokin/lightweight-human-pose-estimation.pytorch \ No newline at end of file diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/build_engine.py b/models/cv/pose_estimation/lightweightopenpose/ixrt/build_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..ccc3124b0a250f669192a74aaad300ebdd642367 --- /dev/null +++ b/models/cv/pose_estimation/lightweightopenpose/ixrt/build_engine.py @@ -0,0 +1,115 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License.
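+# NOTE: build_engine.py parses the simplified ONNX model and serializes two FP16
+# engines: a fixed-shape engine (used by the FPS/performance script) and a
+# dynamic-shape engine whose "data" input ranges from 1x3x340x340 up to
+# 1x3x380x1488 (used by the accuracy script, which feeds one padded image at a time).
+# Example invocation, mirroring scripts/infer_lightweight_openpose_fp16_*.sh:
+#   python3 build_engine.py --model_name lightweight_openpose \
+#       --onnx_path ${CHECKPOINTS_DIR}/lightweight_openpose.onnx \
+#       --engine_path ${CHECKPOINTS_DIR}/lightweight_openpose.engine \
+#       --engine_path_dynamicshape ${CHECKPOINTS_DIR}/lightweight_openpose_dynamicshape.engine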
+import os +import json +import onnx +import logging +import argparse + +import tensorrt +from tensorrt import Dims + + +def parse_config(): + parser = argparse.ArgumentParser(description="Build tensorrt engine of lightweight openpose", usage="") + parser.add_argument( + "--model_name", + type=str, + required=True, + help="model name lightweight openpose", + ) + parser.add_argument( + "--onnx_path", + type=str, + required=True, + help="The onnx path", + ) + parser.add_argument( + "--engine_path", + type=str, + required=True, + help="engine path to save", + ) + parser.add_argument( + "--engine_path_dynamicshape", + type=str, + required=True, + help="engine path to save(dynamic)", + ) + parser.add_argument( + "--device", + type=int, + default=0, + help="cuda device, i.e. 0 or 0,1,2,3,4" + ) + config = parser.parse_args() + return config + + +def build_engine_trtapi(config): + onnx_model = config.onnx_path + assert os.path.isfile(onnx_model), f"The onnx model{onnx_model} must be existed!" + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + + parser.parse_from_file(onnx_model) + build_config.set_flag(tensorrt.BuilderFlag.FP16) + + plan = builder.build_serialized_network(network, build_config) + with open(config.engine_path, "wb") as f: + f.write(plan) + + print("Build fixed shape engine done!") + + +def build_engine_trtapi_dynamicshape(config): + onnx_model = config.onnx_path + assert os.path.isfile(onnx_model), f"The onnx model{onnx_model} must be existed!" + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + + profile = builder.create_optimization_profile() + profile.set_shape( + "data", Dims([1, 3, 340, 340]), Dims([1, 3, 368, 744]), Dims([1, 3, 380, 1488]) + ) + build_config.add_optimization_profile(profile) + + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + + parser.parse_from_file(onnx_model) + build_config.set_flag(tensorrt.BuilderFlag.FP16) + + # set dynamic + input_tensor = network.get_input(0) + input_tensor.shape = Dims([-1, 3, -1, -1]) + + plan = builder.build_serialized_network(network, build_config) + with open(config.engine_path_dynamicshape, "wb") as f: + f.write(plan) + + print("Build dynamic shape engine done!") + + +if __name__ == "__main__": + config = parse_config() + build_engine_trtapi(config) + build_engine_trtapi_dynamicshape(config) diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/common.py b/models/cv/pose_estimation/lightweightopenpose/ixrt/common.py new file mode 100644 index 0000000000000000000000000000000000000000..657e1a87741465dd5a68cea18c3609f94ca61af5 --- /dev/null +++ b/models/cv/pose_estimation/lightweightopenpose/ixrt/common.py @@ -0,0 +1,166 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import os +import cv2 +import glob +import torch +import numpy as np +from PIL import Image +import torchvision.transforms.functional as F +from modules.keypoints import extract_keypoints, group_keypoints +from torch.utils.data.dataset import Dataset +import json + + +def check_target(inference, target): + satisfied = False + if inference > target: + satisfied = True + return satisfied + +def preprocess_img(img_path, img_sz): + img = cv2.imread(img_path) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + img = Image.fromarray(img) + img = F.resize(img, 256, Image.BILINEAR) + img = F.center_crop(img, img_sz) + img = F.to_tensor(img) + img = F.normalize(img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], inplace=False) + img = img.permute(1, 2, 0) + img = np.asarray(img, dtype='float32') + return img + +def get_dataloader(datasets_dir, bsz, imgsz, label_file_name="val_map.txt"): + label_file = os.path.join(datasets_dir, label_file_name) + with open(label_file, "r") as f: + label_data = f.readlines() + label_dict = dict() + for line in label_data: + line = line.strip().split('\t') + label_dict[line[0]] = int(line[1]) + + files = os.listdir(datasets_dir) + batch_img, batch_label = [], [] + + for file in files: + if file == label_file_name: + continue + file_path = os.path.join(datasets_dir, file) + img = preprocess_img(file_path, imgsz) + batch_img.append(np.expand_dims(img, 0)) + batch_label.append(label_dict[file]) + if len(batch_img) == bsz: + yield np.concatenate(batch_img, 0), np.array(batch_label) + batch_img, batch_label = [], [] + + if len(batch_img) > 0: + yield np.concatenate(batch_img, 0), np.array(batch_label) + +def eval_batch(batch_score, batch_label): + batch_score = torch.from_numpy(batch_score) + values, indices = batch_score.topk(5) + top1, top5 = 0, 0 + for idx, label in enumerate(batch_label): + + if label == indices[idx][0]: + top1 += 1 + if label in indices[idx]: + top5 += 1 + return top1, top5 + + + +def run_coco_eval(gt_file_path, dt_file_path): + annotation_type = 'keypoints' + print('Running test for {} results.'.format(annotation_type)) + + coco_gt = COCO(gt_file_path) + coco_dt = coco_gt.loadRes(dt_file_path) + + result = COCOeval(coco_gt, coco_dt, annotation_type) + result.evaluate() + result.accumulate() + result.summarize() + + +def normalize(img, img_mean, img_scale): + img = np.array(img, dtype=np.float32) + img = (img - img_mean) * img_scale + return img + + +def pad_width(img, stride, pad_value, min_dims): + h, w, _ = img.shape + h = min(min_dims[0], h) + min_dims[0] = math.ceil(min_dims[0] / float(stride)) * stride + min_dims[1] = max(min_dims[1], w) + min_dims[1] = math.ceil(min_dims[1] / float(stride)) * stride + pad = [] + pad.append(int(math.floor((min_dims[0] - h) / 2.0))) + pad.append(int(math.floor((min_dims[1] - w) / 2.0))) + pad.append(int(min_dims[0] - h - pad[0])) + pad.append(int(min_dims[1] - w - pad[1])) + padded_img = cv2.copyMakeBorder(img, pad[0], pad[2], pad[1], pad[3], + cv2.BORDER_CONSTANT, value=pad_value) + return padded_img, pad + + +def convert_to_coco_format(pose_entries, all_keypoints): + coco_keypoints = [] + scores = 
[] + for n in range(len(pose_entries)): + if len(pose_entries[n]) == 0: + continue + keypoints = [0] * 17 * 3 + to_coco_map = [0, -1, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3] + person_score = pose_entries[n][-2] + position_id = -1 + for keypoint_id in pose_entries[n][:-2]: + position_id += 1 + if position_id == 1: # no 'neck' in COCO + continue + + cx, cy, score, visibility = 0, 0, 0, 0 # keypoint not found + if keypoint_id != -1: + cx, cy, score = all_keypoints[int(keypoint_id), 0:3] + cx = cx + 0.5 + cy = cy + 0.5 + visibility = 1 + keypoints[to_coco_map[position_id] * 3 + 0] = cx + keypoints[to_coco_map[position_id] * 3 + 1] = cy + keypoints[to_coco_map[position_id] * 3 + 2] = visibility + coco_keypoints.append(keypoints) + scores.append(person_score * max(0, (pose_entries[n][-1] - 1))) # -1 for 'neck' + return coco_keypoints, scores + + +class CocoValDataset(Dataset): + def __init__(self, labels, images_folder): + super().__init__() + with open(labels, 'r') as f: + self._labels = json.load(f) + self._images_folder = images_folder + + def __getitem__(self, idx): + file_name = self._labels['images'][idx]['file_name'] + img = cv2.imread(os.path.join(self._images_folder, file_name), cv2.IMREAD_COLOR) + return { + 'img': img, + 'file_name': file_name + } + + def __len__(self): + return len(self._labels['images']) + diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/datasets/__init__.py b/models/cv/pose_estimation/lightweightopenpose/ixrt/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/datasets/coco.py b/models/cv/pose_estimation/lightweightopenpose/ixrt/datasets/coco.py new file mode 100644 index 0000000000000000000000000000000000000000..8e66b22746545675454ab2ec6c57e34c96afecee --- /dev/null +++ b/models/cv/pose_estimation/lightweightopenpose/ixrt/datasets/coco.py @@ -0,0 +1,193 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
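+# NOTE: these dataset helpers mirror the referenced lightweight-human-pose-estimation.pytorch
+# project: CocoTrainDataset builds per-image keypoint heatmaps and PAF (part affinity field)
+# training targets, while CocoValDataset yields raw val2017 images. The inference scripts in
+# this directory import CocoValDataset from common.py instead, so this module appears to be
+# retained for training/reference use rather than for the IxRT inference path.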
+import copy +import json +import math +import os +import pickle + +import cv2 +import numpy as np +import pycocotools + +from torch.utils.data.dataset import Dataset + +BODY_PARTS_KPT_IDS = [[1, 8], [8, 9], [9, 10], [1, 11], [11, 12], [12, 13], [1, 2], [2, 3], [3, 4], [2, 16], + [1, 5], [5, 6], [6, 7], [5, 17], [1, 0], [0, 14], [0, 15], [14, 16], [15, 17]] + + +def get_mask(segmentations, mask): + for segmentation in segmentations: + rle = pycocotools.mask.frPyObjects(segmentation, mask.shape[0], mask.shape[1]) + mask[pycocotools.mask.decode(rle) > 0.5] = 0 + return mask + + +class CocoTrainDataset(Dataset): + def __init__(self, labels, images_folder, stride, sigma, paf_thickness, transform=None): + super().__init__() + self._images_folder = images_folder + self._stride = stride + self._sigma = sigma + self._paf_thickness = paf_thickness + self._transform = transform + with open(labels, 'rb') as f: + self._labels = pickle.load(f) + + def __getitem__(self, idx): + label = copy.deepcopy(self._labels[idx]) # label modified in transform + image = cv2.imread(os.path.join(self._images_folder, label['img_paths']), cv2.IMREAD_COLOR) + mask = np.ones(shape=(label['img_height'], label['img_width']), dtype=np.float32) + mask = get_mask(label['segmentations'], mask) + sample = { + 'label': label, + 'image': image, + 'mask': mask + } + if self._transform: + sample = self._transform(sample) + + mask = cv2.resize(sample['mask'], dsize=None, fx=1/self._stride, fy=1/self._stride, interpolation=cv2.INTER_AREA) + keypoint_maps = self._generate_keypoint_maps(sample) + sample['keypoint_maps'] = keypoint_maps + keypoint_mask = np.zeros(shape=keypoint_maps.shape, dtype=np.float32) + for idx in range(keypoint_mask.shape[0]): + keypoint_mask[idx] = mask + sample['keypoint_mask'] = keypoint_mask + + paf_maps = self._generate_paf_maps(sample) + sample['paf_maps'] = paf_maps + paf_mask = np.zeros(shape=paf_maps.shape, dtype=np.float32) + for idx in range(paf_mask.shape[0]): + paf_mask[idx] = mask + sample['paf_mask'] = paf_mask + + image = sample['image'].astype(np.float32) + image = (image - 128) / 256 + sample['image'] = image.transpose((2, 0, 1)) + del sample['label'] + return sample + + def __len__(self): + return len(self._labels) + + def _generate_keypoint_maps(self, sample): + n_keypoints = 18 + n_rows, n_cols, _ = sample['image'].shape + keypoint_maps = np.zeros(shape=(n_keypoints + 1, + n_rows // self._stride, n_cols // self._stride), dtype=np.float32) # +1 for bg + + label = sample['label'] + for keypoint_idx in range(n_keypoints): + keypoint = label['keypoints'][keypoint_idx] + if keypoint[2] <= 1: + self._add_gaussian(keypoint_maps[keypoint_idx], keypoint[0], keypoint[1], self._stride, self._sigma) + for another_annotation in label['processed_other_annotations']: + keypoint = another_annotation['keypoints'][keypoint_idx] + if keypoint[2] <= 1: + self._add_gaussian(keypoint_maps[keypoint_idx], keypoint[0], keypoint[1], self._stride, self._sigma) + keypoint_maps[-1] = 1 - keypoint_maps.max(axis=0) + return keypoint_maps + + def _add_gaussian(self, keypoint_map, x, y, stride, sigma): + n_sigma = 4 + tl = [int(x - n_sigma * sigma), int(y - n_sigma * sigma)] + tl[0] = max(tl[0], 0) + tl[1] = max(tl[1], 0) + + br = [int(x + n_sigma * sigma), int(y + n_sigma * sigma)] + map_h, map_w = keypoint_map.shape + br[0] = min(br[0], map_w * stride) + br[1] = min(br[1], map_h * stride) + + shift = stride / 2 - 0.5 + for map_y in range(tl[1] // stride, br[1] // stride): + for map_x in range(tl[0] // stride, br[0] // 
stride): + d2 = (map_x * stride + shift - x) * (map_x * stride + shift - x) + \ + (map_y * stride + shift - y) * (map_y * stride + shift - y) + exponent = d2 / 2 / sigma / sigma + if exponent > 4.6052: # threshold, ln(100), ~0.01 + continue + keypoint_map[map_y, map_x] += math.exp(-exponent) + if keypoint_map[map_y, map_x] > 1: + keypoint_map[map_y, map_x] = 1 + + def _generate_paf_maps(self, sample): + n_pafs = len(BODY_PARTS_KPT_IDS) + n_rows, n_cols, _ = sample['image'].shape + paf_maps = np.zeros(shape=(n_pafs * 2, n_rows // self._stride, n_cols // self._stride), dtype=np.float32) + + label = sample['label'] + for paf_idx in range(n_pafs): + keypoint_a = label['keypoints'][BODY_PARTS_KPT_IDS[paf_idx][0]] + keypoint_b = label['keypoints'][BODY_PARTS_KPT_IDS[paf_idx][1]] + if keypoint_a[2] <= 1 and keypoint_b[2] <= 1: + self._set_paf(paf_maps[paf_idx * 2:paf_idx * 2 + 2], + keypoint_a[0], keypoint_a[1], keypoint_b[0], keypoint_b[1], + self._stride, self._paf_thickness) + for another_annotation in label['processed_other_annotations']: + keypoint_a = another_annotation['keypoints'][BODY_PARTS_KPT_IDS[paf_idx][0]] + keypoint_b = another_annotation['keypoints'][BODY_PARTS_KPT_IDS[paf_idx][1]] + if keypoint_a[2] <= 1 and keypoint_b[2] <= 1: + self._set_paf(paf_maps[paf_idx * 2:paf_idx * 2 + 2], + keypoint_a[0], keypoint_a[1], keypoint_b[0], keypoint_b[1], + self._stride, self._paf_thickness) + return paf_maps + + def _set_paf(self, paf_map, x_a, y_a, x_b, y_b, stride, thickness): + x_a /= stride + y_a /= stride + x_b /= stride + y_b /= stride + x_ba = x_b - x_a + y_ba = y_b - y_a + _, h_map, w_map = paf_map.shape + x_min = int(max(min(x_a, x_b) - thickness, 0)) + x_max = int(min(max(x_a, x_b) + thickness, w_map)) + y_min = int(max(min(y_a, y_b) - thickness, 0)) + y_max = int(min(max(y_a, y_b) + thickness, h_map)) + norm_ba = (x_ba * x_ba + y_ba * y_ba) ** 0.5 + if norm_ba < 1e-7: # Same points, no paf + return + x_ba /= norm_ba + y_ba /= norm_ba + + for y in range(y_min, y_max): + for x in range(x_min, x_max): + x_ca = x - x_a + y_ca = y - y_a + d = math.fabs(x_ca * y_ba - y_ca * x_ba) + if d <= thickness: + paf_map[0, y, x] = x_ba + paf_map[1, y, x] = y_ba + + +class CocoValDataset(Dataset): + def __init__(self, labels, images_folder): + super().__init__() + with open(labels, 'r') as f: + self._labels = json.load(f) + self._images_folder = images_folder + + def __getitem__(self, idx): + file_name = self._labels['images'][idx]['file_name'] + img = cv2.imread(os.path.join(self._images_folder, file_name), cv2.IMREAD_COLOR) + return { + 'img': img, + 'file_name': file_name + } + + def __len__(self): + return len(self._labels['images']) diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/datasets/transformations.py b/models/cv/pose_estimation/lightweightopenpose/ixrt/datasets/transformations.py new file mode 100644 index 0000000000000000000000000000000000000000..8fa16b1a113fabd376a3acc07724917ffa0e9d91 --- /dev/null +++ b/models/cv/pose_estimation/lightweightopenpose/ixrt/datasets/transformations.py @@ -0,0 +1,271 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import random + +import cv2 +import numpy as np + + +class ConvertKeypoints: + def __call__(self, sample): + label = sample['label'] + h, w, _ = sample['image'].shape + keypoints = label['keypoints'] + for keypoint in keypoints: # keypoint[2] == 0: occluded, == 1: visible, == 2: not in image + if keypoint[0] == keypoint[1] == 0: + keypoint[2] = 2 + if (keypoint[0] < 0 + or keypoint[0] >= w + or keypoint[1] < 0 + or keypoint[1] >= h): + keypoint[2] = 2 + for other_label in label['processed_other_annotations']: + keypoints = other_label['keypoints'] + for keypoint in keypoints: + if keypoint[0] == keypoint[1] == 0: + keypoint[2] = 2 + if (keypoint[0] < 0 + or keypoint[0] >= w + or keypoint[1] < 0 + or keypoint[1] >= h): + keypoint[2] = 2 + label['keypoints'] = self._convert(label['keypoints'], w, h) + + for other_label in label['processed_other_annotations']: + other_label['keypoints'] = self._convert(other_label['keypoints'], w, h) + return sample + + def _convert(self, keypoints, w, h): + # Nose, Neck, R hand, L hand, R leg, L leg, Eyes, Ears + reorder_map = [1, 7, 9, 11, 6, 8, 10, 13, 15, 17, 12, 14, 16, 3, 2, 5, 4] + converted_keypoints = list(keypoints[i - 1] for i in reorder_map) + converted_keypoints.insert(1, [(keypoints[5][0] + keypoints[6][0]) / 2, + (keypoints[5][1] + keypoints[6][1]) / 2, 0]) # Add neck as a mean of shoulders + if keypoints[5][2] == 2 or keypoints[6][2] == 2: + converted_keypoints[1][2] = 2 + elif keypoints[5][2] == 1 and keypoints[6][2] == 1: + converted_keypoints[1][2] = 1 + if (converted_keypoints[1][0] < 0 + or converted_keypoints[1][0] >= w + or converted_keypoints[1][1] < 0 + or converted_keypoints[1][1] >= h): + converted_keypoints[1][2] = 2 + return converted_keypoints + + +class Scale: + def __init__(self, prob=1, min_scale=0.5, max_scale=1.1, target_dist=0.6): + self._prob = prob + self._min_scale = min_scale + self._max_scale = max_scale + self._target_dist = target_dist + + def __call__(self, sample): + prob = random.random() + scale_multiplier = 1 + if prob <= self._prob: + prob = random.random() + scale_multiplier = (self._max_scale - self._min_scale) * prob + self._min_scale + label = sample['label'] + scale_abs = self._target_dist / label['scale_provided'] + scale = scale_abs * scale_multiplier + sample['image'] = cv2.resize(sample['image'], dsize=(0, 0), fx=scale, fy=scale) + label['img_height'], label['img_width'], _ = sample['image'].shape + sample['mask'] = cv2.resize(sample['mask'], dsize=(0, 0), fx=scale, fy=scale) + + label['objpos'][0] *= scale + label['objpos'][1] *= scale + for keypoint in sample['label']['keypoints']: + keypoint[0] *= scale + keypoint[1] *= scale + for other_annotation in sample['label']['processed_other_annotations']: + other_annotation['objpos'][0] *= scale + other_annotation['objpos'][1] *= scale + for keypoint in other_annotation['keypoints']: + keypoint[0] *= scale + keypoint[1] *= scale + return sample + + +class Rotate: + def __init__(self, pad, max_rotate_degree=40): + self._pad = pad + self._max_rotate_degree = max_rotate_degree + + def __call__(self, sample): + prob = random.random() + degree = (prob 
- 0.5) * 2 * self._max_rotate_degree + h, w, _ = sample['image'].shape + img_center = (w / 2, h / 2) + R = cv2.getRotationMatrix2D(img_center, degree, 1) + + abs_cos = abs(R[0, 0]) + abs_sin = abs(R[0, 1]) + + bound_w = int(h * abs_sin + w * abs_cos) + bound_h = int(h * abs_cos + w * abs_sin) + dsize = (bound_w, bound_h) + + R[0, 2] += dsize[0] / 2 - img_center[0] + R[1, 2] += dsize[1] / 2 - img_center[1] + sample['image'] = cv2.warpAffine(sample['image'], R, dsize=dsize, + borderMode=cv2.BORDER_CONSTANT, borderValue=self._pad) + sample['label']['img_height'], sample['label']['img_width'], _ = sample['image'].shape + sample['mask'] = cv2.warpAffine(sample['mask'], R, dsize=dsize, + borderMode=cv2.BORDER_CONSTANT, borderValue=(1, 1, 1)) # border is ok + label = sample['label'] + label['objpos'] = self._rotate(label['objpos'], R) + for keypoint in label['keypoints']: + point = [keypoint[0], keypoint[1]] + point = self._rotate(point, R) + keypoint[0], keypoint[1] = point[0], point[1] + for other_annotation in label['processed_other_annotations']: + for keypoint in other_annotation['keypoints']: + point = [keypoint[0], keypoint[1]] + point = self._rotate(point, R) + keypoint[0], keypoint[1] = point[0], point[1] + return sample + + def _rotate(self, point, R): + return [R[0, 0] * point[0] + R[0, 1] * point[1] + R[0, 2], + R[1, 0] * point[0] + R[1, 1] * point[1] + R[1, 2]] + + +class CropPad: + def __init__(self, pad, center_perterb_max=40, crop_x=368, crop_y=368): + self._pad = pad + self._center_perterb_max = center_perterb_max + self._crop_x = crop_x + self._crop_y = crop_y + + def __call__(self, sample): + prob_x = random.random() + prob_y = random.random() + + offset_x = int((prob_x - 0.5) * 2 * self._center_perterb_max) + offset_y = int((prob_y - 0.5) * 2 * self._center_perterb_max) + label = sample['label'] + shifted_center = (label['objpos'][0] + offset_x, label['objpos'][1] + offset_y) + offset_left = -int(shifted_center[0] - self._crop_x / 2) + offset_up = -int(shifted_center[1] - self._crop_y / 2) + + cropped_image = np.empty(shape=(self._crop_y, self._crop_x, 3), dtype=np.uint8) + for i in range(3): + cropped_image[:, :, i].fill(self._pad[i]) + cropped_mask = np.empty(shape=(self._crop_y, self._crop_x), dtype=np.uint8) + cropped_mask.fill(1) + + image_x_start = int(shifted_center[0] - self._crop_x / 2) + image_y_start = int(shifted_center[1] - self._crop_y / 2) + image_x_finish = image_x_start + self._crop_x + image_y_finish = image_y_start + self._crop_y + crop_x_start = 0 + crop_y_start = 0 + crop_x_finish = self._crop_x + crop_y_finish = self._crop_y + + w, h = label['img_width'], label['img_height'] + should_crop = True + if image_x_start < 0: # Adjust crop area + crop_x_start -= image_x_start + image_x_start = 0 + if image_x_start >= w: + should_crop = False + + if image_y_start < 0: + crop_y_start -= image_y_start + image_y_start = 0 + if image_y_start >= h: + should_crop = False + + if image_x_finish > w: + diff = image_x_finish - w + image_x_finish -= diff + crop_x_finish -= diff + if image_x_finish < 0: + should_crop = False + + if image_y_finish > h: + diff = image_y_finish - h + image_y_finish -= diff + crop_y_finish -= diff + if image_y_finish < 0: + should_crop = False + + if should_crop: + cropped_image[crop_y_start:crop_y_finish, crop_x_start:crop_x_finish, :] =\ + sample['image'][image_y_start:image_y_finish, image_x_start:image_x_finish, :] + cropped_mask[crop_y_start:crop_y_finish, crop_x_start:crop_x_finish] =\ + sample['mask'][image_y_start:image_y_finish, 
image_x_start:image_x_finish] + + sample['image'] = cropped_image + sample['mask'] = cropped_mask + label['img_width'] = self._crop_x + label['img_height'] = self._crop_y + + label['objpos'][0] += offset_left + label['objpos'][1] += offset_up + for keypoint in label['keypoints']: + keypoint[0] += offset_left + keypoint[1] += offset_up + for other_annotation in label['processed_other_annotations']: + for keypoint in other_annotation['keypoints']: + keypoint[0] += offset_left + keypoint[1] += offset_up + + return sample + + def _inside(self, point, width, height): + if point[0] < 0 or point[1] < 0: + return False + if point[0] >= width or point[1] >= height: + return False + return True + + +class Flip: + def __init__(self, prob=0.5): + self._prob = prob + + def __call__(self, sample): + prob = random.random() + do_flip = prob <= self._prob + if not do_flip: + return sample + + sample['image'] = cv2.flip(sample['image'], 1) + sample['mask'] = cv2.flip(sample['mask'], 1) + + label = sample['label'] + w, h = label['img_width'], label['img_height'] + label['objpos'][0] = w - 1 - label['objpos'][0] + for keypoint in label['keypoints']: + keypoint[0] = w - 1 - keypoint[0] + label['keypoints'] = self._swap_left_right(label['keypoints']) + + for other_annotation in label['processed_other_annotations']: + other_annotation['objpos'][0] = w - 1 - other_annotation['objpos'][0] + for keypoint in other_annotation['keypoints']: + keypoint[0] = w - 1 - keypoint[0] + other_annotation['keypoints'] = self._swap_left_right(other_annotation['keypoints']) + + return sample + + def _swap_left_right(self, keypoints): + right = [2, 3, 4, 8, 9, 10, 14, 16] + left = [5, 6, 7, 11, 12, 13, 15, 17] + for r, l in zip(right, left): + keypoints[r], keypoints[l] = keypoints[l], keypoints[r] + return keypoints diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/inference_accuracy.py b/models/cv/pose_estimation/lightweightopenpose/ixrt/inference_accuracy.py new file mode 100755 index 0000000000000000000000000000000000000000..ccb1dab002e7eef24288879e7130b744e7b3e9a1 --- /dev/null +++ b/models/cv/pose_estimation/lightweightopenpose/ixrt/inference_accuracy.py @@ -0,0 +1,307 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
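+# NOTE: this script deserializes the dynamic-shape engine and evaluates it on COCO val2017
+# at a single scale: each image is normalized (mean 128, scale 1/256), rescaled so its
+# height matches base_height (368), padded to a multiple of stride (8), and run through the
+# engine; the stage-1 heatmaps/PAFs are decoded with extract_keypoints / group_keypoints,
+# converted to COCO keypoint format, written to --output-name (detections.json by default),
+# and scored with pycocotools COCOeval.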
+ +import os +import sys +import cv2 +import time +import json +import math +import argparse +import numpy as np +from copy import deepcopy + +import torch +import tensorrt +from tensorrt import Dims +from cuda import cuda, cudart + +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval + +from common import CocoValDataset +from modules.keypoints import extract_keypoints, group_keypoints + + +def parse_config(): + parser = argparse.ArgumentParser() + parser.add_argument('--labels', type=str, default="annotations/person_keypoints_val2017.json", help='path to json with keypoints val labels') + parser.add_argument('--output-name', type=str, default='detections.json', help='name of output json file with detected keypoints') + parser.add_argument('--images-folder', type=str, default="val2017/", help='path to COCO val images folder') + parser.add_argument('--multiscale', action='store_true', help='average inference results over multiple scales') + parser.add_argument("--data_type", type=str, default="float16", help="int8 float16") + parser.add_argument("--model_type", type=str, default="lightweight_openose", help="EfficientNet ResNet50 Vgg16 MobileNet") + parser.add_argument("--test_mode", type=str, default="MAP", help="FPS MAP") + parser.add_argument("--graph_file", type=str, help="graph file path") + parser.add_argument("--weights_file",type=str, help="weights file path") + parser.add_argument("--engine_file", type=str, help="engine file path") + parser.add_argument("--quant_file", type=str, help="weights file path") + parser.add_argument("--datasets_dir", type=str, default="", help="coco pose dir") + parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") + parser.add_argument("--bsz", type=int, default=1, help="test batch size") + parser.add_argument("--imgh", type=int, default=256, help="inference size h") + parser.add_argument("--max_imgw", type=int, default=456, help="inference size max w") + parser.add_argument("--use_async", action="store_true") + parser.add_argument("--fixed_shape", action="store_true") + parser.add_argument("--device", type=int, default=0, help="cuda device, i.e. 
0 or 0,1,2,3,4") + parser.add_argument("--map_target", type=float, default=-1.0) + config = parser.parse_args() + return config + + +def openpose_trtapi_ixrt(config): + engine_file = config.engine_file + datatype = tensorrt.DataType.FLOAT + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + with open(engine_file, "rb") as f, tensorrt.Runtime(logger) as runtime: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + + +def setup_io_bindings(engine, context): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = context.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + err, allocation = cudart.cudaMalloc(size) + assert err == cudart.cudaError_t.cudaSuccess + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + "nbytes": size, + } + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations + + +def run_coco_eval(gt_file_path, dt_file_path): + annotation_type = 'keypoints' + print('Running test for {} results.'.format(annotation_type)) + + coco_gt = COCO(gt_file_path) + coco_dt = coco_gt.loadRes(dt_file_path) + + result = COCOeval(coco_gt, coco_dt, annotation_type) + result.evaluate() + result.accumulate() + result.summarize() + + +def normalize(img, img_mean, img_scale): + img = np.array(img, dtype=np.float32) + img = (img - img_mean) * img_scale + return img + + +def pad_width(img, stride, pad_value, min_dims): + h, w, _ = img.shape + h = min(min_dims[0], h) + min_dims[0] = math.ceil(min_dims[0] / float(stride)) * stride + min_dims[1] = max(min_dims[1], w) + min_dims[1] = math.ceil(min_dims[1] / float(stride)) * stride + pad = [] + pad.append(int(math.floor((min_dims[0] - h) / 2.0))) + pad.append(int(math.floor((min_dims[1] - w) / 2.0))) + pad.append(int(min_dims[0] - h - pad[0])) + pad.append(int(min_dims[1] - w - pad[1])) + padded_img = cv2.copyMakeBorder(img, pad[0], pad[2], pad[1], pad[3], + cv2.BORDER_CONSTANT, value=pad_value) + return padded_img, pad + + +def convert_to_coco_format(pose_entries, all_keypoints): + coco_keypoints = [] + scores = [] + for n in range(len(pose_entries)): + if len(pose_entries[n]) == 0: + continue + keypoints = [0] * 17 * 3 + to_coco_map = [0, -1, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3] + person_score = pose_entries[n][-2] + position_id = -1 + for keypoint_id in pose_entries[n][:-2]: + position_id += 1 + if position_id == 1: # no 'neck' in COCO + continue + + cx, cy, score, visibility = 0, 0, 0, 0 # keypoint not found + if keypoint_id != -1: + cx, cy, score = all_keypoints[int(keypoint_id), 0:3] + cx = cx + 0.5 + cy = cy + 0.5 + visibility = 1 + keypoints[to_coco_map[position_id] * 3 + 0] = cx + keypoints[to_coco_map[position_id] * 3 + 1] = cy + keypoints[to_coco_map[position_id] * 3 + 2] = visibility + coco_keypoints.append(keypoints) + scores.append(person_score * max(0, (pose_entries[n][-1] - 1))) # -1 for 'neck' + return coco_keypoints, scores + + +def infer(img, 
scales, engine, context, base_height, stride, config, pad_value=(0, 0, 0), img_mean=(128, 128, 128), img_scale=1/256): + + input_name = "data" + output_names = [ + "stage_0_output_1_heatmaps", + "stage_0_output_0_pafs", + "stage_1_output_1_heatmaps", + "stage_1_output_0_pafs" + ] + + normed_img = normalize(img, img_mean, img_scale) + height, width, _ = normed_img.shape + scales_ratios = [scale * base_height / float(height) for scale in scales] + avg_heatmaps = np.zeros((height, width, 19), dtype=np.float32) + avg_pafs = np.zeros((height, width, 38), dtype=np.float32) + + for ratio in scales_ratios: + scaled_img = cv2.resize(normed_img, (0, 0), fx=ratio, fy=ratio, interpolation=cv2.INTER_CUBIC) + min_dims = [base_height, max(scaled_img.shape[1], base_height)] + padded_img, pad = pad_width(scaled_img, stride, pad_value, min_dims) + hh, ww ,_ = padded_img.shape + + data_batch = padded_img.transpose(2, 0, 1) + data_batch = np.ascontiguousarray(data_batch.reshape(1, *data_batch.shape).astype(np.float32)) + + input_shape = [1, 3, hh, ww] + input_idx = engine.get_binding_index(input_name) + context.set_binding_shape(input_idx, Dims(input_shape)) + + inputs, outputs, allocations = setup_io_bindings(engine, context) + + pred_outputs = [] + for output in outputs: + pred_outputs.append(np.zeros(output["shape"], output["dtype"])) + err, = cuda.cuMemcpyHtoD(inputs[0]["allocation"], data_batch, data_batch.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + if config.use_async: + stream = cuda.Stream() + context.execute_async_v2(allocations, stream.handle) + stream.synchronize() + else: + context.execute_v2(allocations) + + for i, pred_output in enumerate(pred_outputs): + err, = cuda.cuMemcpyDtoH(pred_output, outputs[i]["allocation"], outputs[i]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + heatmaps = deepcopy(pred_outputs[2][0].transpose(1, 2, 0)).astype(np.float32) + heatmaps = cv2.resize(heatmaps[:,:,:19], (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC) + heatmaps = heatmaps[pad[0]:heatmaps.shape[0] - pad[2], pad[1]:heatmaps.shape[1] - pad[3]:, :] + heatmaps = cv2.resize(heatmaps, (width, height), interpolation=cv2.INTER_CUBIC) + avg_heatmaps = avg_heatmaps + heatmaps / len(scales_ratios) + + pafs = deepcopy(pred_outputs[3][0].transpose(1, 2, 0)).astype(np.float32) + pafs = cv2.resize(pafs[:,:,:38], (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC) + pafs = pafs[pad[0]:pafs.shape[0] - pad[2], pad[1]:pafs.shape[1] - pad[3], :] + pafs = cv2.resize(pafs, (width, height), interpolation=cv2.INTER_CUBIC) + avg_pafs = avg_pafs + pafs / len(scales_ratios) + + return avg_heatmaps, avg_pafs + + +def evaluate(labels, output_name, images_folder, engine, context, config, multiscale=False, visualize=False): + base_height = 368 + scales = [1] + if multiscale: + scales = [0.5, 1.0, 1.5, 2.0] + stride = 8 + + dataset = CocoValDataset(labels, images_folder) + coco_result = [] + for i, sample in enumerate(dataset): + file_name = sample['file_name'] + img = sample['img'] + if i % 20 == 1: + print("{}/{} img shape {}".format(i, len(dataset), img.shape)) + + avg_heatmaps, avg_pafs = infer(img, scales, engine, context, base_height, stride, config) + total_keypoints_num = 0 + all_keypoints_by_type = [] + for kpt_idx in range(18): # 19th for bg + total_keypoints_num += extract_keypoints(avg_heatmaps[:, :, kpt_idx], all_keypoints_by_type, total_keypoints_num) + # print("total_keypoints_num ",total_keypoints_num) + pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, 
avg_pafs) + + coco_keypoints, scores = convert_to_coco_format(pose_entries, all_keypoints) + # print(coco_keypoints) + + image_id = int(file_name[0:file_name.rfind('.')]) + for idx in range(len(coco_keypoints)): + coco_result.append({ + 'image_id': image_id, + 'category_id': 1, # person + 'keypoints': coco_keypoints[idx], + 'score': scores[idx] + }) + + + # if i<100 and total_keypoints_num > 0: + # for keypoints in coco_keypoints: + # for idx in range(len(keypoints) // 3): + # cv2.circle(img, (int(keypoints[idx * 3]), int(keypoints[idx * 3 + 1])), + # 3, (255, 0, 255), -1) + # save_name = "{}.jpg".format(i) + # cv2.imwrite(save_name, img) + + + with open(output_name, 'w') as f: + json.dump(coco_result, f, indent=4) + + run_coco_eval(labels, output_name) + + +def main(config): + engine, context = openpose_trtapi_ixrt(config) + print(" config and load model ok...") + evaluate(config.labels, config.output_name, config.images_folder, engine, context, config) + print(" done ...") + + +if __name__ == '__main__': + config = parse_config() + main(config) + + diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/inference_performance.py b/models/cv/pose_estimation/lightweightopenpose/ixrt/inference_performance.py new file mode 100755 index 0000000000000000000000000000000000000000..d472d6d64ecdee4d92c11083d9bdc75c729d1a6e --- /dev/null +++ b/models/cv/pose_estimation/lightweightopenpose/ixrt/inference_performance.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import sys +import time +import argparse +import numpy as np +from tqdm import tqdm +from copy import deepcopy + +import torch +import onnxruntime + +from common import check_target, get_dataloader, eval_batch + +import tensorrt +from cuda import cuda, cudart + + +def parse_config(): + parser = argparse.ArgumentParser(description="IXRT lightweight openpose") + parser.add_argument("--model_type", type=str, default="lightweight openpose", help="the model name") + parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") + parser.add_argument("--engine_file", type=str, help="engine file path") + parser.add_argument("--datasets_dir", type=str, default="", help="ImageNet dir") + parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") + parser.add_argument("--bsz", type=int, default=16, help="test batch size") + parser.add_argument("--imgh", type=int, default=256, help="inference size h") + parser.add_argument("--imgw", type=int, default=456, help="inference size w") + parser.add_argument("--use_async", action="store_true") + parser.add_argument("--device", type=int, default=0, help="cuda device, i.e. 
0 or 0,1,2,3,4") + parser.add_argument("--fps_target", type=float, default=-1.0) + parser.add_argument("--map_target", type=float, default=-1.0) + parser.add_argument("--run_loop", type=int, default=-1) + + config = parser.parse_args() + return config + + +def openpose_trtapi_ixrt(config): + engine_file = config.engine_file + datatype = tensorrt.DataType.FLOAT + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + with open(engine_file, "rb") as f, tensorrt.Runtime(logger) as runtime: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + + +def setup_io_bindings(engine, context): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = context.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + err, allocation = cudart.cudaMalloc(size) + assert err == cudart.cudaError_t.cudaSuccess + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + "nbytes": size, + } + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations + + +def main(config): + + engine, context = openpose_trtapi_ixrt(config) + inputs, outputs, allocations = setup_io_bindings(engine, context) + + output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + data_in = np.zeros(inputs[0]["shape"], inputs[0]["dtype"]) + + err, = cuda.cuMemcpyHtoD(inputs[0]["allocation"], data_in, data_in.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + # Warm up + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + context.execute_v2(allocations) + print("Warm Done.") + + if config.test_mode == "FPS": + torch.cuda.synchronize() + start_time = time.time() + for i in range(config.run_loop): + context.execute_v2(allocations) + + torch.cuda.synchronize() + end_time = time.time() + forward_time = end_time - start_time + + fps = config.run_loop * config.bsz / forward_time + print(f"\nCheck FPS Test : {fps} Target:{config.fps_target} State : {'Pass' if fps >= config.fps_target else 'Fail'}") + + +if __name__ == "__main__": + config = parse_config() + main(config) diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/modules/__init__.py b/models/cv/pose_estimation/lightweightopenpose/ixrt/modules/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/modules/keypoints.py b/models/cv/pose_estimation/lightweightopenpose/ixrt/modules/keypoints.py new file mode 100644 index 0000000000000000000000000000000000000000..c1cf70dd216697518c56fd04e227e89ad78f1884 --- /dev/null +++ b/models/cv/pose_estimation/lightweightopenpose/ixrt/modules/keypoints.py @@ -0,0 +1,173 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import math +import numpy as np +from operator import itemgetter + +BODY_PARTS_KPT_IDS = [[1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9], [9, 10], [1, 11], + [11, 12], [12, 13], [1, 0], [0, 14], [14, 16], [0, 15], [15, 17], [2, 16], [5, 17]] +BODY_PARTS_PAF_IDS = ([12, 13], [20, 21], [14, 15], [16, 17], [22, 23], [24, 25], [0, 1], [2, 3], [4, 5], + [6, 7], [8, 9], [10, 11], [28, 29], [30, 31], [34, 35], [32, 33], [36, 37], [18, 19], [26, 27]) + + +def extract_keypoints(heatmap, all_keypoints, total_keypoint_num): + heatmap[heatmap < 0.1] = 0 + heatmap_with_borders = np.pad(heatmap, [(2, 2), (2, 2)], mode='constant') + heatmap_center = heatmap_with_borders[1:heatmap_with_borders.shape[0]-1, 1:heatmap_with_borders.shape[1]-1] + heatmap_left = heatmap_with_borders[1:heatmap_with_borders.shape[0]-1, 2:heatmap_with_borders.shape[1]] + heatmap_right = heatmap_with_borders[1:heatmap_with_borders.shape[0]-1, 0:heatmap_with_borders.shape[1]-2] + heatmap_up = heatmap_with_borders[2:heatmap_with_borders.shape[0], 1:heatmap_with_borders.shape[1]-1] + heatmap_down = heatmap_with_borders[0:heatmap_with_borders.shape[0]-2, 1:heatmap_with_borders.shape[1]-1] + + heatmap_peaks = (heatmap_center > heatmap_left) &\ + (heatmap_center > heatmap_right) &\ + (heatmap_center > heatmap_up) &\ + (heatmap_center > heatmap_down) + heatmap_peaks = heatmap_peaks[1:heatmap_center.shape[0]-1, 1:heatmap_center.shape[1]-1] + keypoints = list(zip(np.nonzero(heatmap_peaks)[1], np.nonzero(heatmap_peaks)[0])) # (w, h) + keypoints = sorted(keypoints, key=itemgetter(0)) + + suppressed = np.zeros(len(keypoints), np.uint8) + keypoints_with_score_and_id = [] + keypoint_num = 0 + for i in range(len(keypoints)): + if suppressed[i]: + continue + for j in range(i+1, len(keypoints)): + if math.sqrt((keypoints[i][0] - keypoints[j][0]) ** 2 + + (keypoints[i][1] - keypoints[j][1]) ** 2) < 6: + suppressed[j] = 1 + keypoint_with_score_and_id = (keypoints[i][0], keypoints[i][1], heatmap[keypoints[i][1], keypoints[i][0]], + total_keypoint_num + keypoint_num) + keypoints_with_score_and_id.append(keypoint_with_score_and_id) + keypoint_num += 1 + all_keypoints.append(keypoints_with_score_and_id) + return keypoint_num + + +def connections_nms(a_idx, b_idx, affinity_scores): + # From all retrieved connections that share the same starting/ending keypoints leave only the top-scoring ones. 
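+    # Greedy selection: visit connections in descending affinity order and keep one only if
+    # neither of its endpoint keypoints has already been claimed, so each keypoint joins at
+    # most one connection for this limb type.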
+ order = affinity_scores.argsort()[::-1] + affinity_scores = affinity_scores[order] + a_idx = a_idx[order] + b_idx = b_idx[order] + idx = [] + has_kpt_a = set() + has_kpt_b = set() + for t, (i, j) in enumerate(zip(a_idx, b_idx)): + if i not in has_kpt_a and j not in has_kpt_b: + idx.append(t) + has_kpt_a.add(i) + has_kpt_b.add(j) + idx = np.asarray(idx, dtype=np.int32) + return a_idx[idx], b_idx[idx], affinity_scores[idx] + + +def group_keypoints(all_keypoints_by_type, pafs, pose_entry_size=20, min_paf_score=0.05): + pose_entries = [] + all_keypoints = np.array([item for sublist in all_keypoints_by_type for item in sublist]) + points_per_limb = 10 + grid = np.arange(points_per_limb, dtype=np.float32).reshape(1, -1, 1) + all_keypoints_by_type = [np.array(keypoints, np.float32) for keypoints in all_keypoints_by_type] + for part_id in range(len(BODY_PARTS_PAF_IDS)): + part_pafs = pafs[:, :, BODY_PARTS_PAF_IDS[part_id]] + kpts_a = all_keypoints_by_type[BODY_PARTS_KPT_IDS[part_id][0]] + kpts_b = all_keypoints_by_type[BODY_PARTS_KPT_IDS[part_id][1]] + n = len(kpts_a) + m = len(kpts_b) + if n == 0 or m == 0: + continue + + # Get vectors between all pairs of keypoints, i.e. candidate limb vectors. + a = kpts_a[:, :2] + a = np.broadcast_to(a[None], (m, n, 2)) + b = kpts_b[:, :2] + vec_raw = (b[:, None, :] - a).reshape(-1, 1, 2) + + # Sample points along every candidate limb vector. + steps = (1 / (points_per_limb - 1) * vec_raw) + points = steps * grid + a.reshape(-1, 1, 2) + points = points.round().astype(dtype=np.int32) + x = points[..., 0].ravel() + y = points[..., 1].ravel() + + # Compute affinity score between candidate limb vectors and part affinity field. + field = part_pafs[y, x].reshape(-1, points_per_limb, 2) + vec_norm = np.linalg.norm(vec_raw, ord=2, axis=-1, keepdims=True) + vec = vec_raw / (vec_norm + 1e-6) + affinity_scores = (field * vec).sum(-1).reshape(-1, points_per_limb) + valid_affinity_scores = affinity_scores > min_paf_score + valid_num = valid_affinity_scores.sum(1) + affinity_scores = (affinity_scores * valid_affinity_scores).sum(1) / (valid_num + 1e-6) + success_ratio = valid_num / points_per_limb + + # Get a list of limbs according to the obtained affinity score. + valid_limbs = np.where(np.logical_and(affinity_scores > 0, success_ratio > 0.8))[0] + if len(valid_limbs) == 0: + continue + b_idx, a_idx = np.divmod(valid_limbs, n) + affinity_scores = affinity_scores[valid_limbs] + + # Suppress incompatible connections. 
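+        # connections_nms keeps at most one connection per keypoint on each end of this
+        # limb; the surviving (global_kpt_a_id, global_kpt_b_id, score) triples are then
+        # merged into pose_entries below.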
+ a_idx, b_idx, affinity_scores = connections_nms(a_idx, b_idx, affinity_scores) + connections = list(zip(kpts_a[a_idx, 3].astype(np.int32), + kpts_b[b_idx, 3].astype(np.int32), + affinity_scores)) + if len(connections) == 0: + continue + + if part_id == 0: + pose_entries = [np.ones(pose_entry_size) * -1 for _ in range(len(connections))] + for i in range(len(connections)): + pose_entries[i][BODY_PARTS_KPT_IDS[0][0]] = connections[i][0] + pose_entries[i][BODY_PARTS_KPT_IDS[0][1]] = connections[i][1] + pose_entries[i][-1] = 2 + pose_entries[i][-2] = np.sum(all_keypoints[connections[i][0:2], 2]) + connections[i][2] + elif part_id == 17 or part_id == 18: + kpt_a_id = BODY_PARTS_KPT_IDS[part_id][0] + kpt_b_id = BODY_PARTS_KPT_IDS[part_id][1] + for i in range(len(connections)): + for j in range(len(pose_entries)): + if pose_entries[j][kpt_a_id] == connections[i][0] and pose_entries[j][kpt_b_id] == -1: + pose_entries[j][kpt_b_id] = connections[i][1] + elif pose_entries[j][kpt_b_id] == connections[i][1] and pose_entries[j][kpt_a_id] == -1: + pose_entries[j][kpt_a_id] = connections[i][0] + continue + else: + kpt_a_id = BODY_PARTS_KPT_IDS[part_id][0] + kpt_b_id = BODY_PARTS_KPT_IDS[part_id][1] + for i in range(len(connections)): + num = 0 + for j in range(len(pose_entries)): + if pose_entries[j][kpt_a_id] == connections[i][0]: + pose_entries[j][kpt_b_id] = connections[i][1] + num += 1 + pose_entries[j][-1] += 1 + pose_entries[j][-2] += all_keypoints[connections[i][1], 2] + connections[i][2] + if num == 0: + pose_entry = np.ones(pose_entry_size) * -1 + pose_entry[kpt_a_id] = connections[i][0] + pose_entry[kpt_b_id] = connections[i][1] + pose_entry[-1] = 2 + pose_entry[-2] = np.sum(all_keypoints[connections[i][0:2], 2]) + connections[i][2] + pose_entries.append(pose_entry) + + filtered_entries = [] + for i in range(len(pose_entries)): + if pose_entries[i][-1] < 3 or (pose_entries[i][-2] / pose_entries[i][-1] < 0.2): + continue + filtered_entries.append(pose_entries[i]) + pose_entries = np.asarray(filtered_entries) + return pose_entries, all_keypoints diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/modules/pose.py b/models/cv/pose_estimation/lightweightopenpose/ixrt/modules/pose.py new file mode 100644 index 0000000000000000000000000000000000000000..ce49629b8d0191c83abb5235da248e3e61c5df7a --- /dev/null +++ b/models/cv/pose_estimation/lightweightopenpose/ixrt/modules/pose.py @@ -0,0 +1,132 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
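+# NOTE: Pose wraps a single detection (18 named keypoints, per-keypoint OKS-style sigmas,
+# bounding box, drawing helper), and track_poses() propagates pose ids across video frames.
+# Neither is imported by the COCO accuracy/FPS scripts in this directory; they appear to be
+# kept for the demo/tracking use case.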
+import cv2 +import numpy as np + +from modules.keypoints import BODY_PARTS_KPT_IDS, BODY_PARTS_PAF_IDS +from modules.one_euro_filter import OneEuroFilter + + +class Pose: + num_kpts = 18 + kpt_names = ['nose', 'neck', + 'r_sho', 'r_elb', 'r_wri', 'l_sho', 'l_elb', 'l_wri', + 'r_hip', 'r_knee', 'r_ank', 'l_hip', 'l_knee', 'l_ank', + 'r_eye', 'l_eye', + 'r_ear', 'l_ear'] + sigmas = np.array([.26, .79, .79, .72, .62, .79, .72, .62, 1.07, .87, .89, 1.07, .87, .89, .25, .25, .35, .35], + dtype=np.float32) / 10.0 + vars = (sigmas * 2) ** 2 + last_id = -1 + color = [0, 224, 255] + + def __init__(self, keypoints, confidence): + super().__init__() + self.keypoints = keypoints + self.confidence = confidence + self.bbox = Pose.get_bbox(self.keypoints) + self.id = None + self.filters = [[OneEuroFilter(), OneEuroFilter()] for _ in range(Pose.num_kpts)] + + @staticmethod + def get_bbox(keypoints): + found_keypoints = np.zeros((np.count_nonzero(keypoints[:, 0] != -1), 2), dtype=np.int32) + found_kpt_id = 0 + for kpt_id in range(Pose.num_kpts): + if keypoints[kpt_id, 0] == -1: + continue + found_keypoints[found_kpt_id] = keypoints[kpt_id] + found_kpt_id += 1 + bbox = cv2.boundingRect(found_keypoints) + return bbox + + def update_id(self, id=None): + self.id = id + if self.id is None: + self.id = Pose.last_id + 1 + Pose.last_id += 1 + + def draw(self, img): + assert self.keypoints.shape == (Pose.num_kpts, 2) + + for part_id in range(len(BODY_PARTS_PAF_IDS) - 2): + kpt_a_id = BODY_PARTS_KPT_IDS[part_id][0] + global_kpt_a_id = self.keypoints[kpt_a_id, 0] + if global_kpt_a_id != -1: + x_a, y_a = self.keypoints[kpt_a_id] + cv2.circle(img, (int(x_a), int(y_a)), 3, Pose.color, -1) + kpt_b_id = BODY_PARTS_KPT_IDS[part_id][1] + global_kpt_b_id = self.keypoints[kpt_b_id, 0] + if global_kpt_b_id != -1: + x_b, y_b = self.keypoints[kpt_b_id] + cv2.circle(img, (int(x_b), int(y_b)), 3, Pose.color, -1) + if global_kpt_a_id != -1 and global_kpt_b_id != -1: + cv2.line(img, (int(x_a), int(y_a)), (int(x_b), int(y_b)), Pose.color, 2) + + +def get_similarity(a, b, threshold=0.5): + num_similar_kpt = 0 + for kpt_id in range(Pose.num_kpts): + if a.keypoints[kpt_id, 0] != -1 and b.keypoints[kpt_id, 0] != -1: + distance = np.sum((a.keypoints[kpt_id] - b.keypoints[kpt_id]) ** 2) + area = max(a.bbox[2] * a.bbox[3], b.bbox[2] * b.bbox[3]) + similarity = np.exp(-distance / (2 * (area + np.spacing(1)) * Pose.vars[kpt_id])) + if similarity > threshold: + num_similar_kpt += 1 + return num_similar_kpt + + +def track_poses(previous_poses, current_poses, threshold=3, smooth=False): + """Propagate poses ids from previous frame results. Id is propagated, + if there are at least `threshold` similar keypoints between pose from previous frame and current. + If correspondence between pose on previous and current frame was established, pose keypoints are smoothed. 
+ + :param previous_poses: poses from previous frame with ids + :param current_poses: poses from current frame to assign ids + :param threshold: minimal number of similar keypoints between poses + :param smooth: smooth pose keypoints between frames + :return: None + """ + current_poses = sorted(current_poses, key=lambda pose: pose.confidence, reverse=True) # match confident poses first + mask = np.ones(len(previous_poses), dtype=np.int32) + for current_pose in current_poses: + best_matched_id = None + best_matched_pose_id = None + best_matched_iou = 0 + for id, previous_pose in enumerate(previous_poses): + if not mask[id]: + continue + iou = get_similarity(current_pose, previous_pose) + if iou > best_matched_iou: + best_matched_iou = iou + best_matched_pose_id = previous_pose.id + best_matched_id = id + if best_matched_iou >= threshold: + mask[best_matched_id] = 0 + else: # pose not similar to any previous + best_matched_pose_id = None + current_pose.update_id(best_matched_pose_id) + + if smooth: + for kpt_id in range(Pose.num_kpts): + if current_pose.keypoints[kpt_id, 0] == -1: + continue + # reuse filter if previous pose has valid filter + if (best_matched_pose_id is not None + and previous_poses[best_matched_id].keypoints[kpt_id, 0] != -1): + current_pose.filters[kpt_id] = previous_poses[best_matched_id].filters[kpt_id] + current_pose.keypoints[kpt_id, 0] = current_pose.filters[kpt_id][0](current_pose.keypoints[kpt_id, 0]) + current_pose.keypoints[kpt_id, 1] = current_pose.filters[kpt_id][1](current_pose.keypoints[kpt_id, 1]) + current_pose.bbox = Pose.get_bbox(current_pose.keypoints) diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/scripts/infer_lightweight_openpose_fp16_accuracy.sh b/models/cv/pose_estimation/lightweightopenpose/ixrt/scripts/infer_lightweight_openpose_fp16_accuracy.sh new file mode 100644 index 0000000000000000000000000000000000000000..6a9fedd6239b106d878988e90e93508c483caa0c --- /dev/null +++ b/models/cv/pose_estimation/lightweightopenpose/ixrt/scripts/infer_lightweight_openpose_fp16_accuracy.sh @@ -0,0 +1,45 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
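+# Usage: export DATASETS_DIR (a COCO layout containing annotations/person_keypoints_val2017.json
+# and val2017/) and CHECKPOINTS_DIR (the directory holding lightweight_openpose.onnx), then run
+# this script. It first builds both engines, then evaluates the dynamic-shape engine on val2017.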
+ +DATASETS_DIR=${DATASETS_DIR} +CHECKPOINTS_DIR=${CHECKPOINTS_DIR} + +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +python3 build_engine.py --model_name lightweight_openpose \ + --onnx_path ${CHECKPOINTS_DIR}/lightweight_openpose.onnx \ + --engine_path ${CHECKPOINTS_DIR}/lightweight_openpose.engine \ + --engine_path_dynamicshape ${CHECKPOINTS_DIR}/lightweight_openpose_dynamicshape.engine + + +python3 inference_accuracy.py \ + --model_type lightweight_openpose \ + --engine_file ${CHECKPOINTS_DIR}/lightweight_openpose_dynamicshape.engine \ + --datasets_dir ${DATASETS_DIR} \ + --labels ${DATASETS_DIR}/annotations/person_keypoints_val2017.json \ + --images-folder ${DATASETS_DIR}/val2017 \ + --bsz 1 \ + --imgh 368 \ + --max_imgw 1488 \ + --test_mode ACC \ + --device 0 "$@";check_status + +exit ${EXIT_STATUS} diff --git a/models/cv/pose_estimation/lightweightopenpose/ixrt/scripts/infer_lightweight_openpose_fp16_performance.sh b/models/cv/pose_estimation/lightweightopenpose/ixrt/scripts/infer_lightweight_openpose_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..beec46441ca9c691b56f0f11647740eeaeed20d6 --- /dev/null +++ b/models/cv/pose_estimation/lightweightopenpose/ixrt/scripts/infer_lightweight_openpose_fp16_performance.sh @@ -0,0 +1,44 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +DATASETS_DIR=${DATASETS_DIR} +CHECKPOINTS_DIR=${CHECKPOINTS_DIR} + +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +python3 build_engine.py --model_name lightweight_openpose \ + --onnx_path ${CHECKPOINTS_DIR}/lightweight_openpose.onnx \ + --engine_path ${CHECKPOINTS_DIR}/lightweight_openpose.engine \ + --engine_path_dynamicshape ${CHECKPOINTS_DIR}/lightweight_openpose_dynamicshape.engine + +python3 inference_performance.py \ + --model_type lightweight_openpose \ + --engine_file ${CHECKPOINTS_DIR}/lightweight_openpose.engine \ + --datasets_dir ${DATASETS_DIR} \ + --bsz 32 \ + --imgh 256 \ + --imgw 456 \ + --test_mode FPS \ + --warm_up 10 \ + --run_loop 20 \ + --device 0 "$@";check_status + +exit ${EXIT_STATUS}
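+# Note: in FPS mode inference_performance.py feeds a zero-filled tensor shaped like the fixed
+# engine's input and never reads images from DATASETS_DIR; the reported FPS is
+# run_loop * bsz / elapsed_time, measured after the warm-up iterations.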