diff --git a/data/datasets/README.md b/data/datasets/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f40c2e824e94c038e186d9e1ffa149a8382a41e2 --- /dev/null +++ b/data/datasets/README.md @@ -0,0 +1 @@ +# This is the default datasets location required by inference models diff --git a/models/cv/classification/mobilenet_v2/ixrt/README.md b/models/cv/classification/mobilenet_v2/ixrt/README.md index 46343115fc731320eb2f61b0358695cb0608116b..10852a9550a34658bf9bc5b8d4a0dadd91e98bb9 100644 --- a/models/cv/classification/mobilenet_v2/ixrt/README.md +++ b/models/cv/classification/mobilenet_v2/ixrt/README.md @@ -13,17 +13,19 @@ pip3 install tqdm pip3 install onnxsim pip3 install opencv-python pip3 install ppq +pip3 install protobuf==3.20.0 ``` ### Download -Download the [imagenet](https://www.image-net.org/download.php) validation dataset, and place in data/datasets; +Download the [imagenet](https://www.image-net.org/download.php) validation dataset, and place in `${PROJ_ROOT}/data/datasets`; ## Inference ### FP16 ```bash +cd python/ # Test ACC bash script/infer_mobilenetv2_fp16_accuary.sh # Test FPS diff --git a/models/cv/classification/mobilenet_v2/ixrt/python/inference.py b/models/cv/classification/mobilenet_v2/ixrt/python/inference.py index e726dabc1f19cadeda9f130ef52f8b36ad435d26..ea3f7f6b47414387508d955f71344a4af3217167 100644 --- a/models/cv/classification/mobilenet_v2/ixrt/python/inference.py +++ b/models/cv/classification/mobilenet_v2/ixrt/python/inference.py @@ -85,6 +85,7 @@ def main(config): total_sample = 0 acc_top1, acc_top5 = 0, 0 + start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) @@ -106,7 +107,10 @@ def main(config): _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) + end_time = time.time() + end2end_time = end_time - start_time + print(F"E2E time : {end2end_time:.3f} seconds") print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") acc1 = acc_top1/total_sample diff --git a/models/cv/detection/yolox/ixrt/python/inference.py b/models/cv/detection/yolox/ixrt/python/inference.py index aa3ae2f513968d46e2a324338c0490becd24e206..a6c681542956f41e2f8fb2a379d11832e2d09817 100644 --- a/models/cv/detection/yolox/ixrt/python/inference.py +++ b/models/cv/detection/yolox/ixrt/python/inference.py @@ -30,47 +30,47 @@ from utils import COCO2017Dataset, COCO2017Evaluator def parse_args(): parser = argparse.ArgumentParser() - - parser.add_argument("--engine", - type=str, - required=True, + + parser.add_argument("--engine", + type=str, + required=True, help="igie engine path.") - + parser.add_argument("--batchsize", type=int, - required=True, + required=True, help="inference batch size.") - - parser.add_argument("--datasets", - type=str, - required=True, + + parser.add_argument("--datasets", + type=str, + required=True, help="datasets path.") - - parser.add_argument("--warmup", - type=int, - default=5, - help="number of warmup before test.") - + + parser.add_argument("--warmup", + type=int, + default=5, + help="number of warmup before test.") + parser.add_argument("--num_workers", type=int, default=16, help="number of workers used in pytorch dataloader.") - + parser.add_argument("--acc_target", type=float, default=None, help="Model inference Accuracy target.") - + parser.add_argument("--fps_target", type=float, default=None, help="Model inference FPS target.") - + parser.add_argument("--conf", type=float, default=0.001, help="confidence threshold.") - + parser.add_argument("--iou", type=float, default=0.65, @@ -80,11 +80,11 @@ def parse_args(): type=bool, default=False, help="Run performance test only") - parser.add_argument("--loop_count", - type=int, - default=-1, + parser.add_argument("--loop_count", + type=int, + default=-1, help="loop count") - + args = parser.parse_args() return args @@ -188,11 +188,11 @@ def main(): host_mem = tensorrt.IHostMemory logger = tensorrt.Logger(tensorrt.Logger.ERROR) - + # Load Engine engine, context = create_engine_context(args.engine, logger) inputs, outputs, allocations = get_io_bindings(engine) - + # Warm up print("\nWarm Start.") for i in range(args.warmup): @@ -218,27 +218,30 @@ def main(): conf_thres=args.conf, iou_thres=args.iou, image_size=640) - + start_time = time.time() for all_inputs in tqdm(dataloader): image = all_inputs[0] pad_batch = len(image) != batch_size if pad_batch: origin_size = len(image) image = np.resize(image, (batch_size, *image.shape[1:])) - + cuda.memcpy_htod(inputs[0]["allocation"], image) context.execute_v2(allocations) - + cuda.memcpy_dtoh(output_np, outputs[0]["allocation"]) # print("output_np") # print(output_np) - + if pad_batch: output_np = output_np[:origin_size] - + evaluator.evaluate(output_np, all_inputs) - + end_time = time.time() + end2end_time = end_time - start_time + print(F"E2E time : {end2end_time:.3f} seconds") + evaluator.summary() - + if __name__ == "__main__": main() \ No newline at end of file diff --git a/models/cv/detection/yolox/ixrt/python/utils.py b/models/cv/detection/yolox/ixrt/python/utils.py index e6bca6dcd5bfa645d98b54e9e98ecd8f0a0b8d67..314aed2e7d9c20cf71e205a6a032369f8697c21f 100644 --- a/models/cv/detection/yolox/ixrt/python/utils.py +++ b/models/cv/detection/yolox/ixrt/python/utils.py @@ -82,7 +82,7 @@ class COCO2017Dataset(torch.utils.data.Dataset): self.input_layout = input_layout self.coco = COCO(annotation_file=self.label_json_path) - + if self.val_mode: self.img_ids = list(sorted(self.coco.imgs.keys())) else: @@ -96,7 +96,7 @@ class COCO2017Dataset(torch.utils.data.Dataset): img = self._load_image(img_path) img, r = self.preproc(img, input_size=self.image_size) - + return img, img_path, r def _get_image_path(self, index): @@ -110,13 +110,13 @@ class COCO2017Dataset(torch.utils.data.Dataset): assert img is not None, f"file {img_path} not found" return img - + def preproc(self, img, input_size, swap=(2, 0, 1)): if len(img.shape) == 3: padded_img = np.ones((input_size[0], input_size[1], 3), dtype=np.uint8) * 114 else: padded_img = np.ones(input_size, dtype=np.uint8) * 114 - + org_img = (img.shape[0], img.shape[1]) r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1]) resized_img = cv2.resize( @@ -128,10 +128,10 @@ class COCO2017Dataset(torch.utils.data.Dataset): padded_img = padded_img.transpose(swap) padded_img = np.ascontiguousarray(padded_img, dtype=np.float32) - + return padded_img, org_img - + def _load_json_label(self, index): _, (h0, w0), _ = self._load_image(index) @@ -171,19 +171,19 @@ def get_coco_accuracy(pred_json, ann_json): coco_pred = coco.loadRes(pred_json) coco_evaluator = COCOeval(cocoGt=coco, cocoDt=coco_pred, iouType="bbox") - + coco_evaluator.evaluate() coco_evaluator.accumulate() coco_evaluator.summarize() return coco_evaluator.stats -class COCO2017Evaluator: +class COCO2017Evaluator: def __init__(self, label_path, image_size=640, conf_thres=0.001, iou_thres=0.65): - + self.conf_thres = conf_thres self.iou_thres = iou_thres self.label_path = label_path @@ -192,14 +192,14 @@ class COCO2017Evaluator: self.jdict = [] # iou vector for mAP@0.5:0.95 - self.iouv = torch.linspace(0.5, 0.95, 10) + self.iouv = torch.linspace(0.5, 0.95, 10) self.niou = self.iouv.numel() - + def evaluate(self, pred, all_inputs): im = all_inputs[0] img_path = all_inputs[1] img_info = all_inputs[2] - + _, _, height, width = im.shape img_size = [height, width] @@ -212,7 +212,7 @@ class COCO2017Evaluator: for (output, org_img, path) in zip(nms_outputs, img_info, img_path): if output is None: continue - + bboxes = output[:, 0:4] img_h, img_w = org_img @@ -222,11 +222,11 @@ class COCO2017Evaluator: bboxes /= scale cls = output[:, 6] scores = output[:, 4] * output[:, 5] - + bboxes = self._xyxy2xywh(bboxes) self._save_one_json(bboxes, cls, scores, self.jdict, path, coco80_to_coco91) - + def Detect(self, outputs, img_size): grids = [] expanded_strides = [] @@ -247,7 +247,7 @@ class COCO2017Evaluator: expanded_strides = np.concatenate(expanded_strides, 1) outputs[..., :2] = (outputs[..., :2] + grids) * expanded_strides outputs[..., 2:4] = np.exp(outputs[..., 2:4]) * expanded_strides - + return outputs def postprocess(self, prediction, num_classes=80, conf_thre=0.7, nms_thre=0.45, class_agnostic=False): @@ -257,7 +257,7 @@ class COCO2017Evaluator: box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2 box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2 prediction[:, :, :4] = box_corner[:, :, :4] - + output = [None for _ in range(len(prediction))] for i, image_pred in enumerate(prediction): @@ -271,7 +271,7 @@ class COCO2017Evaluator: # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred) detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1) detections = detections[conf_mask] - + if not detections.size(0): continue if class_agnostic: @@ -295,7 +295,7 @@ class COCO2017Evaluator: output[i] = torch.cat((output[i], detections)) return output - + def _xyxy2xywh(self, bboxes): bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 0] bboxes[:, 3] = bboxes[:, 3] - bboxes[:, 1] diff --git a/models/cv/segmentation/mask_rcnn/ixrt/python/maskrcnn.py b/models/cv/segmentation/mask_rcnn/ixrt/python/maskrcnn.py index ce68fec2c54fed9c26a4fdeeae6e4158e9dce71d..6362d01167e0f3a8490b9d16875a12d39d83c7b2 100644 --- a/models/cv/segmentation/mask_rcnn/ixrt/python/maskrcnn.py +++ b/models/cv/segmentation/mask_rcnn/ixrt/python/maskrcnn.py @@ -275,6 +275,7 @@ def get_maskrcnn_acc(config): print("Warmup done !\nStart forward ...") # run + start_time = time.time() for batch_data, batch_img_shape, batch_img_id, batched_paddings, paths in tqdm(dataloader): batch_data = batch_data.numpy() batch_img_shape = batch_img_shape.numpy() @@ -312,7 +313,10 @@ def get_maskrcnn_acc(config): batched_paddings[0] ) save2json(batch_img_id, bboxs_masks, json_result, class_map) - + end_time = time.time() + end2end_time = end_time - start_time + + print(F"E2E time : {end2end_time:.3f} seconds") print("Forward done !") tmp_result_name = "pred_results.json" diff --git a/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/evaluate-v1.1.py b/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/evaluate-v1.1.py index 92c4e83bf7f150156108b7ccd99f0a9373222c2a..67d6c18245b7eec0c8a995fc2a7284715429b498 100644 --- a/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/evaluate-v1.1.py +++ b/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/evaluate-v1.1.py @@ -26,6 +26,7 @@ import re import argparse import json import sys +import time def normalize_answer(s): """Lower text and remove punctuation, articles and extra whitespace.""" diff --git a/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/inference.py b/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/inference.py index a85e765c91152562d6180307c2bb1317dc385356..b6af06dcf683496128bbbdb9e5458f8b2753885d 100644 --- a/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/inference.py +++ b/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/inference.py @@ -379,7 +379,8 @@ if __name__ == '__main__': lengths.append(len(features[0].input_ids)) sort_index = np.argsort(lengths) - infer_time, all_predictions = inference_all_dynamic(features_list, squad_examples, sort_index, all_predictions) + infer_time, all_predictions = inference_all_dynamic(features_list, squad_examples, sort_index, all_predictions) + print(F"E2E time : {infer_time:.3f} seconds") qps = math.ceil(len(squad_examples)/args.batch_size)*args.batch_size/infer_time print(f"Latency QPS: {qps} sentences/s") diff --git a/models/nlp/language_model/bert_large_squad/ixrt/python/inference.py b/models/nlp/language_model/bert_large_squad/ixrt/python/inference.py index 8685604f7e259263be098378f52d6ff90514a6ac..860322c3ed5873e0002d9aa24a011394dd92e570 100644 --- a/models/nlp/language_model/bert_large_squad/ixrt/python/inference.py +++ b/models/nlp/language_model/bert_large_squad/ixrt/python/inference.py @@ -372,7 +372,8 @@ if __name__ == '__main__': lengths.append(len(features[0].input_ids)) sort_index = np.argsort(lengths) - infer_time, all_predictions = inference_all_dynamic(features_list, squad_examples, sort_index, all_predictions) + infer_time, all_predictions = inference_all_dynamic(features_list, squad_examples, sort_index, all_predictions) + print(F"E2E time : {infer_time:.3f} seconds") qps = len(squad_examples)/infer_time print(f"Latency QPS: {qps} sentences/s")