From abb98eeff042c7b1689e627f693dad17ee62bcf3 Mon Sep 17 00:00:00 2001
From: majorli <mingjiang.li@iluvatar.com>
Date: Fri, 12 Jul 2024 14:39:21 +0800
Subject: [PATCH 1/6] ixrt mobilenet v2 add latency log

Signed-off-by: majorli <mingjiang.li@iluvatar.com>
---
 .../cv/classification/mobilenet_v2/ixrt/python/inference.py   | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/models/cv/classification/mobilenet_v2/ixrt/python/inference.py b/models/cv/classification/mobilenet_v2/ixrt/python/inference.py
index e726dabc..ea3f7f6b 100644
--- a/models/cv/classification/mobilenet_v2/ixrt/python/inference.py
+++ b/models/cv/classification/mobilenet_v2/ixrt/python/inference.py
@@ -85,6 +85,7 @@ def main(config):
         total_sample = 0
         acc_top1, acc_top5 = 0, 0
 
+        start_time = time.time()
         with tqdm(total= len(dataloader)) as _tqdm:
             for idx, (batch_data, batch_label) in enumerate(dataloader):
                 batch_data = batch_data.numpy().astype(inputs[0]["dtype"])
@@ -106,7 +107,10 @@ def main(config):
                 _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample),
                                     acc_5='{:.4f}'.format(acc_top5/total_sample))
                 _tqdm.update(1)
+        end_time = time.time()
+        end2end_time = end_time - start_time
 
+        print(F"E2E time : {end2end_time:.3f} seconds")
         print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}")
         print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}")
         acc1 = acc_top1/total_sample
-- 
Gitee


From 3ad196343bfdf52ddb9add8456ee6f6a1579f5ec Mon Sep 17 00:00:00 2001
From: majorli <mingjiang.li@iluvatar.com>
Date: Fri, 12 Jul 2024 14:59:24 +0800
Subject: [PATCH 2/6] add default datasets location at
 PROJ_ROOT/data/datasets

Signed-off-by: majorli <mingjiang.li@iluvatar.com>
---
 data/datasets/README.md                              | 1 +
 models/cv/classification/mobilenet_v2/ixrt/README.md | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)
 create mode 100644 data/datasets/README.md

diff --git a/data/datasets/README.md b/data/datasets/README.md
new file mode 100644
index 00000000..f40c2e82
--- /dev/null
+++ b/data/datasets/README.md
@@ -0,0 +1 @@
+# This is the default datasets location required by inference models
diff --git a/models/cv/classification/mobilenet_v2/ixrt/README.md b/models/cv/classification/mobilenet_v2/ixrt/README.md
index 46343115..10852a95 100644
--- a/models/cv/classification/mobilenet_v2/ixrt/README.md
+++ b/models/cv/classification/mobilenet_v2/ixrt/README.md
@@ -13,17 +13,19 @@ pip3 install tqdm
 pip3 install onnxsim
 pip3 install opencv-python
 pip3 install ppq
+pip3 install protobuf==3.20.0
 ```
 
 ### Download
 
-Download the [imagenet](https://www.image-net.org/download.php) validation dataset, and place in data/datasets;
+Download the [imagenet](https://www.image-net.org/download.php) validation dataset, and place in `${PROJ_ROOT}/data/datasets`;
 
 ## Inference
 
 ### FP16
 
 ```bash
+cd python/
 # Test ACC
 bash script/infer_mobilenetv2_fp16_accuary.sh
 # Test FPS
-- 
Gitee


From 395cf3177b3871918e5c9d4f4e355c20ba715a3d Mon Sep 17 00:00:00 2001
From: majorli <mingjiang.li@iluvatar.com>
Date: Tue, 16 Jul 2024 13:57:33 +0800
Subject: [PATCH 3/6] bert squad add e2e time print

Signed-off-by: majorli <mingjiang.li@iluvatar.com>
---
 .../bert_base_squad/ixrt/python/ixrt/evaluate-v1.1.py        | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/evaluate-v1.1.py b/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/evaluate-v1.1.py
index 92c4e83b..b7aa5192 100644
--- a/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/evaluate-v1.1.py
+++ b/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/evaluate-v1.1.py
@@ -26,6 +26,7 @@ import re
 import argparse
 import json
 import sys
+import time
 
 def normalize_answer(s):
     """Lower text and remove punctuation, articles and extra whitespace."""
@@ -116,7 +117,11 @@ if __name__ == '__main__':
     with open(args.prediction_file) as prediction_file:
         predictions = json.load(prediction_file)
         f1_acc = float(args.f1_acc)
+
+    start_time = time.time()
     res = evaluate(dataset, predictions, f1_acc)
+    end_time = time.time()
+    print(F"E2E time : {end_time-start_time}")
     print(res)
     if res["status"] == 1:
         print("pass!")
-- 
Gitee


From 27c6d6f1ba61ef9659cc0a3bbad61dafa7412d5d Mon Sep 17 00:00:00 2001
From: majorli <mingjiang.li@iluvatar.com>
Date: Tue, 16 Jul 2024 17:39:02 +0800
Subject: [PATCH 4/6] ixrt mask rcnn add e2e time print

Signed-off-by: majorli <mingjiang.li@iluvatar.com>
---
 models/cv/segmentation/mask_rcnn/ixrt/python/maskrcnn.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/models/cv/segmentation/mask_rcnn/ixrt/python/maskrcnn.py b/models/cv/segmentation/mask_rcnn/ixrt/python/maskrcnn.py
index ce68fec2..6362d011 100644
--- a/models/cv/segmentation/mask_rcnn/ixrt/python/maskrcnn.py
+++ b/models/cv/segmentation/mask_rcnn/ixrt/python/maskrcnn.py
@@ -275,6 +275,7 @@ def get_maskrcnn_acc(config):
     print("Warmup done !\nStart forward ...")
     
     # run
+    start_time = time.time()
     for batch_data, batch_img_shape, batch_img_id, batched_paddings, paths in tqdm(dataloader):
         batch_data = batch_data.numpy()
         batch_img_shape = batch_img_shape.numpy()
@@ -312,7 +313,10 @@ def get_maskrcnn_acc(config):
             batched_paddings[0]
         )
         save2json(batch_img_id, bboxs_masks, json_result, class_map)
-        
+    end_time = time.time()
+    end2end_time = end_time - start_time
+
+    print(F"E2E time : {end2end_time:.3f} seconds")
     print("Forward done !")
     
     tmp_result_name = "pred_results.json"
-- 
Gitee


From fa4c5207741714771801949c6aa8b4b45fef611c Mon Sep 17 00:00:00 2001
From: majorli <mingjiang.li@iluvatar.com>
Date: Tue, 23 Jul 2024 12:09:06 +0800
Subject: [PATCH 5/6] ixrt yolox add e2e time print

Signed-off-by: majorli <mingjiang.li@iluvatar.com>
---
 .../detection/yolox/ixrt/python/inference.py  | 69 ++++++++++---------
 .../cv/detection/yolox/ixrt/python/utils.py   | 38 +++++-----
 2 files changed, 55 insertions(+), 52 deletions(-)

diff --git a/models/cv/detection/yolox/ixrt/python/inference.py b/models/cv/detection/yolox/ixrt/python/inference.py
index aa3ae2f5..a6c68154 100644
--- a/models/cv/detection/yolox/ixrt/python/inference.py
+++ b/models/cv/detection/yolox/ixrt/python/inference.py
@@ -30,47 +30,47 @@ from utils import COCO2017Dataset, COCO2017Evaluator
 
 def parse_args():
     parser = argparse.ArgumentParser()
-    
-    parser.add_argument("--engine", 
-                        type=str, 
-                        required=True, 
+
+    parser.add_argument("--engine",
+                        type=str,
+                        required=True,
                         help="igie engine path.")
-    
+
     parser.add_argument("--batchsize",
                         type=int,
-                        required=True, 
+                        required=True,
                         help="inference batch size.")
-    
-    parser.add_argument("--datasets", 
-                        type=str, 
-                        required=True, 
+
+    parser.add_argument("--datasets",
+                        type=str,
+                        required=True,
                         help="datasets path.")
-    
-    parser.add_argument("--warmup", 
-                        type=int, 
-                        default=5, 
-                        help="number of warmup before test.")           
-    
+
+    parser.add_argument("--warmup",
+                        type=int,
+                        default=5,
+                        help="number of warmup before test.")
+
     parser.add_argument("--num_workers",
                         type=int,
                         default=16,
                         help="number of workers used in pytorch dataloader.")
-    
+
     parser.add_argument("--acc_target",
                         type=float,
                         default=None,
                         help="Model inference Accuracy target.")
-    
+
     parser.add_argument("--fps_target",
                         type=float,
                         default=None,
                         help="Model inference FPS target.")
-    
+
     parser.add_argument("--conf",
                         type=float,
                         default=0.001,
                         help="confidence threshold.")
-    
+
     parser.add_argument("--iou",
                         type=float,
                         default=0.65,
@@ -80,11 +80,11 @@ def parse_args():
                         type=bool,
                         default=False,
                         help="Run performance test only")
-    parser.add_argument("--loop_count", 
-                        type=int, 
-                        default=-1, 
+    parser.add_argument("--loop_count",
+                        type=int,
+                        default=-1,
                         help="loop count")
-    
+
     args = parser.parse_args()
 
     return args
@@ -188,11 +188,11 @@ def main():
 
     host_mem = tensorrt.IHostMemory
     logger = tensorrt.Logger(tensorrt.Logger.ERROR)
-    
+
     # Load Engine
     engine, context = create_engine_context(args.engine, logger)
     inputs, outputs, allocations = get_io_bindings(engine)
-    
+
     # Warm up
     print("\nWarm Start.")
     for i in range(args.warmup):
@@ -218,27 +218,30 @@ def main():
                                     conf_thres=args.conf,
                                     iou_thres=args.iou,
                                     image_size=640)
-        
+        start_time = time.time()
         for all_inputs in tqdm(dataloader):
             image = all_inputs[0]
             pad_batch = len(image) != batch_size
             if pad_batch:
                 origin_size = len(image)
                 image = np.resize(image, (batch_size, *image.shape[1:]))
-            
+
             cuda.memcpy_htod(inputs[0]["allocation"], image)
             context.execute_v2(allocations)
-            
+
             cuda.memcpy_dtoh(output_np, outputs[0]["allocation"])
             # print("output_np")
             # print(output_np)
-                
+
             if pad_batch:
                 output_np = output_np[:origin_size]
-                
+
             evaluator.evaluate(output_np, all_inputs)
-    
+        end_time = time.time()
+        end2end_time = end_time - start_time
+        print(F"E2E time : {end2end_time:.3f} seconds")
+
         evaluator.summary()
-    
+
 if __name__ == "__main__":
     main()
\ No newline at end of file
diff --git a/models/cv/detection/yolox/ixrt/python/utils.py b/models/cv/detection/yolox/ixrt/python/utils.py
index e6bca6dc..314aed2e 100644
--- a/models/cv/detection/yolox/ixrt/python/utils.py
+++ b/models/cv/detection/yolox/ixrt/python/utils.py
@@ -82,7 +82,7 @@ class COCO2017Dataset(torch.utils.data.Dataset):
         self.input_layout = input_layout
 
         self.coco = COCO(annotation_file=self.label_json_path)
-        
+
         if self.val_mode:
             self.img_ids = list(sorted(self.coco.imgs.keys()))
         else:
@@ -96,7 +96,7 @@ class COCO2017Dataset(torch.utils.data.Dataset):
         img = self._load_image(img_path)
 
         img, r = self.preproc(img, input_size=self.image_size)
-        
+
         return img, img_path, r
 
     def _get_image_path(self, index):
@@ -110,13 +110,13 @@ class COCO2017Dataset(torch.utils.data.Dataset):
         assert img is not None, f"file {img_path} not found"
 
         return img
-    
+
     def preproc(self, img, input_size, swap=(2, 0, 1)):
         if len(img.shape) == 3:
             padded_img = np.ones((input_size[0], input_size[1], 3), dtype=np.uint8) * 114
         else:
             padded_img = np.ones(input_size, dtype=np.uint8) * 114
-        
+
         org_img = (img.shape[0], img.shape[1])
         r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])
         resized_img = cv2.resize(
@@ -128,10 +128,10 @@ class COCO2017Dataset(torch.utils.data.Dataset):
 
         padded_img = padded_img.transpose(swap)
         padded_img = np.ascontiguousarray(padded_img, dtype=np.float32)
-        
+
         return padded_img, org_img
 
-    
+
     def _load_json_label(self, index):
         _, (h0, w0), _ = self._load_image(index)
 
@@ -171,19 +171,19 @@ def get_coco_accuracy(pred_json, ann_json):
     coco_pred = coco.loadRes(pred_json)
 
     coco_evaluator = COCOeval(cocoGt=coco, cocoDt=coco_pred, iouType="bbox")
-            
+
     coco_evaluator.evaluate()
     coco_evaluator.accumulate()
     coco_evaluator.summarize()
     return coco_evaluator.stats
 
-class COCO2017Evaluator:    
+class COCO2017Evaluator:
     def __init__(self,
                  label_path,
                  image_size=640,
                  conf_thres=0.001,
                  iou_thres=0.65):
-        
+
         self.conf_thres = conf_thres
         self.iou_thres = iou_thres
         self.label_path = label_path
@@ -192,14 +192,14 @@ class COCO2017Evaluator:
         self.jdict = []
 
         # iou vector for mAP@0.5:0.95
-        self.iouv = torch.linspace(0.5, 0.95, 10)  
+        self.iouv = torch.linspace(0.5, 0.95, 10)
         self.niou = self.iouv.numel()
-    
+
     def evaluate(self, pred, all_inputs):
         im = all_inputs[0]
         img_path = all_inputs[1]
         img_info = all_inputs[2]
-        
+
         _, _, height, width = im.shape
         img_size = [height, width]
 
@@ -212,7 +212,7 @@ class COCO2017Evaluator:
         for (output, org_img, path) in zip(nms_outputs, img_info, img_path):
             if output is None:
                 continue
-            
+
             bboxes = output[:, 0:4]
 
             img_h, img_w = org_img
@@ -222,11 +222,11 @@ class COCO2017Evaluator:
             bboxes /= scale
             cls = output[:, 6]
             scores = output[:, 4] * output[:, 5]
-            
+
             bboxes = self._xyxy2xywh(bboxes)
 
             self._save_one_json(bboxes, cls, scores, self.jdict, path, coco80_to_coco91)
-        
+
     def Detect(self, outputs, img_size):
         grids = []
         expanded_strides = []
@@ -247,7 +247,7 @@ class COCO2017Evaluator:
         expanded_strides = np.concatenate(expanded_strides, 1)
         outputs[..., :2] = (outputs[..., :2] + grids) * expanded_strides
         outputs[..., 2:4] = np.exp(outputs[..., 2:4]) * expanded_strides
-        
+
         return outputs
 
     def postprocess(self, prediction, num_classes=80, conf_thre=0.7, nms_thre=0.45, class_agnostic=False):
@@ -257,7 +257,7 @@ class COCO2017Evaluator:
         box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
         box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
         prediction[:, :, :4] = box_corner[:, :, :4]
-        
+
         output = [None for _ in range(len(prediction))]
 
         for i, image_pred in enumerate(prediction):
@@ -271,7 +271,7 @@ class COCO2017Evaluator:
             # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
             detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1)
             detections = detections[conf_mask]
-            
+
             if not detections.size(0):
                 continue
             if class_agnostic:
@@ -295,7 +295,7 @@ class COCO2017Evaluator:
                 output[i] = torch.cat((output[i], detections))
 
         return output
-    
+
     def _xyxy2xywh(self, bboxes):
         bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 0]
         bboxes[:, 3] = bboxes[:, 3] - bboxes[:, 1]
-- 
Gitee


From aae3dd45dfa8f0e47b9d5c5d87984e90ff629dd1 Mon Sep 17 00:00:00 2001
From: majorli <mingjiang.li@iluvatar.com>
Date: Tue, 23 Jul 2024 12:17:32 +0800
Subject: [PATCH 6/6] add bert base/large squad ixrt e2e time print

Signed-off-by: majorli <mingjiang.li@iluvatar.com>
---
 .../bert_base_squad/ixrt/python/ixrt/evaluate-v1.1.py         | 4 ----
 .../bert_base_squad/ixrt/python/ixrt/inference.py             | 3 ++-
 .../language_model/bert_large_squad/ixrt/python/inference.py  | 3 ++-
 3 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/evaluate-v1.1.py b/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/evaluate-v1.1.py
index b7aa5192..67d6c182 100644
--- a/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/evaluate-v1.1.py
+++ b/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/evaluate-v1.1.py
@@ -117,11 +117,7 @@ if __name__ == '__main__':
     with open(args.prediction_file) as prediction_file:
         predictions = json.load(prediction_file)
         f1_acc = float(args.f1_acc)
-
-    start_time = time.time()
     res = evaluate(dataset, predictions, f1_acc)
-    end_time = time.time()
-    print(F"E2E time : {end_time-start_time}")
     print(res)
     if res["status"] == 1:
         print("pass!")
diff --git a/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/inference.py b/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/inference.py
index a85e765c..b6af06dc 100644
--- a/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/inference.py
+++ b/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/inference.py
@@ -379,7 +379,8 @@ if __name__ == '__main__':
                 lengths.append(len(features[0].input_ids))
 
             sort_index = np.argsort(lengths)
-            infer_time, all_predictions = inference_all_dynamic(features_list, squad_examples, sort_index, all_predictions)          
+            infer_time, all_predictions = inference_all_dynamic(features_list, squad_examples, sort_index, all_predictions)
+            print(F"E2E time : {infer_time:.3f} seconds")
             
             qps = math.ceil(len(squad_examples)/args.batch_size)*args.batch_size/infer_time
             print(f"Latency QPS: {qps} sentences/s")
diff --git a/models/nlp/language_model/bert_large_squad/ixrt/python/inference.py b/models/nlp/language_model/bert_large_squad/ixrt/python/inference.py
index 8685604f..860322c3 100644
--- a/models/nlp/language_model/bert_large_squad/ixrt/python/inference.py
+++ b/models/nlp/language_model/bert_large_squad/ixrt/python/inference.py
@@ -372,7 +372,8 @@ if __name__ == '__main__':
                 lengths.append(len(features[0].input_ids))
 
             sort_index = np.argsort(lengths)
-            infer_time, all_predictions = inference_all_dynamic(features_list, squad_examples, sort_index, all_predictions)          
+            infer_time, all_predictions = inference_all_dynamic(features_list, squad_examples, sort_index, all_predictions)
+            print(F"E2E time : {infer_time:.3f} seconds")
             
             qps = len(squad_examples)/infer_time
             print(f"Latency QPS: {qps} sentences/s")
-- 
Gitee