diff --git a/data/datasets/README.md b/data/datasets/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f40c2e824e94c038e186d9e1ffa149a8382a41e2
--- /dev/null
+++ b/data/datasets/README.md
@@ -0,0 +1 @@
+# Default dataset location required by the inference models
diff --git a/models/cv/classification/mobilenet_v2/ixrt/README.md b/models/cv/classification/mobilenet_v2/ixrt/README.md
index 46343115fc731320eb2f61b0358695cb0608116b..10852a9550a34658bf9bc5b8d4a0dadd91e98bb9 100644
--- a/models/cv/classification/mobilenet_v2/ixrt/README.md
+++ b/models/cv/classification/mobilenet_v2/ixrt/README.md
@@ -13,17 +13,19 @@ pip3 install tqdm
 pip3 install onnxsim
 pip3 install opencv-python
 pip3 install ppq
+pip3 install protobuf==3.20.0
 ```
 
 ### Download
 
-Download the [imagenet](https://www.image-net.org/download.php) validation dataset, and place in data/datasets;
+Download the [ImageNet](https://www.image-net.org/download.php) validation dataset and place it in `${PROJ_ROOT}/data/datasets`.
 
 ## Inference
 
 ### FP16
 
 ```bash
+cd python/
 # Test ACC
 bash script/infer_mobilenetv2_fp16_accuary.sh
 # Test FPS
diff --git a/models/cv/classification/mobilenet_v2/ixrt/python/inference.py b/models/cv/classification/mobilenet_v2/ixrt/python/inference.py
index e726dabc1f19cadeda9f130ef52f8b36ad435d26..ea3f7f6b47414387508d955f71344a4af3217167 100644
--- a/models/cv/classification/mobilenet_v2/ixrt/python/inference.py
+++ b/models/cv/classification/mobilenet_v2/ixrt/python/inference.py
@@ -85,6 +85,7 @@ def main(config):
         total_sample = 0
         acc_top1, acc_top5 = 0, 0
 
+        start_time = time.time()
         with tqdm(total= len(dataloader)) as _tqdm:
             for idx, (batch_data, batch_label) in enumerate(dataloader):
                 batch_data = batch_data.numpy().astype(inputs[0]["dtype"])
@@ -106,7 +107,10 @@ def main(config):
                 _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample),
                                     acc_5='{:.4f}'.format(acc_top5/total_sample))
                 _tqdm.update(1)
+        end_time = time.time()
+        end2end_time = end_time - start_time
 
+        print(F"E2E time : {end2end_time:.3f} seconds")
         print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}")
         print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}")
         acc1 = acc_top1/total_sample
diff --git a/models/cv/detection/yolox/ixrt/python/inference.py b/models/cv/detection/yolox/ixrt/python/inference.py
index aa3ae2f513968d46e2a324338c0490becd24e206..a6c681542956f41e2f8fb2a379d11832e2d09817 100644
--- a/models/cv/detection/yolox/ixrt/python/inference.py
+++ b/models/cv/detection/yolox/ixrt/python/inference.py
@@ -30,47 +30,47 @@ from utils import COCO2017Dataset, COCO2017Evaluator
 
 def parse_args():
     parser = argparse.ArgumentParser()
-    
-    parser.add_argument("--engine", 
-                        type=str, 
-                        required=True, 
+
+    parser.add_argument("--engine",
+                        type=str,
+                        required=True,
                         help="igie engine path.")
-    
+
     parser.add_argument("--batchsize",
                         type=int,
-                        required=True, 
+                        required=True,
                         help="inference batch size.")
-    
-    parser.add_argument("--datasets", 
-                        type=str, 
-                        required=True, 
+
+    parser.add_argument("--datasets",
+                        type=str,
+                        required=True,
                         help="datasets path.")
-    
-    parser.add_argument("--warmup", 
-                        type=int, 
-                        default=5, 
-                        help="number of warmup before test.")           
-    
+
+    parser.add_argument("--warmup",
+                        type=int,
+                        default=5,
+                        help="number of warmup before test.")
+
     parser.add_argument("--num_workers",
                         type=int,
                         default=16,
                         help="number of workers used in pytorch dataloader.")
-    
+
     parser.add_argument("--acc_target",
                         type=float,
                         default=None,
                         help="Model inference Accuracy target.")
-    
+
     parser.add_argument("--fps_target",
                         type=float,
                         default=None,
                         help="Model inference FPS target.")
-    
+
     parser.add_argument("--conf",
                         type=float,
                         default=0.001,
                         help="confidence threshold.")
-    
+
     parser.add_argument("--iou",
                         type=float,
                         default=0.65,
@@ -80,11 +80,11 @@ def parse_args():
                         type=bool,
                         default=False,
                         help="Run performance test only")
-    parser.add_argument("--loop_count", 
-                        type=int, 
-                        default=-1, 
+    parser.add_argument("--loop_count",
+                        type=int,
+                        default=-1,
                         help="loop count")
-    
+
     args = parser.parse_args()
 
     return args
@@ -188,11 +188,11 @@ def main():
 
     host_mem = tensorrt.IHostMemory
     logger = tensorrt.Logger(tensorrt.Logger.ERROR)
-    
+
     # Load Engine
     engine, context = create_engine_context(args.engine, logger)
     inputs, outputs, allocations = get_io_bindings(engine)
-    
+
     # Warm up
     print("\nWarm Start.")
     for i in range(args.warmup):
@@ -218,27 +218,30 @@ def main():
                                     conf_thres=args.conf,
                                     iou_thres=args.iou,
                                     image_size=640)
-        
+        start_time = time.time()
         for all_inputs in tqdm(dataloader):
             image = all_inputs[0]
             pad_batch = len(image) != batch_size
             if pad_batch:
                 origin_size = len(image)
                 image = np.resize(image, (batch_size, *image.shape[1:]))
-            
+
             cuda.memcpy_htod(inputs[0]["allocation"], image)
             context.execute_v2(allocations)
-            
+
             cuda.memcpy_dtoh(output_np, outputs[0]["allocation"])
             # print("output_np")
             # print(output_np)
-                
+
             if pad_batch:
                 output_np = output_np[:origin_size]
-                
+
             evaluator.evaluate(output_np, all_inputs)
-    
+        end_time = time.time()
+        end2end_time = end_time - start_time
+        print(F"E2E time : {end2end_time:.3f} seconds")
+
         evaluator.summary()
-    
+
 if __name__ == "__main__":
     main()
\ No newline at end of file
diff --git a/models/cv/detection/yolox/ixrt/python/utils.py b/models/cv/detection/yolox/ixrt/python/utils.py
index e6bca6dcd5bfa645d98b54e9e98ecd8f0a0b8d67..314aed2e7d9c20cf71e205a6a032369f8697c21f 100644
--- a/models/cv/detection/yolox/ixrt/python/utils.py
+++ b/models/cv/detection/yolox/ixrt/python/utils.py
@@ -82,7 +82,7 @@ class COCO2017Dataset(torch.utils.data.Dataset):
         self.input_layout = input_layout
 
         self.coco = COCO(annotation_file=self.label_json_path)
-        
+
         if self.val_mode:
             self.img_ids = list(sorted(self.coco.imgs.keys()))
         else:
@@ -96,7 +96,7 @@ class COCO2017Dataset(torch.utils.data.Dataset):
         img = self._load_image(img_path)
 
         img, r = self.preproc(img, input_size=self.image_size)
-        
+
         return img, img_path, r
 
     def _get_image_path(self, index):
@@ -110,13 +110,13 @@ class COCO2017Dataset(torch.utils.data.Dataset):
         assert img is not None, f"file {img_path} not found"
 
         return img
-    
+
     def preproc(self, img, input_size, swap=(2, 0, 1)):
         if len(img.shape) == 3:
             padded_img = np.ones((input_size[0], input_size[1], 3), dtype=np.uint8) * 114
         else:
             padded_img = np.ones(input_size, dtype=np.uint8) * 114
-        
+
         org_img = (img.shape[0], img.shape[1])
         r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])
         resized_img = cv2.resize(
@@ -128,10 +128,10 @@ class COCO2017Dataset(torch.utils.data.Dataset):
 
         padded_img = padded_img.transpose(swap)
         padded_img = np.ascontiguousarray(padded_img, dtype=np.float32)
-        
+
         return padded_img, org_img
 
-    
+
     def _load_json_label(self, index):
         _, (h0, w0), _ = self._load_image(index)
 
@@ -171,19 +171,19 @@ def get_coco_accuracy(pred_json, ann_json):
     coco_pred = coco.loadRes(pred_json)
 
     coco_evaluator = COCOeval(cocoGt=coco, cocoDt=coco_pred, iouType="bbox")
-            
+
     coco_evaluator.evaluate()
     coco_evaluator.accumulate()
     coco_evaluator.summarize()
     return coco_evaluator.stats
 
-class COCO2017Evaluator:    
+class COCO2017Evaluator:
     def __init__(self,
                  label_path,
                  image_size=640,
                  conf_thres=0.001,
                  iou_thres=0.65):
-        
+
         self.conf_thres = conf_thres
         self.iou_thres = iou_thres
         self.label_path = label_path
@@ -192,14 +192,14 @@ class COCO2017Evaluator:
         self.jdict = []
 
         # iou vector for mAP@0.5:0.95
-        self.iouv = torch.linspace(0.5, 0.95, 10)  
+        self.iouv = torch.linspace(0.5, 0.95, 10)
         self.niou = self.iouv.numel()
-    
+
     def evaluate(self, pred, all_inputs):
         im = all_inputs[0]
         img_path = all_inputs[1]
         img_info = all_inputs[2]
-        
+
         _, _, height, width = im.shape
         img_size = [height, width]
 
@@ -212,7 +212,7 @@ class COCO2017Evaluator:
         for (output, org_img, path) in zip(nms_outputs, img_info, img_path):
             if output is None:
                 continue
-            
+
             bboxes = output[:, 0:4]
 
             img_h, img_w = org_img
@@ -222,11 +222,11 @@ class COCO2017Evaluator:
             bboxes /= scale
             cls = output[:, 6]
             scores = output[:, 4] * output[:, 5]
-            
+
             bboxes = self._xyxy2xywh(bboxes)
 
             self._save_one_json(bboxes, cls, scores, self.jdict, path, coco80_to_coco91)
-        
+
     def Detect(self, outputs, img_size):
         grids = []
         expanded_strides = []
@@ -247,7 +247,7 @@ class COCO2017Evaluator:
         expanded_strides = np.concatenate(expanded_strides, 1)
         outputs[..., :2] = (outputs[..., :2] + grids) * expanded_strides
         outputs[..., 2:4] = np.exp(outputs[..., 2:4]) * expanded_strides
-        
+
         return outputs
 
     def postprocess(self, prediction, num_classes=80, conf_thre=0.7, nms_thre=0.45, class_agnostic=False):
@@ -257,7 +257,7 @@ class COCO2017Evaluator:
         box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
         box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
         prediction[:, :, :4] = box_corner[:, :, :4]
-        
+
         output = [None for _ in range(len(prediction))]
 
         for i, image_pred in enumerate(prediction):
@@ -271,7 +271,7 @@ class COCO2017Evaluator:
             # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
             detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1)
             detections = detections[conf_mask]
-            
+
             if not detections.size(0):
                 continue
             if class_agnostic:
@@ -295,7 +295,7 @@ class COCO2017Evaluator:
                 output[i] = torch.cat((output[i], detections))
 
         return output
-    
+
     def _xyxy2xywh(self, bboxes):
         bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 0]
         bboxes[:, 3] = bboxes[:, 3] - bboxes[:, 1]
diff --git a/models/cv/segmentation/mask_rcnn/ixrt/python/maskrcnn.py b/models/cv/segmentation/mask_rcnn/ixrt/python/maskrcnn.py
index ce68fec2c54fed9c26a4fdeeae6e4158e9dce71d..6362d01167e0f3a8490b9d16875a12d39d83c7b2 100644
--- a/models/cv/segmentation/mask_rcnn/ixrt/python/maskrcnn.py
+++ b/models/cv/segmentation/mask_rcnn/ixrt/python/maskrcnn.py
@@ -275,6 +275,7 @@ def get_maskrcnn_acc(config):
     print("Warmup done !\nStart forward ...")
     
     # run
+    start_time = time.time()
     for batch_data, batch_img_shape, batch_img_id, batched_paddings, paths in tqdm(dataloader):
         batch_data = batch_data.numpy()
         batch_img_shape = batch_img_shape.numpy()
@@ -312,7 +313,10 @@ def get_maskrcnn_acc(config):
             batched_paddings[0]
         )
         save2json(batch_img_id, bboxs_masks, json_result, class_map)
-        
+    end_time = time.time()
+    end2end_time = end_time - start_time
+
+    print(F"E2E time : {end2end_time:.3f} seconds")
     print("Forward done !")
     
     tmp_result_name = "pred_results.json"
diff --git a/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/evaluate-v1.1.py b/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/evaluate-v1.1.py
index 92c4e83bf7f150156108b7ccd99f0a9373222c2a..67d6c18245b7eec0c8a995fc2a7284715429b498 100644
--- a/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/evaluate-v1.1.py
+++ b/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/evaluate-v1.1.py
@@ -26,6 +26,7 @@ import re
 import argparse
 import json
 import sys
+import time
 
 def normalize_answer(s):
     """Lower text and remove punctuation, articles and extra whitespace."""
diff --git a/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/inference.py b/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/inference.py
index a85e765c91152562d6180307c2bb1317dc385356..b6af06dcf683496128bbbdb9e5458f8b2753885d 100644
--- a/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/inference.py
+++ b/models/nlp/language_model/bert_base_squad/ixrt/python/ixrt/inference.py
@@ -379,7 +379,8 @@ if __name__ == '__main__':
                 lengths.append(len(features[0].input_ids))
 
             sort_index = np.argsort(lengths)
-            infer_time, all_predictions = inference_all_dynamic(features_list, squad_examples, sort_index, all_predictions)          
+            infer_time, all_predictions = inference_all_dynamic(features_list, squad_examples, sort_index, all_predictions)
+            print(F"E2E time : {infer_time:.3f} seconds")
             
             qps = math.ceil(len(squad_examples)/args.batch_size)*args.batch_size/infer_time
             print(f"Latency QPS: {qps} sentences/s")
diff --git a/models/nlp/language_model/bert_large_squad/ixrt/python/inference.py b/models/nlp/language_model/bert_large_squad/ixrt/python/inference.py
index 8685604f7e259263be098378f52d6ff90514a6ac..860322c3ed5873e0002d9aa24a011394dd92e570 100644
--- a/models/nlp/language_model/bert_large_squad/ixrt/python/inference.py
+++ b/models/nlp/language_model/bert_large_squad/ixrt/python/inference.py
@@ -372,7 +372,8 @@ if __name__ == '__main__':
                 lengths.append(len(features[0].input_ids))
 
             sort_index = np.argsort(lengths)
-            infer_time, all_predictions = inference_all_dynamic(features_list, squad_examples, sort_index, all_predictions)          
+            infer_time, all_predictions = inference_all_dynamic(features_list, squad_examples, sort_index, all_predictions)
+            print(F"E2E time : {infer_time:.3f} seconds")
             
             qps = len(squad_examples)/infer_time
             print(f"Latency QPS: {qps} sentences/s")