diff --git a/README.md b/README.md
index 0e015465b17afdafd6ad4c19e62683b38c5c01cd..d33db3066de534e8fb3aead86ab9ab5ea63b252c 100644
--- a/README.md
+++ b/README.md
@@ -69,6 +69,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
DenseNet121 |
FP16 |
Supported |
+ Supported |
+
+
+ INT8 |
+ - |
+ - |
+
+
+ DenseNet161 |
+ FP16 |
+ Supported |
- |
@@ -90,7 +101,7 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
EfficientNet_B1 |
FP16 |
- - |
+ Supported |
Supported |
@@ -98,6 +109,28 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
- |
Supported |
+
+ EfficientNetV2 |
+ FP16 |
+ - |
+ Supported |
+
+
+ INT8 |
+ - |
+ Supported |
+
+
+ EfficientNetv2_rw_t |
+ FP16 |
+ Supported |
+ - |
+
+
+ INT8 |
+ - |
+ - |
+
GoogLeNet |
FP16 |
@@ -113,23 +146,34 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
HRNet-W18 |
FP16 |
Supported |
- - |
+ Supported |
INT8 |
- |
- - |
+ Supported |
InceptionV3 |
FP16 |
Supported |
- - |
+ Supported |
INT8 |
Supported |
+ Supported |
+
+
+ Inception_ResNet_V2 |
+ FP16 |
- |
+ Supported |
+
+
+ INT8 |
+ - |
+ Supported |
MobileNetV2 |
@@ -143,9 +187,20 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
Supported |
- MobileNetV3 |
+ MobileNetV3_Large |
FP16 |
+ Supported |
- |
+
+
+ INT8 |
+ - |
+ - |
+
+
+ MobileNetV3_Small |
+ FP16 |
+ Supported |
Supported |
@@ -153,6 +208,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
- |
- |
+
+ RegNet_x_1_6gf |
+ FP16 |
+ Supported |
+ - |
+
+
+ INT8 |
+ - |
+ - |
+
RepVGG |
FP16 |
@@ -167,9 +233,20 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
Res2Net50 |
FP16 |
+ Supported |
+ Supported |
+
+
+ INT8 |
- |
Supported |
+
+ ResNeSt50 |
+ FP16 |
+ Supported |
+ - |
+
INT8 |
- |
@@ -178,12 +255,23 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
ResNet101 |
FP16 |
- - |
- Supported |
+ Supported |
+ Supported |
INT8 |
+ Supported |
+ Supported |
+
+
+ ResNet152 |
+ FP16 |
+ Supported |
- |
+
+
+ INT8 |
+ Supported |
- |
@@ -219,6 +307,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
Supported |
- |
+
+ ResNet_V1_D50 |
+ FP16 |
+ - |
+ Supported |
+
+
+ INT8 |
+ - |
+ Supported |
+
ResNeXt50_32x4d |
FP16 |
@@ -227,7 +326,7 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
INT8 |
- Supported |
+ - |
- |
@@ -241,6 +340,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
- |
- |
+
+ ShuffleNetV2_x0_5 |
+ FP16 |
+ Supported |
+ - |
+
+
+ INT8 |
+ - |
+ - |
+
SqueezeNet 1.0 |
FP16 |
@@ -252,6 +362,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
- |
Supported |
+
+ SqueezeNet 1.1 |
+ FP16 |
+ - |
+ Supported |
+
+
+ INT8 |
+ - |
+ Supported |
+
Swin Transformer |
FP16 |
@@ -274,6 +395,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
Supported |
- |
+
+ Wide_ResNet50 |
+ FP16 |
+ Supported |
+ - |
+
+
+ INT8 |
+ Supported |
+ - |
+
### Detection
@@ -285,6 +417,61 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
IGIE |
IxRT |
+
+ CenterNet |
+ FP16 |
+ Supported |
+ - |
+
+
+ INT8 |
+ - |
+ - |
+
+
+ DETR |
+ FP16 |
+ - |
+ Supported |
+
+
+ INT8 |
+ - |
+ - |
+
+
+ FCOS |
+ FP16 |
+ - |
+ Supported |
+
+
+ INT8 |
+ - |
+ - |
+
+
+ FoveaBox |
+ FP16 |
+ Supported |
+ - |
+
+
+ INT8 |
+ - |
+ - |
+
+
+ HRNet |
+ FP16 |
+ Supported |
+ - |
+
+
+ INT8 |
+ - |
+ - |
+
RetinaNet |
FP16 |
@@ -300,12 +487,12 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
YOLOv3 |
FP16 |
Supported |
- - |
+ Supported |
INT8 |
Supported |
- - |
+ Supported |
YOLOv4 |
@@ -322,12 +509,23 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
YOLOv5 |
FP16 |
Supported |
- - |
+ Supported |
INT8 |
Supported |
+ Supported |
+
+
+ YOLOv5s |
+ FP16 |
- |
+ Supported |
+
+
+ INT8 |
+ - |
+ Supported |
YOLOv6 |
@@ -344,12 +542,12 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
YOLOv7 |
FP16 |
Supported |
- - |
+ Supported |
INT8 |
Supported |
- - |
+ Supported |
YOLOv8 |
@@ -395,6 +593,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
- |
- |
+
+ SOLOv1 |
+ FP16 |
+ - |
+ Supported |
+
+
+ INT8 |
+ - |
+ - |
+
### Trace
@@ -428,6 +637,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
Supported |
- |
+
+ RepNet-Vehicle-ReID |
+ FP16 |
+ Supported |
+ - |
+
+
+ INT8 |
+ - |
+ - |
+
## NLP
@@ -461,7 +681,7 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
INT8 |
- |
- - |
+ Supported |
BERT Large SQuAD |
@@ -476,6 +696,41 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
+### Large Language Model
+
+
+
+ Models |
+ vLLM |
+ TensorRT-LLM |
+ TGI |
+
+
+ Baichuan2-7B |
+ Supported |
+ - |
+ - |
+
+
+ ChatGLM-3-6B |
+ Supported |
+ - |
+ - |
+
+
+ Llama2-7B |
+ - |
+ Supported |
+ - |
+
+
+ Qwen-7B |
+ - |
+ - |
+ Supported |
+
+
+
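+As a quick orientation for the vLLM column above, a minimal offline-inference sketch (the checkpoint path is an assumption; any model marked Supported under vLLM should work the same way):
+
+```python
+from vllm import LLM, SamplingParams
+
+# Path is illustrative -- point it at a locally downloaded checkpoint.
+llm = LLM(model="./data/chatglm3-6b", trust_remote_code=True)
+sampling = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=128)
+
+# generate() batches prompts and returns one RequestOutput per prompt.
+outputs = llm.generate(["What is deep learning?"], sampling)
+for output in outputs:
+    print(output.outputs[0].text)
+```
+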
## Speech
### Speech Recognition
@@ -500,7 +755,7 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
-------
+---
## 社区
diff --git a/models/cv/classification/densenet121/ixrt/README.md b/models/cv/classification/densenet121/ixrt/README.md
index 3468b21a221109b56a1718439be86436d6b378c7..9b5c20781e62eabf873f71a0dc63f61a4b015a29 100644
--- a/models/cv/classification/densenet121/ixrt/README.md
+++ b/models/cv/classification/densenet121/ixrt/README.md
@@ -54,6 +54,6 @@ bash scripts/infer_densenet_fp16_performance.sh
## Results
-Model |BatchSize |Precision |FPS |Top-1(%) |Top-5(%)
----------|-----------|----------|----------|----------|--------
-DenseNet | | FP16 | 1536.89 | 0.7442 | 0.9197
+| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) |
+| -------- | --------- | --------- | ------- | -------- | -------- |
+| DenseNet | 32 | FP16 | 1536.89 | 0.7442 | 0.9197 |
diff --git a/models/cv/classification/efficientnet_v2/ixrt/README.md b/models/cv/classification/efficientnet_v2/ixrt/README.md
index 098105ce9d0eafc99001a3a1b1a8878c0f7ce590..88ccc2aaa2b2f30a783ef3cfbdcee5c721984b3c 100755
--- a/models/cv/classification/efficientnet_v2/ixrt/README.md
+++ b/models/cv/classification/efficientnet_v2/ixrt/README.md
@@ -1,4 +1,4 @@
-# EfficientnetV2
+# EfficientNetV2
## Description
diff --git a/models/cv/classification/hrnet_w18/ixrt/README.md b/models/cv/classification/hrnet_w18/ixrt/README.md
index 00cf3b2e5bbec9cabdbf25801177aa7d3a368f31..278d5427e513093372c6e8626595d4a4987fc296 100644
--- a/models/cv/classification/hrnet_w18/ixrt/README.md
+++ b/models/cv/classification/hrnet_w18/ixrt/README.md
@@ -64,7 +64,7 @@ bash scripts/infer_hrnet_w18_int8_performance.sh
## Results
-Model |BatchSize |Precision |FPS |Top-1(%) |Top-5(%)
----------|-----------|----------|----------|----------|--------
-ResNet50 | | | | |
-ResNet50 | | | | |
+| Model     | BatchSize | Precision | FPS     | Top-1(%) | Top-5(%) |
+| --------- | --------- | --------- | ------- | -------- | -------- |
+| HRNet-W18 | 32        | FP16      | 1474.26 | 0.76764  | 0.93446  |
+| HRNet-W18 | 32        | INT8      | 1649.40 | 0.76158  | 0.93152  |
diff --git a/models/cv/classification/inceptionresnetv2/ixrt/README.md b/models/cv/classification/inceptionresnetv2/ixrt/README.md
index c0be6674e0480cc4d46d932e00cc07c66b23d74f..6469019389d5b4e42107ea77da0f70fdfb5a0ae3 100755
--- a/models/cv/classification/inceptionresnetv2/ixrt/README.md
+++ b/models/cv/classification/inceptionresnetv2/ixrt/README.md
@@ -1,4 +1,4 @@
-# InceptionResNetV2
+# Inception-ResNet-V2
## Description
diff --git a/models/cv/classification/resnet_v1_d50/ixrt/README.md b/models/cv/classification/resnet_v1_d50/ixrt/README.md
index 06a1ed34774be43ddc1d5f52019be0b878ab463c..42880951cd62081e983f4d1b7762004da3690325 100644
--- a/models/cv/classification/resnet_v1_d50/ixrt/README.md
+++ b/models/cv/classification/resnet_v1_d50/ixrt/README.md
@@ -1,4 +1,4 @@
-# ResNet50
+# ResNet_V1_D50
## Description
@@ -64,7 +64,7 @@ bash scripts/infer_resnet_v1_d50_int8_performance.sh
## Results
-Model |BatchSize |Precision |FPS |Top-1(%) |Top-5(%)
----------|-----------|----------|----------|----------|--------
-ResNet50 | | FP16 | 3887.55 | 0.77544 | 0.93568
-ResNet50 | | INT8 | 7148.58 | 0.7711 | 0.93514
+| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) |
+| ------------- | --------- | --------- | ------- | -------- | -------- |
+| ResNet_V1_D50 | 32 | FP16 | 3887.55 | 0.77544 | 0.93568 |
+| ResNet_V1_D50 | 32 | INT8 | 7148.58 | 0.7711 | 0.93514 |
diff --git a/models/cv/classification/squeezenet_1.1/ixrt/README.md b/models/cv/classification/squeezenet_1.1/ixrt/README.md
index 088ee0adf3deef26558346dd829947c216ae4eef..08fe037a0c7c3f1037b531c440dc553d43ebdb60 100644
--- a/models/cv/classification/squeezenet_1.1/ixrt/README.md
+++ b/models/cv/classification/squeezenet_1.1/ixrt/README.md
@@ -70,7 +70,7 @@ bash scripts/infer_squeezenet_v11_int8_performance.sh
## Results
-Model |BatchSize |Precision |FPS |Top-1(%) |Top-5(%)
----------------|-----------|----------|---------|----------|--------
-SqueezeNet 1.1 | | FP16 | 13701 | 0.58182 | 0.80622
-SqueezeNet 1.1 | | INT8 | 20128 | 0.50966 | 0.77552
+| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) |
+| -------------- | --------- | --------- | ----- | -------- | -------- |
+| SqueezeNet 1.1 | 32 | FP16 | 13701 | 0.58182 | 0.80622 |
+| SqueezeNet 1.1 | 32 | INT8 | 20128 | 0.50966 | 0.77552 |
diff --git a/models/cv/detection/detr/ixrt/README.md b/models/cv/detection/detr/ixrt/README.md
index 5d05389dce9411d113d2cacc7b9387003b863acb..28df3f60e99b4c3901c0ee9c3c74aaa0946e7935 100755
--- a/models/cv/detection/detr/ixrt/README.md
+++ b/models/cv/detection/detr/ixrt/README.md
@@ -1,4 +1,4 @@
-# Detr
+# DETR
## Description
@@ -63,4 +63,4 @@ bash scripts/infer_detr_fp16_performance.sh
Model |BatchSize |Precision |FPS |MAP@0.5 |MAP@0.5:0.95
--------|-----------|----------|----------|----------|------------
-Detr | 1 | FP16 | 65.84 | 0.370 | 0.198
+DETR | 1 | FP16 | 65.84 | 0.370 | 0.198
diff --git a/models/cv/detection/fcos/ixrt/README.md b/models/cv/detection/fcos/ixrt/README.md
index 244e8f6acd88d6ed1c21bf628992c462eea4a93d..49db1e04c9263472bd8db0675e3b543098f13362 100755
--- a/models/cv/detection/fcos/ixrt/README.md
+++ b/models/cv/detection/fcos/ixrt/README.md
@@ -1,4 +1,5 @@
# FCOS
+
## Description
FCOS is an anchor-free model based on the Fully Convolutional Network (FCN) architecture for pixel-wise object detection. It implements a proposal-free solution and introduces the concept of centerness.
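+
+For reference, the centerness the description mentions is defined in the FCOS paper: for a location with distances (l, t, r, b) to the four sides of its ground-truth box,
+
+$$\text{centerness} = \sqrt{\frac{\min(l, r)}{\max(l, r)} \times \frac{\min(t, b)}{\max(t, b)}}$$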
@@ -7,8 +8,14 @@ For more details, please refer to our [report on Arxiv](https://arxiv.org/abs/19
## Setup
### Install
-```
-yum install mesa-libGL
+
+```bash
+# Install libGL
+## CentOS
+yum install -y mesa-libGL
+## Ubuntu
+apt install -y libgl1-mesa-dev
+
pip3 install tqdm
pip3 install onnx
pip3 install onnxsim
@@ -36,13 +43,14 @@ sh install_mmcv.sh
Pretrained model:
-- COCO2017数据集准备参考: https://cocodataset.org/
+- COCO2017数据集准备参考:<https://cocodataset.org/>
- 图片目录: Path/To/val2017/*.jpg
- 标注文件目录: Path/To/annotations/instances_val2017.json
### Model Conversion
MMDetection is an open-source object detection toolbox based on PyTorch and part of the OpenMMLab project; it is used here for model conversion. Before executing the conversion command in MMDetection, create the checkpoints folder in the project root (mkdir checkpoints).
+
```bash
git clone -b v2.25.0 https://github.com/open-mmlab/mmdetection.git
@@ -59,12 +67,13 @@ python3 tools/deployment/pytorch2onnx.py \
--skip-postprocess \
--dynamic-export \
--cfg-options \
- model.test_cfg.deploy_nms_pre=-1 \
-
+ model.test_cfg.deploy_nms_pre=-1
```
-If there are issues such as input parameter mismatch during model export, it may be due to ONNX version. To resolve this, please delete the last parameter (dynamic_slice) from the return value of the _slice_helper function in the /usr/local/lib/python3.10/site-packages/mmcv/onnx/onnx_utils/symbolic_helper.py file.
+
+If you hit issues such as an input parameter mismatch during model export, it may be due to the ONNX version. To resolve this, delete the last parameter (dynamic_slice) from the return value of the `_slice_helper` function in /usr/local/lib/python3.10/site-packages/mmcv/onnx/onnx_utils/symbolic_helper.py.
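+
+A sketch of that edit, under the assumption that only the return statement needs to change (the surrounding body differs across mmcv versions, so the names below are illustrative):
+
+```python
+# /usr/local/lib/python3.10/site-packages/mmcv/onnx/onnx_utils/symbolic_helper.py
+def _slice_helper(g, input, axes, starts, ends, steps=None, dynamic_slice=False):
+    sliced = ...  # existing slicing logic, unchanged
+    # Before the fix, the function ended with:
+    #     return sliced, dynamic_slice
+    # Dropping the trailing dynamic_slice from the return value resolves
+    # the input-parameter mismatch seen with some ONNX versions:
+    return sliced
+```
+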
## Inference
+
```bash
export PROJ_DIR=./
export DATASETS_DIR=/Path/to/coco/
@@ -73,6 +82,7 @@ export RUN_DIR=./
```
### FP16
+
```bash
# Accuracy
bash scripts/infer_fcos_fp16_accuracy.sh
@@ -82,6 +92,6 @@ bash scripts/infer_fcos_fp16_performance.sh
## Results
-Model |BatchSize |Precision |FPS |MAP@0.5 |MAP@0.5:0.95 |
---------|-----------|----------|---------|----------|-------------|
-Fcos | 1 | FP16 | 51.62 | 0.546 | 0.360 |
\ No newline at end of file
+| Model | BatchSize | Precision | FPS | MAP@0.5 | MAP@0.5:0.95 |
+| ----- | --------- | --------- | ----- | ------- | ------------ |
+| FCOS | 1 | FP16 | 51.62 | 0.546 | 0.360 |
diff --git a/models/cv/detection/yolov5s/ixrt/README.md b/models/cv/detection/yolov5s/ixrt/README.md
index c189fc703dab614623c827b08cc6543b007832dd..28e6cf73659496be68b7c23598a08931b4893266 100755
--- a/models/cv/detection/yolov5s/ixrt/README.md
+++ b/models/cv/detection/yolov5s/ixrt/README.md
@@ -1,4 +1,4 @@
-# YOLOv5-s
+# YOLOv5s
## Description
diff --git a/models/cv/detection/solov1/ixrt/README.md b/models/cv/segmentation/solov1/ixrt/README.md
similarity index 96%
rename from models/cv/detection/solov1/ixrt/README.md
rename to models/cv/segmentation/solov1/ixrt/README.md
index fbe6fd970406a8b567a41b45c71e1a81432f57db..d675f5494bcb969639fa84b1c2d4af78e0f075bb 100644
--- a/models/cv/detection/solov1/ixrt/README.md
+++ b/models/cv/segmentation/solov1/ixrt/README.md
@@ -69,4 +69,4 @@ bash scripts/infer_solov1_fp16_performance.sh
Model |BatchSize |Precision |FPS |MAP@0.5 |MAP@0.5:0.95
--------|-----------|----------|----------|----------|------------
-Solov1 | 1 | FP16 | 24.67 | 0.541 | 0.338
+SOLOv1 | 1 | FP16 | 24.67 | 0.541 | 0.338
diff --git a/models/cv/detection/solov1/ixrt/build_engine.py b/models/cv/segmentation/solov1/ixrt/build_engine.py
similarity index 100%
rename from models/cv/detection/solov1/ixrt/build_engine.py
rename to models/cv/segmentation/solov1/ixrt/build_engine.py
diff --git a/models/cv/detection/solov1/ixrt/coco_instance.py b/models/cv/segmentation/solov1/ixrt/coco_instance.py
similarity index 100%
rename from models/cv/detection/solov1/ixrt/coco_instance.py
rename to models/cv/segmentation/solov1/ixrt/coco_instance.py
diff --git a/models/cv/detection/solov1/ixrt/common.py b/models/cv/segmentation/solov1/ixrt/common.py
similarity index 100%
rename from models/cv/detection/solov1/ixrt/common.py
rename to models/cv/segmentation/solov1/ixrt/common.py
diff --git a/models/cv/detection/solov1/ixrt/scripts/infer_solov1_fp16_accuracy.sh b/models/cv/segmentation/solov1/ixrt/scripts/infer_solov1_fp16_accuracy.sh
similarity index 100%
rename from models/cv/detection/solov1/ixrt/scripts/infer_solov1_fp16_accuracy.sh
rename to models/cv/segmentation/solov1/ixrt/scripts/infer_solov1_fp16_accuracy.sh
diff --git a/models/cv/detection/solov1/ixrt/scripts/infer_solov1_fp16_performance.sh b/models/cv/segmentation/solov1/ixrt/scripts/infer_solov1_fp16_performance.sh
similarity index 100%
rename from models/cv/detection/solov1/ixrt/scripts/infer_solov1_fp16_performance.sh
rename to models/cv/segmentation/solov1/ixrt/scripts/infer_solov1_fp16_performance.sh
diff --git a/models/cv/detection/solov1/ixrt/simplify_model.py b/models/cv/segmentation/solov1/ixrt/simplify_model.py
similarity index 100%
rename from models/cv/detection/solov1/ixrt/simplify_model.py
rename to models/cv/segmentation/solov1/ixrt/simplify_model.py
diff --git a/models/cv/detection/solov1/ixrt/solo_r50_fpn_3x_coco.py b/models/cv/segmentation/solov1/ixrt/solo_r50_fpn_3x_coco.py
similarity index 100%
rename from models/cv/detection/solov1/ixrt/solo_r50_fpn_3x_coco.py
rename to models/cv/segmentation/solov1/ixrt/solo_r50_fpn_3x_coco.py
diff --git a/models/cv/detection/solov1/ixrt/solo_torch2onnx.py b/models/cv/segmentation/solov1/ixrt/solo_torch2onnx.py
similarity index 100%
rename from models/cv/detection/solov1/ixrt/solo_torch2onnx.py
rename to models/cv/segmentation/solov1/ixrt/solo_torch2onnx.py
diff --git a/models/cv/detection/solov1/ixrt/solov1_inference.py b/models/cv/segmentation/solov1/ixrt/solov1_inference.py
similarity index 100%
rename from models/cv/detection/solov1/ixrt/solov1_inference.py
rename to models/cv/segmentation/solov1/ixrt/solov1_inference.py
diff --git a/models/cv/trace/repnet/igie/README.md b/models/cv/trace/repnet/igie/README.md
index a37c6c8bcfa1efaea56226824cda28c971e8b69f..03659b81e537bde9a068deb796c10a0ab1377f13 100644
--- a/models/cv/trace/repnet/igie/README.md
+++ b/models/cv/trace/repnet/igie/README.md
@@ -1,4 +1,4 @@
-# RepNet-VehicleReID
+# RepNet-Vehicle-ReID
## Description
diff --git a/models/nlp/language_model/bert_base_squad/igie/README.md b/models/nlp/language_model/bert_base_squad/igie/README.md
index cff33a6402d1533458a5f0a1146e95202d900f65..4c4ab62954a6e398012c3df39e7b52d338e30bdc 100644
--- a/models/nlp/language_model/bert_base_squad/igie/README.md
+++ b/models/nlp/language_model/bert_base_squad/igie/README.md
@@ -43,6 +43,6 @@ bash scripts/infer_bert_base_squad_fp16_performance.sh
## Results
-Model |BatchSize |SeqLength |Precision |FPS | F1 Score
------------------|-----------|----------|----------|----------|--------
-Bertbase(Squad) | 8 | 256 | FP16 |901.81 | 88.08
+| Model | BatchSize | SeqLength | Precision | FPS | F1 Score |
+| --------------- | --------- | --------- | --------- | ------ | -------- |
+| BERT Base SQuAD | 8 | 256 | FP16 | 901.81 | 88.08 |
diff --git a/models/nlp/language_model/bert_base_squad/ixrt/README.md b/models/nlp/language_model/bert_base_squad/ixrt/README.md
index 0a7f4b3302ced700a3acefca5079b4e6bb3fed09..acc3592b59533fcfd6334628977c78b05916de02 100644
--- a/models/nlp/language_model/bert_base_squad/ixrt/README.md
+++ b/models/nlp/language_model/bert_base_squad/ixrt/README.md
@@ -12,23 +12,23 @@ BERT is designed to pre-train deep bidirectional representations from unlabeled
docker pull nvcr.io/nvidia/tensorrt:23.04-py3
```
-### Install
+## Install
-#### On iluvatar
+### Install on Iluvatar
```bash
cmake -S . -B build
cmake --build build -j16
```
-#### On T4
+### Install on T4
```bash
cmake -S . -B build -DUSE_TENSORRT=true
cmake --build build -j16
```
-### Download
+## Download
```bash
cd python
@@ -37,17 +37,6 @@ bash script/prepare.sh v1_1
## Inference
-### On T4
-
-```bash
-# FP16
-cd python
-pip install onnx pycuda
-# use --bs to set max_batch_size (dynamic)
-bash script/build_engine.sh --bs 32
-bash script/inference_squad.sh --bs 32
-```
-
```bash
# INT8
cd python
@@ -55,25 +44,41 @@ pip install onnx pycuda
bash script/build_engine.sh --bs 32 --int8
bash script/inference_squad.sh --bs 32 --int8
```
-#### On iluvatar
+
+### On Iluvatar
+
+#### FP16
```bash
-# FP16
cd python/script
bash infer_bert_base_squad_fp16_ixrt.sh
```
+#### INT8
+
```bash
-# INT8
cd python/script
bash infer_bert_base_squad_int8_ixrt.sh
```
+### On T4
+
+```bash
+# FP16
+cd python
+pip install onnx pycuda
+# use --bs to set max_batch_size (dynamic)
+bash script/build_engine.sh --bs 32
+bash script/inference_squad.sh --bs 32
+```
+
## Results
-Model | BatchSize | Precision | FPS | ACC
-------|-----------|-----------|-----|----
-BERT-Base-SQuAD | 32 | fp16 | Latency QPS: 1543.40 sentences/s | "exact_match": 80.92, "f1": 88.20
+| Model           | BatchSize | Precision | QPS (sentences/s) | exact_match | f1    |
+| --------------- | --------- | --------- | ----------------- | ----------- | ----- |
+| BERT Base SQuAD | 32        | FP16      | 1444.69           | 80.92       | 88.20 |
+| BERT Base SQuAD | 32        | INT8      | 2325.20           | 78.41       | 86.97 |
+
+## Reference
-## Referenece
-- [bert-base-uncased.zip 外网链接](https://drive.google.com/file/d/1_DJDdKBanqJ6h3VGhH78F9EPgE2wK_Tw/view?usp=drive_link)
+- [bert-base-uncased.zip](https://drive.google.com/file/d/1_DJDdKBanqJ6h3VGhH78F9EPgE2wK_Tw/view?usp=drive_link)
diff --git a/models/nlp/large_language_model/chatglm/vllm/README.md b/models/nlp/large_language_model/chatglm3-6b/vllm/README.md
similarity index 100%
rename from models/nlp/large_language_model/chatglm/vllm/README.md
rename to models/nlp/large_language_model/chatglm3-6b/vllm/README.md
diff --git a/models/nlp/large_language_model/chatglm/vllm/offline_inference.py b/models/nlp/large_language_model/chatglm3-6b/vllm/offline_inference.py
similarity index 100%
rename from models/nlp/large_language_model/chatglm/vllm/offline_inference.py
rename to models/nlp/large_language_model/chatglm3-6b/vllm/offline_inference.py
diff --git a/models/nlp/large_language_model/chatglm/vllm/server_inference.py b/models/nlp/large_language_model/chatglm3-6b/vllm/server_inference.py
similarity index 100%
rename from models/nlp/large_language_model/chatglm/vllm/server_inference.py
rename to models/nlp/large_language_model/chatglm3-6b/vllm/server_inference.py
diff --git a/models/nlp/large_language_model/chatglm/vllm/utils.py b/models/nlp/large_language_model/chatglm3-6b/vllm/utils.py
similarity index 100%
rename from models/nlp/large_language_model/chatglm/vllm/utils.py
rename to models/nlp/large_language_model/chatglm3-6b/vllm/utils.py
diff --git a/models/nlp/large_language_model/llama2-13b/trtllm/README.md b/models/nlp/large_language_model/llama2-13b/trtllm/README.md
index d10629b12e943dac4b03806888c66e4069074d35..afb1d13d571899b03fa70593ab0b9b4bd1600f85 100755
--- a/models/nlp/large_language_model/llama2-13b/trtllm/README.md
+++ b/models/nlp/large_language_model/llama2-13b/trtllm/README.md
@@ -53,3 +53,9 @@ bash scripts/test_trtllm_llama2_13b_gpu2_build.sh
# Inference
bash scripts/test_trtllm_llama2_13b_gpu2.sh
```
+
+## Results
+
+| Model | tokens | tokens per second |
+| ---------- | ------ | ----------------- |
+| Llama2 13B | 1596 | 33.39 |
diff --git a/models/nlp/large_language_model/qwen-7b/text-generation-inference/README.md b/models/nlp/large_language_model/qwen-7b/text-generation-inference/README.md
index 65bf7afaf1c3fab51bf7ff3b9909895f135a28ef..348850b3eaa5bb62c11753ef6dce9212aa87e7a8 100644
--- a/models/nlp/large_language_model/qwen-7b/text-generation-inference/README.md
+++ b/models/nlp/large_language_model/qwen-7b/text-generation-inference/README.md
@@ -53,3 +53,9 @@ ENABLE_INFER_PG=1 CUDA_VISIBLE_DEVICES=0 USE_FLASH_ATTENTION=true text-generatio
export CUDA_VISIBLE_DEVICES=1
python3 offline_inference.py --model2path ./data/qwen-7B
```
+
+## Results
+
+| Model | QPS |
+| ------- | ----- |
+| Qwen-7B | 35.64 |