diff --git a/README.md b/README.md index 0e015465b17afdafd6ad4c19e62683b38c5c01cd..d33db3066de534e8fb3aead86ab9ab5ea63b252c 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 DenseNet121 FP16 Supported + Supported + + + INT8 + - + - + + + DenseNet161 + FP16 + Supported - @@ -90,7 +101,7 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 EfficientNet_B1 FP16 - - + Supported Supported @@ -98,6 +109,28 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 - Supported + + EfficientNetV2 + FP16 + - + Supported + + + INT8 + - + Supported + + + EfficientNetv2_rw_t + FP16 + Supported + - + + + INT8 + - + - + GoogLeNet FP16 @@ -113,23 +146,34 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 HRNet-W18 FP16 Supported - - + Supported INT8 - - - + Supported InceptionV3 FP16 Supported - - + Supported INT8 Supported + Supported + + + Inception_ResNet_V2 + FP16 - + Supported + + + INT8 + - + Supported MobileNetV2 @@ -143,9 +187,20 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 Supported - MobileNetV3 + MobileNetV3_Large FP16 + Supported - + + + INT8 + - + - + + + MobileNetV3_Small + FP16 + Supported Supported @@ -153,6 +208,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 - - + + RegNet_x_1_6gf + FP16 + Supported + - + + + INT8 + - + - + RepVGG FP16 @@ -167,9 +233,20 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 Res2Net50 FP16 + Supported + Supported + + + INT8 - Supported + + ResNeSt50 + FP16 + Supported + - + INT8 - @@ -178,12 +255,23 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 ResNet101 FP16 - - - Supported + Supported + Supported INT8 + Supported + Supported + + + ResNet152 + FP16 + Supported - + + + INT8 + Supported - @@ -219,6 +307,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 Supported - + + ResNet_V1_D50 + FP16 + - + Supported + + + INT8 + - + Supported + ResNeXt50_32x4d FP16 @@ -227,7 +326,7 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 INT8 - Supported + - - @@ -241,6 +340,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 - - + + ShuffleNetV2_x0_5 + FP16 + Supported + - + + + 
INT8 + - + - + SqueezeNet 1.0 FP16 @@ -252,6 +362,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 - Supported + + SqueezeNet 1.1 + FP16 + - + Supported + + + INT8 + - + Supported + Swin Transformer FP16 @@ -274,6 +395,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 Supported - + + Wide_ResNet50 + FP16 + Supported + - + + + INT8 + Supported + - + ### Detection @@ -285,6 +417,61 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 IGIE IxRT + + CenterNet + FP16 + Supported + - + + + INT8 + - + - + + + DETR + FP16 + - + Supported + + + INT8 + - + - + + + FCOS + FP16 + - + Supported + + + INT8 + - + - + + + FoveaBox + FP16 + Supported + - + + + INT8 + - + - + + + HRNet + FP16 + Supported + - + + + INT8 + - + - + RetinaNet FP16 @@ -300,12 +487,12 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 YOLOv3 FP16 Supported - - + Supported INT8 Supported - - + Supported YOLOv4 @@ -322,12 +509,23 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 YOLOv5 FP16 Supported - - + Supported INT8 Supported + Supported + + + YOLOv5s + FP16 - + Supported + + + INT8 + - + Supported YOLOv6 @@ -344,12 +542,12 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 YOLOv7 FP16 Supported - - + Supported INT8 Supported - - + Supported YOLOv8 @@ -395,6 +593,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 - - + + SOLOv1 + FP16 + - + Supported + + + INT8 + - + - + ### Trace @@ -428,6 +637,17 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 Supported - + + RepNet-Vehicle-ReID + FP16 + Supported + - + + + INT8 + - + - + ## NLP @@ -461,7 +681,7 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 INT8 - - - + Supported BERT Large SQuAD @@ -476,6 +696,41 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 +### Large Language Model + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ModelsvLLMTensorRT-LLMTGI
Baichuan2-7BSupported--
ChatGLM-3-6BSupported--
Llama2-7B-Supported-
Qwen-7B--Supported
+ ## Speech ### Speech Recognition @@ -500,7 +755,7 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 ------- +--- ## 社区 diff --git a/models/cv/classification/densenet121/ixrt/README.md b/models/cv/classification/densenet121/ixrt/README.md index 3468b21a221109b56a1718439be86436d6b378c7..9b5c20781e62eabf873f71a0dc63f61a4b015a29 100644 --- a/models/cv/classification/densenet121/ixrt/README.md +++ b/models/cv/classification/densenet121/ixrt/README.md @@ -54,6 +54,6 @@ bash scripts/infer_densenet_fp16_performance.sh ## Results -Model |BatchSize |Precision |FPS |Top-1(%) |Top-5(%) ----------|-----------|----------|----------|----------|-------- -DenseNet | | FP16 | 1536.89 | 0.7442 | 0.9197 +| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) | +| -------- | --------- | --------- | ------- | -------- | -------- | +| DenseNet | 32 | FP16 | 1536.89 | 0.7442 | 0.9197 | diff --git a/models/cv/classification/efficientnet_v2/ixrt/README.md b/models/cv/classification/efficientnet_v2/ixrt/README.md index 098105ce9d0eafc99001a3a1b1a8878c0f7ce590..88ccc2aaa2b2f30a783ef3cfbdcee5c721984b3c 100755 --- a/models/cv/classification/efficientnet_v2/ixrt/README.md +++ b/models/cv/classification/efficientnet_v2/ixrt/README.md @@ -1,4 +1,4 @@ -# EfficientnetV2 +# EfficientNetV2 ## Description diff --git a/models/cv/classification/hrnet_w18/ixrt/README.md b/models/cv/classification/hrnet_w18/ixrt/README.md index 00cf3b2e5bbec9cabdbf25801177aa7d3a368f31..278d5427e513093372c6e8626595d4a4987fc296 100644 --- a/models/cv/classification/hrnet_w18/ixrt/README.md +++ b/models/cv/classification/hrnet_w18/ixrt/README.md @@ -64,7 +64,7 @@ bash scripts/infer_hrnet_w18_int8_performance.sh ## Results -Model |BatchSize |Precision |FPS |Top-1(%) |Top-5(%) ----------|-----------|----------|----------|----------|-------- -ResNet50 | | | | | -ResNet50 | | | | | +| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) | +| -------- | --------- | --------- | ------- | -------- | -------- | +| ResNet50 | 32 | 
FP16 | 1474.26 | 0.76764 | 0.93446 | +| ResNet50 | 32 | INT8 | 1649.40 | 0.76158 | 0.93152 | diff --git a/models/cv/classification/inceptionresnetv2/ixrt/README.md b/models/cv/classification/inceptionresnetv2/ixrt/README.md index c0be6674e0480cc4d46d932e00cc07c66b23d74f..6469019389d5b4e42107ea77da0f70fdfb5a0ae3 100755 --- a/models/cv/classification/inceptionresnetv2/ixrt/README.md +++ b/models/cv/classification/inceptionresnetv2/ixrt/README.md @@ -1,4 +1,4 @@ -# InceptionResNetV2 +# Inception-ResNetV2 ## Description diff --git a/models/cv/classification/resnet_v1_d50/ixrt/README.md b/models/cv/classification/resnet_v1_d50/ixrt/README.md index 06a1ed34774be43ddc1d5f52019be0b878ab463c..42880951cd62081e983f4d1b7762004da3690325 100644 --- a/models/cv/classification/resnet_v1_d50/ixrt/README.md +++ b/models/cv/classification/resnet_v1_d50/ixrt/README.md @@ -1,4 +1,4 @@ -# ResNet50 +# ResNet_V1_D50 ## Description @@ -64,7 +64,7 @@ bash scripts/infer_resnet_v1_d50_int8_performance.sh ## Results -Model |BatchSize |Precision |FPS |Top-1(%) |Top-5(%) ----------|-----------|----------|----------|----------|-------- -ResNet50 | | FP16 | 3887.55 | 0.77544 | 0.93568 -ResNet50 | | INT8 | 7148.58 | 0.7711 | 0.93514 +| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) | +| ------------- | --------- | --------- | ------- | -------- | -------- | +| ResNet_V1_D50 | 32 | FP16 | 3887.55 | 0.77544 | 0.93568 | +| ResNet_V1_D50 | 32 | INT8 | 7148.58 | 0.7711 | 0.93514 | diff --git a/models/cv/classification/squeezenet_1.1/ixrt/README.md b/models/cv/classification/squeezenet_1.1/ixrt/README.md index 088ee0adf3deef26558346dd829947c216ae4eef..08fe037a0c7c3f1037b531c440dc553d43ebdb60 100644 --- a/models/cv/classification/squeezenet_1.1/ixrt/README.md +++ b/models/cv/classification/squeezenet_1.1/ixrt/README.md @@ -70,7 +70,7 @@ bash scripts/infer_squeezenet_v11_int8_performance.sh ## Results -Model |BatchSize |Precision |FPS |Top-1(%) |Top-5(%) 
----------------|-----------|----------|---------|----------|-------- -SqueezeNet 1.1 | | FP16 | 13701 | 0.58182 | 0.80622 -SqueezeNet 1.1 | | INT8 | 20128 | 0.50966 | 0.77552 +| Model | BatchSize | Precision | FPS | Top-1(%) | Top-5(%) | +| -------------- | --------- | --------- | ----- | -------- | -------- | +| SqueezeNet 1.1 | 32 | FP16 | 13701 | 0.58182 | 0.80622 | +| SqueezeNet 1.1 | 32 | INT8 | 20128 | 0.50966 | 0.77552 | diff --git a/models/cv/detection/detr/ixrt/README.md b/models/cv/detection/detr/ixrt/README.md index 5d05389dce9411d113d2cacc7b9387003b863acb..28df3f60e99b4c3901c0ee9c3c74aaa0946e7935 100755 --- a/models/cv/detection/detr/ixrt/README.md +++ b/models/cv/detection/detr/ixrt/README.md @@ -1,4 +1,4 @@ -# Detr +# DETR ## Description @@ -63,4 +63,4 @@ bash scripts/infer_detr_fp16_performance.sh Model |BatchSize |Precision |FPS |MAP@0.5 |MAP@0.5:0.95 --------|-----------|----------|----------|----------|------------ -Detr | 1 | FP16 | 65.84 | 0.370 | 0.198 +DETR | 1 | FP16 | 65.84 | 0.370 | 0.198 diff --git a/models/cv/detection/fcos/ixrt/README.md b/models/cv/detection/fcos/ixrt/README.md index 244e8f6acd88d6ed1c21bf628992c462eea4a93d..49db1e04c9263472bd8db0675e3b543098f13362 100755 --- a/models/cv/detection/fcos/ixrt/README.md +++ b/models/cv/detection/fcos/ixrt/README.md @@ -1,4 +1,5 @@ # FCOS + ## Description FCOS is an anchor-free model based on the Fully Convolutional Network (FCN) architecture for pixel-wise object detection. It implements a proposal-free solution and introduces the concept of centerness. 
@@ -7,8 +8,14 @@ For more details, please refer to our [report on Arxiv](https://arxiv.org/abs/19 ## Setup ### Install -``` -yum install mesa-libGL + +```bash +# Install libGL +## CentOS +yum install -y mesa-libGL +## Ubuntu +apt install -y libgl1-mesa-dev + pip3 install tqdm pip3 install onnx pip3 install onnxsim @@ -36,13 +43,14 @@ sh install_mmcv.sh Pretrained model: -- COCO2017数据集准备参考: https://cocodataset.org/ +- COCO2017数据集准备参考: - 图片目录: Path/To/val2017/*.jpg - 标注文件目录: Path/To/annotations/instances_val2017.json ### Model Conversion MMDetection is an open source object detection toolbox based on PyTorch. It is a part of the OpenMMLab project.It is utilized for model conversion. In MMDetection, Execute model conversion command, and the checkpoints folder needs to be created, (mkdir checkpoints) in project + ```bash git clone -b v2.25.0 https://github.com/open-mmlab/mmdetection.git @@ -59,12 +67,13 @@ python3 tools/deployment/pytorch2onnx.py \ --skip-postprocess \ --dynamic-export \ --cfg-options \ - model.test_cfg.deploy_nms_pre=-1 \ - + model.test_cfg.deploy_nms_pre=-1 ``` -If there are issues such as input parameter mismatch during model export, it may be due to ONNX version. To resolve this, please delete the last parameter (dynamic_slice) from the return value of the _slice_helper function in the /usr/local/lib/python3.10/site-packages/mmcv/onnx/onnx_utils/symbolic_helper.py file. + +If there are issues such as input parameter mismatch during model export, it may be due to ONNX version. To resolve this, please delete the last parameter (dynamic_slice) from the return value of the `_slice_helper` function in the /usr/local/lib/python3.10/site-packages/mmcv/onnx/onnx_utils/symbolic_helper.py file. 
## Inference + ```bash export PROJ_DIR=./ export DATASETS_DIR=/Path/to/coco/ @@ -73,6 +82,7 @@ export RUN_DIR=./ ``` ### FP16 + ```bash # Accuracy bash scripts/infer_fcos_fp16_accuracy.sh @@ -82,6 +92,6 @@ bash scripts/infer_fcos_fp16_performance.sh ## Results -Model |BatchSize |Precision |FPS |MAP@0.5 |MAP@0.5:0.95 | ---------|-----------|----------|---------|----------|-------------| -Fcos | 1 | FP16 | 51.62 | 0.546 | 0.360 | \ No newline at end of file +| Model | BatchSize | Precision | FPS | MAP@0.5 | MAP@0.5:0.95 | +| ----- | --------- | --------- | ----- | ------- | ------------ | +| FCOS | 1 | FP16 | 51.62 | 0.546 | 0.360 | diff --git a/models/cv/detection/yolov5s/ixrt/README.md b/models/cv/detection/yolov5s/ixrt/README.md index c189fc703dab614623c827b08cc6543b007832dd..28e6cf73659496be68b7c23598a08931b4893266 100755 --- a/models/cv/detection/yolov5s/ixrt/README.md +++ b/models/cv/detection/yolov5s/ixrt/README.md @@ -1,4 +1,4 @@ -# YOLOv5-s +# YOLOv5s ## Description diff --git a/models/cv/detection/solov1/ixrt/README.md b/models/cv/segmentation/solov1/ixrt/README.md similarity index 96% rename from models/cv/detection/solov1/ixrt/README.md rename to models/cv/segmentation/solov1/ixrt/README.md index fbe6fd970406a8b567a41b45c71e1a81432f57db..d675f5494bcb969639fa84b1c2d4af78e0f075bb 100644 --- a/models/cv/detection/solov1/ixrt/README.md +++ b/models/cv/segmentation/solov1/ixrt/README.md @@ -69,4 +69,4 @@ bash scripts/infer_solov1_fp16_performance.sh Model |BatchSize |Precision |FPS |MAP@0.5 |MAP@0.5:0.95 --------|-----------|----------|----------|----------|------------ -Solov1 | 1 | FP16 | 24.67 | 0.541 | 0.338 +SOLOv1 | 1 | FP16 | 24.67 | 0.541 | 0.338 diff --git a/models/cv/detection/solov1/ixrt/build_engine.py b/models/cv/segmentation/solov1/ixrt/build_engine.py similarity index 100% rename from models/cv/detection/solov1/ixrt/build_engine.py rename to models/cv/segmentation/solov1/ixrt/build_engine.py diff --git 
a/models/cv/detection/solov1/ixrt/coco_instance.py b/models/cv/segmentation/solov1/ixrt/coco_instance.py similarity index 100% rename from models/cv/detection/solov1/ixrt/coco_instance.py rename to models/cv/segmentation/solov1/ixrt/coco_instance.py diff --git a/models/cv/detection/solov1/ixrt/common.py b/models/cv/segmentation/solov1/ixrt/common.py similarity index 100% rename from models/cv/detection/solov1/ixrt/common.py rename to models/cv/segmentation/solov1/ixrt/common.py diff --git a/models/cv/detection/solov1/ixrt/scripts/infer_solov1_fp16_accuracy.sh b/models/cv/segmentation/solov1/ixrt/scripts/infer_solov1_fp16_accuracy.sh similarity index 100% rename from models/cv/detection/solov1/ixrt/scripts/infer_solov1_fp16_accuracy.sh rename to models/cv/segmentation/solov1/ixrt/scripts/infer_solov1_fp16_accuracy.sh diff --git a/models/cv/detection/solov1/ixrt/scripts/infer_solov1_fp16_performance.sh b/models/cv/segmentation/solov1/ixrt/scripts/infer_solov1_fp16_performance.sh similarity index 100% rename from models/cv/detection/solov1/ixrt/scripts/infer_solov1_fp16_performance.sh rename to models/cv/segmentation/solov1/ixrt/scripts/infer_solov1_fp16_performance.sh diff --git a/models/cv/detection/solov1/ixrt/simplify_model.py b/models/cv/segmentation/solov1/ixrt/simplify_model.py similarity index 100% rename from models/cv/detection/solov1/ixrt/simplify_model.py rename to models/cv/segmentation/solov1/ixrt/simplify_model.py diff --git a/models/cv/detection/solov1/ixrt/solo_r50_fpn_3x_coco.py b/models/cv/segmentation/solov1/ixrt/solo_r50_fpn_3x_coco.py similarity index 100% rename from models/cv/detection/solov1/ixrt/solo_r50_fpn_3x_coco.py rename to models/cv/segmentation/solov1/ixrt/solo_r50_fpn_3x_coco.py diff --git a/models/cv/detection/solov1/ixrt/solo_torch2onnx.py b/models/cv/segmentation/solov1/ixrt/solo_torch2onnx.py similarity index 100% rename from models/cv/detection/solov1/ixrt/solo_torch2onnx.py rename to 
models/cv/segmentation/solov1/ixrt/solo_torch2onnx.py diff --git a/models/cv/detection/solov1/ixrt/solov1_inference.py b/models/cv/segmentation/solov1/ixrt/solov1_inference.py similarity index 100% rename from models/cv/detection/solov1/ixrt/solov1_inference.py rename to models/cv/segmentation/solov1/ixrt/solov1_inference.py diff --git a/models/cv/trace/repnet/igie/README.md b/models/cv/trace/repnet/igie/README.md index a37c6c8bcfa1efaea56226824cda28c971e8b69f..03659b81e537bde9a068deb796c10a0ab1377f13 100644 --- a/models/cv/trace/repnet/igie/README.md +++ b/models/cv/trace/repnet/igie/README.md @@ -1,4 +1,4 @@ -# RepNet-VehicleReID +# RepNet-Vehicle-ReID ## Description diff --git a/models/nlp/language_model/bert_base_squad/igie/README.md b/models/nlp/language_model/bert_base_squad/igie/README.md index cff33a6402d1533458a5f0a1146e95202d900f65..4c4ab62954a6e398012c3df39e7b52d338e30bdc 100644 --- a/models/nlp/language_model/bert_base_squad/igie/README.md +++ b/models/nlp/language_model/bert_base_squad/igie/README.md @@ -43,6 +43,6 @@ bash scripts/infer_bert_base_squad_fp16_performance.sh ## Results -Model |BatchSize |SeqLength |Precision |FPS | F1 Score ------------------|-----------|----------|----------|----------|-------- -Bertbase(Squad) | 8 | 256 | FP16 |901.81 | 88.08 +| Model | BatchSize | SeqLength | Precision | FPS | F1 Score | +| --------------- | --------- | --------- | --------- | ------ | -------- | +| BERT Base SQuAD | 8 | 256 | FP16 | 901.81 | 88.08 | diff --git a/models/nlp/language_model/bert_base_squad/ixrt/README.md b/models/nlp/language_model/bert_base_squad/ixrt/README.md index 0a7f4b3302ced700a3acefca5079b4e6bb3fed09..acc3592b59533fcfd6334628977c78b05916de02 100644 --- a/models/nlp/language_model/bert_base_squad/ixrt/README.md +++ b/models/nlp/language_model/bert_base_squad/ixrt/README.md @@ -12,23 +12,23 @@ BERT is designed to pre-train deep bidirectional representations from unlabeled docker pull nvcr.io/nvidia/tensorrt:23.04-py3 ``` -### 
Install +## Install -#### On iluvatar +### Install on Iluvatar ```bash cmake -S . -B build cmake --build build -j16 ``` -#### On T4 +### Install on T4 ```bash cmake -S . -B build -DUSE_TENSORRT=true cmake --build build -j16 ``` -### Download +## Download ```bash cd python @@ -37,17 +37,6 @@ bash script/prepare.sh v1_1 ## Inference -### On T4 - -```bash -# FP16 -cd python -pip install onnx pycuda -# use --bs to set max_batch_size (dynamic) -bash script/build_engine.sh --bs 32 -bash script/inference_squad.sh --bs 32 -``` - ```bash # INT8 cd python @@ -55,25 +44,41 @@ pip install onnx pycuda bash script/build_engine.sh --bs 32 --int8 bash script/inference_squad.sh --bs 32 --int8 ``` -#### On iluvatar + +### On Iluvatar + +#### FP16 ```bash -# FP16 cd python/script bash infer_bert_base_squad_fp16_ixrt.sh ``` +#### INT8 + ```bash -# INT8 cd python/script bash infer_bert_base_squad_int8_ixrt.sh ``` +### On T4 + +```bash +# FP16 +cd python +pip install onnx pycuda +# use --bs to set max_batch_size (dynamic) +bash script/build_engine.sh --bs 32 +bash script/inference_squad.sh --bs 32 +``` + ## Results -Model | BatchSize | Precision | FPS | ACC -------|-----------|-----------|-----|---- -BERT-Base-SQuAD | 32 | fp16 | Latency QPS: 1543.40 sentences/s | "exact_match": 80.92, "f1": 88.20 +| Model | BatchSize | Precision | Latency QPS | exact_match | f1 | +| --------------- | --------- | --------- | ----------- | ----------- | ----- | +| BERT Base SQuAD | 32 | FP16 | 1444.69 | 80.92 | 88.20 | +| BERT Base SQuAD | 32 | INT8 | 2325.20 | 78.41 | 86.97 | + +## Referenece -## Referenece -- [bert-base-uncased.zip 外网链接](https://drive.google.com/file/d/1_DJDdKBanqJ6h3VGhH78F9EPgE2wK_Tw/view?usp=drive_link) +- [bert-base-uncased.zip](https://drive.google.com/file/d/1_DJDdKBanqJ6h3VGhH78F9EPgE2wK_Tw/view?usp=drive_link) diff --git a/models/nlp/large_language_model/chatglm/vllm/README.md b/models/nlp/large_language_model/chatglm3-6b/vllm/README.md similarity index 100% rename from 
models/nlp/large_language_model/chatglm/vllm/README.md rename to models/nlp/large_language_model/chatglm3-6b/vllm/README.md diff --git a/models/nlp/large_language_model/chatglm/vllm/offline_inference.py b/models/nlp/large_language_model/chatglm3-6b/vllm/offline_inference.py similarity index 100% rename from models/nlp/large_language_model/chatglm/vllm/offline_inference.py rename to models/nlp/large_language_model/chatglm3-6b/vllm/offline_inference.py diff --git a/models/nlp/large_language_model/chatglm/vllm/server_inference.py b/models/nlp/large_language_model/chatglm3-6b/vllm/server_inference.py similarity index 100% rename from models/nlp/large_language_model/chatglm/vllm/server_inference.py rename to models/nlp/large_language_model/chatglm3-6b/vllm/server_inference.py diff --git a/models/nlp/large_language_model/chatglm/vllm/utils.py b/models/nlp/large_language_model/chatglm3-6b/vllm/utils.py similarity index 100% rename from models/nlp/large_language_model/chatglm/vllm/utils.py rename to models/nlp/large_language_model/chatglm3-6b/vllm/utils.py diff --git a/models/nlp/large_language_model/llama2-13b/trtllm/README.md b/models/nlp/large_language_model/llama2-13b/trtllm/README.md index d10629b12e943dac4b03806888c66e4069074d35..afb1d13d571899b03fa70593ab0b9b4bd1600f85 100755 --- a/models/nlp/large_language_model/llama2-13b/trtllm/README.md +++ b/models/nlp/large_language_model/llama2-13b/trtllm/README.md @@ -53,3 +53,9 @@ bash scripts/test_trtllm_llama2_13b_gpu2_build.sh # Inference bash scripts/test_trtllm_llama2_13b_gpu2.sh ``` + +## Results + +| Model | tokens | tokens per second | +| ---------- | ------ | ----------------- | +| Llama2 13B | 1596 | 33.39 | diff --git a/models/nlp/large_language_model/qwen-7b/text-generation-inference/README.md b/models/nlp/large_language_model/qwen-7b/text-generation-inference/README.md index 65bf7afaf1c3fab51bf7ff3b9909895f135a28ef..348850b3eaa5bb62c11753ef6dce9212aa87e7a8 100644 --- 
a/models/nlp/large_language_model/qwen-7b/text-generation-inference/README.md +++ b/models/nlp/large_language_model/qwen-7b/text-generation-inference/README.md @@ -53,3 +53,9 @@ ENABLE_INFER_PG=1 CUDA_VISIBLE_DEVICES=0 USE_FLASH_ATTENTION=true text-generatio export CUDA_VISIBLE_DEVICES=1 python3 offline_inference.py --model2path ./data/qwen-7B ``` + +## Results + +| Model | QPS | +| ------- | ----- | +| Qwen-7B | 35.64 |