From f44701659f0ebbbb6070f079890d034ebf96c75f Mon Sep 17 00:00:00 2001
From: "mingjiang.li"
Date: Fri, 8 Aug 2025 11:26:25 +0800
Subject: [PATCH 1/2] add ernie 4.5 models using fastdeploy

Signed-off-by: mingjiang.li
---
 .../ernie-4.5-21b-a3b/fastdeploy/README.md    | 55 +++++++++++++++++++
 .../ernie-4.5-21b-a3b/fastdeploy/run_demo.py  | 35 ++++++++++++
 .../ernie-4.5-21b-a3b/fastdeploy/run_demo.sh  | 22 ++++++++
 .../ernie-4.5-300b-a47b/fastdeploy/README.md  | 54 ++++++++++++++++++
 .../fastdeploy/run_demo.py                    | 35 ++++++++++++
 .../fastdeploy/run_demo.sh                    | 22 ++++++++
 6 files changed, 223 insertions(+)
 create mode 100644 models/nlp/llm/ernie-4.5-21b-a3b/fastdeploy/README.md
 create mode 100644 models/nlp/llm/ernie-4.5-21b-a3b/fastdeploy/run_demo.py
 create mode 100644 models/nlp/llm/ernie-4.5-21b-a3b/fastdeploy/run_demo.sh
 create mode 100644 models/nlp/llm/ernie-4.5-300b-a47b/fastdeploy/README.md
 create mode 100644 models/nlp/llm/ernie-4.5-300b-a47b/fastdeploy/run_demo.py
 create mode 100644 models/nlp/llm/ernie-4.5-300b-a47b/fastdeploy/run_demo.sh

diff --git a/models/nlp/llm/ernie-4.5-21b-a3b/fastdeploy/README.md b/models/nlp/llm/ernie-4.5-21b-a3b/fastdeploy/README.md
new file mode 100644
index 00000000..e5b4f4ce
--- /dev/null
+++ b/models/nlp/llm/ernie-4.5-21b-a3b/fastdeploy/README.md
@@ -0,0 +1,55 @@
+# ERNIE-4.5-21B-A3B (FastDeploy)
+
+## Model Description
+
+ERNIE-4.5-21B-A3B is a large-scale Mixture of Experts (MoE) language model developed by Baidu, featuring 21 billion
+total parameters with 3 billion activated parameters per token. The model employs a heterogeneous MoE architecture with
+64 text experts and 64 vision experts, activating 6 experts per token, and its 28 transformer layers support an
+ultra-long context window of 131,072 tokens. The model supports multimodal heterogeneous pre-training and 4-bit/2-bit
+lossless quantization for efficient inference. Built on the PaddlePaddle framework, it excels in text understanding and
+generation tasks such as dialogue and question answering, making it a key member of the ERNIE 4.5 series.
+
+## Supported Environments
+
+| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release |
+| :----: | :----: | :----: |
+| BI-V150 | 4.3.0 | 25.09 |
+
+Currently the entire model must be loaded into host memory, which requires more than 600 GB of RAM. This limitation
+will be optimized in subsequent versions.
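+
+Before preparing the model, you can quickly confirm how much memory the host actually has available:
+
+```sh
+free -h
+```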
+
+## Model Preparation
+
+### Prepare Resources
+
+- Model: <https://huggingface.co/baidu/ERNIE-4.5-21B-A3B-Paddle>
+
+```sh
+# Pull the docker image
+docker pull ccr-2vdh3abv-pub.cnc.bj.baidubce.com/device/paddle-ixuca:latest
+
+# Start the container and enter it
+docker run -itd --name paddle_infer -v /usr/src:/usr/src -v /lib/modules:/lib/modules -v /dev:/dev -v /home/paddle:/home/paddle --privileged --cap-add=ALL --pid=host ccr-2vdh3abv-pub.cnc.bj.baidubce.com/device/paddle-ixuca:latest
+docker exec -it paddle_infer bash
+```
+
+### Install Dependencies
+
+```sh
+pip3 install paddlepaddle==3.1.0a0 -i https://www.paddlepaddle.org.cn/packages/stable/cpu/
+pip3 install paddle-iluvatar-gpu==3.1.0 -i https://www.paddlepaddle.org.cn/packages/stable/ixuca/
+pip3 install fastdeploy_iluvatar_gpu -i https://www.paddlepaddle.org.cn/packages/stable/ixuca/ --extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+```
+
+## Model Inference
+
+```sh
+chmod u+x ./run_demo.sh
+./run_demo.sh
+```
+
+## References
+
+- [FastDeploy](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/get_started/installation/iluvatar_gpu.md)
diff --git a/models/nlp/llm/ernie-4.5-21b-a3b/fastdeploy/run_demo.py b/models/nlp/llm/ernie-4.5-21b-a3b/fastdeploy/run_demo.py
new file mode 100644
index 00000000..6b7cd7fa
--- /dev/null
+++ b/models/nlp/llm/ernie-4.5-21b-a3b/fastdeploy/run_demo.py
@@ -0,0 +1,35 @@
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from fastdeploy import LLM, SamplingParams
+
+prompts = [
+    "Hello, my name is",
+    "The largest ocean is",
+]
+
+# Sampling parameters for generation
+sampling_params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=256)
+
+# Load the model with 4-way tensor parallelism and weight-only INT8 (wint8)
+# quantization; static_decode_blocks=0 follows the FastDeploy Iluvatar GPU guide
+llm = LLM(model="/home/paddle/ERNIE-4.5-21B-A3B-Paddle", tensor_parallel_size=4, max_model_len=8192, static_decode_blocks=0, quantization='wint8')
+
+# Perform batch inference and print each prompt with its completion
+outputs = llm.generate(prompts, sampling_params)
+
+for output in outputs:
+    prompt = output.prompt
+    generated_text = output.outputs.text
+    print(prompt, generated_text)
diff --git a/models/nlp/llm/ernie-4.5-21b-a3b/fastdeploy/run_demo.sh b/models/nlp/llm/ernie-4.5-21b-a3b/fastdeploy/run_demo.sh
new file mode 100644
index 00000000..717e1fa2
--- /dev/null
+++ b/models/nlp/llm/ernie-4.5-21b-a3b/fastdeploy/run_demo.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
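+
+# Environment setup for FastDeploy on Iluvatar CoreX GPUs, following the
+# FastDeploy Iluvatar GPU guide linked in the README: PADDLE_XCCL_BACKEND
+# selects Paddle's custom-device (XCCL) backend, INFERENCE_MSG_QUEUE_ID sets a
+# unique id for the inference message queue, LD_PRELOAD preloads the CoreX
+# CUDA-compatible runtime (the library path may differ on your installation),
+# and FD_DEBUG=1 enables verbose FastDeploy logging.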
+export PADDLE_XCCL_BACKEND=iluvatar_gpu
+export INFERENCE_MSG_QUEUE_ID=232132
+export LD_PRELOAD=/usr/local/corex/lib64/libcuda.so.1
+export FD_DEBUG=1
+
+python3 run_demo.py
diff --git a/models/nlp/llm/ernie-4.5-300b-a47b/fastdeploy/README.md b/models/nlp/llm/ernie-4.5-300b-a47b/fastdeploy/README.md
new file mode 100644
index 00000000..cdbeca69
--- /dev/null
+++ b/models/nlp/llm/ernie-4.5-300b-a47b/fastdeploy/README.md
@@ -0,0 +1,54 @@
+# ERNIE-4.5-300B-A47B (FastDeploy)
+
+## Model Description
+
+ERNIE-4.5-300B-A47B is a state-of-the-art large-scale language model developed by Baidu and released in June 2025 under
+the Apache 2.0 license. It employs a heterogeneous MoE architecture with 300B total parameters and 47B activated
+parameters per token, enabling efficient multimodal understanding while maintaining strong text performance. Trained
+with the PaddlePaddle framework, it achieves 47% Model FLOPs Utilization (MFU) during pre-training through FP8 mixed
+precision and fine-grained recomputation.
+
+## Supported Environments
+
+| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release |
+| :----: | :----: | :----: |
+| BI-V150 | 4.3.0 | 25.09 |
+
+Currently the entire model must be loaded into host memory, which requires more than 600 GB of RAM. This limitation
+will be optimized in subsequent versions.
+
+## Model Preparation
+
+### Prepare Resources
+
+- Model: <https://huggingface.co/baidu/ERNIE-4.5-300B-A47B-Paddle>
+
+```sh
+# Pull the docker image
+docker pull ccr-2vdh3abv-pub.cnc.bj.baidubce.com/device/paddle-ixuca:latest
+
+# Start the container and enter it
+docker run -itd --name paddle_infer -v /usr/src:/usr/src -v /lib/modules:/lib/modules -v /dev:/dev -v /home/paddle:/home/paddle --privileged --cap-add=ALL --pid=host ccr-2vdh3abv-pub.cnc.bj.baidubce.com/device/paddle-ixuca:latest
+docker exec -it paddle_infer bash
+```
+
+### Install Dependencies
+
+```sh
+pip3 install paddlepaddle==3.1.0a0 -i https://www.paddlepaddle.org.cn/packages/stable/cpu/
+pip3 install paddle-iluvatar-gpu==3.1.0 -i https://www.paddlepaddle.org.cn/packages/stable/ixuca/
+pip3 install fastdeploy_iluvatar_gpu -i https://www.paddlepaddle.org.cn/packages/stable/ixuca/ --extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+```
+
+## Model Inference
+
+```sh
+chmod u+x ./run_demo.sh
+./run_demo.sh
+```
+
+## References
+
+- [FastDeploy](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/get_started/installation/iluvatar_gpu.md)
diff --git a/models/nlp/llm/ernie-4.5-300b-a47b/fastdeploy/run_demo.py b/models/nlp/llm/ernie-4.5-300b-a47b/fastdeploy/run_demo.py
new file mode 100644
index 00000000..d323ebc6
--- /dev/null
+++ b/models/nlp/llm/ernie-4.5-300b-a47b/fastdeploy/run_demo.py
@@ -0,0 +1,35 @@
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
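+
+# Offline batch-inference demo for ERNIE-4.5-300B-A47B using FastDeploy's LLM
+# API. The Paddle-format weights are expected under /home/paddle, the host
+# directory mounted into the container in the README.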
+ +from fastdeploy import LLM, SamplingParams + +prompts = [ + "Hello, my name is", + "The largest ocean is", +] + +# sampling parameters +sampling_params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=256) + +# load the model +llm = LLM(model="/home/paddle/ERNIE-4.5-300B-A47B-Paddle", tensor_parallel_size=4, max_model_len=8192, static_decode_blocks=0, quantization='wint8') + +# Perform batch inference +outputs = llm.generate(prompts, sampling_params) + +for output in outputs: + prompt = output.prompt + generated_text = output.outputs.text + print(prompt, generated_text) diff --git a/models/nlp/llm/ernie-4.5-300b-a47b/fastdeploy/run_demo.sh b/models/nlp/llm/ernie-4.5-300b-a47b/fastdeploy/run_demo.sh new file mode 100644 index 00000000..717e1fa2 --- /dev/null +++ b/models/nlp/llm/ernie-4.5-300b-a47b/fastdeploy/run_demo.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +export PADDLE_XCCL_BACKEND=iluvatar_gpu +export INFERENCE_MSG_QUEUE_ID=232132 +export LD_PRELOAD=/usr/local/corex/lib64/libcuda.so.1 +export FD_DEBUG=1 + +python3 run_demo.py -- Gitee From 095a7f083573715a0f9c24ad13c5d58047376567 Mon Sep 17 00:00:00 2001 From: "mingjiang.li" Date: Fri, 8 Aug 2025 15:23:05 +0800 Subject: [PATCH 2/2] add small models to model list --- README.md | 26 +++++++++++++++++++------- README_en.md | 26 +++++++++++++++++++------- 2 files changed, 38 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 5aafc356..bb5b169f 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ |------------------------|-------|--------------------------------------------------------|-----------------------------------------------------------|-----------| | AlexNet | FP16 | [✅](models/cv/classification/alexnet/igie) | [✅](models/cv/classification/alexnet/ixrt) | 4.3.0 | | | INT8 | [✅](models/cv/classification/alexnet/igie) | [✅](models/cv/classification/alexnet/ixrt) | 4.3.0 | -| CLIP | FP16 | [✅](models/cv/classification/clip/igie) | | 4.3.0 | +| CLIP | FP16 | [✅](models/cv/classification/clip/igie) | [✅](models/cv/classification/clip/ixrt) | 4.3.0 | | Conformer-B | FP16 | [✅](models/cv/classification/conformer_base/igie) | | 4.3.0 | | ConvNeXt-Base | FP16 | [✅](models/cv/classification/convnext_base/igie) | [✅](models/cv/classification/convnext_base/ixrt) | 4.3.0 | | ConvNext-S | FP16 | [✅](models/cv/classification/convnext_s/igie) | | 4.3.0 | @@ -66,7 +66,7 @@ | | INT8 | | [✅](models/cv/classification/cspdarknet53/ixrt) | 4.3.0 | | CSPResNet50 | FP16 | [✅](models/cv/classification/cspresnet50/igie) | [✅](models/cv/classification/cspresnet50/ixrt) | 4.3.0 | | | INT8 | | [✅](models/cv/classification/cspresnet50/ixrt) | 4.3.0 | -| CSPResNeXt50 | FP16 | [✅](models/cv/classification/cspresnext50/igie) | | 4.3.0 | +| CSPResNeXt50 | FP16 | [✅](models/cv/classification/cspresnext50/igie) | [✅](models/cv/classification/cspresnext50/ixrt) | 4.3.0 | | DeiT-tiny | FP16 | 
[✅](models/cv/classification/deit_tiny/igie) | [✅](models/cv/classification/deit_tiny/ixrt) | 4.3.0 | | DenseNet121 | FP16 | [✅](models/cv/classification/densenet121/igie) | [✅](models/cv/classification/densenet121/ixrt) | 4.3.0 | | DenseNet161 | FP16 | [✅](models/cv/classification/densenet161/igie) | [✅](models/cv/classification/densenet161/ixrt) | 4.3.0 | @@ -78,8 +78,9 @@ | | INT8 | | [✅](models/cv/classification/efficientnet_b1/ixrt) | 4.3.0 | | EfficientNet-B2 | FP16 | [✅](models/cv/classification/efficientnet_b2/igie) | [✅](models/cv/classification/efficientnet_b2/ixrt) | 4.3.0 | | EfficientNet-B3 | FP16 | [✅](models/cv/classification/efficientnet_b3/igie) | [✅](models/cv/classification/efficientnet_b3/ixrt) | 4.3.0 | -| EfficientNet-B4 | FP16 | [✅](models/cv/classification/efficientnet_b4/igie) | | 4.3.0 | -| EfficientNet-B5 | FP16 | [✅](models/cv/classification/efficientnet_b5/igie) | | 4.3.0 | +| EfficientNet-B4 | FP16 | [✅](models/cv/classification/efficientnet_b4/igie) | [✅](models/cv/classification/efficientnet_b4/ixrt) | 4.3.0 | +| EfficientNet-B5 | FP16 | [✅](models/cv/classification/efficientnet_b5/igie) | [✅](models/cv/classification/efficientnet_b5/ixrt) | 4.3.0 | +| EfficientNet-B6 | FP16 | [✅](models/cv/classification/efficientnet_b6/igie) | | 4.3.0 | | EfficientNetV2 | FP16 | [✅](models/cv/classification/efficientnet_v2/igie) | [✅](models/cv/classification/efficientnet_v2/ixrt) | 4.3.0 | | | INT8 | | [✅](models/cv/classification/efficientnet_v2/ixrt) | 4.3.0 | | EfficientNetv2_rw_t | FP16 | [✅](models/cv/classification/efficientnetv2_rw_t/igie) | [✅](models/cv/classification/efficientnetv2_rw_t/ixrt) | 4.3.0 | @@ -96,6 +97,7 @@ | MNASNet0_5 | FP16 | [✅](models/cv/classification/mnasnet0_5/igie) | | 4.3.0 | | MNASNet0_75 | FP16 | [✅](models/cv/classification/mnasnet0_75/igie) | | 4.3.0 | | MNASNet1_0 | FP16 | [✅](models/cv/classification/mnasnet1_0/igie) | | 4.3.0 | +| MNASNet1_3 | FP16 | [✅](models/cv/classification/mnasnet1_3/igie) | | 4.3.0 | | MobileNetV2 | FP16 | [✅](models/cv/classification/mobilenet_v2/igie) | [✅](models/cv/classification/mobilenet_v2/ixrt) | 4.3.0 | | | INT8 | [✅](models/cv/classification/mobilenet_v2/igie) | [✅](models/cv/classification/mobilenet_v2/ixrt) | 4.3.0 | | MobileNetV3_Large | FP16 | [✅](models/cv/classification/mobilenet_v3_large/igie) | | 4.3.0 | @@ -104,8 +106,13 @@ | RegNet_x_16gf | FP16 | [✅](models/cv/classification/regnet_x_16gf/igie) | | 4.3.0 | | RegNet_x_1_6gf | FP16 | [✅](models/cv/classification/regnet_x_1_6gf/igie) | | 4.3.0 | | RegNet_x_3_2gf | FP16 | [✅](models/cv/classification/regnet_x_3_2gf/igie) | | 4.3.0 | +| RegNet_x_32gf | FP16 | [✅](models/cv/classification/regnet_x_32gf/igie) | | 4.3.0 | +| RegNet_x_400mf | FP16 | [✅](models/cv/classification/regnet_x_400mf/igie) | | 4.3.0 | | RegNet_y_1_6gf | FP16 | [✅](models/cv/classification/regnet_y_1_6gf/igie) | | 4.3.0 | | RegNet_y_16gf | FP16 | [✅](models/cv/classification/regnet_y_16gf/igie) | | 4.3.0 | +| RegNet_y_3_2gf | FP16 | [✅](models/cv/classification/regnet_y_3_2gf/igie) | | 4.3.0 | +| RegNet_y_32gf | FP16 | [✅](models/cv/classification/regnet_y_32gf/igie) | | 4.3.0 | +| RegNet_y_400mf | FP16 | [✅](models/cv/classification/regnet_y_400mf/igie) | | 4.3.0 | | RepVGG | FP16 | [✅](models/cv/classification/repvgg/igie) | [✅](models/cv/classification/repvgg/ixrt) | 4.3.0 | | Res2Net50 | FP16 | [✅](models/cv/classification/res2net50/igie) | [✅](models/cv/classification/res2net50/ixrt) | 4.3.0 | | | INT8 | | [✅](models/cv/classification/res2net50/ixrt) | 4.3.0 | @@ 
-141,6 +148,8 @@ | Twins_PCPVT | FP16 | [✅](models/cv/classification/twins_pcpvt/igie) | | 4.3.0 | | VAN_B0 | FP16 | [✅](models/cv/classification/van_b0/igie) | | 4.3.0 | | VGG11 | FP16 | [✅](models/cv/classification/vgg11/igie) | | 4.3.0 | +| VGG13 | FP16 | [✅](models/cv/classification/vgg13/igie) | | 4.3.0 | +| VGG13_BN | FP16 | [✅](models/cv/classification/vgg13_bn/igie) | | 4.3.0 | | VGG16 | FP16 | [✅](models/cv/classification/vgg16/igie) | [✅](models/cv/classification/vgg16/ixrt) | 4.3.0 | | | INT8 | [✅](models/cv/classification/vgg16/igie) | | 4.3.0 | | VGG19 | FP16 | [✅](models/cv/classification/vgg19/igie) | | 4.3.0 | @@ -154,7 +163,7 @@ | Model | Prec. | IGIE | ixRT | IXUCA SDK | |------------|-------|-------------------------------------------------|-------------------------------------------------|-----------| -| ATSS | FP16 | [✅](models/cv/object_detection/atss/igie) | | 4.3.0 | +| ATSS | FP16 | [✅](models/cv/object_detection/atss/igie) | [✅](models/cv/object_detection/atss/ixrt) | 4.3.0 | | CenterNet | FP16 | [✅](models/cv/object_detection/centernet/igie) | [✅](models/cv/object_detection/centernet/ixrt) | 4.3.0 | | DETR | FP16 | | [✅](models/cv/object_detection/detr/ixrt) | 4.3.0 | | FCOS | FP16 | [✅](models/cv/object_detection/fcos/igie) | [✅](models/cv/object_detection/fcos/ixrt) | 4.3.0 | @@ -162,11 +171,13 @@ | FSAF | FP16 | [✅](models/cv/object_detection/fsaf/igie) | [✅](models/cv/object_detection/fsaf/ixrt) | 4.3.0 | | GFL | FP16 | [✅](models/cv/object_detection/gfl/igie) | | 4.3.0 | | HRNet | FP16 | [✅](models/cv/object_detection/hrnet/igie) | [✅](models/cv/object_detection/hrnet/ixrt) | 4.3.0 | -| PAA | FP16 | [✅](models/cv/object_detection/paa/igie) | | 4.3.0 | +| PAA | FP16 | [✅](models/cv/object_detection/paa/igie) | [✅](models/cv/object_detection/paa/ixrt) | 4.3.0 | | RetinaFace | FP16 | [✅](models/cv/object_detection/retinaface/igie) | [✅](models/cv/object_detection/retinaface/ixrt) | 4.3.0 | -| RetinaNet | FP16 | [✅](models/cv/object_detection/retinanet/igie) | | 4.3.0 | +| RetinaNet | FP16 | [✅](models/cv/object_detection/retinanet/igie) | [✅](models/cv/object_detection/retinanet/ixrt) | 4.3.0 | | RTMDet | FP16 | [✅](models/cv/object_detection/rtmdet/igie) | | 4.3.0 | | SABL | FP16 | [✅](models/cv/object_detection/sabl/igie) | | 4.3.0 | +| SSD | FP16 | [✅](models/cv/object_detection/ssd/igie) | | 4.3.0 | +| YOLOF | FP16 | [✅](models/cv/object_detection/yolof/igie) | | 4.3.0 | | YOLOv3 | FP16 | [✅](models/cv/object_detection/yolov3/igie) | [✅](models/cv/object_detection/yolov3/ixrt) | 4.3.0 | | | INT8 | [✅](models/cv/object_detection/yolov3/igie) | [✅](models/cv/object_detection/yolov3/ixrt) | 4.3.0 | | YOLOv4 | FP16 | [✅](models/cv/object_detection/yolov4/igie) | [✅](models/cv/object_detection/yolov4/ixrt) | 4.3.0 | @@ -185,6 +196,7 @@ | YOLOv10 | FP16 | [✅](models/cv/object_detection/yolov10/igie) | [✅](models/cv/object_detection/yolov10/ixrt) | 4.3.0 | | YOLOv11 | FP16 | [✅](models/cv/object_detection/yolov11/igie) | [✅](models/cv/object_detection/yolov11/ixrt) | 4.3.0 | | YOLOv12 | FP16 | [✅](models/cv/object_detection/yolov12/igie) | | 4.3.0 | +| YOLOv13 | FP16 | [✅](models/cv/object_detection/yolov13/igie) | | 4.3.0 | | YOLOX | FP16 | [✅](models/cv/object_detection/yolox/igie) | [✅](models/cv/object_detection/yolox/ixrt) | 4.3.0 | | | INT8 | [✅](models/cv/object_detection/yolox/igie) | [✅](models/cv/object_detection/yolox/ixrt) | 4.3.0 | diff --git a/README_en.md b/README_en.md index 9c6acc03..af0e89c1 100644 --- a/README_en.md +++ b/README_en.md @@ -66,7 
+66,7 @@ inference to be expanded in the future. |------------------------|-------|--------------------------------------------------------|-----------------------------------------------------------|-----------| | AlexNet | FP16 | [✅](models/cv/classification/alexnet/igie) | [✅](models/cv/classification/alexnet/ixrt) | 4.3.0 | | | INT8 | [✅](models/cv/classification/alexnet/igie) | [✅](models/cv/classification/alexnet/ixrt) | 4.3.0 | -| CLIP | FP16 | [✅](models/cv/classification/clip/igie) | | 4.3.0 | +| CLIP | FP16 | [✅](models/cv/classification/clip/igie) | [✅](models/cv/classification/clip/ixrt) | 4.3.0 | | Conformer-B | FP16 | [✅](models/cv/classification/conformer_base/igie) | | 4.3.0 | | ConvNeXt-Base | FP16 | [✅](models/cv/classification/convnext_base/igie) | [✅](models/cv/classification/convnext_base/ixrt) | 4.3.0 | | ConvNext-S | FP16 | [✅](models/cv/classification/convnext_s/igie) | | 4.3.0 | @@ -76,7 +76,7 @@ inference to be expanded in the future. | | INT8 | | [✅](models/cv/classification/cspdarknet53/ixrt) | 4.3.0 | | CSPResNet50 | FP16 | [✅](models/cv/classification/cspresnet50/igie) | [✅](models/cv/classification/cspresnet50/ixrt) | 4.3.0 | | | INT8 | | [✅](models/cv/classification/cspresnet50/ixrt) | 4.3.0 | -| CSPResNeXt50 | FP16 | [✅](models/cv/classification/cspresnext50/igie) | | 4.3.0 | +| CSPResNeXt50 | FP16 | [✅](models/cv/classification/cspresnext50/igie) | [✅](models/cv/classification/cspresnext50/ixrt) | 4.3.0 | | DeiT-tiny | FP16 | [✅](models/cv/classification/deit_tiny/igie) | [✅](models/cv/classification/deit_tiny/ixrt) | 4.3.0 | | DenseNet121 | FP16 | [✅](models/cv/classification/densenet121/igie) | [✅](models/cv/classification/densenet121/ixrt) | 4.3.0 | | DenseNet161 | FP16 | [✅](models/cv/classification/densenet161/igie) | [✅](models/cv/classification/densenet161/ixrt) | 4.3.0 | @@ -88,8 +88,9 @@ inference to be expanded in the future. | | INT8 | | [✅](models/cv/classification/efficientnet_b1/ixrt) | 4.3.0 | | EfficientNet-B2 | FP16 | [✅](models/cv/classification/efficientnet_b2/igie) | [✅](models/cv/classification/efficientnet_b2/ixrt) | 4.3.0 | | EfficientNet-B3 | FP16 | [✅](models/cv/classification/efficientnet_b3/igie) | [✅](models/cv/classification/efficientnet_b3/ixrt) | 4.3.0 | -| EfficientNet-B4 | FP16 | [✅](models/cv/classification/efficientnet_b4/igie) | | 4.3.0 | -| EfficientNet-B5 | FP16 | [✅](models/cv/classification/efficientnet_b5/igie) | | 4.3.0 | +| EfficientNet-B4 | FP16 | [✅](models/cv/classification/efficientnet_b4/igie) | [✅](models/cv/classification/efficientnet_b4/ixrt) | 4.3.0 | +| EfficientNet-B5 | FP16 | [✅](models/cv/classification/efficientnet_b5/igie) | [✅](models/cv/classification/efficientnet_b5/ixrt) | 4.3.0 | +| EfficientNet-B6 | FP16 | [✅](models/cv/classification/efficientnet_b6/igie) | | 4.3.0 | | EfficientNetV2 | FP16 | [✅](models/cv/classification/efficientnet_v2/igie) | [✅](models/cv/classification/efficientnet_v2/ixrt) | 4.3.0 | | | INT8 | | [✅](models/cv/classification/efficientnet_v2/ixrt) | 4.3.0 | | EfficientNetv2_rw_t | FP16 | [✅](models/cv/classification/efficientnetv2_rw_t/igie) | [✅](models/cv/classification/efficientnetv2_rw_t/ixrt) | 4.3.0 | @@ -106,6 +107,7 @@ inference to be expanded in the future. 
| MNASNet0_5 | FP16 | [✅](models/cv/classification/mnasnet0_5/igie) | | 4.3.0 | | MNASNet0_75 | FP16 | [✅](models/cv/classification/mnasnet0_75/igie) | | 4.3.0 | | MNASNet1_0 | FP16 | [✅](models/cv/classification/mnasnet1_0/igie) | | 4.3.0 | +| MNASNet1_3 | FP16 | [✅](models/cv/classification/mnasnet1_3/igie) | | 4.3.0 | | MobileNetV2 | FP16 | [✅](models/cv/classification/mobilenet_v2/igie) | [✅](models/cv/classification/mobilenet_v2/ixrt) | 4.3.0 | | | INT8 | [✅](models/cv/classification/mobilenet_v2/igie) | [✅](models/cv/classification/mobilenet_v2/ixrt) | 4.3.0 | | MobileNetV3_Large | FP16 | [✅](models/cv/classification/mobilenet_v3_large/igie) | | 4.3.0 | @@ -114,8 +116,13 @@ inference to be expanded in the future. | RegNet_x_16gf | FP16 | [✅](models/cv/classification/regnet_x_16gf/igie) | | 4.3.0 | | RegNet_x_1_6gf | FP16 | [✅](models/cv/classification/regnet_x_1_6gf/igie) | | 4.3.0 | | RegNet_x_3_2gf | FP16 | [✅](models/cv/classification/regnet_x_3_2gf/igie) | | 4.3.0 | +| RegNet_x_32gf | FP16 | [✅](models/cv/classification/regnet_x_32gf/igie) | | 4.3.0 | +| RegNet_x_400mf | FP16 | [✅](models/cv/classification/regnet_x_400mf/igie) | | 4.3.0 | | RegNet_y_1_6gf | FP16 | [✅](models/cv/classification/regnet_y_1_6gf/igie) | | 4.3.0 | | RegNet_y_16gf | FP16 | [✅](models/cv/classification/regnet_y_16gf/igie) | | 4.3.0 | +| RegNet_y_3_2gf | FP16 | [✅](models/cv/classification/regnet_y_3_2gf/igie) | | 4.3.0 | +| RegNet_y_32gf | FP16 | [✅](models/cv/classification/regnet_y_32gf/igie) | | 4.3.0 | +| RegNet_y_400mf | FP16 | [✅](models/cv/classification/regnet_y_400mf/igie) | | 4.3.0 | | RepVGG | FP16 | [✅](models/cv/classification/repvgg/igie) | [✅](models/cv/classification/repvgg/ixrt) | 4.3.0 | | Res2Net50 | FP16 | [✅](models/cv/classification/res2net50/igie) | [✅](models/cv/classification/res2net50/ixrt) | 4.3.0 | | | INT8 | | [✅](models/cv/classification/res2net50/ixrt) | 4.3.0 | @@ -151,6 +158,8 @@ inference to be expanded in the future. | Twins_PCPVT | FP16 | [✅](models/cv/classification/twins_pcpvt/igie) | | 4.3.0 | | VAN_B0 | FP16 | [✅](models/cv/classification/van_b0/igie) | | 4.3.0 | | VGG11 | FP16 | [✅](models/cv/classification/vgg11/igie) | | 4.3.0 | +| VGG13 | FP16 | [✅](models/cv/classification/vgg13/igie) | | 4.3.0 | +| VGG13_BN | FP16 | [✅](models/cv/classification/vgg13_bn/igie) | | 4.3.0 | | VGG16 | FP16 | [✅](models/cv/classification/vgg16/igie) | [✅](models/cv/classification/vgg16/ixrt) | 4.3.0 | | | INT8 | [✅](models/cv/classification/vgg16/igie) | | 4.3.0 | | VGG19 | FP16 | [✅](models/cv/classification/vgg19/igie) | | 4.3.0 | @@ -164,7 +173,7 @@ inference to be expanded in the future. | Model | Prec. | IGIE | ixRT | IXUCA SDK | |------------|-------|-------------------------------------------------|-------------------------------------------------|-----------| -| ATSS | FP16 | [✅](models/cv/object_detection/atss/igie) | | 4.3.0 | +| ATSS | FP16 | [✅](models/cv/object_detection/atss/igie) | [✅](models/cv/object_detection/atss/ixrt) | 4.3.0 | | CenterNet | FP16 | [✅](models/cv/object_detection/centernet/igie) | [✅](models/cv/object_detection/centernet/ixrt) | 4.3.0 | | DETR | FP16 | | [✅](models/cv/object_detection/detr/ixrt) | 4.3.0 | | FCOS | FP16 | [✅](models/cv/object_detection/fcos/igie) | [✅](models/cv/object_detection/fcos/ixrt) | 4.3.0 | @@ -172,11 +181,13 @@ inference to be expanded in the future. 
| FSAF | FP16 | [✅](models/cv/object_detection/fsaf/igie) | [✅](models/cv/object_detection/fsaf/ixrt) | 4.3.0 | | GFL | FP16 | [✅](models/cv/object_detection/gfl/igie) | | 4.3.0 | | HRNet | FP16 | [✅](models/cv/object_detection/hrnet/igie) | [✅](models/cv/object_detection/hrnet/ixrt) | 4.3.0 | -| PAA | FP16 | [✅](models/cv/object_detection/paa/igie) | | 4.3.0 | +| PAA | FP16 | [✅](models/cv/object_detection/paa/igie) | [✅](models/cv/object_detection/paa/ixrt) | 4.3.0 | | RetinaFace | FP16 | [✅](models/cv/object_detection/retinaface/igie) | [✅](models/cv/object_detection/retinaface/ixrt) | 4.3.0 | -| RetinaNet | FP16 | [✅](models/cv/object_detection/retinanet/igie) | | 4.3.0 | +| RetinaNet | FP16 | [✅](models/cv/object_detection/retinanet/igie) | [✅](models/cv/object_detection/retinanet/ixrt) | 4.3.0 | | RTMDet | FP16 | [✅](models/cv/object_detection/rtmdet/igie) | | 4.3.0 | | SABL | FP16 | [✅](models/cv/object_detection/sabl/igie) | | 4.3.0 | +| SSD | FP16 | [✅](models/cv/object_detection/ssd/igie) | | 4.3.0 | +| YOLOF | FP16 | [✅](models/cv/object_detection/yolof/igie) | | 4.3.0 | | YOLOv3 | FP16 | [✅](models/cv/object_detection/yolov3/igie) | [✅](models/cv/object_detection/yolov3/ixrt) | 4.3.0 | | | INT8 | [✅](models/cv/object_detection/yolov3/igie) | [✅](models/cv/object_detection/yolov3/ixrt) | 4.3.0 | | YOLOv4 | FP16 | [✅](models/cv/object_detection/yolov4/igie) | [✅](models/cv/object_detection/yolov4/ixrt) | 4.3.0 | @@ -195,6 +206,7 @@ inference to be expanded in the future. | YOLOv10 | FP16 | [✅](models/cv/object_detection/yolov10/igie) | [✅](models/cv/object_detection/yolov10/ixrt) | 4.3.0 | | YOLOv11 | FP16 | [✅](models/cv/object_detection/yolov11/igie) | [✅](models/cv/object_detection/yolov11/ixrt) | 4.3.0 | | YOLOv12 | FP16 | [✅](models/cv/object_detection/yolov12/igie) | | 4.3.0 | +| YOLOv13 | FP16 | [✅](models/cv/object_detection/yolov13/igie) | | 4.3.0 | | YOLOX | FP16 | [✅](models/cv/object_detection/yolox/igie) | [✅](models/cv/object_detection/yolox/ixrt) | 4.3.0 | | | INT8 | [✅](models/cv/object_detection/yolox/igie) | [✅](models/cv/object_detection/yolox/ixrt) | 4.3.0 | -- Gitee