diff --git a/models/audio/speech_recognition/conformer/ixrt/README.md b/models/audio/speech_recognition/conformer/ixrt/README.md index 765a4b28c03b194c0f702d8c55e3f374c362e567..d73a68deb6cee05e3875b37866e3850c3bd914e9 100644 --- a/models/audio/speech_recognition/conformer/ixrt/README.md +++ b/models/audio/speech_recognition/conformer/ixrt/README.md @@ -20,7 +20,8 @@ Dataset: to download the Aishell dataset. ```bash # Download and put model in conformer_checkpoints -ln -s /home/deepspark/datasets/INFER/conformer/20210601_u2++_conformer_exp_aishell ./conformer_checkpoints +wget http://files.deepspark.org.cn:880/deepspark/conformer_checkpoints.tar.gz +tar xf conformer_checkpoints.tar.gz # Prepare AISHELL Data DATA_DIR=/PATH/to/aishell_test_data diff --git a/models/audio/speech_recognition/conformer/ixrt/build_engine.py b/models/audio/speech_recognition/conformer/ixrt/build_engine.py index aa20ee59f6ecd23d8a8cb9272ece0087ed65ab89..d87af65175436d7db6629501fb0aecfddc608f09 100644 --- a/models/audio/speech_recognition/conformer/ixrt/build_engine.py +++ b/models/audio/speech_recognition/conformer/ixrt/build_engine.py @@ -1,117 +1,58 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -""" -Build Engine From FusionPlugin Onnx. -""" - import os -import ctypes import json import onnx import logging import argparse import tensorrt -import tensorrt as trt from tensorrt import Dims - -TRT_LOGGER = trt.Logger(trt.Logger.WARNING) -def load_ixrt_plugin(logger=trt.Logger(trt.Logger.WARNING), namespace="", dynamic_path=""): - if not dynamic_path: - dynamic_path = os.path.join(os.path.dirname(trt.__file__), "lib", "libixrt_plugin.so") - if not os.path.exists(dynamic_path): - raise FileNotFoundError( - f"The ixrt_plugin lib {dynamic_path} is not existed, please provided effective plugin path!" 
-        )
-    ctypes.CDLL(dynamic_path, mode=ctypes.RTLD_GLOBAL)
-    trt.init_libnvinfer_plugins(logger, namespace)
-    print(f"Loaded plugin from {dynamic_path}")
-
+from load_ixrt_plugin import load_ixrt_plugin
 load_ixrt_plugin()
-
 def parse_args():
-    parser = argparse.ArgumentParser(description="build tensorrt engine of conformer.", usage="")
-    parser.add_argument(
-        "--model_name",
-        type=str,
-        required=True,
-        help="conformer",
-    )
-    parser.add_argument(
-        "--onnx_path",
-        type=str,
-        required=True,
-        help="onnx_path path to save",
-    )
-    parser.add_argument(
-        "--engine_path",
-        type=str,
-        required=True,
-        help="engine path to save",
-    )
-    parser.add_argument(
-        "--max_batch_size",
-        type=int,
-        required=True,
-    )
-    parser.add_argument(
-        "--max_seq_len",
-        type=int,
-        required=True,
-    )
+    parser = argparse.ArgumentParser(description="Build TensorRT engine of conformer")
+    parser.add_argument("--onnx_model", type=str, required=True, help="The onnx path")
+    parser.add_argument("--bsz", type=int, default=1, help="batch size")
+    parser.add_argument("--input_size", type=int, nargs=2, default=(-1, 80), help="inference size")
+    parser.add_argument("--engine_path", type=str, required=True, help="engine path to save")
+    parser.add_argument("--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4")
+
     args = parser.parse_args()
     return args
-args = parse_args()
-MaxBSZ = args.max_batch_size
-MaxSeqLen = args.max_seq_len
-
 def build_engine_trtapi_dynamicshape(args):
-    onnx_model = args.onnx_path
+    onnx_model = args.onnx_model
     assert os.path.isfile(onnx_model), f"The onnx model{onnx_model} must be existed!"
     IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING)
     builder = tensorrt.Builder(IXRT_LOGGER)
     EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
     network = builder.create_network(EXPLICIT_BATCH)
     build_config = builder.create_builder_config()
-    profile = builder.create_optimization_profile()
-    profile.set_shape("input", Dims([MaxBSZ, 100, 80]), Dims([MaxBSZ, 1000, 80]), Dims([MaxBSZ, 1500, 80]))
-    profile.set_shape("mask", Dims([MaxBSZ, 1, 25]), Dims([MaxBSZ, 1, 250]), Dims([MaxBSZ, 1, 374]))
-    profile.set_shape("pos_emb", Dims([1, 25, 256]), Dims([1, 250, 256]), Dims([1, 374, 256]))
+    profile = builder.create_optimization_profile()
+    profile.set_shape(
+        "input", Dims([1, 1, 80]), Dims([16, 800, 80]), Dims([128, 1500, 80])
+    )
+    profile.set_shape(
+        "seq_lengths", Dims([1]), Dims([16]), Dims([128])
+    )
+
     build_config.add_optimization_profile(profile)
     parser = tensorrt.OnnxParser(network, IXRT_LOGGER)
+
     parser.parse_from_file(onnx_model)
     build_config.set_flag(tensorrt.BuilderFlag.FP16)
 
     # set dynamic
-    # input
     input_tensor = network.get_input(0)
-    input_tensor.shape = Dims([MaxBSZ, -1, 80])
-    # mask
-    mask_tensor = network.get_input(1)
-    mask_tensor.shape = Dims([MaxBSZ, 1, -1])
-    # pos_emb
-    pos_emb_tensor = network.get_input(2)
-    pos_emb_tensor.shape = Dims([1, -1, 256])
+    input_tensor.shape = Dims([-1, -1, 80])
+
+    seq_lengths_tensor = network.get_input(1)
+    seq_lengths_tensor.shape = Dims([-1])
 
     plan = builder.build_serialized_network(network, build_config)
     with open(args.engine_path, "wb") as f:
@@ -120,26 +61,7 @@ def build_engine_trtapi_dynamicshape(args):
         f.write(plan)
 
     print("Build dynamic shape engine done!")
 
 
-def build_engine_trtapi_staticshape(args):
-    onnx_model = args.onnx_path
-    assert os.path.isfile(onnx_model), f"The onnx model{onnx_model} must be existed!"
- IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - - parser.parse_from_file(onnx_model) - build_config.set_flag(tensorrt.BuilderFlag.FP16) - - plan = builder.build_serialized_network(network, build_config) - with open(args.engine_path, "wb") as f: - f.write(plan) - - print("Build static shape engine done!") - - if __name__ == "__main__": + args = parse_args() build_engine_trtapi_dynamicshape(args) - # build_engine_trtapi_staticshape(args) + diff --git a/models/audio/speech_recognition/conformer/ixrt/ci/prepare.sh b/models/audio/speech_recognition/conformer/ixrt/ci/prepare.sh index 2ee5de0b5856d5a3e511be55609a6a137400d47e..7944a1fc2c0053e967917904cf94f2f5200a90c3 100644 --- a/models/audio/speech_recognition/conformer/ixrt/ci/prepare.sh +++ b/models/audio/speech_recognition/conformer/ixrt/ci/prepare.sh @@ -27,7 +27,8 @@ fi pip3 install -r requirements.txt -ln -s /root/data/checkpoints/20210601_u2++_conformer_exp_aishell ./conformer_checkpoints - -ln -s /root/data/datasets/AISHELL/data_aishell ./aishell_test_data +ln -s /root/data/checkpoints/conformer_checkpoints.tar.gz ./ +tar xf conformer_checkpoints.tar.gz +cp /root/data/datasets/aishell_test_data.tar ./ +tar xf aishell_test_data.tar bash scripts/aishell_data_prepare.sh ./aishell_test_data ./tools \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/ixrt/common.py b/models/audio/speech_recognition/conformer/ixrt/common.py index 89023300ddc7ca3e4f0f992f4b124d8a8c131ae5..6081f807c3a709e8d73f1c1a6bc62185ddcdfc09 100644 --- a/models/audio/speech_recognition/conformer/ixrt/common.py +++ b/models/audio/speech_recognition/conformer/ixrt/common.py @@ -1,43 +1,11 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - import os -import ctypes import cv2 import glob import torch import tensorrt -import tensorrt as trt import numpy as np -import pycuda.driver as cuda - -from tensorrt.hook.utils import copy_ixrt_io_tensors_as_np - - -TRT_LOGGER = trt.Logger(trt.Logger.WARNING) -def load_ixrt_plugin(logger=trt.Logger(trt.Logger.WARNING), namespace="", dynamic_path=""): - if not dynamic_path: - dynamic_path = os.path.join(os.path.dirname(trt.__file__), "lib", "libixrt_plugin.so") - if not os.path.exists(dynamic_path): - raise FileNotFoundError( - f"The ixrt_plugin lib {dynamic_path} is not existed, please provided effective plugin path!" 
- ) - ctypes.CDLL(dynamic_path, mode=ctypes.RTLD_GLOBAL) - trt.init_libnvinfer_plugins(logger, namespace) - print(f"Loaded plugin from {dynamic_path}") -load_ixrt_plugin() +import cuda.cuda as cuda +import cuda.cudart as cudart def trtapi(engine_file): @@ -66,7 +34,6 @@ def create_engine_context(engine_path, logger): return engine, context - def get_io_bindings(engine): # Setup I/O bindings inputs = [] @@ -85,13 +52,15 @@ def get_io_bindings(engine): size = np.dtype(tensorrt.nptype(dtype)).itemsize for s in shape: size *= s - allocation = cuda.mem_alloc(size) + err, allocation = cudart.cudaMalloc(size) + assert(err == cuda.CUresult.CUDA_SUCCESS) binding = { "index": i, "name": name, "dtype": np.dtype(tensorrt.nptype(dtype)), "shape": list(shape), "allocation": allocation, + "nbytes": size, } print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") allocations.append(allocation) @@ -120,17 +89,19 @@ def setup_io_bindings(engine, context): size = np.dtype(tensorrt.nptype(dtype)).itemsize for s in shape: size *= s - allocation = cuda.mem_alloc(size) + err, allocation = cudart.cudaMalloc(size) + assert(err == cuda.CUresult.CUDA_SUCCESS) binding = { "index": i, "name": name, "dtype": np.dtype(tensorrt.nptype(dtype)), "shape": list(shape), "allocation": allocation, + "nbytes": size, } allocations.append(allocation) if engine.binding_is_input(i): inputs.append(binding) else: outputs.append(binding) - return inputs, outputs, allocations + return inputs, outputs, allocations \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/ixrt/convert2onnx.py b/models/audio/speech_recognition/conformer/ixrt/convert2onnx.py deleted file mode 100644 index 823ae3215f58d18a636e868668199ed3f388ee20..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/ixrt/convert2onnx.py +++ /dev/null @@ -1,529 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -""" -Build Compute Graph(Fusion Plugin Onnx) From Checkpoints. 
-""" - -import os -import json -import torch -import argparse -import numpy as np -from collections import OrderedDict - -from tensorrt.deploy.api import GraphTransform, create_source, create_target -from tensorrt.deploy.ir.data_type import DataType -from tensorrt.deploy.ir.variable import Variable, VariableOptions -from tensorrt.deploy.ir.graph import Graph - - -def parse_args(): - parser = argparse.ArgumentParser( - description="Build Compute Graph From Checkpoints.", usage="" - ) - parser.add_argument( - "--model_name", - type=str, - required=True, - help="conformer", - ) - parser.add_argument( - "--model_path", - type=str, - required=True, - help="checkpont of conformer", - ) - parser.add_argument( - "--onnx_path", - type=str, - required=True, - help="raw onnx path to save", - ) - parser.add_argument( - "--batch_size", - type=int, - required=True, - help="the batch size for test.", - ) - args = parser.parse_args() - return args - - -def add_global_cmvn_op(graph, state_dict, args): - t = graph - - sub_inputs = [t.make_variable("input", dtype=DataType.FLOAT, shape=(128, 1500, 80))] - key = "encoder.global_cmvn.mean" - sub_inputs.append(t.make_variable(name=key, value=state_dict[key])) - sub_outputs = [t.make_variable("Sub_output_0", dtype=DataType.FLOAT, shape=(128, 1500, 80))] - t.make_operator( - "Sub", - inputs=sub_inputs, - outputs=sub_outputs, - ) - - mul_inputs = sub_outputs - key = "encoder.global_cmvn.istd" - mul_inputs.append(t.make_variable(name=key, value=state_dict[key])) - mul_outputs = [t.make_variable("Mul_output_0", dtype=DataType.FLOAT, shape=(128, 1500, 80))] - t.make_operator( - "Mul", - inputs=mul_inputs, - outputs=mul_outputs, - ) - - unsqueeze_inputs = mul_outputs - unsqueeze_inputs.append(t.make_variable("axes", value=np.array([1], dtype=np.int64))) - unsqueeze_outputs = [t.make_variable("Unsqueeze_output_0", dtype=DataType.FLOAT, shape=(128, 1, 1500, 80))] - t.make_operator( - "Unsqueeze", - inputs=unsqueeze_inputs, - outputs=unsqueeze_outputs, - ) - - -def add_first_submodule_op(graph, state_dict, args): - """ - The firt submodule part contains follows: - 1.Conv2d+ReLU; - 2.Conv2d+ReLU; - 3.Transpose+Reshape; - 4.MatMul+Add+Mul; - """ - - t = graph - conv2d0_weight_keys = [ - "encoder.embed.conv.0.weight", - "encoder.embed.conv.0.bias", - ] - conv2d0_attributes = { - "dilations": [1, 1], - "group": 1, - "kernel_shape": [3, 3], - "pads": [0, 0, 0, 0], - "strides": [2, 2], - } - conv2d0_inputs = [t.get_variable("Unsqueeze_output_0")] - conv2d0_outputs = [t.make_variable("Conv_output_0", dtype=DataType.FLOAT)] - - for key in conv2d0_weight_keys: - conv2d0_inputs.append(t.make_variable(name=key, value=state_dict[key])) - t.make_operator( - "Conv", - inputs=conv2d0_inputs, - outputs=conv2d0_outputs, - **conv2d0_attributes - ) - - relu0_inputs = conv2d0_outputs - relu0_outputs = [t.make_variable("Relu_output_0", dtype=DataType.FLOAT)] - t.make_operator( - "Relu", - inputs=relu0_inputs, - outputs=relu0_outputs - ) - - conv2d1_weight_keys = [ - "encoder.embed.conv.2.weight", - "encoder.embed.conv.2.bias", - ] - conv2d1_attributes = { - "dilations": [1, 1], - "group": 1, - "kernel_shape": [3, 3], - "pads": [0, 0, 0, 0], - "strides": [2, 2], - } - conv2d1_inputs = relu0_outputs - conv2d1_outputs = [t.make_variable("Conv_output_1", dtype=DataType.FLOAT)] - - for key in conv2d1_weight_keys: - conv2d1_inputs.append(t.make_variable(name=key, value=state_dict[key])) - t.make_operator( - "Conv", - inputs=conv2d1_inputs, - outputs=conv2d1_outputs, - **conv2d1_attributes - ) - - 
relu1_inputs = conv2d1_outputs - relu1_outputs = [t.make_variable("Relu_output_1", dtype=DataType.FLOAT)] - t.make_operator( - "Relu", - inputs=relu1_inputs, - outputs=relu1_outputs - ) - - tran_inputs = relu1_outputs - tran_outputs = [t.make_variable("Transpose_output_0", dtype=DataType.FLOAT)] - tran_attributes = {"perm": [0, 2, 1, 3]} - t.make_operator( - "Transpose", - inputs=tran_inputs, - outputs=tran_outputs, - **tran_attributes - ) - - reshape_inputs = tran_outputs - reshape_inputs.append(t.make_variable(name="constant_0", value=np.array([args.batch_size, -1, 4864]), dtype=DataType.INT64)) - reshape_outputs = [t.make_variable("Reshape_output_0", dtype=DataType.FLOAT)] - t.make_operator( - "Reshape", - inputs=reshape_inputs, - outputs=reshape_outputs, - ) - - matmul_inputs = reshape_outputs - matmul_inputs.append(t.make_variable(name="embed.out.0.weight", value=state_dict["encoder.embed.out.0.weight"].transpose(1, 0))) # (256,4864)--->(4864,256) - matmul_outputs = [t.make_variable("MatMul_output_0", dtype=DataType.FLOAT)] - t.make_operator( - "MatMul", - inputs=matmul_inputs, - outputs=matmul_outputs, - ) - - add_inputs = matmul_outputs - add_inputs.append(t.make_variable(name="embed.out.0.bias", value=state_dict["encoder.embed.out.0.bias"])) - add_outputs = [t.make_variable("Add_output_0", dtype=DataType.FLOAT)] - t.make_operator( - "Add", - inputs=add_inputs, - outputs=add_outputs, - ) - - mul_inputs = add_outputs - mul_inputs.append(t.make_variable(name="constant_1", value=np.array([16.], dtype=np.float32), dtype=DataType.FLOAT)) - mul_outputs = [t.make_variable("Mul_output_1", dtype=DataType.FLOAT)] - t.make_operator( - "Mul", - inputs=mul_inputs, - outputs=mul_outputs, - ) - - -def add_encoder_ff_macaron_op(graph, state_dict, args, index): - - t = graph - ff_macaron_keys = [ - "encoder.encoders.{}.norm_ff_macaron.weight", - "encoder.encoders.{}.norm_ff_macaron.bias", - "encoder.encoders.{}.feed_forward_macaron.w_1.weight", - "encoder.encoders.{}.feed_forward_macaron.w_1.bias", - "encoder.encoders.{}.feed_forward_macaron.w_2.weight", - "encoder.encoders.{}.feed_forward_macaron.w_2.bias", - ] - - attributes = { - "in_feature": 256, - "hidden_size": 2048, - "act_type": 12, - "ff_scale": 0.5, - } - - if index == 0: - inputs = [graph.get_variable("Mul_output_1")] - else: - inputs = [graph.get_variable("norm_final_{}_output".format(index-1))] - - outputs = [t.make_variable("ff_macaron_{}_output".format(index), dtype=DataType.FLOAT)] - - for key in ff_macaron_keys: - key = key.format(index) - inputs.append(t.make_variable(name=key, value=state_dict[key].half(), dtype=DataType.FLOAT16)) - - t.make_operator( - "PositionWiseFFNPluginDynamic_IxRT", - inputs=inputs, - outputs=outputs, - **attributes - ) - - -def add_encoder_mhsa_op(graph, state_dict, args, index): - - t = graph - mhsa_keys = [ - "encoder.encoders.{}.norm_mha.weight", - "encoder.encoders.{}.norm_mha.bias", - "encoder.encoders.{}.self_attn.linear_q.weight", - "encoder.encoders.{}.self_attn.linear_q.bias", - "encoder.encoders.{}.self_attn.linear_k.weight", - "encoder.encoders.{}.self_attn.linear_k.bias", - "encoder.encoders.{}.self_attn.linear_v.weight", - "encoder.encoders.{}.self_attn.linear_v.bias", - "encoder.encoders.{}.self_attn.linear_pos.weight", - "encoder.encoders.{}.self_attn.pos_bias_u", - "encoder.encoders.{}.self_attn.pos_bias_v", - "encoder.encoders.{}.self_attn.linear_out.weight", - "encoder.encoders.{}.self_attn.linear_out.bias", - ] - - attributes = { - "bs": 128, - "seq_len": 374, - "n_head": 4, - "n_feat": 
256, - } - - if index == 0: - inputs = [ - graph.get_variable("ff_macaron_{}_output".format(index)), - t.make_variable("mask", dtype=DataType.INT32, shape=(128, 1, 374)), - t.make_variable("pos_emb", dtype=DataType.FLOAT, shape=(1, 374, 256)), - ] - else: - inputs = [ - graph.get_variable("ff_macaron_{}_output".format(index)), - graph.get_variable("mask"), - graph.get_variable("pos_emb"), - ] - - outputs = [t.make_variable("mhsa_{}_output".format(index), dtype=DataType.FLOAT)] - - for key in mhsa_keys: - key = key.format(index) - inputs.append(t.make_variable(name=key, value=state_dict[key].half(), dtype=DataType.FLOAT16)) - - t.make_operator( - "ConformerMultiHeadSelfAttentionPlugin_IxRT", - inputs=inputs, - outputs=outputs, - **attributes - ) - - -def add_encoder_conv_module_op(graph, state_dict, args, index): - - t = graph - conv_module_keys = [ - "encoder.encoders.{}.norm_conv.weight", - "encoder.encoders.{}.norm_conv.bias", - "encoder.encoders.{}.conv_module.pointwise_conv1.weight", - "encoder.encoders.{}.conv_module.pointwise_conv1.bias", - "encoder.encoders.{}.conv_module.depthwise_conv.weight", - "encoder.encoders.{}.conv_module.depthwise_conv.bias", - "encoder.encoders.{}.conv_module.norm.weight", - "encoder.encoders.{}.conv_module.norm.bias", - "encoder.encoders.{}.conv_module.pointwise_conv2.weight", - "encoder.encoders.{}.conv_module.pointwise_conv2.bias", - ] - - attributes = { - "kernel_size_1": 1, - "stride_1": 1, - "odim_1": 512, - "kernel_size_2": 8, - "stride_2": 1, - "odim_2": 256, - "kernel_size_3": 1, - "stride_3": 1, - "odim_3": 256, - } - - inputs = [ - graph.get_variable("mhsa_{}_output".format(index)), - graph.get_variable("mask"), - ] - outputs = [t.make_variable("conv_module_{}_output".format(index), dtype=DataType.FLOAT)] - - for key in conv_module_keys: - key = key.format(index) - - if "conv_module.depthwise_conv.weight" in key: - inputs.append(t.make_variable(name=key, value=state_dict[key].permute(1, 2, 0).half(), dtype=DataType.FLOAT16)) - elif "bias" in key and "norm" not in key: - inputs.append(t.make_variable(name=key, value=state_dict[key], dtype=DataType.FLOAT)) - else: - inputs.append(t.make_variable(name=key, value=state_dict[key].half(), dtype=DataType.FLOAT16)) - - t.make_operator( - "ConformerConvModulePlugin_IxRT", - inputs=inputs, - outputs=outputs, - **attributes - ) - - -def add_encoder_positionwise_ff_op(graph, state_dict, args, index): - - t = graph - positionwise_ff_keys = [ - "encoder.encoders.{}.norm_ff.weight", - "encoder.encoders.{}.norm_ff.bias", - "encoder.encoders.{}.feed_forward.w_1.weight", - "encoder.encoders.{}.feed_forward.w_1.bias", - "encoder.encoders.{}.feed_forward.w_2.weight", - "encoder.encoders.{}.feed_forward.w_2.bias", - ] - - attributes = { - "in_feature": 256, - "hidden_size": 2048, - "act_type": 12, - "ff_scale": 0.5, - } - - inputs = [graph.get_variable('conv_module_{}_output'.format(index))] - outputs = [t.make_variable("positionwise_ff_{}_output".format(index), dtype=DataType.FLOAT)] - - for key in positionwise_ff_keys: - key = key.format(index) - inputs.append(t.make_variable(name=key, value=state_dict[key].half(), dtype=DataType.FLOAT16)) - - t.make_operator( - "PositionWiseFFNPluginDynamic_IxRT", - inputs=inputs, - outputs=outputs, - **attributes - ) - - -def add_encoder_ln_op(graph, state_dict, args, index): - - t = graph - ln_keys = [ - "encoder.encoders.{}.norm_final.weight", - "encoder.encoders.{}.norm_final.bias", - ] - - attributes = { - "axis": -1, - "epsilon": 0.000009999999747378752, - "stash_type": 1, 
- } - - inputs = [graph.get_variable("positionwise_ff_{}_output".format(index))] - outputs = [t.make_variable("norm_final_{}_output".format(index), dtype=DataType.FLOAT)] - - for key in ln_keys: - key = key.format(index) - inputs.append(t.make_variable(name=key, value=state_dict[key].half(), dtype=DataType.FLOAT16)) - - t.make_operator( - "LayerNormalization", - inputs=inputs, - outputs=outputs, - **attributes - ) - - -def add_final_ln_op(graph, state_dict, args): - - t = graph - ln_keys = [ - "encoder.after_norm.weight", - "encoder.after_norm.bias", - ] - - attributes = { - "axis": -1, - "epsilon": 0.000009999999747378752, - "stash_type": 1, - } - - inputs = [graph.get_variable("norm_final_11_output")] - outputs = [t.make_variable("norm_final_output", dtype=DataType.FLOAT)] - - for key in ln_keys: - inputs.append(t.make_variable(name=key, value=state_dict[key].half(), dtype=DataType.FLOAT16)) - - t.make_operator( - "LayerNormalization", - inputs=inputs, - outputs=outputs, - **attributes - ) - - -def add_ctc_op(graph, state_dict, args): - t = graph - # matmul - matmul_inputs = [graph.get_variable("norm_final_output")] - matmul_inputs.append(t.make_variable(name="ctc.ctc_lo.weight", value=state_dict["ctc.ctc_lo.weight"].transpose(1, 0))) # (4233,256)--->(256,4233) - matmul_outputs = [t.make_variable("MatMul_output_1", dtype=DataType.FLOAT)] - t.make_operator( - "MatMul", - inputs=matmul_inputs, - outputs=matmul_outputs, - ) - - add_inputs = matmul_outputs - add_inputs.append(t.make_variable(name="ctc.ctc_lo.bias", value=state_dict["ctc.ctc_lo.bias"])) - add_outputs = [t.make_variable("Add_output_1", dtype=DataType.FLOAT)] - t.make_operator( - "Add", - inputs=add_inputs, - outputs=add_outputs, - ) - - logsoftmax_inputs = add_outputs - logsoftmax_outputs = [t.make_variable("output", dtype=DataType.FLOAT)] - attributes = { - "axis": 2 - } - t.make_operator( - "LogSoftmax", - inputs=logsoftmax_inputs, - outputs=logsoftmax_outputs, - **attributes - ) - - -def main(args): - graph = Graph() - transform = GraphTransform(graph) - state_dict = torch.load(args.model_path) - - # 0. Global CMVN: sub+mul+unsqueeze - add_global_cmvn_op(transform, state_dict, args) - - # 1. First Submodule: Conv2d+Relu+Transpose+MatMul - add_first_submodule_op(transform, state_dict, args) - - # 2. Second Submodule: ConformerEncoderLayer: 12 layers - for i in range(args.num_layers): - add_encoder_ff_macaron_op(transform, state_dict, args, i) - add_encoder_mhsa_op(transform, state_dict, args, i) - add_encoder_conv_module_op(transform, state_dict, args, i) - add_encoder_positionwise_ff_op(transform, state_dict, args, i) - add_encoder_ln_op(transform, state_dict, args, i) - - # 3. Third Submodule: FinalNorm - add_final_ln_op(transform, state_dict, args) - - # 4.Forth Submodule: CTC+LogSoftmax - add_ctc_op(transform, state_dict, args) - - # 5. set input and output - graph.add_input(graph.get_variable("input")) - graph.add_input(graph.get_variable("mask")) - graph.add_input(graph.get_variable("pos_emb")) - graph.add_output(graph.get_variable("output")) - # 5. 
export onnx file - create_target(saved_path=args.onnx_path).export(graph) - print("save onnx: ", args.onnx_path) - - -if __name__ == "__main__": - args = parse_args() - model_name = args.model_name.lower() - args.num_layers = 12 - args.hidden_size = 2048 - args.head_num = 4 - args.head_dim = 64 - args.pad_id = 0 - args.inner_size = 3072 - main(args) diff --git a/models/audio/speech_recognition/conformer/ixrt/ixrt_inference_accuracy.py b/models/audio/speech_recognition/conformer/ixrt/ixrt_inference_accuracy.py index 702221902669351bb1949453c3ee566477ed5692..8e58a24279cc1f4f3d2cdba572701ad35344ed0c 100644 --- a/models/audio/speech_recognition/conformer/ixrt/ixrt_inference_accuracy.py +++ b/models/audio/speech_recognition/conformer/ixrt/ixrt_inference_accuracy.py @@ -1,17 +1,16 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. +# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) # -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import os import sys @@ -21,7 +20,6 @@ sys.path.append(os.path.dirname(os.path.dirname(__file__))) import argparse import yaml import copy -import torch import numpy as np from tqdm.contrib import tqdm @@ -34,14 +32,11 @@ from tensorrt import Dims from common import create_engine_context, get_io_bindings,trtapi,setup_io_bindings import pickle -import pycuda.autoinit -import pycuda.driver as cuda +import cuda.cuda as cuda +import cuda.cudart as cudart -from utils import make_pad_mask, RelPositionalEncoding -from postprocess import ctc_greedy_search - - -rel_positional_encoding = RelPositionalEncoding(256, 0.1) +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() def get_args(): @@ -62,24 +57,36 @@ def get_args(): return args -def tensorrt_infer(engine, context, all_inputs): - input_names = ["input", "mask", "pos_emb"] - output_names = ["output"] +def ixrt_infer(module, input, seq_lengths): + module.set_input(key="input", value=input) + module.set_input(key="seq_lengths", value=seq_lengths) + module.run() + out = module.get_output() + return out[0] - for input_name, input_data in zip(input_names, all_inputs): - input_idx = engine.get_binding_index(input_name) - input_shape = input_data.shape - context.set_binding_shape(input_idx, Dims(input_shape)) +def tensorrt_infer(engine,context, features, lengths): + + input_names=["input","seq_lengths"] + output_names=["output"] + input_idx = engine.get_binding_index(input_names[0]) + input_shape = features.shape + context.set_binding_shape(input_idx, Dims(input_shape)) + + seq_lengths_idx = engine.get_binding_index(input_names[1]) + seq_lengths_shape = lengths.shape + context.set_binding_shape(seq_lengths_idx, Dims(seq_lengths_shape)) + inputs, outputs, allocations = setup_io_bindings(engine, context) pred_output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - - for i, input_data in enumerate(all_inputs): - cuda.memcpy_htod(inputs[i]["allocation"], input_data) - + err, = cuda.cuMemcpyHtoD(inputs[0]["allocation"], features, features.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + err, = cuda.cuMemcpyHtoD(inputs[1]["allocation"], lengths, lengths.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) context.execute_v2(allocations) - cuda.memcpy_dtoh(pred_output, outputs[0]["allocation"]) + err, = cuda.cuMemcpyDtoH(pred_output, outputs[0]["allocation"], outputs[0]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) return pred_output @@ -87,8 +94,9 @@ def engine_init(engine): host_mem = tensorrt.IHostMemory logger = tensorrt.Logger(tensorrt.Logger.ERROR) engine, context = create_engine_context(engine, logger) - + return engine,context + def calculate_cer(data, reference_data): @@ -144,7 +152,7 @@ def main(): args = get_args() # 读取配置文件 - config_fn = os.path.join(args.model_dir, "train.yaml") + config_fn = os.path.join(args.model_dir, "config.yaml") with open(config_fn, "r") as fin: configs = yaml.load(fin, Loader=yaml.FullLoader) @@ -164,7 +172,7 @@ def main(): dataset_conf["batch_conf"]["batch_size"] = args.batch_size # Load dict - dict_fn = os.path.join(args.model_dir, "units.txt") + dict_fn = os.path.join(args.model_dir, "words.txt") char_dict = {} with open(dict_fn, "r", encoding="utf8") as fin: for line in fin: @@ -199,15 +207,15 @@ def main(): feats_lengths.cpu().numpy().astype(np.int32), ] ) - with open(data_path_pkl, "wb") as f: - pickle.dump( - [ - eval_samples, - max_batch_size, - max_feature_length - ], - f, - ) + with open(data_path_pkl, "wb") as f: + pickle.dump( + [ + eval_samples, + max_batch_size, + 
max_feature_length + ], + f, + ) else: print(f"load data from tmp: {data_path_pkl}") with open(data_path_pkl, "rb") as f: @@ -221,44 +229,22 @@ def main(): ) print("*** 2. Load engine ***") - engine_path = os.path.join(args.model_dir, f"conformer_encoder_fusion.engine") + engine_path = os.path.join(args.model_dir, f"conformer_{args.infer_type}_trt.engine") engine, context = engine_init(engine_path) - + print("*** 3. Warm up ***") if args.warm_up > 0: for i in range(args.warm_up): - feats_tmp = np.ones((args.batch_size, 1500, 80)).astype(np.float32) - feats_lengths_tmp = np.ones((args.batch_size)).astype(np.int32) * 1500 - mask_tmp = make_pad_mask(feats_lengths_tmp, 1500) - mask_len_tmp = mask_tmp.shape[-1] - pos_emb_tmp = rel_positional_encoding(mask_len_tmp).numpy() - all_inputs = [feats_tmp, mask_tmp, pos_emb_tmp] - tensorrt_infer(engine, context, all_inputs) + feats_tmp = np.ones((args.batch_size,800,80)).astype(np.float16) + feats_lengths_tmp = np.ones((args.batch_size)).astype(np.int32) + tensorrt_infer(engine,context, feats_tmp, feats_lengths_tmp) results = [] for keys, feats, feats_lengths in tqdm(eval_samples): - b, seq_len, feat = feats.shape - - inputs = feats.astype(np.float32) - mask = make_pad_mask(feats_lengths, seq_len) - mask_len = mask.shape[-1] - pos_emb = rel_positional_encoding(mask_len).numpy() - - all_inputs = [inputs, mask, pos_emb] - hyps = tensorrt_infer( - engine, - context, - all_inputs - ) - - ctc_probs = torch.from_numpy(hyps) - ctc_lens = torch.from_numpy(feats_lengths) - hyps = ctc_greedy_search(ctc_probs, ctc_lens) - + hyps = tensorrt_infer(engine,context, feats, feats_lengths) for i, key in enumerate(keys): line = f"{key} " for w in hyps[i]: - w = w - 1 if w == eos: break line += char_dict[w] @@ -271,6 +257,7 @@ def main(): reference_data.append(line) cer, corr = calculate_cer(results, reference_data) + target_cer = float(os.environ["Accuracy"]) metricResult = {"metricResult": {}} metricResult["metricResult"]["CER"] = round(cer, 3) diff --git a/models/audio/speech_recognition/conformer/ixrt/ixrt_inference_performance.py b/models/audio/speech_recognition/conformer/ixrt/ixrt_inference_performance.py index fcfcb55952d5b4616c140e9ae1f076f90b57ad58..ffc01462a154b564c2ddf879f17901c7039d39db 100644 --- a/models/audio/speech_recognition/conformer/ixrt/ixrt_inference_performance.py +++ b/models/audio/speech_recognition/conformer/ixrt/ixrt_inference_performance.py @@ -1,48 +1,45 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. +# Copyright (c) 2020 Mobvoi Inc. (authors: Binbin Zhang, Xiaoyu Chen, Di Wu) # -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
+# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import os import sys -import time sys.path.append(os.path.dirname(os.path.dirname(__file__))) -import argparse import yaml +import time import copy -import torch +import argparse +import pickle import numpy as np from tqdm.contrib import tqdm from torch.utils.data import DataLoader + from wenet.file_utils import read_symbol_table from wenet.dataset import Dataset -from tools.compute_cer import Calculator, characterize, normalize, default_cluster + import tensorrt from tensorrt import Dims from common import create_engine_context, get_io_bindings,trtapi,setup_io_bindings import pickle -import pycuda.autoinit -import pycuda.driver as cuda - -from utils import make_pad_mask, RelPositionalEncoding -from postprocess import ctc_greedy_search - +import cuda.cuda as cuda +import cuda.cudart as cudart -rel_positional_encoding = RelPositionalEncoding(256, 0.1) +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() def get_args(): @@ -62,90 +59,43 @@ def get_args(): args = parser.parse_args() return args - -def tensorrt_infer(engine, context, all_inputs): - - input_names = ["input", "mask", "pos_emb"] - output_names = ["output"] - - for input_name, input_data in zip(input_names, all_inputs): - input_idx = engine.get_binding_index(input_name) - input_shape = input_data.shape - context.set_binding_shape(input_idx, Dims(input_shape)) - - inputs, outputs, allocations = setup_io_bindings(engine, context) - pred_output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - - for i, input_data in enumerate(all_inputs): - cuda.memcpy_htod(inputs[i]["allocation"], input_data) - - context.execute_v2(allocations) - cuda.memcpy_dtoh(pred_output, outputs[0]["allocation"]) - return pred_output - - def engine_init(engine): host_mem = tensorrt.IHostMemory logger = tensorrt.Logger(tensorrt.Logger.ERROR) engine, context = create_engine_context(engine, logger) - + return engine,context +def tensorrt_infer(engine,context, features, lengths): + + input_names=["input","seq_lengths"] + output_names=["output"] + input_idx = engine.get_binding_index(input_names[0]) + input_shape = features.shape + context.set_binding_shape(input_idx, Dims(input_shape)) + + seq_lengths_idx = engine.get_binding_index(input_names[1]) + seq_lengths_shape = lengths.shape + context.set_binding_shape(seq_lengths_idx, Dims(seq_lengths_shape)) + + inputs, outputs, allocations = setup_io_bindings(engine, context) + pred_output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + err, = cuda.cuMemcpyHtoD(inputs[0]["allocation"], features, features.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + err, = cuda.cuMemcpyHtoD(inputs[1]["allocation"], lengths, lengths.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + context.execute_v2(allocations) + err, = cuda.cuMemcpyDtoH(pred_output, outputs[0]["allocation"], outputs[0]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + return pred_output -def calculate_cer(data, reference_data): - calculator = Calculator() - tochar = True - split = None - case_sensitive = False - ignore_words = set() - rec_set = {} - for line in data: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array) == 0: - continue - fid = array[0] - rec_set[fid] 
= normalize(array[1:], ignore_words, case_sensitive, split) - - default_clusters = {} - default_words = {} - for line in reference_data: - if tochar: - array = characterize(line) - else: - array = line.strip().split() - if len(array) == 0: - continue - fid = array[0] - if fid not in rec_set: - continue - lab = normalize(array[1:], ignore_words, case_sensitive, split) - rec = rec_set[fid] - - for word in rec + lab: - if word not in default_words: - default_cluster_name = default_cluster(word) - if default_cluster_name not in default_clusters: - default_clusters[default_cluster_name] = {} - if word not in default_clusters[default_cluster_name]: - default_clusters[default_cluster_name][word] = 1 - default_words[word] = default_cluster_name - result = calculator.calculate(lab, rec) - - result = calculator.overall() - cer = float(result["ins"] + result["sub"] + result["del"]) / result["all"] - corr = result["cor"] / result["all"] - - return cer, corr def main(): args = get_args() # 读取配置文件 - config_fn = os.path.join(args.model_dir, "train.yaml") + config_fn = os.path.join(args.model_dir, "config.yaml") with open(config_fn, "r") as fin: configs = yaml.load(fin, Loader=yaml.FullLoader) @@ -165,14 +115,13 @@ def main(): dataset_conf["batch_conf"]["batch_size"] = args.batch_size # Load dict - dict_fn = os.path.join(args.model_dir, "units.txt") + dict_fn = os.path.join(args.model_dir, "words.txt") char_dict = {} with open(dict_fn, "r", encoding="utf8") as fin: for line in fin: arr = line.strip().split() assert len(arr) == 2 char_dict[int(arr[1])] = arr[0] - eos = len(char_dict) - 1 data_type = "raw" test_data_fn = os.path.join(args.data_dir, "data.list") @@ -200,15 +149,15 @@ def main(): feats_lengths.cpu().numpy().astype(np.int32), ] ) - with open(data_path_pkl, "wb") as f: - pickle.dump( - [ - eval_samples, - max_batch_size, - max_feature_length - ], - f, - ) + with open(data_path_pkl, "wb") as f: + pickle.dump( + [ + eval_samples, + max_batch_size, + max_feature_length + ], + f, + ) else: print(f"load data from tmp: {data_path_pkl}") with open(data_path_pkl, "rb") as f: @@ -221,40 +170,24 @@ def main(): f"dataset max shape: batch_size: {max_batch_size}, feat_length: {max_feature_length}" ) - print("*** 2. Load engine ***") - engine_path = os.path.join(args.model_dir, f"conformer_encoder_fusion.engine") + print("*** 2. Load IxRT engine ***") + engine_path = os.path.join(args.model_dir, f"conformer_{args.infer_type}_trt.engine") engine, context = engine_init(engine_path) - print("*** 3. Warm up ***") if args.warm_up > 0: for i in range(args.warm_up): - feats_tmp = np.ones((args.batch_size, 1500, 80)).astype(np.float32) - feats_lengths_tmp = np.ones((args.batch_size)).astype(np.int32) * 1500 - mask_tmp = make_pad_mask(feats_lengths_tmp, 1500) - mask_len_tmp = mask_tmp.shape[-1] - pos_emb_tmp = rel_positional_encoding(mask_len_tmp).numpy() - all_inputs = [feats_tmp, mask_tmp, pos_emb_tmp] - tensorrt_infer(engine, context, all_inputs) + feats_tmp = np.ones((args.batch_size,1200,80)).astype(np.float16) + feats_lengths_tmp = np.ones((args.batch_size)).astype(np.int32) + tensorrt_infer(engine,context, feats_tmp, feats_lengths_tmp) print("*** 4. 
Inference ***") start_time = time.time() num_samples = 0 results = [] for keys, feats, feats_lengths in tqdm(eval_samples): - b, seq_len, feat = feats.shape - num_samples += b - inputs = feats.astype(np.float32) - mask = make_pad_mask(feats_lengths, seq_len) - mask_len = mask.shape[-1] - pos_emb = rel_positional_encoding(mask_len).numpy() - - all_inputs = [inputs, mask, pos_emb] - hyps = tensorrt_infer( - engine, - context, - all_inputs - ) - + num_samples += feats.shape[0] + hyps = tensorrt_infer(engine,context, feats, feats_lengths) + results.append([hyps, keys]) eval_time = time.time() - start_time QPS = num_samples / eval_time @@ -270,7 +203,7 @@ def main(): exit() else: print("failed!") - exit(1) + exit(10) if __name__ == "__main__": diff --git a/models/audio/speech_recognition/conformer/ixrt/load_ixrt_plugin.py b/models/audio/speech_recognition/conformer/ixrt/load_ixrt_plugin.py new file mode 100644 index 0000000000000000000000000000000000000000..932efbdfd1a4e91d8ddfd363adf6bce989df1709 --- /dev/null +++ b/models/audio/speech_recognition/conformer/ixrt/load_ixrt_plugin.py @@ -0,0 +1,12 @@ +import ctypes +import tensorrt +from os.path import join, dirname, exists +def load_ixrt_plugin(logger=tensorrt.Logger(tensorrt.Logger.INFO), namespace="", dynamic_path=""): + if not dynamic_path: + dynamic_path = join(dirname(tensorrt.__file__), "lib", "libixrt_plugin.so") + if not exists(dynamic_path): + raise FileNotFoundError( + f"The ixrt_plugin lib {dynamic_path} is not existed, please provided effective plugin path!") + ctypes.CDLL(dynamic_path) + tensorrt.init_libnvinfer_plugins(logger, namespace) + print(f"Loaded plugin from {dynamic_path}") \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/ixrt/postprocess/__init__.py b/models/audio/speech_recognition/conformer/ixrt/postprocess/__init__.py deleted file mode 100644 index 33f8b0465aee011298fa9933086fbdc1c8dbd4d4..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/ixrt/postprocess/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .search import ctc_greedy_search diff --git a/models/audio/speech_recognition/conformer/ixrt/postprocess/search.py b/models/audio/speech_recognition/conformer/ixrt/postprocess/search.py deleted file mode 100644 index d2ae55650539b9d0be352e78a64999606ac12fbb..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/ixrt/postprocess/search.py +++ /dev/null @@ -1,103 +0,0 @@ -import math -from collections import defaultdict -from typing import List, Dict - -import torch -from torch.nn.utils.rnn import pad_sequence - - -def remove_duplicates_and_blank(hyp: List[int], - blank_id: int = 0) -> List[int]: - new_hyp: List[int] = [] - cur = 0 - while cur < len(hyp): - if hyp[cur] != blank_id: - new_hyp.append(hyp[cur]) - prev = cur - while cur < len(hyp) and hyp[cur] == hyp[prev]: - cur += 1 - return new_hyp - - -def make_pad_mask(lengths: torch.Tensor, max_len: int = 0) -> torch.Tensor: - """Make mask tensor containing indices of padded part. - - See description of make_non_pad_mask. - - Args: - lengths (torch.Tensor): Batch of lengths (B,). - Returns: - torch.Tensor: Mask tensor containing indices of padded part. 
- - Examples: - >>> lengths = [5, 3, 2] - >>> make_pad_mask(lengths) - masks = [[0, 0, 0, 0 ,0], - [0, 0, 0, 1, 1], - [0, 0, 1, 1, 1]] - """ - batch_size = lengths.size(0) - max_len = max_len if max_len > 0 else lengths.max().item() - seq_range = torch.arange(0, - max_len, - dtype=torch.int64, - device=lengths.device) - seq_range_expand = seq_range.unsqueeze(0).expand(batch_size, max_len) - seq_length_expand = lengths.unsqueeze(-1) - mask = seq_range_expand >= seq_length_expand - - mask = mask[:, 2::2][:, 2::2] - return mask - - -class DecodeResult: - - def __init__(self, - tokens: List[int], - score: float = 0.0, - confidence: float = 0.0, - tokens_confidence: List[float] = None, - times: List[int] = None, - nbest: List[List[int]] = None, - nbest_scores: List[float] = None, - nbest_times: List[List[int]] = None): - """ - Args: - tokens: decode token list - score: the total decode score of this result - confidence: the total confidence of this result, it's in 0~1 - tokens_confidence: confidence of each token - times: timestamp of each token, list of (start, end) - nbest: nbest result - nbest_scores: score of each nbest - nbest_times: - """ - self.tokens = tokens - self.score = score - self.confidence = confidence - self.tokens_confidence = tokens_confidence - self.times = times - self.nbest = nbest - self.nbest_scores = nbest_scores - self.nbest_times = nbest_times - - -def ctc_greedy_search(ctc_probs: torch.Tensor, - ctc_lens: torch.Tensor, - blank_id: int = 0) -> List[DecodeResult]: - - batch_size = ctc_probs.shape[0] - maxlen = ctc_probs.size(1) - topk_prob, topk_index = ctc_probs.topk(1, dim=2) # (B, maxlen, 1) - topk_index = topk_index.view(batch_size, maxlen) # (B, maxlen) - - mask_ctc_lens = ctc_lens[0].item() - mask = make_pad_mask(ctc_lens, mask_ctc_lens) # (B, maxlen) - topk_index = topk_index.masked_fill_(mask, blank_id) # (B, maxlen) - hyps = [hyp.tolist() for hyp in topk_index] - scores = topk_prob.max(1) - results = [] - for hyp in hyps: - results.append(remove_duplicates_and_blank(hyp, blank_id)) - return results - diff --git a/models/audio/speech_recognition/conformer/ixrt/requirements.txt b/models/audio/speech_recognition/conformer/ixrt/requirements.txt index dd4788cf7291642c165bfd61f31399f2e24213e9..3dcea1ccc8337478e16d50942acc6175d270b9b5 100644 --- a/models/audio/speech_recognition/conformer/ixrt/requirements.txt +++ b/models/audio/speech_recognition/conformer/ixrt/requirements.txt @@ -1,5 +1,5 @@ tqdm onnx -typeguard==2.13.3 onnxsim -pycuda \ No newline at end of file +librosa +soundfile \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/ixrt/scripts/infer_conformer_fp16_accuracy.sh b/models/audio/speech_recognition/conformer/ixrt/scripts/infer_conformer_fp16_accuracy.sh index f1af4bb4e03a0c9c6084ae7a122f66f765c27c86..435cf5f35e064d239d7b7a6de361af3a40760238 100644 --- a/models/audio/speech_recognition/conformer/ixrt/scripts/infer_conformer_fp16_accuracy.sh +++ b/models/audio/speech_recognition/conformer/ixrt/scripts/infer_conformer_fp16_accuracy.sh @@ -12,38 +12,35 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. - set -euo pipefail +EXIT_STATUS=0 +check_status() +{ + ret_code=${PIPESTATUS[0]} + if [ ${ret_code} != 0 ]; then + echo "fails" + [[ ${ret_code} -eq 10 && "${TEST_PERF:-1}" -eq 0 ]] || EXIT_STATUS=1 + fi +} + current_path=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd) PROJECT_DIR=${current_path}/.. 
DATA_DIR=${current_path}/../aishell_test_data/test
MODEL_DIR=${current_path}/../conformer_checkpoints
 
-export Accuracy=${Accuracy:=0.052}
+export Accuracy=${Accuracy:=0.05}
 
 cd ${PROJECT_DIR}
 
-echo "Step1.Export Onnx From Checkpoints!"
-python3 convert2onnx.py \
-    --model_name "Conformer" \
-    --model_path=${MODEL_DIR}/final.pt \
-    --onnx_path=${MODEL_DIR}/conformer_encoder_fusion.onnx \
-    --batch_size=8
-
-echo "Step2.Build Engine!"
 python3 build_engine.py \
-    --model_name "Conformer" \
-    --onnx_path=${MODEL_DIR}/conformer_encoder_fusion.onnx \
-    --engine_path=${MODEL_DIR}/conformer_encoder_fusion.engine \
-    --max_batch_size=8 \
-    --max_seq_len=1500
+    --onnx_model ${MODEL_DIR}/conformer_fp16_trt.onnx \
+    --engine_path ${MODEL_DIR}/conformer_fp16_trt.engine "$@"; check_status
 
-echo "Step3.Inference(Test ACC)!"
 python3 ixrt_inference_accuracy.py \
     --infer_type fp16 \
-    --warm_up 3 \
-    --batch_size ${BATCH_SIZE:=8} \
+    --batch_size ${BATCH_SIZE:=24} \
     --data_dir ${DATA_DIR} \
-    --model_dir ${MODEL_DIR}
+    --model_dir ${MODEL_DIR} "$@"; check_status
+exit ${EXIT_STATUS}
\ No newline at end of file
diff --git a/models/audio/speech_recognition/conformer/ixrt/scripts/infer_conformer_fp16_performance.sh b/models/audio/speech_recognition/conformer/ixrt/scripts/infer_conformer_fp16_performance.sh
index dc02673c03fb21a4301b757a18885af81cbad31d..1ad506119f1ab87b08254f1f861efb9020139e6f 100644
--- a/models/audio/speech_recognition/conformer/ixrt/scripts/infer_conformer_fp16_performance.sh
+++ b/models/audio/speech_recognition/conformer/ixrt/scripts/infer_conformer_fp16_performance.sh
@@ -12,16 +12,15 @@
 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 # License for the specific language governing permissions and limitations
 # under the License.
-
 set -euo pipefail
-
 EXIT_STATUS=0
 check_status()
 {
-    if ((${PIPESTATUS[0]} != 0));then
+    ret_code=${PIPESTATUS[0]}
+    if [ ${ret_code} != 0 ]; then
         echo "fails"
-        EXIT_STATUS=1
+        [[ ${ret_code} -eq 10 && "${TEST_PERF:-1}" -eq 0 ]] || EXIT_STATUS=1
     fi
 }
 
@@ -31,29 +30,17 @@ PROJECT_DIR=${current_path}/..
 DATA_DIR=${current_path}/../aishell_test_data/test
 MODEL_DIR=${current_path}/../conformer_checkpoints
 
-export Accuracy=${Accuracy:=350}
+export Accuracy=${Accuracy:=529}
 
 cd ${PROJECT_DIR}
-
-echo "Step1.Export Onnx From Checkpoints!"
-python3 convert2onnx.py \
-    --model_name "Conformer" \
-    --model_path=${MODEL_DIR}/final.pt \
-    --onnx_path=${MODEL_DIR}/conformer_encoder_fusion.onnx \
-    --batch_size=24
-
-echo "Step2.Build Engine!"
 python3 build_engine.py \
-    --model_name "Conformer" \
-    --onnx_path=${MODEL_DIR}/conformer_encoder_fusion.onnx \
-    --engine_path=${MODEL_DIR}/conformer_encoder_fusion.engine \
-    --max_batch_size=24 \
-    --max_seq_len=1500
+    --onnx_model ${MODEL_DIR}/conformer_fp16_trt.onnx \
+    --engine_path ${MODEL_DIR}/conformer_fp16_trt.engine "$@"; check_status
 
-echo "Step3.Inference(Test QPS)!"
python3 ixrt_inference_performance.py \ --infer_type fp16 \ --batch_size ${BATCH_SIZE:=24} \ --data_dir ${DATA_DIR} \ - --model_dir ${MODEL_DIR} + --model_dir ${MODEL_DIR} "$@"; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/audio/speech_recognition/conformer/ixrt/utils/__init__.py b/models/audio/speech_recognition/conformer/ixrt/utils/__init__.py deleted file mode 100644 index c57435c110fc12f39d79c1b02f4b2e83dfe1a3e3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/ixrt/utils/__init__.py +++ /dev/null @@ -1,39 +0,0 @@ -import os -import torch -import numpy as np - -from .embedding import RelPositionalEncoding - - -rel_positional_encoding = RelPositionalEncoding(256, 0.1) - - -def make_pad_mask(lengths: np.ndarray, max_len: int = 0) -> np.ndarray : - """Make mask tensor containing indices of padded part. - - See description of make_non_pad_mask. - - Args: - lengths (numpy.ndarray): Batch of lengths (B,). - Returns: - numpy.ndarray: Mask tensor containing indices of padded part. - - Examples: - >>> lengths = [5, 3, 2] - >>> make_pad_mask(lengths) - masks = [[0, 0, 0, 0 ,0], - [0, 0, 0, 1, 1], - [0, 0, 1, 1, 1]] - """ - - batch_size = lengths.shape[0] - max_len = max_len if max_len > 0 else lengths.max().item() - seq_range = np.arange(0, max_len, dtype=np.int64) - seq_range_expand = np.tile(seq_range, batch_size).reshape(batch_size, max_len) - seq_length_expand = lengths[..., None] - mask = seq_range_expand >= seq_length_expand - mask = np.expand_dims(mask, axis=1) - mask = ~mask - mask = mask[:, :, 2::2][:, :, 2::2] - mask = mask.astype(np.int32) - return mask diff --git a/models/audio/speech_recognition/conformer/ixrt/utils/embedding.py b/models/audio/speech_recognition/conformer/ixrt/utils/embedding.py deleted file mode 100644 index 0fd65c4cdfc3fec244c88d2c47cf94b33b9088f3..0000000000000000000000000000000000000000 --- a/models/audio/speech_recognition/conformer/ixrt/utils/embedding.py +++ /dev/null @@ -1,133 +0,0 @@ -"""Positonal Encoding Module.""" - -import math -from typing import Tuple, Union - -import torch -import torch.nn.functional as F -import numpy as np - - -class PositionalEncoding(torch.nn.Module): - """Positional encoding. - - :param int d_model: embedding dim - :param float dropout_rate: dropout rate - :param int max_len: maximum input length - - PE(pos, 2i) = sin(pos/(10000^(2i/dmodel))) - PE(pos, 2i+1) = cos(pos/(10000^(2i/dmodel))) - """ - - def __init__(self, - d_model: int, - dropout_rate: float, - max_len: int = 5000, - reverse: bool = False): - """Construct an PositionalEncoding object.""" - super().__init__() - self.d_model = d_model - self.xscale = math.sqrt(self.d_model) - self.dropout = torch.nn.Dropout(p=dropout_rate) - self.max_len = max_len - - pe = torch.zeros(self.max_len, self.d_model) - position = torch.arange(0, self.max_len, - dtype=torch.float32).unsqueeze(1) - div_term = torch.exp( - torch.arange(0, self.d_model, 2, dtype=torch.float32) * - -(math.log(10000.0) / self.d_model)) - pe[:, 0::2] = torch.sin(position * div_term) - pe[:, 1::2] = torch.cos(position * div_term) - pe = pe.unsqueeze(0) - self.register_buffer("pe", pe) - - def forward(self, - x: torch.Tensor, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Add positional encoding. - - Args: - x (torch.Tensor): Input. Its shape is (batch, time, ...) - offset (int, torch.tensor): position offset - - Returns: - torch.Tensor: Encoded tensor. Its shape is (batch, time, ...) 
- torch.Tensor: for compatibility to RelPositionalEncoding - """ - - pos_emb = self.position_encoding(offset, x.size(1), False) - x = x * self.xscale + pos_emb - return self.dropout(x), self.dropout(pos_emb) - - def position_encoding(self, - offset: Union[int, torch.Tensor], - size: int, - apply_dropout: bool = True) -> torch.Tensor: - """ For getting encoding in a streaming fashion - - Attention!!!!! - we apply dropout only once at the whole utterance level in a none - streaming way, but will call this function several times with - increasing input size in a streaming scenario, so the dropout will - be applied several times. - - Args: - offset (int or torch.tensor): start offset - size (int): required size of position encoding - - Returns: - torch.Tensor: Corresponding encoding - """ - # How to subscript a Union type: - # https://github.com/pytorch/pytorch/issues/69434 - # import ipdb;ipdb.set_trace() - if isinstance(offset, int): - assert offset + size <= self.max_len - pos_emb = self.pe[:, offset:offset + size] - elif isinstance(offset, torch.Tensor) and offset.dim() == 0: # scalar - assert offset + size <= self.max_len - pos_emb = self.pe[:, offset:offset + size] - else: # for batched streaming decoding on GPU - assert torch.max(offset) + size <= self.max_len - index = offset.unsqueeze(1) + \ - torch.arange(0, size).to(offset.device) # B X T - flag = index > 0 - # remove negative offset - index = index * flag - pos_emb = F.embedding(index, self.pe[0]) # B X T X d_model - - if apply_dropout: - pos_emb = self.dropout(pos_emb) - return pos_emb - - -class RelPositionalEncoding(PositionalEncoding): - """Relative positional encoding module. - See : Appendix B in https://arxiv.org/abs/1901.02860 - Args: - d_model (int): Embedding dimension. - dropout_rate (float): Dropout rate. - max_len (int): Maximum input length. - """ - - def __init__(self, d_model: int, dropout_rate: float, max_len: int = 5000): - """Initialize class.""" - super().__init__(d_model, dropout_rate, max_len, reverse=True) - - def forward(self, - seq_len: int, - offset: Union[int, torch.Tensor] = 0) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """Compute positional encoding. - Args: - x (torch.Tensor): Input tensor (batch, time, `*`). - Returns: - torch.Tensor: Encoded tensor (batch, time, `*`). - torch.Tensor: Positional embedding tensor (1, time, `*`). - """ - pos_emb = self.position_encoding(offset, seq_len, False) - # return self.dropout(pos_emb) - return pos_emb - diff --git a/models/audio/speech_recognition/conformer/ixrt/wenet/__init__.py b/models/audio/speech_recognition/conformer/ixrt/wenet/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
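
For reference, the sketch below shows how the rebuilt engine is driven after this patch: the old input/mask/pos_emb triple and the Torch-side CTC post-processing are gone, and the engine consumes "input" plus "seq_lengths" directly, mirroring tensorrt_infer() in the updated ixrt_inference_*.py scripts. It only uses helpers that exist in this tree (load_ixrt_plugin, common.create_engine_context, common.setup_io_bindings); the engine path, batch size, frame count, and FP16 feature dtype are illustrative assumptions, not values fixed by the patch.

# inference_sketch.py -- minimal, illustrative sketch; paths/shapes/dtypes assumed.
import numpy as np
import tensorrt
from tensorrt import Dims
import cuda.cuda as cuda

from common import create_engine_context, setup_io_bindings
from load_ixrt_plugin import load_ixrt_plugin

load_ixrt_plugin()  # registers libixrt_plugin.so so the fused ops deserialize

logger = tensorrt.Logger(tensorrt.Logger.ERROR)
engine, context = create_engine_context(
    "conformer_checkpoints/conformer_fp16_trt.engine", logger  # assumed path
)

# One batch of padded fbank features and the valid length of each utterance.
feats = np.ones((1, 800, 80), dtype=np.float16)   # (batch, frames, mel bins)
lengths = np.array([800], dtype=np.int32)

# Bind this batch's dynamic shapes before allocating the I/O buffers.
context.set_binding_shape(engine.get_binding_index("input"), Dims(feats.shape))
context.set_binding_shape(engine.get_binding_index("seq_lengths"), Dims(lengths.shape))
inputs, outputs, allocations = setup_io_bindings(engine, context)
pred = np.zeros(outputs[0]["shape"], outputs[0]["dtype"])

# Host -> device copies, execution, device -> host copy; each cuda-python
# call returns a (err, ...) tuple that must be checked.
err, = cuda.cuMemcpyHtoD(inputs[0]["allocation"], feats, feats.nbytes)
assert err == cuda.CUresult.CUDA_SUCCESS
err, = cuda.cuMemcpyHtoD(inputs[1]["allocation"], lengths, lengths.nbytes)
assert err == cuda.CUresult.CUDA_SUCCESS
context.execute_v2(allocations)
err, = cuda.cuMemcpyDtoH(pred, outputs[0]["allocation"], outputs[0]["nbytes"])
assert err == cuda.CUresult.CUDA_SUCCESS

print("output shape:", pred.shape)  # decoded ids per utterance, cut at eos by the caller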