From 50aea3ce9f80dbd9c9f2cb3ad23929556692f84a Mon Sep 17 00:00:00 2001 From: may Date: Thu, 18 Jul 2024 15:13:45 +0800 Subject: [PATCH 1/7] Add roberta demo, roformer demo and widedeep demo in IxRT. --- .../nlp/language_model/roberta/ixrt/README.md | 83 +++++ .../roberta/ixrt/export_onnx.py | 73 ++++ .../language_model/roberta/ixrt/gen_data.py | 28 ++ .../roberta/ixrt/perf_engine.py | 349 ++++++++++++++++++ .../scripts/infer_roberta_fp16_performance.sh | 29 ++ .../language_model/roformer/ixrt/README.md | 75 ++++ .../language_model/roformer/ixrt/deploy.py | 21 ++ .../roformer/ixrt/export_onnx.py | 55 +++ .../roformer/ixrt/perf_engine.py | 349 ++++++++++++++++++ .../infer_roformer_fp16_performance.sh | 26 ++ models/recommendation/widedeep/ixrt/README.md | 76 ++++ .../widedeep/ixrt/change2dynamic.py | 80 ++++ models/recommendation/widedeep/ixrt/deploy.py | 78 ++++ .../widedeep/ixrt/export_onnx.py | 55 +++ .../infer_widedeep_fp16_performance.sh | 23 ++ 15 files changed, 1400 insertions(+) create mode 100644 models/nlp/language_model/roberta/ixrt/README.md create mode 100644 models/nlp/language_model/roberta/ixrt/export_onnx.py create mode 100644 models/nlp/language_model/roberta/ixrt/gen_data.py create mode 100644 models/nlp/language_model/roberta/ixrt/perf_engine.py create mode 100644 models/nlp/language_model/roberta/ixrt/scripts/infer_roberta_fp16_performance.sh create mode 100644 models/nlp/language_model/roformer/ixrt/README.md create mode 100644 models/nlp/language_model/roformer/ixrt/deploy.py create mode 100644 models/nlp/language_model/roformer/ixrt/export_onnx.py create mode 100644 models/nlp/language_model/roformer/ixrt/perf_engine.py create mode 100644 models/nlp/language_model/roformer/ixrt/scripts/infer_roformer_fp16_performance.sh create mode 100644 models/recommendation/widedeep/ixrt/README.md create mode 100644 models/recommendation/widedeep/ixrt/change2dynamic.py create mode 100644 models/recommendation/widedeep/ixrt/deploy.py create mode 100644 models/recommendation/widedeep/ixrt/export_onnx.py create mode 100644 models/recommendation/widedeep/ixrt/scripts/infer_widedeep_fp16_performance.sh diff --git a/models/nlp/language_model/roberta/ixrt/README.md b/models/nlp/language_model/roberta/ixrt/README.md new file mode 100644 index 00000000..597aed5b --- /dev/null +++ b/models/nlp/language_model/roberta/ixrt/README.md @@ -0,0 +1,83 @@ +# RoBERTa + +## Description + +Language model pretraining has led to significant performance gains but careful comparison between different approaches is challenging. Training is computationally expensive, often done on private datasets of different sizes, and, as we will show, hyperparameter choices have significant impact on the final results. We present a replication study of BERT pretraining (Devlin et al., 2019) that carefully measures the impact of many key hyperparameters and training data size. We find that BERT was significantly undertrained, and can match or exceed the performance of every model published after it. Our best model achieves state-of-the-art results on GLUE, RACE and SQuAD. These results highlight the importance of previously overlooked design choices, and raise questions about the source of recently reported improvements. We release our models and code. 
+
+## Setup
+
+### Install
+
+```bash
+
+pip3 install onnxsim
+pip3 install numa
+pip3 install bert
+
+```
+
+### Download
+
+Pretrained model: 
+
+Dataset: 
+
+```bash
+
+wget https://raw.githubusercontent.com/bytedance/ByteMLPerf/main/byte_infer_perf/general_perf/model_zoo/roberta-torch-fp32.json
+
+# export onnx
+python3 export_onnx.py --model_path open_roberta/roberta-base-squad.pt --output_path open_roberta/roberta-torch-fp32.onnx
+
+# Simplify onnx model
+onnxsim open_roberta/roberta-torch-fp32.onnx open_roberta/roberta-torch-fp32_sim.onnx
+```
+
+## Inference
+
+```bash
+export ORIGIN_ONNX_NAME=/Path/roberta-torch-fp32_sim
+export OPTIMIER_FILE=/Path/ixrt/oss/tools/optimizer/optimizer.py
+export PROJ_PATH=./
+```
+
+### Performance
+
+```bash
+bash scripts/infer_roberta_fp16_performance.sh
+```
+
+### Accuracy
+
+If you want to evaluate the accuracy of this model, please visit the website: <https://github.com/yudefu/ByteMLPerf/tree/iluvatar_general_infer>, which integrates the inference and training of many models under this framework and supports the ILUVATAR backend.
+
+```bash
+
+git clone https://github.com/yudefu/ByteMLPerf.git -b iluvatar_general_infer
+```
+
+For detailed steps regarding this model, please refer to this document: < https://github.com/yudefu/ByteMLPerf/blob/iluvatar_general_infer/byte_infer_perf/general_perf/backends/ILUVATAR/README.zh_CN.md >. Note: you need to change the relevant paths in the code to your own correct paths.
+
+```bash
+
+pip3 install -r https://github.com/yudefu/ByteMLPerf/blob/iluvatar_general_infer/byte_infer_perf/general_perf/requirements.txt
+pip3 install -r ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/requirements.txt
+mv perf_engine.py ./ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py
+
+mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/open_roberta/
+mv open_roberta ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/
+cd ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad
+wget https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_squad.tar
+tar -vxf open_squad.tar
+
+sftp -P 29880 vipzjtd@iftp.iluvatar.com.cn  # if the hostname cannot be reached, use the IP 10.160.20.60 instead; password: 123..com
+get /upload/3-app/byteperf/csarron.tar
+exit
+tar -zxvf csarron.tar
+mv csarron.tar ./ByteMLPerf/byte_infer_perf/
+# Modify ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/data_loader.py
+# AutoTokenizer.from_pretrained("csarron/roberta-base-squad-v1") => AutoTokenizer.from_pretrained("/ByteMLPerf/byte_infer_perf/csarron/roberta-base-squad-v1")
+
+cd ./ByteMLPerf/byte_infer_perf/
+python3 general_perf/core/perf_engine.py --hardware_type ILUVATAR --task roberta-torch-fp32
+```
\ No newline at end of file
diff --git a/models/nlp/language_model/roberta/ixrt/export_onnx.py b/models/nlp/language_model/roberta/ixrt/export_onnx.py
new file mode 100644
index 00000000..bc9d2da7
--- /dev/null
+++ b/models/nlp/language_model/roberta/ixrt/export_onnx.py
@@ -0,0 +1,73 @@
+# Copyright 2023 ByteDance and/or its affiliates.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import json + +import numpy as np +import torch + + +def torch_to_onnx(model_path, output_path): + model_name = output_path.split(".")[0] + with open(model_name + ".json", "r") as f: + model_info = json.load(f) + model_inputs = model_info["inputs"].split(",") + input_shapes = model_info["input_shape"] + input_type = model_info["input_type"].split(",") + example_inputs = _get_fake_samples(input_shapes, input_type) + + model = torch.jit.load(model_path, map_location=torch.device("cpu")) + model.eval() + + names = model_inputs + dynamic_inputs = {} + for i in range(len(names)): + dynamic_inputs[names[i]] = {0: "batch_size"} + outputs = model_info["outputs"].split(",") + for output in outputs: + dynamic_inputs[output] = {0: "batch_size"} + torch.onnx.export( + model, + example_inputs, + output_path, + opset_version=11, + input_names=names, + output_names=outputs, + dynamic_axes=dynamic_inputs, + ) + + +def _get_fake_samples(shape, type): + data = [] + idx = 0 + for key, val in shape.items(): + val = [val[0] * 1] + val[1:] + data.append(torch.from_numpy(np.random.random(val).astype(type[idx].lower()))) + idx += 1 + return data + + +def get_args(): + """Parse commandline.""" + parser = argparse.ArgumentParser() + parser.add_argument("--model_path", default="") + parser.add_argument("--output_path", default="") + args = parser.parse_args() + return args + + +if __name__ == "__main__": + args = get_args() + torch_to_onnx(args.model_path, args.output_path) \ No newline at end of file diff --git a/models/nlp/language_model/roberta/ixrt/gen_data.py b/models/nlp/language_model/roberta/ixrt/gen_data.py new file mode 100644 index 00000000..57d2cf9a --- /dev/null +++ b/models/nlp/language_model/roberta/ixrt/gen_data.py @@ -0,0 +1,28 @@ +import argparse + +import numpy as np +import torch + + +def gen_data(batch_size, output): + a = torch.randint(0, 50265, (batch_size, 384)) + a = a.numpy().astype(np.int64) + a.tofile(output+"input_ids.bin") + + a = np.ones((batch_size, 384), dtype=np.int64) + a.tofile(output+"input_mask.bin") + + a = np.zeros((batch_size, 384), dtype=np.int64) + a.tofile(output+"token_type_ids.bin") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Generate data for RoBERTa model.") + parser.add_argument( + "--batch_size", type=int, required=True, help="Batch size for data generation" + ) + parser.add_argument("--output_path", default="") + + args = parser.parse_args() + + gen_data(args.batch_size, args.output_path) \ No newline at end of file diff --git a/models/nlp/language_model/roberta/ixrt/perf_engine.py b/models/nlp/language_model/roberta/ixrt/perf_engine.py new file mode 100644 index 00000000..f3f10847 --- /dev/null +++ b/models/nlp/language_model/roberta/ixrt/perf_engine.py @@ -0,0 +1,349 @@ +# Copyright 2023 ByteDance and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import sys +import os +import logging +import importlib +import json +import subprocess +import time + +from typing import Any, Dict, Tuple +from prompt_toolkit.shortcuts import radiolist_dialog, input_dialog, yes_no_dialog +from prompt_toolkit.styles import Style + +BYTE_MLPERF_ROOT = os.path.dirname( + os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +os.chdir(BYTE_MLPERF_ROOT) +sys.path.insert(0, BYTE_MLPERF_ROOT) + +import argparse +from general_perf.core.configs.workload_store import load_workload +from general_perf.core.configs.dataset_store import load_dataset +from general_perf.core.configs.backend_store import init_compile_backend, init_runtime_backend + +logging.basicConfig(level=logging.INFO) +log = logging.getLogger("PerfEngine") +os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3' + + +def get_args(): + """Parse commandline.""" + parser = argparse.ArgumentParser() + parser.add_argument( + "--task", + default="resnet50-tf-fp32", + help="The task going to be evaluted, refs to workloads/") + parser.add_argument( + "--hardware_type", + default="GPU", + help="The backend going to be evaluted, refs to backends/") + parser.add_argument("--compile_only", + action='store_true', + help="Run compilation only") + + args = parser.parse_args() + return args + + +class PerfEngine: + def __init__(self) -> None: + super().__init__() + self.args = get_args() + self.workload = load_workload(self.args.task) + self.backend_type = self.args.hardware_type + self.compile_backend = None + self.old_os_path = os.environ['PATH'] + self.prev_sys_path = list(sys.path) + self.real_prefix = sys.prefix + self.compile_only_mode = False + + def start_engine(self) -> None: + ''' + Byte MlPerf will create an virtual env for each backend to avoid dependance conflict + ''' + success, total = 0, len(self.workload) + if total == 0: + return + log.info("******************* Backend Env Initization *******************") + status = self.activate_venv(self.backend_type) + if not status: + log.warning("Activate virtualenv Failed, Please Check...") + + self.compile_backend = init_compile_backend(self.backend_type) + self.runtime_backend = init_runtime_backend(self.backend_type) + + output_dir = os.path.abspath('general_perf/reports/' + + self.backend_type) + os.makedirs(output_dir, exist_ok=True) + + status = self.single_workload_perf(self.workload) + + def single_workload_perf( + self, workload: Dict[str, Any]) -> bool: + log.info("******************************************* Start to test model: {}. *******************************************".format(workload['model'])) + + # Check Compile Only Mode + self.compile_only_mode = False + if self.args.compile_only or workload['compile_only']: + self.compile_only_mode = True + + base_report = { + "Model": workload['model'].upper(), + "Backend": self.backend_type, + "Host Info": self.get_cpu_name() + } + + # Initalize Model Config Info + model_info = self.get_model_info(workload['model']) + pre_compile_config = {"workload": workload, 'model_info': model_info} + interact_info = self.check_interact_info(pre_compile_config) + pre_compile_config['interact_info'] = interact_info + if not model_info['dataset_name']: + model_info['dataset_name'] = 'fake_dataset' + + + ''' + Compile Backend could do some optimization like convert model format here + ''' + log.info("******************************************* Running Backend Compilation... 
*******************************************") + log.info("Running Backend Preoptimization...") + pre_compile_config = self.compile_backend.pre_optimize(pre_compile_config) + + + # Initalize dataset + dataset = load_dataset(model_info) + dataset.preprocess() + base_report['Dataset'] = model_info['dataset_name'].upper( + ) if model_info['dataset_name'] else None + + #Placeholder Only + segment_info = self.compile_backend.segment(pre_compile_config) + + best_batch_sizes = self.compile_backend.get_best_batch_size() + if isinstance(best_batch_sizes, list): + pre_compile_config['workload'][ + 'batch_sizes'] = best_batch_sizes + + log.info("Start to compile the model...") + start = time.time() + compile_info = self.compile_backend.compile(pre_compile_config, + dataset) + end = time.time() + + graph_compile_report = {} + graph_compile_report["Compile Duration"] = round(end - start, 5) + graph_compile_report["Compile Precision"] = compile_info[ + 'compile_precision'] + graph_compile_report["Subgraph Coverage"] = compile_info['sg_percent'] + if 'optimizations' in compile_info: + graph_compile_report['Optimizations'] = compile_info['optimizations'] + if 'instance_count' in compile_info: + base_report['Instance Count'] = compile_info['instance_count'] + if 'device_count' in compile_info: + base_report['Device Count'] = compile_info['device_count'] + base_report['Graph Compile'] = graph_compile_report + + # Initalize Output Dir and Reports + output_dir = os.path.abspath('general_perf/reports/' + + self.backend_type + '/' + + workload['model']) + os.makedirs(output_dir, exist_ok=True) + + # Compile only mode will stop here + if self.compile_only_mode: + base_report.pop("Backend") + return compile_info["compile_status"], base_report + + # load runtime backend + """ + Start Here + """ + batch_sizes = pre_compile_config['workload']['batch_sizes'] + self.runtime_backend.configs = compile_info + self.runtime_backend.workload = workload + self.runtime_backend.model_info = model_info + + self.runtime_backend.load(workload['batch_sizes'][0]) + # test accuracy + accuracy_report = {} + AccuracyChecker = self.get_accuracy_checker( + model_info['dataset_name'] + if model_info['dataset_name'] else 'fake_dataset') + AccuracyChecker.runtime_backend = self.runtime_backend + AccuracyChecker.dataloader = dataset + AccuracyChecker.output_dir = output_dir + AccuracyChecker.configs = compile_info + + if workload['test_accuracy']: + log.info("******************************************* Running Accuracy Checker... *******************************************") + + dataset.rebatch(self.runtime_backend.get_loaded_batch_size()) + accuracy_results = AccuracyChecker.calculate_acc( + workload['data_percent']) + + accuracy_report['Data Percent'] = workload['data_percent'] + accuracy_report.update(accuracy_results) + + # test numeric + if workload['test_numeric']: + log.info("******************************************* Running Numeric Checker... 
*******************************************") + + dataset.rebatch(self.runtime_backend.get_loaded_batch_size()) + if not workload['test_accuracy']: + accuracy_results = AccuracyChecker.calculate_acc( + workload['data_percent']) + diff_results = AccuracyChecker.calculate_diff() + accuracy_report.update(diff_results) + # accuracy_report['Diff Dist'] = compile_info['model'] + '-to-' + compile_info['compile_precision'].lower() + ".png" + + if accuracy_report: + base_report['Accuracy'] = accuracy_report + + # function to test qps and latency + if workload['test_perf']: + log.info("******************************************* Runing QPS Checker... *******************************************") + performance_reports = [] + qs_status = self.runtime_backend.is_qs_mode_supported() + if qs_status: + qs_config = self.runtime_backend.generate_qs_config() + performance_reports = self.qs_benchmark(qs_config) + else: + for bs in batch_sizes: + self.runtime_backend.load(bs) + batch_reports = self.runtime_backend.benchmark(dataset) + performance_reports.append(batch_reports) + base_report['Performance'] = performance_reports + + if "Instance Count" not in base_report: + log.warning("Vendors need to Add # of instances") + if "Device Count" not in base_report: + log.warning("Vendors need to Add # of devices") + + # write output to json file + output_report_path = output_dir + "/result-" + compile_info['compile_precision'].lower() + ".json" + with open(output_report_path, 'w') as file: + json.dump(base_report, file, indent=4) + + base_report.pop("Backend") + log.info("Testing Finish. Report is saved in path: [ {}/{} ]". + format(output_dir[output_dir.rfind('general_perf'):], + os.path.basename(output_report_path))) + + return compile_info["compile_status"] + + #WIP + def qs_benchmark(self, qs_config: Dict[str, Any]) -> list: + return [] + + def get_accuracy_checker(self, dataset_name: str): + AccuracyChecker = importlib.import_module('general_perf.datasets.' 
+ + dataset_name + + ".test_accuracy") + AccuracyChecker = getattr(AccuracyChecker, 'AccuracyChecker') + return AccuracyChecker() + + def get_model_info(self, model_name: str) -> Dict[str, Any]: + with open("general_perf/model_zoo/" + model_name + '.json', + 'r') as file: + model_info = json.load(file) + return model_info + + def get_cpu_name(self): + command = "lscpu | grep 'Model name' | awk -F: '{print $2}'" + cpu_name = subprocess.check_output(command, shell=True) + return cpu_name.decode().strip() + + def check_interact_info( + self, pre_compile_config: Dict[str, Dict]) -> Dict[str, Any]: + interact_info = self.compile_backend.get_interact_profile( + pre_compile_config) + + answer = {} + if len(interact_info) == 0: + return answer + + dialog_style = Style.from_dict({ + 'dialog': 'bg:#88b8ff', + 'dialog frame.label': 'bg:#ffffff #000000', + 'dialog.body': 'bg:#000000 #a0acde', + 'dialog shadow': 'bg:#004aaa', + }) + + input_style = Style.from_dict({ + 'dialog': 'bg:#88b8ff', + 'dialog frame.label': 'bg:#ffffff #000000', + 'dialog.body': 'bg:#000000 #a0acde', + 'dialog shadow': 'bg:#004aaa', + 'text-area.prompt': 'bg:#ffffff', + 'text-area': '#000000', + }) + + option = yes_no_dialog(title=self.backend_type + '编译配置', + text='[请选择]:是否进行编译后端配置:', + style=dialog_style).run() + if option: + sum_question = len(interact_info) + for i, question in enumerate(interact_info): + if question['depends']: + state = 0 + for title in question['depends'].split(','): + if not answer[title]: + state = 1 + if state: + continue + if question['dialog_type'] == 'Yes/No Dialog': + option = yes_no_dialog( + title=self.backend_type + '编译配置进度(' + str(i + 1) + + '/' + str(sum_question) + ')', + text="[Backend " + self.backend_type + "]: " + + question['note'], + style=dialog_style).run() + elif question['dialog_type'] == "Input Dialog": + option = input_dialog( + title=self.backend_type + '编译配置进度(' + str(i + 1) + + '/' + str(sum_question) + ')', + text="[Backend " + self.backend_type + "]: " + + question['note'], + style=input_style).run() + elif question['dialog_type'] == "Radiolist Dialog": + choice = [(i, text) + for i, text in enumerate(question['options'])] + num = radiolist_dialog( + title=self.backend_type + '编译配置进度(' + str(i + 1) + + '/' + str(sum_question) + ')', + text="[Backend " + self.backend_type + "]: " + + question['note'], + values=choice, + style=dialog_style).run() + option = question['options'][num] if num is not None else question[ + 'default'] + answer[question['name']] = option + + return answer + + def activate_venv(self, hardware_type: str) -> bool: + + return True + + def deactivate_venv(self): + sys.path[: + 0] = self.prev_sys_path #will also revert the added site-packages + sys.prefix = self.real_prefix + os.environ['PATH'] = self.old_os_path + + +if __name__ == "__main__": + engine = PerfEngine() + engine.start_engine() \ No newline at end of file diff --git a/models/nlp/language_model/roberta/ixrt/scripts/infer_roberta_fp16_performance.sh b/models/nlp/language_model/roberta/ixrt/scripts/infer_roberta_fp16_performance.sh new file mode 100644 index 00000000..99848bd5 --- /dev/null +++ b/models/nlp/language_model/roberta/ixrt/scripts/infer_roberta_fp16_performance.sh @@ -0,0 +1,29 @@ +set -x +ORIGIN_ONNX=${ORIGIN_ONNX_NAME}.onnx +cd ${PROJ_PATH} + +run(){ + BS=${1:-1} + TARGET_ONNX=${ORIGIN_ONNX_NAME}_end.onnx + TARGET_ENGINE=${ORIGIN_ONNX_NAME}_bs_${BS}_end.engine + if [[ ! -f "${ORIGIN_ONNX}" ]];then + echo "${ORIGIN_ONNX} not exists!" 
+ exit 1 + fi + + python3 ${PROJ_PATH}/gen_data.py --batch_size ${BS} --output_path ${PROJ_PATH} + + # Graph optimize + [ -f "${TARGET_ONNX}" ] || python3 ${OPTIMIER_FILE} --onnx ${ORIGIN_ONNX} --dump_onnx + + # Build Engine + ixrtexec --onnx ${TARGET_ONNX} --min_shape input_ids.1:${BS}x384,attention_mask.1:${BS}x384,token_type_ids.1:${BS}x384 \ + --opt_shape input_ids.1:${BS}x384,attention_mask.1:${BS}x384,token_type_ids.1:${BS}x384 \ + --max_shape input_ids.1:${BS}x384,attention_mask.1:${BS}x384,token_type_ids.1:${BS}x384 \ + --save_engine ${TARGET_ENGINE} --log_level error --plugins ixrt_plugin + + # Test Performance + ixrtexec --load_engine ${TARGET_ENGINE} --plugins ixrt_plugin --shapes input_ids.1:${BS}x384,attention_mask.1:${BS}x384,token_type_ids.1:${BS}x384 + +} +run 1 \ No newline at end of file diff --git a/models/nlp/language_model/roformer/ixrt/README.md b/models/nlp/language_model/roformer/ixrt/README.md new file mode 100644 index 00000000..a22fb894 --- /dev/null +++ b/models/nlp/language_model/roformer/ixrt/README.md @@ -0,0 +1,75 @@ +# RoFormer + +## Description + +Position encoding recently has shown effective in the transformer architecture. It enables valuable supervision for dependency modeling between elements at different positions of the sequence. In this paper, we first investigate various methods to integrate positional information into the learning process of transformer-based language models. Then, we propose a novel method named Rotary Position Embedding(RoPE) to effectively leverage the positional information. Specifically, the proposed RoPE encodes the absolute position with a rotation matrix and meanwhile incorporates the explicit relative position dependency in self-attention formulation. Notably, RoPE enables valuable properties, including the flexibility of sequence length, decaying inter-token dependency with increasing relative distances, and the capability of equipping the linear self-attention with relative position encoding. Finally, we evaluate the enhanced transformer with rotary position embedding, also called RoFormer, on various long text classification benchmark datasets. 
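+
+As a rough sketch of the core idea (illustrative only, not the exported model's actual implementation), RoPE rotates each even/odd feature pair of the query and key vectors by an angle proportional to the token position, so the attention dot product depends only on relative offsets; the head size of 64 below is an assumed example value, while the sequence length of 1024 matches this demo's input shape:
+
+```python
+import numpy as np
+
+def rotary_embed(x, base=10000.0):
+    # x: (seq_len, dim) query or key matrix; dim must be even.
+    seq_len, dim = x.shape
+    pos = np.arange(seq_len)[:, None]             # (seq_len, 1) token positions m
+    freq = base ** (-np.arange(0, dim, 2) / dim)  # (dim/2,) per-pair frequencies theta_i
+    theta = pos * freq                            # rotation angle m * theta_i
+    cos, sin = np.cos(theta), np.sin(theta)
+    x1, x2 = x[:, 0::2], x[:, 1::2]               # each (x1, x2) pair is a 2-D point
+    out = np.empty_like(x)
+    out[:, 0::2] = x1 * cos - x2 * sin            # rotate the pair by its angle
+    out[:, 1::2] = x1 * sin + x2 * cos
+    return out
+
+q = rotary_embed(np.random.randn(1024, 64))
+k = rotary_embed(np.random.randn(1024, 64))
+scores = q @ k.T  # position now enters attention only through relative offsets
+```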
+
+## Setup
+
+### Install
+
+```bash
+pip3 install tf2onnx
+pip3 install onnxsim
+pip3 install numa
+
+```
+
+### Download
+
+Pretrained model: 
+
+Dataset: 
+
+```bash
+# Download the pretrained model and dataset to 'data'
+mkdir data
+
+# export onnx
+python3 export_onnx.py --model_path ./data/open_roformer --output_path ./data/open_roformer/roformer-frozen_org.onnx
+
+# Simplify onnx model
+onnxsim ./data/open_roformer/roformer-frozen_org.onnx ./data/open_roformer/roformer-frozen.onnx
+python3 deploy.py --model_path ./data/open_roformer/roformer-frozen.onnx --output_path ./data/open_roformer/roformer-frozen.onnx
+
+```
+
+## Inference
+
+```bash
+export ORIGIN_ONNX_NAME=/Path/roformer-frozen
+export OPTIMIER_FILE=/Path/ixrt/oss/tools/optimizer/optimizer.py
+export PROJ_PATH=./
+```
+
+### Performance
+
+```bash
+bash scripts/infer_roformer_fp16_performance.sh
+```
+
+### Accuracy
+
+If you want to evaluate the accuracy of this model, please visit the website: <https://github.com/yudefu/ByteMLPerf/tree/iluvatar_general_infer>, which integrates the inference and training of many models under this framework and supports the ILUVATAR backend.
+
+```bash
+
+git clone https://github.com/yudefu/ByteMLPerf.git -b iluvatar_general_infer
+```
+
+For detailed steps regarding this model, please refer to this document: < https://github.com/yudefu/ByteMLPerf/blob/iluvatar_general_infer/byte_infer_perf/general_perf/backends/ILUVATAR/README.zh_CN.md >. Note: you need to change the relevant paths in the code to your own correct paths.
+
+```bash
+
+pip3 install -r https://github.com/yudefu/ByteMLPerf/blob/iluvatar_general_infer/byte_infer_perf/general_perf/requirements.txt
+mv perf_engine.py ./ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py
+
+mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/open_roformer/
+mv ./data/open_roformer ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/
+mv path/to/roformer-frozen_end.onnx ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/open_roformer/
+cd ./ByteMLPerf/byte_infer_perf/general_perf
+# Modify model_zoo/roformer-tf-fp32.json
+# "inputs": "input_segment:0,input_token:0" --> "inputs": "input_segment0,input_token0"
+# "input_shape": {"input_segment:0": [1, 1024], "input_token:0": [1, 1024]} --> "input_shape": {"input_segment0": [1, 1024], "input_token0": [1, 1024]}
+python3 core/perf_engine.py --hardware_type ILUVATAR --task roformer-tf-fp32
+```
\ No newline at end of file
diff --git a/models/nlp/language_model/roformer/ixrt/deploy.py b/models/nlp/language_model/roformer/ixrt/deploy.py
new file mode 100644
index 00000000..91999398
--- /dev/null
+++ b/models/nlp/language_model/roformer/ixrt/deploy.py
@@ -0,0 +1,21 @@
+import onnx
+import argparse
+
+def get_args():
+    """Parse commandline."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model_path", default="")
+    parser.add_argument("--output_path", default="")
+    args = parser.parse_args()
+    return args
+
+if __name__ == "__main__":
+    args = get_args()
+    model = onnx.load(args.model_path)
+    for input in model.graph.input:
+        for node in model.graph.node:
+            for i, name in enumerate(node.input):
+                if name == input.name:
+                    node.input[i] = name.replace(':', "")
+        input.name = input.name.replace(':', "")  # save the modified model
+    onnx.save(model, args.output_path)
\ No newline at end of file
diff --git a/models/nlp/language_model/roformer/ixrt/export_onnx.py b/models/nlp/language_model/roformer/ixrt/export_onnx.py
new file mode 100644
index 00000000..04ef591a
--- /dev/null
+++ 
b/models/nlp/language_model/roformer/ixrt/export_onnx.py @@ -0,0 +1,55 @@ +import tf2onnx +from tf2onnx import tf_loader +import argparse +ONNX_OPSET = 11 + +def _convert_graphdef_to_onnx(graph_def, + inputs=None, + outputs=None, + output_path='', + **kwargs): + + inputs_as_nchw = kwargs.get('inputs_as_nchw', None) + custom_ops = kwargs.get('custom_ops', None) + custom_op_handlers = kwargs.get('custom_op_handlers', None) + custom_rewriter = kwargs.get('custom_rewriter', None) + extra_opset = kwargs.get('extra_opset', None) + large_model = kwargs.get('large_model', False) + name = kwargs.get('name', 'habana_convert') + target = kwargs.get('target', None) + shape_override = kwargs.get('shape_override', {}) + + tf2onnx.convert.from_graph_def(graph_def, + name=name, + input_names=inputs, + output_names=outputs, + opset=ONNX_OPSET, + custom_ops=custom_ops, + custom_op_handlers=custom_op_handlers, + custom_rewriter=custom_rewriter, + inputs_as_nchw=inputs_as_nchw, + extra_opset=extra_opset, + shape_override=shape_override, + target=target, + large_model=large_model, + output_path=output_path) + return output_path + +def savedmodel_to_onnx(model_path, output_path='', **kwargs): + inputs = kwargs.get('inputs', None) + outputs = kwargs.get('outputs', None) + graph_def, inputs, outputs = tf_loader.from_saved_model( + model_path, inputs, outputs) + return _convert_graphdef_to_onnx(graph_def, inputs, outputs, output_path, **kwargs) + +def get_args(): + """Parse commandline.""" + parser = argparse.ArgumentParser() + parser.add_argument("--model_path", default="") + parser.add_argument("--output_path", default="") + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = get_args() + savedmodel_to_onnx(args.model_path, args.output_path) \ No newline at end of file diff --git a/models/nlp/language_model/roformer/ixrt/perf_engine.py b/models/nlp/language_model/roformer/ixrt/perf_engine.py new file mode 100644 index 00000000..f3f10847 --- /dev/null +++ b/models/nlp/language_model/roformer/ixrt/perf_engine.py @@ -0,0 +1,349 @@ +# Copyright 2023 ByteDance and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import sys +import os +import logging +import importlib +import json +import subprocess +import time + +from typing import Any, Dict, Tuple +from prompt_toolkit.shortcuts import radiolist_dialog, input_dialog, yes_no_dialog +from prompt_toolkit.styles import Style + +BYTE_MLPERF_ROOT = os.path.dirname( + os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +os.chdir(BYTE_MLPERF_ROOT) +sys.path.insert(0, BYTE_MLPERF_ROOT) + +import argparse +from general_perf.core.configs.workload_store import load_workload +from general_perf.core.configs.dataset_store import load_dataset +from general_perf.core.configs.backend_store import init_compile_backend, init_runtime_backend + +logging.basicConfig(level=logging.INFO) +log = logging.getLogger("PerfEngine") +os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3' + + +def get_args(): + """Parse commandline.""" + parser = argparse.ArgumentParser() + parser.add_argument( + "--task", + default="resnet50-tf-fp32", + help="The task going to be evaluted, refs to workloads/") + parser.add_argument( + "--hardware_type", + default="GPU", + help="The backend going to be evaluted, refs to backends/") + parser.add_argument("--compile_only", + action='store_true', + help="Run compilation only") + + args = parser.parse_args() + return args + + +class PerfEngine: + def __init__(self) -> None: + super().__init__() + self.args = get_args() + self.workload = load_workload(self.args.task) + self.backend_type = self.args.hardware_type + self.compile_backend = None + self.old_os_path = os.environ['PATH'] + self.prev_sys_path = list(sys.path) + self.real_prefix = sys.prefix + self.compile_only_mode = False + + def start_engine(self) -> None: + ''' + Byte MlPerf will create an virtual env for each backend to avoid dependance conflict + ''' + success, total = 0, len(self.workload) + if total == 0: + return + log.info("******************* Backend Env Initization *******************") + status = self.activate_venv(self.backend_type) + if not status: + log.warning("Activate virtualenv Failed, Please Check...") + + self.compile_backend = init_compile_backend(self.backend_type) + self.runtime_backend = init_runtime_backend(self.backend_type) + + output_dir = os.path.abspath('general_perf/reports/' + + self.backend_type) + os.makedirs(output_dir, exist_ok=True) + + status = self.single_workload_perf(self.workload) + + def single_workload_perf( + self, workload: Dict[str, Any]) -> bool: + log.info("******************************************* Start to test model: {}. *******************************************".format(workload['model'])) + + # Check Compile Only Mode + self.compile_only_mode = False + if self.args.compile_only or workload['compile_only']: + self.compile_only_mode = True + + base_report = { + "Model": workload['model'].upper(), + "Backend": self.backend_type, + "Host Info": self.get_cpu_name() + } + + # Initalize Model Config Info + model_info = self.get_model_info(workload['model']) + pre_compile_config = {"workload": workload, 'model_info': model_info} + interact_info = self.check_interact_info(pre_compile_config) + pre_compile_config['interact_info'] = interact_info + if not model_info['dataset_name']: + model_info['dataset_name'] = 'fake_dataset' + + + ''' + Compile Backend could do some optimization like convert model format here + ''' + log.info("******************************************* Running Backend Compilation... 
*******************************************") + log.info("Running Backend Preoptimization...") + pre_compile_config = self.compile_backend.pre_optimize(pre_compile_config) + + + # Initalize dataset + dataset = load_dataset(model_info) + dataset.preprocess() + base_report['Dataset'] = model_info['dataset_name'].upper( + ) if model_info['dataset_name'] else None + + #Placeholder Only + segment_info = self.compile_backend.segment(pre_compile_config) + + best_batch_sizes = self.compile_backend.get_best_batch_size() + if isinstance(best_batch_sizes, list): + pre_compile_config['workload'][ + 'batch_sizes'] = best_batch_sizes + + log.info("Start to compile the model...") + start = time.time() + compile_info = self.compile_backend.compile(pre_compile_config, + dataset) + end = time.time() + + graph_compile_report = {} + graph_compile_report["Compile Duration"] = round(end - start, 5) + graph_compile_report["Compile Precision"] = compile_info[ + 'compile_precision'] + graph_compile_report["Subgraph Coverage"] = compile_info['sg_percent'] + if 'optimizations' in compile_info: + graph_compile_report['Optimizations'] = compile_info['optimizations'] + if 'instance_count' in compile_info: + base_report['Instance Count'] = compile_info['instance_count'] + if 'device_count' in compile_info: + base_report['Device Count'] = compile_info['device_count'] + base_report['Graph Compile'] = graph_compile_report + + # Initalize Output Dir and Reports + output_dir = os.path.abspath('general_perf/reports/' + + self.backend_type + '/' + + workload['model']) + os.makedirs(output_dir, exist_ok=True) + + # Compile only mode will stop here + if self.compile_only_mode: + base_report.pop("Backend") + return compile_info["compile_status"], base_report + + # load runtime backend + """ + Start Here + """ + batch_sizes = pre_compile_config['workload']['batch_sizes'] + self.runtime_backend.configs = compile_info + self.runtime_backend.workload = workload + self.runtime_backend.model_info = model_info + + self.runtime_backend.load(workload['batch_sizes'][0]) + # test accuracy + accuracy_report = {} + AccuracyChecker = self.get_accuracy_checker( + model_info['dataset_name'] + if model_info['dataset_name'] else 'fake_dataset') + AccuracyChecker.runtime_backend = self.runtime_backend + AccuracyChecker.dataloader = dataset + AccuracyChecker.output_dir = output_dir + AccuracyChecker.configs = compile_info + + if workload['test_accuracy']: + log.info("******************************************* Running Accuracy Checker... *******************************************") + + dataset.rebatch(self.runtime_backend.get_loaded_batch_size()) + accuracy_results = AccuracyChecker.calculate_acc( + workload['data_percent']) + + accuracy_report['Data Percent'] = workload['data_percent'] + accuracy_report.update(accuracy_results) + + # test numeric + if workload['test_numeric']: + log.info("******************************************* Running Numeric Checker... 
*******************************************") + + dataset.rebatch(self.runtime_backend.get_loaded_batch_size()) + if not workload['test_accuracy']: + accuracy_results = AccuracyChecker.calculate_acc( + workload['data_percent']) + diff_results = AccuracyChecker.calculate_diff() + accuracy_report.update(diff_results) + # accuracy_report['Diff Dist'] = compile_info['model'] + '-to-' + compile_info['compile_precision'].lower() + ".png" + + if accuracy_report: + base_report['Accuracy'] = accuracy_report + + # function to test qps and latency + if workload['test_perf']: + log.info("******************************************* Runing QPS Checker... *******************************************") + performance_reports = [] + qs_status = self.runtime_backend.is_qs_mode_supported() + if qs_status: + qs_config = self.runtime_backend.generate_qs_config() + performance_reports = self.qs_benchmark(qs_config) + else: + for bs in batch_sizes: + self.runtime_backend.load(bs) + batch_reports = self.runtime_backend.benchmark(dataset) + performance_reports.append(batch_reports) + base_report['Performance'] = performance_reports + + if "Instance Count" not in base_report: + log.warning("Vendors need to Add # of instances") + if "Device Count" not in base_report: + log.warning("Vendors need to Add # of devices") + + # write output to json file + output_report_path = output_dir + "/result-" + compile_info['compile_precision'].lower() + ".json" + with open(output_report_path, 'w') as file: + json.dump(base_report, file, indent=4) + + base_report.pop("Backend") + log.info("Testing Finish. Report is saved in path: [ {}/{} ]". + format(output_dir[output_dir.rfind('general_perf'):], + os.path.basename(output_report_path))) + + return compile_info["compile_status"] + + #WIP + def qs_benchmark(self, qs_config: Dict[str, Any]) -> list: + return [] + + def get_accuracy_checker(self, dataset_name: str): + AccuracyChecker = importlib.import_module('general_perf.datasets.' 
+ + dataset_name + + ".test_accuracy") + AccuracyChecker = getattr(AccuracyChecker, 'AccuracyChecker') + return AccuracyChecker() + + def get_model_info(self, model_name: str) -> Dict[str, Any]: + with open("general_perf/model_zoo/" + model_name + '.json', + 'r') as file: + model_info = json.load(file) + return model_info + + def get_cpu_name(self): + command = "lscpu | grep 'Model name' | awk -F: '{print $2}'" + cpu_name = subprocess.check_output(command, shell=True) + return cpu_name.decode().strip() + + def check_interact_info( + self, pre_compile_config: Dict[str, Dict]) -> Dict[str, Any]: + interact_info = self.compile_backend.get_interact_profile( + pre_compile_config) + + answer = {} + if len(interact_info) == 0: + return answer + + dialog_style = Style.from_dict({ + 'dialog': 'bg:#88b8ff', + 'dialog frame.label': 'bg:#ffffff #000000', + 'dialog.body': 'bg:#000000 #a0acde', + 'dialog shadow': 'bg:#004aaa', + }) + + input_style = Style.from_dict({ + 'dialog': 'bg:#88b8ff', + 'dialog frame.label': 'bg:#ffffff #000000', + 'dialog.body': 'bg:#000000 #a0acde', + 'dialog shadow': 'bg:#004aaa', + 'text-area.prompt': 'bg:#ffffff', + 'text-area': '#000000', + }) + + option = yes_no_dialog(title=self.backend_type + '编译配置', + text='[请选择]:是否进行编译后端配置:', + style=dialog_style).run() + if option: + sum_question = len(interact_info) + for i, question in enumerate(interact_info): + if question['depends']: + state = 0 + for title in question['depends'].split(','): + if not answer[title]: + state = 1 + if state: + continue + if question['dialog_type'] == 'Yes/No Dialog': + option = yes_no_dialog( + title=self.backend_type + '编译配置进度(' + str(i + 1) + + '/' + str(sum_question) + ')', + text="[Backend " + self.backend_type + "]: " + + question['note'], + style=dialog_style).run() + elif question['dialog_type'] == "Input Dialog": + option = input_dialog( + title=self.backend_type + '编译配置进度(' + str(i + 1) + + '/' + str(sum_question) + ')', + text="[Backend " + self.backend_type + "]: " + + question['note'], + style=input_style).run() + elif question['dialog_type'] == "Radiolist Dialog": + choice = [(i, text) + for i, text in enumerate(question['options'])] + num = radiolist_dialog( + title=self.backend_type + '编译配置进度(' + str(i + 1) + + '/' + str(sum_question) + ')', + text="[Backend " + self.backend_type + "]: " + + question['note'], + values=choice, + style=dialog_style).run() + option = question['options'][num] if num is not None else question[ + 'default'] + answer[question['name']] = option + + return answer + + def activate_venv(self, hardware_type: str) -> bool: + + return True + + def deactivate_venv(self): + sys.path[: + 0] = self.prev_sys_path #will also revert the added site-packages + sys.prefix = self.real_prefix + os.environ['PATH'] = self.old_os_path + + +if __name__ == "__main__": + engine = PerfEngine() + engine.start_engine() \ No newline at end of file diff --git a/models/nlp/language_model/roformer/ixrt/scripts/infer_roformer_fp16_performance.sh b/models/nlp/language_model/roformer/ixrt/scripts/infer_roformer_fp16_performance.sh new file mode 100644 index 00000000..ea0a8263 --- /dev/null +++ b/models/nlp/language_model/roformer/ixrt/scripts/infer_roformer_fp16_performance.sh @@ -0,0 +1,26 @@ +set -x +ORIGIN_ONNX=${ORIGIN_ONNX_NAME}.onnx +cd ${PROJ_PATH} + +run(){ + BS=${1:-1} + TARGET_ONNX=${ORIGIN_ONNX_NAME}_end.onnx + TARGET_ENGINE=${ORIGIN_ONNX_NAME}_bs_${BS}_end.engine + SHAPE="input_segment0:${BS}x1024,input_token0:${BS}x1024" + if [[ ! 
-f "${ORIGIN_ONNX}" ]];then + echo "${ORIGIN_ONNX} not exists!" + exit 1 + fi + + # Graph optimize + python3 ${OPTIMIER_FILE} --onnx ${ORIGIN_ONNX} --model_type roformer + + # Build Engine + ixrtexec --onnx ${TARGET_ONNX} --save_engine ${TARGET_ENGINE} --log_level error --plugins ixrt_plugin \ + --min_shape $SHAPE --opt_shape $SHAPE --max_shape $SHAPE --shapes $SHAPE + + # Test Performance + ixrtexec --load_engine ${TARGET_ENGINE} --plugins ixrt_plugin --shapes ${SHAPE} + +} +run 1 \ No newline at end of file diff --git a/models/recommendation/widedeep/ixrt/README.md b/models/recommendation/widedeep/ixrt/README.md new file mode 100644 index 00000000..cb9ade6c --- /dev/null +++ b/models/recommendation/widedeep/ixrt/README.md @@ -0,0 +1,76 @@ +# Wide & Deep + +## Description + +Generalized linear models with nonlinear feature transformations are widely used for large-scale regression and classification problems with sparse inputs. Memorization of feature interactions through a wide set of cross-product feature transformations are effective and interpretable, while generalization requires more feature engineering effort. With less feature engineering, deep neural networks can generalize better to unseen feature combinations through low-dimensional dense embeddings learned for the sparse features. However, deep neural networks with embeddings can over-generalize and recommend less relevant items when the user-item interactions are sparse and high-rank. In this paper, we present Wide & Deep learning---jointly trained wide linear models and deep neural networks---to combine the benefits of memorization and generalization for recommender systems. We productionized and evaluated the system on Google Play, a commercial mobile app store with over one billion active users and over one million apps. Online experiment results show that Wide & Deep significantly increased app acquisitions compared with wide-only and deep-only models. We have also open-sourced our implementation in TensorFlow. 
+
+## Setup
+
+### Install
+
+```bash
+pip3 install tf2onnx
+pip3 install onnxsim
+pip3 install numa
+
+```
+
+### Download
+
+Pretrained model: 
+
+Dataset: 
+
+```bash
+
+# export onnx
+python3 export_onnx.py --model_path open_wide_deep_saved_model --output_path open_wide_deep_saved_model/widedeep.onnx
+
+# Simplify onnx model
+onnxsim open_wide_deep_saved_model/widedeep.onnx open_wide_deep_saved_model/widedeep_sim.onnx
+python3 deploy.py --model_path open_wide_deep_saved_model/widedeep_sim.onnx --output_path open_wide_deep_saved_model/widedeep_sim.onnx
+python3 change2dynamic.py --model_path open_wide_deep_saved_model/widedeep_sim.onnx --output_path open_wide_deep_saved_model/widedeep_sim.onnx
+```
+
+## Inference
+
+```bash
+export ORIGIN_ONNX_NAME=/Path/widedeep_sim
+export OPTIMIER_FILE=/Path/ixrt/oss/tools/optimizer/optimizer.py
+export PROJ_PATH=./
+```
+
+### Performance
+
+```bash
+bash scripts/infer_widedeep_fp16_performance.sh
+```
+
+### Accuracy
+
+If you want to evaluate the accuracy of this model, please visit the website: <https://github.com/yudefu/ByteMLPerf/tree/iluvatar_general_infer>, which integrates the inference and training of many models under this framework and supports the ILUVATAR backend.
+
+```bash
+
+git clone https://github.com/yudefu/ByteMLPerf.git -b iluvatar_general_infer
+```
+
+For detailed steps regarding this model, please refer to this document: < https://github.com/yudefu/ByteMLPerf/blob/iluvatar_general_infer/byte_infer_perf/general_perf/backends/ILUVATAR/README.zh_CN.md >. Note: you need to change the relevant paths in the code to your own correct paths.
+
+```bash
+
+pip3 install -r https://github.com/yudefu/ByteMLPerf/blob/iluvatar_general_infer/byte_infer_perf/general_perf/requirements.txt
+mv perf_engine.py ./ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py
+
+mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/regular/open_wide_deep_saved_model
+mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/
+wget -O ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/eval.csv https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/eval.csv
+
+sftp -P 29889 user01@58.247.142.52  # password: 5$gS%659
+cd yudefu/bytedance_perf ; get widedeep_dynamicshape_new.onnx
+exit
+
+mv path/to/widedeep_dynamicshape_new.onnx ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/regular/open_wide_deep_saved_model/widedeep_dynamicshape.onnx
+cd ./ByteMLPerf/byte_infer_perf/general_perf
+python3 core/perf_engine.py --hardware_type ILUVATAR --task widedeep-tf-fp32
+```
\ No newline at end of file
diff --git a/models/recommendation/widedeep/ixrt/change2dynamic.py b/models/recommendation/widedeep/ixrt/change2dynamic.py
new file mode 100644
index 00000000..c0ae0dc0
--- /dev/null
+++ b/models/recommendation/widedeep/ixrt/change2dynamic.py
@@ -0,0 +1,80 @@
+import argparse
+import onnx
+
+def change_input_output_dim(model):
+    # Use some symbolic name not used for any other dimension
+    sym_batch_dim = "batch"
+    # sym_batch_dim = -1
+
+    # The following code changes the first dimension of every input to be batch-dim
+    # Modify as appropriate ... note that this requires all inputs to
+    # have the same batch_dim
+    inputs = model.graph.input
+    for input in inputs:
+        # Checks omitted. This assumes that all inputs are tensors and have a shape with a first dim.
+        # Add checks as needed.
+        dim1 = input.type.tensor_type.shape.dim[0]
+        # update dim to be a symbolic value
+        dim1.dim_param = sym_batch_dim
+
+        if input.name == "new_categorical_placeholder:0":
+            input.type.tensor_type.shape.dim[1].dim_value = int(2)
+        elif input.name == "new_numeric_placeholder:0":
+            input.type.tensor_type.shape.dim[1].dim_value = int(13)
+        elif input.name == "import/head/predictions/zeros_like:0":
+            input.type.tensor_type.shape.dim[1].dim_value = int(1)
+
+        # or update it to be an actual value:
+        # dim1.dim_value = actual_batch_dim
+
+    outputs = model.graph.output
+
+    for output in outputs:
+        # Checks omitted. This assumes that all outputs are tensors and have a shape with a first dim.
+        # Add checks as needed.
+        dim1 = output.type.tensor_type.shape.dim[0]
+        # update dim to be a symbolic value
+        dim1.dim_param = sym_batch_dim
+
+def change_input_node_name(model, input_names):
+    for i, input in enumerate(model.graph.input):
+        input_name = input_names[i]
+        for node in model.graph.node:
+            for i, name in enumerate(node.input):
+                if name == input.name:
+                    node.input[i] = input_name
+        input.name = input_name
+
+
+def change_output_node_name(model, output_names):
+    for i, output in enumerate(model.graph.output):
+        output_name = output_names[i]
+        for node in model.graph.node:
+            for i, name in enumerate(node.output):
+                if name == output.name:
+                    node.output[i] = output_name
+        output.name = output_name
+
+
+def get_args():
+    """Parse commandline."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model_path", default="")
+    parser.add_argument("--output_path", default="")
+    args = parser.parse_args()
+    return args
+
+
+
+if __name__ == "__main__":
+    args = get_args()
+    model = onnx.load(args.model_path)
+    change_input_output_dim(model)
+    # Strip ':' from input names so they match the renamed graph inputs.
+    for input in model.graph.input:
+        for node in model.graph.node:
+            for i, name in enumerate(node.input):
+                if name == input.name:
+                    node.input[i] = name.replace(':', "")
+        input.name = input.name.replace(':', "")  # save the modified model
+    onnx.save(model, args.output_path)
\ No newline at end of file
diff --git a/models/recommendation/widedeep/ixrt/deploy.py b/models/recommendation/widedeep/ixrt/deploy.py
new file mode 100644
index 00000000..308a859c
--- /dev/null
+++ b/models/recommendation/widedeep/ixrt/deploy.py
@@ -0,0 +1,78 @@
+import onnx
+import argparse
+import copy
+
+from typing import Union, Callable, List
+
+from tensorrt.deploy.api import *
+from tensorrt.deploy.backend.onnx.converter import default_converter
+from tensorrt.deploy.backend.torch.executor.operators._operators import to_py_type
+from tensorrt.deploy.ir.operator_attr import BaseOperatorAttr, EmptyAttr
+from tensorrt.deploy.ir.operator_type import OperatorType as OP
+from tensorrt.deploy.ir import operator_attr as attr, Operator, generate_operator_name
+from tensorrt.deploy.fusion import BasePass, PatternGraph, build_sequence_graph, GraphMatcher, PassSequence
+from tensorrt.deploy.ir import Graph
+from tensorrt.deploy.quantizer.quant_operator.base import quant_single_input_operator
+from tensorrt.deploy.backend.onnx.converter import convert_onnx_operator
+from tensorrt.deploy.api import GraphTransform, create_source, create_target
+
+class FuseGemmPass(BasePass):
+    def process(self, graph: Graph) -> Graph:
+        self.transform = GraphTransform(graph)
+
+        self.transform.find_sequence_subgraph(
+            pattern=[OP.MATMUL, OP.ADD], callback=self.fuse_gemm, strict=True
+        )
+        return graph
+
+    def fuse_gemm(self, graph, pattern: PatternGraph):
+        matmul = pattern.nodes[0]
+        add = 
pattern.nodes[1] + + if len(add.operator.inputs) != 2: + return + + b_var = graph.get_variable(matmul.operator.inputs[1]) + if not graph.is_leaf_variable(b_var) or b_var.value is None: + return + + if b_var.value.ndim != 2: + return + + bias_var = None + for input in add.operator.inputs: + if input not in matmul.operator.outputs: + bias_var = input + + matmul.operator.inputs.append(bias_var) + self.transform.delete_operator_and_link( + add.operator, link_input=matmul.operator.outputs[0] + ) + + matmul.operator.op_type = OP.GEMM + matmul.operator.attributes = attr.GemmAttr(transB=1) + +def replace_input(graph): + transformer = GraphTransform(graph) + from_op = graph.get_operator("Shape__8") + to_op = graph.get_operator('import/head/predictions/zeros_like') + var = graph.get_variable("import/head/predictions/zeros_like:0") + transformer.delete_operators_between_op_op(from_op=from_op, to_op=to_op) + transformer.add_input("import/head/predictions/zeros_like:0") + return graph + + +def get_args(): + """Parse commandline.""" + parser = argparse.ArgumentParser() + parser.add_argument("--model_path", default="") + parser.add_argument("--output_path", default="") + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = get_args() + graph = create_source(args.model_path)() + graph = FuseGemmPass().process(graph) + graph = replace_input(graph) + create_target(saved_path=args.output_path).export(graph) \ No newline at end of file diff --git a/models/recommendation/widedeep/ixrt/export_onnx.py b/models/recommendation/widedeep/ixrt/export_onnx.py new file mode 100644 index 00000000..04ef591a --- /dev/null +++ b/models/recommendation/widedeep/ixrt/export_onnx.py @@ -0,0 +1,55 @@ +import tf2onnx +from tf2onnx import tf_loader +import argparse +ONNX_OPSET = 11 + +def _convert_graphdef_to_onnx(graph_def, + inputs=None, + outputs=None, + output_path='', + **kwargs): + + inputs_as_nchw = kwargs.get('inputs_as_nchw', None) + custom_ops = kwargs.get('custom_ops', None) + custom_op_handlers = kwargs.get('custom_op_handlers', None) + custom_rewriter = kwargs.get('custom_rewriter', None) + extra_opset = kwargs.get('extra_opset', None) + large_model = kwargs.get('large_model', False) + name = kwargs.get('name', 'habana_convert') + target = kwargs.get('target', None) + shape_override = kwargs.get('shape_override', {}) + + tf2onnx.convert.from_graph_def(graph_def, + name=name, + input_names=inputs, + output_names=outputs, + opset=ONNX_OPSET, + custom_ops=custom_ops, + custom_op_handlers=custom_op_handlers, + custom_rewriter=custom_rewriter, + inputs_as_nchw=inputs_as_nchw, + extra_opset=extra_opset, + shape_override=shape_override, + target=target, + large_model=large_model, + output_path=output_path) + return output_path + +def savedmodel_to_onnx(model_path, output_path='', **kwargs): + inputs = kwargs.get('inputs', None) + outputs = kwargs.get('outputs', None) + graph_def, inputs, outputs = tf_loader.from_saved_model( + model_path, inputs, outputs) + return _convert_graphdef_to_onnx(graph_def, inputs, outputs, output_path, **kwargs) + +def get_args(): + """Parse commandline.""" + parser = argparse.ArgumentParser() + parser.add_argument("--model_path", default="") + parser.add_argument("--output_path", default="") + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = get_args() + savedmodel_to_onnx(args.model_path, args.output_path) \ No newline at end of file diff --git a/models/recommendation/widedeep/ixrt/scripts/infer_widedeep_fp16_performance.sh 
b/models/recommendation/widedeep/ixrt/scripts/infer_widedeep_fp16_performance.sh new file mode 100644 index 00000000..d6d441bd --- /dev/null +++ b/models/recommendation/widedeep/ixrt/scripts/infer_widedeep_fp16_performance.sh @@ -0,0 +1,23 @@ +set -x +ORIGIN_ONNX=${ORIGIN_ONNX_NAME}.onnx +cd ${PROJ_PATH} + +run(){ + BS=${1:-1} + TARGET_ONNX=${ORIGIN_ONNX_NAME}_end.onnx + TARGET_ENGINE=${ORIGIN_ONNX_NAME}_bs_${BS}_end.engine + if [[ ! -f "${ORIGIN_ONNX}" ]];then + echo "${ORIGIN_ONNX} not exists!" + exit 1 + fi + + # Graph optimize + python3 ${OPTIMIER_FILE} --onnx ${ORIGIN_ONNX} --input_shapes "new_categorical_placeholder0:$((26 * ${BS}))x2,new_numeric_placeholder0:${BS}x13,import/head/predictions/zeros_like0:${BS}x1" + # Build Engine + ixrtexec --onnx ${TARGET_ONNX} --save_engine ${TARGET_ENGINE} --log_level error + + # Test Performance + ixrtexec --load_engine ${TARGET_ENGINE} + +} +run 1 \ No newline at end of file -- Gitee From 496d87b4c68562f5ab50bf442efcabc8ebf3cbcf Mon Sep 17 00:00:00 2001 From: may Date: Thu, 18 Jul 2024 15:17:52 +0800 Subject: [PATCH 2/7] Add License. --- .../nlp/language_model/roberta/ixrt/gen_data.py | 15 +++++++++++++++ models/nlp/language_model/roformer/ixrt/deploy.py | 15 +++++++++++++++ .../language_model/roformer/ixrt/export_onnx.py | 15 +++++++++++++++ .../widedeep/ixrt/change2dynamic.py | 15 +++++++++++++++ models/recommendation/widedeep/ixrt/deploy.py | 15 +++++++++++++++ .../recommendation/widedeep/ixrt/export_onnx.py | 15 +++++++++++++++ 6 files changed, 90 insertions(+) diff --git a/models/nlp/language_model/roberta/ixrt/gen_data.py b/models/nlp/language_model/roberta/ixrt/gen_data.py index 57d2cf9a..a59225b2 100644 --- a/models/nlp/language_model/roberta/ixrt/gen_data.py +++ b/models/nlp/language_model/roberta/ixrt/gen_data.py @@ -1,3 +1,18 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + import argparse import numpy as np diff --git a/models/nlp/language_model/roformer/ixrt/deploy.py b/models/nlp/language_model/roformer/ixrt/deploy.py index 91999398..073fb733 100644 --- a/models/nlp/language_model/roformer/ixrt/deploy.py +++ b/models/nlp/language_model/roformer/ixrt/deploy.py @@ -1,3 +1,18 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
From 496d87b4c68562f5ab50bf442efcabc8ebf3cbcf Mon Sep 17 00:00:00 2001
From: may
Date: Thu, 18 Jul 2024 15:17:52 +0800
Subject: [PATCH 2/7] Add License.

---
 .../nlp/language_model/roberta/ixrt/gen_data.py   | 15 +++++++++++++++
 models/nlp/language_model/roformer/ixrt/deploy.py | 15 +++++++++++++++
 .../language_model/roformer/ixrt/export_onnx.py   | 15 +++++++++++++++
 .../widedeep/ixrt/change2dynamic.py               | 15 +++++++++++++++
 models/recommendation/widedeep/ixrt/deploy.py     | 15 +++++++++++++++
 .../recommendation/widedeep/ixrt/export_onnx.py   | 15 +++++++++++++++
 6 files changed, 90 insertions(+)

diff --git a/models/nlp/language_model/roberta/ixrt/gen_data.py b/models/nlp/language_model/roberta/ixrt/gen_data.py
index 57d2cf9a..a59225b2 100644
--- a/models/nlp/language_model/roberta/ixrt/gen_data.py
+++ b/models/nlp/language_model/roberta/ixrt/gen_data.py
@@ -1,3 +1,18 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
 import argparse
 
 import numpy as np
diff --git a/models/nlp/language_model/roformer/ixrt/deploy.py b/models/nlp/language_model/roformer/ixrt/deploy.py
index 91999398..073fb733 100644
--- a/models/nlp/language_model/roformer/ixrt/deploy.py
+++ b/models/nlp/language_model/roformer/ixrt/deploy.py
@@ -1,3 +1,18 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
 import onnx
 import argparse
 
diff --git a/models/nlp/language_model/roformer/ixrt/export_onnx.py b/models/nlp/language_model/roformer/ixrt/export_onnx.py
index 04ef591a..475dddd7 100644
--- a/models/nlp/language_model/roformer/ixrt/export_onnx.py
+++ b/models/nlp/language_model/roformer/ixrt/export_onnx.py
@@ -1,3 +1,18 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
 import tf2onnx
 from tf2onnx import tf_loader
 import argparse
diff --git a/models/recommendation/widedeep/ixrt/change2dynamic.py b/models/recommendation/widedeep/ixrt/change2dynamic.py
index c0ae0dc0..e9bcf6f1 100644
--- a/models/recommendation/widedeep/ixrt/change2dynamic.py
+++ b/models/recommendation/widedeep/ixrt/change2dynamic.py
@@ -1,3 +1,18 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
 import argparse
 
 import onnx
diff --git a/models/recommendation/widedeep/ixrt/deploy.py b/models/recommendation/widedeep/ixrt/deploy.py
index 308a859c..0e1ac694 100644
--- a/models/recommendation/widedeep/ixrt/deploy.py
+++ b/models/recommendation/widedeep/ixrt/deploy.py
@@ -1,3 +1,18 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
 import onnx
 import argparse
 import copy
diff --git a/models/recommendation/widedeep/ixrt/export_onnx.py b/models/recommendation/widedeep/ixrt/export_onnx.py
index 04ef591a..475dddd7 100644
--- a/models/recommendation/widedeep/ixrt/export_onnx.py
+++ b/models/recommendation/widedeep/ixrt/export_onnx.py
@@ -1,3 +1,18 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
 import tf2onnx
 from tf2onnx import tf_loader
 import argparse
-- 
Gitee

From db9df01fe0355b5852e821c5a70bad1410b608d2 Mon Sep 17 00:00:00 2001
From: may
Date: Fri, 19 Jul 2024 17:39:47 +0800
Subject: [PATCH 3/7] Modify README.md

---
 models/nlp/language_model/roberta/ixrt/README.md   | 12 +++++++-----
 .../ixrt/scripts/infer_roberta_fp16_performance.sh | 14 ++++++++++++++
 models/nlp/language_model/roformer/ixrt/README.md  |  8 +++++---
 .../scripts/infer_roformer_fp16_performance.sh     | 14 ++++++++++++++
 models/recommendation/widedeep/ixrt/README.md      |  5 +++--
 .../scripts/infer_widedeep_fp16_performance.sh     | 14 ++++++++++++++
 6 files changed, 57 insertions(+), 10 deletions(-)

diff --git a/models/nlp/language_model/roberta/ixrt/README.md b/models/nlp/language_model/roberta/ixrt/README.md
index 597aed5b..b2db9485 100644
--- a/models/nlp/language_model/roberta/ixrt/README.md
+++ b/models/nlp/language_model/roberta/ixrt/README.md
@@ -9,10 +9,11 @@ Language model pretraining has led to significant performance gains but careful
 ### Install
 
 ```bash
-
 pip3 install onnxsim
-pip3 install numa
+pip3 install py-libnuma==1.2
 pip3 install bert
+pip3 install pycuda
+pip3 install transformers==4.33.3
 
 ```
@@ -60,11 +61,11 @@ For detailed steps regarding this model, please refer to this document: < https:
 
 ```bash
 
-pip3 install -r https://github.com/yudefu/ByteMLPerf/blob/iluvatar_general_infer/byte_infer_perf/general_perf/requirements.txt
+pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/requirements.txt
 pip3 install -r ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/requirements.txt
 mv perf_engine.py ./ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py
 
-mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/open_roberta/
+mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/
 mv open_roberta ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/
 cd ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad
 wget https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_squad.tar
 tar -vxf open_squad.tar
@@ -73,8 +74,9 @@ tar -vxf open_squad.tar
 sftp -P 29880 vipzjtd@iftp.iluvatar.com.cn (if the connection fails, replace the hostname with the IP 10.160.20.60); password: 123..com
 get /upload/3-app/byteperf/csarron.tar
 exit
-tar -zxvf csarron.tar
+
 mv csarron.tar ./ByteMLPerf/byte_infer_perf/
+tar -zxvf csarron.tar
 # Modify ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/data_loader.py
 # AutoTokenizer.from_pretrained("csarron/roberta-base-squad-v1") => AutoTokenizer.from_pretrained("/ByteMLPerf/byte_infer_perf/csarron/roberta-base-squad-v1")
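The `data_loader.py` edit described in the hunk above is a one-line swap from a Hugging Face hub id to the directory unpacked from `csarron.tar`, so the benchmark can run without hub access. A hypothetical before/after sketch — the local path is the one the README uses as an example; adjust it to wherever you extracted the archive:

```python
# Illustrative sketch of the data_loader.py change described above.
from transformers import AutoTokenizer

# Example location from the README; not mandated by the patch.
LOCAL_DIR = "/ByteMLPerf/byte_infer_perf/csarron/roberta-base-squad-v1"

# Before: resolves the id against the Hugging Face hub (needs network access).
# tokenizer = AutoTokenizer.from_pretrained("csarron/roberta-base-squad-v1")

# After: loads the tokenizer files (vocab.json, merges.txt, ...) from disk.
tokenizer = AutoTokenizer.from_pretrained(LOCAL_DIR)

print(tokenizer("What does RoBERTa improve on?")["input_ids"][:8])
```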
diff --git a/models/nlp/language_model/roberta/ixrt/scripts/infer_roberta_fp16_performance.sh b/models/nlp/language_model/roberta/ixrt/scripts/infer_roberta_fp16_performance.sh
index 99848bd5..f6ba5743 100644
--- a/models/nlp/language_model/roberta/ixrt/scripts/infer_roberta_fp16_performance.sh
+++ b/models/nlp/language_model/roberta/ixrt/scripts/infer_roberta_fp16_performance.sh
@@ -1,3 +1,17 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
 set -x
 ORIGIN_ONNX=${ORIGIN_ONNX_NAME}.onnx
 cd ${PROJ_PATH}
diff --git a/models/nlp/language_model/roformer/ixrt/README.md b/models/nlp/language_model/roformer/ixrt/README.md
index a22fb894..e77e50ef 100644
--- a/models/nlp/language_model/roformer/ixrt/README.md
+++ b/models/nlp/language_model/roformer/ixrt/README.md
@@ -10,8 +10,9 @@ Position encoding recently has shown effective in the transformer architecture.
 
 ```bash
 pip3 install tf2onnx
+pip3 install pycuda
 pip3 install onnxsim
-pip3 install numa
+pip3 install py-libnuma==1.2
 
 ```
@@ -64,9 +65,10 @@ For detailed steps regarding this model, please refer to this document: < https:
 pip3 install -r https://github.com/yudefu/ByteMLPerf/blob/iluvatar_general_infer/byte_infer_perf/general_perf/requirements.txt
 mv perf_engine.py ./ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py
 
-mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/open_roformer/
+mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/
 mv ./data/open_roformer ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/
-mv path/to/roformer-frozen_end.onnx ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/open_roformer/
+# Make sure the roformer-frozen_end.onnx is in the path "./data/open_roformer". Or you should move it to './ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/open_roformer/'.
+# mv path/to/roformer-frozen_end.onnx ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/open_roformer/
 cd ./ByteMLPerf/byte_infer_perf/general_perf
 # Modify model_zoo/roformer-tf-fp32.json
 # "inputs": "input_segment:0,input_token:0" --> "inputs": "input_segment0,input_token0"
diff --git a/models/nlp/language_model/roformer/ixrt/scripts/infer_roformer_fp16_performance.sh b/models/nlp/language_model/roformer/ixrt/scripts/infer_roformer_fp16_performance.sh
index ea0a8263..54d9c352 100644
--- a/models/nlp/language_model/roformer/ixrt/scripts/infer_roformer_fp16_performance.sh
+++ b/models/nlp/language_model/roformer/ixrt/scripts/infer_roformer_fp16_performance.sh
@@ -1,3 +1,17 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
 set -x
 ORIGIN_ONNX=${ORIGIN_ONNX_NAME}.onnx
 cd ${PROJ_PATH}
diff --git a/models/recommendation/widedeep/ixrt/README.md b/models/recommendation/widedeep/ixrt/README.md
index cb9ade6c..a8703e29 100644
--- a/models/recommendation/widedeep/ixrt/README.md
+++ b/models/recommendation/widedeep/ixrt/README.md
@@ -10,8 +10,9 @@ Generalized linear models with nonlinear feature transformations are widely used
 
 ```bash
 pip3 install tf2onnx
+pip3 install pycuda
 pip3 install onnxsim
-pip3 install numa
+pip3 install py-libnuma==1.2
 
 ```
@@ -59,7 +60,7 @@ For detailed steps regarding this model, please refer to this document: < https:
 
 ```bash
 
-pip3 install -r https://github.com/yudefu/ByteMLPerf/blob/iluvatar_general_infer/byte_infer_perf/general_perf/requirements.txt
+pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/requirements.txt
 mv perf_engine.py ./ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py
 
 mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/regular/open_wide_deep_saved_model
diff --git a/models/recommendation/widedeep/ixrt/scripts/infer_widedeep_fp16_performance.sh b/models/recommendation/widedeep/ixrt/scripts/infer_widedeep_fp16_performance.sh
index d6d441bd..0b968abe 100644
--- a/models/recommendation/widedeep/ixrt/scripts/infer_widedeep_fp16_performance.sh
+++ b/models/recommendation/widedeep/ixrt/scripts/infer_widedeep_fp16_performance.sh
@@ -1,3 +1,17 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
 set -x
 ORIGIN_ONNX=${ORIGIN_ONNX_NAME}.onnx
 cd ${PROJ_PATH}
-- 
Gitee
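Patch 3 above pins exact dependency versions (`py-libnuma==1.2` everywhere, plus `transformers==4.33.3` for roberta). A small optional sanity check that the pins are what actually got installed — a convenience sketch, not part of the patch; `importlib.metadata` is stdlib on Python 3.8+:

```python
# Verify that the dependency pins from the READMEs are actually installed.
from importlib.metadata import PackageNotFoundError, version

EXPECTED = {
    "py-libnuma": "1.2",
    "transformers": "4.33.3",  # roberta only
}

for pkg, want in EXPECTED.items():
    try:
        got = version(pkg)
    except PackageNotFoundError:
        print(f"{pkg}: NOT INSTALLED (expected {want})")
        continue
    status = "ok" if got == want else f"MISMATCH (expected {want})"
    print(f"{pkg}: {got} {status}")
```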
From ff02919f9fb245e0e6ce0d1b7a1a9112821eb687 Mon Sep 17 00:00:00 2001
From: may
Date: Wed, 24 Jul 2024 17:21:02 +0800
Subject: [PATCH 4/7] Avoid the accuracy error

---
 models/nlp/language_model/roformer/ixrt/README.md   | 2 ++
 .../scripts/infer_roformer_fp16_performance.sh      | 7 ++++---
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/models/nlp/language_model/roformer/ixrt/README.md b/models/nlp/language_model/roformer/ixrt/README.md
index e77e50ef..6ef9c784 100644
--- a/models/nlp/language_model/roformer/ixrt/README.md
+++ b/models/nlp/language_model/roformer/ixrt/README.md
@@ -66,9 +66,11 @@ pip3 install -r https://github.com/yudefu/ByteMLPerf/blob/iluvatar_general_infer
 mv perf_engine.py ./ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py
 
 mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/
+# Delete Line102 ' build_engine(model_name=model_name, onnx_model_path=onnx_model_path, engine_path=engine_path, MaxBatchSize=MaxBatchSize, BuildFlag='FP16') ' which is the build engine process of conformer in the file ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/compile_backend_iluvatar.py
 mv ./data/open_roformer ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/
 # Make sure the roformer-frozen_end.onnx is in the path "./data/open_roformer". Or you should move it to './ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/open_roformer/'.
 # mv path/to/roformer-frozen_end.onnx ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/open_roformer/
+wget https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_cail2019.tar -P ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cail2019
 cd ./ByteMLPerf/byte_infer_perf/general_perf
 # Modify model_zoo/roformer-tf-fp32.json
 # "inputs": "input_segment:0,input_token:0" --> "inputs": "input_segment0,input_token0"
diff --git a/models/nlp/language_model/roformer/ixrt/scripts/infer_roformer_fp16_performance.sh b/models/nlp/language_model/roformer/ixrt/scripts/infer_roformer_fp16_performance.sh
index 54d9c352..f6fac705 100644
--- a/models/nlp/language_model/roformer/ixrt/scripts/infer_roformer_fp16_performance.sh
+++ b/models/nlp/language_model/roformer/ixrt/scripts/infer_roformer_fp16_performance.sh
@@ -19,8 +19,9 @@ cd ${PROJ_PATH}
 run(){
     BS=${1:-1}
     TARGET_ONNX=${ORIGIN_ONNX_NAME}_end.onnx
-    TARGET_ENGINE=${ORIGIN_ONNX_NAME}_bs_${BS}_end.engine
+    TARGET_ENGINE=${ORIGIN_ONNX_NAME}_end.engine
     SHAPE="input_segment0:${BS}x1024,input_token0:${BS}x1024"
+    MAX_SHAPE="input_segment0:64x1024,input_token0:64x1024"
     if [[ ! -f "${ORIGIN_ONNX}" ]];then
         echo "${ORIGIN_ONNX} not exists!"
         exit 1
@@ -31,10 +32,10 @@ run(){
 
     # Build Engine
     ixrtexec --onnx ${TARGET_ONNX} --save_engine ${TARGET_ENGINE} --log_level error --plugins ixrt_plugin \
-        --min_shape $SHAPE --opt_shape $SHAPE --max_shape $SHAPE --shapes $SHAPE
+        --min_shape $SHAPE --opt_shape $SHAPE --max_shape $MAX_SHAPE --shapes $SHAPE
 
     # Test Performance
     ixrtexec --load_engine ${TARGET_ENGINE} --plugins ixrt_plugin --shapes ${SHAPE}
 }
-run 1
\ No newline at end of file
+run 2
\ No newline at end of file
-- 
Gitee
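The script change above widens the engine's optimization profile: the benchmarked shape stays the `--opt_shape`, while `--max_shape` now covers batch 64, so a single engine serves every batch size in that range — which is also why the per-`BS` suffix is dropped from `TARGET_ENGINE`. A small helper for composing ixrtexec-style shape strings like the ones in the script (a convenience sketch, not part of the patch):

```python
# Format shape dicts into ixrtexec-style strings such as
# "input_segment0:2x1024,input_token0:2x1024".
from typing import Dict, Tuple

def shape_arg(shapes: Dict[str, Tuple[int, ...]]) -> str:
    return ",".join(
        f"{name}:" + "x".join(str(d) for d in dims)
        for name, dims in shapes.items()
    )

bs, max_bs, seq = 2, 64, 1024
opt = shape_arg({"input_segment0": (bs, seq), "input_token0": (bs, seq)})
mx = shape_arg({"input_segment0": (max_bs, seq), "input_token0": (max_bs, seq)})
print(opt)  # input_segment0:2x1024,input_token0:2x1024
print(mx)   # input_segment0:64x1024,input_token0:64x1024
```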
From dd175775c6168c6328cdd8d770fbde361f65afcc Mon Sep 17 00:00:00 2001
From: majorli
Date: Fri, 26 Jul 2024 15:05:10 +0800
Subject: [PATCH 5/7] update roformer readme.md

Signed-off-by: majorli

---
 .../language_model/roformer/ixrt/README.md | 63 +++++++++++++------
 1 file changed, 43 insertions(+), 20 deletions(-)

diff --git a/models/nlp/language_model/roformer/ixrt/README.md b/models/nlp/language_model/roformer/ixrt/README.md
index 6ef9c784..af81d911 100644
--- a/models/nlp/language_model/roformer/ixrt/README.md
+++ b/models/nlp/language_model/roformer/ixrt/README.md
@@ -9,6 +9,8 @@ Position encoding recently has shown effective in the transformer architecture.
 ### Install
 
 ```bash
+apt install -y libnuma-dev
+
 pip3 install tf2onnx
 pip3 install pycuda
 pip3 install onnxsim
@@ -23,23 +25,34 @@ Pretrained model:
 
 ```bash
+# Go to path of this model
+cd ${PROJ_ROOT}/models/nlp/language_model/roformer/ixrt
+
 # Download the pretrained model and dataset to 'data'
-mkdir data
+mkdir -p data/
+pushd data/
+wget https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_roformer.tar
+tar xf open_roformer.tar
+rm -f open_roformer.tar
+popd
+```
+
+### Deal with ONNX
+
+```bash
 # export onnx
 python3 export_onnx.py --model_path ./data/open_roformer --output_path ./data/open_roformer/roformer-frozen_org.onnx
 
 # Simplify onnx model
 onnxsim ./data/open_roformer/roformer-frozen_org.onnx ./data/open_roformer/roformer-frozen.onnx
 python3 deploy.py --model_path ./data/open_roformer/roformer-frozen.onnx --output_path ./data/open_roformer/roformer-frozen.onnx
-
 ```
 
 ## Inference
 
 ```bash
-export ORIGIN_ONNX_NAME=/Path/roformer-frozen
-export OPTIMIER_FILE=/Path/ixrt/oss/tools/optimizer/optimizer.py
+export ORIGIN_ONNX_NAME=./data/open_roformer/roformer-frozen
+export OPTIMIER_FILE=${IXRT_OSS_ROOT}/tools/optimizer/optimizer.py
 export PROJ_PATH=./
 ```
@@ -51,29 +64,39 @@ bash scripts/infer_roformer_fp16_performance.sh
 
 ### Accuracy
 
-If you want to evaluate the accuracy of this model, please visit the website: < https://github.com/yudefu/ByteMLPerf/tree/iluvatar_general_infer >, which integrates inference and training of many models under this framework, supporting the ILUVATAR backend
+If you want to evaluate the accuracy of this model, please visit the website: <https://github.com/yudefu/ByteMLPerf/tree/iluvatar_general_infer>, which integrates inference and training of many models under this framework, supporting the ILUVATAR backend.
 
-```bash
+For detailed steps regarding this model, please refer to this document: <https://github.com/yudefu/ByteMLPerf/blob/iluvatar_general_infer/byte_infer_perf/general_perf/backends/ILUVATAR/README.zh_CN.md> Note: You need to modify the relevant paths in the code to your own correct paths.
 
+```bash
+# Clone ByteMLPerf
 git clone https://github.com/yudefu/ByteMLPerf.git -b iluvatar_general_infer
-```
+pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/requirements.txt
+mv perf_engine.py ./ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py
+mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/
 
-For detailed steps regarding this model, please refer to this document: < https://github.com/yudefu/ByteMLPerf/blob/iluvatar_general_infer/byte_infer_perf/general_perf/backends/ILUVATAR/README.zh_CN.md > Note: You need to modify the relevant paths in the code to your own correct paths.
+# Comment Line102 in compile_backend_iluvatar.py
+sed -i '102s/build_engine/# build_engine/' ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/compile_backend_iluvatar.py
 
-```bash
+# Move open_roformer
+mv ./data/open_roformer ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/
 
-pip3 install -r https://github.com/yudefu/ByteMLPerf/blob/iluvatar_general_infer/byte_infer_perf/general_perf/requirements.txt
-mv perf_engine.py ./ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py
+# Setup open_cail2019 dataset
+wget https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_cail2019.tar
+tar xf open_cail2019.tar
+cp ./open_cail2019/* ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cail2019
+rm -f open_cail2019.tar
 
-mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/
-# Delete Line102 ' build_engine(model_name=model_name, onnx_model_path=onnx_model_path, engine_path=engine_path, MaxBatchSize=MaxBatchSize, BuildFlag='FP16') ' which is the build engine process of conformer in the file ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/compile_backend_iluvatar.py
-mv ./data/open_roformer ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/
-# Make sure the roformer-frozen_end.onnx is in the path "./data/open_roformer". Or you should move it to './ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/open_roformer/'.
-# mv path/to/roformer-frozen_end.onnx ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/open_roformer/
-wget https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_cail2019.tar -P ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cail2019
+# Go to general_perf/
 cd ./ByteMLPerf/byte_infer_perf/general_perf
 # Modify model_zoo/roformer-tf-fp32.json
-# "inputs": "input_segment:0,input_token:0" --> "inputs": "input_segment0,input_token0"
-# "input_shape": {"input_segment:0": [1, 1024], "input_token:0": [1, 1024]} -->"input_shape": {"input_segment0": [1, 1024], "input_token0": [1, 1024]}
+sed -i 's/segment:0/segment0/g; s/token:0/token0/g' model_zoo/roformer-tf-fp32.json
+# Run Acc scipts
 python3 core/perf_engine.py --hardware_type ILUVATAR --task roformer-tf-fp32
-```
\ No newline at end of file
+```
+
+## Results
+
+| Model    | BatchSize | Precision | FPS     | ACC     |
+| -------- | --------- | --------- | ------- | ------- |
+| RoFormer | 2         | FP16      | 195.186 | 0.33789 |
-- 
Gitee
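The `sed -i '102s/...'` line introduced above disables the conformer `build_engine(...)` call by absolute line number, which silently edits the wrong line if upstream shifts the file by even one line. A more defensive, hypothetical variant that matches the call by content instead — it assumes the whole call still sits on a single line containing the literal text `build_engine(model_name=`, as the patch-4 comment quotes it:

```python
# Comment out the conformer build_engine(...) call by content match rather
# than by assuming it still sits on line 102.
from pathlib import Path

path = Path(
    "./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/"
    "compile_backend_iluvatar.py"
)
lines = path.read_text().splitlines(keepends=True)

for i, line in enumerate(lines):
    if "build_engine(model_name=" in line and not line.lstrip().startswith("#"):
        indent = line[: len(line) - len(line.lstrip())]
        lines[i] = f"{indent}# {line.lstrip()}"  # keep indentation, add '# '
        break
else:
    raise SystemExit("build_engine call not found; check the file by hand")

path.write_text("".join(lines))
```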
From 438bc311344b4fd2abeffefe8e2f71146c5d1596 Mon Sep 17 00:00:00 2001
From: majorli
Date: Fri, 26 Jul 2024 16:28:34 +0800
Subject: [PATCH 6/7] update roberta readme.md

Signed-off-by: majorli

---
 .../nlp/language_model/roberta/ixrt/README.md | 58 +++++++++++--------
 .../scripts/infer_roberta_fp16_performance.sh |  1 +
 .../language_model/roformer/ixrt/README.md    |  4 +-
 .../infer_roformer_fp16_performance.sh        |  1 +
 4 files changed, 38 insertions(+), 26 deletions(-)

diff --git a/models/nlp/language_model/roberta/ixrt/README.md b/models/nlp/language_model/roberta/ixrt/README.md
index b2db9485..cb1bbe39 100644
--- a/models/nlp/language_model/roberta/ixrt/README.md
+++ b/models/nlp/language_model/roberta/ixrt/README.md
@@ -14,7 +14,6 @@ pip3 install py-libnuma==1.2
 pip3 install bert
 pip3 install pycuda
 pip3 install transformers==4.33.3
-
 ```
 
 ### Download
@@ -24,8 +23,16 @@ Pretrained model:
 
 ```bash
+cd ${PROJ_ROOT}/models/nlp/language_model/roberta/ixrt/
+
+# get open_roberta
+wget https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_roberta.tar
+tar xf open_roberta.tar
+rm -f open_roberta.tar
 
-wget https://raw.githubusercontent.com/bytedance/ByteMLPerf/main/byte_infer_perf/general_perf/model_zoo/roberta-torch-fp32.json
+# get roberta-torch-fp32.json
+git clone -b iluvatar_general_infer https://github.com/yudefu/ByteMLPerf.git
+cp ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/roberta-torch-fp32.json ./
 
 # export onnx
 python3 export_onnx.py --model_path open_roberta/roberta-base-squad.pt --output_path open_roberta/roberta-torch-fp32.onnx
@@ -37,8 +44,8 @@ onnxsim open_roberta/roberta-torch-fp32.onnx open_roberta/roberta-torch-fp32_sim
 ## Inference
 
 ```bash
-export ORIGIN_ONNX_NAME=/Path/roberta-torch-fp32_sim
-export OPTIMIER_FILE=/Path/ixrt/oss/tools/optimizer/optimizer.py
+export ORIGIN_ONNX_NAME=./open_roberta/roberta-torch-fp32_sim
+export OPTIMIER_FILE=${IXRT_OSS_ROOT}/tools/optimizer/optimizer.py
 export PROJ_PATH=./
 ```
@@ -50,36 +57,39 @@ bash scripts/infer_roberta_fp16_performance.sh
 
 ### Accuracy
 
-If you want to evaluate the accuracy of this model, please visit the website: < https://github.com/yudefu/ByteMLPerf/tree/iluvatar_general_infer >, which integrates inference and training of many models under this framework, supporting the ILUVATAR backend
-
-```bash
-
-git clone https://github.com/yudefu/ByteMLPerf.git -b iluvatar_general_infer
-```
+If you want to evaluate the accuracy of this model, please visit the website: <https://github.com/yudefu/ByteMLPerf/tree/iluvatar_general_infer>, which integrates inference and training of many models under this framework, supporting the ILUVATAR backend
 
-For detailed steps regarding this model, please refer to this document: < https://github.com/yudefu/ByteMLPerf/blob/iluvatar_general_infer/byte_infer_perf/general_perf/backends/ILUVATAR/README.zh_CN.md > Note: You need to modify the relevant paths in the code to your own correct paths.
+For detailed steps regarding this model, please refer to this document: <https://github.com/yudefu/ByteMLPerf/blob/iluvatar_general_infer/byte_infer_perf/general_perf/backends/ILUVATAR/README.zh_CN.md> Note: You need to modify the relevant paths in the code to your own correct paths.
 
 ```bash
+# Install requirements
 pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/requirements.txt
-pip3 install -r ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/requirements.txt
+pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/requirements.txt
 mv perf_engine.py ./ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py
 
+# Move open_roberta
 mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/
 mv open_roberta ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/
-cd ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad
-wget https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_squad.tar
-tar -vxf open_squad.tar
 
-sftp -P 29880 vipzjtd@iftp.iluvatar.com.cn (if the connection fails, replace the hostname with the IP 10.160.20.60); password: 123..com
-get /upload/3-app/byteperf/csarron.tar
-exit
+# Get open_squad
+wget https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_squad.tar
+tar xf open_squad.tar
+cp ./open_squad/* ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad
+rm -f open_squad.tar
 
-mv csarron.tar ./ByteMLPerf/byte_infer_perf/
-tar -zxvf csarron.tar
-# Modify ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad/data_loader.py
-# AutoTokenizer.from_pretrained("csarron/roberta-base-squad-v1") => AutoTokenizer.from_pretrained("/ByteMLPerf/byte_infer_perf/csarron/roberta-base-squad-v1")
+# Get csarron.tar
+wget http://files.deepspark.org.cn:880/deepspark/csarron.tar
+tar xf csarron.tar
+rm -f csarron.tar
+mv csarron/ ./ByteMLPerf/byte_infer_perf/
 
+# Run Acc scripts
 cd ./ByteMLPerf/byte_infer_perf/
 python3 general_perf/core/perf_engine.py --hardware_type ILUVATAR --task roberta-torch-fp32
-```
\ No newline at end of file
+```
+
+## Results
+
+| Model   | BatchSize | Precision | FPS    | F1       | Exact Match |
+| ------- | --------- | --------- | ------ | -------- | ----------- |
+| RoBERTa | 1         | FP16      | 355.48 | 83.14387 | 76.50175    |
diff --git a/models/nlp/language_model/roberta/ixrt/scripts/infer_roberta_fp16_performance.sh b/models/nlp/language_model/roberta/ixrt/scripts/infer_roberta_fp16_performance.sh
index f6ba5743..90bdec9b 100644
--- a/models/nlp/language_model/roberta/ixrt/scripts/infer_roberta_fp16_performance.sh
+++ b/models/nlp/language_model/roberta/ixrt/scripts/infer_roberta_fp16_performance.sh
@@ -12,6 +12,7 @@
 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 # License for the specific language governing permissions and limitations
 # under the License.
+
 set -x
 ORIGIN_ONNX=${ORIGIN_ONNX_NAME}.onnx
 cd ${PROJ_PATH}
diff --git a/models/nlp/language_model/roformer/ixrt/README.md b/models/nlp/language_model/roformer/ixrt/README.md
index af81d911..c0f55b93 100644
--- a/models/nlp/language_model/roformer/ixrt/README.md
+++ b/models/nlp/language_model/roformer/ixrt/README.md
@@ -25,7 +25,7 @@ Pretrained model:
 
 ```bash
-# Go to path of this model 
+# Go to path of this model
 cd ${PROJ_ROOT}/models/nlp/language_model/roformer/ixrt
 
 # Download the pretrained model and dataset to 'data'
@@ -91,7 +91,7 @@ rm -f open_cail2019.tar
 cd ./ByteMLPerf/byte_infer_perf/general_perf
 # Modify model_zoo/roformer-tf-fp32.json
 sed -i 's/segment:0/segment0/g; s/token:0/token0/g' model_zoo/roformer-tf-fp32.json
-# Run Acc scipts
+# Run Acc scripts
 python3 core/perf_engine.py --hardware_type ILUVATAR --task roformer-tf-fp32
 ```
diff --git a/models/nlp/language_model/roformer/ixrt/scripts/infer_roformer_fp16_performance.sh b/models/nlp/language_model/roformer/ixrt/scripts/infer_roformer_fp16_performance.sh
index 54d9c352..0510e32d 100644
--- a/models/nlp/language_model/roformer/ixrt/scripts/infer_roformer_fp16_performance.sh
+++ b/models/nlp/language_model/roformer/ixrt/scripts/infer_roformer_fp16_performance.sh
@@ -12,6 +12,7 @@
 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 # License for the specific language governing permissions and limitations
 # under the License.
+
 set -x
 ORIGIN_ONNX=${ORIGIN_ONNX_NAME}.onnx
 cd ${PROJ_PATH}
-- 
Gitee

From 2a154d5f6f027f457cdcc1ce4d35ff735cd634c6 Mon Sep 17 00:00:00 2001
From: majorli
Date: Fri, 26 Jul 2024 16:53:31 +0800
Subject: [PATCH 7/7] update wide&deep readme.md

Signed-off-by: majorli

---
 .../nlp/language_model/roberta/ixrt/README.md |  1 +
 .../language_model/roformer/ixrt/README.md    |  2 +-
 models/recommendation/widedeep/ixrt/README.md | 43 +++++++++++--------
 .../infer_widedeep_fp16_performance.sh        |  1 +
 4 files changed, 27 insertions(+), 20 deletions(-)

diff --git a/models/nlp/language_model/roberta/ixrt/README.md b/models/nlp/language_model/roberta/ixrt/README.md
index cb1bbe39..5ba6e888 100644
--- a/models/nlp/language_model/roberta/ixrt/README.md
+++ b/models/nlp/language_model/roberta/ixrt/README.md
@@ -23,6 +23,7 @@ Pretrained model:
 
 ```bash
+# Go to path of this model
 cd ${PROJ_ROOT}/models/nlp/language_model/roberta/ixrt/
 
 # get open_roberta
diff --git a/models/nlp/language_model/roformer/ixrt/README.md b/models/nlp/language_model/roformer/ixrt/README.md
index c0f55b93..ba1e5975 100644
--- a/models/nlp/language_model/roformer/ixrt/README.md
+++ b/models/nlp/language_model/roformer/ixrt/README.md
@@ -70,7 +70,7 @@ For detailed steps regarding this model, please refer to this document:
 
 ```bash
 # Clone ByteMLPerf
-git clone https://github.com/yudefu/ByteMLPerf.git -b iluvatar_general_infer
+git clone -b iluvatar_general_infer https://github.com/yudefu/ByteMLPerf.git
 pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/requirements.txt
 mv perf_engine.py ./ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py
diff --git a/models/recommendation/widedeep/ixrt/README.md b/models/recommendation/widedeep/ixrt/README.md
--- a/models/recommendation/widedeep/ixrt/README.md
+++ b/models/recommendation/widedeep/ixrt/README.md
@@ -22,8 +22,9 @@ Pretrained model:
 
-Dataset:
+Dataset:
 
 ```bash
+# Go to path of this model
+cd ${PROJ_ROOT}/models/recommendation/widedeep/ixrt
 
 # export onnx
 python3 export_onnx.py --model_path open_wide_deep_saved_model --output_path open_wide_deep_saved_model/widedeep.onnx
@@ -36,8 +37,8 @@ python3 change2dynamic.py --model_path open_wide_deep_saved_model/widedeep_sim.o
 ## Inference
 
 ```bash
-export ORIGIN_ONNX_NAME=/Path/widedeep_sim
-export OPTIMIER_FILE=/Path/ixrt/oss/tools/optimizer/optimizer.py
+export ORIGIN_ONNX_NAME=./open_wide_deep_saved_model/widedeep_sim
+export OPTIMIER_FILE=${IXRT_OSS_ROOT}/tools/optimizer/optimizer.py
 export PROJ_PATH=./
 ```
@@ -49,29 +50,33 @@ bash scripts/infer_widedeep_fp16_performance.sh
 
 ### Accuracy
 
-If you want to evaluate the accuracy of this model, please visit the website: < https://github.com/yudefu/ByteMLPerf/tree/iluvatar_general_infer >, which integrates inference and training of many models under this framework, supporting the ILUVATAR backend
-
-```bash
-
-git clone https://github.com/yudefu/ByteMLPerf.git -b iluvatar_general_infer
-```
+If you want to evaluate the accuracy of this model, please visit the website: <https://github.com/yudefu/ByteMLPerf/tree/iluvatar_general_infer>, which integrates inference and training of many models under this framework, supporting the ILUVATAR backend
 
-For detailed steps regarding this model, please refer to this document: < https://github.com/yudefu/ByteMLPerf/blob/iluvatar_general_infer/byte_infer_perf/general_perf/backends/ILUVATAR/README.zh_CN.md > Note: You need to modify the relevant paths in the code to your own correct paths.
+For detailed steps regarding this model, please refer to this document: <https://github.com/yudefu/ByteMLPerf/blob/iluvatar_general_infer/byte_infer_perf/general_perf/backends/ILUVATAR/README.zh_CN.md> Note: You need to modify the relevant paths in the code to your own correct paths.
 
 ```bash
+# Clone ByteMLPerf
+git clone -b iluvatar_general_infer https://github.com/yudefu/ByteMLPerf.git
 pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/requirements.txt
 mv perf_engine.py ./ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py
 
+# Get eval.csv and onnx
 mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/regular/open_wide_deep_saved_model
 mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/
-wget -O ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/eval.csv https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/eval.csv
+wget https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/eval.csv
+mv eval.csv ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/
 
-sftp -P 29889 user01@58.247.142.52 password:5$gS%659
-cd yudefu/bytedance_perf ; get widedeep_dynamicshape_new.onnx
-exit
+wget http://files.deepspark.org.cn:880/deepspark/widedeep_dynamicshape_new.onnx
+mv widedeep_dynamicshape_new.onnx ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/regular/open_wide_deep_saved_model/
 
-mv path/to/widedeep_dynamicshape_new.onnx ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/regular/open_wide_deep_saved_model/widedeep_dynamicshape.onnx
+# Run Acc scripts
 cd ./ByteMLPerf/byte_infer_perf/general_perf
 python3 core/perf_engine.py --hardware_type ILUVATAR --task widedeep-tf-fp32
-```
\ No newline at end of file
+```
+
+## Results
+
+| Model     | BatchSize | Precision | FPS      | ACC     |
+| --------- | --------- | --------- | -------- | ------- |
+| Wide&Deep | 1024      | FP16      | 77073.93 | 0.74597 |
diff --git a/models/recommendation/widedeep/ixrt/scripts/infer_widedeep_fp16_performance.sh b/models/recommendation/widedeep/ixrt/scripts/infer_widedeep_fp16_performance.sh
index 0b968abe..866adb44 100644
--- a/models/recommendation/widedeep/ixrt/scripts/infer_widedeep_fp16_performance.sh
+++ b/models/recommendation/widedeep/ixrt/scripts/infer_widedeep_fp16_performance.sh
@@ -12,6 +12,7 @@
 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 # License for the specific language governing permissions and limitations
 # under the License.
+
 set -x
 ORIGIN_ONNX=${ORIGIN_ONNX_NAME}.onnx
 cd ${PROJ_PATH}
-- 
Gitee