diff --git a/models/nlp/language_model/roberta/ixrt/README.md b/models/nlp/language_model/roberta/ixrt/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5ba6e8880b088b23bc9d8b4205291309951f0624 --- /dev/null +++ b/models/nlp/language_model/roberta/ixrt/README.md @@ -0,0 +1,96 @@ +# RoBERTa + +## Description + +Language model pretraining has led to significant performance gains but careful comparison between different approaches is challenging. Training is computationally expensive, often done on private datasets of different sizes, and, as we will show, hyperparameter choices have significant impact on the final results. We present a replication study of BERT pretraining (Devlin et al., 2019) that carefully measures the impact of many key hyperparameters and training data size. We find that BERT was significantly undertrained, and can match or exceed the performance of every model published after it. Our best model achieves state-of-the-art results on GLUE, RACE and SQuAD. These results highlight the importance of previously overlooked design choices, and raise questions about the source of recently reported improvements. We release our models and code. + +## Setup + +### Install + +```bash +pip3 install onnxsim +pip3 install py-libnuma==1.2 +pip3 install bert +pip3 install pycuda +pip3 install transformers==4.33.3 +``` + +### Download + +Pretrained model: + +Dataset: + +```bash +# Go to path of this model +cd ${PROJ_ROOT}/models/nlp/language_model/roberta/ixrt/ + +# get open_roberta +wget https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_roberta.tar +tar xf open_roberta.tar +rm -f open_roberta.tar + +# get roberta-torch-fp32.json +git clone -b iluvatar_general_infer https://github.com/yudefu/ByteMLPerf.git +cp ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/roberta-torch-fp32.json ./ + +# export onnx +python3 export_onnx.py --model_path open_roberta/roberta-base-squad.pt --output_path open_roberta/roberta-torch-fp32.onnx + +# Simplify onnx model +onnxsim open_roberta/roberta-torch-fp32.onnx open_roberta/roberta-torch-fp32_sim.onnx +``` + +## Inference + +```bash +export ORIGIN_ONNX_NAME=./open_roberta/roberta-torch-fp32_sim +export OPTIMIER_FILE=${IXRT_OSS_ROOT}/tools/optimizer/optimizer.py +export PROJ_PATH=./ +``` + +### Performance + +```bash +bash scripts/infer_roberta_fp16_performance.sh +``` + +### Accuracy + +If you want to evaluate the accuracy of this model, please visit the website: , which integrates inference and training of many models under this framework, supporting the ILUVATAR backend + +For detailed steps regarding this model, please refer to this document: Note: You need to modify the relevant paths in the code to your own correct paths. 
+ +```bash +# Install requirements +pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/requirements.txt +pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/requirements.txt +mv perf_engine.py ./ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py + +# Move open_roberta +mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/ +mv open_roberta ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/ + +# Get open_squad +wget https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_squad.tar +tar xf open_squad.tar +cp ./open_squad/* ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_squad +rm -f open_squad.tar + +# Get csarron.tar +wget http://files.deepspark.org.cn:880/deepspark/csarron.tar +tar xf csarron.tar +rm -f csarron.tar +mv csarron/ ./ByteMLPerf/byte_infer_perf/ + +# Run Acc scripts +cd ./ByteMLPerf/byte_infer_perf/ +python3 general_perf/core/perf_engine.py --hardware_type ILUVATAR --task roberta-torch-fp32 +``` + +## Results + +| Model | BatchSize | Precision | FPS | F1 | Exact Match | +| ------- | --------- | --------- | ------ | -------- | ----------- | +| RoBERTa | 1 | FP16 | 355.48 | 83.14387 | 76.50175 | diff --git a/models/nlp/language_model/roberta/ixrt/export_onnx.py b/models/nlp/language_model/roberta/ixrt/export_onnx.py new file mode 100644 index 0000000000000000000000000000000000000000..bc9d2da750a00a4eefd2323faf0354d9eb3eaf69 --- /dev/null +++ b/models/nlp/language_model/roberta/ixrt/export_onnx.py @@ -0,0 +1,73 @@ +# Copyright 2023 ByteDance and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
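+
+# What the exporter below does: it reads the ByteMLPerf model config JSON
+# whose path is derived from --output_path (the text before the first '.'
+# plus '.json'), takes the input names, shapes and dtypes from it, builds
+# random example tensors, loads the TorchScript checkpoint on CPU, and
+# exports an opset-11 ONNX graph whose first (batch) dimension is marked
+# dynamic for every input and output.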
+ +import argparse +import json + +import numpy as np +import torch + + +def torch_to_onnx(model_path, output_path): + model_name = output_path.split(".")[0] + with open(model_name + ".json", "r") as f: + model_info = json.load(f) + model_inputs = model_info["inputs"].split(",") + input_shapes = model_info["input_shape"] + input_type = model_info["input_type"].split(",") + example_inputs = _get_fake_samples(input_shapes, input_type) + + model = torch.jit.load(model_path, map_location=torch.device("cpu")) + model.eval() + + names = model_inputs + dynamic_inputs = {} + for i in range(len(names)): + dynamic_inputs[names[i]] = {0: "batch_size"} + outputs = model_info["outputs"].split(",") + for output in outputs: + dynamic_inputs[output] = {0: "batch_size"} + torch.onnx.export( + model, + example_inputs, + output_path, + opset_version=11, + input_names=names, + output_names=outputs, + dynamic_axes=dynamic_inputs, + ) + + +def _get_fake_samples(shape, type): + data = [] + idx = 0 + for key, val in shape.items(): + val = [val[0] * 1] + val[1:] + data.append(torch.from_numpy(np.random.random(val).astype(type[idx].lower()))) + idx += 1 + return data + + +def get_args(): + """Parse commandline.""" + parser = argparse.ArgumentParser() + parser.add_argument("--model_path", default="") + parser.add_argument("--output_path", default="") + args = parser.parse_args() + return args + + +if __name__ == "__main__": + args = get_args() + torch_to_onnx(args.model_path, args.output_path) \ No newline at end of file diff --git a/models/nlp/language_model/roberta/ixrt/gen_data.py b/models/nlp/language_model/roberta/ixrt/gen_data.py new file mode 100644 index 0000000000000000000000000000000000000000..a59225b2613b2e456b88ed4c79329287713e77a6 --- /dev/null +++ b/models/nlp/language_model/roberta/ixrt/gen_data.py @@ -0,0 +1,43 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
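+
+# What the generator below does: it writes the random benchmark inputs
+# consumed by the performance script as raw int64 binaries -- token ids
+# sampled from [0, 50265) with shape (batch_size, 384) into input_ids.bin,
+# an all-ones attention mask into input_mask.bin, and an all-zeros
+# token-type tensor into token_type_ids.bin. Note that --output_path is
+# used as a plain filename prefix, so pass a directory path ending with a
+# trailing slash.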
+ +import argparse + +import numpy as np +import torch + + +def gen_data(batch_size, output): + a = torch.randint(0, 50265, (batch_size, 384)) + a = a.numpy().astype(np.int64) + a.tofile(output+"input_ids.bin") + + a = np.ones((batch_size, 384), dtype=np.int64) + a.tofile(output+"input_mask.bin") + + a = np.zeros((batch_size, 384), dtype=np.int64) + a.tofile(output+"token_type_ids.bin") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Generate data for RoBERTa model.") + parser.add_argument( + "--batch_size", type=int, required=True, help="Batch size for data generation" + ) + parser.add_argument("--output_path", default="") + + args = parser.parse_args() + + gen_data(args.batch_size, args.output_path) \ No newline at end of file diff --git a/models/nlp/language_model/roberta/ixrt/perf_engine.py b/models/nlp/language_model/roberta/ixrt/perf_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..f3f108474b281bfce71ccaf73d60ba3119cf97c1 --- /dev/null +++ b/models/nlp/language_model/roberta/ixrt/perf_engine.py @@ -0,0 +1,349 @@ +# Copyright 2023 ByteDance and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import sys +import os +import logging +import importlib +import json +import subprocess +import time + +from typing import Any, Dict, Tuple +from prompt_toolkit.shortcuts import radiolist_dialog, input_dialog, yes_no_dialog +from prompt_toolkit.styles import Style + +BYTE_MLPERF_ROOT = os.path.dirname( + os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +os.chdir(BYTE_MLPERF_ROOT) +sys.path.insert(0, BYTE_MLPERF_ROOT) + +import argparse +from general_perf.core.configs.workload_store import load_workload +from general_perf.core.configs.dataset_store import load_dataset +from general_perf.core.configs.backend_store import init_compile_backend, init_runtime_backend + +logging.basicConfig(level=logging.INFO) +log = logging.getLogger("PerfEngine") +os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3' + + +def get_args(): + """Parse commandline.""" + parser = argparse.ArgumentParser() + parser.add_argument( + "--task", + default="resnet50-tf-fp32", + help="The task going to be evaluted, refs to workloads/") + parser.add_argument( + "--hardware_type", + default="GPU", + help="The backend going to be evaluted, refs to backends/") + parser.add_argument("--compile_only", + action='store_true', + help="Run compilation only") + + args = parser.parse_args() + return args + + +class PerfEngine: + def __init__(self) -> None: + super().__init__() + self.args = get_args() + self.workload = load_workload(self.args.task) + self.backend_type = self.args.hardware_type + self.compile_backend = None + self.old_os_path = os.environ['PATH'] + self.prev_sys_path = list(sys.path) + self.real_prefix = sys.prefix + self.compile_only_mode = False + + def start_engine(self) -> None: + ''' + Byte MlPerf will create an virtual env for each backend to avoid dependance conflict + ''' + success, total = 0, len(self.workload) + if total == 0: + return + 
log.info("******************* Backend Env Initization *******************") + status = self.activate_venv(self.backend_type) + if not status: + log.warning("Activate virtualenv Failed, Please Check...") + + self.compile_backend = init_compile_backend(self.backend_type) + self.runtime_backend = init_runtime_backend(self.backend_type) + + output_dir = os.path.abspath('general_perf/reports/' + + self.backend_type) + os.makedirs(output_dir, exist_ok=True) + + status = self.single_workload_perf(self.workload) + + def single_workload_perf( + self, workload: Dict[str, Any]) -> bool: + log.info("******************************************* Start to test model: {}. *******************************************".format(workload['model'])) + + # Check Compile Only Mode + self.compile_only_mode = False + if self.args.compile_only or workload['compile_only']: + self.compile_only_mode = True + + base_report = { + "Model": workload['model'].upper(), + "Backend": self.backend_type, + "Host Info": self.get_cpu_name() + } + + # Initalize Model Config Info + model_info = self.get_model_info(workload['model']) + pre_compile_config = {"workload": workload, 'model_info': model_info} + interact_info = self.check_interact_info(pre_compile_config) + pre_compile_config['interact_info'] = interact_info + if not model_info['dataset_name']: + model_info['dataset_name'] = 'fake_dataset' + + + ''' + Compile Backend could do some optimization like convert model format here + ''' + log.info("******************************************* Running Backend Compilation... *******************************************") + log.info("Running Backend Preoptimization...") + pre_compile_config = self.compile_backend.pre_optimize(pre_compile_config) + + + # Initalize dataset + dataset = load_dataset(model_info) + dataset.preprocess() + base_report['Dataset'] = model_info['dataset_name'].upper( + ) if model_info['dataset_name'] else None + + #Placeholder Only + segment_info = self.compile_backend.segment(pre_compile_config) + + best_batch_sizes = self.compile_backend.get_best_batch_size() + if isinstance(best_batch_sizes, list): + pre_compile_config['workload'][ + 'batch_sizes'] = best_batch_sizes + + log.info("Start to compile the model...") + start = time.time() + compile_info = self.compile_backend.compile(pre_compile_config, + dataset) + end = time.time() + + graph_compile_report = {} + graph_compile_report["Compile Duration"] = round(end - start, 5) + graph_compile_report["Compile Precision"] = compile_info[ + 'compile_precision'] + graph_compile_report["Subgraph Coverage"] = compile_info['sg_percent'] + if 'optimizations' in compile_info: + graph_compile_report['Optimizations'] = compile_info['optimizations'] + if 'instance_count' in compile_info: + base_report['Instance Count'] = compile_info['instance_count'] + if 'device_count' in compile_info: + base_report['Device Count'] = compile_info['device_count'] + base_report['Graph Compile'] = graph_compile_report + + # Initalize Output Dir and Reports + output_dir = os.path.abspath('general_perf/reports/' + + self.backend_type + '/' + + workload['model']) + os.makedirs(output_dir, exist_ok=True) + + # Compile only mode will stop here + if self.compile_only_mode: + base_report.pop("Backend") + return compile_info["compile_status"], base_report + + # load runtime backend + """ + Start Here + """ + batch_sizes = pre_compile_config['workload']['batch_sizes'] + self.runtime_backend.configs = compile_info + self.runtime_backend.workload = workload + self.runtime_backend.model_info = model_info 
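+        # Runtime phase (below): load the runtime backend with the first
+        # configured batch size, then run the accuracy, numeric-diff and
+        # QPS/latency checks enabled by the workload flags, and finally
+        # write the collected report to result-<precision>.json.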
+ + self.runtime_backend.load(workload['batch_sizes'][0]) + # test accuracy + accuracy_report = {} + AccuracyChecker = self.get_accuracy_checker( + model_info['dataset_name'] + if model_info['dataset_name'] else 'fake_dataset') + AccuracyChecker.runtime_backend = self.runtime_backend + AccuracyChecker.dataloader = dataset + AccuracyChecker.output_dir = output_dir + AccuracyChecker.configs = compile_info + + if workload['test_accuracy']: + log.info("******************************************* Running Accuracy Checker... *******************************************") + + dataset.rebatch(self.runtime_backend.get_loaded_batch_size()) + accuracy_results = AccuracyChecker.calculate_acc( + workload['data_percent']) + + accuracy_report['Data Percent'] = workload['data_percent'] + accuracy_report.update(accuracy_results) + + # test numeric + if workload['test_numeric']: + log.info("******************************************* Running Numeric Checker... *******************************************") + + dataset.rebatch(self.runtime_backend.get_loaded_batch_size()) + if not workload['test_accuracy']: + accuracy_results = AccuracyChecker.calculate_acc( + workload['data_percent']) + diff_results = AccuracyChecker.calculate_diff() + accuracy_report.update(diff_results) + # accuracy_report['Diff Dist'] = compile_info['model'] + '-to-' + compile_info['compile_precision'].lower() + ".png" + + if accuracy_report: + base_report['Accuracy'] = accuracy_report + + # function to test qps and latency + if workload['test_perf']: + log.info("******************************************* Runing QPS Checker... *******************************************") + performance_reports = [] + qs_status = self.runtime_backend.is_qs_mode_supported() + if qs_status: + qs_config = self.runtime_backend.generate_qs_config() + performance_reports = self.qs_benchmark(qs_config) + else: + for bs in batch_sizes: + self.runtime_backend.load(bs) + batch_reports = self.runtime_backend.benchmark(dataset) + performance_reports.append(batch_reports) + base_report['Performance'] = performance_reports + + if "Instance Count" not in base_report: + log.warning("Vendors need to Add # of instances") + if "Device Count" not in base_report: + log.warning("Vendors need to Add # of devices") + + # write output to json file + output_report_path = output_dir + "/result-" + compile_info['compile_precision'].lower() + ".json" + with open(output_report_path, 'w') as file: + json.dump(base_report, file, indent=4) + + base_report.pop("Backend") + log.info("Testing Finish. Report is saved in path: [ {}/{} ]". + format(output_dir[output_dir.rfind('general_perf'):], + os.path.basename(output_report_path))) + + return compile_info["compile_status"] + + #WIP + def qs_benchmark(self, qs_config: Dict[str, Any]) -> list: + return [] + + def get_accuracy_checker(self, dataset_name: str): + AccuracyChecker = importlib.import_module('general_perf.datasets.' 
+ + dataset_name + + ".test_accuracy") + AccuracyChecker = getattr(AccuracyChecker, 'AccuracyChecker') + return AccuracyChecker() + + def get_model_info(self, model_name: str) -> Dict[str, Any]: + with open("general_perf/model_zoo/" + model_name + '.json', + 'r') as file: + model_info = json.load(file) + return model_info + + def get_cpu_name(self): + command = "lscpu | grep 'Model name' | awk -F: '{print $2}'" + cpu_name = subprocess.check_output(command, shell=True) + return cpu_name.decode().strip() + + def check_interact_info( + self, pre_compile_config: Dict[str, Dict]) -> Dict[str, Any]: + interact_info = self.compile_backend.get_interact_profile( + pre_compile_config) + + answer = {} + if len(interact_info) == 0: + return answer + + dialog_style = Style.from_dict({ + 'dialog': 'bg:#88b8ff', + 'dialog frame.label': 'bg:#ffffff #000000', + 'dialog.body': 'bg:#000000 #a0acde', + 'dialog shadow': 'bg:#004aaa', + }) + + input_style = Style.from_dict({ + 'dialog': 'bg:#88b8ff', + 'dialog frame.label': 'bg:#ffffff #000000', + 'dialog.body': 'bg:#000000 #a0acde', + 'dialog shadow': 'bg:#004aaa', + 'text-area.prompt': 'bg:#ffffff', + 'text-area': '#000000', + }) + + option = yes_no_dialog(title=self.backend_type + '编译配置', + text='[请选择]:是否进行编译后端配置:', + style=dialog_style).run() + if option: + sum_question = len(interact_info) + for i, question in enumerate(interact_info): + if question['depends']: + state = 0 + for title in question['depends'].split(','): + if not answer[title]: + state = 1 + if state: + continue + if question['dialog_type'] == 'Yes/No Dialog': + option = yes_no_dialog( + title=self.backend_type + '编译配置进度(' + str(i + 1) + + '/' + str(sum_question) + ')', + text="[Backend " + self.backend_type + "]: " + + question['note'], + style=dialog_style).run() + elif question['dialog_type'] == "Input Dialog": + option = input_dialog( + title=self.backend_type + '编译配置进度(' + str(i + 1) + + '/' + str(sum_question) + ')', + text="[Backend " + self.backend_type + "]: " + + question['note'], + style=input_style).run() + elif question['dialog_type'] == "Radiolist Dialog": + choice = [(i, text) + for i, text in enumerate(question['options'])] + num = radiolist_dialog( + title=self.backend_type + '编译配置进度(' + str(i + 1) + + '/' + str(sum_question) + ')', + text="[Backend " + self.backend_type + "]: " + + question['note'], + values=choice, + style=dialog_style).run() + option = question['options'][num] if num is not None else question[ + 'default'] + answer[question['name']] = option + + return answer + + def activate_venv(self, hardware_type: str) -> bool: + + return True + + def deactivate_venv(self): + sys.path[: + 0] = self.prev_sys_path #will also revert the added site-packages + sys.prefix = self.real_prefix + os.environ['PATH'] = self.old_os_path + + +if __name__ == "__main__": + engine = PerfEngine() + engine.start_engine() \ No newline at end of file diff --git a/models/nlp/language_model/roberta/ixrt/scripts/infer_roberta_fp16_performance.sh b/models/nlp/language_model/roberta/ixrt/scripts/infer_roberta_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..90bdec9be8b064f41e4c5c96a40bd09d1f52b253 --- /dev/null +++ b/models/nlp/language_model/roberta/ixrt/scripts/infer_roberta_fp16_performance.sh @@ -0,0 +1,44 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +set -x +ORIGIN_ONNX=${ORIGIN_ONNX_NAME}.onnx +cd ${PROJ_PATH} + +run(){ + BS=${1:-1} + TARGET_ONNX=${ORIGIN_ONNX_NAME}_end.onnx + TARGET_ENGINE=${ORIGIN_ONNX_NAME}_bs_${BS}_end.engine + if [[ ! -f "${ORIGIN_ONNX}" ]];then + echo "${ORIGIN_ONNX} not exists!" + exit 1 + fi + + python3 ${PROJ_PATH}/gen_data.py --batch_size ${BS} --output_path ${PROJ_PATH} + + # Graph optimize + [ -f "${TARGET_ONNX}" ] || python3 ${OPTIMIER_FILE} --onnx ${ORIGIN_ONNX} --dump_onnx + + # Build Engine + ixrtexec --onnx ${TARGET_ONNX} --min_shape input_ids.1:${BS}x384,attention_mask.1:${BS}x384,token_type_ids.1:${BS}x384 \ + --opt_shape input_ids.1:${BS}x384,attention_mask.1:${BS}x384,token_type_ids.1:${BS}x384 \ + --max_shape input_ids.1:${BS}x384,attention_mask.1:${BS}x384,token_type_ids.1:${BS}x384 \ + --save_engine ${TARGET_ENGINE} --log_level error --plugins ixrt_plugin + + # Test Performance + ixrtexec --load_engine ${TARGET_ENGINE} --plugins ixrt_plugin --shapes input_ids.1:${BS}x384,attention_mask.1:${BS}x384,token_type_ids.1:${BS}x384 + +} +run 1 \ No newline at end of file diff --git a/models/nlp/language_model/roformer/ixrt/README.md b/models/nlp/language_model/roformer/ixrt/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ba1e5975e0d0f6d094f3055aaf708c65dfed9b07 --- /dev/null +++ b/models/nlp/language_model/roformer/ixrt/README.md @@ -0,0 +1,102 @@ +# RoFormer + +## Description + +Position encoding recently has shown effective in the transformer architecture. It enables valuable supervision for dependency modeling between elements at different positions of the sequence. In this paper, we first investigate various methods to integrate positional information into the learning process of transformer-based language models. Then, we propose a novel method named Rotary Position Embedding(RoPE) to effectively leverage the positional information. Specifically, the proposed RoPE encodes the absolute position with a rotation matrix and meanwhile incorporates the explicit relative position dependency in self-attention formulation. Notably, RoPE enables valuable properties, including the flexibility of sequence length, decaying inter-token dependency with increasing relative distances, and the capability of equipping the linear self-attention with relative position encoding. Finally, we evaluate the enhanced transformer with rotary position embedding, also called RoFormer, on various long text classification benchmark datasets. 
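+
+As a rough illustration of the idea above (not code taken from this repository or from the IxRT optimizer), RoPE rotates consecutive feature pairs of the query and key vectors by position-dependent angles, so the attention dot product between two rotated vectors depends only on their relative offset. A minimal NumPy sketch, assuming the usual base frequency of 10000:
+
+```python
+import numpy as np
+
+def rotary_embed(x, base=10000.0):
+    """Apply rotary position embedding to x of shape (seq_len, dim), dim even."""
+    seq_len, dim = x.shape
+    pos = np.arange(seq_len)[:, None]                      # (seq_len, 1)
+    inv_freq = 1.0 / base ** (np.arange(0, dim, 2) / dim)  # (dim/2,)
+    theta = pos * inv_freq                                 # (seq_len, dim/2)
+    cos, sin = np.cos(theta), np.sin(theta)
+    x1, x2 = x[:, 0::2], x[:, 1::2]
+    out = np.empty_like(x)
+    out[:, 0::2] = x1 * cos - x2 * sin                     # rotate each feature pair
+    out[:, 1::2] = x1 * sin + x2 * cos
+    return out
+
+# Queries and keys are rotated this way before the attention dot product:
+q = rotary_embed(np.random.randn(128, 64))
+k = rotary_embed(np.random.randn(128, 64))
+```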
+ +## Setup + +### Install + +```bash +apt install -y libnuma-dev + +pip3 install tf2onnx +pip3 install pycuda +pip3 install onnxsim +pip3 install py-libnuma==1.2 + +``` + +### Download + +Pretrained model: + +Dataset: + +```bash +# Go to path of this model +cd ${PROJ_ROOT}/models/nlp/language_model/roformer/ixrt + +# Download the pretrained model and dataset to 'data' +mkdir -p data/ +pushd data/ +wget https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_roformer.tar +tar xf open_roformer.tar +rm -f open_roformer.tar +popd +``` + +### Deal with ONNX + +```bash +# export onnx +python3 export_onnx.py --model_path ./data/open_roformer --output_path ./data/open_roformer/roformer-frozen_org.onnx + +# Simplify onnx model +onnxsim ./data/open_roformer/roformer-frozen_org.onnx ./data/open_roformer/roformer-frozen.onnx +python3 deploy.py --model_path ./data/open_roformer/roformer-frozen.onnx --output_path ./data/open_roformer/roformer-frozen.onnx +``` + +## Inference + +```bash +export ORIGIN_ONNX_NAME=./data/open_roformer/roformer-frozen +export OPTIMIER_FILE=${IXRT_OSS_ROOT}/tools/optimizer/optimizer.py +export PROJ_PATH=./ +``` + +### Performance + +```bash +bash scripts/infer_roformer_fp16_performance.sh +``` + +### Accuracy + +If you want to evaluate the accuracy of this model, please visit the website: , which integrates inference and training of many models under this framework, supporting the ILUVATAR backend. + +For detailed steps regarding this model, please refer to this document: Note: You need to modify the relevant paths in the code to your own correct paths. + +```bash +# Clone ByteMLPerf +git clone -b iluvatar_general_infer https://github.com/yudefu/ByteMLPerf.git +pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/requirements.txt +mv perf_engine.py ./ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py +mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/ + +# Comment Line102 in compile_backend_iluvatar.py +sed -i '102s/build_engine/# build_engine/' ./ByteMLPerf/byte_infer_perf/general_perf/backends/ILUVATAR/compile_backend_iluvatar.py + +# Move open_roformer +mv ./data/open_roformer ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/popular/ + +# Setup open_cail2019 dataset +wget https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/open_cail2019.tar +tar xf open_cail2019.tar +cp ./open_cail2019/* ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_cail2019 +rm -f open_cail2019.tar + +# Go to general_perf/ +cd ./ByteMLPerf/byte_infer_perf/general_perf +# Modify model_zoo/roformer-tf-fp32.json +sed -i 's/segment:0/segment0/g; s/token:0/token0/g' model_zoo/roformer-tf-fp32.json +# Run Acc scripts +python3 core/perf_engine.py --hardware_type ILUVATAR --task roformer-tf-fp32 +``` + +## Results + +| Model | BatchSize | Precision | FPS | ACC | +| -------- | --------- | --------- | ------- | ------- | +| RoFormer | 2 | FP16 | 195.186 | 0.33789 | diff --git a/models/nlp/language_model/roformer/ixrt/deploy.py b/models/nlp/language_model/roformer/ixrt/deploy.py new file mode 100644 index 0000000000000000000000000000000000000000..073fb7333577624be7c304eaeb1916d272cb4dcc --- /dev/null +++ b/models/nlp/language_model/roformer/ixrt/deploy.py @@ -0,0 +1,36 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import onnx +import argparse + +def get_args(): + """Parse commandline.""" + parser = argparse.ArgumentParser() + parser.add_argument("--model_path", default="") + parser.add_argument("--output_path", default="") + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = get_args() + model = onnx.load(args.model_path) + for input in model.graph.input: + for node in model.graph.node: + for i, name in enumerate(node.input): + if name == input.name: + node.input[i] =name.replace(':',"") + input.name=input.name.replace(':',"")# 保存修改后的模型 + onnx.save(model, args.output_path) \ No newline at end of file diff --git a/models/nlp/language_model/roformer/ixrt/export_onnx.py b/models/nlp/language_model/roformer/ixrt/export_onnx.py new file mode 100644 index 0000000000000000000000000000000000000000..475dddd7c2ab27b6ca342be98ea92d2c791ff60b --- /dev/null +++ b/models/nlp/language_model/roformer/ixrt/export_onnx.py @@ -0,0 +1,70 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
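+
+# What the converter below does: it loads the TensorFlow SavedModel with
+# tf2onnx's tf_loader, then calls tf2onnx.convert.from_graph_def to write
+# an opset-11 ONNX graph to --output_path; the keyword arguments only
+# forward optional tf2onnx settings (custom ops, shape overrides, etc.)
+# when the caller provides them.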
+ +import tf2onnx +from tf2onnx import tf_loader +import argparse +ONNX_OPSET = 11 + +def _convert_graphdef_to_onnx(graph_def, + inputs=None, + outputs=None, + output_path='', + **kwargs): + + inputs_as_nchw = kwargs.get('inputs_as_nchw', None) + custom_ops = kwargs.get('custom_ops', None) + custom_op_handlers = kwargs.get('custom_op_handlers', None) + custom_rewriter = kwargs.get('custom_rewriter', None) + extra_opset = kwargs.get('extra_opset', None) + large_model = kwargs.get('large_model', False) + name = kwargs.get('name', 'habana_convert') + target = kwargs.get('target', None) + shape_override = kwargs.get('shape_override', {}) + + tf2onnx.convert.from_graph_def(graph_def, + name=name, + input_names=inputs, + output_names=outputs, + opset=ONNX_OPSET, + custom_ops=custom_ops, + custom_op_handlers=custom_op_handlers, + custom_rewriter=custom_rewriter, + inputs_as_nchw=inputs_as_nchw, + extra_opset=extra_opset, + shape_override=shape_override, + target=target, + large_model=large_model, + output_path=output_path) + return output_path + +def savedmodel_to_onnx(model_path, output_path='', **kwargs): + inputs = kwargs.get('inputs', None) + outputs = kwargs.get('outputs', None) + graph_def, inputs, outputs = tf_loader.from_saved_model( + model_path, inputs, outputs) + return _convert_graphdef_to_onnx(graph_def, inputs, outputs, output_path, **kwargs) + +def get_args(): + """Parse commandline.""" + parser = argparse.ArgumentParser() + parser.add_argument("--model_path", default="") + parser.add_argument("--output_path", default="") + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = get_args() + savedmodel_to_onnx(args.model_path, args.output_path) \ No newline at end of file diff --git a/models/nlp/language_model/roformer/ixrt/perf_engine.py b/models/nlp/language_model/roformer/ixrt/perf_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..f3f108474b281bfce71ccaf73d60ba3119cf97c1 --- /dev/null +++ b/models/nlp/language_model/roformer/ixrt/perf_engine.py @@ -0,0 +1,349 @@ +# Copyright 2023 ByteDance and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
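+
+# Driver overview: this script parses --task and --hardware_type, loads the
+# matching workload and model configs, initializes the compile and runtime
+# backends for that hardware, compiles the model, and (depending on the
+# workload flags) runs accuracy, numeric-diff and QPS/latency checks before
+# writing a report to general_perf/reports/<backend>/<model>/.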
+import sys +import os +import logging +import importlib +import json +import subprocess +import time + +from typing import Any, Dict, Tuple +from prompt_toolkit.shortcuts import radiolist_dialog, input_dialog, yes_no_dialog +from prompt_toolkit.styles import Style + +BYTE_MLPERF_ROOT = os.path.dirname( + os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +os.chdir(BYTE_MLPERF_ROOT) +sys.path.insert(0, BYTE_MLPERF_ROOT) + +import argparse +from general_perf.core.configs.workload_store import load_workload +from general_perf.core.configs.dataset_store import load_dataset +from general_perf.core.configs.backend_store import init_compile_backend, init_runtime_backend + +logging.basicConfig(level=logging.INFO) +log = logging.getLogger("PerfEngine") +os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3' + + +def get_args(): + """Parse commandline.""" + parser = argparse.ArgumentParser() + parser.add_argument( + "--task", + default="resnet50-tf-fp32", + help="The task going to be evaluted, refs to workloads/") + parser.add_argument( + "--hardware_type", + default="GPU", + help="The backend going to be evaluted, refs to backends/") + parser.add_argument("--compile_only", + action='store_true', + help="Run compilation only") + + args = parser.parse_args() + return args + + +class PerfEngine: + def __init__(self) -> None: + super().__init__() + self.args = get_args() + self.workload = load_workload(self.args.task) + self.backend_type = self.args.hardware_type + self.compile_backend = None + self.old_os_path = os.environ['PATH'] + self.prev_sys_path = list(sys.path) + self.real_prefix = sys.prefix + self.compile_only_mode = False + + def start_engine(self) -> None: + ''' + Byte MlPerf will create an virtual env for each backend to avoid dependance conflict + ''' + success, total = 0, len(self.workload) + if total == 0: + return + log.info("******************* Backend Env Initization *******************") + status = self.activate_venv(self.backend_type) + if not status: + log.warning("Activate virtualenv Failed, Please Check...") + + self.compile_backend = init_compile_backend(self.backend_type) + self.runtime_backend = init_runtime_backend(self.backend_type) + + output_dir = os.path.abspath('general_perf/reports/' + + self.backend_type) + os.makedirs(output_dir, exist_ok=True) + + status = self.single_workload_perf(self.workload) + + def single_workload_perf( + self, workload: Dict[str, Any]) -> bool: + log.info("******************************************* Start to test model: {}. *******************************************".format(workload['model'])) + + # Check Compile Only Mode + self.compile_only_mode = False + if self.args.compile_only or workload['compile_only']: + self.compile_only_mode = True + + base_report = { + "Model": workload['model'].upper(), + "Backend": self.backend_type, + "Host Info": self.get_cpu_name() + } + + # Initalize Model Config Info + model_info = self.get_model_info(workload['model']) + pre_compile_config = {"workload": workload, 'model_info': model_info} + interact_info = self.check_interact_info(pre_compile_config) + pre_compile_config['interact_info'] = interact_info + if not model_info['dataset_name']: + model_info['dataset_name'] = 'fake_dataset' + + + ''' + Compile Backend could do some optimization like convert model format here + ''' + log.info("******************************************* Running Backend Compilation... 
*******************************************") + log.info("Running Backend Preoptimization...") + pre_compile_config = self.compile_backend.pre_optimize(pre_compile_config) + + + # Initalize dataset + dataset = load_dataset(model_info) + dataset.preprocess() + base_report['Dataset'] = model_info['dataset_name'].upper( + ) if model_info['dataset_name'] else None + + #Placeholder Only + segment_info = self.compile_backend.segment(pre_compile_config) + + best_batch_sizes = self.compile_backend.get_best_batch_size() + if isinstance(best_batch_sizes, list): + pre_compile_config['workload'][ + 'batch_sizes'] = best_batch_sizes + + log.info("Start to compile the model...") + start = time.time() + compile_info = self.compile_backend.compile(pre_compile_config, + dataset) + end = time.time() + + graph_compile_report = {} + graph_compile_report["Compile Duration"] = round(end - start, 5) + graph_compile_report["Compile Precision"] = compile_info[ + 'compile_precision'] + graph_compile_report["Subgraph Coverage"] = compile_info['sg_percent'] + if 'optimizations' in compile_info: + graph_compile_report['Optimizations'] = compile_info['optimizations'] + if 'instance_count' in compile_info: + base_report['Instance Count'] = compile_info['instance_count'] + if 'device_count' in compile_info: + base_report['Device Count'] = compile_info['device_count'] + base_report['Graph Compile'] = graph_compile_report + + # Initalize Output Dir and Reports + output_dir = os.path.abspath('general_perf/reports/' + + self.backend_type + '/' + + workload['model']) + os.makedirs(output_dir, exist_ok=True) + + # Compile only mode will stop here + if self.compile_only_mode: + base_report.pop("Backend") + return compile_info["compile_status"], base_report + + # load runtime backend + """ + Start Here + """ + batch_sizes = pre_compile_config['workload']['batch_sizes'] + self.runtime_backend.configs = compile_info + self.runtime_backend.workload = workload + self.runtime_backend.model_info = model_info + + self.runtime_backend.load(workload['batch_sizes'][0]) + # test accuracy + accuracy_report = {} + AccuracyChecker = self.get_accuracy_checker( + model_info['dataset_name'] + if model_info['dataset_name'] else 'fake_dataset') + AccuracyChecker.runtime_backend = self.runtime_backend + AccuracyChecker.dataloader = dataset + AccuracyChecker.output_dir = output_dir + AccuracyChecker.configs = compile_info + + if workload['test_accuracy']: + log.info("******************************************* Running Accuracy Checker... *******************************************") + + dataset.rebatch(self.runtime_backend.get_loaded_batch_size()) + accuracy_results = AccuracyChecker.calculate_acc( + workload['data_percent']) + + accuracy_report['Data Percent'] = workload['data_percent'] + accuracy_report.update(accuracy_results) + + # test numeric + if workload['test_numeric']: + log.info("******************************************* Running Numeric Checker... 
*******************************************") + + dataset.rebatch(self.runtime_backend.get_loaded_batch_size()) + if not workload['test_accuracy']: + accuracy_results = AccuracyChecker.calculate_acc( + workload['data_percent']) + diff_results = AccuracyChecker.calculate_diff() + accuracy_report.update(diff_results) + # accuracy_report['Diff Dist'] = compile_info['model'] + '-to-' + compile_info['compile_precision'].lower() + ".png" + + if accuracy_report: + base_report['Accuracy'] = accuracy_report + + # function to test qps and latency + if workload['test_perf']: + log.info("******************************************* Runing QPS Checker... *******************************************") + performance_reports = [] + qs_status = self.runtime_backend.is_qs_mode_supported() + if qs_status: + qs_config = self.runtime_backend.generate_qs_config() + performance_reports = self.qs_benchmark(qs_config) + else: + for bs in batch_sizes: + self.runtime_backend.load(bs) + batch_reports = self.runtime_backend.benchmark(dataset) + performance_reports.append(batch_reports) + base_report['Performance'] = performance_reports + + if "Instance Count" not in base_report: + log.warning("Vendors need to Add # of instances") + if "Device Count" not in base_report: + log.warning("Vendors need to Add # of devices") + + # write output to json file + output_report_path = output_dir + "/result-" + compile_info['compile_precision'].lower() + ".json" + with open(output_report_path, 'w') as file: + json.dump(base_report, file, indent=4) + + base_report.pop("Backend") + log.info("Testing Finish. Report is saved in path: [ {}/{} ]". + format(output_dir[output_dir.rfind('general_perf'):], + os.path.basename(output_report_path))) + + return compile_info["compile_status"] + + #WIP + def qs_benchmark(self, qs_config: Dict[str, Any]) -> list: + return [] + + def get_accuracy_checker(self, dataset_name: str): + AccuracyChecker = importlib.import_module('general_perf.datasets.' 
+ + dataset_name + + ".test_accuracy") + AccuracyChecker = getattr(AccuracyChecker, 'AccuracyChecker') + return AccuracyChecker() + + def get_model_info(self, model_name: str) -> Dict[str, Any]: + with open("general_perf/model_zoo/" + model_name + '.json', + 'r') as file: + model_info = json.load(file) + return model_info + + def get_cpu_name(self): + command = "lscpu | grep 'Model name' | awk -F: '{print $2}'" + cpu_name = subprocess.check_output(command, shell=True) + return cpu_name.decode().strip() + + def check_interact_info( + self, pre_compile_config: Dict[str, Dict]) -> Dict[str, Any]: + interact_info = self.compile_backend.get_interact_profile( + pre_compile_config) + + answer = {} + if len(interact_info) == 0: + return answer + + dialog_style = Style.from_dict({ + 'dialog': 'bg:#88b8ff', + 'dialog frame.label': 'bg:#ffffff #000000', + 'dialog.body': 'bg:#000000 #a0acde', + 'dialog shadow': 'bg:#004aaa', + }) + + input_style = Style.from_dict({ + 'dialog': 'bg:#88b8ff', + 'dialog frame.label': 'bg:#ffffff #000000', + 'dialog.body': 'bg:#000000 #a0acde', + 'dialog shadow': 'bg:#004aaa', + 'text-area.prompt': 'bg:#ffffff', + 'text-area': '#000000', + }) + + option = yes_no_dialog(title=self.backend_type + '编译配置', + text='[请选择]:是否进行编译后端配置:', + style=dialog_style).run() + if option: + sum_question = len(interact_info) + for i, question in enumerate(interact_info): + if question['depends']: + state = 0 + for title in question['depends'].split(','): + if not answer[title]: + state = 1 + if state: + continue + if question['dialog_type'] == 'Yes/No Dialog': + option = yes_no_dialog( + title=self.backend_type + '编译配置进度(' + str(i + 1) + + '/' + str(sum_question) + ')', + text="[Backend " + self.backend_type + "]: " + + question['note'], + style=dialog_style).run() + elif question['dialog_type'] == "Input Dialog": + option = input_dialog( + title=self.backend_type + '编译配置进度(' + str(i + 1) + + '/' + str(sum_question) + ')', + text="[Backend " + self.backend_type + "]: " + + question['note'], + style=input_style).run() + elif question['dialog_type'] == "Radiolist Dialog": + choice = [(i, text) + for i, text in enumerate(question['options'])] + num = radiolist_dialog( + title=self.backend_type + '编译配置进度(' + str(i + 1) + + '/' + str(sum_question) + ')', + text="[Backend " + self.backend_type + "]: " + + question['note'], + values=choice, + style=dialog_style).run() + option = question['options'][num] if num is not None else question[ + 'default'] + answer[question['name']] = option + + return answer + + def activate_venv(self, hardware_type: str) -> bool: + + return True + + def deactivate_venv(self): + sys.path[: + 0] = self.prev_sys_path #will also revert the added site-packages + sys.prefix = self.real_prefix + os.environ['PATH'] = self.old_os_path + + +if __name__ == "__main__": + engine = PerfEngine() + engine.start_engine() \ No newline at end of file diff --git a/models/nlp/language_model/roformer/ixrt/scripts/infer_roformer_fp16_performance.sh b/models/nlp/language_model/roformer/ixrt/scripts/infer_roformer_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..0510e32d98c50d995b584fac3241b804eca512c6 --- /dev/null +++ b/models/nlp/language_model/roformer/ixrt/scripts/infer_roformer_fp16_performance.sh @@ -0,0 +1,42 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +set -x +ORIGIN_ONNX=${ORIGIN_ONNX_NAME}.onnx +cd ${PROJ_PATH} + +run(){ + BS=${1:-1} + TARGET_ONNX=${ORIGIN_ONNX_NAME}_end.onnx + TARGET_ENGINE=${ORIGIN_ONNX_NAME}_end.engine + SHAPE="input_segment0:${BS}x1024,input_token0:${BS}x1024" + MAX_SHAPE="input_segment0:64x1024,input_token0:64x1024" + if [[ ! -f "${ORIGIN_ONNX}" ]];then + echo "${ORIGIN_ONNX} not exists!" + exit 1 + fi + + # Graph optimize + python3 ${OPTIMIER_FILE} --onnx ${ORIGIN_ONNX} --model_type roformer + + # Build Engine + ixrtexec --onnx ${TARGET_ONNX} --save_engine ${TARGET_ENGINE} --log_level error --plugins ixrt_plugin \ + --min_shape $SHAPE --opt_shape $SHAPE --max_shape $MAX_SHAPE --shapes $SHAPE + + # Test Performance + ixrtexec --load_engine ${TARGET_ENGINE} --plugins ixrt_plugin --shapes ${SHAPE} + +} +run 2 \ No newline at end of file diff --git a/models/recommendation/widedeep/ixrt/README.md b/models/recommendation/widedeep/ixrt/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fb01a4d49378ff076dd712ca225bb352262d1f80 --- /dev/null +++ b/models/recommendation/widedeep/ixrt/README.md @@ -0,0 +1,82 @@ +# Wide&Deep + +## Description + +Generalized linear models with nonlinear feature transformations are widely used for large-scale regression and classification problems with sparse inputs. Memorization of feature interactions through a wide set of cross-product feature transformations are effective and interpretable, while generalization requires more feature engineering effort. With less feature engineering, deep neural networks can generalize better to unseen feature combinations through low-dimensional dense embeddings learned for the sparse features. However, deep neural networks with embeddings can over-generalize and recommend less relevant items when the user-item interactions are sparse and high-rank. In this paper, we present Wide & Deep learning---jointly trained wide linear models and deep neural networks---to combine the benefits of memorization and generalization for recommender systems. We productionized and evaluated the system on Google Play, a commercial mobile app store with over one billion active users and over one million apps. Online experiment results show that Wide & Deep significantly increased app acquisitions compared with wide-only and deep-only models. We have also open-sourced our implementation in TensorFlow. 
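+
+Conceptually, the model sums a "wide" logit (a linear layer over sparse cross-product features) with a "deep" logit (an MLP over embedded categorical features plus dense features) and applies a sigmoid. The following toy PyTorch sketch is illustrative only -- it is not the TensorFlow SavedModel converted below, and the layer sizes are made up, though the 13 dense and 26 categorical inputs mirror the Criteo-style shapes used later in the performance script:
+
+```python
+import torch
+import torch.nn as nn
+
+class WideAndDeep(nn.Module):
+    def __init__(self, n_wide, n_cat, emb_dim, n_dense):
+        super().__init__()
+        self.wide = nn.Linear(n_wide, 1)         # memorization: linear over cross features
+        self.emb = nn.Embedding(n_cat, emb_dim)  # generalization: learned dense embeddings
+        self.deep = nn.Sequential(
+            nn.Linear(emb_dim + n_dense, 64), nn.ReLU(),
+            nn.Linear(64, 1),
+        )
+
+    def forward(self, wide_x, cat_ids, dense_x):
+        deep_in = torch.cat([self.emb(cat_ids).mean(dim=1), dense_x], dim=1)
+        return torch.sigmoid(self.wide(wide_x) + self.deep(deep_in))
+
+model = WideAndDeep(n_wide=1000, n_cat=1000, emb_dim=16, n_dense=13)
+prob = model(torch.rand(4, 1000), torch.randint(0, 1000, (4, 26)), torch.rand(4, 13))
+```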
+ +## Setup + +### Install + +```bash +pip3 install tf2onnx +pip3 install pycuda +pip3 install onnxsim +pip3 install py-libnuma==1.2 +``` + +### Download + +Pretrained model: + +Dataset: + +```bash +# Go to path of this model +cd ${PROJ_ROOT}/models/recommendationwidedeep/ixrt + +# export onnx +python3 export_onnx.py --model_path open_wide_deep_saved_model --output_path open_wide_deep_saved_model/widedeep.onnx + +# Simplify onnx model +onnxsim open_wide_deep_saved_model/widedeep.onnx open_wide_deep_saved_model/widedeep_sim.onnx +python3 deploy.py --model_path open_wide_deep_saved_model/widedeep_sim.onnx --output_path open_wide_deep_saved_model/widedeep_sim.onnx +python3 change2dynamic.py --model_path open_wide_deep_saved_model/widedeep_sim.onnx --output_path open_wide_deep_saved_model/widedeep_sim.onnx +``` + +## Inference + +```bash +export ORIGIN_ONNX_NAME=./open_wide_deep_saved_model/widedeep_sim +export OPTIMIER_FILE=${IXRT_OSS_ROOT}/tools/optimizer/optimizer.py +export PROJ_PATH=./ +``` + +### Performance + +```bash +bash scripts/infer_widedeep_fp16_performance.sh +``` + +### Accuracy + +If you want to evaluate the accuracy of this model, please visit the website: , which integrates inference and training of many models under this framework, supporting the ILUVATAR backend + +For detailed steps regarding this model, please refer to this document: Note: You need to modify the relevant paths in the code to your own correct paths. + +```bash +# Clone ByteMLPerf +git clone -b iluvatar_general_infer https://github.com/yudefu/ByteMLPerf.git +pip3 install -r ./ByteMLPerf/byte_infer_perf/general_perf/requirements.txt +mv perf_engine.py ./ByteMLPerf/byte_infer_perf/general_perf/core/perf_engine.py + +# Get eval.csv and onnx +mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/regular/open_wide_deep_saved_model +mkdir -p ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/ + +wget https://lf-bytemlperf.17mh.cn/obj/bytemlperf-zoo/eval.csv +mv eval.csv ./ByteMLPerf/byte_infer_perf/general_perf/datasets/open_criteo_kaggle/ + +wget http://files.deepspark.org.cn:880/deepspark/widedeep_dynamicshape_new.onnx +mv widedeep_dynamicshape_new.onnx ./ByteMLPerf/byte_infer_perf/general_perf/model_zoo/regular/open_wide_deep_saved_model/ + +# Run Acc scripts +cd ./ByteMLPerf/byte_infer_perf/general_perf +python3 core/perf_engine.py --hardware_type ILUVATAR --task widedeep-tf-fp32 +``` + +## Results + +| Model | BatchSize | Precision | FPS | ACC | +| --------- | --------- | --------- | -------- | ------- | +| Wide&Deep | 1024 | FP16 | 77073.93 | 0.74597 | diff --git a/models/recommendation/widedeep/ixrt/change2dynamic.py b/models/recommendation/widedeep/ixrt/change2dynamic.py new file mode 100644 index 0000000000000000000000000000000000000000..e9bcf6f156bcd1bfb6e9a7e150c0eb4461e70f98 --- /dev/null +++ b/models/recommendation/widedeep/ixrt/change2dynamic.py @@ -0,0 +1,95 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License. + +import argparse +import onnx + +def change_input_output_dim(model): + # Use some symbolic name not used for any other dimension + sym_batch_dim = "batch" + # sym_batch_dim = -1 + + # The following code changes the first dimension of every input to be batch-dim + # Modify as appropriate ... note that this requires all inputs to + # have the same batch_dim + inputs = model.graph.input + for input in inputs: + # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. + # Add checks as needed. + dim1 = input.type.tensor_type.shape.dim[0] + # update dim to be a symbolic value + dim1.dim_param = sym_batch_dim + + if input.name == "new_categorical_placeholder:0": + input.type.tensor_type.shape.dim[1].dim_value = int(2) + elif input.name == "new_numeric_placeholder:0": + input.type.tensor_type.shape.dim[1].dim_value = int(13) + elif input.name == "import/head/predictions/zeros_like:0": + input.type.tensor_type.shape.dim[1].dim_value = int(1) + + # or update it to be an actual value: + # dim1.dim_value = actual_batch_dim + + outputs = model.graph.output + + for output in outputs: + # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. + # Add checks as needed. + dim1 = output.type.tensor_type.shape.dim[0] + # update dim to be a symbolic value + dim1.dim_param = sym_batch_dim + +def change_input_node_name(model, input_names): + for i,input in enumerate(model.graph.input): + input_name = input_names[i] + for node in model.graph.node: + for i, name in enumerate(node.input): + if name == input.name: + node.input[i] = input_name + input.name = input_name + + +def change_output_node_name(model, output_names): + for i,output in enumerate(model.graph.output): + output_name = output_names[i] + for node in model.graph.node: + for i, name in enumerate(node.output): + if name == output.name: + node.output[i] = output_name + output.name = output_name + + +def get_args(): + """Parse commandline.""" + parser = argparse.ArgumentParser() + parser.add_argument("--model_path", default="") + parser.add_argument("--output_path", default="") + args = parser.parse_args() + return args + + + +if __name__ == "__main__": + args = get_args() + model = onnx.load(args.model_path) + change_input_output_dim(model) + model = onnx.load(args.model_path) + for input in model.graph.input: + for node in model.graph.node: + for i, name in enumerate(node.input): + if name == input.name: + node.input[i] =name.replace(':',"") + input.name=input.name.replace(':',"")# 保存修改后的模型 + onnx.save(model, args.output_path) \ No newline at end of file diff --git a/models/recommendation/widedeep/ixrt/deploy.py b/models/recommendation/widedeep/ixrt/deploy.py new file mode 100644 index 0000000000000000000000000000000000000000..0e1ac694f9a096b4aa6cb0b2acbbc689e5d901db --- /dev/null +++ b/models/recommendation/widedeep/ixrt/deploy.py @@ -0,0 +1,93 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import onnx +import argparse +import copy + +from typing import Union, Callable, List + +from tensorrt.deploy.api import * +from tensorrt.deploy.backend.onnx.converter import default_converter +from tensorrt.deploy.backend.torch.executor.operators._operators import to_py_type +from tensorrt.deploy.ir.operator_attr import BaseOperatorAttr, EmptyAttr +from tensorrt.deploy.ir.operator_type import OperatorType as OP +from tensorrt.deploy.ir import operator_attr as attr, Operator, generate_operator_name +from tensorrt.deploy.fusion import BasePass, PatternGraph, build_sequence_graph, GraphMatcher, PassSequence +from tensorrt.deploy.ir import Graph +from tensorrt.deploy.quantizer.quant_operator.base import quant_single_input_operator +from tensorrt.deploy.backend.onnx.converter import convert_onnx_operator +from tensorrt.deploy.api import GraphTransform, create_source, create_target + +class FuseGemmPass(BasePass): + def process(self, graph: Graph) -> Graph: + self.transform = GraphTransform(graph) + + self.transform.find_sequence_subgraph( + pattern=[OP.MATMUL, OP.ADD], callback=self.fuse_gemm, strict=True + ) + return graph + + def fuse_gemm(self, graph, pattern: PatternGraph): + matmul = pattern.nodes[0] + add = pattern.nodes[1] + + if len(add.operator.inputs) != 2: + return + + b_var = graph.get_variable(matmul.operator.inputs[1]) + if not graph.is_leaf_variable(b_var) or b_var.value is None: + return + + if b_var.value.ndim != 2: + return + + bias_var = None + for input in add.operator.inputs: + if input not in matmul.operator.outputs: + bias_var = input + + matmul.operator.inputs.append(bias_var) + self.transform.delete_operator_and_link( + add.operator, link_input=matmul.operator.outputs[0] + ) + + matmul.operator.op_type = OP.GEMM + matmul.operator.attributes = attr.GemmAttr(transB=1) + +def replace_input(graph): + transformer = GraphTransform(graph) + from_op = graph.get_operator("Shape__8") + to_op = graph.get_operator('import/head/predictions/zeros_like') + var = graph.get_variable("import/head/predictions/zeros_like:0") + transformer.delete_operators_between_op_op(from_op=from_op, to_op=to_op) + transformer.add_input("import/head/predictions/zeros_like:0") + return graph + + +def get_args(): + """Parse commandline.""" + parser = argparse.ArgumentParser() + parser.add_argument("--model_path", default="") + parser.add_argument("--output_path", default="") + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = get_args() + graph = create_source(args.model_path)() + graph = FuseGemmPass().process(graph) + graph = replace_input(graph) + create_target(saved_path=args.output_path).export(graph) \ No newline at end of file diff --git a/models/recommendation/widedeep/ixrt/export_onnx.py b/models/recommendation/widedeep/ixrt/export_onnx.py new file mode 100644 index 0000000000000000000000000000000000000000..475dddd7c2ab27b6ca342be98ea92d2c791ff60b --- /dev/null +++ b/models/recommendation/widedeep/ixrt/export_onnx.py @@ -0,0 +1,70 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import tf2onnx +from tf2onnx import tf_loader +import argparse +ONNX_OPSET = 11 + +def _convert_graphdef_to_onnx(graph_def, + inputs=None, + outputs=None, + output_path='', + **kwargs): + + inputs_as_nchw = kwargs.get('inputs_as_nchw', None) + custom_ops = kwargs.get('custom_ops', None) + custom_op_handlers = kwargs.get('custom_op_handlers', None) + custom_rewriter = kwargs.get('custom_rewriter', None) + extra_opset = kwargs.get('extra_opset', None) + large_model = kwargs.get('large_model', False) + name = kwargs.get('name', 'habana_convert') + target = kwargs.get('target', None) + shape_override = kwargs.get('shape_override', {}) + + tf2onnx.convert.from_graph_def(graph_def, + name=name, + input_names=inputs, + output_names=outputs, + opset=ONNX_OPSET, + custom_ops=custom_ops, + custom_op_handlers=custom_op_handlers, + custom_rewriter=custom_rewriter, + inputs_as_nchw=inputs_as_nchw, + extra_opset=extra_opset, + shape_override=shape_override, + target=target, + large_model=large_model, + output_path=output_path) + return output_path + +def savedmodel_to_onnx(model_path, output_path='', **kwargs): + inputs = kwargs.get('inputs', None) + outputs = kwargs.get('outputs', None) + graph_def, inputs, outputs = tf_loader.from_saved_model( + model_path, inputs, outputs) + return _convert_graphdef_to_onnx(graph_def, inputs, outputs, output_path, **kwargs) + +def get_args(): + """Parse commandline.""" + parser = argparse.ArgumentParser() + parser.add_argument("--model_path", default="") + parser.add_argument("--output_path", default="") + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = get_args() + savedmodel_to_onnx(args.model_path, args.output_path) \ No newline at end of file diff --git a/models/recommendation/widedeep/ixrt/scripts/infer_widedeep_fp16_performance.sh b/models/recommendation/widedeep/ixrt/scripts/infer_widedeep_fp16_performance.sh new file mode 100644 index 0000000000000000000000000000000000000000..866adb44937ac5c616b856e13122073ea5cb4233 --- /dev/null +++ b/models/recommendation/widedeep/ixrt/scripts/infer_widedeep_fp16_performance.sh @@ -0,0 +1,38 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +set -x +ORIGIN_ONNX=${ORIGIN_ONNX_NAME}.onnx +cd ${PROJ_PATH} + +run(){ + BS=${1:-1} + TARGET_ONNX=${ORIGIN_ONNX_NAME}_end.onnx + TARGET_ENGINE=${ORIGIN_ONNX_NAME}_bs_${BS}_end.engine + if [[ ! 
-f "${ORIGIN_ONNX}" ]];then + echo "${ORIGIN_ONNX} not exists!" + exit 1 + fi + + # Graph optimize + python3 ${OPTIMIER_FILE} --onnx ${ORIGIN_ONNX} --input_shapes "new_categorical_placeholder0:$((26 * ${BS}))x2,new_numeric_placeholder0:${BS}x13,import/head/predictions/zeros_like0:${BS}x1" + # Build Engine + ixrtexec --onnx ${TARGET_ONNX} --save_engine ${TARGET_ENGINE} --log_level error + + # Test Performance + ixrtexec --load_engine ${TARGET_ENGINE} + +} +run 1 \ No newline at end of file
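+
+# run() takes the batch size as its first argument, so other batch sizes can
+# be benchmarked with additional calls, e.g.:
+#   run 16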