From 599b50b40416cff09e7cb8b94b78e0922806e0d9 Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Tue, 8 Jul 2025 14:15:36 +0800 Subject: [PATCH] update match ixrt bert large ci result --- .../bert_large_squad/ixrt/evaluate-v1.1.py | 2 +- .../plm/bert_large_squad/ixrt/inference.py | 3 --- .../infer_bert_large_squad_fp16_accuracy.sh | 2 +- ...infer_bert_large_squad_fp16_performance.sh | 2 +- .../infer_bert_large_squad_int8_accuracy.sh | 2 +- ...infer_bert_large_squad_int8_performance.sh | 2 +- tests/model_info.json | 6 ++++-- tests/run_ixrt.py | 21 ++++++++++++++++++- 8 files changed, 29 insertions(+), 11 deletions(-) diff --git a/models/nlp/plm/bert_large_squad/ixrt/evaluate-v1.1.py b/models/nlp/plm/bert_large_squad/ixrt/evaluate-v1.1.py index 7b14e838..c73db423 100644 --- a/models/nlp/plm/bert_large_squad/ixrt/evaluate-v1.1.py +++ b/models/nlp/plm/bert_large_squad/ixrt/evaluate-v1.1.py @@ -113,4 +113,4 @@ if __name__ == '__main__': with open(args.prediction_file) as prediction_file: predictions = json.load(prediction_file) f1_acc = float(args.f1_acc) - print(json.dumps(evaluate(dataset, predictions, f1_acc))) \ No newline at end of file + print(json.dumps(evaluate(dataset, predictions, f1_acc))) diff --git a/models/nlp/plm/bert_large_squad/ixrt/inference.py b/models/nlp/plm/bert_large_squad/ixrt/inference.py index 1ed6e088..95b88dc5 100644 --- a/models/nlp/plm/bert_large_squad/ixrt/inference.py +++ b/models/nlp/plm/bert_large_squad/ixrt/inference.py @@ -408,9 +408,6 @@ if __name__ == '__main__': qps = math.ceil(len(squad_examples)/args.batch_size)*args.batch_size/infer_time print(f"Latency QPS: {qps} sentences/s") - metricResult = {"metricResult": {}} - metricResult["metricResult"]["qps"] = qps - print(metricResult) with open(output_prediction_file, "w") as f: f.write(json.dumps(all_predictions, indent=4)) diff --git a/models/nlp/plm/bert_large_squad/ixrt/script/infer_bert_large_squad_fp16_accuracy.sh b/models/nlp/plm/bert_large_squad/ixrt/script/infer_bert_large_squad_fp16_accuracy.sh index a317feaa..e0b047b3 100644 --- a/models/nlp/plm/bert_large_squad/ixrt/script/infer_bert_large_squad_fp16_accuracy.sh +++ b/models/nlp/plm/bert_large_squad/ixrt/script/infer_bert_large_squad_fp16_accuracy.sh @@ -19,7 +19,7 @@ do done project_path=./ checkpoints_path=${project_path}/data/checkpoints/bert-large-uncased -datasets_path=${project_path}/data/datasets/squad +datasets_path=${project_path}/data/datasets echo 'USE_TRT='${USE_TRT} export USE_TRT=$USE_TRT diff --git a/models/nlp/plm/bert_large_squad/ixrt/script/infer_bert_large_squad_fp16_performance.sh b/models/nlp/plm/bert_large_squad/ixrt/script/infer_bert_large_squad_fp16_performance.sh index c10f0198..ff99d695 100644 --- a/models/nlp/plm/bert_large_squad/ixrt/script/infer_bert_large_squad_fp16_performance.sh +++ b/models/nlp/plm/bert_large_squad/ixrt/script/infer_bert_large_squad_fp16_performance.sh @@ -20,7 +20,7 @@ done project_path=./ checkpoints_path=${project_path}/data/checkpoints/bert-large-uncased -datasets_path=${project_path}/data/datasets/squad +datasets_path=${project_path}/data/datasets echo 'USE_TRT='${USE_TRT} export USE_TRT=$USE_TRT diff --git a/models/nlp/plm/bert_large_squad/ixrt/script/infer_bert_large_squad_int8_accuracy.sh b/models/nlp/plm/bert_large_squad/ixrt/script/infer_bert_large_squad_int8_accuracy.sh index 00857d37..8d6e3ab6 100644 --- a/models/nlp/plm/bert_large_squad/ixrt/script/infer_bert_large_squad_int8_accuracy.sh +++ b/models/nlp/plm/bert_large_squad/ixrt/script/infer_bert_large_squad_int8_accuracy.sh @@ -20,7 +20,7 @@ done project_path=./ checkpoints_path=${project_path}/data/checkpoints/bert-large-uncased -datasets_path=${project_path}/data/datasets/squad +datasets_path=${project_path}/data/datasets echo 'USE_TRT='${USE_TRT} export USE_TRT=$USE_TRT diff --git a/models/nlp/plm/bert_large_squad/ixrt/script/infer_bert_large_squad_int8_performance.sh b/models/nlp/plm/bert_large_squad/ixrt/script/infer_bert_large_squad_int8_performance.sh index d484f7bf..1a6089e2 100644 --- a/models/nlp/plm/bert_large_squad/ixrt/script/infer_bert_large_squad_int8_performance.sh +++ b/models/nlp/plm/bert_large_squad/ixrt/script/infer_bert_large_squad_int8_performance.sh @@ -20,7 +20,7 @@ done project_path=./ checkpoints_path=${project_path}/data/checkpoints/bert-large-uncased -datasets_path=${project_path}/data/datasets/squad +datasets_path=${project_path}/data/datasets echo 'USE_TRT='${USE_TRT} export USE_TRT=$USE_TRT diff --git a/tests/model_info.json b/tests/model_info.json index 58adf01b..b6f54ebb 100644 --- a/tests/model_info.json +++ b/tests/model_info.json @@ -5979,7 +5979,8 @@ "download_url": "https://huggingface.co/neuralmagic/bert-large-uncased-finetuned-squadv1", "need_third_part": "", "precisions": [ - "fp16" + "fp16", + "int8" ], "type": "inference", "hasDemo": false, @@ -6012,7 +6013,8 @@ "download_url": "https://local/bert-large-uncased", "need_third_part": "", "precisions": [ - "fp16" + "fp16", + "int8" ], "type": "inference", "hasDemo": false, diff --git a/tests/run_ixrt.py b/tests/run_ixrt.py index be97a42e..72c7a9cb 100644 --- a/tests/run_ixrt.py +++ b/tests/run_ixrt.py @@ -481,12 +481,14 @@ def run_nlp_testcase(model): elif model_name == "bert_large_squad": script = f""" set -x + cd ../{model['model_path']}/ bash script/infer_bert_large_squad_fp16_accuracy.sh bash script/infer_bert_large_squad_fp16_performance.sh """ if prec == "int8": script = f""" set -x + cd ../{model['model_path']}/ bash script/infer_bert_large_squad_int8_accuracy.sh bash script/infer_bert_large_squad_int8_performance.sh """ @@ -507,11 +509,28 @@ def run_nlp_testcase(model): pattern = METRIC_PATTERN matchs = re.findall(pattern, sout) result["result"].setdefault(prec, {"status": "FAIL"}) - logging.debug(f"matchs:\n{matchs}") for m in matchs: result["result"][prec].update(get_metric_result(m)) result["result"][prec]["status"] = "PASS" + + if model_name == "bert_large_squad": + patterns = { + "LatencyQPS": r"Latency QPS\s*:\s*(\d+\.?\d*)", + "exact_match": r"\"exact_match\"\s*:\s*(\d+\.?\d*)", + "f1": r"\"f1\"\s*:\s*(\d+\.?\d*)" + } + combined_pattern = re.compile("|".join(f"(?P<{name}>{pattern})" for name, pattern in patterns.items())) + matchs = combined_pattern.finditer(sout) + result["result"].setdefault(prec, {"status": "FAIL"}) + for match in matchs: + result["result"][prec]["status"] = "PASS" + for name, value in match.groupdict().items(): + if value: + result["result"][prec][name] = float(f"{float(value.split(':')[1].strip()):.3f}") + break + + logging.debug(f"matchs:\n{matchs}") result["result"][prec]["Cost time (s)"] = t return result -- Gitee