diff --git a/models/nlp/plm/bert_large_squad/ixrt/evaluate-v1.1.py b/models/nlp/plm/bert_large_squad/ixrt/evaluate-v1.1.py index 7b14e8380d6a1c7348633ffacc2cd6fb2818e0a2..c73db423d3b2e357de0527271f1d2d945c6bff76 100644 --- a/models/nlp/plm/bert_large_squad/ixrt/evaluate-v1.1.py +++ b/models/nlp/plm/bert_large_squad/ixrt/evaluate-v1.1.py @@ -113,4 +113,4 @@ if __name__ == '__main__': with open(args.prediction_file) as prediction_file: predictions = json.load(prediction_file) f1_acc = float(args.f1_acc) - print(json.dumps(evaluate(dataset, predictions, f1_acc))) \ No newline at end of file + print(json.dumps(evaluate(dataset, predictions, f1_acc))) diff --git a/models/nlp/plm/bert_large_squad/ixrt/inference.py b/models/nlp/plm/bert_large_squad/ixrt/inference.py index 1ed6e088911d9c17166b85d5b6df81a8e3d0c667..95b88dc5153e4a8cf3f157ab2d6d980316747ed3 100644 --- a/models/nlp/plm/bert_large_squad/ixrt/inference.py +++ b/models/nlp/plm/bert_large_squad/ixrt/inference.py @@ -408,9 +408,6 @@ if __name__ == '__main__': qps = math.ceil(len(squad_examples)/args.batch_size)*args.batch_size/infer_time print(f"Latency QPS: {qps} sentences/s") - metricResult = {"metricResult": {}} - metricResult["metricResult"]["qps"] = qps - print(metricResult) with open(output_prediction_file, "w") as f: f.write(json.dumps(all_predictions, indent=4)) diff --git a/models/nlp/plm/bert_large_squad/ixrt/script/infer_bert_large_squad_fp16_accuracy.sh b/models/nlp/plm/bert_large_squad/ixrt/script/infer_bert_large_squad_fp16_accuracy.sh index a317feaa63313820fe79e0c23c57d0706314fa27..e0b047b3a96c5e0d01e125187e7d32fa20f4fcc9 100644 --- a/models/nlp/plm/bert_large_squad/ixrt/script/infer_bert_large_squad_fp16_accuracy.sh +++ b/models/nlp/plm/bert_large_squad/ixrt/script/infer_bert_large_squad_fp16_accuracy.sh @@ -19,7 +19,7 @@ do done project_path=./ checkpoints_path=${project_path}/data/checkpoints/bert-large-uncased -datasets_path=${project_path}/data/datasets/squad +datasets_path=${project_path}/data/datasets echo 'USE_TRT='${USE_TRT} export USE_TRT=$USE_TRT diff --git a/models/nlp/plm/bert_large_squad/ixrt/script/infer_bert_large_squad_fp16_performance.sh b/models/nlp/plm/bert_large_squad/ixrt/script/infer_bert_large_squad_fp16_performance.sh index c10f0198c68aa8aafa3fa7815a5dd5d25a0a6391..ff99d6955bcb920edaed111106ebf8bb1e9e4f51 100644 --- a/models/nlp/plm/bert_large_squad/ixrt/script/infer_bert_large_squad_fp16_performance.sh +++ b/models/nlp/plm/bert_large_squad/ixrt/script/infer_bert_large_squad_fp16_performance.sh @@ -20,7 +20,7 @@ done project_path=./ checkpoints_path=${project_path}/data/checkpoints/bert-large-uncased -datasets_path=${project_path}/data/datasets/squad +datasets_path=${project_path}/data/datasets echo 'USE_TRT='${USE_TRT} export USE_TRT=$USE_TRT diff --git a/models/nlp/plm/bert_large_squad/ixrt/script/infer_bert_large_squad_int8_accuracy.sh b/models/nlp/plm/bert_large_squad/ixrt/script/infer_bert_large_squad_int8_accuracy.sh index 00857d37c0e4b8923721608c89d0fc069602f77f..8d6e3ab6813616a67b8c94c0d0035333f13ae7a9 100644 --- a/models/nlp/plm/bert_large_squad/ixrt/script/infer_bert_large_squad_int8_accuracy.sh +++ b/models/nlp/plm/bert_large_squad/ixrt/script/infer_bert_large_squad_int8_accuracy.sh @@ -20,7 +20,7 @@ done project_path=./ checkpoints_path=${project_path}/data/checkpoints/bert-large-uncased -datasets_path=${project_path}/data/datasets/squad +datasets_path=${project_path}/data/datasets echo 'USE_TRT='${USE_TRT} export USE_TRT=$USE_TRT diff --git a/models/nlp/plm/bert_large_squad/ixrt/script/infer_bert_large_squad_int8_performance.sh b/models/nlp/plm/bert_large_squad/ixrt/script/infer_bert_large_squad_int8_performance.sh index d484f7bf2ae32ccb4f5642760d7c67eb5c0c6632..1a6089e263c339a524d9466e824e58ce0cefd3db 100644 --- a/models/nlp/plm/bert_large_squad/ixrt/script/infer_bert_large_squad_int8_performance.sh +++ b/models/nlp/plm/bert_large_squad/ixrt/script/infer_bert_large_squad_int8_performance.sh @@ -20,7 +20,7 @@ done project_path=./ checkpoints_path=${project_path}/data/checkpoints/bert-large-uncased -datasets_path=${project_path}/data/datasets/squad +datasets_path=${project_path}/data/datasets echo 'USE_TRT='${USE_TRT} export USE_TRT=$USE_TRT diff --git a/tests/model_info.json b/tests/model_info.json index 58adf01b0326d3a63a27d3bf3b1d31ebde2b15d3..b6f54ebb5b7be31eee4524d8baa5ce5a5ce74dbb 100644 --- a/tests/model_info.json +++ b/tests/model_info.json @@ -5979,7 +5979,8 @@ "download_url": "https://huggingface.co/neuralmagic/bert-large-uncased-finetuned-squadv1", "need_third_part": "", "precisions": [ - "fp16" + "fp16", + "int8" ], "type": "inference", "hasDemo": false, @@ -6012,7 +6013,8 @@ "download_url": "https://local/bert-large-uncased", "need_third_part": "", "precisions": [ - "fp16" + "fp16", + "int8" ], "type": "inference", "hasDemo": false, diff --git a/tests/run_ixrt.py b/tests/run_ixrt.py index be97a42e2e6884f633334bec479293e875660baf..72c7a9cb5bd0ff34f28abc92d8a546eba29fa58a 100644 --- a/tests/run_ixrt.py +++ b/tests/run_ixrt.py @@ -481,12 +481,14 @@ def run_nlp_testcase(model): elif model_name == "bert_large_squad": script = f""" set -x + cd ../{model['model_path']}/ bash script/infer_bert_large_squad_fp16_accuracy.sh bash script/infer_bert_large_squad_fp16_performance.sh """ if prec == "int8": script = f""" set -x + cd ../{model['model_path']}/ bash script/infer_bert_large_squad_int8_accuracy.sh bash script/infer_bert_large_squad_int8_performance.sh """ @@ -507,11 +509,28 @@ def run_nlp_testcase(model): pattern = METRIC_PATTERN matchs = re.findall(pattern, sout) result["result"].setdefault(prec, {"status": "FAIL"}) - logging.debug(f"matchs:\n{matchs}") for m in matchs: result["result"][prec].update(get_metric_result(m)) result["result"][prec]["status"] = "PASS" + + if model_name == "bert_large_squad": + patterns = { + "LatencyQPS": r"Latency QPS\s*:\s*(\d+\.?\d*)", + "exact_match": r"\"exact_match\"\s*:\s*(\d+\.?\d*)", + "f1": r"\"f1\"\s*:\s*(\d+\.?\d*)" + } + combined_pattern = re.compile("|".join(f"(?P<{name}>{pattern})" for name, pattern in patterns.items())) + matchs = combined_pattern.finditer(sout) + result["result"].setdefault(prec, {"status": "FAIL"}) + for match in matchs: + result["result"][prec]["status"] = "PASS" + for name, value in match.groupdict().items(): + if value: + result["result"][prec][name] = float(f"{float(value.split(':')[1].strip()):.3f}") + break + + logging.debug(f"matchs:\n{matchs}") result["result"][prec]["Cost time (s)"] = t return result