From 2ed657deeae9f4e23cb96e456b3ea4d545461216 Mon Sep 17 00:00:00 2001 From: yh_silence Date: Tue, 3 Jun 2025 17:37:29 +0800 Subject: [PATCH 1/3] openai chat text client --- .../calculators/default_perf_metric_calculator.py | 13 ++++++++----- .../calculators/stable_perf_metric_calculator.py | 12 ++++++++---- .../benchmark/clients/openai_chat_text_client.py | 2 ++ 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/ais_bench/benchmark/calculators/default_perf_metric_calculator.py b/ais_bench/benchmark/calculators/default_perf_metric_calculator.py index a9a7864..5a92882 100644 --- a/ais_bench/benchmark/calculators/default_perf_metric_calculator.py +++ b/ais_bench/benchmark/calculators/default_perf_metric_calculator.py @@ -63,11 +63,14 @@ class DefaultPerfMetricCalculator(BasePerfMetricCalculator): self.infer_time[stage_name] = max(result["end_time"]) - min(result["start_time"]) per_request_avg_decode_time = [] # Compute the average decode latency per request - for i, value in enumerate(result["seq_latency"]): - if value: # Skip empty lists - tpot = (value - result["prefill_latency"][i]) / result["generate_tokens_len"][i] - per_request_avg_decode_time.append(tpot) - result["average_decode_latencies"] = per_request_avg_decode_time[:] + if not math.isclose(sum(result["prefill_latency"]), 0): + for i, value in enumerate(result["seq_latency"]): + if value: # Skip empty lists + tpot = (value - result["prefill_latency"][i]) / result["generate_tokens_len"][i] + per_request_avg_decode_time.append(tpot) + result["average_decode_latencies"] = per_request_avg_decode_time[:] + else: + result["average_decode_latencies"] = result["prefill_latency"] self.logger.info("Converting perf results of stage ...") self.result[stage_name] = self.convert_result(copy.deepcopy(result)) diff --git a/ais_bench/benchmark/calculators/stable_perf_metric_calculator.py b/ais_bench/benchmark/calculators/stable_perf_metric_calculator.py index 50ab608..ff91bb5 100644 --- 
a/ais_bench/benchmark/calculators/stable_perf_metric_calculator.py +++ b/ais_bench/benchmark/calculators/stable_perf_metric_calculator.py @@ -107,10 +107,13 @@ class StablePerfMetricCalculator(BasePerfMetricCalculator): self.infer_time[stage_name] = self.stage_section[1] - self.stage_section[0] per_request_avg_decode_time = [] # Compute the average decode latency per request - for i, value in enumerate(result["seq_latency"]): - if value: # Skip empty lists - tpot = (value - result["prefill_latency"][i]) / result["generate_tokens_len"][i] - per_request_avg_decode_time.append(tpot) + if not math.isclose(sum(result["prefill_latency"]), 0): + for i, value in enumerate(result["seq_latency"]): + if value: # Skip empty lists + tpot = (value - result["prefill_latency"][i]) / result["generate_tokens_len"][i] + per_request_avg_decode_time.append(tpot) + result["average_decode_latencies"] = per_request_avg_decode_time[:] + else: + result["average_decode_latencies"] = result["prefill_latency"] - result["average_decode_latencies"] = per_request_avg_decode_time[:] self.logger.info("Converting perf results of stage ...") self.result[stage_name] = self.convert_result(copy.deepcopy(result)) diff --git a/ais_bench/benchmark/clients/openai_chat_text_client.py b/ais_bench/benchmark/clients/openai_chat_text_client.py index ccd1e17..9086fbf 100644 --- a/ais_bench/benchmark/clients/openai_chat_text_client.py +++ b/ais_bench/benchmark/clients/openai_chat_text_client.py @@ -25,4 +25,6 @@ class OpenAIChatTextClient(BaseClient, ABC): if generated_text: inputs.output = generated_text inputs.num_generated_chars = len(generated_text) + inputs.prefill_latency = res.get("prefill_time", 0) + inputs.decode_cost = res.get("decode_time_arr", []) return generated_text -- Gitee From 62baadaee395291eb3775226508eca224fa0f320 Mon Sep 17 00:00:00 2001 From: yh_silence Date: Tue, 3 Jun 2025 18:49:12 +0800 Subject: [PATCH 2/3] openai chat text client --- ais_bench/benchmark/clients/openai_chat_text_client.py |
5 +++++ 1 file changed, 5 insertions(+) diff --git a/ais_bench/benchmark/clients/openai_chat_text_client.py b/ais_bench/benchmark/clients/openai_chat_text_client.py index 9086fbf..eed0d6e 100644 --- a/ais_bench/benchmark/clients/openai_chat_text_client.py +++ b/ais_bench/benchmark/clients/openai_chat_text_client.py @@ -27,4 +27,9 @@ class OpenAIChatTextClient(BaseClient, ABC): inputs.num_generated_chars = len(generated_text) inputs.prefill_latency = res.get("prefill_time", 0) inputs.decode_cost = res.get("decode_time_arr", []) + inputs.chunk_time_point_list = [ + inputs.start_time * 1000, + inputs.start_time * 1000 + inputs.prefill_latency, + inputs.start_time * 1000 + inputs.prefill_latency + sum(inputs.decode_cost), + ] return generated_text -- Gitee From 128b460adcb4137963bbcd52b140185f1b370447 Mon Sep 17 00:00:00 2001 From: yh_silence Date: Tue, 3 Jun 2025 19:14:52 +0800 Subject: [PATCH 3/3] openai chat text client --- ais_bench/benchmark/clients/openai_chat_text_client.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/ais_bench/benchmark/clients/openai_chat_text_client.py b/ais_bench/benchmark/clients/openai_chat_text_client.py index eed0d6e..08fb01d 100644 --- a/ais_bench/benchmark/clients/openai_chat_text_client.py +++ b/ais_bench/benchmark/clients/openai_chat_text_client.py @@ -26,10 +26,8 @@ class OpenAIChatTextClient(BaseClient, ABC): inputs.output = generated_text inputs.num_generated_chars = len(generated_text) inputs.prefill_latency = res.get("prefill_time", 0) + inputs.chunk_time_point_list.append(inputs.chunk_time_point_list[0] + inputs.prefill_latency) inputs.decode_cost = res.get("decode_time_arr", []) - inputs.chunk_time_point_list = [ - inputs.start_time * 1000, - inputs.start_time * 1000 + inputs.prefill_latency, - inputs.start_time * 1000 + inputs.prefill_latency + sum(inputs.decode_cost), - ] + inputs.chunk_time_point_list.append(inputs.chunk_time_point_list[1] + sum(inputs.decode_cost)) + return generated_text -- 
Gitee