From 0123bd4daba925fee288306aa3e9bac25fd6ca94 Mon Sep 17 00:00:00 2001 From: liu lili Date: Thu, 27 Mar 2025 10:08:01 +0800 Subject: [PATCH] lll: optimize pad block tables --- .../model_executor/models/mf_models/deepseek_v3.py | 11 ++++------- .../model_executor/models/mf_models/qwen2.py | 11 ++++------- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/vllm_mindspore/model_executor/models/mf_models/deepseek_v3.py b/vllm_mindspore/model_executor/models/mf_models/deepseek_v3.py index a063fd09..ceeffdb2 100644 --- a/vllm_mindspore/model_executor/models/mf_models/deepseek_v3.py +++ b/vllm_mindspore/model_executor/models/mf_models/deepseek_v3.py @@ -74,13 +74,10 @@ def _pad_block_table(block_tables, seq_length, block_size): ) return fake_block_tables - block_tables_list = block_tables.tolist() - padded_block_tables = [ - _pad_to_max(block_table, seq_length // block_size) - for block_table in block_tables_list - ] - - return Tensor(np.array(padded_block_tables).astype(np.int32)) + block_tables_np = block_tables.asnumpy().astype(np.int32) + pad_size = seq_length // block_size - block_tables_np.shape[1] + block_tables_np = np.pad(block_tables_np, ((0, 0),(0, pad_size)), 'constant', constant_values=(-1, -1)) + return block_tables_np def _batch_seq(input_tokens, prefill): diff --git a/vllm_mindspore/model_executor/models/mf_models/qwen2.py b/vllm_mindspore/model_executor/models/mf_models/qwen2.py index bb9ffcd0..5e2b58e0 100644 --- a/vllm_mindspore/model_executor/models/mf_models/qwen2.py +++ b/vllm_mindspore/model_executor/models/mf_models/qwen2.py @@ -65,13 +65,10 @@ def _pad_block_table(block_tables, seq_length, block_size): ) return fake_block_tables - block_tables_list = block_tables.tolist() - padded_block_tables = [ - _pad_to_max(block_table, seq_length // block_size) - for block_table in block_tables_list - ] - - return Tensor(np.array(padded_block_tables).astype(np.int32)) + block_tables_np = block_tables.asnumpy().astype(np.int32) + pad_size = seq_length // block_size - block_tables_np.shape[1] + block_tables_np = np.pad(block_tables_np, ((0, 0),(0, pad_size)), 'constant', constant_values=(-1, -1)) + return block_tables_np def _batch_seq(input_tokens, prefill): -- Gitee