diff --git a/vllm_mindspore/model_executor/models/mf_models/deepseek_v3.py b/vllm_mindspore/model_executor/models/mf_models/deepseek_v3.py index a063fd09e8b7df3f04ff072b6e325b1f90108798..ceeffdb2fd1a7dbf4fcc599bbbba9a88d937fd13 100644 --- a/vllm_mindspore/model_executor/models/mf_models/deepseek_v3.py +++ b/vllm_mindspore/model_executor/models/mf_models/deepseek_v3.py @@ -74,13 +74,10 @@ def _pad_block_table(block_tables, seq_length, block_size): ) return fake_block_tables - block_tables_list = block_tables.tolist() - padded_block_tables = [ - _pad_to_max(block_table, seq_length // block_size) - for block_table in block_tables_list - ] - - return Tensor(np.array(padded_block_tables).astype(np.int32)) + block_tables_np = block_tables.asnumpy().astype(np.int32) + pad_size = seq_length // block_size - block_tables_np.shape[1] + block_tables_np = np.pad(block_tables_np, ((0, 0),(0, pad_size)), 'constant', constant_values=(-1, -1)) + return block_tables_np def _batch_seq(input_tokens, prefill): diff --git a/vllm_mindspore/model_executor/models/mf_models/qwen2.py b/vllm_mindspore/model_executor/models/mf_models/qwen2.py index bb9ffcd082effaca55cc84322b060ed00fa098bb..5e2b58e0484e8c3028cf78690af72a5c2a581c03 100644 --- a/vllm_mindspore/model_executor/models/mf_models/qwen2.py +++ b/vllm_mindspore/model_executor/models/mf_models/qwen2.py @@ -65,13 +65,10 @@ def _pad_block_table(block_tables, seq_length, block_size): ) return fake_block_tables - block_tables_list = block_tables.tolist() - padded_block_tables = [ - _pad_to_max(block_table, seq_length // block_size) - for block_table in block_tables_list - ] - - return Tensor(np.array(padded_block_tables).astype(np.int32)) + block_tables_np = block_tables.asnumpy().astype(np.int32) + pad_size = seq_length // block_size - block_tables_np.shape[1] + block_tables_np = np.pad(block_tables_np, ((0, 0),(0, pad_size)), 'constant', constant_values=(-1, -1)) + return block_tables_np def _batch_seq(input_tokens, prefill):