diff --git a/vllm_mindspore/model_executor/layers/linear.py b/vllm_mindspore/model_executor/layers/linear.py
index adfbc0861f9a5e0bc2d5b261954734ba387f0336..287f71c0753725acc4d23401a2c4cc9f9f9d76ee 100644
--- a/vllm_mindspore/model_executor/layers/linear.py
+++ b/vllm_mindspore/model_executor/layers/linear.py
@@ -107,12 +107,9 @@ class UnquantizedLinearMethod(LinearMethodBase):
               layer: nn.Cell,
               x: Tensor,
               bias: Parameter = None) -> Tensor:
-        output_shape = x.shape[:-1] + (self.output_size_per_partition, )
-        x = x.view(-1, self.input_size_per_partition)
         x = self.matmul(x, layer.weight)
         if bias is not None:
             x = self.bias_add(x, bias)
-        x = x.view(output_shape)
         return x
 
@@ -336,6 +333,10 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
                       loaded_shard_id: Optional[int] = None):
         param_data = param.data
         output_dim = getattr(param, "output_dim", None)
+        if loaded_shard_id is None or output_dim is None:
+            raise NotImplementedError(
+                "For MergedColumnParallelLinear weight loader, loaded_shard_id "
+                "and output_dim must be specified.")
         tp_rank = get_tensor_model_parallel_rank()
         tp_size = get_tensor_model_parallel_world_size()
         if output_dim is not None and loaded_shard_id is not None:
@@ -435,6 +436,10 @@ class QKVParallelLinear(ColumnParallelLinear):
                       loaded_weight: Tensor,
                       loaded_shard_id: Optional[str] = None):
         output_dim = getattr(param, "output_dim", None)
+        if output_dim is None or loaded_shard_id is None:
+            raise NotImplementedError(
+                "For QKVParallelLinear weight loader, both output_dim and "
+                "loaded_shard_id must be specified.")
         tp_rank = get_tensor_model_parallel_rank()
         assert loaded_shard_id in ["q", "k", "v"]
         # If output dim is defined, use the default loading process.
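
Both added hunks use the same fail-fast pattern: the merged-column and QKV weight loaders now raise as soon as the shard metadata is missing, rather than continuing into the tensor-parallel slicing path. Below is a minimal standalone sketch (plain Python, not part of the patch; the helper name `_check_shard_args` is hypothetical) of the intended behavior when a checkpoint entry arrives without that metadata.

# Minimal sketch of the guard added to the weight loaders. The condition
# mirrors the added code; everything else is illustration only.
from typing import Optional


def _check_shard_args(output_dim: Optional[int],
                      loaded_shard_id: Optional[str]) -> None:
    # Both values must be provided before any tensor-parallel slicing of the
    # checkpoint weight is attempted.
    if output_dim is None or loaded_shard_id is None:
        raise NotImplementedError(
            "For QKVParallelLinear weight loader, both output_dim and "
            "loaded_shard_id must be specified.")


_check_shard_args(output_dim=0, loaded_shard_id="q")        # accepted
try:
    _check_shard_args(output_dim=0, loaded_shard_id=None)   # rejected
except NotImplementedError as exc:
    print(f"raises: {exc}")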