diff --git a/apps/llm/adapters.py b/apps/llm/adapters.py
index 2b90d66692695e466ffa3a478dc2391356a73486..5343cbcef6a297013eaedfa056653924938d3c84 100644
--- a/apps/llm/adapters.py
+++ b/apps/llm/adapters.py
@@ -1,46 +1,36 @@
 # Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved.
-"""LLM提供商适配器模块"""
+"""LLM提供商适配器模块 V2 - 基于新的模型注册表"""
 
 import logging
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from typing import Any, Dict, Optional
 
-logger = logging.getLogger(__name__)
-
+from apps.llm.model_types import ModelType, ChatCapabilities
 
-@dataclass
-class ModelCapabilities:
-    """模型能力描述"""
-    supports_enable_thinking: bool = False
-    supports_reasoning_content: bool = False
-    supports_extra_body: bool = True
-    supports_stream_options: bool = True
-    max_tokens_param: str = "max_completion_tokens"  # 或 "max_tokens"
-
-    # 其他可能的能力差异
-    supports_function_calling: bool = True
-    supports_json_mode: bool = True
-    supports_structured_output: bool = False
+logger = logging.getLogger(__name__)
 
 
-class LLMProviderAdapter(ABC):
-    """LLM提供商适配器基类"""
+class LLMProviderAdapterV2(ABC):
+    """LLM提供商适配器基类 V2"""
 
-    def __init__(self, provider: str, model: str):
+    def __init__(self, provider: str, model: str, capabilities: Optional[ChatCapabilities] = None):
         self.provider = provider
         self.model = model
-        self.capabilities = self._get_model_capabilities()
+        self.capabilities = capabilities or self._get_default_capabilities()
 
     @abstractmethod
-    def _get_model_capabilities(self) -> ModelCapabilities:
-        """获取模型能力"""
+    def _get_default_capabilities(self) -> ChatCapabilities:
+        """获取默认能力(当注册表中没有配置时)"""
        pass
 
     def adapt_create_params(self, base_params: Dict[str, Any], enable_thinking: bool = False) -> Dict[str, Any]:
         """适配创建参数"""
+        if not self.capabilities:
+            return base_params
+
         adapted_params = base_params.copy()
 
         # 处理思维链参数
@@ -65,6 +55,44 @@ class LLMProviderAdapter(ABC):
         if not adapted_params["extra_body"]:
             del adapted_params["extra_body"]
 
+        # 处理temperature参数
+        if not self.capabilities.supports_temperature and "temperature" in adapted_params:
+            logger.warning(f"[{self.provider}] 模型不支持temperature参数,已移除")
+            del adapted_params["temperature"]
+
+        # 处理 extra_body 中的参数过滤
+        if "extra_body" in adapted_params:
+            extra_body = adapted_params["extra_body"]
+
+            # 处理 frequency_penalty
+            if not self.capabilities.supports_frequency_penalty and "frequency_penalty" in extra_body:
+                logger.warning(f"[{self.provider}] 模型不支持frequency_penalty参数,已从extra_body移除")
+                del extra_body["frequency_penalty"]
+
+            # 处理 presence_penalty
+            if not self.capabilities.supports_presence_penalty and "presence_penalty" in extra_body:
+                logger.warning(f"[{self.provider}] 模型不支持presence_penalty参数,已从extra_body移除")
+                del extra_body["presence_penalty"]
+
+            # 处理 min_p
+            if not self.capabilities.supports_min_p and "min_p" in extra_body:
+                logger.warning(f"[{self.provider}] 模型不支持min_p参数,已从extra_body移除")
+                del extra_body["min_p"]
+
+            # 处理 top_k
+            if not self.capabilities.supports_top_k and "top_k" in extra_body:
+                logger.warning(f"[{self.provider}] 模型不支持top_k参数,已从extra_body移除")
+                del extra_body["top_k"]
+
+            # 如果 extra_body 为空,删除它
+            if not extra_body:
+                del adapted_params["extra_body"]
+
+        # 处理top_p参数(顶级参数)
+        if not self.capabilities.supports_top_p and "top_p" in adapted_params:
+            logger.warning(f"[{self.provider}] 模型不支持top_p参数,已移除")
+            del adapted_params["top_p"]
+
         # 处理其他参数适配
         if not self.capabilities.supports_extra_body and "extra_body" in adapted_params:
             logger.warning(f"[{self.provider}] 模型不支持extra_body参数,已移除")
@@ -84,142 +112,150 @@ class LLMProviderAdapter(ABC):
     def should_use_prompt_thinking(self, enable_thinking: bool) -> bool:
         """判断是否应该使用prompt方式的思维链"""
+        if not self.capabilities:
+            return False
         return enable_thinking and not self.capabilities.supports_enable_thinking
 
 
-class OpenAIAdapter(LLMProviderAdapter):
-    """OpenAI适配器"""
+class OpenAIAdapterV2(LLMProviderAdapterV2):
+    """OpenAI适配器 V2"""
 
-    def _get_model_capabilities(self) -> ModelCapabilities:
+    def _get_default_capabilities(self) -> ChatCapabilities:
         # OpenAI的o1系列支持原生thinking
         if "o1" in self.model.lower():
-            return ModelCapabilities(
+            return ChatCapabilities(
+                supports_thinking=True,
+                can_toggle_thinking=True,
                 supports_enable_thinking=True,
                 supports_reasoning_content=True,
-                supports_extra_body=True,
-                supports_stream_options=True,
                 max_tokens_param="max_completion_tokens"
             )
         else:
-            return ModelCapabilities(
-                supports_enable_thinking=False,
-                supports_reasoning_content=False,
-                supports_extra_body=True,
-                supports_stream_options=True,
+            return ChatCapabilities(
+                supports_thinking=False,
                 max_tokens_param="max_tokens"
             )
 
 
-class QwenAdapter(LLMProviderAdapter):
-    """阿里百炼Qwen适配器"""
+class QwenAdapterV2(LLMProviderAdapterV2):
+    """阿里百炼Qwen适配器 V2"""
 
-    def _get_model_capabilities(self) -> ModelCapabilities:
+    def _get_default_capabilities(self) -> ChatCapabilities:
         # Qwen系列的thinking支持情况
         if any(model_name in self.model.lower() for model_name in ["qwen2.5", "qwen-plus", "qwen-turbo"]):
-            return ModelCapabilities(
+            return ChatCapabilities(
+                supports_thinking=True,
+                can_toggle_thinking=True,
                 supports_enable_thinking=True,
                 supports_reasoning_content=True,
-                supports_extra_body=True,
-                supports_stream_options=True,
-                max_tokens_param="max_tokens"
+                supports_enable_search=True
             )
         else:
-            return ModelCapabilities(
-                supports_enable_thinking=False,
-                supports_reasoning_content=False,
-                supports_extra_body=True,
-                supports_stream_options=True,
-                max_tokens_param="max_tokens"
+            return ChatCapabilities(
+                supports_enable_search=True
             )
 
 
-class SiliconFlowAdapter(LLMProviderAdapter):
-    """硅基流动适配器"""
+class SiliconFlowAdapterV2(LLMProviderAdapterV2):
+    """硅基流动适配器 V2"""
 
-    def _get_model_capabilities(self) -> ModelCapabilities:
-        # 导入模型注册表
+    def _get_default_capabilities(self) -> ChatCapabilities:
+        # 使用模型注册表获取能力
         try:
-            from apps.llm.model_registry import get_model_thinking_support
-            supports_thinking = get_model_thinking_support(self.provider, self.model)
-        except ImportError:
-            # 如果无法导入,使用静态配置
-            thinking_supported_models = [
-                "qwen2.5-coder-32b-instruct",
-                "qwen2.5-72b-instruct",
-                "deepseek-v2.5",
-            ]
-            supports_thinking = any(model in self.model.lower() for model in thinking_supported_models)
+            from apps.llm.model_registry import global_model_registry_v2
+            capabilities = global_model_registry_v2.get_model_capabilities(
+                self.provider, self.model, ModelType.CHAT
+            )
+            if capabilities and isinstance(capabilities, ChatCapabilities):
+                return capabilities
+        except Exception as e:
+            logger.warning(f"无法从注册表获取能力: {e}")
 
-        return ModelCapabilities(
-            supports_enable_thinking=supports_thinking,
-            supports_reasoning_content=supports_thinking,
-            supports_extra_body=True,
-            supports_stream_options=True,
-            max_tokens_param="max_tokens"
-        )
+        # 默认能力
+        return ChatCapabilities()
 
 
-class DeepSeekAdapter(LLMProviderAdapter):
-    """DeepSeek适配器"""
+class DeepSeekAdapterV2(LLMProviderAdapterV2):
+    """DeepSeek适配器 V2"""
 
-    def _get_model_capabilities(self) -> ModelCapabilities:
+    def _get_default_capabilities(self) -> ChatCapabilities:
         # DeepSeek R1系列支持thinking
-        if "r1" in self.model.lower():
-            return ModelCapabilities(
-                supports_enable_thinking=True,
-                supports_reasoning_content=True,
-                supports_extra_body=True,
-                supports_stream_options=True,
-                max_tokens_param="max_tokens"
+        if "r1" in self.model.lower() or "reasoner" in self.model.lower():
+            return ChatCapabilities(
+                supports_thinking=True,
+                can_toggle_thinking=False,  # 不支持关闭思维链
+                supports_reasoning_content=True
             )
         else:
-            return ModelCapabilities(
-                supports_enable_thinking=False,
-                supports_reasoning_content=False,
-                supports_extra_body=True,
-                supports_stream_options=True,
-                max_tokens_param="max_tokens"
-            )
+            return ChatCapabilities()
 
 
-class DefaultAdapter(LLMProviderAdapter):
-    """默认适配器(保守策略)"""
+class BaichuanAdapterV2(LLMProviderAdapterV2):
+    """百川智能适配器 V2"""
 
-    def _get_model_capabilities(self) -> ModelCapabilities:
-        return ModelCapabilities(
-            supports_enable_thinking=False,
-            supports_reasoning_content=False,
-            supports_extra_body=True,
-            supports_stream_options=True,
-            max_tokens_param="max_tokens"
+    def _get_default_capabilities(self) -> ChatCapabilities:
+        return ChatCapabilities(
+            supports_extra_body=False,
+            supports_stream_options=False,
+            supports_response_format=False,
+            supports_enable_search=True
         )
 
 
-class AdapterFactory:
-    """适配器工厂"""
+class DefaultAdapterV2(LLMProviderAdapterV2):
+    """默认适配器 V2(保守策略)"""
+
+    def _get_default_capabilities(self) -> ChatCapabilities:
+        return ChatCapabilities()
+
+
+class AdapterFactoryV2:
+    """适配器工厂 V2"""
 
     _adapters = {
-        "openai": OpenAIAdapter,
-        "qwen": QwenAdapter,
-        "siliconflow": SiliconFlowAdapter,
-        "deepseek": DeepSeekAdapter,
-        "baichuan": DefaultAdapter,
-        "spark": DefaultAdapter,
-        "wenxin": DefaultAdapter,
-        "modelscope": DefaultAdapter,
-        "ollama": DefaultAdapter,
-        "vllm": DefaultAdapter,
-        "mindie": DefaultAdapter,
+        "openai": OpenAIAdapterV2,
+        "qwen": QwenAdapterV2,
+        "siliconflow": SiliconFlowAdapterV2,
+        "deepseek": DeepSeekAdapterV2,
+        "baichuan": BaichuanAdapterV2,
+        "spark": DefaultAdapterV2,
+        "wenxin": DefaultAdapterV2,
+        "modelscope": DefaultAdapterV2,
+        "ollama": DefaultAdapterV2,
+        "vllm": DefaultAdapterV2,
+        "mindie": DefaultAdapterV2,
     }
 
     @classmethod
-    def create_adapter(cls, provider: str, model: str) -> LLMProviderAdapter:
-        """创建适配器"""
-        adapter_class = cls._adapters.get(provider.lower(), DefaultAdapter)
-        return adapter_class(provider, model)
+    def create_adapter(cls, provider: str, model: str, capabilities: Optional[ChatCapabilities] = None) -> LLMProviderAdapterV2:
+        """创建适配器
+
+        Args:
+            provider: 供应商名称
+            model: 模型名称
+            capabilities: 模型能力(可选,如果提供则使用,否则使用默认)
+        """
+        adapter_class = cls._adapters.get(provider.lower(), DefaultAdapterV2)
+        return adapter_class(provider, model, capabilities)
+
+    @classmethod
+    def create_adapter_from_registry(cls, provider: str, model: str) -> LLMProviderAdapterV2:
+        """从注册表创建适配器"""
+        try:
+            from apps.llm.model_registry import global_model_registry_v2
+            capabilities = global_model_registry_v2.get_model_capabilities(
+                provider, model, ModelType.CHAT
+            )
+            if capabilities and isinstance(capabilities, ChatCapabilities):
+                return cls.create_adapter(provider, model, capabilities)
+        except Exception as e:
+            logger.warning(f"无法从注册表创建适配器: {e}")
+
+        # 回退到默认创建方式
+        return cls.create_adapter(provider, model)
 
     @classmethod
-    def register_adapter(cls, provider: str, adapter_class: type[LLMProviderAdapter]):
+    def register_adapter(cls, provider: str, adapter_class: type[LLMProviderAdapterV2]):
         """注册新的适配器"""
         cls._adapters[provider.lower()] = adapter_class
@@ -247,3 +283,9 @@ def get_provider_from_endpoint(endpoint: str) -> str:
"wenxin" else: return "unknown" + + +# 兼容旧的导入 +LLMProviderAdapter = LLMProviderAdapterV2 +AdapterFactory = AdapterFactoryV2 + diff --git a/apps/llm/model_registry.py b/apps/llm/model_registry.py index 7d95feb6a37377fd268702a15f76df84c1e6cf86..45b49b19f72f8e08310e8de2447aab22cbaa7b39 100644 --- a/apps/llm/model_registry.py +++ b/apps/llm/model_registry.py @@ -1,90 +1,168 @@ # Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. -"""模型注册表和配置管理""" +"""模型注册表和配置管理 V2 - 支持多级继承和模型类型分离""" import logging -from typing import Dict, List, Optional -from dataclasses import dataclass, asdict import json +import copy +from pathlib import Path +from typing import Dict, List, Optional, Any +from dataclasses import dataclass, asdict -logger = logging.getLogger(__name__) - +from apps.llm.model_types import ( + ModelType, + ChatCapabilities, + EmbeddingCapabilities, + RerankCapabilities, + AudioCapabilities, + ProviderCapabilities, + ModelConfig +) -@dataclass -class ModelInfo: - """模型信息""" - provider: str - model_name: str - supports_thinking: bool = False - can_toggle_thinking: bool = False # 是否支持开关思维链(仅当supports_thinking=True时有效) - supports_function_calling: bool = True - supports_json_mode: bool = True - supports_structured_output: bool = False - max_tokens_param: str = "max_tokens" - notes: str = "" +logger = logging.getLogger(__name__) -class ModelRegistry: - """模型注册表""" +class ModelRegistryV2: + """模型注册表 V2 - 支持供应商能力继承""" - def __init__(self): - self._models: Dict[str, ModelInfo] = {} - self._load_default_models() + def __init__(self, providers_config_path: Optional[str] = None, models_config_path: Optional[str] = None): + self._providers: Dict[str, ProviderCapabilities] = {} + self._models: Dict[str, ModelConfig] = {} + + # 加载配置 + if providers_config_path: + self.load_providers_from_file(providers_config_path) + if models_config_path: + self.load_models_from_file(models_config_path) - def _load_default_models(self): - """加载默认模型配置""" - default_models = [ - # OpenAI模型 - ModelInfo("openai", "gpt-4o", supports_thinking=False), - ModelInfo("openai", "gpt-4o-mini", supports_thinking=False), - ModelInfo("openai", "o1-preview", supports_thinking=True, can_toggle_thinking=True, max_tokens_param="max_completion_tokens"), - ModelInfo("openai", "o1-mini", supports_thinking=True, can_toggle_thinking=True, max_tokens_param="max_completion_tokens"), - - # 阿里百炼Qwen模型 - ModelInfo("qwen", "qwen-plus", supports_thinking=True, can_toggle_thinking=True), - ModelInfo("qwen", "qwen-turbo", supports_thinking=True, can_toggle_thinking=True), - ModelInfo("qwen", "qwen2.5-72b-instruct", supports_thinking=True, can_toggle_thinking=True), - ModelInfo("qwen", "qwen2.5-32b-instruct", supports_thinking=True, can_toggle_thinking=True), - ModelInfo("qwen", "qwen2.5-14b-instruct", supports_thinking=False), - ModelInfo("qwen", "qwen2.5-7b-instruct", supports_thinking=False), - - # SiliconFlow模型 - ModelInfo("siliconflow", "qwen2.5-coder-32b-instruct", supports_thinking=True, can_toggle_thinking=True), - ModelInfo("siliconflow", "qwen2.5-72b-instruct", supports_thinking=True, can_toggle_thinking=True), - ModelInfo("siliconflow", "Qwen/Qwen3-8B", supports_thinking=True, can_toggle_thinking=True), # Qwen3-8B支持enable_thinking - ModelInfo("siliconflow", "deepseek-v2.5", supports_thinking=True, can_toggle_thinking=False), # DeepSeek系列不支持关闭思维链 - ModelInfo("siliconflow", "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B", supports_thinking=False), # 该模型不支持enable_thinking参数 - ModelInfo("siliconflow", "yi-lightning", 
-            ModelInfo("siliconflow", "yi-lightning", supports_thinking=False),
-            ModelInfo("siliconflow", "glm-4-9b-chat", supports_thinking=False),
-
-            # DeepSeek模型
-            ModelInfo("deepseek", "deepseek-chat", supports_thinking=False),
-            ModelInfo("deepseek", "deepseek-reasoner", supports_thinking=True, can_toggle_thinking=False),  # DeepSeek系列不支持关闭思维链
-            ModelInfo("deepseek", "deepseek-r1", supports_thinking=True, can_toggle_thinking=False),  # DeepSeek系列不支持关闭思维链
-
-            # 腾讯混元模型
-            ModelInfo("tencent", "hunyuan-lite", supports_thinking=False),
-            ModelInfo("tencent", "hunyuan-standard", supports_thinking=False),
-            ModelInfo("tencent", "hunyuan-pro", supports_thinking=False),
-            ModelInfo("tencent", "hunyuan-MT-7B", supports_thinking=False),
-            ModelInfo("tencent", "hunyuan-turbo", supports_thinking=False),
-
-            # 其他提供商的默认配置
-            ModelInfo("baichuan", "baichuan2-turbo", supports_thinking=False),
-            ModelInfo("spark", "spark-lite", supports_thinking=False),
-            ModelInfo("wenxin", "ernie-4.0-turbo-8k", supports_thinking=False),
-        ]
+    def register_provider(self, provider_config: Dict[str, Any]):
+        """注册供应商配置"""
+        provider_name = provider_config["provider_name"]
+
+        # 解析各类型模型能力
+        chat_caps = None
+        if "chat_capabilities" in provider_config:
+            chat_caps = ChatCapabilities(**provider_config["chat_capabilities"])
+
+        embedding_caps = None
+        if "embedding_capabilities" in provider_config:
+            embedding_caps = EmbeddingCapabilities(**provider_config["embedding_capabilities"])
+
+        rerank_caps = None
+        if "rerank_capabilities" in provider_config:
+            rerank_caps = RerankCapabilities(**provider_config["rerank_capabilities"])
+
+        audio_caps = None
+        if "audio_capabilities" in provider_config:
+            audio_caps = AudioCapabilities(**provider_config["audio_capabilities"])
 
-        for model in default_models:
-            self.register_model(model)
+        # 创建供应商配置对象
+        provider = ProviderCapabilities(
+            provider_name=provider_name,
+            api_base_url=provider_config.get("api_base_url", ""),
+            auth_type=provider_config.get("auth_type", "bearer"),
+            auth_header=provider_config.get("auth_header", "Authorization"),
+            chat_capabilities=chat_caps,
+            embedding_capabilities=embedding_caps,
+            rerank_capabilities=rerank_caps,
+            audio_capabilities=audio_caps,
+            notes=provider_config.get("notes", "")
+        )
+
+        self._providers[provider_name] = provider
+        logger.info(f"注册供应商: {provider_name}")
 
-    def register_model(self, model_info: ModelInfo):
-        """注册模型"""
-        key = f"{model_info.provider}:{model_info.model_name}"
-        self._models[key] = model_info
+    def register_model(self, model_config: Dict[str, Any]):
+        """注册模型配置"""
+        provider = model_config["provider"]
+        model_name = model_config["model_name"]
+        key = f"{provider}:{model_name}"
+
+        # 解析模型类型
+        model_type = ModelType(model_config["model_type"])
+
+        # 解析能力配置(支持继承)
+        capabilities = None
+        if "capabilities" in model_config:
+            caps_config = model_config["capabilities"]
+
+            # 处理继承
+            if isinstance(caps_config, dict) and "_inherit" in caps_config:
+                inherit_from = caps_config["_inherit"]  # 格式: "siliconflow.chat_capabilities"
+                capabilities = self._inherit_capabilities(inherit_from, caps_config, model_type)
+            else:
+                # 直接创建能力对象
+                capabilities = self._create_capabilities(model_type, caps_config)
+
+        # 创建模型配置对象
+        model = ModelConfig(
+            provider=provider,
+            model_name=model_name,
+            model_type=model_type,
+            capabilities=capabilities,
+            series=model_config.get("series"),
+            display_name=model_config.get("display_name"),
+            context_window=model_config.get("context_window"),
+            notes=model_config.get("notes", "")
+        )
+
+        self._models[key] = model
         logger.debug(f"注册模型: {key}")
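+    # 说明:模型的 capabilities 配置支持 "_inherit" 键(格式如 "siliconflow.chat_capabilities"),
+    # 解析时先取供应商级能力作为基底,再用模型级字段逐项覆盖,见下方 _inherit_capabilities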
-    def get_model_info(self, provider: str, model_name: str) -> Optional[ModelInfo]:
-        """获取模型信息,支持智能推断"""
+    def _inherit_capabilities(self, inherit_from: str, override_config: Dict[str, Any], model_type: ModelType) -> Any:
+        """从供应商配置继承能力"""
+        # 解析继承路径: "siliconflow.chat_capabilities"
+        parts = inherit_from.split(".")
+        if len(parts) != 2:
+            logger.warning(f"无效的继承路径: {inherit_from}")
+            return None
+
+        provider_name, capability_name = parts
+        provider = self._providers.get(provider_name)
+        if not provider:
+            logger.warning(f"未找到供应商: {provider_name}")
+            return None
+
+        # 获取基础能力
+        base_capabilities = None
+        if capability_name == "chat_capabilities" and provider.chat_capabilities:
+            base_capabilities = asdict(provider.chat_capabilities)
+        elif capability_name == "embedding_capabilities" and provider.embedding_capabilities:
+            base_capabilities = asdict(provider.embedding_capabilities)
+        elif capability_name == "rerank_capabilities" and provider.rerank_capabilities:
+            base_capabilities = asdict(provider.rerank_capabilities)
+        elif capability_name == "audio_capabilities" and provider.audio_capabilities:
+            base_capabilities = asdict(provider.audio_capabilities)
+
+        if not base_capabilities:
+            logger.warning(f"未找到能力配置: {inherit_from}")
+            return None
+
+        # 合并覆盖配置
+        merged_config = copy.deepcopy(base_capabilities)
+        for key, value in override_config.items():
+            if key != "_inherit":
+                merged_config[key] = value
+
+        # 创建能力对象
+        return self._create_capabilities(model_type, merged_config)
+
+    def _create_capabilities(self, model_type: ModelType, config: Dict[str, Any]) -> Any:
+        """创建能力对象"""
+        try:
+            if model_type == ModelType.CHAT:
+                return ChatCapabilities(**config)
+            elif model_type == ModelType.EMBEDDING:
+                return EmbeddingCapabilities(**config)
+            elif model_type == ModelType.RERANK:
+                return RerankCapabilities(**config)
+            elif model_type == ModelType.AUDIO:
+                return AudioCapabilities(**config)
+        except Exception as e:
+            logger.error(f"创建能力对象失败: {e}")
+        return None
+
+    def get_model_config(self, provider: str, model_name: str) -> Optional[ModelConfig]:
+        """获取模型配置,支持智能推断"""
         key = f"{provider}:{model_name}"
 
         # 1. 直接匹配
@@ -92,28 +170,38 @@ class ModelRegistry:
             return self._models[key]
 
         # 2. 相同模型名不同供应商的能力匹配
-        model_info = self._find_by_model_name(model_name)
-        if model_info:
-            logger.debug(f"通过模型名匹配找到能力信息: {provider}:{model_name} -> {model_info.provider}:{model_info.model_name}")
-            return model_info
+        model_config = self._find_by_model_name(model_name)
+        if model_config:
+            logger.debug(f"通过模型名匹配找到配置: {provider}:{model_name} -> {model_config.provider}:{model_config.model_name}")
+            # 返回一个新的配置,使用当前provider但保留原有能力
+            return ModelConfig(
+                provider=provider,
+                model_name=model_name,
+                model_type=model_config.model_type,
+                capabilities=model_config.capabilities,
+                series=model_config.series,
+                display_name=model_config.display_name,
+                context_window=model_config.context_window,
+                notes=f"基于{model_config.provider}:{model_config.model_name}推断"
+            )
 
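+        # 注:第2步跨供应商匹配时直接复用原配置的 capabilities 对象(引用共享,未做深拷贝)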
         # 3. 系列推断逻辑
-        model_info = self._infer_by_series(provider, model_name)
-        if model_info:
-            logger.debug(f"通过系列推断找到能力信息: {provider}:{model_name} -> 系列匹配")
-            return model_info
+        model_config = self._infer_by_series(provider, model_name)
+        if model_config:
+            logger.debug(f"通过系列推断找到配置: {provider}:{model_name}")
+            return model_config
 
         return None
 
-    def _find_by_model_name(self, model_name: str) -> Optional[ModelInfo]:
-        """通过模型名查找相同模型的能力信息"""
-        for model_info in self._models.values():
-            if model_info.model_name == model_name:
-                return model_info
+    def _find_by_model_name(self, model_name: str) -> Optional[ModelConfig]:
+        """通过模型名查找相同模型的配置信息"""
+        for model_config in self._models.values():
+            if model_config.model_name == model_name:
+                return model_config
         return None
 
-    def _infer_by_series(self, provider: str, model_name: str) -> Optional[ModelInfo]:
-        """通过系列推断模型能力"""
+    def _infer_by_series(self, provider: str, model_name: str) -> Optional[ModelConfig]:
+        """通过系列推断模型配置"""
         model_lower = model_name.lower()
 
         # 定义系列匹配规则
@@ -132,12 +220,12 @@ class ModelRegistry:
             # Qwen系列
             {
                 "patterns": ["qwen2.5", "qwen-2.5"],
-                "reference_models": ["qwen2.5-72b-instruct", "qwen2.5-32b-instruct"],
+                "reference_models": ["qwen2.5-72b-instruct", "Qwen/Qwen2.5-72B-Instruct"],
                 "series_name": "Qwen2.5"
             },
             {
                 "patterns": ["qwen3", "qwen-3"],
-                "reference_models": ["qwen2.5-72b-instruct"],  # 使用qwen2.5作为参考
+                "reference_models": ["Qwen/Qwen3-8B", "qwen2.5-72b-instruct"],
                 "series_name": "Qwen3"
             },
             {
@@ -153,36 +241,49 @@ class ModelRegistry:
             },
             {
                 "patterns": ["deepseek-v2", "deepseek-v3"],
-                "reference_models": ["deepseek-v2.5"],
+                "reference_models": ["deepseek-ai/DeepSeek-V2.5"],
                 "series_name": "DeepSeek-V"
             },
-            # 其他系列
             {
-                "patterns": ["yi-"],
-                "reference_models": ["yi-lightning"],
+                "patterns": ["deepseek-chat"],
+                "reference_models": ["deepseek-chat"],
+                "series_name": "DeepSeek"
+            },
+            # Yi系列
+            {
+                "patterns": ["yi-lightning", "yi-large"],
+                "reference_models": ["01-ai/Yi-Lightning"],
                 "series_name": "Yi"
             },
+            # GLM系列
             {
                 "patterns": ["glm-4", "glm4"],
-                "reference_models": ["glm-4-9b-chat"],
+                "reference_models": ["THUDM/glm-4-9b-chat", "glm-4-plus"],
                 "series_name": "GLM-4"
             },
+            # Baichuan系列
             {
                 "patterns": ["baichuan2", "baichuan-2"],
-                "reference_models": ["baichuan2-turbo"],
+                "reference_models": ["Baichuan2-Turbo"],
                 "series_name": "Baichuan2"
             },
+            {
+                "patterns": ["baichuan3", "baichuan-3"],
+                "reference_models": ["Baichuan3-Turbo"],
+                "series_name": "Baichuan3"
+            },
+            # Kimi系列
+            {
+                "patterns": ["moonshot", "kimi"],
+                "reference_models": ["moonshot-v1-32k"],
+                "series_name": "Kimi"
+            },
+            # ERNIE系列
             {
                 "patterns": ["ernie-4", "ernie4"],
                 "reference_models": ["ernie-4.0-turbo-8k"],
                 "series_name": "ERNIE-4"
             },
-            # 腾讯混元系列
-            {
-                "patterns": ["hunyuan", "混元"],
-                "reference_models": ["hunyuan-MT-7B", "hunyuan-pro"],
-                "series_name": "Hunyuan"
-            }
         ]
 
         # 查找匹配的系列
@@ -191,122 +292,239 @@ class ModelRegistry:
                 if pattern in model_lower:
                     # 找到匹配的系列,使用参考模型的能力
                     for ref_model in rule["reference_models"]:
-                        ref_info = self._find_by_model_name(ref_model)
-                        if ref_info:
+                        ref_config = self._find_by_model_name(ref_model)
+                        if ref_config:
                             logger.debug(f"系列推断: {model_name} 匹配 {rule['series_name']} 系列,参考模型: {ref_model}")
-                            # 创建新的ModelInfo,保持原有provider和model_name
-                            return ModelInfo(
+                            # 创建新的ModelConfig,保持原有provider和model_name
+                            return ModelConfig(
                                 provider=provider,
                                 model_name=model_name,
-                                supports_thinking=ref_info.supports_thinking,
-                                can_toggle_thinking=ref_info.can_toggle_thinking,
-                                supports_function_calling=ref_info.supports_function_calling,
-                                supports_json_mode=ref_info.supports_json_mode,
-                                supports_structured_output=ref_info.supports_structured_output,
-                                max_tokens_param=ref_info.max_tokens_param,
+                                model_type=ref_config.model_type,
+                                capabilities=ref_config.capabilities,
+                                series=ref_config.series,
+                                display_name=model_name,
+                                context_window=ref_config.context_window,
                                 notes=f"基于{rule['series_name']}系列推断,参考模型: {ref_model}"
                             )
 
         return None
 
-    def list_models_by_provider(self, provider: str) -> List[ModelInfo]:
-        """按提供商列出模型"""
-        return [model for model in self._models.values() if model.provider == provider]
+    def get_model_capabilities(self, provider: str, model_name: str, model_type: ModelType = ModelType.CHAT) -> Optional[Any]:
+        """获取模型能力"""
+        model_config = self.get_model_config(provider, model_name)
+        if model_config:
+            return model_config.get_capabilities(self._providers.get(provider))
+
+        # 如果没有找到模型配置,尝试返回供应商默认能力
+        provider_config = self._providers.get(provider)
+        if provider_config:
+            if model_type == ModelType.CHAT:
+                return provider_config.chat_capabilities
+            elif model_type == ModelType.EMBEDDING:
+                return provider_config.embedding_capabilities
+            elif model_type == ModelType.RERANK:
+                return provider_config.rerank_capabilities
+            elif model_type == ModelType.AUDIO:
+                return provider_config.audio_capabilities
+
+        return None
 
-    def export_config(self) -> Dict:
-        """导出配置"""
-        return {
-            "models": {key: asdict(model) for key, model in self._models.items()},
-            "version": "1.0"
-        }
+    def get_provider_config(self, provider: str) -> Optional[ProviderCapabilities]:
+        """获取供应商配置"""
+        return self._providers.get(provider)
 
-    def import_config(self, config: Dict):
-        """导入配置"""
-        if "models" in config:
-            for key, model_data in config["models"].items():
-                model_info = ModelInfo(**model_data)
-                self._models[key] = model_info
+    def list_models_by_provider(self, provider: str, model_type: Optional[ModelType] = None) -> List[ModelConfig]:
+        """按供应商列出模型"""
+        models = [model for model in self._models.values() if model.provider == provider]
+        if model_type:
+            models = [model for model in models if model.model_type == model_type]
+        return models
 
-    def save_to_file(self, filepath: str):
-        """保存到文件"""
-        with open(filepath, 'w', encoding='utf-8') as f:
-            json.dump(self.export_config(), f, indent=2, ensure_ascii=False)
+    def list_models_by_type(self, model_type: ModelType) -> List[ModelConfig]:
+        """按类型列出模型"""
+        return [model for model in self._models.values() if model.model_type == model_type]
 
-    def load_from_file(self, filepath: str):
-        """从文件加载"""
+    def load_providers_from_file(self, filepath: str):
+        """从文件加载供应商配置"""
         try:
             with open(filepath, 'r', encoding='utf-8') as f:
                 config = json.load(f)
-            self.import_config(config)
+
+            if "providers" in config:
+                for provider_name, provider_config in config["providers"].items():
+                    self.register_provider(provider_config)
+                logger.info(f"从 {filepath} 加载了 {len(config['providers'])} 个供应商配置")
+        except FileNotFoundError:
+            logger.warning(f"供应商配置文件不存在: {filepath}")
+        except Exception as e:
+            logger.error(f"加载供应商配置失败: {e}")
+
+    def load_models_from_file(self, filepath: str):
+        """从文件加载模型配置"""
+        try:
+            with open(filepath, 'r', encoding='utf-8') as f:
+                config = json.load(f)
+
+            if "models" in config:
+                for key, model_config in config["models"].items():
+                    # 跳过注释键
+                    if key.startswith("_comment"):
+                        continue
+                    self.register_model(model_config)
+                logger.info(f"从 {filepath} 加载了模型配置")
         except FileNotFoundError:
             logger.warning(f"模型配置文件不存在: {filepath}")
         except Exception as e:
             logger.error(f"加载模型配置失败: {e}")
+
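+    # 导出的 JSON 结构({"providers": ..., "models": ..., "version": "2.0"})
+    # 与 load_providers_from_file / load_models_from_file 读取的格式保持对应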
+    def export_config(self) -> Dict[str, Any]:
+        """导出配置"""
+        return {
+            "providers": {
+                name: {
+                    "provider_name": p.provider_name,
+                    "api_base_url": p.api_base_url,
+                    "auth_type": p.auth_type,
+                    "auth_header": p.auth_header,
+                    "chat_capabilities": asdict(p.chat_capabilities) if p.chat_capabilities else None,
+                    "embedding_capabilities": asdict(p.embedding_capabilities) if p.embedding_capabilities else None,
+                    "rerank_capabilities": asdict(p.rerank_capabilities) if p.rerank_capabilities else None,
+                    "audio_capabilities": asdict(p.audio_capabilities) if p.audio_capabilities else None,
+                    "notes": p.notes
+                }
+                for name, p in self._providers.items()
+            },
+            "models": {
+                f"{m.provider}:{m.model_name}": {
+                    "provider": m.provider,
+                    "model_name": m.model_name,
+                    "model_type": m.model_type.value,
+                    "series": m.series,
+                    "display_name": m.display_name,
+                    "context_window": m.context_window,
+                    "capabilities": asdict(m.capabilities) if m.capabilities else None,
+                    "notes": m.notes
+                }
+                for m in self._models.values()
+            },
+            "version": "2.0"
+        }
+
+    def get_model_info(self, provider: str, model_name: str) -> Optional['ModelInfo']:
+        """
+        获取模型信息(兼容旧版API)
+
+        该方法用于兼容旧代码,返回一个类似旧版ModelInfo的对象
+
+        :param provider: 供应商名称
+        :param model_name: 模型名称
+        :return: ModelInfo对象(兼容格式)或None
+        """
+        from dataclasses import dataclass
+
+        # 定义兼容的ModelInfo类
+        @dataclass
+        class ModelInfo:
+            """模型信息(兼容格式)"""
+            provider: str
+            model_name: str
+            supports_thinking: bool = False
+            can_toggle_thinking: bool = False
+            supports_function_calling: bool = True
+            supports_json_mode: bool = True
+            supports_structured_output: bool = False
+            max_tokens_param: str = "max_tokens"
+            notes: str = ""
+
+        # 从V2注册表获取配置
+        model_config = self.get_model_config(provider, model_name)
+
+        if not model_config:
+            # 如果没有找到模型配置,尝试返回供应商默认能力
+            provider_config = self._providers.get(provider)
+            if provider_config and provider_config.chat_capabilities:
+                caps = provider_config.chat_capabilities
+                return ModelInfo(
+                    provider=provider,
+                    model_name=model_name,
+                    supports_thinking=caps.supports_thinking,
+                    can_toggle_thinking=caps.can_toggle_thinking,
+                    supports_function_calling=caps.supports_function_calling,
+                    supports_json_mode=caps.supports_json_mode,
+                    supports_structured_output=caps.supports_structured_output,
+                    max_tokens_param=caps.max_tokens_param,
+                    notes=provider_config.notes
+                )
+            return None
+
+        # 获取chat能力(如果有的话)
+        capabilities = model_config.get_capabilities(self._providers.get(provider))
+
+        if capabilities and isinstance(capabilities, ChatCapabilities):
+            return ModelInfo(
+                provider=model_config.provider,
+                model_name=model_config.model_name,
+                supports_thinking=capabilities.supports_thinking,
+                can_toggle_thinking=capabilities.can_toggle_thinking,
+                supports_function_calling=capabilities.supports_function_calling,
+                supports_json_mode=capabilities.supports_json_mode,
+                supports_structured_output=capabilities.supports_structured_output,
+                max_tokens_param=capabilities.max_tokens_param,
+                notes=model_config.notes
+            )
+
+        # 如果模型不是chat类型或没有能力信息,返回基本信息
+        return ModelInfo(
+            provider=model_config.provider,
+            model_name=model_config.model_name,
+            notes=model_config.notes
+        )
 
 
-# 全局模型注册表实例
-model_registry = ModelRegistry()
+# 辅助函数:兼容旧接口
+def get_model_thinking_support(provider: str, model_name: str, registry: Optional[ModelRegistryV2] = None) -> bool:
+    """获取模型是否支持thinking(兼容函数)"""
+    if registry is None:
+        # 如果没有提供registry,使用全局实例
+        registry = global_model_registry_v2
+
+    capabilities = registry.get_model_capabilities(provider, model_name, ModelType.CHAT)
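+    # 仅当取到的是 Chat 能力对象时才读取 supports_thinking,其余情况一律按不支持处理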
+    if capabilities and isinstance(capabilities, ChatCapabilities):
+        return capabilities.supports_thinking
+
+    return False
 
-def get_model_thinking_support(provider: str, model_name: str) -> bool:
-    """获取模型是否支持thinking(使用智能推断)"""
-    model_info = model_registry.get_model_info(provider, model_name)
-    if model_info:
-        return model_info.supports_thinking
+def get_model_can_toggle_thinking(provider: str, model_name: str, registry: Optional[ModelRegistryV2] = None) -> bool:
+    """获取模型是否支持开关思维链"""
+    if registry is None:
+        registry = global_model_registry_v2
+
+    capabilities = registry.get_model_capabilities(provider, model_name, ModelType.CHAT)
+    if capabilities and isinstance(capabilities, ChatCapabilities):
+        return capabilities.can_toggle_thinking
 
-    # 如果智能推断也没有找到,使用保守的默认值
-    logger.debug(f"未找到模型 {provider}:{model_name} 的能力信息,默认不支持thinking")
     return False
 
 
-def update_siliconflow_model_support():
-    """更新SiliconFlow模型支持情况"""
-    # 支持thinking的模型
-    siliconflow_thinking_models = [
-        "qwen2.5-coder-32b-instruct",
-        "qwen2.5-72b-instruct",
-        "deepseek-v2.5",
-        # 可以根据实际测试结果添加更多模型
-    ]
+# 初始化全局注册表实例
+def initialize_global_registry(providers_path: Optional[str] = None, models_path: Optional[str] = None) -> ModelRegistryV2:
+    """初始化全局注册表"""
+    # 默认配置路径
+    if providers_path is None:
+        base_dir = Path(__file__).parent.parent.parent / "deploy" / "chart" / "euler_copilot" / "configs" / "framework"
+        providers_path = str(base_dir / "providers.conf")
 
-    # 支持thinking但需要特殊处理的模型
-    siliconflow_special_thinking_models = [
-        "Qwen/Qwen3-8B",
-        # 可以根据实际测试结果添加更多模型
-    ]
-
-    # 注册支持thinking的模型
-    for model_name in siliconflow_thinking_models:
-        # 根据模型类型设置can_toggle_thinking
-        can_toggle = not model_name.startswith("deepseek")  # DeepSeek系列不支持关闭思维链
-        model_info = ModelInfo(
-            provider="siliconflow",
-            model_name=model_name,
-            supports_thinking=True,
-            can_toggle_thinking=can_toggle,
-            notes="经过测试确认支持thinking"
-        )
-        model_registry.register_model(model_info)
+    if models_path is None:
+        base_dir = Path(__file__).parent.parent.parent / "deploy" / "chart" / "euler_copilot" / "configs" / "framework"
+        models_path = str(base_dir / "models.conf")
 
-    # 注册特殊thinking模型
-    for model_name in siliconflow_special_thinking_models:
-        model_info = ModelInfo(
-            provider="siliconflow",
-            model_name=model_name,
-            supports_thinking=True,
-            can_toggle_thinking=True,
-            notes="支持thinking,根据SiliconFlow平台确认"
-        )
-        model_registry.register_model(model_info)
+    registry = ModelRegistryV2(providers_path, models_path)
+    return registry
 
 
-# 初始化时更新SiliconFlow配置
-update_siliconflow_model_support()
+# 全局实例
+global_model_registry_v2 = initialize_global_registry()
 
+# 向后兼容的别名
+model_registry = global_model_registry_v2
 
-
-def get_model_thinking_support(provider: str, model_name: str) -> bool:
-    """获取模型的思维链支持情况"""
-    model_info = model_registry.get_model_info(provider, model_name)
-    if model_info:
-        return model_info.supports_thinking
-    return False
diff --git a/apps/llm/model_types.py b/apps/llm/model_types.py
new file mode 100644
index 0000000000000000000000000000000000000000..f7d4e44f417c5cad2aac3b199c18d5dbf85bec10
--- /dev/null
+++ b/apps/llm/model_types.py
@@ -0,0 +1,148 @@
+# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved.
+"""模型类型定义和能力规范""" + +from enum import Enum +from dataclasses import dataclass, field +from typing import Dict, List, Optional, Any + + +class ModelType(str, Enum): + """模型类型枚举""" + CHAT = "chat" # 文本对话 + EMBEDDING = "embedding" # 嵌入 + RERANK = "rerank" # 重排序 + AUDIO = "audio" # 语音(预留) + IMAGE = "image" # 图像(预留) + VIDEO = "video" # 视频(预留) + + +@dataclass +class ChatCapabilities: + """文本对话模型能力""" + # 基础能力 + supports_streaming: bool = True + supports_function_calling: bool = True + supports_json_mode: bool = True + supports_structured_output: bool = False + + # 推理能力 + supports_thinking: bool = False + can_toggle_thinking: bool = False # 是否支持通过参数开关思维链 + supports_reasoning_content: bool = False # 是否返回reasoning_content字段 + + # 参数支持 + max_tokens_param: str = "max_tokens" # 或 "max_completion_tokens" + supports_temperature: bool = True + supports_top_p: bool = True + supports_top_k: bool = False + supports_frequency_penalty: bool = False + supports_presence_penalty: bool = False + supports_min_p: bool = False # Qwen3特有 + + # 高级功能 + supports_response_format: bool = True + supports_tools: bool = True + supports_tool_choice: bool = True + supports_extra_body: bool = True + supports_stream_options: bool = True + + # 特殊参数 + supports_enable_thinking: bool = False # SiliconFlow/Qwen百炼特有 + supports_thinking_budget: bool = False # 思维链token预算 + supports_enable_search: bool = False # 联网搜索 + + +@dataclass +class EmbeddingCapabilities: + """嵌入模型能力""" + # 基础能力 + max_input_tokens: int = 512 # 单次输入最大token数 + supports_batch: bool = True # 是否支持批量输入 + default_dimensions: int = 1024 # 默认向量维度 + + # 参数支持 + supports_encoding_format: bool = True # float/base64 + supports_dimensions: bool = False # 是否支持自定义维度 + available_dimensions: List[int] = field(default_factory=list) # 可用的维度选项 + + # 输入类型 + supports_text_input: bool = True + supports_image_input: bool = False # 多模态嵌入 + + +@dataclass +class RerankCapabilities: + """重排序模型能力""" + # 基础能力 + max_documents: int = 100 # 单次最大文档数 + max_query_length: int = 512 # 查询最大长度 + + # 参数支持 + supports_top_n: bool = True # 是否支持返回topN + supports_return_documents: bool = True # 是否返回文档内容 + + +@dataclass +class AudioCapabilities: + """语音模型能力(预留)""" + supports_tts: bool = False # 文本转语音 + supports_stt: bool = False # 语音转文本 + supported_formats: List[str] = field(default_factory=list) # 支持的音频格式 + + +@dataclass +class ProviderCapabilities: + """供应商能力配置""" + provider_name: str + api_base_url: str + + # 认证方式 + auth_type: str = "bearer" # bearer, api_key, custom + auth_header: str = "Authorization" + + # 各类型模型的默认能力 + chat_capabilities: Optional[ChatCapabilities] = None + embedding_capabilities: Optional[EmbeddingCapabilities] = None + rerank_capabilities: Optional[RerankCapabilities] = None + audio_capabilities: Optional[AudioCapabilities] = None + + # 其他配置 + notes: str = "" + + +@dataclass +class ModelConfig: + """模型配置""" + provider: str + model_name: str + model_type: ModelType + + # 模型能力(如果为None,则继承供应商默认配置) + capabilities: Optional[Any] = None # ChatCapabilities | EmbeddingCapabilities | RerankCapabilities + + # 系列信息(用于能力推断) + series: Optional[str] = None # 如 "qwen2.5", "gpt-4", "deepseek-r1" + + # 其他元数据 + display_name: Optional[str] = None + context_window: Optional[int] = None + notes: str = "" + + def get_capabilities(self, provider_capabilities: Optional[ProviderCapabilities] = None) -> Any: + """获取模型能力(考虑继承)""" + if self.capabilities: + return self.capabilities + + # 从供应商配置继承 + if provider_capabilities: + if self.model_type == ModelType.CHAT: + return provider_capabilities.chat_capabilities + 
+            elif self.model_type == ModelType.EMBEDDING:
+                return provider_capabilities.embedding_capabilities
+            elif self.model_type == ModelType.RERANK:
+                return provider_capabilities.rerank_capabilities
+            elif self.model_type == ModelType.AUDIO:
+                return provider_capabilities.audio_capabilities
+
+        return None
+
diff --git a/apps/llm/patterns/executor.py b/apps/llm/patterns/executor.py
index e2153487a568eaa1289677b14111bbcbcc7b68ea..34985745eaadd30f70e7d6f503049bbb82c29717 100644
--- a/apps/llm/patterns/executor.py
+++ b/apps/llm/patterns/executor.py
@@ -180,12 +180,25 @@ class ExecutorSummary(CorePattern):
         self,
         system_prompt: dict[LanguageType, str] | None = None,
         user_prompt: dict[LanguageType, str] | None = None,
+        llm_id: str | None = None,
+        enable_thinking: bool = False,
     ) -> None:
-        """初始化Background模式"""
+        """初始化Background模式
+
+        :param system_prompt: 系统提示词
+        :param user_prompt: 用户提示词
+        :param llm_id: 大模型ID,如果为None则使用系统默认模型
+        :param enable_thinking: 是否启用思维链
+        """
         super().__init__(system_prompt, user_prompt)
+        self.llm_id = llm_id
+        self.enable_thinking = enable_thinking
 
     async def generate(self, **kwargs) -> str:  # noqa: ANN003
         """进行初始背景生成"""
+        import logging
+        logger = logging.getLogger(__name__)
+        logger.info(f"[ExecutorSummary] 初始化参数 - llm_id: {self.llm_id}, enable_thinking: {self.enable_thinking}")
         background: ExecutorBackground = kwargs["background"]
         conversation_str = convert_context_to_prompt(background.conversation)
         facts_str = facts_to_prompt(background.facts)
@@ -203,8 +216,38 @@ class ExecutorSummary(CorePattern):
         ]
 
         result = ""
-        llm = ReasoningLLM()
-        async for chunk in llm.call(messages, streaming=False, temperature=0.7):
+
+        # 根据llm_id获取模型配置
+        llm_config = None
+        if self.llm_id:
+            from apps.services.llm import LLMManager
+            from apps.llm.adapters import get_provider_from_endpoint
+            from apps.schemas.config import LLMConfig
+
+            llm_info = await LLMManager.get_llm_by_id(self.llm_id)
+            logger.info(f"[ExecutorSummary] 根据llm_id获取模型信息: {llm_info.model_name if llm_info else 'None'}")
+            if llm_info:
+                # 获取provider,如果没有则从endpoint推断
+                provider = llm_info.provider or get_provider_from_endpoint(llm_info.openai_base_url)
+
+                llm_config = LLMConfig(
+                    provider=provider,
+                    endpoint=llm_info.openai_base_url,
+                    key=llm_info.openai_api_key,
+                    model=llm_info.model_name,
+                    max_tokens=llm_info.max_tokens,
+                    temperature=0.7,
+                )
+
+        # 初始化LLM客户端
+        llm = ReasoningLLM(llm_config) if llm_config else ReasoningLLM()
+
+        async for chunk in llm.call(
+            messages,
+            streaming=False,
+            temperature=0.7,
+            enable_thinking=self.enable_thinking
+        ):
             result += chunk
         self.input_tokens = llm.input_tokens
         self.output_tokens = llm.output_tokens
diff --git a/apps/llm/patterns/rewrite.py b/apps/llm/patterns/rewrite.py
index 3fc53f811d033a79c180671feb6d74ecca9add22..60fe4468f7fc1ce701defc11bdf5fead1bece374 100644
--- a/apps/llm/patterns/rewrite.py
+++ b/apps/llm/patterns/rewrite.py
@@ -34,6 +34,24 @@ _env = SandboxedEnvironment(
 class QuestionRewrite(CorePattern):
     """问题补全与重写"""
 
+    def __init__(
+        self,
+        system_prompt: dict[LanguageType, str] | None = None,
+        user_prompt: dict[LanguageType, str] | None = None,
+        llm_id: str | None = None,
+        enable_thinking: bool = False,
+    ) -> None:
+        """初始化问题改写模式
+
+        :param system_prompt: 系统提示词
+        :param user_prompt: 用户提示词
+        :param llm_id: 大模型ID,如果为None则使用系统默认模型
+        :param enable_thinking: 是否启用思维链
+        """
+        super().__init__(system_prompt, user_prompt)
+        self.llm_id = llm_id
+        self.enable_thinking = enable_thinking
+
     def get_default_prompt(self) -> dict[LanguageType, str]:
         system_prompt = {
LanguageType.CHINESE: dedent(r""" @@ -167,10 +185,32 @@ class QuestionRewrite(CorePattern): """问题补全与重写""" history = kwargs.get("history", []) question = kwargs["question"] - llm = kwargs.get("llm", None) language = kwargs.get("language", LanguageType.CHINESE) + + # 根据llm_id获取模型配置并创建LLM实例 + llm = None + if self.llm_id: + from apps.services.llm import LLMManager + from apps.llm.adapters import get_provider_from_endpoint + from apps.schemas.config import LLMConfig + + llm_info = await LLMManager.get_llm_by_id(self.llm_id) + if llm_info: + provider = llm_info.provider or get_provider_from_endpoint(llm_info.openai_base_url) + + llm_config = LLMConfig( + provider=provider, + endpoint=llm_info.openai_base_url, + key=llm_info.openai_api_key, + model=llm_info.model_name, + max_tokens=llm_info.max_tokens, + temperature=0.7, + ) + llm = ReasoningLLM(llm_config) + if not llm: llm = ReasoningLLM() + leave_tokens = llm._config.max_tokens leave_tokens -= TokenCalculator().calculate_token_length( messages=[{"role": "system", "content": _env.from_string(self.system_prompt[language]).render( @@ -197,7 +237,7 @@ class QuestionRewrite(CorePattern): messages = [{"role": "system", "content": _env.from_string(self.system_prompt[language]).render( history=qa, question=question)}, {"role": "user", "content": _env.from_string(self.user_prompt[language]).render()}] result = "" - async for chunk in llm.call(messages, streaming=False): + async for chunk in llm.call(messages, streaming=False, enable_thinking=self.enable_thinking): result += chunk self.input_tokens = llm.input_tokens self.output_tokens = llm.output_tokens diff --git a/apps/llm/reasoning.py b/apps/llm/reasoning.py index 5fa162f4bbcea13d8cdc7835592818be3f7d16ce..ed7dc94e3c843c527c816fb8a50d2f0c461620ae 100644 --- a/apps/llm/reasoning.py +++ b/apps/llm/reasoning.py @@ -173,7 +173,12 @@ class ReasoningLLM: max_tokens: int | None, temperature: float | None, model: str | None = None, - enable_thinking: bool = False + enable_thinking: bool = False, + frequency_penalty: float | None = None, + presence_penalty: float | None = None, + min_p: float | None = None, + top_k: int | None = None, + top_p: float | None = None, ) -> AsyncGenerator[ChatCompletionChunk, None]: """创建流式响应""" if model is None: @@ -205,15 +210,43 @@ class ReasoningLLM: "stream": True, "stream_options": {"include_usage": True}, "timeout": 300, - "extra_body": {"enable_thinking": enable_thinking} } + # 初始化 extra_body + extra_body_params = {} + + # enable_thinking 始终放在 extra_body 中 + extra_body_params["enable_thinking"] = enable_thinking + + # 添加扩展参数到 extra_body(这些参数不被标准 OpenAI SDK 支持) + if frequency_penalty is not None: + extra_body_params["frequency_penalty"] = frequency_penalty + if presence_penalty is not None: + extra_body_params["presence_penalty"] = presence_penalty + if min_p is not None: + extra_body_params["min_p"] = min_p + if top_k is not None: + extra_body_params["top_k"] = top_k + if top_p is not None: + # top_p 是标准参数,但某些 provider 可能需要特殊处理 + base_params["top_p"] = top_p + + # 只有当有扩展参数时才添加 extra_body + if extra_body_params: + base_params["extra_body"] = extra_body_params + # 使用适配器调整参数 adapted_params = self._adapter.adapt_create_params(base_params, enable_thinking) logger.info(f"[{self._provider}] 调用参数: model={model}, enable_thinking={enable_thinking}, " f"supports_native_thinking={self._adapter.capabilities.supports_enable_thinking}") + # 打印完整请求体(排除messages内容以避免日志过长) + log_params = adapted_params.copy() + if 'messages' in log_params: + log_params['messages'] = 
f"<{len(log_params['messages'])} messages>" + logger.info(f"[{self._provider}] 请求体: {log_params}") + return await self._client.chat.completions.create(**adapted_params) # type: ignore[] async def call( # noqa: C901, PLR0912, PLR0913 @@ -225,7 +258,12 @@ class ReasoningLLM: streaming: bool = True, result_only: bool = True, model: str | None = None, - enable_thinking: bool = False + enable_thinking: bool = False, + frequency_penalty: float | None = None, + presence_penalty: float | None = None, + min_p: float | None = None, + top_k: int | None = None, + top_p: float | None = None, ) -> AsyncGenerator[str, None]: """调用大模型,分为流式和非流式两种""" # 检查max_tokens和temperature @@ -236,7 +274,18 @@ class ReasoningLLM: if model is None: model = self._config.model msg_list = self._validate_messages(messages) - stream = await self._create_stream(msg_list, max_tokens, temperature, model, enable_thinking) + stream = await self._create_stream( + msg_list, + max_tokens, + temperature, + model, + enable_thinking, + frequency_penalty, + presence_penalty, + min_p, + top_k, + top_p, + ) reasoning = ReasoningContent() reasoning_content = "" result = "" @@ -273,6 +322,12 @@ class ReasoningLLM: yield result logger.info("[Reasoning] 推理内容: %s\n\n%s", reasoning_content, result) + + # 如果streaming模式下没有返回任何text,至少返回一个空格 + # 避免下游处理空字符串时出错 + if streaming and not result: + logger.warning("[Reasoning] 模型没有返回任何文本内容,返回占位符") + yield " " # 更新token统计 if self.input_tokens == 0 or self.output_tokens == 0: diff --git a/apps/main.py b/apps/main.py index d440c51402fc9a13bdaf4d8085f0832f401d96d1..3c3c0a14f63d9498068eab3865760e292a4e081a 100644 --- a/apps/main.py +++ b/apps/main.py @@ -38,6 +38,7 @@ from apps.routers import ( llm, mcp_service, record, + sandbox, service, user, variable @@ -131,6 +132,7 @@ app.include_router(mcp_service.router) app.include_router(flow.router) app.include_router(user.router) app.include_router(variable.router) +app.include_router(sandbox.router) # logger配置 LOGGER_FORMAT = "%(funcName)s() - %(message)s" diff --git a/apps/routers/appcenter.py b/apps/routers/appcenter.py index 873d71d407c0c344f2da467abf7cd853c2ccb506..a3a494d215bf1df6103035b841cf08fe82926e30 100644 --- a/apps/routers/appcenter.py +++ b/apps/routers/appcenter.py @@ -281,6 +281,7 @@ async def get_application( links=app_data.links, recommendedQuestions=app_data.first_questions, dialogRounds=app_data.history_len, + enableThinking=app_data.enable_thinking, permission=AppPermissionData( visibility=app_data.permission.type, authorizedUsers=app_data.permission.users, diff --git a/apps/routers/llm.py b/apps/routers/llm.py index 95ba227d6af9a862031cddb8d1c904d86f8a91b0..d9f11b42ca36195998e25edeff3f32edd6df7a4f 100644 --- a/apps/routers/llm.py +++ b/apps/routers/llm.py @@ -212,4 +212,21 @@ async def get_reranker_config( message="success", result=result, ).model_dump(exclude_none=True, by_alias=True), + ) + + +@router.get("/capabilities", response_model=ResponseData) +async def get_llm_capabilities( + user_sub: Annotated[str, Depends(get_user)], + llm_id: Annotated[str, Query(description="大模型ID", alias="llmId")] +) -> JSONResponse: + """获取指定模型支持的参数配置项""" + capabilities = await LLMManager.get_model_capabilities(user_sub, llm_id) + return JSONResponse( + status_code=status.HTTP_200_OK, + content=ResponseData( + code=status.HTTP_200_OK, + message="success", + result=capabilities, + ).model_dump(exclude_none=True, by_alias=True), ) \ No newline at end of file diff --git a/apps/routers/sandbox.py b/apps/routers/sandbox.py new file mode 100644 index 
index 0000000000000000000000000000000000000000..3152bbdd13ba9993d5120d180daec31e63341cf8
--- /dev/null
+++ b/apps/routers/sandbox.py
@@ -0,0 +1,146 @@
+# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved.
+"""代码沙箱相关接口"""
+
+import logging
+from typing import Dict, Any
+from fastapi import APIRouter, HTTPException, Path
+from fastapi.responses import JSONResponse
+import httpx
+
+from apps.common.config import Config
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(
+    prefix="/api/sandbox",
+    tags=["sandbox"],
+)
+
+def get_sandbox_service_url() -> str:
+    """从配置中获取沙箱服务地址"""
+    config = Config().get_config()
+    return config.sandbox.sandbox_service
+
+
+@router.get("/code-spec/{language}")
+async def get_code_specification(
+    language: str = Path(..., description="编程语言类型: python, javascript, bash, shell"),
+    lang: str = "zh"
+) -> JSONResponse:
+    """
+    获取指定编程语言的代码安全规范文档
+
+    Args:
+        language: 编程语言类型,可选值: python, javascript, bash, shell
+        lang: 文档语言,可选值: zh(中文), en(英文),默认为 zh
+
+    Returns:
+        对应语言的安全规范文档内容(Markdown格式)
+    """
+    try:
+        # 标准化语言参数
+        language_lower = language.lower()
+        if language_lower not in ['python', 'javascript', 'bash', 'shell']:
+            raise HTTPException(
+                status_code=400,
+                detail=f"不支持的语言类型: {language}。支持的类型: python, javascript, bash, shell"
+            )
+
+        # 标准化文档语言参数
+        doc_lang = lang.lower() if lang.lower() in ['zh', 'en'] else 'zh'
+
+        # 获取沙箱服务地址
+        sandbox_url = get_sandbox_service_url()
+
+        # 转发请求到沙箱服务,带上语言参数
+        async with httpx.AsyncClient(timeout=30.0) as client:
+            response = await client.get(
+                f"{sandbox_url}/code-spec/{language_lower}",
+                params={"lang": doc_lang}
+            )
+
+            if response.status_code != 200:
+                logger.error(f"沙箱服务返回错误: {response.status_code}, {response.text}")
+                raise HTTPException(
+                    status_code=response.status_code,
+                    detail="获取代码规范失败"
+                )
+
+            result = response.json()
+
+            # 返回标准格式
+            return JSONResponse(
+                status_code=200,
+                content={
+                    "success": result.get("success", True),
+                    "message": result.get("message", "获取代码规范成功"),
+                    "data": result.get("data", {})
+                }
+            )
+
+    except HTTPException:
+        raise
+    except httpx.TimeoutException:
+        logger.error("请求沙箱服务超时")
+        raise HTTPException(
+            status_code=504,
+            detail="请求代码沙箱服务超时,请稍后重试"
+        )
+    except httpx.RequestError as e:
+        logger.error(f"请求沙箱服务失败: {e}")
+        raise HTTPException(
+            status_code=503,
+            detail="无法连接到代码沙箱服务"
+        )
+    except Exception as e:
+        logger.error(f"获取代码规范失败: {e}", exc_info=True)
+        raise HTTPException(
+            status_code=500,
+            detail=f"获取代码规范失败: {str(e)}"
+        )
+
+
+@router.get("/health")
+async def sandbox_health_check() -> JSONResponse:
+    """
+    检查代码沙箱服务健康状态
+
+    Returns:
+        沙箱服务健康状态
+    """
+    try:
+        # 获取沙箱服务地址
+        sandbox_url = get_sandbox_service_url()
+
+        async with httpx.AsyncClient(timeout=5.0) as client:
+            response = await client.get(f"{sandbox_url}/health")
+
+            if response.status_code == 200:
+                return JSONResponse(
+                    status_code=200,
+                    content={
+                        "success": True,
+                        "message": "沙箱服务运行正常",
+                        "data": response.json().get("data", {})
+                    }
+                )
+            else:
+                return JSONResponse(
+                    status_code=503,
+                    content={
+                        "success": False,
+                        "message": "沙箱服务不可用",
+                        "data": None
+                    }
+                )
+    except Exception as e:
+        logger.error(f"检查沙箱服务健康状态失败: {e}")
+        return JSONResponse(
+            status_code=503,
+            content={
+                "success": False,
+                "message": "无法连接到沙箱服务",
+                "data": None
+            }
+        )
+
diff --git a/apps/routers/variable.py b/apps/routers/variable.py
index 3c19221b83c9401845bfdec7e1c51d26e6862634..d8a4bc0e17a361fafb101c0a5d8d64e5da844bfb 100644
--- a/apps/routers/variable.py
+++ b/apps/routers/variable.py
@@ -554,16 +554,19 @@ async def update_variable(
         )
 
     except ValueError as e:
+        logger.error(f"更新变量失败(ValueError): {e}", exc_info=True)
         raise HTTPException(
             status_code=status.HTTP_404_NOT_FOUND,
             detail=str(e)
         )
     except PermissionError as e:
+        logger.error(f"更新变量失败(PermissionError): {e}", exc_info=True)
         raise HTTPException(
             status_code=status.HTTP_403_FORBIDDEN,
             detail=str(e)
         )
     except Exception as e:
+        logger.error(f"更新变量失败(Exception): {e}", exc_info=True)
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
             detail=f"更新变量失败: {str(e)}"
diff --git a/apps/scheduler/call/facts/facts.py b/apps/scheduler/call/facts/facts.py
index a7cbf58e21ded36121c16c8ac0bb0f5e315f53ed..fc76182e5694357eaeb1fc12ce4bdc4ce1f9d741 100644
--- a/apps/scheduler/call/facts/facts.py
+++ b/apps/scheduler/call/facts/facts.py
@@ -7,7 +7,7 @@ from typing import TYPE_CHECKING, Any, Self, ClassVar
 
 from jinja2 import BaseLoader
 from jinja2.sandbox import SandboxedEnvironment
-from pydantic import Field
+from pydantic import BaseModel, Field
 
 from apps.scheduler.call.core import CoreCall
 from apps.scheduler.call.facts.prompt import DOMAIN_PROMPT, FACTS_PROMPT
@@ -29,6 +29,8 @@ if TYPE_CHECKING:
 class FactsCall(CoreCall, input_model=FactsInput, output_model=FactsOutput):
     """提取事实工具"""
     answer: str = Field(description="用户输入")
+    llm_id: str | None = Field(default=None, description="大模型ID,如果为None则使用系统默认模型")
+    enable_thinking: bool = Field(default=False, description="是否启用思维链")
     i18n_info: ClassVar[dict[str, dict]] = {
         LanguageType.CHINESE: {
             "name": "提取事实",
@@ -45,11 +47,17 @@ class FactsCall(CoreCall, input_model=FactsInput, output_model=FactsOutput):
     @classmethod
     async def instance(cls, executor: "StepExecutor", node: NodePool | None, **kwargs: Any) -> Self:
         """初始化工具"""
+        # 提取 llm_id 和 enable_thinking,避免重复传递
+        llm_id = kwargs.pop("llm_id", None)
+        enable_thinking = kwargs.pop("enable_thinking", False)
+
         obj = cls(
             answer=executor.task.runtime.answer,
             name=executor.step.step.name,
             description=executor.step.step.description,
             node=node,
+            llm_id=llm_id,
+            enable_thinking=enable_thinking,
             **kwargs,
         )
 
@@ -88,10 +96,15 @@ class FactsCall(CoreCall, input_model=FactsInput, output_model=FactsOutput):
         facts_tpl = env.from_string(FACTS_PROMPT[language])
         facts_prompt = facts_tpl.render(conversation=data.message)
         try:
-            facts_obj: FactsGen = await self._json([
-                {"role": "system", "content": "You are a helpful assistant."},
-                {"role": "user", "content": facts_prompt},
-            ], FactsGen)  # type: ignore[arg-type]
+            facts_obj: FactsGen = await self._json_with_config(
+                [
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": facts_prompt},
+                ],
+                FactsGen,
+                llm_id=self.llm_id,
+                enable_thinking=self.enable_thinking,
+            )  # type: ignore[arg-type]
         except Exception as e:
             # 如果 LLM 返回格式不正确,使用默认空列表
             logging.warning(f"[FactsCall] 事实提取失败,使用默认值: {e}")
@@ -101,10 +114,15 @@ class FactsCall(CoreCall, input_model=FactsInput, output_model=FactsOutput):
         domain_tpl = env.from_string(DOMAIN_PROMPT[language])
         domain_prompt = domain_tpl.render(conversation=data.message)
         try:
-            domain_list: DomainGen = await self._json([
-                {"role": "system", "content": "You are a helpful assistant."},
-                {"role": "user", "content": domain_prompt},
-            ], DomainGen)  # type: ignore[arg-type]
+            domain_list: DomainGen = await self._json_with_config(
+                [
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": domain_prompt},
+                ],
+                DomainGen,
+                llm_id=self.llm_id,
+                enable_thinking=self.enable_thinking,
+            )  # type: ignore[arg-type]
         except Exception as e:
             # 如果 LLM 返回格式不正确,使用默认空列表
             logging.warning(f"[FactsCall] 域名提取失败,使用默认值: {e}")
@@ -120,6 +138,45 @@ class FactsCall(CoreCall, input_model=FactsInput, output_model=FactsOutput):
                 domain=domain_list.keywords,
             ).model_dump(by_alias=True, exclude_none=True),
         )
+
+    async def _json_with_config(
+        self,
+        messages: list[dict[str, Any]],
+        schema: type[BaseModel],
+        llm_id: str | None = None,
+        enable_thinking: bool = False,
+    ) -> BaseModel:
+        """使用配置的模型进行JSON生成"""
+        from apps.llm.function import FunctionLLM
+
+        # 根据llm_id获取模型配置
+        llm_config = None
+        if llm_id:
+            from apps.services.llm import LLMManager
+            from apps.llm.adapters import get_provider_from_endpoint
+            from apps.schemas.config import LLMConfig
+
+            llm_info = await LLMManager.get_llm_by_id(llm_id)
+            if llm_info:
+                provider = llm_info.provider or get_provider_from_endpoint(llm_info.openai_base_url)
+
+                llm_config = LLMConfig(
+                    provider=provider,
+                    endpoint=llm_info.openai_base_url,
+                    key=llm_info.openai_api_key,
+                    model=llm_info.model_name,
+                    max_tokens=llm_info.max_tokens,
+                    temperature=0.7,
+                )
+
+        # 初始化Function LLM
+        json_gen = FunctionLLM(llm_config) if llm_config else FunctionLLM()
+        result = await json_gen.call(
+            messages=messages,
+            schema=schema.model_json_schema(),
+            enable_thinking=enable_thinking,
+        )
+        return schema.model_validate(result)
 
     async def exec(
diff --git a/apps/scheduler/call/llm/llm.py b/apps/scheduler/call/llm/llm.py
index f410f7f08274799379b1e2eb4174ac9b809d7c61..c338c251b7ce365257a3e91232eeb02e4a5df99f 100644
--- a/apps/scheduler/call/llm/llm.py
+++ b/apps/scheduler/call/llm/llm.py
@@ -31,8 +31,12 @@ class LLM(CoreCall, input_model=LLMInput, output_model=LLMOutput):
 
     to_user: bool = Field(default=True)
 
-    # 大模型参数
+    # 模型配置
+    llmId: str = Field(description="大模型ID", default="")
+
+    # 大模型基础参数
     temperature: float = Field(description="大模型温度(随机化程度)", default=0.7)
+    enable_temperature: bool = Field(description="是否启用温度参数", default=True)
     enable_context: bool = Field(description="是否启用上下文", default=True)
     enable_thinking: bool = Field(description="是否启用思维链", default=False)
     step_history_size: int = Field(
@@ -41,6 +45,21 @@ class LLM(CoreCall, input_model=LLMInput, output_model=LLMOutput):
         description="大模型系统提示词", default="You are a helpful assistant.")
     user_prompt: str = Field(description="大模型用户提示词", default=LLM_DEFAULT_PROMPT)
 
+    # 新增参数配置
+    enable_frequency_penalty: bool = Field(description="是否启用频率惩罚", default=False)
+    frequency_penalty: float = Field(description="频率惩罚", default=0.0)
+    enable_presence_penalty: bool = Field(description="是否启用内容重复度惩罚", default=False)
+    presence_penalty: float = Field(description="内容重复度惩罚", default=0.0)
+    enable_min_p: bool = Field(description="是否启用动态过滤阈值", default=False)
+    min_p: float = Field(description="动态过滤阈值", default=0.0)
+    enable_top_k: bool = Field(description="是否启用Top-K采样", default=False)
+    top_k: int = Field(description="Top-K采样值", default=0)
+    enable_top_p: bool = Field(description="是否启用Top-P采样", default=False)
+    top_p: float = Field(description="Top-P采样值", default=0.9)
+    enable_search: bool = Field(description="是否启用联网搜索", default=False)
+    enable_json_mode: bool = Field(description="是否启用JSON模式输出", default=False)
+    enable_structured_output: bool = Field(description="是否启用结构化输出", default=False)
 
     i18n_info: ClassVar[dict[str, dict]] = {
         LanguageType.CHINESE: {
@@ -116,8 +135,51 @@ class LLM(CoreCall, input_model=LLMInput, output_model=LLMOutput):
         """运行LLM Call"""
         data = LLMInput(**input_data)
         try:
-            llm = ReasoningLLM()
diff --git a/apps/scheduler/call/llm/llm.py b/apps/scheduler/call/llm/llm.py
index f410f7f08274799379b1e2eb4174ac9b809d7c61..c338c251b7ce365257a3e91232eeb02e4a5df99f 100644
--- a/apps/scheduler/call/llm/llm.py
+++ b/apps/scheduler/call/llm/llm.py
@@ -31,8 +31,12 @@ class LLM(CoreCall, input_model=LLMInput, output_model=LLMOutput):
     to_user: bool = Field(default=True)
 
-    # 大模型参数
+    # 模型配置
+    llmId: str = Field(description="大模型ID", default="")
+
+    # 大模型基础参数
     temperature: float = Field(description="大模型温度(随机化程度)", default=0.7)
+    enable_temperature: bool = Field(description="是否启用温度参数", default=True)
     enable_context: bool = Field(description="是否启用上下文", default=True)
     enable_thinking: bool = Field(description="是否启用思维链", default=False)
     step_history_size: int = Field(
@@ -41,6 +45,21 @@ class LLM(CoreCall, input_model=LLMInput, output_model=LLMOutput):
         description="大模型系统提示词", default="You are a helpful assistant.")
     user_prompt: str = Field(description="大模型用户提示词", default=LLM_DEFAULT_PROMPT)
+
+    # 新增参数配置
+    enable_frequency_penalty: bool = Field(description="是否启用频率惩罚", default=False)
+    frequency_penalty: float = Field(description="频率惩罚", default=0.0)
+    enable_presence_penalty: bool = Field(description="是否启用内容重复度惩罚", default=False)
+    presence_penalty: float = Field(description="内容重复度惩罚", default=0.0)
+    enable_min_p: bool = Field(description="是否启用动态过滤阈值", default=False)
+    min_p: float = Field(description="动态过滤阈值", default=0.0)
+    enable_top_k: bool = Field(description="是否启用Top-K采样", default=False)
+    top_k: int = Field(description="Top-K采样值", default=0)
+    enable_top_p: bool = Field(description="是否启用Top-P采样", default=False)
+    top_p: float = Field(description="Top-P采样值", default=0.9)
+    enable_search: bool = Field(description="是否启用联网搜索", default=False)
+    enable_json_mode: bool = Field(description="是否启用JSON模式输出", default=False)
+    enable_structured_output: bool = Field(description="是否启用结构化输出", default=False)
 
     i18n_info: ClassVar[dict[str, dict]] = {
         LanguageType.CHINESE: {
@@ -116,8 +135,51 @@ class LLM(CoreCall, input_model=LLMInput, output_model=LLMOutput):
         """运行LLM Call"""
         data = LLMInput(**input_data)
         try:
-            llm = ReasoningLLM()
-            async for chunk in llm.call(messages=data.message, enable_thinking=self.enable_thinking):
+            # 根据llmId获取模型配置
+            llm_config = None
+            if self.llmId:
+                from apps.services.llm import LLMManager
+                from apps.llm.adapters import get_provider_from_endpoint
+
+                llm_info = await LLMManager.get_llm_by_id(self.llmId)
+                if llm_info:
+                    from apps.schemas.config import LLMConfig
+
+                    # 获取provider,如果没有则从endpoint推断
+                    provider = llm_info.provider or get_provider_from_endpoint(llm_info.openai_base_url)
+
+                    llm_config = LLMConfig(
+                        provider=provider,
+                        endpoint=llm_info.openai_base_url,
+                        key=llm_info.openai_api_key,
+                        model=llm_info.model_name,
+                        max_tokens=llm_info.max_tokens,
+                        temperature=self.temperature if self.enable_temperature else 0.7,
+                    )
+
+            # 初始化LLM客户端(会自动加载适配器)
+            llm = ReasoningLLM(llm_config) if llm_config else ReasoningLLM()
+
+            # 准备参数,只传递enable为True的参数
+            call_params = {
+                "messages": data.message,
+                "enable_thinking": self.enable_thinking,
+                "temperature": self.temperature if self.enable_temperature else None,
+            }
+
+            # 添加可选参数(只在enable为True时传递)
+            if self.enable_frequency_penalty:
+                call_params["frequency_penalty"] = self.frequency_penalty
+            if self.enable_presence_penalty:
+                call_params["presence_penalty"] = self.presence_penalty
+            if self.enable_min_p:
+                call_params["min_p"] = self.min_p
+            if self.enable_top_k:
+                call_params["top_k"] = self.top_k
+            if self.enable_top_p:
+                call_params["top_p"] = self.top_p
+
+            async for chunk in llm.call(**call_params):
                 if not chunk:
                     continue
                 yield CallOutputChunk(type=CallOutputType.TEXT, content=chunk)
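The enable_* switches gate whether a sampling parameter is sent at all, so downstream code never has to strip a parameter the user did not opt into. A runnable sketch of the gating with hypothetical values:

# Parameters whose enable_* switch is False never enter call_params.
enable_top_k, top_k = True, 40
enable_min_p, min_p = False, 0.05

call_params: dict[str, object] = {"enable_thinking": False, "temperature": 0.7}
if enable_top_k:
    call_params["top_k"] = top_k
if enable_min_p:
    call_params["min_p"] = min_p

assert "min_p" not in call_params  # opted-out parameters are simply absent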
diff --git a/apps/scheduler/call/summary/summary.py b/apps/scheduler/call/summary/summary.py
index 9cbf8b1165ecf24dffb7f0bbc5c1db82be61d9b1..39b5a81fa9efa7ec0105ca1c73d563dc1cca8636 100644
--- a/apps/scheduler/call/summary/summary.py
+++ b/apps/scheduler/call/summary/summary.py
@@ -27,6 +27,8 @@ class Summary(CoreCall, input_model=DataBase, output_model=SummaryOutput):
     """总结工具"""
 
     context: ExecutorBackground = Field(description="对话上下文")
+    llm_id: str | None = Field(default=None, description="大模型ID,如果为None则使用系统默认模型")
+    enable_thinking: bool = Field(default=False, description="是否启用思维链")
     i18n_info: ClassVar[dict[str, dict]] = {
         LanguageType.CHINESE: {
             "name": "理解上下文",
@@ -43,11 +45,17 @@ class Summary(CoreCall, input_model=DataBase, output_model=SummaryOutput):
     @classmethod
     async def instance(cls, executor: "StepExecutor", node: NodePool | None, **kwargs: Any) -> Self:
         """实例化工具"""
+        # 提取 llm_id 和 enable_thinking,避免重复传递
+        llm_id = kwargs.pop("llm_id", None)
+        enable_thinking = kwargs.pop("enable_thinking", False)
+
         obj = cls(
             context=executor.background,
             name=executor.step.step.name,
             description=executor.step.step.description,
             node=node,
+            llm_id=llm_id,
+            enable_thinking=enable_thinking,
             **kwargs,
         )
         await obj._set_input(executor)
@@ -63,7 +71,13 @@ class Summary(CoreCall, input_model=DataBase, output_model=SummaryOutput):
         self, _input_data: dict[str, Any], language: LanguageType = LanguageType.CHINESE
     ) -> AsyncGenerator[CallOutputChunk, None]:
         """执行工具"""
-        summary_obj = ExecutorSummary()
+        import logging
+        logger = logging.getLogger(__name__)
+        logger.info(f"[Summary] 使用模型ID: {self.llm_id}, 启用思维链: {self.enable_thinking}")
+        summary_obj = ExecutorSummary(
+            llm_id=self.llm_id,
+            enable_thinking=self.enable_thinking,
+        )
         summary = await summary_obj.generate(background=self.context, language=language)
         self.tokens.input_tokens += summary_obj.input_tokens
         self.tokens.output_tokens += summary_obj.output_tokens
diff --git a/apps/scheduler/executor/base.py b/apps/scheduler/executor/base.py
index 56839ee0a9a4133606e98aad5bd0c59909c73907..fd0b3891419bc4de98ae3162d9b44b8866024f56 100644
--- a/apps/scheduler/executor/base.py
+++ b/apps/scheduler/executor/base.py
@@ -45,11 +45,17 @@ class BaseExecutor(BaseModel, ABC):
         :param data: 消息数据,如果是FLOW_START事件且data为None,则自动构建FlowStartContent
         """
         if event_type == EventType.TEXT_ADD.value and isinstance(data, str):
+            # 处理空字符串的情况,避免TextAddContent验证失败
+            if not data:
+                data = " "  # 使用一个空格作为占位符
             data = TextAddContent(text=data).model_dump(exclude_none=True, by_alias=True)
         if data is None:
             data = {}
         elif isinstance(data, str):
+            # 处理空字符串的情况,避免TextAddContent验证失败
+            if not data:
+                data = " "  # 使用一个空格作为占位符
             data = TextAddContent(text=data).model_dump(exclude_none=True, by_alias=True)
 
         logger.info(f"[BaseExecutor] 调用msg_queue.push_output - event_type: {event_type}")
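The single-space placeholder works around a validator that rejects empty text. A minimal reproduction of the pattern, assuming TextAddContent constrains text to be non-empty (the real constraint lives in the schemas module):

from pydantic import BaseModel, Field, ValidationError

class TextAddContent(BaseModel):  # stand-in for the real schema; constraint assumed
    text: str = Field(min_length=1)

try:
    TextAddContent(text="")   # what the executor used to do on empty chunks
except ValidationError:
    TextAddContent(text=" ")  # the placeholder keeps the event stream flowing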
+ logger.info(f"[Scheduler] 应用配置的模型ID: {app_metadata.llm_id}, 启用思维链: {app_metadata.enable_thinking if hasattr(app_metadata, 'enable_thinking') else 'N/A'}") if not app_metadata.llm_id or app_metadata.llm_id == "empty": # 获取系统默认模型 llm_collection = MongoDB().get_collection("llm") @@ -265,11 +266,17 @@ class Scheduler: ) if background.conversation and self.task.state.flow_status == FlowStatus.INIT: try: - question_obj = QuestionRewrite() + # 使用应用配置的模型进行问题改写 + llm_id_for_rewrite = app_metadata.llm_id if hasattr(app_metadata, 'llm_id') and app_metadata.llm_id != "empty" else None + enable_thinking_for_rewrite = app_metadata.enable_thinking if hasattr(app_metadata, 'enable_thinking') else False + + question_obj = QuestionRewrite( + llm_id=llm_id_for_rewrite, + enable_thinking=enable_thinking_for_rewrite, + ) post_body.question = await question_obj.generate( history=background.conversation, question=post_body.question, - llm=reasion_llm, language=post_body.language, ) except Exception: @@ -312,7 +319,8 @@ class Scheduler: msg_queue=queue, question=post_body.question, post_body_app=app_info, - enable_thinking=post_body.enable_thinking, + enable_thinking=app_metadata.enable_thinking if hasattr(app_metadata, 'enable_thinking') else False, + llm_id=app_metadata.llm_id if hasattr(app_metadata, 'llm_id') and app_metadata.llm_id != "empty" else None, background=background, ) diff --git a/apps/scheduler/variable/pool_base.py b/apps/scheduler/variable/pool_base.py index a73cf0f7299979fca0125e3d69d5dcc6774bc2ec..79df334690260471bc44259f9a10c91082a20ff3 100644 --- a/apps/scheduler/variable/pool_base.py +++ b/apps/scheduler/variable/pool_base.py @@ -119,17 +119,26 @@ class BaseVariablePool(ABC): # 🔑 新增:对于文件类型变量,在更新前清理旧文件资源 old_file_ids = await self._get_file_ids_from_variable(variable) - # 更新值 - if value is not None: - variable.value = value - - # 更新类型 - if var_type is not None: - variable.metadata.var_type = var_type - - # 更新描述 - if description is not None: - variable.metadata.description = description + # 🔑 重要:如果类型改变,需要重新创建变量对象 + if var_type is not None and var_type != variable.metadata.var_type: + from .variables import create_variable + # 创建新的元数据 + old_metadata = variable.metadata + old_metadata.var_type = var_type + if description is not None: + old_metadata.description = description + + # 创建新类型的变量对象 + variable = create_variable(old_metadata, value) + + # 更新到字典中 + self._variables[name] = variable + else: + # 类型未改变,正常更新 + if description is not None: + variable.metadata.description = description + if value is not None: + variable.value = value # 🔑 新增:清理被替换的文件 if value is not None: @@ -139,7 +148,6 @@ class BaseVariablePool(ABC): # 持久化到数据库 await self._persist_variable(variable) - logger.info(f"已更新变量: {name} 从池 {self.pool_id}, 值为{value}") return variable async def delete_variable(self, name: str) -> bool: @@ -164,7 +172,6 @@ class BaseVariablePool(ABC): # 从数据库删除 await self._delete_variable_from_db(variable) - logger.info(f"已删除变量: {name} 从池 {self.pool_id}") return True async def _cleanup_file_resources_if_needed(self, variable: BaseVariable) -> None: @@ -198,7 +205,6 @@ class BaseVariablePool(ABC): if user_id: from apps.services.document import DocumentManager await DocumentManager.delete_document(user_id, actual_cleanup_ids) - logger.info(f"已清理变量 {variable.name} 关联的 {len(actual_cleanup_ids)} 个文件") if protected_file_ids: logger.info(f"保护了变量 {variable.name} 中 {len(protected_file_ids)} 个已绑定历史记录的文件") @@ -379,7 +385,6 @@ class BaseVariablePool(ABC): if user_id: from apps.services.document import DocumentManager 
diff --git a/apps/scheduler/variable/pool_base.py b/apps/scheduler/variable/pool_base.py
index a73cf0f7299979fca0125e3d69d5dcc6774bc2ec..79df334690260471bc44259f9a10c91082a20ff3 100644
--- a/apps/scheduler/variable/pool_base.py
+++ b/apps/scheduler/variable/pool_base.py
@@ -119,17 +119,26 @@ class BaseVariablePool(ABC):
         # 🔑 新增:对于文件类型变量,在更新前清理旧文件资源
         old_file_ids = await self._get_file_ids_from_variable(variable)
 
-        # 更新值
-        if value is not None:
-            variable.value = value
-
-        # 更新类型
-        if var_type is not None:
-            variable.metadata.var_type = var_type
-
-        # 更新描述
-        if description is not None:
-            variable.metadata.description = description
+        # 🔑 重要:如果类型改变,需要重新创建变量对象
+        if var_type is not None and var_type != variable.metadata.var_type:
+            from .variables import create_variable
+            # 创建新的元数据
+            old_metadata = variable.metadata
+            old_metadata.var_type = var_type
+            if description is not None:
+                old_metadata.description = description
+
+            # 创建新类型的变量对象
+            variable = create_variable(old_metadata, value)
+
+            # 更新到字典中
+            self._variables[name] = variable
+        else:
+            # 类型未改变,正常更新
+            if description is not None:
+                variable.metadata.description = description
+            if value is not None:
+                variable.value = value
 
         # 🔑 新增:清理被替换的文件
         if value is not None:
@@ -139,7 +148,6 @@ class BaseVariablePool(ABC):
 
         # 持久化到数据库
         await self._persist_variable(variable)
-        logger.info(f"已更新变量: {name} 从池 {self.pool_id}, 值为{value}")
         return variable
 
     async def delete_variable(self, name: str) -> bool:
@@ -164,7 +172,6 @@ class BaseVariablePool(ABC):
 
         # 从数据库删除
         await self._delete_variable_from_db(variable)
-        logger.info(f"已删除变量: {name} 从池 {self.pool_id}")
         return True
 
     async def _cleanup_file_resources_if_needed(self, variable: BaseVariable) -> None:
@@ -198,7 +205,6 @@ class BaseVariablePool(ABC):
             if user_id:
                 from apps.services.document import DocumentManager
                 await DocumentManager.delete_document(user_id, actual_cleanup_ids)
-                logger.info(f"已清理变量 {variable.name} 关联的 {len(actual_cleanup_ids)} 个文件")
 
         if protected_file_ids:
             logger.info(f"保护了变量 {variable.name} 中 {len(protected_file_ids)} 个已绑定历史记录的文件")
@@ -379,7 +385,6 @@ class BaseVariablePool(ABC):
             if user_id:
                 from apps.services.document import DocumentManager
                 await DocumentManager.delete_document(user_id, actual_cleanup_ids)
-                logger.info(f"已清理变量 {variable.name} 被替换的 {len(actual_cleanup_ids)} 个文件")
 
         if protected_file_ids:
             logger.info(f"保护了变量 {variable.name} 中 {len(protected_file_ids)} 个已绑定历史记录的文件")
@@ -702,13 +707,34 @@ class FlowVariablePool(BaseVariablePool):
             if not force_system_update and getattr(variable.metadata, 'is_system', False):
                 raise PermissionError(f"系统变量 {name} 不允许直接修改")
 
-            # 更新变量
-            if value is not None:
-                variable.value = value
-            if var_type is not None:
-                variable.metadata.var_type = var_type
-            if description is not None:
-                variable.metadata.description = description
+            # 🔑 重要:如果类型改变,需要重新创建变量对象
+            if var_type is not None and var_type != variable.metadata.var_type:
+                from .variables import create_variable
+                # 创建新的元数据
+                old_metadata = variable.metadata
+                old_metadata.var_type = var_type
+                if description is not None:
+                    old_metadata.description = description
+
+                # 创建新类型的变量对象
+                variable = create_variable(old_metadata, value)
+
+                # 更新时间戳
+                from datetime import datetime, UTC
+                variable.metadata.updated_at = datetime.now(UTC)
+
+                # 更新到字典中
+                self._system_templates[name] = variable
+            else:
+                # 类型未改变,正常更新
+                if description is not None:
+                    variable.metadata.description = description
+                if value is not None:
+                    variable.value = value
+
+                # 更新时间戳
+                from datetime import datetime, UTC
+                variable.metadata.updated_at = datetime.now(UTC)
 
             # 持久化
             await self._persist_variable(variable)
@@ -718,13 +744,34 @@ class FlowVariablePool(BaseVariablePool):
         elif name in self._conversation_templates:
             variable = self._conversation_templates[name]
 
-            # 更新变量
-            if value is not None:
-                variable.value = value
-            if var_type is not None:
-                variable.metadata.var_type = var_type
-            if description is not None:
-                variable.metadata.description = description
+            # 🔑 重要:如果类型改变,需要重新创建变量对象
+            if var_type is not None and var_type != variable.metadata.var_type:
+                from .variables import create_variable
+                # 创建新的元数据
+                old_metadata = variable.metadata
+                old_metadata.var_type = var_type
+                if description is not None:
+                    old_metadata.description = description
+
+                # 创建新类型的变量对象
+                variable = create_variable(old_metadata, value)
+
+                # 更新时间戳
+                from datetime import datetime, UTC
+                variable.metadata.updated_at = datetime.now(UTC)
+
+                # 更新到字典中
+                self._conversation_templates[name] = variable
+            else:
+                # 类型未改变,正常更新
+                if description is not None:
+                    variable.metadata.description = description
+                if value is not None:
+                    variable.value = value
+
+                # 更新时间戳
+                from datetime import datetime, UTC
+                variable.metadata.updated_at = datetime.now(UTC)
 
             # 持久化
             await self._persist_variable(variable)
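Recreating the variable object on a type change matters because each variable class owns its own value handling; mutating var_type in place would leave a value coerced by the old class. A toy version of the factory dispatch (class names hypothetical; the real factory is create_variable in .variables):

from dataclasses import dataclass

@dataclass
class Metadata:
    var_type: str
    description: str = ""

@dataclass
class StringVariable:
    metadata: Metadata
    value: str = ""

@dataclass
class NumberVariable:
    metadata: Metadata
    value: float = 0.0

def create_variable(metadata: Metadata, value):
    """Hypothetical stand-in: dispatch on the (possibly just-changed) var_type."""
    classes = {"string": StringVariable, "number": NumberVariable}
    return classes[metadata.var_type](metadata=metadata, value=value)

meta = Metadata(var_type="string")
meta.var_type = "number"              # caller requests a type change
var = create_variable(meta, 3.14)     # a fresh object of the right class
assert isinstance(var, NumberVariable)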
diff --git a/apps/schemas/appcenter.py b/apps/schemas/appcenter.py
index 340035394ddeba770640359831695036ef8babca..85fa1e65fe1c04ee03cf9c663fd70bd31f342655 100644
--- a/apps/schemas/appcenter.py
+++ b/apps/schemas/appcenter.py
@@ -63,7 +63,7 @@ class AppData(BaseModel):
         default=[], alias="recommendedQuestions", description="推荐问题", max_length=3)
     history_len: int = Field(3, alias="dialogRounds", ge=1, le=10, description="对话轮次(1~10)")
     llm: str = Field(default="", description="大模型ID")
-    enable_thinking: bool = Field(default=True, alias="enableThinking", description="是否启用思维链")
+    enable_thinking: bool = Field(default=False, alias="enableThinking", description="是否启用思维链")
     permission: AppPermissionData = Field(
         default_factory=lambda: AppPermissionData(authorizedUsers=None), description="权限配置")
     workflows: list[AppFlowInfo] = Field(default=[], description="工作流信息列表")
diff --git a/apps/schemas/flow.py b/apps/schemas/flow.py
index e24af608e5f0203b7b2273860df282d18f58d89d..450c718149dac57136857d9421f9fe9a432f4536 100644
--- a/apps/schemas/flow.py
+++ b/apps/schemas/flow.py
@@ -151,6 +151,7 @@ class AppMetadata(MetadataBase):
     links: list[AppLink] = Field(description="相关链接", default=[])
     first_questions: list[str] = Field(description="首次提问", default=[])
     llm_id: str = Field(description="大模型ID", default="")
+    enable_thinking: bool = Field(description="是否启用思维链", default=False)
     history_len: int = Field(description="对话轮次", default=3, le=10)
     permission: Permission | None = Field(description="应用权限配置", default=None)
     flows: list[AppFlow] = Field(description="Flow列表", default=[])
diff --git a/apps/services/appcenter.py b/apps/services/appcenter.py
index c9badc0c47f0441ffe31813005e73d8141f38fbc..6ab42c8d3c9f245047c37973413a53103637204e 100644
--- a/apps/services/appcenter.py
+++ b/apps/services/appcenter.py
@@ -469,6 +469,28 @@ class AppCenterManager:
         else:  # 对应 create_app (app_data is None, published 参数为 None)
             metadata.published = False
 
+        # 处理llm_id字段 - 工作流应用也可以配置模型用于理解上下文、问题改写和记忆存储
+        if data is not None and hasattr(data, "llm"):
+            # 创建或更新应用场景,使用传入的 llm
+            metadata.llm_id = data.llm if data.llm else ""
+        elif app_data is not None and hasattr(app_data, "llm_id"):
+            # 更新应用发布状态场景,使用 app_data 中的 llm_id
+            metadata.llm_id = app_data.llm_id if app_data.llm_id else ""
+        else:
+            # 默认为空字符串,使用系统默认模型
+            metadata.llm_id = ""
+
+        # 处理enable_thinking字段 - 工作流应用的思维链配置
+        if data is not None and hasattr(data, "enable_thinking"):
+            # 创建或更新应用场景,使用传入的 enable_thinking 状态
+            metadata.enable_thinking = data.enable_thinking
+        elif app_data is not None and hasattr(app_data, "enable_thinking"):
+            # 更新应用发布状态场景,使用 app_data 中的 enable_thinking
+            metadata.enable_thinking = app_data.enable_thinking
+        else:
+            # 默认值:工作流应用默认关闭思维链(因为会影响性能)
+            metadata.enable_thinking = False
+
         return metadata
 
     @staticmethod
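The two hasattr chains implement the same precedence: incoming request data wins, then the stored app_data, then a safe default. Condensed into one hypothetical helper for the enable_thinking case:

def resolve_enable_thinking(data, app_data) -> bool:
    """Sketch of the precedence implemented above (objects hypothetical)."""
    if data is not None and hasattr(data, "enable_thinking"):
        return data.enable_thinking          # create/update request wins
    if app_data is not None and hasattr(app_data, "enable_thinking"):
        return app_data.enable_thinking      # stored app configuration
    return False                             # workflow apps default to no thinking chain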
diff --git a/apps/services/llm.py b/apps/services/llm.py
index 25baaf51db1b6ae78d8b48a0c7f829460b4714b9..e7134fe0c114c5dd7f36e0bdfda046c41f3ccf61 100644
--- a/apps/services/llm.py
+++ b/apps/services/llm.py
@@ -607,3 +607,69 @@ class LLMManager:
             config.reranker,
             "System Reranker Model"
         )
+
+    @staticmethod
+    async def get_model_capabilities(user_sub: str, llm_id: str) -> dict:
+        """
+        获取指定模型支持的参数配置项
+
+        :param user_sub: 用户ID
+        :param llm_id: 模型ID
+        :return: 模型能力配置字典
+        """
+        from apps.llm.model_types import ModelType
+
+        # 获取模型信息(支持系统模型和用户模型)
+        mongo = MongoDB()
+        llm_collection = mongo.get_collection("llm")
+
+        # 查询用户可访问的模型(包括系统模型和自己的模型)
+        result = await llm_collection.find_one({
+            "_id": llm_id,
+            "$or": [{"user_sub": user_sub}, {"user_sub": ""}]
+        })
+
+        if not result:
+            err = f"[LLMManager] LLM {llm_id} 不存在或无权限访问"
+            logger.error(err)
+            raise ValueError(err)
+
+        llm = LLM.model_validate(result)
+
+        # 从注册表获取模型能力
+        provider = llm.provider or get_provider_from_endpoint(llm.openai_base_url)
+        capabilities = model_registry.get_model_capabilities(provider, llm.model_name, ModelType.CHAT)
+
+        # 构建参数配置项响应
+        result_dict = {
+            "provider": provider,
+            "modelName": llm.model_name,
+            "modelType": "chat",
+
+            # 基础参数支持
+            "supportsTemperature": capabilities.supports_temperature if capabilities else True,
+            "supportsTopP": capabilities.supports_top_p if capabilities else True,
+            "supportsTopK": capabilities.supports_top_k if capabilities else False,
+            "supportsFrequencyPenalty": capabilities.supports_frequency_penalty if capabilities else False,
+            "supportsPresencePenalty": capabilities.supports_presence_penalty if capabilities else False,
+            "supportsMinP": capabilities.supports_min_p if capabilities else False,
+
+            # 高级功能
+            "supportsThinking": capabilities.supports_thinking if capabilities else False,
+            "canToggleThinking": capabilities.can_toggle_thinking if capabilities else False,
+            "supportsEnableSearch": capabilities.supports_enable_search if capabilities else False,
+            "supportsFunctionCalling": capabilities.supports_function_calling if capabilities else True,
+            "supportsJsonMode": capabilities.supports_json_mode if capabilities else True,
+            "supportsStructuredOutput": capabilities.supports_structured_output if capabilities else False,
+
+            # 上下文支持(所有chat模型都支持)
+            "supportsContext": True,
+
+            # 参数名称
+            "maxTokensParam": capabilities.max_tokens_param if capabilities else "max_tokens",
+
+            # 备注信息
+            "notes": llm.notes or ""
+        }
+
+        return result_dict
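A frontend can use this method to grey out unsupported parameter controls. An illustrative return value (hand-written, not captured from a live service), combining the qwen provider defaults in providers.conf below with the qwen:qwen-plus overrides in models.conf:

expected = {
    "provider": "qwen",
    "modelName": "qwen-plus",
    "modelType": "chat",
    "supportsTemperature": True,
    "supportsTopP": True,
    "supportsTopK": True,
    "supportsFrequencyPenalty": False,
    "supportsPresencePenalty": False,
    "supportsMinP": False,
    "supportsThinking": True,        # model-level override
    "canToggleThinking": True,       # model-level override
    "supportsEnableSearch": True,    # provider-level default
    "supportsFunctionCalling": True,
    "supportsJsonMode": True,
    "supportsStructuredOutput": False,
    "supportsContext": True,
    "maxTokensParam": "max_tokens",
    "notes": "",
}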
"Qwen/Qwen3-8B", + "model_type": "chat", + "series": "qwen3", + "display_name": "Qwen 3 8B", + "context_window": 32768, + "capabilities": { + "_inherit": "siliconflow.chat_capabilities", + "supports_thinking": true, + "can_toggle_thinking": true, + "supports_enable_thinking": true, + "supports_reasoning_content": true, + "supports_min_p": true + }, + "notes": "支持enable_thinking和min_p参数" + }, + + "siliconflow:deepseek-ai/DeepSeek-V2.5": { + "provider": "siliconflow", + "model_name": "deepseek-ai/DeepSeek-V2.5", + "model_type": "chat", + "series": "deepseek-v2", + "display_name": "DeepSeek V2.5", + "context_window": 65536, + "capabilities": { + "_inherit": "siliconflow.chat_capabilities", + "supports_thinking": true, + "can_toggle_thinking": false, + "supports_reasoning_content": false + }, + "notes": "内置reasoning能力,不支持关闭思维链" }, + + "siliconflow:deepseek-ai/DeepSeek-R1-0528-Qwen3-8B": { + "provider": "siliconflow", + "model_name": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B", + "model_type": "chat", + "series": "deepseek-r1", + "display_name": "DeepSeek R1 Qwen3 8B", + "context_window": 65536, + "capabilities": { + "_inherit": "siliconflow.chat_capabilities", + "supports_thinking": false, + "can_toggle_thinking": false, + "supports_enable_thinking": false + }, + "notes": "该模型不支持enable_thinking参数" + }, + + "siliconflow:01-ai/Yi-Lightning": { + "provider": "siliconflow", + "model_name": "01-ai/Yi-Lightning", + "model_type": "chat", + "series": "yi", + "display_name": "Yi Lightning", + "context_window": 16384, + "capabilities": { + "_inherit": "siliconflow.chat_capabilities" + }, + "notes": "标准对话模型" + }, + + "siliconflow:THUDM/glm-4-9b-chat": { + "provider": "siliconflow", + "model_name": "THUDM/glm-4-9b-chat", + "model_type": "chat", + "series": "glm-4", + "display_name": "GLM-4 9B", + "context_window": 131072, + "capabilities": { + "_inherit": "siliconflow.chat_capabilities" + }, + "notes": "标准对话模型" + }, + + "_comment_siliconflow_embedding": "=== 硅基流动 嵌入模型 ===", + + "siliconflow:BAAI/bge-large-zh-v1.5": { + "provider": "siliconflow", + "model_name": "BAAI/bge-large-zh-v1.5", + "model_type": "embedding", + "display_name": "BGE Large ZH v1.5", + "capabilities": { + "_inherit": "siliconflow.embedding_capabilities", + "max_input_tokens": 512, + "default_dimensions": 1024 + }, + "notes": "中文嵌入模型" + }, + + "siliconflow:BAAI/bge-m3": { + "provider": "siliconflow", + "model_name": "BAAI/bge-m3", + "model_type": "embedding", + "display_name": "BGE M3", + "capabilities": { + "_inherit": "siliconflow.embedding_capabilities", + "max_input_tokens": 8192, + "default_dimensions": 1024 + }, + "notes": "多语言嵌入模型" + }, + + "siliconflow:Qwen/Qwen3-Embedding": { + "provider": "siliconflow", + "model_name": "Qwen/Qwen3-Embedding", + "model_type": "embedding", + "display_name": "Qwen3 Embedding", + "capabilities": { + "_inherit": "siliconflow.embedding_capabilities", + "max_input_tokens": 8192, + "default_dimensions": 3584, + "supports_dimensions": true, + "available_dimensions": [512, 1024, 2048, 3584] + }, + "notes": "支持自定义维度的嵌入模型" + }, + + "_comment_siliconflow_rerank": "=== 硅基流动 重排序模型 ===", + + "siliconflow:BAAI/bge-reranker-v2-m3": { + "provider": "siliconflow", + "model_name": "BAAI/bge-reranker-v2-m3", + "model_type": "rerank", + "display_name": "BGE Reranker v2 M3", + "capabilities": { + "_inherit": "siliconflow.rerank_capabilities" + }, + "notes": "多语言重排序模型" + }, + + "_comment_qwen": "=== 阿里百炼 Qwen ===", + "qwen:qwen-plus": { "provider": "qwen", "model_name": "qwen-plus", - "supports_thinking": true, - 
"supports_function_calling": true, - "supports_json_mode": true, - "supports_structured_output": false, - "max_tokens_param": "max_tokens", - "notes": "Alibaba Qwen Plus model with thinking support" + "model_type": "chat", + "series": "qwen-api", + "display_name": "通义千问 Plus", + "context_window": 131072, + "capabilities": { + "_inherit": "qwen.chat_capabilities", + "supports_thinking": true, + "can_toggle_thinking": true, + "supports_enable_thinking": true, + "supports_reasoning_content": true + }, + "notes": "支持enable_thinking参数" }, + "qwen:qwen-turbo": { "provider": "qwen", "model_name": "qwen-turbo", - "supports_thinking": true, - "supports_function_calling": true, - "supports_json_mode": true, - "supports_structured_output": false, - "max_tokens_param": "max_tokens", - "notes": "Alibaba Qwen Turbo model with thinking support" + "model_type": "chat", + "series": "qwen-api", + "display_name": "通义千问 Turbo", + "context_window": 131072, + "capabilities": { + "_inherit": "qwen.chat_capabilities", + "supports_thinking": true, + "can_toggle_thinking": true, + "supports_enable_thinking": true, + "supports_reasoning_content": true + }, + "notes": "支持enable_thinking参数" }, + "qwen:qwen2.5-72b-instruct": { "provider": "qwen", "model_name": "qwen2.5-72b-instruct", - "supports_thinking": true, - "supports_function_calling": true, - "supports_json_mode": true, - "supports_structured_output": false, - "max_tokens_param": "max_tokens", - "notes": "Qwen 2.5 72B Instruct model" + "model_type": "chat", + "series": "qwen2.5", + "display_name": "Qwen 2.5 72B", + "context_window": 131072, + "capabilities": { + "_inherit": "qwen.chat_capabilities", + "supports_thinking": true, + "can_toggle_thinking": true, + "supports_enable_thinking": true, + "supports_reasoning_content": true + }, + "notes": "支持enable_thinking参数" }, + "qwen:qwen2.5-32b-instruct": { "provider": "qwen", "model_name": "qwen2.5-32b-instruct", - "supports_thinking": true, - "supports_function_calling": true, - "supports_json_mode": true, - "supports_structured_output": false, - "max_tokens_param": "max_tokens", - "notes": "Qwen 2.5 32B Instruct model" + "model_type": "chat", + "series": "qwen2.5", + "display_name": "Qwen 2.5 32B", + "context_window": 32768, + "capabilities": { + "_inherit": "qwen.chat_capabilities", + "supports_thinking": true, + "can_toggle_thinking": true, + "supports_enable_thinking": true, + "supports_reasoning_content": true + }, + "notes": "支持enable_thinking参数" }, + "qwen:qwen2.5-14b-instruct": { "provider": "qwen", "model_name": "qwen2.5-14b-instruct", - "supports_thinking": false, - "supports_function_calling": true, - "supports_json_mode": true, - "supports_structured_output": false, - "max_tokens_param": "max_tokens", - "notes": "Qwen 2.5 14B Instruct model" + "model_type": "chat", + "series": "qwen2.5", + "display_name": "Qwen 2.5 14B", + "context_window": 32768, + "capabilities": { + "_inherit": "qwen.chat_capabilities", + "supports_thinking": false + }, + "notes": "标准对话模型,不支持thinking" }, + "qwen:qwen2.5-7b-instruct": { "provider": "qwen", "model_name": "qwen2.5-7b-instruct", - "supports_thinking": false, - "supports_function_calling": true, - "supports_json_mode": true, - "supports_structured_output": false, - "max_tokens_param": "max_tokens", - "notes": "Qwen 2.5 7B Instruct model" - }, - "siliconflow:qwen2.5-coder-32b-instruct": { - "provider": "siliconflow", - "model_name": "qwen2.5-coder-32b-instruct", - "supports_thinking": true, - "supports_function_calling": true, - "supports_json_mode": true, - 
"supports_structured_output": false, - "max_tokens_param": "max_tokens", - "notes": "SiliconFlow Qwen 2.5 Coder 32B model" - }, - "siliconflow:qwen2.5-72b-instruct": { - "provider": "siliconflow", - "model_name": "qwen2.5-72b-instruct", - "supports_thinking": true, - "supports_function_calling": true, - "supports_json_mode": true, - "supports_structured_output": false, - "max_tokens_param": "max_tokens", - "notes": "SiliconFlow Qwen 2.5 72B model" - }, - "siliconflow:deepseek-v2.5": { - "provider": "siliconflow", - "model_name": "deepseek-v2.5", - "supports_thinking": true, - "supports_function_calling": true, - "supports_json_mode": true, - "supports_structured_output": false, - "max_tokens_param": "max_tokens", - "notes": "SiliconFlow DeepSeek V2.5 model" - }, - "siliconflow:yi-lightning": { - "provider": "siliconflow", - "model_name": "yi-lightning", - "supports_thinking": false, - "supports_function_calling": true, - "supports_json_mode": true, - "supports_structured_output": false, - "max_tokens_param": "max_tokens", - "notes": "SiliconFlow Yi Lightning model" - }, - "siliconflow:glm-4-9b-chat": { - "provider": "siliconflow", - "model_name": "glm-4-9b-chat", - "supports_thinking": false, - "supports_function_calling": true, - "supports_json_mode": true, - "supports_structured_output": false, - "max_tokens_param": "max_tokens", - "notes": "SiliconFlow GLM-4 9B Chat model" - }, - "deepseek:deepseek-chat": { - "provider": "deepseek", + "model_type": "chat", + "series": "qwen2.5", + "display_name": "Qwen 2.5 7B", + "context_window": 32768, + "capabilities": { + "_inherit": "qwen.chat_capabilities", + "supports_thinking": false + }, + "notes": "标准对话模型,不支持thinking" + }, + + "_comment_qwen_deepseek": "=== 阿里百炼 DeepSeek系列 ===", + + "qwen:deepseek-chat": { + "provider": "qwen", "model_name": "deepseek-chat", - "supports_thinking": false, - "supports_function_calling": true, - "supports_json_mode": true, - "supports_structured_output": false, - "max_tokens_param": "max_tokens", - "notes": "DeepSeek Chat model" - }, - "deepseek:deepseek-reasoner": { - "provider": "deepseek", - "model_name": "deepseek-reasoner", - "supports_thinking": true, - "supports_function_calling": true, - "supports_json_mode": true, - "supports_structured_output": false, - "max_tokens_param": "max_tokens", - "notes": "DeepSeek Reasoner model with thinking capabilities" - }, - "deepseek:deepseek-r1": { - "provider": "deepseek", + "model_type": "chat", + "series": "deepseek", + "display_name": "DeepSeek Chat (百炼)", + "context_window": 65536, + "capabilities": { + "_inherit": "qwen.chat_capabilities", + "supports_thinking": false + }, + "notes": "标准对话模型" + }, + + "qwen:deepseek-r1": { + "provider": "qwen", "model_name": "deepseek-r1", - "supports_thinking": true, - "supports_function_calling": true, - "supports_json_mode": true, - "supports_structured_output": false, - "max_tokens_param": "max_tokens", - "notes": "DeepSeek R1 model with reasoning capabilities" + "model_type": "chat", + "series": "deepseek-r1", + "display_name": "DeepSeek R1 (百炼)", + "context_window": 65536, + "capabilities": { + "_inherit": "qwen.chat_capabilities", + "supports_thinking": true, + "can_toggle_thinking": false, + "supports_reasoning_content": true + }, + "notes": "推理模型,内置思维链,不支持关闭" + }, + + "_comment_qwen_kimi": "=== 阿里百炼 Kimi系列 ===", + + "qwen:moonshot-v1-8k": { + "provider": "qwen", + "model_name": "moonshot-v1-8k", + "model_type": "chat", + "series": "kimi", + "display_name": "Kimi 8K (百炼)", + "context_window": 8192, + "capabilities": { + 
"_inherit": "qwen.chat_capabilities" + }, + "notes": "Kimi系列模型" + }, + + "qwen:moonshot-v1-32k": { + "provider": "qwen", + "model_name": "moonshot-v1-32k", + "model_type": "chat", + "series": "kimi", + "display_name": "Kimi 32K (百炼)", + "context_window": 32768, + "capabilities": { + "_inherit": "qwen.chat_capabilities" + }, + "notes": "Kimi系列模型" + }, + + "qwen:moonshot-v1-128k": { + "provider": "qwen", + "model_name": "moonshot-v1-128k", + "model_type": "chat", + "series": "kimi", + "display_name": "Kimi 128K (百炼)", + "context_window": 131072, + "capabilities": { + "_inherit": "qwen.chat_capabilities" + }, + "notes": "Kimi系列模型" + }, + + "_comment_qwen_glm": "=== 阿里百炼 GLM系列 ===", + + "qwen:glm-4-plus": { + "provider": "qwen", + "model_name": "glm-4-plus", + "model_type": "chat", + "series": "glm-4", + "display_name": "GLM-4 Plus (百炼)", + "context_window": 131072, + "capabilities": { + "_inherit": "qwen.chat_capabilities" + }, + "notes": "GLM系列模型" + }, + + "_comment_qwen_embedding": "=== 阿里百炼 嵌入模型 ===", + + "qwen:text-embedding-v3": { + "provider": "qwen", + "model_name": "text-embedding-v3", + "model_type": "embedding", + "display_name": "通用文本向量 v3", + "capabilities": { + "_inherit": "qwen.embedding_capabilities", + "max_input_tokens": 8192, + "default_dimensions": 1024, + "supports_dimensions": true, + "available_dimensions": [1024, 768, 512] + }, + "notes": "通用文本嵌入" + }, + + "qwen:text-embedding-v2": { + "provider": "qwen", + "model_name": "text-embedding-v2", + "model_type": "embedding", + "display_name": "通用文本向量 v2", + "capabilities": { + "_inherit": "qwen.embedding_capabilities", + "max_input_tokens": 2048, + "default_dimensions": 1536 + }, + "notes": "通用文本嵌入" + }, + + "qwen:multimodal-embedding-one": { + "provider": "qwen", + "model_name": "multimodal-embedding-one", + "model_type": "embedding", + "display_name": "多模态向量", + "capabilities": { + "_inherit": "qwen.embedding_capabilities", + "max_input_tokens": 77, + "default_dimensions": 1536, + "supports_text_input": true, + "supports_image_input": true + }, + "notes": "支持文本和图像的多模态嵌入" + }, + + "_comment_qwen_rerank": "=== 阿里百炼 重排序模型 ===", + + "qwen:gte-rerank": { + "provider": "qwen", + "model_name": "gte-rerank", + "model_type": "rerank", + "display_name": "文本排序", + "capabilities": { + "_inherit": "qwen.rerank_capabilities", + "max_documents": 50, + "max_query_length": 2048 + }, + "notes": "文本重排序模型" }, + + "_comment_baichuan": "=== 百川智能 Baichuan ===", + "baichuan:baichuan2-turbo": { "provider": "baichuan", - "model_name": "baichuan2-turbo", - "supports_thinking": false, - "supports_function_calling": true, - "supports_json_mode": true, - "supports_structured_output": false, - "max_tokens_param": "max_tokens", - "notes": "Baichuan2 Turbo model" - }, - "spark:spark-lite": { - "provider": "spark", - "model_name": "spark-lite", - "supports_thinking": false, - "supports_function_calling": true, - "supports_json_mode": true, - "supports_structured_output": false, - "max_tokens_param": "max_tokens", - "notes": "iFlytek Spark Lite model" - }, - "wenxin:ernie-4.0-turbo-8k": { - "provider": "wenxin", - "model_name": "ernie-4.0-turbo-8k", - "supports_thinking": false, - "supports_function_calling": true, - "supports_json_mode": true, - "supports_structured_output": false, - "max_tokens_param": "max_tokens", - "notes": "Baidu Wenxin ERNIE 4.0 Turbo 8K model" + "model_name": "Baichuan2-Turbo", + "model_type": "chat", + "series": "baichuan2", + "display_name": "百川2 Turbo", + "context_window": 32768, + "capabilities": { + "_inherit": 
"baichuan.chat_capabilities" + }, + "notes": "标准对话模型" + }, + + "baichuan:baichuan3-turbo": { + "provider": "baichuan", + "model_name": "Baichuan3-Turbo", + "model_type": "chat", + "series": "baichuan3", + "display_name": "百川3 Turbo", + "context_window": 32768, + "capabilities": { + "_inherit": "baichuan.chat_capabilities" + }, + "notes": "标准对话模型" + }, + + "baichuan:baichuan3-turbo-128k": { + "provider": "baichuan", + "model_name": "Baichuan3-Turbo-128k", + "model_type": "chat", + "series": "baichuan3", + "display_name": "百川3 Turbo 128K", + "context_window": 131072, + "capabilities": { + "_inherit": "baichuan.chat_capabilities" + }, + "notes": "长上下文对话模型" + }, + + "_comment_baichuan_embedding": "=== 百川智能 嵌入模型 ===", + + "baichuan:baichuan-text-embedding": { + "provider": "baichuan", + "model_name": "Baichuan-Text-Embedding", + "model_type": "embedding", + "display_name": "百川文本嵌入", + "capabilities": { + "_inherit": "baichuan.embedding_capabilities", + "max_input_tokens": 512, + "default_dimensions": 1024 + }, + "notes": "文本嵌入模型" } - }, - "version": "1.0" + } } + diff --git a/deploy/chart/euler_copilot/configs/framework/providers.conf b/deploy/chart/euler_copilot/configs/framework/providers.conf new file mode 100644 index 0000000000000000000000000000000000000000..987f7c9296e9ac79168e65f1189991f11d311288 --- /dev/null +++ b/deploy/chart/euler_copilot/configs/framework/providers.conf @@ -0,0 +1,142 @@ +{ + "_comment": "供应商能力配置 - 根据API手册定义各供应商的默认能力", + "version": "2.0", + "providers": { + "siliconflow": { + "provider_name": "siliconflow", + "api_base_url": "https://api.siliconflow.cn/v1", + "auth_type": "bearer", + "auth_header": "Authorization", + "chat_capabilities": { + "supports_streaming": true, + "supports_function_calling": true, + "supports_json_mode": true, + "supports_structured_output": false, + "supports_thinking": false, + "can_toggle_thinking": false, + "supports_reasoning_content": false, + "max_tokens_param": "max_tokens", + "supports_temperature": true, + "supports_top_p": true, + "supports_top_k": true, + "supports_frequency_penalty": true, + "supports_presence_penalty": false, + "supports_min_p": false, + "supports_response_format": true, + "supports_tools": true, + "supports_tool_choice": true, + "supports_extra_body": true, + "supports_stream_options": true, + "supports_enable_thinking": false, + "supports_thinking_budget": false, + "supports_enable_search": false + }, + "embedding_capabilities": { + "max_input_tokens": 512, + "supports_batch": true, + "default_dimensions": 1024, + "supports_encoding_format": true, + "supports_dimensions": false, + "available_dimensions": [], + "supports_text_input": true, + "supports_image_input": false + }, + "rerank_capabilities": { + "max_documents": 100, + "max_query_length": 512, + "supports_top_n": true, + "supports_return_documents": true + }, + "notes": "硅基流动API - 根据官方文档 https://docs.siliconflow.cn/cn/api-reference/" + }, + "qwen": { + "provider_name": "qwen", + "api_base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1", + "auth_type": "bearer", + "auth_header": "Authorization", + "chat_capabilities": { + "supports_streaming": true, + "supports_function_calling": true, + "supports_json_mode": true, + "supports_structured_output": false, + "supports_thinking": false, + "can_toggle_thinking": false, + "supports_reasoning_content": false, + "max_tokens_param": "max_tokens", + "supports_temperature": true, + "supports_top_p": true, + "supports_top_k": true, + "supports_frequency_penalty": false, + "supports_presence_penalty": 
false, + "supports_min_p": false, + "supports_response_format": true, + "supports_tools": true, + "supports_tool_choice": true, + "supports_extra_body": true, + "supports_stream_options": true, + "supports_enable_thinking": false, + "supports_thinking_budget": false, + "supports_enable_search": true + }, + "embedding_capabilities": { + "max_input_tokens": 2048, + "supports_batch": true, + "default_dimensions": 1536, + "supports_encoding_format": true, + "supports_dimensions": false, + "available_dimensions": [], + "supports_text_input": true, + "supports_image_input": false + }, + "rerank_capabilities": { + "max_documents": 100, + "max_query_length": 512, + "supports_top_n": true, + "supports_return_documents": true + }, + "notes": "阿里百炼API - 根据官方文档 https://bailian.console.aliyun.com/" + }, + "baichuan": { + "provider_name": "baichuan", + "api_base_url": "https://api.baichuan-ai.com/v1", + "auth_type": "bearer", + "auth_header": "Authorization", + "chat_capabilities": { + "supports_streaming": true, + "supports_function_calling": true, + "supports_json_mode": true, + "supports_structured_output": false, + "supports_thinking": false, + "can_toggle_thinking": false, + "supports_reasoning_content": false, + "max_tokens_param": "max_tokens", + "supports_temperature": true, + "supports_top_p": true, + "supports_top_k": true, + "supports_frequency_penalty": false, + "supports_presence_penalty": false, + "supports_min_p": false, + "supports_response_format": false, + "supports_tools": true, + "supports_tool_choice": true, + "supports_extra_body": false, + "supports_stream_options": false, + "supports_enable_thinking": false, + "supports_thinking_budget": false, + "supports_enable_search": true + }, + "embedding_capabilities": { + "max_input_tokens": 512, + "supports_batch": true, + "default_dimensions": 1024, + "supports_encoding_format": false, + "supports_dimensions": false, + "available_dimensions": [], + "supports_text_input": true, + "supports_image_input": false + }, + "notes": "百川智能API - 根据官方文档 https://platform.baichuan-ai.com/docs/api" + } + } +} + diff --git a/deploy/chart/euler_copilot/templates/framework/framework-config.yaml b/deploy/chart/euler_copilot/templates/framework/framework-config.yaml index 0a4f8d4162597533837878d5730b8b8beb28be30..4ec4c91734e3c36a8ed034cf5c2a323b03994201 100644 --- a/deploy/chart/euler_copilot/templates/framework/framework-config.yaml +++ b/deploy/chart/euler_copilot/templates/framework/framework-config.yaml @@ -11,6 +11,8 @@ data: {{- else }} {{ tpl (.Files.Get "configs/framework/config.toml") . | indent 4 }} {{- end }} + providers.conf: |- +{{ .Files.Get "configs/framework/providers.conf" | indent 4 }} models.conf: |- {{ .Files.Get "configs/framework/models.conf" | indent 4 }} copy-config.yaml: |-