From d26b571918ded1e63191a2b5a64df55a57cfdc29 Mon Sep 17 00:00:00 2001 From: zxstty Date: Tue, 23 Sep 2025 17:54:52 +0800 Subject: [PATCH] =?UTF-8?q?=E9=83=A8=E5=88=86=E6=83=85=E5=86=B5=E4=B8=8B?= =?UTF-8?q?=E5=85=B3=E9=97=ADthink=E8=BF=87=E7=A8=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- apps/llm/function.py | 15 +++++--- apps/llm/patterns/facts.py | 3 +- apps/llm/reasoning.py | 19 ++++++---- apps/scheduler/call/llm/llm.py | 14 +++++--- apps/scheduler/mcp/plan.py | 4 +-- apps/scheduler/mcp_agent/plan.py | 17 ++++++--- apps/scheduler/scheduler/scheduler.py | 3 +- apps/services/rag.py | 52 ++++++++++++++------------- 8 files changed, 76 insertions(+), 51 deletions(-) diff --git a/apps/llm/function.py b/apps/llm/function.py index 0d1fbf9a7..542d00e54 100644 --- a/apps/llm/function.py +++ b/apps/llm/function.py @@ -42,7 +42,8 @@ class FunctionLLM: self._params = { "model": self._config.model, "messages": [], - "timeout": 300 + "timeout": 300, + "extra_body": {"enable_thinking": False} } if self._config.backend == "ollama": @@ -62,7 +63,8 @@ class FunctionLLM: import openai if not self._config.api_key: - self._client = openai.AsyncOpenAI(base_url=self._config.endpoint) + self._client = openai.AsyncOpenAI( + base_url=self._config.endpoint) else: self._client = openai.AsyncOpenAI( base_url=self._config.endpoint, @@ -123,9 +125,11 @@ class FunctionLLM: }, ] - response = await self._client.chat.completions.create(**self._params) # type: ignore[arg-type] + # type: ignore[arg-type] + response = await self._client.chat.completions.create(**self._params) try: - logger.info("[FunctionCall] 大模型输出:%s", response.choices[0].message.tool_calls[0].function.arguments) + logger.info("[FunctionCall] 大模型输出:%s", + response.choices[0].message.tool_calls[0].function.arguments) return response.choices[0].message.tool_calls[0].function.arguments except Exception: # noqa: BLE001 ans = response.choices[0].message.content @@ -194,7 +198,8 @@ class FunctionLLM: "format": schema, }) - response = await self._client.chat(**self._params) # type: ignore[arg-type] + # type: ignore[arg-type] + response = await self._client.chat(**self._params) return await self.process_response(response.message.content or "") async def call( diff --git a/apps/llm/patterns/facts.py b/apps/llm/patterns/facts.py index 46707d115..334dbf0e0 100644 --- a/apps/llm/patterns/facts.py +++ b/apps/llm/patterns/facts.py @@ -126,7 +126,8 @@ class Facts(CorePattern): messages = [ {"role": "system", "content": self.system_prompt[language]}, - {"role": "user", "content": self.user_prompt[language].format(conversation=conversation)}, + {"role": "user", "content": self.user_prompt[language].format( + conversation=conversation)}, ] result = "" llm = ReasoningLLM() diff --git a/apps/llm/reasoning.py b/apps/llm/reasoning.py index fddc84db8..c72601b62 100644 --- a/apps/llm/reasoning.py +++ b/apps/llm/reasoning.py @@ -32,9 +32,11 @@ class ReasoningContent: if ( hasattr(chunk.choices[0].delta, "reasoning_content") - and chunk.choices[0].delta.reasoning_content is not None # type: ignore[attr-defined] + # type: ignore[attr-defined] + and chunk.choices[0].delta.reasoning_content is not None ): - reason = "" + chunk.choices[0].delta.reasoning_content # type: ignore[attr-defined] + # type: ignore[attr-defined] + reason = "" + chunk.choices[0].delta.reasoning_content self.reasoning_type = "args" self.is_reasoning = True else: @@ -64,7 +66,8 @@ class ReasoningContent: if hasattr( chunk.choices[0].delta, 
"reasoning_content") and chunk.choices[0].delta.reasoning_content is not None: # type: ignore[attr-defined] # 仍在推理中,继续添加推理内容 - reason = chunk.choices[0].delta.reasoning_content or "" # type: ignore[attr-defined] + # type: ignore[attr-defined] + reason = chunk.choices[0].delta.reasoning_content or "" else: # 推理结束,设置标志并添加结束标签 self.is_reasoning = False @@ -121,7 +124,8 @@ class ReasoningLLM: """验证消息格式是否正确""" if messages[0]["role"] != "system": # 添加默认系统消息 - messages.insert(0, {"role": "system", "content": "You are a helpful assistant."}) + messages.insert( + 0, {"role": "system", "content": "You are a helpful assistant."}) if messages[-1]["role"] != "user": err = f"消息格式错误,最后一个消息必须是用户消息:{messages[-1]}" @@ -135,6 +139,7 @@ class ReasoningLLM: max_tokens: int | None, temperature: float | None, model: str | None = None, + enable_thinking: bool = False ) -> AsyncGenerator[ChatCompletionChunk, None]: """创建流式响应""" if model is None: @@ -146,7 +151,8 @@ class ReasoningLLM: temperature=temperature or self._config.temperature, stream=True, stream_options={"include_usage": True}, - timeout=300 + timeout=300, + extra_body={"enable_thinking": enable_thinking} ) # type: ignore[] async def call( # noqa: C901, PLR0912, PLR0913 @@ -158,6 +164,7 @@ class ReasoningLLM: streaming: bool = True, result_only: bool = True, model: str | None = None, + enable_thinking: bool = False ) -> AsyncGenerator[str, None]: """调用大模型,分为流式和非流式两种""" # 检查max_tokens和temperature @@ -168,7 +175,7 @@ class ReasoningLLM: if model is None: model = self._config.model msg_list = self._validate_messages(messages) - stream = await self._create_stream(msg_list, max_tokens, temperature, model) + stream = await self._create_stream(msg_list, max_tokens, temperature, model, enable_thinking) reasoning = ReasoningContent() reasoning_content = "" result = "" diff --git a/apps/scheduler/call/llm/llm.py b/apps/scheduler/call/llm/llm.py index 1e9bcb948..790f4564c 100644 --- a/apps/scheduler/call/llm/llm.py +++ b/apps/scheduler/call/llm/llm.py @@ -34,9 +34,12 @@ class LLM(CoreCall, input_model=LLMInput, output_model=LLMOutput): # 大模型参数 temperature: float = Field(description="大模型温度(随机化程度)", default=0.7) enable_context: bool = Field(description="是否启用上下文", default=True) - step_history_size: int = Field(description="上下文信息中包含的步骤历史数量", default=3, ge=1, le=10) - system_prompt: str = Field(description="大模型系统提示词", default="You are a helpful assistant.") - user_prompt: str = Field(description="大模型用户提示词", default=LLM_DEFAULT_PROMPT) + step_history_size: int = Field( + description="上下文信息中包含的步骤历史数量", default=3, ge=1, le=10) + system_prompt: str = Field( + description="大模型系统提示词", default="You are a helpful assistant.") + user_prompt: str = Field(description="大模型用户提示词", + default=LLM_DEFAULT_PROMPT) i18n_info: ClassVar[dict[str, dict]] = { LanguageType.CHINESE: { @@ -74,7 +77,8 @@ class LLM(CoreCall, input_model=LLMInput, output_model=LLMOutput): context_prompt = "无背景信息。" # 参数 - time = datetime.now(tz=pytz.timezone("Asia/Shanghai")).strftime("%Y-%m-%d %H:%M:%S") + time = datetime.now(tz=pytz.timezone("Asia/Shanghai") + ).strftime("%Y-%m-%d %H:%M:%S") formatter = { "time": time, "context": context_prompt, @@ -110,7 +114,7 @@ class LLM(CoreCall, input_model=LLMInput, output_model=LLMOutput): data = LLMInput(**input_data) try: llm = ReasoningLLM() - async for chunk in llm.call(messages=data.message): + async for chunk in llm.call(messages=data.message, enable_thinking=True): if not chunk: continue yield CallOutputChunk(type=CallOutputType.TEXT, content=chunk) diff --git 
a/apps/scheduler/mcp/plan.py b/apps/scheduler/mcp/plan.py index 78d695f2c..04abc7706 100644 --- a/apps/scheduler/mcp/plan.py +++ b/apps/scheduler/mcp/plan.py @@ -35,7 +35,6 @@ class MCPPlanner: # 解析为结构化数据 return await self._parse_plan_result(result, max_steps) - async def _get_reasoning_plan(self, tool_list: list[MCPTool], max_steps: int) -> str: """获取推理大模型的结果""" # 格式化Prompt @@ -67,7 +66,6 @@ class MCPPlanner: return result - async def _parse_plan_result(self, result: str, max_steps: int) -> MCPPlan: """将推理结果解析为结构化数据""" # 格式化Prompt @@ -86,7 +84,6 @@ class MCPPlanner: plan = await json_generator.generate() return MCPPlan.model_validate(plan) - async def generate_answer(self, plan: MCPPlan, memory: str) -> str: """生成最终回答""" template = self._env.from_string(FINAL_ANSWER[self.language]) @@ -102,6 +99,7 @@ class MCPPlanner: [{"role": "user", "content": prompt}], streaming=False, temperature=0.07, + enable_thinking=True ): result += chunk diff --git a/apps/scheduler/mcp_agent/plan.py b/apps/scheduler/mcp_agent/plan.py index 489efc741..a51b0da76 100644 --- a/apps/scheduler/mcp_agent/plan.py +++ b/apps/scheduler/mcp_agent/plan.py @@ -155,13 +155,15 @@ class MCPPlanner(MCPBase): prompt = template.render( goal=user_goal, error_message=error_message, - current_plan=current_plan.model_dump(exclude_none=True, by_alias=True), + current_plan=current_plan.model_dump( + exclude_none=True, by_alias=True), history=history, ) result = await MCPPlanner.get_resoning_result(prompt, reasoning_llm) # 解析为结构化数据 schema = RestartStepIndex.model_json_schema() - schema["properties"]["start_index"]["maximum"] = len(current_plan.plans) - 1 + schema["properties"]["start_index"]["maximum"] = len( + current_plan.plans) - 1 schema["properties"]["start_index"]["minimum"] = 0 restart_index = await MCPPlanner._parse_result(result, schema) # 使用RestartStepIndex模型解析结果 @@ -204,7 +206,8 @@ class MCPPlanner(MCPBase): if is_replan: template = _env.from_string(RECREATE_PLAN[language]) prompt = template.render( - current_plan=current_plan.model_dump(exclude_none=True, by_alias=True), + current_plan=current_plan.model_dump( + exclude_none=True, by_alias=True), error_message=error_message, goal=user_goal, tools=tool_list, @@ -344,7 +347,8 @@ class MCPPlanner(MCPBase): template = _env.from_string(TOOL_EXECUTE_ERROR_TYPE_ANALYSIS[language]) prompt = template.render( goal=user_goal, - current_plan=current_plan.model_dump(exclude_none=True, by_alias=True), + current_plan=current_plan.model_dump( + exclude_none=True, by_alias=True), tool_name=tool.name, tool_description=tool.description, input_param=input_param, @@ -416,7 +420,8 @@ class MCPPlanner(MCPBase): language: LanguageType = LanguageType.CHINESE, ) -> str: """将错误信息转换为工具描述""" - template = _env.from_string(CHANGE_ERROR_MESSAGE_TO_DESCRIPTION[language]) + template = _env.from_string( + CHANGE_ERROR_MESSAGE_TO_DESCRIPTION[language]) prompt = template.render( error_message=error_message, tool_name=tool.name, @@ -468,5 +473,7 @@ class MCPPlanner(MCPBase): [{"role": "user", "content": prompt}], streaming=True, temperature=0.07, + result_only=False, + enable_thinking=True ): yield chunk diff --git a/apps/scheduler/scheduler/scheduler.py b/apps/scheduler/scheduler/scheduler.py index 2bd51e745..7060272f2 100644 --- a/apps/scheduler/scheduler/scheduler.py +++ b/apps/scheduler/scheduler/scheduler.py @@ -305,7 +305,8 @@ class Scheduler: servers_id=servers_id, background=background, agent_id=app_info.app_id, - params=post_body.params + params=post_body.params, + resoning_llm=reasion_llm ) # 开始运行 
logger.info("[Scheduler] 运行Executor") diff --git a/apps/services/rag.py b/apps/services/rag.py index 918962498..514931479 100644 --- a/apps/services/rag.py +++ b/apps/services/rag.py @@ -31,7 +31,7 @@ class RAG: user_prompt: dict[LanguageType, str] = { LanguageType.CHINESE: r""" - 你是openEuler社区的智能助手。请结合给出的背景信息, 回答用户的提问,并且基于给出的背景信息在相关句子后进行脚注。 + 你是智能助手。请结合给出的背景信息, 回答用户的提问,并且基于给出的背景信息在相关句子后进行脚注。 一个例子将在中给出。 上下文背景信息将在中给出。 用户的提问将在中给出。 @@ -41,6 +41,7 @@ class RAG: 3.脚注只出现在回答的句子的末尾,例如句号、问号等标点符号后面。 4.不要对脚注本身进行解释或说明。 5.请不要使用中的文档的id作为脚注。 + 6.请详细回答用户的问题。 @@ -79,48 +80,46 @@ class RAG: """, LanguageType.ENGLISH: r""" - You are a helpful assistant of openEuler community. Please answer the user's question based on the given background information and add footnotes after the related sentences. - An example will be given in . - The background information will be given in . - The user's question will be given in . + You are a helpful assistant. Please answer the user's question based on the given background information, + and provide footnotes based on the relevant sentences in the background information. + An example is given in . + The contextual background information is given in . + The user's question is given in . Note: - 1. Do not include any XML tags in the output, and do not make up any information. If you think the user's question is unrelated to the background information, please ignore the background information and directly answer. - 2. Your response should not exceed 250 words. + 1. Do not include any XML tags in the output, and do not make up any information. If you think the user's question is irrelevant to the background information, please ignore the background information and answer directly. + 2. The format of the footnotes is [[1]], [[2]], [[3]], etc., and the content of the footnotes is the id of the provided document. + 3. Footnotes only appear at the end of the sentences in the answer, such as after periods, question marks, and other punctuation marks. + 4. Do not explain or elaborate on the footnotes themselves. + 5. Please do not use the document ids in as footnotes. + 6. Please answer the user's question in detail. - openEuler community is an open source operating system community, committed to promoting the development of the Linux operating system. + The openEuler community is an open source operating system community dedicated to promoting the development of the Linux operating system. - openEuler community aims to provide users with a stable, secure, and efficient operating system platform, and support multiple hardware architectures. + The goal of the openEuler community is to provide users with a stable, secure, and efficient operating system platform that supports multiple hardware architectures. - + Members of the openEuler community come from all over the world, including developers, users, and enterprises. - Members of the openEuler community work together to promote the development of open source operating systems, and provide support and assistance to users. + Members of the openEuler community work together to promote the development of open source operating systems and provide support and assistance to users. - What is the goal of openEuler community? + What is the goal of the openEuler community? - openEuler community is an open source operating system community, committed to promoting the development of the Linux operating system. 
[[1]] - openEuler community aims to provide users with a stable, secure, and efficient operating system platform, and support multiple hardware architectures. [[1]] - - - - - {bac_info} - - - {user_question} - + The openEuler community is an open source operating system community dedicated to promoting the development of the Linux operating system. [[1]] + The goal of the openEuler community is to provide users with a stable, secure, and efficient operating system platform that supports multiple hardware architectures. [[1]] + + """, } @@ -151,7 +150,8 @@ class RAG: ) try: async with httpx.AsyncClient(timeout=30) as client: - data_json = tmp_data.model_dump(exclude_none=True, by_alias=True) + data_json = tmp_data.model_dump( + exclude_none=True, by_alias=True) response = await client.post(url, headers=headers, json=data_json) if response.status_code == status.HTTP_200_OK: result = response.json() @@ -161,7 +161,8 @@ class RAG: if data.kb_ids: try: async with httpx.AsyncClient(timeout=30) as client: - data_json = data.model_dump(exclude_none=True, by_alias=True) + data_json = data.model_dump( + exclude_none=True, by_alias=True) response = await client.post(url, headers=headers, json=data_json) # 检查响应状态码 if response.status_code == status.HTTP_200_OK: @@ -324,6 +325,7 @@ class RAG: temperature=0.7, result_only=False, model=llm.model_name, + enable_thinking=True ): chunk = buffer + chunk # 防止脚注被截断 -- Gitee
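
Reviewer note: the common thread of this patch is a new enable_thinking switch that is threaded from the call sites down into the OpenAI-compatible request body via extra_body, so that function-calling and planning paths skip the model's "thinking" phase while user-facing answers keep it. The sketch below is a minimal, self-contained illustration of that pattern, not the project's actual code: the endpoint, API key, model name, and prompt are placeholders, and it assumes an OpenAI-compatible server (for example vLLM serving a Qwen3-style model) that honors an "enable_thinking" field in the request body and streams reasoning tokens in a separate reasoning_content delta field.

    # Minimal sketch of the enable_thinking pattern introduced by this patch.
    # Assumptions: an OpenAI-compatible endpoint that reads "enable_thinking"
    # from the request body; base_url, api_key and model are placeholders.
    import asyncio

    import openai


    async def stream_answer(question: str, *, enable_thinking: bool = False) -> str:
        client = openai.AsyncOpenAI(
            base_url="http://localhost:8000/v1",  # placeholder endpoint
            api_key="EMPTY",                      # placeholder key
        )
        stream = await client.chat.completions.create(
            model="qwen3-32b",                    # placeholder model name
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": question},
            ],
            stream=True,
            stream_options={"include_usage": True},
            timeout=300,
            # Non-standard fields travel through extra_body; the server decides
            # whether to run its chain-of-thought ("thinking") phase.
            extra_body={"enable_thinking": enable_thinking},
        )
        answer = ""
        async for chunk in stream:
            if not chunk.choices:
                continue  # the final usage-only chunk carries no choices
            delta = chunk.choices[0].delta
            # When thinking is enabled, reasoning tokens arrive in a separate
            # field; here they are simply discarded instead of being wrapped
            # in <think> tags as ReasoningContent does.
            if getattr(delta, "reasoning_content", None):
                continue
            answer += delta.content or ""
        return answer


    if __name__ == "__main__":
        # Function-calling paths keep thinking off (as FunctionLLM now defaults);
        # user-facing paths such as LLM.exec and RAG pass enable_thinking=True.
        print(asyncio.run(stream_answer("What is openEuler?", enable_thinking=True)))

Usage follows the split the patch makes: structured-output and planning requests default the flag to False to save latency and tokens, while answer-generation requests opt in with enable_thinking=True.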