diff --git a/apps/llm/function.py b/apps/llm/function.py
index 0d1fbf9a79c325d13831d8823d88544df88dae9a..542d00e54423a9eb1917cbf4a3a95a96d903684f 100644
--- a/apps/llm/function.py
+++ b/apps/llm/function.py
@@ -42,7 +42,8 @@ class FunctionLLM:
         self._params = {
             "model": self._config.model,
             "messages": [],
-            "timeout": 300
+            "timeout": 300,
+            "extra_body": {"enable_thinking": False}
         }

         if self._config.backend == "ollama":
@@ -62,7 +63,8 @@ class FunctionLLM:
             import openai

             if not self._config.api_key:
-                self._client = openai.AsyncOpenAI(base_url=self._config.endpoint)
+                self._client = openai.AsyncOpenAI(
+                    base_url=self._config.endpoint)
             else:
                 self._client = openai.AsyncOpenAI(
                     base_url=self._config.endpoint,
@@ -123,9 +125,11 @@ class FunctionLLM:
             },
         ]

-        response = await self._client.chat.completions.create(**self._params)  # type: ignore[arg-type]
+        # type: ignore[arg-type]
+        response = await self._client.chat.completions.create(**self._params)
         try:
-            logger.info("[FunctionCall] 大模型输出:%s", response.choices[0].message.tool_calls[0].function.arguments)
+            logger.info("[FunctionCall] 大模型输出:%s",
+                        response.choices[0].message.tool_calls[0].function.arguments)
             return response.choices[0].message.tool_calls[0].function.arguments
         except Exception:  # noqa: BLE001
             ans = response.choices[0].message.content
@@ -194,7 +198,8 @@ class FunctionLLM:
             "format": schema,
         })

-        response = await self._client.chat(**self._params)  # type: ignore[arg-type]
+        # type: ignore[arg-type]
+        response = await self._client.chat(**self._params)
         return await self.process_response(response.message.content or "")

     async def call(
diff --git a/apps/llm/patterns/facts.py b/apps/llm/patterns/facts.py
index 46707d1158f886e5e4fc12ddacef508183eb9b70..334dbf0e061b8479ace020f18f99195d1b90db5c 100644
--- a/apps/llm/patterns/facts.py
+++ b/apps/llm/patterns/facts.py
@@ -126,7 +126,8 @@ class Facts(CorePattern):
         messages = [
             {"role": "system", "content": self.system_prompt[language]},
-            {"role": "user", "content": self.user_prompt[language].format(conversation=conversation)},
+            {"role": "user", "content": self.user_prompt[language].format(
+                conversation=conversation)},
         ]
         result = ""
         llm = ReasoningLLM()
diff --git a/apps/llm/reasoning.py b/apps/llm/reasoning.py
index fddc84db89d98bb8c29d96b4dee71aeed49804ea..c72601b627b923ba1de8f1faac1acc23a98d4bcf 100644
--- a/apps/llm/reasoning.py
+++ b/apps/llm/reasoning.py
@@ -32,9 +32,11 @@ class ReasoningContent:
         if (
             hasattr(chunk.choices[0].delta, "reasoning_content")
-            and chunk.choices[0].delta.reasoning_content is not None  # type: ignore[attr-defined]
+            # type: ignore[attr-defined]
+            and chunk.choices[0].delta.reasoning_content is not None
         ):
-            reason = "" + chunk.choices[0].delta.reasoning_content  # type: ignore[attr-defined]
+            # type: ignore[attr-defined]
+            reason = "" + chunk.choices[0].delta.reasoning_content
             self.reasoning_type = "args"
             self.is_reasoning = True
         else:
@@ -64,7 +66,8 @@ class ReasoningContent:
         if hasattr(
                 chunk.choices[0].delta, "reasoning_content") and chunk.choices[0].delta.reasoning_content is not None:  # type: ignore[attr-defined]
             # 仍在推理中,继续添加推理内容
-            reason = chunk.choices[0].delta.reasoning_content or ""  # type: ignore[attr-defined]
+            # type: ignore[attr-defined]
+            reason = chunk.choices[0].delta.reasoning_content or ""
         else:
             # 推理结束,设置标志并添加结束标签
             self.is_reasoning = False
@@ -121,7 +124,8 @@ class ReasoningLLM:
         """验证消息格式是否正确"""
         if messages[0]["role"] != "system":
             # 添加默认系统消息
-            messages.insert(0, {"role": "system", "content": "You are a helpful assistant."})
+            messages.insert(
+                0, {"role": "system", "content": "You are a helpful assistant."})

         if messages[-1]["role"] != "user":
             err = f"消息格式错误,最后一个消息必须是用户消息:{messages[-1]}"
@@ -135,6 +139,7 @@ class ReasoningLLM:
         max_tokens: int | None,
         temperature: float | None,
         model: str | None = None,
+        enable_thinking: bool = False
     ) -> AsyncGenerator[ChatCompletionChunk, None]:
         """创建流式响应"""
         if model is None:
@@ -146,7 +151,8 @@ class ReasoningLLM:
             temperature=temperature or self._config.temperature,
             stream=True,
             stream_options={"include_usage": True},
-            timeout=300
+            timeout=300,
+            extra_body={"enable_thinking": enable_thinking}
         )  # type: ignore[]

     async def call(  # noqa: C901, PLR0912, PLR0913
@@ -158,6 +164,7 @@ class ReasoningLLM:
         streaming: bool = True,
         result_only: bool = True,
         model: str | None = None,
+        enable_thinking: bool = False
     ) -> AsyncGenerator[str, None]:
         """调用大模型,分为流式和非流式两种"""
         # 检查max_tokens和temperature
@@ -168,7 +175,7 @@ class ReasoningLLM:
         if model is None:
             model = self._config.model
         msg_list = self._validate_messages(messages)
-        stream = await self._create_stream(msg_list, max_tokens, temperature, model)
+        stream = await self._create_stream(msg_list, max_tokens, temperature, model, enable_thinking)
         reasoning = ReasoningContent()
         reasoning_content = ""
         result = ""
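Note on the `extra_body={"enable_thinking": ...}` fields added in `function.py` and `reasoning.py`: the OpenAI Python SDK merges `extra_body` into the JSON request payload, so the flag reaches an OpenAI-compatible backend (for example a Qwen-style server) as a top-level request field. A minimal sketch of that pass-through; the endpoint URL and model name are illustrative, not taken from this repo:

```python
# Minimal sketch: extra_body is merged into the request body by the
# OpenAI Python SDK, so "enable_thinking" arrives as a top-level field
# that thinking-capable backends can honour.
import asyncio

import openai


async def main() -> None:
    # Assumed local OpenAI-compatible endpoint and model name.
    client = openai.AsyncOpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
    stream = await client.chat.completions.create(
        model="qwen3",
        messages=[{"role": "user", "content": "hello"}],
        stream=True,
        extra_body={"enable_thinking": False},  # same mechanism as the patch
    )
    async for chunk in stream:
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="")


asyncio.run(main())
```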
diff --git a/apps/scheduler/call/llm/llm.py b/apps/scheduler/call/llm/llm.py
index 1e9bcb94804a6c446db71695a3c68752a804f0b6..790f4564c4e679e78cd4a25ec0bd83491bf7aaeb 100644
--- a/apps/scheduler/call/llm/llm.py
+++ b/apps/scheduler/call/llm/llm.py
@@ -34,9 +34,12 @@ class LLM(CoreCall, input_model=LLMInput, output_model=LLMOutput):
     # 大模型参数
     temperature: float = Field(description="大模型温度(随机化程度)", default=0.7)
     enable_context: bool = Field(description="是否启用上下文", default=True)
-    step_history_size: int = Field(description="上下文信息中包含的步骤历史数量", default=3, ge=1, le=10)
-    system_prompt: str = Field(description="大模型系统提示词", default="You are a helpful assistant.")
-    user_prompt: str = Field(description="大模型用户提示词", default=LLM_DEFAULT_PROMPT)
+    step_history_size: int = Field(
+        description="上下文信息中包含的步骤历史数量", default=3, ge=1, le=10)
+    system_prompt: str = Field(
+        description="大模型系统提示词", default="You are a helpful assistant.")
+    user_prompt: str = Field(description="大模型用户提示词",
+                             default=LLM_DEFAULT_PROMPT)

     i18n_info: ClassVar[dict[str, dict]] = {
         LanguageType.CHINESE: {
@@ -74,7 +77,8 @@ class LLM(CoreCall, input_model=LLMInput, output_model=LLMOutput):
             context_prompt = "无背景信息。"

         # 参数
-        time = datetime.now(tz=pytz.timezone("Asia/Shanghai")).strftime("%Y-%m-%d %H:%M:%S")
+        time = datetime.now(tz=pytz.timezone("Asia/Shanghai")
+                            ).strftime("%Y-%m-%d %H:%M:%S")
         formatter = {
             "time": time,
             "context": context_prompt,
@@ -110,7 +114,7 @@ class LLM(CoreCall, input_model=LLMInput, output_model=LLMOutput):
         data = LLMInput(**input_data)
         try:
             llm = ReasoningLLM()
-            async for chunk in llm.call(messages=data.message):
+            async for chunk in llm.call(messages=data.message, enable_thinking=True):
                 if not chunk:
                     continue
                 yield CallOutputChunk(type=CallOutputType.TEXT, content=chunk)
diff --git a/apps/scheduler/mcp/plan.py b/apps/scheduler/mcp/plan.py
index 78d695f2cc9fc47c995245f0e8cf059b60e76a3d..04abc7706e80174e0fe6634cecb6693b88b1eaab 100644
--- a/apps/scheduler/mcp/plan.py
+++ b/apps/scheduler/mcp/plan.py
@@ -35,7 +35,6 @@ class MCPPlanner:
         # 解析为结构化数据
         return await self._parse_plan_result(result, max_steps)

-
     async def _get_reasoning_plan(self, tool_list: list[MCPTool], max_steps: int) -> str:
         """获取推理大模型的结果"""
         # 格式化Prompt
@@ -67,7 +66,6 @@ class MCPPlanner:
         return result

-
     async def _parse_plan_result(self, result: str, max_steps: int) -> MCPPlan:
         """将推理结果解析为结构化数据"""
         # 格式化Prompt
@@ -86,7 +84,6 @@ class MCPPlanner:
         plan = await json_generator.generate()
         return MCPPlan.model_validate(plan)

-
     async def generate_answer(self, plan: MCPPlan, memory: str) -> str:
         """生成最终回答"""
         template = self._env.from_string(FINAL_ANSWER[self.language])
@@ -102,6 +99,7 @@ class MCPPlanner:
             [{"role": "user", "content": prompt}],
             streaming=False,
             temperature=0.07,
+            enable_thinking=True
         ):
             result += chunk
diff --git a/apps/scheduler/mcp_agent/plan.py b/apps/scheduler/mcp_agent/plan.py
index 489efc74108dd1da24a2159d630e166a7a56b3ab..a51b0da76926356794d80c71e6ffe77ae11cefa9 100644
--- a/apps/scheduler/mcp_agent/plan.py
+++ b/apps/scheduler/mcp_agent/plan.py
@@ -155,13 +155,15 @@ class MCPPlanner(MCPBase):
         prompt = template.render(
             goal=user_goal,
             error_message=error_message,
-            current_plan=current_plan.model_dump(exclude_none=True, by_alias=True),
+            current_plan=current_plan.model_dump(
+                exclude_none=True, by_alias=True),
             history=history,
         )
         result = await MCPPlanner.get_resoning_result(prompt, reasoning_llm)
         # 解析为结构化数据
         schema = RestartStepIndex.model_json_schema()
-        schema["properties"]["start_index"]["maximum"] = len(current_plan.plans) - 1
+        schema["properties"]["start_index"]["maximum"] = len(
+            current_plan.plans) - 1
         schema["properties"]["start_index"]["minimum"] = 0
         restart_index = await MCPPlanner._parse_result(result, schema)
         # 使用RestartStepIndex模型解析结果
@@ -204,7 +206,8 @@ class MCPPlanner(MCPBase):
         if is_replan:
             template = _env.from_string(RECREATE_PLAN[language])
             prompt = template.render(
-                current_plan=current_plan.model_dump(exclude_none=True, by_alias=True),
+                current_plan=current_plan.model_dump(
+                    exclude_none=True, by_alias=True),
                 error_message=error_message,
                 goal=user_goal,
                 tools=tool_list,
@@ -344,7 +347,8 @@ class MCPPlanner(MCPBase):
         template = _env.from_string(TOOL_EXECUTE_ERROR_TYPE_ANALYSIS[language])
         prompt = template.render(
             goal=user_goal,
-            current_plan=current_plan.model_dump(exclude_none=True, by_alias=True),
+            current_plan=current_plan.model_dump(
+                exclude_none=True, by_alias=True),
             tool_name=tool.name,
             tool_description=tool.description,
             input_param=input_param,
@@ -416,7 +420,8 @@ class MCPPlanner(MCPBase):
         language: LanguageType = LanguageType.CHINESE,
     ) -> str:
         """将错误信息转换为工具描述"""
-        template = _env.from_string(CHANGE_ERROR_MESSAGE_TO_DESCRIPTION[language])
+        template = _env.from_string(
+            CHANGE_ERROR_MESSAGE_TO_DESCRIPTION[language])
         prompt = template.render(
             error_message=error_message,
             tool_name=tool.name,
@@ -468,5 +473,7 @@ class MCPPlanner(MCPBase):
             [{"role": "user", "content": prompt}],
             streaming=True,
             temperature=0.07,
+            result_only=False,
+            enable_thinking=True
         ):
             yield chunk
diff --git a/apps/scheduler/scheduler/scheduler.py b/apps/scheduler/scheduler/scheduler.py
index 2bd51e745ea7e2a8b51f04f6f214b08b1a2ed28b..7060272f25a97ecc59d31781328e06d717d21cdb 100644
--- a/apps/scheduler/scheduler/scheduler.py
+++ b/apps/scheduler/scheduler/scheduler.py
@@ -305,7 +305,8 @@ class Scheduler:
                 servers_id=servers_id,
                 background=background,
                 agent_id=app_info.app_id,
-                params=post_body.params
+                params=post_body.params,
+                resoning_llm=reasion_llm
             )
             # 开始运行
             logger.info("[Scheduler] 运行Executor")
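`enable_thinking` (and, for the streaming path, `result_only=False`) is threaded from these planner call sites down to `ReasoningLLM._create_stream`, which forwards it via `extra_body`. A minimal consumption sketch based on the signatures visible in this patch; it assumes `ReasoningLLM` is importable from `apps.llm.reasoning` and can be constructed without arguments, as in `apps/scheduler/call/llm/llm.py`:

```python
# Minimal sketch: stream an answer with thinking enabled, mirroring the
# keyword arguments used by the planner call sites in this patch.
import asyncio

from apps.llm.reasoning import ReasoningLLM  # assumed import path


async def demo() -> None:
    llm = ReasoningLLM()
    answer = ""
    async for chunk in llm.call(
        [{"role": "user", "content": "Summarize the plan."}],
        streaming=True,
        temperature=0.07,
        result_only=False,      # keep non-result tokens in the stream
        enable_thinking=True,   # forwarded to extra_body by _create_stream
    ):
        answer += chunk
    print(answer)


asyncio.run(demo())
```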
diff --git a/apps/services/rag.py b/apps/services/rag.py
index 918962498bf7727dba5e62769c813c4bdb2bf456..514931479ab173ba50f9eb6c7cc74e174f86fa8d 100644
--- a/apps/services/rag.py
+++ b/apps/services/rag.py
@@ -31,7 +31,7 @@ class RAG:
     user_prompt: dict[LanguageType, str] = {
         LanguageType.CHINESE: r"""
-            你是openEuler社区的智能助手。请结合给出的背景信息, 回答用户的提问,并且基于给出的背景信息在相关句子后进行脚注。
+            你是智能助手。请结合给出的背景信息, 回答用户的提问,并且基于给出的背景信息在相关句子后进行脚注。
             一个例子将在中给出。
             上下文背景信息将在中给出。
             用户的提问将在中给出。
@@ -41,6 +41,7 @@ class RAG:
             3.脚注只出现在回答的句子的末尾,例如句号、问号等标点符号后面。
             4.不要对脚注本身进行解释或说明。
             5.请不要使用中的文档的id作为脚注。
+            6.请详细回答用户的问题。


@@ -79,48 +80,46 @@ class RAG:
             """,
         LanguageType.ENGLISH: r"""
-            You are a helpful assistant of openEuler community. Please answer the user's question based on the given background information and add footnotes after the related sentences.
-            An example will be given in .
-            The background information will be given in .
-            The user's question will be given in .
+            You are a helpful assistant. Please answer the user's question based on the given background information,
+            and provide footnotes based on the relevant sentences in the background information.
+            An example is given in .
+            The contextual background information is given in .
+            The user's question is given in .

             Note:
-            1. Do not include any XML tags in the output, and do not make up any information. If you think the user's question is unrelated to the background information, please ignore the background information and directly answer.
-            2. Your response should not exceed 250 words.
+            1. Do not include any XML tags in the output, and do not make up any information. If you think the user's question is irrelevant to the background information, please ignore the background information and answer directly.
+            2. The format of the footnotes is [[1]], [[2]], [[3]], etc., and the content of the footnotes is the id of the provided document.
+            3. Footnotes only appear at the end of the sentences in the answer, such as after periods, question marks, and other punctuation marks.
+            4. Do not explain or elaborate on the footnotes themselves.
+            5. Please do not use the document ids in as footnotes.
+            6. Please answer the user's question in detail.

-            openEuler community is an open source operating system community, committed to promoting the development of the Linux operating system.
+            The openEuler community is an open source operating system community dedicated to promoting the development of the Linux operating system.
-            openEuler community aims to provide users with a stable, secure, and efficient operating system platform, and support multiple hardware architectures.
+            The goal of the openEuler community is to provide users with a stable, secure, and efficient operating system platform that supports multiple hardware architectures.
-
+
             Members of the openEuler community come from all over the world, including developers, users, and enterprises.
-            Members of the openEuler community work together to promote the development of open source operating systems, and provide support and assistance to users.
+            Members of the openEuler community work together to promote the development of open source operating systems and provide support and assistance to users.

-            What is the goal of openEuler community?
+            What is the goal of the openEuler community?

-            openEuler community is an open source operating system community, committed to promoting the development of the Linux operating system. [[1]]
-            openEuler community aims to provide users with a stable, secure, and efficient operating system platform, and support multiple hardware architectures. [[1]]
-
-
-
-
-            {bac_info}
-
-
-            {user_question}
-
+            The openEuler community is an open source operating system community dedicated to promoting the development of the Linux operating system. [[1]]
+            The goal of the openEuler community is to provide users with a stable, secure, and efficient operating system platform that supports multiple hardware architectures. [[1]]
+
+
             """,
     }
@@ -151,7 +150,8 @@ class RAG:
         )
         try:
             async with httpx.AsyncClient(timeout=30) as client:
-                data_json = tmp_data.model_dump(exclude_none=True, by_alias=True)
+                data_json = tmp_data.model_dump(
+                    exclude_none=True, by_alias=True)
                 response = await client.post(url, headers=headers, json=data_json)
                 if response.status_code == status.HTTP_200_OK:
                     result = response.json()
@@ -161,7 +161,8 @@ class RAG:
         if data.kb_ids:
             try:
                 async with httpx.AsyncClient(timeout=30) as client:
-                    data_json = data.model_dump(exclude_none=True, by_alias=True)
+                    data_json = data.model_dump(
+                        exclude_none=True, by_alias=True)
                     response = await client.post(url, headers=headers, json=data_json)
                     # 检查响应状态码
                     if response.status_code == status.HTTP_200_OK:
@@ -324,6 +325,7 @@ class RAG:
             temperature=0.7,
             result_only=False,
             model=llm.model_name,
+            enable_thinking=True
         ):
             chunk = buffer + chunk
             # 防止脚注被截断
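The streaming loop in `apps/services/rag.py` prepends a buffer to each chunk (`chunk = buffer + chunk`, `# 防止脚注被截断`) so that a footnote marker such as `[[1]]` is never split across stream chunks. A hypothetical sketch of that buffering idea; the helper name and the regex are illustrative, not taken from this repo:

```python
# Hypothetical sketch of the buffering idea behind "防止脚注被截断":
# hold back a trailing partial footnote marker such as "[[1" so that
# "[[1]]" is only ever emitted in one piece.
import re

_PARTIAL_FOOTNOTE = re.compile(r"\[{1,2}\d*\]?$")


def split_safe(text: str) -> tuple[str, str]:
    """Return (emit_now, keep_in_buffer) so a footnote like [[1]] stays whole."""
    match = _PARTIAL_FOOTNOTE.search(text)
    if match:
        return text[: match.start()], text[match.start():]
    return text, ""


buffer = ""
for chunk in ["The goal is stability. [[", "1]] More detail follows."]:
    chunk = buffer + chunk          # same pattern as the rag.py loop
    emit, buffer = split_safe(chunk)
    print(emit, end="")
print(buffer)
```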