diff --git a/apps/llm/function.py b/apps/llm/function.py index a86155c6998ee0df5f24ef23ac560b094e13a92a..ed165fe16665548ca52e3ae1d3b9a7aea2098623 100644 --- a/apps/llm/function.py +++ b/apps/llm/function.py @@ -12,7 +12,6 @@ from asyncer import asyncify from sglang.lang.chat_template import get_chat_template from apps.common.config import config -from apps.constants import REASONING_BEGIN_TOKEN, REASONING_END_TOKEN from apps.scheduler.json_schema import build_regex_from_schema diff --git a/apps/llm/patterns/recommend.py b/apps/llm/patterns/recommend.py index 7d33a3dde79660e739a61fc87afaf73fe8a93cc8..9397d0d38832e6011362c6e152e1fcee4d2a2685 100644 --- a/apps/llm/patterns/recommend.py +++ b/apps/llm/patterns/recommend.py @@ -2,9 +2,10 @@ Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. """ -from typing import Optional +from typing import Any, ClassVar, Optional from apps.llm.patterns.core import CorePattern +from apps.llm.patterns.json import Json from apps.llm.reasoning import ReasoningLLM @@ -15,20 +16,31 @@ class Recommend(CorePattern): """系统提示词""" user_prompt: str = r""" - 根据上面的历史对话、结合给出的工具描述和用户倾向,生成预测问题。 + ## 目标: + 根据上面的历史对话,结合给出的工具描述和用户倾向,生成三个预测问题。 + ## 要求: 信息说明: - - [Empty]的含义是“空信息”,如“工具描述: [Empty]”表示当前未使用工具。 - - 历史提问信息为背景参考作用,最多提供4条。 + - [Empty]的含义是“空信息”,如“工具描述: [Empty]”表示当前未使用工具。请忽略信息为空的项,正常进行问题预测。 + - 历史提问信息是用户发生在历史对话之前的提问,仅为背景参考作用。 生成时需要遵循的要求: - 1. 从用户角度生成预测问题。 - 2. 预测问题应为疑问句或祈使句,必须少于30字。 - 3. 预测问题应优先贴合工具描述,特别是工具描述与历史对话或用户倾向无关时。 - 4. 预测问题必须精简,不得输出非必要信息,不得输出除问题以外的文字。 - 5. 预测问题不得与“用户历史提问”重复或相似。 - - ==以下是一个例子== + 1. 从用户角度生成预测问题,数量必须为3个,必须为疑问句或祈使句,必须少于30字。 + 2. 预测问题应优先贴合工具描述,除非工具描述为空。 + 3. 预测问题必须精简,不得在问题中掺杂非必要信息,不得输出除问题以外的文字。 + 4. 请以如下格式输出: + + ```json + {{ + "predicted_questions": [ + "预测问题1", + "预测问题2", + "预测问题3" + ] + }} + ``` + + ## 样例: 工具描述:调用API,查询天气数据 用户历史提问: @@ -39,10 +51,17 @@ class Recommend(CorePattern): ['旅游', '美食'] 生成的预测问题: - 杭州西湖景区的门票价格是多少? - ==例子结束== - - 现在,进行问题生成: + ```json + {{ + "predicted_questions": [ + "杭州西湖景区的门票价格是多少?", + "杭州有哪些著名景点?", + "杭州的天气怎么样?" + ] + }} + ``` + + ## 现在,进行问题生成: 工具描述:{action_description} 用户历史提问: @@ -52,14 +71,30 @@ class Recommend(CorePattern): {user_preference} 生成的预测问题: + ```json """ """用户提示词""" + slot_schema: ClassVar[dict[str, Any]] = { + "type": "object", + "properties": { + "predicted_questions": { + "type": "array", + "description": "推荐的问题列表", + "items": { + "type": "string", + }, + }, + }, + "required": ["predicted_questions"], + } + """最终输出的JSON Schema""" + def __init__(self, system_prompt: Optional[str] = None, user_prompt: Optional[str] = None) -> None: """初始化推荐问题生成Prompt""" super().__init__(system_prompt, user_prompt) - async def generate(self, task_id: str, **kwargs) -> str: # noqa: ANN003 + async def generate(self, task_id: str, **kwargs) -> list[str]: # noqa: ANN003 """生成推荐问题""" if "action_description" not in kwargs or not kwargs["action_description"]: action_description = "[Empty]" @@ -90,5 +125,11 @@ class Recommend(CorePattern): result = "" async for chunk in ReasoningLLM().call(task_id, messages, streaming=False, temperature=0.7, result_only=True): result += chunk + messages += [{"role": "assistant", "content": result}] + + question_dict = await Json().generate(task_id, conversation=messages, spec=self.slot_schema) + + if not question_dict or "predicted_questions" not in question_dict or not question_dict["predicted_questions"]: + return [] - return result + return question_dict["predicted_questions"] diff --git a/apps/llm/reasoning.py b/apps/llm/reasoning.py index 64daccdedaa31fad7d68187dc9e33281cf330a0e..a9844f465ce53b9c618104e78edcfbbe21a098e3 100644 --- a/apps/llm/reasoning.py +++ b/apps/llm/reasoning.py @@ -10,7 +10,7 @@ from openai import AsyncOpenAI from apps.common.config import config from apps.common.singleton import Singleton -from apps.constants import REASONING_BEGIN_TOKEN, REASONING_END_TOKEN +from apps.constants import LOGGER, REASONING_BEGIN_TOKEN, REASONING_END_TOKEN from apps.manager.task import TaskManager @@ -155,5 +155,7 @@ class ReasoningLLM(metaclass=Singleton): yield reasoning_content yield result + LOGGER.info(f"推理LLM:{reasoning_content}\n\n{result}") + output_tokens = self._calculate_token_length([{"role": "assistant", "content": result}], pure_text=True) await TaskManager.update_token_summary(task_id, input_tokens, output_tokens) diff --git a/apps/service/suggestion.py b/apps/service/suggestion.py index d3481d835a6de6a3d64a2f0f69742e9de71d0105..cf817a60667922ab6919b44c15d50ca27526ea4d 100644 --- a/apps/service/suggestion.py +++ b/apps/service/suggestion.py @@ -19,8 +19,6 @@ from apps.manager import ( ) from apps.scheduler.pool.pool import Pool -# 推荐问题条数 -MAX_RECOMMEND = 3 # 用户领域条数 USER_TOP_DOMAINS_NUM = 5 # 历史问题条数 @@ -61,14 +59,15 @@ async def plan_next_flow(user_sub: str, task_id: str, queue: MessageQueue, user_ last_n_questions += f"Question {i+1}: {data.question}\n" if task.flow_state is None: + questions = await Recommend().generate( + task_id=task_id, + history_questions=last_n_questions, + recent_question=current_record, + user_preference=user_domain, + ) + # 当前没有使用Flow,进行普通推荐 - for _ in range(MAX_RECOMMEND): - question = await Recommend().generate( - task_id=task_id, - history_questions=last_n_questions, - recent_question=current_record, - user_preference=user_domain, - ) + for question in questions: content = SuggestContent( question=question, plugin_id="", @@ -93,33 +92,26 @@ async def plan_next_flow(user_sub: str, task_id: str, queue: MessageQueue, user_ if plugin.plugin_id and plugin.plugin_id not in plugin_ids: plugin_ids.append(plugin.plugin_id) result = Pool().get_k_flows(task.record.content.question, plugin_ids) - for i, flow in enumerate(result): - if i >= MAX_RECOMMEND: - break - # 改写问题 - rewrite_question = await Recommend().generate( + # TODO:预测问题n选3,或者针对每个flow只预测1个问题 + for flow in result: + questions = await Recommend().generate( task_id=task_id, action_description=flow.description, history_questions=last_n_questions, recent_question=current_record, user_preference=str(user_domain), ) - content = SuggestContent( plugin_id=plugin_id, flow_id=flow_id, flow_description=str(flow.description), - question=rewrite_question, + question=questions[0], ) await queue.push_output(event_type=EventType.SUGGEST, data=content.model_dump(exclude_none=True, by_alias=True)) return # 当前有next_flow - for i, next_flow in enumerate(flow_data.next_flow): - # 取前MAX_RECOMMEND个Flow,保持顺序 - if i >= MAX_RECOMMEND: - break - + for next_flow in flow_data.next_flow: if next_flow.plugin is not None: next_flow_plugin_id = next_flow.plugin else: @@ -154,11 +146,12 @@ async def plan_next_flow(user_sub: str, task_id: str, queue: MessageQueue, user_ recent_question=current_record, user_preference=str(user_domain), ) + content = SuggestContent( plugin_id=next_flow_plugin_id, flow_id=next_flow.id, flow_description=str(flow_metadata.description), - question=rewrite_question, + question=rewrite_question[0], ) await queue.push_output(event_type=EventType.SUGGEST, data=content.model_dump(exclude_none=True, by_alias=True)) continue diff --git a/deploy/chart/euler_copilot/configs/rag/.env b/deploy/chart/euler_copilot/configs/rag/.env index ca0a9c33cf92b1b5bf320fee10990fbf5109e74f..ed3c3b2395296baa8231d66374de638f07311e63 100644 --- a/deploy/chart/euler_copilot/configs/rag/.env +++ b/deploy/chart/euler_copilot/configs/rag/.env @@ -25,8 +25,8 @@ TASK_RETRY_TIME=3 # Embedding Service EMBEDDING_ENDPOINT={{ .Values.models.embedding.url }}/embeddings -EMBEDDING_KEY={{ .Values.models.embedding.key }} -EMBEDDING_MODEL={{ .Values.models.embedding.name }} +EMBEDDING_API_KEY={{ .Values.models.embedding.key }} +EMBEDDING_MODEL_NAME={{ .Values.models.embedding.name }} # Token CSRF_KEY=${csrfKey}