From 12a221c781384f3ba53f68c7e1c889ceebcc7402 Mon Sep 17 00:00:00 2001 From: huanghongyun Date: Fri, 18 Jul 2025 14:25:42 +0800 Subject: [PATCH 1/2] =?UTF-8?q?=E6=84=8F=E5=9B=BE=E8=AF=86=E5=88=AB?= =?UTF-8?q?=E7=BB=84=E4=BB=B6=E4=B8=8A=E5=BA=93?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../core/component/intent_detection_comp.py | 346 ++++++++++++++++++ 1 file changed, 346 insertions(+) create mode 100644 jiuwen/core/component/intent_detection_comp.py diff --git a/jiuwen/core/component/intent_detection_comp.py b/jiuwen/core/component/intent_detection_comp.py new file mode 100644 index 0000000..0b4df4d --- /dev/null +++ b/jiuwen/core/component/intent_detection_comp.py @@ -0,0 +1,346 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright (c) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. + +import re +import ast +import time +from dataclasses import dataclass, field + +from jiuwen.common.exception.base import JiuWenBaseException +from jiuwen.common.exception.status_code import StatusCode +from jiuwen.common.llm_service.language_model.base import LanguageModelInput +from jiuwen.common.llm_service.model_util import ModelUtil +from jiuwen.orchestration.flow.constant import WORKFLOW_CHAT_HISTORY +from jiuwen.orchestration.utils import Input, Output +from jiuwen.prompt import Prompt, TemplateManager, Template +from jiuwen.common.log.base import logger +from jiuwen.orchestration.common.perf_log import wf_performance_buffer +from jiuwen.planner.planning_modules.base import PromptPlanningModule + +from jiuwen.core.component.llm_comp import LLMCompConfig +from jiuwen.core.runtime.executable import Executable +from jiuwen.core.runtime.context import Context +from jiuwen.core.component.base import WorkflowComponent +from typing import Optional + +LUI = "llm" +NAME = "name" +MODEL = "model" +CLASS = "class" +REASON = "reason" +INPUT = "input" +USER_PROMPT = "user_prompt" +CATEGORY_INFO = "category_info" +CATEGORY_LIST = "category_list" +CATEGORY_NAME_LIST = "category_name_list" +DEFAULT_CLASS = "default_class" +CHAT_HISTORY = "chat_history" +EXAMPLE_CONTENT = "example_content" +ENABLE_HISTORY = "enable_history" +ENABLE_INPUT = "enable_input" +LLM_INPUTS = "llm_inputs" +LLM_OUTPUTS = "llm_outputs" +MODEL_SOURCE = "modelType" +MODEL_NAME = "modelName" +HYPTER_PARAM = "hyperParameters" +EXTENSION = "extension" +CHAT_HISTORY_MAX_TURN = "chat_history_max_turn" +INTENT_DETECTION_TEMPLATE = "intent_detection_template" +ROLE = "role" +CONTENT = "content" +ROLE_MAP = {"user": '用户', 'assistant': '助手', 'system': '系统'} +JSON_PARSE_FAIL_REASON = "当前意图识别的输出:'{result}'格式不符合有效的JSON规范,导致解析失败,因此返回默认分类。" +CLASS_KEY_MISSING_REASON = "当前意图识别的输出 '{result}' 缺少必要的输出'class'分类信息,因此返回默认分类。" +VALIDATION_FAIL_REASON = "当前意图识别的输出类别 '{intent_class}' 不在预定义的分类列表: '{category_list}'中,因此系统返回默认分类。" + +# EI增加 +RESULT = "result" +CATEGORY_NAME_ITS1 = "category_name_list" +FEW_SHOT_NUM = 5 +ENABLE_Q2L = 'enableKnowledge' +RECALLTHREADSHOLD = "recallThreshold" +DEFAULT_QUERY_CATE = 'title' +DEFAULT_CLASS_CATE = 'content' +DEFAULT_INT = "不确定,其他的意图" +SEARCH_TYPE = "faq" +SEARCH_NUM = 5 +CLASSIFICATION_ID = "classificationId" +CLASSIFICATION_DEFAULT_ID = "分类0" +CLASSIFICATION_NAME = "name" +CLASSIFICATION_DEFAULT_NAME = "其他意图" +KG_FILTER_KEY = "filter_string" +KG_FILTER_PREFIX = "category:" +KG_SCOPE = "scope" + +@dataclass +class IntentDetectionConfig: + user_prompt: str + category_info: str + category_list: list[str] + intent_detection_template: Template + category_name_list: list[str] + default_class: str = '分类1' + enable_history: bool = False + enable_input: bool = True + chat_history_max_turn: int = 3 + example_content: list[str] = field(default_factory=list) + overrideable: bool = False + enableKnowledges: bool = False + enable_q2fewshot: bool = True + enable_validation: bool = True + recallThreshold: float = 0.9 + levenshtein_ration: float = 0.8 + q2label_few_shot_score: float = 0.5 + model: 'ModelConfig' = None + + + +@dataclass() +class IntentDetectionExecutable(Executable): + def __init__(self, component_config: IntentDetectionConfig): + super().__init__() + self._context = None + self._llm = None + self._intent_config = IntentDetectionConfig(**self._get_config_info(conf)) + self._prompt_engine = PromptPlanningModule() # TODO + self._config = component_config + # 意图判定prompts + template_manager = TemplateManager() + intent_prompt = template_manager().get( + name='intent_detection', filter={ + "model_name": self._config.model.model_name + }) # TODO + if intent_prompt is not None and hasattr(intent_prompt, 'content'): + self.intent_prompt = intent_prompt.content + + def invoke(self, inputs: input, context: Context)-> Output: + """invoke IntentDetection节点""" + # 提取上下文数据 + self.conversation_id = self._context.get("workflow_execute_debug_info", {}).get("workflow_conversation_id", "") + chat_history = self._get_chat_history_from_context() + # 处理意图检测输入: + try: + current_inputs = self._prepare_detection_inputs(inputs, chat_history) + except Exception as e: + raise JiuWenBaseException( + message=StatusCode.WORKFLOW_INTENT_DETECTION_USER_INPUT_ERROR.errmsg.format( + error_mage= + ), + error_code=StatusCode.WORKFLOW_INTENT_DETECTION_USER_INPUT_ERROR.code + ) + + # 获取大模型结果 + llm_output = self._get_llm_result(current_inputs) + # 后处理意图检测结果 + intent_res = self._handle_detection_result(llm_output) + logger.info(f"{self.conversation_id}|use llm model.") + return intent_res + + def _get_chat_history_from_context(self): + """从上下文中获取开始调用不同代码""" + chat_history_obj = self._context.get(WORKFLOW_CHAT_HISTORY, None) + if chat_history_obj: + return chat_history_obj.get_conversation_history() + return [] + + def _set_context(self, context: Context): + """设置context属性""" + self._context = context + + def _prepare_detection_inputs(self, inputs, chat_history): + """准备意图检测所需的输入""" + current_inputs = {} + global_intent_map = [] + + # 添加基本配置参数 + current_inputs.update({ + USER_PROMPT: self._intent_config.user_proapt, + CATEGORY_INFO: self._intent_config.category_info, + DEFAULT_CLASS: self._intent_config.default_class, + ENABLE_HISTORY: self._intent_config.enable_history, + ENABLE_INPUT: self._intent_config.enable_input, + EXAMPLE_CONTENT: "\n\n".join(self._intent_config.example_content), + CHAT_HISTORY_MAX_TURN: self._intent_config.chat_history_max_turn + }) + + # 检查输入配置有效性 + if not self._intent_config.enable_history and not self._intent_config.enable_input: + raise ValueError("AT LEAST ONE OF INTENT_DETECTION'S ENABLE_HISTORY AND ENABLE_INPUT SHOULD ENABLE.") + + # 处理历史记录 + if self._intent_config.enable_history: + chat_history_str = self._format_chat_history(chat_history) + current_inputs.update({CHAT_HISTORY: chat_history_str}) + + # 处理当前输入 + if self._intent_config.enable_input: + current_inputs.update({INPUT: inputs.get(INPUT)}) + + # 保存全局意图映射用于后续处理 + current_inputs['global_intent_map'] = global_intent_map + + return current_inputs + + def _format_chat_history(self, chat_history): + """格式化聊天历史记录""" + chat_history_str = "" + for history in chat_history[-self._intent_config.chat_history_max_turn:]: + chat_history_str += "{}: {}\n".format( + ROLE_MAP.get(history.get(ROLE, CONTENT), "用户"), + history.get(CONTENT) + ) + return chat_history_str + + def get_llm_result(self, current_inputs): + """获取llm""" + llm_inputs = self._pre_process(current_inputs) + logger.info(f"[{self.conversation_id}]intent detection llm_inputs: {llm_inputs}") + current_inputs.update({LLM_INPUTS: llm_inputs}) + + # 获取LLM输出并处理 + try: + llm_output = self.llm.invoke(llm_inputs).content + except Exception as e: + raise JiuWenBaseException( + message=StatusCode.WORKFLOW_INTENT_DETECTION_LLM_INVOKE_ERROR.errmsg.format( + error_msg=str(e) + ), + error_code = StatusCode.WORKFLOW_INTENT_DETECTION_LLM_INVOKE_ERROR.code + ) + return llm_output + + def _pre_process(self, inputs: dict): + """Pre-process inputs for model""" + try: + final_prompts = [ + { + ROLE: prompt_message.get(ROLE, ''), + CONTENT: Prompt( + template=Template( + name='Template', + content=[prompt_message] + ) + ).invoke(inputs) + } + for prompt_message in self._intent_config.intent_detection_template.content + ] + except JiuWenBaseException: + raise + except Exception as e: + raise JiuWenBaseException( + message=StatusCode.WORKFLOW_INTENT_DETECTION_PROMPT_INVOKE_ERROR.errmsg.format( + error_msg="ERROR OCCURRED WHILE FETCHING THE INTENT DETECTION PROMPT TEMPLATE." + ), + error_code=StatusCode.WORKFLOW_INTENT_DETECTION_PROMPT_INVOKE_ERROR.code + ) from e + + try: + llm_inputs = LanguageModelInput(messages=ModelUtil.switch_message(final_prompts), tools=None) + except Exception as e: + raise JiuWenBaseException( + message=StatusCode.WORKFLOW_INTENT_DETECTION_MODEL_INPUT_ERROR.errmsg.format( + error_msg=str(e) + ), + error_code=StatusCode.WORKFLOW_INTENT_DETECTION_MODEL_INPUT_ERROR.code + ) from e + return llm_inputs + + + def _handle_detection_result(self, llm_output): + """处理意图检测结果""" + intent_class, reason = self.intent_detection_post_process(llm_output) + logger.info(f"{self.conversation_id}|intent_class: {intent_class}") + + # 验证输出有效性 + if not self.output_validation(intent_class): + return dict( + result=self._intent_config.default_class, + reason=VALIDATION_FAIL_REASON.format( + intent_class=intent_class, + category_list=self._intent_config.category_list + ) + ) + intent_id_name = self._get_intent_id_name(self._intent_config, intent_class) + return dict(result=intent_class, reason=reason, classificationId=intent_id_name.get(CLASSIFICATION_ID, ""), + name=intent_id_name.get(CLASSIFICATION_NAME, "")) + + def refix_llm_output(self, input_str): + """大模型输出后处理""" + res = input_str + json_path = r'\{.*\}' + match = re.search(json_path, input_str, re.DOTALL) + if match: + res = match.group(0) + res = res.replace("false", "False").replace("true", "True").replace("null", "None") + else: + return input_str + if "" in res: + tmp = res.split("") + res = tmp[-1] + return res + + + def intent_detection_post_process(self, result): + """ + Post-process the result + Apps: + result: The result is a dict string. + Returns: + The processed results are 'class' and 'reason' + """ + try: + # 对推理模型的返回值处理 + result = self.refix_llm_output(result) + parsed_dict = ast.literal_eval(result) + if not isinstance(parsed_dict, dict): + return self._intent_config.default_class, JSON_PARSE_FAIL_REASON.format(result=result) + except Exception: + return self._intent_config.default_class, JSON_PARSE_FAIL_REASON.format(result=result) + + #post_process class information + if not parsed_dict.get(CLASS): + return self._intent_config.default_class, CLASS_KEY_MISSING_REASON.format(result=parsed_dict) + + intent_class = parsed_dict.get(CLASS).replace('\n', '').replace(' ', '').replace('"', '').replace("'", '') + match = re.search(r"方案文件", intent_class) + if match: + parsed_dict.update({CLASS: match.group(0)}) + + return parsed_dict.get(CLASS), parsed_dict.get(REASON, '') + + def output_validation(self, result): + """ + Validation of LLM output + Args: + result: LLM output + Returns: + True: Validation passed + False: Validation failed + """ + return result in self._intent_config.category_list + + # 获取意图的id和name,用于下一节点调用 + def _get_intent_id_name(self, intent_config, intent_class): + intent_res = {CLASSIFICATION_ID: CLASSIFICATION_DEFAULT_ID, CLASSIFICATION_NAME: CLASSIFICATION_DEFAULT_NAME} + idx = next((i for i, category in enumerate(intent_config.category_list) if category == intent_class), -1) + if idx > -1: + intent_res = {CLASSIFICATION_ID: idx, CLASSIFICATION_NAME: intent_config.category_name_list[idx]} + return intent_res + +class IntentDetectionComponent(WorkflowComponent): + def __init__(self, component_config: Optional[LLMCompConfig] = None): + super().__init__(component_config) + self._executable = None + self._config = component_config + + @property + def executable(self) -> IntentDetectionExecutable: + """延迟创建executable实例""" + if self._executable is None: + self._executable = self.to_executable() + return self._executable + + def to_executable(self) -> IntentDetectionExecutable: + """创建可执行实例""" + return IntentDetectionExecutable(self._config) -- Gitee From 58749b456f58c54579c2dfa2eb82af0ccbe0bc27 Mon Sep 17 00:00:00 2001 From: huanghongyun Date: Sat, 19 Jul 2025 14:51:10 +0800 Subject: [PATCH 2/2] =?UTF-8?q?=E6=84=8F=E5=9B=BE=E8=AF=86=E5=88=AB?= =?UTF-8?q?=E7=BB=84=E4=BB=B6=E4=B8=8A=E5=BA=93?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../core/component/intent_detection_comp.py | 63 +++++-------------- 1 file changed, 14 insertions(+), 49 deletions(-) diff --git a/jiuwen/core/component/intent_detection_comp.py b/jiuwen/core/component/intent_detection_comp.py index 0b4df4d..12aaa43 100644 --- a/jiuwen/core/component/intent_detection_comp.py +++ b/jiuwen/core/component/intent_detection_comp.py @@ -7,46 +7,35 @@ import ast import time from dataclasses import dataclass, field -from jiuwen.common.exception.base import JiuWenBaseException -from jiuwen.common.exception.status_code import StatusCode +from jiuwen.core.common.exception.exception import JiuWenBaseException +from jiuwen.core.common.exception.status_code import StatusCode from jiuwen.common.llm_service.language_model.base import LanguageModelInput from jiuwen.common.llm_service.model_util import ModelUtil -from jiuwen.orchestration.flow.constant import WORKFLOW_CHAT_HISTORY -from jiuwen.orchestration.utils import Input, Output -from jiuwen.prompt import Prompt, TemplateManager, Template -from jiuwen.common.log.base import logger -from jiuwen.orchestration.common.perf_log import wf_performance_buffer -from jiuwen.planner.planning_modules.base import PromptPlanningModule +from jiuwen.prompt import Prompt from jiuwen.core.component.llm_comp import LLMCompConfig -from jiuwen.core.runtime.executable import Executable -from jiuwen.core.runtime.context import Context +from jiuwen.core.graph.executable import Executable, Input, Output +from jiuwen.core.context.context import Context from jiuwen.core.component.base import WorkflowComponent from typing import Optional -LUI = "llm" -NAME = "name" -MODEL = "model" + +from jiuwen.core.utils.prompt.template.template import Template +from jiuwen.core.utils.prompt.template.template_manager import TemplateManager + CLASS = "class" REASON = "reason" INPUT = "input" USER_PROMPT = "user_prompt" CATEGORY_INFO = "category_info" -CATEGORY_LIST = "category_list" -CATEGORY_NAME_LIST = "category_name_list" DEFAULT_CLASS = "default_class" CHAT_HISTORY = "chat_history" EXAMPLE_CONTENT = "example_content" +CHAT_HISTORY_MAX_TURN = "chat_history_max_turn" ENABLE_HISTORY = "enable_history" ENABLE_INPUT = "enable_input" LLM_INPUTS = "llm_inputs" LLM_OUTPUTS = "llm_outputs" -MODEL_SOURCE = "modelType" -MODEL_NAME = "modelName" -HYPTER_PARAM = "hyperParameters" -EXTENSION = "extension" -CHAT_HISTORY_MAX_TURN = "chat_history_max_turn" -INTENT_DETECTION_TEMPLATE = "intent_detection_template" ROLE = "role" CONTENT = "content" ROLE_MAP = {"user": '用户', 'assistant': '助手', 'system': '系统'} @@ -54,24 +43,11 @@ JSON_PARSE_FAIL_REASON = "当前意图识别的输出:'{result}'格式不符合 CLASS_KEY_MISSING_REASON = "当前意图识别的输出 '{result}' 缺少必要的输出'class'分类信息,因此返回默认分类。" VALIDATION_FAIL_REASON = "当前意图识别的输出类别 '{intent_class}' 不在预定义的分类列表: '{category_list}'中,因此系统返回默认分类。" -# EI增加 -RESULT = "result" -CATEGORY_NAME_ITS1 = "category_name_list" -FEW_SHOT_NUM = 5 -ENABLE_Q2L = 'enableKnowledge' -RECALLTHREADSHOLD = "recallThreshold" -DEFAULT_QUERY_CATE = 'title' -DEFAULT_CLASS_CATE = 'content' -DEFAULT_INT = "不确定,其他的意图" -SEARCH_TYPE = "faq" -SEARCH_NUM = 5 CLASSIFICATION_ID = "classificationId" CLASSIFICATION_DEFAULT_ID = "分类0" CLASSIFICATION_NAME = "name" CLASSIFICATION_DEFAULT_NAME = "其他意图" -KG_FILTER_KEY = "filter_string" -KG_FILTER_PREFIX = "category:" -KG_SCOPE = "scope" +WORKFLOW_CHAT_HISTORY = "workflow_chat_history" @dataclass class IntentDetectionConfig: @@ -85,13 +61,6 @@ class IntentDetectionConfig: enable_input: bool = True chat_history_max_turn: int = 3 example_content: list[str] = field(default_factory=list) - overrideable: bool = False - enableKnowledges: bool = False - enable_q2fewshot: bool = True - enable_validation: bool = True - recallThreshold: float = 0.9 - levenshtein_ration: float = 0.8 - q2label_few_shot_score: float = 0.5 model: 'ModelConfig' = None @@ -103,7 +72,6 @@ class IntentDetectionExecutable(Executable): self._context = None self._llm = None self._intent_config = IntentDetectionConfig(**self._get_config_info(conf)) - self._prompt_engine = PromptPlanningModule() # TODO self._config = component_config # 意图判定prompts template_manager = TemplateManager() @@ -118,7 +86,7 @@ class IntentDetectionExecutable(Executable): """invoke IntentDetection节点""" # 提取上下文数据 self.conversation_id = self._context.get("workflow_execute_debug_info", {}).get("workflow_conversation_id", "") - chat_history = self._get_chat_history_from_context() + chat_history = self._get_chat_history_from_context(context) # 处理意图检测输入: try: current_inputs = self._prepare_detection_inputs(inputs, chat_history) @@ -134,12 +102,11 @@ class IntentDetectionExecutable(Executable): llm_output = self._get_llm_result(current_inputs) # 后处理意图检测结果 intent_res = self._handle_detection_result(llm_output) - logger.info(f"{self.conversation_id}|use llm model.") return intent_res - def _get_chat_history_from_context(self): + def _get_chat_history_from_context(self, context): """从上下文中获取开始调用不同代码""" - chat_history_obj = self._context.get(WORKFLOW_CHAT_HISTORY, None) + chat_history_obj = context.store.read(WORKFLOW_CHAT_HISTORY) or list() if chat_history_obj: return chat_history_obj.get_conversation_history() return [] @@ -195,7 +162,6 @@ class IntentDetectionExecutable(Executable): def get_llm_result(self, current_inputs): """获取llm""" llm_inputs = self._pre_process(current_inputs) - logger.info(f"[{self.conversation_id}]intent detection llm_inputs: {llm_inputs}") current_inputs.update({LLM_INPUTS: llm_inputs}) # 获取LLM输出并处理 @@ -250,7 +216,6 @@ class IntentDetectionExecutable(Executable): def _handle_detection_result(self, llm_output): """处理意图检测结果""" intent_class, reason = self.intent_detection_post_process(llm_output) - logger.info(f"{self.conversation_id}|intent_class: {intent_class}") # 验证输出有效性 if not self.output_validation(intent_class): -- Gitee