diff --git a/apps/llm/function.py b/apps/llm/function.py
index a86155c6998ee0df5f24ef23ac560b094e13a92a..ed165fe16665548ca52e3ae1d3b9a7aea2098623 100644
--- a/apps/llm/function.py
+++ b/apps/llm/function.py
@@ -12,7 +12,6 @@ from asyncer import asyncify
 from sglang.lang.chat_template import get_chat_template
 
 from apps.common.config import config
-from apps.constants import REASONING_BEGIN_TOKEN, REASONING_END_TOKEN
 from apps.scheduler.json_schema import build_regex_from_schema
 
 
diff --git a/apps/llm/patterns/recommend.py b/apps/llm/patterns/recommend.py
index 7d33a3dde79660e739a61fc87afaf73fe8a93cc8..9397d0d38832e6011362c6e152e1fcee4d2a2685 100644
--- a/apps/llm/patterns/recommend.py
+++ b/apps/llm/patterns/recommend.py
@@ -2,9 +2,10 @@
 
 Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved.
 """
-from typing import Optional
+from typing import Any, ClassVar, Optional
 
 from apps.llm.patterns.core import CorePattern
+from apps.llm.patterns.json import Json
 from apps.llm.reasoning import ReasoningLLM
 
 
@@ -15,20 +16,31 @@ class Recommend(CorePattern):
     """系统提示词"""
 
     user_prompt: str = r"""
-        根据上面的历史对话、结合给出的工具描述和用户倾向，生成预测问题。
+        ## 目标：
+        根据上面的历史对话，结合给出的工具描述和用户倾向，生成三个预测问题。
 
+        ## 要求：
         信息说明：
-        - [Empty]的含义是“空信息”，如“工具描述: [Empty]”表示当前未使用工具。
-        - 历史提问信息为背景参考作用，最多提供4条。
+        - [Empty]的含义是“空信息”，如“工具描述: [Empty]”表示当前未使用工具。请忽略信息为空的项，正常进行问题预测。
+        - 历史提问信息是用户发生在历史对话之前的提问，仅为背景参考作用。
 
         生成时需要遵循的要求：
-        1. 从用户角度生成预测问题。
-        2. 预测问题应为疑问句或祈使句，必须少于30字。
-        3. 预测问题应优先贴合工具描述，特别是工具描述与历史对话或用户倾向无关时。
-        4. 预测问题必须精简，不得输出非必要信息，不得输出除问题以外的文字。
-        5. 预测问题不得与“用户历史提问”重复或相似。
-
-        ==以下是一个例子==
+        1. 从用户角度生成预测问题，数量必须为3个，必须为疑问句或祈使句，必须少于30字。
+        2. 预测问题应优先贴合工具描述，除非工具描述为空。
+        3. 预测问题必须精简，不得在问题中掺杂非必要信息，不得输出除问题以外的文字。
+        4. 请以如下格式输出：
+
+        ```json
+        {{
+            "predicted_questions": [
+                "预测问题1",
+                "预测问题2",
+                "预测问题3"
+            ]
+        }}
+        ```
+
+        ## 样例：
         工具描述：调用API，查询天气数据
 
         用户历史提问：
@@ -39,10 +51,17 @@ class Recommend(CorePattern):
         ['旅游', '美食']
 
         生成的预测问题：
-        杭州西湖景区的门票价格是多少？
-        ==例子结束==
-
-        现在，进行问题生成：
+        ```json
+        {{
+            "predicted_questions": [
+                "杭州西湖景区的门票价格是多少？",
+                "杭州有哪些著名景点？",
+                "杭州的天气怎么样？"
+            ]
+        }}
+        ```
+
+        ## 现在，进行问题生成：
         工具描述：{action_description}
 
         用户历史提问：
@@ -52,14 +71,30 @@ class Recommend(CorePattern):
         {user_preference}
 
         生成的预测问题：
+        ```json
     """
     """用户提示词"""
 
+    slot_schema: ClassVar[dict[str, Any]] = {
+        "type": "object",
+        "properties": {
+            "predicted_questions": {
+                "type": "array",
+                "description": "推荐的问题列表",
+                "items": {
+                    "type": "string",
+                },
+            },
+        },
+        "required": ["predicted_questions"],
+    }
+    """最终输出的JSON Schema"""
+
     def __init__(self, system_prompt: Optional[str] = None, user_prompt: Optional[str] = None) -> None:
         """初始化推荐问题生成Prompt"""
         super().__init__(system_prompt, user_prompt)
 
-    async def generate(self, task_id: str, **kwargs) -> str:  # noqa: ANN003
+    async def generate(self, task_id: str, **kwargs) -> list[str]:  # noqa: ANN003
         """生成推荐问题"""
         if "action_description" not in kwargs or not kwargs["action_description"]:
             action_description = "[Empty]"
@@ -90,5 +125,11 @@ class Recommend(CorePattern):
         result = ""
         async for chunk in ReasoningLLM().call(task_id, messages, streaming=False, temperature=0.7, result_only=True):
             result += chunk
+        messages += [{"role": "assistant", "content": result}]
+
+        question_dict = await Json().generate(task_id, conversation=messages, spec=self.slot_schema)
+
+        if not question_dict or "predicted_questions" not in question_dict or not question_dict["predicted_questions"]:
+            return []
 
-        return result
+        return question_dict["predicted_questions"]
diff --git a/apps/llm/reasoning.py b/apps/llm/reasoning.py
index 64daccdedaa31fad7d68187dc9e33281cf330a0e..a9844f465ce53b9c618104e78edcfbbe21a098e3 100644
--- a/apps/llm/reasoning.py
+++ b/apps/llm/reasoning.py
@@ -10,7 +10,7 @@ from openai import AsyncOpenAI
 
 from apps.common.config import config
 from apps.common.singleton import Singleton
-from apps.constants import REASONING_BEGIN_TOKEN, REASONING_END_TOKEN
+from apps.constants import LOGGER, REASONING_BEGIN_TOKEN, REASONING_END_TOKEN
 from apps.manager.task import TaskManager
 
 
@@ -155,5 +155,7 @@ class ReasoningLLM(metaclass=Singleton):
                 yield reasoning_content
             yield result
 
+        LOGGER.info(f"推理LLM：{reasoning_content}\n\n{result}")
+
         output_tokens = self._calculate_token_length([{"role": "assistant", "content": result}], pure_text=True)
         await TaskManager.update_token_summary(task_id, input_tokens, output_tokens)
diff --git a/apps/service/suggestion.py b/apps/service/suggestion.py
index d3481d835a6de6a3d64a2f0f69742e9de71d0105..cf817a60667922ab6919b44c15d50ca27526ea4d 100644
--- a/apps/service/suggestion.py
+++ b/apps/service/suggestion.py
@@ -19,8 +19,6 @@ from apps.manager import (
 )
 from apps.scheduler.pool.pool import Pool
 
-# 推荐问题条数
-MAX_RECOMMEND = 3
 # 用户领域条数
 USER_TOP_DOMAINS_NUM = 5
 # 历史问题条数
@@ -61,14 +59,15 @@ async def plan_next_flow(user_sub: str, task_id: str, queue: MessageQueue, user_
         last_n_questions += f"Question {i+1}: {data.question}\n"
 
     if task.flow_state is None:
+        questions = await Recommend().generate(
+            task_id=task_id,
+            history_questions=last_n_questions,
+            recent_question=current_record,
+            user_preference=user_domain,
+        )
+
         # 当前没有使用Flow，进行普通推荐
-        for _ in range(MAX_RECOMMEND):
-            question = await Recommend().generate(
-                task_id=task_id,
-                history_questions=last_n_questions,
-                recent_question=current_record,
-                user_preference=user_domain,
-            )
+        for question in questions:
             content = SuggestContent(
                 question=question,
                 plugin_id="",
@@ -93,33 +92,29 @@ async def plan_next_flow(user_sub: str, task_id: str, queue: MessageQueue, user_
             if plugin.plugin_id and plugin.plugin_id not in plugin_ids:
                 plugin_ids.append(plugin.plugin_id)
         result = Pool().get_k_flows(task.record.content.question, plugin_ids)
-        for i, flow in enumerate(result):
-            if i >= MAX_RECOMMEND:
-                break
-            # 改写问题
-            rewrite_question = await Recommend().generate(
+        # TODO：预测问题n选3，或者针对每个flow只预测1个问题
+        for flow in result:
+            questions = await Recommend().generate(
                 task_id=task_id,
                 action_description=flow.description,
                 history_questions=last_n_questions,
                 recent_question=current_record,
                 user_preference=str(user_domain),
             )
-
+            if not questions:
+                # generate() may return []; skip this flow instead of raising IndexError
+                continue
             content = SuggestContent(
                 plugin_id=plugin_id,
                 flow_id=flow_id,
                 flow_description=str(flow.description),
-                question=rewrite_question,
+                question=questions[0],
             )
             await queue.push_output(event_type=EventType.SUGGEST, data=content.model_dump(exclude_none=True, by_alias=True))
         return
 
     # 当前有next_flow
-    for i, next_flow in enumerate(flow_data.next_flow):
-        # 取前MAX_RECOMMEND个Flow，保持顺序
-        if i >= MAX_RECOMMEND:
-            break
-
+    for next_flow in flow_data.next_flow:
         if next_flow.plugin is not None:
             next_flow_plugin_id = next_flow.plugin
         else:
@@ -154,11 +149,15 @@ async def plan_next_flow(user_sub: str, task_id: str, queue: MessageQueue, user_
             recent_question=current_record,
             user_preference=str(user_domain),
         )
+        if not rewrite_question:
+            # generate() may return []; skip this flow instead of raising IndexError
+            continue
+
         content = SuggestContent(
             plugin_id=next_flow_plugin_id,
             flow_id=next_flow.id,
             flow_description=str(flow_metadata.description),
-            question=rewrite_question,
+            question=rewrite_question[0],
         )
         await queue.push_output(event_type=EventType.SUGGEST, data=content.model_dump(exclude_none=True, by_alias=True))
         continue
diff --git a/deploy/chart/euler_copilot/configs/rag/.env b/deploy/chart/euler_copilot/configs/rag/.env
index ca0a9c33cf92b1b5bf320fee10990fbf5109e74f..ed3c3b2395296baa8231d66374de638f07311e63 100644
--- a/deploy/chart/euler_copilot/configs/rag/.env
+++ b/deploy/chart/euler_copilot/configs/rag/.env
@@ -25,8 +25,8 @@ TASK_RETRY_TIME=3
 
 # Embedding Service
 EMBEDDING_ENDPOINT={{ .Values.models.embedding.url }}/embeddings
-EMBEDDING_KEY={{ .Values.models.embedding.key }}
-EMBEDDING_MODEL={{ .Values.models.embedding.name }}
+EMBEDDING_API_KEY={{ .Values.models.embedding.key }}
+EMBEDDING_MODEL_NAME={{ .Values.models.embedding.name }}
 
 # Token
 CSRF_KEY=${csrfKey}