diff --git a/apps/llm/function.py b/apps/llm/function.py
index 63d8e5cfe5112f5cbabaac6efa4b3f58c1d42af9..760eb7ecb4a0fae511c70d209f98481637de17e2 100644
--- a/apps/llm/function.py
+++ b/apps/llm/function.py
@@ -329,7 +329,7 @@ class JsonGenerator:
         prompt = await self._assemble_message()
         messages = [
             {"role": "system", "content": prompt},
-            {"role": "user", "content": "please generate a JSON response based on the above information and schema."},
+            {"role": "user", "content": "please generate a JSON response based on the above information and schema./no_think"},
         ]
         function = FunctionLLM()
         return await function.call(messages, self._schema, max_tokens, temperature)
diff --git a/apps/llm/reasoning.py b/apps/llm/reasoning.py
index c72601b627b923ba1de8f1faac1acc23a98d4bcf..1944273bce90e5f7ede75b2eed7e18bd7e60bad1 100644
--- a/apps/llm/reasoning.py
+++ b/apps/llm/reasoning.py
@@ -144,6 +144,15 @@ class ReasoningLLM:
         """Create a streaming response"""
         if model is None:
             model = self._config.model
+        if not enable_thinking:
+            # Soft switch: suffix "/no_think" onto the last user message so the
+            # model skips its thinking phase (idempotent via the endswith check);
+            # if the conversation does not end with a user turn, append one.
+            if messages and messages[-1]["role"] == "user":
+                if not messages[-1]["content"].endswith("/no_think"):
+                    messages[-1]["content"] += "/no_think"
+            else:
+                messages.append({"role": "user", "content": "/no_think"})
         return await self._client.chat.completions.create(
             model=model,
             messages=messages,  # type: ignore[]
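
For reference, a standalone sketch of the suffix logic the reasoning.py hunk adds. The helper name apply_no_think and the sample messages are hypothetical, for illustration only; they are not part of the patch:

# Minimal sketch, assuming a model that honors the Qwen-style "/no_think"
# soft switch appended to the last user message.
def apply_no_think(messages: list[dict[str, str]]) -> list[dict[str, str]]:
    """Suffix "/no_think" onto the last user message, appending one if needed."""
    if messages and messages[-1]["role"] == "user":
        if not messages[-1]["content"].endswith("/no_think"):
            messages[-1]["content"] += "/no_think"
    else:
        messages.append({"role": "user", "content": "/no_think"})
    return messages

msgs = [{"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Summarize the report."}]
apply_no_think(msgs)
apply_no_think(msgs)  # second call is a no-op thanks to the endswith guard
assert msgs[-1]["content"] == "Summarize the report./no_think"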