diff --git a/README.md b/README.md
index 2596d50b8e416917486c1496b6d8d843ebf771a2..eeb2f6ce717045a1753a50e117ebbf140c950c7d 100644
--- a/README.md
+++ b/README.md
@@ -31,6 +31,7 @@
 | Baichuan2-7B                  | `vLLM`       | [✅](models/nlp/llm/baichuan2-7b/vllm)                              | 4.3.0     |
 | ChatGLM-3-6B                  | `vLLM`       | [✅](models/nlp/llm/chatglm3-6b/vllm)                               | 4.3.0     |
 | ChatGLM-3-6B-32K              | `vLLM`       | [✅](models/nlp/llm/chatglm3-6b-32k/vllm)                           | 4.3.0     |
+| CosyVoice2-0.5B               | `PyTorch`    | [✅](models/speech/speech_synthesis/cosyvoice/pytorch)              | 4.3.0     |
 | DeepSeek-R1-Distill-Llama-8B  | `vLLM`       | [✅](models/nlp/llm/deepseek-r1-distill-llama-8b/vllm)              | 4.3.0     |
 | DeepSeek-R1-Distill-Llama-70B | `vLLM`       | [✅](models/nlp/llm/deepseek-r1-distill-llama-70b/vllm)             | 4.3.0     |
 | DeepSeek-R1-Distill-Qwen-1.5B | `vLLM`       | [✅](models/nlp/llm/deepseek-r1-distill-qwen-1.5b/vllm)             | 4.3.0     |
diff --git a/README_en.md b/README_en.md
index f6da160441a54d3e4a768942edbf7a6d5955a744..7586c21ac33c5fcea12bc45db50e13dd3fa1b1ff 100644
--- a/README_en.md
+++ b/README_en.md
@@ -41,6 +41,7 @@ inference to be expanded in the future.
 | Baichuan2-7B                  | `vLLM`       | [✅](models/nlp/llm/baichuan2-7b/vllm)                              | 4.3.0     |
 | ChatGLM-3-6B                  | `vLLM`       | [✅](models/nlp/llm/chatglm3-6b/vllm)                               | 4.3.0     |
 | ChatGLM-3-6B-32K              | `vLLM`       | [✅](models/nlp/llm/chatglm3-6b-32k/vllm)                           | 4.3.0     |
+| CosyVoice2-0.5B               | `PyTorch`    | [✅](models/speech/speech_synthesis/cosyvoice/pytorch)              | 4.3.0     |
 | DeepSeek-R1-Distill-Llama-8B  | `vLLM`       | [✅](models/nlp/llm/deepseek-r1-distill-llama-8b/vllm)              | 4.3.0     |
 | DeepSeek-R1-Distill-Llama-70B | `vLLM`       | [✅](models/nlp/llm/deepseek-r1-distill-llama-70b/vllm)             | 4.3.0     |
 | DeepSeek-R1-Distill-Qwen-1.5B | `vLLM`       | [✅](models/nlp/llm/deepseek-r1-distill-qwen-1.5b/vllm)             | 4.3.0     |
diff --git a/models/speech/speech_synthesis/cosyvoice/pytorch/README.md b/models/speech/speech_synthesis/cosyvoice/pytorch/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..7e559dd5c33770bb4b88014879ce6341b028f28d
--- /dev/null
+++ b/models/speech/speech_synthesis/cosyvoice/pytorch/README.md
@@ -0,0 +1,49 @@
+# CosyVoice2 (pytorch)
+
+## Model Description
+
+CosyVoice2-0.5B is a small speech model designed to understand and generate human-like speech. It can be used for tasks like voice assistants, text-to-speech, or voice cloning. With 0.5 billion parameters, it is lightweight and works well on devices with limited computing power. It focuses on natural-sounding voices and easy customization.
+
+## Supported Environments
+
+| GPU    | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release |
+| :----: | :----: | :----: |
+| MR-V100 | 4.3.0     |  25.09  |
+
+## Model Preparation
+
+### Prepare Resources
+
+Pretrained model: <https://huggingface.co/FunAudioLLM/CosyVoice2-0.5B>
+
+### Install Dependencies
+
+```bash
+pip3 install -r requirements.txt
+git clone --recursive https://github.com/FunAudioLLM/CosyVoice.git
+cd CosyVoice
+# If the submodules failed to clone because of network errors, re-run the following command until it succeeds
+git submodule update --init --recursive
+
+mkdir -p pretrained_models
+# Download the CosyVoice2-0.5B model so that it ends up in pretrained_models/CosyVoice2-0.5B (the path inference_test.py loads)
+
+# If you encounter sox compatibility issues
+# ubuntu
+sudo apt-get install sox libsox-dev
+# centos
+sudo yum install sox sox-devel
+```
+
+## Model Inference
+
+```bash
+cp ../inference_test.py ./
+python3 inference_test.py
+```
+
+## Model Results
+
+## References
+
+- [CosyVoice](https://github.com/FunAudioLLM/CosyVoice/commit/0a496c18f78ca993c63f6d880fcc60778bfc85c1)
\ No newline at end of file
diff --git a/models/speech/speech_synthesis/cosyvoice/pytorch/ci/prepare.sh b/models/speech/speech_synthesis/cosyvoice/pytorch/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..9ab95280bd97e5d705d163d465b45916cd9239e1
--- /dev/null
+++ b/models/speech/speech_synthesis/cosyvoice/pytorch/ci/prepare.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# CI preparation for the CosyVoice2-0.5B PyTorch inference test.
+# Installs the sox packages, stages a local copy of the CosyVoice repo,
+# links the checkpoint into it, installs the Python requirements and copies
+# inference_test.py into the checkout.
+
+set -x
+apt update
+# -y answers the confirmation prompt so the install cannot block or abort
+# in a non-interactive CI job.
+apt-get install -y sox libsox-dev
+
+cp -r /mnt/deepspark/data/repos/CosyVoice ./
+cd CosyVoice
+mkdir -p pretrained_models
+ln -s /mnt/deepspark/data/checkpoints/CosyVoice2-0.5B pretrained_models/
+# NOTE(review): this resolves to the requirements.txt inside the copied
+# CosyVoice checkout, not the one next to this model's README — confirm.
+pip3 install -r requirements.txt
+
+cp ../inference_test.py ./
\ No newline at end of file
diff --git a/models/speech/speech_synthesis/cosyvoice/pytorch/inference_test.py b/models/speech/speech_synthesis/cosyvoice/pytorch/inference_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..5dff40a055714853e1da648abce1720c1b9c5768
--- /dev/null
+++ b/models/speech/speech_synthesis/cosyvoice/pytorch/inference_test.py
@@ -0,0 +1,35 @@
+# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+sys.path.append('third_party/Matcha-TTS')
+from cosyvoice.cli.cosyvoice import CosyVoice, CosyVoice2
+from cosyvoice.utils.file_utils import load_wav
+import torchaudio
+cosyvoice = CosyVoice2('pretrained_models/CosyVoice2-0.5B', load_jit=False, load_trt=False, fp16=False)
+
+# NOTE if you want to reproduce the results on https://funaudiollm.github.io/cosyvoice2, please add text_frontend=False during inference
+# zero_shot usage
+prompt_speech_16k = load_wav('./asset/zero_shot_prompt.wav', 16000)
+for i, j in enumerate(cosyvoice.inference_zero_shot('收到好友从远方寄来的生日礼物，那份意外的惊喜与深深的祝福让我心中充满了甜蜜的快乐，笑容如花儿般绽放。', '希望你以后能够做的比我还好呦。', prompt_speech_16k, stream=False)):
+    torchaudio.save('zero_shot_{}.wav'.format(i), j['tts_speech'], cosyvoice.sample_rate)
+
+# fine grained control, for supported control, check cosyvoice/tokenizer/tokenizer.py#L248
+for i, j in enumerate(cosyvoice.inference_cross_lingual('在他讲述那个荒诞故事的过程中，他突然[laughter]停下来，因为他自己也被逗笑了[laughter]。', prompt_speech_16k, stream=False)):
+    torchaudio.save('fine_grained_control_{}.wav'.format(i), j['tts_speech'], cosyvoice.sample_rate)
+
+# instruct usage
+for i, j in enumerate(cosyvoice.inference_instruct2('收到好友从远方寄来的生日礼物，那份意外的惊喜与深深的祝福让我心中充满了甜蜜的快乐，笑容如花儿般绽放。', '用四川话说这句话', prompt_speech_16k, stream=False)):
+    torchaudio.save('instruct_{}.wav'.format(i), j['tts_speech'], cosyvoice.sample_rate)
\ No newline at end of file
diff --git a/models/speech/speech_synthesis/cosyvoice/pytorch/requirements.txt b/models/speech/speech_synthesis/cosyvoice/pytorch/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2bc0604ee8811262e2952ba2b9f954e333b5c675
--- /dev/null
+++ b/models/speech/speech_synthesis/cosyvoice/pytorch/requirements.txt
@@ -0,0 +1,40 @@
+--extra-index-url https://download.pytorch.org/whl/cu121
+--extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/ # https://github.com/microsoft/onnxruntime/issues/21684
+conformer==0.3.2
+#deepspeed==0.15.1; sys_platform == 'linux'
+diffusers==0.29.0
+fastapi==0.115.6
+fastapi-cli==0.0.4
+gdown==5.1.0
+gradio==5.4.0
+grpcio==1.57.0
+grpcio-tools==1.57.0
+hydra-core==1.3.2
+HyperPyYAML==1.2.2
+inflect==7.3.1
+librosa==0.10.2
+lightning==2.2.4
+matplotlib==3.7.5
+modelscope==1.20.0
+networkx==3.1
+omegaconf==2.3.0
+#onnx==1.16.0
+#onnxruntime-gpu==1.18.0; sys_platform == 'linux'
+onnxruntime==1.18.0; sys_platform == 'darwin' or sys_platform == 'win32'
+openai-whisper
+protobuf==4.25
+pyarrow==18.1.0
+pydantic==2.7.0
+pyworld==0.3.4
+rich==13.7.1
+soundfile==0.12.1
+#tensorboard==2.14.0
+#tensorrt-cu12==10.0.1; sys_platform == 'linux'
+#tensorrt-cu12-bindings==10.0.1; sys_platform == 'linux'
+#tensorrt-cu12-libs==10.0.1; sys_platform == 'linux'
+#torch==2.3.1
+#torchaudio==2.3.1
+#transformers==4.40.1
+uvicorn==0.30.0
+wetext==0.0.4
+wget==3.2
\ No newline at end of file
diff --git a/tests/model_info.json b/tests/model_info.json
index 2ad04e4347d20aabcc7070ba918b63af6e06d7b7..731f0339bfdadc87f38e39d583343e39649684ff 100644
--- a/tests/model_info.json
+++ b/tests/model_info.json
@@ -8072,7 +8072,7 @@
             "framework": "fastdeploy",
             "release_version": "25.09",
             "release_sdk": "4.3.0",
-            "release_gpgpu": "BI-V150",
+            "release_gpgpu": "MR-V100",
             "latest_sdk": "",
             "latest_gpgpu": "",
             "category": "nlp/llm",
@@ -8105,7 +8105,7 @@
             "framework": "fastdeploy",
             "release_version": "25.09",
             "release_sdk": "4.3.0",
-            "release_gpgpu": "BI-V150",
+            "release_gpgpu": "MR-V100",
             "latest_sdk": "",
             "latest_gpgpu": "",
             "category": "nlp/llm",
@@ -8131,6 +8131,39 @@
             "type": "inference",
             "hasDemo": false,
             "demoType": ""
+        },
+        {
+            "display_name": "CosyVoice2-0.5B",
+            "model_name": "cosyvoice",
+            "framework": "pytorch",
+            "release_version": "25.09",
+            "release_sdk": "4.3.0",
+            "release_gpgpu": "MR-V100",
+            "latest_sdk": "",
+            "latest_gpgpu": "",
+            "category": "speech/speech_synthesis",
+            "toolbox": "",
+            "mdims": "",
+            "dataset": "",
+            "license": "",
+            "model_path": "models/speech/speech_synthesis/cosyvoice/pytorch",
+            "readme_file": "models/speech/speech_synthesis/cosyvoice/pytorch/README.md",
+            "bitbucket_repo": "",
+            "bitbucket_branch": "",
+            "bitbucket_path": "",
+            "develop_owner": "",
+            "github_repo": "",
+            "github_branch": "",
+            "github_path": "",
+            "datasets": "",
+            "download_url": "https://huggingface.co/FunAudioLLM/CosyVoice2-0.5B",
+            "need_third_part": false,
+            "precisions": [
+                "fp16"
+            ],
+            "type": "inference",
+            "hasDemo": false,
+            "demoType": ""
         }
     ]
 }
\ No newline at end of file
diff --git a/tests/run_vllm.py b/tests/run_vllm.py
index 39b3c2227568ccfd30da953b81954266e90961fd..eab1387958318d8c3263edb3d7725ff0f74438bb 100644
--- a/tests/run_vllm.py
+++ b/tests/run_vllm.py
@@ -56,7 +56,7 @@ def main():
 
     result = {}
     # NLP模型
-    if model["category"] in ["nlp/llm", "multimodal/vision_language_model", "speech/asr"]:
+    if model["category"] in ["nlp/llm", "multimodal/vision_language_model", "speech/asr", "speech/speech_synthesis"]:
         logging.info(f"Start running {model['model_name']} test case:\n{json.dumps(model, indent=4)}")
         d_url = model["download_url"]
         if d_url is not None:
@@ -72,7 +72,7 @@ def get_model_config(mode_name):
         models = json.load(file)
 
     for model in models['models']:
-        if model["model_name"] == mode_name.lower() and (model["framework"] == "vllm" or model["framework"] == "lmdeploy"):
+        if model["model_name"] == mode_name.lower() and (model["framework"] == "vllm" or model["framework"] == "lmdeploy" or model["framework"] == "pytorch"):
             return model
     return
 
@@ -341,6 +341,12 @@ def run_nlp_testcase(model):
             cd ../{model['model_path']}
             python3 offline_inference_vision_language.py --model ./{model_name} --max-model-len 4096 --max-num-seqs 2  --trust-remote-code --temperature 0.0 --disable-mm-preprocessor-cache
             """
+        elif model_name == "cosyvoice":
+            script = f"""
+            set -x
+            cd ../{model['model_path']}/CosyVoice
+            python3 inference_test.py
+            """
 
         r, t = run_script(script)
         sout = r.stdout