From 8094f320ed884a03f33c7ad555771e0b0936b386 Mon Sep 17 00:00:00 2001
From: zhoufan2956 <zhoufan2956@163.com>
Date: Mon, 30 Sep 2024 19:13:26 +0800
Subject: [PATCH 01/13] add stable-audio-tools infer way

---
 .../stable-audio-open-1.0/diffusers/README.md |  10 +-
 .../precision_brownian_interval.patch         |   1 +
 .../diffusers/prompt.txt                      |   3 +
 .../stable-audio-tools/README.md              | 148 ++++++++++++++++++
 .../stable-audio-tools/conditioners.patch     |  24 +++
 .../stable-audio-tools/conditioners_patch.py  |  14 ++
 .../stable-audio-tools/pretrained.patch       |  20 +++
 .../stable-audio-tools/pretrained_patch.py    |  14 ++
 .../stable-audio-tools/prompt.txt             |   3 +
 .../stable-audio-tools/requirements.txt       |   5 +
 .../stable_audio_open_tools_pipeline.py       | 121 ++++++++++++++
 11 files changed, 358 insertions(+), 5 deletions(-)
 create mode 100644 MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/diffusers/prompt.txt
 create mode 100644 MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md
 create mode 100644 MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/conditioners.patch
 create mode 100644 MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/conditioners_patch.py
 create mode 100644 MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/pretrained.patch
 create mode 100644 MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/pretrained_patch.py
 create mode 100644 MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/prompt.txt
 create mode 100644 MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/requirements.txt
 create mode 100644 MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/stable_audio_open_tools_pipeline.py

diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/diffusers/README.md b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/diffusers/README.md
index 61a1864ca2..2176c04d80 100644
--- a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/diffusers/README.md
+++ b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/diffusers/README.md
@@ -1,4 +1,4 @@
-# stable-audio-open-1.0模型-推理指导
+# stable-audio-open-1.0模型-diffusers方式推理指导
 
 - [概述](#ZH-CN_TOPIC_0000001172161501)
   
@@ -164,8 +164,8 @@
               --audio_end_in_s 10 10 47 \
               --num_waveforms_per_prompt 1 \
               --guidance_scale 7 \
-              --device 0 \
-              --save_dir ./result
+              --save_dir ./result \
+              --device 0
       ```
       
       参数说明：
@@ -173,11 +173,11 @@
       - --output_dir：存放导出模型的目录。
       - --prompt_file：提示词文件。
       - --num_inference_steps: 语音生成迭代次数。
-      - --save_dir：生成语音的存放目录。
-      - --device：推理设备ID。
       - --audio_end_in_s：生成语音的时长，如不输入则默认生成10s。
       - --num_waveforms_per_prompt：一个提示词生成的语音数量。
       - --guidance_scale：音频生成质量与准确度系数。
+      - --save_dir：生成语音的存放目录。
+      - --device：推理设备ID。
       
       执行完成后在`./results`目录下生成推理语音，语音生成顺序与文本中prompt顺序保持一致，并在终端显示推理时间。
 
diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/diffusers/precision_brownian_interval.patch b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/diffusers/precision_brownian_interval.patch
index fcaca7605d..d9d94e5801 100644
--- a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/diffusers/precision_brownian_interval.patch
+++ b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/diffusers/precision_brownian_interval.patch
@@ -8,3 +8,4 @@
 -    return torch.randn(size, dtype=dtype, device=device, generator=generator)
 +    torch.manual_seed(int(seed))
 +    return torch.randn(size, dtype=dtype, device="cpu").to(device)
+
diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/diffusers/prompt.txt b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/diffusers/prompt.txt
new file mode 100644
index 0000000000..e1c7734ef9
--- /dev/null
+++ b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/diffusers/prompt.txt
@@ -0,0 +1,3 @@
+Berlin techno, rave, drum machine, kick, ARP synthesizer, dark, moody, hypnotic, evolving, 135BPM. LOOP.
+Uplifting acoustic loop. 120 BPM.
+Disco, Driving Drum Machine, Synthesizer, Bass, Piano, Guitars, Instrumental, Clubby, Euphoric, Chicago, New York, 115 BPM.
\ No newline at end of file
diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md
new file mode 100644
index 0000000000..d9bcef1b15
--- /dev/null
+++ b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md
@@ -0,0 +1,148 @@
+# stable-audio-open-1.0模型-stable-audio-tools方式推理指导
+
+- [概述](#ZH-CN_TOPIC_0000001172161501)
+  
+
+- [推理环境准备](#ZH-CN_TOPIC_0000001126281702)
+
+- [快速上手](#ZH-CN_TOPIC_0000001126281700)
+
+  - [获取源码](#section4622531142816)
+  - [模型推理](#section741711594517)
+
+- [模型推理性能&精度](#ZH-CN_TOPIC_0000001172201573)
+
+# 概述<a name="ZH-CN_TOPIC_0000001172161501"></a>
+
+   stable-audio-open-1.0的模型介绍与权重可从[此处获得](https://huggingface.co/stabilityai/stable-audio-open-1.0)。
+
+- 参考实现：
+  ```bash
+   # StableAudioOpen1.0
+   https://huggingface.co/stabilityai/stable-audio-open-1.0
+  ```
+
+# 推理环境准备<a name="ZH-CN_TOPIC_0000001126281702"></a>
+
+- 该模型需要以下插件与驱动
+
+  **表 1**  版本配套表
+
+  | 配套  | 版本 | 环境准备指导 |
+  | ----- | ----- |-----|
+  | Python | 3.10.2 | - |
+  | torch | 2.1.0 | - |
+
+该模型性能受CPU规格影响，建议使用64核CPU（arm）以复现性能
+
+# 快速上手<a name="ZH-CN_TOPIC_0000001126281700"></a>
+## 获取源码<a name="section4622531142816"></a>
+1. 安装依赖。
+   ```bash
+   pip3 install -r requirements.txt
+   ```
+
+2. 安装mindie包
+
+   ```bash
+   # 安装mindie
+   source /usr/local/Ascend/ascend-toolkit/set_env.sh
+   chmod +x ./Ascend-mindie_xxx.run
+   ./Ascend-mindie_xxx.run --install
+   source /usr/local/Ascend/mindie/set_env.sh
+   ```
+   
+3. 代码修改
+
+- 执行命令：
+   ```bash
+   python3 conditioners_patch.py
+   python3 pretrained_patch.py
+   ```
+
+## 模型推理<a name="section741711594517"></a>
+
+1. 模型准备。
+   1.  获取模型权重
+      ```bash
+      # 需要使用 git-lfs (https://git-lfs.com)
+      git lfs install
+      
+      # 下载stable-audio-open-1.0权重
+      git clone https://huggingface.co/stabilityai/stable-audio-open-1.0
+      ```
+
+   2.  设置模型权重的路径。
+      ```bash
+      # stable-audio-open-1.0 (执行时下载权重)
+      model_base="stabilityai/stable-audio-open-1.0"
+      
+      # stable-audio-open-1.0 (使用上一步下载的权重)
+      model_base="./stable-audio-open-1.0"
+      ```
+   
+   3.  获取T5模型权重（可选）
+
+      推理过程中会自动从huggingface下载T5-base的模型权重，若希望以加载本地T5-base模型权重方式进行推理，请将`model_base`路径下的`tokenizer`和`text_encoder`文件夹复制到代码执行路径中。
+
+      
+2. 开始推理验证。
+
+   1. 开启cpu高性能模式
+      ```bash
+      echo performance |tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
+      sysctl -w vm.swappiness=0
+      sysctl -w kernel.numa_balancing=0
+      ```
+
+   2. 安装绑核工具
+      ```bash
+      apt-get update
+      apt-get install numactl
+      ```
+      查询卡的NUMA node
+      ```shell
+      lspci -vs bus-id
+      ```
+      bus-id可通过`npu-smi info`获得；查询到该卡所在的NUMA node后，在推理命令前使用`numactl -C`绑定该NUMA node对应的CPU核。
+
+      可通过lscpu获得NUMA node对应的CPU核数
+      ```shell
+      NUMA node0: 0-23
+      NUMA node1: 24-47
+      NUMA node2: 48-71
+      NUMA node3: 72-95
+      ```
+      当前查到NUMA node是0，对应0-23，推荐绑定其中单核以获得更好的性能。
+   
+   3. 执行推理脚本。
+      ```bash
+      numactl -C 0-23 python3 stable_audio_open_tools_pipeline.py \
+              --model ${model_base} \
+              --prompt_file ./prompts.txt \
+              --num_inference_steps 100 \
+              --seconds_total 10 10 47 \
+              --save_dir ./result \
+              --device 0 
+      ```
+      
+      参数说明：
+      - --model：模型权重路径。
+      - --prompt_file：提示词文件。
+      - --num_inference_steps: 语音生成迭代次数。
+      - --seconds_total：生成语音的时长，如不输入则默认生成10s。
+      - --save_dir：生成语音的存放目录。
+      - --device：推理设备ID。
+      
+      执行完成后在`./results`目录下生成推理语音，语音生成顺序与文本中prompt顺序保持一致，并在终端显示推理时间。
+
+
+
+# 模型推理性能&精度<a name="ZH-CN_TOPIC_0000001172201573"></a>
+性能参考下列数据。
+
+### Stable-Audio-Open-1.0
+
+| 硬件形态 | 迭代次数 | 平均耗时|
+| :------: |:----:|:----:|
+| A2     |  100  |  15.675s  |
\ No newline at end of file
diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/conditioners.patch b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/conditioners.patch
new file mode 100644
index 0000000000..f635d7a20d
--- /dev/null
+++ b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/conditioners.patch
@@ -0,0 +1,24 @@
+--- conditioners.py	2024-09-30 15:31:32.480360700 +0800
++++ conditioners_patch.py	2024-09-30 18:20:43.344830200 +0800
+@@ -280,10 +280,17 @@
+         with warnings.catch_warnings():
+             warnings.simplefilter("ignore")
+             try:
+-                # self.tokenizer = T5Tokenizer.from_pretrained(t5_model_name, model_max_length = max_length)
+-                # model = T5EncoderModel.from_pretrained(t5_model_name, max_length=max_length).train(enable_grad).requires_grad_(enable_grad)
+-                self.tokenizer = AutoTokenizer.from_pretrained(t5_model_name)
+-                model = T5EncoderModel.from_pretrained(t5_model_name).train(enable_grad).requires_grad_(enable_grad).to(torch.float16)
++                import os
++                tokenizer_path = os.path.join(os.getcwd() + "tokenizer")
++                text_encoder_path = os.path.join(os.getcwd() + "text_encoder")
++                if os.path.exists(tokenizer_path) and os.path.exists(text_encoder_path):
++                    print("From loacl import T5-base . . .")
++                    self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
++                    model = T5EncoderModel.from_pretrained(text_encoder_path).train(enable_grad).requires_grad_(enable_grad).to(torch.float16)
++                else:
++                    print("From HuggingFace download T5-base . . .")
++                    self.tokenizer = AutoTokenizer.from_pretrained(t5_model_name)
++                    model = T5EncoderModel.from_pretrained(t5_model_name).train(enable_grad).requires_grad_(enable_grad).to(torch.float16)
+             finally:
+                 logging.disable(previous_level)
+             
diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/conditioners_patch.py b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/conditioners_patch.py
new file mode 100644
index 0000000000..7c20ccaa61
--- /dev/null
+++ b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/conditioners_patch.py
@@ -0,0 +1,14 @@
+import os
+import stable_audio_tools
+
+
+def main():
+    stable_audio_tools_path = stable_audio_tools.__path__
+    stable_audio_tools_version = stable_audio_tools.__version__
+
+    assert stable_audio_tools_version is not '0.0.16', "expectation stable_audio_tools_version==0.0.16"
+    os.system(f'patch -p0 {stable_audio_tools_path[0]}/models/conditioners.py conditioners.patch')
+
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/pretrained.patch b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/pretrained.patch
new file mode 100644
index 0000000000..ca200e51c7
--- /dev/null
+++ b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/pretrained.patch
@@ -0,0 +1,20 @@
+--- pretrained.py	2024-09-30 15:31:40.672485200 +0800
++++ pretrained_patch.py	2024-09-30 18:15:59.061846100 +0800
+@@ -1,4 +1,5 @@
+ import json
++import os
+ 
+ from .factory import create_model_from_config
+ from .utils import load_ckpt_state_dict
+@@ -15,10 +16,7 @@
+     model = create_model_from_config(model_config)
+ 
+     # Try to download the model.safetensors file first, if it doesn't exist, download the model.ckpt file
+-    try:
+-        model_ckpt_path = hf_hub_download(name, filename="model.safetensors", repo_type='model')
+-    except Exception as e:
+-        model_ckpt_path = hf_hub_download(name, filename="model.ckpt", repo_type='model')
++    model_ckpt_path = os.path.join(name, "model.safetensors")
+ 
+     model.load_state_dict(load_ckpt_state_dict(model_ckpt_path))
+ 
diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/pretrained_patch.py b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/pretrained_patch.py
new file mode 100644
index 0000000000..5725b47300
--- /dev/null
+++ b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/pretrained_patch.py
@@ -0,0 +1,14 @@
+import os
+import stable_audio_tools
+
+
+def main():
+    stable_audio_tools_path = stable_audio_tools.__path__
+    stable_audio_tools_version = stable_audio_tools.__version__
+
+    assert stable_audio_tools_version is not '0.0.16', "expectation stable_audio_tools_version==0.0.16"
+    os.system(f'patch -p0 {stable_audio_tools_path[0]}/models/pretrained.py pretrained.patch')
+
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/prompt.txt b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/prompt.txt
new file mode 100644
index 0000000000..e1c7734ef9
--- /dev/null
+++ b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/prompt.txt
@@ -0,0 +1,3 @@
+Berlin techno, rave, drum machine, kick, ARP synthesizer, dark, moody, hypnotic, evolving, 135BPM. LOOP.
+Uplifting acoustic loop. 120 BPM.
+Disco, Driving Drum Machine, Synthesizer, Bass, Piano, Guitars, Instrumental, Clubby, Euphoric, Chicago, New York, 115 BPM.
\ No newline at end of file
diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/requirements.txt b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/requirements.txt
new file mode 100644
index 0000000000..df8e40939a
--- /dev/null
+++ b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/requirements.txt
@@ -0,0 +1,5 @@
+torch==2.1.0
+torchaudio==2.1.0
+stable_audio_tools==0.0.16
+transformers==4.40.0
+torch_npu==2.1.0.post6
\ No newline at end of file
diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/stable_audio_open_tools_pipeline.py b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/stable_audio_open_tools_pipeline.py
new file mode 100644
index 0000000000..8766a6d3cc
--- /dev/null
+++ b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/stable_audio_open_tools_pipeline.py
@@ -0,0 +1,121 @@
+import torch
+import torch_npu
+import sys
+import time
+import os
+import argparse
+from safetensors.torch import load_file
+import torchaudio
+from einops import rearrange
+from stable_audio_tools import get_pretrained_model
+from stable_audio_tools.inference.generation import generate_diffusion_cond
+
+def parse_arguments():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--prompt_file",
+        type=str,
+        default="./prompts.txt",
+        help="The prompts file to guide audio generation.",
+    )
+    parser.add_argument(
+        "--num_inference_steps",
+        type=int,
+        default=100,
+        help="The number of denoising steps. More denoising steps usually lead to a higher quality audio at the expense of slower inference.",
+    )
+    parser.add_argument(
+        "--model",
+        type=str,
+        default="./stable-audio-open-1.0",
+        help="The path of stable-audio-open-1.0.",
+    )
+    parser.add_argument(
+        "--seconds_total",
+        nargs='+',
+        default=[10],
+        help="Audio end index in seconds.",
+    )
+    parser.add_argument(
+        "--device",
+        type=int,
+        default=0,
+        help="NPU device id.",
+    )
+    parser.add_argument(
+        "--save_dir",
+        type=str,
+        default="./results",
+        help="Path to save result audio files.",
+    )
+    return parser.parse_args()
+
+def main():
+    args = parse_arguments()
+    save_dir = args.save_dir
+    if not os.path.exists(save_dir):
+        os.makedirs(save_dir)
+
+    torch_npu.npu.set_device(args.device)
+    npu_stream = torch_npu.npu.Stream()
+    
+    model, model_config = get_pretrained_model(args.model)
+    sample_rate = model_config["sample_rate"]
+    sample_size = model_config["sample_size"]
+
+    model = model.to("npu").to(torch.float16).eval()
+
+    conditioning = [{
+        "prompt":"",
+        "seconds_start": 0,
+        "seconds_total": 0,
+    }]
+    total_time = 0
+    prompts_num = 0
+    average_time = 0
+    skip = 2
+    with os.fdopen(os.open(args.prompt_file, os.O_RDONLY), "r") as f:
+        for i, prompt in enumerate(f):
+            with torch.no_grad():
+                conditioning[0]["prompt"] = prompt
+                conditioning[0]["seconds_total"] = float(args.audio_end_in_s[i]) if (len(args.audio_end_in_s) > i) else 10.0
+
+                npu_stream.synchronize()
+                begin = time.time()
+                output = generate_diffusion_cond(
+                    model,
+                    steps=args.num_inference_steps,
+                    cfg_scale=7,
+                    conditioning=conditioning,
+                    sample_size=sample_size,
+                    sigma_min=0.3,
+                    sigma_max=500,
+                    sampler_type="dpmpp-3m-3sde",
+                    device="npu"
+                )
+                npu_stream.synchronize()
+                end = time.time()
+                if i > skip-1:
+                    total_time += end - begin
+            prompts_num = i+1
+            wavefrom_start = int(conditioning[0]["seconds_start"] * sample_rate)
+            wavefrom_end = int(conditioning[0]["seconds_total"] * sample_rate)
+            output = output[:, :, wavefrom_start:wavefrom_end]
+            output = rearrange(output, "b d n -> d (b n)")
+            output = output.to(torch.float32).div(torch.max(torch.abs(output))).clamp(-1,1).mul(32767).to(torch.int16).cpu()
+            torchaudio.save(args.save_dir + "/audio_by_prompt" + str(prompts_num) + ".wav", output, sample_rate)
+    if prompts_num>skip:
+        average_time = total_time/(prompts_num-skip)
+    else:
+        print("Infer average time skip first two prompts, make sure prompts.txt has three more prompts")
+    print(f"Infer average time: {average_time:.3f}s\n")
+
+if __name__ == "__main__":
+    main()
+
+
+
+
+
+
+
-- 
Gitee


From 2400e0c936534bc231cc52b7e32bd379f699a57f Mon Sep 17 00:00:00 2001
From: zhoufan2956 <zhoufan2956@163.com>
Date: Mon, 7 Oct 2024 14:59:14 +0800
Subject: [PATCH 02/13] modify patch err

---
 .../stable-audio-tools/README.md                      |  3 +--
 .../stable-audio-tools/conditioners.patch             |  2 +-
 .../stable-audio-tools/conditioners_patch.py          |  2 --
 .../stable-audio-tools/pretrained.patch               | 11 ++++++++++-
 .../stable-audio-tools/pretrained_patch.py            |  2 --
 5 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md
index d9bcef1b15..fde348d621 100644
--- a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md
+++ b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md
@@ -53,7 +53,6 @@
    ```
    
 3. 代码修改
-
 - 执行命令：
    ```bash
    python3 conditioners_patch.py
@@ -83,7 +82,7 @@
    
    3.  获取T5模型权重（可选）
 
-      推理过程中会自动从huggingface下载T5-base的模型权重，若希望以加载本地T5-base模型权重方式进行推理，请将`model_base`路径下的`tokenizer`和`text_encoder`文件夹复制到代码执行路径中。
+      推理过程中会自动从huggingface下载T5-base的模型权重，若希望以加载本地T5-base模型权重方式进行推理，请将`model_base`路径下的`tokenizer`和`text_encoder`文件夹复制到推理代码的执行路径中。
 
       
 2. 开始推理验证。
diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/conditioners.patch b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/conditioners.patch
index f635d7a20d..061c0e0e6f 100644
--- a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/conditioners.patch
+++ b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/conditioners.patch
@@ -12,7 +12,7 @@
 +                tokenizer_path = os.path.join(os.getcwd() + "tokenizer")
 +                text_encoder_path = os.path.join(os.getcwd() + "text_encoder")
 +                if os.path.exists(tokenizer_path) and os.path.exists(text_encoder_path):
-+                    print("From loacl import T5-base . . .")
++                    print("From local import T5-base . . .")
 +                    self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
 +                    model = T5EncoderModel.from_pretrained(text_encoder_path).train(enable_grad).requires_grad_(enable_grad).to(torch.float16)
 +                else:
diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/conditioners_patch.py b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/conditioners_patch.py
index 7c20ccaa61..71db741779 100644
--- a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/conditioners_patch.py
+++ b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/conditioners_patch.py
@@ -4,9 +4,7 @@ import stable_audio_tools
 
 def main():
     stable_audio_tools_path = stable_audio_tools.__path__
-    stable_audio_tools_version = stable_audio_tools.__version__
 
-    assert stable_audio_tools_version is not '0.0.16', "expectation stable_audio_tools_version==0.0.16"
     os.system(f'patch -p0 {stable_audio_tools_path[0]}/models/conditioners.py conditioners.patch')
 
 
diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/pretrained.patch b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/pretrained.patch
index ca200e51c7..f51e6a1d90 100644
--- a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/pretrained.patch
+++ b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/pretrained.patch
@@ -1,11 +1,20 @@
 --- pretrained.py	2024-09-30 15:31:40.672485200 +0800
-+++ pretrained_patch.py	2024-09-30 18:15:59.061846100 +0800
++++ pretrained_patch.py	2024-10-07 14:54:18.756960100 +0800
 @@ -1,4 +1,5 @@
  import json
 +import os
  
  from .factory import create_model_from_config
  from .utils import load_ckpt_state_dict
+@@ -7,7 +8,7 @@
+ 
+ def get_pretrained_model(name: str):
+     
+-    model_config_path = hf_hub_download(name, filename="model_config.json", repo_type='model')
++    model_config_path = os.path.join(name, "model_config.json")
+ 
+     with open(model_config_path) as f:
+         model_config = json.load(f)
 @@ -15,10 +16,7 @@
      model = create_model_from_config(model_config)
  
diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/pretrained_patch.py b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/pretrained_patch.py
index 5725b47300..4abdad47b0 100644
--- a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/pretrained_patch.py
+++ b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/pretrained_patch.py
@@ -4,9 +4,7 @@ import stable_audio_tools
 
 def main():
     stable_audio_tools_path = stable_audio_tools.__path__
-    stable_audio_tools_version = stable_audio_tools.__version__
 
-    assert stable_audio_tools_version is not '0.0.16', "expectation stable_audio_tools_version==0.0.16"
     os.system(f'patch -p0 {stable_audio_tools_path[0]}/models/pretrained.py pretrained.patch')
 
 
-- 
Gitee


From 7f897e14848bf4584950b39dc3f7a3d00d0b280d Mon Sep 17 00:00:00 2001
From: zhoufan2956 <zhoufan2956@163.com>
Date: Mon, 7 Oct 2024 15:37:14 +0800
Subject: [PATCH 03/13] modify patch err

---
 .../stable-audio-tools/conditioners.patch                     | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/conditioners.patch b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/conditioners.patch
index 061c0e0e6f..c61a74932a 100644
--- a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/conditioners.patch
+++ b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/conditioners.patch
@@ -9,8 +9,8 @@
 -                self.tokenizer = AutoTokenizer.from_pretrained(t5_model_name)
 -                model = T5EncoderModel.from_pretrained(t5_model_name).train(enable_grad).requires_grad_(enable_grad).to(torch.float16)
 +                import os
-+                tokenizer_path = os.path.join(os.getcwd() + "tokenizer")
-+                text_encoder_path = os.path.join(os.getcwd() + "text_encoder")
++                tokenizer_path = os.path.join(os.getcwd(), "tokenizer")
++                text_encoder_path = os.path.join(os.getcwd(), "text_encoder")
 +                if os.path.exists(tokenizer_path) and os.path.exists(text_encoder_path):
 +                    print("From local import T5-base . . .")
 +                    self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
-- 
Gitee


From 459e1389d4b767706c5d6b4e844802f9d0500916 Mon Sep 17 00:00:00 2001
From: zhoufan2956 <zhoufan2956@163.com>
Date: Mon, 7 Oct 2024 15:39:41 +0800
Subject: [PATCH 04/13] modify prompts err

---
 .../stable-audio-open-1.0/diffusers/{prompt.txt => prompts.txt}   | 0
 .../stable-audio-tools/{prompt.txt => prompts.txt}                | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/diffusers/{prompt.txt => prompts.txt} (100%)
 rename MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/{prompt.txt => prompts.txt} (100%)

diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/diffusers/prompt.txt b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/diffusers/prompts.txt
similarity index 100%
rename from MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/diffusers/prompt.txt
rename to MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/diffusers/prompts.txt
diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/prompt.txt b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/prompts.txt
similarity index 100%
rename from MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/prompt.txt
rename to MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/prompts.txt
-- 
Gitee


From 2076ba600ae006c8cf29f237d4dd3946f88fe7bc Mon Sep 17 00:00:00 2001
From: zhoufan2956 <zhoufan2956@163.com>
Date: Mon, 7 Oct 2024 15:48:55 +0800
Subject: [PATCH 05/13] modify pipeline err

---
 .../stable_audio_open_tools_pipeline.py            | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/stable_audio_open_tools_pipeline.py b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/stable_audio_open_tools_pipeline.py
index 8766a6d3cc..192879312f 100644
--- a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/stable_audio_open_tools_pipeline.py
+++ b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/stable_audio_open_tools_pipeline.py
@@ -78,7 +78,7 @@ def main():
         for i, prompt in enumerate(f):
             with torch.no_grad():
                 conditioning[0]["prompt"] = prompt
-                conditioning[0]["seconds_total"] = float(args.audio_end_in_s[i]) if (len(args.audio_end_in_s) > i) else 10.0
+                conditioning[0]["seconds_total"] = float(args.seconds_total[i]) if (len(args.seconds_total) > i) else 10.0
 
                 npu_stream.synchronize()
                 begin = time.time()
@@ -90,7 +90,7 @@ def main():
                     sample_size=sample_size,
                     sigma_min=0.3,
                     sigma_max=500,
-                    sampler_type="dpmpp-3m-3sde",
+                    sampler_type="dpmpp-3m-sde",
                     device="npu"
                 )
                 npu_stream.synchronize()
@@ -98,14 +98,14 @@ def main():
                 if i > skip-1:
                     total_time += end - begin
             prompts_num = i+1
-            wavefrom_start = int(conditioning[0]["seconds_start"] * sample_rate)
-            wavefrom_end = int(conditioning[0]["seconds_total"] * sample_rate)
-            output = output[:, :, wavefrom_start:wavefrom_end]
+            waveform_start = int(conditioning[0]["seconds_start"] * sample_rate)
+            waveform_end = int(conditioning[0]["seconds_total"] * sample_rate)
+            output = output[:, :, waveform_start:waveform_end]
             output = rearrange(output, "b d n -> d (b n)")
             output = output.to(torch.float32).div(torch.max(torch.abs(output))).clamp(-1,1).mul(32767).to(torch.int16).cpu()
             torchaudio.save(args.save_dir + "/audio_by_prompt" + str(prompts_num) + ".wav", output, sample_rate)
-    if prompts_num>skip:
-        average_time = total_time/(prompts_num-skip)
+    if prompts_num > skip:
+        average_time = total_time / (prompts_num-skip)
     else:
         print("Infer average time skip first two prompts, make sure prompts.txt has three more prompts")
     print(f"Infer average time: {average_time:.3f}s\n")
-- 
Gitee


From 9c9924235f00524b51ce0244f1495388d51366a4 Mon Sep 17 00:00:00 2001
From: zhoufan2956 <zhoufan2956@163.com>
Date: Mon, 7 Oct 2024 16:18:10 +0800
Subject: [PATCH 06/13] modify readme err

---
 .../foundation/stable-audio-open-1.0/diffusers/README.md        | 2 +-
 .../stable-audio-open-1.0/stable-audio-tools/README.md          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/diffusers/README.md b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/diffusers/README.md
index 2176c04d80..2ddb2adbf7 100644
--- a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/diffusers/README.md
+++ b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/diffusers/README.md
@@ -164,7 +164,7 @@
               --audio_end_in_s 10 10 47 \
               --num_waveforms_per_prompt 1 \
               --guidance_scale 7 \
-              --save_dir ./result \
+              --save_dir ./results \
               --device 0
       ```
       
diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md
index fde348d621..df5f7f7840 100644
--- a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md
+++ b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md
@@ -121,7 +121,7 @@
               --prompt_file ./prompts.txt \
               --num_inference_steps 100 \
               --seconds_total 10 10 47 \
-              --save_dir ./result \
+              --save_dir ./results \
               --device 0 
       ```
       
-- 
Gitee


From bc21edc6ea721a4a9a522e857301b3f3ba2c871d Mon Sep 17 00:00:00 2001
From: zhoufan2956 <zhoufan2956@163.com>
Date: Mon, 7 Oct 2024 16:23:23 +0800
Subject: [PATCH 07/13] modify readme infer time

---
 .../stable-audio-open-1.0/stable-audio-tools/README.md          | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md
index df5f7f7840..cc3e289a79 100644
--- a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md
+++ b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md
@@ -144,4 +144,4 @@
 
 | 硬件形态 | 迭代次数 | 平均耗时|
 | :------: |:----:|:----:|
-| A2     |  100  |  15.675s  |
\ No newline at end of file
+| A2     |  100  |  14.711s  |
\ No newline at end of file
-- 
Gitee


From 9f9e7e2a539c0069a0d68f5ecfe178c8543d6e4b Mon Sep 17 00:00:00 2001
From: zhoufan2956 <zhoufan2956@163.com>
Date: Mon, 7 Oct 2024 16:26:47 +0800
Subject: [PATCH 08/13] modify readme download model-base

---
 .../stable-audio-open-1.0/stable-audio-tools/README.md           | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md
index cc3e289a79..cb2816b2ff 100644
--- a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md
+++ b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md
@@ -63,6 +63,7 @@
 
 1. 模型准备。
    1.  获取模型权重
+      可提前下载权重，以避免执行后面步骤时可能会出现下载失败
       ```bash
       # 需要使用 git-lfs (https://git-lfs.com)
       git lfs install
-- 
Gitee


From 0f8a344758c6fbf5133b656b8bd47bf2cd686c97 Mon Sep 17 00:00:00 2001
From: zhoufan2956 <zhoufan2956@163.com>
Date: Mon, 7 Oct 2024 16:28:41 +0800
Subject: [PATCH 09/13] modify readme download model-base

---
 .../stable-audio-open-1.0/stable-audio-tools/README.md        | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md
index cb2816b2ff..0d455aa3a6 100644
--- a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md
+++ b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md
@@ -62,8 +62,10 @@
 ## 模型推理<a name="section741711594517"></a>
 
 1. 模型准备。
-   1.  获取模型权重
+   1.  获取模型权重 
+
       可提前下载权重，以避免执行后面步骤时可能会出现下载失败
+      
       ```bash
       # 需要使用 git-lfs (https://git-lfs.com)
       git lfs install
-- 
Gitee


From 663aaff652409ef4bf7b57b7389b547a00520305 Mon Sep 17 00:00:00 2001
From: zhoufan2956 <zhoufan2956@163.com>
Date: Mon, 7 Oct 2024 16:35:24 +0800
Subject: [PATCH 10/13] modify readme download model-base

---
 .../stable-audio-open-1.0/stable-audio-tools/README.md   | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md
index 0d455aa3a6..3570f226ce 100644
--- a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md
+++ b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md
@@ -62,10 +62,9 @@
 ## 模型推理<a name="section741711594517"></a>
 
 1. 模型准备。
-   1.  获取模型权重 
-
+   1. 获取模型权重
       可提前下载权重，以避免执行后面步骤时可能会出现下载失败
-      
+
       ```bash
       # 需要使用 git-lfs (https://git-lfs.com)
       git lfs install
@@ -74,7 +73,7 @@
       git clone https://huggingface.co/stabilityai/stable-audio-open-1.0
       ```
 
-   2.  设置模型权重的路径。
+   2. 设置模型权重的路径。
       ```bash
       # stable-audio-open-1.0 (执行时下载权重)
       model_base="stabilityai/stable-audio-open-1.0"
@@ -83,7 +82,7 @@
       model_base="./stable-audio-open-1.0"
       ```
    
-   3.  获取T5模型权重（可选）
+   3. 获取T5模型权重（可选）
 
       推理过程中会自动从huggingface下载T5-base的模型权重，若希望以加载本地T5-base模型权重方式进行推理，请将`model_base`路径下的`tokenizer`和`text_encoder`文件夹复制到推理代码的执行路径中。
 
-- 
Gitee


From 2c8d70808b9b58a2323f83140da440565eab9a28 Mon Sep 17 00:00:00 2001
From: zhoufan2956 <zhoufan2956@163.com>
Date: Mon, 7 Oct 2024 16:37:49 +0800
Subject: [PATCH 11/13] modify readme download model-base

---
 .../stable-audio-open-1.0/stable-audio-tools/README.md           | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md
index 3570f226ce..e98f35145c 100644
--- a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md
+++ b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md
@@ -63,6 +63,7 @@
 
 1. 模型准备。
    1. 获取模型权重
+      
       可提前下载权重，以避免执行后面步骤时可能会出现下载失败
 
       ```bash
-- 
Gitee


From ede1b9d0fc8df9e4dc12610d310d221a12f8c746 Mon Sep 17 00:00:00 2001
From: zhoufan2956 <zhoufan2956@163.com>
Date: Mon, 7 Oct 2024 16:40:06 +0800
Subject: [PATCH 12/13] modify readme download model-base

---
 .../stable-audio-open-1.0/stable-audio-tools/README.md      | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md
index e98f35145c..b0cc51741f 100644
--- a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md
+++ b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md
@@ -53,7 +53,7 @@
    ```
    
 3. 代码修改
-- 执行命令：
+   执行命令：
    ```bash
    python3 conditioners_patch.py
    python3 pretrained_patch.py
@@ -63,7 +63,7 @@
 
 1. 模型准备。
    1. 获取模型权重
-      
+
       可提前下载权重，以避免执行后面步骤时可能会出现下载失败
 
       ```bash
@@ -132,7 +132,7 @@
       - --model：模型权重路径。
       - --prompt_file：提示词文件。
       - --num_inference_steps: 语音生成迭代次数。
-      - --seconds_total：生成语音的时长，如不输入则默认生成10s。
+      - --seconds_total：生成语音的时长，与prompts.txt中的prompt一一对应，如不输入则默认生成10s。
       - --save_dir：生成语音的存放目录。
       - --device：推理设备ID。
       
-- 
Gitee


From c47fe263b2989f7f59f65235b4e7f3472eca4f4a Mon Sep 17 00:00:00 2001
From: zhoufan2956 <zhoufan2956@163.com>
Date: Mon, 7 Oct 2024 16:43:51 +0800
Subject: [PATCH 13/13] modify readme download model-base

---
 .../stable-audio-open-1.0/stable-audio-tools/README.md       | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md
index b0cc51741f..99af1a0d3c 100644
--- a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md
+++ b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable-audio-tools/README.md
@@ -53,6 +53,7 @@
    ```
    
 3. 代码修改
+
    执行命令：
    ```bash
    python3 conditioners_patch.py
@@ -64,7 +65,7 @@
 1. 模型准备。
    1. 获取模型权重
 
-      可提前下载权重，以避免执行后面步骤时可能会出现下载失败
+      可提前下载权重，以避免后续步骤执行时出现下载失败。
 
       ```bash
       # 需要使用 git-lfs (https://git-lfs.com)
@@ -132,7 +133,7 @@
       - --model：模型权重路径。
       - --prompt_file：提示词文件。
       - --num_inference_steps: 语音生成迭代次数。
-      - --seconds_total：生成语音的时长，与prompts.txt中的prompt一一对应，如不输入则默认生成10s。
+      - --seconds_total：生成语音的时长，与prompts.txt中的prompt对应，如不输入则默认生成10s。
       - --save_dir：生成语音的存放目录。
       - --device：推理设备ID。
       
-- 
Gitee