diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/attention_processor.patch b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/attention_processor.patch
index 85bf489873124cc89478d0d99d61894d0c508924..1af326b75c1717aa5629b28ac90bb6d9c3a0a641 100644
--- a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/attention_processor.patch
+++ b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/attention_processor.patch
@@ -14,7 +14,7 @@
 +            hidden_states=torch_npu.npu_prompt_flash_attention(query,key,value,
 +                                                               atten_mask=attention_mask,
 +                                                               input_layout='BNSD',
-+                                                               scale_vaule=head_dim**-0.5,
++                                                               scale_value=head_dim**-0.5,
 +                                                               pre_tokens=65535,
 +                                                               next_tokens=65535,
 +                                                               num_heads=attn.heads)
diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable_audio_open_pipeline.py b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable_audio_open_pipeline.py
index f6682fb2d97f0ab0c00c510c7b89ec3cb7b0d3ea..39bcd361dead653d7a7bf52d19e81fa7849db765 100644
--- a/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable_audio_open_pipeline.py
+++ b/MindIE/MindIE-Torch/built-in/foundation/stable-audio-open-1.0/stable_audio_open_pipeline.py
@@ -97,7 +97,6 @@ def main():
         data = reader.read()
         json_data = json.loads(data)
         init_dict = {key: json_data[key] for key in json_data}
-        print(init_dict)
     vae = AutoencoderOobleck(**init_dict)
     vae.load_state_dict(load_file(args.stable_audio_open_dir + "/vae/diffusion_pytorch_model.safetensors"),
                         strict=False)
@@ -126,7 +125,7 @@ def main():
     npu_stream.synchronize()
     begin = time.time()
     audio = pipe(
-        prompt=args.prompt,
+        prompt=prompt,
         negative_prompt=args.negative_prompt,
         num_inference_steps=args.num_inference_steps,
         latents=args.latents.to("npu"),