diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable_diffusion_3/background_runtime.py b/MindIE/MindIE-Torch/built-in/foundation/stable_diffusion_3/background_runtime.py
index 6f4935af2d4301cf30f38bcab89e08e4a70fad9d..482d82309cb1dac6e75a4aa8839a1af6b46d4784 100644
--- a/MindIE/MindIE-Torch/built-in/foundation/stable_diffusion_3/background_runtime.py
+++ b/MindIE/MindIE-Torch/built-in/foundation/stable_diffusion_3/background_runtime.py
@@ -177,7 +177,7 @@ class BackgroundRuntime:
 
             for i, _ in enumerate(output_arrays):
                 output = output_cpu.numpy()
-                output_arrays[i][:] = output[i][:]
+                output_arrays[i][:] = output[:]
 
             infer_num += 1
             sync_pipe.send('')
diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable_diffusion_3/export_model.py b/MindIE/MindIE-Torch/built-in/foundation/stable_diffusion_3/export_model.py
index 3608813b1e1977445f5cf636bfe50803793ed91b..1cda87042f27c66fb2c5582ae190ab99564b9fcb 100644
--- a/MindIE/MindIE-Torch/built-in/foundation/stable_diffusion_3/export_model.py
+++ b/MindIE/MindIE-Torch/built-in/foundation/stable_diffusion_3/export_model.py
@@ -70,7 +70,7 @@ def parse_arguments() -> Namespace:
     parser.add_argument("--use_cache", action="store_true", help="Use cache during inference.")
     parser.add_argument("-p", "--parallel", action="store_true",
                         help="Export the unet of bs=1 for parallel inferencing.")
-    parser.add_argument("--soc", choices=["Duo", "A2"], default="A2", help="soc_version.")
+    parser.add_argument("--soc", help="soc_version.")
     parser.add_argument(
         "--device",
         default=0,
@@ -195,7 +195,7 @@ def export_dit(sd_pipeline, args):
             [batch_size, max_position_embeddings, encoder_hidden_size * 2], dtype=torch.float32
         ),
         torch.ones([batch_size, encoder_hidden_size], dtype=torch.float32),
-        torch.ones([batch_size], dtype=torch.int64)
+        torch.ones([1], dtype=torch.int64)
     )
     dit = DiTExport(dit_model).eval()
     torch.jit.trace(dit, dummy_input).save(dit_pt_path)
@@ -212,7 +212,7 @@ def export_dit(sd_pipeline, args):
                                     dtype=mindietorch.dtype.FLOAT),
                  mindietorch.Input((batch_size, encoder_hidden_size),
                                    dtype=mindietorch.dtype.FLOAT),
-                 mindietorch.Input((batch_size,), dtype=mindietorch.dtype.INT64)]
+                 mindietorch.Input((1,), dtype=mindietorch.dtype.INT64)]
        compile_dit(model, inputs, dit_compiled_path, args.soc)
    else:
        logging.info("dit_compiled_path already exists.")
diff --git a/MindIE/MindIE-Torch/built-in/foundation/stable_diffusion_3/stable_diffusion3_pipeline.py b/MindIE/MindIE-Torch/built-in/foundation/stable_diffusion_3/stable_diffusion3_pipeline.py
index e5cea8855a499280cc447705509624acd045bde6..5415517f7e0160959cff4fdad0a4456233dcc8e1 100644
--- a/MindIE/MindIE-Torch/built-in/foundation/stable_diffusion_3/stable_diffusion3_pipeline.py
+++ b/MindIE/MindIE-Torch/built-in/foundation/stable_diffusion_3/stable_diffusion3_pipeline.py
@@ -190,7 +190,7 @@ class AIEStableDiffusion3Pipeline(StableDiffusion3Pipeline):
                 (batch_size, in_channels, sample_size, sample_size),
                 (batch_size, max_position_embeddings, encoder_hidden_size * 2),
                 (batch_size, encoder_hidden_size),
-                (batch_size,),
+                (1,),
             ],
             input_dtypes=[np.float32, np.float32, np.float32, np.int64],
             output_shapes=[(batch_size, in_channels, sample_size, sample_size)],
@@ -534,8 +534,7 @@ class AIEStableDiffusion3Pipeline(StableDiffusion3Pipeline):
             if not self.use_parallel_inferencing and self.do_classifier_free_guidance:
                 latent_model_input = torch.cat([latents] * 2)
                 # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
-                timestep = t.expand(latent_model_input.shape[0]).to(torch.int64)
-                timestep_npu = timestep.to(f"npu:{self.device_0}")
+                timestep = t.to(torch.int64)[None].to(f"npu:{self.device_0}")
             else:
                 latent_model_input = latents
                 timestep = t.to(torch.int64)
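
For context, a minimal sketch (plain PyTorch, independent of the MindIE runtime) of what the timestep change above amounts to: the compiled DiT timestep input is now declared as a single-element INT64 tensor, mindietorch.Input((1,), ...), and the pipeline passes t.to(torch.int64)[None] instead of expanding the timestep to the batch dimension; the diff folds the NPU device transfer into the same line. The batch size and timestep value below are illustrative assumptions, not values from the repository.

import torch

batch_size = 2  # assumed: latents doubled under classifier-free guidance
t = torch.tensor(981)  # assumed: one scheduler timestep as a 0-dim tensor

# Old convention: pre-expand the timestep to the batch dimension.
timestep_old = t.expand(batch_size).to(torch.int64)  # shape: (batch_size,)

# New convention in this diff: keep a single-element tensor and let the
# model broadcast it, matching mindietorch.Input((1,), dtype=INT64).
timestep_new = t.to(torch.int64)[None]  # shape: (1,)

assert timestep_old.shape == (batch_size,)
assert timestep_new.shape == (1,)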