diff --git a/MindIE/MultiModal/CogVideoX/inference.py b/MindIE/MultiModal/CogVideoX/inference.py
index a5d878ac1a2f147c9b3eeeaef2766f588a367c4e..9c473492af8c1b937a8efcf7a87df3c274bd1f39 100644
--- a/MindIE/MultiModal/CogVideoX/inference.py
+++ b/MindIE/MultiModal/CogVideoX/inference.py
@@ -106,6 +106,8 @@ def generate_video(
         prompts = file.readlines()
 
     os.makedirs(output_path, exist_ok=True)
+    pipeline_total_time = 0.0
+    infer_num = 0
     for i, prompt in enumerate(prompts):
         prompt = prompt.strip()  # strip any stray spaces and newlines
         torch_npu.npu.synchronize()
@@ -124,7 +126,10 @@
         ).frames[0]
         torch_npu.npu.synchronize()
         end = time.time()
-        print(f"Time taken for inference: {end - start} seconds")
+        pipeline_time = end - start
+        print(f"Time taken for inference: {pipeline_time} seconds")
+        pipeline_total_time += pipeline_time
+        infer_num += 1
         if cache_algorithm == "sampling" and not transformer.config.use_rotary_positional_embeddings:
             skip_strategy = AdaStep(skip_thr=0.009, max_skip_steps=1, decay_ratio=0.99, device="npu")
             pipe.skip_strategy = skip_strategy
@@ -151,9 +156,11 @@
     with open(f'{output_path}/result.json', 'w', encoding='utf-8') as json_file:
         json.dump(result, json_file, ensure_ascii=False, indent=4)
-    print(f"Result saved to result.json.")
+    pipeline_average_time = pipeline_total_time / infer_num
+    print(f"Average time taken for inference: {pipeline_average_time:.3f} seconds")
+
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Generate a video from a text prompt using CogVideoX")
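For reviewers: the patch times each pipeline call with host-side `time.time()` bracketed by `torch_npu.npu.synchronize()`, so the measured interval covers completed device work rather than just asynchronous kernel launches, then accumulates the per-prompt latencies and prints their mean. Below is a minimal standalone sketch of the same pattern; `run_pipeline` and `timed_average` are hypothetical names standing in for the `pipe(...)` call in the diff, and the empty-input guard is an addition not present in the patch, which divides unconditionally and would raise `ZeroDivisionError` on an empty prompt file.

```python
import time

import torch_npu  # Ascend NPU backend, assumed available as in the patched script


def timed_average(run_pipeline, prompts):
    """Return the mean wall-clock latency of run_pipeline over prompts.

    run_pipeline is a hypothetical stand-in for the pipe(...) call in the diff.
    """
    pipeline_total_time = 0.0
    infer_num = 0
    for prompt in prompts:
        # Synchronize before and after so time.time() brackets finished
        # device work, not just asynchronous kernel launches.
        torch_npu.npu.synchronize()
        start = time.time()
        run_pipeline(prompt.strip())
        torch_npu.npu.synchronize()
        pipeline_time = time.time() - start
        print(f"Time taken for inference: {pipeline_time} seconds")
        pipeline_total_time += pipeline_time
        infer_num += 1
    # Guard against an empty prompt list; the patch divides unconditionally.
    return pipeline_total_time / infer_num if infer_num else 0.0
```

One design note: the first iteration typically includes one-time costs such as graph compilation and memory-pool warm-up, so running a warm-up pass and averaging only the remaining iterations usually gives a steadier mean; the patch as written averages over all iterations.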