diff --git a/PyTorch/built-in/mlm/HunyuanDiT/README.md b/PyTorch/built-in/mlm/HunyuanDiT/README.md
index b40bae9818a5e205aa43c2241adb577ff5b9345a..e06d76af2f5c684b64ecb89692154fc0cf150ccd 100644
--- a/PyTorch/built-in/mlm/HunyuanDiT/README.md
+++ b/PyTorch/built-in/mlm/HunyuanDiT/README.md
@@ -28,8 +28,8 @@
 # 简介

 ## 模型介绍
-OpenSora是HPC AI Tech开发的开源高效复现类Sora视频生成方案。OpenSora不仅实现了先进视频生成技术的低成本普及,还提供了一个精简且用户友好的方案,简化了视频制作的复杂性。
-本仓库主要将OpenSora1.1的STDiT2模型的任务迁移到了昇腾NPU上,并进行极致性能优化。
+HunyuanDiT是由腾讯开发并开源的一款先进的文生图(文本到图像)模型。该模型支持中英文双语输入,特别针对中文进行了优化,能够深刻理解中文语境和文化元素,生成高质量且富有中国文化特色的图像。HunyuanDiT经过大规模中文数据集的训练,涵盖了广泛的类别和艺术风格,能够根据文本提示生成细腻逼真的图像。
+本仓库主要将HunyuanDiT模型的任务迁移到了昇腾NPU上,并进行极致性能优化。

 ## 支持任务列表
@@ -48,14 +48,14 @@ OpenSora是HPC AI Tech开发的开源高效复现类Sora视频生成方案。Ope

   ```
   url=https://github.com/Tencent/HunyuanDiT
-  commit_id=(待补充)
+  commit_id=3bb80e1dedba5bf9728e7c9566c4b5c665bbfbd2
   ```

 - 适配昇腾 AI 处理器的实现:

   ```
   url=https://gitee.com/ascend/ModelZoo-PyTorch.git
-  code_path=PyTorch/built-in/mlm/
+  code_path=PyTorch/built-in/mlm/HunyuanDiT
   ```

@@ -71,7 +71,12 @@ OpenSora是HPC AI Tech开发的开源高效复现类Sora视频生成方案。Ope
   | 三方库 | 支持版本 |
   |:-----------:|:------:|
   | PyTorch | 2.1.0 |
-  | TorchVision | 0.16.0 |
+  | TorchVision | 0.14.1 |
+  | deepspeed | 0.14.4 |
+  | diffusers | 0.21.2 |
+  | transformers | 4.39.1 |
+  | accelerate | 0.27.2 |
+

   在模型根目录下执行以下命令,安装模型对应PyTorch版本需要的依赖。

@@ -79,8 +84,9 @@ OpenSora是HPC AI Tech开发的开源高效复现类Sora视频生成方案。Ope
   ```python
   source ${cann_install_path}/ascend-toolkit/set_env.sh  # 激活cann环境
-  cd OpenSora1.1
-  pip install -v -e .  # 安装本地代码仓,同时自动安装依赖
+  cd HunyuanDiT
+  pip install -v -e .  # 安装本地代码仓
+  pip install -r requirements.txt  # 安装其它依赖
   ```

 ### 安装昇腾环境
@@ -97,7 +103,6 @@ OpenSora是HPC AI Tech开发的开源高效复现类Sora视频生成方案。Ope
 | 昇腾NPU固件 | 在研版本 |
 | 昇腾NPU驱动 | 在研版本 |

-
 ### 准备数据集

 #### 训练数据集准备
@@ -110,23 +115,16 @@ https://github.com/Tencent/HunyuanDiT

 2. 无网络时,用户可访问huggingface官网自行下载(https://huggingface.co/Tencent-Hunyuan/HunyuanDiT/tree/main)

-
-3. 获取对应的预训练模型后,在以下配置文件中将`model`、`vae`的`from_pretrained`参数设置为本地预训练模型绝对路径。
-   ```shell
-   configs/opensora-v1-1/inference/sample.py
-   configs/opensora-v1-1/train/stage1.py
-   configs/opensora-v1-1/train/stage2.py
-   configs/opensora-v1-1/train/stage3.py
+3. 将下载好的预训练模型放在本工程目录下的`ckpts`目录下,组织结构如下:
   ```
-
-4. 将下载好的t5模型放在本工程目录下的`DeepFloyd`目录下,组织结构如下:
-   ```
-   $OpenSora1.1
-   ├── DeepFloyd
-   ├── ├── t5-v1_1-xxl
-   ├── ├── ├── config.json
-   ├── ├── ├── pytorch_model-00001-of-00002.bin
-   ├── ├── ├── ...
+   $HunyuanDiT
+   ├── ckpts
+   ├── ├── t2i
+   ├── ├── ├── clip_text_encoder
+   ├── ├── ├── model
+   ├── ├── ├── mt5
+   ├── ├── ├── sdxl-vae-fp16-fix
+   ├── ├── ├── tokenizer
   └── ...
   ```

@@ -159,24 +157,12 @@ https://github.com/Tencent/HunyuanDiT
 │   │   ├──porcelain_mt.json
 ```

-2. 运行训练脚本(待补充)。
-
-   用户可以按照自己训练需要进行参数配置,以下给出单卡和多卡的一种训练示例。
-   ```shell
-   bash test/train_full_1p_opensorav1_1.sh --data_path=train_data.csv
-   # 混合精度BF16,单卡训练,stage1
-   ```
+2. 运行训练脚本。
+   用户可以按照自己训练需要进行参数配置,以下给出多卡的一种训练示例。
   ```shell
-   bash test/train_full_8p_opensorav1_1.sh --data_path=train_data.csv
-   # 混合精度BF16,八卡训练,stage1
-   ```
-   对于本模型,可以采用绑核优化,以绑核方式启动。
-   绑核方法参考:https://gitee.com/ascend/att/tree/master/profiler/affinity_cpu_bind
-   本模型使用示例如下:
-   ```
-   python3 bind_core.py \
-   -app="bash test/train_full_18p_opensorav1_1.sh --data_path=train_data.csv"
+   bash test/train_full_8p_bf16.sh
+   # 混合精度BF16,8卡训练
   ```

 ### 推理任务

@@ -193,35 +179,14 @@ https://github.com/Tencent/HunyuanDiT

 - 单机单卡推理
   ```shell
-   bash test/infer_full_1p_opensorav1_1.sh --ckpt_path=/path/to/OpenSora-STDiT-v2-stage3/model.pth # 混精bf16 在线推理
+   bash test/inference_full_1p_fp16.sh # 混精fp16 在线推理
   ```

 - 推理脚本参数说明如下
   ```shell
-   test/infer_full_1p_opensorav1_1.sh
-   --batch_size          //设置batch_size
-   --ckpt_path           //推理加载的模型地址
-   --prompt              //测试用的prompt
-   --num_frames          //生成视频的总帧数
-   --img_h               //生成视频的宽
-   --img_w               //生成视频的高
-
-   scripts/inference.py
-   config                //配置文件路径
-   --seed                //随机种子
-   --ckpt-path           //推理加载的模型文件路径
-   --batch-size          //设置batch_size
-   --prompt-path         //推理使用的prompt文件路径
-   --prompt              //测试用的prompt
-   --num-frames          //生成视频的总帧数
-   --image-size          //生成视频的分辨率
-   --fps                 //生成视频的帧率
-   --save-dir            //输出视频的路径
-   --num-sampling-steps  //推理的采样步数
-   --cfg-scale           //无分类器引导的权重系数
+   test/inference_full_1p_fp16.sh
+   --prompt              //测试用的prompt
   ```
-
-
 # 公网地址说明

 代码涉及公网地址参考 public_address_statement.md
@@ -229,7 +194,7 @@ https://github.com/Tencent/HunyuanDiT

 ## 变更

-2024.04.29:OpenSora1.1 STDiT2 bf16训练和推理任务首次发布。
+2024.08.22:HunyuanDiT bf16训练和fp16推理任务首次发布。

 # FAQ

diff --git a/PyTorch/built-in/mlm/HunyuanDiT/gpu_sample.sh b/PyTorch/built-in/mlm/HunyuanDiT/gpu_sample.sh
deleted file mode 100644
index f7721e48aa57c29a43307c8d61075bb89412e2d3..0000000000000000000000000000000000000000
--- a/PyTorch/built-in/mlm/HunyuanDiT/gpu_sample.sh
+++ /dev/null
@@ -1 +0,0 @@
-python sample_t2i.py --infer-mode fa --prompt "渔舟唱晚" --image-size 1280 768 --no-enhance
\ No newline at end of file
diff --git a/PyTorch/built-in/mlm/HunyuanDiT/gpu_train.sh b/PyTorch/built-in/mlm/HunyuanDiT/gpu_train.sh
deleted file mode 100644
index 95005fbcdcc032dfd647d57b054500c435722772..0000000000000000000000000000000000000000
--- a/PyTorch/built-in/mlm/HunyuanDiT/gpu_train.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-export CUBLAS_WORKSPACE_CONFIG=:4096:8
-PYTHONPATH=./ sh hydit/train.sh
\ No newline at end of file
diff --git a/PyTorch/built-in/mlm/HunyuanDiT/hydit/inference.py b/PyTorch/built-in/mlm/HunyuanDiT/hydit/inference.py
index 682f29d7faa2d17a567e9f4d623f6eb8e1361181..6470cb6356362a9af95739d5c25293bf176cdb52 100644
--- a/PyTorch/built-in/mlm/HunyuanDiT/hydit/inference.py
+++ b/PyTorch/built-in/mlm/HunyuanDiT/hydit/inference.py
@@ -23,6 +23,12 @@ from .modules.text_encoder import MT5Embedder
 from .utils.tools import set_seeds
 from peft import LoraConfig
+from utils.npu_utils import is_npu_available
+
+if is_npu_available():
+    import torch_npu
+    from torch_npu.contrib import transfer_to_npu
+


 class Resolution:
     def __init__(self, width, height):
diff --git a/PyTorch/built-in/mlm/HunyuanDiT/hydit/modules/attn_layers.py b/PyTorch/built-in/mlm/HunyuanDiT/hydit/modules/attn_layers.py
index ba666e74d93e0d905f03b74b56aacc452877a272..7ddfbafd071f734b53cec3f534a9775526f91888 100644
--- a/PyTorch/built-in/mlm/HunyuanDiT/hydit/modules/attn_layers.py
+++ b/PyTorch/built-in/mlm/HunyuanDiT/hydit/modules/attn_layers.py
@@ -2,6 +2,7 @@
 import torch
 import torch.nn as nn
 from typing import Tuple, Union, Optional
+import math
 from utils.npu_utils import is_npu_available
 if is_npu_available():
     import torch_npu
@@ -148,7 +149,7 @@ class FlashSelfMHAModified(nn.Module):
                 device=None,
                 dtype=None,
                 norm_layer=nn.LayerNorm,
-                FAG_deterministic=False,
+                fa_deterministic=False,
                 ):
         factory_kwargs = {'device': device, 'dtype': dtype}
         super().__init__()
@@ -167,7 +168,7 @@ class FlashSelfMHAModified(nn.Module):
         if is_npu_available():
             self.inner_attn = NpuFlashAttention(attention_dropout=attn_drop)
         else:
-            self.inner_attn = FlashSelfAttention(attention_dropout=attn_drop, deterministic=FAG_deterministic)
+            self.inner_attn = FlashSelfAttention(attention_dropout=attn_drop, deterministic=fa_deterministic)
         self.out_proj = nn.Linear(dim, dim, bias=qkv_bias, **factory_kwargs)
         self.proj_drop = nn.Dropout(proj_drop)

@@ -230,7 +231,7 @@ class FlashCrossMHAModified(nn.Module):
                 device=None,
                 dtype=None,
                 norm_layer=nn.LayerNorm,
-                FAG_deterministic=False,
+                fa_deterministic=False,
                 ):
         factory_kwargs = {'device': device, 'dtype': dtype}
         super().__init__()
@@ -255,7 +256,7 @@ class FlashCrossMHAModified(nn.Module):
         if is_npu_available():
             self.inner_attn = NpuFlashAttention(attention_dropout=attn_drop)
         else:
-            self.inner_attn = FlashCrossAttention(attention_dropout=attn_drop, deterministic=FAG_deterministic)
+            self.inner_attn = FlashCrossAttention(attention_dropout=attn_drop, deterministic=fa_deterministic)
         self.out_proj = nn.Linear(qdim, qdim, bias=qkv_bias, **factory_kwargs)
         self.proj_drop = nn.Dropout(proj_drop)

diff --git a/PyTorch/built-in/mlm/HunyuanDiT/hydit/modules/models.py b/PyTorch/built-in/mlm/HunyuanDiT/hydit/modules/models.py
index 69dfc4384f1ecf579e8ac32a91c264d21ac04573..b0b20472d53848de33925863a1767bee4e6a2013 100644
--- a/PyTorch/built-in/mlm/HunyuanDiT/hydit/modules/models.py
+++ b/PyTorch/built-in/mlm/HunyuanDiT/hydit/modules/models.py
@@ -326,7 +326,7 @@ class HunYuanDiT(ModelMixin, ConfigMixin, PeftAdapterMixin):
             # Build image meta size tokens
             image_meta_size = timestep_embedding(image_meta_size.view(-1), 256)   # [B * 6, 256]
-            if self.args.use_fp16 and self.args.autocast_dtype == "fp16":
+            if self.args.use_fp16:
                 image_meta_size = image_meta_size.half()
             image_meta_size = image_meta_size.view(-1, 6 * 256)
             extra_vec = torch.cat([extra_vec, image_meta_size], dim=1)  # [B, D + 6 * 256]

diff --git a/PyTorch/built-in/mlm/HunyuanDiT/hydit/run_g.sh b/PyTorch/built-in/mlm/HunyuanDiT/hydit/run_g.sh
index 63a9e30f9fe3f30f334d2f976f528be3d94628af..f145b56937b9649a888090b9ce53026831563d06 100644
--- a/PyTorch/built-in/mlm/HunyuanDiT/hydit/run_g.sh
+++ b/PyTorch/built-in/mlm/HunyuanDiT/hydit/run_g.sh
@@ -6,26 +6,3 @@ params=" \
     --rope-real \
 "
 deepspeed --num_gpus 8 --num_nodes 1 --master_port=29000 hydit/train_deepspeed.py ${params} "$@"
-
-#HOSTFILE="/home/l50041210/HunyuanDiT_combine/hostfile"
-#MASTER_ADDR=$(head -n1 $HOSTFILE | awk '{print $1;}')
-#MASTER_PORT=6001
-#NODE_ADDR=`hostname -I | awk '{for(i=1;i<=NF;i++)print $i}' | grep ${MASTER_ADDR%.*}. | awk -F " "'{print$1}'`
-#NODE_RANK=$(awk '{ranks[$1]=(FNR-1);}END{print ranks["'$NODE_ADDR'"];}' $HOSTFILE)
-#NNODES=$(cat $HOSTFILE | wc -l)
-#NPUS_PER_NODE=8
-#WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES))
-#echo $MASTER_ADDR
-#echo $NODE_ADDR
-#echo $NODE_RANK
-#echo $NNODES
-#
-#DISTRIBUTED_ARGS="
-#    --nproc_per_node $NPUS_PER_NODE \
-#    --nnodes $NNODES \
-#    --node_rank $NODE_RANK \
-#    --master_addr $MASTER_ADDR \
-#    --master_port $MASTER_PORT
-##"
-#
-#torchrun $DISTRIBUTED_ARGS hydit/train_deepspeed.py ${params} "$@"
\ No newline at end of file
diff --git a/PyTorch/built-in/mlm/HunyuanDiT/hydit/train.sh b/PyTorch/built-in/mlm/HunyuanDiT/hydit/train.sh
index 10c856dfe1064716d2227a4ca53385da709c5160..c6c235d2688394874fe2a35ff1678890f4aab372 100644
--- a/PyTorch/built-in/mlm/HunyuanDiT/hydit/train.sh
+++ b/PyTorch/built-in/mlm/HunyuanDiT/hydit/train.sh
@@ -43,4 +43,4 @@ sh $(dirname "$0")/run_g.sh \
     --max-training-steps 5000 \
     --norm 'layer' \
     --autocast-dtype 'bf16'
-    "$@"
\ No newline at end of file
+    "$@"
diff --git a/PyTorch/built-in/mlm/HunyuanDiT/hydit/train_deepspeed.py b/PyTorch/built-in/mlm/HunyuanDiT/hydit/train_deepspeed.py
index 05ce0655f821dbb21d80d3d04297f43b61020f82..c82c29232cd4d9508cd65824c2d29bb1bb3661f1 100644
--- a/PyTorch/built-in/mlm/HunyuanDiT/hydit/train_deepspeed.py
+++ b/PyTorch/built-in/mlm/HunyuanDiT/hydit/train_deepspeed.py
@@ -539,7 +539,5 @@ def main(args):

 if __name__ == "__main__":
     args = get_args()
-    if args.seed_all:
-        seed_all(is_gpu=not is_npu_available(), mode=True)
     # Start
     main(args)
diff --git a/PyTorch/built-in/mlm/HunyuanDiT/npu_sample.sh b/PyTorch/built-in/mlm/HunyuanDiT/npu_sample.sh
deleted file mode 100644
index df4d8f0fa7c2c440e622a217f13e4a57517d5a14..0000000000000000000000000000000000000000
--- a/PyTorch/built-in/mlm/HunyuanDiT/npu_sample.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-source /home/l50041210/cann-b20/ascend-toolkit/set_env.sh
-export ASCEND_RT_VISIBLE_DEVICES=0
-#将Host日志输出到串口,0-关闭/1-开启
-export ASCEND_SLOG_PRINT_TO_STDOUT=0
-#设置默认日志级别,0-debug/1-info/2-warning/3-error
-export ASCEND_GLOBAL_LOG_LEVEL=3
-
-python sample_t2i.py --infer-mode fa --prompt "渔舟唱晚" --image-size 1280 768 --no-enhance
\ No newline at end of file
diff --git a/PyTorch/built-in/mlm/HunyuanDiT/npu_train.sh b/PyTorch/built-in/mlm/HunyuanDiT/npu_train.sh
deleted file mode 100644
index 57e991e0dc377a14ab7af0f5b3db3763b123b3cb..0000000000000000000000000000000000000000
--- a/PyTorch/built-in/mlm/HunyuanDiT/npu_train.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-source /home/l50041210/cann-b020/ascend-toolkit/set_env.sh
-#将Host日志输出到串口,0-关闭/1-开启
-export ASCEND_SLOG_PRINT_TO_STDOUT=0
-#设置默认日志级别,0-debug/1-info/2-warning/3-error
-export ASCEND_GLOBAL_LOG_LEVEL=3
-
-PYTHONPATH=./ sh hydit/train.sh
\ No newline at end of file
diff --git a/PyTorch/built-in/mlm/HunyuanDiT/public_address_statement.md b/PyTorch/built-in/mlm/HunyuanDiT/public_address_statement.md
new file mode 100644
index 0000000000000000000000000000000000000000..226cffb32fe8b808c20bcc91d1a82afb71a5b3c6
--- /dev/null
+++ b/PyTorch/built-in/mlm/HunyuanDiT/public_address_statement.md
@@ -0,0 +1,26 @@
+| 类型 | 开源代码地址 | 文件名 | 公网IP地址/公网URL地址/域名/邮箱地址 | 用途说明 |
+| ------- |------------------------------------------------------------|---------------------------------|------------------------|---------------------|
+| 开源代码引入 | https://github.com/Tencent/HunyuanDiT/utils/collect_env.py | ./utils/collect_env.py | https://github.com/open-mmlab/mmengine/issues/931 | 注释参考链接 |
+| 开源代码引入 | https://github.com/Tencent/HunyuanDiT/mllm/llava/utils.py | ./mllm/llava/utils.py | https://api.openai.com/v1/moderations | 文件交互链接 |
+| 开源代码引入 | https://github.com/Tencent/HunyuanDiT/mllm/llava/model/builder.py | ./mllm/llava/model/builder.py | https://github.com/haotian-liu/LLaVA#launch-a-model-worker-lora-weights-unmerged | 模型权重公网下载说明 |
+| 开源代码引入 | https://github.com/Tencent/HunyuanDiT/lite/inference.py | ./lite/inference.py | https://huggingface.co/Tencent-Hunyuan | huggingface官仓说明链接 |
+| 开源代码引入 | https://github.com/Tencent/HunyuanDiT/hydit/modules/posemb_layers.py | hydit/modules/posemb_layers.py | https://github.com/facebookresearch/llama/blob/main/llama/model.py | 注释参考链接 |
+| 开源代码引入 | https://github.com/Tencent/HunyuanDiT/hydit/modules/posemb_layers.py | hydit/modules/posemb_layers.py | https://github.com/facebookresearch/mae/blob/main/util/pos_embed.py | 注释参考链接 |
+| 开源代码引入 | https://github.com/Tencent/HunyuanDiT/hydit/modules/embedders.py | hydit/modules/embedders.py | https://github.com/openai/glide-text2im/blob/main/glide_text2im/nn.py | 注释参考链接 |
+| 开源代码引入 | https://github.com/Tencent/HunyuanDiT/hydit/modules/embedders.py | hydit/modules/embedders.py | https://github.com/google-research/vision_transformer | 注释参考链接 |
+| 开源代码引入 | https://github.com/Tencent/HunyuanDiT/hydit/lr_scheduler.py | hydit/lr_scheduler.py | https://arxiv.org/abs/1803.09820 | 参考论文链接 |
+| 开源代码引入 | https://github.com/Tencent/HunyuanDiT/hydit/diffusion/pipeline_controlnet.py | hydit/diffusion/pipeline_controlnet.py | https://arxiv.org/pdf/2305.08891.pdf | 参考论文链接 |
+| 开源代码引入 | https://github.com/Tencent/HunyuanDiT/hydit/diffusion/pipeline_controlnet.py | hydit/diffusion/pipeline_controlnet.py | https://arxiv.org/abs/2010.02502 | 参考论文链接 |
+| 开源代码引入 | https://github.com/Tencent/HunyuanDiT/hydit/diffusion/pipeline_controlnet.py | hydit/diffusion/pipeline_controlnet.py | https://arxiv.org/pdf/2205.11487.pdf | 参考论文链接 |
+| 开源代码引入 | https://github.com/Tencent/HunyuanDiT/hydit/diffusion/pipeline_controlnet.py | hydit/diffusion/pipeline_controlnet.py | https://huggingface.co/openai/clip-vit-large-patch14 | 注释参考链接 |
+| 开源代码引入 | https://github.com/Tencent/HunyuanDiT/hydit/diffusion/pipeline_controlnet.py | hydit/diffusion/pipeline_controlnet.py | https://huggingface.co/runwayml/stable-diffusion-v1-5 | 注释参考链接 |
+| 开源代码引入 | https://github.com/Tencent/HunyuanDiT/hydit/diffusion/pipeline_controlnet.py | hydit/diffusion/pipeline_controlnet.py | https://github.com/huggingface/diffusers/pull/254 | 注释参考链接 |
+| 开源代码引入 | https://github.com/Tencent/HunyuanDiT/hydit/diffusion/pipeline_controlnet.py | hydit/diffusion/pipeline_controlnet.py | https://pytorch.org/docs/stable/generated/torch.Generator.html | 注释参考链接 |
+| 开源代码引入 | https://github.com/Tencent/HunyuanDiT/hydit/diffusion/pipeline_controlnet.py | hydit/diffusion/pipeline_controlnet.py | https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py | 注释参考链接 |
+| 开源代码引入 | https://github.com/Tencent/HunyuanDiT/comfyui-hydit/hydit/diffusion/gaussian_diffusion.py | comfyui-hydit/hydit/diffusion/gaussian_diffusion.py | https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/diffusion_utils_2.py | 注释参考链接 |
+| 开源代码引入 | https://github.com/Tencent/HunyuanDiT/comfyui-hydit/hydit/diffusion/gaussian_diffusion.py | comfyui-hydit/hydit/diffusion/gaussian_diffusion.py | https://www.crosslabs.org/blog/diffusion-with-offset-noise | 注释参考链接 |
+| 开源代码引入 | https://github.com/Tencent/HunyuanDiT/comfyui-hydit/hydit/diffusion/gaussian_diffusion.py | comfyui-hydit/hydit/diffusion/gaussian_diffusion.py | https://openreview.net/forum?id=PlKWVd2yBkY | 注释参考链接 |
+| 开源代码引入 | https://github.com/Tencent/HunyuanDiT/comfyui-hydit/hydit/annotator/glyph.py | comfyui-hydit/hydit/annotator/glyph.py | https://github.com/AIGText/GlyphControl-release | 注释参考链接 |
+| 开源代码引入 | https://github.com/Tencent/HunyuanDiT/comfyui-hydit/hydit/annotator/dwpose/util.py | comfyui-hydit/hydit/annotator/dwpose/util.py | https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/src/openpose/hand/handDetector.cpp | 注释参考链接 |
+| 开源代码引入 | https://github.com/Tencent/HunyuanDiT/comfyui-hydit/hydit/annotator/dwpose/__init__.py | comfyui-hydit/hydit/annotator/dwpose/__init__.py | https://github.com/CMU-Perceptual-Computing-Lab/openpose | 注释参考链接 |
+| 开源代码引入 | https://github.com/Tencent/HunyuanDiT/comfyui-hydit/hydit/annotator/dwpose/__init__.py | comfyui-hydit/hydit/annotator/dwpose/__init__.py | https://github.com/Hzzone/pytorch-openpose | 注释参考链接 |
\ No newline at end of file
diff --git a/PyTorch/built-in/mlm/HunyuanDiT/requirements.txt b/PyTorch/built-in/mlm/HunyuanDiT/requirements.txt
index 8de35fd32eb220912bf782b94dad64beb2776e20..ba84538f16a8010a2a3b8092691ddf56ffd0c876 100644
--- a/PyTorch/built-in/mlm/HunyuanDiT/requirements.txt
+++ b/PyTorch/built-in/mlm/HunyuanDiT/requirements.txt
@@ -1,22 +1,14 @@
---extra-index-url https://pypi.ngc.nvidia.com
 timm==0.9.5
 diffusers==0.21.2
-peft==0.10.0
+peft==0.11.1
 protobuf==3.19.0
 torchvision==0.14.1
 transformers==4.39.1
-peft==0.10.0
-accelerate==0.29.3
+accelerate==0.27.2
 loguru==0.7.2
 einops==0.7.0
 sentencepiece==0.1.99
-cuda-python==11.7.1
-onnxruntime==1.12.1
-onnx==1.12.0
 nvidia-pyindex==1.0.9
-onnx-graphsurgeon==0.3.27
-polygraphy==0.47.1
 pandas==2.0.3
 gradio==3.50.2
-deepspeed==0.6.3
-pyarrow==16.1.0
\ No newline at end of file
+deepspeed==0.14.4
\ No newline at end of file
diff --git a/PyTorch/built-in/mlm/HunyuanDiT/test/env_npu.sh b/PyTorch/built-in/mlm/HunyuanDiT/test/env_npu.sh
index cd24c60a82fa5e910db8306e559579206d7465d9..a2c0d400c514eb870c15222c73906ddea8dae0b5 100644
--- a/PyTorch/built-in/mlm/HunyuanDiT/test/env_npu.sh
+++ b/PyTorch/built-in/mlm/HunyuanDiT/test/env_npu.sh
@@ -33,4 +33,4 @@ export COMBINED_ENABLE=1
 #HCCL白名单开关,1-关闭/0-开启
 export HCCL_WHITELIST_DISABLE=1
 export HCCL_IF_IP=$(hostname -I |awk '{print $1}')
-export HCCL_CONNECT_TIMEOUT=1200
\ No newline at end of file
+export HCCL_CONNECT_TIMEOUT=1200
diff --git a/PyTorch/built-in/mlm/HunyuanDiT/test/inference_full_1p_fp16.sh b/PyTorch/built-in/mlm/HunyuanDiT/test/inference_full_1p_fp16.sh
new file mode 100644
index 0000000000000000000000000000000000000000..063dbba0e6c91fda2489e2d899586fb6bd082584
--- /dev/null
+++ b/PyTorch/built-in/mlm/HunyuanDiT/test/inference_full_1p_fp16.sh
@@ -0,0 +1,58 @@
+# 网络名称,权重路径以及相关参数,需要模型审视修改
+Network="HunyuanDiT"
+prompt="渔舟唱晚"
+image_size_height=1280
+image_size_width=768
+
+for para in $*
+do
+    if [[ $para == --prompt* ]]; then
+        prompt=$(echo ${para#*=})
+    fi
+done
+
+# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径
+cur_path=$(pwd)
+cur_path_last_dirname=${cur_path##*/}
+if [ x"${cur_path_last_dirname}" == x"test" ]; then
+    test_path_dir=${cur_path}
+    cd ..
+    cur_path=$(pwd)
+else
+    test_path_dir=${cur_path}/test
+fi
+
+source ${test_path_dir}/env_npu.sh
+
+#创建DeviceID输出目录,不需要修改
+output_path=${cur_path}/test/output/${ASCEND_DEVICE_ID}
+
+mkdir -p ${output_path}
+
+#推理开始时间,不需要修改
+start_time=$(date +%s)
+echo "start_time: ${start_time}"
+
+python sample_t2i.py --infer-mode fa --prompt "${prompt}" --image-size ${image_size_height} ${image_size_width} --no-enhance \
+    > ${test_path_dir}/output/$ASCEND_DEVICE_ID/inference_${ASCEND_DEVICE_ID}.log 2>&1 &
+
+wait
+
+#推理结束时间,不需要修改
+end_time=$(date +%s)
+e2e_time=$(($end_time - $start_time))
+
+
+# 推理用例信息,不需要修改
+BatchSize=${BATCH_SIZE}
+DeviceType=$(uname -m)
+CaseName=${Network}_bs${BatchSize}_${WORLD_SIZE}'p'_'acc'
+
+# 结果打印,不需要修改
+echo "------------------ Final result ------------------"
+# 打印,不需要修改
+echo "E2E Training Duration sec : $e2e_time"
+
+# 关键信息打印到${CaseName}.log中,不需要修改
+echo "Network = ${Network}" >${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >>${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}_perf_report.log
diff --git a/PyTorch/built-in/mlm/HunyuanDiT/test/train_full_8p_bf16.sh b/PyTorch/built-in/mlm/HunyuanDiT/test/train_full_8p_bf16.sh
index 596ac19052029386566f1a0b10b81191da787a13..fe0fb6048a78ff1ce593ea3e37d5f39192aeca28 100644
--- a/PyTorch/built-in/mlm/HunyuanDiT/test/train_full_8p_bf16.sh
+++ b/PyTorch/built-in/mlm/HunyuanDiT/test/train_full_8p_bf16.sh
@@ -1,4 +1,4 @@
-# 微调生成的ckpt路径
+# 网络名称,权重路径以及相关参数,需要模型审视修改
 Network="HunyuanDiT"
 BATCH_SIZE=1
 max_train_steps=5000
@@ -85,6 +85,8 @@ deepspeed --num_gpus ${WORLD_SIZE} --num_nodes 1 --master_port=${MASTER_PORT} hy
     --multireso \
     --reso-step 64 \
     --max-training-steps ${max_train_steps} \
+    --norm "layer" \
+    --autocast-dtype "bf16" \
     > ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${ASCEND_DEVICE_ID}.log 2>&1 &

 wait
@@ -102,7 +104,9 @@ CaseName=${Network}_bs${BatchSize}_${WORLD_SIZE}'p'_'acc'
 # 结果打印,不需要修改
 echo "------------------ Final result ------------------"
 # 输出性能FPS,需要模型审视修改
-FPS=`grep -a 'FPS' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "FPS " '{print $2}' | tail -100 | awk '{a+=$1} END {if (NR != 0) printf("%.3f",a/NR)}'`
+avg_time=`grep -a 'Steps/Sec:' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "Steps/Sec: " '{print $2}' | awk '{a+=$1} END {if (NR != 0) printf("%.3f",a/NR)}'`
+FPS=`echo "$avg_time * $BatchSize" |bc`
+
 # 打印,不需要修改
 echo "Final Performance images/sec : $FPS"
 echo "E2E Training Duration sec : $e2e_time"
@@ -122,5 +126,6 @@ echo "RankSize = ${WORLD_SIZE}" >>${test_path_dir}/output/$ASCEND_DEVICE_ID/${Ca
 echo "BatchSize = ${BatchSize}" >>${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
 echo "DeviceType = ${DeviceType}" >>${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
 echo "CaseName = ${CaseName}" >>${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${FPS}" >>${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}_perf_report.log
 echo "TrainingTime = ${TrainingTime}" >>${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}_perf_report.log
-echo "E2ETrainingTime = ${e2e_time}" >>${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}_perf_report.log
\ No newline at end of file
+echo "E2ETrainingTime = ${e2e_time}" >>${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}_perf_report.log