From c89d3b818fa8f46fb9ebc0473e2669a235c6b10d Mon Sep 17 00:00:00 2001 From: xinyanhe Date: Thu, 24 Oct 2024 14:26:41 +0800 Subject: [PATCH] change chip name in README --- .../audio/ESPnet2_for_PyTorch/README.md | 20 +++---- .../Beit2_for_PyTorch/README.md | 2 +- .../classification/CRNN_for_PyTorch/README.md | 4 +- .../ResNet50_for_PyTorch/README.md | 8 +-- .../detection/DB_ID0706_for_PyTorch/README.md | 8 +-- .../BiseNetV1_for_PyTorch/README.md | 6 +-- .../built-in/diffusion/diffusers/README.md | 16 +++--- .../diffusion/sd-scripts-xl/README.md | 38 +++++++------ PyTorch/built-in/foundation/Aquila2/README.md | 33 +++++++----- .../built-in/foundation/ChatGLM-6B/README.md | 4 +- .../built-in/foundation/ChatGLM3-6B/README.md | 4 +- .../built-in/foundation/CodeGeeX2/README.md | 6 +-- .../foundation/CodeShell-7B/README.md | 31 +++++++---- .../built-in/foundation/GPT-NeoX/README.md | 2 +- .../built-in/foundation/LLaMA-13B/README.md | 2 +- PyTorch/built-in/mm/AltCLIP/README.md | 32 ++++++----- PyTorch/built-in/mm/AnimateDiff/README.md | 8 +-- PyTorch/built-in/mm/DiT/README.md | 41 ++++++++------ PyTorch/built-in/mm/LLaVA/README.md | 10 ++-- .../mm/OpenSora-master/docs/zh_CN/README.md | 2 +- .../mm/OpenSora1.0/docs/zh_CN/README.md | 2 +- .../mm/OpenSora1.1/docs/zh_CN/README.md | 2 +- PyTorch/built-in/mm/OpenSoraPlan1.0/README.md | 54 +++++++++++-------- PyTorch/built-in/mm/Qwen-VL/README.md | 2 +- .../Bert_Chinese_ID3433_for_PyTorch/README.md | 6 +-- PyTorch/contrib/audio/tdnn/README.md | 6 +-- .../HRNet_ID1780_for_PyTorch/README.md | 8 +-- .../InceptionV3_ID1596_for_PyTorch/README.md | 10 ++-- .../classification/MAE_for_PyTorch/README.md | 32 ++++++----- .../SE-ResNext-101-32x4d/README.md | 4 +- PyTorch/contrib/cv/detection/CTPN/README.md | 4 +- PyTorch/contrib/cv/detection/DSFD/README.md | 2 +- PyTorch/contrib/cv/detection/GCNet/README.md | 4 +- .../contrib/cv/detection/RetinaMask/README.md | 4 +- .../3D_EDSR_ID3005_for_PyTorch/README.md | 4 +- .../cv/others/3D_Nested_Unet/README.md | 14 ++--- .../FSRCNN_ID2990_for_PyTorch/README.md | 4 +- .../others/LPTN_ID2780_for_PyTorch/README.md | 4 +- .../Lifespan_ID2972_for_pytorch/README.md | 2 +- PyTorch/contrib/cv/others/Pix2Pix/README.md | 4 +- PyTorch/contrib/cv/others/Pix2PixHD/README.md | 4 +- .../README.md | 4 +- PyTorch/contrib/cv/others/SRGAN/README.md | 4 +- .../cv/others/Srcnn_x2_for_Pytorch/README.md | 4 +- .../README.md | 2 +- .../cv/pose_estimation/DeepPose/README.md | 4 +- .../cv/pose_estimation/HigherHRNet/README.md | 4 +- .../cv/pose_estimation/TransPose/README.md | 4 +- .../cv/semantic_segmentation/ENet/README.md | 4 +- .../cv/semantic_segmentation/ErfNet/README.md | 4 +- .../MedSAM_for_PyTorch/README.md | 24 ++++++--- .../cv/semantic_segmentation/SeMask/README.md | 23 +++++--- .../nlp/MAG-Bert_ID2985_for_PyTorch/README.md | 2 +- .../nlp/NCF_ID2943_for_PyTorch/readme.md | 4 +- .../nlp/albert_ID0335_for_PyTorch/README.md | 6 +-- .../README.md | 2 +- PyTorch/dev/nlp/Textcnn_for_PyTorch/README.md | 2 +- 57 files changed, 317 insertions(+), 233 deletions(-) diff --git a/PyTorch/built-in/audio/ESPnet2_for_PyTorch/README.md b/PyTorch/built-in/audio/ESPnet2_for_PyTorch/README.md index 2d673ee025..09c3172922 100644 --- a/PyTorch/built-in/audio/ESPnet2_for_PyTorch/README.md +++ b/PyTorch/built-in/audio/ESPnet2_for_PyTorch/README.md @@ -333,16 +333,16 @@ Conformer是将CNN用于增强Transformer来做ASR的结构 ``` npu-smi info - #该设备芯片名为Ascend910A (自行替换) + #该设备芯片名为Atlas (自行替换) 回显如下: +-------------------|-----------------|------------------------------------------------------+ | NPU Name | Health | Power(W) Temp(C) Hugepages-Usage(page) | | Chip Device | Bus-Id | AICore(%) Memory-Usage(MB) | +===================+=================+======================================================+ - | 0 910A | OK | 15.8 42 0 / 0 | + | 0 Atlas | OK | 15.8 42 0 / 0 | | 0 0 | 0000:82:00.0 | 0 1074 / 21534 | +===================+=================+======================================================+ - | 1 910A | OK | 15.4 43 0 / 0 | + | 1 Atlas | OK | 15.4 43 0 / 0 | | 0 1 | 0000:89:00.0 | 0 1070 / 21534 | +===================+=================+======================================================+ ``` @@ -352,10 +352,10 @@ Conformer是将CNN用于增强Transformer来做ASR的结构 将xformer_encoder.sh,xformer_decoder.sh,transformer_lm.sh,ctc.sh放置到/root/.cache/espnet_onnx/asr_train_asr_qkv/full目录下,运行xformer_encoder.sh导出encoder`OM`模型,默认保存在当前文件夹下,其他模型类似。 ``` - bash xformer_encoder.sh Ascend910A - bash xformer_decoder.sh Ascend910A - bash transformer_lm.sh Ascend910A - bash ctc.sh Ascend910A + bash xformer_encoder.sh Atlas + bash xformer_decoder.sh Atlas + bash transformer_lm.sh Atlas + bash ctc.sh Atlas ``` ### 2 开始推理验证 @@ -407,6 +407,6 @@ Conformer是将CNN用于增强Transformer来做ASR的结构 | 芯片型号 | 配置 | 数据集 | 精度(overall) | 性能(fps) | | :-----------: | :------------------------------------: | :-------: | :-------------: | - | GPU | encoder/decoder/ctc/lm(beam_size=20) | aishell | 95.27% | - | GPU | encoder/decoder/ctc/lm(beam_size=2) | aishell | 95.08% | - | Ascend910A | encoder/decoder/ctc/lm(default) | aishell | 95.02% | + | 竞品A | encoder/decoder/ctc/lm(beam_size=20) | aishell | 95.27% | + | 竞品A | encoder/decoder/ctc/lm(beam_size=2) | aishell | 95.08% | + | Atlas | encoder/decoder/ctc/lm(default) | aishell | 95.02% | diff --git a/PyTorch/built-in/cv/classification/Beit2_for_PyTorch/README.md b/PyTorch/built-in/cv/classification/Beit2_for_PyTorch/README.md index e5881a77cd..5fcc14711f 100644 --- a/PyTorch/built-in/cv/classification/Beit2_for_PyTorch/README.md +++ b/PyTorch/built-in/cv/classification/Beit2_for_PyTorch/README.md @@ -153,7 +153,7 @@ cd .. **表 2** 训练结果展示表 -这里使用了单机进行预训练,采用的NPU型号为910B1 +这里使用了单机进行预训练,采用的NPU型号为Atlas 900 A2 PODc | NAME | single-step time | Iterations | DataType|Torch_Version | |:-:|:-:|:-:|:-:|:-:| diff --git a/PyTorch/built-in/cv/classification/CRNN_for_PyTorch/README.md b/PyTorch/built-in/cv/classification/CRNN_for_PyTorch/README.md index 3c9c9537e5..441cfe34b8 100644 --- a/PyTorch/built-in/cv/classification/CRNN_for_PyTorch/README.md +++ b/PyTorch/built-in/cv/classification/CRNN_for_PyTorch/README.md @@ -301,7 +301,7 @@ CRNN (Convolutional Recurrent Neural Network) 于2015年由华中科技大学的 ```shell npu-smi info ``` - 该设备芯片名为Ascend910A (请根据实际芯片填入) + 该设备芯片名为Atlas (请根据实际芯片填入) 4. 执行atc命令 @@ -375,7 +375,7 @@ CRNN (Convolutional Recurrent Neural Network) 于2015年由华中科技大学的 | 芯片型号 | Batch Size | 数据集 | 精度 | | -------- | ---------- | ----------- | ------ | -| 910A | 16 | IIIT5K_lmdb | 76.57% | +| Atlas | 16 | IIIT5K_lmdb | 76.57% | # 公网地址说明 diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/README.md b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/README.md index 1ae40c101a..09a3baa2df 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/README.md +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/README.md @@ -297,16 +297,16 @@ ResNet是ImageNet竞赛中分类问题效果较好的网络,它引入了残差 ``` npu-smi info - #该设备芯片名为Ascend910A (请根据实际芯片填入) + #该设备芯片名为Atlas (请根据实际芯片填入) 回显如下: +-------------------+-----------------+------------------------------------------------------+ | NPU Name | Health | Power(W) Temp(C) Hugepages-Usage(page) | | Chip Device | Bus-Id | AICore(%) Memory-Usage(MB) | +===================+=================+======================================================+ - | 0 910A | OK | 69.5 40 0 / 0 | + | 0 Atlas | OK | 69.5 40 0 / 0 | | 0 0 | 0000:82:00.0 | 0 950 / 15137 | +===================+=================+======================================================+ - | 1 910A | OK | 65.3 36 0 / 0 | + | 1 Atlas | OK | 65.3 36 0 / 0 | | 0 1 | 0000:89:00.0 | 0 1613 / 15137 | +===================+=================+======================================================+ ``` @@ -373,7 +373,7 @@ c. 精度验证。 | 芯片型号 | Batch Size | 数据集 | 精度 | | --------- | ---------------- | ---------- | ---------- | -| 910A | 64 | ImageNet | top-1: 76.96%
top-5: 93.24% | +| Atlas | 64 | ImageNet | top-1: 76.96%
top-5: 93.24% | diff --git a/PyTorch/built-in/cv/detection/DB_ID0706_for_PyTorch/README.md b/PyTorch/built-in/cv/detection/DB_ID0706_for_PyTorch/README.md index 42a28a1122..a2f55fa45f 100644 --- a/PyTorch/built-in/cv/detection/DB_ID0706_for_PyTorch/README.md +++ b/PyTorch/built-in/cv/detection/DB_ID0706_for_PyTorch/README.md @@ -338,16 +338,16 @@ DB(Differentiable Binarization)是一种使用可微分二值图来实时文字 ```sh npu-smi info - #该设备芯片名为Ascend910A (自行替换) + #该设备芯片名为Atlas (自行替换) 回显如下: +-------------------+-----------------+------------------------------------------------------+ | NPU Name | Health | Power(W) Temp(C) Hugepages-Usage(page) | | Chip Device | Bus-Id | AICore(%) Memory-Usage(MB) | +===================+=================+======================================================+ - | 0 910A | OK | 15.8 42 0 / 0 | + | 0 Atlas | OK | 15.8 42 0 / 0 | | 0 0 | 0000:82:00.0 | 0 1074 / 21534 | +===================+=================+======================================================+ - | 1 910A | OK | 15.4 43 0 / 0 | + | 1 Atlas | OK | 15.4 43 0 / 0 | | 0 1 | 0000:89:00.0 | 0 1070 / 21534 | +===================+=================+======================================================+ ``` @@ -428,7 +428,7 @@ DB(Differentiable Binarization)是一种使用可微分二值图来实时文字 | 芯片型号 | Batch Size | 数据集 | 精度 | | :------: | :--------: | :-------: | :--: | -| 910A | 1 | icdar2015 | 0.896 | +| Atlas | 1 | icdar2015 | 0.896 | # 公网地址说明 diff --git a/PyTorch/built-in/cv/semantic_segmentation/BiseNetV1_for_PyTorch/README.md b/PyTorch/built-in/cv/semantic_segmentation/BiseNetV1_for_PyTorch/README.md index a31255d1fc..280752635b 100644 --- a/PyTorch/built-in/cv/semantic_segmentation/BiseNetV1_for_PyTorch/README.md +++ b/PyTorch/built-in/cv/semantic_segmentation/BiseNetV1_for_PyTorch/README.md @@ -188,10 +188,10 @@ | Name | mIoU | FPS | Device | Npu_nums | Steps | AMP_Type | CPU | |-----------|:-----:|:---:|:--------:|:--------:|:-----:|:--------:|:---:| | 1p-*PU | - | 9 | - | - | 400 | O1 | x86 | -| 1p-NPU1.8 | - | 12 | 910A | 1 | 400 | O1 | ARM | +| 1p-NPU1.8 | - | 12 | Atlas | 1 | 400 | O1 | ARM | | 8p-*PU | 75.80 | 62 | - | - | 40000 | O1 | x86 | -| 8p-NPU1.8 | - | 88 | 910A | 8 | 400 | O1 | ARM | -| 8p-NPU1.8 | 76.03 | 88 | 910A | 8 | 40000 | O1 | ARM | +| 8p-NPU1.8 | - | 88 | Atlas | 8 | 400 | O1 | ARM | +| 8p-NPU1.8 | 76.03 | 88 | Atlas | 8 | 40000 | O1 | ARM | # 公网地址说明 diff --git a/PyTorch/built-in/diffusion/diffusers/README.md b/PyTorch/built-in/diffusion/diffusers/README.md index 7fc3307723..72d9523c8e 100644 --- a/PyTorch/built-in/diffusion/diffusers/README.md +++ b/PyTorch/built-in/diffusion/diffusers/README.md @@ -60,16 +60,16 @@ - [推理任务](#推理任务-4) - [获取预训练模型](#获取预训练模型-4) - [开始推理](#开始推理-4) -- [SD3](#SD3) - - [准备环境](#准备环境-5) +- [SD3](#sd3) + - [准备环境](#准备环境-2) - [安装模型环境](#安装模型环境-5) - [安装昇腾环境](#安装昇腾环境-5) - [快速开始](#快速开始-5) - - [训练任务](#训练任务-5) + - [训练任务](#训练任务) - [获取预训练模型](#获取预训练模型-5) - - [开始训练](#开始训练-1) + - [开始训练](#开始训练-1) - [推理任务](#推理任务-5) - - [开始推理](#开始推理-5) + - [开始推理](#开始推理-5) - [公网地址说明](#公网地址说明) - [变更说明](#变更说明) - [变更](#变更) @@ -313,11 +313,11 @@ https://huggingface.co/docs/diffusers/installation | 芯片 | 卡数 | 任务 | FPS | batch_size | AMP_Type | Torch_Version | deepspeed | |:---:|:---:|:----------:|:-----:|:----------:|:---:|:---:|:---:| -| GPU | 8p | LoRA | 23.38 | 7 | fp16 | 2.1 | ✔ | +| 竞品A | 8p | LoRA | 23.38 | 7 | fp16 | 2.1 | ✔ | | Atlas A2 |8p | LoRA | 28.75 | 7 | fp16 | 2.1 | ✔ | -| GPU | 8p | Controlnet | 32.5 | 5 | fp16 | 2.1 | ✔ | +| 竞品A | 8p | Controlnet | 32.5 | 5 | fp16 | 2.1 | ✔ | | Atlas A2 |8p | Controlnet | 28.42 | 5 | fp16 | 2.1 | ✔ | -| GPU | 8p | Finetune | 142.7 | 24 | fp16 | 2.1 | ✔ | +| 竞品A | 8p | Finetune | 142.7 | 24 | fp16 | 2.1 | ✔ | | Atlas A2 |8p | Finetune | 172.9 | 24 | fp16 | 2.1 | ✔ | ### 推理任务 diff --git a/PyTorch/built-in/diffusion/sd-scripts-xl/README.md b/PyTorch/built-in/diffusion/sd-scripts-xl/README.md index 17fdb82779..1519e09b51 100644 --- a/PyTorch/built-in/diffusion/sd-scripts-xl/README.md +++ b/PyTorch/built-in/diffusion/sd-scripts-xl/README.md @@ -1,18 +1,26 @@ # sd-scripts-xl for PyTorch # 目录 -- [简介](#简介) - - [模型介绍](#模型介绍) - - [支持任务列表](#支持任务列表) - - [代码实现](#代码实现) -- [sd-scripts-xl](#sd-scripts-xl) - - [准备训练环境](#准备训练环境) - - [准备数据集](#准备数据集) - - [快速开始](#快速开始) - - [预训练任务(SDXL+CLIP)](#预训练任务sdxlclip) - - [预训练任务(SDXL+MT5)](#预训练任务sdxlmt5) -- [公网地址说明](#公网地址说明) -- [变更说明](#变更说明) -- [FAQ](#FAQ) +- [sd-scripts-xl for PyTorch](#sd-scripts-xl-for-pytorch) +- [目录](#目录) +- [简介](#简介) + - [模型介绍](#模型介绍) + - [支持任务列表](#支持任务列表) + - [代码实现](#代码实现) +- [sd-scripts-xl](#sd-scripts-xl) + - [准备训练环境](#准备训练环境) + - [安装模型环境](#安装模型环境) + - [安装昇腾环境](#安装昇腾环境) + - [准备数据集](#准备数据集) + - [获取预训练模型](#获取预训练模型) +- [快速开始](#快速开始) + - [预训练任务(SDXL+CLIP)](#预训练任务sdxlclip) + - [开始训练](#开始训练) + - [预训练任务(SDXL+MT5)](#预训练任务sdxlmt5) + - [开始训练](#开始训练-1) +- [训练结果展示](#训练结果展示) +- [公网地址说明](#公网地址说明) +- [变更说明](#变更说明) +- [FAQ](#faq) @@ -309,7 +317,7 @@ replace_token_length $mt5_tokenizer_path/tokenizer_config.json | NAME | sd版本 | FPS | batch_size | AMP_Type | Torch_Version | | :------: | :---: | :--: | :------: | :-----------: | :-----------: | -| GPU | xl | 20.2 | 4 | fp16 | 1.13 | +| 竞品A | xl | 20.2 | 4 | fp16 | 1.13 | | Atlas A2 | xl | 10.4 | 4 | fp16 | 1.11 | @@ -317,7 +325,7 @@ replace_token_length $mt5_tokenizer_path/tokenizer_config.json | NAME | sd版本 | FPS | batch_size | AMP_Type | Torch_Version | | :------: | :---: | :--: |:----------:|:--------:| :-----------: | -| GPU | xl | 9.754 | 2 | bf16 | 1.13 | +| 竞品A | xl | 9.754 | 2 | bf16 | 1.13 | | Atlas A2 | xl | 10.71| 2 | bf16 | 1.11 | diff --git a/PyTorch/built-in/foundation/Aquila2/README.md b/PyTorch/built-in/foundation/Aquila2/README.md index 220ac3d7a9..c24212b0de 100644 --- a/PyTorch/built-in/foundation/Aquila2/README.md +++ b/PyTorch/built-in/foundation/Aquila2/README.md @@ -1,16 +1,25 @@ # Aquila2 for Pytorch # 目录 -- [简介](#简介) - - [模型介绍](#模型介绍) - - [代码实现](#代码实现) -- [Aquila2](#Aquila2) - - [准备训练环境](#准备训练环境) - - [快速开始](#快速开始) - - [预训练任务](#预训练任务) -- [公网地址说明](#公网地址说明) -- [变更说明](#变更说明) -- [FAQ](#FAQ) +- [Aquila2 for Pytorch](#aquila2-for-pytorch) +- [目录](#目录) +- [简介](#简介) + - [模型介绍](#模型介绍) + - [代码实现](#代码实现) +- [Aquila2](#aquila2) + - [准备训练环境](#准备训练环境) + - [安装模型环境](#安装模型环境) + - [安装昇腾环境](#安装昇腾环境) + - [准备数据集](#准备数据集) + - [预训练数据集准备](#预训练数据集准备) + - [快速开始](#快速开始) + - [预训练任务](#预训练任务) + - [开始训练](#开始训练) + - [训练结果](#训练结果) +- [公网地址说明](#公网地址说明) +- [变更说明](#变更说明) + - [变更](#变更) +- [FAQ](#faq) # 简介 ## 模型介绍 @@ -144,9 +153,9 @@ Aquila2是智源发布的业内领先的大语言模型,在多个领域都有 **表 3** 训练结果展示表 | 芯片 | 卡数 | 参数规模 | seq_length | micro_batch_size | global_batch_size | 单步迭代时间 (s/step) | tokens吞吐 (tokens/s/p) |:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| -| GPU | 16p | 34B | 4096 | 1 | 32 | 10.8 | 756 | +| 竞品A | 16p | 34B | 4096 | 1 | 32 | 10.8 | 756 | | Atlas A2 | 16p | 34B | 4096 | 2 | 64 | - | - | -| GPU | 32p | 70B | 4096 | 1 | 44 | - | - | +| 竞品A | 32p | 70B | 4096 | 1 | 44 | - | - | | Atlas A2 | 32p | 70B | 4096 | 1 | 44 | - | - | # 公网地址说明 diff --git a/PyTorch/built-in/foundation/ChatGLM-6B/README.md b/PyTorch/built-in/foundation/ChatGLM-6B/README.md index e6ef63c9ea..a55239aae0 100644 --- a/PyTorch/built-in/foundation/ChatGLM-6B/README.md +++ b/PyTorch/built-in/foundation/ChatGLM-6B/README.md @@ -210,13 +210,13 @@ bash preprocess.sh | NAME | SamplesPerSec | Iterations | DataType | Torch_Version | Card | |:-------------:|:-------------:|:-:|:-:|:-:|:----:| | Finetune -NPU | 2213 | 5000 | fp16 | 1.11 | 910 | -| Finetune -GPU | 2048 | 5000 | fp16 | 1.11 | A800 | +| Finetune -竞品A | 2048 | 5000 | fp16 | 1.11 | 竞品A | 说明:P-Tuning 仅打通功能,无性能优化。 **表 2** 评估结果展示表 -| 评估项 | NPU | GPU | +| 评估项 | NPU | 竞品A | |:-------:|:-------:|:-------:| | BLEU-4 | 8.2853 | 8.1127 | | ROUGE-1 | 31.1898 | 30.7429 | diff --git a/PyTorch/built-in/foundation/ChatGLM3-6B/README.md b/PyTorch/built-in/foundation/ChatGLM3-6B/README.md index a1a51980b8..690e8aaf5d 100644 --- a/PyTorch/built-in/foundation/ChatGLM3-6B/README.md +++ b/PyTorch/built-in/foundation/ChatGLM3-6B/README.md @@ -189,9 +189,9 @@ | 芯片 | 卡数 | 模型 | Iterations | Global Batch Size | Train Samples per Second | --------- |---| ----------- | ---------------- | ----------------------------- | ---------------------------- | | Atlas A2 |8p| ChatGLM3-6B | 2000 | 16 |13.781 | -| GPU |8p| ChatGLM3-6B | 2000 | 16 |15.094 | +| 竞品A |8p| ChatGLM3-6B | 2000 | 16 |15.094 | | Atlas A2 |8p| ChatGLM3-6B-32K | 2000 | 16 | 11.819 | -| GPU |8p| ChatGLM3-6B-32K | 2000 | 16 |12.088 | +| 竞品A |8p| ChatGLM3-6B-32K | 2000 | 16 |12.088 | diff --git a/PyTorch/built-in/foundation/CodeGeeX2/README.md b/PyTorch/built-in/foundation/CodeGeeX2/README.md index cdf7bbb9af..2ec3a54629 100644 --- a/PyTorch/built-in/foundation/CodeGeeX2/README.md +++ b/PyTorch/built-in/foundation/CodeGeeX2/README.md @@ -225,14 +225,14 @@ bash preprocess.sh | NAME | SamplesPerSec | Iterations | DataType | Torch_Version | Card | | :-----------: | :-----------: | :--------: | :------: | :-----------: | :--: | -| Finetune -NPU | (待补充) | (待补充) | bf16? | 1.11 | 910B | -| Finetune -GPU | (待补充) | (待补充) | bf16? | 1.11 | A800 | +| Finetune -NPU | (待补充) | (待补充) | bf16? | 1.11 | Atlas 900 A2 PODc | +| Finetune -竞品A | (待补充) | (待补充) | bf16? | 1.11 | 竞品A | 说明:P-Tuning 仅打通功能,无性能优化。 **表 2** 评估结果展示表 -| 评估项 | NPU | GPU | +| 评估项 | NPU | 竞品A | | :-----: | :--: | :--: | | human pass@1 | 0.37 | 0.35 | diff --git a/PyTorch/built-in/foundation/CodeShell-7B/README.md b/PyTorch/built-in/foundation/CodeShell-7B/README.md index 2153c61de2..f5a729a64c 100644 --- a/PyTorch/built-in/foundation/CodeShell-7B/README.md +++ b/PyTorch/built-in/foundation/CodeShell-7B/README.md @@ -2,16 +2,25 @@ ## 目录 -- [简介](#简介) - - [模型介绍](#模型介绍) - - [支持任务列表](#支持任务列表) - - [代码实现](#代码实现) -- [CodeShell](#CodeShell) - - [准备训练环境](#准备训练环境) - - [快速开始](#快速开始) -- [公网地址说明](#公网地址说明) -- [变更说明](#变更说明) -- [FAQ](#FAQ) +- [CodeShell-7B for PyTorch](#codeshell-7b-for-pytorch) + - [目录](#目录) +- [简介](#简介) + - [模型介绍](#模型介绍) + - [支持任务列表](#支持任务列表) + - [代码实现](#代码实现) +- [CodeShell](#codeshell) + - [准备训练环境](#准备训练环境) + - [安装环境](#安装环境) + - [安装昇腾环境](#安装昇腾环境) + - [准备预训练权重](#准备预训练权重) + - [准备数据集](#准备数据集) + - [快速开始](#快速开始) + - [训练任务](#训练任务) + - [开始训练](#开始训练) + - [训练结果](#训练结果) +- [公网地址说明](#公网地址说明) +- [变更说明](#变更说明) +- [FAQ](#faq) # 简介 @@ -156,7 +165,7 @@ python convert_alpaca.py --in-file finetune/alpaca_data.json --out-file finetune | 芯片 | 卡数 | Batch size | Steps | Train_Samples_Per_Second | |----------|:--------:|:----------:|:-----:|:------------------------:| -| GPU | 8p | 6 | 2000 | 40.952 | +| 竞品A | 8p | 6 | 2000 | 40.952 | | Atlas-A2 | 8p | 6 | 2000 | 36.801 | diff --git a/PyTorch/built-in/foundation/GPT-NeoX/README.md b/PyTorch/built-in/foundation/GPT-NeoX/README.md index 2d9d24c1c0..cb6868e85a 100644 --- a/PyTorch/built-in/foundation/GPT-NeoX/README.md +++ b/PyTorch/built-in/foundation/GPT-NeoX/README.md @@ -323,7 +323,7 @@ GPT-NeoX-20B 是由EleutherAI和Hugging face合作开发的一个超大规模的 | NAME | tflops | Iterations | DataType | Torch_Version | Card | |:-------------:|:-------------:|:-:|:-:|:-:|:----:| -| GPU-2pp4mp2dp | 100 | 5000 | fp16 | 1.5 | A100 | +| 竞品A-2pp4mp2dp | 100 | 5000 | fp16 | 1.5 | 竞品A | | NPU-2pp4mp2dp | 150 | 5000 | fp16 | 1.5 | 910 | diff --git a/PyTorch/built-in/foundation/LLaMA-13B/README.md b/PyTorch/built-in/foundation/LLaMA-13B/README.md index cfe099663e..049fe1ce35 100644 --- a/PyTorch/built-in/foundation/LLaMA-13B/README.md +++ b/PyTorch/built-in/foundation/LLaMA-13B/README.md @@ -201,7 +201,7 @@ per_bs * grad_acc * seq_len / time | 13B-NPU(单机20层) | 1619 | 3 | | 13B-竞品A(单机20层) | 1740 | 3 | -注:这里vicuna 7B/13B在NPU上使用910B3(313T)训练,竞品使用A800训练 +注:这里vicuna 7B/13B在NPU上使用Atlas 800T A2训练 # 推理 ## 推理环境搭建 这里要替换transformers库中的部分文件,用于推理(评估)场景,后续如果要进行训练再更改为transformers_modify中的文件。 diff --git a/PyTorch/built-in/mm/AltCLIP/README.md b/PyTorch/built-in/mm/AltCLIP/README.md index 85d155a08d..86b0b3b2c5 100644 --- a/PyTorch/built-in/mm/AltCLIP/README.md +++ b/PyTorch/built-in/mm/AltCLIP/README.md @@ -2,17 +2,25 @@ ## 目录 -- [简介](#简介) - - [模型介绍](#模型介绍) - - [支持任务列表](#支持任务列表) - - [代码实现](#代码实现) -- [AltCLIP](#AltCLIP) - - [准备训练环境](#准备训练环境) - - [快速开始](#快速开始) - - [CIFAR10微调任务](#CIFAR10微调任务) -- [公网地址说明](#公网地址说明) -- [变更说明](#变更说明) -- [FAQ](#FAQ) +- [AltCLIP for PyTorch](#altclip-for-pytorch) + - [目录](#目录) +- [简介](#简介) + - [模型介绍](#模型介绍) + - [支持任务列表](#支持任务列表) + - [代码实现](#代码实现) +- [AltCLIP](#altclip) + - [准备训练环境](#准备训练环境) + - [安装环境](#安装环境) + - [安装昇腾环境](#安装昇腾环境) + - [准备预训练权重](#准备预训练权重) + - [准备数据集](#准备数据集) + - [快速开始](#快速开始) + - [CIFAR10微调任务](#cifar10微调任务) + - [开始训练](#开始训练) + - [训练结果](#训练结果) +- [公网地址说明](#公网地址说明) +- [变更说明](#变更说明) +- [FAQ](#faq) # 简介 @@ -166,7 +174,7 @@ clip_benchmark_datasets #### 训练结果 | 芯片 | 卡数 | 精度acc | 性能FPS | batch size | Precision | Torch Version | | -------------------- | :--: | :-----: | :-----: | :--------: | :-------: | :-----------: | -| GPU | 8p | 0.9737 | 338 | 512 | bf16 | 2.1 | +| 竞品A | 8p | 0.9737 | 338 | 512 | bf16 | 2.1 | | Atlas A200T A2 Box16 | 8p | 0.9732 | 295 | 512 | bf16 | 2.1 | diff --git a/PyTorch/built-in/mm/AnimateDiff/README.md b/PyTorch/built-in/mm/AnimateDiff/README.md index 068eaf6956..6e806d871b 100644 --- a/PyTorch/built-in/mm/AnimateDiff/README.md +++ b/PyTorch/built-in/mm/AnimateDiff/README.md @@ -4,10 +4,12 @@ # 目录 -- [AnimateDiff](#animatediff-for-pytorch) +- [AnimateDiff for PyTorch](#animatediff-for-pytorch) +- [目录](#目录) - [概述](#概述) + - [模型介绍](#模型介绍) - [准备训练环境](#准备训练环境) - - [创建Python环境](#创建Python环境) + - [创建Python环境](#创建python环境) - [准备数据集](#准备数据集) - [准备预训练权重](#准备预训练权重) - [准备推理权重](#准备推理权重) @@ -211,7 +213,7 @@ AnimateDiff提出了一个有效的框架,可将现有的大多数个性化文 | 芯片 | 卡数 | samples per second | batch_size | AMP_Type | Torch_Version | |:---:|:---:|:------------------:|:----------:|:--------:|:---:| -| GPU | 8p | 469.1 | 64 | fp16 | 2.1 | +| 竞品A | 8p | 469.1 | 64 | fp16 | 2.1 | | Atlas A2 | 8p | 410.7 | 64 | fp16 | 2.1 | ### 模型推理 diff --git a/PyTorch/built-in/mm/DiT/README.md b/PyTorch/built-in/mm/DiT/README.md index b9da5949d1..f3d2274aa4 100644 --- a/PyTorch/built-in/mm/DiT/README.md +++ b/PyTorch/built-in/mm/DiT/README.md @@ -2,18 +2,27 @@ ## 目录 -- [简介](#简介) - - [模型介绍](#模型介绍) - - [支持任务列表](#支持任务列表) - - [代码实现](#代码实现) -- [DiT](#DiT) - - [准备训练环境](#准备训练环境) - - [快速开始](#快速开始) - - [训练任务](#训练任务) - - [在线推理](#在线推理) -- [公网地址说明](#公网地址说明) -- [变更说明](#变更说明) -- [FAQ](#FAQ) +- [DiT for PyTorch](#dit-for-pytorch) + - [目录](#目录) +- [简介](#简介) + - [模型介绍](#模型介绍) + - [支持任务列表](#支持任务列表) + - [代码实现](#代码实现) +- [DiT](#dit) + - [准备训练环境](#准备训练环境) + - [安装环境](#安装环境) + - [安装昇腾环境](#安装昇腾环境) + - [准备预训练权重](#准备预训练权重) + - [准备数据集](#准备数据集) + - [快速开始](#快速开始) + - [训练任务](#训练任务) + - [开始训练](#开始训练) + - [训练结果](#训练结果) + - [在线推理](#在线推理) + - [开始推理](#开始推理) +- [公网地址说明](#公网地址说明) +- [变更说明](#变更说明) +- [FAQ](#faq) # 简介 @@ -156,13 +165,13 @@ Scalable Diffusion Models with Transformers,是完全基于transformer架构 #### 训练结果 | 芯片 | 卡数 | image size | global batch size | Precision | 性能FPS | | ------------- | :--: | :--------: | :---------------: | :-------: | :-----: | -| GPU | 8p | 256 | 256 | fp32 | 432 | +| 竞品A | 8p | 256 | 256 | fp32 | 432 | | Atlas 800T A2 | 8p | 256 | 256 | fp32 | 376 | -| GPU | 8p | 256 | 512 | bf16 | 727 | +| 竞品A | 8p | 256 | 512 | bf16 | 727 | | Atlas 800T A2 | 8p | 256 | 512 | bf16 | 586 | -| GPU | 8p | 512 | 64 | fp32 | 80 | +| 竞品A | 8p | 512 | 64 | fp32 | 80 | | Atlas 800T A2 | 8p | 512 | 64 | fp32 | 77 | -| GPU | 8p | 512 | 128 | bf16 | 151 | +| 竞品A | 8p | 512 | 128 | bf16 | 151 | | Atlas 800T A2 | 8p | 512 | 128 | bf16 | 122 | ### 在线推理 diff --git a/PyTorch/built-in/mm/LLaVA/README.md b/PyTorch/built-in/mm/LLaVA/README.md index a86a53a088..8044bfc5cb 100644 --- a/PyTorch/built-in/mm/LLaVA/README.md +++ b/PyTorch/built-in/mm/LLaVA/README.md @@ -4,8 +4,12 @@ # 目录 -- [LLaVA](#llava-for-pytorch) +- [LLaVA for PyTorch](#llava-for-pytorch) +- [目录](#目录) - [概述](#概述) + - [模型介绍](#模型介绍) + - [支持任务列表](#支持任务列表) + - [代码实现](#代码实现) - [准备训练环境](#准备训练环境) - [创建Python环境](#创建python环境) - [准备数据集](#准备数据集) @@ -17,7 +21,7 @@ - [模型推理](#模型推理) - [公网地址说明](#公网地址说明) - [变更说明](#变更说明) - - [FQA](#faq) + - [FAQ](#faq) @@ -143,7 +147,7 @@ LLaVA是一种新颖的端到端训练的大型多模态模型,它结合了视 | 芯片 | 卡数 | samples per second | batch_size | AMP_Type | Torch_Version | |:---:|:---:|:------------------:|:----------:|:---:|:---:| -| GPU | 8p | 18.62 | 16 | bf16 | 2.1 | +| 竞品A | 8p | 18.62 | 16 | bf16 | 2.1 | | Atlas A2 | 8p | 20.13 | 16 | bf16 | 2.1 | ### 模型评估 diff --git a/PyTorch/built-in/mm/OpenSora-master/docs/zh_CN/README.md b/PyTorch/built-in/mm/OpenSora-master/docs/zh_CN/README.md index 43753e3571..8b4c420376 100644 --- a/PyTorch/built-in/mm/OpenSora-master/docs/zh_CN/README.md +++ b/PyTorch/built-in/mm/OpenSora-master/docs/zh_CN/README.md @@ -213,7 +213,7 @@ docker run -ti --gpus all -v {MOUNT_DIR}:/data opensora
查看更多 -| 分辨率 | 模型大小 | 数据 | 迭代次数 | 批量大小 | GPU 天数 (H800) | 网址 +| 分辨率 | 模型大小 | 数据 | 迭代次数 | 批量大小 | 竞品A 天数 (H800) | 网址 | ---------- | ---------- | ------ | ----------- | ---------- | --------------- | | 16×512×512 | 700M | 20K HQ | 20k | 2×64 | 35 | [:link:](https://huggingface.co/hpcai-tech/Open-Sora/blob/main/OpenSora-v1-HQ-16x512x512.pth) | | 16×256×256 | 700M | 20K HQ | 24k | 8×64 | 45 | [:link:](https://huggingface.co/hpcai-tech/Open-Sora/blob/main/OpenSora-v1-HQ-16x256x256.pth) | diff --git a/PyTorch/built-in/mm/OpenSora1.0/docs/zh_CN/README.md b/PyTorch/built-in/mm/OpenSora1.0/docs/zh_CN/README.md index 8e52abf169..401281c19b 100644 --- a/PyTorch/built-in/mm/OpenSora1.0/docs/zh_CN/README.md +++ b/PyTorch/built-in/mm/OpenSora1.0/docs/zh_CN/README.md @@ -115,7 +115,7 @@ pip install -v . ## 模型权重 -| 分辨率 | 数据 | 迭代次数 | 批量大小 | GPU 天数 (H800) | 网址 | +| 分辨率 | 数据 | 迭代次数 | 批量大小 | 竞品A 天数 (H800) | 网址 | | ---------- | ------ | ----------- | ---------- | --------------- | ---------- | | 16×256×256 | 366K | 80k | 8×64 | 117 | [:link:]() | | 16×256×256 | 20K HQ | 24k | 8×64 | 45 | [:link:]() | diff --git a/PyTorch/built-in/mm/OpenSora1.1/docs/zh_CN/README.md b/PyTorch/built-in/mm/OpenSora1.1/docs/zh_CN/README.md index 21f8c6d799..cdc5633e4a 100644 --- a/PyTorch/built-in/mm/OpenSora1.1/docs/zh_CN/README.md +++ b/PyTorch/built-in/mm/OpenSora1.1/docs/zh_CN/README.md @@ -116,7 +116,7 @@ pip install -v . ## 模型权重 -| 分辨率 | 数据 | 迭代次数 | 批量大小 | GPU 天数 (H800) | 网址 | +| 分辨率 | 数据 | 迭代次数 | 批量大小 | 竞品A 天数 (H800) | 网址 | | ---------- | ------ | ----------- | ---------- | --------------- | ---------- | | 16×256×256 | 366K | 80k | 8×64 | 117 | [:link:]() | | 16×256×256 | 20K HQ | 24k | 8×64 | 45 | [:link:]() | diff --git a/PyTorch/built-in/mm/OpenSoraPlan1.0/README.md b/PyTorch/built-in/mm/OpenSoraPlan1.0/README.md index db82219532..f95520bbd3 100644 --- a/PyTorch/built-in/mm/OpenSoraPlan1.0/README.md +++ b/PyTorch/built-in/mm/OpenSoraPlan1.0/README.md @@ -1,27 +1,37 @@ # OpenSoraPlan1.0 for PyTorch # 目录 -- [简介](#简介) - - [模型介绍](#模型介绍) - - [支持任务列表](#支持任务列表) - - [代码实现](#代码实现) - -- [准备训练环境](#准备训练环境) -- [VideoGPT](#VideoGPT) - - [训练数据集准备](#训练数据集准备) - - [快速开始](#快速开始) - - [训练任务](#训练任务) - - [性能展示](#性能展示) -- [LatteT2V](#LatteT2V) - - [训练数据集准备](#训练数据集准备) - - [准备预训练模型](#准备预训练模型) - - [快速开始](#快速开始) - - [训练任务](#训练任务) - - [性能展示](#性能展示) - - [在线推理任务](#在线推理任务) -- [公网地址说明](#公网地址说明) -- [变更说明](#变更说明) -- [FAQ](#FAQ) +- [OpenSoraPlan1.0 for PyTorch](#opensoraplan10-for-pytorch) +- [目录](#目录) +- [简介](#简介) + - [模型介绍](#模型介绍) + - [支持任务列表](#支持任务列表) + - [代码实现](#代码实现) +- [准备训练环境](#准备训练环境) + - [安装模型环境](#安装模型环境) + - [安装昇腾环境](#安装昇腾环境) + - [训练数据集准备](#训练数据集准备) +- [VideoGPT](#videogpt) + - [训练数据集准备](#训练数据集准备-1) + - [快速开始](#快速开始) + - [训练任务](#训练任务) + - [开始训练](#开始训练) + - [性能展示](#性能展示) + - [性能](#性能) +- [LatteT2V](#lattet2v) + - [训练数据集准备](#训练数据集准备-2) + - [准备预训练模型](#准备预训练模型) + - [快速开始](#快速开始-1) + - [训练任务](#训练任务-1) + - [开始训练](#开始训练-1) + - [性能展示](#性能展示-1) + - [性能](#性能-1) + - [在线推理任务](#在线推理任务) + - [开始推理](#开始推理) +- [公网地址说明](#公网地址说明) +- [变更说明](#变更说明) + - [变更](#变更) +- [FAQ](#faq) # 简介 ## 模型介绍 @@ -305,7 +315,7 @@ python dataset/preprocess_msrvtt.py --data_path dataset/msrvtt/train/annotations | 芯片 | 卡数 | 单步迭代时间(s/step) | batch_size | AMP_Type | Torch_Version | |:---:|:---:|:----:|:----------:|:---:|:---:| -| GPU | 8p | 1.84 | 4 | bf16 | 2.1 | +| 竞品A | 8p | 1.84 | 4 | bf16 | 2.1 | | Atlas A2 | 8p | 1.95 | 4 | bf16 | 2.1 | ### 在线推理任务 diff --git a/PyTorch/built-in/mm/Qwen-VL/README.md b/PyTorch/built-in/mm/Qwen-VL/README.md index 2e1f22d92f..ddc017e358 100644 --- a/PyTorch/built-in/mm/Qwen-VL/README.md +++ b/PyTorch/built-in/mm/Qwen-VL/README.md @@ -197,7 +197,7 @@ ##### 性能 | 芯片 | 卡数 | model_max_length | batch_size | gradient_accumulation_steps | AMP_Type | Torch_Version | tokens/p/s | |:---:|:---:|:----:|:----------:|:---:|:---:|:---:|:---:| -| GPU | 8p | 2048 | 1 | 16 | bf16 | 2.1 | 1796 | +| 竞品A | 8p | 2048 | 1 | 16 | bf16 | 2.1 | 1796 | | Atlas A2 | 8p | 2048 | 1 | 16 | bf16 | 2.1 | 1910 | # 公网地址说明 diff --git a/PyTorch/built-in/nlp/Bert_Chinese_ID3433_for_PyTorch/README.md b/PyTorch/built-in/nlp/Bert_Chinese_ID3433_for_PyTorch/README.md index a21c3ae3aa..c80299a725 100644 --- a/PyTorch/built-in/nlp/Bert_Chinese_ID3433_for_PyTorch/README.md +++ b/PyTorch/built-in/nlp/Bert_Chinese_ID3433_for_PyTorch/README.md @@ -467,16 +467,16 @@ BERT的全称是Bidirectional Encoder Representation from Transformers,即双 ``` npu-smi info - #该设备芯片名为Ascend910A (自行替换) + #该设备芯片名为Atlas (自行替换) 回显如下: +-------------------|-----------------|------------------------------------------------------+ | NPU Name | Health | Power(W) Temp(C) Hugepages-Usage(page) | | Chip Device | Bus-Id | AICore(%) Memory-Usage(MB) | +===================+=================+======================================================+ - | 0 910A | OK | 15.8 42 0 / 0 | + | 0 Atlas | OK | 15.8 42 0 / 0 | | 0 0 | 0000:82:00.0 | 0 1074 / 21534 | +===================+=================+======================================================+ - | 1 910A | OK | 15.4 43 0 / 0 | + | 1 Atlas | OK | 15.4 43 0 / 0 | | 0 1 | 0000:89:00.0 | 0 1070 / 21534 | +===================+=================+======================================================+ ``` diff --git a/PyTorch/contrib/audio/tdnn/README.md b/PyTorch/contrib/audio/tdnn/README.md index 02836ee4f6..347b3b98d3 100644 --- a/PyTorch/contrib/audio/tdnn/README.md +++ b/PyTorch/contrib/audio/tdnn/README.md @@ -306,16 +306,16 @@ TDNN是一种经典的语音识别网络结构,主要由Conv1D+Relu+BN组成 ``` npu-smi info - #该设备芯片名为Ascend910A (自行替换) + #该设备芯片名为Atlas (自行替换) 回显如下: +-------------------+-----------------+------------------------------------------------------+ | NPU Name | Health | Power(W) Temp(C) Hugepages-Usage(page) | | Chip Device | Bus-Id | AICore(%) Memory-Usage(MB) | +===================+=================+======================================================+ - | 0 910A | OK | 15.8 42 0 / 0 | + | 0 Atlas | OK | 15.8 42 0 / 0 | | 0 0 | 0000:82:00.0 | 0 1074 / 32768 | +===================+=================+======================================================+ - | 1 910A | OK | 15.4 43 0 / 0 | + | 1 Atlas | OK | 15.4 43 0 / 0 | | 0 1 | 0000:89:00.0 | 0 1070 / 32768 | +===================+=================+======================================================+ ``` diff --git a/PyTorch/contrib/cv/classification/HRNet_ID1780_for_PyTorch/README.md b/PyTorch/contrib/cv/classification/HRNet_ID1780_for_PyTorch/README.md index bb2409d89b..5c441b9690 100644 --- a/PyTorch/contrib/cv/classification/HRNet_ID1780_for_PyTorch/README.md +++ b/PyTorch/contrib/cv/classification/HRNet_ID1780_for_PyTorch/README.md @@ -322,16 +322,16 @@ HRNet(High-Resolution Net)是针对2D人体姿态估计(Human Pose Estimat ``` npu-smi info - #该设备芯片名为Ascend 910A (自行替换) + #该设备芯片名为Atlas (自行替换) 回显如下: +-------------------+-----------------+------------------------------------------------------+ | NPU Name | Health | Power(W) Temp(C) Hugepages-Usage(page) | | Chip Device | Bus-Id | AICore(%) Memory-Usage(MB) | +===================+=================+======================================================+ - | 0 910A | OK | 15.8 42 0 / 0 | + | 0 Atlas | OK | 15.8 42 0 / 0 | | 0 0 | 0000:82:00.0 | 0 1074 / 21534 | +===================+=================+======================================================+ - | 1 910A | OK | 15.4 43 0 / 0 | + | 1 Atlas | OK | 15.4 43 0 / 0 | | 0 1 | 0000:89:00.0 | 0 1070 / 21534 | +===================+=================+======================================================+ ``` @@ -418,7 +418,7 @@ HRNet(High-Resolution Net)是针对2D人体姿态估计(Human Pose Estimat | 芯片型号 | Batch Size | 数据集 | 精度 | | --------- |------------| ---------- |-----------------------| -| 910A | 1 | ImageNet | 76.02/Top1 91.72/Top5 | +| Atlas | 1 | ImageNet | 76.02/Top1 91.72/Top5 | # 公网地址说明 diff --git a/PyTorch/contrib/cv/classification/InceptionV3_ID1596_for_PyTorch/README.md b/PyTorch/contrib/cv/classification/InceptionV3_ID1596_for_PyTorch/README.md index 177fc1cf41..562498e74e 100644 --- a/PyTorch/contrib/cv/classification/InceptionV3_ID1596_for_PyTorch/README.md +++ b/PyTorch/contrib/cv/classification/InceptionV3_ID1596_for_PyTorch/README.md @@ -292,16 +292,16 @@ InceptionV3 模型是谷歌 Inception 系列里面的第三代模型,在 Incep ```bash npu-smi info ``` - 例如该设备芯片名为 910A,回显如下: + 例如该设备芯片名为 Atlas,回显如下: ``` +-------------------+-----------------+------------------------------------------------------+ | NPU Name | Health | Power(W) Temp(C) Hugepages-Usage(page) | | Chip Device | Bus-Id | AICore(%) Memory-Usage(MB) | +===================+=================+======================================================+ - | 0 910A | OK | 15.8 42 0 / 0 | + | 0 Atlas | OK | 15.8 42 0 / 0 | | 0 0 | 0000:82:00.0 | 0 1074 / 21534 | +===================+=================+======================================================+ - | 1 910A | OK | 15.4 43 0 / 0 | + | 1 Atlas | OK | 15.4 43 0 / 0 | | 0 1 | 0000:89:00.0 | 0 1070 / 21534 | +===================+=================+======================================================+ ``` @@ -391,11 +391,11 @@ InceptionV3 模型是谷歌 Inception 系列里面的第三代模型,在 Incep ---- # 性能&精度 -在910A设备上,OM模型的精度为 **{Top1@Acc=77.31%, Top5@Acc=93.46%}**。 +在Atlas设备上,OM模型的精度为 **{Top1@Acc=77.31%, Top5@Acc=93.46%}**。 | 芯片型号 | BatchSize | 数据集 | 精度 | | --------- | --------- | ----------- | --------------- | -|Ascend910A| 128 | ILSVRC2012 | Top1Acc=78.06% Top5@Acc=93.81% +|Atlas| 128 | ILSVRC2012 | Top1Acc=78.06% Top5@Acc=93.81% # 公网地址说明 diff --git a/PyTorch/contrib/cv/classification/MAE_for_PyTorch/README.md b/PyTorch/contrib/cv/classification/MAE_for_PyTorch/README.md index 313ec83f73..948807e028 100644 --- a/PyTorch/contrib/cv/classification/MAE_for_PyTorch/README.md +++ b/PyTorch/contrib/cv/classification/MAE_for_PyTorch/README.md @@ -1,10 +1,18 @@ # MAE for PyTorch -- [概述](#概述) -- [准备训练环境](#准备训练环境) -- [开始训练](#开始训练) -- [训练结果展示](#训练结果展示) -- [版本说明](#版本说明) +- [MAE for PyTorch](#mae-for-pytorch) +- [概述](#概述) + - [简述](#简述) +- [准备训练环境](#准备训练环境) + - [准备环境](#准备环境) + - [准备数据集](#准备数据集) +- [开始训练](#开始训练) + - [训练模型](#训练模型) +- [训练结果展示](#训练结果展示) +- [版本说明](#版本说明) + - [变更](#变更) + - [FAQ](#faq) +- [公网地址说明](#公网地址说明) @@ -116,30 +124,30 @@ MAE的设计虽然简单,但已被证明是一个强大的、可扩展的视 1. 预训练 ```bash # pre-training 1p performance,单p上运行1个epoch,运行时间约为1h - # 输出性能日志./output_pretrain_1p/910A_1p_pretrain.log、总结性日志./output_pretrain_1p/log.txt + # 输出性能日志./output_pretrain_1p/Atlas_1p_pretrain.log、总结性日志./output_pretrain_1p/log.txt bash ./test/pretrain_performance_1p.sh --data_path=real_data_path # pre-training 8p performance,8p上运行1个epoch,运行时间约为9min - # 输出性能日志./output_pretrain_8p/910A_8p_pretrain.log、总结性日志./output_pretrain_8p/log.txt + # 输出性能日志./output_pretrain_8p/Atlas_8p_pretrain.log、总结性日志./output_pretrain_8p/log.txt bash ./test/pretrain_performance_8p.sh --data_path=real_data_path # pre-training 8p full,8p上运行400个epoch,运行时间约为60h - # 输出完整预训练日志./output_pretrain_full_8p/910A_8p_pretrain_full.log、总结性日志./output_pretrain_full_8p/log.txt + # 输出完整预训练日志./output_pretrain_full_8p/Atlas_8p_pretrain_full.log、总结性日志./output_pretrain_full_8p/log.txt bash ./test/pretrain_full_8p.sh --data_path=real_data_path ``` 2. fine-tuning ```bash # fine-tuning 1p performance,单p上运行1个epoch,运行时间约为1h15min, - # 输出性能日志./output_finetune_1p/910A_1p_finetune.log、总结性日志./output_finetune_1p/log.txt + # 输出性能日志./output_finetune_1p/Atlas_1p_finetune.log、总结性日志./output_finetune_1p/log.txt bash ./test/finetune_performance_1p.sh --data_path=real_data_path --finetune_pth=pretrained_model_path # fine-tuning 8p performance,8p上运行1个epoch,运行时间约为11min - # 输出性能日志./output_finetune_8p/910A_8p_finetune.log、总结性日志./output_finetune_8p/log.txt + # 输出性能日志./output_finetune_8p/Atlas_8p_finetune.log、总结性日志./output_finetune_8p/log.txt bash ./test/finetune_performance_8p.sh --data_path=real_data_path --finetune_pth=pretrained_model_path # fine-tuning 8p full,8p上运行100个epoch,运行时间约为18h - # 输出完整微调日志./output_finetune_full_8p/910A_8p_finetune_full.log、总结性日志./output_finetune_full_8p/log.txt + # 输出完整微调日志./output_finetune_full_8p/Atlas_8p_finetune_full.log、总结性日志./output_finetune_full_8p/log.txt bash ./test/finetune_full_8p.sh --data_path=real_data_path --finetune_pth=pretrained_model_path # fine-tuning_large 8p performance,8p上运行1个epoch,910运行时间约为14min @@ -152,7 +160,7 @@ MAE的设计虽然简单,但已被证明是一个强大的、可扩展的视 bash ./test/finetune_full_large_16p.sh --data_path=real_data_path --finetune_pth=pretrained_model_path # 8p Base_eval,运行时间约为3min - # 输出eval日志./output_finetune_eval_8p/910A_8p_finetune_eval.log + # 输出eval日志./output_finetune_eval_8p/Atlas_8p_finetune_eval.log bash ./test/finetune_eval_8p.sh --data_path=real_data_path --resume_pth=finetuned_model_path ``` diff --git a/PyTorch/contrib/cv/classification/SE-ResNext-101-32x4d/README.md b/PyTorch/contrib/cv/classification/SE-ResNext-101-32x4d/README.md index 78b4bf7f6d..afef738ce3 100644 --- a/PyTorch/contrib/cv/classification/SE-ResNext-101-32x4d/README.md +++ b/PyTorch/contrib/cv/classification/SE-ResNext-101-32x4d/README.md @@ -52,9 +52,9 @@ test/output/devie_id/Se-ResNext101_bs1024_8p_acc.log # 8p training accuracy re | Acc@1 | FPS | Platform| Device_nums| Epochs | Type | | :------: | :------: | :------ | :------ | :------: | :------: | -| - | 221 | GPU | 1 | 1 | O2 | +| - | 221 | 竞品A | 1 | 1 | O2 | | - | 395 | NPU | 1 | 1 | O2 | -| 78.34 | 1480 | GPU | 8 | 100 | O2 | +| 78.34 | 1480 | 竞品A | 8 | 100 | O2 | | 77.75 | 1978 | NPU | 8 | 100 | O2 | # 公网地址说明 diff --git a/PyTorch/contrib/cv/detection/CTPN/README.md b/PyTorch/contrib/cv/detection/CTPN/README.md index 8c91c1284c..f2b523e99e 100644 --- a/PyTorch/contrib/cv/detection/CTPN/README.md +++ b/PyTorch/contrib/cv/detection/CTPN/README.md @@ -43,6 +43,6 @@ Calculated!{"precision": 0.7331386861313869, "recall": 0.7094063926940639, "hmea | 名称 | 精度 | 性能 | | :----: | :--: | :------: | | NPU-8p | 72.1 | 17.66fps | -| GPU-8p | 72.4 | 13.25fps | +| 竞品A-8p | 72.4 | 13.25fps | | NPU-1p | | 1.695fps | -| GPU-1p | | 6.295fps| \ No newline at end of file +| 竞品A-1p | | 6.295fps| \ No newline at end of file diff --git a/PyTorch/contrib/cv/detection/DSFD/README.md b/PyTorch/contrib/cv/detection/DSFD/README.md index 20fdb5e814..dda16aba3d 100644 --- a/PyTorch/contrib/cv/detection/DSFD/README.md +++ b/PyTorch/contrib/cv/detection/DSFD/README.md @@ -100,7 +100,7 @@ Reference: | | Acc | | :-------------- | ---------------------------- | | 参考精度 | E:0.951 M:0.936 H:0.837 | -| GPU 8P 自测精度 | E 0.9473, M 0.9362, H 0.8651 | +| 竞品A 8P 自测精度 | E 0.9473, M 0.9362, H 0.8651 | # Statement diff --git a/PyTorch/contrib/cv/detection/GCNet/README.md b/PyTorch/contrib/cv/detection/GCNet/README.md index cedc3bfa01..a4ac91c314 100644 --- a/PyTorch/contrib/cv/detection/GCNet/README.md +++ b/PyTorch/contrib/cv/detection/GCNet/README.md @@ -129,8 +129,8 @@ bash ./test/eval.sh --weight_path=数据集路径 | 名称 | 精度(mAP) | 性能(fps) | | ------ | --------- | --------- | -| GPU-1p | - | 8.47 | -| GPU-8p | 39.9 | 44.62 | +| 竞品A-1p | - | 8.47 | +| 竞品A-8p | 39.9 | 44.62 | | NPU-1p | - | 0.52 | | NPU-8p | 39.1 | 2.35 | diff --git a/PyTorch/contrib/cv/detection/RetinaMask/README.md b/PyTorch/contrib/cv/detection/RetinaMask/README.md index a42bb5da01..43b6c5706f 100644 --- a/PyTorch/contrib/cv/detection/RetinaMask/README.md +++ b/PyTorch/contrib/cv/detection/RetinaMask/README.md @@ -72,8 +72,8 @@ bash test/train_eval_1p.sh --data_path=./dataset/ --weight_path=./model_0044999. | NAME | Steps | BBOX-MAP | SEGM-MAP | FPS | | :----: | :----: | :------: | :------: | :--: | -| GPU-1p | 360000 | - | - | 8.7 | -| GPU-8p | 20000 | 29.0 | 25.7 | 55.1 | +| 竞品A-1p | 360000 | - | - | 8.7 | +| 竞品A-8p | 20000 | 29.0 | 25.7 | 55.1 | | NPU-1p | 400 | - | - | 4.6 | | NPU-8p | 20000 | 28.8 | 25.7 | 34.8 | diff --git a/PyTorch/contrib/cv/others/3D_EDSR_ID3005_for_PyTorch/README.md b/PyTorch/contrib/cv/others/3D_EDSR_ID3005_for_PyTorch/README.md index 4283d5e49a..291bdba2c7 100644 --- a/PyTorch/contrib/cv/others/3D_EDSR_ID3005_for_PyTorch/README.md +++ b/PyTorch/contrib/cv/others/3D_EDSR_ID3005_for_PyTorch/README.md @@ -39,8 +39,8 @@ Log path: test/output/devie_id/train_${device_id}.log or obs://cann-idxxx/npu/wo | | PSNR (dB) | Npu_nums | Epochs | AMP_Type | FPS |single step cost| | --------- | --------- | -------- | ------ | -------- |-------- |--------| | NPU | 23.0884 | 1 | 50 | O2 | 1.12 | 0.43 | -| | PSNR (dB) | Gpu_nums | Epochs | AMP_Type | FPS |single step cost| -| GPU | 23.0939 | 1 | 50 | O2 | 4.54 | 0.65| +| | PSNR (dB) | 竞品A_nums | Epochs | AMP_Type | FPS |single step cost| +| 竞品A | 23.0939 | 1 | 50 | O2 | 4.54 | 0.65| diff --git a/PyTorch/contrib/cv/others/3D_Nested_Unet/README.md b/PyTorch/contrib/cv/others/3D_Nested_Unet/README.md index e2c3f46dc1..40096261cc 100644 --- a/PyTorch/contrib/cv/others/3D_Nested_Unet/README.md +++ b/PyTorch/contrib/cv/others/3D_Nested_Unet/README.md @@ -26,10 +26,10 @@ test文件夹 其他附件(不在本代码仓中获得) ├── v100_1p.log //GPU 1P训练日志 ├── v100_8p.log //GPU 8P训练日志 -├── 910A_1p.log //NPU 1P训练日志 -├── 910A_8p.log //NPU 8P训练日志 +├── Atlas_1p.log //NPU 1P训练日志 +├── Atlas_8p.log //NPU 8P训练日志 ├── v100_1p.prof //GPU 1P prof文件 -├── 910A_1p.prof //NPU 1P prof文件 +├── Atlas_1p.prof //NPU 1P prof文件 └── gpu_code.tar //GPU 1P及GPU 8P训练代码 ``` **关键环境:** @@ -310,10 +310,10 @@ python -m torch.distributed.launch --master_port=1234 --nproc_per_node=8 run/run | :------: | :------: | :------: | :------: | | [UNET++官方汇报](https://github.com/MrGiovanni/UNetPlusPlus/tree/master/pytorch) | 95.80 | 65.60 | --- | | 使用作者提供的fold_0预训练权重 | 96.55 | 71.97 | --- | -| GPU 1P bs=1 | 6.86 | 0.08 | 1.931 | -| GPU 1P bs=2 | --- | --- | 1.450 | -| GPU 8P bs=8 | 96.59 | 71.43 | 6.922 | -| GPU 8P bs=16 | 96.68 | 70.43 | 6.283 | +| 竞品A 1P bs=1 | 6.86 | 0.08 | 1.931 | +| 竞品A 1P bs=2 | --- | --- | 1.450 | +| 竞品A 8P bs=8 | 96.59 | 71.43 | 6.922 | +| 竞品A 8P bs=16 | 96.68 | 70.43 | 6.283 | | NPU 1P bs=1 | 6.02 | 0.05 | 2.477 | | NPU 1P bs=2 | --- | --- | 2.509 | | NPU 8P bs=8 | 96.67 | 71.42 | 4.209 | diff --git a/PyTorch/contrib/cv/others/FSRCNN_ID2990_for_PyTorch/README.md b/PyTorch/contrib/cv/others/FSRCNN_ID2990_for_PyTorch/README.md index 7c601e6184..247886d01b 100644 --- a/PyTorch/contrib/cv/others/FSRCNN_ID2990_for_PyTorch/README.md +++ b/PyTorch/contrib/cv/others/FSRCNN_ID2990_for_PyTorch/README.md @@ -73,7 +73,7 @@ python3 prepare.py --images-dir "images-dir" \ 2、精度指标 - | PSNR | 论文 | GPU | NPU | + | PSNR | 论文 | 竞品A | NPU | | ------ | ----- | -------- | ----- | | Scale2 | 37.12 | 37.06 | 37.03 | | Scale3 | 33.22 | 33.61 | 33.56 | @@ -81,7 +81,7 @@ python3 prepare.py --images-dir "images-dir" \ 3、性能指标 - | GPU | NPU | + | 竞品A | NPU | | --------- | --------- | | 2500 it/s | 3800 it/s | diff --git a/PyTorch/contrib/cv/others/LPTN_ID2780_for_PyTorch/README.md b/PyTorch/contrib/cv/others/LPTN_ID2780_for_PyTorch/README.md index a0dfcaaafe..4fcb9c5336 100644 --- a/PyTorch/contrib/cv/others/LPTN_ID2780_for_PyTorch/README.md +++ b/PyTorch/contrib/cv/others/LPTN_ID2780_for_PyTorch/README.md @@ -313,7 +313,7 @@ PYTHONPATH="./:${PYTHONPATH}" python3 scripts/data_preparation/create_lmdb.py - 精度结果比对 -| 精度指标项 | 论文发布 | GPU实测 | NPU实测 | +| 精度指标项 | 论文发布 | 竞品A实测 | NPU实测 | | ---------- | -------- | ------- | ------- | | PSNR | 22.12 | 22.8 | 22.3 | | SSIM | 0.878 | 0.885 | 0.8715 | @@ -321,7 +321,7 @@ PYTHONPATH="./:${PYTHONPATH}" python3 scripts/data_preparation/create_lmdb.py - 性能结果比对 -| 性能指标项 | GPU实测 | NPU实测 | +| 性能指标项 | 竞品A实测 | NPU实测 | | ---------- | ------- | ------- | |average duration(秒) | 0.06675|0.07552 | diff --git a/PyTorch/contrib/cv/others/Lifespan_ID2972_for_pytorch/README.md b/PyTorch/contrib/cv/others/Lifespan_ID2972_for_pytorch/README.md index 82d01faa71..974701fd70 100644 --- a/PyTorch/contrib/cv/others/Lifespan_ID2972_for_pytorch/README.md +++ b/PyTorch/contrib/cv/others/Lifespan_ID2972_for_pytorch/README.md @@ -153,7 +153,7 @@ Lifespan Age Transformation Synthesis 是一种基于GAN的方法,用于从单 | NAME | - | FPS | Epochs | sec/epoch | acc | | ------ | --------------------- | --------- | ------ | -------- | ---- | | NPU_1p | torch1.5+Ascend910 | 0.001385 | 15 | 2943.1 | None | -| GPU_1p | torch1.5+V100 | 0.001019 | 15 | 2166.1 | None | +| 竞品A_1p | torch1.5+V100 | 0.001019 | 15 | 2166.1 | None | # 公网地址说明 diff --git a/PyTorch/contrib/cv/others/Pix2Pix/README.md b/PyTorch/contrib/cv/others/Pix2Pix/README.md index eaff186688..ed6006504f 100644 --- a/PyTorch/contrib/cv/others/Pix2Pix/README.md +++ b/PyTorch/contrib/cv/others/Pix2Pix/README.md @@ -6,8 +6,8 @@ | 名称 | 精度 | 性能 | | :------: | :------: | :------: | - | GPU-1p | - | 15 | - | GPU-8p | - | 31 | + | 竞品A-1p | - | 15 | + | 竞品A-8p | - | 31 | | NPU-1p | - | 8 | | NPU-8p | - | 8 | # 自验报告 diff --git a/PyTorch/contrib/cv/others/Pix2PixHD/README.md b/PyTorch/contrib/cv/others/Pix2PixHD/README.md index 4c83213b08..2f8253ea72 100644 --- a/PyTorch/contrib/cv/others/Pix2PixHD/README.md +++ b/PyTorch/contrib/cv/others/Pix2PixHD/README.md @@ -128,9 +128,9 @@ bash ./test/train_eval_1p.sh --data_path=./datasets | 名称 | 精度 | 性能 | | ------ | ----- | -------- | -| GPU-1p | - | 4.55 fps | +| 竞品A-1p | - | 4.55 fps | | NPU-1p | - | 3.76 fps | -| GPU-8p | - | 19.14 fps | +| 竞品A-8p | - | 19.14 fps | | NPU-8p | - | 13.79 fps | diff --git a/PyTorch/contrib/cv/others/Pytorch-Super-Resolution-Implementations_ID3004_for_Pytorch/README.md b/PyTorch/contrib/cv/others/Pytorch-Super-Resolution-Implementations_ID3004_for_Pytorch/README.md index fb5c319000..792570c5b0 100644 --- a/PyTorch/contrib/cv/others/Pytorch-Super-Resolution-Implementations_ID3004_for_Pytorch/README.md +++ b/PyTorch/contrib/cv/others/Pytorch-Super-Resolution-Implementations_ID3004_for_Pytorch/README.md @@ -233,13 +233,13 @@ https://e-share.obs-website.cn-north-1.myhuaweicloud.com?token=kMZ5rC1dFBGYHI6Yc ``` 3. 精度指标。 - | 精度指标项 | 论文发布 | GPU实测 | NPU实测 | + | 精度指标项 | 论文发布 | 竞品A实测 | NPU实测 | | ---------- | -------- | ------- | ------- | | PSNR | 32.47 | 16.978 | 17.35 | 4. 性能指标。 - | 性能指标项 | 论文发布 | GPU实测 | NPU实测 | + | 性能指标项 | 论文发布 | 竞品A实测 | NPU实测 | | ---------- | -------- | ------- | ------- | | s/epoch | 无 | 26.136 | 22.506 | diff --git a/PyTorch/contrib/cv/others/SRGAN/README.md b/PyTorch/contrib/cv/others/SRGAN/README.md index 0a8125ae6c..acbf33676e 100644 --- a/PyTorch/contrib/cv/others/SRGAN/README.md +++ b/PyTorch/contrib/cv/others/SRGAN/README.md @@ -79,8 +79,8 @@ bash ./test/train_full_8p.sh --data_path=../data | ---------- | ---- | ------ | -------- | ------- | ------ | | NPU 1p_1.5 | 270 | 100 | O1 | 33.0558 | 0.9226 | | NPU 8P_1.5 | 1200 | 100 | O1 | 32.1882 | 0.9172 | -| GPU 1p | 360 | 100 | O1 | 33.4604 | 0.9308 | -| GPU 8P | 1400 | 100 | O1 | 31.0824 | 0.9191 | +| 竞品A 1p | 360 | 100 | O1 | 33.4604 | 0.9308 | +| 竞品A 8P | 1400 | 100 | O1 | 31.0824 | 0.9191 | | NPU 1p_1.8 | 180 | 100 | O1 | 33.3234 | 0.9302 | | NPU 8p_1.8 | 1200 | 100 | O1 | 33.2284 | 0.9312 | diff --git a/PyTorch/contrib/cv/others/Srcnn_x2_for_Pytorch/README.md b/PyTorch/contrib/cv/others/Srcnn_x2_for_Pytorch/README.md index 6dda360d80..91a3dfb287 100644 --- a/PyTorch/contrib/cv/others/Srcnn_x2_for_Pytorch/README.md +++ b/PyTorch/contrib/cv/others/Srcnn_x2_for_Pytorch/README.md @@ -49,8 +49,8 @@ Log path: | 名称 | 精度(PSNR)| 性能(FPS) | | :---: | :----:| :--------: | -| GPU-1P | - | 4060.2765 | -| GPU-8P | - | 12944.0177 | +| 竞品A-1P | - | 4060.2765 | +| 竞品A-8P | - | 12944.0177 | | NPU-1P | - | 5736.3541 | | NPU-8P | 36.62 | 13953.0316 | diff --git a/PyTorch/contrib/cv/others/Super-Resolution_CNN_ID3003_for_Pytorch/README.md b/PyTorch/contrib/cv/others/Super-Resolution_CNN_ID3003_for_Pytorch/README.md index 3e1f16903a..36fdeac42d 100644 --- a/PyTorch/contrib/cv/others/Super-Resolution_CNN_ID3003_for_Pytorch/README.md +++ b/PyTorch/contrib/cv/others/Super-Resolution_CNN_ID3003_for_Pytorch/README.md @@ -228,7 +228,7 @@ pip3 install requirements.txt 3. 精度指标。 ``` - | 精度指标项 | 论文发布 | GPU实测 | NPU实测 | + | 精度指标项 | 论文发布 | 竞品A实测 | NPU实测 | | ---------- | -------- | ------- | ------- | | PSNR | 32.75 | 33.27 | 33.27 | diff --git a/PyTorch/contrib/cv/pose_estimation/DeepPose/README.md b/PyTorch/contrib/cv/pose_estimation/DeepPose/README.md index 83f381fc61..95500b03fd 100644 --- a/PyTorch/contrib/cv/pose_estimation/DeepPose/README.md +++ b/PyTorch/contrib/cv/pose_estimation/DeepPose/README.md @@ -51,8 +51,8 @@ python3 pthtar2onnx.py | 名称 | 精度 | 性能 | AMP_Type | | :----: | ----- | ------- | -------- | -| GPU-1p | - | 194 | O2 | -| GPU-8p | 52.50 | 1160 | O2 | +| 竞品A-1p | - | 194 | O2 | +| 竞品A-8p | 52.50 | 1160 | O2 | | NPU-1p | - | 117 | O2 | | NPU-8p | 52.65 | 650-830 | O2 | diff --git a/PyTorch/contrib/cv/pose_estimation/HigherHRNet/README.md b/PyTorch/contrib/cv/pose_estimation/HigherHRNet/README.md index bc2d5d139a..395cf0a047 100644 --- a/PyTorch/contrib/cv/pose_estimation/HigherHRNet/README.md +++ b/PyTorch/contrib/cv/pose_estimation/HigherHRNet/README.md @@ -88,9 +88,9 @@ bash test/train_eval_8p.sh --data_path=real_data_path --pth_path=real_pre_train_ | 名称 | 精度 | 性能 | | :----: | :--: | :------: | | NPU-8p | 66.9 | 2.2s/step | -| GPU-8p | 67.1 | 1.2s/step | +| 竞品A-8p | 67.1 | 1.2s/step | | NPU-1p | | 1.1s/step | -| GPU-1p | | 0.7s/step| +| 竞品A-1p | | 0.7s/step| # Statement diff --git a/PyTorch/contrib/cv/pose_estimation/TransPose/README.md b/PyTorch/contrib/cv/pose_estimation/TransPose/README.md index c15f65eaae..60e26f998e 100644 --- a/PyTorch/contrib/cv/pose_estimation/TransPose/README.md +++ b/PyTorch/contrib/cv/pose_estimation/TransPose/README.md @@ -74,8 +74,8 @@ bash test/train_finetune_1p.sh --data_path=real_data_path --pth_path=real_pre_tr | 名称 | 精度 | 性能 | AMP_Type | | :----: | :--: | :--: | :------: | -| GPU-1p | - | 0.34s/step | O1 | -| GPU-8p | 71.7 | 0.98s/step | O1 | +| 竞品A-1p | - | 0.34s/step | O1 | +| 竞品A-8p | 71.7 | 0.98s/step | O1 | | NPU-1p | - | 0.34s/step | O1 | | NPU-8p | 72.5 | 0.95s/step | O1 | diff --git a/PyTorch/contrib/cv/semantic_segmentation/ENet/README.md b/PyTorch/contrib/cv/semantic_segmentation/ENet/README.md index 1d635087ef..f5f056b92c 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/ENet/README.md +++ b/PyTorch/contrib/cv/semantic_segmentation/ENet/README.md @@ -49,8 +49,8 @@ After running,you can see the results in `./NPU/stargan_full_8p/samples` or `./N | :----: | :-----: | :----: | :------: | | NPU-1p | 14.398 | 400 | O2 | | NPU-8p | 74.310 | 400 | O2 | -| GPU-1p | 21.885 | 400 | O2 | -| GPU-8p | 161.495 | 400 | O2 | +| 竞品A-1p | 21.885 | 400 | O2 | +| 竞品A-8p | 161.495 | 400 | O2 | # Statement diff --git a/PyTorch/contrib/cv/semantic_segmentation/ErfNet/README.md b/PyTorch/contrib/cv/semantic_segmentation/ErfNet/README.md index d49c2bf9a1..b9363f425c 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/ErfNet/README.md +++ b/PyTorch/contrib/cv/semantic_segmentation/ErfNet/README.md @@ -88,8 +88,8 @@ python demo.py | 名称 | iou | fps | | :------: | :------: | :------: | -| GPU-1p | - | 14.52 | -| GPU-8p | - | 94.64 | +| 竞品A-1p | - | 14.52 | +| 竞品A-8p | - | 94.64 | | NPU-1p | - | 24.08 | | NPU-8p | 71.47 | 143.15 | diff --git a/PyTorch/contrib/cv/semantic_segmentation/MedSAM_for_PyTorch/README.md b/PyTorch/contrib/cv/semantic_segmentation/MedSAM_for_PyTorch/README.md index dedf12cb46..2aa70ade83 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/MedSAM_for_PyTorch/README.md +++ b/PyTorch/contrib/cv/semantic_segmentation/MedSAM_for_PyTorch/README.md @@ -1,11 +1,19 @@ # MedSAM for PyTorch -- [概述](#概述) -- [准备训练环境](#准备训练环境) -- [开始训练](#开始训练) -- [推理评估](#推理评估) -- [训练结果展示](#训练结果展示) -- [版本说明](#版本说明) +- [MedSAM for PyTorch](#medsam-for-pytorch) +- [概述](#概述) + - [简述](#简述) +- [准备训练环境](#准备训练环境) + - [准备环境](#准备环境) + - [准备数据集](#准备数据集) + - [获取预训练模型](#获取预训练模型) +- [开始训练](#开始训练) + - [训练模型](#训练模型) +- [训练结果展示](#训练结果展示) +- [版本说明](#版本说明) + - [变更](#变更) + - [已知问题](#已知问题) +- [公网地址说明](#公网地址说明) @@ -203,8 +211,8 @@ | NAME | DSC | MIOU | FPS | Epochs | AMP_Type| | :-------: | :-----: | :-----: | :---: | :------: | :-------: | -| 8p-A100 | 95.9 | 92.4 | 0.76 | 100 | O0 | -| 8p-昇腾910 | 96.2 | 92.8 | 0.81 | 100 | O0 | +| 8p-竞品A | 95.9 | 92.4 | 0.76 | 100 | O0 | +| 8p-NPU | 96.2 | 92.8 | 0.81 | 100 | O0 | # 版本说明 diff --git a/PyTorch/contrib/cv/semantic_segmentation/SeMask/README.md b/PyTorch/contrib/cv/semantic_segmentation/SeMask/README.md index 07969c5daf..710e7754e4 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/SeMask/README.md +++ b/PyTorch/contrib/cv/semantic_segmentation/SeMask/README.md @@ -1,10 +1,19 @@ # SeMask -- [概述](#概述) -- [准备训练环境](#准备训练环境) -- [开始训练](#开始训练) -- [训练结果展示](#训练结果展示) -- [版本说明](#版本说明) +- [SeMask](#semask) +- [概述](#概述) + - [简述](#简述) +- [准备训练环境](#准备训练环境) + - [准备环境](#准备环境) + - [准备数据集](#准备数据集) + - [获取预训练模型](#获取预训练模型) +- [开始训练](#开始训练) + - [训练模型](#训练模型) +- [训练结果展示](#训练结果展示) +- [版本说明](#版本说明) + - [变更](#变更) + - [已知问题](#已知问题) +- [公网地址说明](#公网地址说明) # 概述 @@ -159,8 +168,8 @@ SeMask是一个图像语义分割框架,通过以下两种技术将语义信 | NAME | Acc@1 | FPS | PyTorch_version | |--------|-------|-------|-----------------| - | GPU-1P | - | 9.521 | 1.5 | - | GPU-8P | 79.02 | 63.15 | 1.5 | + | 竞品A-1P | - | 9.521 | 1.5 | + | 竞品A-8P | 79.02 | 63.15 | 1.5 | | NPU-1P | - | 8.153 | 1.5 | | NPU-8P | 79.02 | 62.126 | 1.5 | diff --git a/PyTorch/contrib/nlp/MAG-Bert_ID2985_for_PyTorch/README.md b/PyTorch/contrib/nlp/MAG-Bert_ID2985_for_PyTorch/README.md index 66dde33858..2be50c5bf8 100644 --- a/PyTorch/contrib/nlp/MAG-Bert_ID2985_for_PyTorch/README.md +++ b/PyTorch/contrib/nlp/MAG-Bert_ID2985_for_PyTorch/README.md @@ -253,7 +253,7 @@ pip3 install requirements.txt 3. 精度指标。 ``` - | 精度指标项 | 论文发布 | GPU实测 | NPU实测 | + | 精度指标项 | 论文发布 | 竞品A实测 | NPU实测 | | ---------- | -------- | ------- | ------- | | F1_SCORE | 84.1 | 84.27 | 82.59 | diff --git a/PyTorch/contrib/nlp/NCF_ID2943_for_PyTorch/readme.md b/PyTorch/contrib/nlp/NCF_ID2943_for_PyTorch/readme.md index 061761259b..2360539d49 100644 --- a/PyTorch/contrib/nlp/NCF_ID2943_for_PyTorch/readme.md +++ b/PyTorch/contrib/nlp/NCF_ID2943_for_PyTorch/readme.md @@ -35,7 +35,7 @@ python3 train.py | 200Epoch | HR | NDCG | | -------- | ------ | ------ | -| GPU | 0.6400 | 0.2950 | +| 竞品A | 0.6400 | 0.2950 | | NPU | 0.6407 | 0.3696 | 精度达标 @@ -43,7 +43,7 @@ python3 train.py | 设备 | 单batch耗时 | | :--- | ----------- | -| GPU | 0.0179s | +| 竞品A | 0.0179s | | NPU | 0.0183s | 性能达标 diff --git a/PyTorch/contrib/nlp/albert_ID0335_for_PyTorch/README.md b/PyTorch/contrib/nlp/albert_ID0335_for_PyTorch/README.md index 9eb3d72a6e..6df80ff634 100644 --- a/PyTorch/contrib/nlp/albert_ID0335_for_PyTorch/README.md +++ b/PyTorch/contrib/nlp/albert_ID0335_for_PyTorch/README.md @@ -363,16 +363,16 @@ https://gitee.com/ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/contrib/nlp/al ``` npu-smi info - #该设备芯片名为Ascend910A (自行替换) + #该设备芯片名为Atlas (自行替换) 回显如下: +-------------------|-----------------|------------------------------------------------------+ | NPU Name | Health | Power(W) Temp(C) Hugepages-Usage(page) | | Chip Device | Bus-Id | AICore(%) Memory-Usage(MB) | +===================+=================+======================================================+ - | 0 910A | OK | 15.8 42 0 / 0 | + | 0 Atlas | OK | 15.8 42 0 / 0 | | 0 0 | 0000:82:00.0 | 0 1074 / 21534 | +===================+=================+======================================================+ - | 1 910A | OK | 15.4 43 0 / 0 | + | 1 Atlas | OK | 15.4 43 0 / 0 | | 0 1 | 0000:89:00.0 | 0 1070 / 21534 | +===================+=================+======================================================+ ``` diff --git a/PyTorch/contrib/others/Low-rank-Multimodal-Fusion_ID2983_for_Pytorch/README.md b/PyTorch/contrib/others/Low-rank-Multimodal-Fusion_ID2983_for_Pytorch/README.md index 81a4e6d9a3..11eba9fba7 100644 --- a/PyTorch/contrib/others/Low-rank-Multimodal-Fusion_ID2983_for_Pytorch/README.md +++ b/PyTorch/contrib/others/Low-rank-Multimodal-Fusion_ID2983_for_Pytorch/README.md @@ -271,7 +271,7 @@ pip3 install requirements.txt - 精度结果比对 -| 精度指标项 | 论文发布 | GPU实测 | NPU实测 | +| 精度指标项 | 论文发布 | 竞品A实测 | NPU实测 | | ---------- | -------- | ------- | ------- | | F1_angry | 89.0 | 87.86 | 89.38 | | F1_sad | 85.9 | 84.37 | 84.06 | diff --git a/PyTorch/dev/nlp/Textcnn_for_PyTorch/README.md b/PyTorch/dev/nlp/Textcnn_for_PyTorch/README.md index f534634de1..d9073e98a3 100644 --- a/PyTorch/dev/nlp/Textcnn_for_PyTorch/README.md +++ b/PyTorch/dev/nlp/Textcnn_for_PyTorch/README.md @@ -86,7 +86,7 @@ Iter: 700, Train Loss: 0.21, Train Acc: 93.75%, Val Loss: 0.26, Val Acc: #### 5、GPU/NPU loss收敛趋势: -| step | GPU loss | NPU loss | +| step | 竞品A loss | NPU loss | | :--- | -------- | :------- | | | | | | | | | -- Gitee