diff --git a/ACL_PyTorch/built-in/foundation_models/open_clip/.keep b/ACL_PyTorch/built-in/foundation_models/open_clip/.keep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/ACL_PyTorch/built-in/foundation_models/open_clip/export.py b/ACL_PyTorch/built-in/foundation_models/open_clip/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..2c8a2e73c371a66b1fe5ca1a33017840fb3f97da
--- /dev/null
+++ b/ACL_PyTorch/built-in/foundation_models/open_clip/export.py
@@ -0,0 +1,97 @@
+import argparse
+
+import torch
+import torch.onnx
+from PIL import Image
+from onnx import load_model, save_model
+from onnxmltools.utils import convert_float_to_float16
+import open_clip
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--model-arch",
+        default="ViT-B-32",
+        choices=["ViT-B-32", "ViT-B-16", "ViT-L-14", "ViT-L-14-336", "ViT-H-14", "RN50"],
+        help="Architecture (model scale) of the open_clip model to be converted. Default to ViT-B-32."
+    )
+    parser.add_argument(
+        "--pytorch-ckpt-path",
+        default=None,
+        type=str,
+        help="Path of the input PyTorch open_clip checkpoint. Default to None, which automatically downloads the pretrained checkpoint."
+    )
+    parser.add_argument(
+        "--download-root",
+        default=None,
+        type=str,
+        help="If --pytorch-ckpt-path is None, the official pretrained checkpoint will be downloaded under the --download-root directory and converted. Default to ~/cache/clip/ ."
+    )
+    parser.add_argument(
+        "--save-onnx-path",
+        required=True,
+        type=str,
+        help="Path (prefix) of the output converted ONNX text or vision model."
+    )
+    parser.add_argument(
+        "--convert-text",
+        action="store_true",
+        help="Whether to convert the text encoder (text feature extractor) into ONNX."
+    )
+    parser.add_argument(
+        "--convert-vision",
+        action="store_true",
+        help="Whether to convert the vision encoder (vision feature extractor) into ONNX."
+    )
+    parser.add_argument(
+        "--context-length", type=int, default=77,
+        help="The padded length of the input text. Default to 77 (the open_clip tokenizer context length)."
+    )
+    args = parser.parse_args()
+    return args
+
+
+if __name__ == '__main__':
+    args = parse_args()
+
+    # The demo pipeline in this directory targets ViT-B-32, which matches the I/O shapes in the readme.
+    model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-32', pretrained='laion2b_s34b_b79k')
+    tokenizer = open_clip.get_tokenizer('ViT-B-32')
+    model.eval()
+
+    # Demo inputs used for tracing; CLIP.png ships with the open_clip repository (docs/CLIP.png).
+    image = preprocess(Image.open("CLIP.png")).unsqueeze(0)
+    text = tokenizer(["a diagram", "a dog", "a cat"])
+
+    # Perform conversions; the ONNX text and vision encoders are saved into separate files.
+    if args.convert_text:
+        # convert text FP32 ONNX model
+        text_fp32_onnx_path = f"{args.save_onnx_path}.txt.fp32.onnx"
+        torch.onnx.export(model,
+                          (None, text),
+                          text_fp32_onnx_path,
+                          input_names=['text'],
+                          output_names=['unnorm_text_features'],
+                          export_params=True,
+                          opset_version=13,
+                          verbose=True)
+        # convert text FP16 ONNX model based on the FP32 model
+        text_fp16_onnx_path = f"{args.save_onnx_path}.txt.fp16.onnx"
+        text_fp32_onnx_model = load_model(text_fp32_onnx_path)
+        text_fp16_onnx_model = convert_float_to_float16(text_fp32_onnx_model, keep_io_types=True, disable_shape_infer=True)
+        save_model(text_fp16_onnx_model,
+                   text_fp16_onnx_path,
+                   convert_attribute=True)
+
+    if args.convert_vision:
+        # convert vision FP32 ONNX model
+        vision_fp32_onnx_path = f"{args.save_onnx_path}.img.fp32.onnx"
+        torch.onnx.export(model,
+                          (image, None),
+                          vision_fp32_onnx_path,
+                          input_names=['image'],
+                          output_names=['unnorm_image_features'],
+                          export_params=True,
+                          do_constant_folding=False,
+                          opset_version=13,
+                          verbose=True)
+
+        # convert vision FP16 ONNX model based on the FP32 model
+        vision_fp16_onnx_path = f"{args.save_onnx_path}.img.fp16.onnx"
+        vision_fp32_onnx_model = load_model(vision_fp32_onnx_path)
+        vision_fp16_onnx_model = convert_float_to_float16(vision_fp32_onnx_model, keep_io_types=True, disable_shape_infer=True)
+        save_model(vision_fp16_onnx_model,
+                   vision_fp16_onnx_path,
+                   convert_attribute=True)
\ No newline at end of file
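A minimal sketch for sanity-checking the exported ONNX files with onnxruntime before the ATC conversion described in the readme below. It assumes export.py was run with `--save-onnx-path ./open_clip` (producing `open_clip.txt.fp32.onnx` and `open_clip.img.fp32.onnx`) and that the feature tensor is the first graph output, as implied by the `output_names` used above.

```python
import numpy as np
import onnxruntime as ort
import open_clip
from PIL import Image

# Rebuild the same preprocessing / tokenizer used by export.py.
_, _, preprocess = open_clip.create_model_and_transforms('ViT-B-32', pretrained='laion2b_s34b_b79k')
tokenizer = open_clip.get_tokenizer('ViT-B-32')

image = preprocess(Image.open("CLIP.png")).unsqueeze(0).numpy()   # float32, 1 x 3 x 224 x 224
text = tokenizer(["a diagram", "a dog", "a cat"]).numpy()         # int64, 3 x 77

img_sess = ort.InferenceSession("open_clip.img.fp32.onnx", providers=["CPUExecutionProvider"])
txt_sess = ort.InferenceSession("open_clip.txt.fp32.onnx", providers=["CPUExecutionProvider"])

# Assume the feature tensor is the first graph output (see output_names in export.py).
image_features = img_sess.run(None, {img_sess.get_inputs()[0].name: image})[0]
text_features = txt_sess.run(None, {txt_sess.get_inputs()[0].name: text})[0]

# L2-normalize (a no-op if the graph already normalizes) and compute the demo label probabilities.
image_features = image_features / np.linalg.norm(image_features, axis=-1, keepdims=True)
text_features = text_features / np.linalg.norm(text_features, axis=-1, keepdims=True)
logits = 100.0 * image_features @ text_features.T
logits -= logits.max(axis=-1, keepdims=True)   # numerical stability
probs = np.exp(logits) / np.exp(logits).sum(axis=-1, keepdims=True)
print("ONNX label probs:", probs)              # should be close to the PyTorch demo output
```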
diff --git a/ACL_PyTorch/built-in/foundation_models/open_clip/preprocess.py b/ACL_PyTorch/built-in/foundation_models/open_clip/preprocess.py
new file mode 100644
index 0000000000000000000000000000000000000000..47bd6fa6aad616817a5342fbd85dc4d8f600b758
--- /dev/null
+++ b/ACL_PyTorch/built-in/foundation_models/open_clip/preprocess.py
@@ -0,0 +1,43 @@
+import argparse
+
+import numpy as np
+import torch
+from PIL import Image
+import open_clip
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--image_features_path",
+        default='./npy/image_features',
+        type=str,
+        help="Path where the reference image features (.npy) are saved."
+    )
+    parser.add_argument(
+        "--text_features_path",
+        default='./npy/text_features',
+        type=str,
+        help="Path where the reference text features (.npy) are saved."
+    )
+    args = parser.parse_args()
+    return args
+
+
+if __name__ == '__main__':
+    args = parse_args()
+    model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-32', pretrained='laion2b_s34b_b79k')
+    tokenizer = open_clip.get_tokenizer('ViT-B-32')
+
+    # Demo inputs; CLIP.png ships with the open_clip repository (docs/CLIP.png).
+    image = preprocess(Image.open("CLIP.png")).unsqueeze(0)
+    text = tokenizer(["a diagram", "a dog", "a cat"])
+
+    with torch.no_grad(), torch.cuda.amp.autocast():
+        image_features = model.encode_image(image)
+        text_features = model.encode_text(text)
+        # Dump the (unnormalized) features as the reference for later precision comparison.
+        np.save(args.image_features_path, image_features.cpu().numpy())
+        np.save(args.text_features_path, text_features.cpu().numpy())
+
+        image_features /= image_features.norm(dim=-1, keepdim=True)
+        text_features /= text_features.norm(dim=-1, keepdim=True)
+
+        text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)
+
+    print("Label probs:", text_probs)  # prints: [[1., 0., 0.]]
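preprocess.py above dumps the encoded reference features, while the OM models described in the readme below take the raw preprocessed inputs (image: 1 x 3 x 224 x 224 float32, text: 3 x 77 int64). If those raw inputs are also wanted as .npy files, a sketch along these lines could produce them; the output file names here are only illustrative.

```python
import numpy as np
from PIL import Image
import open_clip

# Same preprocessing / tokenizer as preprocess.py.
_, _, preprocess = open_clip.create_model_and_transforms('ViT-B-32', pretrained='laion2b_s34b_b79k')
tokenizer = open_clip.get_tokenizer('ViT-B-32')

image = preprocess(Image.open("CLIP.png")).unsqueeze(0)      # float32, 1 x 3 x 224 x 224
text = tokenizer(["a diagram", "a dog", "a cat"])            # int64, 3 x 77

# Illustrative file names; point the inference tool at whichever directory it expects.
np.save("./npy/image_input.npy", image.numpy())
np.save("./npy/text_input.npy", text.numpy())
```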
diff --git a/ACL_PyTorch/built-in/foundation_models/open_clip/readme.md b/ACL_PyTorch/built-in/foundation_models/open_clip/readme.md
new file mode 100644
index 0000000000000000000000000000000000000000..91116f4f7b7c1053f6baa50965bb46fbc4bdb2a4
--- /dev/null
+++ b/ACL_PyTorch/built-in/foundation_models/open_clip/readme.md
@@ -0,0 +1,214 @@
+# OPEN_CLIP Model - Inference Guide
+
+
+- [Overview](#ZH-CN_TOPIC_0000001172161501)
+
+  - [Input and Output Data](#section540883920406)
+
+- [Inference Environment Setup](#ZH-CN_TOPIC_0000001126281702)
+
+- [Quick Start](#ZH-CN_TOPIC_0000001126281700)
+
+  - [Get the Source Code](#section4622531142816)
+  - [Prepare the Data](#section183221994411)
+  - [Model Inference](#section741711594517)
+
+- [Inference Performance & Accuracy](#ZH-CN_TOPIC_0000001172201573)
+
+  ******
+
+
+# Overview
+
+The goal of the open_clip repository is to enable training models with contrastive image-text supervision and to study their properties, such as robustness to distribution shift. Its starting point is an implementation of CLIP that matches the accuracy of the original CLIP model when trained on the same dataset.
+
+- Reference implementation:
+
+  ```
+  url=https://github.com/mlfoundations/open_clip
+  commit_id=c22a8ecaf95ace2e1ac785e3384689c03754bd40
+  code_path=built-in/foundation_models/open_clip
+  ```
+
+## Input and Output Data
+
+- Input data
+
+  | Input   | Data Type | Shape                     | Format |
+  | ------- | --------- | ------------------------- | ------ |
+  | input1  | FP32      | batchsize x 3 x 224 x 224 | NCHW   |
+  | input2  | INT64     | 3 x 77                    | ND     |
+
+- Output data
+
+  | Output  | Data Type | Shape   | Format |
+  | ------- | --------- | ------- | ------ |
+  | output1 | FP32      | 1 x 512 | ND     |
+  | output2 | FP32      | 3 x 512 | ND     |
+
+# Inference Environment Setup
+
+- The model requires the following plugins and drivers.
+
+  **Table 1** Version compatibility
+
+  | Component          | Version                                  | Environment Setup Guide |
+  | ------------------ | ---------------------------------------- | ----------------------- |
+  | Firmware & Driver  | 1.0.17 (NPU driver/firmware 6.0.RC1)     | [Inference environment setup for the PyTorch framework](https://www.hiascend.com/document/detail/zh/ModelZoo/pytorchframework/pies) |
+  | CANN               | 6.3.RC1                                  | -                       |
+  | Python             | 3.7.5                                    | -                       |
+  | PyTorch            | 1.7.0                                    | -                       |
+
+
+# Quick Start
+
+## Get the Source Code
+
+1. Install the dependencies.
+
+   ```
+   pip install -r requirements.txt
+   ```
+
+2. Get the source code.
+
+   ```
+   git clone https://github.com/mlfoundations/open_clip
+   cd open_clip
+   git reset --hard c22a8ecaf95ace2e1ac785e3384689c03754bd40
+   ```
+
+## Prepare Demo Data
+
+1. Data preprocessing.
+
+   ```
+   mkdir npy
+   python3 preprocess.py --image_features_path ./npy/image_features.npy --text_features_path ./npy/text_features.npy
+   ```
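+   The two generated .npy files hold the PyTorch reference features (1 x 512 for the image, 3 x 512 for the text, matching the output table above). A minimal sanity-check sketch, assuming the default ./npy paths used above:
+
+   ```python
+   import numpy as np
+   import torch
+
+   # Load the reference features dumped by preprocess.py.
+   image_features = torch.from_numpy(np.load("./npy/image_features.npy"))  # expected shape: 1 x 512
+   text_features = torch.from_numpy(np.load("./npy/text_features.npy"))    # expected shape: 3 x 512
+   print(image_features.shape, image_features.dtype)
+   print(text_features.shape, text_features.dtype)
+
+   # Recompute the demo label probabilities from the saved (unnormalized) features.
+   image_features /= image_features.norm(dim=-1, keepdim=True)
+   text_features /= text_features.norm(dim=-1, keepdim=True)
+   print("Label probs:", (100.0 * image_features @ text_features.T).softmax(dim=-1))
+   ```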
+
+## Model Inference
+
+1. Model conversion.
+
+   Run export.py, which automatically downloads the pretrained weights and converts them into .onnx files; then use the ATC tool to convert the .onnx files into offline inference .om models.
+
+   1. Export the ONNX models.
+
+      ```
+      python3 export.py --convert-text --save-onnx-path ./open_clip --convert-vision
+      ```
+
+      - Parameter description:
+        - --convert-text: convert the text encoder to ONNX.
+        - --convert-vision: convert the vision encoder to ONNX.
+        - --save-onnx-path: path (prefix) of the saved ONNX files.
+
+   2. Convert the ONNX models to OM models with the ATC tool.
+
+      1. Set the environment variables.
+
+         ```
+         source /usr/local/Ascend/ascend-toolkit/set_env.sh
+         ```
+
+      2. Run the following command to query the chip name (${chip_name}).
+
+         ```
+         npu-smi info
+         # The chip name of this device is Ascend310P3 (replace with your own).
+         # Example output:
+         +-------------------+-----------------+------------------------------------------------------+
+         | NPU   Name        | Health          | Power(W)   Temp(C)           Hugepages-Usage(page)   |
+         | Chip  Device      | Bus-Id          | AICore(%)  Memory-Usage(MB)                          |
+         +===================+=================+======================================================+
+         | 0     310P3       | OK              | 15.8       42                0    / 0                |
+         | 0     0           | 0000:82:00.0    | 0          1074 / 21534                              |
+         +===================+=================+======================================================+
+         | 1     310P3       | OK              | 15.4       43                0    / 0                |
+         | 0     1           | 0000:89:00.0    | 0          1070 / 21534                              |
+         +===================+=================+======================================================+
+         ```
+
+      3. Run the ATC commands.
+
+         (1) Convert the text model:
+
+         ```
+         atc --framework=5 \
+             --model=open_clip.txt.fp32.onnx \
+             --output=open_clip.txt.fp32 \
+             --input_format=NCHW \
+             --input_shape="text:3,77" \
+             --log=debug \
+             --soc_version=Ascend310P3
+         ```
+
+         (2) Convert the image model:
+
+         ```
+         atc --framework=5 \
+             --model=open_clip.img.fp32.onnx \
+             --output=open_clip.img.fp32 \
+             --input_format=NCHW \
+             --input_shape="image:1,3,224,224" \
+             --log=debug \
+             --soc_version=Ascend310P3
+         ```
+
+         - Parameter description:
+
+           - --model: the ONNX model file.
+           - --framework: 5 stands for an ONNX model.
+           - --output: the output OM model.
+           - --input_format: format of the input data.
+           - --input_shape: shape of the input data.
+           - --log: log level.
+           - --soc_version: processor model.
+
+
+2. Run inference and verification.
+
+   1. Install the ais_bench inference tool.
+
+      Visit the [ais_bench inference tool](https://gitee.com/ascend/tools/tree/master/ais-bench_workload/tool/ais_bench) repository and install the tool by following its readme.
+
+   2. Run inference.
+
+      ```
+      python -m ais_bench --model ./open_clip.img.fp32.om \
+          --input ./npy/image_features.npy \
+          --output ./ \
+          --batchsize 1 \
+          --outfmt BIN \
+          --output_dirname result
+      ```
+
+      ```
+      python -m ais_bench --model ./open_clip.txt.fp32.om \
+          --input ./npy/text_features.npy \
+          --output ./ \
+          --batchsize 1 \
+          --outfmt BIN \
+          --output_dirname result
+      ```
+
+      - Parameter description:
+
+        - --model: path of the OM model.
+        - --input: directory holding the preprocessed input files.
+        - --output: directory for the inference results.
+        - --batchsize: number of samples fed to the model per run.
+        - --outfmt: format of the inference output data.
+        - --output_dirname: subdirectory for the output results.
+
+      By default, the inference outputs are stored in the result directory under the current path.
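+   To check the device results against the PyTorch reference features dumped by preprocess.py, the BIN outputs can be loaded and compared. A rough sketch only: it assumes the OM model was fed the matching raw input, that the first output file holds the 1 x 512 image features from the output table above, and that the `*_0.bin` naming matches your ais_bench version (adjust the pattern if it does not).
+
+   ```python
+   import glob
+   import numpy as np
+
+   def read_bin(path, shape):
+       """Read one ais_bench BIN output and reshape it to the expected feature shape."""
+       return np.fromfile(path, dtype=np.float32).reshape(shape)
+
+   def cosine_similarity(a, b):
+       a = a / np.linalg.norm(a, axis=-1, keepdims=True)
+       b = b / np.linalg.norm(b, axis=-1, keepdims=True)
+       return (a * b).sum(axis=-1)
+
+   # Output file names depend on the ais_bench version; adjust the glob pattern if needed.
+   image_bin = sorted(glob.glob("./result/*_0.bin"))[0]
+   npu_image_features = read_bin(image_bin, (1, 512))
+   ref_image_features = np.load("./npy/image_features.npy")
+
+   print("image feature cosine similarity:", cosine_similarity(npu_image_features, ref_image_features))
+   ```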