diff --git a/MindIE/MindIE-Torch/built-in/text_embedding/bge_large_zh_v1.5/README.md b/MindIE/MindIE-Torch/built-in/text_embedding/bge_large_zh_v1.5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..03034a222e7810f6ee96b09fd43ffa5f08d86b90 --- /dev/null +++ b/MindIE/MindIE-Torch/built-in/text_embedding/bge_large_zh_v1.5/README.md @@ -0,0 +1,105 @@ +# bge_large_zh_v1.5模型-推理指导 + +- [概述](#概述) +- [推理环境准备](#推理环境准备) +- [快速上手](#快速上手) + - [获取源码](#获取源码) + - [模型推理](#模型推理) + +# 概述 + +该工程使用mindietorch部署 bge_large_zh_v1.5 模型 + +- 模型路径: + ```bash + https://huggingface.co/BAAI/bge-large-zh + ``` + + +# 推理环境准备 + +- 该模型需要以下插件与驱动 + + **表 1** 版本配套表 + + | 配套 | 版本 | 环境准备指导 | + | ------ | ------- | ------------ | + | Python | 3.10.13 | - | + | torch | 2.1.0+cpu | - | + | torch_audio | 2.1.0+cpu | - | + | CANN | 8.0.RC2 | - | + | MindIE | 1.0.RC3.B030 | - | + +# 快速上手 +## 获取源码 + +1. 安装mindie包 + + ```bash + # 安装mindie + chmod +x ./Ascend-mindie_xxx.run + ./Ascend-mindie_xxx.run --install + source /usr/local/Ascend/mindie/set_env.sh + ``` + +2. 获取模型权重 + + ``` + git lfs install + git clone https://huggingface.co/BAAI/bge-large-zh + git checkout b5d9f5c027e87b6f0b6fa4b614f8f9cdc45ce0e8 + ``` + +3. 获取模型依赖transformers, 并修改 + ``` + pip install transformers==4.44.0 + pip show transformers + ``` + 找到transformers 的安装路径,并修改transformers/modeling_utils.py文件 + 将1129行的 `extended_attention_mask = (1.0 - extended_attention_mask) * torch.finfo(dtype).min` + 修改为`extended_attention_mask = (1.0 - extended_attention_mask) * (-1000)` + + 目录结构 + ``` + bge_large_zh_v1.5 + ├── bge-large-zh + └── config.json + └── ... + ├── README.md + ├── trace.py + ├── compile.py + └── infer.py + + ``` +4. 获取数据集 +使用[banking77](https://huggingface.co/datasets/legacy-datasets/banking77)数据集进行测试,手动将其data目录下的test-00000-of-00001.parquet 数据集 下载到本地。 + +## 模型推理 +1. 将模型转成torchscript格式 +``` +python trace.py +``` + +2. 模型编译优化 +``` +python compile.py +``` + +3. 
模型推理 +``` +# 请指定数据集路径 +python infer.py +``` + + +## 性能 +环境:800I A2 + +|输入batch size | 输入 seqlen | 推理时延 | +| ---- | ---- | ----- | +| 8 | 512 | 30ms| +| 128 | 512 | 721 ms| + + + + diff --git a/MindIE/MindIE-Torch/built-in/text_embedding/bge_large_zh_v1.5/compile.py b/MindIE/MindIE-Torch/built-in/text_embedding/bge_large_zh_v1.5/compile.py new file mode 100644 index 0000000000000000000000000000000000000000..3061831885ec070ca870dbbc6190dfb03019bd5e --- /dev/null +++ b/MindIE/MindIE-Torch/built-in/text_embedding/bge_large_zh_v1.5/compile.py @@ -0,0 +1,24 @@ +import torch +import mindietorch + +traced_model_path = "traced.ts" +model = torch.jit.load(traced_model_path) +# 2 inputs +inputs = [mindietorch.Input(min_shape=(1, 1), max_shape=(1, 200), dtype=mindietorch.dtype.INT64), + mindietorch.Input(min_shape=(1, 1), max_shape=(1, 200), dtype=mindietorch.dtype.INT64), + mindietorch.Input(min_shape=(1, 1), max_shape=(1, 200), dtype=mindietorch.dtype.INT64)] + +compiled_module = mindietorch.compile( + model, + inputs=inputs, + precision_policy=mindietorch.PrecisionPolicy.FP16, + truncate_long_and_double=True, + require_full_comilation=False, + allow_tensor_replace_int=False, + min_blocks_size=3, + torch_executed_ops=[], + soc_version="Ascendxxx", # supports Atlas 300I Duo 推理卡, Atlas 800I A2推理产品 + optimization_level=0 +) + +compiled_module.save("compiled.ts") diff --git a/MindIE/MindIE-Torch/built-in/text_embedding/bge_large_zh_v1.5/infer.py b/MindIE/MindIE-Torch/built-in/text_embedding/bge_large_zh_v1.5/infer.py new file mode 100644 index 0000000000000000000000000000000000000000..d0beb47ff5bdb0c881dea88d88ca6a65a29c730b --- /dev/null +++ b/MindIE/MindIE-Torch/built-in/text_embedding/bge_large_zh_v1.5/infer.py @@ -0,0 +1,40 @@ +import torch +import mindietorch +import pandas as pd +from transformers import AutoTokenizer, AutoModel + +test_dataset = pd.read_parquet("/path/to/your/local/banking77/test-00000-of-00001.parquet") +test_texts = test_dataset['text'].tolist() 
+test_labels = test_dataset['label'].tolist() + + +path = "path_to_your_model_weights" +tokenizer = AutoTokenizer.from_pretrained(path) +cpu_model = AutoModel.from_pretrained(path, trust_remote_code=True, torchscript=True) + +compiled_model_path = "compiled.ts" +device_id = 1 +mindietorch.set_device(device_id) +npu_model = torch.jit.load(compiled_model_path) + + +cossim_list = [] + +for text in test_texts: + inputs = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=128) + + # 3 inputs + input_ids_npu = inputs["input_ids"].to(f"npu:{device_id}") + token_type_ids = inputs["token_type_ids"].to(f"npu:{device_id}") + attention_mask_npu = inputs["attention_mask"].to(f"npu:{device_id}") + + + with torch.no_grad(): + pred_cpu = cpu_model(**inputs)[0] + npu_outputs = npu_model(input_ids_npu, attention_mask_npu, token_type_ids) + last_hiddens_states = npu_outputs[0].to("cpu") + + cossim = torch.nn.functional.cosine_similarity(pred_cpu.reshape(1, -1), last_hiddens_states.reshape(1, -1)) + cossim_list.append(cossim) + +print(cossim) \ No newline at end of file diff --git a/MindIE/MindIE-Torch/built-in/text_embedding/bge_large_zh_v1.5/trace.py b/MindIE/MindIE-Torch/built-in/text_embedding/bge_large_zh_v1.5/trace.py new file mode 100644 index 0000000000000000000000000000000000000000..647f8e036d8c2910c012b5c60ccc54a9a91af9f8 --- /dev/null +++ b/MindIE/MindIE-Torch/built-in/text_embedding/bge_large_zh_v1.5/trace.py @@ -0,0 +1,20 @@ +from transformers import AutoTokenizer, AutoModel +from torch import Tensor +import torch +sentences = ["样例数据-1", "样例数据-2"] + +path = "path-to-your-model" +tokenizer = AutoTokenizer.from_pretrained(path) +model = AutoModel.from_pretrained(path, trust_remote_code=True, torchscript=True) +model.eval() + +encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt', max_length=128) +with torch.no_grad(): + model_output = model(**encoded_input) +# 3 inputs +input_ids = 
encoded_input["input_ids"] +token_type_ids = encoded_input["token_type_ids"] +attention_mask = encoded_input["attention_mask"] + +traced_model = torch.jit.trace(model, (input_ids, attention_mask, token_type_ids)) +traced_model.save("traced.ts") diff --git a/MindIE/MindIE-Torch/built-in/text_embedding/bge_reranker_large/README.md b/MindIE/MindIE-Torch/built-in/text_embedding/bge_reranker_large/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d669f16daf445e6294d547f15773a5bb3d1e383f --- /dev/null +++ b/MindIE/MindIE-Torch/built-in/text_embedding/bge_reranker_large/README.md @@ -0,0 +1,107 @@ +# bge_reranker_large模型-推理指导 + +- [概述](#概述) +- [推理环境准备](#推理环境准备) +- [快速上手](#快速上手) + - [获取源码](#获取源码) + - [模型推理](#模型推理) + +# 概述 + +该工程使用mindietorch部署 bge_reranker_large 模型 + +- 模型路径: + ```bash + https://huggingface.co/BAAI/bge-reranker-large + ``` + + +# 推理环境准备 + +- 该模型需要以下插件与驱动 + + **表 1** 版本配套表 + + | 配套 | 版本 | 环境准备指导 | + | ------ | ------- | ------------ | + | Python | 3.10.13 | - | + | torch | 2.1.0+cpu | - | + | torch_audio | 2.1.0+cpu | - | + | CANN | 8.0.RC2 | - | + | MindIE | 1.0.RC3.B030 | - | + +# 快速上手 +## 获取源码 + +1. 安装mindie包 + + ```bash + # 安装mindie + chmod +x ./Ascend-mindie_xxx.run + ./Ascend-mindie_xxx.run --install + source /usr/local/Ascend/mindie/set_env.sh + ``` + +2. 获取模型权重 + + ``` + git lfs install + git clone https://huggingface.co/BAAI/bge-reranker-large + git checkout 55611d7bca2a7133960a6d3b71e083071bbfc312 + ``` + +3. 获取模型依赖transformers, 并修改 + ``` + pip install transformers==4.44.0 + pip show transformers + ``` + 找到transformers 的安装路径,并修改transformers/modeling_utils.py文件 + 将1129行的 `extended_attention_mask = (1.0 - extended_attention_mask) * torch.finfo(dtype).min` + 修改为`extended_attention_mask = (1.0 - extended_attention_mask) * (-1000)` + + 目录结构 + ``` + bge_reranker_large + ├── bge-reranker-large + └── config.json + └── ... + ├── README.md + ├── trace.py + ├── compile.py + └── infer.py + + ``` +4. 
获取数据集 +使用[banking77](https://huggingface.co/datasets/legacy-datasets/banking77)数据集进行测试,手动将其data目录下的test-00000-of-00001.parquet 数据集 下载到本地。 + +## 模型推理样例 +1. 将模型转成torchscript格式 +注意, 如果使用arm架构机器cpu, 由于pytorch2.1.0版本问题, 需要将pytorch版本升级到2.2.0, 然后执行trace.py脚本, 之后将pytorch版本降级到2.1.0再进行下面的编译优化和推理 +``` +python trace.py +``` + +2. 模型编译优化 +``` +python compile.py +``` + +3. 模型推理 +``` +# 请指定数据集路径 +python infer.py +``` + + +## 性能 +环境:800I A2 + +|输入batch size | 输入 seqlen | 推理时延 | +| ---- | ---- | ----- | +| 20 | 512 | 98ms| + + + + + + diff --git a/MindIE/MindIE-Torch/built-in/text_embedding/bge_reranker_large/compile.py b/MindIE/MindIE-Torch/built-in/text_embedding/bge_reranker_large/compile.py new file mode 100644 index 0000000000000000000000000000000000000000..512bd94b42e3099633b4d32b8e4798bb4b48b6b8 --- /dev/null +++ b/MindIE/MindIE-Torch/built-in/text_embedding/bge_reranker_large/compile.py @@ -0,0 +1,23 @@ +import torch +import mindietorch + +traced_model_path = "traced.ts" +model = torch.jit.load(traced_model_path) +# 2 inputs +inputs = [mindietorch.Input(min_shape=(1, 1), max_shape=(1, 200), dtype=mindietorch.dtype.INT64), + mindietorch.Input(min_shape=(1, 1), max_shape=(1, 200), dtype=mindietorch.dtype.INT64)] + +compiled_module = mindietorch.compile( + model, + inputs=inputs, + precision_policy=mindietorch.PrecisionPolicy.FP16, + truncate_long_and_double=True, + require_full_comilation=False, + allow_tensor_replace_int=False, + min_blocks_size=3, + torch_executed_ops=[], + soc_version="Ascendxxx", # supports Atlas 300I Duo 推理卡, Atlas 800I A2推理产品 + optimization_level=0 +) + +compiled_module.save("compiled.ts") diff --git a/MindIE/MindIE-Torch/built-in/text_embedding/bge_reranker_large/infer.py b/MindIE/MindIE-Torch/built-in/text_embedding/bge_reranker_large/infer.py new file mode 100644 index 0000000000000000000000000000000000000000..2f9bd8371ab7734552c4e4833daaece94dd7a0c6 --- /dev/null +++ b/MindIE/MindIE-Torch/built-in/text_embedding/bge_reranker_large/infer.py @@ -0,0 
+1,28 @@ +import torch +import mindietorch +import pandas as pd +from transformers import AutoTokenizer + +test_dataset = pd.read_parquet("/path/to/your/local/banking77/test-00000-of-00001.parquet") +test_texts = test_dataset['text'].tolist() + +path = "path_to_your_model_weights" +tokenizer = AutoTokenizer.from_pretrained(path) + +compiled_model_path = "compiled.ts" +device_id = 1 +mindietorch.set_device(device_id) +npu_model = torch.jit.load(compiled_model_path) + + +for text in test_texts: + inputs = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=128) + + # 2 inputs + input_ids_npu = inputs["input_ids"].to(f"npu:{device_id}") + attention_mask_npu = inputs["attention_mask"].to(f"npu:{device_id}") + + with torch.no_grad(): + npu_outputs = npu_model(input_ids_npu, attention_mask_npu) + last_hiddens_states = npu_outputs["logits"].to("cpu") + \ No newline at end of file diff --git a/MindIE/MindIE-Torch/built-in/text_embedding/bge_reranker_large/trace.py b/MindIE/MindIE-Torch/built-in/text_embedding/bge_reranker_large/trace.py new file mode 100644 index 0000000000000000000000000000000000000000..93e7fe839b853981efbe4b6039246a497a1bba80 --- /dev/null +++ b/MindIE/MindIE-Torch/built-in/text_embedding/bge_reranker_large/trace.py @@ -0,0 +1,16 @@ +from transformers import AutoTokenizer, AutoModelForSequenceClassification +import torch + +pairs = [['what is panda?', 'hi'], ['what is panda?', 'The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China.']] + +path = "path-to-your-model" +tokenizer = AutoTokenizer.from_pretrained(path) +model = AutoModelForSequenceClassification.from_pretrained(path) +model.eval() + +encoded_input = tokenizer(pairs, padding=True, truncation=True, return_tensors='pt', max_length=128) +input_ids = encoded_input["input_ids"] +attention_mask = encoded_input["attention_mask"] + +traced_model = torch.jit.trace(model, (input_ids, attention_mask), 
strict=False) +traced_model.save("traced.ts") diff --git a/MindIE/MindIE-Torch/built-in/text_embedding/gte_large_en_v1.5/README.md b/MindIE/MindIE-Torch/built-in/text_embedding/gte_large_en_v1.5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e46a65a38e48544d6b7f1a62a84102f0c774150 --- /dev/null +++ b/MindIE/MindIE-Torch/built-in/text_embedding/gte_large_en_v1.5/README.md @@ -0,0 +1,126 @@ +# gte-large-en-v1.5 模型-推理指导 + +- [概述](#概述) +- [推理环境准备](#推理环境准备) +- [快速上手](#快速上手) + - [获取源码](#获取源码) + - [模型推理](#模型推理) + +# 概述 + +该工程使用mindietorch部署gte-large-en-v1.5模型 + +- 模型路径: + ```bash + https://huggingface.co/Alibaba-NLP/gte-large-en-v1.5 + ``` + + +# 推理环境准备 + +- 该模型需要以下插件与驱动 + + **表 1** 版本配套表 + + | 配套 | 版本 | 环境准备指导 | + | ------ | ------- | ------------ | + | Python | 3.10.13 | - | + | torch | 2.1.0+cpu | - | + | torch_audio | 2.1.0+cpu | - | + | CANN | 8.0.RC2 | - | + | MindIE | 1.0.RC3.B030 | - | + +# 快速上手 +## 获取源码 + +1. 安装mindie包 + + ```bash + # 安装mindie + chmod +x ./Ascend-mindie_xxx.run + ./Ascend-mindie_xxx.run --install + source /usr/local/Ascend/mindie/set_env.sh + ``` + +2. 获取模型权重 + + ``` + git lfs install + git clone https://huggingface.co/Alibaba-NLP/gte-large-en-v1.5 gtelargeenv15 + cd gtelargeenv15 + git checkout 104333d6af6f97649377c2afbde10a7704870c7b + ``` + +3. 
获取模型源码, 并把模型脚本放到权重目录下 + ``` + pip install transformers==4.44.0 + cd gtelargeenv15 + + # 拉取源码仓库, 并将模型源码和配置脚本放到权重目录下 + git clone https://huggingface.co/Alibaba-NLP/new-impl + cd new-impl/ + git checkout 40ced75c3017eb27626c9d4ea981bde21a2662f4 + cd - + cp new-impl/configuration.py ./ + cp new-impl/modeling.py ./ + + # 修改config.json中auto_map一项为: + "auto_map": { + "AutoConfig": "configuration.NewConfig", + "AutoModel": "modeling.NewModel", + "AutoModelForMaskedLM": "modeling.NewForMaskedLM", + "AutoModelForMultipleChoice": "modeling.NewForMultipleChoice", + "AutoModelForQuestionAnswering": "modeling.NewForQuestionAnswering", + "AutoModelForSequenceClassification": "modeling.NewForSequenceClassification", + "AutoModelForTokenClassification": "modeling.NewForTokenClassification" + } + ``` + + 目录结构 + ``` + gte_large_en_v1.5 + ├── gtelargeenv15 + └── config.json + └── ... + ├── README.md + ├── trace.py + ├── compile.py + └── infer.py + + ``` + +4. 获取模型依赖transformers, 并修改 + ``` + pip show transformers + ``` + 找到transformers 的安装路径,并修改transformers/modeling_utils.py文件 + 将1129行的 `extended_attention_mask = (1.0 - extended_attention_mask) * torch.finfo(dtype).min` + 修改为`extended_attention_mask = (1.0 - extended_attention_mask) * (-1000)` + +5. 获取数据集 +使用[banking77](https://huggingface.co/datasets/legacy-datasets/banking77)数据集进行测试,手动将其data目录下的test-00000-of-00001.parquet 数据集 下载到本地。 + +## 模型推理 +1. 将模型转成torchscript格式 +``` +python trace.py +``` + +2. 模型编译优化 +``` +python compile.py +``` + +3. 
模型推理 +``` +# 请指定数据集路径 +python infer.py +``` + + +## 性能 +|输入batch size | 输入 seqlen | 300I Pro 性能(FPS) | 800I A2 性能(FPS)| +| ---- | ---- | ----- |---- | +| 1 | 128 | 101.94| 127.50 | +| 8 | 128 | 24.08| 55.86 | + diff --git a/MindIE/MindIE-Torch/built-in/text_embedding/gte_large_en_v1.5/compile.py b/MindIE/MindIE-Torch/built-in/text_embedding/gte_large_en_v1.5/compile.py new file mode 100644 index 0000000000000000000000000000000000000000..3061831885ec070ca870dbbc6190dfb03019bd5e --- /dev/null +++ b/MindIE/MindIE-Torch/built-in/text_embedding/gte_large_en_v1.5/compile.py @@ -0,0 +1,24 @@ +import torch +import mindietorch + +traced_model_path = "traced.ts" +model = torch.jit.load(traced_model_path) +# 2 inputs +inputs = [mindietorch.Input(min_shape=(1, 1), max_shape=(1, 200), dtype=mindietorch.dtype.INT64), + mindietorch.Input(min_shape=(1, 1), max_shape=(1, 200), dtype=mindietorch.dtype.INT64), + mindietorch.Input(min_shape=(1, 1), max_shape=(1, 200), dtype=mindietorch.dtype.INT64)] + +compiled_module = mindietorch.compile( + model, + inputs=inputs, + precision_policy=mindietorch.PrecisionPolicy.FP16, + truncate_long_and_double=True, + require_full_comilation=False, + allow_tensor_replace_int=False, + min_blocks_size=3, + torch_executed_ops=[], + soc_version="Ascendxxx", # supports Atlas 300I Duo 推理卡, Atlas 800I A2推理产品 + optimization_level=0 +) + +compiled_module.save("compiled.ts") diff --git a/MindIE/MindIE-Torch/built-in/text_embedding/gte_large_en_v1.5/infer.py b/MindIE/MindIE-Torch/built-in/text_embedding/gte_large_en_v1.5/infer.py new file mode 100644 index 0000000000000000000000000000000000000000..dc3558b53f6000671c0361511aca04d5ac903ca1 --- /dev/null +++ b/MindIE/MindIE-Torch/built-in/text_embedding/gte_large_en_v1.5/infer.py @@ -0,0 +1,40 @@ +import torch +import mindietorch +import pandas as pd +from transformers import AutoTokenizer, AutoModel + +test_dataset = pd.read_parquet("/path/to/your/local/banking77/test-00000-of-00001.parquet") +test_texts = 
test_dataset['text'].tolist() +test_labels = test_dataset['label'].tolist() + + +path = "path_to_your_model_weights" +tokenizer = AutoTokenizer.from_pretrained(path) +cpu_model = AutoModel.from_pretrained(path, trust_remote_code=True, torchscript=True) + +compiled_model_path = "compiled.ts" +device_id = 1 +mindietorch.set_device(device_id) +npu_model = torch.jit.load(compiled_model_path) + + +cossim_list = [] + +for text in test_texts: + inputs = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=128) + + # 3 inputs + input_ids_npu = inputs["input_ids"].to(f"npu:{device_id}") + token_type_ids = inputs["token_type_ids"].to(f"npu:{device_id}") + attention_mask_npu = inputs["attention_mask"].to(f"npu:{device_id}") + + + with torch.no_grad(): + pred_cpu = cpu_model(**inputs)["last_hiddens_states"] + npu_outputs = npu_model(input_ids_npu, attention_mask_npu, token_type_ids) + last_hiddens_states = npu_outputs["last_hiddens_states"].to("cpu") + + cossim = torch.nn.functional.cosine_similarity(pred_cpu.reshape(1, -1), last_hiddens_states.reshape(1, -1)) + cossim_list.append(cossim) + +print(cossim) \ No newline at end of file diff --git a/MindIE/MindIE-Torch/built-in/text_embedding/gte_large_en_v1.5/trace.py b/MindIE/MindIE-Torch/built-in/text_embedding/gte_large_en_v1.5/trace.py new file mode 100644 index 0000000000000000000000000000000000000000..278a04976d609377de0f937843cda48decadcb4c --- /dev/null +++ b/MindIE/MindIE-Torch/built-in/text_embedding/gte_large_en_v1.5/trace.py @@ -0,0 +1,26 @@ +from transformers import AutoTokenizer, AutoModel +from torch import Tensor +import torch + +sentences = [ + "what is the capital of China?", + "how to implement quick sort in python?", + "Beijing", + "sorting algorithms" +] + +path = "path-to-your-model" +tokenizer = AutoTokenizer.from_pretrained(path) +model = AutoModel.from_pretrained(path, trust_remote_code=True, torchscript=True) +model.eval() + +encoded_input = tokenizer(sentences, 
padding=True, truncation=True, return_tensors='pt', max_length=128) +with torch.no_grad(): + model_output = model(**encoded_input) +# 3 inputs +input_ids = encoded_input["input_ids"] +token_type_ids = encoded_input["token_type_ids"] +attention_mask = encoded_input["attention_mask"] + +traced_model = torch.jit.trace(model, (input_ids, attention_mask, token_type_ids), strict=False) +traced_model.save("traced.ts") diff --git a/MindIE/MindIE-Torch/built-in/text_embedding/jina_embedding_v2_base_code/README.md b/MindIE/MindIE-Torch/built-in/text_embedding/jina_embedding_v2_base_code/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c7f5c5012405c5d68a13518371542344765541a --- /dev/null +++ b/MindIE/MindIE-Torch/built-in/text_embedding/jina_embedding_v2_base_code/README.md @@ -0,0 +1,125 @@ +# jina_embedding_v2_base_code模型-推理指导 + +- [概述](#概述) +- [推理环境准备](#推理环境准备) +- [快速上手](#快速上手) + - [获取源码](#获取源码) + - [模型推理](#模型推理) + +# 概述 + +该工程使用mindietorch部署jina_embedding_v2_base_code模型 + +- 模型路径: + ```bash + https://huggingface.co/jinaai/jina-embeddings-v2-base-code + ``` + + +# 推理环境准备 + +- 该模型需要以下插件与驱动 + + **表 1** 版本配套表 + + | 配套 | 版本 | 环境准备指导 | + | ------ | ------- | ------------ | + | Python | 3.10.13 | - | + | torch | 2.1.0+cpu | - | + | torch_audio | 2.1.0+cpu | - | + | CANN | 8.0.RC2 | - | + | MindIE | 1.0.RC3.B030 | - | + +# 快速上手 +## 获取源码 + +1. 安装mindie包 + + ```bash + # 安装mindie + chmod +x ./Ascend-mindie_xxx.run + ./Ascend-mindie_xxx.run --install + source /usr/local/Ascend/mindie/set_env.sh + ``` + +2. 获取模型权重 + + ``` + git lfs install + git clone https://huggingface.co/jinaai/jina-embeddings-v2-base-code jinaembeddingsv2basecode + cd jinaembeddingsv2basecode + git checkout fa8baa2e34f0fe28aae07f9bd7bcd1215de41dce + ``` + +3. 
获取模型源码, 并把模型脚本放到权重目录下 + ``` + pip install transformers==4.44.0 + cd jinaembeddingsv2basecode + + # 拉取源码仓库, 并将模型源码和配置脚本放到权重目录下 + git clone https://huggingface.co/jinaai/jina-bert-v2-qk-post-norm + cd jina-bert-v2-qk-post-norm/ + git checkout 2122141789228461a7a8b8d373d36283b567e3b4 + cd - + cp jina-bert-v2-qk-post-norm/modeling_bert.py ./ + cp jina-bert-v2-qk-post-norm/configuration_bert.py ./ + + # 修改config.json中auto_map一项为: + "auto_map": { + "AutoConfig": "configuration_bert.JinaBertConfig", + "AutoModel": "modeling_bert.JinaBertModel", + "AutoModelForMaskedLM": "modeling_bert.JinaBertForMaskedLM", + "AutoModelForSequenceClassification": "modeling_bert.JinaBertForSequenceClassification"} + ``` + + 目录结构 + ``` + jina_embedding_v2_base_code + ├── jinaembeddingsv2basecode + └── config.json + └── modeling_bert.py + └── configuration_bert.py + └── ... + ├── README.md + ├── trace.py + ├── compile.py + └── infer.py + + ``` + +4. 获取模型依赖transformers, 并修改 + ``` + pip show transformers + ``` + 找到transformers 的安装路径,并修改transformers/modeling_utils.py文件 + 将1129行的 `extended_attention_mask = (1.0 - extended_attention_mask) * torch.finfo(dtype).min` + 修改为`extended_attention_mask = (1.0 - extended_attention_mask) * (-1000)` + +5. 获取数据集 +使用[banking77](https://huggingface.co/datasets/legacy-datasets/banking77)数据集进行测试,手动将其data目录下的test-00000-of-00001.parquet 数据集 下载到本地。 + +## 模型推理 +1. 将模型转成torchscript格式 +``` +python trace.py +``` + +2. 模型编译优化 +``` +python compile.py +``` + +3. 
模型推理 +``` +# 请指定数据集路径 +python infer.py +``` + + +## 性能 +|输入batch size | 输入 seqlen | 300I Pro 性能(FPS) | 800I A2 性能(FPS)| +| ---- | ---- | ----- |---- | +| 1 | 128 | 237.69 | 287.21 | +| 8 | 128 | 57.15 | 127.19 | + + diff --git a/MindIE/MindIE-Torch/built-in/text_embedding/jina_embedding_v2_base_code/compile.py b/MindIE/MindIE-Torch/built-in/text_embedding/jina_embedding_v2_base_code/compile.py new file mode 100644 index 0000000000000000000000000000000000000000..512bd94b42e3099633b4d32b8e4798bb4b48b6b8 --- /dev/null +++ b/MindIE/MindIE-Torch/built-in/text_embedding/jina_embedding_v2_base_code/compile.py @@ -0,0 +1,23 @@ +import torch +import mindietorch + +traced_model_path = "traced.ts" +model = torch.jit.load(traced_model_path) +# 2 inputs +inputs = [mindietorch.Input(min_shape=(1, 1), max_shape=(1, 200), dtype=mindietorch.dtype.INT64), + mindietorch.Input(min_shape=(1, 1), max_shape=(1, 200), dtype=mindietorch.dtype.INT64)] + +compiled_module = mindietorch.compile( + model, + inputs=inputs, + precision_policy=mindietorch.PrecisionPolicy.FP16, + truncate_long_and_double=True, + require_full_comilation=False, + allow_tensor_replace_int=False, + min_blocks_size=3, + torch_executed_ops=[], + soc_version="Ascendxxx", # supports Atlas 300I Duo 推理卡, Atlas 800I A2推理产品 + optimization_level=0 +) + +compiled_module.save("compiled.ts") diff --git a/MindIE/MindIE-Torch/built-in/text_embedding/jina_embedding_v2_base_code/infer.py b/MindIE/MindIE-Torch/built-in/text_embedding/jina_embedding_v2_base_code/infer.py new file mode 100644 index 0000000000000000000000000000000000000000..1e080190a3e93ed0a7cda954c92cb45d9648f1f0 --- /dev/null +++ b/MindIE/MindIE-Torch/built-in/text_embedding/jina_embedding_v2_base_code/infer.py @@ -0,0 +1,39 @@ +import torch +import mindietorch +import pandas as pd +from transformers import AutoTokenizer, AutoModel + +test_dataset = pd.read_parquet("/path/to/your/local/banking77/test-00000-of-00001.parquet") +test_texts = test_dataset['text'].tolist() 
+test_labels = test_dataset['label'].tolist() + + +path = "path_to_your_model_weights" +tokenizer = AutoTokenizer.from_pretrained(path) +cpu_model = AutoModel.from_pretrained(path, trust_remote_code=True, torchscript=True) + +compiled_model_path = "compiled.ts" +device_id = 1 +mindietorch.set_device(device_id) +npu_model = torch.jit.load(compiled_model_path) + + +cossim_list = [] + +for text in test_texts: + inputs = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=128) + + # 2 inputs + input_ids_npu = inputs["input_ids"].to(f"npu:{device_id}") + attention_mask_npu = inputs["attention_mask"].to(f"npu:{device_id}") + + + with torch.no_grad(): + pred_cpu = cpu_model(**inputs)["last_hiddens_states"] + npu_outputs = npu_model(input_ids_npu, attention_mask_npu) + last_hiddens_states = npu_outputs["last_hiddens_states"].to("cpu") + + cossim = torch.nn.functional.cosine_similarity(pred_cpu.reshape(1, -1), last_hiddens_states.reshape(1, -1)) + cossim_list.append(cossim) + +print(cossim) \ No newline at end of file diff --git a/MindIE/MindIE-Torch/built-in/text_embedding/jina_embedding_v2_base_code/trace.py b/MindIE/MindIE-Torch/built-in/text_embedding/jina_embedding_v2_base_code/trace.py new file mode 100644 index 0000000000000000000000000000000000000000..3eb8bca64e8dbf6ce2c3114b3839380e476a0ac6 --- /dev/null +++ b/MindIE/MindIE-Torch/built-in/text_embedding/jina_embedding_v2_base_code/trace.py @@ -0,0 +1,23 @@ +from transformers import AutoTokenizer, AutoModel +from torch import Tensor +import torch + +sentences = [ + 'How do I access the index while iterating over a sequence with a for loop?', + '# Use the built-in enumerator\nfor idx, x in enumerate(xs):\n print(idx, x)', + ] + +path = "path-to-your-model" +tokenizer = AutoTokenizer.from_pretrained(path) +model = AutoModel.from_pretrained(path, trust_remote_code=True, torchscript=True) +model.eval() + +encoded_input = tokenizer(sentences, padding=True, truncation=True, 
return_tensors='pt', max_length=128) +with torch.no_grad(): + model_output = model(**encoded_input) +# 2 inputs +input_ids = encoded_input["input_ids"] +attention_mask = encoded_input["attention_mask"] + +traced_model = torch.jit.trace(model, (input_ids, attention_mask), strict=False) +traced_model.save("traced.ts") diff --git a/MindIE/MindIE-Torch/built-in/text_embedding/jina_embedding_v2_base_en/README.md b/MindIE/MindIE-Torch/built-in/text_embedding/jina_embedding_v2_base_en/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4493b929eb3750818795edf57460e1ead0369f65 --- /dev/null +++ b/MindIE/MindIE-Torch/built-in/text_embedding/jina_embedding_v2_base_en/README.md @@ -0,0 +1,107 @@ +# jina_embedding_v2_base_en模型-推理指导 + +- [概述](#概述) +- [推理环境准备](#推理环境准备) +- [快速上手](#快速上手) + - [获取源码](#获取源码) + - [模型推理](#模型推理) + +# 概述 + +该工程使用mindietorch部署jina_embedding_v2_base_en模型 + +- 模型路径: + ```bash + https://huggingface.co/jinaai/jina-embeddings-v2-base-en + ``` + + +# 推理环境准备 + +- 该模型需要以下插件与驱动 + + **表 1** 版本配套表 + + | 配套 | 版本 | 环境准备指导 | + | ------ | ------- | ------------ | + | Python | 3.10.13 | - | + | torch | 2.1.0+cpu | - | + | torch_audio | 2.1.0+cpu | - | + | CANN | 8.0.RC2 | - | + | MindIE | 1.0.RC3.B030 | - | + +# 快速上手 +## 获取源码 + +1. 安装mindie包 + + ```bash + # 安装mindie + chmod +x ./Ascend-mindie_xxx.run + ./Ascend-mindie_xxx.run --install + source /usr/local/Ascend/mindie/set_env.sh + ``` + +2. 获取模型权重 + + ``` + git lfs install + git clone https://huggingface.co/jinaai/jina-embeddings-v2-base-en jinaembeddingsv2baseen + cd jinaembeddingsv2baseen + git checkout 6e85f575bc273f1fd840a658067d0157933c83f0 + ``` + +3. 
获取模型源码, 并把模型脚本放到权重目录下 + ``` + pip install transformers==4.44.0 + cd jinaembeddingsv2baseen + + # 拉取源码仓库, 并将模型源码和配置脚本放到权重目录下 + git clone https://huggingface.co/jinaai/jina-bert-implementation + cd jina-bert-implementation/ + git checkout f3ec4cf7de7e561007f27c9efc7148b0bd713f81 + cd - + cp jina-bert-implementation/modeling_bert.py ./ + cp jina-bert-implementation/configuration_bert.py ./ + + # 修改config.json中auto_map一项为: + "auto_map": { + "AutoConfig": "configuration_bert.JinaBertConfig", + "AutoModelForMaskedLM": "modeling_bert.JinaBertForMaskedLM", + "AutoModel": "modeling_bert.JinaBertModel", + "AutoModelForSequenceClassification": "modeling_bert.JinaBertForSequenceClassification"} + ``` + + 目录结构 + ``` + jina_embedding_v2_base_en + ├── jinaembeddingsv2baseen + └── config.json + └── ... + ├── README.md + └── infer.py + + ``` +4. 获取模型依赖transformers, 并修改 + ``` + pip show transformers + ``` + 找到transformers 的安装路径,并修改transformers/modeling_utils.py文件 + 将1129行的 `extended_attention_mask = (1.0 - extended_attention_mask) * torch.finfo(dtype).min` + 修改为`extended_attention_mask = (1.0 - extended_attention_mask) * (-1000)` + +5. 
获取数据集 +使用[banking77](https://huggingface.co/datasets/legacy-datasets/banking77)数据集进行测试,手动将其data目录下的test-00000-of-00001.parquet 数据集 下载到本地。 + +## 模型推理 +``` +# 请指定数据集路径 +python infer.py +``` + + +## 性能 +|输入batch size | 输入 seqlen | 300I Pro 性能(FPS) | 800I A2 性能(FPS)| +| ---- | ---- | ----- |---- | +| 1 | 128 | 270.68 | 416.12 | +| 8 | 128 | 61.86 | 208.15 | diff --git a/MindIE/MindIE-Torch/built-in/text_embedding/jina_embedding_v2_base_en/infer.py b/MindIE/MindIE-Torch/built-in/text_embedding/jina_embedding_v2_base_en/infer.py new file mode 100644 index 0000000000000000000000000000000000000000..8bf59598172272574213bde5ba18ee5a9dc58d7d --- /dev/null +++ b/MindIE/MindIE-Torch/built-in/text_embedding/jina_embedding_v2_base_en/infer.py @@ -0,0 +1,60 @@ +import torch +import mindietorch +import pandas as pd +from transformers import AutoTokenizer, AutoModel + +path = "path-to-your-model" +tokenizer = AutoTokenizer.from_pretrained(path) +model = AutoModel.from_pretrained(path, trust_remote_code=True, torchscript=True) +model.eval() + +# compile +inputs = [mindietorch.Input(min_shape=(1, 1), max_shape=(1, 200), dtype=mindietorch.dtype.INT64), + mindietorch.Input(min_shape=(1, 1), max_shape=(1, 200), dtype=mindietorch.dtype.INT64)] + +compiled_module = mindietorch.compile( + model, + inputs=inputs, + ir="dynamo", + precision_policy=mindietorch.PrecisionPolicy.FP16, + truncate_long_and_double=True, + require_full_comilation=False, + allow_tensor_replace_int=False, + min_blocks_size=3, + torch_executed_ops=[], + soc_version="Ascendxxx", # supports Atlas 300I Duo 推理卡, Atlas 800I A2推理产品 + optimization_level=0 +) + + +# infer +test_dataset = pd.read_parquet("/path/to/your/local/banking77/test-00000-of-00001.parquet") +test_texts = test_dataset['text'].tolist() +test_labels = test_dataset['label'].tolist() + +cpu_model = model + +device_id = 1 +mindietorch.set_device(device_id) +npu_model = compiled_module + + +cossim_list = [] + +for text in test_texts: + inputs = tokenizer(text, 
return_tensors="pt", truncation=True, padding="max_length", max_length=128) + + # 2 inputs + input_ids_npu = inputs["input_ids"].to(f"npu:{device_id}") + attention_mask_npu = inputs["attention_mask"].to(f"npu:{device_id}") + + + with torch.no_grad(): + pred_cpu = cpu_model(**inputs)["last_hiddens_states"] + npu_outputs = npu_model(input_ids_npu, attention_mask_npu) + last_hiddens_states = npu_outputs["last_hiddens_states"].to("cpu") + + cossim = torch.nn.functional.cosine_similarity(pred_cpu.reshape(1, -1), last_hiddens_states.reshape(1, -1)) + cossim_list.append(cossim) + +print(cossim) \ No newline at end of file diff --git a/MindIE/MindIE-Torch/built-in/text_embedding/multilingual_e5_large_instruct/README.md b/MindIE/MindIE-Torch/built-in/text_embedding/multilingual_e5_large_instruct/README.md new file mode 100644 index 0000000000000000000000000000000000000000..140bdda91b0a19da4d53e65fd270f0ad73988e36 --- /dev/null +++ b/MindIE/MindIE-Torch/built-in/text_embedding/multilingual_e5_large_instruct/README.md @@ -0,0 +1,100 @@ +# intfloat/multilingual-e5-large模型-推理指导 + +- [概述](#概述) +- [推理环境准备](#推理环境准备) +- [快速上手](#快速上手) + - [获取源码](#获取源码) + - [模型推理](#模型推理) + +# 概述 + +该工程使用mindietorch部署multilingual模型 + +- 模型路径: + ```bash + https://huggingface.co/intfloat/multilingual-e5-large + ``` + + +# 推理环境准备 + +- 该模型需要以下插件与驱动 + + **表 1** 版本配套表 + + | 配套 | 版本 | 环境准备指导 | + | ------ | ------- | ------------ | + | Python | 3.10.13 | - | + | torch | 2.1.0+cpu | - | + | torch_audio | 2.1.0+cpu | - | + | CANN | 8.0.RC2 | - | + | MindIE | 1.0.RC3.B030 | - | + +# 快速上手 +## 获取源码 + +1. 安装mindie包 + + ```bash + # 安装mindie + chmod +x ./Ascend-mindie_xxx.run + ./Ascend-mindie_xxx.run --install + source /usr/local/Ascend/mindie/set_env.sh + ``` + +2. 获取模型权重 + + ``` + git lfs install + git clone https://huggingface.co/intfloat/multilingual-e5-large-instruct + ``` + +3. 
获取模型源码 + ``` + pip install transformers==4.44.0 + ``` + ``` + multilingual_e5_large_instruct + ├── multilingual-e5-large + └── config.json + └── ... + ├── README.md + ├── trace.py + ├── compile.py + └── infer.py + + ``` +4. 获取模型依赖transformers, 并修改 + ``` + pip show transformers + ``` + 找到transformers 的安装路径,并修改transformers/modeling_utils.py文件 + 将1129行的 `extended_attention_mask = (1.0 - extended_attention_mask) * torch.finfo(dtype).min` + 修改为`extended_attention_mask = (1.0 - extended_attention_mask) * (-1000)` + +5. 获取数据集 +使用[banking77](https://huggingface.co/datasets/legacy-datasets/banking77)数据集进行测试,手动将其data目录下的test-00000-of-00001.parquet 数据集 下载到本地。 + +## 模型推理 +1. 将模型转成torchscript格式 +``` +python trace.py +``` + +2. 模型编译优化 +``` +python compile.py +``` + +3. 模型推理 +``` +# 请指定数据集路径 +python infer.py +``` + + +## 性能 +|输入batch size | 输入 seqlen | 300I Pro 性能(FPS) | 800I A2 性能(FPS)| +| ---- | ---- | ----- |---- | +| 1 | 128 | 90.80 | 240.24 | +| 8 | 128 | 30.88 | 97.96 | \ No newline at end of file diff --git a/MindIE/MindIE-Torch/built-in/text_embedding/multilingual_e5_large_instruct/compile.py b/MindIE/MindIE-Torch/built-in/text_embedding/multilingual_e5_large_instruct/compile.py new file mode 100644 index 0000000000000000000000000000000000000000..dc563046aef1a4d1adcfa3b59051d641750fa33e --- /dev/null +++ b/MindIE/MindIE-Torch/built-in/text_embedding/multilingual_e5_large_instruct/compile.py @@ -0,0 +1,23 @@ +import torch +import mindietorch + +traced_model_path = "traced.ts" +model = torch.jit.load(traced_model_path) + +inputs = [mindietorch.Input(min_shape=(1, 1), max_shape=(1, 200), dtype=mindietorch.dtype.INT64), + mindietorch.Input(min_shape=(1, 1), max_shape=(1, 200), dtype=mindietorch.dtype.INT64)] + +compiled_module = mindietorch.compile( + model, + inputs=inputs, + precision_policy=mindietorch.PrecisionPolicy.FP16, + truncate_long_and_double=True, + require_full_comilation=False, + allow_tensor_replace_int=False, + min_blocks_size=3, + torch_executed_ops=[], + 
soc_version="Ascendxxx", # supports Atlas 300I Duo 推理卡, Atlas 800I A2推理产品 + optimization_level=0 +) + +compiled_module.save("multilingual_compiled.ts") diff --git a/MindIE/MindIE-Torch/built-in/text_embedding/multilingual_e5_large_instruct/infer.py b/MindIE/MindIE-Torch/built-in/text_embedding/multilingual_e5_large_instruct/infer.py new file mode 100644 index 0000000000000000000000000000000000000000..bd403942fc7ce33d56fe8d41994aab46cb83217a --- /dev/null +++ b/MindIE/MindIE-Torch/built-in/text_embedding/multilingual_e5_large_instruct/infer.py @@ -0,0 +1,39 @@ +import torch +import mindietorch +import pandas as pd +from transformers import AutoTokenizer, AutoModel + +test_dataset = pd.read_parquet("/path/to/your/local/banking77/test-00000-of-00001.parquet") +test_texts = test_dataset['text'].tolist() +test_labels = test_dataset['label'].tolist() + + +path = "path_to_your_model_weights" +tokenizer = AutoTokenizer.from_pretrained(path) +cpu_model = AutoModel.from_pretrained(path, trust_remote_code=True, torchscript=True) + +compiled_model_path = "multilingual_compiled.ts" +device_id = 1 +mindietorch.set_device(device_id) +npu_model = torch.jit.load(compiled_model_path) + + +cossim_list = [] + +for text in test_texts: + inputs = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=128) + + # 2 inputs + input_ids_npu = inputs["input_ids"].to(f"npu:{device_id}") + attention_mask_npu = inputs["attention_mask"].to(f"npu:{device_id}") + + + with torch.no_grad(): + pred_cpu = cpu_model(**inputs)[0] + npu_outputs = npu_model(input_ids_npu, attention_mask_npu) + last_hiddens_states = npu_outputs[0].to("cpu") + + cossim = torch.nn.functional.cosine_similarity(pred_cpu.reshape(1, -1), last_hiddens_states.reshape(1, -1)) + cossim_list.append(cossim) + +print(cossim) \ No newline at end of file diff --git a/MindIE/MindIE-Torch/built-in/text_embedding/multilingual_e5_large_instruct/trace.py 
b/MindIE/MindIE-Torch/built-in/text_embedding/multilingual_e5_large_instruct/trace.py new file mode 100644 index 0000000000000000000000000000000000000000..abeea5604df50b8dcdc96274c0c8b848813a9cb0 --- /dev/null +++ b/MindIE/MindIE-Torch/built-in/text_embedding/multilingual_e5_large_instruct/trace.py @@ -0,0 +1,23 @@ +from transformers import AutoTokenizer, AutoModel +from torch import Tensor +import torch + +sentences = ['query: how much protein should a female eat', + 'query: 南瓜的家常做法', + "passage: As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day. But, as you can see from this chart, you'll need to increase that if you're expecting or training for a marathon. Check out the chart below to see how much protein you should be eating each day.", + "passage: 1.清炒南瓜丝 原料:嫩南瓜半个 调料:葱、盐、白糖、鸡精 做法: 1、南瓜用刀薄薄的削去表面一层皮,用勺子刮去瓤 2、擦成细丝(没有擦菜板就用刀慢慢切成细丝) 3、锅烧热放油,入葱花煸出香味 4、入南瓜丝快速翻炒一分钟左右,放盐、一点白糖和鸡精调味出锅 2.香葱炒南瓜 原料:南瓜1只 调料:香葱、蒜末、橄榄油、盐 做法: 1、将南瓜去皮,切成片 2、油锅8成热后,将蒜末放入爆香 3、爆香后,将南瓜片放入,翻炒 4、在翻炒的同时,可以不时地往锅里加水,但不要太多 5、放入盐,炒匀 6、南瓜差不多软和绵了之后,就可以关火 7、撒入香葱,即可出锅"] + +path = "path-to-your-model" +tokenizer = AutoTokenizer.from_pretrained(path) +model = AutoModel.from_pretrained(path, trust_remote_code=True, torchscript=True) +model.eval() + +encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt') +with torch.no_grad(): + model_output = model(**encoded_input) +# 2 inputs +input_ids = encoded_input["input_ids"] +attention_mask = encoded_input["attention_mask"] + +traced_model = torch.jit.trace(model, (input_ids, attention_mask)) +traced_model.save("multilingual__traced.ts") diff --git a/MindIE/MindIE-Torch/built-in/text_embedding/nomic_embed_text_v1.5/README.md b/MindIE/MindIE-Torch/built-in/text_embedding/nomic_embed_text_v1.5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..29241045d07bdd5d8b2662ec4ea261701485ad38 --- /dev/null +++ 
b/MindIE/MindIE-Torch/built-in/text_embedding/nomic_embed_text_v1.5/README.md @@ -0,0 +1,121 @@ +# nomic_embed_text_v1.5 模型-推理指导 + +- [概述](#概述) +- [推理环境准备](#推理环境准备) +- [快速上手](#快速上手) + - [获取源码](#获取源码) + - [模型推理](#模型推理) + +# 概述 + +该工程使用mindietorch部署nomic_embed_text_v1.5模型 + +- 模型路径: + ```bash + https://huggingface.co/nomic-ai/nomic-embed-text-v1.5 + ``` + + +# 推理环境准备 + +- 该模型需要以下插件与驱动 + + **表 1** 版本配套表 + + | 配套 | 版本 | 环境准备指导 | + | ------ | ------- | ------------ | + | Python | 3.10.13 | - | + | torch | 2.1.0+cpu | - | + | torch_audio | 2.1.0+cpu | - | + | CANN | 8.0.RC2 | - | + | MindIE | 1.0.RC3.B030 | - | + +# 快速上手 +## 获取源码 + +1. 安装mindie包 + + ```bash + # 安装mindie + chmod +x ./Ascend-mindie_xxx.run + ./Ascend-mindie_xxx.run --install + source /usr/local/Ascend/mindie/set_env.sh + ``` + +2. 获取模型权重 + + ``` + git lfs install + git clone https://huggingface.co/nomic-ai/nomic-embed-text-v1.5 nomicembedtextv15 + cd nomicembedtextv15 + git checkout c4f06e01594879a8ccc5c40b0b0a0e2ad46e3a62 + ``` + +3. 获取模型源码, 并把模型脚本放到权重目录下 + ``` + pip install transformers==4.44.0 + cd nomicembedtextv15 + + # 拉取源码仓库, 并将模型源码和配置脚本放到权重目录下 + git clone https://huggingface.co/nomic-ai/nomic-bert-2048 + cd nomic-bert-2048/ + git checkout e55a7d4324f65581af5f483e830b80f34680e8ff + cd - + cp nomic-bert-2048/configuration_hf_nomic_bert.py ./ + cp nomic-bert-2048/modeling_hf_nomic_bert.py ./ + + # 修改config.json中auto_map一项为: + "auto_map": { + "AutoConfig": "configuration_hf_nomic_bert.NomicBertConfig", + "AutoModel": "modeling_hf_nomic_bert.NomicBertModel", + "AutoModelForMaskedLM": "modeling_hf_nomic_bert.NomicBertForPreTraining" + } + ``` + + 目录结构 + ``` + nomic_embed_text_v1.5 + ├── nomicembedtextv15 + └── config.json + └── ... + ├── README.md + ├── trace.py + ├── compile.py + └── infer.py + + ``` + +4. 
获取模型依赖transformers, 并修改 + ``` + pip show transformers + ``` + 找到transformers 的安装路径,并修改transformers/modeling_utils.py文件 + 将1129行的 `extended_attention_mask = (1.0 - extended_attention_mask) * torch.finfo(dtype).min` + 修改为`extended_attention_mask = (1.0 - extended_attention_mask) * (-1000)` + +5. 获取数据集 +使用[banking77](https://huggingface.co/datasets/legacy-datasets/banking77)数据集进行测试,手动将其data目录下的test-00000-of-00001.parquet 数据集 下载到本地。 + +## 模型推理 +1. 将模型转成torchscript格式 +``` +python trace.py +``` + +2. 模型编译优化 +``` +python compile.py +``` + +3. 模型推理 +``` +# 请指定数据集路径 +python infer.py +``` + + +## 性能 +|输入batch size | 输入 seqlen | 300I Pro 性能(FPS) | 800I A2 性能(FPS)| +| ---- | ---- | ----- |---- | +| 1 | 128 | 268.92 | 274.23 | +| 8 | 128 | 79.27 | 155.58 | \ No newline at end of file diff --git a/MindIE/MindIE-Torch/built-in/text_embedding/nomic_embed_text_v1.5/compile.py b/MindIE/MindIE-Torch/built-in/text_embedding/nomic_embed_text_v1.5/compile.py new file mode 100644 index 0000000000000000000000000000000000000000..512bd94b42e3099633b4d32b8e4798bb4b48b6b8 --- /dev/null +++ b/MindIE/MindIE-Torch/built-in/text_embedding/nomic_embed_text_v1.5/compile.py @@ -0,0 +1,23 @@ +import torch +import mindietorch + +traced_model_path = "traced.ts" +model = torch.jit.load(traced_model_path) +# 2 inputs +inputs = [mindietorch.Input(min_shape=(1, 1), max_shape=(1, 200), dtype=mindietorch.dtype.INT64), + mindietorch.Input(min_shape=(1, 1), max_shape=(1, 200), dtype=mindietorch.dtype.INT64)] + +compiled_module = mindietorch.compile( + model, + inputs=inputs, + precision_policy=mindietorch.PrecisionPolicy.FP16, + truncate_long_and_double=True, + require_full_comilation=False, + allow_tensor_replace_int=False, + min_blocks_size=3, + torch_executed_ops=[], + soc_version="Ascendxxx", # supports Atlas 300I Duo 推理卡, Atlas 800I A2推理产品 + optimization_level=0 +) + +compiled_module.save("compiled.ts") diff --git a/MindIE/MindIE-Torch/built-in/text_embedding/nomic_embed_text_v1.5/infer.py 
b/MindIE/MindIE-Torch/built-in/text_embedding/nomic_embed_text_v1.5/infer.py new file mode 100644 index 0000000000000000000000000000000000000000..df20118617e8094f395eaa956607949d8cb6c1b6 --- /dev/null +++ b/MindIE/MindIE-Torch/built-in/text_embedding/nomic_embed_text_v1.5/infer.py @@ -0,0 +1,39 @@ +import torch +import mindietorch +import pandas as pd +from transformers import AutoTokenizer, AutoModel + +test_dataset = pd.read_parquet("/path/to/your/local/banking77/test-00000-of-00001.parquet") +test_texts = test_dataset['text'].tolist() +test_labels = test_dataset['label'].tolist() + + +path = "path_to_your_model_weights" +tokenizer = AutoTokenizer.from_pretrained(path) +cpu_model = AutoModel.from_pretrained(path, trust_remote_code=True, torchscript=True) + +compiled_model_path = "compiled.ts" +device_id = 1 +mindietorch.set_device(device_id) +npu_model = torch.jit.load(compiled_model_path) + + +cossim_list = [] + +for text in test_texts: + inputs = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=128) + + # 2 inputs + input_ids_npu = inputs["input_ids"].to(f"npu:{device_id}") + attention_mask_npu = inputs["attention_mask"].to(f"npu:{device_id}") + + + with torch.no_grad(): + pred_cpu = cpu_model(**inputs)[0] + npu_outputs = npu_model(input_ids_npu, attention_mask_npu) + last_hiddens_states = npu_outputs[0].to("cpu") + + cossim = torch.nn.functional.cosine_similarity(pred_cpu.reshape(1, -1), last_hiddens_states.reshape(1, -1)) + cossim_list.append(cossim) + +print(cossim) \ No newline at end of file diff --git a/MindIE/MindIE-Torch/built-in/text_embedding/nomic_embed_text_v1.5/trace.py b/MindIE/MindIE-Torch/built-in/text_embedding/nomic_embed_text_v1.5/trace.py new file mode 100644 index 0000000000000000000000000000000000000000..680a95add6c873ff17b4bb321dcadeb5f0b782ab --- /dev/null +++ b/MindIE/MindIE-Torch/built-in/text_embedding/nomic_embed_text_v1.5/trace.py @@ -0,0 +1,18 @@ +from 
transformers import AutoTokenizer, AutoModel +import torch + +sentences = ['search_document: TSNE is a dimensionality reduction algorithm created by Laurens van Der Maaten'] +path = "path-to-your-model" +tokenizer = AutoTokenizer.from_pretrained(path) +model = AutoModel.from_pretrained(path, trust_remote_code=True, torchscript=True) +model.eval() + +encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt') +with torch.no_grad(): + model_output = model(**encoded_input) + +input_ids = encoded_input["input_ids"] +attention_mask = encoded_input["attention_mask"] + +traced_model = torch.jit.trace(model, (input_ids, attention_mask), strict=False) +traced_model.save("traced.ts") diff --git a/MindIE/MindIE-Torch/built-in/text_embedding/nomic_embed_text_v1/README.md b/MindIE/MindIE-Torch/built-in/text_embedding/nomic_embed_text_v1/README.md new file mode 100644 index 0000000000000000000000000000000000000000..162d7c7c496a3494db8bfd67ba2e241c8c67f396 --- /dev/null +++ b/MindIE/MindIE-Torch/built-in/text_embedding/nomic_embed_text_v1/README.md @@ -0,0 +1,119 @@ +# nomic_embed_text_v1 模型-推理指导 + +- [概述](#概述) +- [推理环境准备](#推理环境准备) +- [快速上手](#快速上手) + - [获取源码](#获取源码) + - [模型推理](#模型推理) + +# 概述 + +该工程使用mindietorch部署 nomic_embed_text_v1 模型 + +- 模型路径: + ```bash + https://huggingface.co/nomic-ai/nomic-embed-text-v1 + ``` + + +# 推理环境准备 + +- 该模型需要以下插件与驱动 + + **表 1** 版本配套表 + + | 配套 | 版本 | 环境准备指导 | + | ------ | ------- | ------------ | + | Python | 3.10.13 | - | + | torch | 2.1.0+cpu | - | + | torch_audio | 2.1.0+cpu | - | + | CANN | 8.0.RC2 | - | + | MindIE | 1.0.RC3.B030 | - | + +# 快速上手 +## 获取源码 + +1. 安装mindie包 + + ```bash + # 安装mindie + chmod +x ./Ascend-mindie_xxx.run + ./Ascend-mindie_xxx.run --install + source /usr/local/Ascend/mindie/set_env.sh + ``` + +2. 
获取模型权重 + + ``` + git lfs install + git clone https://huggingface.co/nomic-ai/nomic-embed-text-v1 nomicembedtextv1 + cd nomicembedtextv1 + git checkout ec7a86b7066613e0a8acf87e1fcaaf23f8733dd6 + ``` + +3. 获取模型源码, 并把模型脚本放到权重目录下 + ``` + pip install transformers==4.44.0 + cd nomicembedtextv1 + + # 拉取源码仓库, 并将模型源码和配置脚本放到权重目录下 + git clone https://huggingface.co/nomic-ai/nomic-bert-2048 + cd nomic-bert-2048/ + git checkout e55a7d4324f65581af5f483e830b80f34680e8ff + cd - + cp nomic-bert-2048/configuration_hf_nomic_bert.py ./ + cp nomic-bert-2048/modeling_hf_nomic_bert.py ./ + + # 修改config.json中auto_map一项为: + "auto_map": { + "AutoConfig": "configuration_hf_nomic_bert.NomicBertConfig", + "AutoModel": "modeling_hf_nomic_bert.NomicBertModel", + "AutoModelForMaskedLM": "modeling_hf_nomic_bert.NomicBertForPreTraining" + } + ``` + + 目录结构 + ``` + nomic_embed_text_v1 + ├── nomicembedtextv1 + └── config.json + └── ... + ├── README.md + ├── trace.py + ├── compile.py + └── infer.py + + ``` +4. 获取模型依赖transformers, 并修改 + ``` + pip show transformers + ``` + 找到transformers 的安装路径,并修改transformers/modeling_utils.py文件 + 将1129行的 `extended_attention_mask = (1.0 - extended_attention_mask) * torch.finfo(dtype).min` + 修改为`extended_attention_mask = (1.0 - extended_attention_mask) * (-1000)` + +5. 获取数据集 +使用[banking77](https://huggingface.co/datasets/legacy-datasets/banking77)数据集进行测试,手动将其data目录下的test-00000-of-00001.parquet 数据集 下载到本地。 + +## 模型推理 +1. 将模型转成torchscript格式 +``` +python trace.py +``` + +2. 模型编译优化 +``` +python compile.py +``` + +3. 
模型推理 +``` +# 请指定数据集路径 +python infer.py +``` + +## 性能 +|输入batch size | 输入 seqlen | 300I Pro 性能(FPS) | 800I A2 性能(FPS)| +| ---- | ---- | ----- |---- | +| 1 | 128 | 270.82 | 276.86 | +| 8 | 128 | 85.76 | 156.87 | diff --git a/MindIE/MindIE-Torch/built-in/text_embedding/nomic_embed_text_v1/compile.py b/MindIE/MindIE-Torch/built-in/text_embedding/nomic_embed_text_v1/compile.py new file mode 100644 index 0000000000000000000000000000000000000000..512bd94b42e3099633b4d32b8e4798bb4b48b6b8 --- /dev/null +++ b/MindIE/MindIE-Torch/built-in/text_embedding/nomic_embed_text_v1/compile.py @@ -0,0 +1,23 @@ +import torch +import mindietorch + +traced_model_path = "traced.ts" +model = torch.jit.load(traced_model_path) +# 2 inputs +inputs = [mindietorch.Input(min_shape=(1, 1), max_shape=(1, 200), dtype=mindietorch.dtype.INT64), + mindietorch.Input(min_shape=(1, 1), max_shape=(1, 200), dtype=mindietorch.dtype.INT64)] + +compiled_module = mindietorch.compile( + model, + inputs=inputs, + precision_policy=mindietorch.PrecisionPolicy.FP16, + truncate_long_and_double=True, + require_full_comilation=False, + allow_tensor_replace_int=False, + min_blocks_size=3, + torch_executed_ops=[], + soc_version="Ascendxxx", # supports Atlas 300I Duo 推理卡, Atlas 800I A2推理产品 + optimization_level=0 +) + +compiled_module.save("compiled.ts") diff --git a/MindIE/MindIE-Torch/built-in/text_embedding/nomic_embed_text_v1/infer.py b/MindIE/MindIE-Torch/built-in/text_embedding/nomic_embed_text_v1/infer.py new file mode 100644 index 0000000000000000000000000000000000000000..1e080190a3e93ed0a7cda954c92cb45d9648f1f0 --- /dev/null +++ b/MindIE/MindIE-Torch/built-in/text_embedding/nomic_embed_text_v1/infer.py @@ -0,0 +1,39 @@ +import torch +import mindietorch +import pandas as pd +from transformers import AutoTokenizer, AutoModel + +test_dataset = pd.read_parquet("/path/to/your/local/banking77/test-00000-of-00001.parquet") +test_texts = test_dataset['text'].tolist() +test_labels = test_dataset['label'].tolist() + + +path 
= "path_to_your_model_weights" +tokenizer = AutoTokenizer.from_pretrained(path) +cpu_model = AutoModel.from_pretrained(path, trust_remote_code=True, torchscript=True) + +compiled_model_path = "compiled.ts" +device_id = 1 +mindietorch.set_device(device_id) +npu_model = torch.jit.load(compiled_model_path) + + +cossim_list = [] + +for text in test_texts: + inputs = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=128) + + # 2 inputs + input_ids_npu = inputs["input_ids"].to(f"npu:{device_id}") + attention_mask_npu = inputs["attention_mask"].to(f"npu:{device_id}") + + + with torch.no_grad(): + pred_cpu = cpu_model(**inputs)[0] + npu_outputs = npu_model(input_ids_npu, attention_mask_npu) + last_hiddens_states = npu_outputs[0].to("cpu") + + cossim = torch.nn.functional.cosine_similarity(pred_cpu.reshape(1, -1), last_hiddens_states.reshape(1, -1)) + cossim_list.append(cossim) + +print(cossim) \ No newline at end of file diff --git a/MindIE/MindIE-Torch/built-in/text_embedding/nomic_embed_text_v1/trace.py b/MindIE/MindIE-Torch/built-in/text_embedding/nomic_embed_text_v1/trace.py new file mode 100644 index 0000000000000000000000000000000000000000..4990f30faadbb0ad7834557001a38f043d8a16ef --- /dev/null +++ b/MindIE/MindIE-Torch/built-in/text_embedding/nomic_embed_text_v1/trace.py @@ -0,0 +1,19 @@ +from transformers import AutoTokenizer, AutoModel +from torch import Tensor +import torch + +sentences = ['search_document: TSNE is a dimensionality reduction algorithm created by Laurens van Der Maaten'] +path = "path-to-your-model" +tokenizer = AutoTokenizer.from_pretrained(path) +model = AutoModel.from_pretrained(path, trust_remote_code=True, torchscript=True) +model.eval() + +encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt') +with torch.no_grad(): + model_output = model(**encoded_input) +# 2 inputs +input_ids = encoded_input["input_ids"] +attention_mask = 
encoded_input["attention_mask"] + +traced_model = torch.jit.trace(model, (input_ids, attention_mask), strict=False) +traced_model.save("traced.ts")