From 802779b289acd0a3aa4856f04f33a60e31c80006 Mon Sep 17 00:00:00 2001 From: LiuWei Date: Wed, 13 Nov 2024 02:51:15 +0000 Subject: [PATCH 1/9] !2875 update hashmap import export counts name and type. Merge pull request !2875 from LiuWei/fix_es_type_name --- tf_adapter/ops/aicpu/npu_embedding_ops.cc | 4 ++-- tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py | 11 ++++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/tf_adapter/ops/aicpu/npu_embedding_ops.cc b/tf_adapter/ops/aicpu/npu_embedding_ops.cc index 495a2db78..5487dd609 100644 --- a/tf_adapter/ops/aicpu/npu_embedding_ops.cc +++ b/tf_adapter/ops/aicpu/npu_embedding_ops.cc @@ -490,7 +490,7 @@ REGISTER_OP("EmbeddingHashmapExport") .Input("table_names: string") .Input("global_step: TStep") .Input("keys: num * int64") - .Input("counts: num * int64") + .Input("counters: num * uint64") .Input("filter_flags: num * uint8") .Input("values: num * float32") .Attr("num: int >= 1") @@ -527,7 +527,7 @@ REGISTER_OP("EmbeddingHashmapImport") .Input("table_names: string") .Input("global_step: TStep") .Output("keys: num * int64") - .Output("counts: num * int64") + .Output("counters: num * uint64") .Output("filter_flags: num * uint8") .Output("values: num * float32") .Attr("embedding_dims: list(int)") diff --git a/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py b/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py index a29a93c5e..417119584 100644 --- a/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py +++ b/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py @@ -516,15 +516,16 @@ def embedding_hashmap_table_size_v2(table_ids, filter_export_flag, export_mode): # @param table_names string 类型 # @param global_step int32/int64 类型 # @param keys int64 类型 -# @param counts int64 类型 +# @param counters uint64 类型 # @param filter_flag uint8 类型 # @param values float32 类型 # @param num int64 类型 -def embedding_hashmap_export_v2(file_path, table_ids, table_names, global_step, keys, counts, filter_flag, values, num): 
+def embedding_hashmap_export_v2(file_path, table_ids, table_names, global_step, + keys, counters, filter_flag, values, num): """ host embedding hashmap export. """ gen_npu_cpu_ops.EmbeddingHashmapExport( file_path=file_path, table_ids=table_ids, table_names=table_names, global_step=global_step, - keys=keys, counts=counts, filter_flag=filter_flag, values=values, num=num) + keys=keys, counters=counters, filter_flag=filter_flag, values=values, num=num) ## 提供host侧hashmap文件大小功能 @@ -550,8 +551,8 @@ def embedding_hashmap_file_size_v2(file_path, table_ids, table_names, global_ste # @param global_step int32/int64 类型 # @param embedding_dims int 类型 # @param num int64 类型 -# @return keys(int64)/counts(int64)/filter_flag(uint8)/values(float32) -def embedding_feature_mapping_import(file_path, table_ids, table_sizes, table_names, global_step, embedding_dims, num): +# @return keys(int64)/counters(uint64)/filter_flag(uint8)/values(float32) +def embedding_hashmap_import_v2(file_path, table_ids, table_sizes, table_names, global_step, embedding_dims, num): """ host embedding feature mapping import. 
""" result = gen_npu_cpu_ops.EmbeddingHashmapImport( file_path=file_path, table_ids=table_ids, table_sizes=table_sizes, -- Gitee From 157946113aa936ea8330f5c916004dae145e9fe0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=99=8F=E5=90=8D=E9=A6=99?= Date: Tue, 1 Apr 2025 02:26:02 +0000 Subject: [PATCH 2/9] =?UTF-8?q?!2954=20fix=20destruction=20order=20Merge?= =?UTF-8?q?=20pull=20request=20!2954=20from=20=E6=99=8F=E5=90=8D=E9=A6=99/?= =?UTF-8?q?master-0331?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tf_adapter_2.x/npu_device/core/npu_hdc.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tf_adapter_2.x/npu_device/core/npu_hdc.cpp b/tf_adapter_2.x/npu_device/core/npu_hdc.cpp index 9b5b049f7..f34d52182 100644 --- a/tf_adapter_2.x/npu_device/core/npu_hdc.cpp +++ b/tf_adapter_2.x/npu_device/core/npu_hdc.cpp @@ -350,7 +350,7 @@ tensorflow::Status HdcChannel::SendTensorsByAcl(acltdtTensorType acl_type, auto acl_status = ACL_ERROR_NONE; do { acl_status = acltdtSendTensor(handle_.acl_handle, acl_dataset, -1 /* no timeout */); - } while (acl_status == ACL_ERROR_RT_QUEUE_FULL); + } while ((acl_status == ACL_ERROR_RT_QUEUE_FULL) && (!destroyed_)); TF_RETURN_IF_ERROR(DestroyAclDataset(acl_dataset)); if (acl_status != ACL_ERROR_NONE) { return tensorflow::errors::Internal("Acl send data failed, acl status:", acl_status); -- Gitee From 101245ce31de423e1e45ae664c363c0b1cf0145c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E5=BE=B7=E9=92=8A?= Date: Wed, 30 Apr 2025 02:42:45 +0000 Subject: [PATCH 3/9] =?UTF-8?q?!2962=20add=20drop=5Fout=5Fv3=20tfa=20Merge?= =?UTF-8?q?=20pull=20request=20!2962=20from=20=E6=9D=8E=E5=BE=B7=E9=92=8A/?= =?UTF-8?q?master?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tf_adapter/kernels/aicore/npu_aicore_ops.cc | 10 ++++++++++ tf_adapter/ops/aicore/npu_aicore_ops.cc | 14 ++++++++++++++ .../python/npu_bridge/npu_cpu/npu_cpu_ops.py | 19 
+++++++++++++++++++ 3 files changed, 43 insertions(+) diff --git a/tf_adapter/kernels/aicore/npu_aicore_ops.cc b/tf_adapter/kernels/aicore/npu_aicore_ops.cc index 83a027b6e..b66740143 100644 --- a/tf_adapter/kernels/aicore/npu_aicore_ops.cc +++ b/tf_adapter/kernels/aicore/npu_aicore_ops.cc @@ -91,6 +91,16 @@ class FastGeluGradOp : public tensorflow::OpKernel { } }; +class StatelessDropoutOp : public tensorflow::OpKernel { +public: + explicit StatelessDropoutOp(tensorflow::OpKernelConstruction *context) : OpKernel(context) {} + ~StatelessDropoutOp() override {} + void Compute(tensorflow::OpKernelContext *context) override {} +}; + +REGISTER_KERNEL_BUILDER(Name("StatelessDropout") +.Device(tensorflow::DEVICE_CPU), StatelessDropoutOp); + REGISTER_KERNEL_BUILDER( Name("FastGeluGrad") . diff --git a/tf_adapter/ops/aicore/npu_aicore_ops.cc b/tf_adapter/ops/aicore/npu_aicore_ops.cc index fbe6035bd..dfaac8e8f 100644 --- a/tf_adapter/ops/aicore/npu_aicore_ops.cc +++ b/tf_adapter/ops/aicore/npu_aicore_ops.cc @@ -97,6 +97,20 @@ REGISTER_OP("DynamicGruV2") return Status::OK(); }); +REGISTER_OP("StatelessDropout") + .Input("x: T") + .Input("noise_shape: int64") + .Input("p: T") + .Input("seed: int64") + .Input("offset: int64") + .Output("y: T") + .Attr("T: {float16, float32, bfloat16}") + .SetIsStateful() + .SetShapeFn([](shape_inference::InferenceContext *c) { + c->set_output(0, c->input(0)); + return Status::OK(); + }); + REGISTER_OP("DynamicGruV2Grad") .Input("x: T") .Input("weight_input: T") diff --git a/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py b/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py index 417119584..ba6ed2b5d 100644 --- a/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py +++ b/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py @@ -87,6 +87,25 @@ def dense_image_warp(image, flow, name=None): return result +## 提供host侧StatelessDropout功能 +# @param x float32,float16,bfloat16 类型 +# @param noise_shape int64 类型 +# @param p float32,float16,bfloat16 类型 +# 
@param seed int64 类型 +# @param offset int64 类型 +# @return values float32,float16,bfloat16 类型 +def stateless_dropout(x, noise_shape, p, seed, offset): + """ host stateless_dropout. """ + result = gen_npu_cpu_ops.StatelessDropout( + x=x, + noise_shape=noise_shape, + p=p, + seed=seed, + offset=offset + ) + return result + + ## DenseImageWarp的梯度函数 @ops.RegisterGradient("DenseImageWarp") def dense_image_warp_grad(op, grad): -- Gitee From 784c6eb7028f14b9c0f959105eea4cff595cd87b Mon Sep 17 00:00:00 2001 From: sihaixianyu Date: Wed, 28 May 2025 06:54:16 +0000 Subject: [PATCH 4/9] !2976 Register new operator 'EmbeddingHashTableEvict'. Merge pull request !2976 from sihaixianyu/master --- tf_adapter/kernels/aicore/npu_aicore_ops.cc | 12 +++++++++- tf_adapter/ops/aicore/npu_aicore_ops.cc | 10 +++++++++ .../python/npu_bridge/npu_cpu/npu_cpu_ops.py | 22 +++++++++++++++++++ 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/tf_adapter/kernels/aicore/npu_aicore_ops.cc b/tf_adapter/kernels/aicore/npu_aicore_ops.cc index b66740143..cfa22fb75 100644 --- a/tf_adapter/kernels/aicore/npu_aicore_ops.cc +++ b/tf_adapter/kernels/aicore/npu_aicore_ops.cc @@ -121,4 +121,14 @@ REGISTER_KERNEL_BUILDER( Device(tensorflow::DEVICE_CPU) .TypeConstraint("T"), FastGeluGradOp); -} // namespace tensorflow \ No newline at end of file + +class EmbeddingHashTableEvictOp : public tensorflow::OpKernel { +public: + explicit EmbeddingHashTableEvictOp(tensorflow::OpKernelConstruction *context) : OpKernel(context) {} + ~EmbeddingHashTableEvictOp() override {} + void Compute(tensorflow::OpKernelContext *context) override {} +}; + +REGISTER_KERNEL_BUILDER(Name("EmbeddingHashTableEvict") +.Device(tensorflow::DEVICE_CPU), EmbeddingHashTableEvictOp); +} // namespace tensorflow diff --git a/tf_adapter/ops/aicore/npu_aicore_ops.cc b/tf_adapter/ops/aicore/npu_aicore_ops.cc index dfaac8e8f..acfad59f3 100644 --- a/tf_adapter/ops/aicore/npu_aicore_ops.cc +++ b/tf_adapter/ops/aicore/npu_aicore_ops.cc @@ -792,5 
+792,15 @@ REGISTER_OP("TabulateFusionGrad") c->set_output(1, c->input(3)); return Status::OK(); }); + +REGISTER_OP("EmbeddingHashTableEvict") + .Input("table_handle: int64") + .Input("keys: int64") + .Attr("table_cap: int") + .Attr("embedding_dim: int") + .Attr("init_mode: string='constant'") + .Attr("const_val: float=0.0") + .SetIsStateful() + .SetShapeFn(shape_inference::NoOutputs); } // namespace } // namespace tensorflow diff --git a/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py b/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py index ba6ed2b5d..de9586551 100644 --- a/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py +++ b/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py @@ -577,3 +577,25 @@ def embedding_hashmap_import_v2(file_path, table_ids, table_sizes, table_names, file_path=file_path, table_ids=table_ids, table_sizes=table_sizes, table_names=table_names, global_step=global_step, embedding_dims=embedding_dims, num=num) return result + + +## 提供device侧FeatureMapping Evict功能 +# @param table_handle int64 类型 +# @param keys int64 类型 +# @param sampled_values float 类型 +# @param table_cap int 类型 +# @param embedding_dim int 类型 +# @param init_mode string 类型 +# @param const_val int 类型 +# @return table_handle int64 类型 +def embedding_hashtable_evict(table_handle, keys, sampled_values, table_cap, embedding_dim, init_mode, const_val): + """device embedding feature mapping evict.""" + return gen_npu_cpu_ops.EmbeddingHashTableEvict( + table_handle=table_handle, + keys=keys, + sampled_values=sampled_values, + table_cap=table_cap, + embedding_dim=embedding_dim, + init_mode=init_mode, + const_val=const_val, + ) -- Gitee From 7bafbbe2ef615b900367bbe4694b5a7222759299 Mon Sep 17 00:00:00 2001 From: cathychan0027 Date: Thu, 29 May 2025 01:05:34 +0000 Subject: [PATCH 5/9] !2981 embedding hash Merge pull request !2981 from cathychan0027/embeddinghash --- tf_adapter/kernels/aicore/npu_aicore_ops.cc | 50 ++++++++++ tf_adapter/ops/aicore/npu_aicore_ops.cc | 87 
++++++++++++++++ .../python/npu_bridge/npu_cpu/npu_cpu_ops.py | 99 +++++++++++++++++++ 3 files changed, 236 insertions(+) diff --git a/tf_adapter/kernels/aicore/npu_aicore_ops.cc b/tf_adapter/kernels/aicore/npu_aicore_ops.cc index cfa22fb75..34de0097e 100644 --- a/tf_adapter/kernels/aicore/npu_aicore_ops.cc +++ b/tf_adapter/kernels/aicore/npu_aicore_ops.cc @@ -43,6 +43,36 @@ class FastGeluOp : public tensorflow::OpKernel { } }; +class EmbeddingHashTableImportOp : public tensorflow::OpKernel { +public: + explicit EmbeddingHashTableImportOp(tensorflow::OpKernelConstruction *context) : OpKernel(context) {} + ~EmbeddingHashTableImportOp() override {} + void Compute(tensorflow::OpKernelContext *context) override {} +}; + +REGISTER_KERNEL_BUILDER(Name("EmbeddingHashTableImport") +.Device(tensorflow::DEVICE_CPU), EmbeddingHashTableImportOp); + +class EmbeddingHashTableExportOp : public tensorflow::OpKernel { +public: + explicit EmbeddingHashTableExportOp(tensorflow::OpKernelConstruction *context) : OpKernel(context) {} + ~EmbeddingHashTableExportOp() override {} + void Compute(tensorflow::OpKernelContext *context) override {} +}; + +REGISTER_KERNEL_BUILDER(Name("EmbeddingHashTableExport") +.Device(tensorflow::DEVICE_CPU), EmbeddingHashTableExportOp); + +class EmbeddingHashTableLookupOrInsertOp : public tensorflow::OpKernel { +public: + explicit EmbeddingHashTableLookupOrInsertOp(tensorflow::OpKernelConstruction *context) : OpKernel(context) {} + ~EmbeddingHashTableLookupOrInsertOp() override {} + void Compute(tensorflow::OpKernelContext *context) override {} +}; + +REGISTER_KERNEL_BUILDER(Name("EmbeddingHashTableLookupOrInsert") +.Device(tensorflow::DEVICE_CPU), EmbeddingHashTableLookupOrInsertOp); + REGISTER_KERNEL_BUILDER( Name("FastGelu") . 
@@ -122,6 +152,26 @@ Device(tensorflow::DEVICE_CPU) .TypeConstraint("T"), FastGeluGradOp); +class InitEmbeddingHashTableOp : public tensorflow::OpKernel { +public: + explicit InitEmbeddingHashTableOp(tensorflow::OpKernelConstruction *context) : OpKernel(context) {} + ~InitEmbeddingHashTableOp() override {} + void Compute(tensorflow::OpKernelContext *context) override {} +}; + +REGISTER_KERNEL_BUILDER(Name("InitEmbeddingHashTable").Device(tensorflow::DEVICE_CPU), InitEmbeddingHashTableOp); + +class EmbeddingHashTableApplyAdamWOp : public tensorflow::OpKernel { +public: + explicit EmbeddingHashTableApplyAdamWOp(tensorflow::OpKernelConstruction *context) + : OpKernel(context) {} + ~EmbeddingHashTableApplyAdamWOp() override {} + void Compute(tensorflow::OpKernelContext *context) override {} +}; + +REGISTER_KERNEL_BUILDER(Name("EmbeddingHashTableApplyAdamW").Device(tensorflow::DEVICE_CPU), + EmbeddingHashTableApplyAdamWOp); + class EmbeddingHashTableEvictOp : public tensorflow::OpKernel { public: explicit EmbeddingHashTableEvictOp(tensorflow::OpKernelConstruction *context) : OpKernel(context) {} diff --git a/tf_adapter/ops/aicore/npu_aicore_ops.cc b/tf_adapter/ops/aicore/npu_aicore_ops.cc index acfad59f3..bb5e99e7f 100644 --- a/tf_adapter/ops/aicore/npu_aicore_ops.cc +++ b/tf_adapter/ops/aicore/npu_aicore_ops.cc @@ -44,6 +44,63 @@ REGISTER_OP("FastGeluGrad") .Attr("T: realnumbertype") .SetShapeFn(tensorflow::shape_inference::MergeBothInputsShapeFn); +REGISTER_OP("EmbeddingHashTableImport") + .Input("table_handles: int64") + .Input("embedding_dims: int64") + .Input("bucket_sizes: int64") + .Input("keys: num * int64") + .Input("counters: num * uint64") + .Input("filter_flags: num * uint8") + .Input("values: num * float32") + .Attr("num: int >= 1") + .SetShapeFn(tensorflow::shape_inference::NoOutputs); + +REGISTER_OP("EmbeddingHashTableExport") + .Input("table_handles: int64") + .Input("table_sizes: int64") + .Input("embedding_dims: int64") + .Input("bucket_sizes: int64") 
+ .Output("keys: num * int64") + .Output("counters: num * uint64") + .Output("filter_flags: num * uint8") + .Output("values: num * float") + .Attr("export_mode: string = 'all'") + .Attr("filter_export_flag: bool = false") + .Attr("num: int >= 1") + .SetIsStateful() + .SetShapeFn([](shape_inference::InferenceContext *c) { + int64 num = 0; + c->GetAttr("num", &num); + for (int64_t i = 0; i < num; ++i) { + c->set_output(i, c->Vector(c->UnknownDim())); + c->set_output(i + num, c->Vector(c->UnknownDim())); + c->set_output(i + 2 * num, c->Vector(c->UnknownDim())); + c->set_output(i + 3 * num, c->Vector(c->UnknownDim())); + } + return Status::OK(); + }); + +REGISTER_OP("EmbeddingHashTableApplyAdamW") + .Input("table_handle: int64") + .Input("keys: int64") + .Input("m: Ref(T)") + .Input("v: Ref(T)") + .Input("beta1_power: Ref(T)") + .Input("beta2_power: Ref(T)") + .Input("lr: T") + .Input("weight_decay: T") + .Input("beta1: T") + .Input("beta2: T") + .Input("epsilon: T") + .Input("grad: T") + .Input("max_grad_norm: Ref(T)") + .Attr("embedding_dim: int") + .Attr("bucket_size: int") + .Attr("amsgrad: bool = false") + .Attr("maximize: bool = false") + .Attr("T: {float16, float32}") + .SetShapeFn(tensorflow::shape_inference::NoOutputs); + REGISTER_OP("DynamicGruV2") .Input("x: T") .Input("weight_input: T") @@ -459,6 +516,27 @@ REGISTER_OP("DynamicRnnGrad") return Status::OK(); }); +REGISTER_OP("EmbeddingHashTableLookupOrInsert") + .Input("table_handle: int64") + .Input("keys:int64") + .Output("values: float") + .Attr("bucket_size:int") + .Attr("embedding_dim:int") + .Attr("filter_mode:string='no_filter'") + .Attr("filter_freq:int=0") + .Attr("default_key_or_value:bool = false") + .Attr("default_key: int = 0") + .Attr("default_value: float = 0.0") + .SetIsStateful() + .SetShapeFn([](InferenceContext* c) { + int64 num = 0; + c->GetAttr("embedding_dim", &num); + auto key_num = c->input(1); + int64_t nsample = InferenceContext::Value(c->Dim(key_num, 0)); + c->set_output(0, 
c->MakeShape({c->MakeDim(nsample), c->MakeDim(num)})); + return Status::OK(); + }); + REGISTER_OP("LRUCacheV2") .Input("index_list: T") .Input("data: Ref(dtype)") @@ -793,6 +871,15 @@ REGISTER_OP("TabulateFusionGrad") return Status::OK(); }); +REGISTER_OP("InitEmbeddingHashTable") + .Input("table_handle: int64") + .Input("sampled_values: float") + .Attr("bucket_size : int") + .Attr("embedding_dim : int") + .Attr("initializer_mode : string='random'") + .Attr("constant_value : float=0.0") + .SetShapeFn(shape_inference::NoOutputs); + REGISTER_OP("EmbeddingHashTableEvict") .Input("table_handle: int64") .Input("keys: int64") diff --git a/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py b/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py index de9586551..e7c5fa61f 100644 --- a/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py +++ b/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py @@ -25,6 +25,27 @@ from npu_bridge.helper import helper gen_npu_cpu_ops = helper.get_gen_ops() +## 提供device侧FeatureMapping LookupOrInsert功能 +# @param table_handle int64 类型 +# @param keys int64 类型 +# @param bucket_size int 类型 +# @param embedding_dim int 类型 +# @param filter_mode string 类型 +# @param filter_freq int 类型 +# @param default_key_or_value bool 类型 +# @param default_key int 类型 +# @param default_value float 类型 +# @return values float 类型 +def embedding_hashtable_lookup_or_insert(table_handle, keys, bucket_size, embedding_dim, filter_mode, filter_freq, + default_key_or_value, default_key, default_value): + """ device embedding feature mapping lookup or insert. 
""" + result = gen_npu_cpu_ops.EmbeddingHashTableLookupOrInsert( + table_handle=table_handle, keys=keys, bucket_size=bucket_size, embedding_dim=embedding_dim, + filter_mode=filter_mode, filter_freq=filter_freq, default_key_or_value=default_key_or_value, + default_key=default_key, default_value=default_value) + return result + + ## 提供embeddingrankid功能 # @param addr_tensor tensorflow的tensor类型,embeddingrankid操作的输入; # @param index tensorflow的tensor类型,embeddingrankid操作的输入; @@ -579,6 +600,84 @@ def embedding_hashmap_import_v2(file_path, table_ids, table_sizes, table_names, return result +## EmbeddingHashTable Init功能 +# @param table_handle int64 类型 +# @param sampled_values float 类型 +# @param bucket_size int 类型 +# @param embedding_dim int 类型 +# @param initializer_mode string 类型 +# @param constant_value int 类型 +def init_embedding_hashtable(table_handle, sampled_values, bucket_size, embedding_dim, initializer_mode, + constant_value): + """ device init embedding hashtable. """ + result = gen_npu_cpu_ops.InitEmbeddingHashTable( + table_handle=table_handle, sampled_values=sampled_values, bucket_size=bucket_size, embedding_dim=embedding_dim, + initializer_mode=initializer_mode, constant_value=constant_value) + return result + + +## 提供host侧hashTable导入功能 +# @param table_handles int64 类型 +# @param embedding_dims int64 类型 +# @param bucket_sizes int64 类型 +# @param keys int64 类型 +# @param counters uint64 类型 +# @param filter_flags uint8 类型 +# @param values float 类型 +def embedding_hash_table_import(table_handles, embedding_dims, bucket_sizes, keys, counters, filter_flags, values): + """ host embedding feature hash table import. 
""" + result = gen_npu_cpu_ops.EmbeddingHashTableImport( + table_handles=table_handles, embedding_dims=embedding_dims, bucket_sizes=bucket_sizes, + keys=keys, counters=counters, filter_flags=filter_flags, values=values) + return result + + +## 提供host侧hashTable导出功能 +# @param table_handles int64 类型 +# @param table_sizes int64 类型 +# @param embedding_dims int64 类型 +# @param bucket_sizes int64 类型 +# @param export_mode string 类型 +# @param filtered_export_flag bool 类型 +def embedding_hash_table_export(table_handles, table_sizes, embedding_dims, bucket_sizes, export_mode='all', + filter_export_flag=False): + """ host embedding feature hash table export. """ + result = gen_npu_cpu_ops.EmbeddingHashTableExport( + table_handles=table_handles, table_sizes=table_sizes, embedding_dims=embedding_dims, bucket_sizes=bucket_sizes, + export_mode=export_mode, filter_export_flag=filter_export_flag) + return result + + +## EmbeddingHashTableApplyAdamW AdamW 更新功能 +# @param table_handle int64 类型 +# @param keys int64 类型 +# @param m float16, float32 类型 +# @param v float16, float32 类型 +# @param beta1_power float16, float32 类型 +# @param beta2_power float16, float32 类型 +# @param lr float16, float32 类型 +# @param weight_decay float16, float32 类型 +# @param beta1 float16, float32 类型 +# @param beta2 float16, float32 类型 +# @param epsilon float16, float32 类型 +# @param grad float16, float32 类型 +# @param max_grad_norm float16, float32 类型 +# @param embedding_dim int 类型 +# @param bucket_size int 类型 +# @param amsgrad bool 类型 +# @param maximize bool 类型 +def embedding_hashtable_apply_adam_w(table_handle, keys, m, v, beta1_power, beta2_power, lr, weight_decay, + beta1, beta2, epsilon, grad, max_grad_norm, embedding_dim, + bucket_size, amsgrad, maximize): + """ device update embedding hashtable using AdamW. 
""" + result = gen_npu_cpu_ops.EmbeddingHashTableApplyAdamW( + table_handle=table_handle, keys=keys, m=m, v=v, beta1_power=beta1_power, beta2_power=beta2_power, + lr=lr, weight_decay=weight_decay, beta1=beta1, beta2=beta2, epsilon=epsilon, grad=grad, + max_grad_norm=max_grad_norm, embedding_dim=embedding_dim, bucket_size=bucket_size, + amsgrad=amsgrad, maximize=maximize) + return result + + ## 提供device侧FeatureMapping Evict功能 # @param table_handle int64 类型 # @param keys int64 类型 -- Gitee From 1bae98e4483314507faf31317b99783d4fa3a689 Mon Sep 17 00:00:00 2001 From: yangyongqiang_huawei Date: Thu, 29 May 2025 07:27:58 +0000 Subject: [PATCH 6/9] =?UTF-8?q?!2984=20\embdding=5Fhash=5Ftable=5Flook=5Fu?= =?UTF-8?q?p=5For=5Finsert=E7=AE=97=E5=AD=90=E6=96=B0=E5=A2=9E=E5=B1=9E?= =?UTF-8?q?=E6=80=A7=20Merge=20pull=20request=20!2984=20from=20yangyongqia?= =?UTF-8?q?ng=5Fhuawei/master?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tf_adapter/ops/aicore/npu_aicore_ops.cc | 2 ++ tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tf_adapter/ops/aicore/npu_aicore_ops.cc b/tf_adapter/ops/aicore/npu_aicore_ops.cc index bb5e99e7f..91913fd56 100644 --- a/tf_adapter/ops/aicore/npu_aicore_ops.cc +++ b/tf_adapter/ops/aicore/npu_aicore_ops.cc @@ -527,6 +527,8 @@ REGISTER_OP("EmbeddingHashTableLookupOrInsert") .Attr("default_key_or_value:bool = false") .Attr("default_key: int = 0") .Attr("default_value: float = 0.0") + .Attr("filter_key_flag: bool = false") + .Attr("filter_key: int = -1") .SetIsStateful() .SetShapeFn([](InferenceContext* c) { int64 num = 0; diff --git a/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py b/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py index e7c5fa61f..96ac42738 100644 --- a/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py +++ b/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py @@ -35,14 +35,16 @@ gen_npu_cpu_ops = 
helper.get_gen_ops() # @param default_key_or_value bool 类型 # @param default_key int 类型 # @param default_value float 类型 +# @param filter_key_flag bool 类型 +# @param filter_key int 类型 # @return values float 类型 def embedding_hashtable_lookup_or_insert(table_handle, keys, bucket_size, embedding_dim, filter_mode, filter_freq, - default_key_or_value, default_key, default_value): + default_key_or_value, default_key, default_value, filter_key_flag, filter_key): """ device embedding feature mapping lookup or insert. """ result = gen_npu_cpu_ops.EmbeddingHashTableLookupOrInsert( table_handle=table_handle, keys=keys, bucket_size=bucket_size, embedding_dim=embedding_dim, filter_mode=filter_mode, filter_freq=filter_freq, default_key_or_value=default_key_or_value, - default_key=default_key, default_value=default_value) + default_key=default_key, default_value=default_value, filter_key_flag=filter_key_flag, filter_key=filter_key) return result -- Gitee From 869cf82af68c2048bd79a24e05ff5c01a7aa7881 Mon Sep 17 00:00:00 2001 From: zangyan Date: Fri, 30 May 2025 02:10:18 +0000 Subject: [PATCH 7/9] !2973 modify readme Merge pull request !2973 from zangyan/master --- README.md | 31 ++++++++++--- tf_adapter/README.md | 94 +++++++++++++++++----------------------- tf_adapter_2.x/README.md | 83 +++++++++++++++-------------------- 3 files changed, 100 insertions(+), 108 deletions(-) diff --git a/README.md b/README.md index 1fb46eda4..f0a90cbbc 100755 --- a/README.md +++ b/README.md @@ -1,16 +1,19 @@ # TensorFlow Adapter For Ascend -[View English](README.en.md) - ## 简介 -TensorFlow Adapter For Ascend(简称TF Adapter)致力于将昇腾AI处理器卓越的运算能力,便捷地提供给使用Tensorflow框架的开发者。 +TensorFlow Adapter For Ascend(简称TF Adapter)是昇腾提供的TensorFlow框架适配插件,让Tensorflow框架的开发者可以使用昇腾AI处理器的算力。 开发者只需安装TF Adapter插件,并在现有TensorFlow脚本中添加少量配置,即可实现在昇腾AI处理器上加速自己的训练任务。 -## TensorFlow版本适配详情 -tensorflow 1.15.x参考[tf_adapter 1.x](./tf_adapter/README.md) +## 版本配套说明 +### CANN版本配套说明 +TF Adapter源码标签的命名规则为:tfa\_{标签名}\_{版本号},其中{版本号}与配套的CANN版本号保持一致。 
-tensorflow 2.6.x参考[tf_adapter 2.x](./tf_adapter_2.x/README.md) +> **需要注意:** +> +> 使用TF Adapter时,请选择与配套CANN版本一致的标签源码,使用master分支可能存在版本不匹配的风险。 +### 支持的TensorFlow版本 +TF Adapter支持的TensorFlow版本为TensorFlow 1.15与TensorFlow 2.6.5。 ## 支持的产品型号 @@ -20,6 +23,20 @@ Atlas A2 训练系列产品 Atlas 推理系列产品(Ascend 310P处理器)(仅支持TensorFlow 1.15在线推理特性) -## License +## 如何使用源码 + +若您的TensorFlow框架版本是1.15,本源码仓的编译安装等详细使用方法请参见[tf_adapter 1.x](./tf_adapter/README.md)。 + +若您的TensorFlow框架版本是2.6,本源码仓的编译安装等详细使用方法请参见[tf_adapter 2.x](./tf_adapter_2.x/README.md)。 + +## 贡献指南 + +针对tensorflow仓,开发者准备本地代码与提交PR时需要重点关注如下几点: + +1. 提交PR时,请按照PR模板仔细填写本次PR的业务背景、目的、方案等信息。 +2. 若您的修改不是简单的bug修复,而是涉及到新增特性、新增接口、新增配置参数或者修改代码流程等,请务必先通过Issue进行方案讨论,以避免您的代码被拒绝合入。若您不确定本次修改是否可被归为“简单的bug修复”,亦可通过提交Issue进行方案讨论。 +3. push代码前,请务必保证已经完成了基础功能测试和网络测试。 + +## 许可证 [Apache License 2.0](LICENSE) diff --git a/tf_adapter/README.md b/tf_adapter/README.md index 860e149d0..8fc57ce94 100755 --- a/tf_adapter/README.md +++ b/tf_adapter/README.md @@ -1,14 +1,12 @@ # Ascend Adapter for TF1.X -[View English](README.en.md) - ## 简介 Ascend Adapter for TF1.X 致力于将昇腾AI处理器卓越的运算能力,便捷地提供给使用Tensorflow框架的开发者。 开发者只需安装TF Adapter插件,并在现有TensorFlow脚本中添加少量配置,即可实现在昇腾AI处理器上加速自己的训练任务。 ![tfadapter](https://images.gitee.com/uploads/images/2020/1027/094640_8f305b88_8175427.jpeg "framework.jpg") -您可以通过阅读[昇腾社区文档中心](https://www.hiascend.com/zh/document)第三方框架适配的Tensorflow中的TensorFlow 1.15模型迁移章节获取更多使用细节。 +您可以通过阅读[昇腾社区文档中心-TensorFlow 1.15模型迁移](https://hiascend.com/document/redirect/canntfmigr)手册获取更多使用细节。 ## 源码安装 您可以通过此仓中的源代码构建TF Adapter软件包并将其部署在昇腾AI处理器所在环境上。 @@ -17,62 +15,59 @@ Ascend Adapter for TF1.X 致力于将昇腾AI处理器卓越的运算能力, Ascend Adapter 软件包需要在Linux OS环境上进行编译,同时环境上需要安装一下软件依赖: -#### 1. python3.7 - -Ascend Adapter 需要使用python3.7版本进行编译 +- **Python3.7** -#### 2. 
tensorflow1.15.0 + Ascend Adapter 需要使用python3.7版本进行编译 -Ascend Adapter 与 Tensorflow 有严格的匹配关系,通过源码构建TF Adapter软件包前,您需要确保已经正确安装了 [Tensorflow v1.15.0版本](https://www.tensorflow.org/install/pip) ,安装方式可参见[昇腾社区文档中心](https://www.hiascend.com/zh/document)中心第三方框架适配的Tensorflow中的"TensorFlow 1.15模型迁移>环境准备>安装开源框架Tensorflow"章节。 +- **TensorFlow 1.15.0** -#### 3. GCC >= 7.3.0 + Ascend Adapter 与 Tensorflow 有严格的匹配关系,通过源码构建TF Adapter软件包前,您需要确保已经正确安装了 [Tensorflow v1.15.0版本](https://www.tensorflow.org/install/pip) ,安装方式可参见[昇腾社区文档中心-TensorFlow 1.15模型迁移](https://hiascend.com/document/redirect/canntfmigr)中的"TensorFlow 1.15模型迁移 > 环境准备 > 安装开源框架TensorFlow 1.15"章节。 -Ascend Adapter 需要使用7.3.0及更高版本的gcc编译 +- **GCC >= 7.3.0** -#### 4. CMake >= 3.14.0 + Ascend Adapter 需要使用7.3.0及更高版本的gcc编译 -Ascend Adapter 需要使用3.14.0及更高版本的cmake编译 +- **CMake >= 3.14.0** -#### 5. CANN开发者套件 + Ascend Adapter 需要使用3.14.0及更高版本的cmake编译 -CANN开发者套件下载方法 +- **CANN开发者套件** -请参见"[开放项目与CANN版本配套表](https://gitee.com/ascend/cann-community/blob/master/README.md#cannversionmap)"获取对应的CANN开发套件包`Ascend-cann-toolkit__linux-.run`,CANN开发套件包支持的安装方式及操作系统请参见配套版本的[用户手册](https://hiascend.com/document/redirect/CannCommunityInstSoftware)。 + 请根据"[CANN版本配套说明](../README.md#cannversionmap)"获取对应的CANN软件版本号,并在“[CANN软件下载页面](https://www.hiascend.com/developer/download/community/result?module=cann)”下载对应版本的CANN开发套件包`Ascend-cann-toolkit__linux-.run`,CANN开发套件包支持的安装方式及操作系统请参见配套版本的[用户手册](https://hiascend.com/document/redirect/CannCommunityInstSoftware)。 -CANN开发者套件安装方法 + CANN开发者套件安装方法: -执行安装命令时,请确保安装用户对软件包具有可执行权限。 - -使用默认路径安装 -```shell -./Ascend-cann-toolkit__linux-.run --install -``` -若使用root用户安装,安装完成后相关软件存储在`/usr/local/Ascend/ascend-toolkit/latest`路径下。 - -若使用非root用户安装,安装完成后相关软件存储在`$HOME/Ascend/ascend-toolkit/latest`路径下。 - -指定路径安装 -```bash -./Ascend-cann-toolkit__linux-.run --install --install-path=${ASCEND_INSTALLED_PATH} -``` -安装完成后,相关软件存储在`${ASCEND_INSTALLED_PATH}`指定路径下。 + 执行安装命令时,请确保安装用户对软件包具有可执行权限。 -#### 6. 
swig + - 使用默认路径安装 + ```shell + ./Ascend-cann-toolkit__linux-.run --install + ``` + - 若使用root用户安装,安装完成后相关软件存储在`/usr/local/Ascend/ascend-toolkit/latest`路径下。 + + - 若使用非root用户安装,安装完成后相关软件存储在`$HOME/Ascend/ascend-toolkit/latest`路径下。 -Ascend Adapter 源码编译依赖SWIG -可执行如下命令进行SWIG的安装: + - 指定路径安装 + ```bash + ./Ascend-cann-toolkit__linux-.run --install --install-path=${ASCEND_INSTALLED_PATH} + ``` + 安装完成后,相关软件存储在`${ASCEND_INSTALLED_PATH}`指定路径下。 + +- **swig** + Ascend Adapter 源码编译依赖SWIG,可执行如下命令进行SWIG的安装: + `apt-get install swig` + +- **CommunitySDK包** -#### 7. CommunitySDK包 + Ascend Adapter 软件的编译还需要依赖CommunitySDK包,此包可在“[CANN软件下载页面](https://www.hiascend.com/developer/download/community/result?module=cann)”下载。 -Ascend Adapter 软件的编译还需要依赖CommunitySDK包,此包可以到下面链接下载(https://www.hiascend.com/developer/download/community/result?module=cann&cann=8.2.RC1.alpha001) - -安装CommunitySDK到toolkit下: -```bash -./Ascend-cann-communitysdk_*.run --full --install-path=${ASCEND_INSTALLED_PATH} -``` -> 注意:CommunitySDK一定要跟CANN包安装在同一路径下,否则编译报错。 + 安装CommunitySDK到toolkit下: + ```bash + ./Ascend-cann-communitysdk_*.run --full --install-path=${ASCEND_INSTALLED_PATH} + ``` + > 注意:CommunitySDK一定要跟CANN包安装在同一路径下,否则编译报错。 ### 源码下载 @@ -90,6 +85,7 @@ chmod +x build.sh ``` > 请注意:执行编译命令前,请确保环境中已配置了以下环境变量: +> > 1. 配置CANN开发套件包的环境变量: ``` @@ -113,16 +109,6 @@ pip3 install ./build/tfadapter/dist/python/dist/npu_bridge-1.15.0-py3-none-any.w ``` 执行完成后,TF Adapter相关文件安装到python解释器搜索路径下,例如“/usr/local/python3.7.5/lib/python3.7/siite-packages”路径,安装后文件夹为“npu_bridge”与“npu_bridge-1.15.0.dist-info”。 -## 贡献 -欢迎参与贡献。 - -## 社区版本规划 -https://gitee.com/ascend/tensorflow/wikis/Home?sort_id=3076366 - -## Release Notes - -Release Notes请参考[RELEASE](RELEASE.md). - ## FAQ #### 1. 
执行./build.sh时提示配置swig的路径 @@ -157,6 +143,6 @@ pip3 install swig ![CMakeList.txt文件](https://gitee.com/guopeian/tensorflow/raw/fix_readme/tf_adapter/docs/cmake.png "cmake.png") -## License +## 许可证 -[Apache License 2.0](LICENSE) +[Apache License 2.0](LICENSE) \ No newline at end of file diff --git a/tf_adapter_2.x/README.md b/tf_adapter_2.x/README.md index 049c6d52c..a29c903d8 100644 --- a/tf_adapter_2.x/README.md +++ b/tf_adapter_2.x/README.md @@ -13,60 +13,57 @@ TF2.X插件,并在现有Tensorflow 2.x脚本中添加少量配置,即可实 Ascend Adapter 软件包需要在Linux OS环境上进行编译,同时环境上需要安装一下软件依赖: -#### 1. tensorflow2.6.5 +- **TensorFlow 2.6.5** -Ascend Adapter 与 Tensorflow 有严格的匹配关系,从源码构建前,您需要确保已经正确安装了[Tensorflow v2.6.5 版本](https://www.tensorflow.org/install) 。 + Ascend Adapter 与 Tensorflow 有严格的匹配关系,从源码构建前,您需要确保已经正确安装了[Tensorflow v2.6.5 版本](https://www.tensorflow.org/install) ,安装方式可参见[昇腾社区文档中心-TensorFlow 2.6.5模型迁移](https://hiascend.com/document/redirect/canntfmigr)中的“TensorFlow 2.6.5模型迁移 > 环境准备 > 安装开源框架TensorFlow 2.6.5”章节。 -#### 2. GCC >= 7.3.0 +- **GCC >= 7.3.0** -Ascend Adapter 需要使用7.3.0及更高版本的gcc编译 + Ascend Adapter 需要使用7.3.0及更高版本的gcc编译。 -#### 3. CMake >= 3.14.0 +- **CMake >= 3.14.0** -Ascend Adapter 需要使用3.14.0及更高版本的cmake编译 + Ascend Adapter 需要使用3.14.0及更高版本的cmake编译。 -#### 4. 
CANN开发者套件 +- **CANN开发者套件** -CANN开发者套件下载方法 + 请根据"[CANN版本配套说明](../README.md#cannversionmap)"获取对应的CANN软件版本号,并在“[CANN软件下载页面](https://www.hiascend.com/developer/download/community/result?module=cann)”下载对应版本的CANN开发套件包`Ascend-cann-toolkit__linux-.run`,CANN开发套件包支持的安装方式及操作系统请参见配套版本的[用户手册](https://hiascend.com/document/redirect/CannCommunityInstSoftware)。 -请参见"[开放项目与CANN版本配套表](https://gitee.com/ascend/cann-community/blob/master/README.md#cannversionmap)"获取对应的CANN开发套件包`Ascend-cann-toolkit__linux-.run`,CANN开发套件包支持的安装方式及操作系统请参见配套版本的[用户手册](https://hiascend.com/document/redirect/CannCommunityInstSoftware)。 + CANN开发者套件安装方法: -CANN开发者套件安装方法 + 执行安装命令时,请确保安装用户对软件包具有可执行权限。 -执行安装命令时,请确保安装用户对软件包具有可执行权限。 + - 使用默认路径安装 + ```shell + ./Ascend-cann-toolkit__linux-.run --install + ``` + - 若使用root用户安装,安装完成后相关软件存储在`/usr/local/Ascend/ascend-toolkit/latest`路径下。 + + - 若使用非root用户安装,安装完成后相关软件存储在`$HOME/Ascend/ascend-toolkit/latest`路径下。 -使用默认路径安装 -```shell -./Ascend-cann-toolkit__linux-.run --install -``` -若使用root用户安装,安装完成后相关软件存储在`/usr/local/Ascend/ascend-toolkit/latest`路径下。 - -若使用非root用户安装,安装完成后相关软件存储在`$HOME/Ascend/ascend-toolkit/latest`路径下。 - -指定路径安装 -```bash -./Ascend-cann-toolkit__linux-.run --install --install-path=${ASCEND_INSTALLED_PATH} -``` -安装完成后,相关软件存储在`${ASCEND_INSTALLED_PATH}`指定路径下。 - -#### 5. swig + - 指定路径安装 + ```bash + ./Ascend-cann-toolkit__linux-.run --install --install-path=${ASCEND_INSTALLED_PATH} + ``` + 安装完成后,相关软件存储在`${ASCEND_INSTALLED_PATH}`指定路径下。 -Ascend Adapter 软件源码编译依赖SWIG。 -可执行如下命令进行SWIG(http://www.swig.org/download.html)的安装: +- **swig** + Ascend Adapter 软件源码编译依赖SWIG,可执行如下命令进行SWIG(http://www.swig.org/download.html)的安装: + `pip3 install swig` + +- **CommunitySDK包** -#### 6. 
CommunitySDK包 + Ascend Adapter 软件的编译还需要依赖CommunitySDK包,此包可在“[CANN软件下载页面](https://www.hiascend.com/developer/download/community/result?module=cann)”下载。 -Ascend Adapter 软件的编译还需要依赖CommunitySDK包,此包可以到下面链接下载(https://www.hiascend.com/developer/download/community/result?module=cann&cann=8.2.RC1.alpha001) + 安装CommunitySDK到toolkit下: + ```bash + ./Ascend-cann-communitysdk_*.run --full --install-path=${ASCEND_INSTALLED_PATH} + ``` + > 注意:CommunitySDK一定要跟CANN包安装在同一路径下,否则编译报错。 -安装CommunitySDK到toolkit下: -```bash -./Ascend-cann-communitysdk_*.run --full --install-path=${ASCEND_INSTALLED_PATH} -``` -> 注意:CommunitySDK一定要跟CANN包安装在同一路径下,否则编译报错。 - -### 下载源码 +### 源码下载 ``` git clone https://gitee.com/ascend/tensorflow.git @@ -127,7 +124,7 @@ make -j8 ./dist/python/dist/npu_device-0.1-py3-none-any.whl ``` -### 安装 +### 安装TF Adapter 您可以继续执行 @@ -141,14 +138,6 @@ make install pip3 install ./dist/python/dist/npu_device-0.1-py3-none-any.whl --upgrade ``` -## 贡献 - -psuh代码前,请务必保证已经完成了基础功能测试和网络测试! - -## Release Notes - -Release Notes请参考[RELEASE](RELEASE.md). 
- -## License +## 许可证 [Apache License 2.0](LICENSE) \ No newline at end of file -- Gitee From 63f4e8495900183220853f00091c2f9922c672e8 Mon Sep 17 00:00:00 2001 From: guopeian Date: Fri, 6 Jun 2025 00:47:34 +0000 Subject: [PATCH 8/9] =?UTF-8?q?!2987=20=E3=80=90=E8=BD=BB=E9=87=8F?= =?UTF-8?q?=E7=BA=A7=20PR=E3=80=91=EF=BC=9Aupdate=20README.md.=20Merge=20p?= =?UTF-8?q?ull=20request=20!2987=20from=20guopeian/N/A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f0a90cbbc..d4cbfcb91 100755 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ Atlas 推理系列产品(Ascend 310P处理器)(仅支持TensorFlow 1.15在 若您的TensorFlow框架版本是1.15,本源码仓的编译安装等详细使用方法请参见[tf_adapter 1.x](./tf_adapter/README.md)。 -若您的TensorFlow框架版本是1.15,本源码仓的编译安装等详细使用方法请参见[tf_adapter 2.x](./tf_adapter_2.x/README.md)。 +若您的TensorFlow框架版本是2.6.5,本源码仓的编译安装等详细使用方法请参见[tf_adapter 2.x](./tf_adapter_2.x/README.md)。 ## 贡献指南 -- Gitee From d85cad556e80ca8728eacfd39a30ea10ed8b41eb Mon Sep 17 00:00:00 2001 From: zangyan Date: Wed, 16 Jul 2025 03:20:06 +0000 Subject: [PATCH 9/9] !2999 modify readme Merge pull request !2999 from zangyan/master --- README.en.md | 15 ---------- tf_adapter/README.en.md | 59 ---------------------------------------- tf_adapter/README.md | 31 +++++++++++---------- tf_adapter_2.x/README.md | 25 +++++++++++------ 4 files changed, 33 insertions(+), 97 deletions(-) delete mode 100755 README.en.md delete mode 100755 tf_adapter/README.en.md diff --git a/README.en.md b/README.en.md deleted file mode 100755 index 8c3fa6708..000000000 --- a/README.en.md +++ /dev/null @@ -1,15 +0,0 @@ -# TF_Adapter - -[View Chinese](README.md) - -TF_Adapter is committed to providing the outstanding computing power of Ascend AI Processors to developers who use the TensorFlow framework. 
-Developers only need to install the TF_Adapter plug-in and add a small amount of configuration to the existing TensorFlow script to accelerate their training jobs on the Ascend AI Processors. - -## Details on TensorFlow version adaptation -tensorflow 1.15.x reference [tf_adapter 1.x](./tf_adapter/README.md) - -tensorflow 2.6.x reference [tf_adapter 2.x](./tf_adapter_2.x/README.md) - -## License - -[Apache License 2.0](LICENSE) diff --git a/tf_adapter/README.en.md b/tf_adapter/README.en.md deleted file mode 100755 index ccda21920..000000000 --- a/tf_adapter/README.en.md +++ /dev/null @@ -1,59 +0,0 @@ -# Ascend Adapter for TF1.X - -[查看中文](README.md) - -TF_Adapter1.x is committed to providing the outstanding computing power of Ascend AI Processors to developers who use the TensorFlow framework. -Developers only need to install the TF_Adapter plug-in and add a small amount of configuration to the existing TensorFlow script to accelerate their training jobs on the Ascend AI Processors. - -![tfadapter](https://images.gitee.com/uploads/images/2020/1027/094640_8f305b88_8175427.jpeg "framework.jpg") - -You can obtain more details on usage by reading the TensorFlow 1.15 model migration section in the third-party framework adaptation of TensorFlow within the [Ascend Community Documentation Center](https://www.hiascend.com/zh/document). -## Installation Guide -### Building from source - -You can build the TF_Adapter software package from the source code and install it on the Ascend AI Processor environment. -> The TF_Adapter plug-in has a strict matching relationship with TensorFlow. Before building from source code, you need to ensure that it has been installed correctly [TensorFlow v1.15.0 ->Version](https://www.tensorflow.org/install/pip). - -You may also build GraphEngine from the source. To build GraphEngine, please make sure that you have access to an Ascend 910 environment as compiling environment, and make sure that the following software requirements are fulfilled. 
-- Linux OS -- GCC >= 7.3.0 -- CMake >= 3.14.0 -- [SWIG](http://www.swig.org/download.html) - -#### Download -``` -git clone https://gitee.com/ascend/tensorflow.git -cd tensorflow -``` - -#### Execute the script to generate the installation package -``` -chmod +x build.sh -./build.sh -``` - - -After the script is successfully executed, a compressed file of tfadapter.tar will be generated in the output directory. - -#### Install -Unzip the tfadapter.tar file to generate npu_bridge-1.15.0-py3-none-any.whl. -Then you can install the TF_Adapter plug-in using pip. -``` -pip install ./dist/python/dist/npu_bridge-1.15.0-py3-none-any.whl -``` - -It should be noted that you should ensure that the installation path is the same as the Python you specified when compiling -The interpreter search path is consistent. - -## Contributing - -Welcome to contribute. - -## Release Notes - -For Release Notes, see our [RELEASE](RELEASE.md). - -## License - -[Apache License 2.0](LICENSE) diff --git a/tf_adapter/README.md b/tf_adapter/README.md index 8fc57ce94..843999e9c 100755 --- a/tf_adapter/README.md +++ b/tf_adapter/README.md @@ -8,7 +8,7 @@ Ascend Adapter for TF1.X 致力于将昇腾AI处理器卓越的运算能力, 您可以通过阅读[昇腾社区文档中心-TensorFlow 1.15模型迁移](https://hiascend.com/document/redirect/canntfmigr)手册获取更多使用细节。 -## 源码安装 +## 编译与安装 您可以通过此仓中的源代码构建TF Adapter软件包并将其部署在昇腾AI处理器所在环境上。 ### 环境准备 @@ -17,7 +17,7 @@ Ascend Adapter 软件包需要在Linux OS环境上进行编译,同时环境上 - **Python3.7** - Ascend Adapter 需要使用python3.7版本进行编译 + Ascend Adapter 需要使用python3.7版本进行编译。 - **TensorTlow 1.15.0** @@ -31,6 +31,14 @@ Ascend Adapter 软件包需要在Linux OS环境上进行编译,同时环境上 Ascend Adapter 需要使用3.14.0及更高版本的cmake编译 +- **SWIG** + + Ascend Adapter 源码编译依赖SWIG, SWIG安装命令示例如下: + ```shell + # Ubuntu/Debian操作系统安装命令示例如下,其他操作系统请自行安装 + apt-get install swig + ``` + - **CANN开发者套件** 
请根据"[CANN版本配套说明](../README.md#cannversionmap)"获取对应的CANN软件版本号,并在“[CANN软件下载页面](https://www.hiascend.com/developer/download/community/result?module=cann)”下载对应版本的CANN开发套件包`Ascend-cann-toolkit__linux-.run`,CANN开发套件包支持的安装方式及操作系统请参见配套版本的[用户手册](https://hiascend.com/document/redirect/CannCommunityInstSoftware)。 @@ -42,23 +50,17 @@ Ascend Adapter 软件包需要在Linux OS环境上进行编译,同时环境上 - 使用默认路径安装 ```shell ./Ascend-cann-toolkit__linux-.run --install - ``` + ``` - 若使用root用户安装,安装完成后相关软件存储在`/usr/local/Ascend/ascend-toolkit/latest`路径下。 - + - 若使用非root用户安装,安装完成后相关软件存储在`$HOME/Ascend/ascend-toolkit/latest`路径下。 - 指定路径安装 ```bash ./Ascend-cann-toolkit__linux-.run --install --install-path=${ASCEND_INSTALLED_PATH} - ``` + ``` 安装完成后,相关软件存储在`${ASCEND_INSTALLED_PATH}`指定路径下。 - -- **swig** - Ascend Adapter 源码编译依赖SWIG,可执行如下命令进行SWIG的安装: - -`apt-get install swig` - - **CommunitySDK包** Ascend Adapter 软件的编译还需要依赖CommunitySDK包,此包可在“[CANN软件下载页面](https://www.hiascend.com/developer/download/community/result?module=cann)”下载。 @@ -99,13 +101,14 @@ chmod +x build.sh 编译结束后,TF Adapter安装包生成在如下路径: ``` -./build/tfadapter/dist/python/dist/npu_bridge-1.15.0-py3-none-any.whl +./build/tfadapter/dist/python/dist/npu_bridge-1.15.0-py3-none-manylinux2014_.whl ``` +\表示操作系统架构,取值为x86_64与aarch64。 ### 安装TF Adapter -执行如下命令安装TF Adapter。 +执行如下命令安装TF Adapter,请注意替换为实际的包名。 ``` -pip3 install ./build/tfadapter/dist/python/dist/npu_bridge-1.15.0-py3-none-any.whl --upgrade +pip3 install ./build/tfadapter/dist/python/dist/npu_bridge-1.15.0-py3-none-manylinux2014_.whl --upgrade ``` 执行完成后,TF Adapter相关文件安装到python解释器搜索路径下,例如“/usr/local/python3.7.5/lib/python3.7/siite-packages”路径,安装后文件夹为“npu_bridge”与“npu_bridge-1.15.0.dist-info”。 diff --git a/tf_adapter_2.x/README.md b/tf_adapter_2.x/README.md index a29c903d8..c6a7ad7da 100644 --- a/tf_adapter_2.x/README.md +++ b/tf_adapter_2.x/README.md @@ -13,6 +13,10 @@ TF2.X插件,并在现有Tensorflow 2.x脚本中添加少量配置,即可实 Ascend Adapter 软件包需要在Linux OS环境上进行编译,同时环境上需要安装一下软件依赖: +- **Python3.7~Python3.9** + + Ascend 
Adapter可以使用python3.7、Python3.8、Python3.9版本进行编译。 + - **TensorFlow 2.6.5** Ascend Adapter 与 Tensorflow 有严格的匹配关系,从源码构建前,您需要确保已经正确安装了[Tensorflow v2.6.5 版本](https://www.tensorflow.org/install) ,安装方式可参见[昇腾社区文档中心-TensorFlow 2.6.5模型迁移](https://hiascend.com/document/redirect/canntfmigr)中的“TensorFlow 2.6.5模型迁移 > 环境准备 > 安装开源框架TensorFlow 2.6.5”章节。 @@ -25,6 +29,14 @@ Ascend Adapter 软件包需要在Linux OS环境上进行编译,同时环境上 Ascend Adapter 需要使用3.14.0及更高版本的cmake编译。 +- **SWIG >= 4.1.0** + + Ascend Adapter 源码编译依赖SWIG, SWIG安装命令示例如下: + ```shell + # Ubuntu/Debian操作系统安装命令示例如下,其他操作系统请自行安装 + apt-get install swig + ``` + - **CANN开发者套件** 请根据"[CANN版本配套说明](../README.md#cannversionmap)"获取对应的CANN软件版本号,并在“[CANN软件下载页面](https://www.hiascend.com/developer/download/community/result?module=cann)”下载对应版本的CANN开发套件包`Ascend-cann-toolkit__linux-.run`,CANN开发套件包支持的安装方式及操作系统请参见配套版本的[用户手册](https://hiascend.com/document/redirect/CannCommunityInstSoftware)。 @@ -47,12 +59,6 @@ Ascend Adapter 软件包需要在Linux OS环境上进行编译,同时环境上 ``` 安装完成后,相关软件存储在`${ASCEND_INSTALLED_PATH}`指定路径下。 -- **swig** - - Ascend Adapter 软件源码编译依赖SWIG,可执行如下命令进行SWIG(http://www.swig.org/download.html)的安装: - -`pip3 install swig` - - **CommunitySDK包** Ascend Adapter 软件的编译还需要依赖CommunitySDK包,此包可在“[CANN软件下载页面](https://www.hiascend.com/developer/download/community/result?module=cann)”下载。 @@ -121,8 +127,9 @@ make -j8 编译结束后,安装包会生成在 ``` -./dist/python/dist/npu_device-0.1-py3-none-any.whl +./dist/python/dist/npu_device-2.6.5-py3-none-manylinux2014_.whl ``` +\表示操作系统架构,取值为x86_64与aarch64。 ### 安装TF Adapter @@ -132,10 +139,10 @@ make -j8 make install ``` -将Ascend Adapter安装到配置时指定的 python 解释器包目录下,或者使用 pip3 安装 Ascend Adapter 到您期望的位置。 +将Ascend Adapter安装到配置时指定的 python 解释器包目录下,或者使用 pip3 安装 Ascend Adapter 到您期望的位置。请注意替换为实际的包名。 ``` -pip3 install ./dist/python/dist/npu_device-0.1-py3-none-any.whl --upgrade +pip3 install ./dist/python/dist/npu_device-2.6.5-py3-none-manylinux2014_.whl --upgrade ``` ## 许可证 -- Gitee