diff --git a/tf_adapter/interface_spec/api_npu_config.pyh b/tf_adapter/interface_spec/api_npu_config.pyh index a26565778126a3e8cd8f361857872e8d2ba3ffd7..1263432bcdad058e27221a5844c678bfb430f03f 100644 --- a/tf_adapter/interface_spec/api_npu_config.pyh +++ b/tf_adapter/interface_spec/api_npu_config.pyh @@ -19,10 +19,10 @@ class NPURunConfig(run_config_lib.RunConfig): soc_config=None, hccl_timeout=None, op_wait_timeout=None, op_execute_timeout=None, HCCL_algorithm=None, customize_dtypes=None, op_debug_config=None, memory_config=None, experimental_config=None, topo_sorting_mode=None, aoe_config_file=None, insert_op_file=None, stream_sync_timeout=-1, - event_sync_timeout=-1, external_weight=False, es_cluster_config=None, deterministic=0, + event_sync_timeout=-1, external_weight=False, deterministic=0, frozen_variable=False, variable_placement="Device", jit_compile="auto", precision_mode_v2=None, ac_parallel_enable=None, quant_dumpable=None, input_fusion_size=131072, compile_dynamic_mode=None, - execute_times=-1, graph_max_parallel_model_num=1, export_compile_stat=1, aicore_num=None, + graph_max_parallel_model_num=1, export_compile_stat=1, aicore_num=None, oo_constant_folding=True, input_batch_cpy=False, shape_generalization_mode="STRICT"): class ProfilingConfig(): diff --git a/tf_adapter/kernels/aicpu/host_feature_mapping.cc b/tf_adapter/kernels/aicpu/host_feature_mapping.cc deleted file mode 100644 index 8259b3c1c226af262fd518c9ed04d0c8a4828323..0000000000000000000000000000000000000000 --- a/tf_adapter/kernels/aicpu/host_feature_mapping.cc +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "host_feature_mapping.h" - -namespace tensorflow { -namespace featuremapping { -class SimpleThreadPool { - public: - void SyncRun(const std::vector> &tasks) const { - std::vector> futs; - for (auto &task : tasks) { - futs.push_back(std::async(task)); - } - for (auto &fut : futs) { - fut.wait(); - } - } -}; - -std::unordered_map feature_mapping_table; -class HostFeatureMappingOp : public OpKernel { - public: - explicit HostFeatureMappingOp(OpKernelConstruction *ctx) : OpKernel(ctx) { - OP_REQUIRES_OK(ctx, ctx->GetAttr("threshold", &threshold_)); - OP_REQUIRES_OK(ctx, ctx->GetAttr("table_name", &table_name_)); - ADP_LOG(INFO) << "Host HostFeatureMapping built table_name " << table_name_; - } - ~HostFeatureMappingOp() override { - ADP_LOG(INFO) << table_name_ << " has been destructed"; - } - - FeatureMappingTable *get_or_init_tables(std::string table_name, int32_t buckets_num, int32_t threshold) const { - auto it = feature_mapping_table.find(table_name); - if (it != feature_mapping_table.end()) { - return it->second; - } else { - FeatureMappingTable *table = new (std::nothrow) FeatureMappingTable(buckets_num, threshold); - if (table != nullptr) { - feature_mapping_table[table_name] = table; - return table; - } - return nullptr; - } - } - - int64_t get_and_increase_offset(FeatureMappingTable *table, int32_t bucket_index, - int32_t buckets_num, int64_t last_index) const { - // compatible inference training exectuion and continuation training scenarios - if (table->offsets[bucket_index] == 0) { - table->offsets[bucket_index] = last_index; - } - int64_t offset = table->offsets[bucket_index] * buckets_num + bucket_index; - 
table->offsets[bucket_index]++; - return offset; - } - - void find_hash_table(FeatureMappingTable *table, int32_t bucket_index, int64_t feature_id_len, - const int64_t *feature_id_data, int64_t *offset_id_data) const { - const int32_t buckets_num = table->buckets_num; - auto table_mappings = table->feature_mappings_ptr[bucket_index]; - int64_t last_index = table_mappings->size(); - ADP_LOG(DEBUG) << "last_index value " << last_index; - for (int64_t i = 0; i < feature_id_len; ++i) { - int64_t feature_id = feature_id_data[i]; - auto it = table_mappings->find(feature_id); - if (it == table_mappings->end()) { - int64_t offset = get_and_increase_offset(table, bucket_index, buckets_num, last_index); - std::pair count_and_offset = std::make_pair(1LL, offset); - table_mappings->insert(std::make_pair(feature_id, count_and_offset)); - offset_id_data[i] = offset; - } else { - std::pair &count_and_offset = it->second; - count_and_offset.first++; - offset_id_data[i] = count_and_offset.second; - } - } - } - - void Compute(OpKernelContext *ctx) override { - const Tensor &featureIdTensor = ctx->input(0); - auto feature_id_data = (const int64_t *)(featureIdTensor.tensor_data().data()); - const int64_t feature_id_len = featureIdTensor.NumElements(); - ADP_LOG(INFO) << "table_name " << table_name_ << " len " << feature_id_len << " compute begin"; - - Tensor *output_tensor = nullptr; - OP_REQUIRES_OK(ctx, ctx->allocate_output(0, featureIdTensor.shape(), &output_tensor)); - auto offset_id = (int64_t *)(output_tensor->tensor_data().data()); - - // device FeatureMapping uses Usafe only support Single Core - SimpleThreadPool pool; - int32_t thread_num = 1; - std::vector> tasks; - FeatureMappingTable *table = get_or_init_tables(table_name_, thread_num, threshold_); - if (table == nullptr) { - ADP_LOG(ERROR) << "get or init table failed table is nullptr"; - return; - } - for (int32_t i = 0; i < thread_num; ++i) { - tasks.push_back([this, table, i, feature_id_len, feature_id_data, 
offset_id]() -> int { - find_hash_table(table, i, feature_id_len, feature_id_data, offset_id); - return int{}; - }); - } - - if (!tasks.empty()) { - pool.SyncRun(tasks); - } - ADP_LOG(INFO) << "table_name " << table_name_ << " compute end"; - } - - private: - int threshold_{}; - std::string table_name_{}; -}; - -REGISTER_KERNEL_BUILDER(Name("HostFeatureMapping").Device(DEVICE_CPU), HostFeatureMappingOp); -} // namespace featuremapping -} // namespace tensorflow diff --git a/tf_adapter/kernels/aicpu/host_feature_mapping.h b/tf_adapter/kernels/aicpu/host_feature_mapping.h deleted file mode 100644 index 963cf8f09c0f0f1cfa1e2f5dce69f0076c0b5dfa..0000000000000000000000000000000000000000 --- a/tf_adapter/kernels/aicpu/host_feature_mapping.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#ifndef TENSORFLOW_TF_ADAPTER_KERNELS_HOST_FEATURE_MAPPING_OP_H -#define TENSORFLOW_TF_ADAPTER_KERNELS_HOST_FEATURE_MAPPING_OP_H - -#include -#include -#include - -#include "tf_adapter/common/adapter_logger.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/util/work_sharder.h" - -namespace tensorflow { -namespace featuremapping { -using HashmapType = std::unordered_map>; -struct FeatureMappingTable { - explicit FeatureMappingTable(int32_t input_buckets_num, int32_t input_threshold) - : buckets_num(input_buckets_num), threshold(input_threshold), offsets(input_buckets_num), - feature_mappings_ptr(input_buckets_num) { - for (int i = 0; i < this->buckets_num; ++i) { - this->offsets[i] = 0; - this->feature_mappings_ptr[i] = new (std::nothrow) HashmapType(init_hashmap_size / buckets_num); - if (this->feature_mappings_ptr[i] == nullptr) { - ADP_LOG(ERROR) << "new Hash map maping failed"; - } - } - } - const int64_t init_hashmap_size = 60 * 10000; - int32_t buckets_num; - int32_t threshold; - std::vector offsets; - std::vector feature_mappings_ptr; // buckets_num分桶 -}; -extern std::unordered_map feature_mapping_table; -} // namespace featuremapping -} // namespace tensorflow - -#endif // TENSORFLOW_TF_ADAPTER_KERNELS_HOST_FEATURE_MAPPING_OP_H \ No newline at end of file diff --git a/tf_adapter/kernels/aicpu/host_feature_mapping_export.cc b/tf_adapter/kernels/aicpu/host_feature_mapping_export.cc deleted file mode 100644 index f3c76ef5ce1265e1a934f96d02292d0e6f40d9aa..0000000000000000000000000000000000000000 --- a/tf_adapter/kernels/aicpu/host_feature_mapping_export.cc +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include - -#include "host_feature_mapping.h" - -#include "tensorflow/core/lib/core/stringpiece.h" -#include "tensorflow/core/lib/strings/str_util.h" - -namespace tensorflow { -namespace featuremapping { -const std::string kBinFileSuffix = ".bin"; - -class FeatureMappingExportOp : public OpKernel { - public: - explicit FeatureMappingExportOp(OpKernelConstruction *ctx) : OpKernel(ctx) { - ADP_LOG(INFO) << "FeatureMappingExport built"; - OP_REQUIRES_OK(ctx, ctx->GetAttr("table_name_list", &table_name_list_)); - } - ~FeatureMappingExportOp() override { - ADP_LOG(INFO) << "FeatureMappingExport has been destructed"; - } - - void WriteMappingContens2File(std::string &table_name, std::string &dst_path) const { - auto it = feature_mapping_table.find(table_name); - if (it == feature_mapping_table.end()) { - ADP_LOG(WARNING) << "this table " << table_name << " is not in mapping, just skip"; - return; - } - - FeatureMappingTable *table = it->second; - if (table == nullptr) { - ADP_LOG(ERROR) << "table map find but table is nullptr"; - return; - } - - try { - std::ofstream out_stream(dst_path); - // current use only one bucket refer to host feature mapping op - int32_t bucket_index = 0; - const auto mapping_map = table->feature_mappings_ptr[bucket_index]; - ADP_LOG(INFO) << "table_name " << table_name << " map_size: " << mapping_map->size(); - std::unordered_map>::iterator map_iter; - for (map_iter = mapping_map->begin(); map_iter != mapping_map->end(); ++map_iter) { - const int64_t feature_id = map_iter->first; - std::pair &count_and_offset = map_iter->second; - const int64_t counts = count_and_offset.first; - const int64_t 
offset_id = count_and_offset.second; - // feature_id: 3 | counts: 1 | offset_id: 7 - std::string content = "feature_id: " + std::to_string(feature_id) + " | " - + "counts: " + std::to_string(counts) + " | " - + "offset_id: " + std::to_string(offset_id); - ADP_LOG(DEBUG) << "table_name " << table_name << "content: " << content; - out_stream << content << std::endl; - } - out_stream.close(); - } catch (std::exception &e) { - ADP_LOG(ERROR) << "write to file " << dst_path << " failed, err: " << e.what(); - return; - } - } - - void SaveFeatureMapping2File(const std::string &path) const { - const size_t path_length = path.size(); - std::string dst_path_way = path; - if (path[path_length - 1] != '/') { - (void)dst_path_way.append("/"); - } - - std::ifstream is_path(dst_path_way); - if (!is_path) { - ADP_LOG(INFO) << "export file path " << dst_path_way << " is not exits, make it"; - if (mkdir(dst_path_way.c_str(), S_IRWXO | S_IRWXG | S_IRWXU) != 0) { - if (errno != EEXIST) { - ADP_LOG(ERROR) << "Create file directory " << dst_path_way << " failed, errmsg " << strerror(errno); - return; - } - } - } - - const size_t name_size = table_name_list_.size(); - ADP_LOG(INFO) << "dst_path_way " << dst_path_way << " name_size " << name_size; - if (name_size == 0) { - ADP_LOG(INFO) << "default export all feature mapping"; - for (const auto &map_pair : feature_mapping_table) { - std::string table_name = map_pair.first; - std::string dst_path_file = dst_path_way + table_name + kBinFileSuffix; - ADP_LOG(INFO) << "table_name " << table_name << " dst_path_file " << dst_path_file; - WriteMappingContens2File(table_name, dst_path_file); - } - } else { - ADP_LOG(INFO) << "export attr name of user specified"; - for (size_t index = 0; index < name_size; ++index) { - std::string attr_table_name = std::string(table_name_list_[index]); - std::string dst_file_path = dst_path_way + attr_table_name + kBinFileSuffix; - ADP_LOG(INFO) << "attr_table_name " << attr_table_name << " dst_file_path " << 
dst_file_path; - WriteMappingContens2File(attr_table_name, dst_file_path); - } - } - return; - } - - void Compute(OpKernelContext *ctx) override { - ADP_LOG(INFO) << "Host FeatureMappingExport compute begin"; - const Tensor &save_path_tensor = ctx->input(0); - OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(save_path_tensor.shape()), - errors::InvalidArgument("path expects a scalar.")); - OP_REQUIRES(ctx, (save_path_tensor.dtype() == DT_STRING), - errors::InvalidArgument("path should be string but got ", - DataTypeString(save_path_tensor.dtype()))); - const StringPiece save_path = save_path_tensor.scalar()(); - OP_REQUIRES(ctx, !save_path.empty(), - errors::InvalidArgument("path should be a valid string.")); - Tensor *output_tensor = nullptr; - OP_REQUIRES_OK(ctx, ctx->allocate_output(0, save_path_tensor.shape(), &output_tensor)); - SaveFeatureMapping2File(std::string(save_path)); - ADP_LOG(INFO) << "Host FeatureMappingExport compute end"; - } - - private: - std::vector table_name_list_{}; -}; - -REGISTER_KERNEL_BUILDER(Name("FeatureMappingExport").Device(DEVICE_CPU), FeatureMappingExportOp); -} // namespace featuremapping -} // namespace tensorflow diff --git a/tf_adapter/kernels/aicpu/host_feature_mapping_import.cc b/tf_adapter/kernels/aicpu/host_feature_mapping_import.cc deleted file mode 100644 index 594b55c7b1e196cfea2803aa771da7bc322cfa7c..0000000000000000000000000000000000000000 --- a/tf_adapter/kernels/aicpu/host_feature_mapping_import.cc +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include "dirent.h" - -#include "host_feature_mapping.h" - -#include "tensorflow/core/lib/core/stringpiece.h" -#include "tensorflow/core/lib/strings/str_util.h" - -namespace tensorflow { -namespace featuremapping { -const uint32_t kSpaceAndSymbolLength = 2; -const uint32_t kIncludeCountsLength = 8; - -class FeatureMappingImportOp : public OpKernel { - public: - explicit FeatureMappingImportOp(OpKernelConstruction *ctx) : OpKernel(ctx) { - ADP_LOG(INFO) << "Host FeatureMappingImport built"; - } - ~FeatureMappingImportOp() override { - ADP_LOG(INFO) << "Host FeatureMappingImport has been destructed"; - } - - void ResotreLineToMapping(std::string &line, std::string &table_name) const { - /* format :: feature_id: 3 | counts: 1 | offset_id: 7 */ - ADP_LOG(INFO) << "table name: " << table_name << " line " << line; - size_t fid_pos = line.find(":") + kSpaceAndSymbolLength; - size_t bar_pos = line.find("|"); - std::string feature_id_str = line.substr(fid_pos, bar_pos - fid_pos - 1); - ADP_LOG(DEBUG) << "feature id str: " << feature_id_str; - int64_t feature_id = 0; - try { - feature_id = stoll(feature_id_str); - } catch(std::exception &e) { - ADP_LOG(ERROR) << "stoll failed feature id str: " << feature_id_str << " reason: " << e.what(); - return; - } - - size_t counts_index = line.find("counts") + kIncludeCountsLength; - size_t last_sep_pos = line.find_last_of("|"); - std::string counts_str = line.substr(counts_index, last_sep_pos - 1 - counts_index); - ADP_LOG(DEBUG) << "counts str: " << counts_str; - int64_t counts = 0; - try { - counts = stoll(counts_str); - } catch(std::exception &e) { - ADP_LOG(ERROR) << "stoll failed counts str: " << counts_str << " reason: " << e.what(); - return; - } - - size_t off_pos = line.find_last_of(":") + kSpaceAndSymbolLength; - std::string offset_id_str = line.substr(off_pos, line.length()); - ADP_LOG(DEBUG) 
<< "offset id str: " << offset_id_str; - int64_t offset_id = 0; - try { - offset_id = stoll(offset_id_str); - } catch(std::exception &e) { - ADP_LOG(ERROR) << "stoll failed offset id str: " << offset_id_str << " reason: " << e.what(); - return; - } - ADP_LOG(DEBUG) << "feature_id: " << feature_id << " counts: " << counts << " offset_id: " << offset_id; - - // import data to hash map - FeatureMappingTable *table = nullptr; - auto it = feature_mapping_table.find(table_name); - if (it != feature_mapping_table.end()) { - ADP_LOG(INFO) << "have the map, insert directly"; - table = it->second; - } else { - uint32_t buckets_num = 1; - uint32_t threshold = 1; - table = new (std::nothrow) FeatureMappingTable(buckets_num, threshold); - } - - if (table != nullptr) { - feature_mapping_table[table_name] = table; - // current use only one bucket refer to host feature mapping op - int32_t bucket_index = 0; - auto it_key = table->feature_mappings_ptr[bucket_index]->find(feature_id); - if (it_key == table->feature_mappings_ptr[bucket_index]->end()) { - std::pair count_and_offset = std::make_pair(counts, offset_id); - table->feature_mappings_ptr[bucket_index]->insert(std::make_pair(feature_id, count_and_offset)); - ADP_LOG(DEBUG) << "one item insert feature_id: " << feature_id << " counts: " << counts << " offset_id " << offset_id; - } else { - ADP_LOG(ERROR) << "do not here anymore"; - } - ADP_LOG(DEBUG) << "map size: " << table->feature_mappings_ptr[bucket_index]->size(); - } else { - ADP_LOG(ERROR) << "table new nothrow failed"; - } - return; - } - - void FindTableDoImport(std::string &dst_path_way, std::string &file_name) const { - std::string src_file_name = dst_path_way + file_name; - try { - std::ifstream in_stream(src_file_name); - if (!in_stream.is_open()) { - ADP_LOG(ERROR) << "src_file_name: " << src_file_name << " can not open"; - return; - } - - // read line by line - std::string line = ""; - while (std::getline(in_stream, line)) { - std::string table_name = ""; - 
size_t pos_period = file_name.find_last_of("."); - if (pos_period != std::string::npos) { - table_name = file_name.substr(0, pos_period); - } else { - ADP_LOG(ERROR) << "parse file " << file_name << " error"; - return; - } - ResotreLineToMapping(line, table_name); - } - in_stream.close(); - } catch (std::exception &e) { - ADP_LOG(ERROR) << "write to file " << file_name << " failed, err: " << e.what(); - return; - } - } - - void TraverseAndParse(const std::string &src_path) const { - std::ifstream is_path(src_path); - if (!is_path) { - ADP_LOG(ERROR) << "import file path " << src_path << " is not exits"; - return; - } - - const size_t path_length = src_path.size(); - std::string dst_path_way = src_path; - if (dst_path_way[path_length - 1] != '/') { - (void)dst_path_way.append("/"); - } - - DIR *dir; - struct dirent *ent; - dir = opendir(src_path.c_str()); - if (dir != nullptr) { - while ((ent = readdir(dir)) != nullptr) { - std::string file_name = ent->d_name; - if (file_name == ".." || file_name == ".") { - continue; - } - ADP_LOG(INFO) << "file_name: " << ent->d_name; - FindTableDoImport(dst_path_way, file_name); - } - closedir(dir); - } else { - ADP_LOG(ERROR) << "open directory failed " << src_path; - } - } - - void Compute(OpKernelContext *ctx) override { - ADP_LOG(INFO) << "Host FeatureMappingImport compute begin"; - const Tensor &restore_path_tensor = ctx->input(0); - OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(restore_path_tensor.shape()), - errors::InvalidArgument("path expects a scalar.")); - OP_REQUIRES(ctx, (restore_path_tensor.dtype() == DT_STRING), - errors::InvalidArgument("path should be string but got ", - DataTypeString(restore_path_tensor.dtype()))); - const StringPiece restore_path = restore_path_tensor.scalar()(); - OP_REQUIRES(ctx, !restore_path.empty(), - errors::InvalidArgument("path should be a valid string.")); - Tensor *output_tensor = nullptr; - OP_REQUIRES_OK(ctx, ctx->allocate_output(0, restore_path_tensor.shape(), &output_tensor)); - 
TraverseAndParse(std::string(restore_path)); - ADP_LOG(INFO) << "Host FeatureMappingImport compute end"; - } -}; - -REGISTER_KERNEL_BUILDER(Name("FeatureMappingImport").Device(DEVICE_CPU), FeatureMappingImportOp); -} // namespace featuremapping -} // namespace tensorflow diff --git a/tf_adapter/kernels/aicpu/npu_embedding_ops.cc b/tf_adapter/kernels/aicpu/npu_embedding_ops.cc index 666d7a7c7e0bf421d894fef7587a256d7e18217b..956f20e1b8498822ef14b101119f3262e1704044 100644 --- a/tf_adapter/kernels/aicpu/npu_embedding_ops.cc +++ b/tf_adapter/kernels/aicpu/npu_embedding_ops.cc @@ -21,202 +21,6 @@ #include "tf_adapter/util/cache_interface.h" namespace tensorflow { -class InitPartitionMapOp : public OpKernel { -public: - explicit InitPartitionMapOp(OpKernelConstruction *context) : OpKernel(context) {} - ~InitPartitionMapOp() override {} - void Compute(OpKernelContext *context) override { ADP_LOG(INFO) << "InitPartitionMapOp Compute"; } -}; - -class InitEmbeddingHashmapOp : public OpKernel { -public: - explicit InitEmbeddingHashmapOp(OpKernelConstruction *context) : OpKernel(context) {} - ~InitEmbeddingHashmapOp() override {} - void Compute(OpKernelContext *context) override { ADP_LOG(INFO) << "InitEmbeddingHashmapOp Compute"; } -}; - -class EmbeddingTableFindOp : public OpKernel { -public: - explicit EmbeddingTableFindOp(OpKernelConstruction *context) : OpKernel(context) {} - ~EmbeddingTableFindOp() override {} - void Compute(OpKernelContext *context) override { ADP_LOG(INFO) << "EmbeddingTableFindOp Compute"; } -}; - -class EmbeddingTableImportOp : public OpKernel { -public: - explicit EmbeddingTableImportOp(OpKernelConstruction *context) : OpKernel(context) {} - ~EmbeddingTableImportOp() override {} - void Compute(OpKernelContext *context) override { ADP_LOG(INFO) << "EmbeddingTableImportOp Compute"; } -}; - -class UninitPartitionMapOp : public OpKernel { -public: - explicit UninitPartitionMapOp(OpKernelConstruction *context) : OpKernel(context) {} - 
~UninitPartitionMapOp() override {} - void Compute(OpKernelContext *context) override { ADP_LOG(INFO) << "UninitPartitionMapOp Compute"; } -}; - -class UninitEmbeddingHashmapOp : public OpKernel { -public: - explicit UninitEmbeddingHashmapOp(OpKernelConstruction *context) : OpKernel(context) {} - ~UninitEmbeddingHashmapOp() override {} - void Compute(OpKernelContext *context) override { ADP_LOG(INFO) << "UninitEmbeddingHashmapOp Compute"; } -}; - -class TableToResourceOp : public OpKernel { -public: - explicit TableToResourceOp(OpKernelConstruction *context) : OpKernel(context) {} - ~TableToResourceOp() override {} - void Compute(OpKernelContext *context) override { ADP_LOG(INFO) << "TableToResourceOp Compute"; } -}; - -class EmbeddingTableFindAndInitOp : public OpKernel { -public: - explicit EmbeddingTableFindAndInitOp(OpKernelConstruction *context) : OpKernel(context) {} - ~EmbeddingTableFindAndInitOp() override {} - void Compute(OpKernelContext *context) override { ADP_LOG(INFO) << "EmbeddingTableFindAndInitOp Compute"; } -}; - -class FakeRemoteLookupUniquedOp : public OpKernel { - public: - explicit FakeRemoteLookupUniquedOp(OpKernelConstruction *context) : OpKernel(context) {} - ~FakeRemoteLookupUniquedOp() override {} - void Compute(OpKernelContext *context) override { ADP_LOG(INFO) << "FakeRemoteLookupUniquedOp Compute"; } -}; - -class EmbeddingApplyAdamOp : public OpKernel { -public: - explicit EmbeddingApplyAdamOp(OpKernelConstruction *context) : OpKernel(context) {} - ~EmbeddingApplyAdamOp() override {} - void Compute(OpKernelContext *context) override { ADP_LOG(INFO) << "EmbeddingApplyAdamOp Compute"; } -}; - -class EmbeddingApplyAdamWOp : public OpKernel { -public: - explicit EmbeddingApplyAdamWOp(OpKernelConstruction *context) : OpKernel(context) {} - ~EmbeddingApplyAdamWOp() override {} - void Compute(OpKernelContext *context) override { ADP_LOG(INFO) << "EmbeddingApplyAdamWOp Compute"; } -}; - -class EmbeddingApplyAdaGradOp : public OpKernel { 
-public: - explicit EmbeddingApplyAdaGradOp(OpKernelConstruction *context) : OpKernel(context) {} - ~EmbeddingApplyAdaGradOp() override {} - void Compute(OpKernelContext *context) override { ADP_LOG(INFO) << "EmbeddingApplyAdaGradOp Compute"; } -}; - -class EmbeddingApplySgdOp : public OpKernel { -public: - explicit EmbeddingApplySgdOp(OpKernelConstruction *context) : OpKernel(context) {} - ~EmbeddingApplySgdOp() override {} - void Compute(OpKernelContext *context) override { ADP_LOG(INFO) << "EmbeddingApplySgdOp Compute"; } -}; - -class EmbeddingApplyRmspropOp : public OpKernel { -public: - explicit EmbeddingApplyRmspropOp(OpKernelConstruction *context) : OpKernel(context) {} - ~EmbeddingApplyRmspropOp() override {} - void Compute(OpKernelContext *context) override { ADP_LOG(INFO) << "EmbeddingApplyRmspropOp Compute"; } -}; - -class EmbeddingApplyFtrlOp : public OpKernel { -public: - explicit EmbeddingApplyFtrlOp(OpKernelConstruction *context) : OpKernel(context) {} - ~EmbeddingApplyFtrlOp() override {} - void Compute(OpKernelContext *context) override { ADP_LOG(INFO) << "EmbeddingApplyFtrlOp Compute"; } -}; - -class ExponentialDecayLROp : public OpKernel { -public: - explicit ExponentialDecayLROp(OpKernelConstruction *context) : OpKernel(context) {} - ~ExponentialDecayLROp() override {} - void Compute(OpKernelContext *context) override { ADP_LOG(INFO) << "ExponentialDecayLROp Compute"; } -}; - -class EmbeddingComputeVarExportOp : public OpKernel { -public: - explicit EmbeddingComputeVarExportOp(OpKernelConstruction *context) : OpKernel(context) {} - ~EmbeddingComputeVarExportOp() override {} - void Compute(OpKernelContext *context) override { ADP_LOG(INFO) << "EmbeddingComputeVarExportOp Compute"; } -}; - -class EmbeddingComputeVarImportOp : public OpKernel { -public: - explicit EmbeddingComputeVarImportOp(OpKernelConstruction *context) : OpKernel(context) {} - ~EmbeddingComputeVarImportOp() override {} - void Compute(OpKernelContext *context) override { 
ADP_LOG(INFO) << "EmbeddingComputeVarImportOp Compute"; } -}; - -class EmbeddingTableExportOp : public OpKernel { -public: - explicit EmbeddingTableExportOp(OpKernelConstruction *context) : OpKernel(context) {} - ~EmbeddingTableExportOp() override {} - void Compute(OpKernelContext *context) override { ADP_LOG(INFO) << "EmbeddingTableExportOp Compute"; } -}; - -class EmbeddingFeatureMappingOp : public OpKernel { -public: - explicit EmbeddingFeatureMappingOp(OpKernelConstruction *context) : OpKernel(context) {} - ~EmbeddingFeatureMappingOp() override {} - void Compute(OpKernelContext *context) override { ADP_LOG(INFO) << "EmbeddingFeatureMappingOp Compute"; } -}; - -class EmbeddingFeatureMappingV2Op : public OpKernel { -public: - explicit EmbeddingFeatureMappingV2Op(OpKernelConstruction *context) : OpKernel(context) {} - ~EmbeddingFeatureMappingV2Op() override {} - void Compute(OpKernelContext *context) override { ADP_LOG(INFO) << "EmbeddingFeatureMappingV2Op Compute"; } -}; - -class EmbeddingFeatureMappingTableSizeOp : public OpKernel { -public: - explicit EmbeddingFeatureMappingTableSizeOp(OpKernelConstruction *context) : OpKernel(context) {} - ~EmbeddingFeatureMappingTableSizeOp() override {} - void Compute(OpKernelContext *context) override { ADP_LOG(INFO) << "EmbeddingFeatureMappingTableSizeOp Compute"; } -}; - -class EmbeddingFeatureMappingFindOp : public OpKernel { -public: - explicit EmbeddingFeatureMappingFindOp(OpKernelConstruction *context) : OpKernel(context) {} - ~EmbeddingFeatureMappingFindOp() override {} - void Compute(OpKernelContext *context) override { ADP_LOG(INFO) << "EmbeddingFeatureMappingFindOp Compute"; } -}; - -class EmbeddingFeatureMappingExportOp : public OpKernel { -public: - explicit EmbeddingFeatureMappingExportOp(OpKernelConstruction *context) : OpKernel(context) {} - ~EmbeddingFeatureMappingExportOp() override {} - void Compute(OpKernelContext *context) override { ADP_LOG(INFO) << "EmbeddingFeatureMappingExportOp Compute"; } -}; - 
-class EmbeddingFeatureMappingFileSizeOp : public OpKernel { -public: - explicit EmbeddingFeatureMappingFileSizeOp(OpKernelConstruction *context) : OpKernel(context) {} - ~EmbeddingFeatureMappingFileSizeOp() override {} - void Compute(OpKernelContext *context) override { ADP_LOG(INFO) << "EmbeddingFeatureMappingFileSizeOp Compute"; } -}; - -class EmbeddingFeatureMappingImportOp : public OpKernel { -public: - explicit EmbeddingFeatureMappingImportOp(OpKernelConstruction *context) : OpKernel(context) {} - ~EmbeddingFeatureMappingImportOp() override {} - void Compute(OpKernelContext *context) override { ADP_LOG(INFO) << "EmbeddingFeatureMappingImportOp Compute"; } -}; - -class EmbeddingFeatureMappingInsertOp : public OpKernel { -public: - explicit EmbeddingFeatureMappingInsertOp(OpKernelConstruction *context) : OpKernel(context) {} - ~EmbeddingFeatureMappingInsertOp() override {} - void Compute(OpKernelContext *context) override { ADP_LOG(INFO) << "EmbeddingFeatureMappingInsertOp Compute"; } -}; - -class EmbeddingTableEvictOp : public OpKernel { -public: - explicit EmbeddingTableEvictOp(OpKernelConstruction *context) : OpKernel(context) {} - ~EmbeddingTableEvictOp() override {} - void Compute(OpKernelContext *context) override { ADP_LOG(INFO) << "EmbeddingTableEvictOp Compute"; } -}; - class InitEmbeddingHashmapV2Op : public OpKernel { public: explicit InitEmbeddingHashmapV2Op(OpKernelConstruction *context) : OpKernel(context) {} @@ -265,35 +69,6 @@ public: ~EmbeddingHashmapImportOp() override {} void Compute(OpKernelContext *context) override {} }; - -REGISTER_KERNEL_BUILDER(Name("InitPartitionMap").Device(DEVICE_CPU), InitPartitionMapOp); -REGISTER_KERNEL_BUILDER(Name("InitEmbeddingHashmap").Device(DEVICE_CPU), InitEmbeddingHashmapOp); -REGISTER_KERNEL_BUILDER(Name("EmbeddingTableFind").Device(DEVICE_CPU), EmbeddingTableFindOp); -REGISTER_KERNEL_BUILDER(Name("EmbeddingTableImport").Device(DEVICE_CPU), EmbeddingTableImportOp); 
-REGISTER_KERNEL_BUILDER(Name("UninitPartitionMap").Device(DEVICE_CPU), UninitPartitionMapOp); -REGISTER_KERNEL_BUILDER(Name("UninitEmbeddingHashmap").Device(DEVICE_CPU), UninitEmbeddingHashmapOp); -REGISTER_KERNEL_BUILDER(Name("TableToResource").Device(DEVICE_CPU), TableToResourceOp); -REGISTER_KERNEL_BUILDER(Name("EmbeddingTableFindAndInit").Device(DEVICE_CPU), EmbeddingTableFindAndInitOp); -REGISTER_KERNEL_BUILDER(Name("EmbeddingApplyAdam").Device(DEVICE_CPU), EmbeddingApplyAdamOp); -REGISTER_KERNEL_BUILDER(Name("EmbeddingApplyAdamW").Device(DEVICE_CPU), EmbeddingApplyAdamWOp); -REGISTER_KERNEL_BUILDER(Name("EmbeddingApplyAdaGrad").Device(DEVICE_CPU), EmbeddingApplyAdaGradOp); -REGISTER_KERNEL_BUILDER(Name("EmbeddingApplySgd").Device(DEVICE_CPU), EmbeddingApplySgdOp); -REGISTER_KERNEL_BUILDER(Name("EmbeddingApplyRmsprop").Device(DEVICE_CPU), EmbeddingApplyRmspropOp); -REGISTER_KERNEL_BUILDER(Name("EmbeddingApplyFtrl").Device(DEVICE_CPU), EmbeddingApplyFtrlOp); -REGISTER_KERNEL_BUILDER(Name("ExponentialDecayLR").Device(DEVICE_CPU), ExponentialDecayLROp); -REGISTER_KERNEL_BUILDER(Name("EmbeddingTableExport").Device(DEVICE_CPU), EmbeddingTableExportOp); -REGISTER_KERNEL_BUILDER(Name("EmbeddingFeatureMapping").Device(DEVICE_CPU), EmbeddingFeatureMappingOp); -REGISTER_KERNEL_BUILDER(Name("FakeRemoteLookupUniqued").Device(DEVICE_CPU), FakeRemoteLookupUniquedOp); -REGISTER_KERNEL_BUILDER(Name("EmbeddingComputeVarExport").Device(DEVICE_CPU), EmbeddingComputeVarExportOp); -REGISTER_KERNEL_BUILDER(Name("EmbeddingComputeVarImport").Device(DEVICE_CPU), EmbeddingComputeVarImportOp); -REGISTER_KERNEL_BUILDER(Name("EmbeddingFeatureMappingV2").Device(DEVICE_CPU), EmbeddingFeatureMappingV2Op); -REGISTER_KERNEL_BUILDER(Name("EmbeddingFeatureMappingTableSize").Device(DEVICE_CPU), EmbeddingFeatureMappingTableSizeOp); -REGISTER_KERNEL_BUILDER(Name("EmbeddingFeatureMappingFind").Device(DEVICE_CPU), EmbeddingFeatureMappingFindOp); 
-REGISTER_KERNEL_BUILDER(Name("EmbeddingFeatureMappingExport").Device(DEVICE_CPU), EmbeddingFeatureMappingExportOp); -REGISTER_KERNEL_BUILDER(Name("EmbeddingFeatureMappingFileSize").Device(DEVICE_CPU), EmbeddingFeatureMappingFileSizeOp); -REGISTER_KERNEL_BUILDER(Name("EmbeddingFeatureMappingImport").Device(DEVICE_CPU), EmbeddingFeatureMappingImportOp); -REGISTER_KERNEL_BUILDER(Name("EmbeddingFeatureMappingInsert").Device(DEVICE_CPU), EmbeddingFeatureMappingInsertOp); -REGISTER_KERNEL_BUILDER(Name("EmbeddingTableEvict").Device(DEVICE_CPU), EmbeddingTableEvictOp); REGISTER_KERNEL_BUILDER(Name("InitEmbeddingHashmapV2").Device(DEVICE_CPU), InitEmbeddingHashmapV2Op); REGISTER_KERNEL_BUILDER(Name("DeinitEmbeddingHashmapV2").Device(DEVICE_CPU), DeinitEmbeddingHashmapV2Op); REGISTER_KERNEL_BUILDER(Name("TableToResourceV2").Device(DEVICE_CPU), TableToResourceV2Op); diff --git a/tf_adapter/kernels/geop_npu.cc b/tf_adapter/kernels/geop_npu.cc index a1fb9a044e221e84c587d993d4f6caf2a2e34baf..7d6fa457b4b5cb4a7626cb7540d4f5593ece3290 100644 --- a/tf_adapter/kernels/geop_npu.cc +++ b/tf_adapter/kernels/geop_npu.cc @@ -404,12 +404,7 @@ void GeOp::Initialize(OpKernelConstruction *ctx) { } (void) ctx->GetAttr("_recompute_mode", &recompute_mode_); - (void) ctx->GetAttr("_max_key_num", &max_key_num_); - (void) ctx->GetAttr("_use_counter_filter", &use_counter_filter_); - (void) ctx->GetAttr("_embedding_dim", &embedding_dim_); (void) ctx->GetAttr("_compile_dynamic_mode", &compile_dynamic_mode_); - (void) ctx->GetAttr("_padding_key", &padding_key_); - (void) ctx->GetAttr("_embedding_flags", &embedding_flags_); (void) ctx->GetAttr("_dynamic_input", &dynamic_input_); (void) ctx->GetAttr("_jit_compile", &jit_compile_); if (!dynamic_input_.empty() && dynamic_input_ == "1") { @@ -431,9 +426,7 @@ void GeOp::Initialize(OpKernelConstruction *ctx) { << ", getnext_inputs_shape_range: " << getnext_inputs_shape_range_ << ", data_inputs_shape_range: " << data_inputs_shape_range_ << ", is_train_graph: 
" << is_train_graph_ << ", is_dynamic_getnext: " << is_dynamic_getnext_ << ", placeholder_index: " << placeholder_index_ - << ", is_var_init_graph: " << is_var_init_graph_ << ", use_counter_filter: " << use_counter_filter_ - << ", max_key_num: " << max_key_num_ << ", embedding_dim: " << embedding_dim_ - << ", padding_key: " << padding_key_ << ", embedding_flags: " << embedding_flags_ + << ", is_var_init_graph: " << is_var_init_graph_ << ", compile_dynamic_mode: " << compile_dynamic_mode_ << ", shape_generalization_mode: " << shape_generalization_mode_; @@ -1253,21 +1246,6 @@ Status GeOp::SetGraphOptions(OpKernelContext *ctx) { if (!recompute_mode_.empty()) { graph_options_["ge.recompute"] = recompute_mode_; } - if (!max_key_num_.empty()) { - graph_options_["ge.max_key_num"] = max_key_num_; - } - if (!embedding_dim_.empty()) { - graph_options_["ge.embedding_dim"] = embedding_dim_; - } - if (!use_counter_filter_.empty()) { - graph_options_["ge.use_counter_filter"] = use_counter_filter_; - } - if (!padding_key_.empty()) { - graph_options_["ge.padding_key"] = padding_key_; - } - if (!embedding_flags_.empty()) { - graph_options_["ge.embedding_flags"] = embedding_flags_; - } SetDynamicInput(); graph_options_["ge.exec.isVarInitGraph"] = is_var_init_graph_; graph_options_["ge.jit_compile"] = jit_compile_; diff --git a/tf_adapter/kernels/geop_npu.h b/tf_adapter/kernels/geop_npu.h index 526847e3d54c81ffc2b8eb0463819af7ea44043b..8da492354aef4ead14d4cc8f09f50fabec1cc241 100644 --- a/tf_adapter/kernels/geop_npu.h +++ b/tf_adapter/kernels/geop_npu.h @@ -254,11 +254,6 @@ public: std::atomic_flag tuned_flag_; std::vector> remove_index_; std::string is_var_init_graph_; - std::string max_key_num_; - std::string embedding_dim_; - std::string use_counter_filter_; - std::string padding_key_; - std::string embedding_flags_; std::string recompute_mode_; std::vector> input_shapes_vec_; std::string jit_compile_; diff --git a/tf_adapter/ops/aicpu/npu_embedding_ops.cc 
b/tf_adapter/ops/aicpu/npu_embedding_ops.cc index 5487dd6099a53b92fe186827f06817f4f8e119b8..3cd15297544d220ddbe6d9d64c4c7d45684547f9 100644 --- a/tf_adapter/ops/aicpu/npu_embedding_ops.cc +++ b/tf_adapter/ops/aicpu/npu_embedding_ops.cc @@ -23,443 +23,6 @@ using shape_inference::DimensionHandle; using shape_inference::InferenceContext; using shape_inference::ShapeHandle; -REGISTER_OP("InitPartitionMap") - .Input("ps_num: int32") - .Input("ps_ids: int32") - .Attr("partition_num: int = 65537") - .SetShapeFn(shape_inference::NoOutputs); - -REGISTER_OP("InitEmbeddingHashmap") - .Input("table_id: int32") - .Attr("bucket_size: int = 0") - .Attr("value_total_len: int = 0") - .Attr("dtype: {uint8, uint16, float32} = DT_FLOAT") - .Attr("embedding_dim: int = 0") - .Attr("initializer_mode: string = '' ") - .Attr("constant_value: float = 0") - .Attr("min: float = -2") - .Attr("max: float = 2") - .Attr("mu: float = 0") - .Attr("sigma: float = 1") - .Attr("seed: int = 0") - .Attr("seed2: int = 0") - .Attr("filter_mode: string = 'no_filter' ") - .Attr("optimizer_mode: string = '' ") - .Attr("optimizer_params: list(float) = [0.1]") - .SetShapeFn(shape_inference::NoOutputs); - -REGISTER_OP("EmbeddingTableImport") - .Input("file_path: string") - .Input("ps_id: int32") - .Input("table_id: int32") - .Input("global_step: int64") - .Attr("embedding_dim: list(int)") - .Attr("value_total_len: list(int)") - .Attr("only_var_flag: bool = false") - .Attr("file_type: string = 'bin' ") - .Attr("table_name: list(string)") - .SetShapeFn(shape_inference::NoOutputs); - -REGISTER_OP("EmbeddingTableFind") - .Input("table_id: int32") - .Input("keys: int64") - .Output("values: float32") - .Attr("embedding_dim: list(int) = [0]") - .Attr("default_value: list(float) = [-1]") - .SetShapeFn([](shape_inference::InferenceContext *c) { - ShapeHandle keys_shape; - TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &keys_shape)); - std::vector embedding_dims; - if (!c->GetAttr("embedding_dim", &embedding_dims).ok()) 
{ - return errors::InvalidArgument("Invalid embedding_dim"); - } - c->set_output(0, c->Matrix(c->Dim(keys_shape, 0), embedding_dims[0])); - return Status::OK(); - }); - -REGISTER_OP("UninitPartitionMap") - .SetShapeFn(shape_inference::NoOutputs); - -REGISTER_OP("UninitEmbeddingHashmap") - .Input("table_id: int32") - .SetShapeFn(shape_inference::NoOutputs); - -REGISTER_OP("TableToResource") - .Input("table_id: int32") - .Output("table_handle: resource") - .SetShapeFn([](shape_inference::InferenceContext *c) { - auto data_shape = c->input(0); - c->set_output(0, data_shape); - return Status::OK(); - }); - -REGISTER_OP("EmbeddingTableFindAndInit") - .Input("table_id: int32") - .Input("keys: int64") - .Output("values: float32") - .Attr("embedding_dim: list(int) = [0]") - .Attr("value_total_len: list(int) = [0]") - .Attr("initializer_mode: list(string) = ['random_uniform']") - .Attr("constant_value: list(float) = [0]") - .Attr("min: list(float) = [-2]") - .Attr("max: list(float) = [2]") - .Attr("mu: list(float) = [0]") - .Attr("sigma: list(float) = [1]") - .Attr("seed: list(int) = [0]") - .Attr("seed2: list(int) = [0]") - .Attr("filter_mode: list(string) = ['no_filter']") - .Attr("filter_freq: list(int) = [0]") - .Attr("default_key_or_value: list(int) = [0]") - .Attr("default_key: list(int) = [0]") - .Attr("default_value: list(float) = [0]") - .Attr("completion_key: list(int) = [0]") - .Attr("completion_key_mask: list(int) = [1]") - .Attr("optimizer_mode: list(string) = [''] ") - .Attr("optimizer_params: list(float) = [0.1]") - .SetShapeFn([](shape_inference::InferenceContext *c) { - ShapeHandle keys_shape; - TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &keys_shape)); - std::vector embedding_dims; - if (!c->GetAttr("embedding_dim", &embedding_dims).ok()) { - return errors::InvalidArgument("Invalid embedding_dim"); - } - c->set_output(0, c->Matrix(c->Dim(keys_shape, 0), embedding_dims[0])); - return Status::OK(); - }); - -REGISTER_OP("FakeRemoteLookupUniqued") - 
.Input("table_id: int32") - .Input("keys: int64") - .Input("actual_keys_input: int64") - .Input("unique_indices: int32") - .Input("key_count: int64") - .Output("values: float32") - .Attr("embedding_dim: list(int) = [0]") - .Attr("value_total_len: list(int) = [0]") - .Attr("initializer_mode: list(string) = ['random_uniform']") - .Attr("constant_value: list(float) = [0]") - .Attr("min: list(float) = [-2]") - .Attr("max: list(float) = [2]") - .Attr("mu: list(float) = [0]") - .Attr("sigma: list(float) = [1]") - .Attr("seed: list(int) = [0]") - .Attr("seed2: list(int) = [0]") - .Attr("filter_mode: list(string) = ['no_filter']") - .Attr("filter_freq: list(int) = [0]") - .Attr("default_key_or_value: list(int) = [0]") - .Attr("default_key: list(int) = [0]") - .Attr("default_value: list(float) = [0]") - .Attr("completion_key: list(int) = [0]") - .Attr("completion_key_mask: list(int) = [1]") - .Attr("optimizer_mode: list(string) = [''] ") - .Attr("optimizer_params: list(float) = [0.1]") - .SetShapeFn([](shape_inference::InferenceContext *c) { - ShapeHandle keys_shape; - TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 1, &keys_shape)); - std::vector embedding_dims; - if (!c->GetAttr("embedding_dim", &embedding_dims).ok()) { - return errors::InvalidArgument("Invalid embedding_dim"); - } - c->set_output(0, c->Matrix(c->Dim(keys_shape, 0), embedding_dims[0])); - return Status::OK(); - }); - -REGISTER_OP("EmbeddingApplyAdam") - .Input("var_handle: resource") - .Input("beta1_power: T") - .Input("beta2_power: T") - .Input("lr: T") - .Input("beta1: T") - .Input("beta2: T") - .Input("epsilon: T") - .Input("grad: T") - .Input("keys: int64") - .Input("global_step: Tstep") - .Output("var_handle_output: resource") - .Attr("embedding_dim: list(int) = [0]") - .Attr("mask_zero: list(int) = [0]") - .Attr("padding_key: list(int) = [0]") - .Attr("padding_key_mask: list(int) = [1]") - .Attr("completion_key: list(int) = [0]") - .Attr("completion_key_mask: list(int) = [1]") - .Attr("T: {float32, 
float16}") - .Attr("Tstep: {int32, int64}") - .SetShapeFn([](shape_inference::InferenceContext *c) { - auto data_shape = c->input(0); - c->set_output(0, data_shape); - return Status::OK(); - }); - -REGISTER_OP("EmbeddingApplyAdamW") - .Input("var_handle: resource") - .Input("beta1_power: T") - .Input("beta2_power: T") - .Input("lr: T") - .Input("weight_decay: T") - .Input("beta1: T") - .Input("beta2: T") - .Input("epsilon: T") - .Input("grad: T") - .Input("keys: int64") - .Input("max_grad_norm: T") - .Input("global_step: Tstep") - .Output("var_handle_output: resource") - .Attr("amsgrad: list(int) = [0]") - .Attr("maximize: list(int) = [0]") - .Attr("embedding_dim: list(int) = [0]") - .Attr("mask_zero: list(int) = [0]") - .Attr("padding_key: list(int) = [0]") - .Attr("padding_key_mask: list(int) = [1]") - .Attr("completion_key: list(int) = [0]") - .Attr("completion_key_mask: list(int) = [1]") - .Attr("T: {float32, float16}") - .Attr("Tstep: {int32, int64}") - .SetShapeFn([](shape_inference::InferenceContext *c) { - auto data_shape = c->input(0); - c->set_output(0, data_shape); - return Status::OK(); - }); - -REGISTER_OP("EmbeddingApplyAdaGrad") - .Input("var_handle: resource") - .Input("lr: T") - .Input("grad: T") - .Input("keys: int64") - .Input("global_step: Tstep") - .Output("var_handle_output: resource") - .Attr("embedding_dim: list(int) = [0]") - .Attr("mask_zero: list(int) = [0]") - .Attr("padding_key: list(int) = [0]") - .Attr("padding_key_mask: list(int) = [1]") - .Attr("completion_key: list(int) = [0]") - .Attr("completion_key_mask: list(int) = [1]") - .Attr("T: {float32, float16}") - .Attr("Tstep: {int32, int64}") - .SetShapeFn([](shape_inference::InferenceContext *c) { - auto data_shape = c->input(0); - c->set_output(0, data_shape); - return Status::OK(); - }); - -REGISTER_OP("EmbeddingApplySgd") - .Input("var_handle: resource") - .Input("lr: T") - .Input("grad: T") - .Input("keys: int64") - .Input("global_step: Tstep") - .Output("var_handle_output: 
resource") - .Attr("embedding_dim: list(int) = [0]") - .Attr("mask_zero: list(int) = [0]") - .Attr("padding_key: list(int) = [0]") - .Attr("padding_key_mask: list(int) = [1]") - .Attr("completion_key: list(int) = [0]") - .Attr("completion_key_mask: list(int) = [1]") - .Attr("T: {float32, float16}") - .Attr("Tstep: {int32, int64}") - .SetShapeFn([](shape_inference::InferenceContext *c) { - auto data_shape = c->input(0); - c->set_output(0, data_shape); - return Status::OK(); - }); - -REGISTER_OP("EmbeddingApplyRmsprop") - .Input("var_handle: resource") - .Input("lr: T") - .Input("rho: T") - .Input("momentum: T") - .Input("epsilon: T") - .Input("grad: T") - .Input("keys: int64") - .Input("global_step: Tstep") - .Output("var_handle_output: resource") - .Attr("embedding_dim: list(int) = [0]") - .Attr("mask_zero: list(int) = [0]") - .Attr("padding_key: list(int) = [0]") - .Attr("padding_key_mask: list(int) = [1]") - .Attr("completion_key: list(int) = [0]") - .Attr("completion_key_mask: list(int) = [1]") - .Attr("T: numbertype") - .Attr("Tstep: {int32, int64}") - .SetShapeFn([](shape_inference::InferenceContext *c) { - auto data_shape = c->input(0); - c->set_output(0, data_shape); - return Status::OK(); - }); - -REGISTER_OP("EmbeddingApplyFtrl") - .Input("var_handle: resource") - .Input("lr: T") - .Input("lr_power: T") - .Input("lambda1: T") - .Input("lambda2: T") - .Input("grad: T") - .Input("keys: int64") - .Input("global_step: Tstep") - .Output("var_handle_output: resource") - .Attr("embedding_dim: list(int) = [0]") - .Attr("mask_zero: list(int) = [0]") - .Attr("padding_key: list(int) = [0]") - .Attr("padding_key_mask: list(int) = [1]") - .Attr("completion_key: list(int) = [0]") - .Attr("completion_key_mask: list(int) = [1]") - .Attr("T: {float32, float16}") - .Attr("Tstep: {int32, int64}") - .SetShapeFn([](shape_inference::InferenceContext *c) { - auto data_shape = c->input(0); - c->set_output(0, data_shape); - return Status::OK(); - }); - 
-REGISTER_OP("ExponentialDecayLR") - .Input("var_handle: resource") - .Input("initial_learning_rate: T") - .Input("decay_rate: T") - .Input("decay_steps: Tstep") - .Output("decayed_lr: T") - .Attr("staircase: bool = false") - .Attr("T: {float32, float16}") - .Attr("Tstep: {int32, int64}") - .SetShapeFn([](shape_inference::InferenceContext *c) { - auto data_shape = c->input(0); - c->set_output(0, data_shape); - return Status::OK(); - }); - -REGISTER_OP("EmbeddingComputeVarExport") - .Input("file_path: string") - .Input("ps_id: int32") - .Input("table_id: int32") - .Input("global_step: int64") - .Attr("table_name: list(string)") - .SetShapeFn(shape_inference::NoOutputs); - -REGISTER_OP("EmbeddingComputeVarImport") - .Input("file_path: string") - .Input("ps_id: int32") - .Input("table_id: int32") - .Input("global_step: int64") - .Attr("table_name: list(string)") - .SetShapeFn(shape_inference::NoOutputs); - -REGISTER_OP("EmbeddingTableExport") - .Input("file_path: string") - .Input("ps_id: int32") - .Input("table_id: int32") - .Input("global_step: Tstep") - .Attr("embedding_dim: list(int)") - .Attr("value_total_len: list(int)") - .Attr("export_mode: {'all', 'old', 'new', 'specifiednew'} = 'all'") - .Attr("only_var_flag: bool = false") - .Attr("file_type: string = 'bin' ") - .Attr("table_name: list(string)") - .Attr("filter_export_flag: bool = false") - .Attr("steps_to_live_list: list(int)") - .Attr("Tstep: {int32, int64}") - .SetShapeFn(shape_inference::NoOutputs); - -REGISTER_OP("EmbeddingTableEvict") - .Input("var_handle: T") - .Input("global_step: Tstep") - .Attr("steps_to_live: int = 0") - .Attr("T: {resource, int32}") - .Attr("Tstep: {int32, int64}") - .SetShapeFn(shape_inference::NoOutputs); - -REGISTER_OP("EmbeddingFeatureMapping") - .Input("feature_id: int64") - .Output("offset_id: int32") - .SetShapeFn([](shape_inference::InferenceContext *c) { - c->set_output(0, c->input(0)); - return Status::OK(); - }); - -REGISTER_OP("EmbeddingFeatureMappingV2") - 
.Input("table_name: string") - .Input("feature_id: int64") - .Attr("table_total_size: list(int) = [1]") - .Attr("table_actual_size: list(int) = [1]") - .Output("offset_id: int32") - .SetShapeFn([](shape_inference::InferenceContext *c) { - c->set_output(0, c->input(1)); - return Status::OK(); - }); - -REGISTER_OP("EmbeddingFeatureMappingTableSize") - .Input("table_name: string") - .Output("feature_size: int64") - .SetShapeFn([](shape_inference::InferenceContext *c) { - c->set_output(0, c->Vector(c->UnknownDim())); - return Status::OK(); - }); - -REGISTER_OP("EmbeddingFeatureMappingFind") - .Input("table_name: string") - .Input("feature_size: int64") - .Output("feature_id: num * int64") - .Output("offset_id: num * int32") - .Attr("num: int >= 1") - .SetShapeFn([](shape_inference::InferenceContext *c) { - int64 num = 0; - c->GetAttr("num", &num); - for (int64_t i = 0; i < num; ++i) { - c->set_output(i, c->Vector(c->UnknownDim())); - c->set_output(i + num, c->Vector(c->UnknownDim())); - } - return Status::OK(); - }); - -REGISTER_OP("EmbeddingFeatureMappingExport") - .Input("file_path: string") - .Input("table_name: string") - .Input("global_step: int64") - .Input("values: float") - .Input("feature_id: num * int64") - .Input("offset_id: num * int32") - .Attr("embedding_dim: list(int)") - .Attr("num: int >= 1") - .SetShapeFn(shape_inference::NoOutputs); - -REGISTER_OP("EmbeddingFeatureMappingFileSize") - .Input("file_path: string") - .Input("table_name: string") - .Input("global_step: int64") - .Output("feature_size: int64") - .Attr("embedding_dim: list(int)") - .Attr("only_offset_flag: bool = True") - .SetShapeFn([](shape_inference::InferenceContext *c) { - c->set_output(0, c->Vector(c->UnknownDim())); - return Status::OK(); - }); - -REGISTER_OP("EmbeddingFeatureMappingImport") - .Input("file_path: string") - .Input("table_name: string") - .Input("feature_size: int64") - .Input("global_step: int64") - .Output("feature_id: num * int64") - .Output("offset_id: num * 
int32") - .Output("values: num * float") - .Attr("embedding_dim: list(int)") - .Attr("only_offset_flag: bool = True") - .Attr("num: int >= 1") - .SetShapeFn([](shape_inference::InferenceContext *c) { - int64 num = 0; - c->GetAttr("num", &num); - for (int64_t i = 0; i < num; ++i) { - c->set_output(i, c->Vector(c->UnknownDim())); - c->set_output(i + num, c->Vector(c->UnknownDim())); - c->set_output(i + 2 * num, c->Vector(c->UnknownDim())); - } - return Status::OK(); - }); - -REGISTER_OP("EmbeddingFeatureMappingInsert") - .Input("table_name: string") - .Input("feature_id: num * int64") - .Input("offset_id: num * int32") - .Attr("num: int >= 1") - .SetShapeFn(shape_inference::NoOutputs); - REGISTER_OP("InitEmbeddingHashmapV2") .Input("table_id: int32") .Output("table_handle: int64") @@ -544,31 +107,4 @@ REGISTER_OP("EmbeddingHashmapImport") } return Status::OK(); }); - -REGISTER_OP("HostFeatureMapping") - .Input("feature_id: int64") - .Output("offset_id: int64") - .Attr("threshold: int = 1") - .Attr("table_name: string = 'default_table_name' ") - .SetShapeFn([](shape_inference::InferenceContext *c) { - c->set_output(0, c->input(0)); - return Status::OK(); - }); - -REGISTER_OP("FeatureMappingExport") - .Input("path: string") - .Attr("table_name_list: list(string)") - .Output("export_fake_output: string") - .SetShapeFn([](shape_inference::InferenceContext *c) { - c->set_output(0, c->input(0)); - return Status::OK(); - }); - -REGISTER_OP("FeatureMappingImport") - .Input("path: string") - .Output("import_fake_output: string") - .SetShapeFn([](shape_inference::InferenceContext *c) { - c->set_output(0, c->input(0)); - return Status::OK(); - }); } // namespace tensorflow diff --git a/tf_adapter/optimizers/om_partition_subgraphs_pass.cc b/tf_adapter/optimizers/om_partition_subgraphs_pass.cc index 65845499a7119316e5bd681bc6722282a1e601d1..95511cde208dd31af1107ff4462b5ef082fd9d8b 100644 --- a/tf_adapter/optimizers/om_partition_subgraphs_pass.cc +++ 
b/tf_adapter/optimizers/om_partition_subgraphs_pass.cc @@ -2103,11 +2103,6 @@ void OMPartitionSubgraphsPass::GetGraphConfig(const Node &node, bool enable_dp, const std::string kDynamicInputsShapeRange = "_graph_dynamic_inputs_shape_range"; const std::string kIsTrainGraph = "_is_train_graph"; const std::string kRecomputeMode = "_recompute_mode"; - const std::string kMaxKeyNum = "_max_key_num"; - const std::string kEmbeddingDim = "_embedding_dim"; - const std::string kUseCounterFilter = "_use_counter_filter"; - const std::string kPaddingKey = "_padding_key"; - const std::string kPaddingKeyMask = "_embedding_flags"; if (node_attrs.find(kDynamicInput) != node_attrs.end()) { bool dynamic_input = node_attrs.at(kDynamicInput).b(); graph_options["dynamic_input"] = std::to_string(static_cast(dynamic_input)); @@ -2128,22 +2123,6 @@ void OMPartitionSubgraphsPass::GetGraphConfig(const Node &node, bool enable_dp, std::string recompute_mode = node_attrs.at(kRecomputeMode).s(); graph_options["recompute_mode"] = recompute_mode; } - if (node_attrs.find(kMaxKeyNum) != node_attrs.end()) { - graph_options["max_key_num"] = std::to_string(static_cast(node_attrs.at(kMaxKeyNum).i())); - } - if (node_attrs.find(kEmbeddingDim) != node_attrs.end()) { - const auto embedding_dim = node_attrs.at(kEmbeddingDim).i(); - graph_options["embedding_dim"] = std::to_string(static_cast(embedding_dim)); - } - if (node_attrs.find(kUseCounterFilter) != node_attrs.end()) { - graph_options["use_counter_filter"] = std::to_string(static_cast(node_attrs.at(kUseCounterFilter).i())); - } - if (node_attrs.find(kPaddingKey) != node_attrs.end()) { - graph_options["padding_key"] = std::to_string(static_cast(node_attrs.at(kPaddingKey).i())); - } - if (node_attrs.find(kPaddingKeyMask) != node_attrs.end()) { - graph_options["embedding_flags"] = std::to_string(static_cast(node_attrs.at(kPaddingKeyMask).b())); - } } Status OMPartitionSubgraphsPass::ProcessGetNext(Node &node, const std::string enable_dp, diff --git 
a/tf_adapter/python/npu_bridge/embedding/__init__.py b/tf_adapter/python/npu_bridge/embedding/__init__.py deleted file mode 100644 index fa8b91fd5cbffda44daa736a7459f1bdc3978e48..0000000000000000000000000000000000000000 --- a/tf_adapter/python/npu_bridge/embedding/__init__.py +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - - -from npu_bridge.embedding.embedding_optimizer import AdamOptimizer as EmbeddingAdamOptimizer -from npu_bridge.embedding.embedding_optimizer import AdagradOptimizer as EmbeddingAdagradOptimizer -from npu_bridge.embedding.embedding_optimizer import AdamWOptimizer as EmbeddingAdamWOptimizer -from npu_bridge.embedding.embedding_optimizer import SgdOptimizer as EmbeddingSgdOptimizer -from npu_bridge.embedding.embedding_optimizer import RmspropOptimizer as EmbeddingRmspropOptimizer -from npu_bridge.embedding.embedding_optimizer import FtrlOptimizer as EmbeddingFtrlOptimizer -from npu_bridge.embedding.embedding_optimizer import EmbeddingHashTableAdamWOptimizer as AdamWOptimizer -from npu_bridge.embedding.embedding_optimizer import exponential_decay_lr as exponential_decay_lr -from npu_bridge.embedding.embedding_service import ESWorker as EmbeddingService -from npu_bridge.embedding.embedding_service import es_initializer 
as es_initializer -from npu_bridge.embedding.embedding_table_map_policy import NoneTableMapPolicy, AutoMergeTableMapPolicy -from npu_bridge.embedding.tf_path import path_on_tf -path_on_tf() \ No newline at end of file diff --git a/tf_adapter/python/npu_bridge/embedding/embedding_optimizer.py b/tf_adapter/python/npu_bridge/embedding/embedding_optimizer.py deleted file mode 100644 index 7d6eb1536e922f9c2a4c3b59000cef677d619d4c..0000000000000000000000000000000000000000 --- a/tf_adapter/python/npu_bridge/embedding/embedding_optimizer.py +++ /dev/null @@ -1,704 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -import tensorflow as tf -from tensorflow.python.framework import ops -from tensorflow.python.eager import context -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import control_flow_ops -from tensorflow.core.framework import attr_value_pb2 -from tensorflow.python.training import optimizer -from tensorflow.python.training import adam -from tensorflow.python.training import adagrad -from tensorflow.python.training import training_ops -from tensorflow.python.training import training_util -from npu_bridge.embedding.embedding_resource import NpuEmbeddingResource, NpuEmbeddingResourceV2 -from npu_bridge.npu_cpu.npu_cpu_ops import gen_npu_cpu_ops - -_GLOBAL_STEP_VALUE = 1 -_ADAM_BEAT1_POWER_VALUE = 0.9 -_ADAM_BEAT2_POWER_VALUE = 0.99 -_ADAMW_BEAT1_POWER_VALUE = 0.9 -_ADAMW_BEAT2_POWER_VALUE = 0.99 -_SMALL_ADAMW_INDEX = 0 - - -class AdamOptimizer(adam.AdamOptimizer): - def __init__(self, - learning_rate=0.01, - beta_1=0.9, - beta_2=0.999, - epsilon=1e-8, - using_locking=False, - name="EmbeddingAdamOptimizer"): - """Construct a EmbeddingAdam optimizer.""" - if isinstance(learning_rate, ExponentialDecayLR): - lr = learning_rate.learning_rate - self._decay_rate = learning_rate.decay_rate - self._decay_steps = learning_rate.decay_steps - self._staircase = learning_rate.staircase - self._decay_steps_t = None - self._decay_rate_t = None - self._use_adaptive_lr = True - else: - lr = learning_rate - self._use_adaptive_lr = False - super(AdamOptimizer, self).__init__(lr, beta_1, beta_2, epsilon, using_locking, name) - self._beta1_power = None - self._beta2_power = None - self.embedding_dim = -1 - self.max_num = -1 - self.padding_key = None - self.padding_key_mask = 1 - self.completion_key = None - self.completion_key_mask = 1 - self.embedding_flags = -1 - self.mask_zero = 0 - - def _prepare(self): - lr = self._call_if_callable(self._lr) - epsilon = 
self._call_if_callable(self._epsilon) - beta1 = self._call_if_callable(self._beta1) - beta2 = self._call_if_callable(self._beta2) - beta1_power = self._call_if_callable(_ADAM_BEAT1_POWER_VALUE) - beta2_power = self._call_if_callable(_ADAM_BEAT2_POWER_VALUE) - - self._lr_t = ops.convert_to_tensor(lr, name="learning_rate") - self._epsilon_t = ops.convert_to_tensor(epsilon, name="epsilon") - self._beta1_t = ops.convert_to_tensor(beta1, name="beta1") - self._beta2_t = ops.convert_to_tensor(beta2, name="beta2") - self._beta1_power = ops.convert_to_tensor(beta1_power, name="beta1_power") - self._beta2_power = ops.convert_to_tensor(beta2_power, name="beta2_power") - - if self._use_adaptive_lr: - decay_steps = self._call_if_callable(self._decay_steps) - decay_rate = self._call_if_callable(self._decay_rate) - self._decay_steps_t = ops.convert_to_tensor(decay_steps, name="decay_steps") - self._decay_rate_t = ops.convert_to_tensor(decay_rate, name="decay_rate") - - def _resource_apply_sparse(self, grad, var, indices): - if isinstance(var, NpuEmbeddingResource): - if self._use_adaptive_lr: - lr_output = gen_npu_cpu_ops.exponential_decay_lr(var_handle=var.handle, - initial_learning_rate= - math_ops.cast(self._lr_t, grad.dtype), - decay_rate=self._decay_rate_t, - decay_steps=self._decay_steps_t, - staircase=self._staircase) - else: - lr_output = math_ops.cast(self._lr_t, grad.dtype) - result = gen_npu_cpu_ops.embedding_apply_adam(var_handle=var.handle, - beta1_power=math_ops.cast(self._beta1_power, grad.dtype), - beta2_power=math_ops.cast(self._beta2_power, grad.dtype), - lr=lr_output, - beta1=math_ops.cast(self._beta1_t, grad.dtype), - beta2=math_ops.cast(self._beta2_t, grad.dtype), - epsilon=math_ops.cast(self._epsilon_t, grad.dtype), - grad=grad, - keys=indices, - global_step=ops.convert_to_tensor(_GLOBAL_STEP_VALUE), - embedding_dim=[self.embedding_dim], - mask_zero=[self.mask_zero], - padding_key=[self.padding_key], - padding_key_mask=[self.padding_key_mask], - 
completion_key=[self.completion_key], - completion_key_mask=[self.completion_key_mask]) - result.op._set_attr("_embedding_dim", attr_value_pb2.AttrValue(i=self.embedding_dim)) - result.op._set_attr("_max_key_num", attr_value_pb2.AttrValue(i=self.max_num)) - result.op._set_attr("_padding_key", attr_value_pb2.AttrValue(i=self.padding_key)) - result.op._set_attr("_embedding_flags", attr_value_pb2.AttrValue(i=self.embedding_flags)) - return result - else: - return self._apply_sparse_shared(grad, var, indices, self._resource_scatter_add) - - def _create_slots(self, var_list): - for v in var_list: - if not isinstance(v, NpuEmbeddingResource): - self._zeros_slot(v, "m", self._name) - self._zeros_slot(v, "v", self._name) - - def _finish(self, update_ops, name_scope): - # Update the power accumulators. - return control_flow_ops.group(*update_ops, name=name_scope) - - -class AdagradOptimizer(adagrad.AdagradOptimizer): - def __init__(self, - learning_rate=0.01, - initial_accumulator_value=0.1, - using_locking=False, - name="EmbeddingAdagradOptimizer"): - """Construct a EmbeddingAdagrad optimizer.""" - if isinstance(learning_rate, ExponentialDecayLR): - lr = learning_rate.learning_rate - self._decay_rate = learning_rate.decay_rate - self._decay_steps = learning_rate.decay_steps - self._staircase = learning_rate.staircase - self._use_adaptive_lr = True - else: - lr = learning_rate - self._use_adaptive_lr = False - super(AdagradOptimizer, self).__init__(lr, initial_accumulator_value, using_locking, name) - self.mask_zero = 0 - self.initial_accumulator_value = initial_accumulator_value - self.embedding_dim = -1 - self.max_num = -1 - self.padding_key = None - self.padding_key_mask = 1 - self.completion_key = None - self.completion_key_mask = 1 - self.embedding_flags = -1 - - def _resource_apply_sparse(self, grad, var, indices): - if isinstance(var, NpuEmbeddingResource): - if self._use_adaptive_lr: - lr_output = gen_npu_cpu_ops.exponential_decay_lr(var_handle=var.handle, - 
initial_learning_rate= - math_ops.cast(self._learning_rate_tensor, grad.dtype), - decay_rate=math_ops.cast(self._decay_rate, grad.dtype), - decay_steps=self._decay_steps, - staircase=self._staircase) - else: - lr_output = math_ops.cast(self._learning_rate_tensor, grad.dtype) - result = gen_npu_cpu_ops.embedding_apply_ada_grad(var_handle=var.handle, - lr=lr_output, - grad=grad, - keys=indices, - global_step=ops.convert_to_tensor(_GLOBAL_STEP_VALUE), - embedding_dim=[self.embedding_dim], - mask_zero=[self.mask_zero], - padding_key=[self.padding_key], - padding_key_mask=[self.padding_key_mask], - completion_key=[self.completion_key], - completion_key_mask=[self.completion_key_mask]) - result.op._set_attr("_embedding_dim", attr_value_pb2.AttrValue(i=self.embedding_dim)) - result.op._set_attr("_max_key_num", attr_value_pb2.AttrValue(i=self.max_num)) - result.op._set_attr("_padding_key", attr_value_pb2.AttrValue(i=self.padding_key)) - result.op._set_attr("_embedding_flags", attr_value_pb2.AttrValue(i=self.embedding_flags)) - return result - else: - return self.training_ops.resource_sparse_apply_adagrad(var.handle, grad.handle, - math_ops.cast(self._learning_rate_tensor, - grad.dtype), - grad, indices, - use_locking=self._use_locking) - - def _create_slots(self, var_list): - for v in var_list: - if not isinstance(v, NpuEmbeddingResource): - dtype = v.dtype.base_dtype - if v.get_shape().is_fully_defined(): - init = init_ops.constant_initializer(self._initial_accumulator_value, - dtype=dtype) - else: - init = self._init_constant_op(v, dtype) - self._get_or_make_slot_with_initializer(v, init, v.get_shape(), dtype, - "accumulator", self._name) - - -class AdamWOptimizer(optimizer.Optimizer): - """A basic adam optimizer that includes "correct" L2 weight decay.""" - - def __init__(self, - learning_rate=0.01, - weight_decay=0.004, - beta_1=0.9, - beta_2=0.999, - epsilon=1e-6, - max_grad_norm=0.5, - amsgrad: bool = False, - maximize: bool = False, - name="AdamWOptimizer"): - 
"""Construct a AdamW optimizer.""" - if isinstance(learning_rate, ExponentialDecayLR): - lr = learning_rate.learning_rate - self._decay_rate = learning_rate.decay_rate - self._decay_steps = learning_rate.decay_steps - self._staircase = learning_rate.staircase - self._decay_steps_t = None - self._decay_rate_t = None - self._use_adaptive_lr = True - else: - lr = learning_rate - self._use_adaptive_lr = False - super(AdamWOptimizer, self).__init__(False, name) - if (learning_rate is None) or (weight_decay is None) or (beta_1 is None) or (beta_2 is None): - raise ValueError("learning_rate, weight decay, beta_1 and beta_2 can not be None.") - if (epsilon is None) or (amsgrad is None) or (maximize is None): - raise ValueError("epsilon, amsgrad and maximize can not be None.") - if (max_grad_norm is None) and amsgrad: - raise ValueError("if amsgrad is True, max_grad_norm can not be None.") - self._weight_decay = weight_decay - self._lr = lr - self._beta1 = beta_1 - self._beta2 = beta_2 - self._epsilon = epsilon - self._max_grad_norm = max_grad_norm - self._amsgrad = amsgrad - self._maximize = maximize - - # Tensor versions of the constructor arguments, created in _prepare() - self._weight_decay_t = None - self._lr_t = None - self._beta1_t = None - self._beta2_t = None - self._epsilon_t = None - self._max_grad_norm_t = None - self._beta1_power_t = None - self._beta2_power_t = None - self.mask_zero = 0 - self.embedding_dim = -1 - self.max_num = -1 - self.padding_key = None - self.padding_key_mask = 1 - self.completion_key = None - self.completion_key_mask = 1 - self.embedding_flags = -1 - - def _prepare(self): - beta1_power = self._call_if_callable(_ADAMW_BEAT1_POWER_VALUE) - beta2_power = self._call_if_callable(_ADAMW_BEAT2_POWER_VALUE) - lr = self._call_if_callable(self._lr) - weight_decay = self._call_if_callable(self._weight_decay) - beta1 = self._call_if_callable(self._beta1) - beta2 = self._call_if_callable(self._beta2) - epsilon = self._call_if_callable(self._epsilon) 
- max_grad_norm = self._call_if_callable(self._max_grad_norm) - - self._beta1_power_t = ops.convert_to_tensor(beta1_power, name="beta1_power") - self._beta2_power_t = ops.convert_to_tensor(beta2_power, name="beta2_power") - self._lr_t = ops.convert_to_tensor(lr, name="learning_rate") - self._weight_decay_t = ops.convert_to_tensor(weight_decay, name="weight_decay") - self._beta1_t = ops.convert_to_tensor(beta1, name="beta1") - self._beta2_t = ops.convert_to_tensor(beta2, name="beta2") - self._epsilon_t = ops.convert_to_tensor(epsilon, name="epsilon") - self._max_grad_norm_t = ops.convert_to_tensor(max_grad_norm, name="max_grad_norm") - if self._use_adaptive_lr: - decay_steps = self._call_if_callable(self._decay_steps) - decay_rate = self._call_if_callable(self._decay_rate) - self._decay_steps_t = ops.convert_to_tensor(decay_steps, name="decay_steps") - self._decay_rate_t = ops.convert_to_tensor(decay_rate, name="decay_rate") - - def _resource_apply_sparse(self, grad, var, indices): - if isinstance(var, NpuEmbeddingResource): - if self._use_adaptive_lr: - lr_output = gen_npu_cpu_ops.exponential_decay_lr(var_handle=var.handle, - initial_learning_rate= - math_ops.cast(self._lr_t, grad.dtype), - decay_rate=self._decay_rate_t, - decay_steps=self._decay_steps_t, - staircase=self._staircase) - else: - lr_output = math_ops.cast(self._lr_t, grad.dtype) - result = gen_npu_cpu_ops.embedding_apply_adam_w(var_handle=var.handle, - beta1_power= - math_ops.cast(self._beta1_power_t, grad.dtype), - beta2_power= - math_ops.cast(self._beta2_power_t, grad.dtype), - lr=lr_output, - weight_decay= - math_ops.cast(self._weight_decay_t, grad.dtype), - beta1=math_ops.cast(self._beta1_t, grad.dtype), - beta2=math_ops.cast(self._beta2_t, grad.dtype), - epsilon=math_ops.cast(self._epsilon_t, grad.dtype), - grad=grad, - keys=indices, - max_grad_norm= - math_ops.cast(self._max_grad_norm_t, grad.dtype), - global_step=ops.convert_to_tensor(_GLOBAL_STEP_VALUE), - amsgrad=[self._amsgrad], - 
maximize=[self._maximize], - embedding_dim=[self.embedding_dim], - mask_zero=[self.mask_zero], - padding_key=[self.padding_key], - padding_key_mask=[self.padding_key_mask], - completion_key=[self.completion_key], - completion_key_mask=[self.completion_key_mask]) - result.op._set_attr("_embedding_dim", attr_value_pb2.AttrValue(i=self.embedding_dim)) - result.op._set_attr("_max_key_num", attr_value_pb2.AttrValue(i=self.max_num)) - result.op._set_attr("_padding_key", attr_value_pb2.AttrValue(i=self.padding_key)) - result.op._set_attr("_embedding_flags", attr_value_pb2.AttrValue(i=self.embedding_flags)) - return result - else: - raise TypeError("Variable is not NpuEmbeddingResource type, please check.") - - -class SgdOptimizer(optimizer.Optimizer): - """A sgd optimizer that apply SGD algorithm.""" - - def __init__(self, - learning_rate=0.01, - name="EmbeddingApplySgdOptimizer"): - """Construct a AdamW optimizer.""" - if isinstance(learning_rate, ExponentialDecayLR): - lr = learning_rate.learning_rate - self._decay_rate = learning_rate.decay_rate - self._decay_steps = learning_rate.decay_steps - self._staircase = learning_rate.staircase - self._decay_steps_t = None - self._decay_rate_t = None - self._use_adaptive_lr = True - else: - lr = learning_rate - self._use_adaptive_lr = False - super(SgdOptimizer, self).__init__(False, name) - self._lr = lr - self.mask_zero = 0 - self.embedding_dim = -1 - self.max_num = -1 - self.padding_key = None - self.padding_key_mask = 1 - self.completion_key = None - self.completion_key_mask = 1 - self.embedding_flags = -1 - - def _prepare(self): - lr = self._call_if_callable(self._lr) - self._lr_t = ops.convert_to_tensor(lr, name="learning_rate") - if self._use_adaptive_lr: - decay_steps = self._call_if_callable(self._decay_steps) - decay_rate = self._call_if_callable(self._decay_rate) - self._decay_steps_t = ops.convert_to_tensor(decay_steps, name="decay_steps") - self._decay_rate_t = ops.convert_to_tensor(decay_rate, name="decay_rate") - 
- def _resource_apply_sparse(self, grad, var, indices): - if isinstance(var, NpuEmbeddingResource): - if self._use_adaptive_lr: - lr_output = gen_npu_cpu_ops.exponential_decay_lr(var_handle=var.handle, - initial_learning_rate= - math_ops.cast(self._lr_t, grad.dtype), - decay_rate=self._decay_rate_t, - decay_steps=self._decay_steps_t, - staircase=self._staircase) - else: - lr_output = math_ops.cast(self._lr_t, grad.dtype) - result = gen_npu_cpu_ops.embedding_apply_sgd(var_handle=var.handle, - lr=lr_output, - grad=grad, - keys=indices, - global_step=ops.convert_to_tensor(_GLOBAL_STEP_VALUE), - embedding_dim=[self.embedding_dim], - mask_zero=[self.mask_zero], - padding_key=[self.padding_key], - padding_key_mask=[self.padding_key_mask], - completion_key=[self.completion_key], - completion_key_mask=[self.completion_key_mask]) - result.op._set_attr("_embedding_dim", attr_value_pb2.AttrValue(i=self.embedding_dim)) - result.op._set_attr("_max_key_num", attr_value_pb2.AttrValue(i=self.max_num)) - result.op._set_attr("_padding_key", attr_value_pb2.AttrValue(i=self.padding_key)) - result.op._set_attr("_embedding_flags", attr_value_pb2.AttrValue(i=self.embedding_flags)) - return result - else: - raise TypeError("Variable is not NpuEmbeddingResource type, please check.") - - -class RmspropOptimizer(optimizer.Optimizer): - """A Rmsprop optimizer that use rmsprop algorithm.""" - - def __init__(self, - learning_rate=0.01, - ms=0.9, - mom=0.0, - rho=0.9, - momentum=0.9, - epsilon=1e-8, - name="EmbeddingApplyRmspropOptimizer"): - """Construct an ApplyRmsprop optimizer.""" - if isinstance(learning_rate, ExponentialDecayLR): - lr = learning_rate.learning_rate - self._decay_rate = learning_rate.decay_rate - self._decay_steps = learning_rate.decay_steps - self._staircase = learning_rate.staircase - self._decay_steps_t = None - self._decay_rate_t = None - self._use_adaptive_lr = True - else: - lr = learning_rate - self._use_adaptive_lr = False - super(RmspropOptimizer, 
self).__init__(False, name) - self.ms = ms - self.mom = mom - self._rho = rho - self._momentum = momentum - self._epsilon = epsilon - self._lr = lr - self._rho_t = None - self._momentum_t = None - self.mask_zero = 0 - self.embedding_dim = -1 - self.max_num = -1 - self.padding_key = None - self.padding_key_mask = 1 - self.completion_key = None - self.completion_key_mask = 1 - self.embedding_flags = -1 - - def _prepare(self): - rho = self._call_if_callable(self._rho) - momentum = self._call_if_callable(self._momentum) - epsilon = self._call_if_callable(self._epsilon) - lr = self._call_if_callable(self._lr) - - self._rho_t = ops.convert_to_tensor(rho, name="rho") - self._momentum_t = ops.convert_to_tensor(momentum, name="momentum") - self._epsilon_t = ops.convert_to_tensor(epsilon, name="epsilon") - self._lr_t = ops.convert_to_tensor(lr, name="learning_rate") - if self._use_adaptive_lr: - decay_steps = self._call_if_callable(self._decay_steps) - decay_rate = self._call_if_callable(self._decay_rate) - self._decay_steps_t = ops.convert_to_tensor(decay_steps, name="decay_steps") - self._decay_rate_t = ops.convert_to_tensor(decay_rate, name="decay_rate") - - def _resource_apply_sparse(self, grad, var, indices): - if isinstance(var, NpuEmbeddingResource): - if self._use_adaptive_lr: - lr_output = gen_npu_cpu_ops.exponential_decay_lr(var_handle=var.handle, - initial_learning_rate= - math_ops.cast(self._lr_t, grad.dtype), - decay_rate=self._decay_rate_t, - decay_steps=self._decay_steps_t, - staircase=self._staircase) - else: - lr_output = math_ops.cast(self._lr_t, grad.dtype) - result = gen_npu_cpu_ops.embedding_apply_rmsprop(var_handle=var.handle, - lr=lr_output, - rho=math_ops.cast(self._rho_t, grad.dtype), - momentum=math_ops.cast(self._momentum_t, grad.dtype), - epsilon=math_ops.cast(self._epsilon_t, grad.dtype), - grad=grad, - keys=indices, - global_step=ops.convert_to_tensor(_GLOBAL_STEP_VALUE), - embedding_dim=[self.embedding_dim], - mask_zero=[self.mask_zero], - 
padding_key=[self.padding_key], - padding_key_mask=[self.padding_key_mask], - completion_key=[self.completion_key], - completion_key_mask=[self.completion_key_mask]) - result.op._set_attr("_embedding_dim", attr_value_pb2.AttrValue(i=self.embedding_dim)) - result.op._set_attr("_max_key_num", attr_value_pb2.AttrValue(i=self.max_num)) - result.op._set_attr("_padding_key", attr_value_pb2.AttrValue(i=self.padding_key)) - result.op._set_attr("_embedding_flags", attr_value_pb2.AttrValue(i=self.embedding_flags)) - return result - else: - raise TypeError("Variable is not NpuEmbeddingResource type, please check.") - - -class FtrlOptimizer(optimizer.Optimizer): - """A Ftrl optimizer that use ftrl algorithm.""" - - def __init__(self, - learning_rate, - learning_rate_power=-0.5, - initial_accumulator_value=0.1, - l1_regularization_strength=0.0, - l2_regularization_strength=0.0, - using_locking=False, - name="EmbeddingFtrlOptimizer"): - """Construct an Ftrl optimizer.""" - if isinstance(learning_rate, ExponentialDecayLR): - lr = learning_rate.learning_rate - self._decay_rate = learning_rate.decay_rate - self._decay_steps = learning_rate.decay_steps - self._staircase = learning_rate.staircase - self._decay_steps_t = None - self._decay_rate_t = None - self._use_adaptive_lr = True - else: - lr = learning_rate - self._use_adaptive_lr = False - super(FtrlOptimizer, self).__init__(using_locking, name) - self._lr = lr - self._lr_power = learning_rate_power - self._l1 = l1_regularization_strength - self._l2 = l2_regularization_strength - self.accum = initial_accumulator_value - self.linear = 0.0 - self._lr_power_t = None - self._l1_t = None - self._l2_t = None - self._decay_steps_t = None - self._decay_rate_t = None - self.mask_zero = 0 - self.embedding_dim = -1 - self.max_num = -1 - self.padding_key = None - self.padding_key_mask = 1 - self.completion_key = None - self.completion_key_mask = 1 - self.embedding_flags = -1 - - def _prepare(self): - lr = self._call_if_callable(self._lr) - 
lr_power = self._call_if_callable(self._lr_power) - l1 = self._call_if_callable(self._l1) - l2 = self._call_if_callable(self._l2) - - self._lr_t = ops.convert_to_tensor(lr, name="learning_rate") - self._lr_power_t = ops.convert_to_tensor(lr_power, name="lr_power") - self._l1_t = ops.convert_to_tensor(l1, name="lambda1") - self._l2_t = ops.convert_to_tensor(l2, name="lambda2") - if self._use_adaptive_lr: - decay_steps = self._call_if_callable(self._decay_steps) - decay_rate = self._call_if_callable(self._decay_rate) - self._decay_steps_t = ops.convert_to_tensor(decay_steps, name="decay_steps") - self._decay_rate_t = ops.convert_to_tensor(decay_rate, name="decay_rate") - - def _resource_apply_sparse(self, grad, var, indices): - if isinstance(var, NpuEmbeddingResource): - if self._use_adaptive_lr: - lr_output = gen_npu_cpu_ops.exponential_decay_lr(var_handle=var.handle, - initial_learning_rate= - math_ops.cast(self._lr_t, grad.dtype), - decay_rate=self._decay_rate_t, - decay_steps=self._decay_steps_t, - staircase=self._staircase) - else: - lr_output = math_ops.cast(self._lr_t, grad.dtype) - result = gen_npu_cpu_ops.embedding_apply_ftrl(var_handle=var.handle, - lr=lr_output, - lr_power=math_ops.cast(self._lr_power_t, grad.dtype), - lambda1=math_ops.cast(self._l1_t, grad.dtype), - lambda2=math_ops.cast(self._l2_t, grad.dtype), - grad=grad, - keys=indices, - global_step=ops.convert_to_tensor(_GLOBAL_STEP_VALUE), - mask_zero=[self.mask_zero], - embedding_dim=[self.embedding_dim], - padding_key=[self.padding_key], - padding_key_mask=[self.padding_key_mask], - completion_key=[self.completion_key], - completion_key_mask=[self.completion_key_mask]) - result.op._set_attr("_embedding_dim", attr_value_pb2.AttrValue(i=self.embedding_dim)) - result.op._set_attr("_max_key_num", attr_value_pb2.AttrValue(i=self.max_num)) - result.op._set_attr("_padding_key", attr_value_pb2.AttrValue(i=self.padding_key)) - result.op._set_attr("_embedding_flags", 
attr_value_pb2.AttrValue(i=self.embedding_flags)) - return result - else: - raise TypeError("Variable is not NpuEmbeddingResource type, please check.") - - -class EmbeddingHashTableAdamWOptimizer(optimizer.Optimizer): - """A basic adam optimizer that includes "correct" L2 weight decay.""" - - def __init__(self, - learning_rate=0.01, - weight_decay=0.004, - beta_1=0.9, - beta_2=0.999, - epsilon=1e-6, - amsgrad: bool = False, - maximize: bool = False, - name="EmbeddingHashTableAdamWOptimizer"): - """Construct a AdamW optimizer.""" - super(EmbeddingHashTableAdamWOptimizer, self).__init__(False, name) - if (learning_rate is None) or (weight_decay is None) or (beta_1 is None) or (beta_2 is None): - raise ValueError("learning_rate, weight decay, beta_1 and beta_2 can not be None.") - if (epsilon is None) or (amsgrad is None) or (maximize is None): - raise ValueError("epsilon, amsgrad and maximize can not be None.") - # const input - self._lr = learning_rate - self._weight_decay = weight_decay - self._beta1 = beta_1 - self._beta2 = beta_2 - self._epsilon = epsilon - # var ref input - self._beta1_power_v = tf.Variable(initial_value=0.9, name="beta1_power_" + str(_SMALL_ADAMW_INDEX)) - self._beta2_power_v = tf.Variable(initial_value=0.9, name="beta2_power_" + str(_SMALL_ADAMW_INDEX)) - # attr - self._amsgrad = amsgrad - self._maximize = maximize - # Tensor versions of the constructor arguments, created in _prepare() - self._lr_t = None - self._weight_decay_t = None - self._beta1_t = None - self._beta2_t = None - self._epsilon_t = None - # attr - self.embedding_dim = -1 - self.bucket_size = -1 - - def _prepare(self): - self._m_v = tf.Variable(tf.random_uniform([self.bucket_size, self.embedding_dim], minval=1.0, maxval=1.0), - name="m_" + str(_SMALL_ADAMW_INDEX)) - self._v_v = tf.Variable(tf.random_uniform([self.bucket_size, self.embedding_dim], minval=1.0, maxval=1.0), - name="v_" + str(_SMALL_ADAMW_INDEX)) - self._max_grad_norm_v = \ - 
tf.Variable(tf.random_uniform([self.bucket_size, self.embedding_dim], minval=1.0, maxval=1.0), - name="max_grad_norm_" + str(_SMALL_ADAMW_INDEX)) - lr = self._call_if_callable(self._lr) - weight_decay = self._call_if_callable(self._weight_decay) - beta1 = self._call_if_callable(self._beta1) - beta2 = self._call_if_callable(self._beta2) - epsilon = self._call_if_callable(self._epsilon) - - self._lr_t = ops.convert_to_tensor(lr, name="learning_rate") - self._weight_decay_t = ops.convert_to_tensor(weight_decay, name="weight_decay") - self._beta1_t = ops.convert_to_tensor(beta1, name="beta1") - self._beta2_t = ops.convert_to_tensor(beta2, name="beta2") - self._epsilon_t = ops.convert_to_tensor(epsilon, name="epsilon") - - def _resource_apply_sparse(self, grad, var, indices): - if isinstance(var, NpuEmbeddingResourceV2): - result = gen_npu_cpu_ops.embedding_hash_table_apply_adam_w(table_handle=var.handle, - m=self._m_v, - v=self._v_v, - beta1_power=self._beta1_power_v, - beta2_power=self._beta2_power_v, - lr=math_ops.cast(self._lr_t, grad.dtype), - weight_decay= - math_ops.cast(self._weight_decay_t, grad.dtype), - beta1=math_ops.cast(self._beta1_t, grad.dtype), - beta2=math_ops.cast(self._beta2_t, grad.dtype), - epsilon= - math_ops.cast(self._epsilon_t, grad.dtype), - grad=grad, - keys=indices, - max_grad_norm=self._max_grad_norm_v, - embedding_dim=self.embedding_dim, - bucket_size=self.bucket_size, - amsgrad=self._amsgrad, - maximize=self._maximize - ) - return result - else: - raise TypeError("Variable is not NpuEmbeddingResourceV2 type, please check.") - - -class ExponentialDecayLR: - """ exponential decay learning rate used in embedding optimizer. 
""" - - def __init__(self, learning_rate, decay_steps, decay_rate, staircase=False): - self.learning_rate = learning_rate - self.decay_steps = decay_steps - self.decay_rate = decay_rate - self.staircase = staircase - - -def exponential_decay_lr(learning_rate, decay_steps, decay_rate, staircase=False): - """" Operator for init ExponentialDecayLr. """ - if (learning_rate is None) or (not isinstance(learning_rate, (float, int))): - raise ValueError("learning_rate can not be None, must be float or int.") - if (decay_rate is None) or (not isinstance(decay_rate, (float, int))): - raise ValueError("decay_rate can not be None, must be float or int.") - if (decay_steps is None) or (not isinstance(decay_steps, int)): - raise ValueError("decay_steps can not be None, must be int.") - if (learning_rate <= 0) or (decay_rate <= 0) or (decay_steps <= 0): - raise ValueError("learning_rate, decay_rate and decay_steps must be bigger than 0") - if not isinstance(staircase, bool): - raise TypeError("staircase must be bool.") - return ExponentialDecayLR(learning_rate=learning_rate, decay_steps=decay_steps, decay_rate=decay_rate, - staircase=staircase) diff --git a/tf_adapter/python/npu_bridge/embedding/embedding_resource.py b/tf_adapter/python/npu_bridge/embedding/embedding_resource.py deleted file mode 100644 index 6c9368116e10286bfb937fb32e9f44c6029c3991..0000000000000000000000000000000000000000 --- a/tf_adapter/python/npu_bridge/embedding/embedding_resource.py +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -from tensorflow.python.framework import ops -from npu_bridge.npu_cpu.npu_cpu_ops import gen_npu_cpu_ops - - -class NpuEmbeddingResource: - - def __init__(self, table_id): - self.name = table_id - self._tensor = gen_npu_cpu_ops.table_to_resource(ops.convert_to_tensor(table_id)) - - @property - def handle(self): - return self._tensor - - @property - def graph(self): - return self._tensor.graph - - @property - def op(self): - return self._tensor.op - - @property - def device(self): - return self._tensor.op.device - - -class NpuEmbeddingResourceV2: - - def __init__(self, table_id): - self.name = table_id - self._tensor = gen_npu_cpu_ops.table_to_resource_v2(ops.convert_to_tensor([table_id])) - - @property - def handle(self): - return self._tensor - - @property - def graph(self): - return self._tensor.graph - - @property - def op(self): - return self._tensor.op - - @property - def device(self): - return self._tensor.op.device - diff --git a/tf_adapter/python/npu_bridge/embedding/embedding_service.py b/tf_adapter/python/npu_bridge/embedding/embedding_service.py deleted file mode 100644 index c0cdc79128fef2a56b73bef93cdfd092df910dc9..0000000000000000000000000000000000000000 --- a/tf_adapter/python/npu_bridge/embedding/embedding_service.py +++ /dev/null @@ -1,1725 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -import json -import contextlib -import os -import math -import typing -import re -import tensorflow as tf -from tensorflow.python.framework import ops -from tensorflow.python.framework import random_seed -from tensorflow.python.platform import tf_logging as logging -from tensorflow.core.framework import attr_value_pb2 -from npu_bridge.npu_cpu.npu_cpu_ops import gen_npu_cpu_ops -from npu_bridge.hccl.hccl_ops import allgather -from hccl.manage.api import create_group -from hccl.manage.api import set_ps_table_num -from npu_bridge.embedding.embedding_resource import NpuEmbeddingResource, NpuEmbeddingResourceV2 -from npu_bridge.embedding import embedding_optimizer -from npu_bridge.embedding.embedding_table_map_policy import NoneTableMapPolicy, AutoMergeTableMapPolicy -from npu_bridge.embedding.embedding_utils import EmbeddingVariableOption, CounterFilter, PaddingParamsOption, \ - CompletionKeyOption, check_common_init_params, check_each_initializer, check_init_params_type -from npu_bridge.embedding.embedding_utils import EvictOption - -_INT32_MAX_VALUE = 2147483647 -_SAVE_EVICT_COMM_GROUP = "_set_evict_group_0" - - -@contextlib.contextmanager -def specified_ps_engine_scope(): - """ - Enable the non npu compilation of operators within the scope. 
- """ - attrs = { - "_process_node_engine_id": attr_value_pb2.AttrValue(s=tf.compat.as_bytes("PS")) - } - with ops.get_default_graph()._attr_scope(attrs): - yield - - -class EsInitializer: - """Initializer for embedding service table.""" - - def __init__(self, initializer_mode, min=-0.01, max=0.01, constant_value=1.0, mu=0.0, sigma=1.0, seed=0): - self.initializer_mode = initializer_mode - self.min = min - self.max = max - self.constant_value = constant_value - self.mu = mu - self.sigma = sigma - self.seed = seed - - -# 提供 embedding_service table initializer method -# min 下限值, float 类型 -# max 上限值, float 类型 -# initializer_mode 初始化方式, string 类型 -# constant_value 常量初始化的常量值, float 类型 -# mu 正态分布的均值, float 类型 -# sigma 正态分布的标准差, float 类型 -def es_initializer(initializer_mode, min=-2.0, max=2.0, constant_value=0.0, mu=0.0, sigma=1.0, seed=0): - """Operator for init initializer.""" - if initializer_mode is None: - raise ValueError("initializer_mode can not be None.") - if initializer_mode == 'random_uniform': - if (min is None) or (max is None) or \ - (not isinstance(min, (float, int))) or (not isinstance(max, (float, int))): - raise ValueError("If initializer is random_uniform, min and max can not be None, must be int or float.") - if initializer_mode == 'truncated_normal': - if (min is None) or (max is None) or (mu is None) or (sigma is None): - raise ValueError("If initializer is truncated_normal, min, max, mu and sigma can not be None") - if (not isinstance(min, (float, int))) or (not isinstance(max, (float, int))) or \ - (not isinstance(mu, (float, int))) or (not isinstance(sigma, (float, int))): - raise ValueError("If initializer is truncated_normal, min, max, mu and sigma must be int or float.") - if initializer_mode == 'constant': - if (constant_value is None) or (not isinstance(constant_value, (float, int))): - raise ValueError("If initializer is constant, constant_value can not be None, must be float or int.") - if min > max: - raise ValueError("Initializer min 
value can not be larger than max value.") - if (initializer_mode != 'constant') and (initializer_mode != 'random_uniform') and \ - (initializer_mode != 'truncated_normal'): - raise ValueError("Initializer mode must be random_uniform or truncated normal or constant.") - return EsInitializer(initializer_mode=initializer_mode, - min=min, - max=max, - constant_value=constant_value, - mu=mu, - sigma=sigma, - seed=seed) - - -def check_small_hashtable_init_params(name, init_vocabulary_size, embedding_dim, max_feature_count, initializer_mode): - if (name is None) or (init_vocabulary_size is None) or (embedding_dim is None): - raise ValueError("table name, init_vocabulary_size and embedding_dim can not be None.") - if not isinstance(name, str): - raise TypeError("embedding table name must be string.") - regex = re.compile('[@!#$%^&*()<>?/\|}{~:]') - if regex.search(name) is not None: - raise ValueError("table name contains illegal character.") - if (not isinstance(init_vocabulary_size, int)) or (not isinstance(embedding_dim, int)) or \ - (not isinstance(max_feature_count, int)): - raise ValueError("init_vocabulary_size, embedding_dim and max_feature_count must be int.") - if init_vocabulary_size <= 0 or max_feature_count <= 0 or embedding_dim <= 0: - raise ValueError("init_vocabulary_size and max_feature_count and embedding_dim must be greater than zero.") - if (initializer_mode is not None) and (initializer_mode != "random") and (initializer_mode != "constant"): - raise TypeError("initializer_mode must be random or constant") - - -class ESWorker: - """ Embedding service class. 
""" - - def __init__(self): - self._server_ip_to_ps_num = {} - self._update_config_params() - - self._init_embedding_hash_maps = {} - self._init_partition_maps = {} - # storage each ps table's params - self._table_to_embedding_dim = {} - self._table_to_max_num = {} - self._table_to_optimizer = {} - self._table_to_initializer = {} - self._table_to_slot_var_num = {} - self._table_to_counter_filter = {} - self._use_counter_filter = False - self._use_padding_key = False - self._use_evict = False - self._train_mode = True - self._train_level = False - self._optimizer = None - self._init_table_flag = False - - self._small_table_name_list = [] - self._small_table_variable_list = [] - self._small_table_variable_dim_list = [] - self._ps_table_count = 0 - self._table_name_to_id = {} - self._table_id_to_name = {} - self._table_id_to_initializer = {} - self._table_id_to_steps_to_live = {} - - self._ps_table_id_list = [] - self._existing_lookup_table_ids = [] - # storage lookup: table_id list, lookup result list, lookup key list - self._ps_lookup_index = 0 - self._ps_table_has_lookup = [] - self._ps_table_lookup_key = [] - self._ps_table_lookup_result = [] - # host unique - self.key_recovery_matrix = [] - self.use_host_unique = False - # storage all inited table names - self._table_name_has_init = [] - # only storage all inited PS table names - self._ps_table_name_list = [] - # feature_mapping export and import - self._feature_mapping_name_list = [] - # now only use for adagrad accum - self._ps_table_id_to_optimizer_mode = {} - self._ps_table_id_to_optimizer_params = {} - - # use for small table merge - self.user_defined_table_infos = [] - self.table_map_policy = None - self.table_create_infos = [] - self.total_variable_table = [] - self._small_table_embedding_dim = -1 - # if all small table do not merge - self._small_table_to_variable = {} - self._small_table_to_multihot_lens = {} - self._small_table_name_to_multihot_lens = {} - self._small_table_name_to_max_vocabulary_size = 
{} - self.total_embedding_count = 0 - self._npu_table_to_embedding_dim = {} - self._need_table_merge = False - self._only_merge_to_one_table = True - self._small_table_init = False - # use for counter filter - self._table_use_counter_filter = {} - - self._default_key_or_value = 1 - self._filter_freq = None - self._default_key = None - self._default_value = None - - # use for evict option - self._steps_to_live = 0 - self._use_completion_key = False - self._table_id_to_completion_option = {} - self._user_group_set = set() - # test for david small table - self._init_small_hashtable_params() - - # 提供 embedding_service table initializer method - # table_id embedding 表索引, int 类型 - # min 下限值, float 类型 - # max 上限值, float 类型 - # initializer_mode 初始化方式, string 类型 - # constant_value 常量初始化的常量值, float 类型 - # mu 正态分布的均值, float 类型 - # sigma 正态分布的标准差, float 类型 - def initializer(self, table_id, initializer_mode, min=-2.0, max=2.0, constant_value=0.0, mu=0.0, sigma=1.0): - """Operator for init initializer.""" - if (table_id is None) or (initializer_mode is None): - raise ValueError("table_id and initializer_mode can not be None.") - check_each_initializer(initializer_mode=initializer_mode, min_value=min, max_value=max, - constant_value=constant_value, mu=mu, sigma=sigma) - if (not isinstance(table_id, int)) or (table_id < 0) or (table_id >= _INT32_MAX_VALUE): - raise ValueError("table_id value is false, must be [0, 2147483647) and int type, please check.") - if min > max: - raise ValueError("Initializer min value can not be larger than max value.") - if (initializer_mode != 'constant') and (initializer_mode != 'random_uniform') and \ - (initializer_mode != 'truncated_normal'): - raise ValueError("Initializer mode must be random_uniform or truncated normal or constant.") - self._table_id_to_initializer[table_id] = EsInitializer(min=min, - max=max, - initializer_mode=initializer_mode, - constant_value=constant_value, - mu=mu, - sigma=sigma) - - # embedding variable option - # 
包括特征准入及淘汰策略,特征存储策略及通信策略等 - # 暂时只使用特征准入option - def embedding_variable_option(self, filter_option=None, padding_option=None, evict_option=None, - completion_option=None, storage_option=None, feature_freezing_option=None, - communication_option=None): - if (filter_option is not None) and (not isinstance(filter_option, CounterFilter)): - raise ValueError("If padding_option isn't None, it must be CounterFilter type.") - if filter_option is not None: - self._use_counter_filter = True - if (padding_option is not None) and (not isinstance(padding_option, PaddingParamsOption)): - raise TypeError("If padding_option isn't None, it must be EmbeddingPaddingParamsOption type.") - if (completion_option is not None) and (not isinstance(completion_option, CompletionKeyOption)): - raise TypeError("If completion_option isn't None, it must be EmbeddingPaddingCompletionKeyOption type.") - if (evict_option is not None) and (not isinstance(evict_option, EvictOption)): - raise TypeError("When evict_option is not None, it must be EvictOption type.") - return EmbeddingVariableOption(filter_option=filter_option, padding_option=padding_option, - evict_option=evict_option, completion_option=completion_option, - storage_option=storage_option, feature_freezing_option=feature_freezing_option, - communication_option=communication_option) - - # new version - # 提供embedding init功能 - # @param vocabulary_size 表的初始大小, int 类型 - # @param table_id, int32 类型 - # @param max_batch_size, int32 类型 - # @param optimizer, 支持EmbeddingAdamOptimizer,EmbeddingAdagradOptimizer,EmbeddingAdamwOptimizer - # @param initializer, string 类型 - # @param embedding_dim, int32 类型 - def get_embedding_variable(self, name, init_vocabulary_size, embedding_dim, key_dtype=tf.int64, - value_dtype=tf.float32, partitioner=None, - initializer=tf.random_uniform_initializer(minval=-0.01, maxval=0.01, seed=1234), - embedding_type="PS", ev_option=None, max_feature_count=None, multihot_lens=None, - optimizer=None, allow_merge=False, 
mask_zero=False): - """ Operator for get embedding variable according to embedding type. """ - check_common_init_params(name=name, init_vocabulary_size=init_vocabulary_size, embedding_dim=embedding_dim, - embedding_type=embedding_type, mask_zero=mask_zero) - if embedding_type == "data_parallel": - self._check_and_update_small_init_params(name=name, init_vocabulary_size=init_vocabulary_size, - embedding_dim=embedding_dim, multihot_lens=multihot_lens, - key_dtype=key_dtype, value_dtype=value_dtype, - allow_merge=allow_merge, initializer=initializer) - new_small_table_info = dict( - name=name, - max_vocabulary_size=init_vocabulary_size, - embedding_dim=embedding_dim, - multihot_lens=multihot_lens, - allow_merge=allow_merge, - initializer=initializer) - self._small_table_embedding_dim = embedding_dim - self.user_defined_table_infos.append(new_small_table_info) - return new_small_table_info - elif embedding_type == "PS": - table_id = self._check_and_update_ps_init_params(name=name, init_vocabulary_size=init_vocabulary_size, - embedding_dim=embedding_dim, - max_feature_count=max_feature_count, ev_option=ev_option) - self._ps_lookup_index = self._ps_table_count - self._table_to_embedding_dim[table_id] = embedding_dim - self._table_to_max_num[table_id] = max_feature_count - # storage the table id for embedding PS table - self._ps_table_id_list.append(table_id) - self._ps_table_name_list.append(name) - if len(self._ps_table_id_list) > 1024: - raise ValueError("Now only 1024 PS embedding tables can be init.") - bucket_size = math.ceil(init_vocabulary_size / self._ps_num) - if optimizer is None: - self._train_mode = False - self._table_to_slot_var_num[table_id] = 0 - else: - self._check_ps_opt_and_initializer(optimizer=optimizer, initializer=initializer, table_id=table_id) - self._set_ps_optimizer_params(table_id=table_id, optimizer=optimizer, embedding_dim=embedding_dim, - max_feature_count=max_feature_count, mask_zero=mask_zero, - ev_option=ev_option) - # new train or 
    # Embedding lookup entry point.
    # @param name table name, str
    # @param ids keys to look up, int64 tensor
    # @return looked-up values, float32
    def embedding_lookup(self, name: str, ids: typing.Any, actual_keys_input=None, unique_indices=None, key_count=None):
        """ Operator for look up in embedding table.

        Temporarily stores per-table filter parameters on self for the lookup
        op call, then resets them. When host-side unique is active
        (actual_keys_input + unique_indices supplied), the recovery indices
        are recorded so embedding_update can scatter gradients back.
        """
        table_id = self._check_ps_lookup_params(name=name, ids=ids)
        if self._table_to_counter_filter.get(table_id) is not None:
            # Counter-filter configured for this table: stage its parameters.
            filter_mode = "counter"
            self._filter_freq = self._table_to_counter_filter.get(table_id).filter_freq
            self._default_key_or_value = 1 if self._table_to_counter_filter.get(table_id). \
                default_key_or_value is True else 0
            self._default_key = self._table_to_counter_filter.get(table_id).default_key
            self._default_value = self._table_to_counter_filter.get(table_id).default_value
        else:
            filter_mode = "no_filter"
            # useless — placeholder values, ignored in "no_filter" mode
            self._filter_freq = 1
            self._default_key_or_value = 1
            self._default_key = 0
            self._default_value = -1
        # whether to use host unique to improve performance
        self.use_host_unique = False
        use_counter_filter = False
        if (actual_keys_input is not None) and (unique_indices is not None):
            self.use_host_unique = True
        if key_count is not None:
            use_counter_filter = True

        result = self._call_lookup_op(table_id=table_id, ids=ids, actual_keys_input=actual_keys_input,
                                      unique_indices=unique_indices, filter_mode=filter_mode,
                                      use_counter_filter=use_counter_filter, key_count=key_count)

        # Reset the staged filter parameters after the op is built.
        self._filter_freq = None
        self._default_key_or_value = 1
        self._default_key = None
        self._default_value = None
        if (self._ps_lookup_index != 0) or (self._existing_lookup_table_ids.count(table_id) != 0):
            self._ps_table_has_lookup.append(table_id)
            self._ps_table_lookup_key.append(ids)
            self._ps_table_lookup_result.append(result)
            self._ps_lookup_index = self._ps_lookup_index - 1
            if self.use_host_unique:
                self.key_recovery_matrix.append(unique_indices)
            # restore table id that has called lookup, if this table call lookup again, key and values must be stored.
            self._existing_lookup_table_ids.append(table_id)
        return result
    # Embedding update entry point.
    # @param loss the training loss tensor to differentiate
    def embedding_update(self, loss):
        """ Operator for update in embedding table.

        Takes the lookups recorded by embedding_lookup, computes gradients of
        `loss` w.r.t. the looked-up values, and applies them through each
        table's optimizer as IndexedSlices keyed by the lookup ids. Clears
        the recorded lookup state as a side effect. Returns the list of
        apply_gradients ops.
        """
        params = self._ps_table_lookup_result
        input_ids_list = self._ps_table_lookup_key
        table_ids = self._ps_table_has_lookup
        self._check_update_params(params, input_ids_list, table_ids, loss)
        # Call HCCL python API
        set_ps_table_num(self._ps_table_count)
        # Normalize scalars into single-element lists.
        if (not isinstance(params, (list, tuple)) and not isinstance(table_ids, (list, tuple))
                and not isinstance(input_ids_list, (list, tuple))):
            params = [params]
            table_ids = [table_ids]
            input_ids_list = [input_ids_list]
        for table_id in table_ids:
            if table_id not in self._ps_table_id_list:
                raise ValueError("this table has not yet initialized.")
        if (len(params) != len(table_ids)) or (len(params) != len(input_ids_list)) \
                or (len(table_ids) != len(input_ids_list)):
            raise ValueError("The length of params, table_ids, input_ids_list should be equal.")
        embedding_grads = tf.gradients(loss, params)
        update_op = []
        # Reset recorded lookup state so the next step starts clean.
        self._ps_table_lookup_result = []
        self._ps_table_lookup_key = []
        self._ps_table_has_lookup = []

        if self.use_host_unique:
            # Undo host-side unique: gather the original (pre-unique) keys so
            # gradients align with the keys actually fed by the caller.
            key_before_unique = []
            for i in range(len(table_ids)):
                key_before_unique.append(tf.gather(input_ids_list[i], self.key_recovery_matrix[i]))
        with specified_ps_engine_scope():
            for i in range(len(table_ids)):
                if embedding_grads[i] is None:
                    continue  # loss does not depend on this lookup
                if self.use_host_unique:
                    params_grads = [tf.IndexedSlices(embedding_grads[i], key_before_unique[i],
                                                     dense_shape=params[i].shape)]
                else:
                    params_grads = [tf.IndexedSlices(embedding_grads[i], input_ids_list[i],
                                                     dense_shape=params[i].shape)]
                var_refs = [NpuEmbeddingResource(table_ids[i])]
                update_op.append(
                    self._table_to_optimizer.get(table_ids[i]).apply_gradients(list(zip(params_grads, var_refs))))
        return update_op
""" - params = self._ps_table_lookup_result - input_ids_list = self._ps_table_lookup_key - table_ids = self._ps_table_has_lookup - self._check_update_params(params, input_ids_list, table_ids, loss) - # Call HCCL python API - set_ps_table_num(self._ps_table_count) - if (not isinstance(params, (list, tuple)) and not isinstance(table_ids, (list, tuple)) - and not isinstance(input_ids_list, (list, tuple))): - params = [params] - table_ids = [table_ids] - input_ids_list = [input_ids_list] - for table_id in table_ids: - if table_id not in self._ps_table_id_list: - raise ValueError("this table has not yet initialized.") - if (len(params) != len(table_ids)) or (len(params) != len(input_ids_list)) \ - or (len(table_ids) != len(input_ids_list)): - raise ValueError("The length of params, table_ids, input_ids_list should be equal.") - embedding_grads = tf.gradients(loss, params) - update_op = [] - self._ps_table_lookup_result = [] - self._ps_table_lookup_key = [] - self._ps_table_has_lookup = [] - - if self.use_host_unique: - key_before_unique = [] - for i in range(len(table_ids)): - key_before_unique.append(tf.gather(input_ids_list[i], self.key_recovery_matrix[i])) - with specified_ps_engine_scope(): - for i in range(len(table_ids)): - if embedding_grads[i] is None: - continue - if self.use_host_unique: - params_grads = [tf.IndexedSlices(embedding_grads[i], key_before_unique[i], - dense_shape=params[i].shape)] - else: - params_grads = [tf.IndexedSlices(embedding_grads[i], input_ids_list[i], - dense_shape=params[i].shape)] - var_refs = [NpuEmbeddingResource(table_ids[i])] - update_op.append( - self._table_to_optimizer.get(table_ids[i]).apply_gradients(list(zip(params_grads, var_refs)))) - return update_op - - def padding_param(self, padding_key, mask=True): - if not isinstance(padding_key, int): - raise TypeError("padding_key must be int, please check.") - if not isinstance(mask, bool): - raise TypeError("mask must be bool, please check.") - self._use_padding_key = True - 
return PaddingParamsOption(padding_key=padding_key, mask=mask) - - def completion_key(self, completion_key, mask=True): - if not isinstance(completion_key, int): - raise TypeError("completion_key must be int, please check.") - if not isinstance(mask, bool): - raise TypeError("mask must be bool, please check.") - self._use_completion_key = True - completion_key_mask = 1 if mask is True else 0 - return CompletionKeyOption(completion_key=completion_key, mask=completion_key_mask) - - def counter_filter(self, filter_freq, default_key=None, default_value=None): - if not isinstance(filter_freq, int): - raise TypeError("filter_freq must be int, please check.") - if filter_freq < 0: - raise ValueError("filter_freq must can not be smaller than 0.") - if (default_key is None) and (default_value is None): - raise ValueError("default_key and default_value can not be both None.") - if (default_key is not None) and (default_value is not None): - raise ValueError("default_key and default_value can not be both set.") - if default_key is None and (not isinstance(default_value, (int, float))): - raise TypeError("When default_value is not None, it must be float or int, please check.") - if default_value is None and (not isinstance(default_key, int)): - raise TypeError("When default_key is not None, it must be int, please check.") - self._use_counter_filter = True - if default_key is None: - return CounterFilter(filter_freq=filter_freq, default_key_or_value=False, - default_key=0, default_value=default_value) - else: - return CounterFilter(filter_freq=filter_freq, default_key_or_value=True, - default_key=default_key, default_value=1) - - def evict_option(self, steps_to_live): - if not isinstance(steps_to_live, int): - raise TypeError("steps_to_live must be int, please check.") - if steps_to_live <= 0: - raise ValueError("steps_to_live must must be greater than 0.") - self._use_evict = True - return EvictOption(steps_to_live=steps_to_live) - - def init_table(self, 
    # Small-table lookup entry point (new version).
    def embeddings_lookup(self, ids_list, name=None):
        """Look up merged small tables, dispatching to v1/v2/v3 helpers.

        v1: no table merge needed; v2: exactly one merged table; v3: several
        merged tables. Requires init_table() to have been called when merging
        is enabled.
        """
        if ids_list is None:
            raise ValueError("ids_list can not be None.")
        env_dist = os.environ
        # NOTE(review): int() raises TypeError if RANK_SIZE/RANK_ID are unset —
        # presumably both env vars are guaranteed by the launcher; confirm.
        rank_size = int(env_dist.get("RANK_SIZE"))
        rank_id = int(env_dist.get("RANK_ID"))
        if rank_size < 1:
            raise ValueError('Rank size from env must be at least 1, 'f' Received: {rank_size}.')
        if rank_id < 0 or rank_id >= rank_size:
            raise ValueError('Rank id from env must be at least 0, and smaller than Rank Size.'
                             'But Rank id 'f' Received: {rank_id}.')

        ids_list_shape_list = ids_list.get_shape().as_list()
        if not self._need_table_merge:
            return self._small_table_lookup_v1(name, rank_id, rank_size, ids_list)

        if self.total_embedding_count != len(self.table_create_infos) or self.total_embedding_count == 0:
            raise ValueError("Must init_table() first!")
        # Unpack the slot/table mapping produced by the table-map policy.
        (in_slot_size_group, slot_to_table, table_to_input_group, \
         table_to_slot, table_to_output_slots, in_slot_vocabulary_size_group, table_to_vocabulary_slots) = \
            (self.table_map_policy.in_slot_size_group, self.table_map_policy.slot_to_table, \
             self.table_map_policy.table_to_input_groups, self.table_map_policy.table_to_slot, \
             self.table_map_policy.table_to_output_slots, self.table_map_policy.in_slot_vocabulary_size_group, \
             self.table_map_policy.table_to_vocabulary_slots)

        # The second dim of ids_list must equal the sum of all slot sizes.
        total_in_slot_num = 0
        for in_slot_size in in_slot_size_group:
            total_in_slot_num += in_slot_size
        if ids_list_shape_list[1] != total_in_slot_num:
            raise ValueError("size of ids_list is not the same as all small tables.")

        if self.total_embedding_count == 1:
            return self._small_table_lookup_v2(rank_id, rank_size, in_slot_size_group,
                                              ids_list, table_to_output_slots, table_to_slot,
                                              in_slot_vocabulary_size_group, ids_list_shape_list[0])

        return self._small_table_lookup_v3(rank_id, rank_size, ids_list, in_slot_size_group, slot_to_table,
                                           table_to_input_group, table_to_output_slots, table_to_slot,
                                           table_to_vocabulary_slots, ids_list_shape_list[0])
    def save_embedding(self, name: str, path: str, step=None):
        """ Operator for save values in table_id embedding table.

        Exports only the variable values (only_var_flag=True, export_mode
        "all") of the single table `name` to `path` as a bin file. Only rank 0
        passes the real global step; other ranks send -1.
        """
        self._check_save_or_restore_params(name=name, path=path)
        env_dist = os.environ  # NOTE(review): unused local, kept as-is
        table_id = self._table_name_to_id.get(name)
        # NOTE(review): unlike restore_embedding there is no `step is None`
        # guard before tf.cast — confirm callers always pass a step.
        step = tf.cast(step, dtype=tf.int64)
        if int(os.environ.get("RANK_ID")) != 0:
            step = -1
        self._create_comm_group_for_allgather()
        with specified_ps_engine_scope():
            file_path_tensor = ops.convert_to_tensor(path, name="file_path")
            ps_id_tensor = ops.convert_to_tensor(-1, name="ps_id")
            table_id_tensor = ops.convert_to_tensor([table_id], name="table_id")
            step_to_live = self._table_id_to_steps_to_live.get(table_id, 0)
            embedding_table_export = \
                gen_npu_cpu_ops.embedding_table_export(file_path=file_path_tensor,
                                                       ps_id=ps_id_tensor,
                                                       table_id=table_id_tensor,
                                                       embedding_dim=[self._table_to_embedding_dim.get(table_id)],
                                                       value_total_len=[self._table_to_embedding_dim.get(table_id)],
                                                       export_mode="all",
                                                       only_var_flag=True,
                                                       file_type="bin",
                                                       table_name=[name],
                                                       global_step=step,
                                                       steps_to_live_list=[step_to_live])
            return tf.group([embedding_table_export])
""" - self._check_save_or_restore_params(name=name, path=path) - env_dist = os.environ - table_id = self._table_name_to_id.get(name) - step = tf.cast(step, dtype=tf.int64) - if int(os.environ.get("RANK_ID")) != 0: - step = -1 - self._create_comm_group_for_allgather() - with specified_ps_engine_scope(): - file_path_tensor = ops.convert_to_tensor(path, name="file_path") - ps_id_tensor = ops.convert_to_tensor(-1, name="ps_id") - table_id_tensor = ops.convert_to_tensor([table_id], name="table_id") - step_to_live = self._table_id_to_steps_to_live.get(table_id, 0) - embedding_table_export = \ - gen_npu_cpu_ops.embedding_table_export(file_path=file_path_tensor, - ps_id=ps_id_tensor, - table_id=table_id_tensor, - embedding_dim=[self._table_to_embedding_dim.get(table_id)], - value_total_len=[self._table_to_embedding_dim.get(table_id)], - export_mode="all", - only_var_flag=True, - file_type="bin", - table_name=[name], - global_step=step, - steps_to_live_list=[step_to_live]) - return tf.group([embedding_table_export]) - - def save_embeddings(self, path: str, step=None): - """ Operator for save values in all embedding tables. 
""" - if len(self._ps_table_name_list) != 0: - self._check_save_or_restore_params_v2(path=path, save_flag=True) - feature_mapping_export_list = None - step = tf.cast(step, dtype=tf.int64) - if len(self._small_table_variable_list) != 0: - feature_mapping_export_list = self._call_feature_mapping_export_op(path, True, step) - if self._ps_table_count == 0: - return feature_mapping_export_list - if int(os.environ.get("RANK_ID")) != 0: - step = -1 - self._create_comm_group_for_allgather() - return self._call_embeddings_export_op(path=path, feature_mapping_export_list=feature_mapping_export_list, - step=step) - - def restore_embedding(self, name: str, path: str, step=None): - self._check_save_or_restore_params(name=name, path=path) - table_id = self._table_name_to_id.get(name) - if (step is None) or ((os.environ.get("RANK_ID") is not None) and (int(os.environ.get("RANK_ID")) != 0)): - step = -1 - else: - step = tf.cast(step, dtype=tf.int64) - self._create_comm_group_for_allgather() - with specified_ps_engine_scope(): - embedding_table_import = \ - gen_npu_cpu_ops.embedding_table_import(ps_id=ops.convert_to_tensor(-1), - file_path=ops.convert_to_tensor(path), - table_id=ops.convert_to_tensor([table_id]), - global_step=step, - embedding_dim=[self._table_to_embedding_dim.get(table_id)], - value_total_len=[self._table_to_embedding_dim.get(table_id)], - only_var_flag=True, - file_type="bin", - table_name=[name]) - return tf.group([embedding_table_import]) - - def restore_embeddings(self, path: str, step=None): - if len(self._ps_table_name_list) != 0: - self._check_save_or_restore_params_v2(path=path, save_flag=False) - if step is None: - step = -1 - else: - step = tf.cast(step, dtype=tf.int64) - if len(self._small_table_variable_list) != 0: - feature_mapping_import_list = self._call_feature_mapping_import_op(path=path, import_value=True, step=step) - if self._ps_table_count == 0: - return feature_mapping_import_list - if (os.environ.get("RANK_ID") is not None) and 
    def save_checkpoint(self, name: str, path: str, save_filtered_features=False, step=None):
        """ Operator for save values and optimizer params in table_id embedding table.

        Unlike save_embedding this exports optimizer slot variables too
        (only_var_flag=False, value_total_len = dim * (slots + 1) + 2) and
        chains an embedding_compute_var_export after the table export.
        """
        self._check_save_or_restore_params(name=name, path=path)
        if not isinstance(save_filtered_features, bool):
            raise TypeError("save_filtered_features must be bool.")
        step = tf.cast(step, dtype=tf.int64)
        if int(os.environ.get("RANK_ID")) != 0:
            step = -1  # only rank 0 passes the real global step
        self._create_comm_group_for_allgather()
        table_id = self._table_name_to_id.get(name)
        with specified_ps_engine_scope():
            file_path_tensor = ops.convert_to_tensor(path, name="file_path")
            ps_id_tensor = ops.convert_to_tensor(-1, name="ps_id")
            table_id_tensor = ops.convert_to_tensor([table_id], name="table_id")
            step_to_live = self._table_id_to_steps_to_live.get(table_id, 0)
            embedding_table_export = \
                gen_npu_cpu_ops.embedding_table_export(file_path=file_path_tensor,
                                                       ps_id=ps_id_tensor,
                                                       table_id=table_id_tensor,
                                                       embedding_dim=[self._table_to_embedding_dim.get(table_id)],
                                                       value_total_len=[self._table_to_embedding_dim.get(table_id) *
                                                                        (self._table_to_slot_var_num.get(
                                                                            table_id) + 1) + 2],
                                                       export_mode="all",
                                                       only_var_flag=False,
                                                       file_type="bin",
                                                       table_name=[name],
                                                       filter_export_flag=save_filtered_features,
                                                       global_step=step,
                                                       steps_to_live_list=[step_to_live])
            # Compute-var export must run after the table export completes.
            with tf.control_dependencies([embedding_table_export]):
                embedding_compute_var_export = \
                    gen_npu_cpu_ops.embedding_compute_var_export(file_path=file_path_tensor,
                                                                 ps_id=ps_id_tensor,
                                                                 table_id=table_id_tensor,
                                                                 global_step=step,
                                                                 table_name=[name])
                return tf.group([embedding_compute_var_export])
""" - self._check_save_or_restore_params(name=name, path=path) - if not isinstance(save_filtered_features, bool): - raise TypeError("save_filtered_features must be bool.") - step = tf.cast(step, dtype=tf.int64) - if int(os.environ.get("RANK_ID")) != 0: - step = -1 - self._create_comm_group_for_allgather() - table_id = self._table_name_to_id.get(name) - with specified_ps_engine_scope(): - file_path_tensor = ops.convert_to_tensor(path, name="file_path") - ps_id_tensor = ops.convert_to_tensor(-1, name="ps_id") - table_id_tensor = ops.convert_to_tensor([table_id], name="table_id") - step_to_live = self._table_id_to_steps_to_live.get(table_id, 0) - embedding_table_export = \ - gen_npu_cpu_ops.embedding_table_export(file_path=file_path_tensor, - ps_id=ps_id_tensor, - table_id=table_id_tensor, - embedding_dim=[self._table_to_embedding_dim.get(table_id)], - value_total_len=[self._table_to_embedding_dim.get(table_id) * - (self._table_to_slot_var_num.get( - table_id) + 1) + 2], - export_mode="all", - only_var_flag=False, - file_type="bin", - table_name=[name], - filter_export_flag=save_filtered_features, - global_step=step, - steps_to_live_list=[step_to_live]) - with tf.control_dependencies([embedding_table_export]): - embedding_compute_var_export = \ - gen_npu_cpu_ops.embedding_compute_var_export(file_path=file_path_tensor, - ps_id=ps_id_tensor, - table_id=table_id_tensor, - global_step=step, - table_name=[name]) - return tf.group([embedding_compute_var_export]) - - def save_checkpoints(self, path: str, save_filtered_features=False, export_feature_mapping=False, step=None): - """ Operator for save values and optimizer params in all embedding tables. 
""" - if len(self._ps_table_name_list) != 0: - self._check_save_or_restore_params_v2(path=path, save_flag=True) - if not isinstance(save_filtered_features, bool): - raise TypeError("save_filtered_features must be bool.") - feature_mapping_export_list = None - step = tf.cast(step, dtype=tf.int64) - if export_feature_mapping or len(self._small_table_variable_list) != 0: - feature_mapping_export_list = self._call_feature_mapping_export_op(path, False, step) - if self._ps_table_count == 0: - return feature_mapping_export_list - if int(os.environ.get("RANK_ID")) != 0: - step = -1 - self._create_comm_group_for_allgather() - return self._call_ckpts_export_op(path=path, feature_mapping_export_list=feature_mapping_export_list, - save_filtered_features=save_filtered_features, step=step) - - def restore_checkpoint(self, name: str, path: str, step=None): - """ Operator for restore values and optimizer params in table_id embedding table. """ - self._check_save_or_restore_params(name=name, path=path) - if (step is None) or ((os.environ.get("RANK_ID") is not None) and (int(os.environ.get("RANK_ID")) != 0)): - step = -1 - else: - step = tf.cast(step, dtype=tf.int64) - self._create_comm_group_for_allgather() - table_id = self._table_name_to_id.get(name) - with specified_ps_engine_scope(): - file_path_tensor = ops.convert_to_tensor(path, name="file_path") - ps_id_tensor = ops.convert_to_tensor(-1, name="ps_id") - table_id_tensor = ops.convert_to_tensor([table_id], name="table_id") - embedding_table_import = \ - gen_npu_cpu_ops.embedding_table_import(ps_id=ps_id_tensor, - file_path=file_path_tensor, - table_id=table_id_tensor, - global_step=step, - embedding_dim=[self._table_to_embedding_dim.get(table_id)], - value_total_len=[self._table_to_embedding_dim.get(table_id) * - (self._table_to_slot_var_num.get( - table_id) + 1) + 2], - only_var_flag=False, - file_type="bin", - table_name=[name]) - with tf.control_dependencies([embedding_table_import]): - embedding_compute_var_import = \ - 
    def restore_checkpoints(self, path: str, import_feature_mapping=False, step=None):
        """ Operator for restore values and optimizer params in all embedding tables. """
        if len(self._ps_table_name_list) != 0:
            self._check_save_or_restore_params_v2(path=path, save_flag=False)
        if step is None:
            step = -1
        else:
            step = tf.cast(step, dtype=tf.int64)
        if import_feature_mapping or len(self._small_table_variable_list) != 0:
            feature_mapping_import_list = self._call_feature_mapping_import_op(path=path, import_value=False,
                                                                              step=step)
            if self._ps_table_count == 0:
                return feature_mapping_import_list
        # Non-zero ranks always import the latest (-1).
        if (os.environ.get("RANK_ID") is not None) and (int(os.environ.get("RANK_ID")) != 0):
            step = -1
        self._create_comm_group_for_allgather()
        with specified_ps_engine_scope():
            table_id_list = []
            embedding_dim_list = []
            value_total_len_list = []
            for table_id in self._ps_table_id_list:
                table_id_list.append(table_id)
                embedding_dim_list.append(self._table_to_embedding_dim.get(table_id))
                # value + optimizer slots + 2 extra fields per key.
                value_total_len_list.append(self._table_to_embedding_dim.get(table_id) *
                                            (self._table_to_slot_var_num.get(table_id) + 1) + 2)
            file_path_tensor = ops.convert_to_tensor(path, name="file_path")
            ps_id_tensor = ops.convert_to_tensor(-1, name="ps_id")
            table_id_tensor = ops.convert_to_tensor(table_id_list, name="table_id")
            embedding_table_import = \
                gen_npu_cpu_ops.embedding_table_import(ps_id=ps_id_tensor,
                                                       file_path=file_path_tensor,
                                                       table_id=table_id_tensor,
                                                       global_step=step,
                                                       embedding_dim=embedding_dim_list,
                                                       value_total_len=value_total_len_list,
                                                       only_var_flag=False,
                                                       file_type="bin",
                                                       table_name=self._ps_table_name_list)
            # Compute-var import must run after the table import completes.
            with tf.control_dependencies([embedding_table_import]):
                embedding_compute_var_import = \
                    gen_npu_cpu_ops.embedding_compute_var_import(file_path=file_path_tensor,
                                                                 ps_id=ps_id_tensor,
                                                                 table_id=table_id_tensor,
                                                                 global_step=step,
                                                                 table_name=self._ps_table_name_list)
                if len(self._small_table_variable_list) == 0:
                    return tf.group([embedding_compute_var_import])
                return embedding_compute_var_import, feature_mapping_import_list
    def save_incremental_embedding(self, name: str, path: str, step=None):
        """ Operator for save incremental values in table_id embedding table.

        Same as save_embedding but with export_mode="new": only keys added
        since the previous export are written.
        """
        self._check_save_or_restore_params(name=name, path=path)
        table_id = self._table_name_to_id.get(name)
        step = tf.cast(step, dtype=tf.int64)
        if int(os.environ.get("RANK_ID")) != 0:
            step = -1  # only rank 0 passes the real global step
        self._create_comm_group_for_allgather()
        with specified_ps_engine_scope():
            file_path_tensor = ops.convert_to_tensor(path, name="file_path")
            ps_id_tensor = ops.convert_to_tensor(-1, name="ps_id")
            table_id_tensor = ops.convert_to_tensor([table_id], name="table_id")
            step_to_live = self._table_id_to_steps_to_live.get(table_id, 0)
            embedding_table_export = \
                gen_npu_cpu_ops.embedding_table_export(file_path=file_path_tensor,
                                                       ps_id=ps_id_tensor,
                                                       table_id=table_id_tensor,
                                                       embedding_dim=[self._table_to_embedding_dim.get(table_id)],
                                                       value_total_len=[self._table_to_embedding_dim.get(table_id)],
                                                       export_mode="new",
                                                       only_var_flag=True,
                                                       file_type="bin",
                                                       table_name=[name],
                                                       global_step=step,
                                                       steps_to_live_list=[step_to_live])
            return tf.group([embedding_table_export])

    def save_incremental_embeddings(self, path: str, step=None):
        """ Operator for save incremental values in all embedding tables. """
        self._check_save_or_restore_params_v2(path=path, save_flag=True)
        step = tf.cast(step, dtype=tf.int64)
        if int(os.environ.get("RANK_ID")) != 0:
            step = -1  # only rank 0 passes the real global step
        self._create_comm_group_for_allgather()
        with specified_ps_engine_scope():
            table_id_list = []
            embedding_dim_list = []
            steps_list = []
            for table_id in self._ps_table_id_list:
                table_id_list.append(table_id)
                embedding_dim_list.append(self._table_to_embedding_dim.get(table_id))
                steps_list.append(self._table_id_to_steps_to_live.get(table_id, 0))
            file_path_tensor = ops.convert_to_tensor(path, name="file_path")
            ps_id_tensor = ops.convert_to_tensor(-1, name="ps_id")
            table_id_tensor = ops.convert_to_tensor(table_id_list, name="table_id")
            embedding_table_export = \
                gen_npu_cpu_ops.embedding_table_export(file_path=file_path_tensor,
                                                       ps_id=ps_id_tensor,
                                                       table_id=table_id_tensor,
                                                       embedding_dim=embedding_dim_list,
                                                       value_total_len=embedding_dim_list,
                                                       export_mode="new",
                                                       only_var_flag=True,
                                                       file_type="bin",
                                                       table_name=self._ps_table_name_list,
                                                       global_step=step,
                                                       steps_to_live_list=steps_list)
            return tf.group([embedding_table_export])
""" - self._check_save_or_restore_params_v2(path=path, save_flag=True) - step = tf.cast(step, dtype=tf.int64) - if int(os.environ.get("RANK_ID")) != 0: - step = -1 - self._create_comm_group_for_allgather() - with specified_ps_engine_scope(): - table_id_list = [] - embedding_dim_list = [] - steps_list = [] - for table_id in self._ps_table_id_list: - table_id_list.append(table_id) - embedding_dim_list.append(self._table_to_embedding_dim.get(table_id)) - steps_list.append(self._table_id_to_steps_to_live.get(table_id, 0)) - file_path_tensor = ops.convert_to_tensor(path, name="file_path") - ps_id_tensor = ops.convert_to_tensor(-1, name="ps_id") - table_id_tensor = ops.convert_to_tensor(table_id_list, name="table_id") - embedding_table_export = \ - gen_npu_cpu_ops.embedding_table_export(file_path=file_path_tensor, - ps_id=ps_id_tensor, - table_id=table_id_tensor, - embedding_dim=embedding_dim_list, - value_total_len=embedding_dim_list, - export_mode="new", - only_var_flag=True, - file_type="bin", - table_name=self._ps_table_name_list, - global_step=step, - steps_to_live_list=steps_list) - return tf.group([embedding_table_export]) - - def restore_incremental_embedding(self, name: str, path: str, step=None): - self._check_save_or_restore_params(name=name, path=path) - if (step is None) or ((os.environ.get("RANK_ID") is not None) and (int(os.environ.get("RANK_ID")) != 0)): - step = -1 - else: - step = tf.cast(step, dtype=tf.int64) - self._create_comm_group_for_allgather() - table_id = self._table_name_to_id.get(name) - with specified_ps_engine_scope(): - embedding_table_import = \ - gen_npu_cpu_ops.embedding_table_import(ps_id=ops.convert_to_tensor(-1), - file_path=ops.convert_to_tensor(path), - table_id=ops.convert_to_tensor([table_id]), - global_step=step, - embedding_dim=[self._table_to_embedding_dim.get(table_id)], - value_total_len=[self._table_to_embedding_dim.get(table_id)], - only_var_flag=True, - file_type="bin", - table_name=[name]) - return 
    def restore_incremental_embeddings(self, path: str, step=None):
        """Restore incremental values of all PS tables from `path`."""
        self._check_save_or_restore_params_v2(path=path, save_flag=False)
        if (step is None) or ((os.environ.get("RANK_ID") is not None) and (int(os.environ.get("RANK_ID")) != 0)):
            step = -1
        else:
            step = tf.cast(step, dtype=tf.int64)
        self._create_comm_group_for_allgather()
        with specified_ps_engine_scope():
            table_id_list = []
            embedding_dim_list = []
            for table_id in self._ps_table_id_list:
                table_id_list.append(table_id)
                embedding_dim_list.append(self._table_to_embedding_dim.get(table_id))
            embedding_table_import = \
                gen_npu_cpu_ops.embedding_table_import(ps_id=ops.convert_to_tensor(-1),
                                                       file_path=ops.convert_to_tensor(path),
                                                       table_id=ops.convert_to_tensor(table_id_list),
                                                       global_step=step,
                                                       embedding_dim=embedding_dim_list,
                                                       value_total_len=embedding_dim_list,
                                                       only_var_flag=True,
                                                       file_type="bin",
                                                       table_name=self._ps_table_name_list)
            return tf.group([embedding_table_import])

    def embedding_evict(self, steps_to_live: int):
        """ Operator for evict values in all embedding tables.

        NOTE(review): the type check raises ValueError where sibling APIs
        (evict_option) raise TypeError — kept as-is since callers may catch
        ValueError.
        """
        if not isinstance(steps_to_live, int):
            raise ValueError("steps_to_live must be int.")
        if steps_to_live <= 0:
            raise ValueError("steps_to_live must be greater than zero.")
        self._steps_to_live = steps_to_live
        self._create_comm_group_for_allgather()
        table_id_list = []
        with specified_ps_engine_scope():
            for table_id in self._ps_table_id_list:
                table_id_list.append(table_id)
            embedding_table_evict = \
                gen_npu_cpu_ops.embedding_table_evict(var_handle=ops.convert_to_tensor(table_id_list),
                                                      global_step=1,
                                                      steps_to_live=self._steps_to_live)
            return tf.group([embedding_table_evict])
""" - if not isinstance(steps_to_live, int): - raise ValueError("steps_to_live must be int.") - if steps_to_live <= 0: - raise ValueError("steps_to_live must be greater than zero.") - self._steps_to_live = steps_to_live - self._create_comm_group_for_allgather() - table_id_list = [] - with specified_ps_engine_scope(): - for table_id in self._ps_table_id_list: - table_id_list.append(table_id) - embedding_table_evict = \ - gen_npu_cpu_ops.embedding_table_evict(var_handle=ops.convert_to_tensor(table_id_list), - global_step=1, - steps_to_live=self._steps_to_live) - return tf.group([embedding_table_evict]) - - def get_embedding_small_variable(self, name, init_vocabulary_size, embedding_dim, max_feature_count, - initializer_mode="constant", constant_value=1.0, load_factor=0.8, - optimizer=None, ev_option=None): - if name not in self._small_hash_table_has_init: - table_id = self._small_hash_table_count - self._small_hash_table_name_to_id[name] = table_id - self._small_hash_table_id_to_name[table_id] = name - self._small_hash_table_count += 1 - self._small_hash_table_has_init.append(name) - else: - raise ValueError("This small hashtable has been initialized.") - check_small_hashtable_init_params(name=name, init_vocabulary_size=init_vocabulary_size, - embedding_dim=embedding_dim, max_feature_count=max_feature_count, - initializer_mode=initializer_mode) - self._update_small_hash_table_dict(table_id=table_id, embedding_dim=embedding_dim, - max_feature_count=max_feature_count, - init_vocabulary_size=init_vocabulary_size, optimizer=optimizer, - ev_option=ev_option) - self._init_embedding_hashmap_v2[table_id] = \ - gen_npu_cpu_ops.init_embedding_hashmap_v2(table_id=table_id, - bucket_size=init_vocabulary_size, - embedding_dim=embedding_dim, - load_factor=load_factor, - dtype=tf.float32) - init_constant_value = constant_value - if initializer_mode is "constant": - sampled_values = ops.convert_to_tensor(1.0, tf.float32) - else: - sampled_values = 
tf.random.stateless_uniform(shape=[init_vocabulary_size, embedding_dim], - seed=[42, 1234], - minval=0.0, - maxval=1.0, - dtype=tf.float32) - self._init_embedding_hash_table[table_id] = \ - gen_npu_cpu_ops.init_embedding_hash_table(table_handle=self._init_embedding_hashmap_v2.get(table_id), - sampled_values=sampled_values, - bucket_size=init_vocabulary_size, - embedding_dim=embedding_dim, - initializer_mode=initializer_mode, - constant_value=init_constant_value) - return self._init_embedding_hash_table[table_id] - - def forward_lookup(self, name, key): - table_id = self._small_hash_table_name_to_id[name] - if table_id not in self._small_hash_table_id_list: - raise ValueError("This hash table hash not yet initialized.") - table_handle = gen_npu_cpu_ops.table_to_resource_v2(table_id=[table_id]) - result = gen_npu_cpu_ops.embedding_hash_table_lookup_or_insert(table_handle=table_handle, - keys=key, - bucket_size= - self._small_hash_table_to_bucket_size - .get(table_id), - embedding_dim= - self._small_hash_table_to_embedding_dim - .get(table_id), - filter_mode= - self._small_hash_table_to_filter_mode - .get(table_id), - filter_freq= - self._small_hash_table_to_counter_filter - .get(table_id).filter_freq, - default_key_or_value= - self._small_hash_table_to_counter_filter - .get(table_id).default_key_or_value, - default_key= - self._small_hash_table_to_counter_filter - .get(table_id).default_key, - default_value= - self._small_hash_table_to_counter_filter - .get(table_id).default_value) - self._small_hash_table_to_lookup_key[table_id] = key - self._small_hash_table_to_lookup_result[table_id] = result - self._small_hash_table_has_lookup.append(table_id) - return result - - def _update_config_params(self): - env_dist = os.environ - rank_size = env_dist.get("RANK_SIZE") - rank_id = env_dist.get("RANK_ID") - cluster_config_file = env_dist.get("ESCLUSTER_CONFIG_PATH") - if (cluster_config_file is None) and (rank_size is None) and (rank_id is None): - return - if 
    def _check_max_ps_num(self):
        """Raise if any single server hosts more than 4 parameter servers."""
        for each_server_ps_num in self._server_ip_to_ps_num:
            if self._server_ip_to_ps_num[each_server_ps_num] > 4:
                raise ValueError("PS num of one server can not exceed 4, please check config params.")

    def _init_small_hashtable_params(self):
        """Initialize all bookkeeping containers for the small (david) hash tables."""
        self._small_hash_table_name_to_id = {}
        self._small_hash_table_id_to_name = {}
        self._small_hash_table_count = 0
        self._small_hash_table_has_init = []
        self._small_hash_table_id_list = []
        self._small_hash_table_to_embedding_dim = {}
        self._small_hash_table_to_key_num = {}
        self._small_hash_table_to_bucket_size = {}
        self._small_hash_table_to_optimizer = {}
        self._small_hash_table_to_filter_mode = {}
        self._small_hash_table_to_counter_filter = {}
        # op handles produced during table initialization
        self._init_embedding_hashmap_v2 = {}
        self._init_embedding_hash_table = {}
        # for lookup
        self._small_hash_table_lookup_result = {}
        self._small_hash_table_to_lookup_result = {}
        self._small_hash_table_to_lookup_key = {}
        self._small_hash_table_has_lookup = []

    def _update_small_hash_table_dict(self, table_id, embedding_dim, max_feature_count,
                                      init_vocabulary_size, optimizer, ev_option):
        """Record per-table parameters and the admission-filter configuration.

        NOTE(review): the optimizer attribute assignments below raise
        AttributeError when `optimizer` is None, yet the public caller
        (get_embedding_small_variable) defaults optimizer=None — presumably a
        non-None optimizer is required in practice; confirm.
        """
        self._small_hash_table_to_embedding_dim[table_id] = embedding_dim
        self._small_hash_table_to_key_num[table_id] = max_feature_count
        self._small_hash_table_to_bucket_size[table_id] = init_vocabulary_size
        self._small_hash_table_id_list.append(table_id)

        if (ev_option is not None) and (ev_option.filter_option is not None):
            self._small_hash_table_to_filter_mode[table_id] = "counter"
            self._small_hash_table_to_counter_filter[table_id] = ev_option.filter_option
        else:
            # No filter configured: store a pass-through CounterFilter.
            self._small_hash_table_to_filter_mode[table_id] = "no_filter"
            self._small_hash_table_to_counter_filter[table_id] = CounterFilter(filter_freq=1,
                                                                               default_key_or_value=False,
                                                                               default_key=1,
                                                                               default_value=1.0)
        self._small_hash_table_to_optimizer[table_id] = optimizer
        self._small_hash_table_to_optimizer[table_id].embedding_dim = embedding_dim
        self._small_hash_table_to_optimizer[table_id].bucket_size = init_vocabulary_size
self._small_hash_table_id_list.append(table_id) - - if (ev_option is not None) and (ev_option.filter_option is not None): - self._small_hash_table_to_filter_mode[table_id] = "counter" - self._small_hash_table_to_counter_filter[table_id] = ev_option.filter_option - else: - self._small_hash_table_to_filter_mode[table_id] = "no_filter" - self._small_hash_table_to_counter_filter[table_id] = CounterFilter(filter_freq=1, - default_key_or_value=False, - default_key=1, - default_value=1.0) - self._small_hash_table_to_optimizer[table_id] = optimizer - self._small_hash_table_to_optimizer[table_id].embedding_dim = embedding_dim - self._small_hash_table_to_optimizer[table_id].bucket_size = init_vocabulary_size - - def _check_and_update_small_init_params(self, name, init_vocabulary_size, embedding_dim, multihot_lens, key_dtype, - value_dtype, allow_merge, initializer): - if name not in self._small_table_name_list: - self._small_table_name_list.append(name) - self._feature_mapping_name_list.append(name) - else: - raise ValueError("This small table has been initialized.") - if (init_vocabulary_size is None) or (embedding_dim is None) or (multihot_lens is None): - raise ValueError("max_vocabulary_size or embedding_dim or multihot_lens can not be None.") - if (key_dtype is None) or (value_dtype is None): - raise ValueError("key_dtype and value_dtype can not be None.") - check_init_params_type(key_dtype=key_dtype, value_dtype=value_dtype, - init_vocabulary_size=init_vocabulary_size, embedding_dim=embedding_dim, - multihot_lens=multihot_lens, allow_merge=allow_merge) - if init_vocabulary_size <= 0 or embedding_dim <= 0 or multihot_lens <= 0: - raise ValueError("init_vocabulary_size, embedding_dim, multihot_lens must be greater than zero.") - if initializer is None: - raise ValueError("Initializer can not be None.") - if allow_merge: - raise ValueError("allow_merge do not support now.") - self._need_table_merge = True - if isinstance(initializer, EsInitializer): - if 
initializer.initializer_mode == "random_uniform": - self._table_id_to_initializer[table_id] = \ - tf.random_uniform_initializer(minval=initializer.min, maxval=initializer.max, - seed=initializer.seed, dtype=value_dtype) - elif initializer.initializer_mode == "truncated_normal": - self._table_id_to_initializer[table_id] = \ - tf.truncated_normal_initializer(stddev=initializer.stddev, mean=initializer.mean, - seed=initializer.seed, dtype=value_dtype) - elif initializer.initializer_mode == "constant": - self._table_id_to_initializer[table_id] = \ - tf.constant_initializer(value=initializer.value, dtype=value_dtype) - elif not callable(initializer): - if ops.convert_to_tensor(initializer).dtype.base_dtype != tf.float32: - raise ValueError("Initializer type '%s' and explict dtype tf.float32 don't match." % init_dtype) - - def _check_and_update_ps_init_params(self, name, init_vocabulary_size, embedding_dim, max_feature_count, ev_option): - steps_to_live = 0 - if max_feature_count is None: - raise ValueError("For ps table, max_feature_count can not be None.") - if (ev_option is not None) and (not isinstance(ev_option, EmbeddingVariableOption)): - raise TypeError("For ps table, ev_option must be EmbeddingVariableOption type.") - if (ev_option is not None) and (ev_option.evict_option is not None): - steps_to_live = ev_option.evict_option.steps_to_live - if not isinstance(max_feature_count, int): - raise ValueError("For ps table, max_feature_count must be int.") - if init_vocabulary_size >= _INT32_MAX_VALUE: - raise ValueError("init_vocabulary_size exceeds int32 max value.") - if max_feature_count <= 0: - raise ValueError("For ps table, max_feature_count must be greater than zero.") - if name not in self._table_name_has_init: - table_id = self._ps_table_count - self._table_name_to_id[name] = table_id - self._table_id_to_name[table_id] = name - self._table_id_to_steps_to_live[table_id] = steps_to_live - self._ps_table_count += 1 - self._table_name_has_init.append(name) - 
else: - raise ValueError("This table has been initialized.") - return table_id - - def _check_ps_opt_and_initializer(self, optimizer, initializer, table_id): - if (not isinstance(optimizer, embedding_optimizer.AdamOptimizer)) and \ - (not isinstance(optimizer, embedding_optimizer.AdagradOptimizer)) and \ - (not isinstance(optimizer, embedding_optimizer.AdamWOptimizer)) and \ - (not isinstance(optimizer, embedding_optimizer.SgdOptimizer)) and \ - (not isinstance(optimizer, embedding_optimizer.RmspropOptimizer)) and \ - (not isinstance(optimizer, embedding_optimizer.FtrlOptimizer)): - raise ValueError( - "Optimizer should be one of AdamOptimizer, AdagradOptimizer, AdamWOptimizer, " - "SGDOptimizer, RmspropOptimizer and FtrlOptimizer.") - if initializer is not None: - if isinstance(initializer, EsInitializer): - self._table_id_to_initializer[table_id] = initializer - elif isinstance(initializer, tf.initializers.truncated_normal): - if initializer.dtype != tf.float32: - raise TypeError("initializer dtype error.") - self._table_id_to_initializer[table_id] = \ - EsInitializer(initializer_mode="truncated_normal", mu=initializer.mean, - sigma=initializer.stddev, seed=initializer.seed) - elif isinstance(initializer, tf.initializers.random_uniform): - if initializer.dtype != tf.float32: - raise TypeError("initializer dtype error.") - self._table_id_to_initializer[table_id] = \ - EsInitializer(initializer_mode="random_uniform", min=initializer.minval, - max=initializer.maxval, seed=initializer.seed) - elif isinstance(initializer, tf.initializers.constant): - if initializer.dtype != tf.float32: - raise TypeError("initializer dtype error.") - self._table_id_to_initializer[table_id] = \ - EsInitializer(initializer_mode="constant", constant_value=initializer.value) - else: - raise TypeError("initializer must be EsInitializer or tensorflow initializer, and only support" - "random_uniform, truncated_normal and constant value.") - - def _update_optimizer_slot_var_num(self, 
table_id): - # adam, adamw, rmsprop include m and v, 2 slots; adagrad include accumulator, 1 slot; sgd include 0 slot - if isinstance(self._optimizer, embedding_optimizer.AdagradOptimizer): - self._table_to_slot_var_num[table_id] = 1 - elif isinstance(self._optimizer, embedding_optimizer.SgdOptimizer): - self._table_to_slot_var_num[table_id] = 0 - else: - self._table_to_slot_var_num[table_id] = 2 - - def _check_ps_lookup_params(self, name, ids): - if (name is None) or (ids is None): - raise ValueError("table name or ids must be specified.") - if not isinstance(name, str): - raise TypeError("embedding table name must be string.") - regex = re.compile('[@!#$%^&*()<>?/\|}{~:]') - if regex.search(name) is not None: - raise ValueError("table name contains illegal character.") - if ids.dtype != tf.int64: - raise ValueError("dtype of ids must be tf.int64.") - if not self._init_table_flag: - raise ValueError("embedding table must init first!") - table_id = self._table_name_to_id.get(name) - if table_id not in self._ps_table_id_list: - raise ValueError("this ps table has not yet initialized.") - return table_id - - def _check_update_params(self, params, input_ids_list, table_ids, loss): - if (loss is None) or (params is None) or (table_ids is None) or (input_ids_list is None): - raise ValueError("loss or params or table_ids or input_ids_list is None.") - if (isinstance(loss, str)) or (isinstance(params, str)) or isinstance(table_ids, str) or \ - isinstance(input_ids_list, str): - raise ValueError("loss, params, table_ids and input_ids_list can not be str.") - if not self._init_table_flag: - raise ValueError("embedding must init first!") - - def _check_save_or_restore_params(self, name, path): - if path is None or name is None: - raise ValueError("table name, embedding table path can not be None.") - if not isinstance(name, str): - raise TypeError("embedding table name must be string.") - regex = re.compile('[@!#$%^&*()<>?/\|}{~:]') - if regex.search(name) is not None: - 
raise ValueError("table name contains illegal character.") - if not self._init_table_flag: - raise ValueError("Not any table has been initialized.") - if name not in self._ps_table_name_list: - raise ValueError("this table has not yet initialized.") - if path[-1] == '/': - raise ValueError("path format is wrong, please check.") - - def _check_save_or_restore_params_v2(self, path, save_flag): - if path is None: - raise ValueError("embedding table path can not be None.") - if path[-1] == '/': - raise ValueError("path format is wrong, please check.") - if not self._init_table_flag: - raise ValueError("Not any table has been initialized.") - if save_flag: - env_dist = os.environ - rank_id = int(env_dist.get("RANK_ID")) - if rank_id != 0: - logging.warn("Only minimal_rank_id device in each server can run save graph." - "Else, save graph will raise unexpected error. Please Check.") - - def _init_counter_filter(self, table_id, ev_option): - if (ev_option is not None) and (ev_option.filter_option is not None): - filter_mode = "counter" - self._table_to_counter_filter[table_id] = ev_option.filter_option - self._table_use_counter_filter[table_id] = 1 - else: - filter_mode = "no_filter" - self._table_use_counter_filter[table_id] = 0 - return filter_mode - - def _set_ps_optimizer_params(self, table_id, optimizer, embedding_dim, max_feature_count, mask_zero, ev_option): - self._optimizer = optimizer - self._optimizer.embedding_dim = embedding_dim - self._optimizer.max_num = max_feature_count - self._optimizer.mask_zero = 1 if mask_zero is True else 0 - self._init_ps_opt_padding_key(ev_option=ev_option) - self._init_ps_opt_completion_key(table_id=table_id, ev_option=ev_option) - self._table_to_optimizer[table_id] = self._optimizer - self._ps_table_id_to_optimizer_params[table_id] = [] - self._update_optimizer_slot_var_num(table_id=table_id) - - def _init_ps_opt_padding_key(self, ev_option): - if (ev_option is not None) and (ev_option.padding_option is not None): - 
self._optimizer.padding_key = ev_option.padding_option.padding_key - self._optimizer.padding_key_mask = 1 if ev_option.padding_option.mask is True else 0 - else: - self._optimizer.padding_key = 0 - self._optimizer.padding_key_mask = 1 - if not self._optimizer.padding_key_mask: - self._optimizer.embedding_flags = 1 - else: - self._optimizer.embedding_flags = 0 - - def _init_ps_opt_completion_key(self, table_id, ev_option): - if (ev_option is not None) and (ev_option.completion_option is not None): - self._optimizer.completion_key = ev_option.completion_option.completion_key - self._optimizer.completion_key_mask = ev_option.completion_option.mask - self._table_id_to_completion_option[table_id] = ev_option.completion_option - else: - self._optimizer.completion_key = 0 - self._optimizer.completion_key_mask = True - self._table_id_to_completion_option[table_id] = CompletionKeyOption(completion_key=0, - mask=1) - - def _init_optimizer_mode_and_params(self, table_id): - if isinstance(self._table_to_optimizer.get(table_id), embedding_optimizer.AdagradOptimizer): - self._ps_table_id_to_optimizer_mode[table_id] = "adagrad" - self._ps_table_id_to_optimizer_params[table_id].append( - self._table_to_optimizer.get(table_id).initial_accumulator_value - ) - self._ps_table_id_to_optimizer_params[table_id].append( - self._table_to_optimizer.get(table_id).initial_accumulator_value - ) - if isinstance(self._table_to_optimizer.get(table_id), embedding_optimizer.AdamOptimizer): - self._ps_table_id_to_optimizer_mode[table_id] = "adam" - self._ps_table_id_to_optimizer_params[table_id].append(0) - self._ps_table_id_to_optimizer_params[table_id].append(0) - if isinstance(self._table_to_optimizer.get(table_id), embedding_optimizer.AdamWOptimizer): - self._ps_table_id_to_optimizer_mode[table_id] = "adamw" - self._ps_table_id_to_optimizer_params[table_id].append(0) - self._ps_table_id_to_optimizer_params[table_id].append(0) - if isinstance(self._table_to_optimizer.get(table_id), 
embedding_optimizer.SgdOptimizer): - self._ps_table_id_to_optimizer_mode[table_id] = "sgd" - self._ps_table_id_to_optimizer_params[table_id].append(0) - self._ps_table_id_to_optimizer_params[table_id].append(0) - if isinstance(self._table_to_optimizer.get(table_id), embedding_optimizer.RmspropOptimizer): - self._ps_table_id_to_optimizer_mode[table_id] = "rmsprop" - self._ps_table_id_to_optimizer_params[table_id].append( - self._table_to_optimizer.get(table_id).ms) - self._ps_table_id_to_optimizer_params[table_id].append( - self._table_to_optimizer.get(table_id).mom) - if isinstance(self._table_to_optimizer.get(table_id), embedding_optimizer.FtrlOptimizer): - self._ps_table_id_to_optimizer_mode[table_id] = "ftrl" - self._ps_table_id_to_optimizer_params[table_id].append( - self._table_to_optimizer.get(table_id).accum) - self._ps_table_id_to_optimizer_params[table_id].append( - self._table_to_optimizer.get(table_id).linear) - - def _init_hashmap_and_table_import(self, bucket_size, table_id, embedding_dim, ev_option): - filter_mode = self._init_counter_filter(table_id, ev_option) - self._init_optimizer_mode_and_params(table_id) - - with tf.control_dependencies([self._init_partition_maps.get(table_id)]): - if self._train_mode: - if self._train_level: - self._init_embedding_hash_maps[table_id] = \ - gen_npu_cpu_ops.init_embedding_hashmap(table_id=ops.convert_to_tensor(table_id), - bucket_size=bucket_size, - value_total_len=embedding_dim * - (self._table_to_slot_var_num.get( - table_id) + 1) + 2, - embedding_dim=embedding_dim, - initializer_mode= - self._table_id_to_initializer.get(table_id) - .initializer_mode, - constant_value= - self._table_id_to_initializer.get(table_id). 
- constant_value, - min=self._table_id_to_initializer.get(table_id).min, - max=self._table_id_to_initializer.get(table_id).max, - mu=self._table_id_to_initializer.get(table_id).mu, - sigma=self._table_id_to_initializer.get(table_id).sigma, - seed=self._table_id_to_initializer.get(table_id).seed, - seed2=self._table_id_to_initializer.get(table_id).seed, - filter_mode=filter_mode, - optimizer_mode= - self._ps_table_id_to_optimizer_mode.get(table_id), - optimizer_params= - self._ps_table_id_to_optimizer_params.get(table_id)) - else: - self._init_embedding_hash_maps[table_id] = \ - gen_npu_cpu_ops.init_embedding_hashmap(table_id=ops.convert_to_tensor(table_id), - bucket_size=bucket_size, - value_total_len=embedding_dim * - (self._table_to_slot_var_num.get( - table_id) + 1) + 2, - embedding_dim=embedding_dim, - initializer_mode=None, constant_value=None, - min=None, max=None, mu=None, sigma=None, - seed=None, seed2=None, filter_mode=filter_mode, - optimizer_mode= - self._ps_table_id_to_optimizer_mode.get(table_id), - optimizer_params= - self._ps_table_id_to_optimizer_params.get(table_id)) - else: - self._init_embedding_hash_maps[table_id] = \ - gen_npu_cpu_ops.init_embedding_hashmap(table_id=ops.convert_to_tensor(table_id), - bucket_size=bucket_size, - value_total_len=embedding_dim, - embedding_dim=embedding_dim, - initializer_mode=None, constant_value=None, - min=None, max=None, mu=None, sigma=None, - seed=None, seed2=None, filter_mode=filter_mode, - optimizer_mode= - self._ps_table_id_to_optimizer_mode.get(table_id), - optimizer_params= - self._ps_table_id_to_optimizer_params.get(table_id)) - self._init_table_flag = True - self._init_table_flag = True - if self._train_mode: - return tf.group( - [tf.initializers.variables(self._optimizer.variables()), self._init_embedding_hash_maps.get(table_id)], - name=self._table_id_to_name.get(table_id) + "_init") - else: - return tf.group([self._init_embedding_hash_maps.get(table_id)], - name=self._table_id_to_name.get(table_id) + 
"_init") - - def _call_lookup_op(self, table_id, ids, actual_keys_input=None, unique_indices=None, - filter_mode=None, use_counter_filter=False, key_count=None): - if self._train_mode: - if self.use_host_unique: - if use_counter_filter: - key_count = key_count - else: - key_count = ids - result = gen_npu_cpu_ops. \ - fake_remote_lookup_uniqued(table_id=ops.convert_to_tensor(table_id), - keys=ids, - actual_keys_input=actual_keys_input, - unique_indices=unique_indices, - key_count=key_count, - embedding_dim=[self._table_to_embedding_dim.get(table_id)], - initializer_mode=[self._table_id_to_initializer.get(table_id) - .initializer_mode], - constant_value=[self._table_id_to_initializer.get(table_id) - .constant_value], - min=[self._table_id_to_initializer.get(table_id).min], - max=[self._table_id_to_initializer.get(table_id).max], - mu=[self._table_id_to_initializer.get(table_id).mu], - sigma=[self._table_id_to_initializer.get(table_id).sigma], - seed=[self._table_id_to_initializer.get(table_id).seed], - seed2=[self._table_id_to_initializer.get(table_id).seed], - value_total_len=[self._table_to_embedding_dim - .get(table_id) * (self._table_to_slot_var_num.get(table_id) + 1) + 2], - filter_mode=[filter_mode], - filter_freq=[self._filter_freq], - default_key_or_value=[self._default_key_or_value], - default_key=[self._default_key], - default_value=[self._default_value], - optimizer_mode=[self._ps_table_id_to_optimizer_mode.get(table_id)], - optimizer_params=[self._ps_table_id_to_optimizer_params. - get(table_id)[0], self._ps_table_id_to_optimizer_params. - get(table_id)[1]], - completion_key=[self._table_id_to_completion_option - .get(table_id).completion_key], - completion_key_mask=[self._table_id_to_completion_option - .get(table_id).mask] - ) - else: - result = gen_npu_cpu_ops. 
\ - embedding_table_find_and_init(table_id=ops.convert_to_tensor(table_id), - keys=ids, - embedding_dim=[self._table_to_embedding_dim.get(table_id)], - initializer_mode=[self._table_id_to_initializer.get(table_id) - .initializer_mode], - constant_value=[self._table_id_to_initializer.get(table_id) - .constant_value], - min=[self._table_id_to_initializer.get(table_id).min], - max=[self._table_id_to_initializer.get(table_id).max], - mu=[self._table_id_to_initializer.get(table_id).mu], - sigma=[self._table_id_to_initializer.get(table_id).sigma], - seed=[self._table_id_to_initializer.get(table_id).seed], - seed2=[self._table_id_to_initializer.get(table_id).seed], - value_total_len=[self._table_to_embedding_dim.get(table_id) * - (self._table_to_slot_var_num.get(table_id) + 1) + 2], - filter_mode=[filter_mode], - filter_freq=[self._filter_freq], - default_key_or_value=[self._default_key_or_value], - default_key=[self._default_key], - default_value=[self._default_value], - optimizer_mode=[self._ps_table_id_to_optimizer_mode.get(table_id)], - optimizer_params=[self._ps_table_id_to_optimizer_params - .get(table_id)[0], self._ps_table_id_to_optimizer_params - .get(table_id)[1]], - completion_key=[self._table_id_to_completion_option - .get(table_id).completion_key], - completion_key_mask=[self._table_id_to_completion_option - .get(table_id).mask] - ) - else: - result = gen_npu_cpu_ops.embedding_table_find(table_id=ops.convert_to_tensor(table_id), - keys=ids, - embedding_dim=[self._table_to_embedding_dim.get(table_id)], - default_value=[self._default_value]) - result.op._set_attr("_embedding_dim", attr_value_pb2.AttrValue(i=self._table_to_embedding_dim.get(table_id))) - result.op._set_attr("_max_key_num", attr_value_pb2.AttrValue(i=self._table_to_max_num.get(table_id))) - result.op._set_attr("_use_counter_filter", - attr_value_pb2.AttrValue(i=self._table_use_counter_filter.get(table_id))) - return result - - def _call_embeddings_export_op(self, path, 
feature_mapping_export_list, step): - with specified_ps_engine_scope(): - table_id_list = [] - embedding_dim_list = [] - steps_list = [] - for table_id in self._ps_table_id_list: - table_id_list.append(table_id) - embedding_dim_list.append(self._table_to_embedding_dim.get(table_id)) - steps_list.append(self._table_id_to_steps_to_live.get(table_id, 0)) - file_path_tensor = ops.convert_to_tensor(path, name="file_path") - ps_id_tensor = ops.convert_to_tensor(-1, name="ps_id") - table_id_tensor = ops.convert_to_tensor(table_id_list, name="table_id") - embedding_table_export = \ - gen_npu_cpu_ops.embedding_table_export(file_path=file_path_tensor, - ps_id=ps_id_tensor, - table_id=table_id_tensor, - embedding_dim=embedding_dim_list, - value_total_len=embedding_dim_list, - export_mode="all", - only_var_flag=True, - file_type="bin", - table_name=self._ps_table_name_list, - global_step=step, - steps_to_live_list=steps_list) - if len(self._small_table_variable_list) == 0: - return tf.group([embedding_table_export]) - return embedding_table_export, feature_mapping_export_list - - def _call_ckpts_export_op(self, path, feature_mapping_export_list, save_filtered_features, step): - with specified_ps_engine_scope(): - table_id_list = [] - embedding_dim_list = [] - value_total_len_list = [] - steps_list = [] - for table_id in self._ps_table_id_list: - table_id_list.append(table_id) - embedding_dim_list.append(self._table_to_embedding_dim.get(table_id)) - steps_list.append(self._table_id_to_steps_to_live.get(table_id, 0)) - value_total_len_list.append(self._table_to_embedding_dim.get(table_id) * - (self._table_to_slot_var_num.get(table_id) + 1) + 2) - file_path_tensor = ops.convert_to_tensor(path, name="file_path") - ps_id_tensor = ops.convert_to_tensor(-1, name="ps_id") - table_id_tensor = ops.convert_to_tensor(table_id_list, name="table_id") - embedding_table_export = \ - gen_npu_cpu_ops.embedding_table_export(file_path=file_path_tensor, - ps_id=ps_id_tensor, - 
table_id=table_id_tensor, - embedding_dim=embedding_dim_list, - value_total_len=value_total_len_list, - export_mode="all", - only_var_flag=False, - file_type="bin", - table_name=self._ps_table_name_list, - filter_export_flag=save_filtered_features, - global_step=step, - steps_to_live_list=steps_list) - with tf.control_dependencies([embedding_table_export]): - embedding_compute_var_export = \ - gen_npu_cpu_ops.embedding_compute_var_export(file_path=file_path_tensor, - ps_id=ps_id_tensor, - table_id=table_id_tensor, - global_step=step, - table_name=self._ps_table_name_list) - if len(self._small_table_variable_list) == 0: - return tf.group([embedding_compute_var_export]) - return embedding_compute_var_export, feature_mapping_export_list - - def _create_variable_for_small_table(self, table_map_policy): - if not self._need_table_merge: - self._create_variable_when_no_merge() - else: - self._create_variable_when_need_merge(table_map_policy) - self.user_defined_table_infos = [] - self._small_table_name_list = [] - self._small_table_init = True - - def _create_variable_when_no_merge(self): - for user_table_info in self.user_defined_table_infos: - if self._small_table_init or (int(os.environ.get("RANK_ID")) != 0): - with tf.compat.v1.variable_scope("es", reuse=tf.compat.v1.AUTO_REUSE): - self._small_table_to_variable[user_table_info['name']] = \ - tf.get_variable(user_table_info['name'], shape=[user_table_info['max_vocabulary_size'], - user_table_info['embedding_dim']], - initializer=user_table_info['initializer'], dtype=tf.float32) - else: - with tf.compat.v1.variable_scope("es"): - self._small_table_to_variable[user_table_info['name']] = \ - tf.get_variable(user_table_info['name'], shape=[user_table_info['max_vocabulary_size'], - user_table_info['embedding_dim']], - initializer=user_table_info['initializer'], dtype=tf.float32) - self._small_table_to_multihot_lens[self.total_embedding_count] = user_table_info['multihot_lens'] - 
self._small_table_name_to_max_vocabulary_size[user_table_info['name']] = \ - user_table_info['max_vocabulary_size'] - self._small_table_name_to_multihot_lens[user_table_info['name']] = \ - user_table_info['multihot_lens'] - self._small_table_variable_list.append(user_table_info['name'] + ":0") - self._small_table_variable_dim_list.append(user_table_info['embedding_dim']) - self.total_embedding_count += 1 - - def _create_variable_when_need_merge(self, table_map_policy): - self.total_variable_table = [] - if (not isinstance(table_map_policy, NoneTableMapPolicy)) and \ - (not isinstance(table_map_policy, AutoMergeTableMapPolicy)): - raise TypeError("table_map_policy should be NoneTableMapPolicy or AutoMergeTableMapPolicy.") - self.table_map_policy = table_map_policy - self.table_create_infos = self.table_map_policy.map_table_infos(self.user_defined_table_infos) - for table_info_ in self.table_create_infos: - if self._small_table_init or (int(os.environ.get("RANK_ID")) != 0): - with tf.compat.v1.variable_scope("es", reuse=tf.compat.v1.AUTO_REUSE): - self.total_variable_table.append(tf.get_variable('ES' + str(self.total_embedding_count), - shape=[table_info_['max_vocabulary_size'], - table_info_['embedding_dim']], - initializer=table_info_['initializer'], - dtype=tf.float32 - )) - else: - with tf.compat.v1.variable_scope("es"): - self.total_variable_table.append(tf.get_variable('ES' + str(self.total_embedding_count), - shape=[table_info_['max_vocabulary_size'], - table_info_['embedding_dim']], - initializer=table_info_['initializer'], - dtype=tf.float32 - )) - self._npu_table_to_embedding_dim[self.total_embedding_count] = table_info_['embedding_dim'] - self._small_table_variable_list.append('ES' + str(self.total_embedding_count) + ":0") - self._small_table_variable_dim_list.append(table_info_['embedding_dim']) - self.total_embedding_count += 1 - - def _small_table_lookup_v1(self, name, rank_id, rank_size, ids_list): - if not isinstance(name, str): - raise 
TypeError("embedding table name must be string.") - if self.total_embedding_count == 0: - raise ValueError("Must init_table() first!") - hash_key_shape = ids_list.get_shape().as_list() - if rank_size > 1 and (hash_key_shape[0] is not None): - hash_key = allgather(tensor=ids_list, rank_size=rank_size, group="user_group_fm") - non_hash_key = gen_npu_cpu_ops.embedding_feature_mapping_v2(feature_id=hash_key, table_name=name, - table_total_size=[1], table_actual_size=[1]) - recovery_matrix = [] - for i in range(hash_key_shape[0]): - recovery_matrix.append(rank_id * hash_key_shape[0] + i) - local_non_hash_keys = tf.gather(non_hash_key, recovery_matrix) - else: - hash_key = ids_list - local_non_hash_keys = gen_npu_cpu_ops.embedding_feature_mapping_v2(feature_id=hash_key, table_name=name, - table_total_size=[1], - table_actual_size=[1]) - return tf.nn.embedding_lookup(self._small_table_to_variable[name], local_non_hash_keys) - - def _small_table_lookup_v2(self, rank_id, rank_size, in_slot_size_group, ids_list, table_to_output_slots, - table_to_slot, in_slot_vocabulary_size_group, batch_size): - # all small table merge to One table - hash_key_shape = ids_list.get_shape().as_list() - actual_size = [i * batch_size * rank_size for i in in_slot_size_group] - if rank_size > 1 and (hash_key_shape[0] is not None): - hash_key = allgather(tensor=ids_list, rank_size=rank_size, group="user_group_fm") - non_hash_key = gen_npu_cpu_ops.embedding_feature_mapping_v2( - feature_id=hash_key, table_name=self._small_table_variable_list[0][:-2], - table_total_size=in_slot_vocabulary_size_group, table_actual_size=actual_size) - recovery_matrix = [] - for i in range(hash_key_shape[0]): - recovery_matrix.append(rank_id * hash_key_shape[0] + i) - local_non_hash_keys = tf.gather(non_hash_key, recovery_matrix) - else: - hash_key = ids_list - local_non_hash_keys = gen_npu_cpu_ops.embedding_feature_mapping_v2( - feature_id=hash_key, table_name=self._small_table_variable_list[0][:-2], - 
table_total_size=in_slot_vocabulary_size_group, table_actual_size=actual_size) - - output_slots = [None for _ in in_slot_size_group] - tid = 0 - table_embedding = tf.nn.embedding_lookup(self.total_variable_table[tid], local_non_hash_keys) - out_embedding_splited = tf.split(table_embedding, table_to_output_slots[0], axis=1) - for out_emb, sid in zip(out_embedding_splited, table_to_slot[0]): - output_slots[sid] = out_emb - return output_slots - - def _small_table_lookup_v3(self, rank_id, rank_size, ids_list, in_slot_size_group, slot_to_table, - table_to_input_group, table_to_output_slots, table_to_slot, - table_to_vocabulary_slots, batch_size): - # All small tables merge to two or more tables - indices_split = tf.split(ids_list, in_slot_size_group, axis=1) - for tid in range(self.total_embedding_count): - table_to_input_group[tid] = [] - for sid, indices in enumerate(indices_split): - tid = slot_to_table[sid] - table_to_input_group[tid].append(indices) - - output_slots = [None for _ in in_slot_size_group] - for tid, table_input_group in enumerate(table_to_input_group): - table_input_hash = tf.concat(table_input_group, axis=1) - hash_key_shape = table_input_hash.get_shape().as_list() - actual_size = [i * batch_size * rank_size for i in table_to_output_slots[tid]] - if rank_size > 1 and (hash_key_shape[0] is not None): - hash_key = allgather(tensor=table_input_hash, rank_size=rank_size, group="user_group_fm") - non_hash_key = gen_npu_cpu_ops.embedding_feature_mapping_v2( - feature_id=hash_key, table_name=self._small_table_variable_list[tid][:-2], - table_total_size=table_to_vocabulary_slots[tid], table_actual_size=actual_size) - recovery_matrix = [] - for i in range(hash_key_shape[0]): - recovery_matrix.append(rank_id * hash_key_shape[0] + i) - local_non_hash_keys = tf.gather(non_hash_key, recovery_matrix) - else: - hash_key = table_input_hash - local_non_hash_keys = gen_npu_cpu_ops.embedding_feature_mapping_v2( - feature_id=hash_key, 
table_name=self._small_table_variable_list[tid][:-2], - table_total_size=table_to_vocabulary_slots[tid], table_actual_size=actual_size) - table_embedding = tf.nn.embedding_lookup(self.total_variable_table[tid], local_non_hash_keys) - out_embedding_splited = tf.split(table_embedding, table_to_output_slots[tid], axis=1) - for out_emb, sid in zip(out_embedding_splited, table_to_slot[tid]): - output_slots[sid] = out_emb - return output_slots - - def _refresh_small_table(self): - unique_small_tables = set() - global_small_table_list = [] - for v in self._small_table_variable_list: - if v not in unique_small_tables: - unique_small_tables.add(v) - global_small_table_list.append(v) - small_table_num = len(global_small_table_list) - self._small_table_variable_dim_list = self._small_table_variable_dim_list[:small_table_num] - self._small_table_variable_list = self._small_table_variable_list[:small_table_num] - return global_small_table_list, small_table_num - - def _call_feature_mapping_export_op(self, path, export_value, step): - feature_mapping_export_list = [] - global_small_table_list, num = self._refresh_small_table() - index = 0 - # aicpu only support handle 128 tables at one time - while index < num: - iter_max = min(index + 128, num) - table_name_list = [] - offset_list = [] - embedding_dim_list = [] - while index < iter_max: - table_name_list.append(global_small_table_list[index][:-2]) - embedding_dim_list.append(self._small_table_variable_dim_list[index]) - offset_list.append(0) - index += 1 - table_name_tensor = ops.convert_to_tensor(table_name_list) - feature_size = gen_npu_cpu_ops.embedding_feature_mapping_table_size(table_name=table_name_tensor) - feature_id, offset_id = gen_npu_cpu_ops.embedding_feature_mapping_find(table_name=table_name_tensor, - feature_size=feature_size, - num=len(table_name_list)) - if export_value: - tvar = tf.trainable_variables() - for x in tvar: - if x.name[3:-2] in table_name_list: - idx = table_name_list.index(x.name[3:-2]) - 
offset_list[idx] = tf.reshape(tf.gather(x, offset_id[idx]), [-1]) - values = tf.concat(offset_list, axis=0) - else: - values = 0 - feature_mapping_export = gen_npu_cpu_ops.embedding_feature_mapping_export(file_path=path, - table_name=table_name_tensor, - feature_id=feature_id, - offset_id=offset_id, - values=values, - global_step=step, - embedding_dim= - embedding_dim_list) - feature_mapping_export_list.append(feature_mapping_export) - return feature_mapping_export_list - - def _call_feature_mapping_import_op(self, path, import_value, step): - feature_mapping_import_list = [] - global_small_table_list, num = self._refresh_small_table() - index = 0 - # aicpu only support handle 128 tables at one time - while index < num: - iter_max = min(index + 128, num) - table_name_list = [] - embedding_dim_list = [] - while index < iter_max: - table_name_list.append(global_small_table_list[index][:-2]) - embedding_dim_list.append(self._small_table_variable_dim_list[index]) - index += 1 - - feature_size = \ - gen_npu_cpu_ops.embedding_feature_mapping_file_size(file_path=path, - table_name=ops.convert_to_tensor(table_name_list), - embedding_dim=embedding_dim_list, - global_step=step, - only_offset_flag=import_value) - feature_id, offset_id, values = \ - gen_npu_cpu_ops.embedding_feature_mapping_import(file_path=path, - table_name=ops.convert_to_tensor(table_name_list), - feature_size=feature_size, - embedding_dim=embedding_dim_list, - global_step=step, - only_offset_flag=import_value, - num=len(table_name_list)) - feature_mapping_insert = \ - gen_npu_cpu_ops.embedding_feature_mapping_insert(table_name=ops.convert_to_tensor(table_name_list), - feature_id=feature_id, - offset_id=offset_id) - feature_mapping_import_list.append(feature_mapping_insert) - return feature_mapping_import_list - - def _create_comm_group_for_allgather(self): - if (os.environ.get("RANK_SIZE") is not None) and (int(os.environ.get("RANK_SIZE")) > 1) and \ - (_SAVE_EVICT_COMM_GROUP not in self._user_group_set): 
- rank_size = int(os.environ.get("RANK_SIZE")) - rank_list = [] - for i in range(rank_size): - rank_list.append(i) - create_group(_SAVE_EVICT_COMM_GROUP, rank_size, rank_list) - self._user_group_set.add(_SAVE_EVICT_COMM_GROUP) diff --git a/tf_adapter/python/npu_bridge/embedding/embedding_table_map_policy.py b/tf_adapter/python/npu_bridge/embedding/embedding_table_map_policy.py deleted file mode 100644 index d3a7886323b77c1d21992308470de940e129a118..0000000000000000000000000000000000000000 --- a/tf_adapter/python/npu_bridge/embedding/embedding_table_map_policy.py +++ /dev/null @@ -1,168 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -from functools import reduce -import tensorflow as tf -from tensorflow.python.platform import tf_logging as logging - - -def compare_for_truncated_normal(init1, init2): - if isinstance(init2, tf.initializers.truncated_normal): - if (init1.stddev != init2.stddev) or (init1.seed != init2.seed) or (init1.mean != init2.mean) or \ - (init1.dtype != init2.dtype): - return False - else: - return True - else: - return False - - -def compare_for_random_uniform(init1, init2): - if isinstance(init2, tf.initializers.random_uniform): - if (init1.minval != init2.minval) or (init1.maxval != init2.maxval) or (init1.seed != init2.seed) or \ - (init1.dtype != init2.dtype): - return False - else: - return True - else: - return False - - -def compare_initializer(init1, init2): - if isinstance(init1, tf.initializers.truncated_normal): - return compare_for_truncated_normal(init1, init2) - if isinstance(init1, tf.initializers.random_uniform): - return compare_for_random_uniform(init1, init2) - if isinstance(init1, tf.initializers.constant): - if isinstance(init2, tf.initializers.constant): - if (init1.value != init2.value) or (init1.dtype != init2.dtype): - return False - else: - return True - else: - return False - return True - - -class BaseTableMapPolicy(): - def __init__(self, assign_groups=None): - self.table_create_infos = [] - if assign_groups is None: - self.assign_groups = [] - else: - self.assign_groups = assign_groups - self.in_slot_size_group = [] - self.in_slot_vocabulary_size_group = [] - self.table_to_vocabulary_slots = [] - self.slot_to_table = [] - self.table_to_output_slots = [] - self.table_to_input_groups = [] - self.table_to_slot = [] - - @staticmethod - def _is_equal_table_info(info1, info2): - if info1['embedding_dim'] != info2['embedding_dim']: # dim of table is the same or not - logging.vlog(1, "embedding dim different!, value is %d and %d", info1['embedding_dim'], - 
info2['embedding_dim']) - return False - if not compare_initializer(info1['initializer'], info2['initializer']): # initializer of table is same or not - return False - return True - - def map_table_infos(self, user_defined_table_infos): - raise NotImplementedError() - - def _register_new_table_info(self, new_table_info): - self.table_create_infos.append(new_table_info) - self.table_to_output_slots.append([]) - self.table_to_vocabulary_slots.append([]) - self.table_to_input_groups.append([]) - self.table_to_slot.append([]) - - def _merge_new_table_info(self, new_table_info, assign_table_id): - main_table_info = self.table_create_infos[assign_table_id] - main_table_info['multihot_lens'] += new_table_info['multihot_lens'] - main_table_info['max_vocabulary_size'] += new_table_info['max_vocabulary_size'] - - def _register_table_info(self, new_table_info, assign_tid=-1): - multihot_lens = new_table_info['multihot_lens'] - in_slot_size = multihot_lens - max_vocabulary_size = new_table_info['max_vocabulary_size'] - in_slot_vocabulary_size = max_vocabulary_size - - tid = assign_tid - if tid == -1: - tid = len(self.table_create_infos) - self._register_new_table_info(new_table_info) - else: - self._merge_new_table_info(new_table_info, tid) - - self.table_to_slot[tid].append(len(self.in_slot_size_group)) - self.table_to_output_slots[tid].append(in_slot_size) - self.in_slot_size_group.append(in_slot_size) - self.table_to_vocabulary_slots[tid].append(in_slot_vocabulary_size) - self.in_slot_vocabulary_size_group.append(in_slot_vocabulary_size) - self.slot_to_table.append(tid) - - def _map_table_infos(self, user_defined_table_infos, assign_groups): - self.table_create_infos = [] - assign_groups_flat = reduce(lambda a, b: a+b, assign_groups, []) - sid_to_gid = reduce(lambda a, b: {**a, **b}, - [{sid: gid for sid in group} - for gid, group in enumerate(assign_groups)], {}) - gid_to_tid = dict() - for sid, table_info in enumerate(user_defined_table_infos): - if sid in 
assign_groups_flat: - gid = sid_to_gid.get(sid) - if gid in gid_to_tid: - self._register_table_info(table_info, assign_tid=gid_to_tid.get(gid)) - else: - tid = len(self.table_create_infos) - self._register_table_info(table_info, assign_tid=-1) - gid_to_tid[gid] = tid - else: - self._register_table_info(table_info, assign_tid=-1) - return self.table_create_infos - - -# no slot merge -class NoneTableMapPolicy(BaseTableMapPolicy): - def map_table_infos(self, user_defined_table_infos): - return self._map_table_infos(user_defined_table_infos, self.assign_groups) - - -# merge slot by user's assign_groups -class AutoMergeTableMapPolicy(BaseTableMapPolicy): - def map_table_infos(self, user_defined_table_infos): - assign_groups_flat = reduce(lambda a, b: a+b, self.assign_groups, []) - new_assign_groups = [] - for sid, table_info in enumerate(user_defined_table_infos): - if sid in assign_groups_flat: - continue - gid = -1 - if user_defined_table_infos[sid]['allow_merge']: - for ngid, group in enumerate(new_assign_groups): - if self._is_equal_table_info(user_defined_table_infos[group[0]], table_info) \ - and user_defined_table_infos[group[0]]['allow_merge']: - gid = ngid - break - if gid == -1: - gid = len(new_assign_groups) - new_assign_groups.append([]) - new_assign_groups[gid].append(sid) - new_assign_groups = self.assign_groups + new_assign_groups - return self._map_table_infos(user_defined_table_infos, new_assign_groups) diff --git a/tf_adapter/python/npu_bridge/embedding/embedding_utils.py b/tf_adapter/python/npu_bridge/embedding/embedding_utils.py deleted file mode 100644 index bd5aef85bfd5c0a47cfb03f0a415662d5d86ab90..0000000000000000000000000000000000000000 --- a/tf_adapter/python/npu_bridge/embedding/embedding_utils.py +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -import re -import tensorflow as tf - - -class EmbeddingVariableOption: - """ option for embedding service table. """ - - def __init__(self, filter_option=None, - padding_option=None, - evict_option=None, - completion_option=None, - storage_option=None, - feature_freezing_option=None, - communication_option=None): - self.filter_option = filter_option - self.padding_option = padding_option - self.evict_option = evict_option - self.completion_option = completion_option - self.storage_option = storage_option - self.feature_freezing_option = feature_freezing_option - self.communication_option = communication_option - - -class CounterFilter: - """ Counter filter for embedding table. """ - - def __init__(self, filter_freq, default_key_or_value, default_key=None, default_value=None): - self.filter_freq = filter_freq - self.default_key = default_key - self.default_value = default_value - self.default_key_or_value = default_key_or_value - - -class PaddingParamsOption: - """ padding key option for embedding service table. """ - - def __init__(self, padding_key=None, - mask=True): - self.padding_key = padding_key - self.mask = mask - - -class CompletionKeyOption: - """ completion key option for embedding service table. 
""" - - def __init__(self, completion_key=None, mask=1): - self.completion_key = completion_key - self.mask = mask - - -class EvictOption: - """ Evict option for embedding table. """ - - def __init__(self, steps_to_live): - self.steps_to_live = steps_to_live - - -def check_common_init_params(name, init_vocabulary_size, embedding_dim, embedding_type, mask_zero): - if (name is None) or (init_vocabulary_size is None) or (embedding_dim is None): - raise ValueError("table name, init_vocabulary_size and embedding_dim can not be None.") - if not isinstance(name, str): - raise TypeError("embedding table name must be string.") - regex = re.compile('[@!#$%^&*()<>?/\|}{~:]') - if regex.search(name) is not None: - raise ValueError("table name contains illegal character.") - if (not isinstance(init_vocabulary_size, int)) or (not isinstance(embedding_dim, int)): - raise ValueError("init_vocabulary_size and embedding_dim must be int.") - if init_vocabulary_size < 0: - raise ValueError("init_vocabulary_size can not be smaller than zero.") - if embedding_dim <= 0: - raise ValueError("embedding_dim must be greater than zero.") - if (embedding_type != "PS") and (embedding_type != "data_parallel"): - raise TypeError("embedding_type must be PS or data_parallel") - if not isinstance(mask_zero, bool): - raise TypeError("mask zero must be bool") - - -def check_each_initializer(initializer_mode, min_value, max_value, constant_value, mu, sigma): - if initializer_mode == 'random_uniform': - if (min_value is None) or (max_value is None) or \ - (not isinstance(min_value, (float, int))) or (not isinstance(max_value, (float, int))): - raise ValueError("If initializer is random_uniform, min and max can not be None, must be int or float.") - if initializer_mode == 'truncated_normal': - if (min_value is None) or (max_value is None) or (mu is None) or (sigma is None) or \ - (not isinstance(min_value, (float, int))) or (not isinstance(max_value, (float, int))) or \ - (not isinstance(mu, (float, 
int))) or (not isinstance(sigma, (float, int))): - raise ValueError("If initializer is truncated_normal, min, max, mu and sigma can not be None," - "and they must be int or float.") - if initializer_mode == 'constant': - if (constant_value is None) or (not isinstance(constant_value, (float, int))): - raise ValueError("If initializer is constant, constant_value can not be None, must be float or int.") - - -def check_init_params_type(key_dtype, value_dtype, init_vocabulary_size, embedding_dim, multihot_lens, allow_merge): - if (key_dtype is not tf.int64) or (value_dtype is not tf.float32): - raise TypeError("key_dtype only support tf.int64, value_dtype only support tf.float32 now.") - if (not isinstance(init_vocabulary_size, int)) or (not isinstance(embedding_dim, int)) or \ - (not isinstance(multihot_lens, int)) or (not isinstance(allow_merge, bool)): - raise TypeError("init_vocabulary_size, embedding_dim, multihot_lens must be int, allow_merge must be bool.") diff --git a/tf_adapter/python/npu_bridge/embedding/tf_path.py b/tf_adapter/python/npu_bridge/embedding/tf_path.py deleted file mode 100644 index a5717e652ec960fa3a849471e9d02d74ff0c58da..0000000000000000000000000000000000000000 --- a/tf_adapter/python/npu_bridge/embedding/tf_path.py +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -from tensorflow.python.eager import context -from tensorflow.python.framework import ops -from tensorflow.python.ops import resource_variable_ops -from tensorflow.python.ops import variables -from tensorflow.python.training import optimizer as embeddingOptimizer -from npu_bridge.embedding.embedding_resource import NpuEmbeddingResource - - -class _NpuEmbeddingResourceProcessor(embeddingOptimizer._OptimizableVariable): - """Processor for dense NpuEmbeddingResourceProcessor.""" - - def __init__(self, v): - self._v = v - - def target(self): - return self._v - - def update_op(self, optimizer, g): - return optimizer._resource_apply_sparse(g.values, self._v, g.indices) - - -def _get_processor(v): - """The processor of v.""" - if context.executing_eagerly(): - if isinstance(v, ops.Tensor): - return embeddingOptimizer._TensorProcessor(v) - else: - return embeddingOptimizer._DenseResourceVariableProcessor(v) - if isinstance(v, NpuEmbeddingResource): - return _NpuEmbeddingResourceProcessor(v) - if resource_variable_ops.is_resource_variable(v) and not v._in_graph_mode: # pylint: disable=protected-access - # True if and only if `v` was initialized eagerly. 
- return embeddingOptimizer._DenseResourceVariableProcessor(v) - if v.op.type == "VarHandleOp": - return embeddingOptimizer._DenseResourceVariableProcessor(v) - if isinstance(v, variables.Variable): - return embeddingOptimizer._RefVariableProcessor(v) - if isinstance(v, ops.Tensor): - return embeddingOptimizer._TensorProcessor(v) - - raise NotImplementedError("Trying to optimize unsupported type ", v) - - -def path_on_tf(): - embeddingOptimizer._get_processor = _get_processor - - diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py index e0e32ff6ce31fef0576fdb82316780bd1d76b459..8d5fe17398a7d16816260680e5838e767a4ecf69 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py @@ -105,7 +105,6 @@ class NPURunConfig(run_config_lib.RunConfig): stream_sync_timeout=-1, event_sync_timeout=-1, external_weight=False, - es_cluster_config=None, deterministic=0, frozen_variable=False, variable_placement="Device", @@ -115,7 +114,6 @@ class NPURunConfig(run_config_lib.RunConfig): quant_dumpable=None, input_fusion_size=131072, compile_dynamic_mode=None, - execute_times=-1, graph_max_parallel_model_num=1, export_compile_stat=1, aicore_num=None, @@ -183,7 +181,6 @@ class NPURunConfig(run_config_lib.RunConfig): experimental_config: The experimental configuration. topo_sorting_mode: Provides an interface for users to customize topology sorting. external_weight: Whether convert const to fileconstant and save weight to file. - es_cluster_config: esClusterConfig from user input in embedding service. 
frozen_variable: Whether folding constant variables variable_placement: Process variable on host or device jit_compile: Whether enable jit compile @@ -290,12 +287,10 @@ class NPURunConfig(run_config_lib.RunConfig): self.stream_sync_timeout = stream_sync_timeout self.event_sync_timeout = event_sync_timeout self._external_weight = external_weight - self.es_cluster_config = es_cluster_config self._jit_compile = jit_compile self._input_fusion_size = input_fusion_size self._compile_dynamic_mode = compile_dynamic_mode self._graph_max_parallel_model_num = graph_max_parallel_model_num - self.execute_times = execute_times self._export_compile_stat = export_compile_stat self._aicore_num = aicore_num self._oo_constant_folding = oo_constant_folding diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py index 965965680206db50f4fe6fd64e12cf888caa6a98..3f1ffa8632effbb817a2812b5541477dc290bd0b 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py @@ -837,8 +837,6 @@ class NPUEstimator(estimator_lib.Estimator): custom_op.parameter_map["topo_sorting_mode"].i = config.topo_sorting_mode if config.insert_op_file is not None: custom_op.parameter_map["insert_op_file"].s = config.insert_op_file - if config.es_cluster_config is not None: - custom_op.parameter_map["es_cluster_config"].s = tf.compat.as_bytes(config.es_cluster_config) if config._compile_dynamic_mode is not None: custom_op.parameter_map["compile_dynamic_mode"].b = config._compile_dynamic_mode if config._graph_max_parallel_model_num is not None: @@ -850,7 +848,6 @@ class NPUEstimator(estimator_lib.Estimator): custom_op.parameter_map["external_weight"].b = config._external_weight custom_op.parameter_map["frozen_variable"].b = config._frozen_variable custom_op.parameter_map["variable_placement"].s = tf.compat.as_bytes(config._variable_placement) - 
custom_op.parameter_map["execute_times"].i = config.execute_times if config._shape_generalization_mode is not None: custom_op.parameter_map["shape_generalization_mode"].s = tf.compat.as_bytes( config._shape_generalization_mode) diff --git a/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py b/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py index 96ac42738c31cc3e9b17955b8a6f0621c00081e5..e5abba17784c4879c990184a8bc92765feb41b6a 100644 --- a/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py +++ b/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py @@ -25,29 +25,6 @@ from npu_bridge.helper import helper gen_npu_cpu_ops = helper.get_gen_ops() -## 提供device侧FeatureMapping LookupOrInsert功能 -# @param table_handle int64 类型 -# @param keys int64 类型 -# @param bucket_size int 类型 -# @param embedding_dim int 类型 -# @param filter_mode string 类型 -# @param filter_freq int 类型 -# @param default_key_or_value bool 类型 -# @param default_key int 类型 -# @param default_value float 类型 -# @param filter_key_flag bool 类型 -# @param filter_key int 类型 -# @return values float 类型 -def embedding_hashtable_lookup_or_insert(table_handle, keys, bucket_size, embedding_dim, filter_mode, filter_freq, - default_key_or_value, default_key, default_value, filter_key_flag, filter_key): - """ device embedding feature mapping lookup or insert. 
""" - result = gen_npu_cpu_ops.EmbeddingHashTableLookupOrInsert( - table_handle=table_handle, keys=keys, bucket_size=bucket_size, embedding_dim=embedding_dim, - filter_mode=filter_mode, filter_freq=filter_freq, default_key_or_value=default_key_or_value, - default_key=default_key, default_value=default_value, filter_key_flag=filter_key_flag, filter_key=filter_key) - return result - - ## 提供embeddingrankid功能 # @param addr_tensor tensorflow的tensor类型,embeddingrankid操作的输入; # @param index tensorflow的tensor类型,embeddingrankid操作的输入; @@ -400,115 +377,6 @@ def device_feature_mapping(feature_id): return result -## 提供device侧EmbeddingFeatureMappingV2功能 -# @param table_name string 类型 -# @param feature_id int64 类型 -# @param table_total_size list(int) 类型 -# @param table_actual_size list(int) 类型 -# @return offset_id int32 类型 -def embedding_feature_mapping_v2(table_name, feature_id, table_total_size, table_actual_size): - """ device embedding feature mapping v2. """ - result = gen_npu_cpu_ops.EmbeddingFeatureMappingV2( - table_name=table_name, feature_id=feature_id, - table_total_size=table_total_size, table_actual_size=table_actual_size) - return result - - -## 提供device侧EmbeddingFeatureMappingTableSize功能 -# @param table_name string 类型 -# @return feature_size int64 类型 -def embedding_feature_mapping_table_size(table_name): - """ device embedding feature mapping table size. """ - result = gen_npu_cpu_ops.EmbeddingFeatureMappingTableSize(table_name=table_name) - return result - - -## 提供device侧EmbeddingFeatureMappingFind能 -# @param table_name string 类型 -# @param feature_size int64 类型 -# @return offset_id int32 类型 -def embedding_feature_mapping_find(table_name, feature_size, num): - """ device embedding feature mapping find. 
""" - result = gen_npu_cpu_ops.EmbeddingFeatureMappingFind( - table_name=table_name, feature_size=feature_size, num=num) - return result - - -## 提供host侧EmbeddingFeatureMappingExport功能 -# @param file_path string 类型 -# @param table_name string 类型 -# @param feature_id int64 类型 -# @param offset_id int32 类型 -# @param values float 类型 -def embedding_feature_mapping_export(file_path, table_name, values, feature_id, offset_id, embedding_dim): - """ host embedding feature mapping export. """ - gen_npu_cpu_ops.EmbeddingFeatureMappingExport( - file_path=file_path, table_name=table_name, values=values, - feature_id=feature_id, offset_id=offset_id, embedding_dim=embedding_dim) - - -## 提供host侧EmbeddingFeatureMappingFileSize功能 -# @param file_path string 类型 -# @param table_name string 类型 -# @param only_offset_flag bool 类型 -# @param embedding_dim int 类型 -# @return feature_size int64 类型 -def embedding_feature_mapping_file_size(file_path, table_name, embedding_dim, only_offset_flag): - """ host embedding feature mapping file size. """ - result = gen_npu_cpu_ops.EmbeddingFeatureMappingFileSize( - file_path=file_path, table_name=table_name, - embedding_dim=embedding_dim, only_offset_flag=only_offset_flag) - return result - - -## 提供host侧EmbeddingFeatureMappingImport功能 -# @param file_path string 类型 -# @param table_name string 类型 -# @param feature_size int64 类型 -# @param embedding_dim int 类型 -# @param only_offset_flag bool 类型 -# @return feature_id int64 类型 -# @return offset_id int32 类型 -# @return values float 类型 -def embedding_feature_mapping_import(file_path, table_name, feature_size, embedding_dim, only_offset_flag, num): - """ host embedding feature mapping import. 
""" - result = gen_npu_cpu_ops.EmbeddingFeatureMappingImport( - file_path=file_path, table_name=table_name, - feature_size=feature_size, embedding_dim=embedding_dim, - only_offset_flag=only_offset_flag, num=num) - return result - - -## 提供device侧EmbeddingFeatureMappingInsert功能 -# @param table_name string 类型 -# @param feature_id int64 类型 -# @param offset_id int32 类型 -def embedding_feature_mapping_insert(table_name, feature_id, offset_id): - """ device embedding feature mapping insert. """ - gen_npu_cpu_ops.EmbeddingFeatureMappingInsert( - table_name=table_name, feature_id=feature_id, offset_id=offset_id) - - -## 提供host侧FeatureMapping Import功能 -# @param path string 类型 -# @param table_name string 类型 -# @return fake int32 类型 -def host_feature_mapping_export(path, table_name_list): - """ host feature mapping export. """ - result = gen_npu_cpu_ops.FeatureMappingExport(path=path, table_name_list=table_name_list) - return result - - -## 提供host侧FeatureMapping Export功能 -# @param path string 类型 -# @param table_name string 类型 -# @return fake int32 类型 -def host_feature_mapping_import(path): - """ host feature mapping export. 
""" - result = gen_npu_cpu_ops.FeatureMappingImport(path=path) - return result - - ## 提供device侧初始化hashmap表功能 # @param table_id int32 类型 # @param bucket_size int64 类型 @@ -599,104 +467,4 @@ def embedding_hashmap_import_v2(file_path, table_ids, table_sizes, table_names, result = gen_npu_cpu_ops.EmbeddingHashmapImport( file_path=file_path, table_ids=table_ids, table_sizes=table_sizes, table_names=table_names, global_step=global_step, embedding_dims=embedding_dims, num=num) - return result - - -## EmbeddingHashTable Init功能 -# @param table_handle int64 类型 -# @param sampled_values float 类型 -# @param bucket_size int 类型 -# @param embedding_dim int 类型 -# @param initializer_mode string 类型 -# @param constant_value int 类型 -def init_embedding_hashtable(table_handle, sampled_values, bucket_size, embedding_dim, initializer_mode, - constant_value): - """ device init embedding hashtable. """ - result = gen_npu_cpu_ops.InitEmbeddingHashTable( - table_handle=table_handle, sampled_values=sampled_values, bucket_size=bucket_size, embedding_dim=embedding_dim, - initializer_mode=initializer_mode, constant_value=constant_value) - return result - - -## 提供host侧hashTable导入功能 -# @param table_handles int64 类型 -# @param embedding_dims int64 类型 -# @param bucket_sizes int64 类型 -# @param keys int64 类型 -# @param counters uint64 类型 -# @param filter_flags uint8 类型 -# @param values float 类型 -def embedding_hash_table_import(table_handles, embedding_dims, bucket_sizes, keys, counters, filter_flags, values): - """ host embedding feature hash table import. 
""" - result = gen_npu_cpu_ops.EmbeddingHashTableImport( - table_handles=table_handles, embedding_dims=embedding_dims, bucket_sizes=bucket_sizes, - keys=keys, counters=counters, filter_flags=filter_flags, values=values) - return result - - -## 提供host侧hashTable导出功能 -# @param table_handles int64 类型 -# @param table_sizes int64 类型 -# @param embedding_dims int64 类型 -# @param bucket_sizes int64 类型 -# @param export_mode string 类型 -# @param filtered_export_flag bool 类型 -def embedding_hash_table_export(table_handles, table_sizes, embedding_dims, bucket_sizes, export_mode='all', - filter_export_flag=False): - """ host embedding feature hash table export. """ - result = gen_npu_cpu_ops.EmbeddingHashTableExport( - table_handles=table_handles, table_sizes=table_sizes, embedding_dims=embedding_dims, bucket_sizes=bucket_sizes, - export_mode=export_mode, filter_export_flag=filter_export_flag) - return result - - -## EmbeddingHashTableApplyAdamW AdamW 更新功能 -# @param table_handle int64 类型 -# @param keys int64 类型 -# @param m float16, float32 类型 -# @param v float16, float32 类型 -# @param beta1_power float16, float32 类型 -# @param beta2_power float16, float32 类型 -# @param lr float16, float32 类型 -# @param weight_decay float16, float32 类型 -# @param beta1 float16, float32 类型 -# @param beta2 float16, float32 类型 -# @param epsilon float16, float32 类型 -# @param grad float16, float32 类型 -# @param max_grad_norm float16, float32 类型 -# @param embedding_dim int 类型 -# @param bucket_size int 类型 -# @param amsgrad bool 类型 -# @param maximize bool 类型 -def embedding_hashtable_apply_adam_w(table_handle, keys, m, v, beta1_power, beta2_power, lr, weight_decay, - beta1, beta2, epsilon, grad, max_grad_norm, embedding_dim, - bucket_size, amsgrad, maximize): - """ device update embedding hashtable using AdamW. 
""" - result = gen_npu_cpu_ops.EmbeddingHashTableApplyAdamW( - table_handle=table_handle, keys=keys, m=m, v=v, beta1_power=beta1_power, beta2_power=beta2_power, - lr=lr, weight_decay=weight_decay, beta1=beta1, beta2=beta2, epsilon=epsilon, grad=grad, - max_grad_norm=max_grad_norm, embedding_dim=embedding_dim, bucket_size=bucket_size, - amsgrad=amsgrad, maximize=maximize) - return result - - -## 提供device侧FeatureMapping Evict功能 -# @param table_handle int64 类型 -# @param keys int64 类型 -# @param sampled_values float 类型 -# @param table_cap int 类型 -# @param embedding_dim int 类型 -# @param init_mode string 类型 -# @param const_val int 类型 -# @return table_handle int64 类型 -def embedding_hashtable_evict(table_handle, keys, sampled_values, table_cap, embedding_dim, init_mode, const_val): - """device embedding feature mapping evict.""" - return gen_npu_cpu_ops.EmbeddingHashTableEvict( - table_handle=table_handle, - keys=keys, - sampled_values=sampled_values, - table_cap=table_cap, - embedding_dim=embedding_dim, - init_mode=init_mode, - const_val=const_val, - ) + return result \ No newline at end of file diff --git a/tf_adapter/python/npu_bridge/npu_init.py b/tf_adapter/python/npu_bridge/npu_init.py index b4bfb113f29d0545a88b9b58fe37b318456c0b5c..aee09779a5897966f2592ecc2a1c4b34a08520db 100644 --- a/tf_adapter/python/npu_bridge/npu_init.py +++ b/tf_adapter/python/npu_bridge/npu_init.py @@ -67,7 +67,6 @@ from hccl.manage.api import get_world_rank_from_group_rank from hccl.manage.api import get_group_rank_from_world_rank from hccl.split.api import set_split_strategy_by_idx from hccl.split.api import set_split_strategy_by_size -from npu_bridge import embedding as npu_embedding from npu_bridge.profiler import profiler from npu_bridge.npu_cpu import npu_cpu_ops from npu_bridge.scoped_graph_manager import scoped_graph_manager diff --git a/tf_adapter/tests/st/kernels/pbtxt/geop.pbtxt b/tf_adapter/tests/st/kernels/pbtxt/geop.pbtxt index 
441d74db228973e746c42662fcbbda0789e0c0f7..58575959fcc4e83066f0231f17413e0ac2d7c80d 100644 --- a/tf_adapter/tests/st/kernels/pbtxt/geop.pbtxt +++ b/tf_adapter/tests/st/kernels/pbtxt/geop.pbtxt @@ -113,36 +113,6 @@ node { s: "dynamic_execute" } } - attr { - key: "_max_key_num" - value { - s: "1" - } - } - attr { - key: "_embedding_dim" - value { - s: "1" - } - } - attr { - key: "_use_counter_filter" - value { - s: "1" - } - } - attr { - key: "_padding_key" - value { - s: "0" - } - } - attr { - key: "_embedding_flags" - value { - b: true - } - } attr { key: "_dynamic_input" value { diff --git a/tf_adapter/tests/st/kernels/testcase/host_feature_mapping_test.cc b/tf_adapter/tests/st/kernels/testcase/host_feature_mapping_test.cc deleted file mode 100644 index d6735b82ea342c2aa173f4c3155f717b01c29b42..0000000000000000000000000000000000000000 --- a/tf_adapter/tests/st/kernels/testcase/host_feature_mapping_test.cc +++ /dev/null @@ -1,95 +0,0 @@ -#include "tf_adapter/util/npu_attrs.h" -#include "tensorflow/core/common_runtime/device_factory.h" -#include "tensorflow/core/framework/fake_input.h" -#include "tensorflow/core/framework/node_def_builder.h" -#include "tensorflow/core/framework/shape_inference.h" -#include "tensorflow/core/framework/types.pb.h" -#include "tensorflow/core/platform/env.h" -#include "tensorflow/core/public/version.h" -#include -#include "gtest/gtest.h" -#include - -namespace tensorflow { -namespace { - -#define TF_ASSERT_OK(statement) \ - ASSERT_EQ(::tensorflow::Status::OK(), (statement)) - -#define TF_EXPECT_OK(statement) \ - EXPECT_EQ(::tensorflow::Status::OK(), (statement)) - -class DummyDevice : public DeviceBase { - public: - DummyDevice(Env* env, bool save) : DeviceBase(env), save_(save) {} - bool RequiresRecordingAccessedTensors() const override { return save_; } - private: - bool save_; -}; -} -class HostFeatureMappingTest : public testing::Test { - protected: - virtual void SetUp() {} - virtual void TearDown() {} -}; - -FakeInputFunctor 
FakeHostInputStub(DataType dt) { - return [dt](const OpDef &op_def, int in_index, const NodeDef &node_def, - NodeDefBuilder *builder) { - char c = 'a' + (in_index % 26); - string in_node = string(&c, 1); - builder->Input(in_node, 0, dt); - return Status::OK(); - }; -} - -PartialTensorShape THostShape(std::initializer_list dims) { - return PartialTensorShape(dims); -} - -TEST(HostFeatureMappingTest, HostFeatureMappingTestShapeInference) { - const OpRegistrationData *reg; - TF_CHECK_OK(OpRegistry::Global()->LookUp("HostFeatureMapping", ®)); - OpDef op_def = reg->op_def; - NodeDef def; - int threshold = 1; - std::string table_name = "table_name1"; - TF_CHECK_OK(NodeDefBuilder("dummy", &op_def) - .Input(FakeHostInputStub(DT_INT64)) - .Attr("threshold", threshold) - .Attr("table_name", table_name) - .Finalize(&def)); - shape_inference::InferenceContext c(0, &def, op_def, {THostShape({1})}, {}, {}, {}); - TF_CHECK_OK(reg->shape_inference_fn(&c)); - ASSERT_EQ("[1]", c.DebugString(c.output(0))); -} - -TEST(HostFeatureMappingTest, HostFeatureMappingExportTestShapeInference) { - const OpRegistrationData *reg; - TF_CHECK_OK(OpRegistry::Global()->LookUp("FeatureMappingExport", ®)); - OpDef op_def = reg->op_def; - NodeDef def; - std::vector table_name_list = {"table_name1"}; - TF_CHECK_OK(NodeDefBuilder("dummy", &op_def) - .Input(FakeHostInputStub(DT_STRING)) - .Attr("table_name_list", table_name_list) - .Finalize(&def)); - shape_inference::InferenceContext c(0, &def, op_def, {THostShape({1})}, {}, {}, {}); - ASSERT_TRUE(reg->shape_inference_fn(&c).ok()); -} - -TEST(HostFeatureMappingTest, HostFeatureMappingImportTestShapeInference) { - const OpRegistrationData *reg; - TF_CHECK_OK(OpRegistry::Global()->LookUp("FeatureMappingImport", ®)); - OpDef op_def = reg->op_def; - NodeDef def; - std::vector table_name_list = {"table_name1"}; - TF_CHECK_OK(NodeDefBuilder("dummy", &op_def) - .Input(FakeHostInputStub(DT_STRING)) - .Attr("table_name_list", table_name_list) - .Finalize(&def)); 
- shape_inference::InferenceContext c(0, &def, op_def, {THostShape({1})}, {}, {}, {}); - ASSERT_TRUE(reg->shape_inference_fn(&c).ok()); -} - -} // end tensorflow \ No newline at end of file diff --git a/tf_adapter/tests/st/kernels/testcase/npu_embedding_ops_test.cc b/tf_adapter/tests/st/kernels/testcase/npu_embedding_ops_test.cc index f8a26dc40d81fb393560ab6f43b46374be75f22b..cfda11c0da6e71f2df0510d33acea4c87a545786 100644 --- a/tf_adapter/tests/st/kernels/testcase/npu_embedding_ops_test.cc +++ b/tf_adapter/tests/st/kernels/testcase/npu_embedding_ops_test.cc @@ -22,179 +22,12 @@ FakeInputFunctor FakeInputStub(DataType dt) { }; } -FakeInputFunctor FakeInputStubList(DataType dt) { - return [dt](const OpDef &op_def, int in_index, const NodeDef &node_def, - NodeDefBuilder *builder) { - char c = 'a' + (in_index % 26); - string in_node = string(&c, 1); - builder->Input({{in_node, {}, dt}}); - return Status::OK(); - }; -} - class NpuCpuOpTest : public testing::Test { protected: virtual void SetUp() {} virtual void TearDown() {} }; -TEST(EmbeddingOpsTest, TestEmbeddingTableFind02) { - const OpRegistrationData *reg; - TF_CHECK_OK(OpRegistry::Global()->LookUp("EmbeddingTableFind", ®)); - OpDef op_def = reg->op_def; - NodeDef def; - TF_CHECK_OK(NodeDefBuilder("dummy", &op_def) - .Attr("embedding_dim", {4}) - .Input(FakeInputStub(DT_INT32)) - .Input(FakeInputStub(DT_INT64)) - .Finalize(&def)); - - shape_inference::InferenceContext c( - 0, &def, op_def, - {TShape({1}), TShape({16})}, - {}, {}, {}); - ASSERT_TRUE(reg->shape_inference_fn(&c).ok()); -} - -TEST(EmbeddingOpsTest, TestEmbeddingFeatureMappingShapeInfer) { - const OpRegistrationData* reg; - TF_CHECK_OK(OpRegistry::Global()->LookUp("EmbeddingFeatureMapping", ®)); - OpDef op_def = reg->op_def; - NodeDef def; - TF_CHECK_OK(NodeDefBuilder("dummy", &op_def) - .Input(FakeInputStub(DT_INT64)) - .Finalize(&def)); - shape_inference::InferenceContext c(0, &def, op_def, {TShape({2, 2, 3, 4})}, {}, {}, {}); - 
ASSERT_TRUE(reg->shape_inference_fn(&c).ok()); -} - -TEST(EmbeddingOpsTest, TestEmbeddingFeatureMappingV2ShapeInfer) { - const OpRegistrationData* reg; - TF_CHECK_OK(OpRegistry::Global()->LookUp("EmbeddingFeatureMappingV2", ®)); - OpDef op_def = reg->op_def; - NodeDef def; - TF_CHECK_OK(NodeDefBuilder("dummy", &op_def) - .Attr("table_total_size", {1}) - .Attr("table_actual_size", {1}) - .Input(FakeInputStub(DT_STRING)) - .Input(FakeInputStub(DT_INT64)) - .Finalize(&def)); - shape_inference::InferenceContext c( - 0, &def, op_def, - {TShape({}), TShape({6})}, - {}, {}, {}); - ASSERT_TRUE(reg->shape_inference_fn(&c).ok()); -} - -TEST(EmbeddingOpsTest, TestEmbeddingFeatureMappingFindShapeInfer) { - const OpRegistrationData *reg; - TF_CHECK_OK(OpRegistry::Global()->LookUp("EmbeddingFeatureMappingFind", ®)); - OpDef op_def = reg->op_def; - NodeDef def; - TF_CHECK_OK(NodeDefBuilder("dummy", &op_def) - .Attr("num", 1) - .Input(FakeInputStub(DT_STRING)) - .Input(FakeInputStub(DT_INT64)) - .Finalize(&def)); - shape_inference::InferenceContext c( - 0, &def, op_def, - {TShape({}), TShape({})}, - {}, {}, {}); - ASSERT_TRUE(reg->shape_inference_fn(&c).ok()); -} - -TEST(EmbeddingOpsTest, TestEmbeddingFeatureMappingInsertShapeInfer) { - const OpRegistrationData *reg; - TF_CHECK_OK(OpRegistry::Global()->LookUp("EmbeddingFeatureMappingInsert", ®)); - OpDef op_def = reg->op_def; - NodeDef def; - TF_CHECK_OK(NodeDefBuilder("dummy", &op_def) - .Input(FakeInputStub(DT_STRING)) - .Input(FakeInputStubList(DT_INT64)) - .Input(FakeInputStubList(DT_INT32)) - .Finalize(&def)); - shape_inference::InferenceContext c( - 0, &def, op_def, - {TShape({}), TShape({6}), TShape({6})}, - {}, {}, {}); - ASSERT_TRUE(reg->shape_inference_fn(&c).ok()); -} - -TEST(EmbeddingOpsTest, TestEmbeddingFeatureMappingExportShapeInfer) { - const OpRegistrationData *reg; - TF_CHECK_OK(OpRegistry::Global()->LookUp("EmbeddingFeatureMappingExport", ®)); - OpDef op_def = reg->op_def; - NodeDef def; - 
TF_CHECK_OK(NodeDefBuilder("dummy", &op_def) - .Input(FakeInputStub(DT_STRING)) - .Input(FakeInputStub(DT_STRING)) - .Input(FakeInputStub(DT_INT64)) - .Input(FakeInputStub(DT_FLOAT)) - .Input(FakeInputStubList(DT_INT64)) - .Input(FakeInputStubList(DT_INT32)) - .Finalize(&def)); - shape_inference::InferenceContext c( - 0, &def, op_def, - {TShape({}), TShape({}), TShape({6}), TShape({6}), TShape({6})}, - {}, {}, {}); - ASSERT_TRUE(reg->shape_inference_fn(&c).ok()); -} - -TEST(EmbeddingOpsTest, TestEmbeddingFeatureMappingImportShapeInfer) { - const OpRegistrationData *reg; - TF_CHECK_OK(OpRegistry::Global()->LookUp("EmbeddingFeatureMappingImport", ®)); - OpDef op_def = reg->op_def; - NodeDef def; - TF_CHECK_OK(NodeDefBuilder("dummy", &op_def) - .Attr("embedding_dim", 4) - .Attr("only_offset_flag", 1) - .Attr("num", 1) - .Input(FakeInputStub(DT_STRING)) - .Input(FakeInputStub(DT_STRING)) - .Input(FakeInputStub(DT_INT64)) - .Input(FakeInputStub(DT_INT64)) - .Finalize(&def)); - shape_inference::InferenceContext c( - 0, &def, op_def, - {TShape({}), TShape({}), TShape({6})}, - {}, {}, {}); - ASSERT_TRUE(reg->shape_inference_fn(&c).ok()); -} - -TEST(EmbeddingOpsTest, TestEmbeddingFeatureMappingTableSizeShapeInfer) { - const OpRegistrationData *reg; - TF_CHECK_OK(OpRegistry::Global()->LookUp("EmbeddingFeatureMappingTableSize", ®)); - OpDef op_def = reg->op_def; - NodeDef def; - TF_CHECK_OK(NodeDefBuilder("dummy", &op_def) - .Input(FakeInputStub(DT_STRING)) - .Finalize(&def)); - shape_inference::InferenceContext c( - 0, &def, op_def, - {TShape({})}, - {}, {}, {}); - ASSERT_TRUE(reg->shape_inference_fn(&c).ok()); -} - -TEST(EmbeddingOpsTest, TestEmbeddingFeatureMappingFileSizeShapeInfer) { - const OpRegistrationData *reg; - TF_CHECK_OK(OpRegistry::Global()->LookUp("EmbeddingFeatureMappingFileSize", ®)); - OpDef op_def = reg->op_def; - NodeDef def; - TF_CHECK_OK(NodeDefBuilder("dummy", &op_def) - .Attr("embedding_dim", 4) - .Attr("only_offset_flag", 1) - 
.Input(FakeInputStub(DT_STRING)) - .Input(FakeInputStub(DT_STRING)) - .Input(FakeInputStub(DT_INT64)) - .Finalize(&def)); - shape_inference::InferenceContext c( - 0, &def, op_def, - {TShape({}), TShape({})}, - {}, {}, {}); - ASSERT_TRUE(reg->shape_inference_fn(&c).ok()); -} - TEST(EmbeddingOpsTest, InitEmbeddingHashmapV2ShapeInfer) { const OpRegistrationData* reg; TF_CHECK_OK(OpRegistry::Global()->LookUp("InitEmbeddingHashmapV2", ®)); diff --git a/tf_adapter/tests/st/optimizers/testcase/get_attr_optimize_pass_test.cc b/tf_adapter/tests/st/optimizers/testcase/get_attr_optimize_pass_test.cc index 86c53aff564386f8515639fc80e9be884315f393..025c4c6810c142c5bda54e8a4742e668bc87db16 100644 --- a/tf_adapter/tests/st/optimizers/testcase/get_attr_optimize_pass_test.cc +++ b/tf_adapter/tests/st/optimizers/testcase/get_attr_optimize_pass_test.cc @@ -217,9 +217,6 @@ TEST_F(GetAttrOptimizationPassTest, SetAttrTest) { AttrValue insert_op_file = AttrValue(); insert_op_file.set_s("aipp.cfg"); (*custom_config->mutable_parameter_map())["insert_op_file"] = insert_op_file; - AttrValue es_cluster_config = AttrValue(); - es_cluster_config.set_s("es"); - (*custom_config->mutable_parameter_map())["es_cluster_config"] = es_cluster_config; AttrValue external_weight = AttrValue(); external_weight.set_b(true); (*custom_config->mutable_parameter_map())["external_weight"] = external_weight; diff --git a/tf_adapter/tests/ut/kernels/pbtxt/geop.pbtxt b/tf_adapter/tests/ut/kernels/pbtxt/geop.pbtxt index f6e17b529938e6cb6028b5b8cb550f952f7e10a4..bdbcf1631fa03474e89a13d710d28993a26241bd 100644 --- a/tf_adapter/tests/ut/kernels/pbtxt/geop.pbtxt +++ b/tf_adapter/tests/ut/kernels/pbtxt/geop.pbtxt @@ -113,36 +113,6 @@ node { s: "dynamic_execute" } } - attr { - key: "_max_key_num" - value { - s: "1" - } - } - attr { - key: "_embedding_dim" - value { - s: "1" - } - } - attr { - key: "_use_counter_filter" - value { - s: "1" - } - } - attr { - key: "_padding_key" - value { - s: "0" - } - } - attr { - key: 
"_embedding_flags" - value { - b: true - } - } attr { key: "_dynamic_input" value { diff --git a/tf_adapter/tests/ut/kernels/pbtxt/geop_compile_dynamic.pbtxt b/tf_adapter/tests/ut/kernels/pbtxt/geop_compile_dynamic.pbtxt index 2deca47d9dc1806b4c859ee056ae571528804e9a..4c72af935140e1407c60416738862f4030a1851d 100644 --- a/tf_adapter/tests/ut/kernels/pbtxt/geop_compile_dynamic.pbtxt +++ b/tf_adapter/tests/ut/kernels/pbtxt/geop_compile_dynamic.pbtxt @@ -113,24 +113,6 @@ node { s: "dynamic_execute" } } - attr { - key: "_max_key_num" - value { - s: "1" - } - } - attr { - key: "_embedding_dim" - value { - s: "1" - } - } - attr { - key: "_use_counter_filter" - value { - s: "1" - } - } attr { key: "_dynamic_input" value { diff --git a/tf_adapter/tests/ut/kernels/pbtxt/geop_npu_compile.pbtxt b/tf_adapter/tests/ut/kernels/pbtxt/geop_npu_compile.pbtxt index 49b420c66a08c0b94608accc2ba582a0c9ab0592..15f0f53d2256b29f805158fca1de11ee1942f814 100644 --- a/tf_adapter/tests/ut/kernels/pbtxt/geop_npu_compile.pbtxt +++ b/tf_adapter/tests/ut/kernels/pbtxt/geop_npu_compile.pbtxt @@ -113,36 +113,6 @@ node { s: "dynamic_execute" } } - attr { - key: "_max_key_num" - value { - s: "1" - } - } - attr { - key: "_embedding_dim" - value { - s: "1" - } - } - attr { - key: "_use_counter_filter" - value { - s: "1" - } - } - attr { - key: "_padding_key" - value { - s: "0" - } - } - attr { - key: "_embedding_flags" - value { - b: true - } - } attr { key: "_dynamic_input" value { diff --git a/tf_adapter/tests/ut/kernels/pbtxt/geop_shape_generalization_mode.pbtxt b/tf_adapter/tests/ut/kernels/pbtxt/geop_shape_generalization_mode.pbtxt index 887a72f15c95d43a873e4f69a82d98b8d95c1468..924ab06c7f453cc91eea957f3bd036b4a4c5327b 100644 --- a/tf_adapter/tests/ut/kernels/pbtxt/geop_shape_generalization_mode.pbtxt +++ b/tf_adapter/tests/ut/kernels/pbtxt/geop_shape_generalization_mode.pbtxt @@ -113,24 +113,6 @@ node { s: "dynamic_execute" } } - attr { - key: "_max_key_num" - value { - s: "1" - } - } - attr { - 
key: "_embedding_dim" - value { - s: "1" - } - } - attr { - key: "_use_counter_filter" - value { - s: "1" - } - } attr { key: "_dynamic_input" value { diff --git a/tf_adapter/tests/ut/kernels/pbtxt/geop_shape_generalization_mode_ignore.pbtxt b/tf_adapter/tests/ut/kernels/pbtxt/geop_shape_generalization_mode_ignore.pbtxt index ae675689d5eeb7b5deaddca91c2dcd40c081d163..092ee2e261574bf78102078a4c6c35f83720d553 100644 --- a/tf_adapter/tests/ut/kernels/pbtxt/geop_shape_generalization_mode_ignore.pbtxt +++ b/tf_adapter/tests/ut/kernels/pbtxt/geop_shape_generalization_mode_ignore.pbtxt @@ -113,24 +113,6 @@ node { s: "dynamic_execute" } } - attr { - key: "_max_key_num" - value { - s: "1" - } - } - attr { - key: "_embedding_dim" - value { - s: "1" - } - } - attr { - key: "_use_counter_filter" - value { - s: "1" - } - } attr { key: "_dynamic_input" value { diff --git a/tf_adapter/tests/ut/kernels/testcase/host_feature_mapping_test.cc b/tf_adapter/tests/ut/kernels/testcase/host_feature_mapping_test.cc deleted file mode 100644 index 5ad94582934fd4fdd2edf1e7191e7a82b20f5f25..0000000000000000000000000000000000000000 --- a/tf_adapter/tests/ut/kernels/testcase/host_feature_mapping_test.cc +++ /dev/null @@ -1,95 +0,0 @@ -#include "tf_adapter/util/npu_attrs.h" -#include "tensorflow/core/common_runtime/device_factory.h" -#include "tensorflow/core/framework/fake_input.h" -#include "tensorflow/core/framework/node_def_builder.h" -#include "tensorflow/core/framework/shape_inference.h" -#include "tensorflow/core/framework/types.pb.h" -#include "tensorflow/core/platform/env.h" -#include "tensorflow/core/public/version.h" -#include -#include "gtest/gtest.h" -#include - -namespace tensorflow { -namespace { - -#define TF_ASSERT_OK(statement) \ - ASSERT_EQ(::tensorflow::Status::OK(), (statement)) - -#define TF_EXPECT_OK(statement) \ - EXPECT_EQ(::tensorflow::Status::OK(), (statement)) - -class DummyDevice : public DeviceBase { - public: - DummyDevice(Env* env, bool save) : DeviceBase(env), 
save_(save) {} - bool RequiresRecordingAccessedTensors() const override { return save_; } - private: - bool save_; -}; -} -class HostFeatureMappingTest : public testing::Test { - protected: - virtual void SetUp() {} - virtual void TearDown() {} -}; - -FakeInputFunctor FakeHostInputStub(DataType dt) { - return [dt](const OpDef &op_def, int in_index, const NodeDef &node_def, - NodeDefBuilder *builder) { - char c = 'a' + (in_index % 26); - string in_node = string(&c, 1); - builder->Input(in_node, 0, dt); - return Status::OK(); - }; -} - -PartialTensorShape THostShape(std::initializer_list dims) { - return PartialTensorShape(dims); -} - -TEST(HostFeatureMappingTest, HostFeatureMappingTestShapeInference) { - const OpRegistrationData *reg; - TF_CHECK_OK(OpRegistry::Global()->LookUp("HostFeatureMapping", ®)); - OpDef op_def = reg->op_def; - NodeDef def; - int threshold = 1; - std::string table_name = "table_name1"; - TF_CHECK_OK(NodeDefBuilder("dummy", &op_def) - .Input(FakeHostInputStub(DT_INT64)) - .Attr("threshold", threshold) - .Attr("table_name", table_name) - .Finalize(&def)); - shape_inference::InferenceContext c(0, &def, op_def, {THostShape({1})}, {}, {}, {}); - ASSERT_TRUE(reg->shape_inference_fn(&c).ok()); - ASSERT_EQ("[1]", c.DebugString(c.output(0))); -} - -TEST(HostFeatureMappingTest, HostFeatureMappingExportTestShapeInference) { - const OpRegistrationData *reg; - TF_CHECK_OK(OpRegistry::Global()->LookUp("FeatureMappingExport", ®)); - OpDef op_def = reg->op_def; - NodeDef def; - std::vector table_name_list = {"table_name1"}; - TF_CHECK_OK(NodeDefBuilder("dummy", &op_def) - .Input(FakeHostInputStub(DT_STRING)) - .Attr("table_name_list", table_name_list) - .Finalize(&def)); - shape_inference::InferenceContext c(0, &def, op_def, {THostShape({1})}, {}, {}, {}); - ASSERT_TRUE(reg->shape_inference_fn(&c).ok()); -} - -TEST(HostFeatureMappingTest, HostFeatureMappingImportTestShapeInference) { - const OpRegistrationData *reg; - 
TF_CHECK_OK(OpRegistry::Global()->LookUp("FeatureMappingImport", ®)); - OpDef op_def = reg->op_def; - NodeDef def; - std::vector table_name_list = {"table_name1"}; - TF_CHECK_OK(NodeDefBuilder("dummy", &op_def) - .Input(FakeHostInputStub(DT_STRING)) - .Attr("table_name_list", table_name_list) - .Finalize(&def)); - shape_inference::InferenceContext c(0, &def, op_def, {THostShape({1})}, {}, {}, {}); - ASSERT_TRUE(reg->shape_inference_fn(&c).ok()); -} - -} // end tensorflow \ No newline at end of file diff --git a/tf_adapter/tests/ut/kernels/testcase/npu_embedding_ops_test.cc b/tf_adapter/tests/ut/kernels/testcase/npu_embedding_ops_test.cc index ea099967dc4d61665fd1a5e7db224f6ba96a6cb8..aef1eab415d1c8d43148f35d877fc639ef9f4aab 100644 --- a/tf_adapter/tests/ut/kernels/testcase/npu_embedding_ops_test.cc +++ b/tf_adapter/tests/ut/kernels/testcase/npu_embedding_ops_test.cc @@ -22,179 +22,12 @@ FakeInputFunctor FakeInputStub(DataType dt) { }; } -FakeInputFunctor FakeInputStubList(DataType dt) { - return [dt](const OpDef &op_def, int in_index, const NodeDef &node_def, - NodeDefBuilder *builder) { - char c = 'a' + (in_index % 26); - string in_node = string(&c, 1); - builder->Input({{in_node, {}, dt}}); - return Status::OK(); - }; -} - class NpuCpuOpTest : public testing::Test { protected: virtual void SetUp() {} virtual void TearDown() {} }; -TEST(EmbeddingOpsTest, TestEmbeddingTableFind02) { - const OpRegistrationData *reg; - TF_CHECK_OK(OpRegistry::Global()->LookUp("EmbeddingTableFind", ®)); - OpDef op_def = reg->op_def; - NodeDef def; - TF_CHECK_OK(NodeDefBuilder("dummy", &op_def) - .Attr("embedding_dim", {4}) - .Input(FakeInputStub(DT_INT32)) - .Input(FakeInputStub(DT_INT64)) - .Finalize(&def)); - - shape_inference::InferenceContext c( - 0, &def, op_def, - {TShape({1}), TShape({16})}, - {}, {}, {}); - ASSERT_TRUE(reg->shape_inference_fn(&c).ok()); -} - -TEST(EmbeddingOpsTest, TestEmbeddingFeatureMappingShapeInfer) { - const OpRegistrationData* reg; - 
TF_CHECK_OK(OpRegistry::Global()->LookUp("EmbeddingFeatureMapping", ®)); - OpDef op_def = reg->op_def; - NodeDef def; - TF_CHECK_OK(NodeDefBuilder("dummy", &op_def) - .Input(FakeInputStub(DT_INT64)) - .Finalize(&def)); - shape_inference::InferenceContext c(0, &def, op_def, {TShape({2, 2, 3, 4})}, {}, {}, {}); - ASSERT_TRUE(reg->shape_inference_fn(&c).ok()); -} - -TEST(EmbeddingOpsTest, TestEmbeddingFeatureMappingV2ShapeInfer) { - const OpRegistrationData* reg; - TF_CHECK_OK(OpRegistry::Global()->LookUp("EmbeddingFeatureMappingV2", ®)); - OpDef op_def = reg->op_def; - NodeDef def; - TF_CHECK_OK(NodeDefBuilder("dummy", &op_def) - .Attr("table_total_size", {1}) - .Attr("table_actual_size", {1}) - .Input(FakeInputStub(DT_STRING)) - .Input(FakeInputStub(DT_INT64)) - .Finalize(&def)); - shape_inference::InferenceContext c( - 0, &def, op_def, - {TShape({}), TShape({6})}, - {}, {}, {}); - ASSERT_TRUE(reg->shape_inference_fn(&c).ok()); -} - -TEST(EmbeddingOpsTest, TestEmbeddingFeatureMappingFindShapeInfer) { - const OpRegistrationData *reg; - TF_CHECK_OK(OpRegistry::Global()->LookUp("EmbeddingFeatureMappingFind", ®)); - OpDef op_def = reg->op_def; - NodeDef def; - TF_CHECK_OK(NodeDefBuilder("dummy", &op_def) - .Attr("num", 1) - .Input(FakeInputStub(DT_STRING)) - .Input(FakeInputStub(DT_INT64)) - .Finalize(&def)); - shape_inference::InferenceContext c( - 0, &def, op_def, - {TShape({}), TShape({})}, - {}, {}, {}); - ASSERT_TRUE(reg->shape_inference_fn(&c).ok()); -} - -TEST(EmbeddingOpsTest, TestEmbeddingFeatureMappingInsertShapeInfer) { - const OpRegistrationData *reg; - TF_CHECK_OK(OpRegistry::Global()->LookUp("EmbeddingFeatureMappingInsert", ®)); - OpDef op_def = reg->op_def; - NodeDef def; - TF_CHECK_OK(NodeDefBuilder("dummy", &op_def) - .Input(FakeInputStub(DT_STRING)) - .Input(FakeInputStubList(DT_INT64)) - .Input(FakeInputStubList(DT_INT32)) - .Finalize(&def)); - shape_inference::InferenceContext c( - 0, &def, op_def, - {TShape({}), TShape({6}), TShape({6})}, - {}, {}, 
{}); - ASSERT_TRUE(reg->shape_inference_fn(&c).ok()); -} - -TEST(EmbeddingOpsTest, TestEmbeddingFeatureMappingExportShapeInfer) { - const OpRegistrationData *reg; - TF_CHECK_OK(OpRegistry::Global()->LookUp("EmbeddingFeatureMappingExport", ®)); - OpDef op_def = reg->op_def; - NodeDef def; - TF_CHECK_OK(NodeDefBuilder("dummy", &op_def) - .Input(FakeInputStub(DT_STRING)) - .Input(FakeInputStub(DT_STRING)) - .Input(FakeInputStub(DT_INT64)) - .Input(FakeInputStub(DT_FLOAT)) - .Input(FakeInputStubList(DT_INT64)) - .Input(FakeInputStubList(DT_INT32)) - .Finalize(&def)); - shape_inference::InferenceContext c( - 0, &def, op_def, - {TShape({}), TShape({}), TShape({6}), TShape({6}), TShape({6})}, - {}, {}, {}); - ASSERT_TRUE(reg->shape_inference_fn(&c).ok()); -} - -TEST(EmbeddingOpsTest, TestEmbeddingFeatureMappingImportShapeInfer) { - const OpRegistrationData *reg; - TF_CHECK_OK(OpRegistry::Global()->LookUp("EmbeddingFeatureMappingImport", ®)); - OpDef op_def = reg->op_def; - NodeDef def; - TF_CHECK_OK(NodeDefBuilder("dummy", &op_def) - .Attr("embedding_dim", 4) - .Attr("only_offset_flag", 1) - .Attr("num", 1) - .Input(FakeInputStub(DT_STRING)) - .Input(FakeInputStub(DT_STRING)) - .Input(FakeInputStub(DT_INT64)) - .Input(FakeInputStub(DT_INT64)) - .Finalize(&def)); - shape_inference::InferenceContext c( - 0, &def, op_def, - {TShape({}), TShape({}), TShape({6})}, - {}, {}, {}); - ASSERT_TRUE(reg->shape_inference_fn(&c).ok()); -} - -TEST(EmbeddingOpsTest, TestEmbeddingFeatureMappingTableSizeShapeInfer) { - const OpRegistrationData *reg; - TF_CHECK_OK(OpRegistry::Global()->LookUp("EmbeddingFeatureMappingTableSize", ®)); - OpDef op_def = reg->op_def; - NodeDef def; - TF_CHECK_OK(NodeDefBuilder("dummy", &op_def) - .Input(FakeInputStub(DT_STRING)) - .Finalize(&def)); - shape_inference::InferenceContext c( - 0, &def, op_def, - {TShape({})}, - {}, {}, {}); - ASSERT_TRUE(reg->shape_inference_fn(&c).ok()); -} - -TEST(EmbeddingOpsTest, TestEmbeddingFeatureMappingFileSizeShapeInfer) { - 
const OpRegistrationData *reg; - TF_CHECK_OK(OpRegistry::Global()->LookUp("EmbeddingFeatureMappingFileSize", ®)); - OpDef op_def = reg->op_def; - NodeDef def; - TF_CHECK_OK(NodeDefBuilder("dummy", &op_def) - .Attr("embedding_dim", 4) - .Attr("only_offset_flag", 1) - .Input(FakeInputStub(DT_STRING)) - .Input(FakeInputStub(DT_STRING)) - .Input(FakeInputStub(DT_INT64)) - .Finalize(&def)); - shape_inference::InferenceContext c( - 0, &def, op_def, - {TShape({}), TShape({})}, - {}, {}, {}); - ASSERT_TRUE(reg->shape_inference_fn(&c).ok()); -} - TEST(EmbeddingOpsTest, InitEmbeddingHashmapV2ShapeInfer) { const OpRegistrationData* reg; TF_CHECK_OK(OpRegistry::Global()->LookUp("InitEmbeddingHashmapV2", ®)); diff --git a/tf_adapter/tests/ut/optimizers/pbtxt/om_test_embedding_service.pbtxt b/tf_adapter/tests/ut/optimizers/pbtxt/om_test_embedding_service.pbtxt deleted file mode 100644 index f26cb26d9d164118ad869d7fecddae80784f49cf..0000000000000000000000000000000000000000 --- a/tf_adapter/tests/ut/optimizers/pbtxt/om_test_embedding_service.pbtxt +++ /dev/null @@ -1,268 +0,0 @@ -node { - name: "arg_arg_Placeholder_0_0" - op: "_Arg" - device: "/job:localhost/replica:0/task:0/device:CPU:0" - attr { - key: "T" - value { - type: DT_INT32 - } - } - attr { - key: "index" - value { - i: 0 - } - } -} -node { - name: "IteratorV2" - op: "IteratorV2" - device: "/job:localhost/replica:0/task:0/device:CPU:0" - attr { - key: "_NpuOptimizer" - value { - s: "NpuOptimizer" - } - } - attr { - key: "_job" - value { - s: "localhost" - } - } - attr { - key: "_mix_compile_mode" - value { - s: "0" - } - } - attr { - key: "_use_off_line" - value { - s: "1" - } - } - attr { - key: "_iterations_per_loop" - value { - s: "1" - } - } - attr { - key: "_enable_data_pre_proc" - value { - s: "1" - } - } - attr { - key: "_dynamic_graph_execute_mode" - value { - s: "dynamic_execute" - } - } - attr { - key: "_dynamic_input" - value { - b: true - } - } - attr { - key: "_dynamic_inputs_shape_range" - value { - s: 
"getnext:[1~50, 1~50],[1~50, 1~50]" - } - } - attr { - key: "container" - value { - s: "" - } - } - attr { - key: "output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: -1 - } - } - shape { - dim { - size: -1 - } - dim { - size: -1 - } - } - } - } - } - attr { - key: "output_types" - value { - list { - type: DT_INT64 - type: DT_INT64 - } - } - } - attr { - key: "shared_name" - value { - s: "" - } - } -} -node { - name: "IteratorGetNext" - op: "IteratorGetNext" - input: "IteratorV2" - device: "/job:localhost/replica:0/task:0/device:CPU:0" - attr { - key: "output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: -1 - } - } - shape { - dim { - size: -1 - } - dim { - size: -1 - } - } - } - } - } - attr { - key: "output_types" - value { - list { - type: DT_INT64 - type: DT_INT64 - } - } - } -} -node { - name: "Cast" - op: "Cast" - input: "IteratorGetNext" - device: "/job:localhost/replica:0/task:0/device:CPU:0" - attr { - key: "DstT" - value { - type: DT_INT32 - } - } - attr { - key: "SrcT" - value { - type: DT_INT64 - } - } - attr { - key: "Truncate" - value { - b: false - } - } -} -node { - name: "Cast_1" - op: "Cast" - input: "IteratorGetNext:1" - device: "/job:localhost/replica:0/task:0/device:CPU:0" - attr { - key: "DstT" - value { - type: DT_INT32 - } - } - attr { - key: "SrcT" - value { - type: DT_INT64 - } - } - attr { - key: "Truncate" - value { - b: false - } - } -} -node { - name: "Add" - op: "Add" - input: "Cast" - input: "Cast_1" - device: "/job:localhost/replica:0/task:0/device:CPU:0" - attr { - key: "T" - value { - type: DT_INT32 - } - } -} -node { - name: "Mul" - op: "Mul" - input: "Add" - input: "arg_arg_Placeholder_0_0" - device: "/job:localhost/replica:0/task:0/device:CPU:0" - attr { - key: "_graph_dynamic_graph_execute_mode" - value { - s: "dynamic_execute" - } - } - attr { - key: "_graph_dynamic_input" - value { - b: true - } - } - attr { - key: "_padding_key" - value { - s: "123" - } - } - attr 
{ - key: "_embedding_flags" - value { - b: true - } - } - attr { - key: "_graph_dynamic_inputs_shape_range" - value { - s: "getnext:[1~50, 1~50],[1~50, 1~50]" - } - } - attr { - key: "T" - value { - type: DT_INT32 - } - } -} -library { -} -versions { - producer: 134 - min_consumer: 12 -} diff --git a/tf_adapter/tests/ut/optimizers/testcase/get_attr_optimize_pass_test.cc b/tf_adapter/tests/ut/optimizers/testcase/get_attr_optimize_pass_test.cc index 9b3fa9053f03358e26f2d7e383a820c5a77a8f33..2b51bb23ac021d76bac98376d15c5b48c0df93cc 100644 --- a/tf_adapter/tests/ut/optimizers/testcase/get_attr_optimize_pass_test.cc +++ b/tf_adapter/tests/ut/optimizers/testcase/get_attr_optimize_pass_test.cc @@ -217,9 +217,6 @@ TEST_F(GetAttrOptimizationPassTest, SetAttrTest) { AttrValue insert_op_file = AttrValue(); insert_op_file.set_s("aipp.cfg"); (*custom_config->mutable_parameter_map())["insert_op_file"] = insert_op_file; - AttrValue es_cluster_config = AttrValue(); - es_cluster_config.set_s("esclusterconfig.json"); - (*custom_config->mutable_parameter_map())["es_cluster_config"] = es_cluster_config; AttrValue external_weight = AttrValue(); external_weight.set_b(true); (*custom_config->mutable_parameter_map())["external_weight"] = external_weight; diff --git a/tf_adapter/tests/ut/optimizers/testcase/om_partition_subgraphs_pass_test.cc b/tf_adapter/tests/ut/optimizers/testcase/om_partition_subgraphs_pass_test.cc index 1bcafeea7ecf36456fc2cf190fd6c1a30f27940d..52e773fbe55227fb794abe582bcd9a8355d2a5e5 100644 --- a/tf_adapter/tests/ut/optimizers/testcase/om_partition_subgraphs_pass_test.cc +++ b/tf_adapter/tests/ut/optimizers/testcase/om_partition_subgraphs_pass_test.cc @@ -204,12 +204,6 @@ TEST_F(OmOptimizationPassTest, DynamicGetNextInput1Test) { std::string target_graph = DoRunOmOptimizationPassTest(); EXPECT_EQ(target_graph, "arg_arg_Placeholder_0_0->GeOp14_0"); } -TEST_F(OmOptimizationPassTest, EmbeddingServiceGraphTest) { - string org_graph_def_path = 
"tf_adapter/tests/ut/optimizers/pbtxt/om_test_embedding_service.pbtxt"; - InitGraph(org_graph_def_path); - std::string target_graph = DoRunOmOptimizationPassTest(); - EXPECT_EQ(target_graph, "arg_arg_Placeholder_0_0->GeOp15_0"); -} TEST_F(OmOptimizationPassTest, StringInputMaxSizeTest) { string org_graph_def_path = "tf_adapter/tests/ut/optimizers/pbtxt/om_test_string_input.pbtxt"; InitGraph(org_graph_def_path); diff --git a/tf_adapter/util/ge_plugin.cc b/tf_adapter/util/ge_plugin.cc index ab8d5d14d4f42bd69ba3e908e023051725318436..f9c4f552248c833f08e1ea7a92fea54f32e9c0f5 100644 --- a/tf_adapter/util/ge_plugin.cc +++ b/tf_adapter/util/ge_plugin.cc @@ -125,8 +125,6 @@ void SetOptionNameMap(json &option_name_map) { option_name_map.emplace("ge.exec.staticMemoryPolicy", "static_memory_policy"); option_name_map.emplace("ge.variableUse1gHugePage", "variable_use_1g_huge_page"); option_name_map.emplace("ge.socVersion", "soc_config"); - option_name_map.emplace("ge.esClusterConfig", "es_cluster_config"); - option_name_map.emplace("ge.executeTimes", "execute_times"); option_name_map.emplace(ge::OPTION_EXEC_DYNAMIC_EXECUTE_MODE, "dynamic_graph_execute_mode"); option_name_map.emplace(ge::OPTION_EXEC_DYNAMIC_INPUT, "dynamic_input"); option_name_map.emplace(ge::AICORE_NUM, "aicore_num"); diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc index 3da674a1b04af00d0b618f836a4a76c87e6419c3..4741ecc56572c493ceb4a4716c44212ae582e649 100644 --- a/tf_adapter/util/npu_attrs.cc +++ b/tf_adapter/util/npu_attrs.cc @@ -707,8 +707,6 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr std::string aoe_config_file; std::string stream_sync_timeout = "-1"; std::string event_sync_timeout = "-1"; - std::string es_cluster_config; - std::string execute_times = "-1"; std::string export_compile_stat; std::string aicore_num; std::string oo_constant_folding; @@ -753,8 +751,6 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr (void) ctx->GetAttr("_aoe_config_file", 
&aoe_config_file); (void) ctx->GetAttr("_stream_sync_timeout", &stream_sync_timeout); (void) ctx->GetAttr("_event_sync_timeout", &event_sync_timeout); - (void) ctx->GetAttr("_es_cluster_config", &es_cluster_config); - (void) ctx->GetAttr("_execute_times", &execute_times); (void) ctx->GetAttr("_export_compile_stat", &export_compile_stat); (void) ctx->GetAttr("_aicore_num", &aicore_num); (void) ctx->GetAttr("_oo_constant_folding", &oo_constant_folding); @@ -814,9 +810,6 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr init_options_["ge.aoe_config_file"] = aoe_config_file; init_options_["stream_sync_timeout"] = stream_sync_timeout; init_options_["event_sync_timeout"] = event_sync_timeout; - init_options_["ge.esClusterConfig"] = es_cluster_config; - init_options_["execute_times"] = execute_times; - init_options_["ge.executeTimes"] = execute_times; if (!export_compile_stat.empty()) { init_options_["export_compile_stat"] = export_compile_stat; init_options_["ge.exportCompileStat"] = export_compile_stat; @@ -1253,7 +1246,6 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & std::string stream_sync_timeout = "-1"; std::string event_sync_timeout = "-1"; std::string external_weight = "0"; - std::string es_cluster_config; std::string graph_parallel_option_path; std::string enable_graph_parallel; std::string graph_compiler_cache_dir; @@ -1261,7 +1253,6 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & std::string accelerate_train_mode; std::string input_fusion_size; std::string compile_dynamic_mode; - std::string execute_times = "-1"; std::string export_compile_stat; std::string aicore_num; std::string oo_constant_folding; @@ -1350,7 +1341,6 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & auto model_deploy_devicelist_value = attrs.Find("_model_deploy_devicelist"); auto topo_sorting_mode_value = attrs.Find("_topo_sorting_mode"); auto insert_op_file_value = attrs.Find("_insert_op_file"); - auto es_cluster_config_value = 
attrs.Find("_es_cluster_config"); auto resource_config_path_value = attrs.Find("_resource_config_path"); auto aoe_config_file_value = attrs.Find("_aoe_config_file"); auto stream_sync_timeout_value = attrs.Find("_stream_sync_timeout"); @@ -1363,7 +1353,6 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & auto accelerate_train_mode_value = attrs.Find("_accelerate_train_mode"); auto input_fusion_size_value = attrs.Find("_input_fusion_size"); auto compile_dynamic_mode_value = attrs.Find("_compile_dynamic_mode"); - auto execute_times_value = attrs.Find("_execute_times"); auto export_compile_stat_value = attrs.Find("_export_compile_stat"); auto aicore_num_value = attrs.Find("_aicore_num"); auto oo_constant_folding_value = attrs.Find("_oo_constant_folding"); @@ -1663,9 +1652,6 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & if (external_weight_value != nullptr) { external_weight = external_weight_value->s(); } - if (es_cluster_config_value != nullptr) { - es_cluster_config = es_cluster_config_value->s(); - } if (jit_compile_value != nullptr) { std::string jit_compile = jit_compile_value->s(); all_options["jit_compile"] = jit_compile; @@ -1677,9 +1663,6 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & if (input_fusion_size_value != nullptr) { input_fusion_size = input_fusion_size_value->s(); } - if (execute_times_value != nullptr) { - execute_times = execute_times_value->s(); - } if (export_compile_stat_value != nullptr) { export_compile_stat = export_compile_stat_value->s(); } @@ -1792,8 +1775,6 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & all_options["ge.topoSortingMode"] = topo_sorting_mode; all_options["insert_op_file"] = insert_op_file; all_options["ge.insertOpFile"] = insert_op_file; - all_options["es_cluster_config"] = es_cluster_config; - all_options["ge.esClusterConfig"] = es_cluster_config; all_options["resource_config_path"] = resource_config_path; all_options["ge.aoe_config_file"] = aoe_config_file; 
all_options["aoe_config_file"] = aoe_config_file; @@ -1806,8 +1787,6 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & all_options["frozen_variable"] = frozen_variable; all_options["variable_location"] = variable_location; all_options["compile_dynamic_mode"] = compile_dynamic_mode; - all_options["execute_times"] = execute_times; - all_options["ge.executeTimes"] = execute_times; if (!export_compile_stat.empty()) { all_options["export_compile_stat"] = export_compile_stat; all_options["ge.exportCompileStat"] = export_compile_stat; @@ -1940,12 +1919,10 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options bool external_weight = false; bool frozen_variable = false; std::string variable_location = "Device"; - std::string es_cluster_config; std::string graph_slice_mode; std::string jit_compile; int64_t input_fusion_size = 131072L; // default 128KB std::string accelerate_train_mode; - int32_t execute_times = -1; int32_t export_compile_stat = 1; std::string aicore_num; bool oo_constant_folding = true; @@ -2492,12 +2469,6 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options if (params.count("external_weight") > 0) { external_weight = params.at("external_weight").b(); } - if (params.count("es_cluster_config") > 0) { - es_cluster_config = params.at("es_cluster_config").s(); - } - if (params.count("execute_times") > 0) { - execute_times = params.at("execute_times").i(); - } if (params.count("frozen_variable") > 0) { frozen_variable = params.at("frozen_variable").b(); } @@ -2694,10 +2665,6 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options init_options_["ge.aoe_config_file"] = aoe_config_file; init_options_["stream_sync_timeout"] = std::to_string(stream_sync_timeout); init_options_["event_sync_timeout"] = std::to_string(event_sync_timeout); - init_options_["es_cluster_config"] = es_cluster_config; - init_options_["ge.esClusterConfig"] = es_cluster_config; - 
init_options_["execute_times"] = std::to_string(execute_times); - init_options_["ge.executeTimes"] = std::to_string(execute_times); for (const auto &option : init_options_) { std::string attr_name = std::string("_") + option.first; node->AddAttr(attr_name, option.second);