diff --git a/tf_adapter/python/npu_bridge/embedding/embedding_optimizer.py b/tf_adapter/python/npu_bridge/embedding/embedding_optimizer.py
index 3ee6aef85b9f47032abbe448e845aba10ca76639..554382b6de9b6229e18da5f907f04c0996026b0b 100644
--- a/tf_adapter/python/npu_bridge/embedding/embedding_optimizer.py
+++ b/tf_adapter/python/npu_bridge/embedding/embedding_optimizer.py
@@ -48,6 +48,9 @@ class AdamOptimizer(adam.AdamOptimizer):
         super(AdamOptimizer, self).__init__(learning_rate, beta_1, beta_2, epsilon, using_locking, name)
         self._beta1_power = None
         self._beta2_power = None
+        self._table_index = 0
+        self.max_key_num_list = list()
+        self.embedding_dim_list = list()
 
     @property
     def embedding_dims(self):
@@ -100,9 +103,12 @@ class AdamOptimizer(adam.AdamOptimizer):
                 grad,
                 indices,
                 ops.convert_to_tensor(_GLOBAL_STEP_VALUE),
-                self._embedding_dims)
-            result.op._set_attr("_embedding_dim", attr_value_pb2.AttrValue(i=self._embedding_dims))
-            result.op._set_attr("_max_key_num", attr_value_pb2.AttrValue(i=self._max_nums))
+                self.embedding_dim_list[self._table_index])
+            result.op._set_attr("_embedding_dim",
+                                attr_value_pb2.AttrValue(i=self.embedding_dim_list[self._table_index]))
+            result.op._set_attr("_max_key_num",
+                                attr_value_pb2.AttrValue(i=self.max_key_num_list[self._table_index]))
+            self._table_index += 1
             return result
         else:
             return self._apply_sparse_shared(grad, var, indices, self._resource_scatter_add)
@@ -119,6 +125,17 @@
 
 
 class AdagradOptimizer(adagrad.AdagradOptimizer):
+    def __init__(self,
+                 learning_rate,
+                 initial_accumulator_value=0.1,
+                 use_locking=False,
+                 name="EmbeddingAdagradOptimizer"):
+        """Construct a new Adagrad optimizer."""
+        super(AdagradOptimizer, self).__init__(learning_rate, initial_accumulator_value, use_locking, name)
+        self._table_index = 0
+        self.max_key_num_list = list()
+        self.embedding_dim_list = list()
+
     @property
     def embedding_dims(self):
         return self._embedding_dims
@@ -150,9 +167,12 @@ class AdagradOptimizer(adagrad.AdagradOptimizer):
                 grad,
                 indices,
                 ops.convert_to_tensor(_GLOBAL_STEP_VALUE),
-                self._embedding_dims)
-            result.op._set_attr("_embedding_dim", attr_value_pb2.AttrValue(i=self._embedding_dims))
-            result.op._set_attr("_max_key_num", attr_value_pb2.AttrValue(i=self._max_nums))
+                self.embedding_dim_list[self._table_index])
+            result.op._set_attr("_embedding_dim",
+                                attr_value_pb2.AttrValue(i=self.embedding_dim_list[self._table_index]))
+            result.op._set_attr("_max_key_num",
+                                attr_value_pb2.AttrValue(i=self.max_key_num_list[self._table_index]))
+            self._table_index += 1
             return result
         else:
             return self.training_ops.resource_sparse_apply_adagrad(var.handle, grad.handle,
@@ -213,6 +233,9 @@ class AdamWOptimizer(optimizer.Optimizer):
         self._max_grad_norm_t = None
         self._beta1_power_t = None
         self._beta2_power_t = None
+        self._table_index = 0
+        self.max_key_num_list = list()
+        self.embedding_dim_list = list()
 
     @property
     def embedding_dims(self):
@@ -276,9 +299,12 @@ class AdamWOptimizer(optimizer.Optimizer):
                 math_ops.cast(self._max_grad_norm_t, grad.dtype),
                 amsgrad=self._amsgrad,
                 maximize=self._maximize,
-                embedding_dim=self._embedding_dims)
-            result.op._set_attr("_embedding_dim", attr_value_pb2.AttrValue(i=self._embedding_dims))
-            result.op._set_attr("_max_key_num", attr_value_pb2.AttrValue(i=self._max_nums))
+                embedding_dim=self.embedding_dim_list[self._table_index])
+            result.op._set_attr("_embedding_dim",
+                                attr_value_pb2.AttrValue(i=self.embedding_dim_list[self._table_index]))
+            result.op._set_attr("_max_key_num",
+                                attr_value_pb2.AttrValue(i=self.max_key_num_list[self._table_index]))
+            self._table_index += 1
             return result
         else:
             raise TypeError("Variable is not NpuEmbeddingResource type, please check.")
diff --git a/tf_adapter/python/npu_bridge/embedding/embedding_service.py b/tf_adapter/python/npu_bridge/embedding/embedding_service.py
index 9540613b9065e697e304ce539901bfaf9c2f5df7..486f7db71996cbce2e947fdd3f15fd2bc5917492 100644
--- a/tf_adapter/python/npu_bridge/embedding/embedding_service.py
+++ b/tf_adapter/python/npu_bridge/embedding/embedding_service.py
@@ -233,8 +233,8 @@ class ESWorker:
                 (initializer != 'truncated_normal') and (initializer != 'constant'):
             raise ValueError("initializer must be random_uniform or truncated_normal or constant.")
         self._optimizer = optimizer
-        self._optimizer._embedding_dims = embedding_dim
-        self._optimizer._max_nums = max_batch_size
+        self._optimizer.embedding_dim_list.append(embedding_dim)
+        self._optimizer.max_key_num_list.append(max_batch_size)
         self._optimizer._es_cluster_configs = self._es_cluster_conf
         self._table_to_optimizer[table_id] = self._optimizer
         # adam include m and v, 2 slots; adagrad include accumulator, 1 slot
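
For context, a minimal standalone sketch of the bookkeeping this patch introduces. The class and method names below (PerTableOptimizer, register_table, apply_sparse) are illustrative only, not part of the tf_adapter API: each table registration appends its embedding_dim and max_batch_size, and each apply reads the lists at _table_index in registration order, so one optimizer instance can serve several tables with different shapes instead of a single scalar _embedding_dims/_max_nums pair being overwritten by the last registered table.

    # Hypothetical sketch of the per-table bookkeeping in this patch; the names
    # here are illustrative and do not exist in tf_adapter.
    class PerTableOptimizer:
        def __init__(self):
            self._table_index = 0
            self.embedding_dim_list = list()  # one entry per embedding table
            self.max_key_num_list = list()

        def register_table(self, embedding_dim, max_batch_size):
            # Mirrors the ESWorker change: append per-table values instead of
            # overwriting a single scalar, so a second table with a different
            # shape no longer clobbers the first table's settings.
            self.embedding_dim_list.append(embedding_dim)
            self.max_key_num_list.append(max_batch_size)

        def apply_sparse(self):
            # Mirrors _apply_sparse in the optimizers: consume the lists in
            # registration order, advancing the cursor after each apply.
            dim = self.embedding_dim_list[self._table_index]
            max_keys = self.max_key_num_list[self._table_index]
            self._table_index += 1
            return dim, max_keys

    opt = PerTableOptimizer()
    opt.register_table(embedding_dim=16, max_batch_size=1024)   # table 0
    opt.register_table(embedding_dim=128, max_batch_size=4096)  # table 1

    assert opt.apply_sparse() == (16, 1024)    # first apply: table 0's shape
    assert opt.apply_sparse() == (128, 4096)   # second apply: table 1's shape

Note that in the sketch, as in the patch, _table_index only ever advances and is never reset, so this design assumes the apply calls are made exactly once per table and in the same order as the tables were registered.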