diff --git a/tf_adapter/kernels/aicpu/dataset_function.cc b/tf_adapter/kernels/aicpu/dataset_function.cc index 5a01b4aea27d41e254c2a77e27b4366cae35d3e6..fb5edd01b6d90d565e8704825f552db662b680f4 100644 --- a/tf_adapter/kernels/aicpu/dataset_function.cc +++ b/tf_adapter/kernels/aicpu/dataset_function.cc @@ -368,7 +368,7 @@ void DatasetFunction::DestroyAclOutputDataset(aclmdlDataset *output, bool isFree aclDataBuffer* data_buffer = aclmdlGetDatasetBuffer(output, i); if (isFree) { void* data_addr = aclGetDataBufferAddr(data_buffer); - aclError ret = aclrtFree(data_addr); + ret = aclrtFree(data_addr); if (ret != ACL_ERROR_NONE) { ADP_LOG(ERROR) << "Free acl device memory failed."; } diff --git a/tf_adapter/kernels/aicpu/map_and_batch_dataset_op.cc b/tf_adapter/kernels/aicpu/map_and_batch_dataset_op.cc index 5387eef039776ab798030c6648b83c1fbb80a347..d42c7ed08b2fdb2850fdcf71fa34ce61c18efb95 100644 --- a/tf_adapter/kernels/aicpu/map_and_batch_dataset_op.cc +++ b/tf_adapter/kernels/aicpu/map_and_batch_dataset_op.cc @@ -396,6 +396,7 @@ class NpuMapAndBatchDatasetOp::Dataset : public DatasetBase { if (output_cpu_addr != nullptr) { delete[] output_cpu_addr; output_cpu_addr = nullptr; + output_cpu = nullptr; } ADP_LOG(EVENT) << "~BatchResultBase finish."; } @@ -571,7 +572,8 @@ class NpuMapAndBatchDatasetOp::Dataset : public DatasetBase { batch_result.output_cpu = batch_result.output_cpu_addr; // reset start address for cpu memory when pass data to tensorflow - uint64_t offset = reinterpret_cast(batch_result.output_cpu_addr) % kTFTensorAlignment; + uint64_t offset = reinterpret_cast(reinterpret_cast(batch_result.output_cpu_addr)) + % kTFTensorAlignment; if (offset != 0UL) { offset = kTFTensorAlignment - offset; batch_result.output_cpu = batch_result.output_cpu_addr + offset; @@ -1130,6 +1132,10 @@ class NpuMapAndBatchDatasetOp::Dataset : public DatasetBase { explicit IteratorDyn(const Params& params) : IteratorMeBase(params) { max_batch_results_ = CeilDiv(GetParallelCallsNum(), params.dataset->batch_size_); + if(max_batch_results_ == 1ULL) { + // one result for consumer, one results for producer + max_batch_results_ += 1ULL; + } ADP_LOG(EVENT) << "IteratorDyn."; }; diff --git a/tf_adapter/kernels/aicpu/map_dataset_op.cc b/tf_adapter/kernels/aicpu/map_dataset_op.cc index 9aa95e71515345cbd2c612b6227c84f0c6d3ff29..16af4ce8c6a2dda2794359e9f47784881ae63e8e 100644 --- a/tf_adapter/kernels/aicpu/map_dataset_op.cc +++ b/tf_adapter/kernels/aicpu/map_dataset_op.cc @@ -346,6 +346,7 @@ private: if (output_cpu_addr != nullptr) { delete[] output_cpu_addr; output_cpu_addr = nullptr; + output_cpu = nullptr; } ADP_LOG(EVENT) << "~OutputResultBase finish."; } @@ -430,7 +431,8 @@ private: output_result.output_cpu = output_result.output_cpu_addr; // reset start address for cpu memory when pass data to tensorflow - uint64_t offset = reinterpret_cast(output_result.output_cpu_addr) % kTFTensorAlignment; + uint64_t offset = reinterpret_cast(reinterpret_cast(output_result.output_cpu_addr)) + % kTFTensorAlignment; if (offset != 0UL) { offset = kTFTensorAlignment - offset; output_result.output_cpu = output_result.output_cpu_addr + offset;