diff --git a/tf_adapter/kernels/aicpu/npu_cpu_ops.cc b/tf_adapter/kernels/aicpu/npu_cpu_ops.cc
index 7e4891543fce8ef572814852efaea58c12962a5c..3b5e0e54fb6e02587f01b5e00a37a7ff863c491b 100644
--- a/tf_adapter/kernels/aicpu/npu_cpu_ops.cc
+++ b/tf_adapter/kernels/aicpu/npu_cpu_ops.cc
@@ -150,14 +150,14 @@ class OCRIdentifyPreHandleOp : public OpKernel {
 };
 
 class BatchDilatePolysOp : public OpKernel {
-  public :
+ public :
   explicit BatchDilatePolysOp(OpKernelConstruction *context):OpKernel(context){}
   ~BatchDilatePolysOp() override{}
   void Compute(OpKernelContext *context) override{ADP_LOG(INFO)<<"BatchDilatePolysOp Compute";}
 };
 
 class OCRFindContoursOp : public OpKernel {
-  public :
+ public :
   explicit OCRFindContoursOp(OpKernelConstruction *context):OpKernel(context){}
   ~OCRFindContoursOp() override{}
   void Compute(OpKernelContext *context) override{ADP_LOG(INFO)<<"OCRFindContoursOp Compute";}
@@ -184,6 +184,13 @@ class DequeueOp : public OpKernel {
   void Compute(OpKernelContext *context) override { ADP_LOG(INFO) << "DequeueOp Compute"; }
 };
 
+class NonZeroWithValueShapeOp : public OpKernel {  // Kernel registered for DEVICE_CPU; Compute only logs.
+ public:
+  explicit NonZeroWithValueShapeOp(OpKernelConstruction *ctx) : OpKernel(ctx) {}
+  ~NonZeroWithValueShapeOp() override = default;
+  void Compute(OpKernelContext *ctx) override { ADP_LOG(INFO) << "NonZeroWithValueShapeOp Compute"; }
+};
+
 REGISTER_KERNEL_BUILDER(Name("EmbeddingRankId").Device(DEVICE_CPU), EmbeddingRankIdOpKernel);
 REGISTER_KERNEL_BUILDER(Name("EmbeddingLocalIndex").Device(DEVICE_CPU), EmbeddingLocalIndexOpKernel);
 REGISTER_KERNEL_BUILDER(Name("LruCache").Device(DEVICE_CPU), LruCacheOp);
@@ -200,6 +207,7 @@ REGISTER_KERNEL_BUILDER(Name("OCRFindContours").Device(DEVICE_CPU), OCRFindConto
 REGISTER_KERNEL_BUILDER(Name("OCRDetectionPostHandle").Device(DEVICE_CPU), OCRDetectionPostHandleOp);
 REGISTER_KERNEL_BUILDER(Name("ResizeAndClipPolys").Device(DEVICE_CPU), ResizeAndClipPolysOp);
 REGISTER_KERNEL_BUILDER(Name("Dequeue").Device(DEVICE_CPU), DequeueOp);
+REGISTER_KERNEL_BUILDER(Name("NonZeroWithValueShape").Device(DEVICE_CPU), NonZeroWithValueShapeOp);
 
 #define REGISTER_KERNEL(type)                                \
 REGISTER_KERNEL_BUILDER(Name("DeformableOffsets")            \
diff --git a/tf_adapter/ops/aicpu/npu_cpu_ops.cc b/tf_adapter/ops/aicpu/npu_cpu_ops.cc
index b5f7b3e72d603eb2c3a0c13430411cb717da3cdd..8fc4179f7fb2b4d5bf58285cefdd981574d9ef89 100644
--- a/tf_adapter/ops/aicpu/npu_cpu_ops.cc
+++ b/tf_adapter/ops/aicpu/npu_cpu_ops.cc
@@ -44,7 +44,7 @@ REGISTER_OP("EmbeddingRankId")
     Output
         rank_id:    Tensors with the same shape as index.dim(0)*3.
     )doc");
-//regist embedding local index op
+// register embedding local index op
 REGISTER_OP("EmbeddingLocalIndex")
   .Input("addr_table: uint64")
   .Input("index: T")
@@ -74,7 +74,7 @@ REGISTER_OP("EmbeddingLocalIndex")
         recover_idx:  The sorted local_idx element corresponds to the position of
                       the original input index.
     )doc");
-//regist lru cahe op
+// register lru cache op
 REGISTER_OP("LruCache")
   .Output("cache: resource")
   .Attr("cache_size: int")
@@ -84,7 +84,7 @@ REGISTER_OP("LruCache")
   .Attr("dtype: {uint32, uint64, int32, int64}")
   .SetIsStateful()
   .SetShapeFn(shape_inference::ScalarShape);
-//regist cache add op
+// register cache add op
 REGISTER_OP("CacheAdd")
   .Input("cache: resource")
   .Input("ids: T")
@@ -100,7 +100,7 @@ REGISTER_OP("CacheAdd")
     c->set_output(3, c->Vector(c->UnknownDim()));
     return Status::OK();
   });
-//regist cache remote index to local op
+// register cache remote index to local op
 REGISTER_OP("CacheRemoteIndexToLocal")
   .Input("cache: resource")
   .Input("ids: T")
@@ -110,7 +110,7 @@ REGISTER_OP("CacheRemoteIndexToLocal")
     c->set_output(0, c->Vector(c->Rank(c->input(1))));
     return Status::OK();
   });
-//regist cache all index to local op
+// register cache all index to local op
 REGISTER_OP("CacheAllIndexToLocal")
   .Input("cache: resource")
   .Output("local_idx: dtype")
@@ -120,7 +120,7 @@ REGISTER_OP("CacheAllIndexToLocal")
     return Status::OK();
   });
 
-//regist deformable offsets op
+// register deformable offsets op
 REGISTER_OP("DeformableOffsets")
   .Input("x: T")
   .Input("offsets: T")
@@ -173,7 +173,7 @@ REGISTER_OP("DeformableOffsets")
     c->set_output(0, c->MakeShape(out_dims));
     return Status::OK();
   });
-//regist deformable offsets grad op
+// register deformable offsets grad op
 REGISTER_OP("DeformableOffsetsGrad")
   .Input("grad: T")
   .Input("x: T")
@@ -195,7 +195,7 @@ REGISTER_OP("DeformableOffsetsGrad")
     c->set_output(1, input_offsets_shape);
     return Status::OK();
   });
-//regist Random Choice With Mask op
+// register Random Choice With Mask op
 REGISTER_OP("RandomChoiceWithMask")
   .Input("x: bool")
   .Output("y: int32")
@@ -219,7 +219,7 @@ REGISTER_OP("RandomChoiceWithMask")
     }
     return Status::OK();
   });
-//regist dense image warp op
+// register dense image warp op
 REGISTER_OP("DenseImageWarp")
   .Input("image: T")
   .Input("flow: S")
@@ -231,7 +231,7 @@ REGISTER_OP("DenseImageWarp")
     c->set_output(0, input_image_shape);
     return Status::OK();
   });
-//regist dense image warp grad op
+// register dense image warp grad op
 REGISTER_OP("DenseImageWarpGrad")
   .Input("grad: T")
   .Input("image: T")
@@ -509,4 +509,18 @@ REGISTER_OP("OCRDetectionPostHandle")
       c->set_output(3, c->Scalar());
       return Status::OK();
     });
+
+// Registers the NonZeroWithValueShape op; both outputs are inferred as vectors of unknown length.
+REGISTER_OP("NonZeroWithValueShape")
+    .Input("value: T")
+    .Input("index: int32")
+    .Input("count: int32")
+    .Output("out_value: T")
+    .Output("out_index: int32")
+    .Attr("T: {double, float, float16, int8, uint8, int16, uint16, int32, uint32, int64, uint64, bool}")
+    .SetShapeFn([](shape_inference::InferenceContext *ctx) {
+      ctx->set_output(0, ctx->Vector(ctx->UnknownDim()));
+      ctx->set_output(1, ctx->Vector(ctx->UnknownDim()));
+      return Status::OK();
+    });
 }  // namespace tensorflow
diff --git a/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py b/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py
index 9d4a7b9ef7eb01b09146e5c5e03b9544fdffc8be..874bdcd302e708bb9582ef2f32fa101f5e8f9515 100644
--- a/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py
+++ b/tf_adapter/python/npu_bridge/npu_cpu/npu_cpu_ops.py
@@ -235,4 +235,16 @@ def resize_and_clip_polys(polys_data, polys_offset, polys_size, h_scale, w_scale
         w_scale=w_scale,
         img_h=img_h,
         img_w=img_w)
+    return result
+
+## Provides the NonZeroWithValueShape operation.
+#  @param value type double, float, float16, int8, uint8, int16, uint16, int32, uint32, int64, uint64 or bool
+#  @param index type int32
+#  @param count type int32
+#  @return out_value, out_index: out_value has the same type as value, out_index has type int32
+def non_zero_with_value_shape(value, index, count):
+    result = gen_npu_cpu_ops.non_zero_with_value_shape(
+        value=value,
+        index=index,
+        count=count)
     return result
\ No newline at end of file
diff --git a/tf_adapter/tests/st/CMakeLists.txt b/tf_adapter/tests/st/CMakeLists.txt
index eb15a09c31b335da7a904cfb9585f462fa9da532..40b45c3b723003789484111107ab8fad8697a064 100644
--- a/tf_adapter/tests/st/CMakeLists.txt
+++ b/tf_adapter/tests/st/CMakeLists.txt
@@ -23,9 +23,15 @@ file(GLOB_RECURSE ST_SOURCES
     ${TFADAPTER_DIR}/tf_adapter/optimizers/*.cc
     ${TFADAPTER_DIR}/tf_adapter/util/*.cc
     ${TFADAPTER_DIR}/tf_adapter/kernels/*.cc
-    "optimizers/testcase/*.cc"
-    "kernels/testcase/*.cc"
+    "optimizers/testcase/om_partition_subgraphs_pass_test.cc"
+    "optimizers/testcase/get_attr_optimize_pass_test.cc"
+    "kernels/testcase/ocr_ops_test.cc"
+    "kernels/testcase/non_zero_with_value_ops_test.cc"
+    "kernels/testcase/non_zero_with_value_shape_ops_test.cc"
+    "kernels/testcase/geop_npu_test.cc"
     "util/testcase/*.cc"
+    "kernels/testcase/layer_norm_ops_test.cc"
+    "kernels/testcase/layer_norm_grad_ops_test.cc"
 )
 
 add_executable(tfadapter_stest
diff --git a/tf_adapter/tests/st/kernels/testcase/non_zero_with_value_shape_ops_test.cc b/tf_adapter/tests/st/kernels/testcase/non_zero_with_value_shape_ops_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..24239045fd63d33ede9f9ad354c70317460e37e5
--- /dev/null
+++ b/tf_adapter/tests/st/kernels/testcase/non_zero_with_value_shape_ops_test.cc
@@ -0,0 +1,63 @@
+#include <memory>
+#include "tf_adapter/kernels/npu_cpu_ops.cc"
+#include "gtest/gtest.h"
+#include "tensorflow/core/framework/attr_value.pb.h"
+#include "tensorflow/core/framework/attr_value_util.h"
+#include "tensorflow/core/framework/fake_input.h"
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/node_def_builder.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/shape_inference.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace {
+
+// Test shorthand: wraps a braced list of dimension sizes in a PartialTensorShape so that shapes can be
+// written inline where a PartialTensorShape is expected.
+PartialTensorShape TShape(std::initializer_list<int64> dims) { return PartialTensorShape(dims); }
+
+// Builds a FakeInputFunctor for NodeDefBuilder: input number i is added, with type dt, as output 0 of a
+// node whose name is the single lowercase letter 'a' + (i % 26).
+FakeInputFunctor FakeInputStub(DataType dt) {
+  return [dt](const OpDef&, int in_index, const NodeDef&, NodeDefBuilder* builder) {
+    const char letter = static_cast<char>('a' + (in_index % 26));
+    builder->Input(string(1, letter), 0, dt);
+    return Status::OK();
+  };
+}
+
+
+TEST(NonZeroWithValueOpsTest, TestNonZeroWithValueShape) {
+    // Smoke test: the kernel must be constructible from a minimal OpKernelConstruction.
+    // The type lists are owning vectors: a DataTypeSlice built straight from a braced list would keep
+    // pointing at the list's temporary array after that declaration has ended.
+    DataTypeVector input_types{DT_INT32, DT_INT32, DT_INT32};
+    MemoryTypeSlice input_memory_types;
+    DataTypeVector output_types{DT_INT32, DT_INT32};
+    MemoryTypeSlice output_memory_types;
+    // Automatic storage: everything is released in reverse order of declaration, the kernel first.
+    DeviceBase device(Env::Default());
+    NodeDef node_def;
+    OpDef op_def;
+    OpKernelConstruction context(DEVICE_CPU, &device, nullptr, &node_def, &op_def, nullptr,
+                                 input_types, input_memory_types, output_types, output_memory_types,
+                                 1, nullptr);
+    NonZeroWithValueShapeOp kernel(&context);
+}
+
+TEST(NonZeroWithValueOpsTest, TestNonZeroWithValueShapeInfer) {
+  // Runs the registered shape function on a context that has one input shape per declared input.
+  const OpRegistrationData* reg;
+  TF_CHECK_OK(OpRegistry::Global()->LookUp("NonZeroWithValueShape", &reg));
+  OpDef op_def = reg->op_def;
+  NodeDef def;
+  TF_CHECK_OK(NodeDefBuilder("dummy", &op_def).Input(FakeInputStub(DT_INT32)).Input(FakeInputStub(DT_INT32))
+                  .Input(FakeInputStub(DT_INT32)).Finalize(&def));
+  shape_inference::InferenceContext c(0, &def, op_def, {TShape({}), TShape({}), TShape({})}, {}, {}, {});
+  TF_CHECK_OK(c.construction_status());  // fails if the shape count does not match the NodeDef inputs
+  TF_CHECK_OK(reg->shape_inference_fn(&c));
+  for (int i = 0; i < c.num_outputs(); ++i) { EXPECT_EQ(c.Rank(c.output(i)), 1); }  // unknown-length vectors
+}
+}  // namespace
+}  // namespace tensorflow
\ No newline at end of file
diff --git a/tf_adapter/tests/ut/kernels/testcase/non_zero_with_value_shape_ops_test.cc b/tf_adapter/tests/ut/kernels/testcase/non_zero_with_value_shape_ops_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..03a21b11ebe20de1f15e4c29dd138fa0fb9338dc
--- /dev/null
+++ b/tf_adapter/tests/ut/kernels/testcase/non_zero_with_value_shape_ops_test.cc
@@ -0,0 +1,62 @@
+#include <memory>
+#include "tf_adapter/kernels/npu_cpu_ops.cc"
+#include "gtest/gtest.h"
+#include "tensorflow/core/framework/attr_value.pb.h"
+#include "tensorflow/core/framework/attr_value_util.h"
+#include "tensorflow/core/framework/fake_input.h"
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/node_def_builder.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/shape_inference.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace {
+
+// Test shorthand: wraps a braced list of dimension sizes in a PartialTensorShape so that shapes can be
+// written inline where a PartialTensorShape is expected.
+PartialTensorShape TShape(std::initializer_list<int64> dims) { return PartialTensorShape(dims); }
+
+// Builds a FakeInputFunctor for NodeDefBuilder: input number i is added, with type dt, as output 0 of a
+// node whose name is the single lowercase letter 'a' + (i % 26).
+FakeInputFunctor FakeInputStub(DataType dt) {
+  return [dt](const OpDef&, int in_index, const NodeDef&, NodeDefBuilder* builder) {
+    const char letter = static_cast<char>('a' + (in_index % 26));
+    builder->Input(string(1, letter), 0, dt);
+    return Status::OK();
+  };
+}
+
+TEST(NonZeroWithValueOpsTest, TestNonZeroWithValueShape) {
+    // Smoke test: the kernel must be constructible from a minimal OpKernelConstruction.
+    // The type lists are owning vectors: a DataTypeSlice built straight from a braced list would keep
+    // pointing at the list's temporary array after that declaration has ended.
+    DataTypeVector input_types{DT_INT32, DT_INT32, DT_INT32};
+    MemoryTypeSlice input_memory_types;
+    DataTypeVector output_types{DT_INT32, DT_INT32};
+    MemoryTypeSlice output_memory_types;
+    // Automatic storage: everything is released in reverse order of declaration, the kernel first.
+    DeviceBase device(Env::Default());
+    NodeDef node_def;
+    OpDef op_def;
+    OpKernelConstruction context(DEVICE_CPU, &device, nullptr, &node_def, &op_def, nullptr,
+                                 input_types, input_memory_types, output_types, output_memory_types,
+                                 1, nullptr);
+    NonZeroWithValueShapeOp kernel(&context);
+}
+
+TEST(NonZeroWithValueOpsTest, TestNonZeroWithValueShapeInfer) {
+  // Runs the registered shape function on a context that has one input shape per declared input.
+  const OpRegistrationData* reg;
+  TF_CHECK_OK(OpRegistry::Global()->LookUp("NonZeroWithValueShape", &reg));
+  OpDef op_def = reg->op_def;
+  NodeDef def;
+  TF_CHECK_OK(NodeDefBuilder("dummy", &op_def).Input(FakeInputStub(DT_INT32)).Input(FakeInputStub(DT_INT32))
+                  .Input(FakeInputStub(DT_INT32)).Finalize(&def));
+  shape_inference::InferenceContext c(0, &def, op_def, {TShape({}), TShape({}), TShape({})}, {}, {}, {});
+  TF_CHECK_OK(c.construction_status());  // fails if the shape count does not match the NodeDef inputs
+  TF_CHECK_OK(reg->shape_inference_fn(&c));
+  for (int i = 0; i < c.num_outputs(); ++i) { EXPECT_EQ(c.Rank(c.output(i)), 1); }  // unknown-length vectors
+}
+}  // namespace
+}  // namespace tensorflow
\ No newline at end of file