diff --git a/source/tools/detect/sched/tasktop/tasktop.c b/source/tools/detect/sched/tasktop/tasktop.c
index 54909cf69089af7f11079519e9acc0805c1fa9e1..37a92cec38ec6a4aed525d5ca944aec934498e08 100644
--- a/source/tools/detect/sched/tasktop/tasktop.c
+++ b/source/tools/detect/sched/tasktop/tasktop.c
@@ -243,10 +243,7 @@ static int read_sched_delay(struct sys_record_t* sys_rec) {
                    &ph, &ph, &ph, &ph, &ph, &delay, &ph);
 
             int cpu_id = atoi(name + 3);
-            // #ifdef DEBUG
-            //             fprintf(stderr, "cpu_id = %d delay=%llu\n", cpu_id,
-            //             delay);
-            // #endif
+
             sys_rec->percpu_sched_delay[cpu_id] = delay - prev_delay[cpu_id];
             prev_delay[cpu_id] = delay;
         } else {
@@ -271,7 +268,8 @@ static int read_cgroup_throttle() {
             continue;
         }
         char stat_path[BUF_SIZE];
-        snprintf(stat_path, BUF_SIZE, "%s/%s/cpu.stat", CGROUP_PATH, dir->d_name);
+        snprintf(stat_path, BUF_SIZE, "%s/%s/cpu.stat", CGROUP_PATH,
+                 dir->d_name);
 
         cgroup_cpu_stat_t stat;
 
@@ -343,8 +341,8 @@ static int read_stat(struct sys_cputime_t* prev_sys,
         // int all_time = (sysconf(_SC_NPROCESSORS_ONLN) * env.delay *
         // sysconf(_SC_CLK_TCK));
 
-        /* all_time can't not calculate by delay * ticks * online-cpu-num,
-         * because there is error between process waked up and running, when
+        /* all_time can't be calculated as delay * ticks * online-cpu-num,
+         * because there is an error between process waked up and running, when
          * sched delay occur , the sum of cpu rates more than 100%. */
 
         sys_rec->cpu[i].usr =
@@ -572,7 +570,6 @@ static void sort_records(struct record_t* rec, int proc_num,
 }
 
 static char* ts2str(time_t ts, char* buf, int size) {
-    // __builtin_memset(buf, 0, size;
     struct tm* t = gmtime(&ts);
     strftime(buf, size, "%Y-%m-%d %H:%M:%S", t);
     return buf;
@@ -616,7 +613,6 @@ static char* second2str(time_t ts, char* buf, int size) {
 }
 
 static void output(struct record_t* rec, int proc_num, FILE* dest) {
-    // system("clear");
     struct task_record_t** records = rec->tasks;
     struct sys_record_t* sys = &rec->sys;
     struct proc_fork_info_t* info = &(sys->most_fork_info);
diff --git a/source/tools/detect/sched/tasktop/tasktopSelftest/test b/source/tools/detect/sched/tasktop/tasktopSelftest/test
index f4347c84649c7e2b14144cb6928d37b82339bc7e..8c0c54324813df8bcaf0ae536fb6be1b1dc57831 100755
Binary files a/source/tools/detect/sched/tasktop/tasktopSelftest/test and b/source/tools/detect/sched/tasktop/tasktopSelftest/test differ
diff --git a/source/tools/detect/sched/tasktop/tasktopSelftest/test.c b/source/tools/detect/sched/tasktop/tasktopSelftest/test.c
index bc14b463700d4a7b537c7e2deab96e435bc4a9f0..c71f45d6c831fbd799f12b92b76767d85c244db9 100644
--- a/source/tools/detect/sched/tasktop/tasktopSelftest/test.c
+++ b/source/tools/detect/sched/tasktop/tasktopSelftest/test.c
@@ -14,6 +14,7 @@ void create_process(int n) {
     for (i = 0; i < n; i++) {
         if ((pid[i] = fork()) == 0) {
             sleep(120);
+            exit(0);
         }
     }
 
@@ -123,7 +124,7 @@ void cpu_bind(int cpu_id) {
 }
 
 int main(int argc, char **argv) {
-    if (argc != 2) {
+    if (argc > 3) {
         printf("usage: test [bind|fork|clone|multi_thread]\n");
         return -1;
     }
@@ -137,5 +138,7 @@ int main(int argc, char **argv) {
     } else if (!strcmp(argv[1], "multi_thread")) {
         sleep(10);
         run_multithread();
+    } else if (!strcmp(argv[1], "sleep")) {
+        create_process(atoi(argv[2]));
     }
 }
\ No newline at end of file
diff --git a/source/tools/detect/sched/tasktop/tasktopSelftest/test.md b/source/tools/detect/sched/tasktop/tasktopSelftest/test.md
index 6d66d18434ddff81b92dce790c846b06064d91b7..408535dd0048b7dcfc0eed0f974c9a5118bc1661 100644
--- a/source/tools/detect/sched/tasktop/tasktopSelftest/test.md
+++ b/source/tools/detect/sched/tasktop/tasktopSelftest/test.md
@@ -1,17 +1,52 @@
-# 1 Tasktop功能测试
-## 1.0 低负载状态
+# Tasktop 测试文档
+
+## 1. 测试环境
+
+    $lscpu
+    Architecture:          x86_64
+    CPU op-mode(s):        32-bit, 64-bit
+    Byte Order:            Little Endian
+    CPU(s):                4
+    On-line CPU(s) list:   0-3
+    Thread(s) per core:    2
+    Core(s) per socket:    1
+    Socket(s):             2
+    NUMA node(s):          1
+    Vendor ID:             GenuineIntel
+    CPU family:            6
+    Model:                 79
+    Model name:            Intel(R) Xeon(R) CPU E5-2682 v4 @ 2.50GHz
+    Stepping:              1
+    CPU MHz:               2494.224
+    BogoMIPS:              4988.44
+    Hypervisor vendor:     KVM
+    Virtualization type:   full
+    L1d cache:             32K
+    L1i cache:             32K
+    L2 cache:              256K
+    L3 cache:              40960K
+    NUMA node0 CPU(s):     0-3
+    Flags:                 fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm rdseed adx smap xsaveopt
+
+## 2. Tasktop功能测试
+
+### 2.0 低负载状态
 
     $uptime
     11:13:52 up 783 days, 20:59,  0 users,  load average: 1.28, 10.09, 32.79
 
-## 1.1 多线程、多进程场景
+### 2.1 多线程、多进程场景
+
 由于多线程或者多进程导致cpu资源不足，大量task在队列中无法被调度导致的R状态冲高
-### 1.1.1 测试方法
+
+#### 2.1.1 测试方法
+
 通过stress工具 启动64个进程进行计算
 
     stress -c 64
 
-### 1.1.2 测试结果
+#### 2.1.2 测试结果
+
     2023-05-24 02:15:36
     UTIL&LOAD
     usr    sys iowait  load1      R      D   fork : proc 
@@ -33,15 +68,20 @@
 
 观察到load1迅速冲高，伴随系统以及per-cpu的cpu利用率打满，cpu时间集中于用户态，per-cpu的调度延迟达到50s。
 
-## 1.2 cpu绑核场景
+### 2.2 cpu绑核场景
+
 由于配置不当，导致大量进程堆积在少部分CPU核上导致的R状态进程数冲高
-### 1.2.1 测试方法
+
+#### 2.2.1 测试方法
+
 启动64个计算进程 绑定到cpu0上
 
     cd tasktopSelftest
     make clean;make
     ./test bind
-### 1.2.2 测试结果
+
+#### 2.2.2 测试结果
+
     2023-05-24 02:49:31
     UTIL&LOAD
     usr    sys iowait  load1      R      D   fork : proc 
@@ -61,15 +101,20 @@
                 (test)  48449  48433 1684896441        130    1.7    0.0    1.7
                 (test)  48451  48433 1684896441        130    1.7    0.0    1.7
 观察到load1冲高，伴随有R状态进程数增多，但系统cpu利用率不高，cpu-0的利用率打满，cpu-0的调度延迟达到190s
-## 1.3 大量fork场景
+
+### 2.3 大量fork场景
+
 由于loadavg的采样是周期性的，可能存在大量短task在采样时出现但是无法被top等工具捕捉等情况
-### 1.3.1 测试方法
+
+#### 2.3.1 测试方法
+
 主进程每1ms周期性的进行fork出128个进程 每个进程执行10w次自增运算后退出
 
     cd tasktopSelftest
     make clean;make
     ./test fork
-### 1.3.2 测试结果
+
+#### 2.3.2 测试结果
 
     2023-05-24 03:42:18
     UTIL&LOAD
@@ -93,9 +138,13 @@
             (dfget)  56945      1 1684899541        197    0.3    0.0    0.3
 
 观察到load增高，同时CPU利用率也跑满，存在较多R进程但是没有被top捕捉到。此时fork增量激增，fork调用次数最多的进程为test，同时test进程的sys利用率较高。
-## 1.4 cgroup限流场景
+
+### 2.4 cgroup限流场景
+
 与cpu核绑定类似，通过cgroup限制了cpu带宽导致task堆积在就绪队列
-### 1.4.1 测试方法
+
+#### 2.4.1 测试方法
+
 创建一个cgroup 限定cgroup的cpu额度 启动一个进程并将task的pid加入cgroup的tasks中 之后该进程创建128个线程执行计算任务
 
     # 创建cgroup 设置限流30% 使用cpuset.cpus=0-3 
@@ -107,7 +156,7 @@
     # run test
     ./test multi_thread
 
-### 1.4.2 测试结果
+#### 2.4.2 测试结果
 
         [/sys/fs/cgroup/cpu/aegis/cpu.stat] nr_periods=4 nr_throttled=0 throttled_time=0 nr_burst=0 burst_time=0
         [/sys/fs/cgroup/cpu/docker/cpu.stat] nr_periods=0 nr_throttled=0 throttled_time=0 nr_burst=0 burst_time=0
@@ -139,4 +188,27 @@
     (logagent-collecS   2423   2347 1684908951         83    0.3    0.0    0.3
         (argusagent)   3096      1 1684908962       7490    0.0    0.3    0.3
 
-可以观察到此时虽然**实际负载**很高，大量task由于限流处于R状态，但是由于cgroup机制task并不位于就绪队列中，因此R状态数量指标不准确导致load1计算不准（load1无法准确体现出系统的负载情况）。但是在cgroup限流信息中可以看到stress_cg中**出现了大量的限流**，并且**per-cpu的调度延迟很高**，一定程度体现了cpu就绪队列中存在task堆积。
\ No newline at end of file
+可以观察到此时虽然**实际负载**很高，大量task由于限流处于R状态，但是由于cgroup机制task并不位于就绪队列中，因此R状态数量指标不准确导致load1计算不准（load1无法准确体现出系统的负载情况）。但是在cgroup限流信息中可以看到stress_cg中**出现了大量的限流**，并且**per-cpu的调度延迟很高**，一定程度体现了cpu就绪队列中存在task堆积。
+
+## 3. Tasktop性能测试
+
+tasktop在运行时会对/proc文件系统进行遍历，采集相关信息，大量进程下可能会影响业务，因此对tasktop在不同进程数的场景下进行性能测试。
+
+### 3.1 测试方法
+
+创建N个进程，并让这N个进程进入sleep状态，不占用CPU资源，只增加proc文件数量（例如在tasktopSelftest目录下执行 `./test sleep N`）。
+
+### 3.2 测试结果
+
+| Process Number   | CPU Utilization    |
+| :---------: | :---------: |
+| 147         | 0.3%        |
+| 1155        | 1.0%        |
+| 2157        | 1.9-2.0%    |
+| 4147        | 3.7-4.7%    |
+| 8152        | 8-11%       |
+| 12198       | 15.7-18%    |
+| 15161       | 18-21%      |
+| 20173       | 26.9-31.6%  |
+
+在存在20000个进程proc文件情况下，tasktop的整体cpu资源消耗在单核的30%左右。