From 9d51e27402465f7c6bbad4b4cf776a9540cece80 Mon Sep 17 00:00:00 2001
From: luzhihao <luzhihao@huawei.com>
Date: Sat, 22 Oct 2022 12:05:38 +0800
Subject: [PATCH] docs: move I/O metrics and abnormal events from thread to proc

---
 gopher_tech.md          | 29 +++++++++++++++--------------
 gopher_tech_abnormal.md | 12 ++++++------
 2 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/gopher_tech.md b/gopher_tech.md
index 6748835..ba3a454 100644
--- a/gopher_tech.md
+++ b/gopher_tech.md
@@ -100,18 +100,13 @@
 
 # THREAD（entity_name：task）
 
-| metrics_name    | table_name | metrics_type | unit  | KPI  | metrics description                                          |
-| --------------- | ---------- | ------------ | ----- | ---- | ------------------------------------------------------------ |
-| pid             | thread     | key          |       |      | 线程PID                                                      |
-| tgid            | thread     | label        |       |      | 所属进程ID                                                   |
-| comm            | thread     | label        |       |      | 线程所属进程名称                                             |
-| off_cpu_ns      | thread     | Gauge        | ns    | Y    | task调度offcpu的最大时间，统计方式：      1. KPROBE finish_task_switch 获取入参prev   task（pid）以及当前时间，当前CPU信息（bpf_get_smp_processor_id()），记录MAP（pid/cpu作为key）；      2. finish_task_switch   中bpf_get_current_pid_tgid获取当前pid，以及当前CPU信息（bpf_get_smp_processor_id()），匹配步骤1中的数据以及计算时间差，得出一次offcpu时间。      注意：      1. 过滤idle(pid=0)      2. 只记录offcpu最大值 |
-| migration_count | thread     | Gauge        |       |      | task   CPU之间迁移次数                                       |
-| iowait_us       | thread     | Gauge        | us    | Y    | task   IO操作等待时间（单位us）                              |
-| bio_bytes_write | thread     | Gauge        | bytes | Y    | task发起bio写操作字节数                                      |
-| bio_bytes_read  | thread     | Gauge        | bytes | Y    | task发起bio读操作字节数                                      |
-| bio_err_count   | thread     | Gauge        |       |      | task发起的bio结果失败的次数                                  |
-| hang_count      | thread     | Gauge        |       |      | task发生io   hang次数                                        |
+| metrics_name    | table_name | metrics_type | unit | KPI  | metrics description                                          |
+| --------------- | ---------- | ------------ | ---- | ---- | ------------------------------------------------------------ |
+| pid             | thread     | key          |      |      | 线程PID                                                      |
+| tgid            | thread     | label        |      |      | 所属进程ID                                                   |
+| comm            | thread     | label        |      |      | 线程所属进程名称                                             |
+| off_cpu_ns      | thread     | Gauge        | ns   | Y    | task调度offcpu的最大时间，统计方式：      1. KPROBE finish_task_switch 获取入参prev   task（pid）以及当前时间，当前CPU信息（bpf_get_smp_processor_id()），记录MAP（pid/cpu作为key）；      2. finish_task_switch   中bpf_get_current_pid_tgid获取当前pid，以及当前CPU信息（bpf_get_smp_processor_id()），匹配步骤1中的数据以及计算时间差，得出一次offcpu时间。      注意：      1. 过滤idle(pid=0)      2. 只记录offcpu最大值 |
+| migration_count | thread     | Gauge        |      |      | task   CPU之间迁移次数                                       |
 
 # Process（entity_name：proc）
 
@@ -178,8 +173,14 @@
 | ns_vfork              | proc_syscall_fork  | Gauge        | ns   |      | 进程系统调用vfork时长，单位ns                                |
 | ns_clone              | proc_syscall_fork  | Gauge        | ns   |      | 进程系统调用clone时长，单位ns                                |
 | syscall_failed        | proc_syscall       | Gauge        |      | Y    | 进程系统调用失败次数                                         |
-|                       |                    |              |      |      |                                                              |
-|                       |                    |              |      |      |                                                              |
+| less_4k_io_read       | proc_io            | Gauge        |      |      | Number of small I/O (less than 4 KB) read operations at the BIO layer. |
+| less_4k_io_write      | proc_io            | Gauge        |      |      | Number of small I/O (less than 4 KB) write operations at the BIO layer. |
+| greater_4k_io_read    | proc_io            | Gauge        |      |      | Number of large I/O (greater than 4 KB) read operations at the BIO layer. |
+| greater_4k_io_write   | proc_io            | Gauge        |      |      | Number of large I/O (greater than 4 KB) write operations at the BIO layer. |
+| bio_latency           | proc_io            | Gauge        | us   |      | I/O operation delay at the BIO layer (unit: us).             |
+| bio_err_count         | proc_io            | Gauge        |      |      | Number of I/O operation failures at the BIO layer.           |
+| hang_count            | proc_io            | Gauge        |      |      | Number of I/O hangs experienced by the process.              |
+| iowait_us             | proc_io            | Gauge        | us   |      | Time the process spent waiting for I/O (unit: us).           |
 
 # BLOCK
 
diff --git a/gopher_tech_abnormal.md b/gopher_tech_abnormal.md
index 3eb2992..b91eacc 100644
--- a/gopher_tech_abnormal.md
+++ b/gopher_tech_abnormal.md
@@ -47,12 +47,9 @@ gala-gopher提供系统异常检测能力，支持用户在启动各个探针的
 
 ### THREAD
 
-| 异常事件名    | 事件信息                                                     | 输出参数                                                     | 输入参数 | 异常等级 |
-| ------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | -------- | -------- |
-| off_cpu_ns    | Process(COMM:%s TID:%d) is preempted(COMM:%s PID:%d) and off-CPU %llu ns. | P1: process name P2: process id P3: process name P4: process id P5: off-cpu times | NA       | WARN     |
-| iowait_us     | Process(COMM:%s TID:%d) iowait %llu us.                      | P1: process name P2: process id P3: io-wait times            | [-T <>]  | WARN     |
-| hang_count    | Process(COMM:%s TID:%d) io hang %u.                          | P1: process name P2: process id P3: error count              | NA       | WARN     |
-| bio_err_count | Process(COMM:%s TID:%d) bio error %u.                        | P1: process name P2: process id P3: error count              | NA       | WARN     |
+| 异常事件名 | 事件信息                                                     | 输出参数                                                     | 输入参数 | 异常等级 |
+| ---------- | ------------------------------------------------------------ | ------------------------------------------------------------ | -------- | -------- |
+| off_cpu_ns | Process(COMM:%s TID:%d) is preempted(COMM:%s PID:%d) and off-CPU %llu ns. | P1: process name P2: thread id P3: preempting process name P4: preempting process id P5: off-CPU time (ns) | [-O <>]  | WARN     |
 
 ### PROC
 
@@ -60,6 +57,9 @@ gala-gopher提供系统异常检测能力，支持用户在启动各个探针的
 | ------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | -------- | -------- |
 | syscall_failed     | Process(COMM:%s PID:%u) syscall failed(SysCall-ID:%d RET:%d COUNT:%u). | P1: process name P2: process id P3: syscall no P4: syscall ret-code P5 failed count | NA       | WARN     |
 | gethostname_failed | Process(COMM:%s PID:%u) gethostname failed(COUNT:%u).        | P1: process name P2: process id P3 failed count              | NA       | WARN     |
+| iowait_us          | Process(COMM:%s PID:%u) iowait %llu us.                      | P1: process name P2: process id P3: io-wait time (us)        | [-T <>]  | WARN     |
+| hang_count         | Process(COMM:%s PID:%u) hang count %u.                       | P1: process name P2: process id P3: hang count               | NA       | WARN     |
+| bio_err_count      | Process(COMM:%s PID:%u) bio error %u.                        | P1: process name P2: process id P3: error count              | NA       | WARN     |
 
 ### BLOCK
 
-- 
Gitee