diff --git a/source/tools/monitor/unity/etc/lingjun.yaml b/source/tools/monitor/unity/etc/lingjun.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7b2a606a9cf968dff8822107a0499e0c3a26466a
--- /dev/null
+++ b/source/tools/monitor/unity/etc/lingjun.yaml
@@ -0,0 +1,244 @@
+config:
+  freq: 15  # unit: seconds
+  port: 8405  # bind port
+  bind_addr: 0.0.0.0  # bind IP
+  backlog: 32  # listen backlog
+  identity:  # supported modes: hostip, curl (needs url arg), hostname, file (needs path arg), specify (needs name arg)
+    # mode: curl
+    # url: "http://100.100.100.200/latest/meta-data/instance-id"
+    # name: test_specify
+    mode: hostip
+  # real_timestamps: true
+  # unix_socket: "/tmp/sysom_unity.sock"
+  proc_path: /  # in container mode (e.g. -v /:/mnt/host), use /mnt/host/ instead
+  db:
+    rotate: 7  # tsdb file retention time, in days
+    budget: 200  # max query buffer from tsdb
+  limit:
+    cpu: 300  # unit: %
+    mem: 500  # unit: MB
+    tasks: 10  # monitor at most 10 PIDs
+
+outline:
+  - /var/sysom/outline
+
+luaPlugins: ["proc_buddyinfo", "proc_diskstats", "proc_meminfo", "proc_mounts", "proc_netdev",
+             "proc_snmp_stat", "proc_sockstat", "proc_stat", "proc_statm", "proc_vmstat",
+             "proc_uptime", "proc_arp", "proc_cgroups", "proc_softirqs", "proc_softnet_stat",
+  ]  # keep indented: a flow-closing bracket at column 0 is rejected by strict YAML parsers
+
+plugins:  # each entry names a plugin via "so" and carries a free-text description
+  # - so: kmsg
+  #   description: "collect dmesg info."
+  # -
+  #   so: proc_schedstat
+  #   description: "collect schedule stat info of percpu"
+  -
+    so: proc_loadavg
+    description: "collect load avg"
+  - so: net_health
+    description: "tcp net health."
+  - so: net_retrans
+    description: "tcp retrans monitor."
+  -
+    so: unity_irqoff
+    description: "irqoff:detect irq turned off and can't response"
+  # -
+  #   so: gpuinfo
+  #   description: "collect gpuinfo"
+  -
+    so: pmu_events
+    description: "collect pmu events"
+
+metrics:  # entries use the keys title/from/head/help/type; every entry here is type "gauge"
+  -
+    title: sysom_proc_cpu_total
+    from: cpu_total
+    head: mode
+    help: "cpu usage info for total."
+    type: "gauge"
+  # - title: sysom_proc_cpus
+  #   from: cpus
+  #   head: mode
+  #   help: "cpu usage info for per-cpu."
+  #   type: "gauge"
+  - title: sysom_proc_sirq
+    from: sirq
+    head: type
+    help: "system soft irq times."
+    type: "gauge"
+  - title: sysom_proc_stat_counters
+    from: stat_counters
+    head: counter
+    help: "system state counter."
+    type: "gauge"
+  - title: sysom_proc_meminfo
+    from: meminfo
+    head: value
+    help: "meminfo from /proc/meminfo."
+    type: "gauge"
+  - title: sysom_proc_vmstat
+    from: vmstat
+    head: value
+    help: "vmstat info from /proc/vmstat."
+    type: "gauge"
+  - title: sysom_proc_self_statm
+    from: self_statm
+    head: value
+    help: "statm info from /proc/self/statm."
+    type: "gauge"
+  # - title: sysom_proc_networks
+  #   from: networks
+  #   head: counter
+  #   help: "networks info from /proc/net/dev."
+  #   type: "gauge"
+  - title: sysom_proc_disks
+    from: disks
+    head: counter
+    help: "disk info from /proc/diskstats."
+    type: "gauge"
+  - title: sysom_proc_pkt_status
+    from: pkt_status
+    head: counter
+    help: "net status info from /proc/net/snmp and /proc/net/status."
+    type: "gauge"
+  # - title: sysom_fs_stat
+  #   from: fs_stat
+  #   head: counter
+  #   help: "file system information."
+  #   type: "gauge"
+  - title: sysom_sock_stat
+    from: sock_stat
+    head: value
+    help: "sock stat counters from /proc/net/sockstat"
+    type: "gauge"
+  # - title: sysom_proc_schedstat
+  #   from: proc_schedstat
+  #   head: value
+  #   help: "schedule state of percpu."
+  #   type: "gauge"
+  - title: sysom_proc_loadavg
+    from: proc_loadavg
+    head: value
+    help: "loadavg of system from /proc/loadavg"
+    type: "gauge"
+  - title: sysom_proc_buddyinfo
+    from: buddyinfo
+    head: value
+    help: "buddyinfo of system from /proc/buddyinfo"
+    type: "gauge"
+  # - title: sysom_IOMonIndForDisksIO
+  #   from: IOMonIndForDisksIO
+  #   head: value
+  #   help: "Disk IO indicators and abnormal events"
+  #   type: "gauge"
+  # - title: sysom_IOMonIndForSystemIO
+  #   from: IOMonIndForSystemIO
+  #   head: value
+  #   help: "System indicators and abnormal events about IO"
+  #   type: "gauge"
+  # - title: sysom_IOMonDiagLog
+  #   from: IOMonDiagLog
+  #   head: value
+  #   help: "Diagnose log for IO exception"
+  #   type: "gauge"
+  - title: sched_moni_jitter
+    from: sched_moni_jitter
+    head: value
+    help: "nosched/irqoff:sys and irqoff hold cpu and didn't scheduling"
+    type: "gauge"
+  - title: sysom_cpu_dist
+    from: cpu_dist
+    head: value
+    help: "task cpu sched dist."
+    type: "gauge"
+  - title: sysom_net_health_hist
+    from: net_health_hist
+    head: value
+    help: "net_health_hist"
+    type: "gauge"
+  - title: sysom_net_health_count
+    from: net_health_count
+    head: value
+    help: "net_health_count"
+    type: "gauge"
+  - title: sysom_net_retrans_count
+    from: net_retrans_count
+    head: value
+    help: "net_retrans_count"
+    type: "gauge"
+  # - title: sysom_gpuinfo
+  #   from: gpuinfo
+  #   head: value
+  #   help: "gpuinfo of system from nvidia-smi"
+  #   type: "gauge"
+  - title: sysom_uname
+    from: uname
+    head: value
+    help: "uname info"
+    type: "gauge"
+  - title: sysom_uptime
+    from: uptime
+    head: value
+    help: "uptime from /proc/uptime"
+    type: "gauge"
+  - title: sysom_system_release
+    from: system_release
+    head: value
+    help: "system_release from /etc/os-release"
+    type: "gauge"
+  - title: sysom_cgroups
+    from: cgroups
+    head: value
+    help: "cgroup number."
+    type: "gauge"
+  # - title: sysom_per_sirqs
+  #   from: per_sirqs
+  #   head: value
+  #   help: "per_sirqs."
+  #   type: "gauge"
+  # - title: sysom_softnets
+  #   from: softnets
+  #   head: value
+  #   help: "cgroup number."  # NOTE(review): same text as sysom_cgroups; looks copy-pasted - confirm before enabling
+  #   type: "gauge"
+  # - title: sysom_interrupts
+  #   from: interrupts
+  #   head: value
+  #   help: "interrupts."
+  #   type: "gauge"
+  - title: sysom_net_ip_count
+    from: net_ip_count
+    head: value
+    help: "net snmp net_ip_count"
+    type: "gauge"
+  - title: sysom_net_icmp_count
+    from: net_icmp_count
+    head: value
+    help: "net snmp net_icmp_count"
+    type: "gauge"
+  - title: sysom_net_udp_count
+    from: net_udp_count
+    head: value
+    help: "net snmp net_udp_count"
+    type: "gauge"
+  - title: sysom_net_tcp_count
+    from: net_tcp_count
+    head: value
+    help: "net snmp net_tcp_count"
+    type: "gauge"
+  - title: sysom_net_tcp_ext_count
+    from: net_tcp_ext_count
+    head: value
+    help: "net stat net_tcp_ext_count"
+    type: "gauge"
+  - title: sysom_pmu_events
+    from: pmu_events
+    head: value
+    help: "pmu events, such as cycles/instructions, llc events"
+    type: "gauge"
+  # - title: sysom_pmu_events_percpu
+  #   from: pmu_events_percpu
+  #   head: value
+  #   help: "pmu events of percpu"
+  #   type: "gauge"