diff --git a/deps/4_grafana/sysom-cluster-dashboard.json b/deps/4_grafana/sysom-cluster-dashboard.json index 07f78cb16ee0fb57817f37be3175109065c72086..31c667be72889d5b30964421c621045e90cc5e51 100644 --- a/deps/4_grafana/sysom-cluster-dashboard.json +++ b/deps/4_grafana/sysom-cluster-dashboard.json @@ -3357,7 +3357,7 @@ "datasource": "sysom-prometheus", "description": "\u5bb9\u5668\u5927\u76d8", "gridPos": { - "h": 2, + "h": 3, "w": 24, "x": 0, "y": 91 @@ -3370,8 +3370,8 @@ "showLineNumbers": false, "showMiniMap": false }, - "content": "[\u5bb9\u5668\u5927\u76d8\u8be6\u60c5](../grafana/d/rYdddlPWW/rong-qi-jian-kong?var-node=192.168.0.12:8889&var-podname=All&var-podns=All&orgId=1&refresh=5s)\n\n", - "mode": "markdown" + "content": "\u5bb9\u5668\u5927\u76d8\u8be6\u60c5", + "mode": "html" }, "pluginVersion": "9.2.2", "title": "\u5bb9\u5668\u5927\u76d8", @@ -3381,10 +3381,10 @@ "datasource": "sysom-prometheus", "description": "\u8282\u70b9\u5927\u76d8", "gridPos": { - "h": 2, + "h": 3, "w": 24, "x": 0, - "y": 93 + "y": 94 }, "id": 53, "links": [], @@ -3394,8 +3394,8 @@ "showLineNumbers": false, "showMiniMap": false }, - "content": "[\u8282\u70b9\u5927\u76d8\u8be6\u60c5](../grafana/d/rYdddlPWk/sysom_base?orgId=1&refresh=5s)\n\n", - "mode": "markdown" + "content": "\u8282\u70b9\u5927\u76d8\u8be6\u60c5", + "mode": "html" }, "pluginVersion": "9.2.2", "title": "\u8282\u70b9\u5927\u76d8", @@ -3498,7 +3498,7 @@ "timezone": "", "title": "\u96c6\u7fa4\u89c6\u89d2", "uid": "F4UBT8w4k", - "version": 10, + "version": 8, "weekStart": "" } } \ No newline at end of file diff --git a/sysom_server/sysom_diagnosis/config.yml b/sysom_server/sysom_diagnosis/config.yml index 9ffefa5ec6d8f5ebbc0cdff8e70fe7c747dce72e..5d286d8fc303a933f7680f48b06f8c0a43f3a5bc 100644 --- a/sysom_server/sysom_diagnosis/config.yml +++ b/sysom_server/sysom_diagnosis/config.yml @@ -1,8 +1,8 @@ vars: - SYSAK_DOWNLOAD_URL: &SYSAK_DOWNLOAD_URL https://mirrors.openanolis.cn/sysak/packages/release-v2.4.0/ - SYSAK_VERSION: &SYSAK_VERSION 2.4.0-1 + SYSAK_DOWNLOAD_URL: &SYSAK_DOWNLOAD_URL https://mirrors.openanolis.cn/sysak/packages/release-v3.2.0/ + SYSAK_VERSION: &SYSAK_VERSION 3.2.0-1 SERVICE_NAME: &SERVICE_NAME sysom_diagnosis - SERVICE_CONSUMER_GROUP: !concat &SERVICE_CONSUMER_GROUP [*SERVICE_NAME, "_consumer_group"] + SERVICE_CONSUMER_GROUP: &SERVICE_CONSUMER_GROUP !concat [*SERVICE_NAME, "_consumer_group"] sysom_server: cec: @@ -23,15 +23,15 @@ sysom_service: taskexecutetimeout: 10 default_channel: ssh ignore_tool_check_channels: - - offline + - offline framework: gcache: protocol: redis node_dispatch: cmg: tags: - - Diagnosis - - Django + - Diagnosis + - Django # Metadata of service metadata: check: @@ -47,29 +47,23 @@ sysom_service: sysom_node: version: 2.1 env: - SYSAK_VERSION: *SYSAK_VERSION + SYSAK_VERSION: *SYSAK_VERSION # 节点分发配置 delivery: from_dir: scripts to_dir: node files: comm: &code_delivery_files_comm - - local: node_init.sh - remote: - - local: node_clear.sh - remote: - - local: node_update.sh - remote: + - local: node_init.sh + remote: + - local: node_clear.sh + remote: + - local: node_update.sh + remote: amd64: &node_delivery_files_amd64 - - *code_delivery_files_comm - - local: !concat ["sysak-", *SYSAK_VERSION, ".x86_64.rpm"] - remote: - !concat [ - *SYSAK_DOWNLOAD_URL, - "sysak-", - *SYSAK_VERSION, - ".x86_64.rpm", - ] + - *code_delivery_files_comm + - local: !concat ["sysak-", *SYSAK_VERSION, ".x86_64.rpm"] + remote: !concat [*SYSAK_DOWNLOAD_URL, "sysak-", *SYSAK_VERSION, ".x86_64.rpm"] arm64: *node_delivery_files_amd64 x86_64: *node_delivery_files_amd64 scripts: diff --git a/sysom_server/sysom_diagnosis/scripts/node_init.sh b/sysom_server/sysom_diagnosis/scripts/node_init.sh index f9a5678de27dd3bd9427639bddd4837e6ba64b17..6b8ded6f0ae54a97f88d2c9e0f6745e4ff2328eb 100755 --- a/sysom_server/sysom_diagnosis/scripts/node_init.sh +++ b/sysom_server/sysom_diagnosis/scripts/node_init.sh @@ -2,7 +2,7 @@ RESOURCE_DIR=${NODE_HOME}/${SERVICE_NAME} if [ "$SYSAK_VERTION" == "" ]; then - export SYSAK_VERTION=2.4.0-1 + export SYSAK_VERTION=3.2.0-1 fi if [ "$ARCH" == "" ]; then export ARCH=x86_64 diff --git a/sysom_server/sysom_diagnosis/scripts/node_update.sh b/sysom_server/sysom_diagnosis/scripts/node_update.sh index d6967c2ee18e2cf4adb17c4d1fe6da09bae119b9..80811bda0db675ddb545440a27ee7da0114f48ad 100755 --- a/sysom_server/sysom_diagnosis/scripts/node_update.sh +++ b/sysom_server/sysom_diagnosis/scripts/node_update.sh @@ -2,7 +2,7 @@ RESOURCE_DIR=${NODE_HOME}/${SERVICE_NAME} if [ "$SYSAK_VERTION" == "" ]; then - export SYSAK_VERTION=2.4.0-1 + export SYSAK_VERTION=3.2.0-1 fi if [ "$ARCH" == "" ]; then export ARCH=x86_64 diff --git a/sysom_server/sysom_monitor_server/config.yml b/sysom_server/sysom_monitor_server/config.yml index 0e941feda96e08753606e7263e997f29310c0d1c..a36c3d52010f71216650f55119aab3517a33c334 100644 --- a/sysom_server/sysom_monitor_server/config.yml +++ b/sysom_server/sysom_monitor_server/config.yml @@ -1,11 +1,10 @@ vars: NODE_EXPORT_BASE_DOWNLOAD_URL: &NODE_EXPORT_BASE_DOWNLOAD_URL https://sysom.oss-cn-beijing.aliyuncs.com/monitor/ NODE_EXPORT_VERSION: &NODE_EXPORT_VERSION 1.5.0 - SYSAK_DOWNLOAD_URL: &SYSAK_DOWNLOAD_URL https://mirrors.openanolis.cn/sysak/packages/release-v2.4.0/ - SYSAK_VERSION: &SYSAK_VERSION 2.4.0-1 + SYSAK_DOWNLOAD_URL: &SYSAK_DOWNLOAD_URL https://mirrors.openanolis.cn/sysak/packages/release-v3.2.0/ + SYSAK_VERSION: &SYSAK_VERSION 3.2.0-1 SERVICE_NAME: &SERVICE_NAME sysom_monitor_server - SERVICE_CONSUMER_GROUP: - !concat &SERVICE_CONSUMER_GROUP [*SERVICE_NAME, "_consumer_group"] + SERVICE_CONSUMER_GROUP: &SERVICE_CONSUMER_GROUP !concat [*SERVICE_NAME, "_consumer_group"] sysom_server: cec: @@ -28,8 +27,8 @@ sysom_service: node_dispatch: cmg: tags: - - MonitorServer - - FastApi + - MonitorServer + - FastApi # Metadata of service metadata: check: @@ -58,42 +57,24 @@ sysom_node: to_dir: node files: comm: &code_delivery_files_comm - - local: node_init.sh - remote: - - local: node_clear.sh - remote: - - local: node_update.sh - remote: - - local: base.yaml - remote: + - local: node_init.sh + remote: + - local: node_clear.sh + remote: + - local: node_update.sh + remote: + - local: base.yaml + remote: amd64: &node_delivery_files_amd64 - - *code_delivery_files_comm - - local: node_exporter-1.5.0.linux-amd64.tar.gz - remote: - !concat [ - *NODE_EXPORT_BASE_DOWNLOAD_URL, - "node_exporter-", - *NODE_EXPORT_VERSION, - ".linux-amd64.tar.gz", - ] - - local: !concat ["sysak-", *SYSAK_VERSION, ".x86_64.rpm"] - remote: - !concat [ - *SYSAK_DOWNLOAD_URL, - "sysak-", - *SYSAK_VERSION, - ".x86_64.rpm", - ] + - *code_delivery_files_comm + - local: node_exporter-1.5.0.linux-amd64.tar.gz + remote: !concat [*NODE_EXPORT_BASE_DOWNLOAD_URL, "node_exporter-", *NODE_EXPORT_VERSION, ".linux-amd64.tar.gz"] + - local: !concat ["sysak-", *SYSAK_VERSION, ".x86_64.rpm"] + remote: !concat [*SYSAK_DOWNLOAD_URL, "sysak-", *SYSAK_VERSION, ".x86_64.rpm"] arm64: - - *code_delivery_files_comm - - local: node_exporter-1.5.0.linux-arm64.tar.gz - remote: - !concat [ - *NODE_EXPORT_BASE_DOWNLOAD_URL, - "node_exporter-", - *NODE_EXPORT_VERSION, - ".linux-arm64.tar.gz", - ] + - *code_delivery_files_comm + - local: node_exporter-1.5.0.linux-arm64.tar.gz + remote: !concat [*NODE_EXPORT_BASE_DOWNLOAD_URL, "node_exporter-", *NODE_EXPORT_VERSION, ".linux-arm64.tar.gz"] x86_64: *node_delivery_files_amd64 scripts: prepare: node_prepare.sh diff --git a/sysom_server/sysom_monitor_server/scripts/base.yaml b/sysom_server/sysom_monitor_server/scripts/base.yaml index af10b09e885db8f2df040dc6e8311904abe7058f..a687b67258a86e6e68c6000abb26a1aa349592c4 100644 --- a/sysom_server/sysom_monitor_server/scripts/base.yaml +++ b/sysom_server/sysom_monitor_server/scripts/base.yaml @@ -23,12 +23,8 @@ config: cellLimit: -1 # set guard limit time. guard time is unlimit when cellLimit is -1,default is 50(ms) forkRun: - - cmd: "../../../ntopo" - args: [] - - cmd: "../../../sql-obs" - args: ["-y", "/etc/sysak/base.yaml"] - cmd: "../../../ioMonitor" - args: ["-y", "/etc/sysak/base.yaml"] + args: ["-y", "/etc/sysak/base.yaml", "-s", "await=5,iops=1000,diagIolat=on"] pushTo: to: "Influx" @@ -47,15 +43,17 @@ container: "cg_cpu_cfs_quota", "cg_mem_drcm_glob_latency", "cg_memory_util", - "cg_cpu_stat_sample", + "cg_cpu_stat", "cg_cpuacct_stat", + "cg_cpuacct_proc_stat", "cg_memory_drcm_latency", "cg_memory_fail_cnt", "cg_memory_dcmp_latency", "cg_cpuacct_wait_latency", "con_net_stat", "cg_blkio_stat", - "podmem", + "cg_memory_oom_cnt", + "podmem" ] luaPlugins: @@ -75,6 +73,7 @@ luaPlugins: "proc_cgroups", "proc_softirqs", "proc_softnet_stat", + "proc_fd" ] resctrl: @@ -118,6 +117,10 @@ plugins: description: "collect pmu events" - so: cpufreq description: "collect cpu frequence of perf cpu" + - so: sql-obs + description: "collect mysql observ." + - so: ntopo_unity + description: "network topology" metrics: - title: sysom_java_app @@ -186,6 +189,11 @@ metrics: head: counter help: "file system information." type: "gauge" + - title: sysom_file_descriptor + from: procfd + head: type + help: "used file descriptor number." + type: "gauge" - title: sysom_sock_stat from: sock_stat head: value @@ -352,6 +360,26 @@ metrics: head: value help: "alarm on os exception of mysqld" type: "gauge" + - title: sysom_iolatency + from: sysom_iolatency + head: value + help: "IO average processing latency information" + type: "gauge" + - title: sysom_iolatency_max + from: sysom_iolatency_max + head: value + help: "IO Max processing latency information" + type: "gauge" + - title: sysom_obser_mysqld_Disk_usage + from: sysom_obser_mysqld_Disk_usage + head: value + help: "Disk usage information about mysqld" + type: "gauge" + - title: sysom_obser_mysqld_storage_usage + from: sysom_obser_mysqld_storage_usage + head: value + help: "Table usage information about mysqld" + type: "gauge" - title: sysom_podmem from: podmem head: value @@ -367,6 +395,11 @@ metrics: head: value help: "sysom_container_memory_util" type: "gauge" + - title: sysom_container_memory_oomcnt + from: cg_memoom_cnt + head: value + help: "container oom event count" + type: "counter" - title: sysom_container_memgdrcm_latency from: cgGlbDrcmLatency head: value @@ -392,6 +425,11 @@ metrics: head: value help: "cpuacct/cpuacct.stat" type: "gauge" + - title: sysom_container_proc_stat + from: cg_proc_stat + head: value + help: "container cpuacct proc stat" + type: "gauge" - title: sysom_container_cfs_quota from: cgCpuQuota head: value diff --git a/sysom_server/sysom_monitor_server/scripts/node_init.sh b/sysom_server/sysom_monitor_server/scripts/node_init.sh index efbff7a97c37e789205685d8db8487bf55c19c4d..7b5df6c10c69a2597f50144111bee76404909d51 100755 --- a/sysom_server/sysom_monitor_server/scripts/node_init.sh +++ b/sysom_server/sysom_monitor_server/scripts/node_init.sh @@ -2,7 +2,7 @@ RESOURCE_DIR=${NODE_HOME}/${SERVICE_NAME} if [ "$SYSAK_VERTION" == "" ]; then - export SYSAK_VERTION=2.4.0-1 + export SYSAK_VERTION=3.2.0-1 fi if [ "$NODE_EXPORT_VERSION" == "" ]; then export NODE_EXPORT_VERSION=1.5.0 diff --git a/sysom_web/cypress/e2e/app_observable/java_process.cy.js b/sysom_web/cypress/e2e/app_observable/java_process.cy.js index 629f88b0b4ff8a9170c50f5f0ed6686773493a5e..01abd4c999bbcd133b6196b73cf6e2c5fd4d4056 100644 --- a/sysom_web/cypress/e2e/app_observable/java_process.cy.js +++ b/sysom_web/cypress/e2e/app_observable/java_process.cy.js @@ -10,7 +10,7 @@ describe("SysOM Migration Monitor Dashboard Test", () => { cy.visit("/app_observable/process_app"); // 2. 等待页面加载完成 - cy.wait(1000); + cy.wait(3000); // 运行时间(Stat面板数值类型) cy.getPannelContentByTitle("运行时间") @@ -75,14 +75,14 @@ describe("SysOM Migration Monitor Dashboard Test", () => { // cpu占用率(Time series 面板) - cy.getPannelContentByTitle("cpu占用率折线图").find("tbody tr").should("have.length", 2); // Legend 有两列 - cy.getPannelContentByTitle("cpu占用率折线图").find("tbody tr").eq(0).find("td").eq(0).contains("内核态cpu_sys"); // 第一列的第一行是 内核态cpu_sys - cy.getPannelContentByTitle("cpu占用率折线图").find("tbody tr").eq(0).find("td").eq(1).contains(/\d+/).then(($el) => { // 第二列的第二行是数值 + cy.getPannelContentByTitle("cpu占用率").find("tbody tr").should("have.length", 2); // Legend 有两列 + cy.getPannelContentByTitle("cpu占用率").find("tbody tr").eq(0).find("td").eq(0).contains("内核态cpu_sys"); // 第一列的第一行是 内核态cpu_sys + cy.getPannelContentByTitle("cpu占用率").find("tbody tr").eq(0).find("td").eq(1).contains(/\d+/).then(($el) => { // 第二列的第二行是数值 const num = parseFloat($el.text()); expect(num).to.be.least(0); }); - cy.getPannelContentByTitle("cpu占用率折线图").find("tbody tr").eq(1).find("td").eq(0).contains("用户态cpu_user"); // 第一列的第二行是 用户态cpu_user - cy.getPannelContentByTitle("cpu占用率折线图").find("tbody tr").eq(1).find("td").eq(1).contains(/\d+/).then(($el) => { // 第二列的第二行是数值 + cy.getPannelContentByTitle("cpu占用率").find("tbody tr").eq(1).find("td").eq(0).contains("用户态cpu_user"); // 第一列的第二行是 用户态cpu_user + cy.getPannelContentByTitle("cpu占用率").find("tbody tr").eq(1).find("td").eq(1).contains(/\d+/).then(($el) => { // 第二列的第二行是数值 const num = parseFloat($el.text()); expect(num).to.be.least(0); });