diff --git a/monitor/en/redis-v2/manifest.yaml b/monitor/en/redis-v2/manifest.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aea03037303851a35017dc7ed2c1f326eef77436 --- /dev/null +++ b/monitor/en/redis-v2/manifest.yaml @@ -0,0 +1,5 @@ +title: Redis Detection Library +tags: + + - Monitor +url: diff --git a/monitor/en/redis-v2/meta.json b/monitor/en/redis-v2/meta.json new file mode 100644 index 0000000000000000000000000000000000000000..e601b5c5ba5421faf782d76cf81a4e6643e1c37a --- /dev/null +++ b/monitor/en/redis-v2/meta.json @@ -0,0 +1,912 @@ +{ + "checkers": [ + { + "jsonScript": { + "type": "simpleCheck", + "every": "5m", + "title": "Redis memory usage on host {{host}} is too high: {{Result}}%", + "groupBy": [ + "server", + "host" + ], + "message": "Redis Server: {{server}}, Host: {{host}}. Redis memory usage is too high, detection result: {{Result}}%", + "targets": [ + { + "dql": "eval(A/B*100, A=\"M::`redis`:(avg(`used_memory`)) BY `server`, `host`\", B=\"M::`redis`:(avg(`maxmemory`)) BY `server`, `host`\")", + "alias": "Result", + "qtype": "dql" + } + ], + "channels": [], + "interval": 300, + "atAccounts": [], + "checkerOpt": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "90" + ], + "operator": ">=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + }, + { + "status": "error", + "conditions": [ + { + "alias": "Result", + "operands": [ + "80" + ], + "operator": ">=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + }, + { + "status": "warning", + "conditions": [ + { + "alias": "Result", + "operands": [ + "70" + ], + "operator": ">=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "infoEvent": false, + "combineExpr": "A && B", + "openMatchTimes": false + }, + "noDataTitle": "", + "noDataMessage": "", + "atNoDataAccounts": [], + "disableCheckEndTime": false, + "recoverNeedPeriodCount": 2 + }, + "extend": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "90" + ], + "operator": ">=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + }, + { + "status": "error", + "conditions": [ + { + "alias": "Result", + "operands": [ + "80" + ], + "operator": ">=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + }, + { + "status": "warning", + "conditions": [ + { + "alias": "Result", + "operands": [ + "70" + ], + "operator": ">=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "manager": [], + "funcName": "", + "querylist": [ + { + "uuid": "47d75378-8616-4a0f-86e3-0f3f6e31fc1e", + "qtype": "dql", + "query": { + "q": "eval(A/B*100, A=\"M::`redis`:(avg(`used_memory`)) BY `server`, `host`\", B=\"M::`redis`:(avg(`maxmemory`)) BY `server`, `host`\")", + "code": "Result", + "type": "expression", + "alias": "", + "children": [ + { + "q": "M::`redis`:(avg(`used_memory`)) BY `server`, `host`", + "code": "A", + "fill": null, + "type": "dql", + "alias": "", + "field": "used_memory", + "index": "", + "fillNum": null, + "filters": [], + "groupBy": [ + "server", + "host" + ], + "indexes": [], + "labelOp": "", + "interval": "", + "fieldFunc": "avg", + "namespace": "metric", + "dataSource": "redis", + "queryFuncs": [], + "withLabels": [], + "groupByTime": "", + "dataSourceFunc": "" + }, + { + "q": "M::`redis`:(avg(`maxmemory`)) BY `server`, `host`", + "code": "B", + "fill": null, + "type": "dql", + "alias": "", + "field": "maxmemory", + "index": "", + "fillNum": null, + "filters": [], + "groupBy": [ + "server", + "host" + ], + "indexes": [], + "labelOp": "", + "interval": "", + "fieldFunc": "avg", + "namespace": "metric", + "dataSource": "redis", + "queryFuncs": [], + "withLabels": [], + "groupByTime": "", + "dataSourceFunc": "" + } + ], + "funcList": [], + "expression": "A/B*100" + }, + "datasource": "dataflux" + } + ], + "issueLevelUUID": "", + "needRecoverIssue": false, + "isNeedCreateIssue": false + }, + "is_disable": false, + "tagInfo": [], + "secret": "", + "type": "trigger", + "monitorName": "default", + "alertPolicyNames": [] + }, + { + "jsonScript": { + "type": "seniorMutationsCheck", + "every": "1m", + "range": 300, + "title": "Redis connected clients on host {{host}} surged, value: {{Result}}", + "groupBy": [ + "host", + "server" + ], + "message": "", + "range_2": -1, + "targets": [ + { + "dql": "M::`redis`:(max(`connected_clients`)) BY `host`, `server`", + "alias": "Result", + "qtype": "dql" + } + ], + "channels": [], + "interval": 300, + "atAccounts": [], + "checkerOpt": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "100" + ], + "operator": ">=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "diffMode": "value", + "direction": "up", + "threshold": { + "value": null, + "status": false, + "operator": ">" + }, + "combineExpr": "A && B" + }, + "noDataTitle": "", + "periodBefore": 0, + "noDataMessage": "", + "atNoDataAccounts": [], + "disableCheckEndTime": false, + "recoverNeedPeriodCount": 2 + }, + "extend": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "100" + ], + "operator": ">=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "manager": [], + "funcName": "", + "querylist": [ + { + "uuid": "9b527470-095a-40cd-b546-b20af98d655c", + "qtype": "dql", + "query": { + "q": "M::`redis`:(max(`connected_clients`)) BY `host`, `server`", + "code": "Result", + "fill": null, + "type": "dql", + "alias": "", + "field": "connected_clients", + "index": "", + "fillNum": null, + "filters": [], + "groupBy": [ + "host", + "server" + ], + "indexes": [], + "labelOp": "", + "funcList": [], + "interval": "", + "fieldFunc": "max", + "namespace": "metric", + "dataSource": "redis", + "queryFuncs": [], + "withLabels": [], + "groupByTime": "", + "dataSourceFunc": "" + }, + "datasource": "dataflux" + } + ], + "issueLevelUUID": "", + "needRecoverIssue": false, + "isNeedCreateIssue": false + }, + "is_disable": false, + "tagInfo": [], + "secret": "", + "type": "trigger", + "monitorName": "default", + "alertPolicyNames": [] + }, + { + "jsonScript": { + "type": "simpleCheck", + "every": "5m", + "title": "Redis slots allocation failed on node {{server}}", + "groupBy": [ + "host", + "server" + ], + "message": "", + "targets": [ + { + "dql": "M::`redis`:(max(`cluster_slots_fail`)) BY `host`, `server`", + "alias": "Result", + "qtype": "dql" + } + ], + "channels": [], + "interval": 300, + "atAccounts": [], + "checkerOpt": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "1" + ], + "operator": ">=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "combineExpr": "A && B" + }, + "noDataTitle": "", + "noDataMessage": "", + "atNoDataAccounts": [], + "disableCheckEndTime": false, + "recoverNeedPeriodCount": 2 + }, + "extend": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "1" + ], + "operator": ">=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "manager": [], + "funcName": "", + "querylist": [ + { + "uuid": "25ba38bf-27f6-4fa7-bbf4-f9cf0134ef59", + "qtype": "dql", + "query": { + "q": "M::`redis`:(max(`cluster_slots_fail`)) BY `host`, `server`", + "code": "Result", + "fill": null, + "type": "dql", + "alias": "", + "field": "cluster_slots_fail", + "index": "", + "fillNum": null, + "filters": [], + "groupBy": [ + "host", + "server" + ], + "indexes": [], + "labelOp": "", + "funcList": [], + "interval": "", + "fieldFunc": "max", + "namespace": "metric", + "dataSource": "redis", + "queryFuncs": [], + "withLabels": [], + "groupByTime": "", + "dataSourceFunc": "" + }, + "datasource": "dataflux" + } + ], + "issueLevelUUID": "", + "needRecoverIssue": false, + "isNeedCreateIssue": false + }, + "is_disable": false, + "tagInfo": [], + "secret": "", + "type": "trigger", + "monitorName": "default", + "alertPolicyNames": [] + }, + { + "jsonScript": { + "type": "simpleCheck", + "every": "5m", + "title": "redis_cluster status abnormal on node {{server}}", + "groupBy": [ + "host", + "server" + ], + "message": "", + "targets": [ + { + "dql": "M::`redis`:(min(`cluster_state`)) BY `host`, `server`", + "alias": "Result", + "qtype": "dql" + } + ], + "channels": [], + "interval": 300, + "atAccounts": [], + "checkerOpt": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "1" + ], + "operator": "!=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "combineExpr": "A && B" + }, + "noDataTitle": "", + "noDataMessage": "", + "atNoDataAccounts": [], + "disableCheckEndTime": false, + "recoverNeedPeriodCount": 2 + }, + "extend": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "1" + ], + "operator": "!=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "manager": [], + "funcName": "", + "querylist": [ + { + "uuid": "f3a8299c-a395-42ca-9032-01d761869104", + "qtype": "dql", + "query": { + "q": "M::`redis`:(min(`cluster_state`)) BY `host`, `server`", + "code": "Result", + "fill": null, + "type": "dql", + "alias": "", + "field": "cluster_state", + "index": "", + "fillNum": null, + "filters": [], + "groupBy": [ + "host", + "server" + ], + "indexes": [], + "labelOp": "", + "funcList": [], + "interval": "", + "fieldFunc": "min", + "namespace": "metric", + "dataSource": "redis", + "queryFuncs": [], + "withLabels": [], + "groupByTime": "", + "dataSourceFunc": "" + }, + "datasource": "dataflux" + } + ], + "issueLevelUUID": "", + "needRecoverIssue": false, + "isNeedCreateIssue": false + }, + "is_disable": false, + "tagInfo": [], + "secret": "", + "type": "trigger", + "monitorName": "default", + "alertPolicyNames": [] + }, + { + "jsonScript": { + "type": "simpleCheck", + "every": "5m", + "title": "Redis on host {{host}} has restarted, please confirm if it is normal", + "groupBy": [ + "host", + "server" + ], + "message": "", + "targets": [ + { + "dql": "M::`redis`:(avg(`uptime_in_seconds`)) BY `host`, `server`", + "alias": "Result", + "qtype": "dql" + } + ], + "channels": [], + "interval": 300, + "atAccounts": [], + "checkerOpt": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "600" + ], + "operator": "<=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "combineExpr": "A && B" + }, + "noDataTitle": "Host {{host}} Redis -- uptime_in_seconds metric gap, please confirm if Redis process exists", + "noDataAction": "noDataEvent", + "noDataMessage": "", + "noDataInterval": 300, + "atNoDataAccounts": [], + "disableCheckEndTime": false, + "recoverNeedPeriodCount": 2 + }, + "extend": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "600" + ], + "operator": "<=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "manager": [], + "funcName": "", + "querylist": [ + { + "uuid": "cf4b98d0-fd9f-43d6-99b3-303e51e6e531", + "qtype": "dql", + "query": { + "q": "M::`redis`:(avg(`uptime_in_seconds`)) BY `host`, `server`", + "code": "Result", + "fill": null, + "type": "dql", + "alias": "", + "field": "uptime_in_seconds", + "index": "", + "fillNum": null, + "filters": [], + "groupBy": [ + "host", + "server" + ], + "indexes": [], + "labelOp": "", + "funcList": [], + "interval": "", + "fieldFunc": "avg", + "namespace": "metric", + "dataSource": "redis", + "queryFuncs": [], + "withLabels": [], + "groupByTime": "", + "dataSourceFunc": "" + }, + "datasource": "dataflux" + } + ], + "issueLevelUUID": "", + "needRecoverIssue": false, + "isNeedCreateIssue": false + }, + "is_disable": false, + "tagInfo": [], + "secret": "", + "type": "trigger", + "monitorName": "default", + "alertPolicyNames": [] + }, + { + "jsonScript": { + "type": "simpleCheck", + "every": "5m", + "title": "Master-replica sync disconnected for {{host}}, {{server}}, please check if nodes are normal", + "groupBy": [ + "server", + "host" + ], + "message": "", + "targets": [ + { + "dql": "M::`redis`:(last(`master_link_status`)) BY `server`, `host`", + "alias": "Result", + "qtype": "dql" + } + ], + "channels": [], + "interval": 300, + "atAccounts": [], + "checkerOpt": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "0" + ], + "operator": "=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "combineExpr": "A && B" + }, + "noDataTitle": "", + "noDataMessage": "", + "atNoDataAccounts": [], + "disableCheckEndTime": false, + "notificationMessage": "", + "recoverNeedPeriodCount": 2, + "openNotificationMessage": false, + "noDataNotificationMessage": "", + "openNoDataNotificationMessage": false + }, + "extend": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "0" + ], + "operator": "=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "manager": [], + "funcName": "", + "querylist": [ + { + "uuid": "f41a7145-41d1-4a84-a6fc-ec0a8665e669", + "qtype": "dql", + "query": { + "q": "M::`redis`:(last(`master_link_status`)) BY `server`, `host`", + "code": "Result", + "type": "simple", + "alias": "", + "field": "master_link_status", + "groupBy": [ + "server", + "host" + ], + "funcList": [], + "fieldFunc": "last", + "fieldType": "float", + "namespace": "metric", + "dataSource": "redis", + "groupByTime": "", + "additionalFields": null + }, + "datasource": "dataflux" + } + ], + "issueLevelUUID": "", + "needRecoverIssue": false, + "isNeedCreateIssue": false + }, + "is_disable": false, + "tagInfo": [], + "secret": "", + "type": "trigger", + "monitorName": "default", + "alertPolicyNames": [] + }, + { + "jsonScript": { + "type": "simpleCheck", + "every": "5m", + "title": "Master-replica sync lag is high for {{host}}, {{server}}, lag value: {{Result}} ms", + "groupBy": [ + "host", + "server" + ], + "message": "", + "targets": [ + { + "dql": "M::`redis`:(max(`slave_lag`)) BY `host`, `server`", + "alias": "Result", + "qtype": "dql" + } + ], + "channels": [], + "interval": 300, + "atAccounts": [], + "checkerOpt": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "1000" + ], + "operator": ">" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + }, + { + "status": "error", + "conditions": [ + { + "alias": "Result", + "operands": [ + "100" + ], + "operator": ">" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + }, + { + "status": "warning", + "conditions": [ + { + "alias": "Result", + "operands": [ + "10" + ], + "operator": ">" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "combineExpr": "A && B" + }, + "noDataTitle": "", + "noDataMessage": "", + "atNoDataAccounts": [], + "disableCheckEndTime": false, + "notificationMessage": "", + "recoverNeedPeriodCount": 2, + "openNotificationMessage": false, + "noDataNotificationMessage": "", + "openNoDataNotificationMessage": false + }, + "extend": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "1000" + ], + "operator": ">" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + }, + { + "status": "error", + "conditions": [ + { + "alias": "Result", + "operands": [ + "100" + ], + "operator": ">" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + }, + { + "status": "warning", + "conditions": [ + { + "alias": "Result", + "operands": [ + "10" + ], + "operator": ">" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "manager": [], + "funcName": "", + "querylist": [ + { + "uuid": "651ca34d-bb99-4673-a5f2-d30ff1ec6e29", + "qtype": "dql", + "query": { + "q": "M::`redis`:(max(`slave_lag`)) BY `host`, `server`", + "code": "Result", + "type": "simple", + "alias": "", + "field": "slave_lag", + "groupBy": [ + "host", + "server" + ], + "funcList": [], + "fieldFunc": "max", + "fieldType": "float", + "namespace": "metric", + "dataSource": "redis", + "groupByTime": "", + "additionalFields": null + }, + "datasource": "dataflux" + } + ], + "issueLevelUUID": "", + "needRecoverIssue": false, + "isNeedCreateIssue": false + }, + "is_disable": false, + "tagInfo": [], + "secret": "", + "type": "trigger", + "monitorName": "default", + "alertPolicyNames": [] + } + ] +} \ No newline at end of file diff --git a/monitor/en/redis-v2/redis.png b/monitor/en/redis-v2/redis.png new file mode 100644 index 0000000000000000000000000000000000000000..11d536ab4971d581ebdbf47c87d0c01c0a0478db Binary files /dev/null and b/monitor/en/redis-v2/redis.png differ diff --git a/monitor/en/redis/meta.json b/monitor/en/redis/meta.json index 4f23982d570f319a6a286506f9021dd74138efec..df557c6371b0735ae00674ef5052ec7497d1b83f 100644 --- a/monitor/en/redis/meta.json +++ b/monitor/en/redis/meta.json @@ -1,67 +1,912 @@ { "checkers": [ { + "jsonScript": { + "type": "simpleCheck", + "every": "5m", + "title": "Redis memory usage on host {{host}} is too high: {{Result}}%", + "groupBy": [ + "server", + "host" + ], + "message": "Redis Server: {{server}}, Host: {{host}}. Redis memory usage is too high, detection result: {{Result}}%", + "targets": [ + { + "dql": "eval(A/B*100, A=\"M::`redis_info`:(avg(`used_memory`)) BY `server`, `host`\", B=\"M::`redis_info`:(avg(`maxmemory`)) BY `server`, `host`\")", + "alias": "Result", + "qtype": "dql" + } + ], + "channels": [], + "interval": 300, + "atAccounts": [], + "checkerOpt": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "90" + ], + "operator": ">=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + }, + { + "status": "error", + "conditions": [ + { + "alias": "Result", + "operands": [ + "80" + ], + "operator": ">=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + }, + { + "status": "warning", + "conditions": [ + { + "alias": "Result", + "operands": [ + "70" + ], + "operator": ">=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "infoEvent": false, + "combineExpr": "A && B", + "openMatchTimes": false + }, + "noDataTitle": "", + "noDataMessage": "", + "atNoDataAccounts": [], + "disableCheckEndTime": false, + "recoverNeedPeriodCount": 2 + }, "extend": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "90" + ], + "operator": ">=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + }, + { + "status": "error", + "conditions": [ + { + "alias": "Result", + "operands": [ + "80" + ], + "operator": ">=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + }, + { + "status": "warning", + "conditions": [ + { + "alias": "Result", + "operands": [ + "70" + ], + "operator": ">=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "manager": [], "funcName": "", "querylist": [ { - "datasource": "dataflux", + "uuid": "47d75378-8616-4a0f-86e3-0f3f6e31fc1e", "qtype": "dql", "query": { + "q": "eval(A/B*100, A=\"M::`redis_info`:(avg(`used_memory`)) BY `server`, `host`\", B=\"M::`redis_info`:(avg(`maxmemory`)) BY `server`, `host`\")", + "code": "Result", + "type": "expression", "alias": "", + "children": [ + { + "q": "M::`redis_info`:(avg(`used_memory`)) BY `server`, `host`", + "code": "A", + "fill": null, + "type": "dql", + "alias": "", + "field": "used_memory", + "index": "", + "fillNum": null, + "filters": [], + "groupBy": [ + "server", + "host" + ], + "indexes": [], + "labelOp": "", + "interval": "", + "fieldFunc": "avg", + "namespace": "metric", + "dataSource": "redis_info", + "queryFuncs": [], + "withLabels": [], + "groupByTime": "", + "dataSourceFunc": "" + }, + { + "q": "M::`redis_info`:(avg(`maxmemory`)) BY `server`, `host`", + "code": "B", + "fill": null, + "type": "dql", + "alias": "", + "field": "maxmemory", + "index": "", + "fillNum": null, + "filters": [], + "groupBy": [ + "server", + "host" + ], + "indexes": [], + "labelOp": "", + "interval": "", + "fieldFunc": "avg", + "namespace": "metric", + "dataSource": "redis_info", + "queryFuncs": [], + "withLabels": [], + "groupByTime": "", + "dataSourceFunc": "" + } + ], + "funcList": [], + "expression": "A/B*100" + }, + "datasource": "dataflux" + } + ], + "issueLevelUUID": "", + "needRecoverIssue": false, + "isNeedCreateIssue": false + }, + "is_disable": false, + "tagInfo": [], + "secret": "", + "type": "trigger", + "monitorName": "default", + "alertPolicyNames": [] + }, + { + "jsonScript": { + "type": "seniorMutationsCheck", + "every": "1m", + "range": 300, + "title": "Redis connected clients on host {{host}} surged, value: {{Result}}", + "groupBy": [ + "host", + "server" + ], + "message": "", + "range_2": -1, + "targets": [ + { + "dql": "M::`redis_info`:(max(`connected_clients`)) BY `host`, `server`", + "alias": "Result", + "qtype": "dql" + } + ], + "channels": [], + "interval": 300, + "atAccounts": [], + "checkerOpt": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "100" + ], + "operator": ">=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "diffMode": "value", + "direction": "up", + "threshold": { + "value": null, + "status": false, + "operator": ">" + }, + "combineExpr": "A && B" + }, + "noDataTitle": "", + "periodBefore": 0, + "noDataMessage": "", + "atNoDataAccounts": [], + "disableCheckEndTime": false, + "recoverNeedPeriodCount": 2 + }, + "extend": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "100" + ], + "operator": ">=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "manager": [], + "funcName": "", + "querylist": [ + { + "uuid": "9b527470-095a-40cd-b546-b20af98d655c", + "qtype": "dql", + "query": { + "q": "M::`redis_info`:(max(`connected_clients`)) BY `host`, `server`", "code": "Result", - "dataSource": "redis", - "field": "blocked_clients", - "fieldFunc": "last", - "fieldType": "integer", + "fill": null, + "type": "dql", + "alias": "", + "field": "connected_clients", + "index": "", + "fillNum": null, + "filters": [], + "groupBy": [ + "host", + "server" + ], + "indexes": [], + "labelOp": "", "funcList": [], + "interval": "", + "fieldFunc": "max", + "namespace": "metric", + "dataSource": "redis_info", + "queryFuncs": [], + "withLabels": [], + "groupByTime": "", + "dataSourceFunc": "" + }, + "datasource": "dataflux" + } + ], + "issueLevelUUID": "", + "needRecoverIssue": false, + "isNeedCreateIssue": false + }, + "is_disable": false, + "tagInfo": [], + "secret": "", + "type": "trigger", + "monitorName": "default", + "alertPolicyNames": [] + }, + { + "jsonScript": { + "type": "simpleCheck", + "every": "5m", + "title": "Redis slots allocation failed on node {{server}}", + "groupBy": [ + "host", + "server" + ], + "message": "", + "targets": [ + { + "dql": "M::`redis_cluster`:(max(`cluster_slots_fail`)) BY `host`, `server`", + "alias": "Result", + "qtype": "dql" + } + ], + "channels": [], + "interval": 300, + "atAccounts": [], + "checkerOpt": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "1" + ], + "operator": ">=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "combineExpr": "A && B" + }, + "noDataTitle": "", + "noDataMessage": "", + "atNoDataAccounts": [], + "disableCheckEndTime": false, + "recoverNeedPeriodCount": 2 + }, + "extend": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "1" + ], + "operator": ">=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "manager": [], + "funcName": "", + "querylist": [ + { + "uuid": "25ba38bf-27f6-4fa7-bbf4-f9cf0134ef59", + "qtype": "dql", + "query": { + "q": "M::`redis_cluster`:(max(`cluster_slots_fail`)) BY `host`, `server`", + "code": "Result", + "fill": null, + "type": "dql", + "alias": "", + "field": "cluster_slots_fail", + "index": "", + "fillNum": null, + "filters": [], "groupBy": [ - "host" + "host", + "server" ], + "indexes": [], + "labelOp": "", + "funcList": [], + "interval": "", + "fieldFunc": "max", + "namespace": "metric", + "dataSource": "redis_cluster", + "queryFuncs": [], + "withLabels": [], "groupByTime": "", + "dataSourceFunc": "" + }, + "datasource": "dataflux" + } + ], + "issueLevelUUID": "", + "needRecoverIssue": false, + "isNeedCreateIssue": false + }, + "is_disable": false, + "tagInfo": [], + "secret": "", + "type": "trigger", + "monitorName": "default", + "alertPolicyNames": [] + }, + { + "jsonScript": { + "type": "simpleCheck", + "every": "5m", + "title": "redis_cluster status abnormal on node {{server}}", + "groupBy": [ + "host", + "server" + ], + "message": "", + "targets": [ + { + "dql": "M::`redis_cluster`:(min(`cluster_state`)) BY `host`, `server`", + "alias": "Result", + "qtype": "dql" + } + ], + "channels": [], + "interval": 300, + "atAccounts": [], + "checkerOpt": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "1" + ], + "operator": "!=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "combineExpr": "A && B" + }, + "noDataTitle": "", + "noDataMessage": "", + "atNoDataAccounts": [], + "disableCheckEndTime": false, + "recoverNeedPeriodCount": 2 + }, + "extend": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "1" + ], + "operator": "!=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "manager": [], + "funcName": "", + "querylist": [ + { + "uuid": "f3a8299c-a395-42ca-9032-01d761869104", + "qtype": "dql", + "query": { + "q": "M::`redis_cluster`:(min(`cluster_state`)) BY `host`, `server`", + "code": "Result", + "fill": null, + "type": "dql", + "alias": "", + "field": "cluster_state", + "index": "", + "fillNum": null, + "filters": [], + "groupBy": [ + "host", + "server" + ], + "indexes": [], + "labelOp": "", + "funcList": [], + "interval": "", + "fieldFunc": "min", "namespace": "metric", - "q": "M::`redis`:(LAST(`blocked_clients`)) by `host`", - "type": "simple" + "dataSource": "redis_cluster", + "queryFuncs": [], + "withLabels": [], + "groupByTime": "", + "dataSourceFunc": "" }, - "uuid": "166892fb-25c4-4244-9ba6-efa8b307da10" + "datasource": "dataflux" } - ] + ], + "issueLevelUUID": "", + "needRecoverIssue": false, + "isNeedCreateIssue": false }, "is_disable": false, + "tagInfo": [], + "secret": "", + "type": "trigger", + "monitorName": "default", + "alertPolicyNames": [] + }, + { "jsonScript": { + "type": "simpleCheck", + "every": "5m", + "title": "Redis on host {{host}} has restarted, please confirm if it is normal", + "groupBy": [ + "host", + "server" + ], + "message": "", + "targets": [ + { + "dql": "M::`redis_info`:(avg(`uptime_in_seconds`)) BY `host`, `server`", + "alias": "Result", + "qtype": "dql" + } + ], + "channels": [], + "interval": 300, + "atAccounts": [], "checkerOpt": { "rules": [ { - "conditionLogic": "and", + "status": "critical", "conditions": [ { "alias": "Result", "operands": [ - "0" + "600" ], - "operator": ">" + "operator": "<=" } ], - "status": "critical" + "matchTimes": 1, + "conditionLogic": "and" } - ] + ], + "combineExpr": "A && B" }, - "every": "1m", + "noDataTitle": "Host {{host}} Redis -- uptime_in_seconds metric gap, please confirm if Redis process exists", + "noDataAction": "noDataEvent", + "noDataMessage": "", + "noDataInterval": 300, + "atNoDataAccounts": [], + "disableCheckEndTime": false, + "recoverNeedPeriodCount": 2 + }, + "extend": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "600" + ], + "operator": "<=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "manager": [], + "funcName": "", + "querylist": [ + { + "uuid": "cf4b98d0-fd9f-43d6-99b3-303e51e6e531", + "qtype": "dql", + "query": { + "q": "M::`redis_info`:(avg(`uptime_in_seconds`)) BY `host`, `server`", + "code": "Result", + "fill": null, + "type": "dql", + "alias": "", + "field": "uptime_in_seconds", + "index": "", + "fillNum": null, + "filters": [], + "groupBy": [ + "host", + "server" + ], + "indexes": [], + "labelOp": "", + "funcList": [], + "interval": "", + "fieldFunc": "avg", + "namespace": "metric", + "dataSource": "redis_info", + "queryFuncs": [], + "withLabels": [], + "groupByTime": "", + "dataSourceFunc": "" + }, + "datasource": "dataflux" + } + ], + "issueLevelUUID": "", + "needRecoverIssue": false, + "isNeedCreateIssue": false + }, + "is_disable": false, + "tagInfo": [], + "secret": "", + "type": "trigger", + "monitorName": "default", + "alertPolicyNames": [] + }, + { + "jsonScript": { + "type": "simpleCheck", + "every": "5m", + "title": "Master-replica sync disconnected for {{host}}, {{server}}, please check if nodes are normal", "groupBy": [ + "server", "host" ], - "interval": 900, - "message": ">Level: {{status}} \n>Host: {{host}} \n>Content: The number of client connections waiting for blocking commands is {{ Result }}. \n>Suggest: Delays or other issues may prevent the source list from being populated. While blocked clients by themselves do not cause alarm, if you see a consistently non-zero value for this metric, it should be a cause for concern.", + "message": "", + "targets": [ + { + "dql": "M::`redis_replica`:(last(`master_link_status`)) BY `server`, `host`", + "alias": "Result", + "qtype": "dql" + } + ], + "channels": [], + "interval": 300, + "atAccounts": [], + "checkerOpt": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "0" + ], + "operator": "=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "combineExpr": "A && B" + }, + "noDataTitle": "", + "noDataMessage": "", + "atNoDataAccounts": [], + "disableCheckEndTime": false, + "notificationMessage": "", + "recoverNeedPeriodCount": 2, + "openNotificationMessage": false, + "noDataNotificationMessage": "", + "openNoDataNotificationMessage": false + }, + "extend": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "0" + ], + "operator": "=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "manager": [], + "funcName": "", + "querylist": [ + { + "uuid": "f41a7145-41d1-4a84-a6fc-ec0a8665e669", + "qtype": "dql", + "query": { + "q": "M::`redis_replica`:(last(`master_link_status`)) BY `server`, `host`", + "code": "Result", + "type": "simple", + "alias": "", + "field": "master_link_status", + "groupBy": [ + "server", + "host" + ], + "funcList": [], + "fieldFunc": "last", + "fieldType": "float", + "namespace": "metric", + "dataSource": "redis_replica", + "groupByTime": "", + "additionalFields": null + }, + "datasource": "dataflux" + } + ], + "issueLevelUUID": "", + "needRecoverIssue": false, + "isNeedCreateIssue": false + }, + "is_disable": false, + "tagInfo": [], + "secret": "", + "type": "trigger", + "monitorName": "default", + "alertPolicyNames": [] + }, + { + "jsonScript": { + "type": "simpleCheck", + "every": "5m", + "title": "Master-replica sync lag is high for {{host}}, {{server}}, lag value: {{Result}} ms", + "groupBy": [ + "host", + "server" + ], + "message": "", "targets": [ { + "dql": "M::`redis_replica`:(max(`slave_lag`)) BY `host`, `server`", "alias": "Result", - "dql": "M::`redis`:(LAST(`blocked_clients`)) by `host`" + "qtype": "dql" } ], - "title": "The number of Redis client connections waiting for blocking commands on Host {{ host }} increased abnormally.", - "type": "simpleCheck" + "channels": [], + "interval": 300, + "atAccounts": [], + "checkerOpt": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "1000" + ], + "operator": ">" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + }, + { + "status": "error", + "conditions": [ + { + "alias": "Result", + "operands": [ + "100" + ], + "operator": ">" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + }, + { + "status": "warning", + "conditions": [ + { + "alias": "Result", + "operands": [ + "10" + ], + "operator": ">" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "combineExpr": "A && B" + }, + "noDataTitle": "", + "noDataMessage": "", + "atNoDataAccounts": [], + "disableCheckEndTime": false, + "notificationMessage": "", + "recoverNeedPeriodCount": 2, + "openNotificationMessage": false, + "noDataNotificationMessage": "", + "openNoDataNotificationMessage": false }, - "monitorName": "Redis check" + "extend": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "1000" + ], + "operator": ">" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + }, + { + "status": "error", + "conditions": [ + { + "alias": "Result", + "operands": [ + "100" + ], + "operator": ">" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + }, + { + "status": "warning", + "conditions": [ + { + "alias": "Result", + "operands": [ + "10" + ], + "operator": ">" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "manager": [], + "funcName": "", + "querylist": [ + { + "uuid": "651ca34d-bb99-4673-a5f2-d30ff1ec6e29", + "qtype": "dql", + "query": { + "q": "M::`redis_replica`:(max(`slave_lag`)) BY `host`, `server`", + "code": "Result", + "type": "simple", + "alias": "", + "field": "slave_lag", + "groupBy": [ + "host", + "server" + ], + "funcList": [], + "fieldFunc": "max", + "fieldType": "float", + "namespace": "metric", + "dataSource": "redis_replica", + "groupByTime": "", + "additionalFields": null + }, + "datasource": "dataflux" + } + ], + "issueLevelUUID": "", + "needRecoverIssue": false, + "isNeedCreateIssue": false + }, + "is_disable": false, + "tagInfo": [], + "secret": "", + "type": "trigger", + "monitorName": "default", + "alertPolicyNames": [] } ] -} +} \ No newline at end of file diff --git a/monitor/zh/redis-v2/manifest.yaml b/monitor/zh/redis-v2/manifest.yaml new file mode 100644 index 0000000000000000000000000000000000000000..af4e13afdb33d00a540784e2fce66538a7d0f6a3 --- /dev/null +++ b/monitor/zh/redis-v2/manifest.yaml @@ -0,0 +1,5 @@ +title: Redis 检测库 +tags: + + - Monitor +url: diff --git a/monitor/zh/redis-v2/meta.json b/monitor/zh/redis-v2/meta.json new file mode 100644 index 0000000000000000000000000000000000000000..927308cdb564c867fc45e84956493a9840ef6521 --- /dev/null +++ b/monitor/zh/redis-v2/meta.json @@ -0,0 +1,912 @@ +{ + "checkers": [ + { + "jsonScript": { + "type": "simpleCheck", + "every": "5m", + "title": "{{host}} 节点 Redis 内存使用过高 {{Result}}%", + "groupBy": [ + "server", + "host" + ], + "message": "Redis Server 为 {{server}}, Host 为{{host}} 节点 Redis 内存使用过高,检测结果: {{Result}}%", + "targets": [ + { + "dql": "eval(A/B*100, A=\"M::`redis`:(avg(`used_memory`)) BY `server`, `host`\", B=\"M::`redis`:(avg(`maxmemory`)) BY `server`, `host`\")", + "alias": "Result", + "qtype": "dql" + } + ], + "channels": [], + "interval": 300, + "atAccounts": [], + "checkerOpt": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "90" + ], + "operator": ">=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + }, + { + "status": "error", + "conditions": [ + { + "alias": "Result", + "operands": [ + "80" + ], + "operator": ">=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + }, + { + "status": "warning", + "conditions": [ + { + "alias": "Result", + "operands": [ + "70" + ], + "operator": ">=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "infoEvent": false, + "combineExpr": "A && B", + "openMatchTimes": false + }, + "noDataTitle": "", + "noDataMessage": "", + "atNoDataAccounts": [], + "disableCheckEndTime": false, + "recoverNeedPeriodCount": 2 + }, + "extend": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "90" + ], + "operator": ">=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + }, + { + "status": "error", + "conditions": [ + { + "alias": "Result", + "operands": [ + "80" + ], + "operator": ">=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + }, + { + "status": "warning", + "conditions": [ + { + "alias": "Result", + "operands": [ + "70" + ], + "operator": ">=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "manager": [], + "funcName": "", + "querylist": [ + { + "uuid": "47d75378-8616-4a0f-86e3-0f3f6e31fc1e", + "qtype": "dql", + "query": { + "q": "eval(A/B*100, A=\"M::`redis`:(avg(`used_memory`)) BY `server`, `host`\", B=\"M::`redis`:(avg(`maxmemory`)) BY `server`, `host`\")", + "code": "Result", + "type": "expression", + "alias": "", + "children": [ + { + "q": "M::`redis`:(avg(`used_memory`)) BY `server`, `host`", + "code": "A", + "fill": null, + "type": "dql", + "alias": "", + "field": "used_memory", + "index": "", + "fillNum": null, + "filters": [], + "groupBy": [ + "server", + "host" + ], + "indexes": [], + "labelOp": "", + "interval": "", + "fieldFunc": "avg", + "namespace": "metric", + "dataSource": "redis", + "queryFuncs": [], + "withLabels": [], + "groupByTime": "", + "dataSourceFunc": "" + }, + { + "q": "M::`redis`:(avg(`maxmemory`)) BY `server`, `host`", + "code": "B", + "fill": null, + "type": "dql", + "alias": "", + "field": "maxmemory", + "index": "", + "fillNum": null, + "filters": [], + "groupBy": [ + "server", + "host" + ], + "indexes": [], + "labelOp": "", + "interval": "", + "fieldFunc": "avg", + "namespace": "metric", + "dataSource": "redis", + "queryFuncs": [], + "withLabels": [], + "groupByTime": "", + "dataSourceFunc": "" + } + ], + "funcList": [], + "expression": "A/B*100" + }, + "datasource": "dataflux" + } + ], + "issueLevelUUID": "", + "needRecoverIssue": false, + "isNeedCreateIssue": false + }, + "is_disable": false, + "tagInfo": [], + "secret": "", + "type": "trigger", + "monitorName": "default", + "alertPolicyNames": [] + }, + { + "jsonScript": { + "type": "seniorMutationsCheck", + "every": "1m", + "range": 300, + "title": "{{host}} 节点 Redis 连接数发生突增 ,突增值为 {{Result}}", + "groupBy": [ + "host", + "server" + ], + "message": "", + "range_2": -1, + "targets": [ + { + "dql": "M::`redis`:(max(`connected_clients`)) BY `host`, `server`", + "alias": "Result", + "qtype": "dql" + } + ], + "channels": [], + "interval": 300, + "atAccounts": [], + "checkerOpt": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "100" + ], + "operator": ">=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "diffMode": "value", + "direction": "up", + "threshold": { + "value": null, + "status": false, + "operator": ">" + }, + "combineExpr": "A && B" + }, + "noDataTitle": "", + "periodBefore": 0, + "noDataMessage": "", + "atNoDataAccounts": [], + "disableCheckEndTime": false, + "recoverNeedPeriodCount": 2 + }, + "extend": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "100" + ], + "operator": ">=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "manager": [], + "funcName": "", + "querylist": [ + { + "uuid": "9b527470-095a-40cd-b546-b20af98d655c", + "qtype": "dql", + "query": { + "q": "M::`redis`:(max(`connected_clients`)) BY `host`, `server`", + "code": "Result", + "fill": null, + "type": "dql", + "alias": "", + "field": "connected_clients", + "index": "", + "fillNum": null, + "filters": [], + "groupBy": [ + "host", + "server" + ], + "indexes": [], + "labelOp": "", + "funcList": [], + "interval": "", + "fieldFunc": "max", + "namespace": "metric", + "dataSource": "redis", + "queryFuncs": [], + "withLabels": [], + "groupByTime": "", + "dataSourceFunc": "" + }, + "datasource": "dataflux" + } + ], + "issueLevelUUID": "", + "needRecoverIssue": false, + "isNeedCreateIssue": false + }, + "is_disable": false, + "tagInfo": [], + "secret": "", + "type": "trigger", + "monitorName": "default", + "alertPolicyNames": [] + }, + { + "jsonScript": { + "type": "simpleCheck", + "every": "5m", + "title": "{{server}} 节点Redis slots 分配失败", + "groupBy": [ + "host", + "server" + ], + "message": "", + "targets": [ + { + "dql": "M::`redis`:(max(`cluster_slots_fail`)) BY `host`, `server`", + "alias": "Result", + "qtype": "dql" + } + ], + "channels": [], + "interval": 300, + "atAccounts": [], + "checkerOpt": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "1" + ], + "operator": ">=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "combineExpr": "A && B" + }, + "noDataTitle": "", + "noDataMessage": "", + "atNoDataAccounts": [], + "disableCheckEndTime": false, + "recoverNeedPeriodCount": 2 + }, + "extend": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "1" + ], + "operator": ">=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "manager": [], + "funcName": "", + "querylist": [ + { + "uuid": "25ba38bf-27f6-4fa7-bbf4-f9cf0134ef59", + "qtype": "dql", + "query": { + "q": "M::`redis`:(max(`cluster_slots_fail`)) BY `host`, `server`", + "code": "Result", + "fill": null, + "type": "dql", + "alias": "", + "field": "cluster_slots_fail", + "index": "", + "fillNum": null, + "filters": [], + "groupBy": [ + "host", + "server" + ], + "indexes": [], + "labelOp": "", + "funcList": [], + "interval": "", + "fieldFunc": "max", + "namespace": "metric", + "dataSource": "redis", + "queryFuncs": [], + "withLabels": [], + "groupByTime": "", + "dataSourceFunc": "" + }, + "datasource": "dataflux" + } + ], + "issueLevelUUID": "", + "needRecoverIssue": false, + "isNeedCreateIssue": false + }, + "is_disable": false, + "tagInfo": [], + "secret": "", + "type": "trigger", + "monitorName": "default", + "alertPolicyNames": [] + }, + { + "jsonScript": { + "type": "simpleCheck", + "every": "5m", + "title": "{{server}} 节点 redis_cluster 状态异常", + "groupBy": [ + "host", + "server" + ], + "message": "", + "targets": [ + { + "dql": "M::`redis`:(min(`cluster_state`)) BY `host`, `server`", + "alias": "Result", + "qtype": "dql" + } + ], + "channels": [], + "interval": 300, + "atAccounts": [], + "checkerOpt": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "1" + ], + "operator": "!=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "combineExpr": "A && B" + }, + "noDataTitle": "", + "noDataMessage": "", + "atNoDataAccounts": [], + "disableCheckEndTime": false, + "recoverNeedPeriodCount": 2 + }, + "extend": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "1" + ], + "operator": "!=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "manager": [], + "funcName": "", + "querylist": [ + { + "uuid": "f3a8299c-a395-42ca-9032-01d761869104", + "qtype": "dql", + "query": { + "q": "M::`redis`:(min(`cluster_state`)) BY `host`, `server`", + "code": "Result", + "fill": null, + "type": "dql", + "alias": "", + "field": "cluster_state", + "index": "", + "fillNum": null, + "filters": [], + "groupBy": [ + "host", + "server" + ], + "indexes": [], + "labelOp": "", + "funcList": [], + "interval": "", + "fieldFunc": "min", + "namespace": "metric", + "dataSource": "redis", + "queryFuncs": [], + "withLabels": [], + "groupByTime": "", + "dataSourceFunc": "" + }, + "datasource": "dataflux" + } + ], + "issueLevelUUID": "", + "needRecoverIssue": false, + "isNeedCreateIssue": false + }, + "is_disable": false, + "tagInfo": [], + "secret": "", + "type": "trigger", + "monitorName": "default", + "alertPolicyNames": [] + }, + { + "jsonScript": { + "type": "simpleCheck", + "every": "5m", + "title": "{{host}} 节点redis发生重启,请确认是否正常", + "groupBy": [ + "host", + "server" + ], + "message": "", + "targets": [ + { + "dql": "M::`redis`:(avg(`uptime_in_seconds`)) BY `host`, `server`", + "alias": "Result", + "qtype": "dql" + } + ], + "channels": [], + "interval": 300, + "atAccounts": [], + "checkerOpt": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "600" + ], + "operator": "<=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "combineExpr": "A && B" + }, + "noDataTitle": "{{host}} 节点redis -- uptime_in_seconds 指标断档,请确认redis进程是否存在", + "noDataAction": "noDataEvent", + "noDataMessage": "", + "noDataInterval": 300, + "atNoDataAccounts": [], + "disableCheckEndTime": false, + "recoverNeedPeriodCount": 2 + }, + "extend": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "600" + ], + "operator": "<=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "manager": [], + "funcName": "", + "querylist": [ + { + "uuid": "cf4b98d0-fd9f-43d6-99b3-303e51e6e531", + "qtype": "dql", + "query": { + "q": "M::`redis`:(avg(`uptime_in_seconds`)) BY `host`, `server`", + "code": "Result", + "fill": null, + "type": "dql", + "alias": "", + "field": "uptime_in_seconds", + "index": "", + "fillNum": null, + "filters": [], + "groupBy": [ + "host", + "server" + ], + "indexes": [], + "labelOp": "", + "funcList": [], + "interval": "", + "fieldFunc": "avg", + "namespace": "metric", + "dataSource": "redis", + "queryFuncs": [], + "withLabels": [], + "groupByTime": "", + "dataSourceFunc": "" + }, + "datasource": "dataflux" + } + ], + "issueLevelUUID": "", + "needRecoverIssue": false, + "isNeedCreateIssue": false + }, + "is_disable": false, + "tagInfo": [], + "secret": "", + "type": "trigger", + "monitorName": "default", + "alertPolicyNames": [] + }, + { + "jsonScript": { + "type": "simpleCheck", + "every": "5m", + "title": "{{host}} ,{{server}} 主从同步断开,请注意查看节点是否正常", + "groupBy": [ + "server", + "host" + ], + "message": "", + "targets": [ + { + "dql": "M::`redis`:(last(`master_link_status`)) BY `server`, `host`", + "alias": "Result", + "qtype": "dql" + } + ], + "channels": [], + "interval": 300, + "atAccounts": [], + "checkerOpt": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "0" + ], + "operator": "=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "combineExpr": "A && B" + }, + "noDataTitle": "", + "noDataMessage": "", + "atNoDataAccounts": [], + "disableCheckEndTime": false, + "notificationMessage": "", + "recoverNeedPeriodCount": 2, + "openNotificationMessage": false, + "noDataNotificationMessage": "", + "openNoDataNotificationMessage": false + }, + "extend": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "0" + ], + "operator": "=" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "manager": [], + "funcName": "", + "querylist": [ + { + "uuid": "f41a7145-41d1-4a84-a6fc-ec0a8665e669", + "qtype": "dql", + "query": { + "q": "M::`redis`:(last(`master_link_status`)) BY `server`, `host`", + "code": "Result", + "type": "simple", + "alias": "", + "field": "master_link_status", + "groupBy": [ + "server", + "host" + ], + "funcList": [], + "fieldFunc": "last", + "fieldType": "float", + "namespace": "metric", + "dataSource": "redis", + "groupByTime": "", + "additionalFields": null + }, + "datasource": "dataflux" + } + ], + "issueLevelUUID": "", + "needRecoverIssue": false, + "isNeedCreateIssue": false + }, + "is_disable": false, + "tagInfo": [], + "secret": "", + "type": "trigger", + "monitorName": "default", + "alertPolicyNames": [] + }, + { + "jsonScript": { + "type": "simpleCheck", + "every": "5m", + "title": "{{host}} , {{server}} 主从同步延迟较高,延迟值为{{Result}} ms", + "groupBy": [ + "host", + "server" + ], + "message": "", + "targets": [ + { + "dql": "M::`redis`:(max(`slave_lag`)) BY `host`, `server`", + "alias": "Result", + "qtype": "dql" + } + ], + "channels": [], + "interval": 300, + "atAccounts": [], + "checkerOpt": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "1000" + ], + "operator": ">" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + }, + { + "status": "error", + "conditions": [ + { + "alias": "Result", + "operands": [ + "100" + ], + "operator": ">" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + }, + { + "status": "warning", + "conditions": [ + { + "alias": "Result", + "operands": [ + "10" + ], + "operator": ">" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "combineExpr": "A && B" + }, + "noDataTitle": "", + "noDataMessage": "", + "atNoDataAccounts": [], + "disableCheckEndTime": false, + "notificationMessage": "", + "recoverNeedPeriodCount": 2, + "openNotificationMessage": false, + "noDataNotificationMessage": "", + "openNoDataNotificationMessage": false + }, + "extend": { + "rules": [ + { + "status": "critical", + "conditions": [ + { + "alias": "Result", + "operands": [ + "1000" + ], + "operator": ">" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + }, + { + "status": "error", + "conditions": [ + { + "alias": "Result", + "operands": [ + "100" + ], + "operator": ">" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + }, + { + "status": "warning", + "conditions": [ + { + "alias": "Result", + "operands": [ + "10" + ], + "operator": ">" + } + ], + "matchTimes": 1, + "conditionLogic": "and" + } + ], + "manager": [], + "funcName": "", + "querylist": [ + { + "uuid": "651ca34d-bb99-4673-a5f2-d30ff1ec6e29", + "qtype": "dql", + "query": { + "q": "M::`redis`:(max(`slave_lag`)) BY `host`, `server`", + "code": "Result", + "type": "simple", + "alias": "", + "field": "slave_lag", + "groupBy": [ + "host", + "server" + ], + "funcList": [], + "fieldFunc": "max", + "fieldType": "float", + "namespace": "metric", + "dataSource": "redis", + "groupByTime": "", + "additionalFields": null + }, + "datasource": "dataflux" + } + ], + "issueLevelUUID": "", + "needRecoverIssue": false, + "isNeedCreateIssue": false + }, + "is_disable": false, + "tagInfo": [], + "secret": "", + "type": "trigger", + "monitorName": "default", + "alertPolicyNames": [] + } + ] +} \ No newline at end of file diff --git a/monitor/zh/redis-v2/redis.png b/monitor/zh/redis-v2/redis.png new file mode 100644 index 0000000000000000000000000000000000000000..11d536ab4971d581ebdbf47c87d0c01c0a0478db Binary files /dev/null and b/monitor/zh/redis-v2/redis.png differ diff --git a/monitor/zh/redis/meta.json b/monitor/zh/redis/meta.json index 6a2ef1ae20db9809e4af8db077d06e70b94679ef..08f41576eb9e2b6c0dcfd63001a34ae85c8f6dcf 100644 --- a/monitor/zh/redis/meta.json +++ b/monitor/zh/redis/meta.json @@ -214,7 +214,7 @@ "range_2": -1, "targets": [ { - "dql": "M::`redis_info`:(avg(`connected_clients`)) BY `host`, `server`", + "dql": "M::`redis_info`:(max(`connected_clients`)) BY `host`, `server`", "alias": "Result", "qtype": "dql" } @@ -279,7 +279,7 @@ "uuid": "9b527470-095a-40cd-b546-b20af98d655c", "qtype": "dql", "query": { - "q": "M::`redis_info`:(avg(`connected_clients`)) BY `host`, `server`", + "q": "M::`redis_info`:(max(`connected_clients`)) BY `host`, `server`", "code": "Result", "fill": null, "type": "dql", @@ -296,7 +296,7 @@ "labelOp": "", "funcList": [], "interval": "", - "fieldFunc": "avg", + "fieldFunc": "max", "namespace": "metric", "dataSource": "redis_info", "queryFuncs": [], @@ -330,7 +330,7 @@ "message": "", "targets": [ { - "dql": "M::`redis_cluster`:(avg(`cluster_slots_fail`)) BY `host`, `server`", + "dql": "M::`redis_cluster`:(max(`cluster_slots_fail`)) BY `host`, `server`", "alias": "Result", "qtype": "dql" } @@ -387,7 +387,7 @@ "uuid": "25ba38bf-27f6-4fa7-bbf4-f9cf0134ef59", "qtype": "dql", "query": { - "q": "M::`redis_cluster`:(avg(`cluster_slots_fail`)) BY `host`, `server`", + "q": "M::`redis_cluster`:(max(`cluster_slots_fail`)) BY `host`, `server`", "code": "Result", "fill": null, "type": "dql", @@ -404,7 +404,7 @@ "labelOp": "", "funcList": [], "interval": "", - "fieldFunc": "avg", + "fieldFunc": "max", "namespace": "metric", "dataSource": "redis_cluster", "queryFuncs": [], @@ -438,7 +438,7 @@ "message": "", "targets": [ { - "dql": "M::`redis_cluster`:(avg(`cluster_state`)) BY `host`, `server`", + "dql": "M::`redis_cluster`:(min(`cluster_state`)) BY `host`, `server`", "alias": "Result", "qtype": "dql" } @@ -495,7 +495,7 @@ "uuid": "f3a8299c-a395-42ca-9032-01d761869104", "qtype": "dql", "query": { - "q": "M::`redis_cluster`:(avg(`cluster_state`)) BY `host`, `server`", + "q": "M::`redis_cluster`:(min(`cluster_state`)) BY `host`, `server`", "code": "Result", "fill": null, "type": "dql", @@ -512,7 +512,7 @@ "labelOp": "", "funcList": [], "interval": "", - "fieldFunc": "avg", + "fieldFunc": "min", "namespace": "metric", "dataSource": "redis_cluster", "queryFuncs": [], @@ -760,7 +760,7 @@ "message": "", "targets": [ { - "dql": "M::`redis_replica`:(avg(`slave_lag`)) BY `host`, `server`", + "dql": "M::`redis_replica`:(max(`slave_lag`)) BY `host`, `server`", "alias": "Result", "qtype": "dql" } @@ -877,7 +877,7 @@ "uuid": "651ca34d-bb99-4673-a5f2-d30ff1ec6e29", "qtype": "dql", "query": { - "q": "M::`redis_replica`:(avg(`slave_lag`)) BY `host`, `server`", + "q": "M::`redis_replica`:(max(`slave_lag`)) BY `host`, `server`", "code": "Result", "type": "simple", "alias": "", @@ -887,7 +887,7 @@ "server" ], "funcList": [], - "fieldFunc": "avg", + "fieldFunc": "max", "fieldType": "float", "namespace": "metric", "dataSource": "redis_replica",