sysSentry/avg_block_io-send-alarm-to-xalarmd.patch

74 lines
3.3 KiB
Diff
Raw Normal View History

2024-10-09 15:17:12 +08:00
From 7d5ad8f2dd87432b8f46ea5002400ee46cb6756a Mon Sep 17 00:00:00 2001
From: gaoruoshu <gaoruoshu@huawei.com>
Date: Wed, 9 Oct 2024 14:22:38 +0800
Subject: [PATCH] avg_block_io send alarm to xalarmd
---
config/tasks/avg_block_io.mod | 2 ++
.../sentryPlugins/avg_block_io/module_conn.py | 23 +++++++++++++++----
2 files changed, 21 insertions(+), 4 deletions(-)
diff --git a/config/tasks/avg_block_io.mod b/config/tasks/avg_block_io.mod
index b9b6f34..bcd063b 100644
--- a/config/tasks/avg_block_io.mod
+++ b/config/tasks/avg_block_io.mod
@@ -3,3 +3,5 @@ enabled=yes
task_start=/usr/bin/python3 /usr/bin/avg_block_io
task_stop=pkill -f /usr/bin/avg_block_io
type=oneshot
+alarm_id=1002
+alarm_clear_time=5
diff --git a/src/python/sentryPlugins/avg_block_io/module_conn.py b/src/python/sentryPlugins/avg_block_io/module_conn.py
index 0da4208..2fc5a83 100644
--- a/src/python/sentryPlugins/avg_block_io/module_conn.py
+++ b/src/python/sentryPlugins/avg_block_io/module_conn.py
@@ -16,6 +16,7 @@ import time
from .utils import is_abnormal
from sentryCollector.collect_plugin import is_iocollect_valid, get_io_data, Result_Messages
from syssentry.result import ResultLevel, report_result
+from xalarm.sentry_notify import xalarm_report, MINOR_ALM, ALARM_TYPE_OCCUR
TASK_NAME = "avg_block_io"
@@ -68,19 +69,33 @@ def process_report_data(disk_name, rw, io_data):
if not is_abnormal((disk_name, 'bio', rw), io_data):
return
+ msg = {"alarm_source": TASK_NAME, "driver_name": disk_name, "io_type": rw}
+
ctrl_stage = ['throtl', 'wbt', 'iocost', 'bfq']
for stage_name in ctrl_stage:
if is_abnormal((disk_name, stage_name, rw), io_data):
- logging.warning("{} - {} - {} report IO press".format(time.ctime(), disk_name, rw))
+ msg["reason"] = "IO press slow"
+ msg["block_stack"] = f"bio,{stage_name}"
+ logging.warning("{} - {} report IO press slow".format(disk_name, rw))
+ xalarm_report(1002, MINOR_ALM, ALARM_TYPE_OCCUR, json.dumps(msg))
return
if is_abnormal((disk_name, 'rq_driver', rw), io_data):
- logging.warning("{} - {} - {} report driver".format(time.ctime(), disk_name, rw))
+ msg["reason"] = "driver slow"
+ msg["block_stack"] = "bio,rq_driver"
+ logging.warning("{} - {} report driver slow".format(disk_name, rw))
+ xalarm_report(1002, MINOR_ALM, ALARM_TYPE_OCCUR, json.dumps(msg))
return
kernel_stage = ['gettag', 'plug', 'deadline', 'hctx', 'requeue']
for stage_name in kernel_stage:
if is_abnormal((disk_name, stage_name, rw), io_data):
- logging.warning("{} - {} - {} report kernel".format(time.ctime(), disk_name, rw))
+ msg["reason"] = "kernel slow"
+ msg["block_stack"] = f"bio,{stage_name}"
+ logging.warning("{} - {} report kernel slow".format(disk_name, rw))
+ xalarm_report(1002, MINOR_ALM, ALARM_TYPE_OCCUR, json.dumps(msg))
return
- logging.warning("{} - {} - {} report IO press".format(time.ctime(), disk_name, rw))
+ msg["reason"] = "unknown"
+ msg["block_stack"] = "bio"
+ logging.warning("{} - {} report UNKNOWN slow".format(disk_name, rw))
+ xalarm_report(1002, MINOR_ALM, ALARM_TYPE_OCCUR, json.dumps(msg))
--
2.33.0