74 lines
3.3 KiB
Diff
74 lines
3.3 KiB
Diff
|
|
From 7d5ad8f2dd87432b8f46ea5002400ee46cb6756a Mon Sep 17 00:00:00 2001
|
||
|
|
From: gaoruoshu <gaoruoshu@huawei.com>
|
||
|
|
Date: Wed, 9 Oct 2024 14:22:38 +0800
|
||
|
|
Subject: [PATCH] avg_block_io: send alarm to xalarmd
|
||
|
|
|
||
|
|
---
|
||
|
|
config/tasks/avg_block_io.mod | 2 ++
|
||
|
|
.../sentryPlugins/avg_block_io/module_conn.py | 23 +++++++++++++++----
|
||
|
|
2 files changed, 21 insertions(+), 4 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/config/tasks/avg_block_io.mod b/config/tasks/avg_block_io.mod
|
||
|
|
index b9b6f34..bcd063b 100644
|
||
|
|
--- a/config/tasks/avg_block_io.mod
|
||
|
|
+++ b/config/tasks/avg_block_io.mod
|
||
|
|
@@ -3,3 +3,5 @@ enabled=yes
|
||
|
|
task_start=/usr/bin/python3 /usr/bin/avg_block_io
|
||
|
|
task_stop=pkill -f /usr/bin/avg_block_io
|
||
|
|
type=oneshot
|
||
|
|
+alarm_id=1002
|
||
|
|
+alarm_clear_time=5
|
||
|
|
diff --git a/src/python/sentryPlugins/avg_block_io/module_conn.py b/src/python/sentryPlugins/avg_block_io/module_conn.py
|
||
|
|
index 0da4208..2fc5a83 100644
|
||
|
|
--- a/src/python/sentryPlugins/avg_block_io/module_conn.py
|
||
|
|
+++ b/src/python/sentryPlugins/avg_block_io/module_conn.py
|
||
|
|
@@ -16,6 +16,7 @@ import time
|
||
|
|
from .utils import is_abnormal
|
||
|
|
from sentryCollector.collect_plugin import is_iocollect_valid, get_io_data, Result_Messages
|
||
|
|
from syssentry.result import ResultLevel, report_result
|
||
|
|
+from xalarm.sentry_notify import xalarm_report, MINOR_ALM, ALARM_TYPE_OCCUR
|
||
|
|
|
||
|
|
|
||
|
|
TASK_NAME = "avg_block_io"
|
||
|
|
@@ -68,19 +69,33 @@ def process_report_data(disk_name, rw, io_data):
|
||
|
|
if not is_abnormal((disk_name, 'bio', rw), io_data):
|
||
|
|
return
|
||
|
|
|
||
|
|
+ msg = {"alarm_source": TASK_NAME, "driver_name": disk_name, "io_type": rw}
|
||
|
|
+
|
||
|
|
ctrl_stage = ['throtl', 'wbt', 'iocost', 'bfq']
|
||
|
|
for stage_name in ctrl_stage:
|
||
|
|
if is_abnormal((disk_name, stage_name, rw), io_data):
|
||
|
|
- logging.warning("{} - {} - {} report IO press".format(time.ctime(), disk_name, rw))
|
||
|
|
+ msg["reason"] = "IO press slow"
|
||
|
|
+ msg["block_stack"] = f"bio,{stage_name}"
|
||
|
|
+ logging.warning("{} - {} report IO press slow".format(disk_name, rw))
|
||
|
|
+ xalarm_report(1002, MINOR_ALM, ALARM_TYPE_OCCUR, json.dumps(msg))
|
||
|
|
return
|
||
|
|
|
||
|
|
if is_abnormal((disk_name, 'rq_driver', rw), io_data):
|
||
|
|
- logging.warning("{} - {} - {} report driver".format(time.ctime(), disk_name, rw))
|
||
|
|
+ msg["reason"] = "driver slow"
|
||
|
|
+ msg["block_stack"] = "bio,rq_driver"
|
||
|
|
+ logging.warning("{} - {} report driver slow".format(disk_name, rw))
|
||
|
|
+ xalarm_report(1002, MINOR_ALM, ALARM_TYPE_OCCUR, json.dumps(msg))
|
||
|
|
return
|
||
|
|
|
||
|
|
kernel_stage = ['gettag', 'plug', 'deadline', 'hctx', 'requeue']
|
||
|
|
for stage_name in kernel_stage:
|
||
|
|
if is_abnormal((disk_name, stage_name, rw), io_data):
|
||
|
|
- logging.warning("{} - {} - {} report kernel".format(time.ctime(), disk_name, rw))
|
||
|
|
+ msg["reason"] = "kernel slow"
|
||
|
|
+ msg["block_stack"] = f"bio,{stage_name}"
|
||
|
|
+ logging.warning("{} - {} report kernel slow".format(disk_name, rw))
|
||
|
|
+ xalarm_report(1002, MINOR_ALM, ALARM_TYPE_OCCUR, json.dumps(msg))
|
||
|
|
return
|
||
|
|
- logging.warning("{} - {} - {} report IO press".format(time.ctime(), disk_name, rw))
|
||
|
|
+ msg["reason"] = "unknown"
|
||
|
|
+ msg["block_stack"] = "bio"
|
||
|
|
+ logging.warning("{} - {} report UNKNOWN slow".format(disk_name, rw))
|
||
|
|
+ xalarm_report(1002, MINOR_ALM, ALARM_TYPE_OCCUR, json.dumps(msg))
|
||
|
|
--
|
||
|
|
2.33.0
|
||
|
|
|