ai_block_io adapt alarm module.patch

Signed-off-by: 贺有志 <1037617413@qq.com>

ai_block_io adapt alarm module

Signed-off-by: 贺有志 <1037617413@qq.com>
This commit is contained in:
贺有志 2024-10-10 13:09:33 +00:00 committed by PshySimon
parent 4ce962da8c
commit 5eb6aaf745
2 changed files with 229 additions and 1 deletions

View File

@ -0,0 +1,221 @@
From 367f8ab8a5ad26d80caf1bc4529c79d279ef0fb1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=B4=BA=E6=9C=89=E5=BF=97?= <1037617413@qq.com>
Date: Thu, 10 Oct 2024 17:21:48 +0800
Subject: [PATCH] ai_block_io adapt alarm module
---
config/tasks/ai_block_io.mod | 4 +-
.../sentryPlugins/ai_block_io/ai_block_io.py | 28 +++++---
.../sentryPlugins/ai_block_io/alarm_report.py | 65 ++++++++++++++-----
.../sentryPlugins/ai_block_io/data_access.py | 5 +-
.../sentryPlugins/ai_block_io/detector.py | 2 +-
5 files changed, 73 insertions(+), 31 deletions(-)
diff --git a/config/tasks/ai_block_io.mod b/config/tasks/ai_block_io.mod
index 1971d7d..82f4f0b 100644
--- a/config/tasks/ai_block_io.mod
+++ b/config/tasks/ai_block_io.mod
@@ -2,4 +2,6 @@
enabled=yes
task_start=/usr/bin/python3 /usr/bin/ai_block_io
task_stop=pkill -f /usr/bin/ai_block_io
-type=oneshot
\ No newline at end of file
+type=oneshot
+alarm_id=1002
+alarm_clear_time=5
\ No newline at end of file
diff --git a/src/python/sentryPlugins/ai_block_io/ai_block_io.py b/src/python/sentryPlugins/ai_block_io/ai_block_io.py
index 3b00ef3..77104a9 100644
--- a/src/python/sentryPlugins/ai_block_io/ai_block_io.py
+++ b/src/python/sentryPlugins/ai_block_io/ai_block_io.py
@@ -20,14 +20,14 @@ from .utils import get_data_queue_size_and_update_size
from .config_parser import ConfigParser
from .data_access import get_io_data_from_collect_plug, check_collect_valid
from .io_data import MetricName
-from .alarm_report import AlarmReport
+from .alarm_report import Xalarm, Report
CONFIG_FILE = "/etc/sysSentry/plugins/ai_block_io.ini"
def sig_handler(signum, frame):
logging.info("receive signal: %d", signum)
- AlarmReport().report_fail(f"receive signal: {signum}")
+ Report.report_pass(f"receive signal: {signum}, exiting...")
exit(signum)
@@ -44,6 +44,10 @@ class SlowIODetection:
def __init_detector_name_list(self):
self._disk_list = check_collect_valid(self._config_parser.get_slow_io_detect_frequency())
+ if self._disk_list is None:
+ Report.report_pass("get available disk error, please check if the collector plug is enable. exiting...")
+ exit(1)
+
logging.info(f"ai_block_io plug has found disks: {self._disk_list}")
disks_to_detection: list = self._config_parser.get_disks_to_detection()
# 情况1None则启用所有磁盘检测
@@ -101,7 +105,8 @@ class SlowIODetection:
)
logging.debug(f'step1. Get io data: {str(io_data_dict_with_disk_name)}')
if io_data_dict_with_disk_name is None:
- continue
+ Report.report_pass("get io data error, please check if the collector plug is enable. exiting...")
+ exit(1)
# Step2慢IO检测
logging.debug('step2. Start to detection slow io event.')
@@ -117,13 +122,16 @@ class SlowIODetection:
for slow_io_event in slow_io_event_list:
metric_name: MetricName = slow_io_event[0]
result = slow_io_event[1]
- alarm_content = (f"disk {metric_name.get_disk_name()} has slow io event. "
- f"stage is: {metric_name.get_stage_name()}, "
- f"io access type is: {metric_name.get_io_access_type_name()}, "
- f"metric is: {metric_name.get_metric_name()}, "
- f"current window is: {result[1]}, "
- f"threshold is: {result[2]}")
- AlarmReport.report_major_alm(alarm_content)
+ alarm_content = {
+ "driver_name": f"{metric_name.get_disk_name()}",
+ "reason": "disk_slow",
+ "block_stack": f"{metric_name.get_stage_name()}",
+ "io_type": f"{metric_name.get_io_access_type_name()}",
+ "alarm_source": "ai_block_io",
+ "alarm_type": "latency",
+ "details": f"current window is: {result[1]}, threshold is: {result[2]}.",
+ }
+ Xalarm.major(alarm_content)
logging.warning(alarm_content)
# Step4等待检测时间
diff --git a/src/python/sentryPlugins/ai_block_io/alarm_report.py b/src/python/sentryPlugins/ai_block_io/alarm_report.py
index 230c8cd..92bd6e3 100644
--- a/src/python/sentryPlugins/ai_block_io/alarm_report.py
+++ b/src/python/sentryPlugins/ai_block_io/alarm_report.py
@@ -9,41 +9,72 @@
# PURPOSE.
# See the Mulan PSL v2 for more details.
-from syssentry.result import ResultLevel, report_result
import logging
import json
+from xalarm.sentry_notify import (
+ xalarm_report,
+ MINOR_ALM,
+ MAJOR_ALM,
+ CRITICAL_ALM,
+ ALARM_TYPE_OCCUR,
+ ALARM_TYPE_RECOVER,
+)
+
+from syssentry.result import ResultLevel, report_result
+
-class AlarmReport:
+class Report:
TASK_NAME = "ai_block_io"
@staticmethod
def report_pass(info: str):
- report_result(AlarmReport.TASK_NAME, ResultLevel.PASS, json.dumps({"msg": info}))
- logging.info(f'Report {AlarmReport.TASK_NAME} PASS: {info}')
+ report_result(Report.TASK_NAME, ResultLevel.PASS, json.dumps({"msg": info}))
+ logging.info(f'Report {Report.TASK_NAME} PASS: {info}')
@staticmethod
def report_fail(info: str):
- report_result(AlarmReport.TASK_NAME, ResultLevel.FAIL, json.dumps({"msg": info}))
- logging.info(f'Report {AlarmReport.TASK_NAME} FAIL: {info}')
+ report_result(Report.TASK_NAME, ResultLevel.FAIL, json.dumps({"msg": info}))
+ logging.info(f'Report {Report.TASK_NAME} FAIL: {info}')
@staticmethod
def report_skip(info: str):
- report_result(AlarmReport.TASK_NAME, ResultLevel.SKIP, json.dumps({"msg": info}))
- logging.info(f'Report {AlarmReport.TASK_NAME} SKIP: {info}')
+ report_result(Report.TASK_NAME, ResultLevel.SKIP, json.dumps({"msg": info}))
+ logging.info(f'Report {Report.TASK_NAME} SKIP: {info}')
+
+
+class Xalarm:
+ ALARM_ID = 1002
@staticmethod
- def report_minor_alm(info: str):
- report_result(AlarmReport.TASK_NAME, ResultLevel.MINOR_ALM, json.dumps({"msg": info}))
- logging.info(f'Report {AlarmReport.TASK_NAME} MINOR_ALM: {info}')
+ def minor(info: dict):
+ info_str = json.dumps(info)
+ xalarm_report(Xalarm.ALARM_ID, MINOR_ALM, ALARM_TYPE_OCCUR, info_str)
+ logging.info(f"Report {Xalarm.ALARM_ID} MINOR_ALM: {info_str}")
@staticmethod
- def report_major_alm(info: str):
- report_result(AlarmReport.TASK_NAME, ResultLevel.MAJOR_ALM, json.dumps({"msg": info}))
- logging.info(f'Report {AlarmReport.TASK_NAME} MAJOR_ALM: {info}')
+ def major(info: dict):
+ info_str = json.dumps(info)
+ xalarm_report(Xalarm.ALARM_ID, MAJOR_ALM, ALARM_TYPE_OCCUR, info_str)
+ logging.info(f"Report {Xalarm.ALARM_ID} MAJOR_ALM: {info_str}")
@staticmethod
- def report_critical_alm(info: str):
- report_result(AlarmReport.TASK_NAME, ResultLevel.CRITICAL_ALM, json.dumps({"msg": info}))
- logging.info(f'Report {AlarmReport.TASK_NAME} CRITICAL_ALM: {info}')
+ def critical(info: dict):
+ info_str = json.dumps(info)
+ xalarm_report(Xalarm.ALARM_ID, CRITICAL_ALM, ALARM_TYPE_OCCUR, info_str)
+ logging.info(f"Report {Xalarm.ALARM_ID} CRITICAL_ALM: {info_str}")
+
+ def minor_recover(info: dict):
+ info_str = json.dumps(info)
+ xalarm_report(Xalarm.ALARM_ID, MINOR_ALM, ALARM_TYPE_RECOVER, info_str)
+ logging.info(f"Report {Xalarm.ALARM_ID} MINOR_ALM Recover: {info_str}")
+
+ def major_recover(info: dict):
+ info_str = json.dumps(info)
+ xalarm_report(Xalarm.ALARM_ID, MAJOR_ALM, ALARM_TYPE_RECOVER, info_str)
+ logging.info(f"Report {Xalarm.ALARM_ID} MAJOR_ALM Recover: {info_str}")
+ def critical_recover(info: dict):
+ info_str = json.dumps(info)
+ xalarm_report(Xalarm.ALARM_ID, CRITICAL_ALM, ALARM_TYPE_RECOVER, info_str)
+ logging.info(f"Report {Xalarm.ALARM_ID} CRITICAL_ALM Recover: {info_str}")
diff --git a/src/python/sentryPlugins/ai_block_io/data_access.py b/src/python/sentryPlugins/ai_block_io/data_access.py
index 01c5315..c7679cd 100644
--- a/src/python/sentryPlugins/ai_block_io/data_access.py
+++ b/src/python/sentryPlugins/ai_block_io/data_access.py
@@ -42,10 +42,11 @@ def check_collect_valid(period):
data = json.loads(data_raw["message"])
except Exception as e:
logging.warning(f"get io data failed, {e}")
- return []
+ return None
return [k for k in data.keys()]
else:
- return []
+ logging.warning(f"get io data failed, return {data_raw}")
+ return None
def _get_raw_data(period, disk_list):
diff --git a/src/python/sentryPlugins/ai_block_io/detector.py b/src/python/sentryPlugins/ai_block_io/detector.py
index a48144f..0ed282b 100644
--- a/src/python/sentryPlugins/ai_block_io/detector.py
+++ b/src/python/sentryPlugins/ai_block_io/detector.py
@@ -35,7 +35,7 @@ class Detector:
self._count += 1
if self._count % 15 == 0:
self._count = 0
- logging.info(f"({self._metric_name}) 's latest threshold is: {self._threshold.get_threshold()}.")
+ logging.debug(f"({self._metric_name}) 's latest threshold is: {self._threshold.get_threshold()}.")
logging.debug(f'enter Detector: {self}')
metric_value = get_metric_value_from_io_data_dict_by_metric_name(io_data_dict_with_disk_name, self._metric_name)
if metric_value is None:
--
2.23.0

View File

@ -4,7 +4,7 @@
Summary: System Inspection Framework
Name: sysSentry
Version: 1.0.2
Release: 28
Release: 29
License: Mulan PSL v2
Group: System Environment/Daemons
Source0: https://gitee.com/openeuler/sysSentry/releases/download/v%{version}/%{name}-%{version}.tar.gz
@ -41,6 +41,7 @@ Patch28: update-log-when-it-is-not-lock-collect.patch
Patch29: change-alarm-length.patch
Patch30: add-detail-time.patch
Patch31: xalarm-add-alarm-msg-length-to-8192.patch
Patch32: ai_block_io-adapt-alarm-module.patch
BuildRequires: cmake gcc-c++
BuildRequires: python3 python3-setuptools
@ -285,6 +286,12 @@ rm -rf %{buildroot}
%attr(0550,root,root) %{python3_sitelib}/sentryPlugins/ai_block_io
%changelog
* Thu Oct 10 2024 heyouzhi <heyouzhi@huawei.com> - 1.0.2-29
- Type:requirement
- CVE:NA
- SUG:NA
- DESC:ai_block_io adapt alarm module
* Thu Oct 10 2024 caixiaomeng <caixiaomeng2@huawei.com> - 1.0.2-28
- Type:bugfix
- CVE:NA