sysSentry/fix-ai_block_io-some-issues.patch
贺有志 4db9027149 add fix-ai-block-io-issues.patch.
Signed-off-by: 贺有志 <1037617413@qq.com>

update sysSentry.spec.

Signed-off-by: 贺有志 <1037617413@qq.com>

rename fix-ai-block-io-issues.patch to fix-ai_block_io-some-issues.patch.

Signed-off-by: 贺有志 <1037617413@qq.com>

update sysSentry.spec.

Signed-off-by: 贺有志 <1037617413@qq.com>
2024-09-30 09:21:35 +08:00

833 lines
43 KiB
Diff
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

From 35ba8fe8e241c5e3508c5dadc82a777065a5cc4d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=B4=BA=E6=9C=89=E5=BF=97?= <1037617413@qq.com>
Date: Mon, 30 Sep 2024 00:15:29 +0800
Subject: [PATCH] fix ai_block_io some issues
---
..._slow_io_detection.ini => ai_block_io.ini} | 6 +-
config/tasks/ai_block_io.mod | 5 +
.../tasks/ai_threshold_slow_io_detection.mod | 5 -
...ow_io_detection.py => test_ai_block_io.py} | 0
.../README.md | 0
.../__init__.py | 0
.../ai_block_io.py} | 57 ++--
.../alarm_report.py | 2 +-
.../ai_block_io/config_parser.py | 256 ++++++++++++++++++
.../data_access.py | 3 +
.../detector.py | 17 +-
.../io_data.py | 0
.../sliding_window.py | 0
.../threshold.py | 13 +-
.../utils.py | 15 +-
.../config_parser.py | 141 ----------
src/python/setup.py | 2 +-
17 files changed, 336 insertions(+), 186 deletions(-)
rename config/plugins/{ai_threshold_slow_io_detection.ini => ai_block_io.ini} (66%)
create mode 100644 config/tasks/ai_block_io.mod
delete mode 100644 config/tasks/ai_threshold_slow_io_detection.mod
rename selftest/test/{test_ai_threshold_slow_io_detection.py => test_ai_block_io.py} (100%)
rename src/python/sentryPlugins/{ai_threshold_slow_io_detection => ai_block_io}/README.md (100%)
rename src/python/sentryPlugins/{ai_threshold_slow_io_detection => ai_block_io}/__init__.py (100%)
rename src/python/sentryPlugins/{ai_threshold_slow_io_detection/slow_io_detection.py => ai_block_io/ai_block_io.py} (66%)
rename src/python/sentryPlugins/{ai_threshold_slow_io_detection => ai_block_io}/alarm_report.py (98%)
create mode 100644 src/python/sentryPlugins/ai_block_io/config_parser.py
rename src/python/sentryPlugins/{ai_threshold_slow_io_detection => ai_block_io}/data_access.py (99%)
rename src/python/sentryPlugins/{ai_threshold_slow_io_detection => ai_block_io}/detector.py (77%)
rename src/python/sentryPlugins/{ai_threshold_slow_io_detection => ai_block_io}/io_data.py (100%)
rename src/python/sentryPlugins/{ai_threshold_slow_io_detection => ai_block_io}/sliding_window.py (100%)
rename src/python/sentryPlugins/{ai_threshold_slow_io_detection => ai_block_io}/threshold.py (92%)
rename src/python/sentryPlugins/{ai_threshold_slow_io_detection => ai_block_io}/utils.py (86%)
delete mode 100644 src/python/sentryPlugins/ai_threshold_slow_io_detection/config_parser.py
diff --git a/config/plugins/ai_threshold_slow_io_detection.ini b/config/plugins/ai_block_io.ini
similarity index 66%
rename from config/plugins/ai_threshold_slow_io_detection.ini
rename to config/plugins/ai_block_io.ini
index 44eb928..01ce266 100644
--- a/config/plugins/ai_threshold_slow_io_detection.ini
+++ b/config/plugins/ai_block_io.ini
@@ -4,9 +4,9 @@ slow_io_detect_frequency=1
log_level=info
[algorithm]
-train_data_duration=0.1
-train_update_duration=0.02
-algorithm_type=n_sigma
+train_data_duration=24
+train_update_duration=2
+algorithm_type=boxplot
boxplot_parameter=1.5
n_sigma_parameter=3
diff --git a/config/tasks/ai_block_io.mod b/config/tasks/ai_block_io.mod
new file mode 100644
index 0000000..1971d7d
--- /dev/null
+++ b/config/tasks/ai_block_io.mod
@@ -0,0 +1,5 @@
+[common]
+enabled=yes
+task_start=/usr/bin/python3 /usr/bin/ai_block_io
+task_stop=pkill -f /usr/bin/ai_block_io
+type=oneshot
\ No newline at end of file
diff --git a/config/tasks/ai_threshold_slow_io_detection.mod b/config/tasks/ai_threshold_slow_io_detection.mod
deleted file mode 100644
index 2729f72..0000000
--- a/config/tasks/ai_threshold_slow_io_detection.mod
+++ /dev/null
@@ -1,5 +0,0 @@
-[common]
-enabled=yes
-task_start=/usr/bin/python3 /usr/bin/ai_threshold_slow_io_detection
-task_stop=pkill -f /usr/bin/ai_threshold_slow_io_detection
-type=oneshot
\ No newline at end of file
diff --git a/selftest/test/test_ai_threshold_slow_io_detection.py b/selftest/test/test_ai_block_io.py
similarity index 100%
rename from selftest/test/test_ai_threshold_slow_io_detection.py
rename to selftest/test/test_ai_block_io.py
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/README.md b/src/python/sentryPlugins/ai_block_io/README.md
similarity index 100%
rename from src/python/sentryPlugins/ai_threshold_slow_io_detection/README.md
rename to src/python/sentryPlugins/ai_block_io/README.md
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/__init__.py b/src/python/sentryPlugins/ai_block_io/__init__.py
similarity index 100%
rename from src/python/sentryPlugins/ai_threshold_slow_io_detection/__init__.py
rename to src/python/sentryPlugins/ai_block_io/__init__.py
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/slow_io_detection.py b/src/python/sentryPlugins/ai_block_io/ai_block_io.py
similarity index 66%
rename from src/python/sentryPlugins/ai_threshold_slow_io_detection/slow_io_detection.py
rename to src/python/sentryPlugins/ai_block_io/ai_block_io.py
index 43cf770..31b8a97 100644
--- a/src/python/sentryPlugins/ai_threshold_slow_io_detection/slow_io_detection.py
+++ b/src/python/sentryPlugins/ai_block_io/ai_block_io.py
@@ -23,7 +23,7 @@ from .data_access import get_io_data_from_collect_plug, check_collect_valid
from .io_data import MetricName
from .alarm_report import AlarmReport
-CONFIG_FILE = "/etc/sysSentry/plugins/ai_threshold_slow_io_detection.ini"
+CONFIG_FILE = "/etc/sysSentry/plugins/ai_block_io.ini"
def sig_handler(signum, frame):
@@ -40,34 +40,48 @@ class SlowIODetection:
def __init__(self, config_parser: ConfigParser):
self._config_parser = config_parser
- self.__set_log_format()
self.__init_detector_name_list()
self.__init_detector()
- def __set_log_format(self):
- log_format = "%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s"
- log_level = get_log_level(self._config_parser.get_log_level())
- logging.basicConfig(level=log_level, format=log_format)
-
def __init_detector_name_list(self):
self._disk_list = check_collect_valid(self._config_parser.get_slow_io_detect_frequency())
- for disk in self._disk_list:
- self._detector_name_list.append(MetricName(disk, "bio", "read", "latency"))
- self._detector_name_list.append(MetricName(disk, "bio", "write", "latency"))
+ disks_to_detection: list = self._config_parser.get_disks_to_detection()
+ # Case 1: None -> register bio read/write latency detectors for every disk in self._disk_list.
+ # Case 2: an empty list -> register no detector at all and only log a warning.
+ # Case 3: a non-empty list -> register detectors only for disks that are both configured and in self._disk_list.
+ if disks_to_detection is None:
+ for disk in self._disk_list:
+ self._detector_name_list.append(MetricName(disk, "bio", "read", "latency"))
+ self._detector_name_list.append(MetricName(disk, "bio", "write", "latency"))
+ elif len(disks_to_detection) == 0:
+ logging.warning('please attention: conf file not specify any disk to detection, '
+ 'so it will not start ai block io.')
+ else:
+ disks_name_to_detection = []
+ for disk_name_to_detection in disks_to_detection:
+ disks_name_to_detection.append(disk_name_to_detection.get_disk_name())
+ disk_intersection = [disk for disk in self._disk_list if disk in disks_name_to_detection]
+ for disk in disk_intersection:
+ self._detector_name_list.append(MetricName(disk, "bio", "read", "latency"))
+ self._detector_name_list.append(MetricName(disk, "bio", "write", "latency"))
+ logging.info(f'start to detection follow disk and it\'s metric: {self._detector_name_list}')
def __init_detector(self):
train_data_duration, train_update_duration = (self._config_parser.
get_train_data_duration_and_train_update_duration())
slow_io_detection_frequency = self._config_parser.get_slow_io_detect_frequency()
- threshold_type = get_threshold_type_enum(self._config_parser.get_algorithm_type())
+ threshold_type = self._config_parser.get_algorithm_type()
data_queue_size, update_size = get_data_queue_size_and_update_size(train_data_duration,
train_update_duration,
slow_io_detection_frequency)
- sliding_window_type = get_sliding_window_type_enum(self._config_parser.get_sliding_window_type())
+ sliding_window_type = self._config_parser.get_sliding_window_type()
window_size, window_threshold = self._config_parser.get_window_size_and_window_minimum_threshold()
for detector_name in self._detector_name_list:
- threshold = ThresholdFactory().get_threshold(threshold_type, data_queue_size=data_queue_size,
+ threshold = ThresholdFactory().get_threshold(threshold_type,
+ boxplot_parameter=self._config_parser.get_boxplot_parameter(),
+ n_sigma_parameter=self._config_parser.get_n_sigma_parameter(),  # must match NSigmaThreshold.__init__; a misspelt keyword is swallowed by its **kwargs
+ data_queue_size=data_queue_size,
data_queue_update_size=update_size)
sliding_window = SlidingWindowFactory().get_sliding_window(sliding_window_type, queue_length=window_size,
threshold=window_threshold)
@@ -89,6 +103,7 @@ class SlowIODetection:
logging.debug(f'step1. Get io data: {str(io_data_dict_with_disk_name)}')
if io_data_dict_with_disk_name is None:
continue
+
# Step2慢IO检测
logging.debug('step2. Start to detection slow io event.')
slow_io_event_list = []
@@ -103,13 +118,14 @@ class SlowIODetection:
for slow_io_event in slow_io_event_list:
metric_name: MetricName = slow_io_event[0]
result = slow_io_event[1]
- AlarmReport.report_major_alm(f"disk {metric_name.get_disk_name()} has slow io event."
- f"stage: {metric_name.get_metric_name()},"
- f"type: {metric_name.get_io_access_type_name()},"
- f"metric: {metric_name.get_metric_name()},"
- f"current window: {result[1]},"
- f"threshold: {result[2]}")
- logging.error(f"slow io event happen: {str(slow_io_event)}")
+ alarm_content = (f"disk {metric_name.get_disk_name()} has slow io event. "
+ f"stage is: {metric_name.get_stage_name()}, "
+ f"io access type is: {metric_name.get_io_access_type_name()}, "
+ f"metric is: {metric_name.get_metric_name()}, "
+ f"current window is: {result[1]}, "
+ f"threshold is: {result[2]}")
+ AlarmReport.report_major_alm(alarm_content)
+ logging.warning(alarm_content)
# Step4等待检测时间
logging.debug('step4. Wait to start next slow io event detection loop.')
@@ -120,6 +136,7 @@ def main():
# Step1注册消息处理函数
signal.signal(signal.SIGINT, sig_handler)
signal.signal(signal.SIGTERM, sig_handler)
+
# Step2断点恢复
# todo:
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/alarm_report.py b/src/python/sentryPlugins/ai_block_io/alarm_report.py
similarity index 98%
rename from src/python/sentryPlugins/ai_threshold_slow_io_detection/alarm_report.py
rename to src/python/sentryPlugins/ai_block_io/alarm_report.py
index 3f4f34e..230c8cd 100644
--- a/src/python/sentryPlugins/ai_threshold_slow_io_detection/alarm_report.py
+++ b/src/python/sentryPlugins/ai_block_io/alarm_report.py
@@ -15,7 +15,7 @@ import json
class AlarmReport:
- TASK_NAME = "SLOW_IO_DETECTION"
+ TASK_NAME = "ai_block_io"
@staticmethod
def report_pass(info: str):
diff --git a/src/python/sentryPlugins/ai_block_io/config_parser.py b/src/python/sentryPlugins/ai_block_io/config_parser.py
new file mode 100644
index 0000000..632391d
--- /dev/null
+++ b/src/python/sentryPlugins/ai_block_io/config_parser.py
@@ -0,0 +1,256 @@
+# coding: utf-8
+# Copyright (c) 2024 Huawei Technologies Co., Ltd.
+# sysSentry is licensed under the Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+# PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+import configparser
+import json
+import logging
+
+from .io_data import MetricName
+from .threshold import ThresholdType
+from .utils import get_threshold_type_enum, get_sliding_window_type_enum, get_log_level
+
+LOG_FORMAT = "%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s"
+
+
+def init_log_format(log_level: str):
+ logging.basicConfig(level=get_log_level(log_level), format=LOG_FORMAT)
+
+
+class ConfigParser:
+ DEFAULT_ABSOLUTE_THRESHOLD = 40
+ DEFAULT_SLOW_IO_DETECTION_FREQUENCY = 1
+ DEFAULT_LOG_LEVEL = 'info'
+
+ DEFAULT_ALGORITHM_TYPE = 'boxplot'
+ DEFAULT_TRAIN_DATA_DURATION = 24
+ DEFAULT_TRAIN_UPDATE_DURATION = 2
+ DEFAULT_BOXPLOT_PARAMETER = 1.5
+ DEFAULT_N_SIGMA_PARAMETER = 3
+
+ DEFAULT_SLIDING_WINDOW_TYPE = 'not_continuous'
+ DEFAULT_WINDOW_SIZE = 30
+ DEFAULT_WINDOW_MINIMUM_THRESHOLD = 6
+
+    def __init__(self, config_file_name):
+        self.__absolute_threshold = ConfigParser.DEFAULT_ABSOLUTE_THRESHOLD
+        self.__slow_io_detect_frequency = ConfigParser.DEFAULT_SLOW_IO_DETECTION_FREQUENCY
+        self.__log_level = ConfigParser.DEFAULT_LOG_LEVEL
+        self.__disks_to_detection: list = []
+
+        self.__algorithm_type = get_threshold_type_enum(ConfigParser.DEFAULT_ALGORITHM_TYPE)  # store the enum: the read path stores one and callers use it unconverted
+        self.__train_data_duration = ConfigParser.DEFAULT_TRAIN_DATA_DURATION  # previously initialised from the *update* duration default
+        self.__train_update_duration = ConfigParser.DEFAULT_TRAIN_UPDATE_DURATION
+        self.__boxplot_parameter = ConfigParser.DEFAULT_BOXPLOT_PARAMETER
+        self.__n_sigma_parameter = ConfigParser.DEFAULT_N_SIGMA_PARAMETER
+
+        self.__sliding_window_type = get_sliding_window_type_enum(ConfigParser.DEFAULT_SLIDING_WINDOW_TYPE)  # same reason as __algorithm_type
+        self.__window_size = ConfigParser.DEFAULT_WINDOW_SIZE
+        self.__window_minimum_threshold = ConfigParser.DEFAULT_WINDOW_MINIMUM_THRESHOLD
+
+        self.__config_file_name = config_file_name  # path later passed to configparser.read() in read_config_from_file
+
+ def __read_absolute_threshold(self, items_common: dict):
+ try:
+ self.__absolute_threshold = float(items_common.get('absolute_threshold',
+ ConfigParser.DEFAULT_ABSOLUTE_THRESHOLD))
+ if self.__absolute_threshold <= 0:
+ logging.warning(
+ f'the_absolute_threshold: {self.__absolute_threshold} you set is invalid, use default value: {ConfigParser.DEFAULT_ABSOLUTE_THRESHOLD}.')
+ self.__absolute_threshold = ConfigParser.DEFAULT_ABSOLUTE_THRESHOLD
+ except ValueError:
+ self.__absolute_threshold = ConfigParser.DEFAULT_ABSOLUTE_THRESHOLD
+ logging.warning(
+ f'the_absolute_threshold type conversion has error, use default value: {self.__absolute_threshold}.')
+
+ def __read__slow_io_detect_frequency(self, items_common: dict):
+ try:
+ self.__slow_io_detect_frequency = int(items_common.get('slow_io_detect_frequency',
+ ConfigParser.DEFAULT_SLOW_IO_DETECTION_FREQUENCY))
+ if self.__slow_io_detect_frequency < 1 or self.__slow_io_detect_frequency > 10:
+ logging.warning(
+ f'the slow_io_detect_frequency: {self.__slow_io_detect_frequency} you set is invalid, use default value: {ConfigParser.DEFAULT_SLOW_IO_DETECTION_FREQUENCY}.')
+ self.__slow_io_detect_frequency = ConfigParser.DEFAULT_SLOW_IO_DETECTION_FREQUENCY
+ except ValueError:
+ self.__slow_io_detect_frequency = ConfigParser.DEFAULT_SLOW_IO_DETECTION_FREQUENCY
+ logging.warning(f'slow_io_detect_frequency type conversion has error, use default value: {self.__slow_io_detect_frequency}.')
+
+    def __read__disks_to_detect(self, items_common: dict):  # parse the JSON list in 'disks_to_detect' into MetricName objects
+        disks_to_detection = items_common.get('disks_to_detect')
+        if disks_to_detection is None:
+            logging.warning(f'config of disks_to_detect not found, the default value be used.')
+            self.__disks_to_detection = None  # None is the "detect every disk" marker read by the caller
+            return
+        self.__disks_to_detection = []  # start clean: the attribute may be None or already filled by an earlier read
+        try:
+            disks_to_detection_list = json.loads(disks_to_detection)
+            for disk_to_detection in disks_to_detection_list:
+                disk_name = disk_to_detection.get('disk_name', None)
+                stage_name = disk_to_detection.get('stage_name', None)
+                io_access_type_name = disk_to_detection.get('io_access_type_name', None)
+                metric_name = disk_to_detection.get('metric_name', None)
+                if not (disk_name is None or stage_name is None or io_access_type_name is None or metric_name is None):
+                    self.__disks_to_detection.append(MetricName(disk_name, stage_name, io_access_type_name, metric_name))
+                else:
+                    logging.warning(f'config of disks_to_detect\'s some part has some error: {disk_to_detection}, it will be ignored.')
+        except (json.decoder.JSONDecodeError, TypeError, AttributeError) as e:  # also covers valid JSON that is not a list of objects
+            logging.warning(f'config of disks_to_detect is error: {e}, it will be ignored and default value be used.')
+            self.__disks_to_detection = None
+
+ def __read__train_data_duration(self, items_algorithm: dict):
+ try:
+ self.__train_data_duration = float(items_algorithm.get('train_data_duration',
+ ConfigParser.DEFAULT_TRAIN_DATA_DURATION))
+ if self.__train_data_duration <= 0 or self.__train_data_duration > 720:
+ logging.warning(
+ f'the train_data_duration: {self.__train_data_duration} you set is invalid, use default value: {ConfigParser.DEFAULT_TRAIN_DATA_DURATION}.')
+ self.__train_data_duration = ConfigParser.DEFAULT_TRAIN_DATA_DURATION
+ except ValueError:
+ self.__train_data_duration = ConfigParser.DEFAULT_TRAIN_DATA_DURATION
+ logging.warning(f'the train_data_duration type conversion has error, use default value: {self.__train_data_duration}.')
+
+    def __read__train_update_duration(self, items_algorithm: dict):  # must run after __read__train_data_duration, whose result bounds this value
+        default_train_update_duration = ConfigParser.DEFAULT_TRAIN_UPDATE_DURATION
+        if default_train_update_duration > self.__train_data_duration:
+            default_train_update_duration = self.__train_data_duration / 2  # keep the fallback inside the training window
+
+        try:
+            self.__train_update_duration = float(items_algorithm.get('train_update_duration',
+                                                                     default_train_update_duration))  # absent key: use the adjusted fallback, no bogus "you set" warning
+            if self.__train_update_duration <= 0 or self.__train_update_duration > self.__train_data_duration:
+                logging.warning(
+                    f'the train_update_duration: {self.__train_update_duration} you set is invalid, use default value: {default_train_update_duration}.')
+                self.__train_update_duration = default_train_update_duration
+        except ValueError:
+            self.__train_update_duration = default_train_update_duration
+            logging.warning(f'the train_update_duration type conversion has error, use default value: {self.__train_update_duration}.')
+
+    def __read__algorithm_type_and_parameter(self, items_algorithm: dict):  # read the algorithm type, then only that algorithm's parameter
+        algorithm_type = items_algorithm.get('algorithm_type', ConfigParser.DEFAULT_ALGORITHM_TYPE)
+        self.__algorithm_type = get_threshold_type_enum(algorithm_type)
+
+        if self.__algorithm_type == ThresholdType.NSigmaThreshold:
+            try:
+                self.__n_sigma_parameter = float(items_algorithm.get('n_sigma_parameter',
+                                                                     ConfigParser.DEFAULT_N_SIGMA_PARAMETER))
+                if self.__n_sigma_parameter <= 0 or self.__n_sigma_parameter > 10:
+                    logging.warning(
+                        f'the n_sigma_parameter: {self.__n_sigma_parameter} you set is invalid, use default value: {ConfigParser.DEFAULT_N_SIGMA_PARAMETER}.')
+                    self.__n_sigma_parameter = ConfigParser.DEFAULT_N_SIGMA_PARAMETER
+            except ValueError:
+                self.__n_sigma_parameter = ConfigParser.DEFAULT_N_SIGMA_PARAMETER
+                logging.warning(f'the n_sigma_parameter type conversion has error, use default value: {self.__n_sigma_parameter}.')
+        elif self.__algorithm_type == ThresholdType.BoxplotThreshold:
+            try:
+                self.__boxplot_parameter = float(items_algorithm.get('boxplot_parameter',
+                                                                     ConfigParser.DEFAULT_BOXPLOT_PARAMETER))
+                if self.__boxplot_parameter <= 0 or self.__boxplot_parameter > 10:
+                    logging.warning(
+                        f'the boxplot_parameter: {self.__boxplot_parameter} you set is invalid, use default value: {ConfigParser.DEFAULT_BOXPLOT_PARAMETER}.')
+                    self.__boxplot_parameter = ConfigParser.DEFAULT_BOXPLOT_PARAMETER  # reset the boxplot value itself (was written to __n_sigma_parameter)
+            except ValueError:
+                self.__boxplot_parameter = ConfigParser.DEFAULT_BOXPLOT_PARAMETER
+                logging.warning(f'the boxplot_parameter type conversion has error, use default value: {self.__boxplot_parameter}.')
+
+ def __read__window_size(self, items_sliding_window: dict):
+ try:
+ self.__window_size = int(items_sliding_window.get('window_size',
+ ConfigParser.DEFAULT_WINDOW_SIZE))
+ if self.__window_size < 1 or self.__window_size > 3600:
+ logging.warning(
+ f'the window_size: {self.__window_size} you set is invalid, use default value: {ConfigParser.DEFAULT_WINDOW_SIZE}.')
+ self.__window_size = ConfigParser.DEFAULT_WINDOW_SIZE
+ except ValueError:
+ self.__window_size = ConfigParser.DEFAULT_WINDOW_SIZE
+ logging.warning(f'window_size type conversion has error, use default value: {self.__window_size}.')
+
+    def __read__window_minimum_threshold(self, items_sliding_window: dict):  # must run after __read__window_size, whose result bounds this value
+        default_window_minimum_threshold = ConfigParser.DEFAULT_WINDOW_MINIMUM_THRESHOLD
+        if default_window_minimum_threshold > self.__window_size:
+            default_window_minimum_threshold = max(1, self.__window_size // 2)  # integer count of at least 1 ("/" gave floats such as 0.5)
+        try:
+            self.__window_minimum_threshold = (
+                int(items_sliding_window.get('window_minimum_threshold',
+                                             default_window_minimum_threshold)))  # absent key: use the adjusted fallback, no bogus "you set" warning
+            if self.__window_minimum_threshold < 1 or self.__window_minimum_threshold > self.__window_size:
+                logging.warning(
+                    f'the window_minimum_threshold: {self.__window_minimum_threshold} you set is invalid, use default value: {default_window_minimum_threshold}.')
+                self.__window_minimum_threshold = default_window_minimum_threshold
+        except ValueError:
+            self.__window_minimum_threshold = default_window_minimum_threshold
+            logging.warning(f'window_minimum_threshold type conversion has error, use default value: {self.__window_minimum_threshold}.')
+
+ def read_config_from_file(self):
+ con = configparser.ConfigParser()
+ con.read(self.__config_file_name, encoding='utf-8')
+
+ if con.has_section('common'):
+ items_common = dict(con.items('common'))
+ self.__log_level = items_common.get('log_level', ConfigParser.DEFAULT_LOG_LEVEL)
+ init_log_format(self.__log_level)
+ self.__read_absolute_threshold(items_common)
+ self.__read__slow_io_detect_frequency(items_common)
+ self.__read__disks_to_detect(items_common)
+ else:
+ init_log_format(self.__log_level)
+ logging.warning("common section parameter not found, it will be set to default value.")
+
+ if con.has_section('algorithm'):
+ items_algorithm = dict(con.items('algorithm'))
+ self.__read__train_data_duration(items_algorithm)
+ self.__read__train_update_duration(items_algorithm)
+ self.__read__algorithm_type_and_parameter(items_algorithm)
+ else:
+ logging.warning("algorithm section parameter not found, it will be set to default value.")
+
+ if con.has_section('sliding_window'):
+ items_sliding_window = dict(con.items('sliding_window'))
+ sliding_window_type = items_sliding_window.get('sliding_window_type',
+ ConfigParser.DEFAULT_SLIDING_WINDOW_TYPE)
+ self.__sliding_window_type = get_sliding_window_type_enum(sliding_window_type)
+ self.__read__window_size(items_sliding_window)
+ self.__read__window_minimum_threshold(items_sliding_window)
+ else:
+ logging.warning("sliding_window section parameter not found, it will be set to default value.")
+
+ self.__print_all_config_value()
+
+ def __print_all_config_value(self):
+ pass
+
+ def get_slow_io_detect_frequency(self):
+ return self.__slow_io_detect_frequency
+
+ def get_algorithm_type(self):
+ return self.__algorithm_type
+
+ def get_sliding_window_type(self):
+ return self.__sliding_window_type
+
+ def get_train_data_duration_and_train_update_duration(self):
+ return self.__train_data_duration, self.__train_update_duration
+
+ def get_window_size_and_window_minimum_threshold(self):
+ return self.__window_size, self.__window_minimum_threshold
+
+ def get_absolute_threshold(self):
+ return self.__absolute_threshold
+
+ def get_log_level(self):
+ return self.__log_level
+
+ def get_disks_to_detection(self):
+ return self.__disks_to_detection
+
+ def get_boxplot_parameter(self):
+ return self.__boxplot_parameter
+
+ def get_n_sigma_parameter(self):
+ return self.__n_sigma_parameter
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/data_access.py b/src/python/sentryPlugins/ai_block_io/data_access.py
similarity index 99%
rename from src/python/sentryPlugins/ai_threshold_slow_io_detection/data_access.py
rename to src/python/sentryPlugins/ai_block_io/data_access.py
index d9f3460..01c5315 100644
--- a/src/python/sentryPlugins/ai_threshold_slow_io_detection/data_access.py
+++ b/src/python/sentryPlugins/ai_block_io/data_access.py
@@ -17,6 +17,8 @@ from sentryCollector.collect_plugin import (
get_io_data,
is_iocollect_valid,
)
+
+
from .io_data import IOStageData, IOData
COLLECT_STAGES = [
@@ -32,6 +34,7 @@ COLLECT_STAGES = [
"iocost",
]
+
def check_collect_valid(period):
data_raw = is_iocollect_valid(period)
if data_raw["ret"] == 0:
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/detector.py b/src/python/sentryPlugins/ai_block_io/detector.py
similarity index 77%
rename from src/python/sentryPlugins/ai_threshold_slow_io_detection/detector.py
rename to src/python/sentryPlugins/ai_block_io/detector.py
index eda9825..bcf62cb 100644
--- a/src/python/sentryPlugins/ai_threshold_slow_io_detection/detector.py
+++ b/src/python/sentryPlugins/ai_block_io/detector.py
@@ -26,19 +26,26 @@ class Detector:
self._threshold = threshold
self._slidingWindow = sliding_window
self._threshold.attach_observer(self._slidingWindow)
+ self._count = 0
def get_metric_name(self):
return self._metric_name
def is_slow_io_event(self, io_data_dict_with_disk_name: dict):
- logging.debug(f'Enter Detector: {self}')
+ self._count += 1
+ if self._count % 15 == 0:
+ self._count = 0
+ logging.info(f"({self._metric_name}) 's latest threshold is: {self._threshold.get_threshold()}.")
+ logging.debug(f'enter Detector: {self}')
metric_value = get_metric_value_from_io_data_dict_by_metric_name(io_data_dict_with_disk_name, self._metric_name)
- if metric_value > 1e-6:
- logging.debug(f'Input metric value: {str(metric_value)}')
- self._threshold.push_latest_data_to_queue(metric_value)
+ if metric_value is None:
+ logging.debug('not found metric value, so return None.')
+ return False, None, None
+ logging.debug(f'input metric value: {str(metric_value)}')
+ self._threshold.push_latest_data_to_queue(metric_value)
detection_result = self._slidingWindow.is_slow_io_event(metric_value)
logging.debug(f'Detection result: {str(detection_result)}')
- logging.debug(f'Exit Detector: {self}')
+ logging.debug(f'exit Detector: {self}')
return detection_result
def __repr__(self):
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/io_data.py b/src/python/sentryPlugins/ai_block_io/io_data.py
similarity index 100%
rename from src/python/sentryPlugins/ai_threshold_slow_io_detection/io_data.py
rename to src/python/sentryPlugins/ai_block_io/io_data.py
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/sliding_window.py b/src/python/sentryPlugins/ai_block_io/sliding_window.py
similarity index 100%
rename from src/python/sentryPlugins/ai_threshold_slow_io_detection/sliding_window.py
rename to src/python/sentryPlugins/ai_block_io/sliding_window.py
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/threshold.py b/src/python/sentryPlugins/ai_block_io/threshold.py
similarity index 92%
rename from src/python/sentryPlugins/ai_threshold_slow_io_detection/threshold.py
rename to src/python/sentryPlugins/ai_block_io/threshold.py
index 9e1ca7b..ff85d85 100644
--- a/src/python/sentryPlugins/ai_threshold_slow_io_detection/threshold.py
+++ b/src/python/sentryPlugins/ai_block_io/threshold.py
@@ -79,9 +79,9 @@ class AbsoluteThreshold(Threshold):
class BoxplotThreshold(Threshold):
- def __init__(self, parameter: float = 1.5, data_queue_size: int = 10000, data_queue_update_size: int = 1000):
+ def __init__(self, boxplot_parameter: float = 1.5, data_queue_size: int = 10000, data_queue_update_size: int = 1000, **kwargs):
super().__init__(data_queue_size, data_queue_update_size)
- self.parameter = parameter
+ self.parameter = boxplot_parameter
def _update_threshold(self):
data = list(self.data_queue.queue)
@@ -94,6 +94,8 @@ class BoxplotThreshold(Threshold):
self.notify_observer()
def push_latest_data_to_queue(self, data):
+ if data < 1e-6:
+ return
try:
self.data_queue.put(data, block=False)
except queue.Full:
@@ -111,9 +113,9 @@ class BoxplotThreshold(Threshold):
class NSigmaThreshold(Threshold):
- def __init__(self, parameter: float = 2.0, data_queue_size: int = 10000, data_queue_update_size: int = 1000):
+ def __init__(self, n_sigma_parameter: float = 3.0, data_queue_size: int = 10000, data_queue_update_size: int = 1000, **kwargs):
super().__init__(data_queue_size, data_queue_update_size)
- self.parameter = parameter
+ self.parameter = n_sigma_parameter
def _update_threshold(self):
data = list(self.data_queue.queue)
@@ -125,6 +127,8 @@ class NSigmaThreshold(Threshold):
self.notify_observer()
def push_latest_data_to_queue(self, data):
+ if data < 1e-6:
+ return
try:
self.data_queue.put(data, block=False)
except queue.Full:
@@ -157,4 +161,3 @@ class ThresholdFactory:
return NSigmaThreshold(*args, **kwargs)
else:
raise ValueError(f"Invalid threshold type: {threshold_type}")
-
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/utils.py b/src/python/sentryPlugins/ai_block_io/utils.py
similarity index 86%
rename from src/python/sentryPlugins/ai_threshold_slow_io_detection/utils.py
rename to src/python/sentryPlugins/ai_block_io/utils.py
index f66e5ed..8dbba06 100644
--- a/src/python/sentryPlugins/ai_threshold_slow_io_detection/utils.py
+++ b/src/python/sentryPlugins/ai_block_io/utils.py
@@ -8,13 +8,16 @@
# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
# PURPOSE.
# See the Mulan PSL v2 for more details.
+
import logging
from dataclasses import asdict
+
from .threshold import ThresholdType
from .sliding_window import SlidingWindowType
from .io_data import MetricName, IOData
+
def get_threshold_type_enum(algorithm_type: str):
if algorithm_type.lower() == 'absolute':
return ThresholdType.AbsoluteThreshold
@@ -22,7 +25,7 @@ def get_threshold_type_enum(algorithm_type: str):
return ThresholdType.BoxplotThreshold
if algorithm_type.lower() == 'n_sigma':
return ThresholdType.NSigmaThreshold
- logging.info('not found correct algorithm type, use default: boxplot.')
+ logging.warning(f"the algorithm type: {algorithm_type} you set is invalid, use default value: boxplot")
return ThresholdType.BoxplotThreshold
@@ -33,7 +36,7 @@ def get_sliding_window_type_enum(sliding_window_type: str):
return SlidingWindowType.ContinuousSlidingWindow
if sliding_window_type.lower() == 'median':
return SlidingWindowType.MedianSlidingWindow
- logging.info('not found correct sliding window type, use default: not_continuous.')
+ logging.warning(f"the sliding window type: {sliding_window_type} you set is invalid, use default value: not_continuous")
return SlidingWindowType.NotContinuousSlidingWindow
@@ -62,6 +65,8 @@ def get_log_level(log_level: str):
return logging.INFO
elif log_level.lower() == 'warning':
return logging.WARNING
- elif log_level.lower() == 'fatal':
- return logging.FATAL
- return None
+ elif log_level.lower() == 'error':
+ return logging.ERROR
+ elif log_level.lower() == 'critical':
+ return logging.CRITICAL
+ return logging.INFO
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/config_parser.py b/src/python/sentryPlugins/ai_threshold_slow_io_detection/config_parser.py
deleted file mode 100644
index cd4e6f1..0000000
--- a/src/python/sentryPlugins/ai_threshold_slow_io_detection/config_parser.py
+++ /dev/null
@@ -1,141 +0,0 @@
-# coding: utf-8
-# Copyright (c) 2024 Huawei Technologies Co., Ltd.
-# sysSentry is licensed under the Mulan PSL v2.
-# You can use this software according to the terms and conditions of the Mulan PSL v2.
-# You may obtain a copy of Mulan PSL v2 at:
-# http://license.coscl.org.cn/MulanPSL2
-# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
-# PURPOSE.
-# See the Mulan PSL v2 for more details.
-
-import configparser
-import logging
-
-
-class ConfigParser:
-
- DEFAULT_ABSOLUTE_THRESHOLD = 40
- DEFAULT_SLOW_IO_DETECTION_FREQUENCY = 1
- DEFAULT_LOG_LEVEL = 'info'
- DEFAULT_TRAIN_DATA_DURATION = 24
- DEFAULT_TRAIN_UPDATE_DURATION = 2
- DEFAULT_ALGORITHM_TYPE = 'boxplot'
- DEFAULT_N_SIGMA_PARAMETER = 3
- DEFAULT_BOXPLOT_PARAMETER = 1.5
- DEFAULT_SLIDING_WINDOW_TYPE = 'not_continuous'
- DEFAULT_WINDOW_SIZE = 30
- DEFAULT_WINDOW_MINIMUM_THRESHOLD = 6
-
- def __init__(self, config_file_name):
- self.__boxplot_parameter = None
- self.__window_minimum_threshold = None
- self.__window_size = None
- self.__sliding_window_type = None
- self.__n_sigma_parameter = None
- self.__algorithm_type = None
- self.__train_update_duration = None
- self.__log_level = None
- self.__slow_io_detect_frequency = None
- self.__absolute_threshold = None
- self.__train_data_duration = None
- self.__config_file_name = config_file_name
-
- def read_config_from_file(self):
-
- con = configparser.ConfigParser()
- con.read(self.__config_file_name, encoding='utf-8')
-
- items_common = dict(con.items('common'))
- items_algorithm = dict(con.items('algorithm'))
- items_sliding_window = dict(con.items('sliding_window'))
-
- try:
- self.__absolute_threshold = int(items_common.get('absolute_threshold',
- ConfigParser.DEFAULT_ABSOLUTE_THRESHOLD))
- except ValueError:
- self.__absolute_threshold = ConfigParser.DEFAULT_ABSOLUTE_THRESHOLD
- logging.warning('absolute threshold type conversion has error, use default value.')
-
- try:
- self.__slow_io_detect_frequency = int(items_common.get('slow_io_detect_frequency',
- ConfigParser.DEFAULT_SLOW_IO_DETECTION_FREQUENCY))
- except ValueError:
- self.__slow_io_detect_frequency = ConfigParser.DEFAULT_SLOW_IO_DETECTION_FREQUENCY
- logging.warning('slow_io_detect_frequency type conversion has error, use default value.')
-
- self.__log_level = items_common.get('log_level', ConfigParser.DEFAULT_LOG_LEVEL)
-
- try:
- self.__train_data_duration = float(items_algorithm.get('train_data_duration',
- ConfigParser.DEFAULT_TRAIN_DATA_DURATION))
- except ValueError:
- self.__train_data_duration = ConfigParser.DEFAULT_TRAIN_DATA_DURATION
- logging.warning('train_data_duration type conversion has error, use default value.')
-
- try:
- self.__train_update_duration = float(items_algorithm.get('train_update_duration',
- ConfigParser.DEFAULT_TRAIN_UPDATE_DURATION))
- except ValueError:
- self.__train_update_duration = ConfigParser.DEFAULT_TRAIN_UPDATE_DURATION
- logging.warning('train_update_duration type conversion has error, use default value.')
-
- try:
- self.__algorithm_type = items_algorithm.get('algorithm_type', ConfigParser.DEFAULT_ALGORITHM_TYPE)
- except ValueError:
- self.__algorithm_type = ConfigParser.DEFAULT_ALGORITHM_TYPE
- logging.warning('algorithmType type conversion has error, use default value.')
-
- if self.__algorithm_type == 'n_sigma':
- try:
- self.__n_sigma_parameter = float(items_algorithm.get('n_sigma_parameter',
- ConfigParser.DEFAULT_N_SIGMA_PARAMETER))
- except ValueError:
- self.__n_sigma_parameter = ConfigParser.DEFAULT_N_SIGMA_PARAMETER
- logging.warning('n_sigma_parameter type conversion has error, use default value.')
- elif self.__algorithm_type == 'boxplot':
- try:
- self.__boxplot_parameter = float(items_algorithm.get('boxplot_parameter',
- ConfigParser.DEFAULT_BOXPLOT_PARAMETER))
- except ValueError:
- self.__boxplot_parameter = ConfigParser.DEFAULT_BOXPLOT_PARAMETER
- logging.warning('boxplot_parameter type conversion has error, use default value.')
-
- self.__sliding_window_type = items_sliding_window.get('sliding_window_type',
- ConfigParser.DEFAULT_SLIDING_WINDOW_TYPE)
-
- try:
- self.__window_size = int(items_sliding_window.get('window_size',
- ConfigParser.DEFAULT_WINDOW_SIZE))
- except ValueError:
- self.__window_size = ConfigParser.DEFAULT_WINDOW_SIZE
- logging.warning('window_size type conversion has error, use default value.')
-
- try:
- self.__window_minimum_threshold = (
- int(items_sliding_window.get('window_minimum_threshold',
- ConfigParser.DEFAULT_WINDOW_MINIMUM_THRESHOLD)))
- except ValueError:
- self.__window_minimum_threshold = ConfigParser.DEFAULT_WINDOW_MINIMUM_THRESHOLD
- logging.warning('window_minimum_threshold type conversion has error, use default value.')
-
- def get_slow_io_detect_frequency(self):
- return self.__slow_io_detect_frequency
-
- def get_algorithm_type(self):
- return self.__algorithm_type
-
- def get_sliding_window_type(self):
- return self.__sliding_window_type
-
- def get_train_data_duration_and_train_update_duration(self):
- return self.__train_data_duration, self.__train_update_duration
-
- def get_window_size_and_window_minimum_threshold(self):
- return self.__window_size, self.__window_minimum_threshold
-
- def get_absolute_threshold(self):
- return self.__absolute_threshold
-
- def get_log_level(self):
- return self.__log_level
diff --git a/src/python/setup.py b/src/python/setup.py
index dac6481..9e26a10 100644
--- a/src/python/setup.py
+++ b/src/python/setup.py
@@ -34,7 +34,7 @@ setup(
'xalarmd=xalarm.xalarm_daemon:alarm_process_create',
'sentryCollector=sentryCollector.collectd:main',
'avg_block_io=sentryPlugins.avg_block_io.avg_block_io:main',
- 'ai_threshold_slow_io_detection=sentryPlugins.ai_threshold_slow_io_detection.slow_io_detection:main'
+ 'ai_block_io=sentryPlugins.ai_block_io.ai_block_io:main'
]
},
)
--
2.23.0