From 35ba8fe8e241c5e3508c5dadc82a777065a5cc4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B4=BA=E6=9C=89=E5=BF=97?= <1037617413@qq.com> Date: Mon, 30 Sep 2024 00:15:29 +0800 Subject: [PATCH] fix ai_block_io some issues --- ..._slow_io_detection.ini => ai_block_io.ini} | 6 +- config/tasks/ai_block_io.mod | 5 + .../tasks/ai_threshold_slow_io_detection.mod | 5 - ...ow_io_detection.py => test_ai_block_io.py} | 0 .../README.md | 0 .../__init__.py | 0 .../ai_block_io.py} | 57 ++-- .../alarm_report.py | 2 +- .../ai_block_io/config_parser.py | 256 ++++++++++++++++++ .../data_access.py | 3 + .../detector.py | 17 +- .../io_data.py | 0 .../sliding_window.py | 0 .../threshold.py | 13 +- .../utils.py | 15 +- .../config_parser.py | 141 ---------- src/python/setup.py | 2 +- 17 files changed, 336 insertions(+), 186 deletions(-) rename config/plugins/{ai_threshold_slow_io_detection.ini => ai_block_io.ini} (66%) create mode 100644 config/tasks/ai_block_io.mod delete mode 100644 config/tasks/ai_threshold_slow_io_detection.mod rename selftest/test/{test_ai_threshold_slow_io_detection.py => test_ai_block_io.py} (100%) rename src/python/sentryPlugins/{ai_threshold_slow_io_detection => ai_block_io}/README.md (100%) rename src/python/sentryPlugins/{ai_threshold_slow_io_detection => ai_block_io}/__init__.py (100%) rename src/python/sentryPlugins/{ai_threshold_slow_io_detection/slow_io_detection.py => ai_block_io/ai_block_io.py} (66%) rename src/python/sentryPlugins/{ai_threshold_slow_io_detection => ai_block_io}/alarm_report.py (98%) create mode 100644 src/python/sentryPlugins/ai_block_io/config_parser.py rename src/python/sentryPlugins/{ai_threshold_slow_io_detection => ai_block_io}/data_access.py (99%) rename src/python/sentryPlugins/{ai_threshold_slow_io_detection => ai_block_io}/detector.py (77%) rename src/python/sentryPlugins/{ai_threshold_slow_io_detection => ai_block_io}/io_data.py (100%) rename src/python/sentryPlugins/{ai_threshold_slow_io_detection => 
ai_block_io}/sliding_window.py (100%) rename src/python/sentryPlugins/{ai_threshold_slow_io_detection => ai_block_io}/threshold.py (92%) rename src/python/sentryPlugins/{ai_threshold_slow_io_detection => ai_block_io}/utils.py (86%) delete mode 100644 src/python/sentryPlugins/ai_threshold_slow_io_detection/config_parser.py diff --git a/config/plugins/ai_threshold_slow_io_detection.ini b/config/plugins/ai_block_io.ini similarity index 66% rename from config/plugins/ai_threshold_slow_io_detection.ini rename to config/plugins/ai_block_io.ini index 44eb928..01ce266 100644 --- a/config/plugins/ai_threshold_slow_io_detection.ini +++ b/config/plugins/ai_block_io.ini @@ -4,9 +4,9 @@ slow_io_detect_frequency=1 log_level=info [algorithm] -train_data_duration=0.1 -train_update_duration=0.02 -algorithm_type=n_sigma +train_data_duration=24 +train_update_duration=2 +algorithm_type=boxplot boxplot_parameter=1.5 n_sigma_parameter=3 diff --git a/config/tasks/ai_block_io.mod b/config/tasks/ai_block_io.mod new file mode 100644 index 0000000..1971d7d --- /dev/null +++ b/config/tasks/ai_block_io.mod @@ -0,0 +1,5 @@ +[common] +enabled=yes +task_start=/usr/bin/python3 /usr/bin/ai_block_io +task_stop=pkill -f /usr/bin/ai_block_io +type=oneshot \ No newline at end of file diff --git a/config/tasks/ai_threshold_slow_io_detection.mod b/config/tasks/ai_threshold_slow_io_detection.mod deleted file mode 100644 index 2729f72..0000000 --- a/config/tasks/ai_threshold_slow_io_detection.mod +++ /dev/null @@ -1,5 +0,0 @@ -[common] -enabled=yes -task_start=/usr/bin/python3 /usr/bin/ai_threshold_slow_io_detection -task_stop=pkill -f /usr/bin/ai_threshold_slow_io_detection -type=oneshot \ No newline at end of file diff --git a/selftest/test/test_ai_threshold_slow_io_detection.py b/selftest/test/test_ai_block_io.py similarity index 100% rename from selftest/test/test_ai_threshold_slow_io_detection.py rename to selftest/test/test_ai_block_io.py diff --git 
a/src/python/sentryPlugins/ai_threshold_slow_io_detection/README.md b/src/python/sentryPlugins/ai_block_io/README.md similarity index 100% rename from src/python/sentryPlugins/ai_threshold_slow_io_detection/README.md rename to src/python/sentryPlugins/ai_block_io/README.md diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/__init__.py b/src/python/sentryPlugins/ai_block_io/__init__.py similarity index 100% rename from src/python/sentryPlugins/ai_threshold_slow_io_detection/__init__.py rename to src/python/sentryPlugins/ai_block_io/__init__.py diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/slow_io_detection.py b/src/python/sentryPlugins/ai_block_io/ai_block_io.py similarity index 66% rename from src/python/sentryPlugins/ai_threshold_slow_io_detection/slow_io_detection.py rename to src/python/sentryPlugins/ai_block_io/ai_block_io.py index 43cf770..31b8a97 100644 --- a/src/python/sentryPlugins/ai_threshold_slow_io_detection/slow_io_detection.py +++ b/src/python/sentryPlugins/ai_block_io/ai_block_io.py @@ -23,7 +23,7 @@ from .data_access import get_io_data_from_collect_plug, check_collect_valid from .io_data import MetricName from .alarm_report import AlarmReport -CONFIG_FILE = "/etc/sysSentry/plugins/ai_threshold_slow_io_detection.ini" +CONFIG_FILE = "/etc/sysSentry/plugins/ai_block_io.ini" def sig_handler(signum, frame): @@ -40,34 +40,48 @@ class SlowIODetection: def __init__(self, config_parser: ConfigParser): self._config_parser = config_parser - self.__set_log_format() self.__init_detector_name_list() self.__init_detector() - def __set_log_format(self): - log_format = "%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s" - log_level = get_log_level(self._config_parser.get_log_level()) - logging.basicConfig(level=log_level, format=log_format) - def __init_detector_name_list(self): self._disk_list = check_collect_valid(self._config_parser.get_slow_io_detect_frequency()) - for disk in self._disk_list: - 
self._detector_name_list.append(MetricName(disk, "bio", "read", "latency")) - self._detector_name_list.append(MetricName(disk, "bio", "write", "latency")) + disks_to_detection: list = self._config_parser.get_disks_to_detection() + # 情况1:None,则启用所有磁盘检测 + # 情况2:is not None and len = 0,则不启动任何磁盘检测 + # 情况3:len != 0,则取交集 + if disks_to_detection is None: + for disk in self._disk_list: + self._detector_name_list.append(MetricName(disk, "bio", "read", "latency")) + self._detector_name_list.append(MetricName(disk, "bio", "write", "latency")) + elif len(disks_to_detection) == 0: + logging.warning('please attention: conf file not specify any disk to detection, ' + 'so it will not start ai block io.') + else: + disks_name_to_detection = [] + for disk_name_to_detection in disks_to_detection: + disks_name_to_detection.append(disk_name_to_detection.get_disk_name()) + disk_intersection = [disk for disk in self._disk_list if disk in disks_name_to_detection] + for disk in disk_intersection: + self._detector_name_list.append(MetricName(disk, "bio", "read", "latency")) + self._detector_name_list.append(MetricName(disk, "bio", "write", "latency")) + logging.info(f'start to detection follow disk and it\'s metric: {self._detector_name_list}') def __init_detector(self): train_data_duration, train_update_duration = (self._config_parser. 
get_train_data_duration_and_train_update_duration()) slow_io_detection_frequency = self._config_parser.get_slow_io_detect_frequency() - threshold_type = get_threshold_type_enum(self._config_parser.get_algorithm_type()) + threshold_type = self._config_parser.get_algorithm_type() data_queue_size, update_size = get_data_queue_size_and_update_size(train_data_duration, train_update_duration, slow_io_detection_frequency) - sliding_window_type = get_sliding_window_type_enum(self._config_parser.get_sliding_window_type()) + sliding_window_type = self._config_parser.get_sliding_window_type() window_size, window_threshold = self._config_parser.get_window_size_and_window_minimum_threshold() for detector_name in self._detector_name_list: - threshold = ThresholdFactory().get_threshold(threshold_type, data_queue_size=data_queue_size, + threshold = ThresholdFactory().get_threshold(threshold_type, + boxplot_parameter=self._config_parser.get_boxplot_parameter(), + n_sigma_paramter=self._config_parser.get_n_sigma_parameter(), + data_queue_size=data_queue_size, data_queue_update_size=update_size) sliding_window = SlidingWindowFactory().get_sliding_window(sliding_window_type, queue_length=window_size, threshold=window_threshold) @@ -89,6 +103,7 @@ class SlowIODetection: logging.debug(f'step1. Get io data: {str(io_data_dict_with_disk_name)}') if io_data_dict_with_disk_name is None: continue + # Step2:慢IO检测 logging.debug('step2. Start to detection slow io event.') slow_io_event_list = [] @@ -103,13 +118,14 @@ class SlowIODetection: for slow_io_event in slow_io_event_list: metric_name: MetricName = slow_io_event[0] result = slow_io_event[1] - AlarmReport.report_major_alm(f"disk {metric_name.get_disk_name()} has slow io event." 
- f"stage: {metric_name.get_metric_name()}," - f"type: {metric_name.get_io_access_type_name()}," - f"metric: {metric_name.get_metric_name()}," - f"current window: {result[1]}," - f"threshold: {result[2]}") - logging.error(f"slow io event happen: {str(slow_io_event)}") + alarm_content = (f"disk {metric_name.get_disk_name()} has slow io event. " + f"stage is: {metric_name.get_stage_name()}, " + f"io access type is: {metric_name.get_io_access_type_name()}, " + f"metric is: {metric_name.get_metric_name()}, " + f"current window is: {result[1]}, " + f"threshold is: {result[2]}") + AlarmReport.report_major_alm(alarm_content) + logging.warning(alarm_content) # Step4:等待检测时间 logging.debug('step4. Wait to start next slow io event detection loop.') @@ -120,6 +136,7 @@ def main(): # Step1:注册消息处理函数 signal.signal(signal.SIGINT, sig_handler) signal.signal(signal.SIGTERM, sig_handler) + # Step2:断点恢复 # todo: diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/alarm_report.py b/src/python/sentryPlugins/ai_block_io/alarm_report.py similarity index 98% rename from src/python/sentryPlugins/ai_threshold_slow_io_detection/alarm_report.py rename to src/python/sentryPlugins/ai_block_io/alarm_report.py index 3f4f34e..230c8cd 100644 --- a/src/python/sentryPlugins/ai_threshold_slow_io_detection/alarm_report.py +++ b/src/python/sentryPlugins/ai_block_io/alarm_report.py @@ -15,7 +15,7 @@ import json class AlarmReport: - TASK_NAME = "SLOW_IO_DETECTION" + TASK_NAME = "ai_block_io" @staticmethod def report_pass(info: str): diff --git a/src/python/sentryPlugins/ai_block_io/config_parser.py b/src/python/sentryPlugins/ai_block_io/config_parser.py new file mode 100644 index 0000000..632391d --- /dev/null +++ b/src/python/sentryPlugins/ai_block_io/config_parser.py @@ -0,0 +1,256 @@ +# coding: utf-8 +# Copyright (c) 2024 Huawei Technologies Co., Ltd. +# sysSentry is licensed under the Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. 
LOG_FORMAT = "%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s"


def init_log_format(log_level: str):
    """Configure the root logger with LOG_FORMAT at the level named by *log_level*."""
    logging.basicConfig(level=get_log_level(log_level), format=LOG_FORMAT)


class ConfigParser:
    """Read and validate the ai_block_io plugin settings from an INI file.

    Every setting starts at its ``DEFAULT_*`` value.  ``read_config_from_file``
    overrides the defaults with the values found in the ``[common]``,
    ``[algorithm]`` and ``[sliding_window]`` sections; a value that cannot be
    converted or is out of range is replaced by its default and a warning is
    logged.
    """

    DEFAULT_ABSOLUTE_THRESHOLD = 40
    DEFAULT_SLOW_IO_DETECTION_FREQUENCY = 1
    DEFAULT_LOG_LEVEL = 'info'

    DEFAULT_ALGORITHM_TYPE = 'boxplot'
    DEFAULT_TRAIN_DATA_DURATION = 24
    DEFAULT_TRAIN_UPDATE_DURATION = 2
    DEFAULT_BOXPLOT_PARAMETER = 1.5
    DEFAULT_N_SIGMA_PARAMETER = 3

    DEFAULT_SLIDING_WINDOW_TYPE = 'not_continuous'
    DEFAULT_WINDOW_SIZE = 30
    DEFAULT_WINDOW_MINIMUM_THRESHOLD = 6

    def __init__(self, config_file_name):
        """Initialise every setting to its default; nothing is read from disk yet.

        Args:
            config_file_name: path of the INI file later parsed by
                ``read_config_from_file``.
        """
        self.__absolute_threshold = ConfigParser.DEFAULT_ABSOLUTE_THRESHOLD
        self.__slow_io_detect_frequency = ConfigParser.DEFAULT_SLOW_IO_DETECTION_FREQUENCY
        self.__log_level = ConfigParser.DEFAULT_LOG_LEVEL
        # None is the "not configured" value (the same one used when the
        # disks_to_detect key is absent); an empty list would instead mean
        # that the user explicitly selected no disk.
        self.__disks_to_detection = None

        self.__algorithm_type = ConfigParser.DEFAULT_ALGORITHM_TYPE
        # Fixed: this used to be seeded from DEFAULT_TRAIN_UPDATE_DURATION.
        self.__train_data_duration = ConfigParser.DEFAULT_TRAIN_DATA_DURATION
        self.__train_update_duration = ConfigParser.DEFAULT_TRAIN_UPDATE_DURATION
        self.__boxplot_parameter = ConfigParser.DEFAULT_BOXPLOT_PARAMETER
        self.__n_sigma_parameter = ConfigParser.DEFAULT_N_SIGMA_PARAMETER

        self.__sliding_window_type = ConfigParser.DEFAULT_SLIDING_WINDOW_TYPE
        self.__window_size = ConfigParser.DEFAULT_WINDOW_SIZE
        self.__window_minimum_threshold = ConfigParser.DEFAULT_WINDOW_MINIMUM_THRESHOLD

        self.__config_file_name = config_file_name

    def __read_absolute_threshold(self, items_common: dict):
        """Parse ``absolute_threshold`` as a float that must be greater than 0."""
        default = ConfigParser.DEFAULT_ABSOLUTE_THRESHOLD
        try:
            value = float(items_common.get('absolute_threshold', default))
        except ValueError:
            self.__absolute_threshold = default
            logging.warning(
                f'the_absolute_threshold type conversion has error, use default value: {self.__absolute_threshold}.')
            return
        # "not value > 0" (rather than "value <= 0") also rejects NaN.
        if not value > 0:
            logging.warning(
                f'the_absolute_threshold: {value} you set is invalid, use default value: {default}.')
            value = default
        self.__absolute_threshold = value

    def __read__slow_io_detect_frequency(self, items_common: dict):
        """Parse ``slow_io_detect_frequency`` as an int in the range [1, 10]."""
        default = ConfigParser.DEFAULT_SLOW_IO_DETECTION_FREQUENCY
        try:
            value = int(items_common.get('slow_io_detect_frequency', default))
        except ValueError:
            self.__slow_io_detect_frequency = default
            logging.warning(
                f'slow_io_detect_frequency type conversion has error, use default value: {self.__slow_io_detect_frequency}.')
            return
        if not 1 <= value <= 10:
            logging.warning(
                f'the slow_io_detect_frequency: {value} you set is invalid, use default value: {default}.')
            value = default
        self.__slow_io_detect_frequency = value

    def __read__disks_to_detect(self, items_common: dict):
        """Parse ``disks_to_detect``, a JSON list of metric descriptions.

        Each element must be an object providing ``disk_name``, ``stage_name``,
        ``io_access_type_name`` and ``metric_name``; anything else is skipped
        with a warning.  The stored result is ``None`` when the key is absent
        or the value is not a JSON list, otherwise the (possibly empty) list of
        ``MetricName`` objects built from the valid elements.
        """
        raw_value = items_common.get('disks_to_detect')
        if raw_value is None:
            logging.warning('config of disks_to_detect not found, the default value be used.')
            self.__disks_to_detection = None
            return
        try:
            entries = json.loads(raw_value)
        except json.decoder.JSONDecodeError as e:
            logging.warning(f'config of disks_to_detect is error: {e}, it will be ignored and default value be used.')
            self.__disks_to_detection = None
            return
        # Valid JSON of the wrong shape (object, number, string ...) used to
        # raise AttributeError/TypeError further down and abort start-up.
        if not isinstance(entries, list):
            logging.warning(
                f'config of disks_to_detect is error: expected a JSON list but got {type(entries).__name__}, '
                f'it will be ignored and default value be used.')
            self.__disks_to_detection = None
            return

        required_keys = ('disk_name', 'stage_name', 'io_access_type_name', 'metric_name')
        # Build a fresh list so that reading the file twice does not accumulate
        # duplicates in the instance state.
        selected = []
        for entry in entries:
            fields = [entry.get(key) for key in required_keys] if isinstance(entry, dict) else None
            if fields is None or any(field is None for field in fields):
                logging.warning(f'config of disks_to_detect\'s some part has some error: {entry}, it will be ignored.')
                continue
            selected.append(MetricName(*fields))
        self.__disks_to_detection = selected

    def __read__train_data_duration(self, items_algorithm: dict):
        """Parse ``train_data_duration`` as a float in the range (0, 720]."""
        default = ConfigParser.DEFAULT_TRAIN_DATA_DURATION
        try:
            value = float(items_algorithm.get('train_data_duration', default))
        except ValueError:
            self.__train_data_duration = default
            logging.warning(
                f'the train_data_duration type conversion has error, use default value: {self.__train_data_duration}.')
            return
        # The chained comparison is False for NaN, so NaN is rejected too.
        if not 0 < value <= 720:
            logging.warning(
                f'the train_data_duration: {value} you set is invalid, use default value: {default}.')
            value = default
        self.__train_data_duration = value

    def __read__train_update_duration(self, items_algorithm: dict):
        """Parse ``train_update_duration`` as a float in (0, train_data_duration].

        Must run after ``__read__train_data_duration``.  The fallback is the
        class default, or half of the train data duration when the class
        default would exceed it.
        """
        default = ConfigParser.DEFAULT_TRAIN_UPDATE_DURATION
        if default > self.__train_data_duration:
            default = self.__train_data_duration / 2

        try:
            # Fall back to the clamped default so that a missing key is not
            # reported as an invalid value "you set".
            value = float(items_algorithm.get('train_update_duration', default))
        except ValueError:
            self.__train_update_duration = default
            logging.warning(
                f'the train_update_duration type conversion has error, use default value: {self.__train_update_duration}.')
            return
        if not 0 < value <= self.__train_data_duration:
            logging.warning(
                f'the train_update_duration: {value} you set is invalid, use default value: {default}.')
            value = default
        self.__train_update_duration = value

    def __read__n_sigma_parameter(self, items_algorithm: dict):
        """Parse ``n_sigma_parameter`` as a float in the range (0, 10]."""
        default = ConfigParser.DEFAULT_N_SIGMA_PARAMETER
        try:
            value = float(items_algorithm.get('n_sigma_parameter', default))
        except ValueError:
            self.__n_sigma_parameter = default
            logging.warning(
                f'the n_sigma_parameter type conversion has error, use default value: {self.__n_sigma_parameter}.')
            return
        if not 0 < value <= 10:
            logging.warning(
                f'the n_sigma_parameter: {value} you set is invalid, use default value: {default}.')
            value = default
        self.__n_sigma_parameter = value

    def __read__boxplot_parameter(self, items_algorithm: dict):
        """Parse ``boxplot_parameter`` as a float in the range (0, 10]."""
        default = ConfigParser.DEFAULT_BOXPLOT_PARAMETER
        try:
            value = float(items_algorithm.get('boxplot_parameter', default))
        except ValueError:
            self.__boxplot_parameter = default
            logging.warning(
                f'the boxplot_parameter type conversion has error, use default value: {self.__boxplot_parameter}.')
            return
        if not 0 < value <= 10:
            logging.warning(
                f'the boxplot_parameter: {value} you set is invalid, use default value: {default}.')
            # Fixed: the reset used to be written to the n_sigma parameter,
            # leaving the invalid boxplot value in place.
            value = default
        self.__boxplot_parameter = value

    def __read__algorithm_type_and_parameter(self, items_algorithm: dict):
        """Resolve ``algorithm_type`` to its enum and read the matching parameter."""
        algorithm_type = items_algorithm.get('algorithm_type', ConfigParser.DEFAULT_ALGORITHM_TYPE)
        self.__algorithm_type = get_threshold_type_enum(algorithm_type)

        if self.__algorithm_type == ThresholdType.NSigmaThreshold:
            self.__read__n_sigma_parameter(items_algorithm)
        elif self.__algorithm_type == ThresholdType.BoxplotThreshold:
            self.__read__boxplot_parameter(items_algorithm)

    def __read__window_size(self, items_sliding_window: dict):
        """Parse ``window_size`` as an int in the range [1, 3600]."""
        default = ConfigParser.DEFAULT_WINDOW_SIZE
        try:
            value = int(items_sliding_window.get('window_size', default))
        except ValueError:
            self.__window_size = default
            logging.warning(f'window_size type conversion has error, use default value: {self.__window_size}.')
            return
        if not 1 <= value <= 3600:
            logging.warning(
                f'the window_size: {value} you set is invalid, use default value: {default}.')
            value = default
        self.__window_size = value

    def __read__window_minimum_threshold(self, items_sliding_window: dict):
        """Parse ``window_minimum_threshold`` as an int in [1, window_size].

        Must run after ``__read__window_size``.  The fallback is the class
        default, or half of the window size (at least 1) when the class default
        would exceed the window size.
        """
        default = ConfigParser.DEFAULT_WINDOW_MINIMUM_THRESHOLD
        if default > self.__window_size:
            # Fixed: "/" produced a float and could fall below the minimum of 1
            # (0.5 for a window of size 1).
            default = max(1, self.__window_size // 2)
        try:
            value = int(items_sliding_window.get('window_minimum_threshold', default))
        except ValueError:
            self.__window_minimum_threshold = default
            logging.warning(
                f'window_minimum_threshold type conversion has error, use default value: {self.__window_minimum_threshold}.')
            return
        if not 1 <= value <= self.__window_size:
            logging.warning(
                f'the window_minimum_threshold: {value} you set is invalid, use default value: {default}.')
            value = default
        self.__window_minimum_threshold = value

    def read_config_from_file(self):
        """Load the INI file and populate every setting.

        Logging is configured as soon as the log level is known so that the
        warnings emitted while validating the remaining values are formatted
        consistently.  A missing section leaves its settings at their defaults.
        """
        con = configparser.ConfigParser()
        con.read(self.__config_file_name, encoding='utf-8')

        if con.has_section('common'):
            items_common = dict(con.items('common'))
            self.__log_level = items_common.get('log_level', ConfigParser.DEFAULT_LOG_LEVEL)
            init_log_format(self.__log_level)
            self.__read_absolute_threshold(items_common)
            self.__read__slow_io_detect_frequency(items_common)
            self.__read__disks_to_detect(items_common)
        else:
            init_log_format(self.__log_level)
            logging.warning("common section parameter not found, it will be set to default value.")

        if con.has_section('algorithm'):
            items_algorithm = dict(con.items('algorithm'))
            self.__read__train_data_duration(items_algorithm)
            self.__read__train_update_duration(items_algorithm)
            self.__read__algorithm_type_and_parameter(items_algorithm)
        else:
            # get_algorithm_type() must return the enum on this path as well:
            # the caller hands the value to the threshold factory unconverted.
            self.__algorithm_type = get_threshold_type_enum(ConfigParser.DEFAULT_ALGORITHM_TYPE)
            logging.warning("algorithm section parameter not found, it will be set to default value.")

        if con.has_section('sliding_window'):
            items_sliding_window = dict(con.items('sliding_window'))
            sliding_window_type = items_sliding_window.get('sliding_window_type',
                                                           ConfigParser.DEFAULT_SLIDING_WINDOW_TYPE)
            self.__sliding_window_type = get_sliding_window_type_enum(sliding_window_type)
            self.__read__window_size(items_sliding_window)
            self.__read__window_minimum_threshold(items_sliding_window)
        else:
            # Same reasoning as for the algorithm type above.
            self.__sliding_window_type = get_sliding_window_type_enum(ConfigParser.DEFAULT_SLIDING_WINDOW_TYPE)
            logging.warning("sliding_window section parameter not found, it will be set to default value.")

        self.__print_all_config_value()

    def __print_all_config_value(self):
        """Placeholder hook called at the end of ``read_config_from_file``; does nothing yet."""
        pass

    def get_slow_io_detect_frequency(self):
        """Return the validated ``slow_io_detect_frequency`` value."""
        return self.__slow_io_detect_frequency

    def get_algorithm_type(self):
        """Return the algorithm type (the threshold enum once the file has been read)."""
        return self.__algorithm_type

    def get_sliding_window_type(self):
        """Return the sliding window type (the enum once the file has been read)."""
        return self.__sliding_window_type

    def get_train_data_duration_and_train_update_duration(self):
        """Return ``(train_data_duration, train_update_duration)``."""
        return self.__train_data_duration, self.__train_update_duration

    def get_window_size_and_window_minimum_threshold(self):
        """Return ``(window_size, window_minimum_threshold)``."""
        return self.__window_size, self.__window_minimum_threshold

    def get_absolute_threshold(self):
        """Return the validated ``absolute_threshold`` value."""
        return self.__absolute_threshold

    def get_log_level(self):
        """Return the configured log level name as read from the file."""
        return self.__log_level

    def get_disks_to_detection(self):
        """Return the configured ``MetricName`` list, or ``None`` when not configured."""
        return self.__disks_to_detection

    def get_boxplot_parameter(self):
        """Return the validated ``boxplot_parameter`` value."""
        return self.__boxplot_parameter

    def get_n_sigma_parameter(self):
        """Return the validated ``n_sigma_parameter`` value."""
        return self.__n_sigma_parameter
1e-6: - logging.debug(f'Input metric value: {str(metric_value)}') - self._threshold.push_latest_data_to_queue(metric_value) + if metric_value is None: + logging.debug('not found metric value, so return None.') + return False, None, None + logging.debug(f'input metric value: {str(metric_value)}') + self._threshold.push_latest_data_to_queue(metric_value) detection_result = self._slidingWindow.is_slow_io_event(metric_value) logging.debug(f'Detection result: {str(detection_result)}') - logging.debug(f'Exit Detector: {self}') + logging.debug(f'exit Detector: {self}') return detection_result def __repr__(self): diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/io_data.py b/src/python/sentryPlugins/ai_block_io/io_data.py similarity index 100% rename from src/python/sentryPlugins/ai_threshold_slow_io_detection/io_data.py rename to src/python/sentryPlugins/ai_block_io/io_data.py diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/sliding_window.py b/src/python/sentryPlugins/ai_block_io/sliding_window.py similarity index 100% rename from src/python/sentryPlugins/ai_threshold_slow_io_detection/sliding_window.py rename to src/python/sentryPlugins/ai_block_io/sliding_window.py diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/threshold.py b/src/python/sentryPlugins/ai_block_io/threshold.py similarity index 92% rename from src/python/sentryPlugins/ai_threshold_slow_io_detection/threshold.py rename to src/python/sentryPlugins/ai_block_io/threshold.py index 9e1ca7b..ff85d85 100644 --- a/src/python/sentryPlugins/ai_threshold_slow_io_detection/threshold.py +++ b/src/python/sentryPlugins/ai_block_io/threshold.py @@ -79,9 +79,9 @@ class AbsoluteThreshold(Threshold): class BoxplotThreshold(Threshold): - def __init__(self, parameter: float = 1.5, data_queue_size: int = 10000, data_queue_update_size: int = 1000): + def __init__(self, boxplot_parameter: float = 1.5, data_queue_size: int = 10000, data_queue_update_size: int = 1000, 
**kwargs): super().__init__(data_queue_size, data_queue_update_size) - self.parameter = parameter + self.parameter = boxplot_parameter def _update_threshold(self): data = list(self.data_queue.queue) @@ -94,6 +94,8 @@ class BoxplotThreshold(Threshold): self.notify_observer() def push_latest_data_to_queue(self, data): + if data < 1e-6: + return try: self.data_queue.put(data, block=False) except queue.Full: @@ -111,9 +113,9 @@ class BoxplotThreshold(Threshold): class NSigmaThreshold(Threshold): - def __init__(self, parameter: float = 2.0, data_queue_size: int = 10000, data_queue_update_size: int = 1000): + def __init__(self, n_sigma_parameter: float = 3.0, data_queue_size: int = 10000, data_queue_update_size: int = 1000, **kwargs): super().__init__(data_queue_size, data_queue_update_size) - self.parameter = parameter + self.parameter = n_sigma_parameter def _update_threshold(self): data = list(self.data_queue.queue) @@ -125,6 +127,8 @@ class NSigmaThreshold(Threshold): self.notify_observer() def push_latest_data_to_queue(self, data): + if data < 1e-6: + return try: self.data_queue.put(data, block=False) except queue.Full: @@ -157,4 +161,3 @@ class ThresholdFactory: return NSigmaThreshold(*args, **kwargs) else: raise ValueError(f"Invalid threshold type: {threshold_type}") - diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/utils.py b/src/python/sentryPlugins/ai_block_io/utils.py similarity index 86% rename from src/python/sentryPlugins/ai_threshold_slow_io_detection/utils.py rename to src/python/sentryPlugins/ai_block_io/utils.py index f66e5ed..8dbba06 100644 --- a/src/python/sentryPlugins/ai_threshold_slow_io_detection/utils.py +++ b/src/python/sentryPlugins/ai_block_io/utils.py @@ -8,13 +8,16 @@ # IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR # PURPOSE. # See the Mulan PSL v2 for more details. 
+ import logging from dataclasses import asdict + from .threshold import ThresholdType from .sliding_window import SlidingWindowType from .io_data import MetricName, IOData + def get_threshold_type_enum(algorithm_type: str): if algorithm_type.lower() == 'absolute': return ThresholdType.AbsoluteThreshold @@ -22,7 +25,7 @@ def get_threshold_type_enum(algorithm_type: str): return ThresholdType.BoxplotThreshold if algorithm_type.lower() == 'n_sigma': return ThresholdType.NSigmaThreshold - logging.info('not found correct algorithm type, use default: boxplot.') + logging.warning(f"the algorithm type: {algorithm_type} you set is invalid, use default value: boxplot") return ThresholdType.BoxplotThreshold @@ -33,7 +36,7 @@ def get_sliding_window_type_enum(sliding_window_type: str): return SlidingWindowType.ContinuousSlidingWindow if sliding_window_type.lower() == 'median': return SlidingWindowType.MedianSlidingWindow - logging.info('not found correct sliding window type, use default: not_continuous.') + logging.warning(f"the sliding window type: {sliding_window_type} you set is invalid, use default value: not_continuous") return SlidingWindowType.NotContinuousSlidingWindow @@ -62,6 +65,8 @@ def get_log_level(log_level: str): return logging.INFO elif log_level.lower() == 'warning': return logging.WARNING - elif log_level.lower() == 'fatal': - return logging.FATAL - return None + elif log_level.lower() == 'error': + return logging.ERROR + elif log_level.lower() == 'critical': + return logging.CRITICAL + return logging.INFO diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/config_parser.py b/src/python/sentryPlugins/ai_threshold_slow_io_detection/config_parser.py deleted file mode 100644 index cd4e6f1..0000000 --- a/src/python/sentryPlugins/ai_threshold_slow_io_detection/config_parser.py +++ /dev/null @@ -1,141 +0,0 @@ -# coding: utf-8 -# Copyright (c) 2024 Huawei Technologies Co., Ltd. -# sysSentry is licensed under the Mulan PSL v2. 
-# You can use this software according to the terms and conditions of the Mulan PSL v2. -# You may obtain a copy of Mulan PSL v2 at: -# http://license.coscl.org.cn/MulanPSL2 -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -# PURPOSE. -# See the Mulan PSL v2 for more details. - -import configparser -import logging - - -class ConfigParser: - - DEFAULT_ABSOLUTE_THRESHOLD = 40 - DEFAULT_SLOW_IO_DETECTION_FREQUENCY = 1 - DEFAULT_LOG_LEVEL = 'info' - DEFAULT_TRAIN_DATA_DURATION = 24 - DEFAULT_TRAIN_UPDATE_DURATION = 2 - DEFAULT_ALGORITHM_TYPE = 'boxplot' - DEFAULT_N_SIGMA_PARAMETER = 3 - DEFAULT_BOXPLOT_PARAMETER = 1.5 - DEFAULT_SLIDING_WINDOW_TYPE = 'not_continuous' - DEFAULT_WINDOW_SIZE = 30 - DEFAULT_WINDOW_MINIMUM_THRESHOLD = 6 - - def __init__(self, config_file_name): - self.__boxplot_parameter = None - self.__window_minimum_threshold = None - self.__window_size = None - self.__sliding_window_type = None - self.__n_sigma_parameter = None - self.__algorithm_type = None - self.__train_update_duration = None - self.__log_level = None - self.__slow_io_detect_frequency = None - self.__absolute_threshold = None - self.__train_data_duration = None - self.__config_file_name = config_file_name - - def read_config_from_file(self): - - con = configparser.ConfigParser() - con.read(self.__config_file_name, encoding='utf-8') - - items_common = dict(con.items('common')) - items_algorithm = dict(con.items('algorithm')) - items_sliding_window = dict(con.items('sliding_window')) - - try: - self.__absolute_threshold = int(items_common.get('absolute_threshold', - ConfigParser.DEFAULT_ABSOLUTE_THRESHOLD)) - except ValueError: - self.__absolute_threshold = ConfigParser.DEFAULT_ABSOLUTE_THRESHOLD - logging.warning('absolute threshold type conversion has error, use default value.') - - try: - self.__slow_io_detect_frequency = 
int(items_common.get('slow_io_detect_frequency', - ConfigParser.DEFAULT_SLOW_IO_DETECTION_FREQUENCY)) - except ValueError: - self.__slow_io_detect_frequency = ConfigParser.DEFAULT_SLOW_IO_DETECTION_FREQUENCY - logging.warning('slow_io_detect_frequency type conversion has error, use default value.') - - self.__log_level = items_common.get('log_level', ConfigParser.DEFAULT_LOG_LEVEL) - - try: - self.__train_data_duration = float(items_algorithm.get('train_data_duration', - ConfigParser.DEFAULT_TRAIN_DATA_DURATION)) - except ValueError: - self.__train_data_duration = ConfigParser.DEFAULT_TRAIN_DATA_DURATION - logging.warning('train_data_duration type conversion has error, use default value.') - - try: - self.__train_update_duration = float(items_algorithm.get('train_update_duration', - ConfigParser.DEFAULT_TRAIN_UPDATE_DURATION)) - except ValueError: - self.__train_update_duration = ConfigParser.DEFAULT_TRAIN_UPDATE_DURATION - logging.warning('train_update_duration type conversion has error, use default value.') - - try: - self.__algorithm_type = items_algorithm.get('algorithm_type', ConfigParser.DEFAULT_ALGORITHM_TYPE) - except ValueError: - self.__algorithm_type = ConfigParser.DEFAULT_ALGORITHM_TYPE - logging.warning('algorithmType type conversion has error, use default value.') - - if self.__algorithm_type == 'n_sigma': - try: - self.__n_sigma_parameter = float(items_algorithm.get('n_sigma_parameter', - ConfigParser.DEFAULT_N_SIGMA_PARAMETER)) - except ValueError: - self.__n_sigma_parameter = ConfigParser.DEFAULT_N_SIGMA_PARAMETER - logging.warning('n_sigma_parameter type conversion has error, use default value.') - elif self.__algorithm_type == 'boxplot': - try: - self.__boxplot_parameter = float(items_algorithm.get('boxplot_parameter', - ConfigParser.DEFAULT_BOXPLOT_PARAMETER)) - except ValueError: - self.__boxplot_parameter = ConfigParser.DEFAULT_BOXPLOT_PARAMETER - logging.warning('boxplot_parameter type conversion has error, use default value.') - - 
self.__sliding_window_type = items_sliding_window.get('sliding_window_type', - ConfigParser.DEFAULT_SLIDING_WINDOW_TYPE) - - try: - self.__window_size = int(items_sliding_window.get('window_size', - ConfigParser.DEFAULT_WINDOW_SIZE)) - except ValueError: - self.__window_size = ConfigParser.DEFAULT_WINDOW_SIZE - logging.warning('window_size type conversion has error, use default value.') - - try: - self.__window_minimum_threshold = ( - int(items_sliding_window.get('window_minimum_threshold', - ConfigParser.DEFAULT_WINDOW_MINIMUM_THRESHOLD))) - except ValueError: - self.__window_minimum_threshold = ConfigParser.DEFAULT_WINDOW_MINIMUM_THRESHOLD - logging.warning('window_minimum_threshold type conversion has error, use default value.') - - def get_slow_io_detect_frequency(self): - return self.__slow_io_detect_frequency - - def get_algorithm_type(self): - return self.__algorithm_type - - def get_sliding_window_type(self): - return self.__sliding_window_type - - def get_train_data_duration_and_train_update_duration(self): - return self.__train_data_duration, self.__train_update_duration - - def get_window_size_and_window_minimum_threshold(self): - return self.__window_size, self.__window_minimum_threshold - - def get_absolute_threshold(self): - return self.__absolute_threshold - - def get_log_level(self): - return self.__log_level diff --git a/src/python/setup.py b/src/python/setup.py index dac6481..9e26a10 100644 --- a/src/python/setup.py +++ b/src/python/setup.py @@ -34,7 +34,7 @@ setup( 'xalarmd=xalarm.xalarm_daemon:alarm_process_create', 'sentryCollector=sentryCollector.collectd:main', 'avg_block_io=sentryPlugins.avg_block_io.avg_block_io:main', - 'ai_threshold_slow_io_detection=sentryPlugins.ai_threshold_slow_io_detection.slow_io_detection:main' + 'ai_block_io=sentryPlugins.ai_block_io.ai_block_io:main' ] }, ) -- 2.23.0