1202 lines
51 KiB
Diff
1202 lines
51 KiB
Diff
|
|
From 3d72fa7f517e6e99af1205e965c3775dc23461f4 Mon Sep 17 00:00:00 2001
|
|||
|
|
From: =?UTF-8?q?=E8=B4=BA=E6=9C=89=E5=BF=97?= <1037617413@qq.com>
|
|||
|
|
Date: Mon, 23 Sep 2024 11:03:26 +0800
|
|||
|
|
Subject: [PATCH] add ai threshold slow io detection to sysSentry
|
|||
|
|
|
|||
|
|
---
|
|||
|
|
.../ai_threshold_slow_io_detection.ini | 16 ++
|
|||
|
|
.../tasks/ai_threshold_slow_io_detection.mod | 5 +
|
|||
|
|
.../test_ai_threshold_slow_io_detection.py | 165 ++++++++++++++++++
|
|||
|
|
.../ai_threshold_slow_io_detection/README.md | 2 +
|
|||
|
|
.../__init__.py | 0
|
|||
|
|
.../alarm_report.py | 49 ++++++
|
|||
|
|
.../config_parser.py | 141 +++++++++++++++
|
|||
|
|
.../data_access.py | 91 ++++++++++
|
|||
|
|
.../detector.py | 48 +++++
|
|||
|
|
.../ai_threshold_slow_io_detection/io_data.py | 74 ++++++++
|
|||
|
|
.../sliding_window.py | 113 ++++++++++++
|
|||
|
|
.../slow_io_detection.py | 133 ++++++++++++++
|
|||
|
|
.../threshold.py | 160 +++++++++++++++++
|
|||
|
|
.../ai_threshold_slow_io_detection/utils.py | 67 +++++++
|
|||
|
|
src/python/setup.py | 3 +-
|
|||
|
|
15 files changed, 1066 insertions(+), 1 deletion(-)
|
|||
|
|
create mode 100644 config/plugins/ai_threshold_slow_io_detection.ini
|
|||
|
|
create mode 100644 config/tasks/ai_threshold_slow_io_detection.mod
|
|||
|
|
create mode 100644 selftest/test/test_ai_threshold_slow_io_detection.py
|
|||
|
|
create mode 100644 src/python/sentryPlugins/ai_threshold_slow_io_detection/README.md
|
|||
|
|
create mode 100644 src/python/sentryPlugins/ai_threshold_slow_io_detection/__init__.py
|
|||
|
|
create mode 100644 src/python/sentryPlugins/ai_threshold_slow_io_detection/alarm_report.py
|
|||
|
|
create mode 100644 src/python/sentryPlugins/ai_threshold_slow_io_detection/config_parser.py
|
|||
|
|
create mode 100644 src/python/sentryPlugins/ai_threshold_slow_io_detection/data_access.py
|
|||
|
|
create mode 100644 src/python/sentryPlugins/ai_threshold_slow_io_detection/detector.py
|
|||
|
|
create mode 100644 src/python/sentryPlugins/ai_threshold_slow_io_detection/io_data.py
|
|||
|
|
create mode 100644 src/python/sentryPlugins/ai_threshold_slow_io_detection/sliding_window.py
|
|||
|
|
create mode 100644 src/python/sentryPlugins/ai_threshold_slow_io_detection/slow_io_detection.py
|
|||
|
|
create mode 100644 src/python/sentryPlugins/ai_threshold_slow_io_detection/threshold.py
|
|||
|
|
create mode 100644 src/python/sentryPlugins/ai_threshold_slow_io_detection/utils.py
|
|||
|
|
|
|||
|
|
diff --git a/config/plugins/ai_threshold_slow_io_detection.ini b/config/plugins/ai_threshold_slow_io_detection.ini
|
|||
|
|
new file mode 100644
|
|||
|
|
index 0000000..44eb928
|
|||
|
|
--- /dev/null
|
|||
|
|
+++ b/config/plugins/ai_threshold_slow_io_detection.ini
|
|||
|
|
@@ -0,0 +1,16 @@
|
|||
|
|
+[common]
|
|||
|
|
+absolute_threshold=40
|
|||
|
|
+slow_io_detect_frequency=1
|
|||
|
|
+log_level=info
|
|||
|
|
+
|
|||
|
|
+[algorithm]
|
|||
|
|
+train_data_duration=0.1
|
|||
|
|
+train_update_duration=0.02
|
|||
|
|
+algorithm_type=n_sigma
|
|||
|
|
+boxplot_parameter=1.5
|
|||
|
|
+n_sigma_parameter=3
|
|||
|
|
+
|
|||
|
|
+[sliding_window]
|
|||
|
|
+sliding_window_type=not_continuous
|
|||
|
|
+window_size=30
|
|||
|
|
+window_minimum_threshold=6
|
|||
|
|
\ No newline at end of file
|
|||
|
|
diff --git a/config/tasks/ai_threshold_slow_io_detection.mod b/config/tasks/ai_threshold_slow_io_detection.mod
|
|||
|
|
new file mode 100644
|
|||
|
|
index 0000000..2729f72
|
|||
|
|
--- /dev/null
|
|||
|
|
+++ b/config/tasks/ai_threshold_slow_io_detection.mod
|
|||
|
|
@@ -0,0 +1,5 @@
|
|||
|
|
+[common]
|
|||
|
|
+enabled=yes
|
|||
|
|
+task_start=/usr/bin/python3 /usr/bin/ai_threshold_slow_io_detection
|
|||
|
|
+task_stop=pkill -f /usr/bin/ai_threshold_slow_io_detection
|
|||
|
|
+type=oneshot
|
|||
|
|
\ No newline at end of file
|
|||
|
|
diff --git a/selftest/test/test_ai_threshold_slow_io_detection.py b/selftest/test/test_ai_threshold_slow_io_detection.py
|
|||
|
|
new file mode 100644
|
|||
|
|
index 0000000..c36fef5
|
|||
|
|
--- /dev/null
|
|||
|
|
+++ b/selftest/test/test_ai_threshold_slow_io_detection.py
|
|||
|
|
@@ -0,0 +1,165 @@
|
|||
|
|
+# coding: utf-8
|
|||
|
|
+# Copyright (c) 2024 Huawei Technologies Co., Ltd.
|
|||
|
|
+# sysSentry is licensed under the Mulan PSL v2.
|
|||
|
|
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
|
|||
|
|
+# You may obtain a copy of Mulan PSL v2 at:
|
|||
|
|
+# http://license.coscl.org.cn/MulanPSL2
|
|||
|
|
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
|
|||
|
|
+# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
|
|||
|
|
+# PURPOSE.
|
|||
|
|
+# See the Mulan PSL v2 for more details.
|
|||
|
|
+
|
|||
|
|
+import unittest
|
|||
|
|
+import numpy as np
|
|||
|
|
+
|
|||
|
|
+from sentryPlugins.ai_threshold_slow_io_detection.threshold import AbsoluteThreshold, BoxplotThreshold, NSigmaThreshold
|
|||
|
|
+from sentryPlugins.ai_threshold_slow_io_detection.sliding_window import (NotContinuousSlidingWindow,
|
|||
|
|
+ ContinuousSlidingWindow, MedianSlidingWindow)
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
+def _get_boxplot_threshold(data_list: list, parameter):
|
|||
|
|
+ q1 = np.percentile(data_list, 25)
|
|||
|
|
+ q3 = np.percentile(data_list, 75)
|
|||
|
|
+ iqr = q3 - q1
|
|||
|
|
+ return q3 + parameter * iqr
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
+def _get_n_sigma_threshold(data_list: list, parameter):
|
|||
|
|
+ mean = np.mean(data_list)
|
|||
|
|
+ std = np.std(data_list)
|
|||
|
|
+ return mean + parameter * std
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
+class Test(unittest.TestCase):
|
|||
|
|
+ @classmethod
|
|||
|
|
+ def setUpClass(cls):
|
|||
|
|
+ print("UnitTest Begin...")
|
|||
|
|
+
|
|||
|
|
+ @classmethod
|
|||
|
|
+ def tearDownClass(cls):
|
|||
|
|
+ print("UnitTest End...")
|
|||
|
|
+
|
|||
|
|
+ def setUp(self):
|
|||
|
|
+ print("Begin...")
|
|||
|
|
+
|
|||
|
|
+ def tearDown(self):
|
|||
|
|
+ print("End...")
|
|||
|
|
+
|
|||
|
|
+ def test_absolute_threshold(self):
|
|||
|
|
+ absolute = AbsoluteThreshold()
|
|||
|
|
+ self.assertEqual(None, absolute.get_threshold())
|
|||
|
|
+ self.assertFalse(absolute.is_abnormal(5000))
|
|||
|
|
+ absolute.set_threshold(40)
|
|||
|
|
+ self.assertEqual(40, absolute.get_threshold())
|
|||
|
|
+ self.assertTrue(absolute.is_abnormal(50))
|
|||
|
|
+
|
|||
|
|
+ def test_boxplot_threshold(self):
|
|||
|
|
+ boxplot = BoxplotThreshold(1.5, 5, 1)
|
|||
|
|
+ # 阶段1:尚未初始化
|
|||
|
|
+ self.assertEqual(None, boxplot.get_threshold())
|
|||
|
|
+ self.assertFalse(boxplot.is_abnormal(5000))
|
|||
|
|
+ # 往boxplot中插入5个元素后,会生成阈值
|
|||
|
|
+ data_list = [20, 20, 20, 30, 10]
|
|||
|
|
+ for data in data_list:
|
|||
|
|
+ boxplot.push_latest_data_to_queue(data)
|
|||
|
|
+ # 阶段2:初始化
|
|||
|
|
+ boxplot_threshold = boxplot.get_threshold()
|
|||
|
|
+ self.assertEqual(_get_boxplot_threshold(data_list, 1.5), boxplot_threshold)
|
|||
|
|
+ self.assertTrue(boxplot.is_abnormal(5000))
|
|||
|
|
+ data_list.pop(0)
|
|||
|
|
+ data_list.append(100)
|
|||
|
|
+ boxplot.push_latest_data_to_queue(100)
|
|||
|
|
+ # 阶段3:更新阈值
|
|||
|
|
+ boxplot_threshold = boxplot.get_threshold()
|
|||
|
|
+ self.assertEqual(_get_boxplot_threshold(data_list, 1.5), boxplot_threshold)
|
|||
|
|
+
|
|||
|
|
+ def test_n_sigma_threshold(self):
|
|||
|
|
+ n_sigma = NSigmaThreshold(3, 5, 1)
|
|||
|
|
+ self.assertEqual(None, n_sigma.get_threshold())
|
|||
|
|
+ self.assertFalse(n_sigma.is_abnormal(5000))
|
|||
|
|
+ data_list = [20, 20, 20, 30, 10]
|
|||
|
|
+ for data in data_list:
|
|||
|
|
+ n_sigma.push_latest_data_to_queue(data)
|
|||
|
|
+ n_sigma_threshold = n_sigma.get_threshold()
|
|||
|
|
+ self.assertEqual(_get_n_sigma_threshold(data_list, 3), n_sigma_threshold)
|
|||
|
|
+ self.assertTrue(n_sigma.is_abnormal(5000))
|
|||
|
|
+ data_list.pop(0)
|
|||
|
|
+ data_list.append(100)
|
|||
|
|
+ n_sigma.push_latest_data_to_queue(100)
|
|||
|
|
+ # 阶段3:更新阈值
|
|||
|
|
+ n_sigma_threshold = n_sigma.get_threshold()
|
|||
|
|
+ self.assertEqual(_get_n_sigma_threshold(data_list, 3), n_sigma_threshold)
|
|||
|
|
+
|
|||
|
|
+ def test_not_continuous_sliding_window(self):
|
|||
|
|
+ not_continuous = NotContinuousSlidingWindow(5, 3)
|
|||
|
|
+ boxplot_threshold = BoxplotThreshold(1.5, 10, 8)
|
|||
|
|
+ boxplot_threshold.attach_observer(not_continuous)
|
|||
|
|
+ data_list1 = [19, 20, 20, 20, 20, 20, 22, 24, 23, 20]
|
|||
|
|
+ for data in data_list1:
|
|||
|
|
+ boxplot_threshold.push_latest_data_to_queue(data)
|
|||
|
|
+ result = not_continuous.is_slow_io_event(data)
|
|||
|
|
+ self.assertFalse(result[0])
|
|||
|
|
+ self.assertEqual(23.75, boxplot_threshold.get_threshold())
|
|||
|
|
+ boxplot_threshold.push_latest_data_to_queue(24)
|
|||
|
|
+ result = not_continuous.is_slow_io_event(24)
|
|||
|
|
+ self.assertFalse(result[0])
|
|||
|
|
+ boxplot_threshold.push_latest_data_to_queue(25)
|
|||
|
|
+ result = not_continuous.is_slow_io_event(25)
|
|||
|
|
+ self.assertTrue(result[0])
|
|||
|
|
+ data_list2 = [20, 20, 20, 20, 20, 20]
|
|||
|
|
+ for data in data_list2:
|
|||
|
|
+ boxplot_threshold.push_latest_data_to_queue(data)
|
|||
|
|
+ result = not_continuous.is_slow_io_event(data)
|
|||
|
|
+ self.assertFalse(result[0])
|
|||
|
|
+ self.assertEqual(25.625, boxplot_threshold.get_threshold())
|
|||
|
|
+
|
|||
|
|
+ def test_continuous_sliding_window(self):
|
|||
|
|
+ continuous = ContinuousSlidingWindow(5, 3)
|
|||
|
|
+ boxplot_threshold = BoxplotThreshold(1.5, 10, 8)
|
|||
|
|
+ boxplot_threshold.attach_observer(continuous)
|
|||
|
|
+ data_list = [19, 20, 20, 20, 20, 20, 22, 24, 23, 20]
|
|||
|
|
+ for data in data_list:
|
|||
|
|
+ boxplot_threshold.push_latest_data_to_queue(data)
|
|||
|
|
+ result = continuous.is_slow_io_event(data)
|
|||
|
|
+ self.assertFalse(result[0])
|
|||
|
|
+ self.assertEqual(23.75, boxplot_threshold.get_threshold())
|
|||
|
|
+ # 没有三个异常点
|
|||
|
|
+ self.assertFalse(continuous.is_slow_io_event(25)[0])
|
|||
|
|
+ # 不连续的三个异常点
|
|||
|
|
+ self.assertFalse(continuous.is_slow_io_event(25)[0])
|
|||
|
|
+ # 连续的三个异常点
|
|||
|
|
+ self.assertTrue(continuous.is_slow_io_event(25)[0])
|
|||
|
|
+
|
|||
|
|
+ def test_median_sliding_window(self):
|
|||
|
|
+ median = MedianSlidingWindow(5, 3)
|
|||
|
|
+ absolute_threshold = AbsoluteThreshold(10, 8)
|
|||
|
|
+ absolute_threshold.attach_observer(median)
|
|||
|
|
+ absolute_threshold.set_threshold(24.5)
|
|||
|
|
+ data_list = [24, 24, 24, 25, 25]
|
|||
|
|
+ for data in data_list:
|
|||
|
|
+ self.assertFalse(median.is_slow_io_event(data)[0])
|
|||
|
|
+ self.assertTrue(median.is_slow_io_event(25)[0])
|
|||
|
|
+
|
|||
|
|
+ def test_parse_collect_data(self):
|
|||
|
|
+ collect = {
|
|||
|
|
+ "read": [1.0, 2.0, 3.0, 4.0],
|
|||
|
|
+ "write": [5.0, 6.0, 7.0, 8.0],
|
|||
|
|
+ "flush": [9.0, 10.0, 11.0, 12.0],
|
|||
|
|
+ "discard": [13.0, 14.0, 15.0, 16.0],
|
|||
|
|
+ }
|
|||
|
|
+ from io_data import BaseData
|
|||
|
|
+ from data_access import _get_io_stage_data
|
|||
|
|
+
|
|||
|
|
+ io_data = _get_io_stage_data(collect)
|
|||
|
|
+ self.assertEqual(
|
|||
|
|
+ io_data.read, BaseData(latency=1.0, io_dump=2.0, io_length=3.0, iops=4.0)
|
|||
|
|
+ )
|
|||
|
|
+ self.assertEqual(
|
|||
|
|
+ io_data.write, BaseData(latency=5.0, io_dump=6.0, io_length=7.0, iops=8.0)
|
|||
|
|
+ )
|
|||
|
|
+ self.assertEqual(
|
|||
|
|
+ io_data.flush, BaseData(latency=9.0, io_dump=10.0, io_length=11.0, iops=12.0)
|
|||
|
|
+ )
|
|||
|
|
+ self.assertEqual(
|
|||
|
|
+ io_data.discard, BaseData(latency=13.0, io_dump=14.0, io_length=15.0, iops=16.0)
|
|||
|
|
+ )
|
|||
|
|
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/README.md b/src/python/sentryPlugins/ai_threshold_slow_io_detection/README.md
|
|||
|
|
new file mode 100644
|
|||
|
|
index 0000000..f9b8388
|
|||
|
|
--- /dev/null
|
|||
|
|
+++ b/src/python/sentryPlugins/ai_threshold_slow_io_detection/README.md
|
|||
|
|
@@ -0,0 +1,2 @@
|
|||
|
|
+# slow_io_detection
|
|||
|
|
+
|
|||
|
|
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/__init__.py b/src/python/sentryPlugins/ai_threshold_slow_io_detection/__init__.py
|
|||
|
|
new file mode 100644
|
|||
|
|
index 0000000..e69de29
|
|||
|
|
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/alarm_report.py b/src/python/sentryPlugins/ai_threshold_slow_io_detection/alarm_report.py
|
|||
|
|
new file mode 100644
|
|||
|
|
index 0000000..3f4f34e
|
|||
|
|
--- /dev/null
|
|||
|
|
+++ b/src/python/sentryPlugins/ai_threshold_slow_io_detection/alarm_report.py
|
|||
|
|
@@ -0,0 +1,49 @@
|
|||
|
|
+# coding: utf-8
|
|||
|
|
+# Copyright (c) 2024 Huawei Technologies Co., Ltd.
|
|||
|
|
+# sysSentry is licensed under the Mulan PSL v2.
|
|||
|
|
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
|
|||
|
|
+# You may obtain a copy of Mulan PSL v2 at:
|
|||
|
|
+# http://license.coscl.org.cn/MulanPSL2
|
|||
|
|
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
|
|||
|
|
+# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
|
|||
|
|
+# PURPOSE.
|
|||
|
|
+# See the Mulan PSL v2 for more details.
|
|||
|
|
+
|
|||
|
|
+from syssentry.result import ResultLevel, report_result
|
|||
|
|
+import logging
|
|||
|
|
+import json
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
+class AlarmReport:
|
|||
|
|
+ TASK_NAME = "SLOW_IO_DETECTION"
|
|||
|
|
+
|
|||
|
|
+ @staticmethod
|
|||
|
|
+ def report_pass(info: str):
|
|||
|
|
+ report_result(AlarmReport.TASK_NAME, ResultLevel.PASS, json.dumps({"msg": info}))
|
|||
|
|
+ logging.info(f'Report {AlarmReport.TASK_NAME} PASS: {info}')
|
|||
|
|
+
|
|||
|
|
+ @staticmethod
|
|||
|
|
+ def report_fail(info: str):
|
|||
|
|
+ report_result(AlarmReport.TASK_NAME, ResultLevel.FAIL, json.dumps({"msg": info}))
|
|||
|
|
+ logging.info(f'Report {AlarmReport.TASK_NAME} FAIL: {info}')
|
|||
|
|
+
|
|||
|
|
+ @staticmethod
|
|||
|
|
+ def report_skip(info: str):
|
|||
|
|
+ report_result(AlarmReport.TASK_NAME, ResultLevel.SKIP, json.dumps({"msg": info}))
|
|||
|
|
+ logging.info(f'Report {AlarmReport.TASK_NAME} SKIP: {info}')
|
|||
|
|
+
|
|||
|
|
+ @staticmethod
|
|||
|
|
+ def report_minor_alm(info: str):
|
|||
|
|
+ report_result(AlarmReport.TASK_NAME, ResultLevel.MINOR_ALM, json.dumps({"msg": info}))
|
|||
|
|
+ logging.info(f'Report {AlarmReport.TASK_NAME} MINOR_ALM: {info}')
|
|||
|
|
+
|
|||
|
|
+ @staticmethod
|
|||
|
|
+ def report_major_alm(info: str):
|
|||
|
|
+ report_result(AlarmReport.TASK_NAME, ResultLevel.MAJOR_ALM, json.dumps({"msg": info}))
|
|||
|
|
+ logging.info(f'Report {AlarmReport.TASK_NAME} MAJOR_ALM: {info}')
|
|||
|
|
+
|
|||
|
|
+ @staticmethod
|
|||
|
|
+ def report_critical_alm(info: str):
|
|||
|
|
+ report_result(AlarmReport.TASK_NAME, ResultLevel.CRITICAL_ALM, json.dumps({"msg": info}))
|
|||
|
|
+ logging.info(f'Report {AlarmReport.TASK_NAME} CRITICAL_ALM: {info}')
|
|||
|
|
+
|
|||
|
|
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/config_parser.py b/src/python/sentryPlugins/ai_threshold_slow_io_detection/config_parser.py
|
|||
|
|
new file mode 100644
|
|||
|
|
index 0000000..cd4e6f1
|
|||
|
|
--- /dev/null
|
|||
|
|
+++ b/src/python/sentryPlugins/ai_threshold_slow_io_detection/config_parser.py
|
|||
|
|
@@ -0,0 +1,141 @@
|
|||
|
|
+# coding: utf-8
|
|||
|
|
+# Copyright (c) 2024 Huawei Technologies Co., Ltd.
|
|||
|
|
+# sysSentry is licensed under the Mulan PSL v2.
|
|||
|
|
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
|
|||
|
|
+# You may obtain a copy of Mulan PSL v2 at:
|
|||
|
|
+# http://license.coscl.org.cn/MulanPSL2
|
|||
|
|
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
|
|||
|
|
+# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
|
|||
|
|
+# PURPOSE.
|
|||
|
|
+# See the Mulan PSL v2 for more details.
|
|||
|
|
+
|
|||
|
|
+import configparser
|
|||
|
|
+import logging
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
+class ConfigParser:
|
|||
|
|
+
|
|||
|
|
+ DEFAULT_ABSOLUTE_THRESHOLD = 40
|
|||
|
|
+ DEFAULT_SLOW_IO_DETECTION_FREQUENCY = 1
|
|||
|
|
+ DEFAULT_LOG_LEVEL = 'info'
|
|||
|
|
+ DEFAULT_TRAIN_DATA_DURATION = 24
|
|||
|
|
+ DEFAULT_TRAIN_UPDATE_DURATION = 2
|
|||
|
|
+ DEFAULT_ALGORITHM_TYPE = 'boxplot'
|
|||
|
|
+ DEFAULT_N_SIGMA_PARAMETER = 3
|
|||
|
|
+ DEFAULT_BOXPLOT_PARAMETER = 1.5
|
|||
|
|
+ DEFAULT_SLIDING_WINDOW_TYPE = 'not_continuous'
|
|||
|
|
+ DEFAULT_WINDOW_SIZE = 30
|
|||
|
|
+ DEFAULT_WINDOW_MINIMUM_THRESHOLD = 6
|
|||
|
|
+
|
|||
|
|
+ def __init__(self, config_file_name):
|
|||
|
|
+ self.__boxplot_parameter = None
|
|||
|
|
+ self.__window_minimum_threshold = None
|
|||
|
|
+ self.__window_size = None
|
|||
|
|
+ self.__sliding_window_type = None
|
|||
|
|
+ self.__n_sigma_parameter = None
|
|||
|
|
+ self.__algorithm_type = None
|
|||
|
|
+ self.__train_update_duration = None
|
|||
|
|
+ self.__log_level = None
|
|||
|
|
+ self.__slow_io_detect_frequency = None
|
|||
|
|
+ self.__absolute_threshold = None
|
|||
|
|
+ self.__train_data_duration = None
|
|||
|
|
+ self.__config_file_name = config_file_name
|
|||
|
|
+
|
|||
|
|
+ def read_config_from_file(self):
|
|||
|
|
+
|
|||
|
|
+ con = configparser.ConfigParser()
|
|||
|
|
+ con.read(self.__config_file_name, encoding='utf-8')
|
|||
|
|
+
|
|||
|
|
+ items_common = dict(con.items('common'))
|
|||
|
|
+ items_algorithm = dict(con.items('algorithm'))
|
|||
|
|
+ items_sliding_window = dict(con.items('sliding_window'))
|
|||
|
|
+
|
|||
|
|
+ try:
|
|||
|
|
+ self.__absolute_threshold = int(items_common.get('absolute_threshold',
|
|||
|
|
+ ConfigParser.DEFAULT_ABSOLUTE_THRESHOLD))
|
|||
|
|
+ except ValueError:
|
|||
|
|
+ self.__absolute_threshold = ConfigParser.DEFAULT_ABSOLUTE_THRESHOLD
|
|||
|
|
+ logging.warning('absolute threshold type conversion has error, use default value.')
|
|||
|
|
+
|
|||
|
|
+ try:
|
|||
|
|
+ self.__slow_io_detect_frequency = int(items_common.get('slow_io_detect_frequency',
|
|||
|
|
+ ConfigParser.DEFAULT_SLOW_IO_DETECTION_FREQUENCY))
|
|||
|
|
+ except ValueError:
|
|||
|
|
+ self.__slow_io_detect_frequency = ConfigParser.DEFAULT_SLOW_IO_DETECTION_FREQUENCY
|
|||
|
|
+ logging.warning('slow_io_detect_frequency type conversion has error, use default value.')
|
|||
|
|
+
|
|||
|
|
+ self.__log_level = items_common.get('log_level', ConfigParser.DEFAULT_LOG_LEVEL)
|
|||
|
|
+
|
|||
|
|
+ try:
|
|||
|
|
+ self.__train_data_duration = float(items_algorithm.get('train_data_duration',
|
|||
|
|
+ ConfigParser.DEFAULT_TRAIN_DATA_DURATION))
|
|||
|
|
+ except ValueError:
|
|||
|
|
+ self.__train_data_duration = ConfigParser.DEFAULT_TRAIN_DATA_DURATION
|
|||
|
|
+ logging.warning('train_data_duration type conversion has error, use default value.')
|
|||
|
|
+
|
|||
|
|
+ try:
|
|||
|
|
+ self.__train_update_duration = float(items_algorithm.get('train_update_duration',
|
|||
|
|
+ ConfigParser.DEFAULT_TRAIN_UPDATE_DURATION))
|
|||
|
|
+ except ValueError:
|
|||
|
|
+ self.__train_update_duration = ConfigParser.DEFAULT_TRAIN_UPDATE_DURATION
|
|||
|
|
+ logging.warning('train_update_duration type conversion has error, use default value.')
|
|||
|
|
+
|
|||
|
|
+ try:
|
|||
|
|
+ self.__algorithm_type = items_algorithm.get('algorithm_type', ConfigParser.DEFAULT_ALGORITHM_TYPE)
|
|||
|
|
+ except ValueError:
|
|||
|
|
+ self.__algorithm_type = ConfigParser.DEFAULT_ALGORITHM_TYPE
|
|||
|
|
+ logging.warning('algorithmType type conversion has error, use default value.')
|
|||
|
|
+
|
|||
|
|
+ if self.__algorithm_type == 'n_sigma':
|
|||
|
|
+ try:
|
|||
|
|
+ self.__n_sigma_parameter = float(items_algorithm.get('n_sigma_parameter',
|
|||
|
|
+ ConfigParser.DEFAULT_N_SIGMA_PARAMETER))
|
|||
|
|
+ except ValueError:
|
|||
|
|
+ self.__n_sigma_parameter = ConfigParser.DEFAULT_N_SIGMA_PARAMETER
|
|||
|
|
+ logging.warning('n_sigma_parameter type conversion has error, use default value.')
|
|||
|
|
+ elif self.__algorithm_type == 'boxplot':
|
|||
|
|
+ try:
|
|||
|
|
+ self.__boxplot_parameter = float(items_algorithm.get('boxplot_parameter',
|
|||
|
|
+ ConfigParser.DEFAULT_BOXPLOT_PARAMETER))
|
|||
|
|
+ except ValueError:
|
|||
|
|
+ self.__boxplot_parameter = ConfigParser.DEFAULT_BOXPLOT_PARAMETER
|
|||
|
|
+ logging.warning('boxplot_parameter type conversion has error, use default value.')
|
|||
|
|
+
|
|||
|
|
+ self.__sliding_window_type = items_sliding_window.get('sliding_window_type',
|
|||
|
|
+ ConfigParser.DEFAULT_SLIDING_WINDOW_TYPE)
|
|||
|
|
+
|
|||
|
|
+ try:
|
|||
|
|
+ self.__window_size = int(items_sliding_window.get('window_size',
|
|||
|
|
+ ConfigParser.DEFAULT_WINDOW_SIZE))
|
|||
|
|
+ except ValueError:
|
|||
|
|
+ self.__window_size = ConfigParser.DEFAULT_WINDOW_SIZE
|
|||
|
|
+ logging.warning('window_size type conversion has error, use default value.')
|
|||
|
|
+
|
|||
|
|
+ try:
|
|||
|
|
+ self.__window_minimum_threshold = (
|
|||
|
|
+ int(items_sliding_window.get('window_minimum_threshold',
|
|||
|
|
+ ConfigParser.DEFAULT_WINDOW_MINIMUM_THRESHOLD)))
|
|||
|
|
+ except ValueError:
|
|||
|
|
+ self.__window_minimum_threshold = ConfigParser.DEFAULT_WINDOW_MINIMUM_THRESHOLD
|
|||
|
|
+ logging.warning('window_minimum_threshold type conversion has error, use default value.')
|
|||
|
|
+
|
|||
|
|
+ def get_slow_io_detect_frequency(self):
|
|||
|
|
+ return self.__slow_io_detect_frequency
|
|||
|
|
+
|
|||
|
|
+ def get_algorithm_type(self):
|
|||
|
|
+ return self.__algorithm_type
|
|||
|
|
+
|
|||
|
|
+ def get_sliding_window_type(self):
|
|||
|
|
+ return self.__sliding_window_type
|
|||
|
|
+
|
|||
|
|
+ def get_train_data_duration_and_train_update_duration(self):
|
|||
|
|
+ return self.__train_data_duration, self.__train_update_duration
|
|||
|
|
+
|
|||
|
|
+ def get_window_size_and_window_minimum_threshold(self):
|
|||
|
|
+ return self.__window_size, self.__window_minimum_threshold
|
|||
|
|
+
|
|||
|
|
+ def get_absolute_threshold(self):
|
|||
|
|
+ return self.__absolute_threshold
|
|||
|
|
+
|
|||
|
|
+ def get_log_level(self):
|
|||
|
|
+ return self.__log_level
|
|||
|
|
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/data_access.py b/src/python/sentryPlugins/ai_threshold_slow_io_detection/data_access.py
|
|||
|
|
new file mode 100644
|
|||
|
|
index 0000000..d9f3460
|
|||
|
|
--- /dev/null
|
|||
|
|
+++ b/src/python/sentryPlugins/ai_threshold_slow_io_detection/data_access.py
|
|||
|
|
@@ -0,0 +1,91 @@
|
|||
|
|
+# coding: utf-8
|
|||
|
|
+# Copyright (c) 2024 Huawei Technologies Co., Ltd.
|
|||
|
|
+# sysSentry is licensed under the Mulan PSL v2.
|
|||
|
|
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
|
|||
|
|
+# You may obtain a copy of Mulan PSL v2 at:
|
|||
|
|
+# http://license.coscl.org.cn/MulanPSL2
|
|||
|
|
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
|
|||
|
|
+# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
|
|||
|
|
+# PURPOSE.
|
|||
|
|
+# See the Mulan PSL v2 for more details.
|
|||
|
|
+
|
|||
|
|
+import json
|
|||
|
|
+import logging
|
|||
|
|
+
|
|||
|
|
+from sentryCollector.collect_plugin import (
|
|||
|
|
+ Result_Messages,
|
|||
|
|
+ get_io_data,
|
|||
|
|
+ is_iocollect_valid,
|
|||
|
|
+)
|
|||
|
|
+from .io_data import IOStageData, IOData
|
|||
|
|
+
|
|||
|
|
+COLLECT_STAGES = [
|
|||
|
|
+ "throtl",
|
|||
|
|
+ "wbt",
|
|||
|
|
+ "gettag",
|
|||
|
|
+ "plug",
|
|||
|
|
+ "bfq",
|
|||
|
|
+ "hctx",
|
|||
|
|
+ "requeue",
|
|||
|
|
+ "rq_driver",
|
|||
|
|
+ "bio",
|
|||
|
|
+ "iocost",
|
|||
|
|
+]
|
|||
|
|
+
|
|||
|
|
+def check_collect_valid(period):
|
|||
|
|
+ data_raw = is_iocollect_valid(period)
|
|||
|
|
+ if data_raw["ret"] == 0:
|
|||
|
|
+ try:
|
|||
|
|
+ data = json.loads(data_raw["message"])
|
|||
|
|
+ except Exception as e:
|
|||
|
|
+ logging.warning(f"get io data failed, {e}")
|
|||
|
|
+ return []
|
|||
|
|
+ return [k for k in data.keys()]
|
|||
|
|
+ else:
|
|||
|
|
+ return []
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
+def _get_raw_data(period, disk_list):
|
|||
|
|
+ return get_io_data(
|
|||
|
|
+ period,
|
|||
|
|
+ disk_list,
|
|||
|
|
+ COLLECT_STAGES,
|
|||
|
|
+ ["read", "write", "flush", "discard"],
|
|||
|
|
+ )
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
+def _get_io_stage_data(data):
|
|||
|
|
+ io_stage_data = IOStageData()
|
|||
|
|
+ for data_type in ('read', 'write', 'flush', 'discard'):
|
|||
|
|
+ if data_type in data:
|
|||
|
|
+ getattr(io_stage_data, data_type).latency = data[data_type][0]
|
|||
|
|
+ getattr(io_stage_data, data_type).io_dump = data[data_type][1]
|
|||
|
|
+ getattr(io_stage_data, data_type).io_length = data[data_type][2]
|
|||
|
|
+ getattr(io_stage_data, data_type).iops = data[data_type][3]
|
|||
|
|
+ return io_stage_data
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
+def get_io_data_from_collect_plug(period, disk_list):
|
|||
|
|
+ data_raw = _get_raw_data(period, disk_list)
|
|||
|
|
+ if data_raw["ret"] == 0:
|
|||
|
|
+ ret = {}
|
|||
|
|
+ try:
|
|||
|
|
+ data = json.loads(data_raw["message"])
|
|||
|
|
+ except json.decoder.JSONDecodeError as e:
|
|||
|
|
+ logging.warning(f"get io data failed, {e}")
|
|||
|
|
+ return None
|
|||
|
|
+
|
|||
|
|
+ for disk in data:
|
|||
|
|
+ disk_data = data[disk]
|
|||
|
|
+ disk_ret = IOData()
|
|||
|
|
+ for k, v in disk_data.items():
|
|||
|
|
+ try:
|
|||
|
|
+ getattr(disk_ret, k)
|
|||
|
|
+ setattr(disk_ret, k, _get_io_stage_data(v))
|
|||
|
|
+ except AttributeError:
|
|||
|
|
+ logging.debug(f'no attr {k}')
|
|||
|
|
+ continue
|
|||
|
|
+ ret[disk] = disk_ret
|
|||
|
|
+ return ret
|
|||
|
|
+ logging.warning(f'get io data failed with message: {data_raw["message"]}')
|
|||
|
|
+ return None
|
|||
|
|
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/detector.py b/src/python/sentryPlugins/ai_threshold_slow_io_detection/detector.py
|
|||
|
|
new file mode 100644
|
|||
|
|
index 0000000..eda9825
|
|||
|
|
--- /dev/null
|
|||
|
|
+++ b/src/python/sentryPlugins/ai_threshold_slow_io_detection/detector.py
|
|||
|
|
@@ -0,0 +1,48 @@
|
|||
|
|
+# coding: utf-8
|
|||
|
|
+# Copyright (c) 2024 Huawei Technologies Co., Ltd.
|
|||
|
|
+# sysSentry is licensed under the Mulan PSL v2.
|
|||
|
|
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
|
|||
|
|
+# You may obtain a copy of Mulan PSL v2 at:
|
|||
|
|
+# http://license.coscl.org.cn/MulanPSL2
|
|||
|
|
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
|
|||
|
|
+# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
|
|||
|
|
+# PURPOSE.
|
|||
|
|
+# See the Mulan PSL v2 for more details.
|
|||
|
|
+import logging
|
|||
|
|
+
|
|||
|
|
+from .io_data import MetricName
|
|||
|
|
+from .threshold import Threshold
|
|||
|
|
+from .sliding_window import SlidingWindow
|
|||
|
|
+from .utils import get_metric_value_from_io_data_dict_by_metric_name
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
+class Detector:
|
|||
|
|
+ _metric_name: MetricName = None
|
|||
|
|
+ _threshold: Threshold = None
|
|||
|
|
+ _slidingWindow: SlidingWindow = None
|
|||
|
|
+
|
|||
|
|
+ def __init__(self, metric_name: MetricName, threshold: Threshold, sliding_window: SlidingWindow):
|
|||
|
|
+ self._metric_name = metric_name
|
|||
|
|
+ self._threshold = threshold
|
|||
|
|
+ self._slidingWindow = sliding_window
|
|||
|
|
+ self._threshold.attach_observer(self._slidingWindow)
|
|||
|
|
+
|
|||
|
|
+ def get_metric_name(self):
|
|||
|
|
+ return self._metric_name
|
|||
|
|
+
|
|||
|
|
+ def is_slow_io_event(self, io_data_dict_with_disk_name: dict):
|
|||
|
|
+ logging.debug(f'Enter Detector: {self}')
|
|||
|
|
+ metric_value = get_metric_value_from_io_data_dict_by_metric_name(io_data_dict_with_disk_name, self._metric_name)
|
|||
|
|
+ if metric_value > 1e-6:
|
|||
|
|
+ logging.debug(f'Input metric value: {str(metric_value)}')
|
|||
|
|
+ self._threshold.push_latest_data_to_queue(metric_value)
|
|||
|
|
+ detection_result = self._slidingWindow.is_slow_io_event(metric_value)
|
|||
|
|
+ logging.debug(f'Detection result: {str(detection_result)}')
|
|||
|
|
+ logging.debug(f'Exit Detector: {self}')
|
|||
|
|
+ return detection_result
|
|||
|
|
+
|
|||
|
|
+ def __repr__(self):
|
|||
|
|
+ return (f'disk_name: {self._metric_name.get_disk_name()}, stage_name: {self._metric_name.get_stage_name()},'
|
|||
|
|
+ f' access_type_name: {self._metric_name.get_io_access_type_name()},'
|
|||
|
|
+ f' metric_name: {self._metric_name.get_metric_name()}, threshold_type: {self._threshold},'
|
|||
|
|
+ f' sliding_window_type: {self._slidingWindow}')
|
|||
|
|
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/io_data.py b/src/python/sentryPlugins/ai_threshold_slow_io_detection/io_data.py
|
|||
|
|
new file mode 100644
|
|||
|
|
index 0000000..0e17051
|
|||
|
|
--- /dev/null
|
|||
|
|
+++ b/src/python/sentryPlugins/ai_threshold_slow_io_detection/io_data.py
|
|||
|
|
@@ -0,0 +1,74 @@
|
|||
|
|
+# coding: utf-8
|
|||
|
|
+# Copyright (c) 2024 Huawei Technologies Co., Ltd.
|
|||
|
|
+# sysSentry is licensed under the Mulan PSL v2.
|
|||
|
|
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
|
|||
|
|
+# You may obtain a copy of Mulan PSL v2 at:
|
|||
|
|
+# http://license.coscl.org.cn/MulanPSL2
|
|||
|
|
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
|
|||
|
|
+# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
|
|||
|
|
+# PURPOSE.
|
|||
|
|
+# See the Mulan PSL v2 for more details.
|
|||
|
|
+
|
|||
|
|
+from dataclasses import dataclass, field
|
|||
|
|
+from datetime import datetime
|
|||
|
|
+from typing import Optional
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
+@dataclass
|
|||
|
|
+class BaseData:
|
|||
|
|
+ latency: Optional[float] = field(default_factory=lambda: None)
|
|||
|
|
+ io_dump: Optional[int] = field(default_factory=lambda: None)
|
|||
|
|
+ io_length: Optional[int] = field(default_factory=lambda: None)
|
|||
|
|
+ iops: Optional[int] = field(default_factory=lambda: None)
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
+@dataclass
|
|||
|
|
+class IOStageData:
|
|||
|
|
+ read: BaseData = field(default_factory=lambda: BaseData())
|
|||
|
|
+ write: BaseData = field(default_factory=lambda: BaseData())
|
|||
|
|
+ flush: BaseData = field(default_factory=lambda: BaseData())
|
|||
|
|
+ discard: BaseData = field(default_factory=lambda: BaseData())
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
+@dataclass
|
|||
|
|
+class IOData:
|
|||
|
|
+ throtl: IOStageData = field(default_factory=lambda: IOStageData())
|
|||
|
|
+ wbt: IOStageData = field(default_factory=lambda: IOStageData())
|
|||
|
|
+ gettag: IOStageData = field(default_factory=lambda: IOStageData())
|
|||
|
|
+ iocost: IOStageData = field(default_factory=lambda: IOStageData())
|
|||
|
|
+ plug: IOStageData = field(default_factory=lambda: IOStageData())
|
|||
|
|
+ bfq: IOStageData = field(default_factory=lambda: IOStageData())
|
|||
|
|
+ hctx: IOStageData = field(default_factory=lambda: IOStageData())
|
|||
|
|
+ requeue: IOStageData = field(default_factory=lambda: IOStageData())
|
|||
|
|
+ rq_driver: IOStageData = field(default_factory=lambda: IOStageData())
|
|||
|
|
+ bio: IOStageData = field(default_factory=lambda: IOStageData())
|
|||
|
|
+ time_stamp: float = field(default_factory=lambda: datetime.now().timestamp())
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
+class MetricName:
|
|||
|
|
+ _disk_name: str = None
|
|||
|
|
+ _stage_name: str = None
|
|||
|
|
+ _io_access_type_name: str = None
|
|||
|
|
+ _metric_name: str = None
|
|||
|
|
+
|
|||
|
|
+ def __init__(self, disk_name: str, stage_name: str, io_access_type_name: str, metric_name: str):
|
|||
|
|
+ self._disk_name = disk_name
|
|||
|
|
+ self._stage_name = stage_name
|
|||
|
|
+ self._io_access_type_name = io_access_type_name
|
|||
|
|
+ self._metric_name = metric_name
|
|||
|
|
+
|
|||
|
|
+ def get_disk_name(self):
|
|||
|
|
+ return self._disk_name
|
|||
|
|
+
|
|||
|
|
+ def get_stage_name(self):
|
|||
|
|
+ return self._stage_name
|
|||
|
|
+
|
|||
|
|
+ def get_io_access_type_name(self):
|
|||
|
|
+ return self._io_access_type_name
|
|||
|
|
+
|
|||
|
|
+ def get_metric_name(self):
|
|||
|
|
+ return self._metric_name
|
|||
|
|
+
|
|||
|
|
+ def __repr__(self):
|
|||
|
|
+ return (f'disk: {self._disk_name}, stage: {self._stage_name}, io_access_type: {self._io_access_type_name},'
|
|||
|
|
+ f'metric: {self._metric_name}')
|
|||
|
|
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/sliding_window.py b/src/python/sentryPlugins/ai_threshold_slow_io_detection/sliding_window.py
|
|||
|
|
new file mode 100644
|
|||
|
|
index 0000000..d395d48
|
|||
|
|
--- /dev/null
|
|||
|
|
+++ b/src/python/sentryPlugins/ai_threshold_slow_io_detection/sliding_window.py
|
|||
|
|
@@ -0,0 +1,113 @@
|
|||
|
|
+# coding: utf-8
|
|||
|
|
+# Copyright (c) 2024 Huawei Technologies Co., Ltd.
|
|||
|
|
+# sysSentry is licensed under the Mulan PSL v2.
|
|||
|
|
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
|
|||
|
|
+# You may obtain a copy of Mulan PSL v2 at:
|
|||
|
|
+# http://license.coscl.org.cn/MulanPSL2
|
|||
|
|
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
|
|||
|
|
+# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
|
|||
|
|
+# PURPOSE.
|
|||
|
|
+# See the Mulan PSL v2 for more details.
|
|||
|
|
+
|
|||
|
|
+from enum import Enum, unique
|
|||
|
|
+import numpy as np
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
@unique
class SlidingWindowType(Enum):
    """Kinds of sliding window that SlidingWindowFactory can build.

    ``@unique`` guarantees that no two members share a value.
    """

    # Each member is named after the window class it selects.
    NotContinuousSlidingWindow = 0
    ContinuousSlidingWindow = 1
    MedianSlidingWindow = 2
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
class SlidingWindow:
    """Base class for a fixed-length window over the most recent samples.

    Two parallel lists are maintained: the raw samples, and one flag per
    sample telling whether it reached the current threshold. Subclasses
    override ``is_slow_io_event`` with their own decision rule.
    """

    # Class-level placeholders; the queue attributes are replaced in __init__.
    _ai_threshold = None
    _queue_length = None
    _queue_threshold = None
    _io_data_queue: list = None
    _io_data_queue_abnormal_tag: list = None

    def __init__(self, queue_length: int, threshold: int):
        """Create an empty window.

        :param queue_length: maximum number of samples kept in the window.
        :param threshold: count that subclasses compare the abnormal flags against.
        """
        self._io_data_queue = []
        self._io_data_queue_abnormal_tag = []
        self._queue_length = queue_length
        self._queue_threshold = threshold

    def push(self, data: float):
        """Append a sample, dropping the oldest one first when the window is full."""
        samples = self._io_data_queue
        flags = self._io_data_queue_abnormal_tag
        if len(samples) == self._queue_length:
            # Keep both lists aligned: evict the same position from each.
            del samples[0]
            del flags[0]
        samples.append(data)
        # Without a threshold nothing can be flagged as abnormal yet.
        if self._ai_threshold is None:
            flagged = False
        else:
            flagged = data >= self._ai_threshold
        flags.append(flagged)

    def update(self, threshold):
        """Install a new threshold and re-flag every sample already in the window."""
        if self._ai_threshold == threshold:
            # Same threshold as before: the existing flags are still valid.
            return
        self._ai_threshold = threshold
        flags = self._io_data_queue_abnormal_tag
        flags.clear()
        flags.extend(sample >= threshold for sample in self._io_data_queue)

    def is_slow_io_event(self, data):
        """Default rule: never report a slow IO event.

        :return: the tuple ``(False, None, None)``.
        """
        return False, None, None

    def __repr__(self):
        return "SlidingWindow"
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
class NotContinuousSlidingWindow(SlidingWindow):
    """Window that fires when enough samples are abnormal, adjacent or not."""

    def is_slow_io_event(self, data):
        """Push ``data`` and evaluate the window.

        :return: ``(is_slow, window_samples, threshold)``. ``is_slow`` is True
            only when the window is full, a threshold is set, and the number
            of abnormal flags reaches the configured count.
        """
        super().push(data)
        window_full = not len(self._io_data_queue) < self._queue_length
        is_slow = (window_full
                   and self._ai_threshold is not None
                   and self._io_data_queue_abnormal_tag.count(True) >= self._queue_threshold)
        return is_slow, self._io_data_queue, self._ai_threshold

    def __repr__(self):
        return "NotContinuousSlidingWindow"
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
class ContinuousSlidingWindow(SlidingWindow):
    """Window that fires when enough abnormal samples occur back to back."""

    def is_slow_io_event(self, data):
        """Push ``data`` and evaluate the window.

        :return: ``(is_slow, window_samples, threshold)``. ``is_slow`` is True
            only when the window is full, a threshold is set, and some run of
            consecutive abnormal flags reaches the configured count.
        """
        super().push(data)
        is_slow = False
        window_full = not len(self._io_data_queue) < self._queue_length
        if window_full and self._ai_threshold is not None:
            run_length = 0
            for flagged in self._io_data_queue_abnormal_tag:
                if not flagged:
                    # A normal sample breaks the current run.
                    run_length = 0
                    continue
                run_length += 1
                if run_length >= self._queue_threshold:
                    is_slow = True
                    break
        return is_slow, self._io_data_queue, self._ai_threshold

    def __repr__(self):
        return "ContinuousSlidingWindow"
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
class MedianSlidingWindow(SlidingWindow):
    """Window that fires when the median of its samples reaches the threshold."""

    def is_slow_io_event(self, data):
        """Push ``data`` and evaluate the window.

        :return: ``(is_slow, window_samples, threshold)``. ``is_slow`` is True
            only when the window is full, a threshold is set, and the median
            of the window samples is at least the threshold.
        """
        super().push(data)
        is_slow = False
        window_full = not len(self._io_data_queue) < self._queue_length
        if window_full and self._ai_threshold is not None:
            window_median = np.median(self._io_data_queue)
            if window_median >= self._ai_threshold:
                is_slow = True
        return is_slow, self._io_data_queue, self._ai_threshold

    def __repr__(self):
        return "MedianSlidingWindow"
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
class SlidingWindowFactory:
    """Builds the sliding window implementation matching a SlidingWindowType."""

    def get_sliding_window(self, sliding_window_type: SlidingWindowType, *args, **kwargs):
        """Instantiate the window class selected by ``sliding_window_type``.

        Positional and keyword arguments are forwarded unchanged to the window
        constructor. Any unrecognised type falls back to
        NotContinuousSlidingWindow.
        """
        if sliding_window_type == SlidingWindowType.ContinuousSlidingWindow:
            window_cls = ContinuousSlidingWindow
        elif sliding_window_type == SlidingWindowType.MedianSlidingWindow:
            window_cls = MedianSlidingWindow
        else:
            # Covers both the explicit NotContinuous type and the fallback case.
            window_cls = NotContinuousSlidingWindow
        return window_cls(*args, **kwargs)
|
|||
|
|
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/slow_io_detection.py b/src/python/sentryPlugins/ai_threshold_slow_io_detection/slow_io_detection.py
|
|||
|
|
new file mode 100644
|
|||
|
|
index 0000000..43cf770
|
|||
|
|
--- /dev/null
|
|||
|
|
+++ b/src/python/sentryPlugins/ai_threshold_slow_io_detection/slow_io_detection.py
|
|||
|
|
@@ -0,0 +1,133 @@
|
|||
|
|
+# coding: utf-8
|
|||
|
|
+# Copyright (c) 2024 Huawei Technologies Co., Ltd.
|
|||
|
|
+# sysSentry is licensed under the Mulan PSL v2.
|
|||
|
|
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
|
|||
|
|
+# You may obtain a copy of Mulan PSL v2 at:
|
|||
|
|
+# http://license.coscl.org.cn/MulanPSL2
|
|||
|
|
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
|
|||
|
|
+# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
|
|||
|
|
+# PURPOSE.
|
|||
|
|
+# See the Mulan PSL v2 for more details.
|
|||
|
|
+
|
|||
|
|
+import time
|
|||
|
|
+import signal
|
|||
|
|
+import logging
|
|||
|
|
+
|
|||
|
|
+from .detector import Detector
|
|||
|
|
+from .threshold import ThresholdFactory, AbsoluteThreshold
|
|||
|
|
+from .sliding_window import SlidingWindowFactory
|
|||
|
|
+from .utils import (get_threshold_type_enum, get_sliding_window_type_enum, get_data_queue_size_and_update_size,
|
|||
|
|
+ get_log_level)
|
|||
|
|
+from .config_parser import ConfigParser
|
|||
|
|
+from .data_access import get_io_data_from_collect_plug, check_collect_valid
|
|||
|
|
+from .io_data import MetricName
|
|||
|
|
+from .alarm_report import AlarmReport
|
|||
|
|
+
|
|||
|
|
+CONFIG_FILE = "/etc/sysSentry/plugins/ai_threshold_slow_io_detection.ini"
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
def sig_handler(signum, frame):
    """Signal handler: log the signal, report the failure, then exit.

    :param signum: number of the received signal; also used as the exit status.
    :param frame: current stack frame supplied by the ``signal`` module (unused).
    :raises SystemExit: always, carrying ``signum`` as the exit code.
    """
    # Function-local import so this block does not depend on the file's import list.
    import sys

    logging.info("receive signal: %d", signum)
    AlarmReport().report_fail(f"receive signal: {signum}")
    # sys.exit instead of the bare exit() builtin: exit() is injected by the
    # ``site`` module for interactive use and is absent when site is not loaded.
    sys.exit(signum)
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
class SlowIODetection:
    """Periodic slow IO detection loop.

    On construction the instance configures logging, asks
    ``check_collect_valid`` for the disks to watch, and creates one detector
    per ``(disk, "bio", "read"|"write", "latency")`` metric. ``launch`` then
    polls IO data forever and reports every detected slow IO event.
    """

    _config_parser = None
    _disk_list = None
    # Declared here only as placeholders. The real containers are created per
    # instance in __init__ so that instances never share (and keep appending
    # to) the same class-level list/dict.
    _detector_name_list = None
    _detectors = None

    def __init__(self, config_parser: ConfigParser):
        """Build all detectors from the given, already loaded, configuration."""
        self._config_parser = config_parser
        self._detector_name_list = []
        self._detectors = {}
        self.__set_log_format()
        self.__init_detector_name_list()
        self.__init_detector()

    def __set_log_format(self):
        """Configure the root logger with the configured level and a fixed format."""
        log_format = "%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s"
        log_level = get_log_level(self._config_parser.get_log_level())
        logging.basicConfig(level=log_level, format=log_format)

    def __init_detector_name_list(self):
        """Create the read and write bio-latency metric names for every disk."""
        # NOTE(review): check_collect_valid is expected to return an iterable
        # of disk names - confirm against data_access.py.
        self._disk_list = check_collect_valid(self._config_parser.get_slow_io_detect_frequency())
        for disk in self._disk_list:
            for access_type in ("read", "write"):
                self._detector_name_list.append(MetricName(disk, "bio", access_type, "latency"))

    def __init_detector(self):
        """Create one detector (threshold + sliding window) per metric name."""
        train_data_duration, train_update_duration = (
            self._config_parser.get_train_data_duration_and_train_update_duration())
        slow_io_detection_frequency = self._config_parser.get_slow_io_detect_frequency()
        threshold_type = get_threshold_type_enum(self._config_parser.get_algorithm_type())
        data_queue_size, update_size = get_data_queue_size_and_update_size(
            train_data_duration, train_update_duration, slow_io_detection_frequency)
        sliding_window_type = get_sliding_window_type_enum(self._config_parser.get_sliding_window_type())
        window_size, window_threshold = self._config_parser.get_window_size_and_window_minimum_threshold()

        for detector_name in self._detector_name_list:
            threshold = ThresholdFactory().get_threshold(threshold_type, data_queue_size=data_queue_size,
                                                         data_queue_update_size=update_size)
            sliding_window = SlidingWindowFactory().get_sliding_window(sliding_window_type,
                                                                       queue_length=window_size,
                                                                       threshold=window_threshold)
            detector = Detector(detector_name, threshold, sliding_window)
            # An absolute threshold is not learned from data, so it is seeded
            # from the configuration right away.
            if isinstance(threshold, AbsoluteThreshold):
                threshold.set_threshold(self._config_parser.get_absolute_threshold())
            self._detectors[detector_name] = detector
            logging.info("add detector: %s", detector)

    def launch(self):
        """Run the detection loop forever: fetch data, detect, report, sleep."""
        while True:
            logging.debug('step0. AI threshold slow io event detection is looping.')
            detect_interval = self._config_parser.get_slow_io_detect_frequency()

            # Step 1: fetch the IO data
            io_data_dict_with_disk_name = get_io_data_from_collect_plug(detect_interval, self._disk_list)
            logging.debug('step1. Get io data: %s', io_data_dict_with_disk_name)
            if io_data_dict_with_disk_name is None:
                # No data this round: wait one detection period before
                # retrying instead of spinning in a tight loop.
                time.sleep(detect_interval)
                continue

            # Step 2: slow IO detection
            logging.debug('step2. Start to detection slow io event.')
            slow_io_event_list = []
            for detector in self._detectors.values():
                result = detector.is_slow_io_event(io_data_dict_with_disk_name)
                if result[0]:
                    slow_io_event_list.append((detector.get_metric_name(), result))
            logging.debug('step2. End to detection slow io event.')

            # Step 3: report the slow IO events
            logging.debug('step3. Report slow io event to sysSentry.')
            for slow_io_event in slow_io_event_list:
                metric_name: MetricName = slow_io_event[0]
                # result[1] is reported as the current window, result[2] as the threshold.
                result = slow_io_event[1]
                AlarmReport.report_major_alm(f"disk {metric_name.get_disk_name()} has slow io event."
                                             f"stage: {metric_name.get_stage_name()},"
                                             f"type: {metric_name.get_io_access_type_name()},"
                                             f"metric: {metric_name.get_metric_name()},"
                                             f"current window: {result[1]},"
                                             f"threshold: {result[2]}")
                logging.error("slow io event happen: %s", slow_io_event)

            # Step 4: wait until the next detection round
            logging.debug('step4. Wait to start next slow io event detection loop.')
            time.sleep(detect_interval)
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
def main():
    """Entry point: install signal handlers, load the config, run detection."""
    # Step 1: register the signal handlers
    for handled_signal in (signal.SIGINT, signal.SIGTERM):
        signal.signal(handled_signal, sig_handler)

    # Step 2: checkpoint recovery
    # TODO: not implemented yet

    # Step 3: read the configuration
    config = ConfigParser(CONFIG_FILE)
    config.read_config_from_file()

    # Step 4: start slow IO detection
    SlowIODetection(config).launch()
|
|||
|
|
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/threshold.py b/src/python/sentryPlugins/ai_threshold_slow_io_detection/threshold.py
|
|||
|
|
new file mode 100644
|
|||
|
|
index 0000000..9e1ca7b
|
|||
|
|
--- /dev/null
|
|||
|
|
+++ b/src/python/sentryPlugins/ai_threshold_slow_io_detection/threshold.py
|
|||
|
|
@@ -0,0 +1,160 @@
|
|||
|
|
+# coding: utf-8
|
|||
|
|
+# Copyright (c) 2024 Huawei Technologies Co., Ltd.
|
|||
|
|
+# sysSentry is licensed under the Mulan PSL v2.
|
|||
|
|
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
|
|||
|
|
+# You may obtain a copy of Mulan PSL v2 at:
|
|||
|
|
+# http://license.coscl.org.cn/MulanPSL2
|
|||
|
|
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
|
|||
|
|
+# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
|
|||
|
|
+# PURPOSE.
|
|||
|
|
+# See the Mulan PSL v2 for more details.
|
|||
|
|
+import logging
|
|||
|
|
+from enum import Enum
|
|||
|
|
+import queue
|
|||
|
|
+import numpy as np
|
|||
|
|
+import math
|
|||
|
|
+
|
|||
|
|
+from .sliding_window import SlidingWindow
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
class ThresholdState(Enum):
    """Lifecycle state of a Threshold object."""

    # INIT: no usable threshold value yet.
    INIT = 0
    # START: a threshold value has been set or computed.
    START = 1
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
class Threshold:
    """Base class holding a threshold value and the data used to derive it.

    The object starts in ``ThresholdState.INIT`` with a threshold of
    ``math.inf``. In that state ``get_threshold`` returns None and
    ``is_abnormal`` returns False. An attached observer is notified whenever
    ``set_threshold`` runs.
    """

    # Class-level placeholders; every instance overwrites them in __init__.
    threshold = None
    data_queue: queue.Queue = None
    data_queue_update_size: int = None
    new_data_size: int = None
    threshold_state: ThresholdState = None

    def __init__(self, data_queue_size: int = 10000, data_queue_update_size: int = 1000):
        """Create the bounded data queue and reset all counters and state.

        :param data_queue_size: ``maxsize`` passed to the internal ``queue.Queue``.
        :param data_queue_update_size: stored for subclasses, which compare it
            against ``new_data_size``.
        """
        self.threshold = math.inf
        self.threshold_state = ThresholdState.INIT
        self.new_data_size = 0
        self.data_queue_update_size = data_queue_update_size
        self.data_queue = queue.Queue(data_queue_size)
        self._observer = None

    def set_threshold(self, threshold):
        """Set the threshold explicitly, switch to START and notify the observer."""
        self.threshold = threshold
        self.threshold_state = ThresholdState.START
        self.notify_observer()

    def get_threshold(self):
        """Return the threshold, or None while still in the INIT state."""
        return None if self.threshold_state == ThresholdState.INIT else self.threshold

    def is_abnormal(self, data):
        """Return whether ``data`` reaches the threshold (always False in INIT)."""
        return self.threshold_state != ThresholdState.INIT and data >= self.threshold

    # Observer pattern: when the threshold changes, the threshold held by the
    # sliding window is refreshed automatically.
    def attach_observer(self, observer: SlidingWindow):
        """Register the sliding window that should receive threshold updates."""
        self._observer = observer

    def notify_observer(self):
        """Push the current threshold to the attached observer, if there is one."""
        if self._observer is None:
            return
        self._observer.update(self.threshold)

    def push_latest_data_to_queue(self, data):
        """Hook for subclasses; the base implementation ignores ``data``."""
        pass

    def __repr__(self):
        return "Threshold"
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
class AbsoluteThreshold(Threshold):
    """Threshold that is never derived from pushed data.

    ``push_latest_data_to_queue`` is a no-op here, so the value only changes
    through ``set_threshold``.
    """

    def __init__(self, data_queue_size: int = 10000, data_queue_update_size: int = 1000):
        """Forward both sizes unchanged to the base class."""
        super().__init__(data_queue_size=data_queue_size,
                         data_queue_update_size=data_queue_update_size)

    def push_latest_data_to_queue(self, data):
        """Ignore ``data``; this threshold does not use samples."""
        pass

    def __repr__(self):
        return "AbsoluteThreshold"
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
class BoxplotThreshold(Threshold):
    """Threshold computed as ``Q3 + parameter * (Q3 - Q1)`` over the queued data."""

    def __init__(self, parameter: float = 1.5, data_queue_size: int = 10000, data_queue_update_size: int = 1000):
        """Store the IQR multiplier and initialise the base class."""
        super().__init__(data_queue_size, data_queue_update_size)
        self.parameter = parameter

    def _update_threshold(self):
        """Recompute the threshold from the queued samples and notify the observer."""
        samples = list(self.data_queue.queue)
        lower_quartile = np.percentile(samples, 25)
        upper_quartile = np.percentile(samples, 75)
        interquartile_range = upper_quartile - lower_quartile
        self.threshold = upper_quartile + self.parameter * interquartile_range
        if self.threshold_state == ThresholdState.INIT:
            self.threshold_state = ThresholdState.START
        self.notify_observer()

    def push_latest_data_to_queue(self, data):
        """Queue one sample and refresh the threshold when a refresh is due.

        When the queue is already full the oldest sample is dropped first. A
        refresh happens only with a full queue: immediately while still in
        INIT, otherwise once ``data_queue_update_size`` new samples arrived
        since the last refresh.
        """
        try:
            self.data_queue.put_nowait(data)
        except queue.Full:
            # Make room by discarding the oldest sample, then retry.
            self.data_queue.get_nowait()
            self.data_queue.put_nowait(data)
        self.new_data_size += 1

        if not self.data_queue.full():
            return
        first_computation = self.threshold_state == ThresholdState.INIT
        refresh_due = (self.threshold_state == ThresholdState.START
                       and self.new_data_size >= self.data_queue_update_size)
        if first_computation or refresh_due:
            self._update_threshold()
            self.new_data_size = 0

    def __repr__(self):
        return "BoxplotThreshold"
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
class NSigmaThreshold(Threshold):
    """Threshold computed as ``mean + parameter * std`` over the queued data."""

    def __init__(self, parameter: float = 2.0, data_queue_size: int = 10000, data_queue_update_size: int = 1000):
        """Store the standard-deviation multiplier and initialise the base class."""
        super().__init__(data_queue_size, data_queue_update_size)
        self.parameter = parameter

    def _update_threshold(self):
        """Recompute the threshold from the queued samples and notify the observer."""
        samples = list(self.data_queue.queue)
        sample_mean = np.mean(samples)
        sample_std = np.std(samples)
        self.threshold = sample_mean + self.parameter * sample_std
        if self.threshold_state == ThresholdState.INIT:
            self.threshold_state = ThresholdState.START
        self.notify_observer()

    def push_latest_data_to_queue(self, data):
        """Queue one sample and refresh the threshold when a refresh is due.

        When the queue is already full the oldest sample is dropped first. A
        refresh happens only with a full queue: immediately while still in
        INIT, otherwise once ``data_queue_update_size`` new samples arrived
        since the last refresh.
        """
        try:
            self.data_queue.put_nowait(data)
        except queue.Full:
            # Make room by discarding the oldest sample, then retry.
            self.data_queue.get_nowait()
            self.data_queue.put_nowait(data)
        self.new_data_size += 1

        if not self.data_queue.full():
            return
        first_computation = self.threshold_state == ThresholdState.INIT
        refresh_due = (self.threshold_state == ThresholdState.START
                       and self.new_data_size >= self.data_queue_update_size)
        if first_computation or refresh_due:
            self._update_threshold()
            self.new_data_size = 0

    def __repr__(self):
        return "NSigmaThreshold"
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
class ThresholdType(Enum):
    """Kinds of threshold algorithm that ThresholdFactory can build."""

    # Each member is named after the threshold class it selects.
    AbsoluteThreshold = 0
    BoxplotThreshold = 1
    NSigmaThreshold = 2
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
class ThresholdFactory:
    """Builds the threshold implementation matching a ThresholdType."""

    def get_threshold(self, threshold_type: ThresholdType, *args, **kwargs):
        """Instantiate the threshold class selected by ``threshold_type``.

        Positional and keyword arguments are forwarded unchanged to the
        threshold constructor.

        :raises ValueError: if ``threshold_type`` is not a known ThresholdType.
        """
        if threshold_type == ThresholdType.AbsoluteThreshold:
            threshold_cls = AbsoluteThreshold
        elif threshold_type == ThresholdType.BoxplotThreshold:
            threshold_cls = BoxplotThreshold
        elif threshold_type == ThresholdType.NSigmaThreshold:
            threshold_cls = NSigmaThreshold
        else:
            raise ValueError(f"Invalid threshold type: {threshold_type}")
        return threshold_cls(*args, **kwargs)
|
|||
|
|
+
|
|||
|
|
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/utils.py b/src/python/sentryPlugins/ai_threshold_slow_io_detection/utils.py
|
|||
|
|
new file mode 100644
|
|||
|
|
index 0000000..f66e5ed
|
|||
|
|
--- /dev/null
|
|||
|
|
+++ b/src/python/sentryPlugins/ai_threshold_slow_io_detection/utils.py
|
|||
|
|
@@ -0,0 +1,67 @@
|
|||
|
|
+# coding: utf-8
|
|||
|
|
+# Copyright (c) 2024 Huawei Technologies Co., Ltd.
|
|||
|
|
+# sysSentry is licensed under the Mulan PSL v2.
|
|||
|
|
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
|
|||
|
|
+# You may obtain a copy of Mulan PSL v2 at:
|
|||
|
|
+# http://license.coscl.org.cn/MulanPSL2
|
|||
|
|
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
|
|||
|
|
+# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
|
|||
|
|
+# PURPOSE.
|
|||
|
|
+# See the Mulan PSL v2 for more details.
|
|||
|
|
+import logging
|
|||
|
|
+from dataclasses import asdict
|
|||
|
|
+
|
|||
|
|
+from .threshold import ThresholdType
|
|||
|
|
+from .sliding_window import SlidingWindowType
|
|||
|
|
+from .io_data import MetricName, IOData
|
|||
|
|
+
|
|||
|
|
def get_threshold_type_enum(algorithm_type: str):
    """Map a configured algorithm name to its ThresholdType member.

    Matching is case-insensitive. Recognised names are ``absolute``,
    ``boxplot`` and ``n_sigma``; anything else is logged and mapped to
    ``ThresholdType.BoxplotThreshold``.
    """
    type_by_name = {
        'absolute': ThresholdType.AbsoluteThreshold,
        'boxplot': ThresholdType.BoxplotThreshold,
        'n_sigma': ThresholdType.NSigmaThreshold,
    }
    normalized = algorithm_type.lower()
    if normalized in type_by_name:
        return type_by_name[normalized]
    logging.info('not found correct algorithm type, use default: boxplot.')
    return ThresholdType.BoxplotThreshold
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
def get_sliding_window_type_enum(sliding_window_type: str):
    """Map a configured window name to its SlidingWindowType member.

    Matching is case-insensitive. Recognised names are ``not_continuous``,
    ``continuous`` and ``median``; anything else is logged and mapped to
    ``SlidingWindowType.NotContinuousSlidingWindow``.
    """
    type_by_name = {
        'not_continuous': SlidingWindowType.NotContinuousSlidingWindow,
        'continuous': SlidingWindowType.ContinuousSlidingWindow,
        'median': SlidingWindowType.MedianSlidingWindow,
    }
    normalized = sliding_window_type.lower()
    if normalized in type_by_name:
        return type_by_name[normalized]
    logging.info('not found correct sliding window type, use default: not_continuous.')
    return SlidingWindowType.NotContinuousSlidingWindow
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
def get_metric_value_from_io_data_dict_by_metric_name(io_data_dict: dict, metric_name: MetricName):
    """Look up the value addressed by ``metric_name`` inside ``io_data_dict``.

    The entry for the disk is converted with ``dataclasses.asdict`` and then
    indexed by stage name, IO access type name and metric name, in that order.

    :return: the metric value, or None when any of the four keys is missing.
    """
    try:
        # One chained lookup: disk -> stage -> access type -> metric.
        return asdict(io_data_dict[metric_name.get_disk_name()])[
            metric_name.get_stage_name()][
            metric_name.get_io_access_type_name()][
            metric_name.get_metric_name()]
    except KeyError:
        return None
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
def get_data_queue_size_and_update_size(training_data_duration: float, train_update_duration: float,
                                        slow_io_detect_frequency: int):
    """Convert the two durations into sample counts.

    Each duration is multiplied by ``60 * 60``, divided by
    ``slow_io_detect_frequency`` and truncated to an int.

    :return: ``(data_queue_size, update_size)``.
    """
    def to_sample_count(duration):
        # Same operation order as a direct computation, so rounding is unchanged.
        return int(duration * 60 * 60 / slow_io_detect_frequency)

    return to_sample_count(training_data_duration), to_sample_count(train_update_duration)
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
def get_log_level(log_level: str):
    """Translate a configured level name into a ``logging`` level constant.

    Matching is case-insensitive. Recognised names are ``debug``, ``info``,
    ``warning``, ``error``, ``critical`` and ``fatal``.

    :param log_level: level name as written in the configuration.
    :return: the matching ``logging`` constant, or None for an unknown name.
    """
    level_by_name = {
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warning': logging.WARNING,
        # 'error' and 'critical' are standard level names; without them a
        # configured "error" level would silently map to None.
        'error': logging.ERROR,
        'critical': logging.CRITICAL,
        'fatal': logging.FATAL,
    }
    return level_by_name.get(log_level.lower())
|
|||
|
|
diff --git a/src/python/setup.py b/src/python/setup.py
|
|||
|
|
index c28c691..dac6481 100644
|
|||
|
|
--- a/src/python/setup.py
|
|||
|
|
+++ b/src/python/setup.py
|
|||
|
|
@@ -33,7 +33,8 @@ setup(
|
|||
|
|
'syssentry=syssentry.syssentry:main',
|
|||
|
|
'xalarmd=xalarm.xalarm_daemon:alarm_process_create',
|
|||
|
|
'sentryCollector=sentryCollector.collectd:main',
|
|||
|
|
- 'avg_block_io=sentryPlugins.avg_block_io.avg_block_io:main'
|
|||
|
|
+ 'avg_block_io=sentryPlugins.avg_block_io.avg_block_io:main',
|
|||
|
|
+ 'ai_threshold_slow_io_detection=sentryPlugins.ai_threshold_slow_io_detection.slow_io_detection:main'
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
)
|
|||
|
|
--
|
|||
|
|
2.23.0
|
|||
|
|
|