!99 ai_block_io fix some bugs

From: @hyz2024 Reviewed-by: @gaoruoshu Signed-off-by: @gaoruoshu
2024-10-15 01:27:18 +00:00 · 2024-10-15 01:27:18 +00:00 · 3a42938f00
commit 3a42938f00
parent f084505ce1 5f2e3dd4e4
2 changed files with 243 additions and 1 deletions
--- a/ai_block_io-fix-some-bugs.patch
+++ b/ai_block_io-fix-some-bugs.patch
@@ -0,0 +1,235 @@
+From 1e13bc31ae3aa94f36aa124eefdfc8773221eacd Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?=E8=B4=BA=E6=9C=89=E5=BF=97?= <1037617413@qq.com>
+Date: Mon, 14 Oct 2024 23:16:46 +0800
+Subject: [PATCH] ai_block_io fix some bugs
+
+---
+ .../sentryPlugins/ai_block_io/ai_block_io.py  |  1 +
+ .../ai_block_io/config_parser.py              | 20 ++++++++++---------
+ .../sentryPlugins/ai_block_io/detector.py     | 18 ++++++++++++-----
+ .../sentryPlugins/ai_block_io/io_data.py      |  2 +-
+ .../sentryPlugins/ai_block_io/threshold.py    | 17 +++++++++-------
+ 5 files changed, 36 insertions(+), 22 deletions(-)
+
+diff --git a/src/python/sentryPlugins/ai_block_io/ai_block_io.py b/src/python/sentryPlugins/ai_block_io/ai_block_io.py
+index dd661a1..4eecd43 100644
+--- a/src/python/sentryPlugins/ai_block_io/ai_block_io.py
+++ b/src/python/sentryPlugins/ai_block_io/ai_block_io.py
+@@ -55,6 +55,7 @@ class SlowIODetection:
+             Report.report_pass(
+                 "get available disk error, please check if the collector plug is enable. exiting..."
+             )
+            logging.critical("get available disk error, please check if the collector plug is enable. exiting...")
+             exit(1)
+ 
+         logging.info(f"ai_block_io plug has found disks: {self._disk_list}")
+diff --git a/src/python/sentryPlugins/ai_block_io/config_parser.py b/src/python/sentryPlugins/ai_block_io/config_parser.py
+index 3388cd4..7b0cd29 100644
+--- a/src/python/sentryPlugins/ai_block_io/config_parser.py
+++ b/src/python/sentryPlugins/ai_block_io/config_parser.py
+@@ -190,7 +190,7 @@ class ConfigParser:
+         self._conf["common"]["disk"] = disk_list
+ 
+     def _read_train_data_duration(self, items_algorithm: dict):
+-        self._conf["common"]["train_data_duration"] = self._get_config_value(
+        self._conf["algorithm"]["train_data_duration"] = self._get_config_value(
+             items_algorithm,
+             "train_data_duration",
+             float,
+@@ -203,17 +203,17 @@ class ConfigParser:
+         default_train_update_duration = self.DEFAULT_CONF["algorithm"][
+             "train_update_duration"
+         ]
+-        if default_train_update_duration > self._conf["common"]["train_data_duration"]:
+        if default_train_update_duration > self._conf["algorithm"]["train_data_duration"]:
+             default_train_update_duration = (
+-                self._conf["common"]["train_data_duration"] / 2
+                self._conf["algorithm"]["train_data_duration"] / 2
+             )
+-        self._conf["common"]["train_update_duration"] = self._get_config_value(
+        self._conf["algorithm"]["train_update_duration"] = self._get_config_value(
+             items_algorithm,
+             "train_update_duration",
+             float,
+             default_train_update_duration,
+             gt=0,
+-            le=self._conf["common"]["train_data_duration"],
+            le=self._conf["algorithm"]["train_data_duration"],
+         )
+ 
+     def _read_algorithm_type_and_parameter(self, items_algorithm: dict):
+@@ -401,6 +401,8 @@ class ConfigParser:
+             self._read_stage(items_common)
+             self._read_iotype(items_common)
+         else:
+            self._conf["common"]["stage"] = ALL_STAGE_LIST
+            self._conf["common"]["iotype"] = ALL_IOTPYE_LIST
+             logging.warning(
+                 "common section parameter not found, it will be set to default value."
+             )
+@@ -511,8 +513,8 @@ class ConfigParser:
+ 
+     def get_train_data_duration_and_train_update_duration(self):
+         return (
+-            self._conf["common"]["train_data_duration"],
+-            self._conf["common"]["train_update_duration"],
+            self._conf["algorithm"]["train_data_duration"],
+            self._conf["algorithm"]["train_update_duration"],
+         )
+ 
+     def get_window_size_and_window_minimum_threshold(self):
+@@ -535,11 +537,11 @@ class ConfigParser:
+ 
+     @property
+     def train_data_duration(self):
+-        return self._conf["common"]["train_data_duration"]
+        return self._conf["algorithm"]["train_data_duration"]
+ 
+     @property
+     def train_update_duration(self):
+-        return self._conf["common"]["train_update_duration"]
+        return self._conf["algorithm"]["train_update_duration"]
+ 
+     @property
+     def window_size(self):
+diff --git a/src/python/sentryPlugins/ai_block_io/detector.py b/src/python/sentryPlugins/ai_block_io/detector.py
+index 87bd1dd..5b21714 100644
+--- a/src/python/sentryPlugins/ai_block_io/detector.py
+++ b/src/python/sentryPlugins/ai_block_io/detector.py
+@@ -9,6 +9,7 @@
+ # PURPOSE.
+ # See the Mulan PSL v2 for more details.
+ import logging
+from datetime import datetime
+ 
+ from .io_data import MetricName
+ from .threshold import Threshold
+@@ -21,18 +22,25 @@ class Detector:
+     def __init__(self, metric_name: MetricName, threshold: Threshold, sliding_window: SlidingWindow):
+         self._metric_name = metric_name
+         self._threshold = threshold
+        # Give the threshold its metric name so threshold-update logs can identify the metric.
+        self._threshold.set_metric_name(self._metric_name)
+         self._slidingWindow = sliding_window
+         self._threshold.attach_observer(self._slidingWindow)
+-        self._count = 0
+        self._count = None
+ 
+     def get_metric_name(self):
+         return self._metric_name
+ 
+     def is_slow_io_event(self, io_data_dict_with_disk_name: dict):
+-        self._count += 1
+-        if self._count % 15 == 0:
+-            self._count = 0
+-            logging.debug(f"({self._metric_name}) 's latest threshold is: {self._threshold.get_threshold()}.")
+        if self._count is None:
+            self._count = datetime.now()
+        else:
+            now_time = datetime.now()
+            time_diff = (now_time - self._count).total_seconds()
+            if time_diff >= 60:
+                logging.info(f"({self._metric_name}) 's latest threshold is: {self._threshold.get_threshold()}.")
+                self._count = None
+
+         logging.debug(f'enter Detector: {self}')
+         metric_value = get_metric_value_from_io_data_dict_by_metric_name(io_data_dict_with_disk_name, self._metric_name)
+         if metric_value is None:
+diff --git a/src/python/sentryPlugins/ai_block_io/io_data.py b/src/python/sentryPlugins/ai_block_io/io_data.py
+index d341b55..6042911 100644
+--- a/src/python/sentryPlugins/ai_block_io/io_data.py
+++ b/src/python/sentryPlugins/ai_block_io/io_data.py
+@@ -48,7 +48,7 @@ class IOData:
+ @dataclass(frozen=True)
+ class MetricName:
+     disk_name: str
+-    disk_type: str
+    disk_type: int
+     stage_name: str
+     io_access_type_name: str
+     metric_name: str
+diff --git a/src/python/sentryPlugins/ai_block_io/threshold.py b/src/python/sentryPlugins/ai_block_io/threshold.py
+index 3b7a5a8..600d041 100644
+--- a/src/python/sentryPlugins/ai_block_io/threshold.py
+++ b/src/python/sentryPlugins/ai_block_io/threshold.py
+@@ -23,11 +23,6 @@ class ThresholdState(Enum):
+ 
+ 
+ class Threshold:
+-    threshold = None
+-    data_queue: queue.Queue = None
+-    data_queue_update_size: int = None
+-    new_data_size: int = None
+-    threshold_state: ThresholdState = None
+ 
+     def __init__(self, data_queue_size: int = 10000, data_queue_update_size: int = 1000):
+         self._observer = None
+@@ -36,12 +31,16 @@ class Threshold:
+         self.new_data_size = 0
+         self.threshold_state = ThresholdState.INIT
+         self.threshold = math.inf
+        self.metric_name = None
+ 
+     def set_threshold(self, threshold):
+         self.threshold = threshold
+         self.threshold_state = ThresholdState.START
+         self.notify_observer()
+ 
+    def set_metric_name(self, metric_name):
+        self.metric_name = metric_name
+
+     def get_threshold(self):
+         if self.threshold_state == ThresholdState.INIT:
+             return None
+@@ -84,6 +83,7 @@ class BoxplotThreshold(Threshold):
+         self.parameter = boxplot_parameter
+ 
+     def _update_threshold(self):
+        old_threshold = self.threshold
+         data = list(self.data_queue.queue)
+         q1 = np.percentile(data, 25)
+         q3 = np.percentile(data, 75)
+@@ -91,6 +91,7 @@ class BoxplotThreshold(Threshold):
+         self.threshold = q3 + self.parameter * iqr
+         if self.threshold_state == ThresholdState.INIT:
+             self.threshold_state = ThresholdState.START
+        logging.info(f"MetricName: [{self.metric_name}]'s threshold update, old is: {old_threshold} -> new is: {self.threshold}")
+         self.notify_observer()
+ 
+     def push_latest_data_to_queue(self, data):
+@@ -109,7 +110,7 @@ class BoxplotThreshold(Threshold):
+             self.new_data_size = 0
+ 
+     def __repr__(self):
+-        return f"[BoxplotThreshold, param is: {self.parameter}]"
+        return f"[BoxplotThreshold, param is: {self.parameter}, train_size: {self.data_queue.maxsize}, update_size: {self.data_queue_update_size}]"
+ 
+ 
+ class NSigmaThreshold(Threshold):
+@@ -118,12 +119,14 @@ class NSigmaThreshold(Threshold):
+         self.parameter = n_sigma_parameter
+ 
+     def _update_threshold(self):
+        old_threshold = self.threshold
+         data = list(self.data_queue.queue)
+         mean = np.mean(data)
+         std = np.std(data)
+         self.threshold = mean + self.parameter * std
+         if self.threshold_state == ThresholdState.INIT:
+             self.threshold_state = ThresholdState.START
+        logging.info(f"MetricName: [{self.metric_name}]'s threshold update, old is: {old_threshold} -> new is: {self.threshold}")
+         self.notify_observer()
+ 
+     def push_latest_data_to_queue(self, data):
+@@ -142,7 +145,7 @@ class NSigmaThreshold(Threshold):
+             self.new_data_size = 0
+ 
+     def __repr__(self):
+-        return f"[NSigmaThreshold, param is: {self.parameter}]"
+        return f"[NSigmaThreshold, param is: {self.parameter}, train_size: {self.data_queue.maxsize}, update_size: {self.data_queue_update_size}]"
+ 
+ 
+ class ThresholdType(Enum):
+-- 
+2.23.0
+
--- a/sysSentry.spec
+++ b/sysSentry.spec
@@ -4,7 +4,7 @@
 Summary: System Inspection Framework
 Name: sysSentry
 Version: 1.0.2
-Release: 37
+Release: 38
 License: Mulan PSL v2
 Group: System Environment/Daemons
 Source0: https://gitee.com/openeuler/sysSentry/releases/download/v%{version}/%{name}-%{version}.tar.gz
@@ -55,6 +55,7 @@ Patch42:   fix-io_dump-for-collect-module.patch
 Patch43:   add-root-cause-analysis.patch
 Patch44:   update-collect-log.patch
 Patch45:   modify-abnormal-stack-when-the-disk-field-is-not-con.patch
+Patch46:   ai_block_io-fix-some-bugs.patch

 BuildRequires: cmake gcc-c++
 BuildRequires: python3 python3-setuptools
@@ -317,6 +318,12 @@ rm -rf %{buildroot}
 %attr(0550,root,root) %{python3_sitelib}/sentryCollector/__pycache__/collect_plugin*

 %changelog
+* Mon Oct 14 2024 heyouzhi <heyouzhi@huawei.com> - 1.0.2-38
+- Type:bugfix
+- CVE:NA
+- SUG:NA
+- DESC:ai_block_io fix some bugs
+
 * Sat Oct 12 2024 zhuofeng <zhuofeng2@huawei.com> - 1.0.2-37
 - Type:bugfix
 - CVE:NA