!99 ai_block_io fix some bugs
From: @hyz2024 Reviewed-by: @gaoruoshu Signed-off-by: @gaoruoshu
This commit is contained in:
commit
3a42938f00
235
ai_block_io-fix-some-bugs.patch
Normal file
235
ai_block_io-fix-some-bugs.patch
Normal file
@ -0,0 +1,235 @@
|
|||||||
|
From 1e13bc31ae3aa94f36aa124eefdfc8773221eacd Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?=E8=B4=BA=E6=9C=89=E5=BF=97?= <1037617413@qq.com>
|
||||||
|
Date: Mon, 14 Oct 2024 23:16:46 +0800
|
||||||
|
Subject: [PATCH] ai_block_io fix some bugs
|
||||||
|
|
||||||
|
---
|
||||||
|
.../sentryPlugins/ai_block_io/ai_block_io.py | 1 +
|
||||||
|
.../ai_block_io/config_parser.py | 20 ++++++++++---------
|
||||||
|
.../sentryPlugins/ai_block_io/detector.py | 18 ++++++++++++-----
|
||||||
|
.../sentryPlugins/ai_block_io/io_data.py | 2 +-
|
||||||
|
.../sentryPlugins/ai_block_io/threshold.py | 17 +++++++++-------
|
||||||
|
5 files changed, 36 insertions(+), 22 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/src/python/sentryPlugins/ai_block_io/ai_block_io.py b/src/python/sentryPlugins/ai_block_io/ai_block_io.py
|
||||||
|
index dd661a1..4eecd43 100644
|
||||||
|
--- a/src/python/sentryPlugins/ai_block_io/ai_block_io.py
|
||||||
|
+++ b/src/python/sentryPlugins/ai_block_io/ai_block_io.py
|
||||||
|
@@ -55,6 +55,7 @@ class SlowIODetection:
|
||||||
|
Report.report_pass(
|
||||||
|
"get available disk error, please check if the collector plug is enable. exiting..."
|
||||||
|
)
|
||||||
|
+ logging.critical("get available disk error, please check if the collector plug is enable. exiting...")
|
||||||
|
exit(1)
|
||||||
|
|
||||||
|
logging.info(f"ai_block_io plug has found disks: {self._disk_list}")
|
||||||
|
diff --git a/src/python/sentryPlugins/ai_block_io/config_parser.py b/src/python/sentryPlugins/ai_block_io/config_parser.py
|
||||||
|
index 3388cd4..7b0cd29 100644
|
||||||
|
--- a/src/python/sentryPlugins/ai_block_io/config_parser.py
|
||||||
|
+++ b/src/python/sentryPlugins/ai_block_io/config_parser.py
|
||||||
|
@@ -190,7 +190,7 @@ class ConfigParser:
|
||||||
|
self._conf["common"]["disk"] = disk_list
|
||||||
|
|
||||||
|
def _read_train_data_duration(self, items_algorithm: dict):
|
||||||
|
- self._conf["common"]["train_data_duration"] = self._get_config_value(
|
||||||
|
+ self._conf["algorithm"]["train_data_duration"] = self._get_config_value(
|
||||||
|
items_algorithm,
|
||||||
|
"train_data_duration",
|
||||||
|
float,
|
||||||
|
@@ -203,17 +203,17 @@ class ConfigParser:
|
||||||
|
default_train_update_duration = self.DEFAULT_CONF["algorithm"][
|
||||||
|
"train_update_duration"
|
||||||
|
]
|
||||||
|
- if default_train_update_duration > self._conf["common"]["train_data_duration"]:
|
||||||
|
+ if default_train_update_duration > self._conf["algorithm"]["train_data_duration"]:
|
||||||
|
default_train_update_duration = (
|
||||||
|
- self._conf["common"]["train_data_duration"] / 2
|
||||||
|
+ self._conf["algorithm"]["train_data_duration"] / 2
|
||||||
|
)
|
||||||
|
- self._conf["common"]["train_update_duration"] = self._get_config_value(
|
||||||
|
+ self._conf["algorithm"]["train_update_duration"] = self._get_config_value(
|
||||||
|
items_algorithm,
|
||||||
|
"train_update_duration",
|
||||||
|
float,
|
||||||
|
default_train_update_duration,
|
||||||
|
gt=0,
|
||||||
|
- le=self._conf["common"]["train_data_duration"],
|
||||||
|
+ le=self._conf["algorithm"]["train_data_duration"],
|
||||||
|
)
|
||||||
|
|
||||||
|
def _read_algorithm_type_and_parameter(self, items_algorithm: dict):
|
||||||
|
@@ -401,6 +401,8 @@ class ConfigParser:
|
||||||
|
self._read_stage(items_common)
|
||||||
|
self._read_iotype(items_common)
|
||||||
|
else:
|
||||||
|
+ self._conf["common"]["stage"] = ALL_STAGE_LIST
|
||||||
|
+ self._conf["common"]["iotype"] = ALL_IOTPYE_LIST
|
||||||
|
logging.warning(
|
||||||
|
"common section parameter not found, it will be set to default value."
|
||||||
|
)
|
||||||
|
@@ -511,8 +513,8 @@ class ConfigParser:
|
||||||
|
|
||||||
|
def get_train_data_duration_and_train_update_duration(self):
|
||||||
|
return (
|
||||||
|
- self._conf["common"]["train_data_duration"],
|
||||||
|
- self._conf["common"]["train_update_duration"],
|
||||||
|
+ self._conf["algorithm"]["train_data_duration"],
|
||||||
|
+ self._conf["algorithm"]["train_update_duration"],
|
||||||
|
)
|
||||||
|
|
||||||
|
def get_window_size_and_window_minimum_threshold(self):
|
||||||
|
@@ -535,11 +537,11 @@ class ConfigParser:
|
||||||
|
|
||||||
|
@property
|
||||||
|
def train_data_duration(self):
|
||||||
|
- return self._conf["common"]["train_data_duration"]
|
||||||
|
+ return self._conf["algorithm"]["train_data_duration"]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def train_update_duration(self):
|
||||||
|
- return self._conf["common"]["train_update_duration"]
|
||||||
|
+ return self._conf["algorithm"]["train_update_duration"]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def window_size(self):
|
||||||
|
diff --git a/src/python/sentryPlugins/ai_block_io/detector.py b/src/python/sentryPlugins/ai_block_io/detector.py
|
||||||
|
index 87bd1dd..5b21714 100644
|
||||||
|
--- a/src/python/sentryPlugins/ai_block_io/detector.py
|
||||||
|
+++ b/src/python/sentryPlugins/ai_block_io/detector.py
|
||||||
|
@@ -9,6 +9,7 @@
|
||||||
|
# PURPOSE.
|
||||||
|
# See the Mulan PSL v2 for more details.
|
||||||
|
import logging
|
||||||
|
+from datetime import datetime
|
||||||
|
|
||||||
|
from .io_data import MetricName
|
||||||
|
from .threshold import Threshold
|
||||||
|
@@ -21,18 +22,25 @@ class Detector:
|
||||||
|
def __init__(self, metric_name: MetricName, threshold: Threshold, sliding_window: SlidingWindow):
|
||||||
|
self._metric_name = metric_name
|
||||||
|
self._threshold = threshold
|
||||||
|
+ # give the threshold its metric name so that threshold-update log messages can identify the metric
|
||||||
|
+ self._threshold.set_metric_name(self._metric_name)
|
||||||
|
self._slidingWindow = sliding_window
|
||||||
|
self._threshold.attach_observer(self._slidingWindow)
|
||||||
|
- self._count = 0
|
||||||
|
+ self._count = None
|
||||||
|
|
||||||
|
def get_metric_name(self):
|
||||||
|
return self._metric_name
|
||||||
|
|
||||||
|
def is_slow_io_event(self, io_data_dict_with_disk_name: dict):
|
||||||
|
- self._count += 1
|
||||||
|
- if self._count % 15 == 0:
|
||||||
|
- self._count = 0
|
||||||
|
- logging.debug(f"({self._metric_name}) 's latest threshold is: {self._threshold.get_threshold()}.")
|
||||||
|
+ if self._count is None:
|
||||||
|
+ self._count = datetime.now()
|
||||||
|
+ else:
|
||||||
|
+ now_time = datetime.now()
|
||||||
|
+ time_diff = (now_time - self._count).total_seconds()
|
||||||
|
+ if time_diff >= 60:
|
||||||
|
+ logging.info(f"({self._metric_name}) 's latest threshold is: {self._threshold.get_threshold()}.")
|
||||||
|
+ self._count = None
|
||||||
|
+
|
||||||
|
logging.debug(f'enter Detector: {self}')
|
||||||
|
metric_value = get_metric_value_from_io_data_dict_by_metric_name(io_data_dict_with_disk_name, self._metric_name)
|
||||||
|
if metric_value is None:
|
||||||
|
diff --git a/src/python/sentryPlugins/ai_block_io/io_data.py b/src/python/sentryPlugins/ai_block_io/io_data.py
|
||||||
|
index d341b55..6042911 100644
|
||||||
|
--- a/src/python/sentryPlugins/ai_block_io/io_data.py
|
||||||
|
+++ b/src/python/sentryPlugins/ai_block_io/io_data.py
|
||||||
|
@@ -48,7 +48,7 @@ class IOData:
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class MetricName:
|
||||||
|
disk_name: str
|
||||||
|
- disk_type: str
|
||||||
|
+ disk_type: int
|
||||||
|
stage_name: str
|
||||||
|
io_access_type_name: str
|
||||||
|
metric_name: str
|
||||||
|
diff --git a/src/python/sentryPlugins/ai_block_io/threshold.py b/src/python/sentryPlugins/ai_block_io/threshold.py
|
||||||
|
index 3b7a5a8..600d041 100644
|
||||||
|
--- a/src/python/sentryPlugins/ai_block_io/threshold.py
|
||||||
|
+++ b/src/python/sentryPlugins/ai_block_io/threshold.py
|
||||||
|
@@ -23,11 +23,6 @@ class ThresholdState(Enum):
|
||||||
|
|
||||||
|
|
||||||
|
class Threshold:
|
||||||
|
- threshold = None
|
||||||
|
- data_queue: queue.Queue = None
|
||||||
|
- data_queue_update_size: int = None
|
||||||
|
- new_data_size: int = None
|
||||||
|
- threshold_state: ThresholdState = None
|
||||||
|
|
||||||
|
def __init__(self, data_queue_size: int = 10000, data_queue_update_size: int = 1000):
|
||||||
|
self._observer = None
|
||||||
|
@@ -36,12 +31,16 @@ class Threshold:
|
||||||
|
self.new_data_size = 0
|
||||||
|
self.threshold_state = ThresholdState.INIT
|
||||||
|
self.threshold = math.inf
|
||||||
|
+ self.metric_name = None
|
||||||
|
|
||||||
|
def set_threshold(self, threshold):
|
||||||
|
self.threshold = threshold
|
||||||
|
self.threshold_state = ThresholdState.START
|
||||||
|
self.notify_observer()
|
||||||
|
|
||||||
|
+ def set_metric_name(self, metric_name):
|
||||||
|
+ self.metric_name = metric_name
|
||||||
|
+
|
||||||
|
def get_threshold(self):
|
||||||
|
if self.threshold_state == ThresholdState.INIT:
|
||||||
|
return None
|
||||||
|
@@ -84,6 +83,7 @@ class BoxplotThreshold(Threshold):
|
||||||
|
self.parameter = boxplot_parameter
|
||||||
|
|
||||||
|
def _update_threshold(self):
|
||||||
|
+ old_threshold = self.threshold
|
||||||
|
data = list(self.data_queue.queue)
|
||||||
|
q1 = np.percentile(data, 25)
|
||||||
|
q3 = np.percentile(data, 75)
|
||||||
|
@@ -91,6 +91,7 @@ class BoxplotThreshold(Threshold):
|
||||||
|
self.threshold = q3 + self.parameter * iqr
|
||||||
|
if self.threshold_state == ThresholdState.INIT:
|
||||||
|
self.threshold_state = ThresholdState.START
|
||||||
|
+ logging.info(f"MetricName: [{self.metric_name}]'s threshold update, old is: {old_threshold} -> new is: {self.threshold}")
|
||||||
|
self.notify_observer()
|
||||||
|
|
||||||
|
def push_latest_data_to_queue(self, data):
|
||||||
|
@@ -109,7 +110,7 @@ class BoxplotThreshold(Threshold):
|
||||||
|
self.new_data_size = 0
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
- return f"[BoxplotThreshold, param is: {self.parameter}]"
|
||||||
|
+ return f"[BoxplotThreshold, param is: {self.parameter}, train_size: {self.data_queue.maxsize}, update_size: {self.data_queue_update_size}]"
|
||||||
|
|
||||||
|
|
||||||
|
class NSigmaThreshold(Threshold):
|
||||||
|
@@ -118,12 +119,14 @@ class NSigmaThreshold(Threshold):
|
||||||
|
self.parameter = n_sigma_parameter
|
||||||
|
|
||||||
|
def _update_threshold(self):
|
||||||
|
+ old_threshold = self.threshold
|
||||||
|
data = list(self.data_queue.queue)
|
||||||
|
mean = np.mean(data)
|
||||||
|
std = np.std(data)
|
||||||
|
self.threshold = mean + self.parameter * std
|
||||||
|
if self.threshold_state == ThresholdState.INIT:
|
||||||
|
self.threshold_state = ThresholdState.START
|
||||||
|
+ logging.info(f"MetricName: [{self.metric_name}]'s threshold update, old is: {old_threshold} -> new is: {self.threshold}")
|
||||||
|
self.notify_observer()
|
||||||
|
|
||||||
|
def push_latest_data_to_queue(self, data):
|
||||||
|
@@ -142,7 +145,7 @@ class NSigmaThreshold(Threshold):
|
||||||
|
self.new_data_size = 0
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
- return f"[NSigmaThreshold, param is: {self.parameter}]"
|
||||||
|
+ return f"[NSigmaThreshold, param is: {self.parameter}, train_size: {self.data_queue.maxsize}, update_size: {self.data_queue_update_size}]"
|
||||||
|
|
||||||
|
|
||||||
|
class ThresholdType(Enum):
|
||||||
|
--
|
||||||
|
2.23.0
|
||||||
|
|
||||||
@ -4,7 +4,7 @@
|
|||||||
Summary: System Inspection Framework
|
Summary: System Inspection Framework
|
||||||
Name: sysSentry
|
Name: sysSentry
|
||||||
Version: 1.0.2
|
Version: 1.0.2
|
||||||
Release: 37
|
Release: 38
|
||||||
License: Mulan PSL v2
|
License: Mulan PSL v2
|
||||||
Group: System Environment/Daemons
|
Group: System Environment/Daemons
|
||||||
Source0: https://gitee.com/openeuler/sysSentry/releases/download/v%{version}/%{name}-%{version}.tar.gz
|
Source0: https://gitee.com/openeuler/sysSentry/releases/download/v%{version}/%{name}-%{version}.tar.gz
|
||||||
@ -55,6 +55,7 @@ Patch42: fix-io_dump-for-collect-module.patch
|
|||||||
Patch43: add-root-cause-analysis.patch
|
Patch43: add-root-cause-analysis.patch
|
||||||
Patch44: update-collect-log.patch
|
Patch44: update-collect-log.patch
|
||||||
Patch45: modify-abnormal-stack-when-the-disk-field-is-not-con.patch
|
Patch45: modify-abnormal-stack-when-the-disk-field-is-not-con.patch
|
||||||
|
Patch46: ai_block_io-fix-some-bugs.patch
|
||||||
|
|
||||||
BuildRequires: cmake gcc-c++
|
BuildRequires: cmake gcc-c++
|
||||||
BuildRequires: python3 python3-setuptools
|
BuildRequires: python3 python3-setuptools
|
||||||
@ -317,6 +318,12 @@ rm -rf %{buildroot}
|
|||||||
%attr(0550,root,root) %{python3_sitelib}/sentryCollector/__pycache__/collect_plugin*
|
%attr(0550,root,root) %{python3_sitelib}/sentryCollector/__pycache__/collect_plugin*
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Mon Oct 14 2024 heyouzhi <heyouzhi@huawei.com> - 1.0.2-38
|
||||||
|
- Type:bugfix
|
||||||
|
- CVE:NA
|
||||||
|
- SUG:NA
|
||||||
|
- DESC:ai_block_io fix some bugs
|
||||||
|
|
||||||
* Sat Oct 12 2024 zhuofeng <zhuofeng2@huawei.com> - 1.0.2-37
|
* Sat Oct 12 2024 zhuofeng <zhuofeng2@huawei.com> - 1.0.2-37
|
||||||
- Type:bugfix
|
- Type:bugfix
|
||||||
- CVE:NA
|
- CVE:NA
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user