add get_disk_type and fix some bugs
add log for improving maintainability
This commit is contained in:
parent
7fc2db90b6
commit
26ee44cd37
176
add-get_disk_type-and-fix-some-bugs.patch
Normal file
176
add-get_disk_type-and-fix-some-bugs.patch
Normal file
@ -0,0 +1,176 @@
|
|||||||
|
From c2ffc679eddda5d78362612d89a9319d268da7e3 Mon Sep 17 00:00:00 2001
|
||||||
|
From: zhuofeng <zhuofeng2@huawei.com>
|
||||||
|
Date: Thu, 10 Oct 2024 20:17:34 +0800
|
||||||
|
Subject: [PATCH] add get_disk_type and fix some bugs
|
||||||
|
|
||||||
|
---
|
||||||
|
service/sentryCollector.service | 2 +-
|
||||||
|
src/python/sentryCollector/collect_io.py | 16 ++++-
|
||||||
|
src/python/sentryCollector/collect_plugin.py | 68 +++++++++++++++++++-
|
||||||
|
3 files changed, 81 insertions(+), 5 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/service/sentryCollector.service b/service/sentryCollector.service
|
||||||
|
index 4ee07d5..e09ddb3 100644
|
||||||
|
--- a/service/sentryCollector.service
|
||||||
|
+++ b/service/sentryCollector.service
|
||||||
|
@@ -1,5 +1,5 @@
|
||||||
|
[Unit]
|
||||||
|
-Description = Collection module added for sysSentry and kernel lock-free collection
|
||||||
|
+Description = Collection module added for sysSentry
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
ExecStart=/usr/bin/python3 /usr/bin/sentryCollector
|
||||||
|
diff --git a/src/python/sentryCollector/collect_io.py b/src/python/sentryCollector/collect_io.py
|
||||||
|
index 8780648..6699a90 100644
|
||||||
|
--- a/src/python/sentryCollector/collect_io.py
|
||||||
|
+++ b/src/python/sentryCollector/collect_io.py
|
||||||
|
@@ -116,7 +116,7 @@ class CollectIo():
|
||||||
|
return 0
|
||||||
|
if finish <= 0 or lat_time <= 0:
|
||||||
|
return 0
|
||||||
|
- value = lat_time / finish / 1000 / 1000
|
||||||
|
+ value = lat_time / finish / 1000
|
||||||
|
if value.is_integer():
|
||||||
|
return int(value)
|
||||||
|
else:
|
||||||
|
@@ -124,11 +124,17 @@ class CollectIo():
|
||||||
|
|
||||||
|
def get_io_length(self, curr_stage_value, last_stage_value, category):
|
||||||
|
try:
|
||||||
|
- finish = int(curr_stage_value[category * 3 + IoStatus.FINISH]) - int(last_stage_value[category * 3 + IoStatus.FINISH])
|
||||||
|
+ lat_time = (int(curr_stage_value[category * 3 + IoStatus.LATENCY]) - int(last_stage_value[category * 3 + IoStatus.LATENCY]))
|
||||||
|
except ValueError as e:
|
||||||
|
logging.error("get_io_length convert to int failed, %s", e)
|
||||||
|
return 0
|
||||||
|
- value = finish / self.period_time / 1000 / 1000
|
||||||
|
+ if lat_time <= 0:
|
||||||
|
+ return 0
|
||||||
|
+ # ns convert us
|
||||||
|
+ lat_time = lat_time / 1000
|
||||||
|
+ # s convert us
|
||||||
|
+ period_time = self.period_time * 1000 * 1000
|
||||||
|
+ value = lat_time / period_time
|
||||||
|
if value.is_integer():
|
||||||
|
return int(value)
|
||||||
|
else:
|
||||||
|
@@ -141,6 +147,8 @@ class CollectIo():
|
||||||
|
with open(io_dump_file, 'r') as file:
|
||||||
|
for line in file:
|
||||||
|
count += line.count('.op=' + Io_Category[category])
|
||||||
|
+ if count > 0:
|
||||||
|
+ logging.info(f"io_dump info : {disk_name}, {stage}, {category}, {count}")
|
||||||
|
except FileNotFoundError:
|
||||||
|
logging.error("The file %s does not exist.", io_dump_file)
|
||||||
|
return count
|
||||||
|
@@ -223,6 +231,8 @@ class CollectIo():
|
||||||
|
if self.get_blk_io_hierarchy(disk_name, stage_list) < 0:
|
||||||
|
continue
|
||||||
|
self.append_period_lat(disk_name, stage_list)
|
||||||
|
+
|
||||||
|
+ logging.debug(f"no-lock collect data : {IO_GLOBAL_DATA}")
|
||||||
|
|
||||||
|
elapsed_time = time.time() - start_time
|
||||||
|
sleep_time = self.period_time - elapsed_time
|
||||||
|
diff --git a/src/python/sentryCollector/collect_plugin.py b/src/python/sentryCollector/collect_plugin.py
|
||||||
|
index 3e2cf4c..31bf11b 100644
|
||||||
|
--- a/src/python/sentryCollector/collect_plugin.py
|
||||||
|
+++ b/src/python/sentryCollector/collect_plugin.py
|
||||||
|
@@ -16,6 +16,7 @@ import json
|
||||||
|
import socket
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
+import os
|
||||||
|
|
||||||
|
COLLECT_SOCKET_PATH = "/var/run/sysSentry/collector.sock"
|
||||||
|
|
||||||
|
@@ -58,6 +59,8 @@ class ResultMessage():
|
||||||
|
RESULT_EXCEED_LIMIT = 4 # the parameter length exceeds the limit.
|
||||||
|
RESULT_PARSE_FAILED = 5 # parse failed
|
||||||
|
RESULT_INVALID_CHAR = 6 # invalid char
|
||||||
|
+ RESULT_DISK_NOEXIST = 7 # disk is not exist
|
||||||
|
+ RESULT_DISK_TYPE_MISMATCH= 8 # disk type mismatch
|
||||||
|
|
||||||
|
Result_Messages = {
|
||||||
|
ResultMessage.RESULT_SUCCEED: "Succeed",
|
||||||
|
@@ -66,9 +69,15 @@ Result_Messages = {
|
||||||
|
ResultMessage.RESULT_INVALID_LENGTH: "Invalid parameter length",
|
||||||
|
ResultMessage.RESULT_EXCEED_LIMIT: "The parameter length exceeds the limit",
|
||||||
|
ResultMessage.RESULT_PARSE_FAILED: "Parse failed",
|
||||||
|
- ResultMessage.RESULT_INVALID_CHAR: "Invalid char"
|
||||||
|
+ ResultMessage.RESULT_INVALID_CHAR: "Invalid char",
|
||||||
|
+ ResultMessage.RESULT_DISK_NOEXIST: "Disk is not exist",
|
||||||
|
+ ResultMessage.RESULT_DISK_TYPE_MISMATCH: "Disk type mismatch"
|
||||||
|
}
|
||||||
|
|
||||||
|
+class DiskType():
|
||||||
|
+ TYPE_NVME_SSD = 0
|
||||||
|
+ TYPE_SATA_SSD = 1
|
||||||
|
+ TYPE_SATA_HDD = 2
|
||||||
|
|
||||||
|
def client_send_and_recv(request_data, data_str_len, protocol):
|
||||||
|
"""client socket send and recv message"""
|
||||||
|
@@ -273,3 +282,60 @@ def inter_get_io_data(period, disk_list, stage, iotype):
|
||||||
|
result['message'] = result_message
|
||||||
|
return result
|
||||||
|
|
||||||
|
+def get_disk_type(disk):
|
||||||
|
+ result = {}
|
||||||
|
+ result['ret'] = ResultMessage.RESULT_UNKNOWN
|
||||||
|
+ result['message'] = ""
|
||||||
|
+ if not disk:
|
||||||
|
+ logging.error("param is invalid")
|
||||||
|
+ result['ret'] = ResultMessage.RESULT_NOT_PARAM
|
||||||
|
+ return result
|
||||||
|
+ if len(disk) <= 0 or len(disk) > LIMIT_DISK_CHAR_LEN:
|
||||||
|
+ logging.error("invalid disk length")
|
||||||
|
+ result['ret'] = ResultMessage.RESULT_INVALID_LENGTH
|
||||||
|
+ return result
|
||||||
|
+ pattern = r'^[a-zA-Z0-9_-]+$'
|
||||||
|
+ if not re.match(pattern, disk):
|
||||||
|
+ logging.error("%s is invalid char", disk)
|
||||||
|
+ result['ret'] = ResultMessage.RESULT_INVALID_CHAR
|
||||||
|
+ return result
|
||||||
|
+
|
||||||
|
+ base_path = '/sys/block'
|
||||||
|
+ all_disk = []
|
||||||
|
+ for disk_name in os.listdir(base_path):
|
||||||
|
+ all_disk.append(disk_name)
|
||||||
|
+
|
||||||
|
+ if disk not in all_disk:
|
||||||
|
+ logging.error("disk %s is not exist", disk)
|
||||||
|
+ result['ret'] = ResultMessage.RESULT_DISK_NOEXIST
|
||||||
|
+ return result
|
||||||
|
+
|
||||||
|
+ if disk[0:4] == "nvme":
|
||||||
|
+ result['message'] = str(DiskType.TYPE_NVME_SSD)
|
||||||
|
+ elif disk[0:2] == "sd":
|
||||||
|
+ disk_file = '/sys/block/{}/queue/rotational'.format(disk)
|
||||||
|
+ try:
|
||||||
|
+ with open(disk_file, 'r') as file:
|
||||||
|
+ num = int(file.read())
|
||||||
|
+ if num == 1:
|
||||||
|
+ result['message'] = str(DiskType.TYPE_SATA_SSD)
|
||||||
|
+ elif num == 0:
|
||||||
|
+ result['message'] = str(DiskType.TYPE_SATA_HDD)
|
||||||
|
+ else:
|
||||||
|
+ logging.error("disk %s is not support, num = %d", disk, num)
|
||||||
|
+ result['ret'] = ResultMessage.RESULT_DISK_TYPE_MISMATCH
|
||||||
|
+ return result
|
||||||
|
+ except FileNotFoundError:
|
||||||
|
+ logging.error("The disk_file [%s] does not exist", disk_file)
|
||||||
|
+ result['ret'] = ResultMessage.RESULT_DISK_NOEXIST
|
||||||
|
+ return result
|
||||||
|
+ except Exception as e:
|
||||||
|
+ logging.error("open disk_file %s happen an error: %s", disk_file, e)
|
||||||
|
+ return result
|
||||||
|
+ else:
|
||||||
|
+ logging.error("disk %s is not support", disk)
|
||||||
|
+ result['ret'] = ResultMessage.RESULT_DISK_TYPE_MISMATCH
|
||||||
|
+ return result
|
||||||
|
+
|
||||||
|
+ result['ret'] = ResultMessage.RESULT_SUCCEED
|
||||||
|
+ return result
|
||||||
|
\ No newline at end of file
|
||||||
|
--
|
||||||
|
2.33.0
|
||||||
|
|
||||||
251
add-log-for-improving-maintainability.patch
Normal file
251
add-log-for-improving-maintainability.patch
Normal file
@ -0,0 +1,251 @@
|
|||||||
|
From a8418093bb37482da7ccaac0c950f2ed8d0ba2fa Mon Sep 17 00:00:00 2001
|
||||||
|
From: gaoruoshu <gaoruoshu@huawei.com>
|
||||||
|
Date: Thu, 10 Oct 2024 15:07:29 +0800
|
||||||
|
Subject: [PATCH] add log for improving maintainability
|
||||||
|
|
||||||
|
---
|
||||||
|
.../avg_block_io/avg_block_io.py | 4 +-
|
||||||
|
.../sentryPlugins/avg_block_io/module_conn.py | 57 ++++++++++-------
|
||||||
|
.../avg_block_io/stage_window.py | 8 +++
|
||||||
|
.../sentryPlugins/avg_block_io/utils.py | 63 +++++++++++++++++--
|
||||||
|
4 files changed, 103 insertions(+), 29 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/src/python/sentryPlugins/avg_block_io/avg_block_io.py b/src/python/sentryPlugins/avg_block_io/avg_block_io.py
|
||||||
|
index 26a60c5..cf2ded3 100644
|
||||||
|
--- a/src/python/sentryPlugins/avg_block_io/avg_block_io.py
|
||||||
|
+++ b/src/python/sentryPlugins/avg_block_io/avg_block_io.py
|
||||||
|
@@ -194,11 +194,11 @@ def init_io_win(io_dic, config, common_param):
|
||||||
|
|
||||||
|
if avg_lim_value and avg_time_value and tot_lim_value:
|
||||||
|
io_data[disk_name][stage_name][rw]["latency"] = IoWindow(window_size=io_dic["win_size"], window_threshold=io_dic["win_threshold"], abnormal_multiple=avg_time_value, abnormal_multiple_lim=avg_lim_value, abnormal_time=tot_lim_value)
|
||||||
|
- logging.debug("Successfully create {}-{}-{} latency window".format(disk_name, stage_name, rw))
|
||||||
|
+ logging.debug("Successfully create {}-{}-{}-latency window".format(disk_name, stage_name, rw))
|
||||||
|
|
||||||
|
if iodump_lim_value is not None:
|
||||||
|
io_data[disk_name][stage_name][rw]["iodump"] = IoDumpWindow(window_size=io_dic["win_size"], window_threshold=io_dic["win_threshold"], abnormal_time=iodump_lim_value)
|
||||||
|
- logging.debug("Successfully create {}-{}-{} iodump window".format(disk_name, stage_name, rw))
|
||||||
|
+ logging.debug("Successfully create {}-{}-{}-iodump window".format(disk_name, stage_name, rw))
|
||||||
|
return io_data, io_avg_value
|
||||||
|
|
||||||
|
|
||||||
|
diff --git a/src/python/sentryPlugins/avg_block_io/module_conn.py b/src/python/sentryPlugins/avg_block_io/module_conn.py
|
||||||
|
index 2fc5a83..40b3fcc 100644
|
||||||
|
--- a/src/python/sentryPlugins/avg_block_io/module_conn.py
|
||||||
|
+++ b/src/python/sentryPlugins/avg_block_io/module_conn.py
|
||||||
|
@@ -13,7 +13,7 @@ import logging
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
|
-from .utils import is_abnormal
|
||||||
|
+from .utils import is_abnormal, get_win_data, log_slow_win
|
||||||
|
from sentryCollector.collect_plugin import is_iocollect_valid, get_io_data, Result_Messages
|
||||||
|
from syssentry.result import ResultLevel, report_result
|
||||||
|
from xalarm.sentry_notify import xalarm_report, MINOR_ALM, ALARM_TYPE_OCCUR
|
||||||
|
@@ -66,36 +66,51 @@ def report_alarm_fail(alarm_info):
|
||||||
|
|
||||||
|
def process_report_data(disk_name, rw, io_data):
|
||||||
|
"""check abnormal window and report to xalarm"""
|
||||||
|
- if not is_abnormal((disk_name, 'bio', rw), io_data):
|
||||||
|
+ abnormal, abnormal_list = is_abnormal((disk_name, 'bio', rw), io_data)
|
||||||
|
+ if not abnormal:
|
||||||
|
return
|
||||||
|
|
||||||
|
- msg = {"alarm_source": TASK_NAME, "driver_name": disk_name, "io_type": rw}
|
||||||
|
+ msg = {
|
||||||
|
+ "alarm_source": TASK_NAME, "driver_name": disk_name, "io_type": rw,
|
||||||
|
+ "reason": "unknown", "block_stack": "bio", "alarm_type": abnormal_list,
|
||||||
|
+ "details": get_win_data(disk_name, rw, io_data)
|
||||||
|
+ }
|
||||||
|
|
||||||
|
+ # io press
|
||||||
|
ctrl_stage = ['throtl', 'wbt', 'iocost', 'bfq']
|
||||||
|
for stage_name in ctrl_stage:
|
||||||
|
- if is_abnormal((disk_name, stage_name, rw), io_data):
|
||||||
|
- msg["reason"] = "IO press slow"
|
||||||
|
- msg["block_stack"] = f"bio,{stage_name}"
|
||||||
|
- logging.warning("{} - {} report IO press slow".format(disk_name, rw))
|
||||||
|
- xalarm_report(1002, MINOR_ALM, ALARM_TYPE_OCCUR, json.dumps(msg))
|
||||||
|
- return
|
||||||
|
-
|
||||||
|
- if is_abnormal((disk_name, 'rq_driver', rw), io_data):
|
||||||
|
+ abnormal, abnormal_list = is_abnormal((disk_name, 'bio', rw), io_data)
|
||||||
|
+ if not abnormal:
|
||||||
|
+ continue
|
||||||
|
+ msg["reason"] = "IO press"
|
||||||
|
+ msg["block_stack"] = f"bio,{stage_name}"
|
||||||
|
+ msg["alarm_type"] = abnormal_list
|
||||||
|
+ log_slow_win(msg, "IO press")
|
||||||
|
+ xalarm_report(1002, MINOR_ALM, ALARM_TYPE_OCCUR, json.dumps(msg))
|
||||||
|
+ return
|
||||||
|
+
|
||||||
|
+ # driver slow
|
||||||
|
+ abnormal, abnormal_list = is_abnormal((disk_name, 'rq_driver', rw), io_data)
|
||||||
|
+ if abnormal:
|
||||||
|
msg["reason"] = "driver slow"
|
||||||
|
msg["block_stack"] = "bio,rq_driver"
|
||||||
|
- logging.warning("{} - {} report driver slow".format(disk_name, rw))
|
||||||
|
+ msg["alarm_type"] = abnormal_list
|
||||||
|
+ log_slow_win(msg, "driver slow")
|
||||||
|
xalarm_report(1002, MINOR_ALM, ALARM_TYPE_OCCUR, json.dumps(msg))
|
||||||
|
return
|
||||||
|
|
||||||
|
+ # kernel slow
|
||||||
|
kernel_stage = ['gettag', 'plug', 'deadline', 'hctx', 'requeue']
|
||||||
|
for stage_name in kernel_stage:
|
||||||
|
- if is_abnormal((disk_name, stage_name, rw), io_data):
|
||||||
|
- msg["reason"] = "kernel slow"
|
||||||
|
- msg["block_stack"] = f"bio,{stage_name}"
|
||||||
|
- logging.warning("{} - {} report kernel slow".format(disk_name, rw))
|
||||||
|
- xalarm_report(1002, MINOR_ALM, ALARM_TYPE_OCCUR, json.dumps(msg))
|
||||||
|
- return
|
||||||
|
- msg["reason"] = "unknown"
|
||||||
|
- msg["block_stack"] = "bio"
|
||||||
|
- logging.warning("{} - {} report UNKNOWN slow".format(disk_name, rw))
|
||||||
|
+ abnormal, abnormal_list = is_abnormal((disk_name, stage_name, rw), io_data)
|
||||||
|
+ if not abnormal:
|
||||||
|
+ continue
|
||||||
|
+ msg["reason"] = "kernel slow"
|
||||||
|
+ msg["block_stack"] = f"bio,{stage_name}"
|
||||||
|
+ msg["alarm_type"] = abnormal_list
|
||||||
|
+ log_slow_win(msg, "kernel slow")
|
||||||
|
+ xalarm_report(1002, MINOR_ALM, ALARM_TYPE_OCCUR, json.dumps(msg))
|
||||||
|
+ return
|
||||||
|
+
|
||||||
|
+ log_slow_win(msg, "unknown")
|
||||||
|
xalarm_report(1002, MINOR_ALM, ALARM_TYPE_OCCUR, json.dumps(msg))
|
||||||
|
diff --git a/src/python/sentryPlugins/avg_block_io/stage_window.py b/src/python/sentryPlugins/avg_block_io/stage_window.py
|
||||||
|
index 9b0ce79..5113782 100644
|
||||||
|
--- a/src/python/sentryPlugins/avg_block_io/stage_window.py
|
||||||
|
+++ b/src/python/sentryPlugins/avg_block_io/stage_window.py
|
||||||
|
@@ -14,6 +14,11 @@ class AbnormalWindowBase:
|
||||||
|
self.window_size = window_size
|
||||||
|
self.window_threshold = window_threshold
|
||||||
|
self.abnormal_window = [False] * window_size
|
||||||
|
+ self.window_data = [-1] * window_size
|
||||||
|
+
|
||||||
|
+ def append_new_data(self, ab_res):
|
||||||
|
+ self.window_data.pop(0)
|
||||||
|
+ self.window_data.append(ab_res)
|
||||||
|
|
||||||
|
def append_new_period(self, ab_res, avg_val=0):
|
||||||
|
self.abnormal_window.pop(0)
|
||||||
|
@@ -25,6 +30,9 @@ class AbnormalWindowBase:
|
||||||
|
def is_abnormal_window(self):
|
||||||
|
return sum(self.abnormal_window) > self.window_threshold
|
||||||
|
|
||||||
|
+ def window_data_to_string(self):
|
||||||
|
+ return ",".join(str(x) for x in self.window_data)
|
||||||
|
+
|
||||||
|
|
||||||
|
class IoWindow(AbnormalWindowBase):
|
||||||
|
def __init__(self, window_size=10, window_threshold=7, abnormal_multiple=5, abnormal_multiple_lim=30, abnormal_time=40):
|
||||||
|
diff --git a/src/python/sentryPlugins/avg_block_io/utils.py b/src/python/sentryPlugins/avg_block_io/utils.py
|
||||||
|
index 2de9a46..3b7f027 100644
|
||||||
|
--- a/src/python/sentryPlugins/avg_block_io/utils.py
|
||||||
|
+++ b/src/python/sentryPlugins/avg_block_io/utils.py
|
||||||
|
@@ -65,15 +65,32 @@ def set_nested_value(data, keys, value):
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
+def get_win_data(disk_name, rw, io_data):
|
||||||
|
+ """get latency and iodump win data"""
|
||||||
|
+ latency = ''
|
||||||
|
+ iodump = ''
|
||||||
|
+ for stage_name in io_data[disk_name]:
|
||||||
|
+ if 'latency' in io_data[disk_name][stage_name][rw]:
|
||||||
|
+ latency_list = io_data[disk_name][stage_name][rw]['latency'].window_data_to_string()
|
||||||
|
+ latency += f'{stage_name}: [{latency_list}], '
|
||||||
|
+ if 'iodump' in io_data[disk_name][stage_name][rw]:
|
||||||
|
+ iodump_list = io_data[disk_name][stage_name][rw]['iodump'].window_data_to_string()
|
||||||
|
+ iodump += f'{stage_name}: [{iodump_list}], '
|
||||||
|
+ return {"latency": latency[:-2], "iodump": iodump[:-2]}
|
||||||
|
+
|
||||||
|
+
|
||||||
|
def is_abnormal(io_key, io_data):
|
||||||
|
"""check if latency and iodump win abnormal"""
|
||||||
|
+ abnormal_list = ''
|
||||||
|
for key in ['latency', 'iodump']:
|
||||||
|
all_keys = get_nested_value(io_data, io_key)
|
||||||
|
if all_keys and key in all_keys:
|
||||||
|
win = get_nested_value(io_data, io_key + (key,))
|
||||||
|
if win and win.is_abnormal_window():
|
||||||
|
- return True
|
||||||
|
- return False
|
||||||
|
+ abnormal_list += key + ', '
|
||||||
|
+ if not abnormal_list:
|
||||||
|
+ return False, abnormal_list
|
||||||
|
+ return True, abnormal_list[:-2]
|
||||||
|
|
||||||
|
|
||||||
|
def update_io_avg(old_avg, period_value, win_size):
|
||||||
|
@@ -87,8 +104,8 @@ def update_io_avg(old_avg, period_value, win_size):
|
||||||
|
return [new_avg_value, new_avg_count]
|
||||||
|
|
||||||
|
|
||||||
|
-def update_io_data(old_avg, period_value, win_size, io_data, io_key):
|
||||||
|
- """update data of latency and iodump window"""
|
||||||
|
+def update_io_period(old_avg, period_value, io_data, io_key):
|
||||||
|
+ """update period of latency and iodump window"""
|
||||||
|
all_wins = get_nested_value(io_data, io_key)
|
||||||
|
if all_wins and "latency" in all_wins:
|
||||||
|
io_data[io_key[0]][io_key[1]][io_key[2]]["latency"].append_new_period(period_value[0], old_avg[AVG_VALUE])
|
||||||
|
@@ -96,20 +113,54 @@ def update_io_data(old_avg, period_value, win_size, io_data, io_key):
|
||||||
|
io_data[io_key[0]][io_key[1]][io_key[2]]["iodump"].append_new_period(period_value[1])
|
||||||
|
|
||||||
|
|
||||||
|
+def update_io_data(period_value, io_data, io_key):
|
||||||
|
+ """update data of latency and iodump window"""
|
||||||
|
+ all_wins = get_nested_value(io_data, io_key)
|
||||||
|
+ if all_wins and "latency" in all_wins:
|
||||||
|
+ io_data[io_key[0]][io_key[1]][io_key[2]]["latency"].append_new_data(period_value[0])
|
||||||
|
+ if all_wins and "iodump" in all_wins:
|
||||||
|
+ io_data[io_key[0]][io_key[1]][io_key[2]]["iodump"].append_new_data(period_value[1])
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+def log_abnormal_period(old_avg, period_value, io_data, io_key):
|
||||||
|
+ """record log of abnormal period"""
|
||||||
|
+ all_wins = get_nested_value(io_data, io_key)
|
||||||
|
+ if all_wins and "latency" in all_wins:
|
||||||
|
+ if all_wins["latency"].is_abnormal_period(period_value[0], old_avg[AVG_VALUE]):
|
||||||
|
+ logging.info(f"[abnormal_period] disk: {io_key[0]}, stage: {io_key[1]}, iotype: {io_key[2]}, "
|
||||||
|
+ f"type: latency, avg: {round(old_avg[AVG_VALUE], 3)}, curr_val: {period_value[0]}")
|
||||||
|
+ if all_wins and "iodump" in all_wins:
|
||||||
|
+ if all_wins["iodump"].is_abnormal_period(period_value[1]):
|
||||||
|
+ logging.info(f"[abnormal_period] disk: {io_key[0]}, stage: {io_key[1]}, iotype: {io_key[2]}, "
|
||||||
|
+ f"type: iodump, curr_val: {period_value[1]}")
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+def log_slow_win(msg, reason):
|
||||||
|
+ """record log of slow win"""
|
||||||
|
+ logging.warning(f"[SLOW IO] disk: {msg['driver_name']}, stage: {msg['block_stack']}, "
|
||||||
|
+ f"iotype: {msg['io_type']}, type: {msg['alarm_type']}, reason: {reason}")
|
||||||
|
+ logging.info(f"latency: {msg['details']['latency']}")
|
||||||
|
+ logging.info(f"iodump: {msg['details']['iodump']}")
|
||||||
|
+
|
||||||
|
+
|
||||||
|
def update_avg_and_check_abnormal(data, io_key, win_size, io_avg_value, io_data):
|
||||||
|
"""update avg and check abonrmal, return true if win_size full"""
|
||||||
|
period_value = get_nested_value(data, io_key)
|
||||||
|
old_avg = get_nested_value(io_avg_value, io_key)
|
||||||
|
|
||||||
|
# 更新avg数据
|
||||||
|
+ update_io_data(period_value, io_data, io_key)
|
||||||
|
if old_avg[AVG_COUNT] < win_size:
|
||||||
|
set_nested_value(io_avg_value, io_key, update_io_avg(old_avg, period_value, win_size))
|
||||||
|
return False
|
||||||
|
|
||||||
|
+ # 打印异常周期数据
|
||||||
|
+ log_abnormal_period(old_avg, period_value, io_data, io_key)
|
||||||
|
+
|
||||||
|
# 更新win数据 -- 判断异常周期
|
||||||
|
- update_io_data(old_avg, period_value, win_size, io_data, io_key)
|
||||||
|
+ update_io_period(old_avg, period_value, io_data, io_key)
|
||||||
|
all_wins = get_nested_value(io_data, io_key)
|
||||||
|
- if all_wins and 'latency' not in all_wins:
|
||||||
|
+ if not all_wins or 'latency' not in all_wins:
|
||||||
|
return True
|
||||||
|
period = get_nested_value(io_data, io_key + ("latency",))
|
||||||
|
if period and period.is_abnormal_period(period_value[0], old_avg[AVG_VALUE]):
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
||||||
@ -4,7 +4,7 @@
|
|||||||
Summary: System Inspection Framework
|
Summary: System Inspection Framework
|
||||||
Name: sysSentry
|
Name: sysSentry
|
||||||
Version: 1.0.2
|
Version: 1.0.2
|
||||||
Release: 29
|
Release: 30
|
||||||
License: Mulan PSL v2
|
License: Mulan PSL v2
|
||||||
Group: System Environment/Daemons
|
Group: System Environment/Daemons
|
||||||
Source0: https://gitee.com/openeuler/sysSentry/releases/download/v%{version}/%{name}-%{version}.tar.gz
|
Source0: https://gitee.com/openeuler/sysSentry/releases/download/v%{version}/%{name}-%{version}.tar.gz
|
||||||
@ -42,6 +42,8 @@ Patch29: change-alarm-length.patch
|
|||||||
Patch30: add-detail-time.patch
|
Patch30: add-detail-time.patch
|
||||||
Patch31: xalarm-add-alarm-msg-length-to-8192.patch
|
Patch31: xalarm-add-alarm-msg-length-to-8192.patch
|
||||||
Patch32: ai_block_io-adapt-alarm-module.patch
|
Patch32: ai_block_io-adapt-alarm-module.patch
|
||||||
|
Patch33: add-log-for-improving-maintainability.patch
|
||||||
|
Patch34: add-get_disk_type-and-fix-some-bugs.patch
|
||||||
|
|
||||||
BuildRequires: cmake gcc-c++
|
BuildRequires: cmake gcc-c++
|
||||||
BuildRequires: python3 python3-setuptools
|
BuildRequires: python3 python3-setuptools
|
||||||
@ -286,6 +288,13 @@ rm -rf %{buildroot}
|
|||||||
%attr(0550,root,root) %{python3_sitelib}/sentryPlugins/ai_block_io
|
%attr(0550,root,root) %{python3_sitelib}/sentryPlugins/ai_block_io
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Thu Oct 10 2024 zhuofeng <zhuofeng2@huawei.com> - 1.0.2-30
|
||||||
|
- Type:bugfix
|
||||||
|
- CVE:NA
|
||||||
|
- SUG:NA
|
||||||
|
- DESC:add get_disk_type and fix some bugs
|
||||||
|
add log for improving maintainability
|
||||||
|
|
||||||
* Thu Oct 10 2024 heyouzhi <heyouzhi@huawei.com> - 1.0.2-29
|
* Thu Oct 10 2024 heyouzhi <heyouzhi@huawei.com> - 1.0.2-29
|
||||||
- Type:requirement
|
- Type:requirement
|
||||||
- CVE:NA
|
- CVE:NA
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user