!103 refactor config.py and bugfix uncorrect slow io report
Merge pull request !103 from gaoruoshu/master
This commit is contained in:
commit
82961d7902
168
get_io_data-failed-wont-stop-avg_block_io-and-del-di.patch
Normal file
168
get_io_data-failed-wont-stop-avg_block_io-and-del-di.patch
Normal file
@ -0,0 +1,168 @@
|
|||||||
|
From b21607fcec4b290bc78c9f6c4a26db1a2df32a66 Mon Sep 17 00:00:00 2001
|
||||||
|
From: gaoruoshu <gaoruoshu@huawei.com>
|
||||||
|
Date: Tue, 15 Oct 2024 21:21:10 +0800
|
||||||
|
Subject: [PATCH] get_io_data failed wont stop avg_block_io and del disk not
|
||||||
|
support
|
||||||
|
|
||||||
|
---
|
||||||
|
src/python/sentryCollector/collect_plugin.py | 14 ++++-----
|
||||||
|
.../avg_block_io/avg_block_io.py | 9 ++++--
|
||||||
|
.../sentryPlugins/avg_block_io/module_conn.py | 31 +++++++++++++------
|
||||||
|
3 files changed, 35 insertions(+), 19 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/src/python/sentryCollector/collect_plugin.py b/src/python/sentryCollector/collect_plugin.py
|
||||||
|
index bec405a..53dddec 100644
|
||||||
|
--- a/src/python/sentryCollector/collect_plugin.py
|
||||||
|
+++ b/src/python/sentryCollector/collect_plugin.py
|
||||||
|
@@ -90,14 +90,14 @@ def client_send_and_recv(request_data, data_str_len, protocol):
|
||||||
|
try:
|
||||||
|
client_socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
|
||||||
|
except socket.error:
|
||||||
|
- logging.error("collect_plugin: client create socket error")
|
||||||
|
+ logging.debug("collect_plugin: client create socket error")
|
||||||
|
return None
|
||||||
|
|
||||||
|
try:
|
||||||
|
client_socket.connect(COLLECT_SOCKET_PATH)
|
||||||
|
except OSError:
|
||||||
|
client_socket.close()
|
||||||
|
- logging.error("collect_plugin: client connect error")
|
||||||
|
+ logging.debug("collect_plugin: client connect error")
|
||||||
|
return None
|
||||||
|
|
||||||
|
req_data_len = len(request_data)
|
||||||
|
@@ -109,23 +109,23 @@ def client_send_and_recv(request_data, data_str_len, protocol):
|
||||||
|
res_data = res_data.decode()
|
||||||
|
except (OSError, UnicodeError):
|
||||||
|
client_socket.close()
|
||||||
|
- logging.error("collect_plugin: client communicate error")
|
||||||
|
+ logging.debug("collect_plugin: client communicate error")
|
||||||
|
return None
|
||||||
|
|
||||||
|
res_magic = res_data[:CLT_MSG_MAGIC_LEN]
|
||||||
|
if res_magic != "RES":
|
||||||
|
- logging.error("res msg format error")
|
||||||
|
+ logging.debug("res msg format error")
|
||||||
|
return None
|
||||||
|
|
||||||
|
protocol_str = res_data[CLT_MSG_MAGIC_LEN:CLT_MSG_MAGIC_LEN+CLT_MSG_PRO_LEN]
|
||||||
|
try:
|
||||||
|
protocol_id = int(protocol_str)
|
||||||
|
except ValueError:
|
||||||
|
- logging.error("recv msg protocol id is invalid %s", protocol_str)
|
||||||
|
+ logging.debug("recv msg protocol id is invalid %s", protocol_str)
|
||||||
|
return None
|
||||||
|
|
||||||
|
if protocol_id >= ClientProtocol.PRO_END:
|
||||||
|
- logging.error("protocol id is invalid")
|
||||||
|
+ logging.debug("protocol id is invalid")
|
||||||
|
return None
|
||||||
|
|
||||||
|
try:
|
||||||
|
@@ -134,7 +134,7 @@ def client_send_and_recv(request_data, data_str_len, protocol):
|
||||||
|
res_msg_data = res_msg_data.decode()
|
||||||
|
return res_msg_data
|
||||||
|
except (OSError, ValueError, UnicodeError):
|
||||||
|
- logging.error("collect_plugin: client recv res msg error")
|
||||||
|
+ logging.debug("collect_plugin: client recv res msg error")
|
||||||
|
finally:
|
||||||
|
client_socket.close()
|
||||||
|
|
||||||
|
diff --git a/src/python/sentryPlugins/avg_block_io/avg_block_io.py b/src/python/sentryPlugins/avg_block_io/avg_block_io.py
|
||||||
|
index cd47919..899d517 100644
|
||||||
|
--- a/src/python/sentryPlugins/avg_block_io/avg_block_io.py
|
||||||
|
+++ b/src/python/sentryPlugins/avg_block_io/avg_block_io.py
|
||||||
|
@@ -15,7 +15,7 @@ import time
|
||||||
|
|
||||||
|
from .config import read_config_log, read_config_common, read_config_algorithm, read_config_latency, read_config_iodump, read_config_stage
|
||||||
|
from .stage_window import IoWindow, IoDumpWindow
|
||||||
|
-from .module_conn import avg_is_iocollect_valid, avg_get_io_data, report_alarm_fail, process_report_data, sig_handler, get_disk_type_by_name
|
||||||
|
+from .module_conn import avg_is_iocollect_valid, avg_get_io_data, report_alarm_fail, process_report_data, sig_handler, get_disk_type_by_name, check_disk_list_validation
|
||||||
|
from .utils import update_avg_and_check_abnormal
|
||||||
|
|
||||||
|
CONFIG_FILE = "/etc/sysSentry/plugins/avg_block_io.ini"
|
||||||
|
@@ -79,6 +79,8 @@ def get_valid_disk_stage_list(io_dic, config_disk, config_stage):
|
||||||
|
if not disk_list:
|
||||||
|
report_alarm_fail("Cannot get valid disk name")
|
||||||
|
|
||||||
|
+ disk_list = check_disk_list_validation(disk_list)
|
||||||
|
+
|
||||||
|
disk_list = disk_list[:10] if len(disk_list) > 10 else disk_list
|
||||||
|
|
||||||
|
if not config_disk:
|
||||||
|
@@ -117,7 +119,10 @@ def main_loop(io_dic, io_data, io_avg_value):
|
||||||
|
time.sleep(period_time)
|
||||||
|
|
||||||
|
# 采集模块对接,获取周期数据
|
||||||
|
- curr_period_data = avg_get_io_data(io_dic)
|
||||||
|
+ is_success, curr_period_data = avg_get_io_data(io_dic)
|
||||||
|
+ if not is_success:
|
||||||
|
+ logging.error(f"{curr_period_data['msg']}")
|
||||||
|
+ continue
|
||||||
|
|
||||||
|
# 处理周期数据
|
||||||
|
reach_size = False
|
||||||
|
diff --git a/src/python/sentryPlugins/avg_block_io/module_conn.py b/src/python/sentryPlugins/avg_block_io/module_conn.py
|
||||||
|
index cbdaad4..a67ef45 100644
|
||||||
|
--- a/src/python/sentryPlugins/avg_block_io/module_conn.py
|
||||||
|
+++ b/src/python/sentryPlugins/avg_block_io/module_conn.py
|
||||||
|
@@ -40,25 +40,25 @@ def avg_is_iocollect_valid(io_dic, config_disk, config_stage):
|
||||||
|
logging.debug(f"send to sentryCollector is_iocollect_valid: period={io_dic['period_time']}, "
|
||||||
|
f"disk={config_disk}, stage={config_stage}")
|
||||||
|
res = is_iocollect_valid(io_dic["period_time"], config_disk, config_stage)
|
||||||
|
- return check_result_validation(res, 'check config validation')
|
||||||
|
+ is_success, data = check_result_validation(res, 'check config validation')
|
||||||
|
+ if not is_success:
|
||||||
|
+ report_alarm_fail(f"{data['msg']}")
|
||||||
|
+ return data
|
||||||
|
|
||||||
|
|
||||||
|
def check_result_validation(res, reason):
|
||||||
|
"""check validation of result from sentryCollector"""
|
||||||
|
if not 'ret' in res or not 'message' in res:
|
||||||
|
- err_msg = "Failed to {}: Cannot connect to sentryCollector.".format(reason)
|
||||||
|
- report_alarm_fail(err_msg)
|
||||||
|
+ return False, {'msg': f"Failed to {reason}: Cannot connect to sentryCollector"}
|
||||||
|
if res['ret'] != 0:
|
||||||
|
- err_msg = "Failed to {}: {}".format(reason, Result_Messages[res['ret']])
|
||||||
|
- report_alarm_fail(err_msg)
|
||||||
|
+ return False, {'msg': f"Failed to {reason}: {Result_Messages[res['ret']]}"}
|
||||||
|
|
||||||
|
try:
|
||||||
|
json_data = json.loads(res['message'])
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
- err_msg = f"Failed to {reason}: invalid return message"
|
||||||
|
- report_alarm_fail(err_msg)
|
||||||
|
+ return False, {'msg': f"Failed to {reason}: invalid return message"}
|
||||||
|
|
||||||
|
- return json_data
|
||||||
|
+ return True, json_data
|
||||||
|
|
||||||
|
|
||||||
|
def report_alarm_fail(alarm_info):
|
||||||
|
@@ -120,10 +120,21 @@ def process_report_data(disk_name, rw, io_data):
|
||||||
|
xalarm_report(1002, MINOR_ALM, ALARM_TYPE_OCCUR, json.dumps(msg))
|
||||||
|
|
||||||
|
|
||||||
|
+def check_disk_list_validation(disk_list):
|
||||||
|
+ valid_disk_list = []
|
||||||
|
+ for disk_name in disk_list:
|
||||||
|
+ is_success, _ = check_result_validation(get_disk_type(disk_name), "")
|
||||||
|
+ if not is_success:
|
||||||
|
+ continue
|
||||||
|
+ valid_disk_list.append(disk_name)
|
||||||
|
+ return valid_disk_list
|
||||||
|
+
|
||||||
|
+
|
||||||
|
def get_disk_type_by_name(disk_name):
|
||||||
|
logging.debug(f"send to sentryCollector get_disk_type: disk_name={disk_name}")
|
||||||
|
- res = get_disk_type(disk_name)
|
||||||
|
- disk_type_str = check_result_validation(get_disk_type(disk_name), f'Invalid disk type {disk_name}')
|
||||||
|
+ is_success, disk_type_str = check_result_validation(get_disk_type(disk_name), f'Invalid disk type {disk_name}')
|
||||||
|
+ if not is_success:
|
||||||
|
+ report_alarm_fail(f"{disk_type_str['msg']}")
|
||||||
|
try:
|
||||||
|
curr_disk_type = int(disk_type_str)
|
||||||
|
if curr_disk_type not in Disk_Type:
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
566
refactor-config.py-and-bugfix-uncorrect-slow-io-repo.patch
Normal file
566
refactor-config.py-and-bugfix-uncorrect-slow-io-repo.patch
Normal file
@ -0,0 +1,566 @@
|
|||||||
|
From d5cb115a97e27c8270e8fb385fb3914af9ba3c34 Mon Sep 17 00:00:00 2001
|
||||||
|
From: gaoruoshu <gaoruoshu@huawei.com>
|
||||||
|
Date: Tue, 15 Oct 2024 10:00:07 +0000
|
||||||
|
Subject: [PATCH] refactor config.py and bugfix uncorrect slow io report
|
||||||
|
|
||||||
|
Signed-off-by: gaoruoshu <gaoruoshu@huawei.com>
|
||||||
|
---
|
||||||
|
.../avg_block_io/avg_block_io.py | 155 ++-----------
|
||||||
|
.../sentryPlugins/avg_block_io/config.py | 208 ++++++++++++++++++
|
||||||
|
.../sentryPlugins/avg_block_io/module_conn.py | 9 +-
|
||||||
|
.../sentryPlugins/avg_block_io/utils.py | 72 ------
|
||||||
|
4 files changed, 238 insertions(+), 206 deletions(-)
|
||||||
|
create mode 100644 src/python/sentryPlugins/avg_block_io/config.py
|
||||||
|
|
||||||
|
diff --git a/src/python/sentryPlugins/avg_block_io/avg_block_io.py b/src/python/sentryPlugins/avg_block_io/avg_block_io.py
|
||||||
|
index f3ade09..cd47919 100644
|
||||||
|
--- a/src/python/sentryPlugins/avg_block_io/avg_block_io.py
|
||||||
|
+++ b/src/python/sentryPlugins/avg_block_io/avg_block_io.py
|
||||||
|
@@ -13,132 +13,13 @@ import signal
|
||||||
|
import configparser
|
||||||
|
import time
|
||||||
|
|
||||||
|
+from .config import read_config_log, read_config_common, read_config_algorithm, read_config_latency, read_config_iodump, read_config_stage
|
||||||
|
from .stage_window import IoWindow, IoDumpWindow
|
||||||
|
from .module_conn import avg_is_iocollect_valid, avg_get_io_data, report_alarm_fail, process_report_data, sig_handler, get_disk_type_by_name
|
||||||
|
-from .utils import update_avg_and_check_abnormal, get_log_level, get_section_value
|
||||||
|
-from sentryCollector.collect_plugin import Disk_Type
|
||||||
|
+from .utils import update_avg_and_check_abnormal
|
||||||
|
|
||||||
|
CONFIG_FILE = "/etc/sysSentry/plugins/avg_block_io.ini"
|
||||||
|
|
||||||
|
-def log_invalid_keys(not_in_list, keys_name, config_list, default_list):
|
||||||
|
- """print invalid log"""
|
||||||
|
- if config_list and not_in_list:
|
||||||
|
- logging.warning("{} in common.{} are not valid, set {}={}".format(not_in_list, keys_name, keys_name, default_list))
|
||||||
|
- elif config_list == ["default"]:
|
||||||
|
- logging.warning("Default {} use {}".format(keys_name, default_list))
|
||||||
|
-
|
||||||
|
-
|
||||||
|
-def read_config_common(config):
|
||||||
|
- """read config file, get [common] section value"""
|
||||||
|
- if not config.has_section("common"):
|
||||||
|
- report_alarm_fail("Cannot find common section in config file")
|
||||||
|
-
|
||||||
|
- try:
|
||||||
|
- disk_name = config.get("common", "disk")
|
||||||
|
- disk = [] if disk_name == "default" else disk_name.split(",")
|
||||||
|
- except configparser.NoOptionError:
|
||||||
|
- disk = []
|
||||||
|
- logging.warning("Unset common.disk, set to default")
|
||||||
|
-
|
||||||
|
- try:
|
||||||
|
- stage_name = config.get("common", "stage")
|
||||||
|
- stage = [] if stage_name == "default" else stage_name.split(",")
|
||||||
|
- except configparser.NoOptionError:
|
||||||
|
- stage = []
|
||||||
|
- logging.warning("Unset common.stage, set to default")
|
||||||
|
-
|
||||||
|
- if len(disk) > 10:
|
||||||
|
- logging.warning("Too many common.disks, record only max 10 disks")
|
||||||
|
- disk = disk[:10]
|
||||||
|
-
|
||||||
|
- try:
|
||||||
|
- iotype_name = config.get("common", "iotype").split(",")
|
||||||
|
- iotype_list = [rw.lower() for rw in iotype_name if rw.lower() in ['read', 'write']]
|
||||||
|
- err_iotype = [rw.lower() for rw in iotype_name if rw.lower() not in ['read', 'write']]
|
||||||
|
-
|
||||||
|
- if err_iotype:
|
||||||
|
- report_alarm_fail("Invalid common.iotype config")
|
||||||
|
-
|
||||||
|
- except configparser.NoOptionError:
|
||||||
|
- iotype_list = ["read", "write"]
|
||||||
|
- logging.warning("Unset common.iotype, set to read,write")
|
||||||
|
-
|
||||||
|
- try:
|
||||||
|
- period_time = int(config.get("common", "period_time"))
|
||||||
|
- if not (1 <= period_time <= 300):
|
||||||
|
- raise ValueError("Invalid period_time")
|
||||||
|
- except ValueError:
|
||||||
|
- report_alarm_fail("Invalid common.period_time")
|
||||||
|
- except configparser.NoOptionError:
|
||||||
|
- period_time = 1
|
||||||
|
- logging.warning("Unset common.period_time, use 1s as default")
|
||||||
|
-
|
||||||
|
- return period_time, disk, stage, iotype_list
|
||||||
|
-
|
||||||
|
-
|
||||||
|
-def read_config_algorithm(config):
|
||||||
|
- """read config file, get [algorithm] section value"""
|
||||||
|
- if not config.has_section("algorithm"):
|
||||||
|
- report_alarm_fail("Cannot find algorithm section in config file")
|
||||||
|
-
|
||||||
|
- try:
|
||||||
|
- win_size = int(config.get("algorithm", "win_size"))
|
||||||
|
- if not (1 <= win_size <= 300):
|
||||||
|
- raise ValueError("Invalid algorithm.win_size")
|
||||||
|
- except ValueError:
|
||||||
|
- report_alarm_fail("Invalid algorithm.win_size config")
|
||||||
|
- except configparser.NoOptionError:
|
||||||
|
- win_size = 30
|
||||||
|
- logging.warning("Unset algorithm.win_size, use 30 as default")
|
||||||
|
-
|
||||||
|
- try:
|
||||||
|
- win_threshold = int(config.get("algorithm", "win_threshold"))
|
||||||
|
- if win_threshold < 1 or win_threshold > 300 or win_threshold > win_size:
|
||||||
|
- raise ValueError("Invalid algorithm.win_threshold")
|
||||||
|
- except ValueError:
|
||||||
|
- report_alarm_fail("Invalid algorithm.win_threshold config")
|
||||||
|
- except configparser.NoOptionError:
|
||||||
|
- win_threshold = 6
|
||||||
|
- logging.warning("Unset algorithm.win_threshold, use 6 as default")
|
||||||
|
-
|
||||||
|
- return win_size, win_threshold
|
||||||
|
-
|
||||||
|
-
|
||||||
|
-def read_config_latency(config):
|
||||||
|
- """read config file, get [latency_xxx] section value"""
|
||||||
|
- common_param = {}
|
||||||
|
- for type_name in Disk_Type:
|
||||||
|
- section_name = f"latency_{Disk_Type[type_name]}"
|
||||||
|
- if not config.has_section(section_name):
|
||||||
|
- report_alarm_fail(f"Cannot find {section_name} section in config file")
|
||||||
|
-
|
||||||
|
- common_param[Disk_Type[type_name]] = get_section_value(section_name, config)
|
||||||
|
- return common_param
|
||||||
|
-
|
||||||
|
-
|
||||||
|
-def read_config_iodump(config):
|
||||||
|
- """read config file, get [iodump] section value"""
|
||||||
|
- common_param = {}
|
||||||
|
- section_name = "iodump"
|
||||||
|
- if not config.has_section(section_name):
|
||||||
|
- report_alarm_fail(f"Cannot find {section_name} section in config file")
|
||||||
|
-
|
||||||
|
- return get_section_value(section_name, config)
|
||||||
|
-
|
||||||
|
-
|
||||||
|
-def read_config_stage(config, stage, iotype_list, curr_disk_type):
|
||||||
|
- """read config file, get [STAGE_NAME_diskType] section value"""
|
||||||
|
- res = {}
|
||||||
|
- section_name = f"{stage}_{curr_disk_type}"
|
||||||
|
- if not config.has_section(section_name):
|
||||||
|
- return res
|
||||||
|
-
|
||||||
|
- for key in config[section_name]:
|
||||||
|
- if config[stage][key].isdecimal():
|
||||||
|
- res[key] = int(config[stage][key])
|
||||||
|
-
|
||||||
|
- return res
|
||||||
|
-
|
||||||
|
|
||||||
|
def init_io_win(io_dic, config, common_param):
|
||||||
|
"""initialize windows of latency, iodump, and dict of avg_value"""
|
||||||
|
@@ -192,24 +73,33 @@ def get_valid_disk_stage_list(io_dic, config_disk, config_stage):
|
||||||
|
disk_list = [key for key in all_disk_set if key in config_disk]
|
||||||
|
not_in_disk_list = [key for key in config_disk if key not in all_disk_set]
|
||||||
|
|
||||||
|
+ if not config_disk and not not_in_disk_list:
|
||||||
|
+ disk_list = [key for key in all_disk_set]
|
||||||
|
+
|
||||||
|
+ if not disk_list:
|
||||||
|
+ report_alarm_fail("Cannot get valid disk name")
|
||||||
|
+
|
||||||
|
+ disk_list = disk_list[:10] if len(disk_list) > 10 else disk_list
|
||||||
|
+
|
||||||
|
+ if not config_disk:
|
||||||
|
+ logging.info(f"Default common.disk using disk={disk_list}")
|
||||||
|
+ elif sorted(disk_list) != sorted(config_disk):
|
||||||
|
+ logging.warning(f"Set common.disk to {disk_list}")
|
||||||
|
+
|
||||||
|
stage_list = [key for key in all_stage_set if key in config_stage]
|
||||||
|
not_in_stage_list = [key for key in config_stage if key not in all_stage_set]
|
||||||
|
|
||||||
|
if not_in_stage_list:
|
||||||
|
report_alarm_fail(f"Invalid common.stage_list config, cannot set {not_in_stage_list}")
|
||||||
|
|
||||||
|
- if not config_disk and not not_in_disk_list:
|
||||||
|
- disk_list = [key for key in all_disk_set]
|
||||||
|
-
|
||||||
|
- if not config_stage and not not_in_stage_list:
|
||||||
|
+ if not config_stage:
|
||||||
|
stage_list = [key for key in all_stage_set]
|
||||||
|
|
||||||
|
- disk_list = disk_list[:10] if len(disk_list) > 10 else disk_list
|
||||||
|
-
|
||||||
|
- if not stage_list or not disk_list:
|
||||||
|
- report_alarm_fail("Cannot get valid disk name or stage name.")
|
||||||
|
+ if not stage_list:
|
||||||
|
+ report_alarm_fail("Cannot get valid stage name.")
|
||||||
|
|
||||||
|
- log_invalid_keys(not_in_disk_list, 'disk', config_disk, disk_list)
|
||||||
|
+ if not config_stage:
|
||||||
|
+ logging.info(f"Default common.stage using stage={stage_list}")
|
||||||
|
|
||||||
|
return disk_list, stage_list
|
||||||
|
|
||||||
|
@@ -254,9 +144,8 @@ def main():
|
||||||
|
signal.signal(signal.SIGINT, sig_handler)
|
||||||
|
signal.signal(signal.SIGTERM, sig_handler)
|
||||||
|
|
||||||
|
- log_level = get_log_level(CONFIG_FILE)
|
||||||
|
+ log_level = read_config_log(CONFIG_FILE)
|
||||||
|
log_format = "%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s"
|
||||||
|
-
|
||||||
|
logging.basicConfig(level=log_level, format=log_format)
|
||||||
|
|
||||||
|
# 初始化配置读取
|
||||||
|
@@ -274,6 +163,8 @@ def main():
|
||||||
|
# 采集模块对接,is_iocollect_valid()
|
||||||
|
io_dic["disk_list"], io_dic["stage_list"] = get_valid_disk_stage_list(io_dic, disk, stage)
|
||||||
|
|
||||||
|
+ logging.debug(f"disk={io_dic['disk_list']}, stage={io_dic['stage_list']}")
|
||||||
|
+
|
||||||
|
if "bio" not in io_dic["stage_list"]:
|
||||||
|
report_alarm_fail("Cannot run avg_block_io without bio stage")
|
||||||
|
|
||||||
|
diff --git a/src/python/sentryPlugins/avg_block_io/config.py b/src/python/sentryPlugins/avg_block_io/config.py
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000..c8f45ce
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/src/python/sentryPlugins/avg_block_io/config.py
|
||||||
|
@@ -0,0 +1,208 @@
|
||||||
|
+import configparser
|
||||||
|
+import logging
|
||||||
|
+import os
|
||||||
|
+
|
||||||
|
+from .module_conn import report_alarm_fail
|
||||||
|
+from sentryCollector.collect_plugin import Disk_Type
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+CONF_LOG = 'log'
|
||||||
|
+CONF_LOG_LEVEL = 'level'
|
||||||
|
+LogLevel = {
|
||||||
|
+ "debug": logging.DEBUG,
|
||||||
|
+ "info": logging.INFO,
|
||||||
|
+ "warning": logging.WARNING,
|
||||||
|
+ "error": logging.ERROR,
|
||||||
|
+ "critical": logging.CRITICAL
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+CONF_COMMON = 'common'
|
||||||
|
+CONF_COMMON_DISK = 'disk'
|
||||||
|
+CONF_COMMON_STAGE = 'stage'
|
||||||
|
+CONF_COMMON_IOTYPE = 'iotype'
|
||||||
|
+CONF_COMMON_PER_TIME = 'period_time'
|
||||||
|
+
|
||||||
|
+CONF_ALGO = 'algorithm'
|
||||||
|
+CONF_ALGO_SIZE = 'win_size'
|
||||||
|
+CONF_ALGO_THRE = 'win_threshold'
|
||||||
|
+
|
||||||
|
+CONF_LATENCY = 'latency_{}'
|
||||||
|
+CONF_IODUMP = 'iodump'
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+DEFAULT_PARAM = {
|
||||||
|
+ CONF_LOG: {
|
||||||
|
+ CONF_LOG_LEVEL: 'info'
|
||||||
|
+ }, CONF_COMMON: {
|
||||||
|
+ CONF_COMMON_DISK: 'default',
|
||||||
|
+ CONF_COMMON_STAGE: 'default',
|
||||||
|
+ CONF_COMMON_IOTYPE: 'read,write',
|
||||||
|
+ CONF_COMMON_PER_TIME: 1
|
||||||
|
+ }, CONF_ALGO: {
|
||||||
|
+ CONF_ALGO_SIZE: 30,
|
||||||
|
+ CONF_ALGO_THRE: 6
|
||||||
|
+ }, 'latency_nvme_ssd': {
|
||||||
|
+ 'read_avg_lim': 300,
|
||||||
|
+ 'write_avg_lim': 300,
|
||||||
|
+ 'read_avg_time': 3,
|
||||||
|
+ 'write_avg_time': 3,
|
||||||
|
+ 'read_tot_lim': 500,
|
||||||
|
+ 'write_tot_lim': 500,
|
||||||
|
+ }, 'latency_sata_ssd' : {
|
||||||
|
+ 'read_avg_lim': 10000,
|
||||||
|
+ 'write_avg_lim': 10000,
|
||||||
|
+ 'read_avg_time': 3,
|
||||||
|
+ 'write_avg_time': 3,
|
||||||
|
+ 'read_tot_lim': 50000,
|
||||||
|
+ 'write_tot_lim': 50000,
|
||||||
|
+ }, 'latency_sata_hdd' : {
|
||||||
|
+ 'read_avg_lim': 15000,
|
||||||
|
+ 'write_avg_lim': 15000,
|
||||||
|
+ 'read_avg_time': 3,
|
||||||
|
+ 'write_avg_time': 3,
|
||||||
|
+ 'read_tot_lim': 50000,
|
||||||
|
+ 'write_tot_lim': 50000
|
||||||
|
+ }, CONF_IODUMP: {
|
||||||
|
+ 'read_iodump_lim': 0,
|
||||||
|
+ 'write_iodump_lim': 0
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+def get_section_value(section_name, config):
|
||||||
|
+ common_param = {}
|
||||||
|
+ config_sec = config[section_name]
|
||||||
|
+ for config_key in DEFAULT_PARAM[section_name]:
|
||||||
|
+ if config_key in config_sec:
|
||||||
|
+ if not config_sec[config_key].isdecimal():
|
||||||
|
+ report_alarm_fail(f"Invalid {section_name}.{config_key} config.")
|
||||||
|
+ common_param[config_key] = int(config_sec[config_key])
|
||||||
|
+ else:
|
||||||
|
+ common_param[config_key] = DEFAULT_PARAM[section_name][config_key]
|
||||||
|
+ logging.warning(f"Unset {section_name}.{config_key} in config file, use {common_param[config_key]} as default")
|
||||||
|
+ return common_param
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+def read_config_log(filename):
|
||||||
|
+ """read config file, get [log] section value"""
|
||||||
|
+ default_log_level = DEFAULT_PARAM[CONF_LOG][CONF_LOG_LEVEL]
|
||||||
|
+ if not os.path.exists(filename):
|
||||||
|
+ return LogLevel.get(default_log_level)
|
||||||
|
+
|
||||||
|
+ config = configparser.ConfigParser()
|
||||||
|
+ config.read(filename)
|
||||||
|
+
|
||||||
|
+ log_level = config.get(CONF_LOG, CONF_LOG_LEVEL, fallback=default_log_level)
|
||||||
|
+ if log_level.lower() in LogLevel:
|
||||||
|
+ return LogLevel.get(log_level.lower())
|
||||||
|
+ return LogLevel.get(default_log_level)
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+def read_config_common(config):
|
||||||
|
+ """read config file, get [common] section value"""
|
||||||
|
+ if not config.has_section(CONF_COMMON):
|
||||||
|
+ report_alarm_fail(f"Cannot find {CONF_COMMON} section in config file")
|
||||||
|
+
|
||||||
|
+ try:
|
||||||
|
+ disk_name = config.get(CONF_COMMON, CONF_COMMON_DISK).lower()
|
||||||
|
+ disk = [] if disk_name == "default" else disk_name.split(",")
|
||||||
|
+ except configparser.NoOptionError:
|
||||||
|
+ disk = []
|
||||||
|
+ logging.warning(f"Unset {CONF_COMMON}.{CONF_COMMON_DISK}, set to default")
|
||||||
|
+
|
||||||
|
+ try:
|
||||||
|
+ stage_name = config.get(CONF_COMMON, CONF_COMMON_STAGE).lower()
|
||||||
|
+ stage = [] if stage_name == "default" else stage_name.split(",")
|
||||||
|
+ except configparser.NoOptionError:
|
||||||
|
+ stage = []
|
||||||
|
+ logging.warning(f"Unset {CONF_COMMON}.{CONF_COMMON_STAGE}, set to default")
|
||||||
|
+
|
||||||
|
+ if len(disk) > 10:
|
||||||
|
+ logging.warning(f"Too many {CONF_COMMON}.disks, record only max 10 disks")
|
||||||
|
+ disk = disk[:10]
|
||||||
|
+
|
||||||
|
+ try:
|
||||||
|
+ iotype_name = config.get(CONF_COMMON, CONF_COMMON_IOTYPE).lower().split(",")
|
||||||
|
+ iotype_list = [rw.lower() for rw in iotype_name if rw.lower() in ['read', 'write']]
|
||||||
|
+ err_iotype = [rw.lower() for rw in iotype_name if rw.lower() not in ['read', 'write']]
|
||||||
|
+
|
||||||
|
+ if err_iotype:
|
||||||
|
+ report_alarm_fail(f"Invalid {CONF_COMMON}.{CONF_COMMON_IOTYPE} config")
|
||||||
|
+
|
||||||
|
+ except configparser.NoOptionError:
|
||||||
|
+ iotype_list = DEFAULT_PARAM[CONF_COMMON][CONF_COMMON_IOTYPE]
|
||||||
|
+ logging.warning(f"Unset {CONF_COMMON}.{CONF_COMMON_IOTYPE}, use {iotupe_list} as default")
|
||||||
|
+
|
||||||
|
+ try:
|
||||||
|
+ period_time = int(config.get(CONF_COMMON, CONF_COMMON_PER_TIME))
|
||||||
|
+ if not (1 <= period_time <= 300):
|
||||||
|
+ raise ValueError("Invalid period_time")
|
||||||
|
+ except ValueError:
|
||||||
|
+ report_alarm_fail(f"Invalid {CONF_COMMON}.{CONF_COMMON_PER_TIME}")
|
||||||
|
+ except configparser.NoOptionError:
|
||||||
|
+ period_time = DEFAULT_PARAM[CONF_COMMON][CONF_COMMON_PER_TIME]
|
||||||
|
+ logging.warning(f"Unset {CONF_COMMON}.{CONF_COMMON_PER_TIME}, use {period_time} as default")
|
||||||
|
+
|
||||||
|
+ return period_time, disk, stage, iotype_list
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+def read_config_algorithm(config):
|
||||||
|
+ """read config file, get [algorithm] section value"""
|
||||||
|
+ if not config.has_section(CONF_ALGO):
|
||||||
|
+ report_alarm_fail(f"Cannot find {CONF_ALGO} section in config file")
|
||||||
|
+
|
||||||
|
+ try:
|
||||||
|
+ win_size = int(config.get(CONF_ALGO, CONF_ALGO_SIZE))
|
||||||
|
+ if not (1 <= win_size <= 300):
|
||||||
|
+ raise ValueError(f"Invalid {CONF_ALGO}.{CONF_ALGO_SIZE}")
|
||||||
|
+ except ValueError:
|
||||||
|
+ report_alarm_fail(f"Invalid {CONF_ALGO}.{CONF_ALGO_SIZE} config")
|
||||||
|
+ except configparser.NoOptionError:
|
||||||
|
+ win_size = DEFAULT_PARAM[CONF_ALGO][CONF_ALGO_SIZE]
|
||||||
|
+ logging.warning(f"Unset {CONF_ALGO}.{CONF_ALGO_SIZE}, use {win_size} as default")
|
||||||
|
+
|
||||||
|
+ try:
|
||||||
|
+ win_threshold = int(config.get(CONF_ALGO, CONF_ALGO_THRE))
|
||||||
|
+ if win_threshold < 1 or win_threshold > 300 or win_threshold > win_size:
|
||||||
|
+ raise ValueError(f"Invalid {CONF_ALGO}.{CONF_ALGO_THRE}")
|
||||||
|
+ except ValueError:
|
||||||
|
+ report_alarm_fail(f"Invalid {CONF_ALGO}.{CONF_ALGO_THRE} config")
|
||||||
|
+ except configparser.NoOptionError:
|
||||||
|
+ win_threshold = DEFAULT_PARAM[CONF_ALGO]['win_threshold']
|
||||||
|
+ logging.warning(f"Unset {CONF_ALGO}.{CONF_ALGO_THRE}, use {win_threshold} as default")
|
||||||
|
+
|
||||||
|
+ return win_size, win_threshold
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+def read_config_latency(config):
|
||||||
|
+ """read config file, get [latency_xxx] section value"""
|
||||||
|
+ common_param = {}
|
||||||
|
+ for type_name in Disk_Type:
|
||||||
|
+ section_name = CONF_LATENCY.format(Disk_Type[type_name])
|
||||||
|
+ if not config.has_section(section_name):
|
||||||
|
+ report_alarm_fail(f"Cannot find {section_name} section in config file")
|
||||||
|
+
|
||||||
|
+ common_param[Disk_Type[type_name]] = get_section_value(section_name, config)
|
||||||
|
+ return common_param
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+def read_config_iodump(config):
|
||||||
|
+ """read config file, get [iodump] section value"""
|
||||||
|
+ if not config.has_section(CONF_IODUMP):
|
||||||
|
+ report_alarm_fail(f"Cannot find {CONF_IODUMP} section in config file")
|
||||||
|
+
|
||||||
|
+ return get_section_value(CONF_IODUMP, config)
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+def read_config_stage(config, stage, iotype_list, curr_disk_type):
|
||||||
|
+ """read config file, get [STAGE_NAME_diskType] section value"""
|
||||||
|
+ res = {}
|
||||||
|
+ section_name = f"{stage}_{curr_disk_type}"
|
||||||
|
+ if not config.has_section(section_name):
|
||||||
|
+ return res
|
||||||
|
+
|
||||||
|
+ for key in config[section_name]:
|
||||||
|
+ if config[stage][key].isdecimal():
|
||||||
|
+ res[key] = int(config[stage][key])
|
||||||
|
+
|
||||||
|
+ return res
|
||||||
|
diff --git a/src/python/sentryPlugins/avg_block_io/module_conn.py b/src/python/sentryPlugins/avg_block_io/module_conn.py
|
||||||
|
index 8d6f429..cbdaad4 100644
|
||||||
|
--- a/src/python/sentryPlugins/avg_block_io/module_conn.py
|
||||||
|
+++ b/src/python/sentryPlugins/avg_block_io/module_conn.py
|
||||||
|
@@ -29,12 +29,16 @@ def sig_handler(signum, _f):
|
||||||
|
|
||||||
|
def avg_get_io_data(io_dic):
|
||||||
|
"""get_io_data from sentryCollector"""
|
||||||
|
+ logging.debug(f"send to sentryCollector get_io_data: period={io_dic['period_time']}, "
|
||||||
|
+ f"disk={io_dic['disk_list']}, stage={io_dic['stage_list']}, iotype={io_dic['iotype_list']}")
|
||||||
|
res = get_io_data(io_dic["period_time"], io_dic["disk_list"], io_dic["stage_list"], io_dic["iotype_list"])
|
||||||
|
return check_result_validation(res, 'get io data')
|
||||||
|
|
||||||
|
|
||||||
|
def avg_is_iocollect_valid(io_dic, config_disk, config_stage):
|
||||||
|
"""is_iocollect_valid from sentryCollector"""
|
||||||
|
+ logging.debug(f"send to sentryCollector is_iocollect_valid: period={io_dic['period_time']}, "
|
||||||
|
+ f"disk={config_disk}, stage={config_stage}")
|
||||||
|
res = is_iocollect_valid(io_dic["period_time"], config_disk, config_stage)
|
||||||
|
return check_result_validation(res, 'check config validation')
|
||||||
|
|
||||||
|
@@ -79,7 +83,7 @@ def process_report_data(disk_name, rw, io_data):
|
||||||
|
# io press
|
||||||
|
ctrl_stage = ['throtl', 'wbt', 'iocost', 'bfq']
|
||||||
|
for stage_name in ctrl_stage:
|
||||||
|
- abnormal, abnormal_list = is_abnormal((disk_name, 'bio', rw), io_data)
|
||||||
|
+ abnormal, abnormal_list = is_abnormal((disk_name, stage_name, rw), io_data)
|
||||||
|
if not abnormal:
|
||||||
|
continue
|
||||||
|
msg["reason"] = "IO press"
|
||||||
|
@@ -117,6 +121,7 @@ def process_report_data(disk_name, rw, io_data):
|
||||||
|
|
||||||
|
|
||||||
|
def get_disk_type_by_name(disk_name):
|
||||||
|
+ logging.debug(f"send to sentryCollector get_disk_type: disk_name={disk_name}")
|
||||||
|
res = get_disk_type(disk_name)
|
||||||
|
disk_type_str = check_result_validation(get_disk_type(disk_name), f'Invalid disk type {disk_name}')
|
||||||
|
try:
|
||||||
|
@@ -126,4 +131,4 @@ def get_disk_type_by_name(disk_name):
|
||||||
|
except ValueError:
|
||||||
|
report_alarm_fail(f"Failed to get disk type for {disk_name}")
|
||||||
|
|
||||||
|
- return Disk_Type[curr_disk_type]
|
||||||
|
\ No newline at end of file
|
||||||
|
+ return Disk_Type[curr_disk_type]
|
||||||
|
diff --git a/src/python/sentryPlugins/avg_block_io/utils.py b/src/python/sentryPlugins/avg_block_io/utils.py
|
||||||
|
index c381c07..1bfd4e8 100644
|
||||||
|
--- a/src/python/sentryPlugins/avg_block_io/utils.py
|
||||||
|
+++ b/src/python/sentryPlugins/avg_block_io/utils.py
|
||||||
|
@@ -8,84 +8,12 @@
|
||||||
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
|
||||||
|
# PURPOSE.
|
||||||
|
# See the Mulan PSL v2 for more details.
|
||||||
|
-import configparser
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
|
||||||
|
AVG_VALUE = 0
|
||||||
|
AVG_COUNT = 1
|
||||||
|
|
||||||
|
-CONF_LOG = 'log'
|
||||||
|
-CONF_LOG_LEVEL = 'level'
|
||||||
|
-LogLevel = {
|
||||||
|
- "debug": logging.DEBUG,
|
||||||
|
- "info": logging.INFO,
|
||||||
|
- "warning": logging.WARNING,
|
||||||
|
- "error": logging.ERROR,
|
||||||
|
- "critical": logging.CRITICAL
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-
|
||||||
|
-DEFAULT_PARAM = {
|
||||||
|
- 'latency_nvme_ssd': {
|
||||||
|
- 'read_avg_lim': 300,
|
||||||
|
- 'write_avg_lim': 300,
|
||||||
|
- 'read_avg_time': 3,
|
||||||
|
- 'write_avg_time': 3,
|
||||||
|
- 'read_tot_lim': 500,
|
||||||
|
- 'write_tot_lim': 500,
|
||||||
|
- }, 'latency_sata_ssd' : {
|
||||||
|
- 'read_avg_lim': 10000,
|
||||||
|
- 'write_avg_lim': 10000,
|
||||||
|
- 'read_avg_time': 3,
|
||||||
|
- 'write_avg_time': 3,
|
||||||
|
- 'read_tot_lim': 50000,
|
||||||
|
- 'write_tot_lim': 50000,
|
||||||
|
- }, 'latency_sata_hdd' : {
|
||||||
|
- 'read_avg_lim': 15000,
|
||||||
|
- 'write_avg_lim': 15000,
|
||||||
|
- 'read_avg_time': 3,
|
||||||
|
- 'write_avg_time': 3,
|
||||||
|
- 'read_tot_lim': 50000,
|
||||||
|
- 'write_tot_lim': 50000
|
||||||
|
- }, 'iodump': {
|
||||||
|
- 'read_iodump_lim': 0,
|
||||||
|
- 'write_iodump_lim': 0
|
||||||
|
- }
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-
|
||||||
|
-def get_section_value(section_name, config):
|
||||||
|
- common_param = {}
|
||||||
|
- config_sec = config[section_name]
|
||||||
|
- for config_key in DEFAULT_PARAM[section_name]:
|
||||||
|
- if config_key in config_sec:
|
||||||
|
- if not config_sec[config_key].isdecimal():
|
||||||
|
- report_alarm_fail(f"Invalid {section_name}.{config_key} config.")
|
||||||
|
- common_param[config_key] = int(config_sec[config_key])
|
||||||
|
- else:
|
||||||
|
- logging.warning(f"Unset {section_name}.{config_key} in config file, use {DEFAULT_PARAM[section_name][config_key]} as default")
|
||||||
|
- common_param[config_key] = DEFAULT_PARAM[section_name][config_key]
|
||||||
|
- return common_param
|
||||||
|
-
|
||||||
|
-
|
||||||
|
-def get_log_level(filename):
|
||||||
|
- if not os.path.exists(filename):
|
||||||
|
- return logging.INFO
|
||||||
|
-
|
||||||
|
- try:
|
||||||
|
- config = configparser.ConfigParser()
|
||||||
|
- config.read(filename)
|
||||||
|
- if not config.has_option(CONF_LOG, CONF_LOG_LEVEL):
|
||||||
|
- return logging.INFO
|
||||||
|
- log_level = config.get(CONF_LOG, CONF_LOG_LEVEL)
|
||||||
|
-
|
||||||
|
- if log_level.lower() in LogLevel:
|
||||||
|
- return LogLevel.get(log_level.lower())
|
||||||
|
- return logging.INFO
|
||||||
|
- except configparser.Error:
|
||||||
|
- return logging.INFO
|
||||||
|
-
|
||||||
|
|
||||||
|
def get_nested_value(data, keys):
|
||||||
|
"""get data from nested dict"""
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
@ -4,7 +4,7 @@
|
|||||||
Summary: System Inspection Framework
|
Summary: System Inspection Framework
|
||||||
Name: sysSentry
|
Name: sysSentry
|
||||||
Version: 1.0.2
|
Version: 1.0.2
|
||||||
Release: 38
|
Release: 39
|
||||||
License: Mulan PSL v2
|
License: Mulan PSL v2
|
||||||
Group: System Environment/Daemons
|
Group: System Environment/Daemons
|
||||||
Source0: https://gitee.com/openeuler/sysSentry/releases/download/v%{version}/%{name}-%{version}.tar.gz
|
Source0: https://gitee.com/openeuler/sysSentry/releases/download/v%{version}/%{name}-%{version}.tar.gz
|
||||||
@ -56,6 +56,8 @@ Patch43: add-root-cause-analysis.patch
|
|||||||
Patch44: update-collect-log.patch
|
Patch44: update-collect-log.patch
|
||||||
Patch45: modify-abnormal-stack-when-the-disk-field-is-not-con.patch
|
Patch45: modify-abnormal-stack-when-the-disk-field-is-not-con.patch
|
||||||
Patch46: ai_block_io-fix-some-bugs.patch
|
Patch46: ai_block_io-fix-some-bugs.patch
|
||||||
|
Patch47: refactor-config.py-and-bugfix-uncorrect-slow-io-repo.patch
|
||||||
|
Patch48: get_io_data-failed-wont-stop-avg_block_io-and-del-di.patch
|
||||||
|
|
||||||
BuildRequires: cmake gcc-c++
|
BuildRequires: cmake gcc-c++
|
||||||
BuildRequires: python3 python3-setuptools
|
BuildRequires: python3 python3-setuptools
|
||||||
@ -318,6 +320,12 @@ rm -rf %{buildroot}
|
|||||||
%attr(0550,root,root) %{python3_sitelib}/sentryCollector/__pycache__/collect_plugin*
|
%attr(0550,root,root) %{python3_sitelib}/sentryCollector/__pycache__/collect_plugin*
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Tue Oct 15 2024 gaoruoshu <gaoruoshu@huawei.com> - 1.0.2-39
|
||||||
|
- Type:bugfix
|
||||||
|
- CVE:NA
|
||||||
|
- SUG:NA
|
||||||
|
- DESC:refactor config.py and bugfix incorrect slow io report
|
||||||
|
|
||||||
* Mon Oct 14 2024 heyouzhi <heyouzhi@huawei.com> - 1.0.2-38
|
* Mon Oct 14 2024 heyouzhi <heyouzhi@huawei.com> - 1.0.2-38
|
||||||
- Type:bugfix
|
- Type:bugfix
|
||||||
- CVE:NA
|
- CVE:NA
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user