!103 refactor config.py and fix incorrect slow io report

Merge pull request !103 from gaoruoshu/master
This commit is contained in:
openeuler-ci-bot 2024-10-15 14:31:32 +00:00 committed by Gitee
commit 82961d7902
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
3 changed files with 743 additions and 1 deletions

View File

@ -0,0 +1,168 @@
From b21607fcec4b290bc78c9f6c4a26db1a2df32a66 Mon Sep 17 00:00:00 2001
From: gaoruoshu <gaoruoshu@huawei.com>
Date: Tue, 15 Oct 2024 21:21:10 +0800
Subject: [PATCH] get_io_data failure won't stop avg_block_io; drop unsupported
 disks
---
src/python/sentryCollector/collect_plugin.py | 14 ++++-----
.../avg_block_io/avg_block_io.py | 9 ++++--
.../sentryPlugins/avg_block_io/module_conn.py | 31 +++++++++++++------
3 files changed, 35 insertions(+), 19 deletions(-)
diff --git a/src/python/sentryCollector/collect_plugin.py b/src/python/sentryCollector/collect_plugin.py
index bec405a..53dddec 100644
--- a/src/python/sentryCollector/collect_plugin.py
+++ b/src/python/sentryCollector/collect_plugin.py
@@ -90,14 +90,14 @@ def client_send_and_recv(request_data, data_str_len, protocol):
try:
client_socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
except socket.error:
- logging.error("collect_plugin: client create socket error")
+ logging.debug("collect_plugin: client create socket error")
return None
try:
client_socket.connect(COLLECT_SOCKET_PATH)
except OSError:
client_socket.close()
- logging.error("collect_plugin: client connect error")
+ logging.debug("collect_plugin: client connect error")
return None
req_data_len = len(request_data)
@@ -109,23 +109,23 @@ def client_send_and_recv(request_data, data_str_len, protocol):
res_data = res_data.decode()
except (OSError, UnicodeError):
client_socket.close()
- logging.error("collect_plugin: client communicate error")
+ logging.debug("collect_plugin: client communicate error")
return None
res_magic = res_data[:CLT_MSG_MAGIC_LEN]
if res_magic != "RES":
- logging.error("res msg format error")
+ logging.debug("res msg format error")
return None
protocol_str = res_data[CLT_MSG_MAGIC_LEN:CLT_MSG_MAGIC_LEN+CLT_MSG_PRO_LEN]
try:
protocol_id = int(protocol_str)
except ValueError:
- logging.error("recv msg protocol id is invalid %s", protocol_str)
+ logging.debug("recv msg protocol id is invalid %s", protocol_str)
return None
if protocol_id >= ClientProtocol.PRO_END:
- logging.error("protocol id is invalid")
+ logging.debug("protocol id is invalid")
return None
try:
@@ -134,7 +134,7 @@ def client_send_and_recv(request_data, data_str_len, protocol):
res_msg_data = res_msg_data.decode()
return res_msg_data
except (OSError, ValueError, UnicodeError):
- logging.error("collect_plugin: client recv res msg error")
+ logging.debug("collect_plugin: client recv res msg error")
finally:
client_socket.close()
diff --git a/src/python/sentryPlugins/avg_block_io/avg_block_io.py b/src/python/sentryPlugins/avg_block_io/avg_block_io.py
index cd47919..899d517 100644
--- a/src/python/sentryPlugins/avg_block_io/avg_block_io.py
+++ b/src/python/sentryPlugins/avg_block_io/avg_block_io.py
@@ -15,7 +15,7 @@ import time
from .config import read_config_log, read_config_common, read_config_algorithm, read_config_latency, read_config_iodump, read_config_stage
from .stage_window import IoWindow, IoDumpWindow
-from .module_conn import avg_is_iocollect_valid, avg_get_io_data, report_alarm_fail, process_report_data, sig_handler, get_disk_type_by_name
+from .module_conn import avg_is_iocollect_valid, avg_get_io_data, report_alarm_fail, process_report_data, sig_handler, get_disk_type_by_name, check_disk_list_validation
from .utils import update_avg_and_check_abnormal
CONFIG_FILE = "/etc/sysSentry/plugins/avg_block_io.ini"
@@ -79,6 +79,8 @@ def get_valid_disk_stage_list(io_dic, config_disk, config_stage):
if not disk_list:
report_alarm_fail("Cannot get valid disk name")
+ disk_list = check_disk_list_validation(disk_list)
+
disk_list = disk_list[:10] if len(disk_list) > 10 else disk_list
if not config_disk:
@@ -117,7 +119,10 @@ def main_loop(io_dic, io_data, io_avg_value):
time.sleep(period_time)
# 采集模块对接,获取周期数据
- curr_period_data = avg_get_io_data(io_dic)
+ is_success, curr_period_data = avg_get_io_data(io_dic)
+ if not is_success:
+ logging.error(f"{curr_period_data['msg']}")
+ continue
# 处理周期数据
reach_size = False
diff --git a/src/python/sentryPlugins/avg_block_io/module_conn.py b/src/python/sentryPlugins/avg_block_io/module_conn.py
index cbdaad4..a67ef45 100644
--- a/src/python/sentryPlugins/avg_block_io/module_conn.py
+++ b/src/python/sentryPlugins/avg_block_io/module_conn.py
@@ -40,25 +40,25 @@ def avg_is_iocollect_valid(io_dic, config_disk, config_stage):
logging.debug(f"send to sentryCollector is_iocollect_valid: period={io_dic['period_time']}, "
f"disk={config_disk}, stage={config_stage}")
res = is_iocollect_valid(io_dic["period_time"], config_disk, config_stage)
- return check_result_validation(res, 'check config validation')
+ is_success, data = check_result_validation(res, 'check config validation')
+ if not is_success:
+ report_alarm_fail(f"{data['msg']}")
+ return data
def check_result_validation(res, reason):
"""check validation of result from sentryCollector"""
if not 'ret' in res or not 'message' in res:
- err_msg = "Failed to {}: Cannot connect to sentryCollector.".format(reason)
- report_alarm_fail(err_msg)
+ return False, {'msg': f"Failed to {reason}: Cannot connect to sentryCollector"}
if res['ret'] != 0:
- err_msg = "Failed to {}: {}".format(reason, Result_Messages[res['ret']])
- report_alarm_fail(err_msg)
+ return False, {'msg': f"Failed to {reason}: {Result_Messages[res['ret']]}"}
try:
json_data = json.loads(res['message'])
except json.JSONDecodeError:
- err_msg = f"Failed to {reason}: invalid return message"
- report_alarm_fail(err_msg)
+ return False, {'msg': f"Failed to {reason}: invalid return message"}
- return json_data
+ return True, json_data
def report_alarm_fail(alarm_info):
@@ -120,10 +120,21 @@ def process_report_data(disk_name, rw, io_data):
xalarm_report(1002, MINOR_ALM, ALARM_TYPE_OCCUR, json.dumps(msg))
+def check_disk_list_validation(disk_list):
+ valid_disk_list = []
+ for disk_name in disk_list:
+ is_success, _ = check_result_validation(get_disk_type(disk_name), "")
+ if not is_success:
+ continue
+ valid_disk_list.append(disk_name)
+ return valid_disk_list
+
+
def get_disk_type_by_name(disk_name):
logging.debug(f"send to sentryCollector get_disk_type: disk_name={disk_name}")
- res = get_disk_type(disk_name)
- disk_type_str = check_result_validation(get_disk_type(disk_name), f'Invalid disk type {disk_name}')
+ is_success, disk_type_str = check_result_validation(get_disk_type(disk_name), f'Invalid disk type {disk_name}')
+ if not is_success:
+ report_alarm_fail(f"{disk_type_str['msg']}")
try:
curr_disk_type = int(disk_type_str)
if curr_disk_type not in Disk_Type:
--
2.27.0

View File

@ -0,0 +1,566 @@
From d5cb115a97e27c8270e8fb385fb3914af9ba3c34 Mon Sep 17 00:00:00 2001
From: gaoruoshu <gaoruoshu@huawei.com>
Date: Tue, 15 Oct 2024 10:00:07 +0000
Subject: [PATCH] refactor config.py and fix incorrect slow io report
Signed-off-by: gaoruoshu <gaoruoshu@huawei.com>
---
.../avg_block_io/avg_block_io.py | 155 ++-----------
.../sentryPlugins/avg_block_io/config.py | 208 ++++++++++++++++++
.../sentryPlugins/avg_block_io/module_conn.py | 9 +-
.../sentryPlugins/avg_block_io/utils.py | 72 ------
4 files changed, 238 insertions(+), 206 deletions(-)
create mode 100644 src/python/sentryPlugins/avg_block_io/config.py
diff --git a/src/python/sentryPlugins/avg_block_io/avg_block_io.py b/src/python/sentryPlugins/avg_block_io/avg_block_io.py
index f3ade09..cd47919 100644
--- a/src/python/sentryPlugins/avg_block_io/avg_block_io.py
+++ b/src/python/sentryPlugins/avg_block_io/avg_block_io.py
@@ -13,132 +13,13 @@ import signal
import configparser
import time
+from .config import read_config_log, read_config_common, read_config_algorithm, read_config_latency, read_config_iodump, read_config_stage
from .stage_window import IoWindow, IoDumpWindow
from .module_conn import avg_is_iocollect_valid, avg_get_io_data, report_alarm_fail, process_report_data, sig_handler, get_disk_type_by_name
-from .utils import update_avg_and_check_abnormal, get_log_level, get_section_value
-from sentryCollector.collect_plugin import Disk_Type
+from .utils import update_avg_and_check_abnormal
CONFIG_FILE = "/etc/sysSentry/plugins/avg_block_io.ini"
-def log_invalid_keys(not_in_list, keys_name, config_list, default_list):
- """print invalid log"""
- if config_list and not_in_list:
- logging.warning("{} in common.{} are not valid, set {}={}".format(not_in_list, keys_name, keys_name, default_list))
- elif config_list == ["default"]:
- logging.warning("Default {} use {}".format(keys_name, default_list))
-
-
-def read_config_common(config):
- """read config file, get [common] section value"""
- if not config.has_section("common"):
- report_alarm_fail("Cannot find common section in config file")
-
- try:
- disk_name = config.get("common", "disk")
- disk = [] if disk_name == "default" else disk_name.split(",")
- except configparser.NoOptionError:
- disk = []
- logging.warning("Unset common.disk, set to default")
-
- try:
- stage_name = config.get("common", "stage")
- stage = [] if stage_name == "default" else stage_name.split(",")
- except configparser.NoOptionError:
- stage = []
- logging.warning("Unset common.stage, set to default")
-
- if len(disk) > 10:
- logging.warning("Too many common.disks, record only max 10 disks")
- disk = disk[:10]
-
- try:
- iotype_name = config.get("common", "iotype").split(",")
- iotype_list = [rw.lower() for rw in iotype_name if rw.lower() in ['read', 'write']]
- err_iotype = [rw.lower() for rw in iotype_name if rw.lower() not in ['read', 'write']]
-
- if err_iotype:
- report_alarm_fail("Invalid common.iotype config")
-
- except configparser.NoOptionError:
- iotype_list = ["read", "write"]
- logging.warning("Unset common.iotype, set to read,write")
-
- try:
- period_time = int(config.get("common", "period_time"))
- if not (1 <= period_time <= 300):
- raise ValueError("Invalid period_time")
- except ValueError:
- report_alarm_fail("Invalid common.period_time")
- except configparser.NoOptionError:
- period_time = 1
- logging.warning("Unset common.period_time, use 1s as default")
-
- return period_time, disk, stage, iotype_list
-
-
-def read_config_algorithm(config):
- """read config file, get [algorithm] section value"""
- if not config.has_section("algorithm"):
- report_alarm_fail("Cannot find algorithm section in config file")
-
- try:
- win_size = int(config.get("algorithm", "win_size"))
- if not (1 <= win_size <= 300):
- raise ValueError("Invalid algorithm.win_size")
- except ValueError:
- report_alarm_fail("Invalid algorithm.win_size config")
- except configparser.NoOptionError:
- win_size = 30
- logging.warning("Unset algorithm.win_size, use 30 as default")
-
- try:
- win_threshold = int(config.get("algorithm", "win_threshold"))
- if win_threshold < 1 or win_threshold > 300 or win_threshold > win_size:
- raise ValueError("Invalid algorithm.win_threshold")
- except ValueError:
- report_alarm_fail("Invalid algorithm.win_threshold config")
- except configparser.NoOptionError:
- win_threshold = 6
- logging.warning("Unset algorithm.win_threshold, use 6 as default")
-
- return win_size, win_threshold
-
-
-def read_config_latency(config):
- """read config file, get [latency_xxx] section value"""
- common_param = {}
- for type_name in Disk_Type:
- section_name = f"latency_{Disk_Type[type_name]}"
- if not config.has_section(section_name):
- report_alarm_fail(f"Cannot find {section_name} section in config file")
-
- common_param[Disk_Type[type_name]] = get_section_value(section_name, config)
- return common_param
-
-
-def read_config_iodump(config):
- """read config file, get [iodump] section value"""
- common_param = {}
- section_name = "iodump"
- if not config.has_section(section_name):
- report_alarm_fail(f"Cannot find {section_name} section in config file")
-
- return get_section_value(section_name, config)
-
-
-def read_config_stage(config, stage, iotype_list, curr_disk_type):
- """read config file, get [STAGE_NAME_diskType] section value"""
- res = {}
- section_name = f"{stage}_{curr_disk_type}"
- if not config.has_section(section_name):
- return res
-
- for key in config[section_name]:
- if config[stage][key].isdecimal():
- res[key] = int(config[stage][key])
-
- return res
-
def init_io_win(io_dic, config, common_param):
"""initialize windows of latency, iodump, and dict of avg_value"""
@@ -192,24 +73,33 @@ def get_valid_disk_stage_list(io_dic, config_disk, config_stage):
disk_list = [key for key in all_disk_set if key in config_disk]
not_in_disk_list = [key for key in config_disk if key not in all_disk_set]
+ if not config_disk and not not_in_disk_list:
+ disk_list = [key for key in all_disk_set]
+
+ if not disk_list:
+ report_alarm_fail("Cannot get valid disk name")
+
+ disk_list = disk_list[:10] if len(disk_list) > 10 else disk_list
+
+ if not config_disk:
+ logging.info(f"Default common.disk using disk={disk_list}")
+ elif sorted(disk_list) != sorted(config_disk):
+ logging.warning(f"Set common.disk to {disk_list}")
+
stage_list = [key for key in all_stage_set if key in config_stage]
not_in_stage_list = [key for key in config_stage if key not in all_stage_set]
if not_in_stage_list:
report_alarm_fail(f"Invalid common.stage_list config, cannot set {not_in_stage_list}")
- if not config_disk and not not_in_disk_list:
- disk_list = [key for key in all_disk_set]
-
- if not config_stage and not not_in_stage_list:
+ if not config_stage:
stage_list = [key for key in all_stage_set]
- disk_list = disk_list[:10] if len(disk_list) > 10 else disk_list
-
- if not stage_list or not disk_list:
- report_alarm_fail("Cannot get valid disk name or stage name.")
+ if not stage_list:
+ report_alarm_fail("Cannot get valid stage name.")
- log_invalid_keys(not_in_disk_list, 'disk', config_disk, disk_list)
+ if not config_stage:
+ logging.info(f"Default common.stage using stage={stage_list}")
return disk_list, stage_list
@@ -254,9 +144,8 @@ def main():
signal.signal(signal.SIGINT, sig_handler)
signal.signal(signal.SIGTERM, sig_handler)
- log_level = get_log_level(CONFIG_FILE)
+ log_level = read_config_log(CONFIG_FILE)
log_format = "%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s"
-
logging.basicConfig(level=log_level, format=log_format)
# 初始化配置读取
@@ -274,6 +163,8 @@ def main():
# 采集模块对接is_iocollect_valid()
io_dic["disk_list"], io_dic["stage_list"] = get_valid_disk_stage_list(io_dic, disk, stage)
+ logging.debug(f"disk={io_dic['disk_list']}, stage={io_dic['stage_list']}")
+
if "bio" not in io_dic["stage_list"]:
report_alarm_fail("Cannot run avg_block_io without bio stage")
diff --git a/src/python/sentryPlugins/avg_block_io/config.py b/src/python/sentryPlugins/avg_block_io/config.py
new file mode 100644
index 0000000..c8f45ce
--- /dev/null
+++ b/src/python/sentryPlugins/avg_block_io/config.py
@@ -0,0 +1,208 @@
+import configparser
+import logging
+import os
+
+from .module_conn import report_alarm_fail
+from sentryCollector.collect_plugin import Disk_Type
+
+
+CONF_LOG = 'log'
+CONF_LOG_LEVEL = 'level'
+LogLevel = {
+ "debug": logging.DEBUG,
+ "info": logging.INFO,
+ "warning": logging.WARNING,
+ "error": logging.ERROR,
+ "critical": logging.CRITICAL
+}
+
+CONF_COMMON = 'common'
+CONF_COMMON_DISK = 'disk'
+CONF_COMMON_STAGE = 'stage'
+CONF_COMMON_IOTYPE = 'iotype'
+CONF_COMMON_PER_TIME = 'period_time'
+
+CONF_ALGO = 'algorithm'
+CONF_ALGO_SIZE = 'win_size'
+CONF_ALGO_THRE = 'win_threshold'
+
+CONF_LATENCY = 'latency_{}'
+CONF_IODUMP = 'iodump'
+
+
+DEFAULT_PARAM = {
+ CONF_LOG: {
+ CONF_LOG_LEVEL: 'info'
+ }, CONF_COMMON: {
+ CONF_COMMON_DISK: 'default',
+ CONF_COMMON_STAGE: 'default',
+ CONF_COMMON_IOTYPE: 'read,write',
+ CONF_COMMON_PER_TIME: 1
+ }, CONF_ALGO: {
+ CONF_ALGO_SIZE: 30,
+ CONF_ALGO_THRE: 6
+ }, 'latency_nvme_ssd': {
+ 'read_avg_lim': 300,
+ 'write_avg_lim': 300,
+ 'read_avg_time': 3,
+ 'write_avg_time': 3,
+ 'read_tot_lim': 500,
+ 'write_tot_lim': 500,
+ }, 'latency_sata_ssd' : {
+ 'read_avg_lim': 10000,
+ 'write_avg_lim': 10000,
+ 'read_avg_time': 3,
+ 'write_avg_time': 3,
+ 'read_tot_lim': 50000,
+ 'write_tot_lim': 50000,
+ }, 'latency_sata_hdd' : {
+ 'read_avg_lim': 15000,
+ 'write_avg_lim': 15000,
+ 'read_avg_time': 3,
+ 'write_avg_time': 3,
+ 'read_tot_lim': 50000,
+ 'write_tot_lim': 50000
+ }, CONF_IODUMP: {
+ 'read_iodump_lim': 0,
+ 'write_iodump_lim': 0
+ }
+}
+
+
+def get_section_value(section_name, config):
+ common_param = {}
+ config_sec = config[section_name]
+ for config_key in DEFAULT_PARAM[section_name]:
+ if config_key in config_sec:
+ if not config_sec[config_key].isdecimal():
+ report_alarm_fail(f"Invalid {section_name}.{config_key} config.")
+ common_param[config_key] = int(config_sec[config_key])
+ else:
+ common_param[config_key] = DEFAULT_PARAM[section_name][config_key]
+ logging.warning(f"Unset {section_name}.{config_key} in config file, use {common_param[config_key]} as default")
+ return common_param
+
+
+def read_config_log(filename):
+ """read config file, get [log] section value"""
+ default_log_level = DEFAULT_PARAM[CONF_LOG][CONF_LOG_LEVEL]
+ if not os.path.exists(filename):
+ return LogLevel.get(default_log_level)
+
+ config = configparser.ConfigParser()
+ config.read(filename)
+
+ log_level = config.get(CONF_LOG, CONF_LOG_LEVEL, fallback=default_log_level)
+ if log_level.lower() in LogLevel:
+ return LogLevel.get(log_level.lower())
+ return LogLevel.get(default_log_level)
+
+
+def read_config_common(config):
+    """read config file, get [common] section value: (period_time, disk, stage, iotype_list)"""
+    if not config.has_section(CONF_COMMON):
+        report_alarm_fail(f"Cannot find {CONF_COMMON} section in config file")
+
+    try:
+        disk_name = config.get(CONF_COMMON, CONF_COMMON_DISK).lower()
+        disk = [] if disk_name == "default" else disk_name.split(",")  # [] means "use every disk"
+    except configparser.NoOptionError:
+        disk = []
+        logging.warning(f"Unset {CONF_COMMON}.{CONF_COMMON_DISK}, set to default")
+
+    try:
+        stage_name = config.get(CONF_COMMON, CONF_COMMON_STAGE).lower()
+        stage = [] if stage_name == "default" else stage_name.split(",")  # [] means "use every stage"
+    except configparser.NoOptionError:
+        stage = []
+        logging.warning(f"Unset {CONF_COMMON}.{CONF_COMMON_STAGE}, set to default")
+
+    if len(disk) > 10:
+        logging.warning(f"Too many {CONF_COMMON}.disks, record only max 10 disks")
+        disk = disk[:10]
+
+    try:
+        iotype_name = config.get(CONF_COMMON, CONF_COMMON_IOTYPE).lower().split(",")
+        iotype_list = [rw.lower() for rw in iotype_name if rw.lower() in ['read', 'write']]
+        err_iotype = [rw.lower() for rw in iotype_name if rw.lower() not in ['read', 'write']]
+
+        if err_iotype:
+            report_alarm_fail(f"Invalid {CONF_COMMON}.{CONF_COMMON_IOTYPE} config")
+
+    except configparser.NoOptionError:
+        iotype_list = DEFAULT_PARAM[CONF_COMMON][CONF_COMMON_IOTYPE].split(",")  # default is a comma-separated string; callers need a list
+        logging.warning(f"Unset {CONF_COMMON}.{CONF_COMMON_IOTYPE}, use {iotype_list} as default")
+
+    try:
+        period_time = int(config.get(CONF_COMMON, CONF_COMMON_PER_TIME))
+        if not (1 <= period_time <= 300):
+            raise ValueError("Invalid period_time")  # handled by the except below, same as a non-integer value
+    except ValueError:
+        report_alarm_fail(f"Invalid {CONF_COMMON}.{CONF_COMMON_PER_TIME}")
+    except configparser.NoOptionError:
+        period_time = DEFAULT_PARAM[CONF_COMMON][CONF_COMMON_PER_TIME]
+        logging.warning(f"Unset {CONF_COMMON}.{CONF_COMMON_PER_TIME}, use {period_time} as default")
+
+    return period_time, disk, stage, iotype_list
+
+
+def read_config_algorithm(config):
+ """read config file, get [algorithm] section value"""
+ if not config.has_section(CONF_ALGO):
+ report_alarm_fail(f"Cannot find {CONF_ALGO} section in config file")
+
+ try:
+ win_size = int(config.get(CONF_ALGO, CONF_ALGO_SIZE))
+ if not (1 <= win_size <= 300):
+ raise ValueError(f"Invalid {CONF_ALGO}.{CONF_ALGO_SIZE}")
+ except ValueError:
+ report_alarm_fail(f"Invalid {CONF_ALGO}.{CONF_ALGO_SIZE} config")
+ except configparser.NoOptionError:
+ win_size = DEFAULT_PARAM[CONF_ALGO][CONF_ALGO_SIZE]
+ logging.warning(f"Unset {CONF_ALGO}.{CONF_ALGO_SIZE}, use {win_size} as default")
+
+ try:
+ win_threshold = int(config.get(CONF_ALGO, CONF_ALGO_THRE))
+ if win_threshold < 1 or win_threshold > 300 or win_threshold > win_size:
+ raise ValueError(f"Invalid {CONF_ALGO}.{CONF_ALGO_THRE}")
+ except ValueError:
+ report_alarm_fail(f"Invalid {CONF_ALGO}.{CONF_ALGO_THRE} config")
+ except configparser.NoOptionError:
+ win_threshold = DEFAULT_PARAM[CONF_ALGO]['win_threshold']
+ logging.warning(f"Unset {CONF_ALGO}.{CONF_ALGO_THRE}, use {win_threshold} as default")
+
+ return win_size, win_threshold
+
+
+def read_config_latency(config):
+ """read config file, get [latency_xxx] section value"""
+ common_param = {}
+ for type_name in Disk_Type:
+ section_name = CONF_LATENCY.format(Disk_Type[type_name])
+ if not config.has_section(section_name):
+ report_alarm_fail(f"Cannot find {section_name} section in config file")
+
+ common_param[Disk_Type[type_name]] = get_section_value(section_name, config)
+ return common_param
+
+
+def read_config_iodump(config):
+ """read config file, get [iodump] section value"""
+ if not config.has_section(CONF_IODUMP):
+ report_alarm_fail(f"Cannot find {CONF_IODUMP} section in config file")
+
+ return get_section_value(CONF_IODUMP, config)
+
+
+def read_config_stage(config, stage, iotype_list, curr_disk_type):
+    """read config file, get [STAGE_NAME_diskType] section value; returns {} when the section is absent"""
+    res = {}
+    section_name = f"{stage}_{curr_disk_type}"
+    if not config.has_section(section_name):
+        return res
+
+    for key in config[section_name]:
+        if config[section_name][key].isdecimal():  # read from the section being iterated; non-decimal values are skipped
+            res[key] = int(config[section_name][key])
+
+    return res
diff --git a/src/python/sentryPlugins/avg_block_io/module_conn.py b/src/python/sentryPlugins/avg_block_io/module_conn.py
index 8d6f429..cbdaad4 100644
--- a/src/python/sentryPlugins/avg_block_io/module_conn.py
+++ b/src/python/sentryPlugins/avg_block_io/module_conn.py
@@ -29,12 +29,16 @@ def sig_handler(signum, _f):
def avg_get_io_data(io_dic):
"""get_io_data from sentryCollector"""
+ logging.debug(f"send to sentryCollector get_io_data: period={io_dic['period_time']}, "
+ f"disk={io_dic['disk_list']}, stage={io_dic['stage_list']}, iotype={io_dic['iotype_list']}")
res = get_io_data(io_dic["period_time"], io_dic["disk_list"], io_dic["stage_list"], io_dic["iotype_list"])
return check_result_validation(res, 'get io data')
def avg_is_iocollect_valid(io_dic, config_disk, config_stage):
"""is_iocollect_valid from sentryCollector"""
+ logging.debug(f"send to sentryCollector is_iocollect_valid: period={io_dic['period_time']}, "
+ f"disk={config_disk}, stage={config_stage}")
res = is_iocollect_valid(io_dic["period_time"], config_disk, config_stage)
return check_result_validation(res, 'check config validation')
@@ -79,7 +83,7 @@ def process_report_data(disk_name, rw, io_data):
# io press
ctrl_stage = ['throtl', 'wbt', 'iocost', 'bfq']
for stage_name in ctrl_stage:
- abnormal, abnormal_list = is_abnormal((disk_name, 'bio', rw), io_data)
+ abnormal, abnormal_list = is_abnormal((disk_name, stage_name, rw), io_data)
if not abnormal:
continue
msg["reason"] = "IO press"
@@ -117,6 +121,7 @@ def process_report_data(disk_name, rw, io_data):
def get_disk_type_by_name(disk_name):
+ logging.debug(f"send to sentryCollector get_disk_type: disk_name={disk_name}")
res = get_disk_type(disk_name)
disk_type_str = check_result_validation(get_disk_type(disk_name), f'Invalid disk type {disk_name}')
try:
@@ -126,4 +131,4 @@ def get_disk_type_by_name(disk_name):
except ValueError:
report_alarm_fail(f"Failed to get disk type for {disk_name}")
- return Disk_Type[curr_disk_type]
\ No newline at end of file
+ return Disk_Type[curr_disk_type]
diff --git a/src/python/sentryPlugins/avg_block_io/utils.py b/src/python/sentryPlugins/avg_block_io/utils.py
index c381c07..1bfd4e8 100644
--- a/src/python/sentryPlugins/avg_block_io/utils.py
+++ b/src/python/sentryPlugins/avg_block_io/utils.py
@@ -8,84 +8,12 @@
# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
# PURPOSE.
# See the Mulan PSL v2 for more details.
-import configparser
import logging
import os
AVG_VALUE = 0
AVG_COUNT = 1
-CONF_LOG = 'log'
-CONF_LOG_LEVEL = 'level'
-LogLevel = {
- "debug": logging.DEBUG,
- "info": logging.INFO,
- "warning": logging.WARNING,
- "error": logging.ERROR,
- "critical": logging.CRITICAL
-}
-
-
-DEFAULT_PARAM = {
- 'latency_nvme_ssd': {
- 'read_avg_lim': 300,
- 'write_avg_lim': 300,
- 'read_avg_time': 3,
- 'write_avg_time': 3,
- 'read_tot_lim': 500,
- 'write_tot_lim': 500,
- }, 'latency_sata_ssd' : {
- 'read_avg_lim': 10000,
- 'write_avg_lim': 10000,
- 'read_avg_time': 3,
- 'write_avg_time': 3,
- 'read_tot_lim': 50000,
- 'write_tot_lim': 50000,
- }, 'latency_sata_hdd' : {
- 'read_avg_lim': 15000,
- 'write_avg_lim': 15000,
- 'read_avg_time': 3,
- 'write_avg_time': 3,
- 'read_tot_lim': 50000,
- 'write_tot_lim': 50000
- }, 'iodump': {
- 'read_iodump_lim': 0,
- 'write_iodump_lim': 0
- }
-}
-
-
-def get_section_value(section_name, config):
- common_param = {}
- config_sec = config[section_name]
- for config_key in DEFAULT_PARAM[section_name]:
- if config_key in config_sec:
- if not config_sec[config_key].isdecimal():
- report_alarm_fail(f"Invalid {section_name}.{config_key} config.")
- common_param[config_key] = int(config_sec[config_key])
- else:
- logging.warning(f"Unset {section_name}.{config_key} in config file, use {DEFAULT_PARAM[section_name][config_key]} as default")
- common_param[config_key] = DEFAULT_PARAM[section_name][config_key]
- return common_param
-
-
-def get_log_level(filename):
- if not os.path.exists(filename):
- return logging.INFO
-
- try:
- config = configparser.ConfigParser()
- config.read(filename)
- if not config.has_option(CONF_LOG, CONF_LOG_LEVEL):
- return logging.INFO
- log_level = config.get(CONF_LOG, CONF_LOG_LEVEL)
-
- if log_level.lower() in LogLevel:
- return LogLevel.get(log_level.lower())
- return logging.INFO
- except configparser.Error:
- return logging.INFO
-
def get_nested_value(data, keys):
"""get data from nested dict"""
--
2.27.0

View File

@ -4,7 +4,7 @@
Summary: System Inspection Framework Summary: System Inspection Framework
Name: sysSentry Name: sysSentry
Version: 1.0.2 Version: 1.0.2
Release: 38 Release: 39
License: Mulan PSL v2 License: Mulan PSL v2
Group: System Environment/Daemons Group: System Environment/Daemons
Source0: https://gitee.com/openeuler/sysSentry/releases/download/v%{version}/%{name}-%{version}.tar.gz Source0: https://gitee.com/openeuler/sysSentry/releases/download/v%{version}/%{name}-%{version}.tar.gz
@ -56,6 +56,8 @@ Patch43: add-root-cause-analysis.patch
Patch44: update-collect-log.patch Patch44: update-collect-log.patch
Patch45: modify-abnormal-stack-when-the-disk-field-is-not-con.patch Patch45: modify-abnormal-stack-when-the-disk-field-is-not-con.patch
Patch46: ai_block_io-fix-some-bugs.patch Patch46: ai_block_io-fix-some-bugs.patch
Patch47: refactor-config.py-and-bugfix-uncorrect-slow-io-repo.patch
Patch48: get_io_data-failed-wont-stop-avg_block_io-and-del-di.patch
BuildRequires: cmake gcc-c++ BuildRequires: cmake gcc-c++
BuildRequires: python3 python3-setuptools BuildRequires: python3 python3-setuptools
@ -318,6 +320,12 @@ rm -rf %{buildroot}
%attr(0550,root,root) %{python3_sitelib}/sentryCollector/__pycache__/collect_plugin* %attr(0550,root,root) %{python3_sitelib}/sentryCollector/__pycache__/collect_plugin*
%changelog %changelog
* Tue Oct 15 2024 gaoruoshu <gaoruoshu@huawei.com> - 1.0.2-39
- Type:bugfix
- CVE:NA
- SUG:NA
- DESC:refactor config.py and fix incorrect slow io report
* Mon Oct 14 2024 heyouzhi <heyouzhi@huawei.com> - 1.0.2-38 * Mon Oct 14 2024 heyouzhi <heyouzhi@huawei.com> - 1.0.2-38
- Type:bugfix - Type:bugfix
- CVE:NA - CVE:NA