From 7e035b92d0a98a405ee0320ea42d4be53c96ad49 Mon Sep 17 00:00:00 2001 From: gaoruoshu Date: Tue, 15 Oct 2024 18:15:18 +0800 Subject: [PATCH] refactor config.py and bugfix uncorrect slow io report get_io_data failed wont stop avg_block_io and del disk not support Signed-off-by: gaoruoshu --- ...ed-wont-stop-avg_block_io-and-del-di.patch | 168 ++++++ ...py-and-bugfix-uncorrect-slow-io-repo.patch | 566 ++++++++++++++++++ sysSentry.spec | 10 +- 3 files changed, 743 insertions(+), 1 deletion(-) create mode 100644 get_io_data-failed-wont-stop-avg_block_io-and-del-di.patch create mode 100644 refactor-config.py-and-bugfix-uncorrect-slow-io-repo.patch diff --git a/get_io_data-failed-wont-stop-avg_block_io-and-del-di.patch b/get_io_data-failed-wont-stop-avg_block_io-and-del-di.patch new file mode 100644 index 0000000..ec2aaf2 --- /dev/null +++ b/get_io_data-failed-wont-stop-avg_block_io-and-del-di.patch @@ -0,0 +1,168 @@ +From b21607fcec4b290bc78c9f6c4a26db1a2df32a66 Mon Sep 17 00:00:00 2001 +From: gaoruoshu +Date: Tue, 15 Oct 2024 21:21:10 +0800 +Subject: [PATCH] get_io_data failed wont stop avg_block_io and del disk not + support + +--- + src/python/sentryCollector/collect_plugin.py | 14 ++++----- + .../avg_block_io/avg_block_io.py | 9 ++++-- + .../sentryPlugins/avg_block_io/module_conn.py | 31 +++++++++++++------ + 3 files changed, 35 insertions(+), 19 deletions(-) + +diff --git a/src/python/sentryCollector/collect_plugin.py b/src/python/sentryCollector/collect_plugin.py +index bec405a..53dddec 100644 +--- a/src/python/sentryCollector/collect_plugin.py ++++ b/src/python/sentryCollector/collect_plugin.py +@@ -90,14 +90,14 @@ def client_send_and_recv(request_data, data_str_len, protocol): + try: + client_socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + except socket.error: +- logging.error("collect_plugin: client create socket error") ++ logging.debug("collect_plugin: client create socket error") + return None + + try: + 
client_socket.connect(COLLECT_SOCKET_PATH) + except OSError: + client_socket.close() +- logging.error("collect_plugin: client connect error") ++ logging.debug("collect_plugin: client connect error") + return None + + req_data_len = len(request_data) +@@ -109,23 +109,23 @@ def client_send_and_recv(request_data, data_str_len, protocol): + res_data = res_data.decode() + except (OSError, UnicodeError): + client_socket.close() +- logging.error("collect_plugin: client communicate error") ++ logging.debug("collect_plugin: client communicate error") + return None + + res_magic = res_data[:CLT_MSG_MAGIC_LEN] + if res_magic != "RES": +- logging.error("res msg format error") ++ logging.debug("res msg format error") + return None + + protocol_str = res_data[CLT_MSG_MAGIC_LEN:CLT_MSG_MAGIC_LEN+CLT_MSG_PRO_LEN] + try: + protocol_id = int(protocol_str) + except ValueError: +- logging.error("recv msg protocol id is invalid %s", protocol_str) ++ logging.debug("recv msg protocol id is invalid %s", protocol_str) + return None + + if protocol_id >= ClientProtocol.PRO_END: +- logging.error("protocol id is invalid") ++ logging.debug("protocol id is invalid") + return None + + try: +@@ -134,7 +134,7 @@ def client_send_and_recv(request_data, data_str_len, protocol): + res_msg_data = res_msg_data.decode() + return res_msg_data + except (OSError, ValueError, UnicodeError): +- logging.error("collect_plugin: client recv res msg error") ++ logging.debug("collect_plugin: client recv res msg error") + finally: + client_socket.close() + +diff --git a/src/python/sentryPlugins/avg_block_io/avg_block_io.py b/src/python/sentryPlugins/avg_block_io/avg_block_io.py +index cd47919..899d517 100644 +--- a/src/python/sentryPlugins/avg_block_io/avg_block_io.py ++++ b/src/python/sentryPlugins/avg_block_io/avg_block_io.py +@@ -15,7 +15,7 @@ import time + + from .config import read_config_log, read_config_common, read_config_algorithm, read_config_latency, read_config_iodump, read_config_stage + from 
.stage_window import IoWindow, IoDumpWindow +-from .module_conn import avg_is_iocollect_valid, avg_get_io_data, report_alarm_fail, process_report_data, sig_handler, get_disk_type_by_name ++from .module_conn import avg_is_iocollect_valid, avg_get_io_data, report_alarm_fail, process_report_data, sig_handler, get_disk_type_by_name, check_disk_list_validation + from .utils import update_avg_and_check_abnormal + + CONFIG_FILE = "/etc/sysSentry/plugins/avg_block_io.ini" +@@ -79,6 +79,8 @@ def get_valid_disk_stage_list(io_dic, config_disk, config_stage): + if not disk_list: + report_alarm_fail("Cannot get valid disk name") + ++ disk_list = check_disk_list_validation(disk_list) ++ + disk_list = disk_list[:10] if len(disk_list) > 10 else disk_list + + if not config_disk: +@@ -117,7 +119,10 @@ def main_loop(io_dic, io_data, io_avg_value): + time.sleep(period_time) + + # 采集模块对接,获取周期数据 +- curr_period_data = avg_get_io_data(io_dic) ++ is_success, curr_period_data = avg_get_io_data(io_dic) ++ if not is_success: ++ logging.error(f"{curr_period_data['msg']}") ++ continue + + # 处理周期数据 + reach_size = False +diff --git a/src/python/sentryPlugins/avg_block_io/module_conn.py b/src/python/sentryPlugins/avg_block_io/module_conn.py +index cbdaad4..a67ef45 100644 +--- a/src/python/sentryPlugins/avg_block_io/module_conn.py ++++ b/src/python/sentryPlugins/avg_block_io/module_conn.py +@@ -40,25 +40,25 @@ def avg_is_iocollect_valid(io_dic, config_disk, config_stage): + logging.debug(f"send to sentryCollector is_iocollect_valid: period={io_dic['period_time']}, " + f"disk={config_disk}, stage={config_stage}") + res = is_iocollect_valid(io_dic["period_time"], config_disk, config_stage) +- return check_result_validation(res, 'check config validation') ++ is_success, data = check_result_validation(res, 'check config validation') ++ if not is_success: ++ report_alarm_fail(f"{data['msg']}") ++ return data + + + def check_result_validation(res, reason): + """check validation of result from 
sentryCollector""" + if not 'ret' in res or not 'message' in res: +- err_msg = "Failed to {}: Cannot connect to sentryCollector.".format(reason) +- report_alarm_fail(err_msg) ++ return False, {'msg': f"Failed to {reason}: Cannot connect to sentryCollector"} + if res['ret'] != 0: +- err_msg = "Failed to {}: {}".format(reason, Result_Messages[res['ret']]) +- report_alarm_fail(err_msg) ++ return False, {'msg': f"Failed to {reason}: {Result_Messages[res['ret']]}"} + + try: + json_data = json.loads(res['message']) + except json.JSONDecodeError: +- err_msg = f"Failed to {reason}: invalid return message" +- report_alarm_fail(err_msg) ++ return False, {'msg': f"Failed to {reason}: invalid return message"} + +- return json_data ++ return True, json_data + + + def report_alarm_fail(alarm_info): +@@ -120,10 +120,21 @@ def process_report_data(disk_name, rw, io_data): + xalarm_report(1002, MINOR_ALM, ALARM_TYPE_OCCUR, json.dumps(msg)) + + ++def check_disk_list_validation(disk_list): ++ valid_disk_list = [] ++ for disk_name in disk_list: ++ is_success, _ = check_result_validation(get_disk_type(disk_name), "") ++ if not is_success: ++ continue ++ valid_disk_list.append(disk_name) ++ return valid_disk_list ++ ++ + def get_disk_type_by_name(disk_name): + logging.debug(f"send to sentryCollector get_disk_type: disk_name={disk_name}") +- res = get_disk_type(disk_name) +- disk_type_str = check_result_validation(get_disk_type(disk_name), f'Invalid disk type {disk_name}') ++ is_success, disk_type_str = check_result_validation(get_disk_type(disk_name), f'Invalid disk type {disk_name}') ++ if not is_success: ++ report_alarm_fail(f"{disk_type_str['msg']}") + try: + curr_disk_type = int(disk_type_str) + if curr_disk_type not in Disk_Type: +-- +2.27.0 diff --git a/refactor-config.py-and-bugfix-uncorrect-slow-io-repo.patch b/refactor-config.py-and-bugfix-uncorrect-slow-io-repo.patch new file mode 100644 index 0000000..a0be948 --- /dev/null +++ 
b/refactor-config.py-and-bugfix-uncorrect-slow-io-repo.patch @@ -0,0 +1,566 @@ +From d5cb115a97e27c8270e8fb385fb3914af9ba3c34 Mon Sep 17 00:00:00 2001 +From: gaoruoshu +Date: Tue, 15 Oct 2024 10:00:07 +0000 +Subject: [PATCH] refactor config.py and bugfix uncorrect slow io report + +Signed-off-by: gaoruoshu +--- + .../avg_block_io/avg_block_io.py | 155 ++----------- + .../sentryPlugins/avg_block_io/config.py | 208 ++++++++++++++++++ + .../sentryPlugins/avg_block_io/module_conn.py | 9 +- + .../sentryPlugins/avg_block_io/utils.py | 72 ------ + 4 files changed, 238 insertions(+), 206 deletions(-) + create mode 100644 src/python/sentryPlugins/avg_block_io/config.py + +diff --git a/src/python/sentryPlugins/avg_block_io/avg_block_io.py b/src/python/sentryPlugins/avg_block_io/avg_block_io.py +index f3ade09..cd47919 100644 +--- a/src/python/sentryPlugins/avg_block_io/avg_block_io.py ++++ b/src/python/sentryPlugins/avg_block_io/avg_block_io.py +@@ -13,132 +13,13 @@ import signal + import configparser + import time + ++from .config import read_config_log, read_config_common, read_config_algorithm, read_config_latency, read_config_iodump, read_config_stage + from .stage_window import IoWindow, IoDumpWindow + from .module_conn import avg_is_iocollect_valid, avg_get_io_data, report_alarm_fail, process_report_data, sig_handler, get_disk_type_by_name +-from .utils import update_avg_and_check_abnormal, get_log_level, get_section_value +-from sentryCollector.collect_plugin import Disk_Type ++from .utils import update_avg_and_check_abnormal + + CONFIG_FILE = "/etc/sysSentry/plugins/avg_block_io.ini" + +-def log_invalid_keys(not_in_list, keys_name, config_list, default_list): +- """print invalid log""" +- if config_list and not_in_list: +- logging.warning("{} in common.{} are not valid, set {}={}".format(not_in_list, keys_name, keys_name, default_list)) +- elif config_list == ["default"]: +- logging.warning("Default {} use {}".format(keys_name, default_list)) +- +- +-def 
read_config_common(config): +- """read config file, get [common] section value""" +- if not config.has_section("common"): +- report_alarm_fail("Cannot find common section in config file") +- +- try: +- disk_name = config.get("common", "disk") +- disk = [] if disk_name == "default" else disk_name.split(",") +- except configparser.NoOptionError: +- disk = [] +- logging.warning("Unset common.disk, set to default") +- +- try: +- stage_name = config.get("common", "stage") +- stage = [] if stage_name == "default" else stage_name.split(",") +- except configparser.NoOptionError: +- stage = [] +- logging.warning("Unset common.stage, set to default") +- +- if len(disk) > 10: +- logging.warning("Too many common.disks, record only max 10 disks") +- disk = disk[:10] +- +- try: +- iotype_name = config.get("common", "iotype").split(",") +- iotype_list = [rw.lower() for rw in iotype_name if rw.lower() in ['read', 'write']] +- err_iotype = [rw.lower() for rw in iotype_name if rw.lower() not in ['read', 'write']] +- +- if err_iotype: +- report_alarm_fail("Invalid common.iotype config") +- +- except configparser.NoOptionError: +- iotype_list = ["read", "write"] +- logging.warning("Unset common.iotype, set to read,write") +- +- try: +- period_time = int(config.get("common", "period_time")) +- if not (1 <= period_time <= 300): +- raise ValueError("Invalid period_time") +- except ValueError: +- report_alarm_fail("Invalid common.period_time") +- except configparser.NoOptionError: +- period_time = 1 +- logging.warning("Unset common.period_time, use 1s as default") +- +- return period_time, disk, stage, iotype_list +- +- +-def read_config_algorithm(config): +- """read config file, get [algorithm] section value""" +- if not config.has_section("algorithm"): +- report_alarm_fail("Cannot find algorithm section in config file") +- +- try: +- win_size = int(config.get("algorithm", "win_size")) +- if not (1 <= win_size <= 300): +- raise ValueError("Invalid algorithm.win_size") +- except 
ValueError: +- report_alarm_fail("Invalid algorithm.win_size config") +- except configparser.NoOptionError: +- win_size = 30 +- logging.warning("Unset algorithm.win_size, use 30 as default") +- +- try: +- win_threshold = int(config.get("algorithm", "win_threshold")) +- if win_threshold < 1 or win_threshold > 300 or win_threshold > win_size: +- raise ValueError("Invalid algorithm.win_threshold") +- except ValueError: +- report_alarm_fail("Invalid algorithm.win_threshold config") +- except configparser.NoOptionError: +- win_threshold = 6 +- logging.warning("Unset algorithm.win_threshold, use 6 as default") +- +- return win_size, win_threshold +- +- +-def read_config_latency(config): +- """read config file, get [latency_xxx] section value""" +- common_param = {} +- for type_name in Disk_Type: +- section_name = f"latency_{Disk_Type[type_name]}" +- if not config.has_section(section_name): +- report_alarm_fail(f"Cannot find {section_name} section in config file") +- +- common_param[Disk_Type[type_name]] = get_section_value(section_name, config) +- return common_param +- +- +-def read_config_iodump(config): +- """read config file, get [iodump] section value""" +- common_param = {} +- section_name = "iodump" +- if not config.has_section(section_name): +- report_alarm_fail(f"Cannot find {section_name} section in config file") +- +- return get_section_value(section_name, config) +- +- +-def read_config_stage(config, stage, iotype_list, curr_disk_type): +- """read config file, get [STAGE_NAME_diskType] section value""" +- res = {} +- section_name = f"{stage}_{curr_disk_type}" +- if not config.has_section(section_name): +- return res +- +- for key in config[section_name]: +- if config[stage][key].isdecimal(): +- res[key] = int(config[stage][key]) +- +- return res +- + + def init_io_win(io_dic, config, common_param): + """initialize windows of latency, iodump, and dict of avg_value""" +@@ -192,24 +73,33 @@ def get_valid_disk_stage_list(io_dic, config_disk, config_stage): + 
disk_list = [key for key in all_disk_set if key in config_disk] + not_in_disk_list = [key for key in config_disk if key not in all_disk_set] + ++ if not config_disk and not not_in_disk_list: ++ disk_list = [key for key in all_disk_set] ++ ++ if not disk_list: ++ report_alarm_fail("Cannot get valid disk name") ++ ++ disk_list = disk_list[:10] if len(disk_list) > 10 else disk_list ++ ++ if not config_disk: ++ logging.info(f"Default common.disk using disk={disk_list}") ++ elif sorted(disk_list) != sorted(config_disk): ++ logging.warning(f"Set common.disk to {disk_list}") ++ + stage_list = [key for key in all_stage_set if key in config_stage] + not_in_stage_list = [key for key in config_stage if key not in all_stage_set] + + if not_in_stage_list: + report_alarm_fail(f"Invalid common.stage_list config, cannot set {not_in_stage_list}") + +- if not config_disk and not not_in_disk_list: +- disk_list = [key for key in all_disk_set] +- +- if not config_stage and not not_in_stage_list: ++ if not config_stage: + stage_list = [key for key in all_stage_set] + +- disk_list = disk_list[:10] if len(disk_list) > 10 else disk_list +- +- if not stage_list or not disk_list: +- report_alarm_fail("Cannot get valid disk name or stage name.") ++ if not stage_list: ++ report_alarm_fail("Cannot get valid stage name.") + +- log_invalid_keys(not_in_disk_list, 'disk', config_disk, disk_list) ++ if not config_stage: ++ logging.info(f"Default common.stage using stage={stage_list}") + + return disk_list, stage_list + +@@ -254,9 +144,8 @@ def main(): + signal.signal(signal.SIGINT, sig_handler) + signal.signal(signal.SIGTERM, sig_handler) + +- log_level = get_log_level(CONFIG_FILE) ++ log_level = read_config_log(CONFIG_FILE) + log_format = "%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s" +- + logging.basicConfig(level=log_level, format=log_format) + + # 初始化配置读取 +@@ -274,6 +163,8 @@ def main(): + # 采集模块对接,is_iocollect_valid() + io_dic["disk_list"], io_dic["stage_list"] = 
get_valid_disk_stage_list(io_dic, disk, stage) + ++ logging.debug(f"disk={io_dic['disk_list']}, stage={io_dic['stage_list']}") ++ + if "bio" not in io_dic["stage_list"]: + report_alarm_fail("Cannot run avg_block_io without bio stage") + +diff --git a/src/python/sentryPlugins/avg_block_io/config.py b/src/python/sentryPlugins/avg_block_io/config.py +new file mode 100644 +index 0000000..c8f45ce +--- /dev/null ++++ b/src/python/sentryPlugins/avg_block_io/config.py +@@ -0,0 +1,208 @@ ++import configparser ++import logging ++import os ++ ++from .module_conn import report_alarm_fail ++from sentryCollector.collect_plugin import Disk_Type ++ ++ ++CONF_LOG = 'log' ++CONF_LOG_LEVEL = 'level' ++LogLevel = { ++ "debug": logging.DEBUG, ++ "info": logging.INFO, ++ "warning": logging.WARNING, ++ "error": logging.ERROR, ++ "critical": logging.CRITICAL ++} ++ ++CONF_COMMON = 'common' ++CONF_COMMON_DISK = 'disk' ++CONF_COMMON_STAGE = 'stage' ++CONF_COMMON_IOTYPE = 'iotype' ++CONF_COMMON_PER_TIME = 'period_time' ++ ++CONF_ALGO = 'algorithm' ++CONF_ALGO_SIZE = 'win_size' ++CONF_ALGO_THRE = 'win_threshold' ++ ++CONF_LATENCY = 'latency_{}' ++CONF_IODUMP = 'iodump' ++ ++ ++DEFAULT_PARAM = { ++ CONF_LOG: { ++ CONF_LOG_LEVEL: 'info' ++ }, CONF_COMMON: { ++ CONF_COMMON_DISK: 'default', ++ CONF_COMMON_STAGE: 'default', ++ CONF_COMMON_IOTYPE: 'read,write', ++ CONF_COMMON_PER_TIME: 1 ++ }, CONF_ALGO: { ++ CONF_ALGO_SIZE: 30, ++ CONF_ALGO_THRE: 6 ++ }, 'latency_nvme_ssd': { ++ 'read_avg_lim': 300, ++ 'write_avg_lim': 300, ++ 'read_avg_time': 3, ++ 'write_avg_time': 3, ++ 'read_tot_lim': 500, ++ 'write_tot_lim': 500, ++ }, 'latency_sata_ssd' : { ++ 'read_avg_lim': 10000, ++ 'write_avg_lim': 10000, ++ 'read_avg_time': 3, ++ 'write_avg_time': 3, ++ 'read_tot_lim': 50000, ++ 'write_tot_lim': 50000, ++ }, 'latency_sata_hdd' : { ++ 'read_avg_lim': 15000, ++ 'write_avg_lim': 15000, ++ 'read_avg_time': 3, ++ 'write_avg_time': 3, ++ 'read_tot_lim': 50000, ++ 'write_tot_lim': 50000 ++ }, CONF_IODUMP: { ++ 
'read_iodump_lim': 0, ++ 'write_iodump_lim': 0 ++ } ++} ++ ++ ++def get_section_value(section_name, config): ++ common_param = {} ++ config_sec = config[section_name] ++ for config_key in DEFAULT_PARAM[section_name]: ++ if config_key in config_sec: ++ if not config_sec[config_key].isdecimal(): ++ report_alarm_fail(f"Invalid {section_name}.{config_key} config.") ++ common_param[config_key] = int(config_sec[config_key]) ++ else: ++ common_param[config_key] = DEFAULT_PARAM[section_name][config_key] ++ logging.warning(f"Unset {section_name}.{config_key} in config file, use {common_param[config_key]} as default") ++ return common_param ++ ++ ++def read_config_log(filename): ++ """read config file, get [log] section value""" ++ default_log_level = DEFAULT_PARAM[CONF_LOG][CONF_LOG_LEVEL] ++ if not os.path.exists(filename): ++ return LogLevel.get(default_log_level) ++ ++ config = configparser.ConfigParser() ++ config.read(filename) ++ ++ log_level = config.get(CONF_LOG, CONF_LOG_LEVEL, fallback=default_log_level) ++ if log_level.lower() in LogLevel: ++ return LogLevel.get(log_level.lower()) ++ return LogLevel.get(default_log_level) ++ ++ ++def read_config_common(config): ++ """read config file, get [common] section value""" ++ if not config.has_section(CONF_COMMON): ++ report_alarm_fail(f"Cannot find {CONF_COMMON} section in config file") ++ ++ try: ++ disk_name = config.get(CONF_COMMON, CONF_COMMON_DISK).lower() ++ disk = [] if disk_name == "default" else disk_name.split(",") ++ except configparser.NoOptionError: ++ disk = [] ++ logging.warning(f"Unset {CONF_COMMON}.{CONF_COMMON_DISK}, set to default") ++ ++ try: ++ stage_name = config.get(CONF_COMMON, CONF_COMMON_STAGE).lower() ++ stage = [] if stage_name == "default" else stage_name.split(",") ++ except configparser.NoOptionError: ++ stage = [] ++ logging.warning(f"Unset {CONF_COMMON}.{CONF_COMMON_STAGE}, set to default") ++ ++ if len(disk) > 10: ++ logging.warning(f"Too many {CONF_COMMON}.disks, record only max 10 
disks") ++ disk = disk[:10] ++ ++ try: ++ iotype_name = config.get(CONF_COMMON, CONF_COMMON_IOTYPE).lower().split(",") ++ iotype_list = [rw.lower() for rw in iotype_name if rw.lower() in ['read', 'write']] ++ err_iotype = [rw.lower() for rw in iotype_name if rw.lower() not in ['read', 'write']] ++ ++ if err_iotype: ++ report_alarm_fail(f"Invalid {CONF_COMMON}.{CONF_COMMON_IOTYPE} config") ++ ++ except configparser.NoOptionError: ++ iotype_list = DEFAULT_PARAM[CONF_COMMON][CONF_COMMON_IOTYPE].split(",") ++ logging.warning(f"Unset {CONF_COMMON}.{CONF_COMMON_IOTYPE}, use {iotype_list} as default") ++ ++ try: ++ period_time = int(config.get(CONF_COMMON, CONF_COMMON_PER_TIME)) ++ if not (1 <= period_time <= 300): ++ raise ValueError("Invalid period_time") ++ except ValueError: ++ report_alarm_fail(f"Invalid {CONF_COMMON}.{CONF_COMMON_PER_TIME}") ++ except configparser.NoOptionError: ++ period_time = DEFAULT_PARAM[CONF_COMMON][CONF_COMMON_PER_TIME] ++ logging.warning(f"Unset {CONF_COMMON}.{CONF_COMMON_PER_TIME}, use {period_time} as default") ++ ++ return period_time, disk, stage, iotype_list ++ ++ ++def read_config_algorithm(config): ++ """read config file, get [algorithm] section value""" ++ if not config.has_section(CONF_ALGO): ++ report_alarm_fail(f"Cannot find {CONF_ALGO} section in config file") ++ ++ try: ++ win_size = int(config.get(CONF_ALGO, CONF_ALGO_SIZE)) ++ if not (1 <= win_size <= 300): ++ raise ValueError(f"Invalid {CONF_ALGO}.{CONF_ALGO_SIZE}") ++ except ValueError: ++ report_alarm_fail(f"Invalid {CONF_ALGO}.{CONF_ALGO_SIZE} config") ++ except configparser.NoOptionError: ++ win_size = DEFAULT_PARAM[CONF_ALGO][CONF_ALGO_SIZE] ++ logging.warning(f"Unset {CONF_ALGO}.{CONF_ALGO_SIZE}, use {win_size} as default") ++ ++ try: ++ win_threshold = int(config.get(CONF_ALGO, CONF_ALGO_THRE)) ++ if win_threshold < 1 or win_threshold > 300 or win_threshold > win_size: ++ raise ValueError(f"Invalid {CONF_ALGO}.{CONF_ALGO_THRE}") ++ except ValueError: ++
report_alarm_fail(f"Invalid {CONF_ALGO}.{CONF_ALGO_THRE} config") ++ except configparser.NoOptionError: ++ win_threshold = DEFAULT_PARAM[CONF_ALGO][CONF_ALGO_THRE] ++ logging.warning(f"Unset {CONF_ALGO}.{CONF_ALGO_THRE}, use {win_threshold} as default") ++ ++ return win_size, win_threshold ++ ++ ++def read_config_latency(config): ++ """read config file, get [latency_xxx] section value""" ++ common_param = {} ++ for type_name in Disk_Type: ++ section_name = CONF_LATENCY.format(Disk_Type[type_name]) ++ if not config.has_section(section_name): ++ report_alarm_fail(f"Cannot find {section_name} section in config file") ++ ++ common_param[Disk_Type[type_name]] = get_section_value(section_name, config) ++ return common_param ++ ++ ++def read_config_iodump(config): ++ """read config file, get [iodump] section value""" ++ if not config.has_section(CONF_IODUMP): ++ report_alarm_fail(f"Cannot find {CONF_IODUMP} section in config file") ++ ++ return get_section_value(CONF_IODUMP, config) ++ ++ ++def read_config_stage(config, stage, iotype_list, curr_disk_type): ++ """read config file, get [STAGE_NAME_diskType] section value""" ++ res = {} ++ section_name = f"{stage}_{curr_disk_type}" ++ if not config.has_section(section_name): ++ return res ++ ++ for key in config[section_name]: ++ if config[section_name][key].isdecimal(): ++ res[key] = int(config[section_name][key]) ++ ++ return res +diff --git a/src/python/sentryPlugins/avg_block_io/module_conn.py b/src/python/sentryPlugins/avg_block_io/module_conn.py +index 8d6f429..cbdaad4 100644 +--- a/src/python/sentryPlugins/avg_block_io/module_conn.py ++++ b/src/python/sentryPlugins/avg_block_io/module_conn.py +@@ -29,12 +29,16 @@ def sig_handler(signum, _f): + + def avg_get_io_data(io_dic): + """get_io_data from sentryCollector""" ++ logging.debug(f"send to sentryCollector get_io_data: period={io_dic['period_time']}, " ++ f"disk={io_dic['disk_list']}, stage={io_dic['stage_list']}, iotype={io_dic['iotype_list']}") + res =
get_io_data(io_dic["period_time"], io_dic["disk_list"], io_dic["stage_list"], io_dic["iotype_list"]) + return check_result_validation(res, 'get io data') + + + def avg_is_iocollect_valid(io_dic, config_disk, config_stage): + """is_iocollect_valid from sentryCollector""" ++ logging.debug(f"send to sentryCollector is_iocollect_valid: period={io_dic['period_time']}, " ++ f"disk={config_disk}, stage={config_stage}") + res = is_iocollect_valid(io_dic["period_time"], config_disk, config_stage) + return check_result_validation(res, 'check config validation') + +@@ -79,7 +83,7 @@ def process_report_data(disk_name, rw, io_data): + # io press + ctrl_stage = ['throtl', 'wbt', 'iocost', 'bfq'] + for stage_name in ctrl_stage: +- abnormal, abnormal_list = is_abnormal((disk_name, 'bio', rw), io_data) ++ abnormal, abnormal_list = is_abnormal((disk_name, stage_name, rw), io_data) + if not abnormal: + continue + msg["reason"] = "IO press" +@@ -117,6 +121,7 @@ def process_report_data(disk_name, rw, io_data): + + + def get_disk_type_by_name(disk_name): ++ logging.debug(f"send to sentryCollector get_disk_type: disk_name={disk_name}") + res = get_disk_type(disk_name) + disk_type_str = check_result_validation(get_disk_type(disk_name), f'Invalid disk type {disk_name}') + try: +@@ -126,4 +131,4 @@ def get_disk_type_by_name(disk_name): + except ValueError: + report_alarm_fail(f"Failed to get disk type for {disk_name}") + +- return Disk_Type[curr_disk_type] +\ No newline at end of file ++ return Disk_Type[curr_disk_type] +diff --git a/src/python/sentryPlugins/avg_block_io/utils.py b/src/python/sentryPlugins/avg_block_io/utils.py +index c381c07..1bfd4e8 100644 +--- a/src/python/sentryPlugins/avg_block_io/utils.py ++++ b/src/python/sentryPlugins/avg_block_io/utils.py +@@ -8,84 +8,12 @@ + # IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + # PURPOSE. + # See the Mulan PSL v2 for more details. 
+-import configparser + import logging + import os + + AVG_VALUE = 0 + AVG_COUNT = 1 + +-CONF_LOG = 'log' +-CONF_LOG_LEVEL = 'level' +-LogLevel = { +- "debug": logging.DEBUG, +- "info": logging.INFO, +- "warning": logging.WARNING, +- "error": logging.ERROR, +- "critical": logging.CRITICAL +-} +- +- +-DEFAULT_PARAM = { +- 'latency_nvme_ssd': { +- 'read_avg_lim': 300, +- 'write_avg_lim': 300, +- 'read_avg_time': 3, +- 'write_avg_time': 3, +- 'read_tot_lim': 500, +- 'write_tot_lim': 500, +- }, 'latency_sata_ssd' : { +- 'read_avg_lim': 10000, +- 'write_avg_lim': 10000, +- 'read_avg_time': 3, +- 'write_avg_time': 3, +- 'read_tot_lim': 50000, +- 'write_tot_lim': 50000, +- }, 'latency_sata_hdd' : { +- 'read_avg_lim': 15000, +- 'write_avg_lim': 15000, +- 'read_avg_time': 3, +- 'write_avg_time': 3, +- 'read_tot_lim': 50000, +- 'write_tot_lim': 50000 +- }, 'iodump': { +- 'read_iodump_lim': 0, +- 'write_iodump_lim': 0 +- } +-} +- +- +-def get_section_value(section_name, config): +- common_param = {} +- config_sec = config[section_name] +- for config_key in DEFAULT_PARAM[section_name]: +- if config_key in config_sec: +- if not config_sec[config_key].isdecimal(): +- report_alarm_fail(f"Invalid {section_name}.{config_key} config.") +- common_param[config_key] = int(config_sec[config_key]) +- else: +- logging.warning(f"Unset {section_name}.{config_key} in config file, use {DEFAULT_PARAM[section_name][config_key]} as default") +- common_param[config_key] = DEFAULT_PARAM[section_name][config_key] +- return common_param +- +- +-def get_log_level(filename): +- if not os.path.exists(filename): +- return logging.INFO +- +- try: +- config = configparser.ConfigParser() +- config.read(filename) +- if not config.has_option(CONF_LOG, CONF_LOG_LEVEL): +- return logging.INFO +- log_level = config.get(CONF_LOG, CONF_LOG_LEVEL) +- +- if log_level.lower() in LogLevel: +- return LogLevel.get(log_level.lower()) +- return logging.INFO +- except configparser.Error: +- return logging.INFO +- + + def 
get_nested_value(data, keys): + """get data from nested dict""" +-- +2.27.0 diff --git a/sysSentry.spec b/sysSentry.spec index 8fe593c..848aec6 100644 --- a/sysSentry.spec +++ b/sysSentry.spec @@ -4,7 +4,7 @@ Summary: System Inspection Framework Name: sysSentry Version: 1.0.2 -Release: 38 +Release: 39 License: Mulan PSL v2 Group: System Environment/Daemons Source0: https://gitee.com/openeuler/sysSentry/releases/download/v%{version}/%{name}-%{version}.tar.gz @@ -56,6 +56,8 @@ Patch43: add-root-cause-analysis.patch Patch44: update-collect-log.patch Patch45: modify-abnormal-stack-when-the-disk-field-is-not-con.patch Patch46: ai_block_io-fix-some-bugs.patch +Patch47: refactor-config.py-and-bugfix-uncorrect-slow-io-repo.patch +Patch48: get_io_data-failed-wont-stop-avg_block_io-and-del-di.patch BuildRequires: cmake gcc-c++ BuildRequires: python3 python3-setuptools @@ -318,6 +320,12 @@ rm -rf %{buildroot} %attr(0550,root,root) %{python3_sitelib}/sentryCollector/__pycache__/collect_plugin* %changelog +* Tue Oct 15 2024 gaoruoshu - 1.0.2-39 +- Type:bugfix +- CVE:NA +- SUG:NA +- DESC:refactor config.py and bugfix incorrect slow io report; get_io_data failure no longer stops avg_block_io and unsupported disks are removed + * Mon Oct 14 2024 heyouzhi - 1.0.2-38 - Type:bugfix - CVE:NA