567 lines
21 KiB
Diff
567 lines
21 KiB
Diff
|
|
From d5cb115a97e27c8270e8fb385fb3914af9ba3c34 Mon Sep 17 00:00:00 2001
|
|||
|
|
From: gaoruoshu <gaoruoshu@huawei.com>
|
|||
|
|
Date: Tue, 15 Oct 2024 10:00:07 +0000
|
|||
|
|
Subject: [PATCH] refactor config.py and bugfix incorrect slow io report
|
|||
|
|
|
|||
|
|
Signed-off-by: gaoruoshu <gaoruoshu@huawei.com>
|
|||
|
|
---
|
|||
|
|
.../avg_block_io/avg_block_io.py | 155 ++-----------
|
|||
|
|
.../sentryPlugins/avg_block_io/config.py | 208 ++++++++++++++++++
|
|||
|
|
.../sentryPlugins/avg_block_io/module_conn.py | 9 +-
|
|||
|
|
.../sentryPlugins/avg_block_io/utils.py | 72 ------
|
|||
|
|
4 files changed, 238 insertions(+), 206 deletions(-)
|
|||
|
|
create mode 100644 src/python/sentryPlugins/avg_block_io/config.py
|
|||
|
|
|
|||
|
|
diff --git a/src/python/sentryPlugins/avg_block_io/avg_block_io.py b/src/python/sentryPlugins/avg_block_io/avg_block_io.py
|
|||
|
|
index f3ade09..cd47919 100644
|
|||
|
|
--- a/src/python/sentryPlugins/avg_block_io/avg_block_io.py
|
|||
|
|
+++ b/src/python/sentryPlugins/avg_block_io/avg_block_io.py
|
|||
|
|
@@ -13,132 +13,13 @@ import signal
|
|||
|
|
import configparser
|
|||
|
|
import time
|
|||
|
|
|
|||
|
|
+from .config import read_config_log, read_config_common, read_config_algorithm, read_config_latency, read_config_iodump, read_config_stage
|
|||
|
|
from .stage_window import IoWindow, IoDumpWindow
|
|||
|
|
from .module_conn import avg_is_iocollect_valid, avg_get_io_data, report_alarm_fail, process_report_data, sig_handler, get_disk_type_by_name
|
|||
|
|
-from .utils import update_avg_and_check_abnormal, get_log_level, get_section_value
|
|||
|
|
-from sentryCollector.collect_plugin import Disk_Type
|
|||
|
|
+from .utils import update_avg_and_check_abnormal
|
|||
|
|
|
|||
|
|
CONFIG_FILE = "/etc/sysSentry/plugins/avg_block_io.ini"
|
|||
|
|
|
|||
|
|
-def log_invalid_keys(not_in_list, keys_name, config_list, default_list):
|
|||
|
|
- """print invalid log"""
|
|||
|
|
- if config_list and not_in_list:
|
|||
|
|
- logging.warning("{} in common.{} are not valid, set {}={}".format(not_in_list, keys_name, keys_name, default_list))
|
|||
|
|
- elif config_list == ["default"]:
|
|||
|
|
- logging.warning("Default {} use {}".format(keys_name, default_list))
|
|||
|
|
-
|
|||
|
|
-
|
|||
|
|
-def read_config_common(config):
|
|||
|
|
- """read config file, get [common] section value"""
|
|||
|
|
- if not config.has_section("common"):
|
|||
|
|
- report_alarm_fail("Cannot find common section in config file")
|
|||
|
|
-
|
|||
|
|
- try:
|
|||
|
|
- disk_name = config.get("common", "disk")
|
|||
|
|
- disk = [] if disk_name == "default" else disk_name.split(",")
|
|||
|
|
- except configparser.NoOptionError:
|
|||
|
|
- disk = []
|
|||
|
|
- logging.warning("Unset common.disk, set to default")
|
|||
|
|
-
|
|||
|
|
- try:
|
|||
|
|
- stage_name = config.get("common", "stage")
|
|||
|
|
- stage = [] if stage_name == "default" else stage_name.split(",")
|
|||
|
|
- except configparser.NoOptionError:
|
|||
|
|
- stage = []
|
|||
|
|
- logging.warning("Unset common.stage, set to default")
|
|||
|
|
-
|
|||
|
|
- if len(disk) > 10:
|
|||
|
|
- logging.warning("Too many common.disks, record only max 10 disks")
|
|||
|
|
- disk = disk[:10]
|
|||
|
|
-
|
|||
|
|
- try:
|
|||
|
|
- iotype_name = config.get("common", "iotype").split(",")
|
|||
|
|
- iotype_list = [rw.lower() for rw in iotype_name if rw.lower() in ['read', 'write']]
|
|||
|
|
- err_iotype = [rw.lower() for rw in iotype_name if rw.lower() not in ['read', 'write']]
|
|||
|
|
-
|
|||
|
|
- if err_iotype:
|
|||
|
|
- report_alarm_fail("Invalid common.iotype config")
|
|||
|
|
-
|
|||
|
|
- except configparser.NoOptionError:
|
|||
|
|
- iotype_list = ["read", "write"]
|
|||
|
|
- logging.warning("Unset common.iotype, set to read,write")
|
|||
|
|
-
|
|||
|
|
- try:
|
|||
|
|
- period_time = int(config.get("common", "period_time"))
|
|||
|
|
- if not (1 <= period_time <= 300):
|
|||
|
|
- raise ValueError("Invalid period_time")
|
|||
|
|
- except ValueError:
|
|||
|
|
- report_alarm_fail("Invalid common.period_time")
|
|||
|
|
- except configparser.NoOptionError:
|
|||
|
|
- period_time = 1
|
|||
|
|
- logging.warning("Unset common.period_time, use 1s as default")
|
|||
|
|
-
|
|||
|
|
- return period_time, disk, stage, iotype_list
|
|||
|
|
-
|
|||
|
|
-
|
|||
|
|
-def read_config_algorithm(config):
|
|||
|
|
- """read config file, get [algorithm] section value"""
|
|||
|
|
- if not config.has_section("algorithm"):
|
|||
|
|
- report_alarm_fail("Cannot find algorithm section in config file")
|
|||
|
|
-
|
|||
|
|
- try:
|
|||
|
|
- win_size = int(config.get("algorithm", "win_size"))
|
|||
|
|
- if not (1 <= win_size <= 300):
|
|||
|
|
- raise ValueError("Invalid algorithm.win_size")
|
|||
|
|
- except ValueError:
|
|||
|
|
- report_alarm_fail("Invalid algorithm.win_size config")
|
|||
|
|
- except configparser.NoOptionError:
|
|||
|
|
- win_size = 30
|
|||
|
|
- logging.warning("Unset algorithm.win_size, use 30 as default")
|
|||
|
|
-
|
|||
|
|
- try:
|
|||
|
|
- win_threshold = int(config.get("algorithm", "win_threshold"))
|
|||
|
|
- if win_threshold < 1 or win_threshold > 300 or win_threshold > win_size:
|
|||
|
|
- raise ValueError("Invalid algorithm.win_threshold")
|
|||
|
|
- except ValueError:
|
|||
|
|
- report_alarm_fail("Invalid algorithm.win_threshold config")
|
|||
|
|
- except configparser.NoOptionError:
|
|||
|
|
- win_threshold = 6
|
|||
|
|
- logging.warning("Unset algorithm.win_threshold, use 6 as default")
|
|||
|
|
-
|
|||
|
|
- return win_size, win_threshold
|
|||
|
|
-
|
|||
|
|
-
|
|||
|
|
-def read_config_latency(config):
|
|||
|
|
- """read config file, get [latency_xxx] section value"""
|
|||
|
|
- common_param = {}
|
|||
|
|
- for type_name in Disk_Type:
|
|||
|
|
- section_name = f"latency_{Disk_Type[type_name]}"
|
|||
|
|
- if not config.has_section(section_name):
|
|||
|
|
- report_alarm_fail(f"Cannot find {section_name} section in config file")
|
|||
|
|
-
|
|||
|
|
- common_param[Disk_Type[type_name]] = get_section_value(section_name, config)
|
|||
|
|
- return common_param
|
|||
|
|
-
|
|||
|
|
-
|
|||
|
|
-def read_config_iodump(config):
|
|||
|
|
- """read config file, get [iodump] section value"""
|
|||
|
|
- common_param = {}
|
|||
|
|
- section_name = "iodump"
|
|||
|
|
- if not config.has_section(section_name):
|
|||
|
|
- report_alarm_fail(f"Cannot find {section_name} section in config file")
|
|||
|
|
-
|
|||
|
|
- return get_section_value(section_name, config)
|
|||
|
|
-
|
|||
|
|
-
|
|||
|
|
-def read_config_stage(config, stage, iotype_list, curr_disk_type):
|
|||
|
|
- """read config file, get [STAGE_NAME_diskType] section value"""
|
|||
|
|
- res = {}
|
|||
|
|
- section_name = f"{stage}_{curr_disk_type}"
|
|||
|
|
- if not config.has_section(section_name):
|
|||
|
|
- return res
|
|||
|
|
-
|
|||
|
|
- for key in config[section_name]:
|
|||
|
|
- if config[stage][key].isdecimal():
|
|||
|
|
- res[key] = int(config[stage][key])
|
|||
|
|
-
|
|||
|
|
- return res
|
|||
|
|
-
|
|||
|
|
|
|||
|
|
def init_io_win(io_dic, config, common_param):
|
|||
|
|
"""initialize windows of latency, iodump, and dict of avg_value"""
|
|||
|
|
@@ -192,24 +73,33 @@ def get_valid_disk_stage_list(io_dic, config_disk, config_stage):
|
|||
|
|
disk_list = [key for key in all_disk_set if key in config_disk]
|
|||
|
|
not_in_disk_list = [key for key in config_disk if key not in all_disk_set]
|
|||
|
|
|
|||
|
|
+ if not config_disk and not not_in_disk_list:
|
|||
|
|
+ disk_list = [key for key in all_disk_set]
|
|||
|
|
+
|
|||
|
|
+ if not disk_list:
|
|||
|
|
+ report_alarm_fail("Cannot get valid disk name")
|
|||
|
|
+
|
|||
|
|
+ disk_list = disk_list[:10] if len(disk_list) > 10 else disk_list
|
|||
|
|
+
|
|||
|
|
+ if not config_disk:
|
|||
|
|
+ logging.info(f"Default common.disk using disk={disk_list}")
|
|||
|
|
+ elif sorted(disk_list) != sorted(config_disk):
|
|||
|
|
+ logging.warning(f"Set common.disk to {disk_list}")
|
|||
|
|
+
|
|||
|
|
stage_list = [key for key in all_stage_set if key in config_stage]
|
|||
|
|
not_in_stage_list = [key for key in config_stage if key not in all_stage_set]
|
|||
|
|
|
|||
|
|
if not_in_stage_list:
|
|||
|
|
report_alarm_fail(f"Invalid common.stage_list config, cannot set {not_in_stage_list}")
|
|||
|
|
|
|||
|
|
- if not config_disk and not not_in_disk_list:
|
|||
|
|
- disk_list = [key for key in all_disk_set]
|
|||
|
|
-
|
|||
|
|
- if not config_stage and not not_in_stage_list:
|
|||
|
|
+ if not config_stage:
|
|||
|
|
stage_list = [key for key in all_stage_set]
|
|||
|
|
|
|||
|
|
- disk_list = disk_list[:10] if len(disk_list) > 10 else disk_list
|
|||
|
|
-
|
|||
|
|
- if not stage_list or not disk_list:
|
|||
|
|
- report_alarm_fail("Cannot get valid disk name or stage name.")
|
|||
|
|
+ if not stage_list:
|
|||
|
|
+ report_alarm_fail("Cannot get valid stage name.")
|
|||
|
|
|
|||
|
|
- log_invalid_keys(not_in_disk_list, 'disk', config_disk, disk_list)
|
|||
|
|
+ if not config_stage:
|
|||
|
|
+ logging.info(f"Default common.stage using stage={stage_list}")
|
|||
|
|
|
|||
|
|
return disk_list, stage_list
|
|||
|
|
|
|||
|
|
@@ -254,9 +144,8 @@ def main():
|
|||
|
|
signal.signal(signal.SIGINT, sig_handler)
|
|||
|
|
signal.signal(signal.SIGTERM, sig_handler)
|
|||
|
|
|
|||
|
|
- log_level = get_log_level(CONFIG_FILE)
|
|||
|
|
+ log_level = read_config_log(CONFIG_FILE)
|
|||
|
|
log_format = "%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s"
|
|||
|
|
-
|
|||
|
|
logging.basicConfig(level=log_level, format=log_format)
|
|||
|
|
|
|||
|
|
# 初始化配置读取
|
|||
|
|
@@ -274,6 +163,8 @@ def main():
|
|||
|
|
# 采集模块对接,is_iocollect_valid()
|
|||
|
|
io_dic["disk_list"], io_dic["stage_list"] = get_valid_disk_stage_list(io_dic, disk, stage)
|
|||
|
|
|
|||
|
|
+ logging.debug(f"disk={io_dic['disk_list']}, stage={io_dic['stage_list']}")
|
|||
|
|
+
|
|||
|
|
if "bio" not in io_dic["stage_list"]:
|
|||
|
|
report_alarm_fail("Cannot run avg_block_io without bio stage")
|
|||
|
|
|
|||
|
|
diff --git a/src/python/sentryPlugins/avg_block_io/config.py b/src/python/sentryPlugins/avg_block_io/config.py
|
|||
|
|
new file mode 100644
|
|||
|
|
index 0000000..c8f45ce
|
|||
|
|
--- /dev/null
|
|||
|
|
+++ b/src/python/sentryPlugins/avg_block_io/config.py
|
|||
|
|
@@ -0,0 +1,208 @@
|
|||
|
|
+import configparser
|
|||
|
|
+import logging
|
|||
|
|
+import os
|
|||
|
|
+
|
|||
|
|
+from .module_conn import report_alarm_fail
|
|||
|
|
+from sentryCollector.collect_plugin import Disk_Type
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
+CONF_LOG = 'log'
|
|||
|
|
+CONF_LOG_LEVEL = 'level'
|
|||
|
|
+LogLevel = {
|
|||
|
|
+ "debug": logging.DEBUG,
|
|||
|
|
+ "info": logging.INFO,
|
|||
|
|
+ "warning": logging.WARNING,
|
|||
|
|
+ "error": logging.ERROR,
|
|||
|
|
+ "critical": logging.CRITICAL
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+CONF_COMMON = 'common'
|
|||
|
|
+CONF_COMMON_DISK = 'disk'
|
|||
|
|
+CONF_COMMON_STAGE = 'stage'
|
|||
|
|
+CONF_COMMON_IOTYPE = 'iotype'
|
|||
|
|
+CONF_COMMON_PER_TIME = 'period_time'
|
|||
|
|
+
|
|||
|
|
+CONF_ALGO = 'algorithm'
|
|||
|
|
+CONF_ALGO_SIZE = 'win_size'
|
|||
|
|
+CONF_ALGO_THRE = 'win_threshold'
|
|||
|
|
+
|
|||
|
|
+CONF_LATENCY = 'latency_{}'
|
|||
|
|
+CONF_IODUMP = 'iodump'
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
+DEFAULT_PARAM = {
|
|||
|
|
+ CONF_LOG: {
|
|||
|
|
+ CONF_LOG_LEVEL: 'info'
|
|||
|
|
+ }, CONF_COMMON: {
|
|||
|
|
+ CONF_COMMON_DISK: 'default',
|
|||
|
|
+ CONF_COMMON_STAGE: 'default',
|
|||
|
|
+ CONF_COMMON_IOTYPE: 'read,write',
|
|||
|
|
+ CONF_COMMON_PER_TIME: 1
|
|||
|
|
+ }, CONF_ALGO: {
|
|||
|
|
+ CONF_ALGO_SIZE: 30,
|
|||
|
|
+ CONF_ALGO_THRE: 6
|
|||
|
|
+ }, 'latency_nvme_ssd': {
|
|||
|
|
+ 'read_avg_lim': 300,
|
|||
|
|
+ 'write_avg_lim': 300,
|
|||
|
|
+ 'read_avg_time': 3,
|
|||
|
|
+ 'write_avg_time': 3,
|
|||
|
|
+ 'read_tot_lim': 500,
|
|||
|
|
+ 'write_tot_lim': 500,
|
|||
|
|
+ }, 'latency_sata_ssd' : {
|
|||
|
|
+ 'read_avg_lim': 10000,
|
|||
|
|
+ 'write_avg_lim': 10000,
|
|||
|
|
+ 'read_avg_time': 3,
|
|||
|
|
+ 'write_avg_time': 3,
|
|||
|
|
+ 'read_tot_lim': 50000,
|
|||
|
|
+ 'write_tot_lim': 50000,
|
|||
|
|
+ }, 'latency_sata_hdd' : {
|
|||
|
|
+ 'read_avg_lim': 15000,
|
|||
|
|
+ 'write_avg_lim': 15000,
|
|||
|
|
+ 'read_avg_time': 3,
|
|||
|
|
+ 'write_avg_time': 3,
|
|||
|
|
+ 'read_tot_lim': 50000,
|
|||
|
|
+ 'write_tot_lim': 50000
|
|||
|
|
+ }, CONF_IODUMP: {
|
|||
|
|
+ 'read_iodump_lim': 0,
|
|||
|
|
+ 'write_iodump_lim': 0
|
|||
|
|
+ }
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
+def get_section_value(section_name, config):
|
|||
|
|
+ common_param = {}
|
|||
|
|
+ config_sec = config[section_name]
|
|||
|
|
+ for config_key in DEFAULT_PARAM[section_name]:
|
|||
|
|
+ if config_key in config_sec:
|
|||
|
|
+ if not config_sec[config_key].isdecimal():
|
|||
|
|
+ report_alarm_fail(f"Invalid {section_name}.{config_key} config.")
|
|||
|
|
+ common_param[config_key] = int(config_sec[config_key])
|
|||
|
|
+ else:
|
|||
|
|
+ common_param[config_key] = DEFAULT_PARAM[section_name][config_key]
|
|||
|
|
+ logging.warning(f"Unset {section_name}.{config_key} in config file, use {common_param[config_key]} as default")
|
|||
|
|
+ return common_param
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
+def read_config_log(filename):
+    """Return the logging level set by [log].level, or the default level if it is missing, invalid or unparsable."""
+    default_log_level = DEFAULT_PARAM[CONF_LOG][CONF_LOG_LEVEL]
+    if not os.path.exists(filename):
+        return LogLevel.get(default_log_level)
+
+    config = configparser.ConfigParser()
+    try:  # a malformed file must not crash startup before logging is configured
+        config.read(filename)
+        log_level = config.get(CONF_LOG, CONF_LOG_LEVEL, fallback=default_log_level)
+    except configparser.Error:
+        log_level = default_log_level
+    return LogLevel.get(log_level.lower(), LogLevel.get(default_log_level))
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
+def read_config_common(config):
+    """Read the [common] section and return (period_time, disk, stage, iotype_list); disk/stage are [] for "default"."""
+    if not config.has_section(CONF_COMMON):
+        report_alarm_fail(f"Cannot find {CONF_COMMON} section in config file")
+
+    try:
+        disk_name = config.get(CONF_COMMON, CONF_COMMON_DISK).lower()
+        disk = [] if disk_name == "default" else disk_name.split(",")
+    except configparser.NoOptionError:
+        disk = []
+        logging.warning(f"Unset {CONF_COMMON}.{CONF_COMMON_DISK}, set to default")
+
+    try:
+        stage_name = config.get(CONF_COMMON, CONF_COMMON_STAGE).lower()
+        stage = [] if stage_name == "default" else stage_name.split(",")
+    except configparser.NoOptionError:
+        stage = []
+        logging.warning(f"Unset {CONF_COMMON}.{CONF_COMMON_STAGE}, set to default")
+
+    if len(disk) > 10:
+        logging.warning(f"Too many {CONF_COMMON}.disks, record only max 10 disks")
+        disk = disk[:10]
+
+    try:
+        iotype_name = config.get(CONF_COMMON, CONF_COMMON_IOTYPE).lower().split(",")
+        iotype_list = [rw.lower() for rw in iotype_name if rw.lower() in ['read', 'write']]
+        err_iotype = [rw.lower() for rw in iotype_name if rw.lower() not in ['read', 'write']]
+
+        if err_iotype:
+            report_alarm_fail(f"Invalid {CONF_COMMON}.{CONF_COMMON_IOTYPE} config")
+
+    except configparser.NoOptionError:
+        iotype_list = DEFAULT_PARAM[CONF_COMMON][CONF_COMMON_IOTYPE].split(",")  # default is the string "read,write"; return a list
+        logging.warning(f"Unset {CONF_COMMON}.{CONF_COMMON_IOTYPE}, use {iotype_list} as default")
+
+    try:
+        period_time = int(config.get(CONF_COMMON, CONF_COMMON_PER_TIME))
+        if not (1 <= period_time <= 300):
+            raise ValueError("Invalid period_time")
+    except ValueError:
+        report_alarm_fail(f"Invalid {CONF_COMMON}.{CONF_COMMON_PER_TIME}")
+    except configparser.NoOptionError:
+        period_time = DEFAULT_PARAM[CONF_COMMON][CONF_COMMON_PER_TIME]
+        logging.warning(f"Unset {CONF_COMMON}.{CONF_COMMON_PER_TIME}, use {period_time} as default")
+
+    return period_time, disk, stage, iotype_list
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
+def read_config_algorithm(config):
+    """Read the [algorithm] section and return (win_size, win_threshold), each validated to 1..300 with threshold <= size."""
+    if not config.has_section(CONF_ALGO):
+        report_alarm_fail(f"Cannot find {CONF_ALGO} section in config file")
+
+    try:
+        win_size = int(config.get(CONF_ALGO, CONF_ALGO_SIZE))
+        if not (1 <= win_size <= 300):
+            raise ValueError(f"Invalid {CONF_ALGO}.{CONF_ALGO_SIZE}")
+    except ValueError:
+        report_alarm_fail(f"Invalid {CONF_ALGO}.{CONF_ALGO_SIZE} config")
+    except configparser.NoOptionError:
+        win_size = DEFAULT_PARAM[CONF_ALGO][CONF_ALGO_SIZE]
+        logging.warning(f"Unset {CONF_ALGO}.{CONF_ALGO_SIZE}, use {win_size} as default")
+
+    try:
+        win_threshold = int(config.get(CONF_ALGO, CONF_ALGO_THRE))
+        if win_threshold < 1 or win_threshold > 300 or win_threshold > win_size:
+            raise ValueError(f"Invalid {CONF_ALGO}.{CONF_ALGO_THRE}")
+    except ValueError:
+        report_alarm_fail(f"Invalid {CONF_ALGO}.{CONF_ALGO_THRE} config")
+    except configparser.NoOptionError:
+        win_threshold = min(DEFAULT_PARAM[CONF_ALGO][CONF_ALGO_THRE], win_size)  # default must also respect threshold <= win_size
+        logging.warning(f"Unset {CONF_ALGO}.{CONF_ALGO_THRE}, use {win_threshold} as default")
+
+    return win_size, win_threshold
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
+def read_config_latency(config):
|
|||
|
|
+ """read config file, get [latency_xxx] section value"""
|
|||
|
|
+ common_param = {}
|
|||
|
|
+ for type_name in Disk_Type:
|
|||
|
|
+ section_name = CONF_LATENCY.format(Disk_Type[type_name])
|
|||
|
|
+ if not config.has_section(section_name):
|
|||
|
|
+ report_alarm_fail(f"Cannot find {section_name} section in config file")
|
|||
|
|
+
|
|||
|
|
+ common_param[Disk_Type[type_name]] = get_section_value(section_name, config)
|
|||
|
|
+ return common_param
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
+def read_config_iodump(config):
|
|||
|
|
+ """read config file, get [iodump] section value"""
|
|||
|
|
+ if not config.has_section(CONF_IODUMP):
|
|||
|
|
+ report_alarm_fail(f"Cannot find {CONF_IODUMP} section in config file")
|
|||
|
|
+
|
|||
|
|
+ return get_section_value(CONF_IODUMP, config)
|
|||
|
|
+
|
|||
|
|
+
|
|||
|
|
+def read_config_stage(config, stage, iotype_list, curr_disk_type):
+    """Read section [<stage>_<curr_disk_type>] and return its decimal-valued keys as ints ({} if the section is absent)."""
+    res = {}
+    section_name = f"{stage}_{curr_disk_type}"
+    if not config.has_section(section_name):
+        return res
+
+    for key in config[section_name]:
+        if config[section_name][key].isdecimal():  # look up in the section being iterated; non-decimal values are skipped
+            res[key] = int(config[section_name][key])
+
+    return res
|
|||
|
|
diff --git a/src/python/sentryPlugins/avg_block_io/module_conn.py b/src/python/sentryPlugins/avg_block_io/module_conn.py
|
|||
|
|
index 8d6f429..cbdaad4 100644
|
|||
|
|
--- a/src/python/sentryPlugins/avg_block_io/module_conn.py
|
|||
|
|
+++ b/src/python/sentryPlugins/avg_block_io/module_conn.py
|
|||
|
|
@@ -29,12 +29,16 @@ def sig_handler(signum, _f):
|
|||
|
|
|
|||
|
|
def avg_get_io_data(io_dic):
|
|||
|
|
"""get_io_data from sentryCollector"""
|
|||
|
|
+ logging.debug(f"send to sentryCollector get_io_data: period={io_dic['period_time']}, "
|
|||
|
|
+ f"disk={io_dic['disk_list']}, stage={io_dic['stage_list']}, iotype={io_dic['iotype_list']}")
|
|||
|
|
res = get_io_data(io_dic["period_time"], io_dic["disk_list"], io_dic["stage_list"], io_dic["iotype_list"])
|
|||
|
|
return check_result_validation(res, 'get io data')
|
|||
|
|
|
|||
|
|
|
|||
|
|
def avg_is_iocollect_valid(io_dic, config_disk, config_stage):
|
|||
|
|
"""is_iocollect_valid from sentryCollector"""
|
|||
|
|
+ logging.debug(f"send to sentryCollector is_iocollect_valid: period={io_dic['period_time']}, "
|
|||
|
|
+ f"disk={config_disk}, stage={config_stage}")
|
|||
|
|
res = is_iocollect_valid(io_dic["period_time"], config_disk, config_stage)
|
|||
|
|
return check_result_validation(res, 'check config validation')
|
|||
|
|
|
|||
|
|
@@ -79,7 +83,7 @@ def process_report_data(disk_name, rw, io_data):
|
|||
|
|
# io press
|
|||
|
|
ctrl_stage = ['throtl', 'wbt', 'iocost', 'bfq']
|
|||
|
|
for stage_name in ctrl_stage:
|
|||
|
|
- abnormal, abnormal_list = is_abnormal((disk_name, 'bio', rw), io_data)
|
|||
|
|
+ abnormal, abnormal_list = is_abnormal((disk_name, stage_name, rw), io_data)
|
|||
|
|
if not abnormal:
|
|||
|
|
continue
|
|||
|
|
msg["reason"] = "IO press"
|
|||
|
|
@@ -117,6 +121,7 @@ def process_report_data(disk_name, rw, io_data):
|
|||
|
|
|
|||
|
|
|
|||
|
|
def get_disk_type_by_name(disk_name):
|
|||
|
|
+ logging.debug(f"send to sentryCollector get_disk_type: disk_name={disk_name}")
|
|||
|
|
res = get_disk_type(disk_name)
|
|||
|
|
disk_type_str = check_result_validation(get_disk_type(disk_name), f'Invalid disk type {disk_name}')
|
|||
|
|
try:
|
|||
|
|
@@ -126,4 +131,4 @@ def get_disk_type_by_name(disk_name):
|
|||
|
|
except ValueError:
|
|||
|
|
report_alarm_fail(f"Failed to get disk type for {disk_name}")
|
|||
|
|
|
|||
|
|
- return Disk_Type[curr_disk_type]
|
|||
|
|
\ No newline at end of file
|
|||
|
|
+ return Disk_Type[curr_disk_type]
|
|||
|
|
diff --git a/src/python/sentryPlugins/avg_block_io/utils.py b/src/python/sentryPlugins/avg_block_io/utils.py
|
|||
|
|
index c381c07..1bfd4e8 100644
|
|||
|
|
--- a/src/python/sentryPlugins/avg_block_io/utils.py
|
|||
|
|
+++ b/src/python/sentryPlugins/avg_block_io/utils.py
|
|||
|
|
@@ -8,84 +8,12 @@
|
|||
|
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
|
|||
|
|
# PURPOSE.
|
|||
|
|
# See the Mulan PSL v2 for more details.
|
|||
|
|
-import configparser
|
|||
|
|
import logging
|
|||
|
|
import os
|
|||
|
|
|
|||
|
|
AVG_VALUE = 0
|
|||
|
|
AVG_COUNT = 1
|
|||
|
|
|
|||
|
|
-CONF_LOG = 'log'
|
|||
|
|
-CONF_LOG_LEVEL = 'level'
|
|||
|
|
-LogLevel = {
|
|||
|
|
- "debug": logging.DEBUG,
|
|||
|
|
- "info": logging.INFO,
|
|||
|
|
- "warning": logging.WARNING,
|
|||
|
|
- "error": logging.ERROR,
|
|||
|
|
- "critical": logging.CRITICAL
|
|||
|
|
-}
|
|||
|
|
-
|
|||
|
|
-
|
|||
|
|
-DEFAULT_PARAM = {
|
|||
|
|
- 'latency_nvme_ssd': {
|
|||
|
|
- 'read_avg_lim': 300,
|
|||
|
|
- 'write_avg_lim': 300,
|
|||
|
|
- 'read_avg_time': 3,
|
|||
|
|
- 'write_avg_time': 3,
|
|||
|
|
- 'read_tot_lim': 500,
|
|||
|
|
- 'write_tot_lim': 500,
|
|||
|
|
- }, 'latency_sata_ssd' : {
|
|||
|
|
- 'read_avg_lim': 10000,
|
|||
|
|
- 'write_avg_lim': 10000,
|
|||
|
|
- 'read_avg_time': 3,
|
|||
|
|
- 'write_avg_time': 3,
|
|||
|
|
- 'read_tot_lim': 50000,
|
|||
|
|
- 'write_tot_lim': 50000,
|
|||
|
|
- }, 'latency_sata_hdd' : {
|
|||
|
|
- 'read_avg_lim': 15000,
|
|||
|
|
- 'write_avg_lim': 15000,
|
|||
|
|
- 'read_avg_time': 3,
|
|||
|
|
- 'write_avg_time': 3,
|
|||
|
|
- 'read_tot_lim': 50000,
|
|||
|
|
- 'write_tot_lim': 50000
|
|||
|
|
- }, 'iodump': {
|
|||
|
|
- 'read_iodump_lim': 0,
|
|||
|
|
- 'write_iodump_lim': 0
|
|||
|
|
- }
|
|||
|
|
-}
|
|||
|
|
-
|
|||
|
|
-
|
|||
|
|
-def get_section_value(section_name, config):
|
|||
|
|
- common_param = {}
|
|||
|
|
- config_sec = config[section_name]
|
|||
|
|
- for config_key in DEFAULT_PARAM[section_name]:
|
|||
|
|
- if config_key in config_sec:
|
|||
|
|
- if not config_sec[config_key].isdecimal():
|
|||
|
|
- report_alarm_fail(f"Invalid {section_name}.{config_key} config.")
|
|||
|
|
- common_param[config_key] = int(config_sec[config_key])
|
|||
|
|
- else:
|
|||
|
|
- logging.warning(f"Unset {section_name}.{config_key} in config file, use {DEFAULT_PARAM[section_name][config_key]} as default")
|
|||
|
|
- common_param[config_key] = DEFAULT_PARAM[section_name][config_key]
|
|||
|
|
- return common_param
|
|||
|
|
-
|
|||
|
|
-
|
|||
|
|
-def get_log_level(filename):
|
|||
|
|
- if not os.path.exists(filename):
|
|||
|
|
- return logging.INFO
|
|||
|
|
-
|
|||
|
|
- try:
|
|||
|
|
- config = configparser.ConfigParser()
|
|||
|
|
- config.read(filename)
|
|||
|
|
- if not config.has_option(CONF_LOG, CONF_LOG_LEVEL):
|
|||
|
|
- return logging.INFO
|
|||
|
|
- log_level = config.get(CONF_LOG, CONF_LOG_LEVEL)
|
|||
|
|
-
|
|||
|
|
- if log_level.lower() in LogLevel:
|
|||
|
|
- return LogLevel.get(log_level.lower())
|
|||
|
|
- return logging.INFO
|
|||
|
|
- except configparser.Error:
|
|||
|
|
- return logging.INFO
|
|||
|
|
-
|
|||
|
|
|
|||
|
|
def get_nested_value(data, keys):
|
|||
|
|
"""get data from nested dict"""
|
|||
|
|
--
|
|||
|
|
2.27.0
|