From dea58a559f3dbad3dbce3b681639ee89c20b1cee Mon Sep 17 00:00:00 2001 From: zhuofeng Date: Fri, 20 Sep 2024 14:35:39 +0800 Subject: [PATCH] fix some about collect module and avg block io --- config/tasks/avg_block_io.mod | 4 ++-- src/python/sentryCollector/collect_io.py | 18 +++++++++++------- src/python/sentryCollector/collect_plugin.py | 17 ++++++++--------- src/python/sentryCollector/collect_server.py | 6 +++--- src/python/sentryCollector/collectd.py | 2 -- .../sentryPlugins/avg_block_io/avg_block_io.py | 13 ++++++++++--- 6 files changed, 34 insertions(+), 26 deletions(-) diff --git a/config/tasks/avg_block_io.mod b/config/tasks/avg_block_io.mod index 814c483..b9b6f34 100644 --- a/config/tasks/avg_block_io.mod +++ b/config/tasks/avg_block_io.mod @@ -1,5 +1,5 @@ [common] enabled=yes task_start=/usr/bin/python3 /usr/bin/avg_block_io -task_stop=pkill avg_block_io -type=oneshot \ No newline at end of file +task_stop=pkill -f /usr/bin/avg_block_io +type=oneshot diff --git a/src/python/sentryCollector/collect_io.py b/src/python/sentryCollector/collect_io.py index b826dc4..104b734 100644 --- a/src/python/sentryCollector/collect_io.py +++ b/src/python/sentryCollector/collect_io.py @@ -175,8 +175,7 @@ class CollectIo(): threading.Timer(self.period_time, self.task_loop).start() - def main_loop(self): - logging.info("collect io thread start") + def is_kernel_avaliable(self): base_path = '/sys/kernel/debug/block' for disk_name in os.listdir(base_path): if not self.loop_all and disk_name not in self.disk_list: @@ -198,8 +197,13 @@ class CollectIo(): self.window_value[disk_name] = {} IO_GLOBAL_DATA[disk_name] = {} - if len(self.disk_map_stage) == 0: - logging.warning("no disks meet the requirements. the thread exits") + return len(IO_GLOBAL_DATA) != 0 + + def main_loop(self): + logging.info("collect io thread start") + + if not self.is_kernel_avaliable() or len(self.disk_map_stage) == 0: + logging.warning("no disks meet the requirements. collect io thread exits") return for disk_name, stage_list in self.disk_map_stage.items(): @@ -213,7 +217,7 @@ class CollectIo(): start_time = time.time() if self.stop_event.is_set(): - logging.info("collect io thread exit") + logging.debug("collect io thread exit") return for disk_name, stage_list in self.disk_map_stage.items(): @@ -227,7 +231,7 @@ class CollectIo(): continue while sleep_time > 1: if self.stop_event.is_set(): - logging.info("collect io thread exit") + logging.debug("collect io thread exit") return time.sleep(1) sleep_time -= 1 @@ -235,5 +239,5 @@ class CollectIo(): # set stop event, notify thread exit def stop_thread(self): - logging.info("collect io thread is preparing to exit") + logging.debug("collect io thread is preparing to exit") self.stop_event.set() diff --git a/src/python/sentryCollector/collect_plugin.py b/src/python/sentryCollector/collect_plugin.py index 49ce0a8..9132473 100644 --- a/src/python/sentryCollector/collect_plugin.py +++ b/src/python/sentryCollector/collect_plugin.py @@ -142,22 +142,21 @@ def validate_parameters(param, len_limit, char_limit): ret = ResultMessage.RESULT_INVALID_LENGTH return [False, ret] - if len(param) > len_limit: - print(f"{param} length more than {len_limit}") - ret = ResultMessage.RESULT_EXCEED_LIMIT - return [False, ret] - pattern = r'^[a-zA-Z0-9_-]+$' for info in param: - if len(info) > char_limit: - print(f"{info} length more than {char_limit}") - ret = ResultMessage.RESULT_EXCEED_LIMIT - return [False, ret] if not re.match(pattern, info): print(f"{info} is invalid char") ret = ResultMessage.RESULT_INVALID_CHAR return [False, ret] + # length of len_limit is exceeded, keep len_limit + if len(param) > len_limit: + print(f"{param} length more than {len_limit}, keep the first {len_limit}") + param[:] = param[0:len_limit] + + # only keep elements under the char_limit length + param[:] = [elem for elem in param if len(elem) <= char_limit] + return [True, ret] def is_iocollect_valid(period, disk_list=None, stage=None): diff --git a/src/python/sentryCollector/collect_server.py b/src/python/sentryCollector/collect_server.py index fa49781..bab4e56 100644 --- a/src/python/sentryCollector/collect_server.py +++ b/src/python/sentryCollector/collect_server.py @@ -256,7 +256,7 @@ class CollectServer(): def server_loop(self): """main loop""" - logging.info("collect server thread start") + logging.info("collect listen thread start") server_fd = self.server_fd_create() if not server_fd: return @@ -267,7 +267,7 @@ class CollectServer(): logging.debug("start server_loop loop") while True: if self.stop_event.is_set(): - logging.info("collect server thread exit") + logging.debug("collect listen thread exit") server_fd = None return try: @@ -281,5 +281,5 @@ class CollectServer(): pass def stop_thread(self): - logging.info("collect server thread is preparing to exit") + logging.debug("collect listen thread is preparing to exit") self.stop_event.set() diff --git a/src/python/sentryCollector/collectd.py b/src/python/sentryCollector/collectd.py index b77c642..3a836df 100644 --- a/src/python/sentryCollector/collectd.py +++ b/src/python/sentryCollector/collectd.py @@ -49,7 +49,6 @@ def sig_handler(signum, _f): Thread_List[i][0].stop_thread() remove_sock_file() - sys.exit(0) def main(): """main @@ -64,7 +63,6 @@ def main(): try: signal.signal(signal.SIGINT, sig_handler) signal.signal(signal.SIGTERM, sig_handler) - signal.signal(signal.SIGHUP, sig_handler) logging.info("finish main parse_args") diff --git a/src/python/sentryPlugins/avg_block_io/avg_block_io.py b/src/python/sentryPlugins/avg_block_io/avg_block_io.py index ff2071d..73f0b22 100644 --- a/src/python/sentryPlugins/avg_block_io/avg_block_io.py +++ b/src/python/sentryPlugins/avg_block_io/avg_block_io.py @@ -21,7 +21,7 @@ CONFIG_FILE = "/etc/sysSentry/plugins/avg_block_io.ini" def log_invalid_keys(not_in_list, keys_name, config_list, default_list): """print invalid log""" - if config_list and default_list: + if config_list and not_in_list: logging.warning("{} in common.{} are not valid, set {}={}".format(not_in_list, keys_name, keys_name, default_list)) elif config_list == ["default"]: logging.warning("Default {} use {}".format(keys_name, default_list)) @@ -144,9 +144,11 @@ def init_io_win(io_dic, config, common_param): if avg_lim_value and avg_time_value and tot_lim_value: io_data[disk_name][stage_name][rw]["latency"] = IoWindow(window_size=io_dic["win_size"], window_threshold=io_dic["win_threshold"], abnormal_multiple=avg_time_value, abnormal_multiple_lim=avg_lim_value, abnormal_time=tot_lim_value) + logging.debug("Successfully create {}-{}-{} latency window".format(disk_name, stage_name, rw)) if iodump_lim_value is not None: io_data[disk_name][stage_name][rw]["iodump"] = IoDumpWindow(window_size=io_dic["win_size"], window_threshold=io_dic["win_threshold"], abnormal_time=iodump_lim_value) + logging.debug("Successfully create {}-{}-{} iodump window".format(disk_name, stage_name, rw)) return io_data, io_avg_value @@ -159,10 +161,10 @@ def get_valid_disk_stage_list(io_dic, config_disk, config_stage): for disk_stage_list in json_data.values(): all_stage_set.update(disk_stage_list) - disk_list = [key for key in config_disk if key in all_disk_set] + disk_list = [key for key in all_disk_set if key in config_disk] not_in_disk_list = [key for key in config_disk if key not in all_disk_set] - stage_list = [key for key in config_stage if key in all_stage_set] + stage_list = [key for key in all_stage_set if key in config_stage] not_in_stage_list = [key for key in config_stage if key not in all_stage_set] if not config_disk: @@ -171,6 +173,9 @@ def get_valid_disk_stage_list(io_dic, config_disk, config_stage): if not config_stage: stage_list = [key for key in all_stage_set] + disk_list = disk_list[:10] if len(disk_list) > 10 else disk_list + stage_list = stage_list[:15] if len(stage_list) > 15 else stage_list + if config_disk and not disk_list: logging.warning("Cannot get valid disk by disk={}, set to default".format(config_disk)) disk_list, stage_list = get_valid_disk_stage_list(io_dic, [], config_stage) @@ -228,6 +233,8 @@ def main(): signal.signal(signal.SIGINT, sig_handler) signal.signal(signal.SIGTERM, sig_handler) + logging.basicConfig(level=logging.INFO) + # 初始化配置读取 config = configparser.ConfigParser(comment_prefixes=('#', ';')) try: -- 2.33.0