listen thread of collect module exits occasionally

This commit is contained in:
zhuofeng 2024-10-16 11:59:09 +08:00
parent 9373a29382
commit 0337360f50
2 changed files with 112 additions and 1 deletions

View File

@@ -0,0 +1,104 @@
From 2135b4e41666d99922eda79e9ee04bbc2b557fea Mon Sep 17 00:00:00 2001
From: zhuofeng <zhuofeng2@huawei.com>
Date: Wed, 16 Oct 2024 12:13:21 +0800
Subject: [PATCH] listen thread of collect module exits occasionally
---
src/python/sentryCollector/collect_io.py | 4 +---
src/python/sentryCollector/collect_server.py | 18 ++++++++----------
2 files changed, 9 insertions(+), 13 deletions(-)
diff --git a/src/python/sentryCollector/collect_io.py b/src/python/sentryCollector/collect_io.py
index 5fe1efc..de308b3 100644
--- a/src/python/sentryCollector/collect_io.py
+++ b/src/python/sentryCollector/collect_io.py
@@ -231,9 +231,7 @@ class CollectIo():
if self.get_blk_io_hierarchy(disk_name, stage_list) < 0:
continue
self.append_period_lat(disk_name, stage_list)
-
- logging.debug(f"no-lock collect data : {IO_GLOBAL_DATA}")
-
+
elapsed_time = time.time() - start_time
sleep_time = self.period_time - elapsed_time
if sleep_time < 0:
diff --git a/src/python/sentryCollector/collect_server.py b/src/python/sentryCollector/collect_server.py
index 11d1af0..ad3ac0e 100644
--- a/src/python/sentryCollector/collect_server.py
+++ b/src/python/sentryCollector/collect_server.py
@@ -64,7 +64,7 @@ class CollectServer():
self.io_global_data = IO_GLOBAL_DATA
if len(IO_CONFIG_DATA) == 0:
- logging.error("the collect thread is not started, the data is invalid. ")
+ logging.error("the collect thread is not started, the data is invalid.")
return json.dumps(result_rev)
period_time = IO_CONFIG_DATA[0]
@@ -75,7 +75,7 @@ class CollectServer():
stage_list = json.loads(data_struct['stage'])
if (period < period_time) or (period > period_time * max_save) or (period % period_time):
- logging.error("is_iocollect_valid: period time: %d is invalid", period)
+ logging.error("is_iocollect_valid: period time is invalid, user period: %d, config period_time: %d", period, period_time)
return json.dumps(result_rev)
for disk_name, stage_info in self.io_global_data.items():
@@ -96,7 +96,7 @@ class CollectServer():
self.io_global_data = IO_GLOBAL_DATA
if len(IO_CONFIG_DATA) == 0:
- logging.error("the collect thread is not started, the data is invalid. ")
+ logging.error("the collect thread is not started, the data is invalid.")
return json.dumps(result_rev)
period_time = IO_CONFIG_DATA[0]
max_save = IO_CONFIG_DATA[1]
@@ -107,11 +107,11 @@ class CollectServer():
iotype_list = json.loads(data_struct['iotype'])
if (period < period_time) or (period > period_time * max_save) or (period % period_time):
- logging.error("get_io_data: period time: %d is invalid", period)
+ logging.error("get_io_data: period time is invalid, user period: %d, config period_time: %d", period, period_time)
return json.dumps(result_rev)
collect_index = period // period_time - 1
- logging.debug("period: %d, collect_index: %d", period, collect_index)
+ logging.debug("user period: %d, config period_time: %d, collect_index: %d", period, period_time, collect_index)
for disk_name, stage_info in self.io_global_data.items():
if disk_name not in disk_list:
@@ -124,7 +124,7 @@ class CollectServer():
for iotype_name, iotype_info in iotype_info.items():
if iotype_name not in iotype_list:
continue
- if len(iotype_info) < collect_index:
+ if len(iotype_info) - 1 < collect_index:
continue
result_rev[disk_name][stage_name][iotype_name] = iotype_info[collect_index]
@@ -250,10 +250,8 @@ class CollectServer():
except socket.error:
logging.error("server fd create failed")
server_fd = None
-
return server_fd
-
def server_loop(self):
"""main loop"""
logging.info("collect listen thread start")
@@ -277,8 +275,8 @@ class CollectServer():
self.server_recv(server_fd)
else:
continue
- except socket.error:
- pass
+ except Exception:
+ logging.error('collect listen exception : %s', traceback.format_exc())
def stop_thread(self):
self.stop_event.set()
--
2.33.0

View File

@@ -4,7 +4,7 @@
Summary: System Inspection Framework Summary: System Inspection Framework
Name: sysSentry Name: sysSentry
Version: 1.0.2 Version: 1.0.2
Release: 40 Release: 41
License: Mulan PSL v2 License: Mulan PSL v2
Group: System Environment/Daemons Group: System Environment/Daemons
Source0: https://gitee.com/openeuler/sysSentry/releases/download/v%{version}/%{name}-%{version}.tar.gz Source0: https://gitee.com/openeuler/sysSentry/releases/download/v%{version}/%{name}-%{version}.tar.gz
@@ -59,6 +59,7 @@ Patch46: ai_block_io-fix-some-bugs.patch
Patch47: refactor-config.py-and-bugfix-uncorrect-slow-io-repo.patch Patch47: refactor-config.py-and-bugfix-uncorrect-slow-io-repo.patch
Patch48: get_io_data-failed-wont-stop-avg_block_io-and-del-di.patch Patch48: get_io_data-failed-wont-stop-avg_block_io-and-del-di.patch
Patch49: fix-ai_block_io-root-cause-bug.patch Patch49: fix-ai_block_io-root-cause-bug.patch
Patch50: listen-thread-of-collect-module-exits-occasionally.patch
BuildRequires: cmake gcc-c++ BuildRequires: cmake gcc-c++
BuildRequires: python3 python3-setuptools BuildRequires: python3 python3-setuptools
@@ -321,6 +322,12 @@ rm -rf %{buildroot}
%attr(0550,root,root) %{python3_sitelib}/sentryCollector/__pycache__/collect_plugin* %attr(0550,root,root) %{python3_sitelib}/sentryCollector/__pycache__/collect_plugin*
%changelog %changelog
* Wed Oct 16 2024 zhuofeng <zhuofeng2@huawei.com> - 1.0.2-41
- Type:bugfix
- CVE:NA
- SUG:NA
- DESC:listen thread of collect module exits occasionally
* Wed Oct 16 2024 heyouzhi <heyouzhi@huawei.com> - 1.0.2-40 * Wed Oct 16 2024 heyouzhi <heyouzhi@huawei.com> - 1.0.2-40
- Type:bugfix - Type:bugfix
- CVE:NA - CVE:NA