sysSentry/get_io_data-failed-wont-stop-avg_block_io-and-del-di.patch

169 lines
7.1 KiB
Diff
Raw Normal View History

From b21607fcec4b290bc78c9f6c4a26db1a2df32a66 Mon Sep 17 00:00:00 2001
From: gaoruoshu <gaoruoshu@huawei.com>
Date: Tue, 15 Oct 2024 21:21:10 +0800
Subject: [PATCH] get_io_data failed won't stop avg_block_io and del disk not
support
---
src/python/sentryCollector/collect_plugin.py | 14 ++++-----
.../avg_block_io/avg_block_io.py | 9 ++++--
.../sentryPlugins/avg_block_io/module_conn.py | 31 +++++++++++++------
3 files changed, 35 insertions(+), 19 deletions(-)
diff --git a/src/python/sentryCollector/collect_plugin.py b/src/python/sentryCollector/collect_plugin.py
index bec405a..53dddec 100644
--- a/src/python/sentryCollector/collect_plugin.py
+++ b/src/python/sentryCollector/collect_plugin.py
@@ -90,14 +90,14 @@ def client_send_and_recv(request_data, data_str_len, protocol):
try:
client_socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
except socket.error:
- logging.error("collect_plugin: client create socket error")
+ logging.debug("collect_plugin: client create socket error")
return None
try:
client_socket.connect(COLLECT_SOCKET_PATH)
except OSError:
client_socket.close()
- logging.error("collect_plugin: client connect error")
+ logging.debug("collect_plugin: client connect error")
return None
req_data_len = len(request_data)
@@ -109,23 +109,23 @@ def client_send_and_recv(request_data, data_str_len, protocol):
res_data = res_data.decode()
except (OSError, UnicodeError):
client_socket.close()
- logging.error("collect_plugin: client communicate error")
+ logging.debug("collect_plugin: client communicate error")
return None
res_magic = res_data[:CLT_MSG_MAGIC_LEN]
if res_magic != "RES":
- logging.error("res msg format error")
+ logging.debug("res msg format error")
return None
protocol_str = res_data[CLT_MSG_MAGIC_LEN:CLT_MSG_MAGIC_LEN+CLT_MSG_PRO_LEN]
try:
protocol_id = int(protocol_str)
except ValueError:
- logging.error("recv msg protocol id is invalid %s", protocol_str)
+ logging.debug("recv msg protocol id is invalid %s", protocol_str)
return None
if protocol_id >= ClientProtocol.PRO_END:
- logging.error("protocol id is invalid")
+ logging.debug("protocol id is invalid")
return None
try:
@@ -134,7 +134,7 @@ def client_send_and_recv(request_data, data_str_len, protocol):
res_msg_data = res_msg_data.decode()
return res_msg_data
except (OSError, ValueError, UnicodeError):
- logging.error("collect_plugin: client recv res msg error")
+ logging.debug("collect_plugin: client recv res msg error")
finally:
client_socket.close()
diff --git a/src/python/sentryPlugins/avg_block_io/avg_block_io.py b/src/python/sentryPlugins/avg_block_io/avg_block_io.py
index cd47919..899d517 100644
--- a/src/python/sentryPlugins/avg_block_io/avg_block_io.py
+++ b/src/python/sentryPlugins/avg_block_io/avg_block_io.py
@@ -15,7 +15,7 @@ import time
from .config import read_config_log, read_config_common, read_config_algorithm, read_config_latency, read_config_iodump, read_config_stage
from .stage_window import IoWindow, IoDumpWindow
-from .module_conn import avg_is_iocollect_valid, avg_get_io_data, report_alarm_fail, process_report_data, sig_handler, get_disk_type_by_name
+from .module_conn import avg_is_iocollect_valid, avg_get_io_data, report_alarm_fail, process_report_data, sig_handler, get_disk_type_by_name, check_disk_list_validation
from .utils import update_avg_and_check_abnormal
CONFIG_FILE = "/etc/sysSentry/plugins/avg_block_io.ini"
@@ -79,6 +79,8 @@ def get_valid_disk_stage_list(io_dic, config_disk, config_stage):
if not disk_list:
report_alarm_fail("Cannot get valid disk name")
+ disk_list = check_disk_list_validation(disk_list)
+
disk_list = disk_list[:10] if len(disk_list) > 10 else disk_list
if not config_disk:
@@ -117,7 +119,10 @@ def main_loop(io_dic, io_data, io_avg_value):
time.sleep(period_time)
# 采集模块对接,获取周期数据
- curr_period_data = avg_get_io_data(io_dic)
+ is_success, curr_period_data = avg_get_io_data(io_dic)
+ if not is_success:
+ logging.error(f"{curr_period_data['msg']}")
+ continue
# 处理周期数据
reach_size = False
diff --git a/src/python/sentryPlugins/avg_block_io/module_conn.py b/src/python/sentryPlugins/avg_block_io/module_conn.py
index cbdaad4..a67ef45 100644
--- a/src/python/sentryPlugins/avg_block_io/module_conn.py
+++ b/src/python/sentryPlugins/avg_block_io/module_conn.py
@@ -40,25 +40,25 @@ def avg_is_iocollect_valid(io_dic, config_disk, config_stage):
logging.debug(f"send to sentryCollector is_iocollect_valid: period={io_dic['period_time']}, "
f"disk={config_disk}, stage={config_stage}")
res = is_iocollect_valid(io_dic["period_time"], config_disk, config_stage)
- return check_result_validation(res, 'check config validation')
+ is_success, data = check_result_validation(res, 'check config validation')
+ if not is_success:
+ report_alarm_fail(f"{data['msg']}")
+ return data
def check_result_validation(res, reason):
"""check validation of result from sentryCollector"""
if not 'ret' in res or not 'message' in res:
- err_msg = "Failed to {}: Cannot connect to sentryCollector.".format(reason)
- report_alarm_fail(err_msg)
+ return False, {'msg': f"Failed to {reason}: Cannot connect to sentryCollector"}
if res['ret'] != 0:
- err_msg = "Failed to {}: {}".format(reason, Result_Messages[res['ret']])
- report_alarm_fail(err_msg)
+ return False, {'msg': f"Failed to {reason}: {Result_Messages[res['ret']]}"}
try:
json_data = json.loads(res['message'])
except json.JSONDecodeError:
- err_msg = f"Failed to {reason}: invalid return message"
- report_alarm_fail(err_msg)
+ return False, {'msg': f"Failed to {reason}: invalid return message"}
- return json_data
+ return True, json_data
def report_alarm_fail(alarm_info):
@@ -120,10 +120,21 @@ def process_report_data(disk_name, rw, io_data):
xalarm_report(1002, MINOR_ALM, ALARM_TYPE_OCCUR, json.dumps(msg))
+def check_disk_list_validation(disk_list):
+ valid_disk_list = []
+ for disk_name in disk_list:
+ is_success, _ = check_result_validation(get_disk_type(disk_name), "")
+ if not is_success:
+ continue
+ valid_disk_list.append(disk_name)
+ return valid_disk_list
+
+
def get_disk_type_by_name(disk_name):
logging.debug(f"send to sentryCollector get_disk_type: disk_name={disk_name}")
- res = get_disk_type(disk_name)
- disk_type_str = check_result_validation(get_disk_type(disk_name), f'Invalid disk type {disk_name}')
+ is_success, disk_type_str = check_result_validation(get_disk_type(disk_name), f'Invalid disk type {disk_name}')
+ if not is_success:
+ report_alarm_fail(f"{disk_type_str['msg']}")
try:
curr_disk_type = int(disk_type_str)
if curr_disk_type not in Disk_Type:
--
2.27.0