sysSentry/add-sentryctl-get_alarm-module_name-s-time_range-d.patch
jinsaihang ae5556ff59 add alarm event query function
Signed-off-by: jinsaihang <jinsaihang@h-partners.com>
2024-10-08 20:12:17 +08:00

439 lines
16 KiB
Diff
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

From 8fa9389a85763831ea85d94f179a305d7f95d585 Mon Sep 17 00:00:00 2001
From: jinsaihang <jinsaihang@h-partners.com>
Date: Sun, 29 Sep 2024 02:04:52 +0000
Subject: [PATCH] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E5=91=8A=E8=AD=A6=E4=BA=8B?=
=?UTF-8?q?=E4=BB=B6=E6=9F=A5=E8=AF=A2=E5=8A=9F=E8=83=BD=EF=BC=9Asentryctl?=
=?UTF-8?q?=20get=5Falarm=20<module=5Fname>=20-s=20<time=5Frange>=20-d?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: jinsaihang <jinsaihang@h-partners.com>
---
src/python/syssentry/alarm.py | 142 ++++++++++++++++++
.../src/python/syssentry/callbacks.py | 17 +++
.../src/python/syssentry/global_values.py | 4 +
.../src/python/syssentry/load_mods.py | 16 ++
.../src/python/syssentry/sentryctl | 20 ++-
.../src/python/syssentry/syssentry.py | 13 +-
.../src/python/syssentry/task_map.py | 5 +-
7 files changed, 212 insertions(+), 5 deletions(-)
create mode 100644 src/python/syssentry/alarm.py
diff --git a/src/python/syssentry/alarm.py b/src/python/syssentry/alarm.py
new file mode 100644
index 0000000..74a2716
--- /dev/null
+++ b/src/python/syssentry/alarm.py
@@ -0,0 +1,142 @@
+# coding: utf-8
+# Copyright (c) 2024 Huawei Technologies Co., Ltd.
+# sysSentry is licensed under the Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+# PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+"""
+use for report alarm
+"""
+import threading
+from typing import Dict, List
+from datetime import datetime
+import time
+import logging
+import json
+
+from xalarm.register_xalarm import xalarm_register,xalarm_getid,xalarm_getlevel,xalarm_gettype,xalarm_gettime,xalarm_getdesc
+from xalarm.xalarm_api import Xalarm
+
+from .global_values import InspectTask
+from .task_map import TasksMap
+
+# Alarm-ID map: key is the plugin (task) name, value is the numeric alarm ID
+task_alarm_id_dict: Dict[str, int] = {}
+
+# Alarm clear-time map: key is the alarm ID, value is the clear (aging) time, a number in seconds
+alarm_id_clear_time_dict: Dict[int, int] = {}
+
+# Alarm event lists: key is the alarm ID, value is the list of alarm events for that ID
+alarm_list_dict: Dict[int, List[Xalarm]] = {}
+# 告警事件列表锁
+alarm_list_lock = threading.Lock()
+
+id_filter = []
+id_base = 1001
+clientId = -1
+
+MILLISECONDS_UNIT_SECONDS = 1000
+
+def update_alarm_list(alarm_info: Xalarm):
+    """
+    xalarm callback: record a newly reported alarm event.
+    The event is put at the head of the list kept for its alarm id, and the
+    list is cut at the first entry older than that id's clear time.
+    """
+    alarm_id = xalarm_getid(alarm_info)
+    timestamp = xalarm_gettime(alarm_info)
+    if not timestamp:
+        logging.error("Retrieve timestamp failed")
+        return
+    with alarm_list_lock:
+        if alarm_id not in alarm_list_dict:
+            logging.warning(f"update_alarm_list: alarm_id {alarm_id} not found in alarm_list_dict")
+            return
+        events = alarm_list_dict[alarm_id]
+        # newest event goes to the head of the list
+        events.insert(0, alarm_info)
+        # ages are measured against the new event's time; the difference is
+        # divided by MILLISECONDS_UNIT_SECONDS before comparing with clear_time
+        clear_time = alarm_id_clear_time_dict[alarm_id]
+        for pos, event in enumerate(events):
+            if (timestamp - xalarm_gettime(event)) / MILLISECONDS_UNIT_SECONDS > clear_time:
+                # drop this entry and everything after it
+                alarm_list_dict[alarm_id] = events[:pos]
+                break
+
+def alarm_register():
+    """
+    Fill the alarm tables from TasksMap and register update_alarm_list with xalarm.
+    Returns the client id from xalarm_register; a negative id is logged as failure.
+    """
+    logging.debug("alarm_register: enter")
+    # initialise the task -> alarm id map and the alarm id -> clear time map
+    for task_type in TasksMap.tasks_dict:
+        for task_name, task in TasksMap.tasks_dict[task_type].items():
+            logging.info(f"alarm_register: {task_name} is registered")
+            alarm_id = task.alarm_id
+            alarm_list_dict[alarm_id] = []
+            task_alarm_id_dict[task_name] = alarm_id
+            known = alarm_id_clear_time_dict.get(alarm_id, task.alarm_clear_time)
+            alarm_id_clear_time_dict[alarm_id] = max(task.alarm_clear_time, known)  # shared ids keep the largest
+    # register the alarm callback (locals only: module-level id_filter/clientId stay untouched)
+    accept_all = [True] * 128
+    client_id = xalarm_register(update_alarm_list, accept_all)
+    if client_id < 0:
+        logging.info('register xalarm: failed')
+    else:
+        logging.info('register xalarm: success')
+    return client_id
+
+def get_alarm_result(task_name: str, time_range: int, detailed: bool) -> List[Dict]:
+    """
+    Return the alarm events kept for task_name that are at most time_range seconds
+    old, as dicts whose 'alarm_info' holds the decoded alarm description.
+    'details' is stripped unless detailed is set; bad time_range or unknown task gives [].
+    """
+    try:
+        time_range = int(time_range)
+    except (TypeError, ValueError):
+        logging.error("get_alarm_result: invalid time_range %s", time_range)
+        return []
+    with alarm_list_lock:
+        if task_name not in task_alarm_id_dict:
+            logging.debug("task_name does not exist")
+            return []
+        alarm_id = task_alarm_id_dict[task_name]
+        if alarm_id not in alarm_list_dict:
+            logging.debug("alarm_id does not exist")
+            return []
+        alarm_list = alarm_list_dict[alarm_id]
+        logging.debug(f"get_alarm_result: alarm_list of {alarm_id} has {len(alarm_list)} elements")
+        # stop at the first event outside the window; the stored list itself is not modified
+        timestamp = int(datetime.now().timestamp())
+        for i, alarm in enumerate(alarm_list):
+            logging.debug(f"timestamp, alarm_list[{i}].timestamp: {timestamp}, {xalarm_gettime(alarm)}")
+            if timestamp - xalarm_gettime(alarm) / MILLISECONDS_UNIT_SECONDS > time_range:
+                alarm_list = alarm_list[:i]
+                break
+        logging.debug(f"get_alarm_result: final alarm_list of {alarm_id} has {len(alarm_list)} elements")
+        result = []
+        for alarm in alarm_list:
+            desc = xalarm_getdesc(alarm)
+            try:
+                alarm_info = json.loads(desc)
+            except (TypeError, ValueError):
+                # one malformed description must not fail the whole query; keep it raw
+                alarm_info = desc
+            if not detailed and isinstance(alarm_info, dict):
+                alarm_info.pop('details', None)
+            result.append({
+                'alarm_id': xalarm_getid(alarm),
+                'alarm_type': xalarm_gettype(alarm),
+                'alarm_level': xalarm_getlevel(alarm),
+                'timetamp': xalarm_gettime(alarm),  # NOTE(review): misspelt key kept for output compatibility
+                'alarm_info': alarm_info
+            })
+        return result
diff --git a/src/python/syssentry/callbacks.py b/src/python/syssentry/callbacks.py
index b38b381..6ec2c29 100644
--- a/src/python/syssentry/callbacks.py
+++ b/src/python/syssentry/callbacks.py
@@ -18,6 +18,7 @@ import logging
from .task_map import TasksMap, ONESHOT_TYPE, PERIOD_TYPE
from .mod_status import EXITED_STATUS, RUNNING_STATUS, WAITING_STATUS, set_runtime_status
+from .alarm import get_alarm_result
def task_get_status(mod_name):
@@ -41,6 +42,22 @@ def task_get_result(mod_name):
return "success", task.get_result()
+def task_get_alarm(data):
+    """
+    get alarm by mod name
+    data carries 'task_name', 'time_range' and optionally 'detailed'.
+    Returns ("success", alarm list) or ("failed", reason).
+    """
+    task_name, time_range = data['task_name'], data['time_range']
+    if 'detailed' in data:
+        detailed = data['detailed']
+    else:
+        logging.debug("Key 'detailed' does not exist in the dictionary")
+        detailed = None
+
+    task = TasksMap.get_task_by_name(task_name)
+    if not task:
+        return "failed", f"cannot find task by name {task_name}"
+    if task.load_enabled:
+        return "success", get_alarm_result(task_name, time_range, detailed)
+    return "failed", f"mod {task_name} is not enabled"
def task_stop(mod_name):
"""stop by mod name"""
diff --git a/src/python/syssentry/global_values.py b/src/python/syssentry/global_values.py
index 483d544..b123b2d 100644
--- a/src/python/syssentry/global_values.py
+++ b/src/python/syssentry/global_values.py
@@ -27,6 +27,7 @@ CTL_SOCKET_PATH = "/var/run/sysSentry/control.sock"
SYSSENTRY_CONF_PATH = "/etc/sysSentry"
INSPECT_CONF_PATH = "/etc/sysSentry/inspect.conf"
TASK_LOG_DIR = "/var/log/sysSentry"
+DEFAULT_ALARM_CLEAR_TIME = 15
SENTRY_RUN_DIR_PERM = 0o750
@@ -76,6 +77,9 @@ class InspectTask:
self.env_file = ""
# start mode
self.conflict = "up"
+ # alarm id
+ self.alarm_id = -1
+ self.alarm_clear_time = DEFAULT_ALARM_CLEAR_TIME
def start(self):
"""
diff --git a/src/python/syssentry/load_mods.py b/src/python/syssentry/load_mods.py
index 48d7e66..ae05e57 100644
--- a/src/python/syssentry/load_mods.py
+++ b/src/python/syssentry/load_mods.py
@@ -24,6 +24,7 @@ from .task_map import TasksMap, ONESHOT_TYPE, PERIOD_TYPE
from .cron_process import PeriodTask
from .mod_status import set_task_status
+from xalarm.register_xalarm import MIN_ALARM_ID, MAX_ALARM_ID
ONESHOT_CONF = 'oneshot'
PERIOD_CONF = 'period'
@@ -41,6 +42,8 @@ CONF_TASK_RESTART = 'task_restart'
CONF_ONSTART = 'onstart'
CONF_ENV_FILE = 'env_file'
CONF_CONFLICT = 'conflict'
+CONF_ALARM_ID = 'alarm_id'
+CONF_ALARM_CLEAR_TIME = 'alarm_clear_time'
MOD_FILE_SUFFIX = '.mod'
MOD_SUFFIX_LEN = 4
@@ -194,6 +197,18 @@ def parse_mod_conf(mod_name, mod_conf):
task.heartbeat_interval = heartbeat_interval
task.load_enabled = is_enabled
+ try:
+ task.alarm_id = int(mod_conf.get(CONF_TASK, CONF_ALARM_ID))
+ task.alarm_clear_time = int(mod_conf.get(CONF_TASK, CONF_ALARM_CLEAR_TIME))
+ if not (MIN_ALARM_ID <= task.alarm_id <= MAX_ALARM_ID):
+ raise ValueError("Invalid alarm_id")
+ except ValueError:
+ task.alarm_id = -1
+ logging.warning("Invalid alarm_id, set to -1")
+ except configparser.NoOptionError:
+ task.alarm_id = -1
+ logging.warning("Unset alarm_id and alarm_clear_time, use -1 and 15s as default")
+
if CONF_ONSTART in mod_conf.options(CONF_TASK):
is_onstart = (mod_conf.get(CONF_TASK, CONF_ONSTART) == 'yes')
if task_type == PERIOD_CONF:
@@ -327,3 +342,4 @@ def reload_single_mod(mod_name):
res, ret = reload_mod_by_name(mod_name)
return res, ret
+
diff --git a/src/python/syssentry/sentryctl b/src/python/syssentry/sentryctl
index e94491f..675c17a 100644
--- a/src/python/syssentry/sentryctl
+++ b/src/python/syssentry/sentryctl
@@ -25,6 +25,7 @@ MAX_PARAM_LENGTH = 256
RESULT_MSG_DATA_LEN = 4
CTL_MSG_LEN_LEN = 3
+DEFAULT_ALARM_TIME_RANGE = 10
def status_output_format(res_data):
"""format output"""
@@ -57,6 +58,8 @@ def res_output_handle(res_struct, req_type):
status_output_format(res_struct['data'])
elif req_type == 'get_result':
result_output_format(res_struct['data'])
+ elif req_type == 'get_alarm':
+ result_output_format(res_struct['data'])
elif res_struct['ret'] == "failed":
print(res_struct['data'])
@@ -75,6 +78,7 @@ def client_send_and_recv(request_data, data_str_len):
print("sentryctl: client creat socket error")
return None
+ # connect to syssentry
try:
client_socket.connect(CTL_SOCKET_PATH)
except OSError:
@@ -82,6 +86,7 @@ def client_send_and_recv(request_data, data_str_len):
print("sentryctl: client connect error")
return None
+ # msg: CTL{len}{data}
req_data_len = len(request_data)
request_msg = "CTL" + str(req_data_len).zfill(3) + request_data
@@ -94,8 +99,8 @@ def client_send_and_recv(request_data, data_str_len):
print("sentryctl: client communicate error")
return None
+ # res: RES{len}{data}
res_magic = res_data[:3]
-
if res_magic != "RES":
print("res msg format error")
return None
@@ -128,6 +133,10 @@ if __name__ == '__main__':
parser_status.add_argument('task_name')
parser_get_result = subparsers.add_parser('get_result', help='get task result')
parser_get_result.add_argument('task_name')
+ parser_get_alarm = subparsers.add_parser('get_alarm', help='get task alarm')
+ parser_get_alarm.add_argument('task_name')
+ parser_get_alarm.add_argument('-s', '--time_range', type=str, default=DEFAULT_ALARM_TIME_RANGE, help='Specified time range')
+ parser_get_alarm.add_argument('-d', '--detailed', action='store_true', help='Print Detailed Information')
parser_list = subparsers.add_parser('list', help='show all loaded task mod')
client_args = parser.parse_args()
@@ -142,6 +151,15 @@ if __name__ == '__main__':
req_msg_struct = {"type": "get_status", "data": client_args.task_name}
elif client_args.cmd_type == 'get_result':
req_msg_struct = {"type": "get_result", "data": client_args.task_name}
+ elif client_args.cmd_type == 'get_alarm':
+ req_msg_struct = {
+ "type": "get_alarm",
+ "data": {
+ 'task_name': client_args.task_name,
+ 'time_range': client_args.time_range,
+ 'detailed': client_args.detailed,
+ }
+ }
elif client_args.cmd_type == 'reload':
req_msg_struct = {"type": "reload", "data": client_args.task_name}
else:
diff --git a/src/python/syssentry/syssentry.py b/src/python/syssentry/syssentry.py
index 9ef0203..c2dee85 100644
--- a/src/python/syssentry/syssentry.py
+++ b/src/python/syssentry/syssentry.py
@@ -28,7 +28,7 @@ from .sentry_config import SentryConfig, get_log_level
from .task_map import TasksMap
from .global_values import SENTRY_RUN_DIR, CTL_SOCKET_PATH, SENTRY_RUN_DIR_PERM
from .cron_process import period_tasks_handle
-from .callbacks import mod_list_show, task_start, task_get_status, task_stop, task_get_result
+from .callbacks import mod_list_show, task_start, task_get_status, task_stop, task_get_result, task_get_alarm
from .mod_status import get_task_by_pid, set_runtime_status
from .load_mods import load_tasks, reload_single_mod
from .heartbeat import (heartbeat_timeout_chk, heartbeat_fd_create,
@@ -36,7 +36,11 @@ from .heartbeat import (heartbeat_timeout_chk, heartbeat_fd_create,
from .result import RESULT_MSG_HEAD_LEN, RESULT_MSG_MAGIC_LEN, RESULT_MAGIC
from .result import RESULT_LEVEL_ERR_MSG_DICT, ResultLevel
from .utils import get_current_time_string
+from .alarm import alarm_register
+from xalarm.register_xalarm import xalarm_unregister
+
+clientId = -1
CPU_EXIST = True
try:
@@ -62,6 +66,7 @@ type_func = {
'stop': task_stop,
'get_status': task_get_status,
'get_result': task_get_result,
+ 'get_alarm': task_get_alarm,
'reload': reload_single_mod
}
@@ -107,11 +112,12 @@ def msg_data_process(msg_data):
return "Invaild cmd type"
cmd_param = data_struct['data']
- logging.debug("msg_data_process cmd_type:%s cmd_param:%s", cmd_type, cmd_param)
+ logging.debug("msg_data_process cmd_type:%s cmd_param:%s", cmd_type, str(cmd_param))
if cmd_type in type_func:
ret, res_data = type_func[cmd_type](cmd_param)
else:
ret, res_data = type_func_void[cmd_type]()
+ logging.debug("msg_data_process res_data:%s",str(res_data))
res_msg_struct = {"ret": ret, "data": res_data}
res_msg = json.dumps(res_msg_struct)
@@ -584,10 +590,13 @@ def main():
_ = SentryConfig.init_param()
TasksMap.init_task_map()
load_tasks()
+ clientId = alarm_register()
main_loop()
except Exception:
logging.error('%s', traceback.format_exc())
finally:
+ if clientId != -1:
+ xalarm_unregister(clientId)
release_pidfile()
diff --git a/src/python/syssentry/task_map.py b/src/python/syssentry/task_map.py
index 70aa19d..27e97ff 100644
--- a/src/python/syssentry/task_map.py
+++ b/src/python/syssentry/task_map.py
@@ -13,16 +13,16 @@
tasks map class and initialize function.
"""
import logging
+from typing import Dict
ONESHOT_TYPE = "ONESHOT"
PERIOD_TYPE = "PERIOD"
TASKS_MAP = None
-
class TasksMap:
"""task map class"""
- tasks_dict = {}
+ tasks_dict: Dict[str, Dict] = {}
@classmethod
def init_task_map(cls):
@@ -65,3 +65,4 @@ class TasksMap:
logging.debug("getting task by name: %s", res)
break
return res
+
--
2.27.0