Compare commits

...

10 Commits

Author SHA1 Message Date
openeuler-ci-bot
91b26141b0
!240 fix period task some bugs and fix env config
From: @tong_1001 
Reviewed-by: @hubin95 
Signed-off-by: @hubin95
2025-03-29 07:43:30 +00:00
shixuantong
b1b02ba203 fix period task some bugs and fix env config 2025-03-29 14:49:26 +08:00
openeuler-ci-bot
87260fb362
!236 add log utils for c
From: @tong_1001 
Reviewed-by: @znzjugod 
Signed-off-by: @znzjugod
2025-03-14 08:42:52 +00:00
shixuantong
d82730b2e6 add log utils for c 2025-03-14 16:16:25 +08:00
openeuler-ci-bot
23cd062a57
!221 ai block io: exit when stage is not supported
From: @luckky7 
Reviewed-by: @znzjugod 
Signed-off-by: @znzjugod
2025-03-14 02:18:58 +00:00
luckky
c6b8404ef4 ai block io: exit when stage is not supported 2025-03-14 10:05:39 +08:00
openeuler-ci-bot
26ec8137f8
!230 [sync] PR-213: fix the sentryCollector service can't be stopped for a long
From: @openeuler-sync-bot 
Reviewed-by: @znzjugod 
Signed-off-by: @znzjugod
2025-03-14 02:00:13 +00:00
zhuofeng
c962ee5f32 fix the sentryCollector service can't be stopped for a long
Signed-off-by: zhuofeng <1107893276@qq.com>
(cherry picked from commit 885bbbd319adfe367baf6e4874b796657fba230e)
2025-03-13 20:39:42 +08:00
openeuler-ci-bot
8474d69ed0
!228 [sync] PR-212: add new func for ebpf in the rq_driver stage
From: @openeuler-sync-bot 
Reviewed-by: @znzjugod 
Signed-off-by: @znzjugod
2025-03-13 12:38:57 +00:00
zhuofeng
52606113b3 add new func for ebpf in the rq_driver stage
Signed-off-by: zhuofeng <1107893276@qq.com>
(cherry picked from commit ec94b36256cf018893e543b4b57e95e6c78e3a4a)
2025-03-13 20:18:05 +08:00
7 changed files with 1014 additions and 1 deletions

203
add-log-utils-for-c.patch Normal file
View File

@ -0,0 +1,203 @@
From 0ee8307d556c200733270fdffd8db2d48869724a Mon Sep 17 00:00:00 2001
From: shixuantong <shixuantong1@huawei.com>
Date: Fri, 14 Mar 2025 14:55:56 +0800
Subject: [PATCH] add log utils for c
---
Makefile | 14 ++++++-
src/libsentry/c/log/CMakeLists.txt | 7 ++++
src/libsentry/c/log/log_utils.c | 45 +++++++++++++++++++++
src/libsentry/c/log/log_utils.h | 64 ++++++++++++++++++++++++++++++
4 files changed, 129 insertions(+), 1 deletion(-)
create mode 100644 src/libsentry/c/log/CMakeLists.txt
create mode 100644 src/libsentry/c/log/log_utils.c
create mode 100644 src/libsentry/c/log/log_utils.h
diff --git a/Makefile b/Makefile
index 29c4a53..73ada63 100644
--- a/Makefile
+++ b/Makefile
@@ -29,12 +29,16 @@ PKGVEREGG := syssentry-$(VERSION)-py$(PYTHON_VERSION).egg-info
all: lib ebpf hbm_online_repair
-lib:libxalarm
+lib:libxalarm log
libxalarm:
cd $(CURLIBDIR) && cmake . -DXD_INSTALL_BINDIR=$(LIBINSTALLDIR) -B build
cd $(CURLIBDIR)/build && make
+log:
+ cd $(CURSRCDIR)/libsentry/c/log && cmake . -B build
+ cd $(CURSRCDIR)/libsentry/c/log/build && make
+
ebpf:
@if [ -d "$(CURSRCDIR)/services/sentryCollector/ebpf_collector/" ]; then \
cd $(CURSRCDIR)/services/sentryCollector/ebpf_collector/ && make; \
@@ -134,6 +138,11 @@ isentry:
# pyxalarm
install -m 550 src/libs/pyxalarm/register_xalarm.py $(PYDIR)/xalarm
+
+ # log utils
+ install -d -m 700 $(INCLUDEDIR)/libsentry
+ install -m 644 $(CURSRCDIR)/libsentry/c/log/log_utils.h $(INCLUDEDIR)/libsentry/
+ install -m 550 $(CURSRCDIR)/libsentry/c/log/build/libsentry_log.so $(LIBINSTALLDIR)
ebpf_clean:
cd $(CURSRCDIR)/services/sentryCollector/ebpf_collector && make clean
@@ -144,6 +153,7 @@ hbm_clean:
clean: ebpf_clean hbm_clean
rm -rf $(CURLIBDIR)/build
rm -rf $(CURSRCDIR)/build
+ rm -rf $(CURSRCDIR)/libsentry/c/log/build
rm -rf $(CURSRCDIR)/syssentry.egg-info
rm -rf $(CURSRCDIR)/SENTRY_FILES
@@ -156,6 +166,8 @@ uninstall:
rm -rf $(BINDIR)/ebpf_collector
rm -rf $(LIBINSTALLDIR)/libxalarm.so
rm -rf $(INCLUDEDIR)/xalarm
+ rm -rf $(LIBINSTALLDIR)/libsentry_log.so
+ rm -rf $(INCLUDEDIR)/libsentry
rm -rf $(ETCDIR)/sysSentry
rm -rf $(ETCDIR)/hbm_online_repair.env
rm -rf $(LOGSAVEDIR)/sysSentry
diff --git a/src/libsentry/c/log/CMakeLists.txt b/src/libsentry/c/log/CMakeLists.txt
new file mode 100644
index 0000000..6488195
--- /dev/null
+++ b/src/libsentry/c/log/CMakeLists.txt
@@ -0,0 +1,7 @@
+# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved.
+# Description: cmake file for log_utils
+# cmake_minimum_required() has to come before project() so that policies are
+# configured before the project and its languages are set up.
+cmake_minimum_required(VERSION 3.22)
+project(sentry_log)
+add_library(sentry_log SHARED log_utils.c)
+# Compiler flags: CMAKE_C_FLAGS is a variable, not a target property, so the
+# flags must be attached with target_compile_options() to take effect.
+# -Wtrampolines is a compiler warning and therefore belongs here, not in the
+# linker flags. -shared is supplied by CMake for SHARED libraries.
+target_compile_options(sentry_log PRIVATE -fPIC -fstack-protector-strong -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror -Wtrampolines -g)
+# Linker hardening flags.
+target_link_options(sentry_log PRIVATE -Wl,-z,relro -Wl,-z,now -Wl,-z,noexecstack)
diff --git a/src/libsentry/c/log/log_utils.c b/src/libsentry/c/log/log_utils.c
new file mode 100644
index 0000000..935e6d6
--- /dev/null
+++ b/src/libsentry/c/log/log_utils.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved.
+ * Description: log utils for sysSentry
+ * Author: sxt1001
+ * Create: 2025-2-16
+ */
+
+#include "log_utils.h"
+
+static LogLevel currentLogLevel = LOG_INFO;
+
+/*
+ * Write one log record: the prefix produced by PRINT_LOG_PREFIX followed by
+ * the printf-style message built from format and the variadic arguments.
+ * Records whose level is below currentLogLevel are dropped.
+ * The destination stream is chosen by LOG_FD(level) and is flushed after
+ * every record.
+ */
+void logMessage(LogLevel level, char* file, int line, const char *format, ...)
+{
+    va_list ap;
+
+    /* Filtered out by the current threshold: nothing to do. */
+    if (level < currentLogLevel) {
+        return;
+    }
+
+    PRINT_LOG_PREFIX(level, file, line);
+
+    va_start(ap, format);
+    vfprintf(LOG_FD(level), format, ap);
+    va_end(ap);
+
+    fflush(LOG_FD(level));
+}
+
+/*
+ * Initialise currentLogLevel from the environment variable LOG_LEVEL_ENV.
+ * Accepted values (exact, case-sensitive match): "info", "warning", "error",
+ * "debug". When the variable is unset or holds any other value the level
+ * stays at LOG_INFO and a warning is logged.
+ * The "Set log level" confirmation is logged at LOG_INFO after the new level
+ * is in effect, so logMessage drops it when the level is warning or error.
+ */
+void setLogLevel()
+{
+    static const struct {
+        const char *name;
+        LogLevel level;
+    } knownLevels[] = {
+        { "info", LOG_INFO },
+        { "warning", LOG_WARN },
+        { "error", LOG_ERROR },
+        { "debug", LOG_DEBUG },
+    };
+    const char *requested = getenv(LOG_LEVEL_ENV);
+
+    currentLogLevel = LOG_INFO;
+
+    if (requested == NULL) {
+        logMessage(LOG_WARN, __FILE__, __LINE__, "getenv('%s') is NULL, use default log level : %s\n", LOG_LEVEL_ENV, LOG_LEVEL_STRING(LOG_INFO));
+        return;
+    }
+
+    for (size_t i = 0; i < sizeof(knownLevels) / sizeof(knownLevels[0]); i++) {
+        if (strcmp(requested, knownLevels[i].name) == 0) {
+            currentLogLevel = knownLevels[i].level;
+            logMessage(LOG_INFO, __FILE__, __LINE__, "Set log level : %s\n", LOG_LEVEL_STRING(currentLogLevel));
+            return;
+        }
+    }
+
+    logMessage(LOG_WARN, __FILE__, __LINE__, "unknown log level : %s, use default log level : %s\n", requested, LOG_LEVEL_STRING(LOG_INFO));
+}
diff --git a/src/libsentry/c/log/log_utils.h b/src/libsentry/c/log/log_utils.h
new file mode 100644
index 0000000..8a56520
--- /dev/null
+++ b/src/libsentry/c/log/log_utils.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved.
+ * Description: log utils for sysSentry
+ * Author: sxt1001
+ * Create: 2025-2-16
+ */
+
+#ifndef _SYSSENTRY_LOG_H
+#define _SYSSENTRY_LOG_H
+
+#include "stdio.h"
+#include <stdlib.h>
+#include "string.h"
+#include "stdarg.h"
+#include "time.h"
+#include "libgen.h"
+
+// Log severities in ascending order; logMessage() prints a record only when
+// its level is >= the currently configured level.
+// NOTE(review): LOG_DEBUG and LOG_INFO are also macro names in <syslog.h>;
+// including both headers in one translation unit would presumably clash -
+// confirm no caller does so.
+typedef enum {
+ LOG_DEBUG = 0,
+ LOG_INFO,
+ LOG_WARN,
+ LOG_ERROR,
+} LogLevel;
+
+// Output stream for a record: LOG_ERROR goes to stderr, everything else to
+// stdout. The argument is parenthesized so that any expression can be passed.
+#define LOG_FD(level) ((level) == LOG_ERROR ? stderr : stdout)
+
+// Human-readable name of a LogLevel value; "UNKNOWN_LEVEL" for anything that
+// is not one of the four enumerators. The argument is parenthesized so that
+// any expression can be passed (it is evaluated up to four times, so it must
+// not have side effects).
+#define LOG_LEVEL_STRING(level) \
+    ((level) == LOG_DEBUG ? "DEBUG" : \
+     (level) == LOG_INFO ? "INFO" : \
+     (level) == LOG_WARN ? "WARNING" : \
+     (level) == LOG_ERROR ? "ERROR" : \
+     "UNKNOWN_LEVEL")
+
+// Print the record prefix
+//   "<YYYY-MM-DD hh:mm:ss>,000 - <LEVEL> - [<basename(file)>:<line>] - "
+// to LOG_FD(level). The millisecond field is always the literal "000".
+// localtime_r() is used instead of localtime() so the broken-down time lives
+// in a local buffer (no shared static storage, no NULL dereference); if the
+// conversion fails the zero-initialised struct tm is printed.
+// Internal locals carry a log_prefix_ prefix so they cannot capture
+// identifiers used in the macro arguments.
+#define PRINT_LOG_PREFIX(level, file, line) do { \
+    time_t log_prefix_now_ = time(NULL); \
+    struct tm log_prefix_tm_ = {0}; \
+    (void)localtime_r(&log_prefix_now_, &log_prefix_tm_); \
+    fprintf(LOG_FD(level), "%d-%02d-%02d %02d:%02d:%02d,000 - %s - [%s:%d] - ", \
+            log_prefix_tm_.tm_year + 1900, \
+            log_prefix_tm_.tm_mon + 1, \
+            log_prefix_tm_.tm_mday, \
+            log_prefix_tm_.tm_hour, \
+            log_prefix_tm_.tm_min, \
+            log_prefix_tm_.tm_sec, \
+            LOG_LEVEL_STRING(level), \
+            basename(file), \
+            (line)); \
+} while (0)
+
+// configure Env for log
+// Name of the environment variable that setLogLevel() reads with getenv().
+#define LOG_LEVEL_ENV "LOG_LEVEL"
+
+// print msg
+// Writes one record (prefix + printf-style message) when level is at or above
+// the current log level. `file` and `line` identify the call site; `format`
+// and the variadic arguments follow printf conventions. Prefer the logging_*
+// macros below, which fill in __FILE__ and __LINE__.
+void logMessage(LogLevel level, char* file, int line, const char *format, ...);
+
+// set log level
+// Reads LOG_LEVEL_ENV and selects the level: "debug", "info", "warning" or
+// "error". An unset or unrecognised value leaves the level at LOG_INFO.
+void setLogLevel();
+
+// log function
+// Convenience wrappers around logMessage(); the arguments are a printf-style
+// format string followed by its values.
+#define logging_debug(...) logMessage(LOG_DEBUG, __FILE__, __LINE__, __VA_ARGS__)
+#define logging_info(...) logMessage(LOG_INFO, __FILE__, __LINE__, __VA_ARGS__)
+#define logging_warn(...) logMessage(LOG_WARN, __FILE__, __LINE__, __VA_ARGS__)
+#define logging_error(...) logMessage(LOG_ERROR, __FILE__, __LINE__, __VA_ARGS__)
+
+#endif
--
2.27.0

View File

@ -0,0 +1,586 @@
From 966e539d3b0c0eaaa94fdd1fb21dd29e97e48bee Mon Sep 17 00:00:00 2001
From: zhuofeng <zhuofeng2@huawei.com>
Date: Sat, 22 Feb 2025 18:42:08 +0800
Subject: [PATCH] add new func for ebpf in the rq_driver stage
---
src/sentryPlugins/ai_block_io/detector.py | 3 +-
.../ebpf_collector/ebpf_collector.bpf.c | 459 ++++--------------
.../ebpf_collector/ebpf_collector.c | 2 +-
.../ebpf_collector/ebpf_collector.h | 16 +-
4 files changed, 116 insertions(+), 364 deletions(-)
diff --git a/src/sentryPlugins/ai_block_io/detector.py b/src/sentryPlugins/ai_block_io/detector.py
index 27fb7f7..2688cb1 100644
--- a/src/sentryPlugins/ai_block_io/detector.py
+++ b/src/sentryPlugins/ai_block_io/detector.py
@@ -55,11 +55,12 @@ class Detector:
detection_result = self._slidingWindow.is_slow_io_event(metric_value)
# 检测到慢周期由Detector负责打印info级别日志
if detection_result[0][1]:
+ ai_threshold = "None" if detection_result[2] is None else round(detection_result[2], 3)
logging.info(f'[abnormal_period]: disk: {self._metric_name.disk_name}, '
f'stage: {self._metric_name.stage_name}, '
f'iotype: {self._metric_name.io_access_type_name}, '
f'type: {self._metric_name.metric_name}, '
- f'ai_threshold: {round(detection_result[2], 3)}, '
+ f'ai_threshold: {ai_threshold}, '
f'curr_val: {metric_value}')
else:
logging.debug(f'Detection result: {str(detection_result)}')
diff --git a/src/services/sentryCollector/ebpf_collector/ebpf_collector.bpf.c b/src/services/sentryCollector/ebpf_collector/ebpf_collector.bpf.c
index 978b114..ece8c93 100644
--- a/src/services/sentryCollector/ebpf_collector/ebpf_collector.bpf.c
+++ b/src/services/sentryCollector/ebpf_collector/ebpf_collector.bpf.c
@@ -115,7 +115,7 @@ struct {
} ringbuf SEC(".maps");
-static void log_event(u32 stage, u32 period, u32 err) {
+static void log_event(int stage, int period, int err) {
struct event *e;
void *data = bpf_ringbuf_reserve(&ringbuf, sizeof(struct event), 0);
if (!data)
@@ -338,6 +338,105 @@ int kprobe_blk_mq_start_request(struct pt_regs *regs) {
return 0;
}
+// finish rq_driver (batched completion)
+// Walks the request list of the io_comp_batch passed as first argument and,
+// for every request that has a start record in blk_map, accounts its
+// completion in blk_res / blk_res_2 and removes the start record.
+// At most BATCH_COUT + 1 requests are visited; if the list is longer, an
+// event carrying the loop index is logged.
+// Requests that are skipped (major == 0, device not monitored, no start
+// record) still advance to the next list entry, so one skipped request no
+// longer stalls the walk on the same element for the rest of the loop.
+SEC("kprobe/blk_mq_end_request_batch")
+int kprobe_blk_mq_end_request_batch(struct pt_regs *regs) {
+    struct io_comp_batch *iob = (struct io_comp_batch *)PT_REGS_PARM1(regs);
+    struct request *rq = BPF_CORE_READ(iob, req_list);
+
+    // Empty batch: nothing to account.
+    if (!rq) {
+        return 0;
+    }
+
+    for (int i = 0; i <= BATCH_COUT; i++) {
+        // Fetch the successor first so every exit path below can advance.
+        struct request *rq_next = BPF_CORE_READ(rq, rq_next);
+        struct request_queue *q = NULL;
+        struct gendisk *curr_rq_disk = NULL;
+        int major = 0, first_minor = 0;
+        unsigned int cmd_flags = 0;
+        struct io_counter *counterp;
+        struct stage_data *curr_data;
+        struct time_range_io_count *curr_data_time_range;
+        u64 duration, curr_start_range;
+        u32 key;
+
+        bpf_core_read(&q, sizeof(q), &rq->q);
+        bpf_core_read(&curr_rq_disk, sizeof(curr_rq_disk), &q->disk);
+        bpf_core_read(&major, sizeof(major), &curr_rq_disk->major);
+        bpf_core_read(&first_minor, sizeof(first_minor), &curr_rq_disk->first_minor);
+        bpf_core_read(&cmd_flags, sizeof(cmd_flags), &rq->cmd_flags);
+
+        if (major == 0) {
+            log_event(STAGE_RQ_DRIVER, PERIOD_END, ERROR_MAJOR_ZERO);
+            goto next_rq;
+        }
+
+        key = find_matching_key_rq_driver(major, first_minor);
+        if (key >= MAP_SIZE) {
+            goto next_rq;
+        }
+
+        counterp = bpf_map_lookup_elem(&blk_map, &rq);
+        if (!counterp) {
+            goto next_rq;
+        }
+
+        duration = bpf_ktime_get_ns() - counterp->start_time;
+        curr_start_range = counterp->start_time / THRESHOLD;
+
+        struct update_params params = {
+            .major = major,
+            .first_minor = first_minor,
+            .cmd_flags = cmd_flags,
+            .curr_start_range = curr_start_range,
+        };
+
+        curr_data = bpf_map_lookup_elem(&blk_res, &key);
+        if (curr_data == NULL) {
+            // First record for this device: only finish_over_time depends on
+            // whether the request exceeded DURATION_THRESHOLD.
+            struct stage_data new_data = {
+                .start_count = 1,
+                .finish_count = 1,
+                .finish_over_time = duration > DURATION_THRESHOLD ? 1 : 0,
+                .duration = 0,
+                .major = major,
+                .first_minor = first_minor,
+                .io_type = "",
+            };
+            blk_fill_rwbs(new_data.io_type, cmd_flags);
+            bpf_map_update_elem(&blk_res, &key, &new_data, 0);
+        } else {
+            curr_data->duration += duration;
+            update_curr_data_in_finish(curr_data, &params, duration);
+        }
+
+        curr_data_time_range = bpf_map_lookup_elem(&blk_res_2, &curr_start_range);
+        if (curr_data_time_range == NULL) {
+            struct time_range_io_count new_data = { .count = {0} };
+            bpf_map_update_elem(&blk_res_2, &curr_start_range, &new_data, 0);
+        } else if (key < MAP_SIZE) {
+            // Bounds check kept next to the array access; key is unsigned,
+            // so no lower-bound test is needed.
+            __sync_fetch_and_add(&curr_data_time_range->count[key], -1);
+        }
+
+        bpf_map_delete_elem(&blk_map, &rq);
+
+next_rq:
+        rq = rq_next;
+        if (!rq) {
+            break;
+        }
+
+        // Iteration budget exhausted while requests remain: report it.
+        if (i >= BATCH_COUT) {
+            log_event(STAGE_RQ_DRIVER, PERIOD_END, i);
+        }
+    }
+    return 0;
+}
+
+
// finish rq_driver
SEC("kprobe/blk_mq_free_request")
int kprobe_blk_mq_free_request(struct pt_regs *regs)
@@ -418,7 +517,7 @@ int kprobe_blk_mq_free_request(struct pt_regs *regs)
struct time_range_io_count new_data = { .count = {0} };
bpf_map_update_elem(&blk_res_2, &curr_start_range, &new_data, 0);
} else {
- if (key < MAP_SIZE && curr_data_time_range->count[key] > 0) {
+ if (key < MAP_SIZE && key >= 0) {
__sync_fetch_and_add(&curr_data_time_range->count[key], -1);
}
}
@@ -463,7 +562,6 @@ int kprobe_blk_mq_submit_bio(struct pt_regs *regs)
long err = bpf_map_update_elem(&bio_map, &bio, &zero, BPF_NOEXIST);
if (err) {
- log_event(STAGE_BIO, PERIOD_START, ERROR_UPDATE_FAIL);
return 0;
}
@@ -597,359 +695,4 @@ int kprobe_bio_endio(struct pt_regs *regs)
return 0;
}
-// start get_tag
-SEC("kprobe/blk_mq_get_tag")
-int kprobe_blk_mq_get_tag(struct pt_regs *regs)
-{
- u64 tagkey = bpf_get_current_task();
- u64 value = (u64)PT_REGS_PARM1(regs);
- (void)bpf_map_update_elem(&tag_args, &tagkey, &value, BPF_ANY);
-
- struct blk_mq_alloc_data *bd;
- struct request_queue *q;
- struct gendisk *curr_rq_disk;
- int major, first_minor;
- unsigned int cmd_flags = 0;
-
- bd = (struct blk_mq_alloc_data *)value;
- bpf_core_read(&q, sizeof(q), &bd->q);
- bpf_core_read(&curr_rq_disk, sizeof(curr_rq_disk), &q->disk);
- bpf_core_read(&major, sizeof(major), &curr_rq_disk->major);
- bpf_core_read(&first_minor, sizeof(first_minor), &curr_rq_disk->first_minor);
-
- if (major == 0) {
- log_event(STAGE_GET_TAG, PERIOD_START, ERROR_MAJOR_ZERO);
- return 0;
- }
-
- u32 key = find_matching_key_get_tag(major, first_minor);
- if (key >= MAP_SIZE) {
- return 0;
- }
-
- struct io_counter *counterp, zero = {};
- init_io_counter(&zero, major, first_minor);
- counterp = bpf_map_lookup_elem(&tag_map, &tagkey);
- if (counterp) {
- return 0;
- }
- long err = bpf_map_update_elem(&tag_map, &tagkey, &zero, BPF_NOEXIST);
- if (err) {
- log_event(STAGE_GET_TAG, PERIOD_START, ERROR_UPDATE_FAIL);
- return 0;
- }
-
- u64 curr_start_range = zero.start_time / THRESHOLD;
-
- struct update_params params = {
- .major = major,
- .first_minor = first_minor,
- .cmd_flags = cmd_flags,
- .curr_start_range = curr_start_range,
- };
-
- struct stage_data *curr_data;
- curr_data = bpf_map_lookup_elem(&tag_res, &key);
- if (!curr_data) {
- struct stage_data new_data = {
- .start_count = 1,
- .finish_count = 0,
- .finish_over_time = 0,
- .duration = 0,
- .major = major,
- .first_minor = first_minor,
- .io_type = "",
- };
- blk_fill_rwbs(new_data.io_type, cmd_flags);
- bpf_map_update_elem(&tag_res, &key, &new_data, 0);
- } else {
- update_curr_data_in_start(curr_data, &params);
- }
-
- struct time_range_io_count *curr_data_time_range;
- curr_data_time_range = bpf_map_lookup_elem(&tag_res_2, &curr_start_range);
- if (curr_data_time_range == NULL) {
- struct time_range_io_count new_data = { .count = {0} };
- bpf_map_update_elem(&tag_res_2, &curr_start_range, &new_data, 0);
- } else {
- if (key < MAP_SIZE && key >= 0) {
- __sync_fetch_and_add(&curr_data_time_range->count[key], 1);
- }
- }
- return 0;
-}
-
-// finish get_tag
-SEC("kretprobe/blk_mq_get_tag")
-int kretprobe_blk_mq_get_tag(struct pt_regs *regs)
-{
- u64 tagkey = bpf_get_current_task();
- u64 *tagargs = (u64 *)bpf_map_lookup_elem(&tag_args, &tagkey);
- if (tagargs == NULL) {
- bpf_map_delete_elem(&tag_args, &tagkey);
- return 0;
- }
-
- struct blk_mq_alloc_data *bd;
- struct request_queue *q;
- struct gendisk *curr_rq_disk;
- int major, first_minor;
- unsigned int cmd_flags = 0;
-
- bd = (struct blk_mq_alloc_data *)*tagargs;
- bpf_core_read(&q, sizeof(q), &bd->q);
- bpf_core_read(&curr_rq_disk, sizeof(curr_rq_disk), &q->disk);
- bpf_core_read(&major, sizeof(major), &curr_rq_disk->major);
- bpf_core_read(&first_minor, sizeof(first_minor), &curr_rq_disk->first_minor);
-
- if (major == 0) {
- log_event(STAGE_GET_TAG, PERIOD_END, ERROR_MAJOR_ZERO);
- return 0;
- }
-
- u32 key = find_matching_key_get_tag(major, first_minor);
- if (key >= MAP_SIZE) {
- return 0;
- }
-
- struct io_counter *counterp = bpf_map_lookup_elem(&tag_map, &tagkey);
- if (!counterp) {
- return 0;
- }
-
- u64 duration = bpf_ktime_get_ns() - counterp->start_time;
- u64 curr_start_range = counterp->start_time / THRESHOLD;
-
- struct update_params params = {
- .major = major,
- .first_minor = first_minor,
- .cmd_flags = cmd_flags,
- .curr_start_range = curr_start_range,
- };
-
- struct stage_data *curr_data;
- curr_data = bpf_map_lookup_elem(&tag_res, &key);
- if (curr_data == NULL && duration > DURATION_THRESHOLD) {
- struct stage_data new_data = {
- .start_count = 1,
- .finish_count = 1,
- .finish_over_time = 1,
- .duration = 0,
- .major = major,
- .first_minor = first_minor,
- .io_type = "",
- };
- blk_fill_rwbs(new_data.io_type, cmd_flags);
- bpf_map_update_elem(&tag_res, &key, &new_data, 0);
- } else if (curr_data == NULL) {
- struct stage_data new_data = {
- .start_count = 1,
- .finish_count = 1,
- .finish_over_time = 0,
- .duration = 0,
- .major = major,
- .first_minor = first_minor,
- .io_type = "",
- };
- blk_fill_rwbs(new_data.io_type, cmd_flags);
- bpf_map_update_elem(&tag_res, &key, &new_data, 0);
- } else {
- curr_data->duration += duration;
- update_curr_data_in_finish(curr_data, &params, duration);
- }
-
- struct time_range_io_count *curr_data_time_range;
- curr_data_time_range = bpf_map_lookup_elem(&tag_res_2, &curr_start_range);
- if (curr_data_time_range == NULL) {
- struct time_range_io_count new_data = { .count = {0} };
- bpf_map_update_elem(&tag_res_2, &curr_start_range, &new_data, 0);
- } else {
- if (key < MAP_SIZE && curr_data_time_range->count[key] > 0) {
- __sync_fetch_and_add(&curr_data_time_range->count[key], -1);
- }
- }
-
- bpf_map_delete_elem(&tag_map, &tagkey);
- bpf_map_delete_elem(&tag_args, &tagkey);
- return 0;
-}
-
-// start wbt
-SEC("kprobe/wbt_wait")
-int kprobe_wbt_wait(struct pt_regs *regs)
-{
- u64 wbtkey = bpf_get_current_task();
- u64 value = (u64)PT_REGS_PARM2(regs);
- (void)bpf_map_update_elem(&wbt_args, &wbtkey, &value, BPF_ANY);
-
- struct bio *bio;
- struct block_device *bd;
- struct gendisk *curr_rq_disk;
- int major, first_minor;
- unsigned int cmd_flags;
-
- bio = (struct bio *)value;
- bpf_core_read(&bd, sizeof(bd), &bio->bi_bdev);
- bpf_core_read(&curr_rq_disk, sizeof(curr_rq_disk), &bd->bd_disk);
- bpf_core_read(&major, sizeof(major), &curr_rq_disk->major);
- bpf_core_read(&first_minor, sizeof(first_minor), &curr_rq_disk->first_minor);
- bpf_core_read(&cmd_flags, sizeof(cmd_flags), &bio->bi_opf);
-
- if (major == 0) {
- log_event(STAGE_WBT, PERIOD_START, ERROR_MAJOR_ZERO);
- return 0;
- }
-
- u32 key = find_matching_key_wbt(major, first_minor);
- if (key >= MAP_SIZE) {
- return 0;
- }
-
- struct io_counter *counterp, zero = {};
- init_io_counter(&zero, major, first_minor);
- counterp = bpf_map_lookup_elem(&wbt_map, &wbtkey);
- if (counterp) {
- return 0;
- }
- long err = bpf_map_update_elem(&wbt_map, &wbtkey, &zero, BPF_NOEXIST);
- if (err) {
- log_event(STAGE_WBT, PERIOD_START, ERROR_UPDATE_FAIL);
- return 0;
- }
-
- u64 curr_start_range = zero.start_time / THRESHOLD;
-
- struct update_params params = {
- .major = major,
- .first_minor = first_minor,
- .cmd_flags = cmd_flags,
- .curr_start_range = curr_start_range,
- };
-
- struct stage_data *curr_data;
- curr_data = bpf_map_lookup_elem(&wbt_res, &key);
- if (!curr_data) {
- struct stage_data new_data = {
- .start_count = 1,
- .finish_count = 0,
- .finish_over_time = 0,
- .duration = 0,
- .major = major,
- .first_minor = first_minor,
- .io_type = "",
- };
- blk_fill_rwbs(new_data.io_type, cmd_flags);
- bpf_map_update_elem(&wbt_res, &key, &new_data, 0);
- } else {
- update_curr_data_in_start(curr_data, &params);
- }
-
- struct time_range_io_count *curr_data_time_range;
- curr_data_time_range = bpf_map_lookup_elem(&wbt_res_2, &curr_start_range);
- if (curr_data_time_range == NULL) {
- struct time_range_io_count new_data = { .count = {0} };
- bpf_map_update_elem(&wbt_res_2, &curr_start_range, &new_data, 0);
- } else {
- if (key < MAP_SIZE && key >= 0) {
- __sync_fetch_and_add(&curr_data_time_range->count[key], 1);
- }
- }
- return 0;
-}
-
-// finish wbt
-SEC("kretprobe/wbt_wait")
-int kretprobe_wbt_wait(struct pt_regs *regs)
-{
- u64 wbtkey = bpf_get_current_task();
- u64 *wbtargs = (u64 *)bpf_map_lookup_elem(&wbt_args, &wbtkey);
- if (wbtargs == NULL) {
- bpf_map_delete_elem(&wbt_args, &wbtkey);
- return 0;
- }
-
- struct bio *bio;
- struct block_device *bd;
- struct gendisk *curr_rq_disk;
- int major, first_minor;
- unsigned int cmd_flags;
-
- bio = (struct bio *)(*wbtargs);
- bpf_core_read(&bd, sizeof(bd), &bio->bi_bdev);
- bpf_core_read(&curr_rq_disk, sizeof(curr_rq_disk), &bd->bd_disk);
- bpf_core_read(&major, sizeof(major), &curr_rq_disk->major);
- bpf_core_read(&first_minor, sizeof(first_minor), &curr_rq_disk->first_minor);
- bpf_core_read(&cmd_flags, sizeof(cmd_flags), &bio->bi_opf);
-
- if (major == 0) {
- log_event(STAGE_WBT, PERIOD_END, ERROR_MAJOR_ZERO);
- return 0;
- }
-
- u32 key = find_matching_key_wbt(major, first_minor);
- if (key >= MAP_SIZE) {
- return 0;
- }
-
- struct io_counter *counterp = bpf_map_lookup_elem(&wbt_map, &wbtkey);
- if (!counterp) {
- return 0;
- }
-
- u64 duration = bpf_ktime_get_ns() - counterp->start_time;
- u64 curr_start_range = counterp->start_time / THRESHOLD;
-
- struct update_params params = {
- .major = major,
- .first_minor = first_minor,
- .cmd_flags = cmd_flags,
- .curr_start_range = curr_start_range,
- };
-
- struct stage_data *curr_data;
- curr_data = bpf_map_lookup_elem(&wbt_res, &key);
- if (curr_data == NULL && duration > DURATION_THRESHOLD) {
- struct stage_data new_data = {
- .start_count = 1,
- .finish_count = 1,
- .finish_over_time = 1,
- .duration = 0,
- .major = major,
- .first_minor = first_minor,
- .io_type = "",
- };
- blk_fill_rwbs(new_data.io_type, cmd_flags);
- bpf_map_update_elem(&wbt_res, &key, &new_data, 0);
- } else if (curr_data == NULL) {
- struct stage_data new_data = {
- .start_count = 1,
- .finish_count = 1,
- .finish_over_time = 0,
- .duration = 0,
- .io_type = "",
- .major = major,
- .first_minor = first_minor,
- };
- blk_fill_rwbs(new_data.io_type, cmd_flags);
- bpf_map_update_elem(&wbt_res, &key, &new_data, 0);
- } else {
- curr_data->duration += duration;
- update_curr_data_in_finish(curr_data, &params, duration);
- }
-
- struct time_range_io_count *curr_data_time_range;
- curr_data_time_range = bpf_map_lookup_elem(&wbt_res_2, &curr_start_range);
- if (curr_data_time_range == NULL) {
- struct time_range_io_count new_data = { .count = {0} };
- bpf_map_update_elem(&wbt_res_2, &curr_start_range, &new_data, 0);
- } else {
- if (key < MAP_SIZE && curr_data_time_range->count[key] > 0) {
- __sync_fetch_and_add(&curr_data_time_range->count[key], -1);
- }
- }
- bpf_map_delete_elem(&wbt_map, &wbtkey);
- bpf_map_delete_elem(&wbt_args, &wbtkey);
- return 0;
-}
-
char _license[] SEC("license") = "GPL";
diff --git a/src/services/sentryCollector/ebpf_collector/ebpf_collector.c b/src/services/sentryCollector/ebpf_collector/ebpf_collector.c
index 5a2528b..445cce7 100644
--- a/src/services/sentryCollector/ebpf_collector/ebpf_collector.c
+++ b/src/services/sentryCollector/ebpf_collector/ebpf_collector.c
@@ -155,7 +155,7 @@ static void update_io_dump(int fd, int *io_dump, int map_size, char *stage) {
if ((curr_time - io_dump_key) >= 2) {
int isempty = 1;
for (int key = 0; key < map_size; key++) {
- if (time_count.count[key] > 0) {
+ if (time_count.count[key] > IO_DUMP_DIFF) {
io_dump[key] += time_count.count[key];
isempty = 0;
}
diff --git a/src/services/sentryCollector/ebpf_collector/ebpf_collector.h b/src/services/sentryCollector/ebpf_collector/ebpf_collector.h
index adf926b..e9de49e 100644
--- a/src/services/sentryCollector/ebpf_collector/ebpf_collector.h
+++ b/src/services/sentryCollector/ebpf_collector/ebpf_collector.h
@@ -39,6 +39,10 @@ typedef unsigned int u32;
#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS))
#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK))
+#ifndef NULL
+#define NULL ((void *)0)
+#endif
+
// 阶段
#define STAGE_RQ_DRIVER 1
#define STAGE_BIO 2
@@ -55,6 +59,10 @@ typedef unsigned int u32;
#define ERROR_KEY_EXIST 3
#define ERROR_UPDATE_FAIL 4
#define ERROR_KEY_NOEXIST 5
+#define ERROR_DELETE_FAIL 6
+
+#define BATCH_COUT 100
+#define IO_DUMP_DIFF 3
enum stage_type {
BIO=0,
@@ -94,13 +102,13 @@ struct update_params {
struct time_range_io_count
{
- u32 count[MAP_SIZE];
+ int count[MAP_SIZE];
};
struct event {
- u32 stage;
- u64 period;
- u32 err;
+ int stage;
+ int period;
+ int err;
};
#endif /* __EBPFCOLLECTOR_H */
--
2.43.0

View File

@ -0,0 +1,69 @@
From b462c193e8b6bb7b8f252b9ef8931d91831e1321 Mon Sep 17 00:00:00 2001
From: luckky <guodashun1@huawei.com>
Date: Thu, 13 Mar 2025 11:55:15 +0800
Subject: [PATCH] ai block io: exit when stage is not supported
---
.../ai_block_io/config_parser.py | 32 +++++++++++++++++--
1 file changed, 30 insertions(+), 2 deletions(-)
diff --git a/src/sentryPlugins/ai_block_io/config_parser.py b/src/sentryPlugins/ai_block_io/config_parser.py
index 1bbb609..612fe9f 100644
--- a/src/sentryPlugins/ai_block_io/config_parser.py
+++ b/src/sentryPlugins/ai_block_io/config_parser.py
@@ -32,6 +32,12 @@ ALL_STAGE_LIST = [
"rq_driver",
"bio",
]
+EBPF_STAGE_LIST = [
+ "wbt",
+ "rq_driver",
+ "bio",
+ "gettag"
+]
ALL_IOTPYE_LIST = ["read", "write"]
DISK_TYPE_MAP = {
0: "nvme_ssd",
@@ -312,15 +318,37 @@ class ConfigParser:
if len(stage_list) == 1 and stage_list[0] == "":
logging.critical("stage value not allow is empty, exiting...")
exit(1)
+
+ # check if kernel or ebpf is supported (code is from collector)
+ valid_stage_list = ALL_STAGE_LIST
+ base_path = '/sys/kernel/debug/block'
+ all_disk = []
+ for disk_name in os.listdir(base_path):
+ disk_path = os.path.join(base_path, disk_name)
+ blk_io_hierarchy_path = os.path.join(disk_path, 'blk_io_hierarchy')
+
+ if not os.path.exists(blk_io_hierarchy_path):
+ logging.warning("no blk_io_hierarchy directory found in %s, skipping.", disk_name)
+ continue
+
+ for file_name in os.listdir(blk_io_hierarchy_path):
+ if file_name == 'stats':
+ all_disk.append(disk_name)
+
+ if len(all_disk) == 0:
+ logging.debug("no blk_io_hierarchy disk, it is not lock-free collection")
+ valid_stage_list = EBPF_STAGE_LIST
+
if len(stage_list) == 1 and stage_list[0] == "default":
logging.warning(
"stage will enable default value: %s",
self.DEFAULT_CONF["common"]["stage"],
)
- self._conf["common"]["stage"] = ALL_STAGE_LIST
+ self._conf["common"]["stage"] = valid_stage_list
return
+
for stage in stage_list:
- if stage not in ALL_STAGE_LIST:
+ if stage not in valid_stage_list:
logging.critical(
"stage: %s is not valid stage, ai_block_io will exit...", stage
)
--
2.43.0

View File

@ -0,0 +1,25 @@
From 8d6ca181b85ee32837e8891a1003d24826902f08 Mon Sep 17 00:00:00 2001
From: shixuantong <shixuantong1@huawei.com>
Date: Sat, 29 Mar 2025 10:58:17 +0800
Subject: [PATCH] fix env for subprocess.Popen
---
src/services/syssentry/global_values.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/services/syssentry/global_values.py b/src/services/syssentry/global_values.py
index 931b8ab..7cb99e0 100644
--- a/src/services/syssentry/global_values.py
+++ b/src/services/syssentry/global_values.py
@@ -76,7 +76,7 @@ class InspectTask:
# ccnfig env_file
self.env_file = ""
# env conf to popen arg
- self.environ_conf = {}
+ self.environ_conf = None
# start mode
self.conflict = "up"
# alarm id
--
2.27.0

View File

@ -0,0 +1,54 @@
From b0af4890cd131f33ab85708c75742f0a9680705c Mon Sep 17 00:00:00 2001
From: shixuantong <shixuantong1@huawei.com>
Date: Sat, 22 Mar 2025 10:38:30 +0800
Subject: [PATCH] fix period task some bugs
---
src/services/syssentry/cron_process.py | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/src/services/syssentry/cron_process.py b/src/services/syssentry/cron_process.py
index 204d4f3..fab350e 100644
--- a/src/services/syssentry/cron_process.py
+++ b/src/services/syssentry/cron_process.py
@@ -59,7 +59,6 @@ class PeriodTask(InspectTask):
self.result_info["details"] = {}
if not self.period_enabled:
self.period_enabled = True
- self.upgrade_period_timestamp()
if self.conflict != 'up':
ret = self.check_conflict()
@@ -87,6 +86,7 @@ class PeriodTask(InspectTask):
self.runtime_status = FAILED_STATUS
return False, "period task start popen failed, invalid command"
finally:
+ self.upgrade_period_timestamp()
if isinstance(logfile, io.TextIOWrapper) and not logfile.closed:
logfile.close()
@@ -127,7 +127,6 @@ class PeriodTask(InspectTask):
res, _ = self.start()
if res:
set_runtime_status(self.name, RUNNING_STATUS)
- self.upgrade_period_timestamp()
def period_tasks_handle():
@@ -142,7 +141,7 @@ def period_tasks_handle():
logging.debug("period not enabled")
continue
- if not task.onstart:
+ if not task.onstart and task.last_exec_timestamp == 0:
logging.debug("period onstart not enabled, task: %s", task.name)
task.runtime_status = EXITED_STATUS
continue
@@ -153,4 +152,3 @@ def period_tasks_handle():
res, _ = task.start()
if res:
set_runtime_status(task.name, RUNNING_STATUS)
- task.upgrade_period_timestamp()
--
2.27.0

View File

@ -0,0 +1,37 @@
From 93a58d1e0faee37cbc27981c85f41c50e81bdc66 Mon Sep 17 00:00:00 2001
From: zhuofeng <1107893276@qq.com>
Date: Mon, 24 Feb 2025 03:00:57 +0000
Subject: [PATCH] fix the sentryCollector service can't be stopped for a long
time.
Signed-off-by: zhuofeng <1107893276@qq.com>
---
src/services/sentryCollector/collect_io.py | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/src/services/sentryCollector/collect_io.py b/src/services/sentryCollector/collect_io.py
index 10446d9..6db28ec 100644
--- a/src/services/sentryCollector/collect_io.py
+++ b/src/services/sentryCollector/collect_io.py
@@ -405,10 +405,17 @@ class CollectIo():
self
) -> None:
global EBPF_PROCESS
- if EBPF_PROCESS:
+ if not EBPF_PROCESS:
+ logging.debug("No eBPF process to stop")
+ return
+ try:
EBPF_PROCESS.terminate()
+ EBPF_PROCESS.wait(timeout=3)
+ except subprocess.TimeoutExpired:
+ logging.debug("eBPF process did not exit within timeout. Forcing kill.")
+ EBPF_PROCESS.kill()
EBPF_PROCESS.wait()
- logging.info("ebpf collector thread exit")
+ logging.info("ebpf collector thread exit")
def main_loop(self):
global IO_GLOBAL_DATA
--
2.43.0

View File

@ -4,7 +4,7 @@
Summary: System Inspection Framework Summary: System Inspection Framework
Name: sysSentry Name: sysSentry
Version: 1.0.3 Version: 1.0.3
Release: 5 Release: 10
License: Mulan PSL v2 License: Mulan PSL v2
Group: System Environment/Daemons Group: System Environment/Daemons
Source0: https://gitee.com/openeuler/sysSentry/releases/download/v%{version}/%{name}-%{version}.tar.gz Source0: https://gitee.com/openeuler/sysSentry/releases/download/v%{version}/%{name}-%{version}.tar.gz
@ -13,6 +13,12 @@ Patch1: add-bidirectional-communication-for-xalarm.patch
Patch2: fix-some-test-cases.patch Patch2: fix-some-test-cases.patch
Patch3: add-log-for-xalarmd-and-fix-delete-on-iter-problem.patch Patch3: add-log-for-xalarmd-and-fix-delete-on-iter-problem.patch
Patch4: fix-xalarm-log-not-print-and-add-on-iter-problem.patch Patch4: fix-xalarm-log-not-print-and-add-on-iter-problem.patch
Patch5: add-new-func-for-ebpf-in-the-rq_driver-stage.patch
Patch6: fix-the-sentryCollector-service-can-t-be-stopped-for.patch
Patch7: ai-block-io-exit-when-stage-is-not-supported.patch
Patch8: add-log-utils-for-c.patch
Patch9: fix-env-for-subprocess.Popen.patch
Patch10: fix-period-task-some-bugs.patch
BuildRequires: cmake gcc-c++ BuildRequires: cmake gcc-c++
BuildRequires: python3 python3-setuptools BuildRequires: python3 python3-setuptools
@ -168,7 +174,9 @@ rm -rf /var/run/sysSentry | :
%attr(0550,root,root) %{_bindir}/sentryCollector %attr(0550,root,root) %{_bindir}/sentryCollector
%attr(0600,root,root) %{_sysconfdir}/sysSentry/collector.conf %attr(0600,root,root) %{_sysconfdir}/sysSentry/collector.conf
%attr(0600,root,root) %{_unitdir}/sentryCollector.service %attr(0600,root,root) %{_unitdir}/sentryCollector.service
%attr(0550,root,root) %{_libdir}/libsentry_log.so
%exclude %{_includedir}/libsentry/log_utils.h
%exclude %{_sysconfdir}/sysSentry/tasks/hbm_online_repair.mod %exclude %{_sysconfdir}/sysSentry/tasks/hbm_online_repair.mod
%exclude %{python3_sitelib}/syssentry/bmc_* %exclude %{python3_sitelib}/syssentry/bmc_*
%exclude %{python3_sitelib}/syssentry/*/bmc_* %exclude %{python3_sitelib}/syssentry/*/bmc_*
@ -209,6 +217,37 @@ rm -rf /var/run/sysSentry | :
%attr(0550,root,root) %{python3_sitelib}/syssentry/bmc_alarm.py %attr(0550,root,root) %{python3_sitelib}/syssentry/bmc_alarm.py
%changelog %changelog
* Sat Mar 29 2025 shixuantong <shixuantong1@huawei.com> - 1.0.3-10
- Type:bugfix
- CVE:NA
- SUG:NA
- DESC:fix period task some bugs
fix env_file and environ_conf
* Fri Mar 14 2025 shixuantong <shixuantong1@huawei.com> - 1.0.3-9
- Type:bugfix
- CVE:NA
- SUG:NA
- DESC:add log utils for c
* Thu Mar 13 2025 luckky <guodashun1@huawei.com> - 1.0.3-8
- Type:bugfix
- CVE:NA
- SUG:NA
- DESC: fix an issue with printing error
* Mon Feb 24 2025 zhuofeng <zhuofeng2@huawei.com> - 1.0.3-7
- Type:bugfix
- CVE:NA
- SUG:NA
- DESC: fix the sentryCollector service can't be stopped for a long
* Sat Feb 22 2025 zhuofeng <zhuofeng2@huawei.com> - 1.0.3-6
- Type:bugfix
- CVE:NA
- SUG:NA
- DESC: add new func for ebpf in the rq_driver stage
* Tue Feb 18 2025 caixiaomeng <caixiaomeng2@huawei.com> - 1.0.3-5 * Tue Feb 18 2025 caixiaomeng <caixiaomeng2@huawei.com> - 1.0.3-5
- Type:bugfix - Type:bugfix
- CVE:NA - CVE:NA