add new func for ebpf in the rq_driver stage

Signed-off-by: zhuofeng <1107893276@qq.com>
(cherry picked from commit ec94b36256cf018893e543b4b57e95e6c78e3a4a)
This commit is contained in:
zhuofeng 2025-02-22 02:51:03 +00:00 committed by openeuler-sync-bot
parent 45a9ebffd2
commit 52606113b3
2 changed files with 594 additions and 1 deletions

View File

@ -0,0 +1,586 @@
From 966e539d3b0c0eaaa94fdd1fb21dd29e97e48bee Mon Sep 17 00:00:00 2001
From: zhuofeng <zhuofeng2@huawei.com>
Date: Sat, 22 Feb 2025 18:42:08 +0800
Subject: [PATCH] add new func for ebpf in the rq_driver stage
---
src/sentryPlugins/ai_block_io/detector.py | 3 +-
.../ebpf_collector/ebpf_collector.bpf.c | 459 ++++--------------
.../ebpf_collector/ebpf_collector.c | 2 +-
.../ebpf_collector/ebpf_collector.h | 16 +-
4 files changed, 116 insertions(+), 364 deletions(-)
diff --git a/src/sentryPlugins/ai_block_io/detector.py b/src/sentryPlugins/ai_block_io/detector.py
index 27fb7f7..2688cb1 100644
--- a/src/sentryPlugins/ai_block_io/detector.py
+++ b/src/sentryPlugins/ai_block_io/detector.py
@@ -55,11 +55,12 @@ class Detector:
detection_result = self._slidingWindow.is_slow_io_event(metric_value)
# 检测到慢周期由Detector负责打印info级别日志
if detection_result[0][1]:
+ ai_threshold = "None" if detection_result[2] is None else round(detection_result[2], 3)
logging.info(f'[abnormal_period]: disk: {self._metric_name.disk_name}, '
f'stage: {self._metric_name.stage_name}, '
f'iotype: {self._metric_name.io_access_type_name}, '
f'type: {self._metric_name.metric_name}, '
- f'ai_threshold: {round(detection_result[2], 3)}, '
+ f'ai_threshold: {ai_threshold}, '
f'curr_val: {metric_value}')
else:
logging.debug(f'Detection result: {str(detection_result)}')
diff --git a/src/services/sentryCollector/ebpf_collector/ebpf_collector.bpf.c b/src/services/sentryCollector/ebpf_collector/ebpf_collector.bpf.c
index 978b114..ece8c93 100644
--- a/src/services/sentryCollector/ebpf_collector/ebpf_collector.bpf.c
+++ b/src/services/sentryCollector/ebpf_collector/ebpf_collector.bpf.c
@@ -115,7 +115,7 @@ struct {
} ringbuf SEC(".maps");
-static void log_event(u32 stage, u32 period, u32 err) {
+static void log_event(int stage, int period, int err) {
struct event *e;
void *data = bpf_ringbuf_reserve(&ringbuf, sizeof(struct event), 0);
if (!data)
@@ -338,6 +338,105 @@ int kprobe_blk_mq_start_request(struct pt_regs *regs) {
return 0;
}
+// finish rq_driver for batched completions: visits the requests linked from
+// iob->req_list through rq_next, at most BATCH_COUT + 1 of them per call.
+// Each request found in blk_map is accounted in blk_res, has its start-time
+// bucket in blk_res_2 decremented (or created zeroed), and leaves blk_map.
+SEC("kprobe/blk_mq_end_request_batch")
+int kprobe_blk_mq_end_request_batch(struct pt_regs *regs) {
+    struct io_comp_batch *iob = (struct io_comp_batch *)PT_REGS_PARM1(regs);
+    struct request *rq;
+    struct request *rq_next = NULL;
+    struct request_queue *q;
+    struct gendisk *curr_rq_disk;
+    int major, first_minor;
+    unsigned int cmd_flags;
+    struct io_counter *counterp;
+    struct stage_data *curr_data;
+    rq = BPF_CORE_READ(iob, req_list);
+
+    // Advancing in the loop header means every `continue` below still moves on
+    // to the next request instead of re-reading the same one; a NULL rq (empty
+    // list or end of list) ends the walk.
+    for (int i = 0; i <= BATCH_COUT && rq; i++, rq = rq_next) {
+        rq_next = BPF_CORE_READ(rq, rq_next);
+        // Resolve the request's queue, its disk's major/first_minor, and cmd_flags.
+        bpf_core_read(&q, sizeof(q), &rq->q);
+        bpf_core_read(&curr_rq_disk, sizeof(curr_rq_disk), &q->disk);
+        bpf_core_read(&major, sizeof(major), &curr_rq_disk->major);
+        bpf_core_read(&first_minor, sizeof(first_minor), &curr_rq_disk->first_minor);
+        bpf_core_read(&cmd_flags, sizeof(cmd_flags), &rq->cmd_flags);
+
+        // major == 0 is reported as ERROR_MAJOR_ZERO; the request is skipped.
+        if (major == 0) {
+            log_event(STAGE_RQ_DRIVER, PERIOD_END, ERROR_MAJOR_ZERO);
+            continue;
+        }
+
+        // Keys >= MAP_SIZE are skipped (presumably: disk not monitored - confirm).
+        u32 key = find_matching_key_rq_driver(major, first_minor);
+        if (key >= MAP_SIZE) {
+            continue;
+        }
+
+        // Requests without a blk_map entry have no recorded start_time; skip them.
+        counterp = bpf_map_lookup_elem(&blk_map, &rq);
+        if (!counterp) {
+            continue;
+        }
+
+        // Elapsed time since the recorded start, and the start in THRESHOLD units.
+        u64 duration = bpf_ktime_get_ns() - counterp->start_time;
+        u64 curr_start_range = counterp->start_time / THRESHOLD;
+
+        struct update_params params = {
+            .major = major,
+            .first_minor = first_minor,
+            .cmd_flags = cmd_flags,
+            .curr_start_range = curr_start_range,
+        };
+
+        curr_data = bpf_map_lookup_elem(&blk_res, &key);
+        if (curr_data == NULL) {
+            // No blk_res entry yet: create one, flagged over-time past DURATION_THRESHOLD.
+            struct stage_data new_data = {
+                .start_count = 1,
+                .finish_count = 1,
+                .finish_over_time = (duration > DURATION_THRESHOLD) ? 1 : 0,
+                .duration = 0,
+                .major = major,
+                .first_minor = first_minor,
+                .io_type = "",
+            };
+            blk_fill_rwbs(new_data.io_type, cmd_flags);
+            bpf_map_update_elem(&blk_res, &key, &new_data, 0);
+        } else {
+            curr_data->duration += duration;
+            update_curr_data_in_finish(curr_data, &params, duration);
+        }
+
+        // Adjust the per-key count of the time range the request started in.
+        struct time_range_io_count *curr_data_time_range;
+        curr_data_time_range = bpf_map_lookup_elem(&blk_res_2, &curr_start_range);
+        if (curr_data_time_range == NULL) {
+            struct time_range_io_count new_data = { .count = {0} };
+            bpf_map_update_elem(&blk_res_2, &curr_start_range, &new_data, 0);
+        } else {
+            if (key < MAP_SIZE && key >= 0) {
+                __sync_fetch_and_add(&curr_data_time_range->count[key], -1);
+            }
+        }
+        // Accounting done: remove the request from blk_map.
+        bpf_map_delete_elem(&blk_map, &rq);
+    }
+
+    // The cap ended the walk with requests still linked: log the truncation.
+    if (rq) {
+        log_event(STAGE_RQ_DRIVER, PERIOD_END, BATCH_COUT);
+    }
+    return 0;
+}
+
// finish rq_driver
SEC("kprobe/blk_mq_free_request")
int kprobe_blk_mq_free_request(struct pt_regs *regs)
@@ -418,7 +517,7 @@ int kprobe_blk_mq_free_request(struct pt_regs *regs)
struct time_range_io_count new_data = { .count = {0} };
bpf_map_update_elem(&blk_res_2, &curr_start_range, &new_data, 0);
} else {
- if (key < MAP_SIZE && curr_data_time_range->count[key] > 0) {
+ if (key < MAP_SIZE && key >= 0) {
__sync_fetch_and_add(&curr_data_time_range->count[key], -1);
}
}
@@ -463,7 +562,6 @@ int kprobe_blk_mq_submit_bio(struct pt_regs *regs)
long err = bpf_map_update_elem(&bio_map, &bio, &zero, BPF_NOEXIST);
if (err) {
- log_event(STAGE_BIO, PERIOD_START, ERROR_UPDATE_FAIL);
return 0;
}
@@ -597,359 +695,4 @@ int kprobe_bio_endio(struct pt_regs *regs)
return 0;
}
-// start get_tag
-SEC("kprobe/blk_mq_get_tag")
-int kprobe_blk_mq_get_tag(struct pt_regs *regs)
-{
- u64 tagkey = bpf_get_current_task();
- u64 value = (u64)PT_REGS_PARM1(regs);
- (void)bpf_map_update_elem(&tag_args, &tagkey, &value, BPF_ANY);
-
- struct blk_mq_alloc_data *bd;
- struct request_queue *q;
- struct gendisk *curr_rq_disk;
- int major, first_minor;
- unsigned int cmd_flags = 0;
-
- bd = (struct blk_mq_alloc_data *)value;
- bpf_core_read(&q, sizeof(q), &bd->q);
- bpf_core_read(&curr_rq_disk, sizeof(curr_rq_disk), &q->disk);
- bpf_core_read(&major, sizeof(major), &curr_rq_disk->major);
- bpf_core_read(&first_minor, sizeof(first_minor), &curr_rq_disk->first_minor);
-
- if (major == 0) {
- log_event(STAGE_GET_TAG, PERIOD_START, ERROR_MAJOR_ZERO);
- return 0;
- }
-
- u32 key = find_matching_key_get_tag(major, first_minor);
- if (key >= MAP_SIZE) {
- return 0;
- }
-
- struct io_counter *counterp, zero = {};
- init_io_counter(&zero, major, first_minor);
- counterp = bpf_map_lookup_elem(&tag_map, &tagkey);
- if (counterp) {
- return 0;
- }
- long err = bpf_map_update_elem(&tag_map, &tagkey, &zero, BPF_NOEXIST);
- if (err) {
- log_event(STAGE_GET_TAG, PERIOD_START, ERROR_UPDATE_FAIL);
- return 0;
- }
-
- u64 curr_start_range = zero.start_time / THRESHOLD;
-
- struct update_params params = {
- .major = major,
- .first_minor = first_minor,
- .cmd_flags = cmd_flags,
- .curr_start_range = curr_start_range,
- };
-
- struct stage_data *curr_data;
- curr_data = bpf_map_lookup_elem(&tag_res, &key);
- if (!curr_data) {
- struct stage_data new_data = {
- .start_count = 1,
- .finish_count = 0,
- .finish_over_time = 0,
- .duration = 0,
- .major = major,
- .first_minor = first_minor,
- .io_type = "",
- };
- blk_fill_rwbs(new_data.io_type, cmd_flags);
- bpf_map_update_elem(&tag_res, &key, &new_data, 0);
- } else {
- update_curr_data_in_start(curr_data, &params);
- }
-
- struct time_range_io_count *curr_data_time_range;
- curr_data_time_range = bpf_map_lookup_elem(&tag_res_2, &curr_start_range);
- if (curr_data_time_range == NULL) {
- struct time_range_io_count new_data = { .count = {0} };
- bpf_map_update_elem(&tag_res_2, &curr_start_range, &new_data, 0);
- } else {
- if (key < MAP_SIZE && key >= 0) {
- __sync_fetch_and_add(&curr_data_time_range->count[key], 1);
- }
- }
- return 0;
-}
-
-// finish get_tag
-SEC("kretprobe/blk_mq_get_tag")
-int kretprobe_blk_mq_get_tag(struct pt_regs *regs)
-{
- u64 tagkey = bpf_get_current_task();
- u64 *tagargs = (u64 *)bpf_map_lookup_elem(&tag_args, &tagkey);
- if (tagargs == NULL) {
- bpf_map_delete_elem(&tag_args, &tagkey);
- return 0;
- }
-
- struct blk_mq_alloc_data *bd;
- struct request_queue *q;
- struct gendisk *curr_rq_disk;
- int major, first_minor;
- unsigned int cmd_flags = 0;
-
- bd = (struct blk_mq_alloc_data *)*tagargs;
- bpf_core_read(&q, sizeof(q), &bd->q);
- bpf_core_read(&curr_rq_disk, sizeof(curr_rq_disk), &q->disk);
- bpf_core_read(&major, sizeof(major), &curr_rq_disk->major);
- bpf_core_read(&first_minor, sizeof(first_minor), &curr_rq_disk->first_minor);
-
- if (major == 0) {
- log_event(STAGE_GET_TAG, PERIOD_END, ERROR_MAJOR_ZERO);
- return 0;
- }
-
- u32 key = find_matching_key_get_tag(major, first_minor);
- if (key >= MAP_SIZE) {
- return 0;
- }
-
- struct io_counter *counterp = bpf_map_lookup_elem(&tag_map, &tagkey);
- if (!counterp) {
- return 0;
- }
-
- u64 duration = bpf_ktime_get_ns() - counterp->start_time;
- u64 curr_start_range = counterp->start_time / THRESHOLD;
-
- struct update_params params = {
- .major = major,
- .first_minor = first_minor,
- .cmd_flags = cmd_flags,
- .curr_start_range = curr_start_range,
- };
-
- struct stage_data *curr_data;
- curr_data = bpf_map_lookup_elem(&tag_res, &key);
- if (curr_data == NULL && duration > DURATION_THRESHOLD) {
- struct stage_data new_data = {
- .start_count = 1,
- .finish_count = 1,
- .finish_over_time = 1,
- .duration = 0,
- .major = major,
- .first_minor = first_minor,
- .io_type = "",
- };
- blk_fill_rwbs(new_data.io_type, cmd_flags);
- bpf_map_update_elem(&tag_res, &key, &new_data, 0);
- } else if (curr_data == NULL) {
- struct stage_data new_data = {
- .start_count = 1,
- .finish_count = 1,
- .finish_over_time = 0,
- .duration = 0,
- .major = major,
- .first_minor = first_minor,
- .io_type = "",
- };
- blk_fill_rwbs(new_data.io_type, cmd_flags);
- bpf_map_update_elem(&tag_res, &key, &new_data, 0);
- } else {
- curr_data->duration += duration;
- update_curr_data_in_finish(curr_data, &params, duration);
- }
-
- struct time_range_io_count *curr_data_time_range;
- curr_data_time_range = bpf_map_lookup_elem(&tag_res_2, &curr_start_range);
- if (curr_data_time_range == NULL) {
- struct time_range_io_count new_data = { .count = {0} };
- bpf_map_update_elem(&tag_res_2, &curr_start_range, &new_data, 0);
- } else {
- if (key < MAP_SIZE && curr_data_time_range->count[key] > 0) {
- __sync_fetch_and_add(&curr_data_time_range->count[key], -1);
- }
- }
-
- bpf_map_delete_elem(&tag_map, &tagkey);
- bpf_map_delete_elem(&tag_args, &tagkey);
- return 0;
-}
-
-// start wbt
-SEC("kprobe/wbt_wait")
-int kprobe_wbt_wait(struct pt_regs *regs)
-{
- u64 wbtkey = bpf_get_current_task();
- u64 value = (u64)PT_REGS_PARM2(regs);
- (void)bpf_map_update_elem(&wbt_args, &wbtkey, &value, BPF_ANY);
-
- struct bio *bio;
- struct block_device *bd;
- struct gendisk *curr_rq_disk;
- int major, first_minor;
- unsigned int cmd_flags;
-
- bio = (struct bio *)value;
- bpf_core_read(&bd, sizeof(bd), &bio->bi_bdev);
- bpf_core_read(&curr_rq_disk, sizeof(curr_rq_disk), &bd->bd_disk);
- bpf_core_read(&major, sizeof(major), &curr_rq_disk->major);
- bpf_core_read(&first_minor, sizeof(first_minor), &curr_rq_disk->first_minor);
- bpf_core_read(&cmd_flags, sizeof(cmd_flags), &bio->bi_opf);
-
- if (major == 0) {
- log_event(STAGE_WBT, PERIOD_START, ERROR_MAJOR_ZERO);
- return 0;
- }
-
- u32 key = find_matching_key_wbt(major, first_minor);
- if (key >= MAP_SIZE) {
- return 0;
- }
-
- struct io_counter *counterp, zero = {};
- init_io_counter(&zero, major, first_minor);
- counterp = bpf_map_lookup_elem(&wbt_map, &wbtkey);
- if (counterp) {
- return 0;
- }
- long err = bpf_map_update_elem(&wbt_map, &wbtkey, &zero, BPF_NOEXIST);
- if (err) {
- log_event(STAGE_WBT, PERIOD_START, ERROR_UPDATE_FAIL);
- return 0;
- }
-
- u64 curr_start_range = zero.start_time / THRESHOLD;
-
- struct update_params params = {
- .major = major,
- .first_minor = first_minor,
- .cmd_flags = cmd_flags,
- .curr_start_range = curr_start_range,
- };
-
- struct stage_data *curr_data;
- curr_data = bpf_map_lookup_elem(&wbt_res, &key);
- if (!curr_data) {
- struct stage_data new_data = {
- .start_count = 1,
- .finish_count = 0,
- .finish_over_time = 0,
- .duration = 0,
- .major = major,
- .first_minor = first_minor,
- .io_type = "",
- };
- blk_fill_rwbs(new_data.io_type, cmd_flags);
- bpf_map_update_elem(&wbt_res, &key, &new_data, 0);
- } else {
- update_curr_data_in_start(curr_data, &params);
- }
-
- struct time_range_io_count *curr_data_time_range;
- curr_data_time_range = bpf_map_lookup_elem(&wbt_res_2, &curr_start_range);
- if (curr_data_time_range == NULL) {
- struct time_range_io_count new_data = { .count = {0} };
- bpf_map_update_elem(&wbt_res_2, &curr_start_range, &new_data, 0);
- } else {
- if (key < MAP_SIZE && key >= 0) {
- __sync_fetch_and_add(&curr_data_time_range->count[key], 1);
- }
- }
- return 0;
-}
-
-// finish wbt
-SEC("kretprobe/wbt_wait")
-int kretprobe_wbt_wait(struct pt_regs *regs)
-{
- u64 wbtkey = bpf_get_current_task();
- u64 *wbtargs = (u64 *)bpf_map_lookup_elem(&wbt_args, &wbtkey);
- if (wbtargs == NULL) {
- bpf_map_delete_elem(&wbt_args, &wbtkey);
- return 0;
- }
-
- struct bio *bio;
- struct block_device *bd;
- struct gendisk *curr_rq_disk;
- int major, first_minor;
- unsigned int cmd_flags;
-
- bio = (struct bio *)(*wbtargs);
- bpf_core_read(&bd, sizeof(bd), &bio->bi_bdev);
- bpf_core_read(&curr_rq_disk, sizeof(curr_rq_disk), &bd->bd_disk);
- bpf_core_read(&major, sizeof(major), &curr_rq_disk->major);
- bpf_core_read(&first_minor, sizeof(first_minor), &curr_rq_disk->first_minor);
- bpf_core_read(&cmd_flags, sizeof(cmd_flags), &bio->bi_opf);
-
- if (major == 0) {
- log_event(STAGE_WBT, PERIOD_END, ERROR_MAJOR_ZERO);
- return 0;
- }
-
- u32 key = find_matching_key_wbt(major, first_minor);
- if (key >= MAP_SIZE) {
- return 0;
- }
-
- struct io_counter *counterp = bpf_map_lookup_elem(&wbt_map, &wbtkey);
- if (!counterp) {
- return 0;
- }
-
- u64 duration = bpf_ktime_get_ns() - counterp->start_time;
- u64 curr_start_range = counterp->start_time / THRESHOLD;
-
- struct update_params params = {
- .major = major,
- .first_minor = first_minor,
- .cmd_flags = cmd_flags,
- .curr_start_range = curr_start_range,
- };
-
- struct stage_data *curr_data;
- curr_data = bpf_map_lookup_elem(&wbt_res, &key);
- if (curr_data == NULL && duration > DURATION_THRESHOLD) {
- struct stage_data new_data = {
- .start_count = 1,
- .finish_count = 1,
- .finish_over_time = 1,
- .duration = 0,
- .major = major,
- .first_minor = first_minor,
- .io_type = "",
- };
- blk_fill_rwbs(new_data.io_type, cmd_flags);
- bpf_map_update_elem(&wbt_res, &key, &new_data, 0);
- } else if (curr_data == NULL) {
- struct stage_data new_data = {
- .start_count = 1,
- .finish_count = 1,
- .finish_over_time = 0,
- .duration = 0,
- .io_type = "",
- .major = major,
- .first_minor = first_minor,
- };
- blk_fill_rwbs(new_data.io_type, cmd_flags);
- bpf_map_update_elem(&wbt_res, &key, &new_data, 0);
- } else {
- curr_data->duration += duration;
- update_curr_data_in_finish(curr_data, &params, duration);
- }
-
- struct time_range_io_count *curr_data_time_range;
- curr_data_time_range = bpf_map_lookup_elem(&wbt_res_2, &curr_start_range);
- if (curr_data_time_range == NULL) {
- struct time_range_io_count new_data = { .count = {0} };
- bpf_map_update_elem(&wbt_res_2, &curr_start_range, &new_data, 0);
- } else {
- if (key < MAP_SIZE && curr_data_time_range->count[key] > 0) {
- __sync_fetch_and_add(&curr_data_time_range->count[key], -1);
- }
- }
- bpf_map_delete_elem(&wbt_map, &wbtkey);
- bpf_map_delete_elem(&wbt_args, &wbtkey);
- return 0;
-}
-
char _license[] SEC("license") = "GPL";
diff --git a/src/services/sentryCollector/ebpf_collector/ebpf_collector.c b/src/services/sentryCollector/ebpf_collector/ebpf_collector.c
index 5a2528b..445cce7 100644
--- a/src/services/sentryCollector/ebpf_collector/ebpf_collector.c
+++ b/src/services/sentryCollector/ebpf_collector/ebpf_collector.c
@@ -155,7 +155,7 @@ static void update_io_dump(int fd, int *io_dump, int map_size, char *stage) {
if ((curr_time - io_dump_key) >= 2) {
int isempty = 1;
for (int key = 0; key < map_size; key++) {
- if (time_count.count[key] > 0) {
+ if (time_count.count[key] > IO_DUMP_DIFF) {
io_dump[key] += time_count.count[key];
isempty = 0;
}
diff --git a/src/services/sentryCollector/ebpf_collector/ebpf_collector.h b/src/services/sentryCollector/ebpf_collector/ebpf_collector.h
index adf926b..e9de49e 100644
--- a/src/services/sentryCollector/ebpf_collector/ebpf_collector.h
+++ b/src/services/sentryCollector/ebpf_collector/ebpf_collector.h
@@ -39,6 +39,10 @@ typedef unsigned int u32;
#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS))
#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK))
+#ifndef NULL
+#define NULL ((void *)0)
+#endif
+
// 阶段
#define STAGE_RQ_DRIVER 1
#define STAGE_BIO 2
@@ -55,6 +59,10 @@ typedef unsigned int u32;
#define ERROR_KEY_EXIST 3
#define ERROR_UPDATE_FAIL 4
#define ERROR_KEY_NOEXIST 5
+#define ERROR_DELETE_FAIL 6
+
+#define BATCH_COUT 100
+#define IO_DUMP_DIFF 3
enum stage_type {
BIO=0,
@@ -94,13 +102,13 @@ struct update_params {
struct time_range_io_count
{
- u32 count[MAP_SIZE];
+ int count[MAP_SIZE];
};
struct event {
- u32 stage;
- u64 period;
- u32 err;
+ int stage;
+ int period;
+ int err;
};
#endif /* __EBPFCOLLECTOR_H */
--
2.43.0

View File

@ -4,7 +4,7 @@
Summary: System Inspection Framework
Name: sysSentry
Version: 1.0.3
Release: 5
Release: 6
License: Mulan PSL v2
Group: System Environment/Daemons
Source0: https://gitee.com/openeuler/sysSentry/releases/download/v%{version}/%{name}-%{version}.tar.gz
@ -13,6 +13,7 @@ Patch1: add-bidirectional-communication-for-xalarm.patch
Patch2: fix-some-test-cases.patch
Patch3: add-log-for-xalarmd-and-fix-delete-on-iter-problem.patch
Patch4: fix-xalarm-log-not-print-and-add-on-iter-problem.patch
Patch5: add-new-func-for-ebpf-in-the-rq_driver-stage.patch
BuildRequires: cmake gcc-c++
BuildRequires: python3 python3-setuptools
@ -209,6 +210,12 @@ rm -rf /var/run/sysSentry | :
%attr(0550,root,root) %{python3_sitelib}/syssentry/bmc_alarm.py
%changelog
* Sat Feb 22 2025 zhuofeng <zhuofeng2@huawei.com> - 1.0.3-6
- Type:bugfix
- CVE:NA
- SUG:NA
- DESC: add new func for ebpf in the rq_driver stage
* Tue Feb 18 2025 caixiaomeng <caixiaomeng2@huawei.com> - 1.0.3-5
- Type:bugfix
- CVE:NA