sysSentry/add-new-func-for-ebpf-in-the-rq_driver-stage.patch
zhuofeng 52606113b3 add new func for ebpf in the rq_driver stage
Signed-off-by: zhuofeng <1107893276@qq.com>
(cherry picked from commit ec94b36256cf018893e543b4b57e95e6c78e3a4a)
2025-03-13 20:18:05 +08:00

587 lines
21 KiB
Diff
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

From 966e539d3b0c0eaaa94fdd1fb21dd29e97e48bee Mon Sep 17 00:00:00 2001
From: zhuofeng <zhuofeng2@huawei.com>
Date: Sat, 22 Feb 2025 18:42:08 +0800
Subject: [PATCH] add new func for ebpf in the rq_driver stage
---
src/sentryPlugins/ai_block_io/detector.py | 3 +-
.../ebpf_collector/ebpf_collector.bpf.c | 459 ++++--------------
.../ebpf_collector/ebpf_collector.c | 2 +-
.../ebpf_collector/ebpf_collector.h | 16 +-
4 files changed, 116 insertions(+), 364 deletions(-)
diff --git a/src/sentryPlugins/ai_block_io/detector.py b/src/sentryPlugins/ai_block_io/detector.py
index 27fb7f7..2688cb1 100644
--- a/src/sentryPlugins/ai_block_io/detector.py
+++ b/src/sentryPlugins/ai_block_io/detector.py
@@ -55,11 +55,12 @@ class Detector:
detection_result = self._slidingWindow.is_slow_io_event(metric_value)
# 检测到慢周期由Detector负责打印info级别日志
if detection_result[0][1]:
+ ai_threshold = "None" if detection_result[2] is None else round(detection_result[2], 3)
logging.info(f'[abnormal_period]: disk: {self._metric_name.disk_name}, '
f'stage: {self._metric_name.stage_name}, '
f'iotype: {self._metric_name.io_access_type_name}, '
f'type: {self._metric_name.metric_name}, '
- f'ai_threshold: {round(detection_result[2], 3)}, '
+ f'ai_threshold: {ai_threshold}, '
f'curr_val: {metric_value}')
else:
logging.debug(f'Detection result: {str(detection_result)}')
diff --git a/src/services/sentryCollector/ebpf_collector/ebpf_collector.bpf.c b/src/services/sentryCollector/ebpf_collector/ebpf_collector.bpf.c
index 978b114..ece8c93 100644
--- a/src/services/sentryCollector/ebpf_collector/ebpf_collector.bpf.c
+++ b/src/services/sentryCollector/ebpf_collector/ebpf_collector.bpf.c
@@ -115,7 +115,7 @@ struct {
} ringbuf SEC(".maps");
-static void log_event(u32 stage, u32 period, u32 err) {
+static void log_event(int stage, int period, int err) {
struct event *e;
void *data = bpf_ringbuf_reserve(&ringbuf, sizeof(struct event), 0);
if (!data)
@@ -338,6 +338,105 @@ int kprobe_blk_mq_start_request(struct pt_regs *regs) {
return 0;
}
+SEC("kprobe/blk_mq_end_request_batch")
+int kprobe_blk_mq_end_request_batch(struct pt_regs *regs) {
+ struct io_comp_batch *iob = (struct io_comp_batch *)PT_REGS_PARM1(regs);
+ struct request *rq;
+ struct request_queue *q;
+ struct gendisk *curr_rq_disk;
+ int major, first_minor;
+ unsigned int cmd_flags;
+ struct io_counter *counterp;
+ struct stage_data *curr_data;
+ rq = BPF_CORE_READ(iob, req_list);
+
+    for (int i = 0; rq != NULL && i <= BATCH_COUT; i++) {
+ bpf_core_read(&q, sizeof(q), &rq->q);
+ bpf_core_read(&curr_rq_disk, sizeof(curr_rq_disk), &q->disk);
+ bpf_core_read(&major, sizeof(major), &curr_rq_disk->major);
+ bpf_core_read(&first_minor, sizeof(first_minor), &curr_rq_disk->first_minor);
+ bpf_core_read(&cmd_flags, sizeof(cmd_flags), &rq->cmd_flags);
+
+ if (major == 0) {
+ log_event(STAGE_RQ_DRIVER, PERIOD_END, ERROR_MAJOR_ZERO);
+ continue;
+ }
+
+ u32 key = find_matching_key_rq_driver(major, first_minor);
+ if (key >= MAP_SIZE) {
+ continue;
+ }
+
+ counterp = bpf_map_lookup_elem(&blk_map, &rq);
+ if (!counterp) {
+ continue;
+ }
+
+ u64 duration = bpf_ktime_get_ns() - counterp->start_time;
+ u64 curr_start_range = counterp->start_time / THRESHOLD;
+
+ struct update_params params = {
+ .major = major,
+ .first_minor = first_minor,
+ .cmd_flags = cmd_flags,
+ .curr_start_range = curr_start_range,
+ };
+
+ curr_data = bpf_map_lookup_elem(&blk_res, &key);
+ if (curr_data == NULL && duration > DURATION_THRESHOLD) {
+ struct stage_data new_data = {
+ .start_count = 1,
+ .finish_count = 1,
+ .finish_over_time = 1,
+ .duration = 0,
+ .major = major,
+ .first_minor = first_minor,
+ .io_type = "",
+ };
+ blk_fill_rwbs(new_data.io_type, cmd_flags);
+ bpf_map_update_elem(&blk_res, &key, &new_data, 0);
+ } else if (curr_data == NULL) {
+ struct stage_data new_data = {
+ .start_count = 1,
+ .finish_count = 1,
+ .finish_over_time = 0,
+ .duration = 0,
+ .major = major,
+ .first_minor = first_minor,
+ .io_type = "",
+ };
+ blk_fill_rwbs(new_data.io_type, cmd_flags);
+ bpf_map_update_elem(&blk_res, &key, &new_data, 0);
+ } else {
+ curr_data->duration += duration;
+ update_curr_data_in_finish(curr_data, &params, duration);
+ }
+
+ struct time_range_io_count *curr_data_time_range;
+ curr_data_time_range = bpf_map_lookup_elem(&blk_res_2, &curr_start_range);
+ if (curr_data_time_range == NULL) {
+            struct time_range_io_count new_data = { .count = {0} };
+ bpf_map_update_elem(&blk_res_2, &curr_start_range, &new_data, 0);
+ } else {
+ if (key < MAP_SIZE && key >= 0) {
+ __sync_fetch_and_add(&curr_data_time_range->count[key], -1);
+ }
+ }
+ struct request *rq_next = BPF_CORE_READ(rq, rq_next);
+ bpf_map_delete_elem(&blk_map, &rq);
+ rq = rq_next;
+
+ if (!rq) {
+ break;
+ }
+
+ if (i >= BATCH_COUT) {
+ log_event(STAGE_RQ_DRIVER, PERIOD_END, i);
+ }
+ }
+ return 0;
+}
+
// finish rq_driver
SEC("kprobe/blk_mq_free_request")
int kprobe_blk_mq_free_request(struct pt_regs *regs)
@@ -418,7 +517,7 @@ int kprobe_blk_mq_free_request(struct pt_regs *regs)
struct time_range_io_count new_data = { .count = {0} };
bpf_map_update_elem(&blk_res_2, &curr_start_range, &new_data, 0);
} else {
- if (key < MAP_SIZE && curr_data_time_range->count[key] > 0) {
+ if (key < MAP_SIZE && key >= 0) {
__sync_fetch_and_add(&curr_data_time_range->count[key], -1);
}
}
@@ -463,7 +562,6 @@ int kprobe_blk_mq_submit_bio(struct pt_regs *regs)
long err = bpf_map_update_elem(&bio_map, &bio, &zero, BPF_NOEXIST);
if (err) {
- log_event(STAGE_BIO, PERIOD_START, ERROR_UPDATE_FAIL);
return 0;
}
@@ -597,359 +695,4 @@ int kprobe_bio_endio(struct pt_regs *regs)
return 0;
}
-// start get_tag
-SEC("kprobe/blk_mq_get_tag")
-int kprobe_blk_mq_get_tag(struct pt_regs *regs)
-{
- u64 tagkey = bpf_get_current_task();
- u64 value = (u64)PT_REGS_PARM1(regs);
- (void)bpf_map_update_elem(&tag_args, &tagkey, &value, BPF_ANY);
-
- struct blk_mq_alloc_data *bd;
- struct request_queue *q;
- struct gendisk *curr_rq_disk;
- int major, first_minor;
- unsigned int cmd_flags = 0;
-
- bd = (struct blk_mq_alloc_data *)value;
- bpf_core_read(&q, sizeof(q), &bd->q);
- bpf_core_read(&curr_rq_disk, sizeof(curr_rq_disk), &q->disk);
- bpf_core_read(&major, sizeof(major), &curr_rq_disk->major);
- bpf_core_read(&first_minor, sizeof(first_minor), &curr_rq_disk->first_minor);
-
- if (major == 0) {
- log_event(STAGE_GET_TAG, PERIOD_START, ERROR_MAJOR_ZERO);
- return 0;
- }
-
- u32 key = find_matching_key_get_tag(major, first_minor);
- if (key >= MAP_SIZE) {
- return 0;
- }
-
- struct io_counter *counterp, zero = {};
- init_io_counter(&zero, major, first_minor);
- counterp = bpf_map_lookup_elem(&tag_map, &tagkey);
- if (counterp) {
- return 0;
- }
- long err = bpf_map_update_elem(&tag_map, &tagkey, &zero, BPF_NOEXIST);
- if (err) {
- log_event(STAGE_GET_TAG, PERIOD_START, ERROR_UPDATE_FAIL);
- return 0;
- }
-
- u64 curr_start_range = zero.start_time / THRESHOLD;
-
- struct update_params params = {
- .major = major,
- .first_minor = first_minor,
- .cmd_flags = cmd_flags,
- .curr_start_range = curr_start_range,
- };
-
- struct stage_data *curr_data;
- curr_data = bpf_map_lookup_elem(&tag_res, &key);
- if (!curr_data) {
- struct stage_data new_data = {
- .start_count = 1,
- .finish_count = 0,
- .finish_over_time = 0,
- .duration = 0,
- .major = major,
- .first_minor = first_minor,
- .io_type = "",
- };
- blk_fill_rwbs(new_data.io_type, cmd_flags);
- bpf_map_update_elem(&tag_res, &key, &new_data, 0);
- } else {
- update_curr_data_in_start(curr_data, &params);
- }
-
- struct time_range_io_count *curr_data_time_range;
- curr_data_time_range = bpf_map_lookup_elem(&tag_res_2, &curr_start_range);
- if (curr_data_time_range == NULL) {
- struct time_range_io_count new_data = { .count = {0} };
- bpf_map_update_elem(&tag_res_2, &curr_start_range, &new_data, 0);
- } else {
- if (key < MAP_SIZE && key >= 0) {
- __sync_fetch_and_add(&curr_data_time_range->count[key], 1);
- }
- }
- return 0;
-}
-
-// finish get_tag
-SEC("kretprobe/blk_mq_get_tag")
-int kretprobe_blk_mq_get_tag(struct pt_regs *regs)
-{
- u64 tagkey = bpf_get_current_task();
- u64 *tagargs = (u64 *)bpf_map_lookup_elem(&tag_args, &tagkey);
- if (tagargs == NULL) {
- bpf_map_delete_elem(&tag_args, &tagkey);
- return 0;
- }
-
- struct blk_mq_alloc_data *bd;
- struct request_queue *q;
- struct gendisk *curr_rq_disk;
- int major, first_minor;
- unsigned int cmd_flags = 0;
-
- bd = (struct blk_mq_alloc_data *)*tagargs;
- bpf_core_read(&q, sizeof(q), &bd->q);
- bpf_core_read(&curr_rq_disk, sizeof(curr_rq_disk), &q->disk);
- bpf_core_read(&major, sizeof(major), &curr_rq_disk->major);
- bpf_core_read(&first_minor, sizeof(first_minor), &curr_rq_disk->first_minor);
-
- if (major == 0) {
- log_event(STAGE_GET_TAG, PERIOD_END, ERROR_MAJOR_ZERO);
- return 0;
- }
-
- u32 key = find_matching_key_get_tag(major, first_minor);
- if (key >= MAP_SIZE) {
- return 0;
- }
-
- struct io_counter *counterp = bpf_map_lookup_elem(&tag_map, &tagkey);
- if (!counterp) {
- return 0;
- }
-
- u64 duration = bpf_ktime_get_ns() - counterp->start_time;
- u64 curr_start_range = counterp->start_time / THRESHOLD;
-
- struct update_params params = {
- .major = major,
- .first_minor = first_minor,
- .cmd_flags = cmd_flags,
- .curr_start_range = curr_start_range,
- };
-
- struct stage_data *curr_data;
- curr_data = bpf_map_lookup_elem(&tag_res, &key);
- if (curr_data == NULL && duration > DURATION_THRESHOLD) {
- struct stage_data new_data = {
- .start_count = 1,
- .finish_count = 1,
- .finish_over_time = 1,
- .duration = 0,
- .major = major,
- .first_minor = first_minor,
- .io_type = "",
- };
- blk_fill_rwbs(new_data.io_type, cmd_flags);
- bpf_map_update_elem(&tag_res, &key, &new_data, 0);
- } else if (curr_data == NULL) {
- struct stage_data new_data = {
- .start_count = 1,
- .finish_count = 1,
- .finish_over_time = 0,
- .duration = 0,
- .major = major,
- .first_minor = first_minor,
- .io_type = "",
- };
- blk_fill_rwbs(new_data.io_type, cmd_flags);
- bpf_map_update_elem(&tag_res, &key, &new_data, 0);
- } else {
- curr_data->duration += duration;
- update_curr_data_in_finish(curr_data, &params, duration);
- }
-
- struct time_range_io_count *curr_data_time_range;
- curr_data_time_range = bpf_map_lookup_elem(&tag_res_2, &curr_start_range);
- if (curr_data_time_range == NULL) {
- struct time_range_io_count new_data = { .count = {0} };
- bpf_map_update_elem(&tag_res_2, &curr_start_range, &new_data, 0);
- } else {
- if (key < MAP_SIZE && curr_data_time_range->count[key] > 0) {
- __sync_fetch_and_add(&curr_data_time_range->count[key], -1);
- }
- }
-
- bpf_map_delete_elem(&tag_map, &tagkey);
- bpf_map_delete_elem(&tag_args, &tagkey);
- return 0;
-}
-
-// start wbt
-SEC("kprobe/wbt_wait")
-int kprobe_wbt_wait(struct pt_regs *regs)
-{
- u64 wbtkey = bpf_get_current_task();
- u64 value = (u64)PT_REGS_PARM2(regs);
- (void)bpf_map_update_elem(&wbt_args, &wbtkey, &value, BPF_ANY);
-
- struct bio *bio;
- struct block_device *bd;
- struct gendisk *curr_rq_disk;
- int major, first_minor;
- unsigned int cmd_flags;
-
- bio = (struct bio *)value;
- bpf_core_read(&bd, sizeof(bd), &bio->bi_bdev);
- bpf_core_read(&curr_rq_disk, sizeof(curr_rq_disk), &bd->bd_disk);
- bpf_core_read(&major, sizeof(major), &curr_rq_disk->major);
- bpf_core_read(&first_minor, sizeof(first_minor), &curr_rq_disk->first_minor);
- bpf_core_read(&cmd_flags, sizeof(cmd_flags), &bio->bi_opf);
-
- if (major == 0) {
- log_event(STAGE_WBT, PERIOD_START, ERROR_MAJOR_ZERO);
- return 0;
- }
-
- u32 key = find_matching_key_wbt(major, first_minor);
- if (key >= MAP_SIZE) {
- return 0;
- }
-
- struct io_counter *counterp, zero = {};
- init_io_counter(&zero, major, first_minor);
- counterp = bpf_map_lookup_elem(&wbt_map, &wbtkey);
- if (counterp) {
- return 0;
- }
- long err = bpf_map_update_elem(&wbt_map, &wbtkey, &zero, BPF_NOEXIST);
- if (err) {
- log_event(STAGE_WBT, PERIOD_START, ERROR_UPDATE_FAIL);
- return 0;
- }
-
- u64 curr_start_range = zero.start_time / THRESHOLD;
-
- struct update_params params = {
- .major = major,
- .first_minor = first_minor,
- .cmd_flags = cmd_flags,
- .curr_start_range = curr_start_range,
- };
-
- struct stage_data *curr_data;
- curr_data = bpf_map_lookup_elem(&wbt_res, &key);
- if (!curr_data) {
- struct stage_data new_data = {
- .start_count = 1,
- .finish_count = 0,
- .finish_over_time = 0,
- .duration = 0,
- .major = major,
- .first_minor = first_minor,
- .io_type = "",
- };
- blk_fill_rwbs(new_data.io_type, cmd_flags);
- bpf_map_update_elem(&wbt_res, &key, &new_data, 0);
- } else {
- update_curr_data_in_start(curr_data, &params);
- }
-
- struct time_range_io_count *curr_data_time_range;
- curr_data_time_range = bpf_map_lookup_elem(&wbt_res_2, &curr_start_range);
- if (curr_data_time_range == NULL) {
- struct time_range_io_count new_data = { .count = {0} };
- bpf_map_update_elem(&wbt_res_2, &curr_start_range, &new_data, 0);
- } else {
- if (key < MAP_SIZE && key >= 0) {
- __sync_fetch_and_add(&curr_data_time_range->count[key], 1);
- }
- }
- return 0;
-}
-
-// finish wbt
-SEC("kretprobe/wbt_wait")
-int kretprobe_wbt_wait(struct pt_regs *regs)
-{
- u64 wbtkey = bpf_get_current_task();
- u64 *wbtargs = (u64 *)bpf_map_lookup_elem(&wbt_args, &wbtkey);
- if (wbtargs == NULL) {
- bpf_map_delete_elem(&wbt_args, &wbtkey);
- return 0;
- }
-
- struct bio *bio;
- struct block_device *bd;
- struct gendisk *curr_rq_disk;
- int major, first_minor;
- unsigned int cmd_flags;
-
- bio = (struct bio *)(*wbtargs);
- bpf_core_read(&bd, sizeof(bd), &bio->bi_bdev);
- bpf_core_read(&curr_rq_disk, sizeof(curr_rq_disk), &bd->bd_disk);
- bpf_core_read(&major, sizeof(major), &curr_rq_disk->major);
- bpf_core_read(&first_minor, sizeof(first_minor), &curr_rq_disk->first_minor);
- bpf_core_read(&cmd_flags, sizeof(cmd_flags), &bio->bi_opf);
-
- if (major == 0) {
- log_event(STAGE_WBT, PERIOD_END, ERROR_MAJOR_ZERO);
- return 0;
- }
-
- u32 key = find_matching_key_wbt(major, first_minor);
- if (key >= MAP_SIZE) {
- return 0;
- }
-
- struct io_counter *counterp = bpf_map_lookup_elem(&wbt_map, &wbtkey);
- if (!counterp) {
- return 0;
- }
-
- u64 duration = bpf_ktime_get_ns() - counterp->start_time;
- u64 curr_start_range = counterp->start_time / THRESHOLD;
-
- struct update_params params = {
- .major = major,
- .first_minor = first_minor,
- .cmd_flags = cmd_flags,
- .curr_start_range = curr_start_range,
- };
-
- struct stage_data *curr_data;
- curr_data = bpf_map_lookup_elem(&wbt_res, &key);
- if (curr_data == NULL && duration > DURATION_THRESHOLD) {
- struct stage_data new_data = {
- .start_count = 1,
- .finish_count = 1,
- .finish_over_time = 1,
- .duration = 0,
- .major = major,
- .first_minor = first_minor,
- .io_type = "",
- };
- blk_fill_rwbs(new_data.io_type, cmd_flags);
- bpf_map_update_elem(&wbt_res, &key, &new_data, 0);
- } else if (curr_data == NULL) {
- struct stage_data new_data = {
- .start_count = 1,
- .finish_count = 1,
- .finish_over_time = 0,
- .duration = 0,
- .io_type = "",
- .major = major,
- .first_minor = first_minor,
- };
- blk_fill_rwbs(new_data.io_type, cmd_flags);
- bpf_map_update_elem(&wbt_res, &key, &new_data, 0);
- } else {
- curr_data->duration += duration;
- update_curr_data_in_finish(curr_data, &params, duration);
- }
-
- struct time_range_io_count *curr_data_time_range;
- curr_data_time_range = bpf_map_lookup_elem(&wbt_res_2, &curr_start_range);
- if (curr_data_time_range == NULL) {
- struct time_range_io_count new_data = { .count = {0} };
- bpf_map_update_elem(&wbt_res_2, &curr_start_range, &new_data, 0);
- } else {
- if (key < MAP_SIZE && curr_data_time_range->count[key] > 0) {
- __sync_fetch_and_add(&curr_data_time_range->count[key], -1);
- }
- }
- bpf_map_delete_elem(&wbt_map, &wbtkey);
- bpf_map_delete_elem(&wbt_args, &wbtkey);
- return 0;
-}
-
char _license[] SEC("license") = "GPL";
diff --git a/src/services/sentryCollector/ebpf_collector/ebpf_collector.c b/src/services/sentryCollector/ebpf_collector/ebpf_collector.c
index 5a2528b..445cce7 100644
--- a/src/services/sentryCollector/ebpf_collector/ebpf_collector.c
+++ b/src/services/sentryCollector/ebpf_collector/ebpf_collector.c
@@ -155,7 +155,7 @@ static void update_io_dump(int fd, int *io_dump, int map_size, char *stage) {
if ((curr_time - io_dump_key) >= 2) {
int isempty = 1;
for (int key = 0; key < map_size; key++) {
- if (time_count.count[key] > 0) {
+ if (time_count.count[key] > IO_DUMP_DIFF) {
io_dump[key] += time_count.count[key];
isempty = 0;
}
diff --git a/src/services/sentryCollector/ebpf_collector/ebpf_collector.h b/src/services/sentryCollector/ebpf_collector/ebpf_collector.h
index adf926b..e9de49e 100644
--- a/src/services/sentryCollector/ebpf_collector/ebpf_collector.h
+++ b/src/services/sentryCollector/ebpf_collector/ebpf_collector.h
@@ -39,6 +39,10 @@ typedef unsigned int u32;
#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS))
#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK))
+#ifndef NULL
+#define NULL ((void *)0)
+#endif
+
// 阶段
#define STAGE_RQ_DRIVER 1
#define STAGE_BIO 2
@@ -55,6 +59,10 @@ typedef unsigned int u32;
#define ERROR_KEY_EXIST 3
#define ERROR_UPDATE_FAIL 4
#define ERROR_KEY_NOEXIST 5
+#define ERROR_DELETE_FAIL 6
+
+#define BATCH_COUT 100
+#define IO_DUMP_DIFF 3
enum stage_type {
BIO=0,
@@ -94,13 +102,13 @@ struct update_params {
struct time_range_io_count
{
- u32 count[MAP_SIZE];
+ int count[MAP_SIZE];
};
struct event {
- u32 stage;
- u64 period;
- u32 err;
+ int stage;
+ int period;
+ int err;
};
#endif /* __EBPFCOLLECTOR_H */
--
2.43.0