add tool: cpuload
This commit is contained in:
parent
f83f777a44
commit
a63e65fac0
250
add-tool-cpuload.patch
Normal file
250
add-tool-cpuload.patch
Normal file
@ -0,0 +1,250 @@
|
||||
From 4d206acc6e85317cf29fc883beb7c852780fe072 Mon Sep 17 00:00:00 2001
|
||||
From: Liu Chao <liuchao173@huawei.com>
|
||||
Date: Mon, 6 Dec 2021 02:36:52 +0000
|
||||
Subject: [PATCH] add tool: cpuload
|
||||
|
||||
cpuload calculates the cpu usage, showing which tasks use up the cpu resources.
|
||||
|
||||
It displays the top N tasks when the cpu usage exceeds P%, recalculating the usage
|
||||
every T ms.
|
||||
|
||||
This works by tracing the sched switch events using tracepoints.
|
||||
|
||||
Since this uses BPF, only the root user can use this tool.
|
||||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
-t TIME, --time TIME interval to calculate, in milliseconds, default 1000
|
||||
-n NUMBER, --number NUMBER
|
||||
maximum tasks to print, default 3
|
||||
-p PERCENT_LIMIT, --percent_limit PERCENT_LIMIT
|
||||
minimum percent to print, default 90
|
||||
|
||||
example:
|
||||
[root@localhost ~]# ./cpuload.py -p 50 -n 2 -t 100
|
||||
Tracing task switch. Output when cpu is overload. Ctrl-C to end.
|
||||
DATE COMM PID CPU TIME(ms) %CPU
|
||||
2021-01-27 10:40:39 stress-ng-cpu 33179 1 100.529 96.68%
|
||||
2021-01-27 10:40:39 cpuload.py 395575 1 3.363 03.23%
|
||||
2021-01-27 10:40:39 stress-ng-cpu 33175 3 107.704 99.73%
|
||||
2021-01-27 10:40:39 sshd 2259 3 0.226 00.21%
|
||||
2021-01-27 10:40:39 stress-ng-cpu 33176 0 131.978 99.99%
|
||||
2021-01-27 10:40:39 kworker/0:0 388650 0 0.017 00.01%
|
||||
2021-01-27 10:40:39 stress-ng-cpu 33178 2 183.987 99.99%
|
||||
2021-01-27 10:40:39 kworker/2:0 391880 2 0.011 00.01%
|
||||
^C
|
||||
|
||||
Signed-off-by: liuchao173@huawei.com
|
||||
---
|
||||
tools/cpuload.py | 199 +++++++++++++++++++++++++++++++++++++++++++++++
|
||||
1 file changed, 199 insertions(+)
|
||||
create mode 100755 tools/cpuload.py
|
||||
|
||||
diff --git a/tools/cpuload.py b/tools/cpuload.py
|
||||
new file mode 100755
|
||||
index 0000000..f3378e9
|
||||
--- /dev/null
|
||||
+++ b/tools/cpuload.py
|
||||
@@ -0,0 +1,199 @@
|
||||
+#!/usr/bin/python
|
||||
+# @lint-avoid-python-3-compatibility-imports
|
||||
+#
|
||||
+# cpuload Display the top N tasks by cpu time when the usage of a cpu
|
||||
+# exceeds P percent over a calculation interval of T ms.
|
||||
+#
|
||||
+# USAGE: cpuload [-h] [-t time] [-n number] [-p percent_limit] [-m max_entry]
|
||||
+#
|
||||
+# This uses in-kernel eBPF maps to cache task details (PID and comm) by
|
||||
+# sched_switch, as well as a running time for calculating cpu usage.
|
||||
+#
|
||||
+# Licensed under the Apache License, Version 2.0 (the "License")
|
||||
+# Copyright (C) 2021 Huawei Technologies Co., Ltd.
|
||||
+
|
||||
+from __future__ import print_function
|
||||
+from bcc import BPF
|
||||
+from bcc.utils import printb
|
||||
+import argparse
|
||||
+from datetime import datetime
|
||||
+
|
||||
+# arguments
|
||||
+examples = """examples:
|
||||
+ ./cpuload # display tasks when cpu overload
|
||||
+ ./cpuload -t 100 # calculate cpu usage every 100 ms
|
||||
+ ./cpuload -n 5 # display top 5 tasks details
|
||||
+ ./cpuload -p 30 # display tasks when cpu usage exceeds 30%
|
||||
+ ./cpuload -m 10000 # set the maximum number of entry to 10,000
|
||||
+"""
|
||||
+parser = argparse.ArgumentParser(
|
||||
+ description="display tasks when cpu overload",
|
||||
+ formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
+ epilog=examples)
|
||||
+parser.add_argument("-t", "--time", default=1000,
|
||||
+ help="interval for calculating the CPU usage, in milliseconds(0 - 60000), default 1000")
|
||||
+parser.add_argument("-n", "--number", default=3,
|
||||
+ help="display top n tasks with high cpu usage, default 3")
|
||||
+parser.add_argument("-p", "--percent_limit", default=90,
|
||||
+ help="display when the usage of a cpu exceeds percent_limit(0 - 100), default 90")
|
||||
+parser.add_argument("-m", "--max_entry", default=10000,
|
||||
+ help="size of the cyclic buffer for recording the scheduling track(1000 - 1000000), default 10000")
|
||||
+parser.add_argument("--ebpf", action="store_true",
|
||||
+ help=argparse.SUPPRESS)
|
||||
+args = parser.parse_args()
|
||||
+time_ms = int(args.time)
|
||||
+time_ns = time_ms * 1000000
|
||||
+number = int(args.number)
|
||||
+percent_limit = int(args.percent_limit)
|
||||
+max_entry = int(args.max_entry)
|
||||
+debug = 0
|
||||
+
|
||||
+if time_ms > 60000 or time_ms < 0:
|
||||
+ print("time invalid")
|
||||
+ exit(1)
|
||||
+
|
||||
+if percent_limit > 100 or percent_limit < 0:
|
||||
+ print("percent_limit invalid")
|
||||
+ exit(1)
|
||||
+
|
||||
+if max_entry > 1000000 or max_entry < 1000:
|
||||
+ print("max_entry invalid")
|
||||
+ exit(1)
|
||||
+
|
||||
+# define BPF program
|
||||
+bpf_text = """
|
||||
+#include <linux/sched.h>
|
||||
+
|
||||
+#define MAX_TIME """ + str(time_ns) + """
|
||||
+#define THRESHOLD """ + str(percent_limit) + """
|
||||
+#define MAX_ENTRY """ + str(max_entry) + """
|
||||
+
|
||||
+struct cpu_data_t {
|
||||
+ u32 index;
|
||||
+ u32 number;
|
||||
+ u64 prev_time;
|
||||
+ u64 busy_time;
|
||||
+ u64 total_time;
|
||||
+};
|
||||
+
|
||||
+struct task_data_t {
|
||||
+ u32 pid;
|
||||
+ char comm[TASK_COMM_LEN];
|
||||
+ u64 delta;
|
||||
+};
|
||||
+
|
||||
+struct data_t {
|
||||
+ u32 index;
|
||||
+ u32 number;
|
||||
+ u64 total_time;
|
||||
+};
|
||||
+
|
||||
+BPF_PERCPU_ARRAY(cpu_data, struct cpu_data_t, 1);
|
||||
+
|
||||
+BPF_PERCPU_ARRAY(task_data, struct task_data_t, MAX_ENTRY);
|
||||
+
|
||||
+BPF_PERF_OUTPUT(events);
|
||||
+TRACEPOINT_PROBE(sched, sched_switch) {
|
||||
+ u32 index = 0;
|
||||
+ u64 now = bpf_ktime_get_ns(), delta;
|
||||
+ struct data_t data = {};
|
||||
+ struct cpu_data_t *cpu = cpu_data.lookup(&index);
|
||||
+ struct task_data_t *task;
|
||||
+
|
||||
+ if (cpu == NULL)
|
||||
+ return 0;
|
||||
+
|
||||
+ if (cpu->prev_time == 0) {
|
||||
+ cpu->prev_time = now;
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
+ index = (cpu->index + cpu->number) % MAX_ENTRY;
|
||||
+ task = task_data.lookup(&index);
|
||||
+ if (task == NULL)
|
||||
+ return 0;
|
||||
+
|
||||
+ delta = now - cpu->prev_time;
|
||||
+ if (args->prev_pid != 0) {
|
||||
+ cpu->busy_time += delta;
|
||||
+ task->pid = args->prev_pid;
|
||||
+ __builtin_memcpy(&task->comm, &args->prev_comm, sizeof(task->comm));
|
||||
+ task->delta = now - cpu->prev_time;
|
||||
+ cpu->number++;
|
||||
+ }
|
||||
+
|
||||
+ cpu->prev_time = now;
|
||||
+ cpu->total_time += delta;
|
||||
+
|
||||
+ if (cpu->total_time > MAX_TIME || cpu->number == MAX_ENTRY) {
|
||||
+ if (cpu->busy_time * 100 > cpu->total_time * THRESHOLD) {
|
||||
+ data.index = cpu->index;
|
||||
+ data.number = cpu->number;
|
||||
+ data.total_time = cpu->total_time;
|
||||
+ events.perf_submit(args, &data, sizeof(data));
|
||||
+ cpu->index = (index + 1) % MAX_ENTRY;
|
||||
+ }
|
||||
+ cpu->number = 0;
|
||||
+ cpu->busy_time = 0;
|
||||
+ cpu->total_time = 0;
|
||||
+ cpu->prev_time = now;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+"""
|
||||
+
|
||||
+if debug or args.ebpf:
|
||||
+ print(bpf_text)
|
||||
+ if args.ebpf:
|
||||
+ exit()
|
||||
+
|
||||
+# initialize BPF
|
||||
+b = BPF(text=bpf_text)
|
||||
+
|
||||
+print("Tracing task switch. Output when cpu is overload. Ctrl-C to end.")
|
||||
+
|
||||
+print("%-19s %-14s %-7s %-4s %-8s %-5s" %
|
||||
+ ("DATE", "COMM", "PID", "CPU", "TIME(ms)", "%CPU"))
|
||||
+
|
||||
+# process event
|
||||
+def print_event(cpu, data, size):
|
||||
+ date = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
||||
+ data = b["events"].event(data)
|
||||
+ dic = {}
|
||||
+ tasks = b["task_data"]
|
||||
+ if data.total_time < time_ns:
|
||||
+ print("max_entry is too small, please set more than %d" %
|
||||
+ (max_entry * time_ns / data.total_time))
|
||||
+ for i in range(data.index, data.number + data.index):
|
||||
+ task = tasks[i % max_entry][cpu]
|
||||
+ entry = dic.get(task.pid)
|
||||
+ if entry is not None:
|
||||
+ entry.delta += task.delta
|
||||
+ else:
|
||||
+ dic[task.pid] = task
|
||||
+
|
||||
+ count = 0
|
||||
+ for item in sorted(dic.items(), key=lambda x: x[1].delta, reverse=True):
|
||||
+ if count >= number:
|
||||
+ break
|
||||
+ task = item[1]
|
||||
+ u = task.delta * 100 / data.total_time
|
||||
+ print("%s %-14.14s %-7s %-4s %-8.3f %05.2f%%" % (
|
||||
+ date,
|
||||
+ task.comm.decode("utf-8", "replace"),
|
||||
+ task.pid,
|
||||
+ cpu,
|
||||
+ float(task.delta) / 1000000,
|
||||
+ u))
|
||||
+ count += 1
|
||||
+ dic.clear()
|
||||
+ print("---------------------------------------------------------------")
|
||||
+
|
||||
+# loop with callback to print_event
|
||||
+b["events"].open_perf_buffer(print_event)
|
||||
+while 1:
|
||||
+ try:
|
||||
+ b.perf_buffer_poll()
|
||||
+ except KeyboardInterrupt:
|
||||
+ exit()
|
||||
--
|
||||
2.23.0
|
||||
|
||||
6
bcc.spec
6
bcc.spec
@ -1,6 +1,6 @@
|
||||
Name: bcc
|
||||
Version: 0.15.0
|
||||
Release: 3
|
||||
Release: 4
|
||||
Summary: BPF Compiler Collection (BCC)
|
||||
License: ASL 2.0
|
||||
URL: https://github.com/iovisor/bcc
|
||||
@ -21,6 +21,7 @@ Requires: %{name}-tools = %{version}-%{release}
|
||||
Requires: libbpf >= 0.0.5-3
|
||||
|
||||
Patch: 0001-bugfix-tcp-and-udp-tools-failed.patch
|
||||
Patch: add-tool-cpuload.patch
|
||||
|
||||
%description
|
||||
BCC is a toolkit for creating efficient kernel tracing and manipulation
|
||||
@ -163,6 +164,9 @@ rm -rf %{buildroot}%{_datadir}/%{name}/tools/old/
|
||||
|
||||
|
||||
%changelog
|
||||
* Mon Dec 6 2021 liuchao <liuchao173@huawei.com> - 0.15.0-4
|
||||
- add tool: cpuload
|
||||
|
||||
* Mon Jun 21 2021 luzhihao <luzhihao@huawei.com> - 0.15.0-3
|
||||
- bugfix: tcp* BPF_SK_LOOKUP undeclared failed
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user