!52 add local file test case
From: @maskinghk Reviewed-by: @chenwei_kernel Signed-off-by: @chenwei_kernel
This commit is contained in:
commit
9812bebaef
228
0006-add-local-file-test-case.patch
Normal file
228
0006-add-local-file-test-case.patch
Normal file
@ -0,0 +1,228 @@
|
||||
From b2ed1a3df17c7c080d156fa2dde11895481d2e97 Mon Sep 17 00:00:00 2001
|
||||
From: wangfenglai1 <wangfenglai1@huawei.com>
|
||||
Date: Fri, 17 Jan 2025 17:00:00 +0800
|
||||
Subject: [PATCH] add local file test case
|
||||
|
||||
---
|
||||
tests/e2e_tests/test_local_file.py | 209 +++++++++++++++++++++++++++++
|
||||
1 file changed, 209 insertions(+)
|
||||
create mode 100644 tests/e2e_tests/test_local_file.py
|
||||
|
||||
diff --git a/tests/e2e_tests/test_local_file.py b/tests/e2e_tests/test_local_file.py
|
||||
new file mode 100644
|
||||
index 0000000..d16b271
|
||||
--- /dev/null
|
||||
+++ b/tests/e2e_tests/test_local_file.py
|
||||
@@ -0,0 +1,209 @@
|
||||
+#!/usr/bin/python3
|
||||
+# ******************************************************************************
|
||||
+# Copyright (c) 2022 Huawei Technologies Co., Ltd.
|
||||
+# gala-anteater is licensed under Mulan PSL v2.
|
||||
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
|
||||
+# You may obtain a copy of Mulan PSL v2 at:
|
||||
+# http://license.coscl.org.cn/MulanPSL2
|
||||
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
+# See the Mulan PSL v2 for more details.
|
||||
+# ******************************************************************************/
|
||||
+"""
|
||||
+Time: 2025-01-17
|
||||
+Author: wangfl
|
||||
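+Description: Offline test script that scans a directory for CSV files, flags 3-sigma fault points in fixed-size windows, and saves a plot and a result CSV for each file.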
|
||||
+"""
|
||||
+import os
|
||||
+import argparse
|
||||
+import logging
|
||||
+import pandas as pd
|
||||
+import numpy as np
|
||||
+import matplotlib.pyplot as plt
|
||||
+
|
||||
+logging.basicConfig(level=logging.ERROR)
|
||||
+
|
||||
+
|
||||
def read_table_data_pandas(file_path):
    """
    Load tabular data from a CSV file with pandas.

    Args:
        file_path (str): Path of the CSV file to read.

    Returns:
        pandas.DataFrame: The parsed table, or None when the file does not
        exist or any other error occurs while reading it. Failures are
        logged at ERROR level instead of being raised.
    """
    try:
        return pd.read_csv(file_path)
    except FileNotFoundError:
        logging.error(f"Error: The file {file_path} does not exist.")
    except Exception as exc:
        logging.error(f"Error: An error occurred while reading the file {file_path}: {exc}")
    # Both failure paths fall through to the same "no data" result.
    return None
|
||||
+
|
||||
+
|
||||
def calculate_thresholds(window, sigma_multiplier=3):
    """
    Compute the upper and lower thresholds for one window of data.

    The thresholds are the mean of the window's 'value' column plus and
    minus ``sigma_multiplier`` times its standard deviation, as computed by
    ``Series.mean()`` and ``Series.std()``.

    Args:
        window (pandas.DataFrame): Rows of the window; must contain a
            'value' column.
        sigma_multiplier (float): How many standard deviations away from
            the mean the thresholds are placed. Defaults to 3, which is the
            band this function always used before the parameter existed.

    Returns:
        tuple: ``(upper_threshold, lower_threshold)``.
    """
    values = window['value']
    center = values.mean()
    band = sigma_multiplier * values.std()
    return center + band, center - band
|
||||
+
|
||||
+
|
||||
def check_deviation(window, upper_threshold, lower_threshold):
    """
    Tell, row by row, whether the window's values fall outside the thresholds.

    Args:
        window (pandas.DataFrame): Rows of the window; must contain a
            'value' column.
        upper_threshold (float): Values strictly above this are deviating.
        lower_threshold (float): Values strictly below this are deviating.

    Returns:
        pandas.Series: Boolean series, True where the value is above the
        upper threshold or below the lower threshold.
    """
    values = window['value']
    above_upper = values.gt(upper_threshold)
    below_lower = values.lt(lower_threshold)
    return above_upper | below_lower
|
||||
+
|
||||
+
|
||||
def check_consecutive_deviation(deviation_status_list, consecutive_count=5):
    """
    Mark fault points based on runs of consecutive deviating points.

    A point is marked as a fault only once the current run of deviating
    points has reached ``consecutive_count``; the earlier points of that
    run stay unmarked. Any non-deviating point resets the run.

    Args:
        deviation_status_list (list): Per-point deviation flags.
        consecutive_count (int): Run length at which points start being
            marked as faults. Defaults to 5.

    Returns:
        list: Boolean list of the same length, True for fault points.
    """
    fault_flags = []
    run_length = 0
    for is_deviating in deviation_status_list:
        # Extend the current run, or start over on a normal point.
        run_length = run_length + 1 if is_deviating else 0
        fault_flags.append(True if run_length >= consecutive_count else False)
    return fault_flags
|
||||
+
|
||||
+
|
||||
def analyze_table_data_pandas(df, window_size=600):
    """
    Flag fault points in ``df`` using non-overlapping, fixed-size windows.

    For every complete window of ``window_size`` rows the thresholds are
    obtained from ``calculate_thresholds``, the per-row deviation from
    ``check_deviation``, and the final fault flags from
    ``check_consecutive_deviation`` (with its default run length).

    Rows after the last complete window are not analysed: they keep
    ``deviation_status`` False and NaN thresholds. This includes the whole
    frame when it has fewer than ``window_size`` rows.

    Args:
        df (pandas.DataFrame): Table with a 'value' column. It is modified
            in place: the columns 'deviation_status', 'lower_threshold' and
            'upper_threshold' are added or overwritten.
        window_size (int): Number of rows per window. Defaults to 600.

    Returns:
        pandas.DataFrame: The same ``df`` object with the three result
        columns filled in.
    """
    total_rows = len(df)
    # Results are accumulated positionally, so they line up with the
    # positional ``iloc`` windows whatever index ``df`` carries. Writing
    # them back through label-based ``loc[start:end - 1]`` is only correct
    # for a default 0..n-1 index.
    fault_flags = np.zeros(total_rows, dtype=bool)
    lower_bounds = np.full(total_rows, np.nan)
    upper_bounds = np.full(total_rows, np.nan)

    num_windows = total_rows // window_size
    for window_index in range(num_windows):
        start = window_index * window_size
        end = start + window_size
        window = df.iloc[start:end]
        upper_threshold, lower_threshold = calculate_thresholds(window)
        deviation_status = check_deviation(window, upper_threshold, lower_threshold)
        fault_flags[start:end] = check_consecutive_deviation(deviation_status.tolist())
        lower_bounds[start:end] = lower_threshold
        upper_bounds[start:end] = upper_threshold

    # Assigning plain arrays is positional, so no index alignment happens.
    df['deviation_status'] = fault_flags
    df['lower_threshold'] = lower_bounds
    df['upper_threshold'] = upper_bounds
    return df
|
||||
+
|
||||
+
|
||||
def plot_table_data(df, output_path, window_size):
    """
    Plot the analysed data with matplotlib and save the figure to a file.

    Normal points are drawn as a blue line with round markers, fault points
    as red crosses, and the upper/lower thresholds as dashed lines. A grid
    is added before the image is written.

    Args:
        df (pandas.DataFrame): Table with 'timestamp', 'value',
            'deviation_status', 'upper_threshold' and 'lower_threshold'
            columns.
        output_path (str): Path the image is saved to.
        window_size (int): Window size used for the analysis. Currently
            unused by the plot; kept so existing callers keep working.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        is_fault = df['deviation_status']

        # Normal points.
        normal_df = df[~is_fault]
        ax.plot(normal_df['timestamp'], normal_df['value'], label='Normal', marker='o', linestyle='-',
                color='blue')

        # Fault points.
        fault_df = df[is_fault]
        ax.plot(fault_df['timestamp'], fault_df['value'], label='Fault', marker='x', linestyle='None',
                color='red', markersize=10)

        # Upper and lower threshold lines.
        ax.plot(df['timestamp'], df["upper_threshold"], label='Upper Threshold', linestyle='--', color='green')
        ax.plot(df['timestamp'], df["lower_threshold"], label='Lower Threshold', linestyle='--', color='orange')

        ax.set_xlabel('Timestamp')
        ax.set_ylabel('Value')
        ax.set_title('Table Data Analysis')
        ax.legend()
        ax.grid(True, which='both', linestyle='--', linewidth=0.5, color='gray')

        # Save this figure explicitly instead of relying on pyplot's notion
        # of the "current" figure.
        fig.savefig(output_path)
    finally:
        # Always release the figure, even when plotting or saving fails;
        # otherwise figures pile up across the files of a batch run.
        plt.close(fig)
|
||||
+
|
||||
+
|
||||
def save_analysis_result(df, result_path):
    """
    Write the analysis result to a CSV file, without the index column.

    Any error raised while writing is logged at ERROR level and not
    propagated to the caller.

    Args:
        df (pandas.DataFrame): Table holding the analysis result.
        result_path (str): Path of the CSV file to write.
    """
    try:
        df.to_csv(result_path, index=False)
    except Exception as exc:
        logging.error(f"Error: An error occurred while saving the file {result_path}: {exc}")
    return None
|
||||
+
|
||||
+
|
||||
def main():
    """
    Command-line entry point: analyse every CSV file under a directory.

    The directory (default './test') is walked recursively. For each input
    ``<name>.csv`` the analysis is run, a plot is saved as ``<name>.png``
    and the result table as ``<name>_result.csv`` next to the input file.

    Files ending in ``_result.csv`` are treated as outputs of an earlier
    run and skipped, so running the script twice does not analyse its own
    results. Files that cannot be read, or that lack the 'timestamp' or
    'value' column, are logged and skipped without aborting the batch.
    """
    parser = argparse.ArgumentParser(description='Table Data Analysis')
    parser.add_argument('directory', type=str, nargs='?', default='./test',
                        help='Path to the directory containing CSV files')
    parser.add_argument('--window_size', type=int, default=600, help='Window size for analysis')
    args = parser.parse_args()

    # A zero window size would otherwise fail with a ZeroDivisionError deep
    # inside the analysis; reject it at the command-line boundary instead.
    if args.window_size <= 0:
        parser.error('--window_size must be a positive integer')

    result_suffix = '_result.csv'
    required_columns = {'timestamp', 'value'}

    # Walk every file below the directory.
    for root, _dirs, files in os.walk(args.directory):
        for file_name in files:
            if not file_name.endswith('.csv') or file_name.endswith(result_suffix):
                continue

            file_path = os.path.join(root, file_name)
            # Input file name without directory and extension.
            stem = os.path.splitext(file_name)[0]
            output_image_path = os.path.join(root, f'{stem}.png')
            output_result_path = os.path.join(root, f'{stem}{result_suffix}')

            df = read_table_data_pandas(file_path)
            if df is None:
                continue

            missing_columns = required_columns - set(df.columns)
            if missing_columns:
                logging.error("Error: The file %s is missing required columns: %s",
                              file_path, sorted(missing_columns))
                continue

            analysis_result = analyze_table_data_pandas(df, args.window_size)
            plot_table_data(analysis_result, output_image_path, args.window_size)
            save_analysis_result(analysis_result, output_result_path)


if __name__ == "__main__":
    main()
|
||||
\ No newline at end of file
|
||||
--
|
||||
Gitee
|
||||
@ -2,7 +2,7 @@
|
||||
|
||||
Name: gala-anteater
|
||||
Version: 1.2.1
|
||||
Release: 6
|
||||
Release: 7
|
||||
Summary: A time-series anomaly detection platform for operating system.
|
||||
License: MulanPSL2
|
||||
URL: https://gitee.com/openeuler/gala-anteater
|
||||
@ -16,6 +16,7 @@ patch1: 0002-configure-group-in-json.patch
|
||||
patch2: 0003-remove-unuse-code.patch
|
||||
patch3: 0004-fixbug-wrong-label-for-dbscan.patch
|
||||
patch4: 0005-add-detect-type-for-usad-model.patch
|
||||
patch5: 0006-add-local-file-test-case.patch
|
||||
|
||||
%description
|
||||
Abnormal detection module for A-Ops project
|
||||
@ -83,6 +84,9 @@ fi
|
||||
|
||||
|
||||
%changelog
|
||||
* Fri Apr 18 2025 maxin <maxin@xfusion.com> - 1.2.1-7
|
||||
- add local file test case
|
||||
|
||||
* Wed Nov 27 2024 huangbin <huangbin58@huawei.com> - 1.2.1-6
|
||||
- add detect type for usad model.
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user