!52 add local file test case

From: @maskinghk 
Reviewed-by: @chenwei_kernel 
Signed-off-by: @chenwei_kernel
This commit is contained in:
openeuler-ci-bot 2025-04-21 01:42:11 +00:00 committed by Gitee
commit 9812bebaef
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
2 changed files with 233 additions and 1 deletions

View File

@ -0,0 +1,228 @@
From b2ed1a3df17c7c080d156fa2dde11895481d2e97 Mon Sep 17 00:00:00 2001
From: wangfenglai1 <wangfenglai1@huawei.com>
Date: Fri, 17 Jan 2025 17:00:00 +0800
Subject: [PATCH] add local file test case
---
tests/e2e_tests/test_local_file.py | 209 +++++++++++++++++++++++++++++
1 file changed, 209 insertions(+)
create mode 100644 tests/e2e_tests/test_local_file.py
diff --git a/tests/e2e_tests/test_local_file.py b/tests/e2e_tests/test_local_file.py
new file mode 100644
index 0000000..d16b271
--- /dev/null
+++ b/tests/e2e_tests/test_local_file.py
@@ -0,0 +1,209 @@
+#!/usr/bin/python3
+# ******************************************************************************
+# Copyright (c) 2022 Huawei Technologies Co., Ltd.
+# gala-anteater is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# ******************************************************************************/
+"""
+Time: 2025-01-17
+Author: wangfl
+Description: The main function of gala-anteater project.
+"""
+import os
+import argparse
+import logging
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+
+logging.basicConfig(level=logging.ERROR)
+
+
+def read_table_data_pandas(file_path):
+ """
+ 此函数使用 pandas 从文件读取表格数据。
+
+ 参数:
+ file_path (str): CSV 文件的路径。
+
+ 返回:
+ pandas.DataFrame: 包含表格数据的数据框。
+ """
+ try:
+ df = pd.read_csv(file_path)
+ return df
+ except FileNotFoundError:
+ logging.error(f"Error: The file {file_path} does not exist.")
+ return None
+ except Exception as e:
+ logging.error(f"Error: An error occurred while reading the file {file_path}: {e}")
+ return None
+
+
+def calculate_thresholds(window):
+ """
+ 计算窗口内的上下阈值。
+
+ 参数:
+ window (pandas.DataFrame): 滑动窗口的数据。
+
+ 返回:
+ tuple: 包含上下阈值的元组。
+ """
+ mean_value = window['value'].mean()
+ std_value = window['value'].std()
+ threshold = 3 * std_value
+ upper_threshold = mean_value + threshold
+ lower_threshold = mean_value - threshold
+ return upper_threshold, lower_threshold
+
+
+def check_deviation(window, upper_threshold, lower_threshold):
+ """
+ 检查窗口内的数据是否偏离阈值。
+
+ 参数:
+ window (pandas.DataFrame): 滑动窗口的数据。
+ upper_threshold (float): 上阈值。
+ lower_threshold (float): 下阈值。
+
+ 返回:
+ pandas.Series: 偏离状态的布尔序列。
+ """
+ return ((window['value'] > upper_threshold) | (window['value'] < lower_threshold))
+
+
+def check_consecutive_deviation(deviation_status_list, consecutive_count=5):
+ """
+ 检查连续偏离点是否超过指定数量。
+
+ 参数:
+ deviation_status_list (list): 偏离状态列表。
+ consecutive_count (int): 连续偏离点的阈值,默认为 5。
+
+ 返回:
+ list: 标记为故障点的布尔列表。
+ """
+ corrected_status = []
+ consecutive_count_current = 0
+ for status in deviation_status_list:
+ if status:
+ consecutive_count_current += 1
+ else:
+ consecutive_count_current = 0
+ if consecutive_count_current >= consecutive_count:
+ corrected_status.append(True)
+ else:
+ corrected_status.append(False)
+ return corrected_status
+
+
+def analyze_table_data_pandas(df, window_size=600):
+ """
+ 此函数使用 pandas 检查 value 是否偏离均值分布 3 倍以上,
+ 并将结果保存在新列 deviation_status 中。
+ 进一步检查滑窗内连续 5 个以上的偏离点,将其标记为故障点,其余为正常点。
+ 滑窗间不重叠。
+
+ 参数:
+ df (pandas.DataFrame): 包含表格数据的数据框。
+ window_size (int): 滑窗大小,默认为 600。
+
+ 返回:
+ pandas.DataFrame: 包含 'timestamp', 'value', 'deviation_status' 列的数据框。
+ """
+ df['deviation_status'] = False
+ df['lower_threshold'] = np.nan
+ df['upper_threshold'] = np.nan
+ num_windows = len(df) // window_size
+ for i in range(num_windows):
+ start = i * window_size
+ end = (i + 1) * window_size
+ window = df.iloc[start:end]
+ upper_threshold, lower_threshold = calculate_thresholds(window)
+ deviation_status = check_deviation(window, upper_threshold, lower_threshold)
+ deviation_status_list = deviation_status.tolist()
+ corrected_status = check_consecutive_deviation(deviation_status_list)
+ df.loc[start:end - 1, 'deviation_status'] = corrected_status
+ df.loc[start:end - 1, 'lower_threshold'] = lower_threshold
+ df.loc[start:end - 1, 'upper_threshold'] = upper_threshold
+ return df
+
+
+def plot_table_data(df, output_path, window_size):
+ """
+ 此函数使用 matplotlib 绘制表格数据,并标记出故障点,同时增加方格线,保存图像,并绘制上下阈值线。
+
+ 参数:
+ df (pandas.DataFrame): 包含 'timestamp', 'value', 'deviation_status' 列的数据框。
+ output_path (str): 图像保存的路径。
+ window_size (int): 滑窗大小。
+ """
+ fig, ax = plt.subplots(figsize=(10, 6))
+ # 绘制正常点
+ normal_df = df[~df['deviation_status']]
+ ax.plot(normal_df['timestamp'], normal_df['value'], label='Normal', marker='o', linestyle='-', color='blue')
+ # 绘制故障点
+ fault_df = df[df['deviation_status']]
+ ax.plot(fault_df['timestamp'], fault_df['value'], label='Fault', marker='x', linestyle='None', color='red',
+ markersize=10)
+ # 绘制上下阈值线
+ ax.plot(df['timestamp'], df["upper_threshold"], label='Upper Threshold', linestyle='--', color='green')
+ ax.plot(df['timestamp'], df["lower_threshold"], label='Lower Threshold', linestyle='--', color='orange')
+ ax.set_xlabel('Timestamp')
+ ax.set_ylabel('Value')
+ ax.set_title('Table Data Analysis')
+ ax.legend()
+ # 添加方格线
+ ax.grid(True, which='both', linestyle='--', linewidth=0.5, color='gray')
+ plt.savefig(output_path)
+ plt.close(fig)
+
+
+def save_analysis_result(df, result_path):
+ """
+ 此函数将分析结果保存到 CSV 文件中。
+
+ 参数:
+ df (pandas.DataFrame): 包含分析结果的数据框。
+ result_path (str): 结果保存的路径。
+ """
+ try:
+ df.to_csv(result_path, index=False)
+ except Exception as e:
+ logging.error(f"Error: An error occurred while saving the file {result_path}: {e}")
+
+
+def main():
+ parser = argparse.ArgumentParser(description='Table Data Analysis')
+ parser.add_argument('directory', type=str, nargs='?', default='./test',
+ help='Path to the directory containing CSV files')
+ parser.add_argument('--window_size', type=int, default=600, help='Window size for analysis')
+ args = parser.parse_args()
+
+ # 遍历目录下的所有文件
+ for root, dirs, files in os.walk(args.directory):
+ for file in files:
+ if file.endswith('.csv'):
+ file_path = os.path.join(root, file)
+ # 获取输入文件的基本名称(不包含路径)
+ base_name = os.path.basename(file_path)
+ file_name = os.path.splitext(base_name)[0]
+ output_image_path = os.path.join(root, f'{file_name}.png')
+ output_result_path = os.path.join(root, f'{file_name}_result.csv')
+
+ df = read_table_data_pandas(file_path)
+ if df is not None:
+ analysis_result = analyze_table_data_pandas(df, args.window_size)
+ plot_table_data(analysis_result, output_image_path, args.window_size)
+ save_analysis_result(analysis_result, output_result_path)
+
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
--
Gitee

View File

@ -2,7 +2,7 @@
Name: gala-anteater
Version: 1.2.1
Release: 6
Release: 7
Summary: A time-series anomaly detection platform for operating system.
License: MulanPSL2
URL: https://gitee.com/openeuler/gala-anteater
@ -16,6 +16,7 @@ patch1: 0002-configure-group-in-json.patch
patch2: 0003-remove-unuse-code.patch
patch3: 0004-fixbug-wrong-label-for-dbscan.patch
patch4: 0005-add-detect-type-for-usad-model.patch
patch5: 0006-add-local-file-test-case.patch
%description
Abnormal detection module for A-Ops project
@ -83,6 +84,9 @@ fi
%changelog
* Fri Apr 18 2025 maxin <maxin@xfusion.com> - 1.2.1-7
- add local file test case
* Wed Nov 27 2024 huangbin <huangbin58@huawei.com> - 1.2.1-6
- add detect type for usad model.