diff --git a/0006-add-local-file-test-case.patch b/0006-add-local-file-test-case.patch new file mode 100644 index 0000000..66783e5 --- /dev/null +++ b/0006-add-local-file-test-case.patch @@ -0,0 +1,228 @@ +From b2ed1a3df17c7c080d156fa2dde11895481d2e97 Mon Sep 17 00:00:00 2001 +From: wangfenglai1 +Date: Fri, 17 Jan 2025 17:00:00 +0800 +Subject: [PATCH] add local file test case + +--- + tests/e2e_tests/test_local_file.py | 209 +++++++++++++++++++++++++++++ + 1 file changed, 209 insertions(+) + create mode 100644 tests/e2e_tests/test_local_file.py + +diff --git a/tests/e2e_tests/test_local_file.py b/tests/e2e_tests/test_local_file.py +new file mode 100644 +index 0000000..d16b271 +--- /dev/null ++++ b/tests/e2e_tests/test_local_file.py +@@ -0,0 +1,209 @@ ++#!/usr/bin/python3 ++# ****************************************************************************** ++# Copyright (c) 2022 Huawei Technologies Co., Ltd. ++# gala-anteater is licensed under Mulan PSL v2. ++# You can use this software according to the terms and conditions of the Mulan PSL v2. ++# You may obtain a copy of Mulan PSL v2 at: ++# http://license.coscl.org.cn/MulanPSL2 ++# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, ++# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, ++# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. ++# See the Mulan PSL v2 for more details. ++# ******************************************************************************/ ++""" ++Time: 2025-01-17 ++Author: wangfl ++Description: The main function of gala-anteater project. 
# Local-file end-to-end check: every CSV under a directory is scanned with a
# non-overlapping sliding window, points outside mean +/- n_sigma * std are
# treated as deviations, and sufficiently long runs of deviations are marked
# as faults.  A PNG plot and a "<name>_result.csv" are written per input file.
import os
import argparse
import logging

import pandas as pd
import numpy as np

logging.basicConfig(level=logging.ERROR)

# Suffix appended to an input file's stem when its analysis result is saved.
# Files carrying this suffix are outputs of a previous run, not inputs.
RESULT_SUFFIX = '_result'

# Columns an input CSV must provide: 'value' is analysed, 'timestamp' is the
# x axis of the plot.
REQUIRED_COLUMNS = ('timestamp', 'value')


def read_table_data_pandas(file_path):
    """
    Read tabular data from a CSV file with pandas.

    Args:
        file_path (str): Path of the CSV file.

    Returns:
        pandas.DataFrame | None: The parsed table, or None when the file is
        missing or cannot be read (the failure is logged, never raised).
    """
    try:
        return pd.read_csv(file_path)
    except FileNotFoundError:
        logging.error("Error: The file %s does not exist.", file_path)
        return None
    except Exception as e:
        # Best-effort boundary: one unreadable file must not stop a batch run.
        logging.error("Error: An error occurred while reading the file %s: %s", file_path, e)
        return None


def calculate_thresholds(window, n_sigma=3):
    """
    Compute the upper and lower thresholds of one window.

    Args:
        window (pandas.DataFrame): Window data; must contain a 'value' column.
        n_sigma (float): Half-width of the accepted band in standard
            deviations, 3 by default.

    Returns:
        tuple: (upper_threshold, lower_threshold), i.e. mean +/- n_sigma * std.
        Both are NaN when the standard deviation is undefined (fewer than two
        rows).
    """
    mean_value = window['value'].mean()
    margin = n_sigma * window['value'].std()
    return mean_value + margin, mean_value - margin


def check_deviation(window, upper_threshold, lower_threshold):
    """
    Tell which rows of a window lie outside the threshold band.

    Args:
        window (pandas.DataFrame): Window data; must contain a 'value' column.
        upper_threshold (float): Upper threshold.
        lower_threshold (float): Lower threshold.

    Returns:
        pandas.Series: Boolean series, True where 'value' is strictly above the
        upper or strictly below the lower threshold.
    """
    values = window['value']
    return (values > upper_threshold) | (values < lower_threshold)


def check_consecutive_deviation(deviation_status_list, consecutive_count=5):
    """
    Turn raw deviation flags into fault flags based on run length.

    A point is flagged once the run of consecutive deviations ending at that
    point has reached ``consecutive_count``; the first
    ``consecutive_count - 1`` points of every run therefore stay unflagged.

    Args:
        deviation_status_list (list): Per-point deviation flags.
        consecutive_count (int): Run length that triggers a fault, 5 by default.

    Returns:
        list: Per-point fault flags, same length as the input.

    Raises:
        ValueError: If consecutive_count is smaller than 1 (every point,
            deviating or not, would otherwise be flagged).
    """
    if consecutive_count < 1:
        raise ValueError(f"consecutive_count must be >= 1, got {consecutive_count}")
    corrected_status = []
    run_length = 0
    for status in deviation_status_list:
        run_length = run_length + 1 if status else 0
        corrected_status.append(run_length >= consecutive_count)
    return corrected_status


def analyze_table_data_pandas(df, window_size=600, consecutive_count=5, n_sigma=3):
    """
    Mark fault points in 'value' using non-overlapping sliding windows.

    For every window the band mean +/- n_sigma * std is computed, points outside
    it are deviations, and deviations are promoted to faults by
    check_consecutive_deviation.  Runs are counted per window and never carry
    over a window boundary.  The rows left after the last full window are
    analysed as one shorter window instead of being skipped, so inputs shorter
    than window_size are analysed as well.

    Args:
        df (pandas.DataFrame): Table with a 'value' column.  It is modified in
            place; any index is accepted because windows are positional.
        window_size (int): Rows per window, 600 by default.
        consecutive_count (int): Run length that triggers a fault, 5 by default.
        n_sigma (float): Band half-width in standard deviations, 3 by default.

    Returns:
        pandas.DataFrame: The same frame with the columns 'deviation_status',
        'lower_threshold' and 'upper_threshold' added (or overwritten).

    Raises:
        ValueError: If window_size is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    n_rows = len(df)
    # Results are collected positionally and attached once at the end, so the
    # frame's index labels never take part in the assignment.
    fault_flags = np.zeros(n_rows, dtype=bool)
    lower_values = np.full(n_rows, np.nan)
    upper_values = np.full(n_rows, np.nan)

    for start in range(0, n_rows, window_size):
        end = min(start + window_size, n_rows)
        window = df.iloc[start:end]
        upper_threshold, lower_threshold = calculate_thresholds(window, n_sigma)
        deviation_status = check_deviation(window, upper_threshold, lower_threshold)
        fault_flags[start:end] = check_consecutive_deviation(deviation_status.tolist(), consecutive_count)
        lower_values[start:end] = lower_threshold
        upper_values[start:end] = upper_threshold

    df['deviation_status'] = fault_flags
    df['lower_threshold'] = lower_values
    df['upper_threshold'] = upper_values
    return df


def plot_table_data(df, output_path, window_size):
    """
    Plot the analysed data with fault markers, threshold lines and a grid,
    and save the figure.

    Args:
        df (pandas.DataFrame): Frame with 'timestamp', 'value',
            'deviation_status', 'upper_threshold' and 'lower_threshold'.
        output_path (str): Where the image is written.
        window_size (int): Window size; not used for drawing, kept so existing
            callers keep working.
    """
    # Imported here so the analysis helpers can be imported and exercised on
    # hosts that have no plotting stack installed.
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        # Normal points
        normal_df = df[~df['deviation_status']]
        ax.plot(normal_df['timestamp'], normal_df['value'], label='Normal', marker='o', linestyle='-', color='blue')
        # Fault points
        fault_df = df[df['deviation_status']]
        ax.plot(fault_df['timestamp'], fault_df['value'], label='Fault', marker='x', linestyle='None', color='red',
                markersize=10)
        # Upper and lower threshold lines
        ax.plot(df['timestamp'], df["upper_threshold"], label='Upper Threshold', linestyle='--', color='green')
        ax.plot(df['timestamp'], df["lower_threshold"], label='Lower Threshold', linestyle='--', color='orange')
        ax.set_xlabel('Timestamp')
        ax.set_ylabel('Value')
        ax.set_title('Table Data Analysis')
        ax.legend()
        # Grid lines
        ax.grid(True, which='both', linestyle='--', linewidth=0.5, color='gray')
        fig.savefig(output_path)
    finally:
        # Always release the figure; a batch run would otherwise accumulate
        # open figures whenever drawing or saving fails.
        plt.close(fig)


def save_analysis_result(df, result_path):
    """
    Save the analysis result as a CSV file (without the index).

    Args:
        df (pandas.DataFrame): Frame holding the analysis result.
        result_path (str): Where the CSV is written.
    """
    try:
        df.to_csv(result_path, index=False)
    except Exception as e:
        # Best-effort: report the failure and let the batch continue.
        logging.error("Error: An error occurred while saving the file %s: %s", result_path, e)


def find_csv_files(directory):
    """
    Collect the input CSV files below a directory.

    Files ending in '_result.csv' are skipped: they are written by this script
    next to their inputs and would otherwise be analysed again on the next run.

    Args:
        directory (str): Root directory to walk.

    Returns:
        list: Paths of the input CSV files, sorted by name within each directory.
    """
    result_ending = RESULT_SUFFIX + '.csv'
    csv_paths = []
    for root, _dirs, files in os.walk(directory):
        for file in sorted(files):
            if file.endswith('.csv') and not file.endswith(result_ending):
                csv_paths.append(os.path.join(root, file))
    return csv_paths


def main(argv=None):
    """
    Analyse every input CSV below a directory and write a plot and a result
    CSV next to each of them.

    Args:
        argv (list | None): Command-line arguments; None means sys.argv[1:].
    """
    parser = argparse.ArgumentParser(description='Table Data Analysis')
    parser.add_argument('directory', type=str, nargs='?', default='./test',
                        help='Path to the directory containing CSV files')
    parser.add_argument('--window_size', type=int, default=600, help='Window size for analysis')
    args = parser.parse_args(argv)
    if args.window_size < 1:
        parser.error('--window_size must be a positive integer')

    # The file list is built before anything is written, so outputs created
    # during this run cannot be picked up as inputs.
    for file_path in find_csv_files(args.directory):
        root, base_name = os.path.split(file_path)
        file_name = os.path.splitext(base_name)[0]
        output_image_path = os.path.join(root, f'{file_name}.png')
        output_result_path = os.path.join(root, f'{file_name}{RESULT_SUFFIX}.csv')

        df = read_table_data_pandas(file_path)
        if df is None:
            continue
        missing = [column for column in REQUIRED_COLUMNS if column not in df.columns]
        if missing:
            # Skip unrelated CSV files instead of aborting the whole run.
            logging.error("Error: The file %s is missing required column(s): %s", file_path, ', '.join(missing))
            continue

        analysis_result = analyze_table_data_pandas(df, args.window_size)
        plot_table_data(analysis_result, output_image_path, args.window_size)
        save_analysis_result(analysis_result, output_result_path)


if __name__ == "__main__":
    main()
License: MulanPSL2 URL: https://gitee.com/openeuler/gala-anteater @@ -16,6 +16,7 @@ patch1: 0002-configure-group-in-json.patch patch2: 0003-remove-unuse-code.patch patch3: 0004-fixbug-wrong-label-for-dbscan.patch patch4: 0005-add-detect-type-for-usad-model.patch +patch5: 0006-add-local-file-test-case.patch %description Abnormal detection module for A-Ops project @@ -83,6 +84,9 @@ fi %changelog +* Fri Apr 18 2025 maxin - 1.2.1-7 +- add local file test case + * Wed Nov 27 2024 huangbin - 1.2.1-6 - add detect type for usad model.