!151 fix hbm online repair notice logic and efi save
From: @luckky7 Reviewed-by: @lvying6 Signed-off-by: @lvying6
This commit is contained in:
commit
43fd280fdc
508
fix-hbm-online-repair-notice-and-efi-create.patch
Normal file
508
fix-hbm-online-repair-notice-and-efi-create.patch
Normal file
@ -0,0 +1,508 @@
|
|||||||
|
From 85d6dae9d7c6148f2699ef7da7d2d784043a2ee1 Mon Sep 17 00:00:00 2001
|
||||||
|
From: luckky <guodashun1@huawei.com>
|
||||||
|
Date: Wed, 30 Oct 2024 10:41:11 +0800
|
||||||
|
Subject: [PATCH] fix hbm online repair notice and efi create
|
||||||
|
|
||||||
|
---
|
||||||
|
src/c/hbm_online_repair/hbm_online_repair.c | 5 +-
|
||||||
|
.../non-standard-hbm-repair.c | 194 +++++++++---------
|
||||||
|
.../non-standard-hbm-repair.h | 2 +-
|
||||||
|
src/c/hbm_online_repair/ras-events.c | 1 -
|
||||||
|
.../ras-non-standard-handler.c | 33 +--
|
||||||
|
.../ras-non-standard-handler.h | 1 +
|
||||||
|
6 files changed, 116 insertions(+), 120 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/src/c/hbm_online_repair/hbm_online_repair.c b/src/c/hbm_online_repair/hbm_online_repair.c
|
||||||
|
index 3ace206..b3b2742 100644
|
||||||
|
--- a/src/c/hbm_online_repair/hbm_online_repair.c
|
||||||
|
+++ b/src/c/hbm_online_repair/hbm_online_repair.c
|
||||||
|
@@ -127,10 +127,7 @@ int main(int argc, char *argv[])
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
- ret = init_all_flash();
|
||||||
|
- if (ret < 0) {
|
||||||
|
- log(LOG_ERROR, "flash writer init failed\n");
|
||||||
|
- }
|
||||||
|
+ get_flash_total_size();
|
||||||
|
|
||||||
|
handle_ras_events(ras);
|
||||||
|
|
||||||
|
diff --git a/src/c/hbm_online_repair/non-standard-hbm-repair.c b/src/c/hbm_online_repair/non-standard-hbm-repair.c
|
||||||
|
index b175e14..f26d8ae 100644
|
||||||
|
--- a/src/c/hbm_online_repair/non-standard-hbm-repair.c
|
||||||
|
+++ b/src/c/hbm_online_repair/non-standard-hbm-repair.c
|
||||||
|
@@ -15,7 +15,7 @@
|
||||||
|
#include "non-standard-hbm-repair.h"
|
||||||
|
|
||||||
|
extern int page_isolation_threshold;
|
||||||
|
-size_t total_size = 0;
|
||||||
|
+size_t flash_total_size = 0;
|
||||||
|
struct hisi_common_error_section {
|
||||||
|
uint32_t val_bits;
|
||||||
|
uint8_t version;
|
||||||
|
@@ -122,28 +122,58 @@ static void parse_fault_addr_info(struct fault_addr_info* info_struct, unsigned
|
||||||
|
info_struct->crc8 = (uint32_t)fault_addr;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static bool variable_existed(char *name, char *guid)
|
||||||
|
+static bool is_variable_existing(char *name, char *guid)
|
||||||
|
{
|
||||||
|
+ char filename[PATH_MAX];
|
||||||
|
+ snprintf(filename, PATH_MAX - 1, "%s/%s-%s", EFIVARFS_PATH, name, guid);
|
||||||
|
+
|
||||||
|
+ return access(filename, F_OK | R_OK) == 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static size_t get_var_size(char *name, char *guid) {
|
||||||
|
char filename[PATH_MAX];
|
||||||
|
int fd;
|
||||||
|
+ struct stat stat;
|
||||||
|
|
||||||
|
snprintf(filename, PATH_MAX - 1, "%s/%s-%s", EFIVARFS_PATH, name, guid);
|
||||||
|
|
||||||
|
// open var file
|
||||||
|
fd = open(filename, O_RDONLY);
|
||||||
|
if (fd < 0) {
|
||||||
|
- log(LOG_WARNING, "open file %s failed\n", filename);
|
||||||
|
- return false;
|
||||||
|
+ log(LOG_WARNING, "open %s failed\n", filename);
|
||||||
|
+ goto err;
|
||||||
|
+ }
|
||||||
|
+ // read stat
|
||||||
|
+ if (fstat(fd, &stat) != 0) {
|
||||||
|
+ log(LOG_WARNING, "fstat %s failed\n", filename);
|
||||||
|
+ goto err;
|
||||||
|
}
|
||||||
|
close(fd);
|
||||||
|
- return true;
|
||||||
|
+ return stat.st_size;
|
||||||
|
+err:
|
||||||
|
+ if (fd >= 0)
|
||||||
|
+ close(fd);
|
||||||
|
+ return (size_t)-1;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static uint32_t read_variable_attribute(char *name, char *guid) {
|
||||||
|
+void get_flash_total_size() {
|
||||||
|
+ for (int i = 0; i < FLASH_ENTRY_NUM; i++) {
|
||||||
|
+ if (is_variable_existing(flash_names[i], flash_guids[i])) {
|
||||||
|
+ flash_total_size += get_var_size(flash_names[i], flash_guids[i]);
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ // check total entry size
|
||||||
|
+ log(LOG_DEBUG, "current fault info total size: %luKB, flash max threshold: %uKB\n",
|
||||||
|
+ flash_total_size / KB_SIZE, MAX_VAR_SIZE / KB_SIZE);
|
||||||
|
+ if (flash_total_size > MAX_VAR_SIZE) {
|
||||||
|
+ log(LOG_WARNING, "fault info storage %zu reach threshold, cannot save new record\n", flash_total_size);
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int read_variable_attribute(char *name, char *guid, uint32_t *attribute) {
|
||||||
|
char filename[PATH_MAX];
|
||||||
|
int fd;
|
||||||
|
size_t readsize;
|
||||||
|
- uint32_t attribute = (uint32_t)-1;
|
||||||
|
|
||||||
|
snprintf(filename, PATH_MAX - 1, "%s/%s-%s", EFIVARFS_PATH, name, guid);
|
||||||
|
|
||||||
|
@@ -151,17 +181,18 @@ static uint32_t read_variable_attribute(char *name, char *guid) {
|
||||||
|
fd = open(filename, O_RDONLY);
|
||||||
|
if (fd < 0) {
|
||||||
|
log(LOG_ERROR, "open %s failed\n", filename);
|
||||||
|
- return attribute;
|
||||||
|
+ return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// read attributes from first 4 bytes
|
||||||
|
- readsize = read(fd, &attribute, sizeof(uint32_t));
|
||||||
|
+ readsize = read(fd, attribute, sizeof(uint32_t));
|
||||||
|
if (readsize != sizeof(uint32_t)) {
|
||||||
|
log(LOG_ERROR, "read attribute of %s failed\n", filename);
|
||||||
|
+ return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
close(fd);
|
||||||
|
- return attribute;
|
||||||
|
+ return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int efivarfs_set_mutable(char *name, char *guid, bool mutable)
|
||||||
|
@@ -205,8 +236,8 @@ err:
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static int write_variable(char *name, char *guid, void *value, unsigned long size, uint32_t attribute) {
|
||||||
|
- int fd, mode;
|
||||||
|
+static int write_variable(char *name, char *guid, void *value, unsigned long size, uint32_t attribute, bool is_existing) {
|
||||||
|
+ int fd = -1, mode;
|
||||||
|
size_t writesize;
|
||||||
|
void *buffer;
|
||||||
|
unsigned long total;
|
||||||
|
@@ -225,16 +256,13 @@ static int write_variable(char *name, char *guid, void *value, unsigned long siz
|
||||||
|
memcpy(buffer + sizeof(uint32_t), value, size);
|
||||||
|
|
||||||
|
// change attr
|
||||||
|
- if (efivarfs_set_mutable(name, guid, 1) != 0) {
|
||||||
|
+ if (is_existing && efivarfs_set_mutable(name, guid, 1) != 0) {
|
||||||
|
log(LOG_ERROR, "set mutable for %s failed\n", filename);
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
mode = O_WRONLY;
|
||||||
|
- if (attribute & EFI_VARIABLE_APPEND_WRITE)
|
||||||
|
- mode |= O_APPEND;
|
||||||
|
- else
|
||||||
|
- mode |= O_CREAT;
|
||||||
|
+ mode |= is_existing ? O_APPEND : O_CREAT;
|
||||||
|
|
||||||
|
// open var file
|
||||||
|
fd = open(filename, mode, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
|
||||||
|
@@ -252,7 +280,7 @@ static int write_variable(char *name, char *guid, void *value, unsigned long siz
|
||||||
|
|
||||||
|
close(fd);
|
||||||
|
free(buffer);
|
||||||
|
- if (efivarfs_set_mutable(name, guid, 0) != 0) {
|
||||||
|
+ if (is_existing && efivarfs_set_mutable(name, guid, 0) != 0) {
|
||||||
|
log(LOG_ERROR, "set immutable for %s failed\n", filename);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
@@ -261,86 +289,21 @@ err:
|
||||||
|
close(fd);
|
||||||
|
if (buffer)
|
||||||
|
free(buffer);
|
||||||
|
- if (efivarfs_set_mutable(name, guid, 0) != 0) {
|
||||||
|
+ if (is_existing && efivarfs_set_mutable(name, guid, 0) != 0) {
|
||||||
|
log(LOG_ERROR, "set immutable for %s failed\n", filename);
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static int append_variable(char *name, char *guid, void *data, unsigned long size) {
|
||||||
|
- // prepare append attribute
|
||||||
|
- uint32_t attribute = read_variable_attribute(name, guid);
|
||||||
|
- if (attribute == (uint32_t)-1) {
|
||||||
|
- log(LOG_ERROR, "read %s-%s attribute failed\n", name, guid);
|
||||||
|
- return -1;
|
||||||
|
- }
|
||||||
|
- attribute |= EFI_VARIABLE_APPEND_WRITE;
|
||||||
|
-
|
||||||
|
- return write_variable(name, guid, data, size, attribute);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static size_t get_var_size(char *name, char *guid) {
|
||||||
|
- char filename[PATH_MAX];
|
||||||
|
- int fd;
|
||||||
|
- struct stat stat;
|
||||||
|
-
|
||||||
|
- snprintf(filename, PATH_MAX - 1, "%s/%s-%s", EFIVARFS_PATH, name, guid);
|
||||||
|
-
|
||||||
|
- // open var file
|
||||||
|
- fd = open(filename, O_RDONLY);
|
||||||
|
- if (fd < 0) {
|
||||||
|
- log(LOG_WARNING, "open %s failed\n", filename);
|
||||||
|
- goto err;
|
||||||
|
- }
|
||||||
|
- // read stat
|
||||||
|
- if (fstat(fd, &stat) != 0) {
|
||||||
|
- log(LOG_WARNING, "fstat %s failed\n", filename);
|
||||||
|
- goto err;
|
||||||
|
- }
|
||||||
|
- close(fd);
|
||||||
|
- return stat.st_size;
|
||||||
|
-err:
|
||||||
|
- if (fd >= 0)
|
||||||
|
- close(fd);
|
||||||
|
- return (size_t)-1;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-int init_all_flash() {
|
||||||
|
- for (int i = 0; i < FLASH_ENTRY_NUM; i++) {
|
||||||
|
- // check existed entry
|
||||||
|
- if (variable_existed(flash_names[i], flash_guids[i])) {
|
||||||
|
- total_size += get_var_size(flash_names[i], flash_guids[i]);
|
||||||
|
- continue;
|
||||||
|
- }
|
||||||
|
- // create new entry
|
||||||
|
- uint32_t attribute = EFI_VARIABLE_NON_VOLATILE |
|
||||||
|
- EFI_VARIABLE_BOOTSERVICE_ACCESS |
|
||||||
|
- EFI_VARIABLE_RUNTIME_ACCESS;
|
||||||
|
- char *data = "";
|
||||||
|
- unsigned long size = 1;
|
||||||
|
- int ret = write_variable(flash_names[i], flash_guids[i], data, size, attribute);
|
||||||
|
- if (ret) {
|
||||||
|
- log(LOG_ERROR, "init %s-%s failed, fault info storage funtion not enabled\n", flash_names[i], flash_guids[i]);
|
||||||
|
- return -1;
|
||||||
|
- }
|
||||||
|
- total_size += sizeof(uint32_t) + 1;
|
||||||
|
- }
|
||||||
|
- // check total entry size
|
||||||
|
- log(LOG_DEBUG, "current fault info total size: %luKB, flash max threshold: %uKB\n",
|
||||||
|
- total_size / KB_SIZE, MAX_VAR_SIZE / KB_SIZE);
|
||||||
|
- if (total_size > MAX_VAR_SIZE) {
|
||||||
|
- log(LOG_ERROR, "fault info storage reach threshold, cannot save new record\n");
|
||||||
|
- }
|
||||||
|
- return 0;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
static int write_fault_info_to_flash(const struct hisi_common_error_section *err) {
|
||||||
|
int ret, guid_index;
|
||||||
|
uint32_t reg_size;
|
||||||
|
uint64_t fault_addr;
|
||||||
|
+ bool is_existing;
|
||||||
|
+ uint32_t attribute = -1;
|
||||||
|
|
||||||
|
// check flash usage threshold
|
||||||
|
- if (total_size + sizeof(uint64_t) > MAX_VAR_SIZE) {
|
||||||
|
+ if (flash_total_size + sizeof(uint64_t) > MAX_VAR_SIZE) {
|
||||||
|
log(LOG_WARNING, "fault info storage reach threshold, cannot save new record into flash\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
@@ -359,14 +322,29 @@ static int write_fault_info_to_flash(const struct hisi_common_error_section *err
|
||||||
|
log(LOG_ERROR, "invalid fault info\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+ // judge if the efivar is existing to set the attribute
|
||||||
|
+ is_existing = is_variable_existing(flash_names[guid_index], flash_guids[guid_index]);
|
||||||
|
+ attribute = EFI_VARIABLE_NON_VOLATILE |
|
||||||
|
+ EFI_VARIABLE_BOOTSERVICE_ACCESS |
|
||||||
|
+ EFI_VARIABLE_RUNTIME_ACCESS;
|
||||||
|
+ if (is_existing) {
|
||||||
|
+ ret = read_variable_attribute(flash_names[guid_index], flash_guids[guid_index], &attribute);
|
||||||
|
+ if (ret < 0) {
|
||||||
|
+ log(LOG_ERROR, "read variable %s-%s attribute failed, stop writing\n", flash_names[guid_index], flash_guids[guid_index]);
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+ attribute |= EFI_VARIABLE_APPEND_WRITE;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
// record physical addr in flash
|
||||||
|
- ret = append_variable(flash_names[guid_index], flash_guids[guid_index], &fault_addr, sizeof(uint64_t));
|
||||||
|
+ ret = write_variable(flash_names[guid_index], flash_guids[guid_index], &fault_addr, sizeof(uint64_t), attribute, is_existing);
|
||||||
|
if (ret < 0) {
|
||||||
|
- log(LOG_ERROR, "append to %s-%s failed\n", flash_names[guid_index], flash_guids[guid_index]);
|
||||||
|
+ log(LOG_ERROR, "write to %s-%s failed\n", flash_names[guid_index], flash_guids[guid_index]);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
- total_size += sizeof(uint64_t);
|
||||||
|
- log(LOG_INFO, "write hbm fault info to flash success\n");
|
||||||
|
+ flash_total_size += sizeof(uint64_t);
|
||||||
|
+ log(LOG_INFO, "write hbm fault info to flash %s-%s success\n", flash_names[guid_index], flash_guids[guid_index]);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -421,7 +399,7 @@ static int get_hardware_corrupted_size()
|
||||||
|
return hardware_corrupted_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static uint8_t get_repair_result_code(int ret)
|
||||||
|
+static uint8_t get_repair_failed_result_code(int ret)
|
||||||
|
{
|
||||||
|
if (ret == -ENOSPC) {
|
||||||
|
return REPAIR_FAILED_NO_RESOURCE;
|
||||||
|
@@ -582,11 +560,11 @@ static int hbmc_hbm_page_isolate(const struct hisi_common_error_section *err)
|
||||||
|
static int hbmc_hbm_after_repair(bool is_acls, const int repair_ret, const unsigned long long paddr)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
- if (repair_ret < 0) {
|
||||||
|
+ if (repair_ret <= 0) {
|
||||||
|
log(LOG_WARNING, "HBM %s: Keep page (0x%llx) offline\n", is_acls ? "ACLS" : "SPPR", paddr);
|
||||||
|
/* not much we can do about errors here */
|
||||||
|
(void)write_file("/sys/kernel/page_eject", "remove_page", paddr);
|
||||||
|
- return get_repair_result_code(repair_ret);
|
||||||
|
+ return get_repair_failed_result_code(repair_ret);
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = write_file("/sys/kernel/page_eject", "online_page", paddr);
|
||||||
|
@@ -615,9 +593,13 @@ static uint8_t hbmc_hbm_repair(const struct hisi_common_error_section *err, char
|
||||||
|
err->reg_array[HBM_REPAIR_REQ_TYPE] & HBM_PSUE_ACLS;
|
||||||
|
|
||||||
|
ret = write_file(path, is_acls ? "acls_query" : "sppr_query", paddr);
|
||||||
|
- if (ret < 0) {
|
||||||
|
- notice_BMC(err, get_repair_result_code(ret));
|
||||||
|
- log(LOG_WARNING, "HBM: Address 0x%llx is not supported to %s repair\n", paddr, is_acls ? "ACLS" : "SPPR");
|
||||||
|
+
|
||||||
|
+ /* Only positive num means the error is supported to repair */
|
||||||
|
+ if (ret <= 0) {
|
||||||
|
+ if (ret != -ENXIO) {
|
||||||
|
+ notice_BMC(err, get_repair_failed_result_code(ret));
|
||||||
|
+ log(LOG_WARNING, "HBM: Address 0x%llx is not supported to %s repair\n", paddr, is_acls ? "ACLS" : "SPPR");
|
||||||
|
+ }
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -642,8 +624,9 @@ static uint8_t hbmc_hbm_repair(const struct hisi_common_error_section *err, char
|
||||||
|
all_online_success = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
- if (ret < 0) {
|
||||||
|
- notice_BMC(err, get_repair_result_code(ret));
|
||||||
|
+ /* The ret is from the acls/sppr repair, and only positive num means the error is repaired successfully */
|
||||||
|
+ if (ret <= 0) {
|
||||||
|
+ notice_BMC(err, get_repair_failed_result_code(ret));
|
||||||
|
return ret;
|
||||||
|
} else if (all_online_success) {
|
||||||
|
notice_BMC(err, ISOLATE_REPAIR_ONLINE_SUCCESS);
|
||||||
|
@@ -698,7 +681,7 @@ static void hbm_repair_handler(const struct hisi_common_error_section *err)
|
||||||
|
struct dirent *dent;
|
||||||
|
DIR *dir;
|
||||||
|
int ret;
|
||||||
|
- bool find_device = false, find_hbm_mem = false;
|
||||||
|
+ bool find_device = false, find_hbm_mem = false, addr_in_hbm_device = false;
|
||||||
|
|
||||||
|
ret = hbmc_hbm_page_isolate(err);
|
||||||
|
if (ret < 0) {
|
||||||
|
@@ -723,10 +706,13 @@ static void hbm_repair_handler(const struct hisi_common_error_section *err)
|
||||||
|
if (hbmc_get_memory_type(path) == HBM_HBM_MEMORY) {
|
||||||
|
find_hbm_mem = true;
|
||||||
|
ret = hbmc_hbm_repair(err, path);
|
||||||
|
- if (ret != -ENXIO)
|
||||||
|
+ if (ret != -ENXIO) {
|
||||||
|
+ addr_in_hbm_device = true;
|
||||||
|
break;
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
+
|
||||||
|
if (!find_device) {
|
||||||
|
log(LOG_ERROR, "Repair driver is not loaded, skip error, error_type is %u\n",
|
||||||
|
err->reg_array[HBM_REPAIR_REQ_TYPE] & HBM_ERROR_MASK);
|
||||||
|
@@ -735,6 +721,10 @@ static void hbm_repair_handler(const struct hisi_common_error_section *err)
|
||||||
|
log(LOG_ERROR, "No HBM device memory type found, skip error, error_type is %u\n",
|
||||||
|
err->reg_array[HBM_REPAIR_REQ_TYPE] & HBM_ERROR_MASK);
|
||||||
|
notice_BMC(err, REPAIR_FAILED_OTHER_REASON);
|
||||||
|
+ } else if (!addr_in_hbm_device) {
|
||||||
|
+ log(LOG_ERROR, "Err addr is not in device, skip error, error_type is %u\n",
|
||||||
|
+ err->reg_array[HBM_REPAIR_REQ_TYPE] & HBM_ERROR_MASK);
|
||||||
|
+ notice_BMC(err, REPAIR_FAILED_INVALID_PARAM);
|
||||||
|
}
|
||||||
|
|
||||||
|
closedir(dir);
|
||||||
|
@@ -769,7 +759,7 @@ static bool hbm_repair_validate(const struct hisi_common_error_section *err)
|
||||||
|
(err->reg_array_size == HBM_CACHE_ARRAY_SIZE);
|
||||||
|
|
||||||
|
if (!(is_acls_valid || is_sppr_valid || is_cache_mode)) {
|
||||||
|
- log(LOG_DEBUG, "err type (%u) is unknown or address array length (%u) is invalid\n",
|
||||||
|
+ log(LOG_WARNING, "err type (%u) is unknown or address array length (%u) is invalid\n",
|
||||||
|
hbm_repair_reg_type, err->reg_array_size);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
diff --git a/src/c/hbm_online_repair/non-standard-hbm-repair.h b/src/c/hbm_online_repair/non-standard-hbm-repair.h
|
||||||
|
index 7e8e448..ecb04fe 100644
|
||||||
|
--- a/src/c/hbm_online_repair/non-standard-hbm-repair.h
|
||||||
|
+++ b/src/c/hbm_online_repair/non-standard-hbm-repair.h
|
||||||
|
@@ -84,6 +84,6 @@
|
||||||
|
#define FLASH_ENTRY_NUM 8
|
||||||
|
#define KB_SIZE 1024
|
||||||
|
|
||||||
|
-extern int init_all_flash();
|
||||||
|
+extern void get_flash_total_size();
|
||||||
|
|
||||||
|
#endif
|
||||||
|
diff --git a/src/c/hbm_online_repair/ras-events.c b/src/c/hbm_online_repair/ras-events.c
|
||||||
|
index 0b12329..4d281ad 100644
|
||||||
|
--- a/src/c/hbm_online_repair/ras-events.c
|
||||||
|
+++ b/src/c/hbm_online_repair/ras-events.c
|
||||||
|
@@ -348,7 +348,6 @@ static int read_ras_event_all_cpus(struct pcpu_data *pdata,
|
||||||
|
"Error on CPU %i\n", i);
|
||||||
|
warnonce[i]++;
|
||||||
|
}
|
||||||
|
- continue;
|
||||||
|
}
|
||||||
|
if (!(fds[i].revents & POLLIN)) {
|
||||||
|
count_nready++;
|
||||||
|
diff --git a/src/c/hbm_online_repair/ras-non-standard-handler.c b/src/c/hbm_online_repair/ras-non-standard-handler.c
|
||||||
|
index 1d1fd04..48ffa70 100644
|
||||||
|
--- a/src/c/hbm_online_repair/ras-non-standard-handler.c
|
||||||
|
+++ b/src/c/hbm_online_repair/ras-non-standard-handler.c
|
||||||
|
@@ -7,17 +7,21 @@
|
||||||
|
#include "ras-non-standard-handler.h"
|
||||||
|
#include "logger.h"
|
||||||
|
|
||||||
|
-static char *uuid_le(const char *uu)
|
||||||
|
+static int uuid_le(const char *uu, char* uuid)
|
||||||
|
{
|
||||||
|
- static char uuid[sizeof("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")];
|
||||||
|
if (!uu) {
|
||||||
|
log(LOG_ERROR, "uuid_le failed: uu is empty");
|
||||||
|
- return uuid;
|
||||||
|
+ return -1;
|
||||||
|
}
|
||||||
|
size_t uu_len = strlen(uu);
|
||||||
|
- if (uu_len < SECTION_TYPE_UUID_LEN) {
|
||||||
|
- log(LOG_ERROR, "uuid_le failed: uu is too short");
|
||||||
|
- return uuid;
|
||||||
|
+ if (uu_len != SECTION_TYPE_UUID_LEN) {
|
||||||
|
+ log(LOG_ERROR, "uuid_le failed: uu len is incorrect");
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+ size_t uuid_len = strlen(uuid);
|
||||||
|
+ if (uuid_len != strlen(UUID_STR_TYPE)) {
|
||||||
|
+ log(LOG_ERROR, "uuid_le failed: uuid len is incorrect");
|
||||||
|
+ return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
char *p = uuid;
|
||||||
|
@@ -38,7 +42,7 @@ static char *uuid_le(const char *uu)
|
||||||
|
|
||||||
|
*p = 0;
|
||||||
|
|
||||||
|
- return uuid;
|
||||||
|
+ return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int ras_non_standard_event_handler(struct trace_seq *s,
|
||||||
|
@@ -52,15 +56,20 @@ int ras_non_standard_event_handler(struct trace_seq *s,
|
||||||
|
ev.sec_type = tep_get_field_raw(s, event, "sec_type",
|
||||||
|
record, &len, 1);
|
||||||
|
if(!ev.sec_type) {
|
||||||
|
- log(LOG_WARNING, "get event section type failed");
|
||||||
|
+ log(LOG_WARNING, "get event section type failed\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
trace_seq_printf(s, "\n");
|
||||||
|
- trace_seq_printf(s, "sec_type: %s\n", uuid_le(ev.sec_type));
|
||||||
|
+ char uuid[sizeof(UUID_STR_TYPE)] = UUID_STR_TYPE;
|
||||||
|
+ if (uuid_le(ev.sec_type, uuid) < 0) {
|
||||||
|
+ log(LOG_WARNING, "get uuid failed\n");
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+ trace_seq_printf(s, "sec_type: %s\n", uuid);
|
||||||
|
|
||||||
|
if (tep_get_field_val(s, event, "len", record, &val, 1) < 0) {
|
||||||
|
- log(LOG_WARNING, "tep get field val failed");
|
||||||
|
+ log(LOG_WARNING, "tep get field val failed\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -69,11 +78,11 @@ int ras_non_standard_event_handler(struct trace_seq *s,
|
||||||
|
|
||||||
|
ev.error = tep_get_field_raw(s, event, "buf", record, &len, 1);
|
||||||
|
if(!ev.error || ev.length != len) {
|
||||||
|
- log(LOG_WARNING, "get event error failed");
|
||||||
|
+ log(LOG_WARNING, "get event error failed\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
- if (strcmp(uuid_le(ev.sec_type), HISI_COMMON_SECTION_TYPE_UUID) == 0) {
|
||||||
|
+ if (strcmp(uuid, HISI_COMMON_SECTION_TYPE_UUID) == 0) {
|
||||||
|
decode_hisi_common_section(&ev);
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/src/c/hbm_online_repair/ras-non-standard-handler.h b/src/c/hbm_online_repair/ras-non-standard-handler.h
|
||||||
|
index 0272dc1..15a37ee 100644
|
||||||
|
--- a/src/c/hbm_online_repair/ras-non-standard-handler.h
|
||||||
|
+++ b/src/c/hbm_online_repair/ras-non-standard-handler.h
|
||||||
|
@@ -7,6 +7,7 @@
|
||||||
|
#define BIT(nr) (1UL << (nr))
|
||||||
|
|
||||||
|
#define SECTION_TYPE_UUID_LEN 16
|
||||||
|
+#define UUID_STR_TYPE "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
|
||||||
|
#define HISI_COMMON_SECTION_TYPE_UUID "c8b328a8-9917-4af6-9a13-2e08ab2e7586"
|
||||||
|
|
||||||
|
struct ras_non_standard_event {
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
@ -4,7 +4,7 @@
|
|||||||
Summary: System Inspection Framework
|
Summary: System Inspection Framework
|
||||||
Name: sysSentry
|
Name: sysSentry
|
||||||
Version: 1.0.2
|
Version: 1.0.2
|
||||||
Release: 52
|
Release: 53
|
||||||
License: Mulan PSL v2
|
License: Mulan PSL v2
|
||||||
Group: System Environment/Daemons
|
Group: System Environment/Daemons
|
||||||
Source0: https://gitee.com/openeuler/sysSentry/releases/download/v%{version}/%{name}-%{version}.tar.gz
|
Source0: https://gitee.com/openeuler/sysSentry/releases/download/v%{version}/%{name}-%{version}.tar.gz
|
||||||
@ -72,6 +72,7 @@ Patch59: ai_block_io-support-iodump.patch
|
|||||||
Patch60: fix-get_alarm-error.patch
|
Patch60: fix-get_alarm-error.patch
|
||||||
Patch61: fix-alarm_info-newline-break-error.patch
|
Patch61: fix-alarm_info-newline-break-error.patch
|
||||||
Patch62: add-hbm-online-repair.patch
|
Patch62: add-hbm-online-repair.patch
|
||||||
|
Patch63: fix-hbm-online-repair-notice-and-efi-create.patch
|
||||||
|
|
||||||
BuildRequires: cmake gcc-c++
|
BuildRequires: cmake gcc-c++
|
||||||
BuildRequires: python3 python3-setuptools
|
BuildRequires: python3 python3-setuptools
|
||||||
@ -365,6 +366,12 @@ rm -rf %{buildroot}
|
|||||||
%attr(0550,root,root) %{python3_sitelib}/syssentry/bmc_alarm.py
|
%attr(0550,root,root) %{python3_sitelib}/syssentry/bmc_alarm.py
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Wed Oct 30 2024 luckky <guodashun1@huawei.com> - 1.0.2-53
|
||||||
|
- Type:bugfix
|
||||||
|
- CVE:NA
|
||||||
|
- SUG:NA
|
||||||
|
- DESC:fix hbm online repair notice and efi create
|
||||||
|
|
||||||
* Sat Oct 26 2024 luckky <guodashun1@huawei.com> - 1.0.2-52
|
* Sat Oct 26 2024 luckky <guodashun1@huawei.com> - 1.0.2-52
|
||||||
- Type:requirement
|
- Type:requirement
|
||||||
- CVE:NA
|
- CVE:NA
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user