sync from branch openEuler-22.03-LTS-Next
(cherry picked from commit 88afcff886535264be2a213f9f9b8113a99a8097)
This commit is contained in:
parent
2c7b9c1d67
commit
ffde4428e9
@ -1,7 +1,7 @@
|
||||
From 746a5dd20bb688e1d830e216059e1de7e59186a3 Mon Sep 17 00:00:00 2001
|
||||
From 4a49af49be378835b65016d5465eae44107a52e1 Mon Sep 17 00:00:00 2001
|
||||
From: "fu.lin" <fulin10@huawei.com>
|
||||
Date: Tue, 13 Apr 2021 10:39:45 +0800
|
||||
Subject: [PATCH 01/72] criu: dump and restore cpu affinity of each thread
|
||||
Subject: [PATCH 4/6] criu: dump and restore cpu affinity of each thread
|
||||
|
||||
Criu should dump and restore threads' or processes'
|
||||
cpu affinity.
|
||||
@ -416,5 +416,5 @@ index 0000000..0d0b8ae
|
||||
@@ -0,0 +1 @@
|
||||
+{'dopts': '', 'ropts': '--with-cpu-affinity', 'flags': 'reqrst '}
|
||||
--
|
||||
2.34.1
|
||||
2.27.0
|
||||
|
||||
|
||||
@ -1,74 +0,0 @@
|
||||
From dc6dbe893f7a8b644b655a56e4a0edfb854c577f Mon Sep 17 00:00:00 2001
|
||||
From: bb-cat <ningyu9@huawei.com>
|
||||
Date: Wed, 2 Mar 2022 13:28:51 +0800
|
||||
Subject: [PATCH 02/72] compel: add rseq syscall into compel std plugin syscall
|
||||
tables Add rseq syscall numbers for: arm/aarch64, mips64, ppc64le, s390,
|
||||
x86_64/x86
|
||||
|
||||
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
|
||||
---
|
||||
compel/arch/arm/plugins/std/syscalls/syscall.def | 1 +
|
||||
compel/arch/mips/plugins/std/syscalls/syscall_64.tbl | 1 +
|
||||
compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl | 1 +
|
||||
compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl | 1 +
|
||||
compel/arch/x86/plugins/std/syscalls/syscall_32.tbl | 1 +
|
||||
compel/arch/x86/plugins/std/syscalls/syscall_64.tbl | 1 +
|
||||
6 files changed, 6 insertions(+)
|
||||
|
||||
diff --git a/compel/arch/arm/plugins/std/syscalls/syscall.def b/compel/arch/arm/plugins/std/syscalls/syscall.def
|
||||
index 1b877d1..bb78cbb 100644
|
||||
--- a/compel/arch/arm/plugins/std/syscalls/syscall.def
|
||||
+++ b/compel/arch/arm/plugins/std/syscalls/syscall.def
|
||||
@@ -119,3 +119,4 @@ clone3 435 435 (struct clone_args *uargs, size_t size)
|
||||
sched_setaffinity 122 241 (int fd, size_t cpusetsize, const cpu_set_t *mask)
|
||||
pidfd_open 434 434 (pid_t pid, unsigned int flags)
|
||||
pidfd_getfd 438 438 (int pidfd, int targetfd, unsigned int flags)
|
||||
+rseq 293 398 (void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
|
||||
diff --git a/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl b/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl
|
||||
index 7a6db19..95dc7d3 100644
|
||||
--- a/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl
|
||||
+++ b/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl
|
||||
@@ -115,3 +115,4 @@ __NR_fsmount 5432 sys_fsmount (int fd, unsigned int flags, unsigned int attr
|
||||
__NR_clone3 5435 sys_clone3 (struct clone_args *uargs, size_t size)
|
||||
__NR_pidfd_open 5434 sys_pidfd_open (pid_t pid, unsigned int flags)
|
||||
__NR_pidfd_getfd 5438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags)
|
||||
+__NR_rseq 5327 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
|
||||
diff --git a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl
|
||||
index dd79187..ad0d94f 100644
|
||||
--- a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl
|
||||
+++ b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl
|
||||
@@ -115,3 +115,4 @@ __NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size)
|
||||
__NR_sched_setaffinity 222 sys_sched_setaffinity (int fd, size_t cpusetsize, const cpu_set_t *mask)
|
||||
__NR_pidfd_open 434 sys_pidfd_open (pid_t pid, unsigned int flags)
|
||||
__NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags)
|
||||
+__NR_rseq 387 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
|
||||
diff --git a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl
|
||||
index 282adaf..916b697 100644
|
||||
--- a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl
|
||||
+++ b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl
|
||||
@@ -115,3 +115,4 @@ __NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size)
|
||||
__NR_sched_setaffinity 239 sys_sched_setaffinity (int fd, size_t cpusetsize, const cpu_set_t *mask)
|
||||
__NR_pidfd_open 434 sys_pidfd_open (pid_t pid, unsigned int flags)
|
||||
__NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags)
|
||||
+__NR_rseq 383 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
|
||||
diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl
|
||||
index 3fe3194..90f23d5 100644
|
||||
--- a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl
|
||||
+++ b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl
|
||||
@@ -103,3 +103,4 @@ __NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_f
|
||||
__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size)
|
||||
__NR_pidfd_open 434 sys_pidfd_open (pid_t pid, unsigned int flags)
|
||||
__NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags)
|
||||
+__NR_rseq 386 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
|
||||
diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl
|
||||
index c1d119d..323fab1 100644
|
||||
--- a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl
|
||||
+++ b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl
|
||||
@@ -114,3 +114,4 @@ __NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_
|
||||
__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size)
|
||||
__NR_pidfd_open 434 sys_pidfd_open (pid_t pid, unsigned int flags)
|
||||
__NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags)
|
||||
+__NR_rseq 334 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
|
||||
--
|
||||
2.34.1
|
||||
|
||||
295
0002-mm-add-pin-memory-method-for-criu.patch
Normal file
295
0002-mm-add-pin-memory-method-for-criu.patch
Normal file
@ -0,0 +1,295 @@
|
||||
From dc9ba08388bfb3aa28225d9cd5a4f779c10e23a9 Mon Sep 17 00:00:00 2001
|
||||
From: anatasluo <luolongjuna@gmail.com>
|
||||
Date: Sat, 26 Feb 2022 02:48:25 +0000
|
||||
Subject: [PATCH 2/2] mm: add pin memory method for criu
|
||||
|
||||
Add a pin-memory method for criu to improve memory recovery
|
||||
speed and avoid saving user private data to files.
|
||||
|
||||
Signed-off-by: anatasluo <luolongjuna@gmail.com>
|
||||
---
|
||||
criu/config.c | 1 +
|
||||
criu/cr-restore.c | 5 ++
|
||||
criu/crtools.c | 1 +
|
||||
criu/include/cr_options.h | 1 +
|
||||
criu/include/restorer.h | 28 ++++++++++++
|
||||
criu/mem.c | 96 +++++++++++++++++++++++++++++++++++++++
|
||||
criu/pie/restorer.c | 25 +++++++++-
|
||||
criu/seize.c | 1 +
|
||||
8 files changed, 157 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/criu/config.c b/criu/config.c
|
||||
index 71f99c9..53a5cfd 100644
|
||||
--- a/criu/config.c
|
||||
+++ b/criu/config.c
|
||||
@@ -696,6 +696,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd,
|
||||
{ "pre-dump-mode", required_argument, 0, 1097 },
|
||||
{ "file-validation", required_argument, 0, 1098 },
|
||||
BOOL_OPT("with-cpu-affinity", &opts.with_cpu_affinity),
|
||||
+ BOOL_OPT("pin-memory", &opts.pin_memory),
|
||||
{ "lsm-mount-context", required_argument, 0, 1099 },
|
||||
{ "network-lock", required_argument, 0, 1100 },
|
||||
{},
|
||||
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
|
||||
index 5b645c1..6d6e63f 100644
|
||||
--- a/criu/cr-restore.c
|
||||
+++ b/criu/cr-restore.c
|
||||
@@ -3805,6 +3805,11 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
|
||||
task_args, task_args->t->pid, task_args->nr_threads, task_args->clone_restore_fn,
|
||||
task_args->thread_args);
|
||||
|
||||
+ if (opts.pin_memory)
|
||||
+ task_args->pin_memory = true;
|
||||
+ else
|
||||
+ task_args->pin_memory = false;
|
||||
+
|
||||
/*
|
||||
* An indirect call to task_restore, note it never returns
|
||||
* and restoring core is extremely destructive.
|
||||
diff --git a/criu/crtools.c b/criu/crtools.c
|
||||
index b5a36b9..0cd4d11 100644
|
||||
--- a/criu/crtools.c
|
||||
+++ b/criu/crtools.c
|
||||
@@ -447,6 +447,7 @@ usage:
|
||||
" can be 'filesize' or 'buildid' (default).\n"
|
||||
" --with-cpu-affinity Allow to restore cpu affinity. Only for hosts with\n"
|
||||
" same cpu quantity.\n"
|
||||
+ " --pin-memory Use pin memory method for checkpoint and restore.\n"
|
||||
"\n"
|
||||
"Check options:\n"
|
||||
" Without options, \"criu check\" checks availability of absolutely required\n"
|
||||
diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h
|
||||
index 3b50e59..61898fd 100644
|
||||
--- a/criu/include/cr_options.h
|
||||
+++ b/criu/include/cr_options.h
|
||||
@@ -190,6 +190,7 @@ struct cr_options {
|
||||
int file_validation_method;
|
||||
/* restore cpu affinity */
|
||||
int with_cpu_affinity;
|
||||
+ int pin_memory;
|
||||
};
|
||||
|
||||
extern struct cr_options opts;
|
||||
diff --git a/criu/include/restorer.h b/criu/include/restorer.h
|
||||
index c2ef8f0..c5dcf94 100644
|
||||
--- a/criu/include/restorer.h
|
||||
+++ b/criu/include/restorer.h
|
||||
@@ -225,6 +225,7 @@ struct task_restore_args {
|
||||
int lsm_type;
|
||||
int child_subreaper;
|
||||
bool has_clone3_set_tid;
|
||||
+ bool pin_memory;
|
||||
} __aligned(64);
|
||||
|
||||
/*
|
||||
@@ -316,4 +317,31 @@ enum {
|
||||
#define __r_sym(name) restorer_sym##name
|
||||
#define restorer_sym(rblob, name) (void *)(rblob + __r_sym(name))
|
||||
|
||||
+/*
+ * Interface to the pin-memory device node: its path, the ioctl magic,
+ * the per-command numbers and the ioctl request codes built from them.
+ *
+ * NOTE(review): "SEPCIAL" / "RETORE" look like misspellings of "SPECIAL" /
+ * "RESTORE"; the names are kept unchanged because code outside this block
+ * may reference them.
+ */
+#define PIN_MEM_FILE		"/dev/pinmem"
+#define PIN_MEM_MAGIC		0x59
+#define _SET_PIN_MEM_AREA	1
+#define _CLEAR_PIN_MEM_AREA	2
+#define _REMAP_PIN_MEM_AREA	3
+#define _DUMP_SEPCIAL_PAGES	6
+#define _RETORE_SEPCIAL_PAGES	7
+#define SET_PIN_MEM_AREA	_IOW(PIN_MEM_MAGIC, _SET_PIN_MEM_AREA, struct pin_mem_area_set)
+#define CLEAR_PIN_MEM_AREA	_IOW(PIN_MEM_MAGIC, _CLEAR_PIN_MEM_AREA, int)
+#define REMAP_PIN_MEM_AREA	_IOW(PIN_MEM_MAGIC, _REMAP_PIN_MEM_AREA, int)
+#define DUMP_SEPCIAL_PAGES	_IOW(PIN_MEM_MAGIC, _DUMP_SEPCIAL_PAGES, int)
+#define RETORE_SEPCIAL_PAGES	_IOW(PIN_MEM_MAGIC, _RETORE_SEPCIAL_PAGES, int)
+
+/*
+ * Size limit, in bytes, applied to a single pin_mem_area entry.
+ * Parenthesized so the macro expands as one value inside any expression
+ * (e.g. "x % ONCE_PIN_MEM_SIZE_LIMIT" or "x / ONCE_PIN_MEM_SIZE_LIMIT").
+ */
+#define ONCE_PIN_MEM_SIZE_LIMIT	(32 * 1024 * 1024)
+/* Number of slots in pin_mem_area_set.mem_area[] */
+#define MAX_PIN_MEM_AREA_NUM	16
+
+/* One virtual address range, given by its start and end address */
+struct pin_mem_area {
+	unsigned long virt_start;
+	unsigned long virt_end;
+};
+
+/*
+ * Argument of the SET_PIN_MEM_AREA ioctl: a pid plus the first area_num
+ * entries of mem_area[].
+ */
+struct pin_mem_area_set {
+	unsigned int pid;
+	unsigned int area_num;
+	struct pin_mem_area mem_area[MAX_PIN_MEM_AREA_NUM];
+};
|
||||
+
|
||||
#endif /* __CR_RESTORER_H__ */
|
||||
diff --git a/criu/mem.c b/criu/mem.c
|
||||
index ca74bfb..e95c8de 100644
|
||||
--- a/criu/mem.c
|
||||
+++ b/criu/mem.c
|
||||
@@ -432,6 +432,85 @@ again:
|
||||
return ret;
|
||||
}
|
||||
|
||||
+/*
+ * Tell whether a VMA is handled by the pin-memory path.
+ *
+ * vDSO, VVAR and AIO-ring areas are never pinned; of the remaining areas
+ * only anonymous private mappings are.
+ */
+bool should_pin_vmae(VmaEntry *vmae)
+{
+	bool excluded = vma_entry_is(vmae, VMA_AREA_VDSO) ||
+			vma_entry_is(vmae, VMA_AREA_VVAR) ||
+			vma_entry_is(vmae, VMA_AREA_AIORING);
+
+	if (excluded)
+		return false;
+
+	return vma_entry_is(vmae, VMA_ANON_PRIVATE);
+}
|
||||
+
|
||||
+/*
+ * Describe the range starting at @start and ending at *@pend as a batch of
+ * at most MAX_PIN_MEM_AREA_NUM areas, none longer than
+ * ONCE_PIN_MEM_SIZE_LIMIT, and submit the batch with the SET_PIN_MEM_AREA
+ * ioctl on @fd for the task behind @item.
+ *
+ * On return *@pend holds the end of the last area placed in the batch, so
+ * the caller can resume from there when the range did not fit in one batch.
+ * Returns the ioctl() result (negative on failure).
+ */
+static int pin_one_pmas(int fd, unsigned long start,
+			unsigned long *pend, struct pstree_item *item)
+{
+	/* Zero-fill so unused mem_area[] slots never carry stack garbage into the ioctl */
+	struct pin_mem_area_set pmas = { 0 };
+	unsigned long end = *pend;
+	unsigned long next = start;
+	unsigned int index = 0;
+	int ret;
+
+	while (start < end && index < MAX_PIN_MEM_AREA_NUM) {
+		/* Compare the remaining length instead of start + limit, so the sum cannot wrap */
+		if (end - start > ONCE_PIN_MEM_SIZE_LIMIT)
+			next = start + ONCE_PIN_MEM_SIZE_LIMIT;
+		else
+			next = end;
+
+		pmas.mem_area[index].virt_start = start;
+		pmas.mem_area[index].virt_end = next;
+		index++;
+		start = next;
+	}
+
+	*pend = next;
+	pmas.area_num = index;
+	pmas.pid = vpid(item);
+	ret = ioctl(fd, SET_PIN_MEM_AREA, &pmas);
+	if (ret < 0)
+		pr_err("pin mem fail, errno: %s\n", strerror(errno));
+	return ret;
+}
|
||||
+
|
||||
+/*
+ * Pin the whole address range of @vmae for the task behind @item.
+ *
+ * Opens PIN_MEM_FILE, feeds the range to pin_one_pmas() batch by batch
+ * (each call reports through its in/out end argument how far it got) and
+ * closes the device again. Returns -1 if the device cannot be opened,
+ * otherwise the result of the last pin_one_pmas() call (0 for an empty range).
+ */
+static int pin_vmae(VmaEntry *vmae, struct pstree_item *item)
+{
+	unsigned long cur, batch_end;
+	int dev_fd;
+	int err = 0;
+
+	dev_fd = open(PIN_MEM_FILE, O_RDWR);
+	if (dev_fd < 0) {
+		pr_err("open file: %s fail.\n", PIN_MEM_FILE);
+		return -1;
+	}
+
+	for (cur = vmae->start; cur < vmae->end; cur = batch_end) {
+		/* ask for everything that is left; the callee trims it to one batch */
+		batch_end = vmae->end;
+		err = pin_one_pmas(dev_fd, cur, &batch_end, item);
+		if (err < 0)
+			break;
+	}
+
+	close(dev_fd);
+	return err;
+}
|
||||
+
|
||||
static int __parasite_dump_pages_seized(struct pstree_item *item, struct parasite_dump_pages_args *args,
|
||||
struct vm_area_list *vma_area_list, struct mem_dump_ctl *mdc,
|
||||
struct parasite_ctl *ctl)
|
||||
@@ -500,6 +579,19 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, struct parasit
|
||||
goto out_xfer;
|
||||
}
|
||||
|
||||
+ if (opts.pin_memory) {
|
||||
+ /* pin memory before dump pages */
|
||||
+ list_for_each_entry(vma_area, &vma_area_list->h, list) {
|
||||
+ if (should_pin_vmae(vma_area->e)) {
|
||||
+ ret = pin_vmae(vma_area->e, item);
|
||||
+ if (ret) {
|
||||
+ exit_code = -1;
|
||||
+ goto out_xfer;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
/*
|
||||
* Step 1 -- generate the pagemap
|
||||
*/
|
||||
@@ -509,6 +601,10 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, struct parasit
|
||||
parent_predump_mode = mdc->parent_ie->pre_dump_mode;
|
||||
|
||||
list_for_each_entry(vma_area, &vma_area_list->h, list) {
|
||||
+ if (opts.pin_memory && should_pin_vmae(vma_area->e)) {
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
ret = generate_vma_iovs(item, vma_area, pp, &xfer, args, ctl, &pmc, has_parent, mdc->pre_dump,
|
||||
parent_predump_mode);
|
||||
if (ret < 0)
|
||||
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
|
||||
index fbc89fe..d04f8f1 100644
|
||||
--- a/criu/pie/restorer.c
|
||||
+++ b/criu/pie/restorer.c
|
||||
@@ -1384,6 +1384,24 @@ int cleanup_current_inotify_events(struct task_restore_args *task_args)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+/*
+ * Issue the REMAP_PIN_MEM_AREA ioctl on PIN_MEM_FILE for @pid.
+ *
+ * Returns -1 if the device cannot be opened, otherwise the sys_ioctl()
+ * result (negative on failure).
+ */
+int remap_vmas(int pid)
+{
+	int fd, ret = 0;
+
+	fd = sys_open(PIN_MEM_FILE, O_RDWR, 0);
+	/*
+	 * Treat every negative value as failure, the same way the sys_ioctl()
+	 * result is checked below. The sys_*() wrappers are raw syscalls and
+	 * are expected to return -errno, so a comparison with -1 alone would
+	 * only catch one particular error code.
+	 */
+	if (fd < 0) {
+		pr_err("open file: %s fail.\n", PIN_MEM_FILE);
+		return -1;
+	}
+
+	ret = sys_ioctl(fd, REMAP_PIN_MEM_AREA, (unsigned long) &pid);
+	if (ret < 0)
+		pr_err("remap pin mem fail for pid: %d\n", pid);
+	sys_close(fd);
+	return ret;
+}
|
||||
+
|
||||
+
|
||||
/*
|
||||
* The main routine to restore task via sigreturn.
|
||||
* This one is very special, we never return there
|
||||
@@ -1553,7 +1571,12 @@ long __export_restore_task(struct task_restore_args *args)
|
||||
goto core_restore_end;
|
||||
}
|
||||
}
|
||||
-
|
||||
+ if (args->pin_memory) {
|
||||
+ if (remap_vmas(my_pid) < 0) {
|
||||
+ pr_err("Remap vmas fail\n");
|
||||
+ goto core_restore_end;
|
||||
+ }
|
||||
+ }
|
||||
/*
|
||||
* Now read the contents (if any)
|
||||
*/
|
||||
diff --git a/criu/seize.c b/criu/seize.c
|
||||
index 95bf9ef..c11ecab 100644
|
||||
--- a/criu/seize.c
|
||||
+++ b/criu/seize.c
|
||||
@@ -23,6 +23,7 @@
|
||||
#include "string.h"
|
||||
#include "xmalloc.h"
|
||||
#include "util.h"
|
||||
+#include "mem.h"
|
||||
|
||||
#define NR_ATTEMPTS 5
|
||||
|
||||
--
|
||||
2.25.1
|
||||
|
||||
@ -1,62 +0,0 @@
|
||||
From 35053ab4bb8fe09818da9421a053e2e13c7ad817 Mon Sep 17 00:00:00 2001
|
||||
From: bb-cat <ningyu9@huawei.com>
|
||||
Date: Wed, 2 Mar 2022 13:34:10 +0800
|
||||
Subject: [PATCH 03/72] kerndat: check for rseq syscall support Signed-off-by:
|
||||
Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
|
||||
|
||||
---
|
||||
criu/include/kerndat.h | 1 +
|
||||
criu/kerndat.c | 18 ++++++++++++++++++
|
||||
2 files changed, 19 insertions(+)
|
||||
|
||||
diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h
|
||||
index 80bad7f..44a6976 100644
|
||||
--- a/criu/include/kerndat.h
|
||||
+++ b/criu/include/kerndat.h
|
||||
@@ -74,6 +74,7 @@ struct kerndat_s {
|
||||
bool has_pidfd_getfd;
|
||||
bool has_nspid;
|
||||
bool has_nftables_concat;
|
||||
+ bool has_rseq;
|
||||
};
|
||||
|
||||
extern struct kerndat_s kdat;
|
||||
diff --git a/criu/kerndat.c b/criu/kerndat.c
|
||||
index 0e88ba4..f5a4490 100644
|
||||
--- a/criu/kerndat.c
|
||||
+++ b/criu/kerndat.c
|
||||
@@ -816,6 +816,20 @@ static int kerndat_x86_has_ptrace_fpu_xsave_bug(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+/*
+ * Probe for the rseq syscall by calling it with arguments that must be
+ * rejected. ENOSYS means the syscall is missing; any other error sets
+ * kdat.has_rseq. An unexpected success is reported as an error (-1).
+ */
+static int kerndat_has_rseq(void)
+{
+	long rc = syscall(__NR_rseq, NULL, 0, 0, 0);
+
+	if (rc != -1) {
+		pr_err("rseq should fail\n");
+		return -1;
+	}
+
+	if (errno != ENOSYS)
+		kdat.has_rseq = true;
+	else
+		pr_info("rseq syscall isn't supported\n");
+
+	return 0;
+}
|
||||
+
|
||||
#define KERNDAT_CACHE_FILE KDAT_RUNDIR "/criu.kdat"
|
||||
#define KERNDAT_CACHE_FILE_TMP KDAT_RUNDIR "/.criu.kdat"
|
||||
|
||||
@@ -1360,6 +1374,10 @@ int kerndat_init(void)
|
||||
ret = -1;
|
||||
}
|
||||
|
||||
+ if (!ret && kerndat_has_rseq()) {
|
||||
+ pr_err("kerndat_has_rseq failed when initializing kerndat.\n");
|
||||
+ ret = -1;
|
||||
+ }
|
||||
kerndat_lsm();
|
||||
kerndat_mmap_min_addr();
|
||||
kerndat_files_stat();
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,161 +0,0 @@
|
||||
From 30381c725f7c6738bd0df0f822aace1e66065b65 Mon Sep 17 00:00:00 2001
|
||||
From: bb-cat <ningyu9@huawei.com>
|
||||
Date: Wed, 2 Mar 2022 13:35:53 +0800
|
||||
Subject: [PATCH 04/72] util: move fork_and_ptrace_attach helper from cr-check
|
||||
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
|
||||
|
||||
---
|
||||
criu/cr-check.c | 55 -------------------------------------------
|
||||
criu/include/util.h | 1 +
|
||||
criu/util.c | 57 +++++++++++++++++++++++++++++++++++++++++++++
|
||||
3 files changed, 58 insertions(+), 55 deletions(-)
|
||||
|
||||
diff --git a/criu/cr-check.c b/criu/cr-check.c
|
||||
index 3575fb3..d41ef8f 100644
|
||||
--- a/criu/cr-check.c
|
||||
+++ b/criu/cr-check.c
|
||||
@@ -537,61 +537,6 @@ static int check_sigqueuinfo(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static pid_t fork_and_ptrace_attach(int (*child_setup)(void))
|
||||
-{
|
||||
- pid_t pid;
|
||||
- int sk_pair[2], sk;
|
||||
- char c = 0;
|
||||
-
|
||||
- if (socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair)) {
|
||||
- pr_perror("socketpair");
|
||||
- return -1;
|
||||
- }
|
||||
-
|
||||
- pid = fork();
|
||||
- if (pid < 0) {
|
||||
- pr_perror("fork");
|
||||
- return -1;
|
||||
- } else if (pid == 0) {
|
||||
- sk = sk_pair[1];
|
||||
- close(sk_pair[0]);
|
||||
-
|
||||
- if (child_setup && child_setup() != 0)
|
||||
- exit(1);
|
||||
-
|
||||
- if (write(sk, &c, 1) != 1) {
|
||||
- pr_perror("write");
|
||||
- exit(1);
|
||||
- }
|
||||
-
|
||||
- while (1)
|
||||
- sleep(1000);
|
||||
- exit(1);
|
||||
- }
|
||||
-
|
||||
- sk = sk_pair[0];
|
||||
- close(sk_pair[1]);
|
||||
-
|
||||
- if (read(sk, &c, 1) != 1) {
|
||||
- close(sk);
|
||||
- kill(pid, SIGKILL);
|
||||
- pr_perror("read");
|
||||
- return -1;
|
||||
- }
|
||||
-
|
||||
- close(sk);
|
||||
-
|
||||
- if (ptrace(PTRACE_ATTACH, pid, NULL, NULL) == -1) {
|
||||
- pr_perror("Unable to ptrace the child");
|
||||
- kill(pid, SIGKILL);
|
||||
- return -1;
|
||||
- }
|
||||
-
|
||||
- waitpid(pid, NULL, 0);
|
||||
-
|
||||
- return pid;
|
||||
-}
|
||||
-
|
||||
static int check_ptrace_peeksiginfo(void)
|
||||
{
|
||||
struct ptrace_peeksiginfo_args arg;
|
||||
diff --git a/criu/include/util.h b/criu/include/util.h
|
||||
index a2dac22..1c0b3c7 100644
|
||||
--- a/criu/include/util.h
|
||||
+++ b/criu/include/util.h
|
||||
@@ -166,6 +166,7 @@ extern int is_anon_link_type(char *link, char *type);
|
||||
|
||||
extern int cr_system(int in, int out, int err, char *cmd, char *const argv[], unsigned flags);
|
||||
extern int cr_system_userns(int in, int out, int err, char *cmd, char *const argv[], unsigned flags, int userns_pid);
|
||||
+extern pid_t fork_and_ptrace_attach(int (*child_setup)(void));
|
||||
extern int cr_daemon(int nochdir, int noclose, int close_fd);
|
||||
extern int status_ready(void);
|
||||
extern int is_root_user(void);
|
||||
diff --git a/criu/util.c b/criu/util.c
|
||||
index 06124c2..e682161 100644
|
||||
--- a/criu/util.c
|
||||
+++ b/criu/util.c
|
||||
@@ -654,6 +654,63 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
+/*
+ * Fork a child, wait until it reports readiness over a socketpair, then
+ * PTRACE_ATTACH to it and waitpid() for it once.
+ *
+ * @child_setup: optional hook run in the child before it signals readiness;
+ *               a non-zero return makes the child exit(1).
+ *
+ * Returns the child's pid on success or -1 on failure. When a step fails
+ * after the fork, the child is killed and reaped before returning. On
+ * success the child is left attached (it just sleeps in a loop); getting
+ * rid of it is up to the caller.
+ */
+pid_t fork_and_ptrace_attach(int (*child_setup)(void))
+{
+	pid_t pid;
+	int sk_pair[2], sk;
+	char c = 0;
+
+	if (socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair)) {
+		pr_perror("socketpair");
+		return -1;
+	}
+
+	pid = fork();
+	if (pid < 0) {
+		pr_perror("fork");
+		/* no child exists, so nobody else will ever close these */
+		close(sk_pair[0]);
+		close(sk_pair[1]);
+		return -1;
+	} else if (pid == 0) {
+		sk = sk_pair[1];
+		close(sk_pair[0]);
+
+		if (child_setup && child_setup() != 0)
+			exit(1);
+
+		/* one byte tells the parent that setup is done */
+		if (write(sk, &c, 1) != 1) {
+			pr_perror("write");
+			exit(1);
+		}
+
+		while (1)
+			sleep(1000);
+		exit(1);
+	}
+
+	sk = sk_pair[0];
+	close(sk_pair[1]);
+
+	if (read(sk, &c, 1) != 1) {
+		/* report first: close/kill/waitpid below may overwrite errno */
+		pr_perror("read");
+		close(sk);
+		kill(pid, SIGKILL);
+		waitpid(pid, NULL, 0);
+		return -1;
+	}
+
+	close(sk);
+
+	if (ptrace(PTRACE_ATTACH, pid, NULL, NULL) == -1) {
+		pr_perror("Unable to ptrace the child");
+		kill(pid, SIGKILL);
+		waitpid(pid, NULL, 0);
+		return -1;
+	}
+
+	waitpid(pid, NULL, 0);
+
+	return pid;
+}
|
||||
+
|
||||
int status_ready(void)
|
||||
{
|
||||
char c = 0;
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,162 +0,0 @@
|
||||
From f84bab6b29146ef7fb9867af0324efb90596e12c Mon Sep 17 00:00:00 2001
|
||||
From: bb-cat <ningyu9@huawei.com>
|
||||
Date: Wed, 2 Mar 2022 15:30:18 +0800
|
||||
Subject: [PATCH 05/72] cr-check: Add ptrace rseq conf dump feature Add
|
||||
"get_rseq_conf" feature corresponding to the
|
||||
ptrace(PTRACE_GET_RSEQ_CONFIGURATION) support.
|
||||
|
||||
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
|
||||
---
|
||||
compel/include/uapi/ptrace.h | 12 +++++++++++
|
||||
criu/cr-check.c | 11 ++++++++++
|
||||
criu/include/kerndat.h | 1 +
|
||||
criu/kerndat.c | 41 ++++++++++++++++++++++++++++++++++++
|
||||
4 files changed, 65 insertions(+)
|
||||
|
||||
diff --git a/compel/include/uapi/ptrace.h b/compel/include/uapi/ptrace.h
|
||||
index c5291d2..bfe28c7 100644
|
||||
--- a/compel/include/uapi/ptrace.h
|
||||
+++ b/compel/include/uapi/ptrace.h
|
||||
@@ -65,6 +65,18 @@ typedef struct {
|
||||
uint64_t flags; /* Output: filter's flags */
|
||||
} seccomp_metadata_t;
|
||||
|
||||
+#ifndef PTRACE_GET_RSEQ_CONFIGURATION
+#define PTRACE_GET_RSEQ_CONFIGURATION 0x420f
+
+/*
+ * Buffer type used with the PTRACE_GET_RSEQ_CONFIGURATION request.
+ * Declared here only when that request macro is not already defined.
+ */
+struct ptrace_rseq_configuration {
+	__u64 rseq_abi_pointer;
+	__u32 rseq_abi_size;
+	__u32 signature;
+	__u32 flags;
+	__u32 pad;
+};
+#endif /* PTRACE_GET_RSEQ_CONFIGURATION */
|
||||
+
|
||||
#ifdef PTRACE_EVENT_STOP
|
||||
#if PTRACE_EVENT_STOP == 7 /* Bad value from Linux 3.1-3.3, fixed in 3.4 */
|
||||
#undef PTRACE_EVENT_STOP
|
||||
diff --git a/criu/cr-check.c b/criu/cr-check.c
|
||||
index d41ef8f..ba87511 100644
|
||||
--- a/criu/cr-check.c
|
||||
+++ b/criu/cr-check.c
|
||||
@@ -794,6 +794,15 @@ static int check_ptrace_dump_seccomp_filters(void)
|
||||
return ret;
|
||||
}
|
||||
|
||||
+/*
+ * "criu check" feature probe: returns 0 when kdat reports that
+ * ptrace(PTRACE_GET_RSEQ_CONFIGURATION) works, otherwise warns and returns -1.
+ */
+static int check_ptrace_get_rseq_conf(void)
+{
+	if (kdat.has_ptrace_get_rseq_conf)
+		return 0;
+
+	pr_warn("ptrace(PTRACE_GET_RSEQ_CONFIGURATION) isn't supported. C/R of processes which are using rseq() won't work.\n");
+	return -1;
+}
|
||||
+
|
||||
static int check_mem_dirty_track(void)
|
||||
{
|
||||
if (!kdat.has_dirty_track) {
|
||||
@@ -1435,6 +1444,7 @@ int cr_check(void)
|
||||
ret |= check_ns_pid();
|
||||
ret |= check_apparmor_stacking();
|
||||
ret |= check_network_lock_nftables();
|
||||
+ ret |= check_ptrace_get_rseq_conf();
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1547,6 +1557,7 @@ static struct feature_list feature_list[] = {
|
||||
{ "ns_pid", check_ns_pid },
|
||||
{ "apparmor_stacking", check_apparmor_stacking },
|
||||
{ "network_lock_nftables", check_network_lock_nftables },
|
||||
+ { "get_rseq_conf", check_ptrace_get_rseq_conf },
|
||||
{ NULL, NULL },
|
||||
};
|
||||
|
||||
diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h
|
||||
index 44a6976..05abeda 100644
|
||||
--- a/criu/include/kerndat.h
|
||||
+++ b/criu/include/kerndat.h
|
||||
@@ -75,6 +75,7 @@ struct kerndat_s {
|
||||
bool has_nspid;
|
||||
bool has_nftables_concat;
|
||||
bool has_rseq;
|
||||
+ bool has_ptrace_get_rseq_conf;
|
||||
};
|
||||
|
||||
extern struct kerndat_s kdat;
|
||||
diff --git a/criu/kerndat.c b/criu/kerndat.c
|
||||
index f5a4490..4841387 100644
|
||||
--- a/criu/kerndat.c
|
||||
+++ b/criu/kerndat.c
|
||||
@@ -4,6 +4,8 @@
|
||||
#include <sys/file.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
+#include <sys/ptrace.h>
|
||||
+#include <sys/wait.h>
|
||||
#include <sys/mman.h>
|
||||
#include <errno.h>
|
||||
#include <sys/syscall.h>
|
||||
@@ -36,6 +38,7 @@
|
||||
#include "sockets.h"
|
||||
#include "net.h"
|
||||
#include "tun.h"
|
||||
+#include <compel/ptrace.h>
|
||||
#include <compel/plugins/std/syscall-codes.h>
|
||||
#include "netfilter.h"
|
||||
#include "fsnotify.h"
|
||||
@@ -830,6 +833,40 @@ static int kerndat_has_rseq(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+/*
+ * Detect ptrace(PTRACE_GET_RSEQ_CONFIGURATION) by issuing it against a
+ * throw-away child obtained from fork_and_ptrace_attach().
+ *
+ * kdat.has_ptrace_get_rseq_conf is set to true only when the request
+ * returns a full-sized structure whose flags field is zero. Returns -1 if
+ * the helper child cannot be created, 0 otherwise.
+ */
+static int kerndat_has_ptrace_get_rseq_conf(void)
+{
+	struct ptrace_rseq_configuration conf;
+	pid_t child;
+	int got;
+
+	child = fork_and_ptrace_attach(NULL);
+	if (child < 0)
+		return -1;
+
+	got = ptrace(PTRACE_GET_RSEQ_CONFIGURATION, child, sizeof(conf), &conf);
+	if (got != sizeof(conf)) {
+		kdat.has_ptrace_get_rseq_conf = false;
+		pr_info("ptrace(PTRACE_GET_RSEQ_CONFIGURATION) is not supported\n");
+	} else if (conf.flags != 0) {
+		/*
+		 * The kernel side is expected to report zero flags; anything else
+		 * would need handling here that does not exist yet.
+		 */
+		kdat.has_ptrace_get_rseq_conf = false;
+		pr_err("ptrace(PTRACE_GET_RSEQ_CONFIGURATION): rseq.flags != 0\n");
+	} else {
+		kdat.has_ptrace_get_rseq_conf = true;
+	}
+
+	/* the helper child is no longer needed whatever the outcome */
+	kill(child, SIGKILL);
+	waitpid(child, NULL, 0);
+	return 0;
+}
|
||||
+
|
||||
#define KERNDAT_CACHE_FILE KDAT_RUNDIR "/criu.kdat"
|
||||
#define KERNDAT_CACHE_FILE_TMP KDAT_RUNDIR "/.criu.kdat"
|
||||
|
||||
@@ -1378,6 +1415,10 @@ int kerndat_init(void)
|
||||
pr_err("kerndat_has_rseq failed when initializing kerndat.\n");
|
||||
ret = -1;
|
||||
}
|
||||
+ if (!ret && kerndat_has_ptrace_get_rseq_conf()) {
|
||||
+ pr_err("kerndat_has_ptrace_get_rseq_conf failed when initializing kerndat.\n");
|
||||
+ ret = -1;
|
||||
+ }
|
||||
kerndat_lsm();
|
||||
kerndat_mmap_min_addr();
|
||||
kerndat_files_stat();
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,702 +0,0 @@
|
||||
From c905adf3aaa116984e28a51700c53917f3651e3b Mon Sep 17 00:00:00 2001
|
||||
From: bb-cat <ningyu9@huawei.com>
|
||||
Date: Wed, 2 Mar 2022 14:52:35 +0800
|
||||
Subject: [PATCH 06/72] rseq: initial support TODO: 1. properly handle case
|
||||
when the kernel has rseq() support but has no
|
||||
ptrace(PTRACE_GET_RSEQ_CONFIGURATION) support and user processes haven't used
|
||||
rseq(). 2. properly handle "transient" states, when CRIU comes during rseq
|
||||
was executed. We need test for this case with some "heavy" rseq + we need to
|
||||
properly handle RSEQ_CS_* flags.
|
||||
|
||||
Fixes: #1696
|
||||
|
||||
Reported-by: Radostin Stoyanov <radostin@redhat.com>
|
||||
Suggested-by: Florian Weimer <fweimer@redhat.com>
|
||||
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
|
||||
---
|
||||
compel/include/uapi/ptrace.h | 16 ++--
|
||||
criu/cr-dump.c | 99 ++++++++++++++++++++++++
|
||||
criu/cr-restore.c | 17 +++++
|
||||
criu/include/linux/rseq.h | 144 +++++++++++++++++++++++++++++++++++
|
||||
criu/include/parasite.h | 7 ++
|
||||
criu/include/restorer.h | 7 ++
|
||||
criu/kerndat.c | 2 +-
|
||||
criu/parasite-syscall.c | 11 +++
|
||||
criu/pie/parasite.c | 99 ++++++++++++++++++++++++
|
||||
criu/pie/restorer.c | 24 ++++++
|
||||
images/Makefile | 1 +
|
||||
images/core.proto | 2 +
|
||||
images/rseq.proto | 9 +++
|
||||
13 files changed, 429 insertions(+), 9 deletions(-)
|
||||
create mode 100644 criu/include/linux/rseq.h
|
||||
create mode 100644 images/rseq.proto
|
||||
|
||||
diff --git a/compel/include/uapi/ptrace.h b/compel/include/uapi/ptrace.h
|
||||
index bfe28c7..d807a92 100644
|
||||
--- a/compel/include/uapi/ptrace.h
|
||||
+++ b/compel/include/uapi/ptrace.h
|
||||
@@ -66,14 +66,14 @@ typedef struct {
|
||||
} seccomp_metadata_t;
|
||||
|
||||
#ifndef PTRACE_GET_RSEQ_CONFIGURATION
|
||||
-#define PTRACE_GET_RSEQ_CONFIGURATION 0x420f
|
||||
-
|
||||
-struct ptrace_rseq_configuration {
|
||||
- __u64 rseq_abi_pointer;
|
||||
- __u32 rseq_abi_size;
|
||||
- __u32 signature;
|
||||
- __u32 flags;
|
||||
- __u32 pad;
|
||||
+#define PTRACE_GET_RSEQ_CONFIGURATION 0x420f
|
||||
+
|
||||
+struct __ptrace_rseq_configuration {
|
||||
+ uint64_t rseq_abi_pointer;
|
||||
+ uint32_t rseq_abi_size;
|
||||
+ uint32_t signature;
|
||||
+ uint32_t flags;
|
||||
+ uint32_t pad;
|
||||
};
|
||||
#endif
|
||||
|
||||
diff --git a/criu/cr-dump.c b/criu/cr-dump.c
|
||||
index f07fe6e..91dd08a 100644
|
||||
--- a/criu/cr-dump.c
|
||||
+++ b/criu/cr-dump.c
|
||||
@@ -45,6 +45,7 @@
|
||||
#include "proc_parse.h"
|
||||
#include "parasite.h"
|
||||
#include "parasite-syscall.h"
|
||||
+#include <compel/ptrace.h>
|
||||
#include "files.h"
|
||||
#include "files-reg.h"
|
||||
#include "shmem.h"
|
||||
@@ -200,6 +201,25 @@ static int dump_sched_info(int pid, ThreadCoreEntry *tc)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+/*
+ * Refuse to dump a thread that has rseq set up when the kernel supports
+ * rseq but cannot report its configuration through ptrace.
+ *
+ * Returns -1 in that case and 0 otherwise. @has_tc_rseq_entry is not
+ * consulted by this function.
+ */
+static int check_thread_rseq(pid_t tid, const struct parasite_check_rseq *ti_rseq, bool has_tc_rseq_entry)
+{
+	/* the check only matters with rseq present and get_rseq_conf missing */
+	if (kdat.has_rseq && !kdat.has_ptrace_get_rseq_conf) {
+		pr_debug("%d has rseq_inited = %d\n", tid, ti_rseq->rseq_inited);
+
+		if (ti_rseq->rseq_inited) {
+			pr_err("%d has rseq but kernel lacks get_rseq_conf feature\n", tid);
+			return -1;
+		}
+	}
+
+	return 0;
+}
|
||||
+
|
||||
struct cr_imgset *glob_imgset;
|
||||
|
||||
static int collect_fds(pid_t pid, struct parasite_drain_fd **dfds)
|
||||
@@ -730,6 +750,17 @@ int dump_thread_core(int pid, CoreEntry *core, const struct parasite_dump_thread
|
||||
if (!ret)
|
||||
ret = seccomp_dump_thread(pid, tc);
|
||||
|
||||
+ /*
|
||||
+ * We are dumping rseq() in the dump_thread_rseq() function,
|
||||
+ * *before* the process gets infected (because of ptrace request
|
||||
+ * API restriction). At this point, if the kernel lacks
|
||||
+ * kdat.has_ptrace_get_rseq_conf support we have to ensure
|
||||
+ * that dumpable processes haven't initialized rseq() or
|
||||
+ * fail dump if rseq() was used.
|
||||
+ */
|
||||
+ if (!ret)
|
||||
+ ret = check_thread_rseq(pid, &ti->rseq, !!tc->rseq_entry);
|
||||
+
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -1016,6 +1047,68 @@ static int dump_task_signals(pid_t pid, struct pstree_item *item)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int dump_thread_rseq(pid_t tid, RseqEntry **rseqep)
|
||||
+{
|
||||
+ struct __ptrace_rseq_configuration rseq;
|
||||
+ RseqEntry *rseqe = NULL;
|
||||
+ int ret;
|
||||
+
|
||||
+ /*
|
||||
+ * If we are here it means that rseq() syscall is supported,
|
||||
+ * but ptrace(PTRACE_GET_RSEQ_CONFIGURATION) isn't supported,
|
||||
+ * we can just fail dump here. But this is a bad idea, IMHO.
|
||||
+ *
|
||||
+ * So, we will try to detect whether the victim process has used rseq().
|
||||
+ * See check_rseq() and check_thread_rseq() functions.
|
||||
+ */
|
||||
+ if (!kdat.has_ptrace_get_rseq_conf)
|
||||
+ return 0;
|
||||
+
|
||||
+ ret = ptrace(PTRACE_GET_RSEQ_CONFIGURATION, tid, sizeof(rseq), &rseq);
|
||||
+ if (ret != sizeof(rseq)) {
|
||||
+ pr_perror("ptrace(PTRACE_GET_RSEQ_CONFIGURATION, %d) = %d", tid, ret);
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ if (rseq.flags != 0) {
|
||||
+ pr_err("something wrong with ptrace(PTRACE_GET_RSEQ_CONFIGURATION, %d) flags = 0x%x\n", tid,
|
||||
+ rseq.flags);
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ pr_err("Dump rseq of %d: ptr = 0x%lx sign = 0x%x\n", tid, (unsigned long)rseq.rseq_abi_pointer, rseq.signature);
|
||||
+
|
||||
+ rseqe = xmalloc(sizeof(*rseqe));
|
||||
+ if (!rseqe)
|
||||
+ return -1;
|
||||
+
|
||||
+ rseq_entry__init(rseqe);
|
||||
+
|
||||
+ rseqe->rseq_abi_pointer = rseq.rseq_abi_pointer;
|
||||
+ rseqe->rseq_abi_size = rseq.rseq_abi_size;
|
||||
+ rseqe->signature = rseq.signature;
|
||||
+
|
||||
+ *rseqep = rseqe;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int dump_task_rseq(pid_t pid, struct pstree_item *item)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ /* if rseq() syscall isn't supported then nothing to dump */
|
||||
+ if (!kdat.has_rseq)
|
||||
+ return 0;
|
||||
+
|
||||
+ for (i = 0; i < item->nr_threads; i++) {
|
||||
+ if (dump_thread_rseq(item->threads[i].real, &item->core[i]->thread_core->rseq_entry))
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static struct proc_pid_stat pps_buf;
|
||||
|
||||
static int dump_task_threads(struct parasite_ctl *parasite_ctl, const struct pstree_item *item)
|
||||
@@ -1304,6 +1397,12 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie)
|
||||
goto err;
|
||||
}
|
||||
|
||||
+ ret = dump_task_rseq(pid, item);
|
||||
+ if (ret) {
|
||||
+ pr_err("Dump %d rseq failed %d\n", pid, ret);
|
||||
+ goto err;
|
||||
+ }
|
||||
+
|
||||
parasite_ctl = parasite_infect_seized(pid, item, &vmas);
|
||||
if (!parasite_ctl) {
|
||||
pr_err("Can't infect (pid: %d) with parasite\n", pid);
|
||||
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
|
||||
index 5b645c1..b2bd044 100644
|
||||
--- a/criu/cr-restore.c
|
||||
+++ b/criu/cr-restore.c
|
||||
@@ -2975,6 +2975,19 @@ static int prep_sched_info(struct rst_sched_param *sp, ThreadCoreEntry *tc)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int prep_rseq(struct rst_rseq_param *rseq, ThreadCoreEntry *tc)
|
||||
+{
|
||||
+ /* compatibility with older CRIU versions */
|
||||
+ if (!tc->rseq_entry)
|
||||
+ return 0;
|
||||
+
|
||||
+ rseq->rseq_abi_pointer = tc->rseq_entry->rseq_abi_pointer;
|
||||
+ rseq->rseq_abi_size = tc->rseq_entry->rseq_abi_size;
|
||||
+ rseq->signature = tc->rseq_entry->signature;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static rlim_t decode_rlim(rlim_t ival)
|
||||
{
|
||||
return ival == -1 ? RLIM_INFINITY : ival;
|
||||
@@ -3704,6 +3717,10 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
|
||||
thread_args[i].clear_tid_addr = CORE_THREAD_ARCH_INFO(tcore)->clear_tid_addr;
|
||||
core_get_tls(tcore, &thread_args[i].tls);
|
||||
|
||||
+ ret = prep_rseq(&thread_args[i].rseq, tcore->thread_core);
|
||||
+ if (ret)
|
||||
+ goto err;
|
||||
+
|
||||
rst_reloc_creds(&thread_args[i], &creds_pos_next);
|
||||
|
||||
thread_args[i].futex_rla = tcore->thread_core->futex_rla;
|
||||
diff --git a/criu/include/linux/rseq.h b/criu/include/linux/rseq.h
|
||||
new file mode 100644
|
||||
index 0000000..5c1706a
|
||||
--- /dev/null
|
||||
+++ b/criu/include/linux/rseq.h
|
||||
@@ -0,0 +1,144 @@
|
||||
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
|
||||
+#ifndef _UAPI_LINUX_RSEQ_H
|
||||
+#define _UAPI_LINUX_RSEQ_H
|
||||
+
|
||||
+/*
|
||||
+ * linux/rseq.h
|
||||
+ *
|
||||
+ * Restartable sequences system call API
|
||||
+ *
|
||||
+ * Copyright (c) 2015-2018 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
|
||||
+ */
|
||||
+
|
||||
+#include <linux/types.h>
|
||||
+#include <asm/byteorder.h>
|
||||
+
|
||||
+enum rseq_cpu_id_state {
|
||||
+ RSEQ_CPU_ID_UNINITIALIZED = -1,
|
||||
+ RSEQ_CPU_ID_REGISTRATION_FAILED = -2,
|
||||
+};
|
||||
+
|
||||
+enum rseq_flags {
|
||||
+ RSEQ_FLAG_UNREGISTER = (1 << 0),
|
||||
+};
|
||||
+
|
||||
+enum rseq_cs_flags_bit {
|
||||
+ RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT = 0,
|
||||
+ RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT = 1,
|
||||
+ RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT = 2,
|
||||
+};
|
||||
+
|
||||
+enum rseq_cs_flags {
|
||||
+ RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT = (1U << RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT),
|
||||
+ RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL = (1U << RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT),
|
||||
+ RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE = (1U << RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT),
|
||||
+};
|
||||
+
|
||||
+/*
|
||||
+ * struct rseq_cs is aligned on 4 * 8 bytes to ensure it is always
|
||||
+ * contained within a single cache-line. It is usually declared as
|
||||
+ * link-time constant data.
|
||||
+ */
|
||||
+struct rseq_cs {
|
||||
+ /* Version of this structure. */
|
||||
+ __u32 version;
|
||||
+ /* enum rseq_cs_flags */
|
||||
+ __u32 flags;
|
||||
+ __u64 start_ip;
|
||||
+ /* Offset from start_ip. */
|
||||
+ __u64 post_commit_offset;
|
||||
+ __u64 abort_ip;
|
||||
+} __attribute__((aligned(4 * sizeof(__u64))));
|
||||
+
|
||||
+/*
|
||||
+ * struct rseq is aligned on 4 * 8 bytes to ensure it is always
|
||||
+ * contained within a single cache-line.
|
||||
+ *
|
||||
+ * A single struct rseq per thread is allowed.
|
||||
+ */
|
||||
+struct rseq {
|
||||
+ /*
|
||||
+ * Restartable sequences cpu_id_start field. Updated by the
|
||||
+ * kernel. Read by user-space with single-copy atomicity
|
||||
+ * semantics. This field should only be read by the thread which
|
||||
+ * registered this data structure. Aligned on 32-bit. Always
|
||||
+ * contains a value in the range of possible CPUs, although the
|
||||
+ * value may not be the actual current CPU (e.g. if rseq is not
|
||||
+ * initialized). This CPU number value should always be compared
|
||||
+ * against the value of the cpu_id field before performing a rseq
|
||||
+ * commit or returning a value read from a data structure indexed
|
||||
+ * using the cpu_id_start value.
|
||||
+ */
|
||||
+ __u32 cpu_id_start;
|
||||
+ /*
|
||||
+ * Restartable sequences cpu_id field. Updated by the kernel.
|
||||
+ * Read by user-space with single-copy atomicity semantics. This
|
||||
+ * field should only be read by the thread which registered this
|
||||
+ * data structure. Aligned on 32-bit. Values
|
||||
+ * RSEQ_CPU_ID_UNINITIALIZED and RSEQ_CPU_ID_REGISTRATION_FAILED
|
||||
+ * have a special semantic: the former means "rseq uninitialized",
|
||||
+ * and latter means "rseq initialization failed". This value is
|
||||
+ * meant to be read within rseq critical sections and compared
|
||||
+ * with the cpu_id_start value previously read, before performing
|
||||
+ * the commit instruction, or read and compared with the
|
||||
+ * cpu_id_start value before returning a value loaded from a data
|
||||
+ * structure indexed using the cpu_id_start value.
|
||||
+ */
|
||||
+ __u32 cpu_id;
|
||||
+ /*
|
||||
+ * Restartable sequences rseq_cs field.
|
||||
+ *
|
||||
+ * Contains NULL when no critical section is active for the current
|
||||
+ * thread, or holds a pointer to the currently active struct rseq_cs.
|
||||
+ *
|
||||
+ * Updated by user-space, which sets the address of the currently
|
||||
+ * active rseq_cs at the beginning of assembly instruction sequence
|
||||
+ * block, and set to NULL by the kernel when it restarts an assembly
|
||||
+ * instruction sequence block, as well as when the kernel detects that
|
||||
+ * it is preempting or delivering a signal outside of the range
|
||||
+ * targeted by the rseq_cs. Also needs to be set to NULL by user-space
|
||||
+ * before reclaiming memory that contains the targeted struct rseq_cs.
|
||||
+ *
|
||||
+ * Read and set by the kernel. Set by user-space with single-copy
|
||||
+ * atomicity semantics. This field should only be updated by the
|
||||
+ * thread which registered this data structure. Aligned on 64-bit.
|
||||
+ */
|
||||
+ union {
|
||||
+ __u64 ptr64;
|
||||
+#ifdef __LP64__
|
||||
+ __u64 ptr;
|
||||
+#else
|
||||
+ struct {
|
||||
+#if (defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN)) || defined(__BIG_ENDIAN)
|
||||
+ __u32 padding; /* Initialized to zero. */
|
||||
+ __u32 ptr32;
|
||||
+#else /* LITTLE */
|
||||
+ __u32 ptr32;
|
||||
+ __u32 padding; /* Initialized to zero. */
|
||||
+#endif /* ENDIAN */
|
||||
+ } ptr;
|
||||
+#endif
|
||||
+ } rseq_cs;
|
||||
+
|
||||
+ /*
|
||||
+ * Restartable sequences flags field.
|
||||
+ *
|
||||
+ * This field should only be updated by the thread which
|
||||
+ * registered this data structure. Read by the kernel.
|
||||
+ * Mainly used for single-stepping through rseq critical sections
|
||||
+ * with debuggers.
|
||||
+ *
|
||||
+ * - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT
|
||||
+ * Inhibit instruction sequence block restart on preemption
|
||||
+ * for this thread.
|
||||
+ * - RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL
|
||||
+ * Inhibit instruction sequence block restart on signal
|
||||
+ * delivery for this thread.
|
||||
+ * - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE
|
||||
+ * Inhibit instruction sequence block restart on migration for
|
||||
+ * this thread.
|
||||
+ */
|
||||
+ __u32 flags;
|
||||
+} __attribute__((aligned(4 * sizeof(__u64))));
|
||||
+
|
||||
+#endif /* _UAPI_LINUX_RSEQ_H */
|
||||
diff --git a/criu/include/parasite.h b/criu/include/parasite.h
|
||||
index 8107aa4..5fde809 100644
|
||||
--- a/criu/include/parasite.h
|
||||
+++ b/criu/include/parasite.h
|
||||
@@ -164,10 +164,17 @@ struct parasite_dump_creds {
|
||||
unsigned int groups[0];
|
||||
};
|
||||
|
||||
+struct parasite_check_rseq {
|
||||
+ bool has_rseq;
|
||||
+ bool has_ptrace_get_rseq_conf; /* no need to check if supported */
|
||||
+ bool rseq_inited;
|
||||
+};
|
||||
+
|
||||
struct parasite_dump_thread {
|
||||
unsigned int *tid_addr;
|
||||
pid_t tid;
|
||||
tls_t tls;
|
||||
+ struct parasite_check_rseq rseq;
|
||||
stack_t sas;
|
||||
int pdeath_sig;
|
||||
char comm[TASK_COMM_LEN];
|
||||
diff --git a/criu/include/restorer.h b/criu/include/restorer.h
|
||||
index c2ef8f0..c29d869 100644
|
||||
--- a/criu/include/restorer.h
|
||||
+++ b/criu/include/restorer.h
|
||||
@@ -45,6 +45,12 @@ struct rst_sched_param {
|
||||
int prio;
|
||||
};
|
||||
|
||||
+struct rst_rseq_param {
|
||||
+ u64 rseq_abi_pointer;
|
||||
+ u32 rseq_abi_size;
|
||||
+ u32 signature;
|
||||
+};
|
||||
+
|
||||
struct restore_posix_timer {
|
||||
struct str_posix_timer spt;
|
||||
struct itimerspec val;
|
||||
@@ -99,6 +105,7 @@ struct thread_restore_args {
|
||||
struct task_restore_args *ta;
|
||||
|
||||
tls_t tls;
|
||||
+ struct rst_rseq_param rseq;
|
||||
|
||||
siginfo_t *siginfo;
|
||||
unsigned int siginfo_n;
|
||||
diff --git a/criu/kerndat.c b/criu/kerndat.c
|
||||
index 4841387..af7113a 100644
|
||||
--- a/criu/kerndat.c
|
||||
+++ b/criu/kerndat.c
|
||||
@@ -837,7 +837,7 @@ static int kerndat_has_ptrace_get_rseq_conf(void)
|
||||
{
|
||||
pid_t pid;
|
||||
int len;
|
||||
- struct ptrace_rseq_configuration rseq;
|
||||
+ struct __ptrace_rseq_configuration rseq;
|
||||
|
||||
pid = fork_and_ptrace_attach(NULL);
|
||||
if (pid < 0)
|
||||
diff --git a/criu/parasite-syscall.c b/criu/parasite-syscall.c
|
||||
index 7175ade..ee4fa86 100644
|
||||
--- a/criu/parasite-syscall.c
|
||||
+++ b/criu/parasite-syscall.c
|
||||
@@ -132,6 +132,13 @@ static int alloc_groups_copy_creds(CredsEntry *ce, struct parasite_dump_creds *c
|
||||
return ce->groups ? 0 : -ENOMEM;
|
||||
}
|
||||
|
||||
+static void init_parasite_rseq_arg(struct parasite_check_rseq *rseq)
|
||||
+{
|
||||
+ rseq->has_rseq = kdat.has_rseq;
|
||||
+ rseq->has_ptrace_get_rseq_conf = kdat.has_ptrace_get_rseq_conf;
|
||||
+ rseq->rseq_inited = false;
|
||||
+}
|
||||
+
|
||||
int parasite_dump_thread_leader_seized(struct parasite_ctl *ctl, int pid, CoreEntry *core)
|
||||
{
|
||||
ThreadCoreEntry *tc = core->thread_core;
|
||||
@@ -144,6 +151,8 @@ int parasite_dump_thread_leader_seized(struct parasite_ctl *ctl, int pid, CoreEn
|
||||
pc = args->creds;
|
||||
pc->cap_last_cap = kdat.last_cap;
|
||||
|
||||
+ init_parasite_rseq_arg(&args->rseq);
|
||||
+
|
||||
ret = compel_rpc_call_sync(PARASITE_CMD_DUMP_THREAD, ctl);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
@@ -197,6 +206,8 @@ int parasite_dump_thread_seized(struct parasite_thread_ctl *tctl, struct parasit
|
||||
|
||||
compel_arch_get_tls_thread(tctl, &args->tls);
|
||||
|
||||
+ init_parasite_rseq_arg(&args->rseq);
|
||||
+
|
||||
ret = compel_run_in_thread(tctl, PARASITE_CMD_DUMP_THREAD);
|
||||
if (ret) {
|
||||
pr_err("Can't init thread in parasite %d\n", pid);
|
||||
diff --git a/criu/pie/parasite.c b/criu/pie/parasite.c
|
||||
index bc0a33c..e49958b 100644
|
||||
--- a/criu/pie/parasite.c
|
||||
+++ b/criu/pie/parasite.c
|
||||
@@ -8,6 +8,8 @@
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/uio.h>
|
||||
|
||||
+#include "linux/rseq.h"
|
||||
+
|
||||
#include "common/config.h"
|
||||
#include "int.h"
|
||||
#include "types.h"
|
||||
@@ -167,6 +169,7 @@ static int dump_posix_timers(struct parasite_dump_posix_timers_args *args)
|
||||
}
|
||||
|
||||
static int dump_creds(struct parasite_dump_creds *args);
|
||||
+static int check_rseq(struct parasite_check_rseq *rseq);
|
||||
|
||||
static int dump_thread_common(struct parasite_dump_thread *ti)
|
||||
{
|
||||
@@ -197,6 +200,12 @@ static int dump_thread_common(struct parasite_dump_thread *ti)
|
||||
goto out;
|
||||
}
|
||||
|
||||
+ ret = check_rseq(&ti->rseq);
|
||||
+ if (ret) {
|
||||
+ pr_err("Unable to check if rseq() is initialized: %d\n", ret);
|
||||
+ goto out;
|
||||
+ }
|
||||
+
|
||||
ret = dump_creds(ti->creds);
|
||||
out:
|
||||
return ret;
|
||||
@@ -313,6 +322,96 @@ grps_err:
|
||||
return -1;
|
||||
}
|
||||
|
||||
+static int check_rseq(struct parasite_check_rseq *rseq)
|
||||
+{
|
||||
+ int ret;
|
||||
+ unsigned long rseq_abi_pointer;
|
||||
+ unsigned long rseq_abi_size;
|
||||
+ uint32_t rseq_signature;
|
||||
+ void *addr;
|
||||
+
|
||||
+ /* no need to do hacky check if we can get all info from ptrace() */
|
||||
+ if (!rseq->has_rseq || rseq->has_ptrace_get_rseq_conf)
|
||||
+ return 0;
|
||||
+
|
||||
+ /*
|
||||
+ * We need to determine if victim process has rseq()
|
||||
+ * initialized, but we do not have *any* proper kernel interface
|
||||
+ * supported at this point.
|
||||
+ * Our plan:
|
||||
+ * 1. We know that if we call rseq() syscall and process already
|
||||
+ * has current->rseq filled, then we get:
|
||||
+ * -EINVAL if current->rseq != rseq || rseq_len != sizeof(*rseq),
|
||||
+ * -EPERM if current->rseq_sig != sig,
|
||||
+ * -EBUSY if current->rseq == rseq && rseq_len == sizeof(*rseq) &&
|
||||
+ * current->rseq_sig == sig
|
||||
+ * if current->rseq == NULL (rseq() wasn't used) then we go to:
|
||||
+ * IS_ALIGNED(rseq ...) check, if we fail it we get -EINVAL and it
|
||||
+ * will be hard to distinguish case when rseq() was initialized or not.
|
||||
+ * Let's construct arguments payload
|
||||
+ * with:
|
||||
+ * 1. correct rseq_abi_size
|
||||
+ * 2. aligned and correct rseq_abi_pointer
|
||||
+ * And see what rseq() return to us.
|
||||
+ * If ret value is:
|
||||
+ * 0: it means that rseq *wasn't* used and we successfully registered it,
|
||||
+ * -EINVAL: it means that rseq is already initialized,
|
||||
+ * so we *have* to dump it. But as we have has_ptrace_get_rseq_conf = false,
|
||||
+ * we should just fail dump as it's unsafe to skip rseq() dump for processes
|
||||
+ * with rseq() initialized.
|
||||
+ * -EPERM or -EBUSY: should not happen as we take a fresh memory area for rseq
|
||||
+ */
|
||||
+ addr = (void *)sys_mmap(NULL, sizeof(struct rseq), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
+ if (addr == MAP_FAILED) {
|
||||
+ pr_err("mmap() failed for struct rseq ret = %lx\n", (unsigned long)addr);
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ memset(addr, 0, sizeof(struct rseq));
|
||||
+
|
||||
+ /* sys_mmap returns page aligned addresses */
|
||||
+ rseq_abi_pointer = (unsigned long)addr;
|
||||
+ rseq_abi_size = (unsigned long)sizeof(struct rseq);
|
||||
+ /* it's not so important to have unique signature for us,
|
||||
+ * because rseq_abi_pointer is guaranteed to be unique
|
||||
+ */
|
||||
+ rseq_signature = 0x12345612;
|
||||
+
|
||||
+ pr_info("\ttrying sys_rseq(%lx, %lx, %x, %x)\n", rseq_abi_pointer, rseq_abi_size, 0, rseq_signature);
|
||||
+ ret = sys_rseq((void *)rseq_abi_pointer, rseq_abi_size, 0, rseq_signature);
|
||||
+ if (ret) {
|
||||
+ if (ret == -EINVAL) {
|
||||
+ pr_info("\trseq is initialized in the victim\n");
|
||||
+ rseq->rseq_inited = true;
|
||||
+
|
||||
+ ret = 0;
|
||||
+ } else {
|
||||
+ pr_err("\tunexpected failure of sys_rseq(%lx, %lx, %x, %x) = %d\n", rseq_abi_pointer,
|
||||
+ rseq_abi_size, 0, rseq_signature, ret);
|
||||
+
|
||||
+ ret = -1;
|
||||
+ }
|
||||
+ } else {
|
||||
+ ret = sys_rseq((void *)rseq_abi_pointer, sizeof(struct rseq), RSEQ_FLAG_UNREGISTER, rseq_signature);
|
||||
+ if (ret) {
|
||||
+ pr_err("\tfailed to unregister sys_rseq(%lx, %lx, %x, %x) = %d\n", rseq_abi_pointer,
|
||||
+ rseq_abi_size, RSEQ_FLAG_UNREGISTER, rseq_signature, ret);
|
||||
+
|
||||
+ ret = -1;
|
||||
+ goto out;
|
||||
+ }
|
||||
+
|
||||
+ pr_info("\tsys_rseq succeed, let's unregister it back... ok Error\n");
|
||||
+ pr_info("\trseq is non-initialized in the victim Error\n");
|
||||
+ rseq->rseq_inited = false;
|
||||
+ ret = 0;
|
||||
+ }
|
||||
+
|
||||
+out:
|
||||
+ sys_munmap(addr, sizeof(struct rseq));
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
static int fill_fds_fown(int fd, struct fd_opts *p)
|
||||
{
|
||||
int flags, ret;
|
||||
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
|
||||
index fbc89fe..368b5a0 100644
|
||||
--- a/criu/pie/restorer.c
|
||||
+++ b/criu/pie/restorer.c
|
||||
@@ -459,6 +459,27 @@ static int restore_cpu_affinity(struct task_restore_args *args)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int restore_rseq(struct rst_rseq_param *rseq)
|
||||
+{
|
||||
+ int ret;
|
||||
+
|
||||
+ if (!rseq->rseq_abi_pointer) {
|
||||
+ pr_debug("rseq: nothing to restore\n");
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
+ pr_debug("rseq: rseq_abi_pointer = %lx signature = %x\n", (unsigned long)rseq->rseq_abi_pointer, rseq->signature);
|
||||
+
|
||||
+ ret = sys_rseq(decode_pointer(rseq->rseq_abi_pointer), rseq->rseq_abi_size, 0, rseq->signature);
|
||||
+ if (ret) {
|
||||
+ pr_err("failed sys_rseq(%lx, %lx, %x, %x) = %d\n", (unsigned long)rseq->rseq_abi_pointer,
|
||||
+ (unsigned long)rseq->rseq_abi_size, 0, rseq->signature, ret);
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static int restore_seccomp_filter(pid_t tid, struct thread_restore_args *args)
|
||||
{
|
||||
unsigned int flags = args->seccomp_force_tsync ? SECCOMP_FILTER_FLAG_TSYNC : 0;
|
||||
@@ -583,6 +604,9 @@ static int restore_thread_common(struct thread_restore_args *args)
|
||||
|
||||
restore_tls(&args->tls);
|
||||
|
||||
+ if (restore_rseq(&args->rseq))
|
||||
+ return -1;
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
diff --git a/images/Makefile b/images/Makefile
|
||||
index 2eaeb7c..004e22e 100644
|
||||
--- a/images/Makefile
|
||||
+++ b/images/Makefile
|
||||
@@ -71,6 +71,7 @@ proto-obj-y += img-streamer.o
|
||||
proto-obj-y += bpfmap-file.o
|
||||
proto-obj-y += bpfmap-data.o
|
||||
proto-obj-y += apparmor.o
|
||||
+proto-obj-y += rseq.o
|
||||
|
||||
CFLAGS += -iquote $(obj)/
|
||||
|
||||
diff --git a/images/core.proto b/images/core.proto
|
||||
index 39e7f32..b66230e 100644
|
||||
--- a/images/core.proto
|
||||
+++ b/images/core.proto
|
||||
@@ -14,6 +14,7 @@ import "timer.proto";
|
||||
import "creds.proto";
|
||||
import "sa.proto";
|
||||
import "siginfo.proto";
|
||||
+import "rseq.proto";
|
||||
|
||||
import "opts.proto";
|
||||
|
||||
@@ -106,6 +107,7 @@ message thread_core_entry {
|
||||
optional string comm = 13;
|
||||
optional uint64 blk_sigset_extended = 14;
|
||||
required thread_allowedcpus_entry allowed_cpus = 15;
|
||||
+ optional rseq_entry rseq_entry = 16;
|
||||
}
|
||||
|
||||
message task_rlimits_entry {
|
||||
diff --git a/images/rseq.proto b/images/rseq.proto
|
||||
new file mode 100644
|
||||
index 0000000..be28004
|
||||
--- /dev/null
|
||||
+++ b/images/rseq.proto
|
||||
@@ -0,0 +1,9 @@
|
||||
+// SPDX-License-Identifier: MIT
|
||||
+
|
||||
+syntax = "proto2";
|
||||
+
|
||||
+message rseq_entry {
|
||||
+ required uint64 rseq_abi_pointer = 1;
|
||||
+ required uint32 rseq_abi_size = 2;
|
||||
+ required uint32 signature = 3;
|
||||
+}
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,217 +0,0 @@
|
||||
From dc83ed27d305237298b8754d1159f2e7f5c926ae Mon Sep 17 00:00:00 2001
|
||||
From: bb-cat <ningyu9@huawei.com>
|
||||
Date: Wed, 2 Mar 2022 14:54:28 +0800
|
||||
Subject: [PATCH 07/72] zdtm: add simple test for rseq C/R Signed-off-by:
|
||||
Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
|
||||
|
||||
---
|
||||
test/zdtm/static/Makefile | 1 +
|
||||
test/zdtm/static/rseq00.c | 174 +++++++++++++++++++++++++++++++++++
|
||||
test/zdtm/static/rseq00.desc | 1 +
|
||||
3 files changed, 176 insertions(+)
|
||||
create mode 100644 test/zdtm/static/rseq00.c
|
||||
create mode 100644 test/zdtm/static/rseq00.desc
|
||||
|
||||
diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile
|
||||
index 70123cf..563d947 100644
|
||||
--- a/test/zdtm/static/Makefile
|
||||
+++ b/test/zdtm/static/Makefile
|
||||
@@ -61,6 +61,7 @@ TST_NOFILE := \
|
||||
pthread02 \
|
||||
pthread_timers \
|
||||
pthread_timers_h \
|
||||
+ rseq00 \
|
||||
vdso00 \
|
||||
vdso01 \
|
||||
vdso02 \
|
||||
diff --git a/test/zdtm/static/rseq00.c b/test/zdtm/static/rseq00.c
|
||||
new file mode 100644
|
||||
index 0000000..26f41a2
|
||||
--- /dev/null
|
||||
+++ b/test/zdtm/static/rseq00.c
|
||||
@@ -0,0 +1,174 @@
|
||||
+/*
|
||||
+ * test for rseq() syscall
|
||||
+ * See also https://www.efficios.com/blog/2019/02/08/linux-restartable-sequences/
|
||||
+ * https://github.com/torvalds/linux/commit/d7822b1e24f2df5df98c76f0e94a5416349ff759
|
||||
+ */
|
||||
+#include <stdio.h>
|
||||
+#include <stdlib.h>
|
||||
+#include <unistd.h>
|
||||
+#include <signal.h>
|
||||
+#include <string.h>
|
||||
+
|
||||
+#include <sys/types.h>
|
||||
+#include <sys/wait.h>
|
||||
+#include <sys/stat.h>
|
||||
+#include <sys/mman.h>
|
||||
+#include <fcntl.h>
|
||||
+#include <pthread.h>
|
||||
+#include <syscall.h>
|
||||
+
|
||||
+#include "zdtmtst.h"
|
||||
+
|
||||
+#if defined(__x86_64__)
|
||||
+
|
||||
+const char *test_doc = "Check that rseq() basic C/R works";
|
||||
+const char *test_author = "Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>";
|
||||
+/* parts of code borrowed from https://www.efficios.com/blog/2019/02/08/linux-restartable-sequences/ */
|
||||
+
|
||||
+/* some useful definitions from kernel uapi */
|
||||
+enum rseq_flags {
|
||||
+ RSEQ_FLAG_UNREGISTER = (1 << 0),
|
||||
+};
|
||||
+
|
||||
+struct rseq {
|
||||
+ uint32_t cpu_id_start;
|
||||
+ uint32_t cpu_id;
|
||||
+ uint64_t rseq_cs;
|
||||
+ uint32_t flags;
|
||||
+} __attribute__((aligned(4 * sizeof(uint64_t))));
|
||||
+
|
||||
+#ifndef __NR_rseq
|
||||
+#define __NR_rseq 334
|
||||
+#endif
|
||||
+/* EOF */
|
||||
+
|
||||
+static __thread volatile struct rseq __rseq_abi;
|
||||
+
|
||||
+#define RSEQ_SIG 0x53053053
|
||||
+
|
||||
+static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len, int flags, uint32_t sig)
|
||||
+{
|
||||
+ return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
|
||||
+}
|
||||
+
|
||||
+static void register_thread(void)
|
||||
+{
|
||||
+ int rc;
|
||||
+ rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG);
|
||||
+ if (rc) {
|
||||
+ fail("Failed to register rseq");
|
||||
+ exit(1);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void unregister_thread(void)
|
||||
+{
|
||||
+ int rc;
|
||||
+ rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
|
||||
+ if (rc) {
|
||||
+ fail("Failed to unregister rseq");
|
||||
+ exit(1);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void check_thread(void)
|
||||
+{
|
||||
+ int rc;
|
||||
+ rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG);
|
||||
+ if (!(rc && errno == EBUSY)) {
|
||||
+ fail("Failed to check rseq %d", rc);
|
||||
+ exit(1);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+#define RSEQ_ACCESS_ONCE(x) (*(__volatile__ __typeof__(x) *)&(x))
|
||||
+
|
||||
+static int rseq_addv(intptr_t *v, intptr_t count, int cpu)
|
||||
+{
|
||||
+ /* clang-format off */
|
||||
+ __asm__ __volatile__ goto(
|
||||
+ ".pushsection __rseq_table, \"aw\"\n\t"
|
||||
+ ".balign 32\n\t"
|
||||
+ "cs_obj:\n\t"
|
||||
+ /* version, flags */
|
||||
+ ".long 0, 0\n\t"
|
||||
+ /* start_ip, post_commit_offset, abort_ip */
|
||||
+ ".quad 1f, (2f-1f), 4f\n\t"
|
||||
+ ".popsection\n\t"
|
||||
+ "1:\n\t"
|
||||
+ "leaq cs_obj(%%rip), %%rax\n\t"
|
||||
+ "movq %%rax, %[rseq_cs]\n\t"
|
||||
+ "cmpl %[cpu_id], %[current_cpu_id]\n\t"
|
||||
+ "jnz 4f\n\t"
|
||||
+ "addq %[count], %[v]\n\t" /* final store */
|
||||
+ "2:\n\t"
|
||||
+ ".pushsection __rseq_failure, \"ax\"\n\t"
|
||||
+ /* Disassembler-friendly signature: nopl <sig>(%rip). */
|
||||
+ ".byte 0x0f, 0x1f, 0x05\n\t"
|
||||
+ ".long 0x53053053\n\t" /* RSEQ_SIG */
|
||||
+ "4:\n\t"
|
||||
+ "jmp abort\n\t"
|
||||
+ ".popsection\n\t"
|
||||
+ : /* gcc asm goto does not allow outputs */
|
||||
+ : [cpu_id] "r" (cpu),
|
||||
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
|
||||
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
|
||||
+ /* final store input */
|
||||
+ [v] "m" (*v),
|
||||
+ [count] "er" (count)
|
||||
+ : "memory", "cc", "rax"
|
||||
+ : abort
|
||||
+ );
|
||||
+ /* clang-format on */
|
||||
+
|
||||
+ return 0;
|
||||
+abort:
|
||||
+ return -1;
|
||||
+}
|
||||
+
|
||||
+int main(int argc, char *argv[])
|
||||
+{
|
||||
+ int cpu, ret;
|
||||
+ intptr_t *cpu_data;
|
||||
+ long nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
|
||||
+
|
||||
+ test_init(argc, argv);
|
||||
+
|
||||
+ cpu_data = calloc(nr_cpus, sizeof(*cpu_data));
|
||||
+ if (!cpu_data) {
|
||||
+ fail("calloc");
|
||||
+ exit(EXIT_FAILURE);
|
||||
+ }
|
||||
+
|
||||
+ register_thread();
|
||||
+
|
||||
+ test_daemon();
|
||||
+ test_waitsig();
|
||||
+
|
||||
+ check_thread();
|
||||
+
|
||||
+ cpu = RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id_start);
|
||||
+ ret = rseq_addv(&cpu_data[cpu], 2, cpu);
|
||||
+ if (ret)
|
||||
+ fail("Failed to increment per-cpu counter");
|
||||
+ else
|
||||
+ test_msg("cpu_data[%d] == %ld\n", cpu, (long int)cpu_data[cpu]);
|
||||
+
|
||||
+ if (cpu_data[cpu] == 2)
|
||||
+ pass();
|
||||
+ else
|
||||
+ fail();
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+#else
|
||||
+
|
||||
+int main(int argc, char *argv[])
|
||||
+{
|
||||
+ test_init(argc, argv);
|
||||
+ skip("Unsupported arch");
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+#endif
|
||||
\ No newline at end of file
|
||||
diff --git a/test/zdtm/static/rseq00.desc b/test/zdtm/static/rseq00.desc
|
||||
new file mode 100644
|
||||
index 0000000..0324fa3
|
||||
--- /dev/null
|
||||
+++ b/test/zdtm/static/rseq00.desc
|
||||
@@ -0,0 +1 @@
|
||||
+{'flavor': 'h', 'arch': 'x86_64', 'feature': 'get_rseq_conf'}
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,123 +0,0 @@
|
||||
From 4ebfba180d44706e50afb525cc992ac708e83883 Mon Sep 17 00:00:00 2001
|
||||
From: bb-cat <ningyu9@huawei.com>
|
||||
Date: Wed, 2 Mar 2022 14:57:16 +0800
|
||||
Subject: [PATCH 08/72] ci: add Fedora Rawhide based test on Cirrus We have
|
||||
ability to use nested virtualization on Cirrus, and already have "Vagrant
|
||||
Fedora based test (no VDSO)" test, let's do analogical for Fedora Rawhide to
|
||||
get fresh kernel.
|
||||
|
||||
Suggested-by: Adrian Reber <areber@redhat.com>
|
||||
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
|
||||
---
|
||||
.cirrus.yml | 21 +++++++++++++++++++++
|
||||
scripts/ci/Makefile | 7 +++++--
|
||||
scripts/ci/run-ci-tests.sh | 5 +++++
|
||||
scripts/ci/vagrant.sh | 21 +++++++++++++++++++++
|
||||
4 files changed, 52 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/.cirrus.yml b/.cirrus.yml
|
||||
index 671178d..9716e58 100644
|
||||
--- a/.cirrus.yml
|
||||
+++ b/.cirrus.yml
|
||||
@@ -19,6 +19,27 @@ task:
|
||||
build_script: |
|
||||
make -C scripts/ci vagrant-fedora-no-vdso
|
||||
|
||||
+task:
|
||||
+ name: Vagrant Fedora Rawhide based test
|
||||
+ environment:
|
||||
+ HOME: "/root"
|
||||
+ CIRRUS_WORKING_DIR: "/tmp/criu"
|
||||
+
|
||||
+ compute_engine_instance:
|
||||
+ image_project: cirrus-images
|
||||
+ image: family/docker-kvm
|
||||
+ platform: linux
|
||||
+ cpu: 4
|
||||
+ memory: 16G
|
||||
+ nested_virtualization: true
|
||||
+
|
||||
+ setup_script: |
|
||||
+ scripts/ci/apt-install make gcc pkg-config git perl-modules iproute2 kmod wget cpu-checker
|
||||
+ sudo kvm-ok
|
||||
+ ln -sf /usr/include/google/protobuf/descriptor.proto images/google/protobuf/descriptor.proto
|
||||
+ build_script: |
|
||||
+ make -C scripts/ci vagrant-fedora-rawhide
|
||||
+
|
||||
task:
|
||||
name: CentOS 8 based test
|
||||
environment:
|
||||
diff --git a/scripts/ci/Makefile b/scripts/ci/Makefile
|
||||
index 02b4d87..9c9264d 100644
|
||||
--- a/scripts/ci/Makefile
|
||||
+++ b/scripts/ci/Makefile
|
||||
@@ -41,7 +41,7 @@ export CONTAINER_TERMINAL
|
||||
ifeq ($(UNAME),x86_64)
|
||||
# On anything besides x86_64 Travis is running unprivileged LXD
|
||||
# containers which do not support running docker with '--privileged'.
|
||||
- CONTAINER_OPTS := --rm $(CONTAINER_TERMINAL) --privileged -v /lib/modules:/lib/modules --tmpfs /run
|
||||
+ CONTAINER_OPTS := --rm $(CONTAINER_TERMINAL) --privileged --userns=host --cgroupns=host -v /lib/modules:/lib/modules --tmpfs /run
|
||||
else
|
||||
CONTAINER_OPTS := --rm -v /lib/modules:/lib/modules --tmpfs /run
|
||||
endif
|
||||
@@ -92,7 +92,10 @@ setup-vagrant:
|
||||
vagrant-fedora-no-vdso: setup-vagrant
|
||||
./vagrant.sh fedora-no-vdso
|
||||
|
||||
-.PHONY: setup-vagrant vagrant-fedora-no-vdso
|
||||
+vagrant-fedora-rawhide: setup-vagrant
|
||||
+ ./vagrant.sh fedora-rawhide
|
||||
+
|
||||
+.PHONY: setup-vagrant vagrant-fedora-no-vdso vagrant-fedora-rawhide
|
||||
|
||||
%:
|
||||
$(MAKE) -C ../build $@$(target-suffix)
|
||||
diff --git a/scripts/ci/run-ci-tests.sh b/scripts/ci/run-ci-tests.sh
|
||||
index 7c66e68..95b4ec7 100755
|
||||
--- a/scripts/ci/run-ci-tests.sh
|
||||
+++ b/scripts/ci/run-ci-tests.sh
|
||||
@@ -194,6 +194,11 @@ if [ "${STREAM_TEST}" = "1" ]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
+# print some useful debug info
|
||||
+cat /proc/self/status
|
||||
+ls -la /proc/self/ns
|
||||
+cat /proc/self/cgroup
|
||||
+
|
||||
# shellcheck disable=SC2086
|
||||
./test/zdtm.py run -a -p 2 --keep-going $ZDTM_OPTS
|
||||
|
||||
diff --git a/scripts/ci/vagrant.sh b/scripts/ci/vagrant.sh
|
||||
index 839b100..f961b8d 100755
|
||||
--- a/scripts/ci/vagrant.sh
|
||||
+++ b/scripts/ci/vagrant.sh
|
||||
@@ -58,4 +58,25 @@ fedora-no-vdso() {
|
||||
ssh default 'cd /vagrant/criu/test; sudo ./zdtm.py run -t zdtm/transition/pidfd_store_sk --rpc --pre 2'
|
||||
}
|
||||
|
||||
+fedora-rawhide() {
|
||||
+ #ssh default sudo grubby --update-kernel ALL --args="selinux=0 systemd.unified_cgroup_hierarchy=0"
|
||||
+ ssh default sudo grubby --update-kernel ALL
|
||||
+ #
|
||||
+ # Workaround the problem:
|
||||
+ # error running container: error from /usr/bin/crun creating container for [...]: sd-bus call: Transport endpoint is not connected
|
||||
+ # Let's just use runc instead of crun
|
||||
+ # see also https://github.com/kata-containers/tests/issues/4283
|
||||
+ #
|
||||
+ ssh default 'sudo dnf remove -y crun || true'
|
||||
+ ssh default sudo dnf install -y podman runc
|
||||
+ vagrant reload
|
||||
+ #ssh default sudo setenforce 0
|
||||
+ ssh default cat /proc/cmdline
|
||||
+ ssh default ls -la /proc/self/ns
|
||||
+ ssh default sudo cat /proc/self/status
|
||||
+ ssh default sudo cat /proc/self/cgroup
|
||||
+ #ssh default sudo capsh --print
|
||||
+ ssh default 'cd /vagrant; tar xf criu.tar; cd criu; sudo -E make -C scripts/ci fedora-rawhide CONTAINER_RUNTIME=podman BUILD_OPTIONS="--security-opt seccomp=unconfined"'
|
||||
+}
|
||||
+
|
||||
$1
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,244 +0,0 @@
|
||||
From 159d2b7c889ae23ece99595af8a12f766c7b1aff Mon Sep 17 00:00:00 2001
|
||||
From: bb-cat <ningyu9@huawei.com>
|
||||
Date: Wed, 2 Mar 2022 15:00:07 +0800
|
||||
Subject: [PATCH 09/72] include: add thread_pointer.h from Glibc Implementation
|
||||
was taken from the Glibc.
|
||||
|
||||
https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=8dbeb0561eeb876f557ac9eef5721912ec074ea5
|
||||
https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=cb976fba4c51ede7bf8cee5035888527c308dfbc
|
||||
|
||||
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
|
||||
---
|
||||
.../arch/aarch64/include/asm/thread_pointer.h | 27 ++++++++++++++
|
||||
criu/arch/arm/include/asm/thread_pointer.h | 27 ++++++++++++++
|
||||
criu/arch/mips/include/asm/thread_pointer.h | 27 ++++++++++++++
|
||||
criu/arch/ppc64/include/asm/thread_pointer.h | 33 +++++++++++++++++
|
||||
criu/arch/s390/include/asm/thread_pointer.h | 27 ++++++++++++++
|
||||
criu/arch/x86/include/asm/thread_pointer.h | 37 +++++++++++++++++++
|
||||
6 files changed, 178 insertions(+)
|
||||
create mode 100644 criu/arch/aarch64/include/asm/thread_pointer.h
|
||||
create mode 100644 criu/arch/arm/include/asm/thread_pointer.h
|
||||
create mode 100644 criu/arch/mips/include/asm/thread_pointer.h
|
||||
create mode 100644 criu/arch/ppc64/include/asm/thread_pointer.h
|
||||
create mode 100644 criu/arch/s390/include/asm/thread_pointer.h
|
||||
create mode 100644 criu/arch/x86/include/asm/thread_pointer.h
|
||||
|
||||
diff --git a/criu/arch/aarch64/include/asm/thread_pointer.h b/criu/arch/aarch64/include/asm/thread_pointer.h
|
||||
new file mode 100644
|
||||
index 0000000..f7e0706
|
||||
--- /dev/null
|
||||
+++ b/criu/arch/aarch64/include/asm/thread_pointer.h
|
||||
@@ -0,0 +1,27 @@
|
||||
+/* __thread_pointer definition. Generic version.
|
||||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#ifndef _SYS_THREAD_POINTER_H
|
||||
+#define _SYS_THREAD_POINTER_H
|
||||
+
|
||||
+static inline void *__criu_thread_pointer(void)
|
||||
+{
|
||||
+ return __builtin_thread_pointer();
|
||||
+}
|
||||
+
|
||||
+#endif /* _SYS_THREAD_POINTER_H */
|
||||
diff --git a/criu/arch/arm/include/asm/thread_pointer.h b/criu/arch/arm/include/asm/thread_pointer.h
|
||||
new file mode 100644
|
||||
index 0000000..f7e0706
|
||||
--- /dev/null
|
||||
+++ b/criu/arch/arm/include/asm/thread_pointer.h
|
||||
@@ -0,0 +1,27 @@
|
||||
+/* __thread_pointer definition. Generic version.
|
||||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#ifndef _SYS_THREAD_POINTER_H
|
||||
+#define _SYS_THREAD_POINTER_H
|
||||
+
|
||||
+static inline void *__criu_thread_pointer(void)
|
||||
+{
|
||||
+ return __builtin_thread_pointer();
|
||||
+}
|
||||
+
|
||||
+#endif /* _SYS_THREAD_POINTER_H */
|
||||
diff --git a/criu/arch/mips/include/asm/thread_pointer.h b/criu/arch/mips/include/asm/thread_pointer.h
|
||||
new file mode 100644
|
||||
index 0000000..f7e0706
|
||||
--- /dev/null
|
||||
+++ b/criu/arch/mips/include/asm/thread_pointer.h
|
||||
@@ -0,0 +1,27 @@
|
||||
+/* __thread_pointer definition. Generic version.
|
||||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#ifndef _SYS_THREAD_POINTER_H
|
||||
+#define _SYS_THREAD_POINTER_H
|
||||
+
|
||||
+static inline void *__criu_thread_pointer(void)
|
||||
+{
|
||||
+ return __builtin_thread_pointer();
|
||||
+}
|
||||
+
|
||||
+#endif /* _SYS_THREAD_POINTER_H */
|
||||
diff --git a/criu/arch/ppc64/include/asm/thread_pointer.h b/criu/arch/ppc64/include/asm/thread_pointer.h
|
||||
new file mode 100644
|
||||
index 0000000..304516f
|
||||
--- /dev/null
|
||||
+++ b/criu/arch/ppc64/include/asm/thread_pointer.h
|
||||
@@ -0,0 +1,33 @@
|
||||
+/* __thread_pointer definition. powerpc version.
|
||||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#ifndef _SYS_THREAD_POINTER_H
|
||||
+#define _SYS_THREAD_POINTER_H
|
||||
+
|
||||
+#ifdef __powerpc64__
|
||||
+register void *__thread_register asm("r13");
|
||||
+#else
|
||||
+register void *__thread_register asm("r2");
|
||||
+#endif
|
||||
+
|
||||
+static inline void *__criu_thread_pointer(void)
|
||||
+{
|
||||
+ return __thread_register;
|
||||
+}
|
||||
+
|
||||
+#endif /* _SYS_THREAD_POINTER_H */
|
||||
\ No newline at end of file
|
||||
diff --git a/criu/arch/s390/include/asm/thread_pointer.h b/criu/arch/s390/include/asm/thread_pointer.h
|
||||
new file mode 100644
|
||||
index 0000000..f7e0706
|
||||
--- /dev/null
|
||||
+++ b/criu/arch/s390/include/asm/thread_pointer.h
|
||||
@@ -0,0 +1,27 @@
|
||||
+/* __thread_pointer definition. Generic version.
|
||||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#ifndef _SYS_THREAD_POINTER_H
|
||||
+#define _SYS_THREAD_POINTER_H
|
||||
+
|
||||
+static inline void *__criu_thread_pointer(void)
|
||||
+{
|
||||
+ return __builtin_thread_pointer();
|
||||
+}
|
||||
+
|
||||
+#endif /* _SYS_THREAD_POINTER_H */
|
||||
diff --git a/criu/arch/x86/include/asm/thread_pointer.h b/criu/arch/x86/include/asm/thread_pointer.h
|
||||
new file mode 100644
|
||||
index 0000000..08603ae
|
||||
--- /dev/null
|
||||
+++ b/criu/arch/x86/include/asm/thread_pointer.h
|
||||
@@ -0,0 +1,37 @@
|
||||
+/* __thread_pointer definition. x86 version.
|
||||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library. If not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#ifndef _SYS_THREAD_POINTER_H
|
||||
+#define _SYS_THREAD_POINTER_H
|
||||
+
|
||||
+static inline void *__criu_thread_pointer(void)
|
||||
+{
|
||||
+#if __GNUC_PREREQ(11, 1)
|
||||
+ return __builtin_thread_pointer();
|
||||
+#else
|
||||
+ void *__result;
|
||||
+#ifdef __x86_64__
|
||||
+ __asm__("mov %%fs:0, %0" : "=r"(__result));
|
||||
+#else
|
||||
+ __asm__("mov %%gs:0, %0" : "=r"(__result));
|
||||
+#endif
|
||||
+ return __result;
|
||||
+#endif /* !GCC 11 */
|
||||
+}
|
||||
+
|
||||
+#endif /* _SYS_THREAD_POINTER_H */
|
||||
\ No newline at end of file
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,102 +0,0 @@
|
||||
From 0fdb1cf439c08f6e957e2e7d234a015ef3b84dfc Mon Sep 17 00:00:00 2001
|
||||
From: bb-cat <ningyu9@huawei.com>
|
||||
Date: Wed, 2 Mar 2022 15:00:43 +0800
|
||||
Subject: [PATCH 10/72] clone-noasan: unregister rseq at the thread start for
|
||||
new glibc Fresh glibc does rseq registration by default during
|
||||
start_thread(). [ see
|
||||
https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=95e114a0919d844d8fe07839cb6538b7f5ee920e
|
||||
]
|
||||
|
||||
This causes process crashes during the memory restore procedure, because
|
||||
memory which corresponds to the struct rseq will be overwritten.
|
||||
|
||||
See also
|
||||
("nptl: Add public rseq symbols and <sys/rseq.h>")
|
||||
https://sourceware.org/git?p=glibc.git;a=commit;h=c901c3e764d7c7079f006b4e21e877d5036eb4f5
|
||||
("nptl: Add <thread_pointer.h> for defining __thread_pointer")
|
||||
https://sourceware.org/git?p=glibc.git;a=commit;h=8dbeb0561eeb876f557ac9eef5721912ec074ea5
|
||||
|
||||
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
|
||||
---
|
||||
criu/clone-noasan.c | 42 ++++++++++++++++++++++++++++++++++++++++--
|
||||
1 file changed, 40 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/criu/clone-noasan.c b/criu/clone-noasan.c
|
||||
index d657ea2..5f8dd1b 100644
|
||||
--- a/criu/clone-noasan.c
|
||||
+++ b/criu/clone-noasan.c
|
||||
@@ -2,6 +2,13 @@
|
||||
#include <sched.h>
|
||||
#include <unistd.h>
|
||||
|
||||
+#ifdef __has_include
|
||||
+#if __has_include ("sys/rseq.h")
|
||||
+#include <sys/rseq.h>
|
||||
+#include "asm/thread_pointer.h"
|
||||
+#endif
|
||||
+#endif
|
||||
+
|
||||
#include <compel/plugins/std/syscall-codes.h>
|
||||
|
||||
#include "sched.h"
|
||||
@@ -34,16 +41,45 @@
|
||||
* ... wait for process to finish ...
|
||||
* unlock_last_pid
|
||||
*/
|
||||
+
|
||||
+#if defined(RSEQ_SIG)
|
||||
+static inline void unregister_glibc_rseq(void)
|
||||
+{
|
||||
+ /* unregister rseq */
|
||||
+ syscall(__NR_rseq, (void *)((char *)__criu_thread_pointer() + __rseq_offset), __rseq_size, 1, RSEQ_SIG);
|
||||
+}
|
||||
+#else
|
||||
+static inline void unregister_glibc_rseq(void)
|
||||
+{
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+struct call_fn_args {
|
||||
+ int (*fn)(void *);
|
||||
+ void *arg;
|
||||
+};
|
||||
+
|
||||
+int call_fn(void *arg)
|
||||
+{
|
||||
+ struct call_fn_args *cargs = arg;
|
||||
+ unregister_glibc_rseq();
|
||||
+ return cargs->fn(cargs->arg);
|
||||
+}
|
||||
+
|
||||
int clone_noasan(int (*fn)(void *), int flags, void *arg)
|
||||
{
|
||||
void *stack_ptr = (void *)round_down((unsigned long)&stack_ptr - 1024, 16);
|
||||
+ struct call_fn_args a = {
|
||||
+ .fn = fn,
|
||||
+ .arg = arg,
|
||||
+ };
|
||||
|
||||
BUG_ON((flags & CLONE_VM) && !(flags & CLONE_VFORK));
|
||||
/*
|
||||
* Reserve some bytes for clone() internal needs
|
||||
* and use as stack the address above this area.
|
||||
*/
|
||||
- return clone(fn, stack_ptr, flags, arg);
|
||||
+ return clone(call_fn, stack_ptr, flags, (void *)&a);
|
||||
}
|
||||
|
||||
int clone3_with_pid_noasan(int (*fn)(void *), void *arg, int flags, int exit_signal, pid_t pid)
|
||||
@@ -78,7 +114,9 @@ int clone3_with_pid_noasan(int (*fn)(void *), void *arg, int flags, int exit_sig
|
||||
c_args.set_tid = ptr_to_u64(&pid);
|
||||
c_args.set_tid_size = 1;
|
||||
pid = syscall(__NR_clone3, &c_args, sizeof(c_args));
|
||||
- if (pid == 0)
|
||||
+ if (pid == 0) {
|
||||
+ unregister_glibc_rseq();
|
||||
exit(fn(arg));
|
||||
+ }
|
||||
return pid;
|
||||
}
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,158 +0,0 @@
|
||||
From 7cc800d2cfbfb6fe686345a652472b194ca2b9cf Mon Sep 17 00:00:00 2001
|
||||
From: bb-cat <ningyu9@huawei.com>
|
||||
Date: Wed, 2 Mar 2022 15:01:34 +0800
|
||||
Subject: [PATCH 11/72] zdtm/static/rseq00: fix rseq test when linking with a
|
||||
fresh Glibc Fresh Glibc does rseq() register by default. We need to
|
||||
unregister rseq before registering our own.
|
||||
|
||||
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
|
||||
---
|
||||
test/zdtm/static/rseq00.c | 76 +++++++++++++++++++++++++++++----------
|
||||
1 file changed, 58 insertions(+), 18 deletions(-)
|
||||
|
||||
diff --git a/test/zdtm/static/rseq00.c b/test/zdtm/static/rseq00.c
|
||||
index 26f41a2..87053b8 100644
|
||||
--- a/test/zdtm/static/rseq00.c
|
||||
+++ b/test/zdtm/static/rseq00.c
|
||||
@@ -19,13 +19,48 @@
|
||||
|
||||
#include "zdtmtst.h"
|
||||
|
||||
-#if defined(__x86_64__)
|
||||
+#ifdef __has_include
|
||||
+#if __has_include("sys/rseq.h")
|
||||
+#include <sys/rseq.h>
|
||||
+#endif
|
||||
+#endif
|
||||
+
|
||||
+#if defined(__i386__) || defined(__x86_64__)
|
||||
+
|
||||
+#if defined(RSEQ_SIG)
|
||||
+static inline void *__criu_thread_pointer(void)
|
||||
+{
|
||||
+#if __GNUC_PREREQ(11, 1)
|
||||
+ return __builtin_thread_pointer();
|
||||
+#else
|
||||
+ void *__result;
|
||||
+#ifdef __x86_64__
|
||||
+ __asm__("mov %%fs:0, %0" : "=r"(__result));
|
||||
+#else
|
||||
+ __asm__("mov %%gs:0, %0" : "=r"(__result));
|
||||
+#endif
|
||||
+ return __result;
|
||||
+#endif /* !GCC 11 */
|
||||
+}
|
||||
+
|
||||
+static inline void unregister_glibc_rseq(void)
|
||||
+{
|
||||
+ /* unregister rseq */
|
||||
+ syscall(__NR_rseq, (void *)((char *)__criu_thread_pointer() + __rseq_offset), __rseq_size, 1, RSEQ_SIG);
|
||||
+}
|
||||
+#else
|
||||
+static inline void unregister_glibc_rseq(void)
|
||||
+{
|
||||
+}
|
||||
+#endif
|
||||
|
||||
const char *test_doc = "Check that rseq() basic C/R works";
|
||||
const char *test_author = "Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>";
|
||||
/* parts of code borrowed from https://www.efficios.com/blog/2019/02/08/linux-restartable-sequences/ */
|
||||
|
||||
/* some useful definitions from kernel uapi */
|
||||
+#ifndef RSEQ_SIG
|
||||
+
|
||||
enum rseq_flags {
|
||||
RSEQ_FLAG_UNREGISTER = (1 << 0),
|
||||
};
|
||||
@@ -37,14 +72,21 @@ struct rseq {
|
||||
uint32_t flags;
|
||||
} __attribute__((aligned(4 * sizeof(uint64_t))));
|
||||
|
||||
+#define RSEQ_SIG 0x53053053
|
||||
+
|
||||
+#endif
|
||||
+
|
||||
#ifndef __NR_rseq
|
||||
#define __NR_rseq 334
|
||||
#endif
|
||||
/* EOF */
|
||||
|
||||
-static __thread volatile struct rseq __rseq_abi;
|
||||
+#define RSEQ_TLS_ALLOC 0
|
||||
|
||||
-#define RSEQ_SIG 0x53053053
|
||||
+static volatile struct rseq *rseq_ptr;
|
||||
+#if RSEQ_TLS_ALLOC
|
||||
+static __thread volatile struct rseq __rseq_abi;
|
||||
+#endif
|
||||
|
||||
static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len, int flags, uint32_t sig)
|
||||
{
|
||||
@@ -54,27 +96,18 @@ static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len, int flags
|
||||
static void register_thread(void)
|
||||
{
|
||||
int rc;
|
||||
- rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG);
|
||||
+ unregister_glibc_rseq();
|
||||
+ rc = sys_rseq(rseq_ptr, sizeof(struct rseq), 0, RSEQ_SIG);
|
||||
if (rc) {
|
||||
fail("Failed to register rseq");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
-static void unregister_thread(void)
|
||||
-{
|
||||
- int rc;
|
||||
- rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
|
||||
- if (rc) {
|
||||
- fail("Failed to unregister rseq");
|
||||
- exit(1);
|
||||
- }
|
||||
-}
|
||||
-
|
||||
static void check_thread(void)
|
||||
{
|
||||
int rc;
|
||||
- rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG);
|
||||
+ rc = sys_rseq(rseq_ptr, sizeof(struct rseq), 0, RSEQ_SIG);
|
||||
if (!(rc && errno == EBUSY)) {
|
||||
fail("Failed to check rseq %d", rc);
|
||||
exit(1);
|
||||
@@ -111,8 +144,8 @@ static int rseq_addv(intptr_t *v, intptr_t count, int cpu)
|
||||
".popsection\n\t"
|
||||
: /* gcc asm goto does not allow outputs */
|
||||
: [cpu_id] "r" (cpu),
|
||||
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
|
||||
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
|
||||
+ [current_cpu_id] "m" (rseq_ptr->cpu_id),
|
||||
+ [rseq_cs] "m" (rseq_ptr->rseq_cs),
|
||||
/* final store input */
|
||||
[v] "m" (*v),
|
||||
[count] "er" (count)
|
||||
@@ -132,6 +165,13 @@ int main(int argc, char *argv[])
|
||||
intptr_t *cpu_data;
|
||||
long nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
|
||||
|
||||
+#if RSEQ_TLS_ALLOC
|
||||
+ rseq_ptr = &__rseq_abi;
|
||||
+#else
|
||||
+ //rseq_ptr = malloc(sizeof(struct rseq));
|
||||
+ rseq_ptr = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, 0, 0);
|
||||
+#endif
|
||||
+
|
||||
test_init(argc, argv);
|
||||
|
||||
cpu_data = calloc(nr_cpus, sizeof(*cpu_data));
|
||||
@@ -147,7 +187,7 @@ int main(int argc, char *argv[])
|
||||
|
||||
check_thread();
|
||||
|
||||
- cpu = RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id_start);
|
||||
+ cpu = RSEQ_ACCESS_ONCE(rseq_ptr->cpu_id_start);
|
||||
ret = rseq_addv(&cpu_data[cpu], 2, cpu);
|
||||
if (ret)
|
||||
fail("Failed to increment per-cpu counter");
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,265 +0,0 @@
|
||||
From 65eb254d6ad2f1b1d36e95f431b05faf9e67524d Mon Sep 17 00:00:00 2001
|
||||
From: bb-cat <ningyu9@huawei.com>
|
||||
Date: Wed, 2 Mar 2022 15:02:08 +0800
|
||||
Subject: [PATCH 12/72] compel: add helpers to get/set instruction pointer
|
||||
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
|
||||
|
||||
---
|
||||
.../src/lib/include/uapi/asm/infect-types.h | 9 +++++----
|
||||
.../src/lib/include/uapi/asm/infect-types.h | 9 +++++----
|
||||
.../src/lib/include/uapi/asm/infect-types.h | 9 +++++----
|
||||
.../src/lib/include/uapi/asm/infect-types.h | 9 +++++----
|
||||
.../src/lib/include/uapi/asm/infect-types.h | 7 ++++---
|
||||
.../src/lib/include/uapi/asm/infect-types.h | 9 +++++----
|
||||
compel/include/uapi/infect.h | 6 ++++++
|
||||
compel/src/lib/infect.c | 20 +++++++++++++++++++
|
||||
criu/arch/aarch64/include/asm/types.h | 2 ++
|
||||
criu/arch/arm/include/asm/types.h | 2 ++
|
||||
criu/arch/mips/include/asm/types.h | 2 ++
|
||||
criu/arch/ppc64/include/asm/types.h | 2 ++
|
||||
criu/arch/s390/include/asm/types.h | 2 ++
|
||||
criu/arch/x86/include/asm/types.h | 2 ++
|
||||
14 files changed, 67 insertions(+), 23 deletions(-)
|
||||
|
||||
diff --git a/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h b/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h
|
||||
index f91e73d..9d4ce7e 100644
|
||||
--- a/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h
|
||||
+++ b/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h
|
||||
@@ -23,10 +23,11 @@ typedef struct user_fpsimd_state user_fpregs_struct_t;
|
||||
#define compel_arch_get_tls_task(ctl, tls)
|
||||
#define compel_arch_get_tls_thread(tctl, tls)
|
||||
|
||||
-#define REG_RES(r) ((uint64_t)(r).regs[0])
|
||||
-#define REG_IP(r) ((uint64_t)(r).pc)
|
||||
-#define REG_SP(r) ((uint64_t)((r).sp))
|
||||
-#define REG_SYSCALL_NR(r) ((uint64_t)(r).regs[8])
|
||||
+#define REG_RES(r) ((uint64_t)(r).regs[0])
|
||||
+#define REG_IP(r) ((uint64_t)(r).pc)
|
||||
+#define SET_REG_IP(r, val) ((r).pc = (val))
|
||||
+#define REG_SP(r) ((uint64_t)((r).sp))
|
||||
+#define REG_SYSCALL_NR(r) ((uint64_t)(r).regs[8])
|
||||
|
||||
#define user_regs_native(pregs) true
|
||||
|
||||
diff --git a/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h b/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h
|
||||
index 159b6a9..8d32825 100644
|
||||
--- a/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h
|
||||
+++ b/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h
|
||||
@@ -56,10 +56,11 @@ struct user_vfp_exc {
|
||||
unsigned long fpinst2;
|
||||
};
|
||||
|
||||
-#define REG_RES(regs) ((regs).ARM_r0)
|
||||
-#define REG_IP(regs) ((regs).ARM_pc)
|
||||
-#define REG_SP(regs) ((regs).ARM_sp)
|
||||
-#define REG_SYSCALL_NR(regs) ((regs).ARM_r7)
|
||||
+#define REG_RES(regs) ((regs).ARM_r0)
|
||||
+#define REG_IP(regs) ((regs).ARM_pc)
|
||||
+#define SET_REG_IP(regs, val) ((regs).ARM_pc = (val))
|
||||
+#define REG_SP(regs) ((regs).ARM_sp)
|
||||
+#define REG_SYSCALL_NR(regs) ((regs).ARM_r7)
|
||||
|
||||
#define user_regs_native(pregs) true
|
||||
|
||||
diff --git a/compel/arch/mips/src/lib/include/uapi/asm/infect-types.h b/compel/arch/mips/src/lib/include/uapi/asm/infect-types.h
|
||||
index 70b3f85..481566a 100644
|
||||
--- a/compel/arch/mips/src/lib/include/uapi/asm/infect-types.h
|
||||
+++ b/compel/arch/mips/src/lib/include/uapi/asm/infect-types.h
|
||||
@@ -56,10 +56,11 @@ static inline bool user_regs_native(user_regs_struct_t *pregs)
|
||||
#define compel_arch_get_tls_task(ctl, tls)
|
||||
#define compel_arch_get_tls_thread(tctl, tls)
|
||||
|
||||
-#define REG_RES(regs) ((regs).MIPS_v0)
|
||||
-#define REG_IP(regs) ((regs).cp0_epc)
|
||||
-#define REG_SP(regs) ((regs).MIPS_sp)
|
||||
-#define REG_SYSCALL_NR(regs) ((regs).MIPS_v0)
|
||||
+#define REG_RES(regs) ((regs).MIPS_v0)
|
||||
+#define REG_IP(regs) ((regs).cp0_epc)
|
||||
+#define SET_REG_IP(regs, val) ((regs).cp0_epc = (val))
|
||||
+#define REG_SP(regs) ((regs).MIPS_sp)
|
||||
+#define REG_SYSCALL_NR(regs) ((regs).MIPS_v0)
|
||||
|
||||
//#define __NR(syscall, compat) ((compat) ? __NR32_##syscall : __NR_##syscall)
|
||||
#define __NR(syscall, compat) __NR_##syscall
|
||||
diff --git a/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h b/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h
|
||||
index fe6192e..bf2cc95 100644
|
||||
--- a/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h
|
||||
+++ b/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h
|
||||
@@ -72,10 +72,11 @@ typedef struct {
|
||||
} tm;
|
||||
} user_fpregs_struct_t;
|
||||
|
||||
-#define REG_RES(regs) ((uint64_t)(regs).gpr[3])
|
||||
-#define REG_IP(regs) ((uint64_t)(regs).nip)
|
||||
-#define REG_SP(regs) ((uint64_t)(regs).gpr[1])
|
||||
-#define REG_SYSCALL_NR(regs) ((uint64_t)(regs).gpr[0])
|
||||
+#define REG_RES(regs) ((uint64_t)(regs).gpr[3])
|
||||
+#define REG_IP(regs) ((uint64_t)(regs).nip)
|
||||
+#define SET_REG_IP(regs, val) ((regs).nip = (val))
|
||||
+#define REG_SP(regs) ((uint64_t)(regs).gpr[1])
|
||||
+#define REG_SYSCALL_NR(regs) ((uint64_t)(regs).gpr[0])
|
||||
|
||||
#define user_regs_native(pregs) true
|
||||
|
||||
diff --git a/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h b/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h
|
||||
index 896d70e..87283bc 100644
|
||||
--- a/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h
|
||||
+++ b/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h
|
||||
@@ -62,9 +62,10 @@ typedef struct {
|
||||
uint32_t system_call;
|
||||
} user_regs_struct_t;
|
||||
|
||||
-#define REG_RES(r) ((uint64_t)(r).prstatus.gprs[2])
|
||||
-#define REG_IP(r) ((uint64_t)(r).prstatus.psw.addr)
|
||||
-#define REG_SP(r) ((uint64_t)(r).prstatus.gprs[15])
|
||||
+#define REG_RES(r) ((uint64_t)(r).prstatus.gprs[2])
|
||||
+#define REG_IP(r) ((uint64_t)(r).prstatus.psw.addr)
|
||||
+#define SET_REG_IP(r, val) ((r).prstatus.psw.addr = (val))
|
||||
+#define REG_SP(r) ((uint64_t)(r).prstatus.gprs[15])
|
||||
/*
|
||||
* We assume that REG_SYSCALL_NR() is only used for pie code where we
|
||||
* always use svc 0 with opcode in %r1.
|
||||
diff --git a/compel/arch/x86/src/lib/include/uapi/asm/infect-types.h b/compel/arch/x86/src/lib/include/uapi/asm/infect-types.h
|
||||
index 34b3ad0..b35504f 100644
|
||||
--- a/compel/arch/x86/src/lib/include/uapi/asm/infect-types.h
|
||||
+++ b/compel/arch/x86/src/lib/include/uapi/asm/infect-types.h
|
||||
@@ -127,10 +127,11 @@ typedef struct {
|
||||
|
||||
typedef struct xsave_struct user_fpregs_struct_t;
|
||||
|
||||
-#define REG_RES(regs) get_user_reg(&regs, ax)
|
||||
-#define REG_IP(regs) get_user_reg(&regs, ip)
|
||||
-#define REG_SP(regs) get_user_reg(&regs, sp)
|
||||
-#define REG_SYSCALL_NR(regs) get_user_reg(&regs, orig_ax)
|
||||
+#define REG_RES(regs) get_user_reg(&regs, ax)
|
||||
+#define REG_IP(regs) get_user_reg(&regs, ip)
|
||||
+#define SET_REG_IP(regs, val) set_user_reg(&regs, ip, val)
|
||||
+#define REG_SP(regs) get_user_reg(&regs, sp)
|
||||
+#define REG_SYSCALL_NR(regs) get_user_reg(&regs, orig_ax)
|
||||
|
||||
#define __NR(syscall, compat) ((compat) ? __NR32_##syscall : __NR_##syscall)
|
||||
|
||||
diff --git a/compel/include/uapi/infect.h b/compel/include/uapi/infect.h
|
||||
index c3d2ee6..389878e 100644
|
||||
--- a/compel/include/uapi/infect.h
|
||||
+++ b/compel/include/uapi/infect.h
|
||||
@@ -168,4 +168,10 @@ extern unsigned long compel_task_size(void);
|
||||
extern uint64_t compel_get_leader_sp(struct parasite_ctl *ctl);
|
||||
extern uint64_t compel_get_thread_sp(struct parasite_thread_ctl *tctl);
|
||||
|
||||
+extern uint64_t compel_get_leader_ip(struct parasite_ctl *ctl);
|
||||
+extern uint64_t compel_get_thread_ip(struct parasite_thread_ctl *tctl);
|
||||
+
|
||||
+void compel_set_leader_ip(struct parasite_ctl *ctl, uint64_t v);
|
||||
+void compel_set_thread_ip(struct parasite_thread_ctl *tctl, uint64_t v);
|
||||
+
|
||||
#endif
|
||||
diff --git a/compel/src/lib/infect.c b/compel/src/lib/infect.c
|
||||
index 0fb9e71..6a13cc1 100644
|
||||
--- a/compel/src/lib/infect.c
|
||||
+++ b/compel/src/lib/infect.c
|
||||
@@ -1686,3 +1686,23 @@ uint64_t compel_get_thread_sp(struct parasite_thread_ctl *tctl)
|
||||
{
|
||||
return REG_SP(tctl->th.regs);
|
||||
}
|
||||
+
|
||||
+uint64_t compel_get_leader_ip(struct parasite_ctl *ctl)
|
||||
+{
|
||||
+ return REG_IP(ctl->orig.regs);
|
||||
+}
|
||||
+
|
||||
+uint64_t compel_get_thread_ip(struct parasite_thread_ctl *tctl)
|
||||
+{
|
||||
+ return REG_IP(tctl->th.regs);
|
||||
+}
|
||||
+
|
||||
+void compel_set_leader_ip(struct parasite_ctl *ctl, uint64_t v)
|
||||
+{
|
||||
+ SET_REG_IP(ctl->orig.regs, v);
|
||||
+}
|
||||
+
|
||||
+void compel_set_thread_ip(struct parasite_thread_ctl *tctl, uint64_t v)
|
||||
+{
|
||||
+ SET_REG_IP(tctl->th.regs, v);
|
||||
+}
|
||||
diff --git a/criu/arch/aarch64/include/asm/types.h b/criu/arch/aarch64/include/asm/types.h
|
||||
index c860af1..363c1ca 100644
|
||||
--- a/criu/arch/aarch64/include/asm/types.h
|
||||
+++ b/criu/arch/aarch64/include/asm/types.h
|
||||
@@ -22,6 +22,8 @@ typedef UserAarch64RegsEntry UserRegsEntry;
|
||||
|
||||
#define TI_SP(core) ((core)->ti_aarch64->gpregs->sp)
|
||||
|
||||
+#define TI_IP(core) ((core)->ti_aarch64->gpregs->pc)
|
||||
+
|
||||
static inline void *decode_pointer(uint64_t v)
|
||||
{
|
||||
return (void *)v;
|
||||
diff --git a/criu/arch/arm/include/asm/types.h b/criu/arch/arm/include/asm/types.h
|
||||
index cfcb8a1..93d2dc2 100644
|
||||
--- a/criu/arch/arm/include/asm/types.h
|
||||
+++ b/criu/arch/arm/include/asm/types.h
|
||||
@@ -21,6 +21,8 @@ typedef UserArmRegsEntry UserRegsEntry;
|
||||
|
||||
#define TI_SP(core) ((core)->ti_arm->gpregs->sp)
|
||||
|
||||
+#define TI_IP(core) ((core)->ti_arm->gpregs->ip)
|
||||
+
|
||||
static inline void *decode_pointer(u64 v)
|
||||
{
|
||||
return (void *)(u32)v;
|
||||
diff --git a/criu/arch/mips/include/asm/types.h b/criu/arch/mips/include/asm/types.h
|
||||
index 237471f..2c75b6a 100644
|
||||
--- a/criu/arch/mips/include/asm/types.h
|
||||
+++ b/criu/arch/mips/include/asm/types.h
|
||||
@@ -18,6 +18,8 @@
|
||||
|
||||
#define CORE_THREAD_ARCH_INFO(core) core->ti_mips
|
||||
|
||||
+#define TI_IP(core) ((core)->ti_mips->gpregs->cp0_epc)
|
||||
+
|
||||
typedef UserMipsRegsEntry UserRegsEntry;
|
||||
|
||||
static inline u64 encode_pointer(void *p)
|
||||
diff --git a/criu/arch/ppc64/include/asm/types.h b/criu/arch/ppc64/include/asm/types.h
|
||||
index fedeff2..d60aadd 100644
|
||||
--- a/criu/arch/ppc64/include/asm/types.h
|
||||
+++ b/criu/arch/ppc64/include/asm/types.h
|
||||
@@ -19,6 +19,8 @@ typedef UserPpc64RegsEntry UserRegsEntry;
|
||||
|
||||
#define CORE_THREAD_ARCH_INFO(core) core->ti_ppc64
|
||||
|
||||
+#define TI_IP(core) ((core)->ti_ppc64->gpregs->nip)
|
||||
+
|
||||
static inline void *decode_pointer(uint64_t v)
|
||||
{
|
||||
return (void *)v;
|
||||
diff --git a/criu/arch/s390/include/asm/types.h b/criu/arch/s390/include/asm/types.h
|
||||
index 7522cf2..abf12de 100644
|
||||
--- a/criu/arch/s390/include/asm/types.h
|
||||
+++ b/criu/arch/s390/include/asm/types.h
|
||||
@@ -19,6 +19,8 @@ typedef UserS390RegsEntry UserRegsEntry;
|
||||
|
||||
#define CORE_THREAD_ARCH_INFO(core) core->ti_s390
|
||||
|
||||
+#define TI_IP(core) ((core)->ti_s390->gpregs->psw_addr)
|
||||
+
|
||||
static inline u64 encode_pointer(void *p)
|
||||
{
|
||||
return (u64)p;
|
||||
diff --git a/criu/arch/x86/include/asm/types.h b/criu/arch/x86/include/asm/types.h
|
||||
index a0a8ed9..8919d0a 100644
|
||||
--- a/criu/arch/x86/include/asm/types.h
|
||||
+++ b/criu/arch/x86/include/asm/types.h
|
||||
@@ -28,6 +28,8 @@ static inline int core_is_compat(CoreEntry *c)
|
||||
|
||||
#define CORE_THREAD_ARCH_INFO(core) core->thread_info
|
||||
|
||||
+#define TI_IP(core) ((core)->thread_info->gpregs->ip)
|
||||
+
|
||||
typedef UserX86RegsEntry UserRegsEntry;
|
||||
|
||||
static inline u64 encode_pointer(void *p)
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,248 +0,0 @@
|
||||
From afe090a86d6634e3620ebae16d32960f2c4933cc Mon Sep 17 00:00:00 2001
|
||||
From: bb-cat <ningyu9@huawei.com>
|
||||
Date: Wed, 2 Mar 2022 15:04:54 +0800
|
||||
Subject: [PATCH 13/72] cr-dump: fixup thread IP when inside rseq cs
|
||||
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
|
||||
|
||||
---
|
||||
criu/cr-dump.c | 155 ++++++++++++++++++++++++++++++++++++++--
|
||||
criu/include/parasite.h | 2 +
|
||||
criu/include/pstree.h | 1 +
|
||||
3 files changed, 154 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/criu/cr-dump.c b/criu/cr-dump.c
|
||||
index 91dd08a..a3f8973 100644
|
||||
--- a/criu/cr-dump.c
|
||||
+++ b/criu/cr-dump.c
|
||||
@@ -1047,11 +1047,58 @@ static int dump_task_signals(pid_t pid, struct pstree_item *item)
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static int dump_thread_rseq(pid_t tid, RseqEntry **rseqep)
|
||||
+static int read_rseq_cs(pid_t tid, struct __ptrace_rseq_configuration *rseq, struct rseq_cs *rseq_cs)
|
||||
+{
|
||||
+ int ret;
|
||||
+ uint64_t addr;
|
||||
+
|
||||
+ /* rseq is not registered */
|
||||
+ if (!rseq->rseq_abi_pointer)
|
||||
+ return 0;
|
||||
+
|
||||
+ /*
|
||||
+ * We need to cover the case when victim process was inside rseq critical section
|
||||
+ * at the moment when CRIU comes and seized it. We need to determine the borders
|
||||
+ * of rseq critical section at first. To achieve that we need to access thread
|
||||
+ * memory and read pointer to struct rseq_cs.
|
||||
+ *
|
||||
+ * We have two ways to access thread memory: from the parasite and using ptrace().
|
||||
+ * But in this case we can't use parasite, because if victim process returns to the
|
||||
+ * execution, on the kernel side __rseq_handle_notify_resume hook will be called,
|
||||
+ * then rseq_ip_fixup() -> clear_rseq_cs() and user space memory with struct rseq
|
||||
+ * will be cleared. So, let's use ptrace(PTRACE_PEEKDATA).
|
||||
+ */
|
||||
+ ret = ptrace_peek_area(tid, &addr, decode_pointer(rseq->rseq_abi_pointer + offsetof(struct rseq, rseq_cs)),
|
||||
+ sizeof(uint64_t));
|
||||
+ if (ret) {
|
||||
+ pr_err("ptrace_peek_area(%d, %lx, %lx, %lx): fail to read rseq_cs addr\n", tid, (unsigned long)&addr,
|
||||
+ (unsigned long)(rseq->rseq_abi_pointer + offsetof(struct rseq, rseq_cs)), sizeof(uint64_t));
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ /* (struct rseq)->rseq_cs is NULL */
|
||||
+ if (!addr)
|
||||
+ return 0;
|
||||
+
|
||||
+ ret = ptrace_peek_area(tid, rseq_cs, decode_pointer(addr), sizeof(struct rseq_cs));
|
||||
+ if (ret) {
|
||||
+ pr_err("ptrace_peek_area(%d, %lx, %lx, %lx): fail to read rseq_cs struct\n", tid,
|
||||
+ (unsigned long)rseq_cs, (unsigned long)addr, sizeof(struct rseq_cs));
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int dump_thread_rseq(struct pstree_item *item, int i)
|
||||
{
|
||||
struct __ptrace_rseq_configuration rseq;
|
||||
RseqEntry *rseqe = NULL;
|
||||
int ret;
|
||||
+ CoreEntry *core = item->core[i];
|
||||
+ RseqEntry **rseqep = &core->thread_core->rseq_entry;
|
||||
+ struct rseq_cs *rseq_cs = &dmpi(item)->thread_rseq_cs[i];
|
||||
+ pid_t tid = item->threads[i].real;
|
||||
|
||||
/*
|
||||
* If we are here it means that rseq() syscall is supported,
|
||||
@@ -1076,7 +1123,8 @@ static int dump_thread_rseq(pid_t tid, RseqEntry **rseqep)
|
||||
return -1;
|
||||
}
|
||||
|
||||
- pr_err("Dump rseq of %d: ptr = 0x%lx sign = 0x%x\n", tid, (unsigned long)rseq.rseq_abi_pointer, rseq.signature);
|
||||
+ pr_info("Dump rseq of %d: ptr = 0x%lx sign = 0x%x\n", tid, (unsigned long)rseq.rseq_abi_pointer,
|
||||
+ rseq.signature);
|
||||
|
||||
rseqe = xmalloc(sizeof(*rseqe));
|
||||
if (!rseqe)
|
||||
@@ -1088,25 +1136,118 @@ static int dump_thread_rseq(pid_t tid, RseqEntry **rseqep)
|
||||
rseqe->rseq_abi_size = rseq.rseq_abi_size;
|
||||
rseqe->signature = rseq.signature;
|
||||
|
||||
+ if (read_rseq_cs(tid, &rseq, rseq_cs))
|
||||
+ goto err;
|
||||
+
|
||||
+ /* save rseq entry to the image */
|
||||
*rseqep = rseqe;
|
||||
|
||||
return 0;
|
||||
+
|
||||
+err:
|
||||
+ xfree(rseqe);
|
||||
+ return -1;
|
||||
}
|
||||
|
||||
static int dump_task_rseq(pid_t pid, struct pstree_item *item)
|
||||
{
|
||||
int i;
|
||||
+ struct rseq_cs *thread_rseq_cs;
|
||||
|
||||
/* if rseq() syscall isn't supported then nothing to dump */
|
||||
if (!kdat.has_rseq)
|
||||
return 0;
|
||||
|
||||
+ thread_rseq_cs = xzalloc(sizeof(*thread_rseq_cs) * item->nr_threads);
|
||||
+ if (!thread_rseq_cs)
|
||||
+ return -1;
|
||||
+
|
||||
+ dmpi(item)->thread_rseq_cs = thread_rseq_cs;
|
||||
+
|
||||
for (i = 0; i < item->nr_threads; i++) {
|
||||
- if (dump_thread_rseq(item->threads[i].real, &item->core[i]->thread_core->rseq_entry))
|
||||
- return -1;
|
||||
+ if (dump_thread_rseq(item, i))
|
||||
+ goto free_rseq;
|
||||
}
|
||||
|
||||
return 0;
|
||||
+
|
||||
+free_rseq:
|
||||
+ xfree(thread_rseq_cs);
|
||||
+ dmpi(item)->thread_rseq_cs = NULL;
|
||||
+ return -1;
|
||||
+}
|
||||
+
|
||||
+static bool task_in_rseq(struct rseq_cs *rseq_cs, uint64_t addr)
|
||||
+{
|
||||
+ return addr >= rseq_cs->start_ip && addr < rseq_cs->start_ip + rseq_cs->post_commit_offset;
|
||||
+}
|
||||
+
|
||||
+static int fixup_thread_rseq(struct pstree_item *item, int i)
|
||||
+{
|
||||
+ CoreEntry *core = item->core[i];
|
||||
+ struct rseq_cs *rseq_cs = &dmpi(item)->thread_rseq_cs[i];
|
||||
+ pid_t tid = item->threads[i].real;
|
||||
+
|
||||
+ /* (struct rseq)->rseq_cs is NULL */
|
||||
+ if (!rseq_cs->start_ip)
|
||||
+ return 0;
|
||||
+
|
||||
+ pr_info("fixup_thread_rseq for %d: rseq_cs start_ip = %llx abort_ip = %llx post_commit_offset = %llx flags = %x version = %x; IP = %lx\n",
|
||||
+ tid, rseq_cs->start_ip, rseq_cs->abort_ip, rseq_cs->post_commit_offset, rseq_cs->flags,
|
||||
+ rseq_cs->version, (unsigned long)TI_IP(core));
|
||||
+
|
||||
+ if (rseq_cs->version != 0) {
|
||||
+ pr_err("unsupported RSEQ ABI version = %d\n", rseq_cs->version);
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ if (task_in_rseq(rseq_cs, TI_IP(core))) {
|
||||
+ struct pid *tid = &item->threads[i];
|
||||
+
|
||||
+ pr_info("The %d task is in rseq critical section. IP will be set to rseq abort handler addr\n",
|
||||
+ tid->real);
|
||||
+
|
||||
+ /*
|
||||
+ * We need to fixup task instruction pointer from
|
||||
+ * the original one (which lays inside rseq critical section)
|
||||
+ * to rseq abort handler address.
|
||||
+ *
|
||||
+ * It's worth to mention that we need to fixup IP in CoreEntry
|
||||
+ * (used when full dump/restore is performed) and also in
|
||||
+ * the parasite regs storage (used if --leave-running option is used,
|
||||
+ * or if dump error occured and process execution is resumed).
|
||||
+ */
|
||||
+ TI_IP(core) = rseq_cs->abort_ip;
|
||||
+
|
||||
+ if (item->pid->real == tid->real) {
|
||||
+ compel_set_leader_ip(dmpi(item)->parasite_ctl, rseq_cs->abort_ip);
|
||||
+ } else {
|
||||
+ compel_set_thread_ip(dmpi(item)->thread_ctls[i], rseq_cs->abort_ip);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int fixup_task_rseq(pid_t pid, struct pstree_item *item)
|
||||
+{
|
||||
+ int ret = 0;
|
||||
+ int i;
|
||||
+
|
||||
+ if (!kdat.has_ptrace_get_rseq_conf)
|
||||
+ return 0;
|
||||
+
|
||||
+ for (i = 0; i < item->nr_threads; i++) {
|
||||
+ if (fixup_thread_rseq(item, i)) {
|
||||
+ ret = -1;
|
||||
+ goto exit;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+exit:
|
||||
+ xfree(dmpi(item)->thread_rseq_cs);
|
||||
+ dmpi(item)->thread_rseq_cs = NULL;
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
static struct proc_pid_stat pps_buf;
|
||||
@@ -1409,6 +1550,12 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie)
|
||||
goto err;
|
||||
}
|
||||
|
||||
+ ret = fixup_task_rseq(pid, item);
|
||||
+ if (ret) {
|
||||
+ pr_err("Fixup rseq for %d failed %d\n", pid, ret);
|
||||
+ goto err;
|
||||
+ }
|
||||
+
|
||||
if (fault_injected(FI_DUMP_EARLY)) {
|
||||
pr_info("fault: CRIU sudden detach\n");
|
||||
kill(getpid(), SIGKILL);
|
||||
diff --git a/criu/include/parasite.h b/criu/include/parasite.h
|
||||
index 5fde809..d2a0688 100644
|
||||
--- a/criu/include/parasite.h
|
||||
+++ b/criu/include/parasite.h
|
||||
@@ -10,6 +10,8 @@
|
||||
#include <time.h>
|
||||
#include <signal.h>
|
||||
|
||||
+#include "linux/rseq.h"
|
||||
+
|
||||
#include "image.h"
|
||||
#include "util-pie.h"
|
||||
#include "common/lock.h"
|
||||
diff --git a/criu/include/pstree.h b/criu/include/pstree.h
|
||||
index c5b0fa7..458e5f9 100644
|
||||
--- a/criu/include/pstree.h
|
||||
+++ b/criu/include/pstree.h
|
||||
@@ -63,6 +63,7 @@ struct dmp_info {
|
||||
struct parasite_ctl *parasite_ctl;
|
||||
struct parasite_thread_ctl **thread_ctls;
|
||||
uint64_t *thread_sp;
|
||||
+ struct rseq_cs *thread_rseq_cs;
|
||||
|
||||
/*
|
||||
* Although we don't support dumping different struct creds in general,
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,250 +0,0 @@
|
||||
From 961a05f47822444406edeb3d90d9113bba44cdf3 Mon Sep 17 00:00:00 2001
|
||||
From: bb-cat <ningyu9@huawei.com>
|
||||
Date: Wed, 2 Mar 2022 15:05:34 +0800
|
||||
Subject: [PATCH 14/72] zdtm: add rseq transition test for amd64 Signed-off-by:
|
||||
Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
|
||||
|
||||
---
|
||||
test/zdtm/transition/Makefile | 1 +
|
||||
test/zdtm/transition/rseq01.c | 208 +++++++++++++++++++++++++++++++
|
||||
test/zdtm/transition/rseq01.desc | 1 +
|
||||
3 files changed, 210 insertions(+)
|
||||
create mode 100644 test/zdtm/transition/rseq01.c
|
||||
create mode 100644 test/zdtm/transition/rseq01.desc
|
||||
|
||||
diff --git a/test/zdtm/transition/Makefile b/test/zdtm/transition/Makefile
|
||||
index 9388157..fae4e27 100644
|
||||
--- a/test/zdtm/transition/Makefile
|
||||
+++ b/test/zdtm/transition/Makefile
|
||||
@@ -23,6 +23,7 @@ TST_NOFILE = \
|
||||
lazy-thp \
|
||||
pid_reuse \
|
||||
pidfd_store_sk \
|
||||
+ rseq01 \
|
||||
|
||||
|
||||
TST_FILE = \
|
||||
diff --git a/test/zdtm/transition/rseq01.c b/test/zdtm/transition/rseq01.c
|
||||
new file mode 100644
|
||||
index 0000000..5fac5a6
|
||||
--- /dev/null
|
||||
+++ b/test/zdtm/transition/rseq01.c
|
||||
@@ -0,0 +1,208 @@
|
||||
+#include <stdio.h>
|
||||
+#include <stdlib.h>
|
||||
+#include <unistd.h>
|
||||
+#include <signal.h>
|
||||
+#include <string.h>
|
||||
+
|
||||
+#include <sys/types.h>
|
||||
+#include <sys/wait.h>
|
||||
+#include <sys/stat.h>
|
||||
+#include <sys/mman.h>
|
||||
+#include <fcntl.h>
|
||||
+#include <pthread.h>
|
||||
+#include <syscall.h>
|
||||
+
|
||||
+#include "zdtmtst.h"
|
||||
+
|
||||
+#ifdef __has_include
|
||||
+# if __has_include ("sys/rseq.h")
|
||||
+# include <sys/rseq.h>
|
||||
+# endif
|
||||
+#endif
|
||||
+
|
||||
+#if defined(__x86_64__)
|
||||
+
|
||||
+#if defined(__x86_64__) && defined(RSEQ_SIG)
|
||||
+static inline void *thread_pointer(void)
|
||||
+{
|
||||
+ void *result;
|
||||
+ asm("mov %%fs:0, %0" : "=r"(result));
|
||||
+ return result;
|
||||
+}
|
||||
+
|
||||
+static inline void unregister_old_rseq(void)
|
||||
+{
|
||||
+ /* unregister rseq */
|
||||
+ syscall(__NR_rseq, (void *)((char *)thread_pointer() + __rseq_offset), __rseq_size, 1, RSEQ_SIG);
|
||||
+}
|
||||
+#else
|
||||
+static inline void unregister_old_rseq(void)
|
||||
+{
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+const char *test_doc = "rseq() transition test";
|
||||
+const char *test_author = "Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>";
|
||||
+
|
||||
+/* parts of code borrowed from https://www.efficios.com/blog/2019/02/08/linux-restartable-sequences/ */
|
||||
+
|
||||
+/* some useful definitions from kernel uapi */
|
||||
+#ifndef RSEQ_SIG
|
||||
+
|
||||
+enum rseq_flags {
|
||||
+ RSEQ_FLAG_UNREGISTER = (1 << 0),
|
||||
+};
|
||||
+
|
||||
+struct rseq {
|
||||
+ uint32_t cpu_id_start;
|
||||
+ uint32_t cpu_id;
|
||||
+ uint64_t rseq_cs;
|
||||
+ uint32_t flags;
|
||||
+} __attribute__((aligned(4 * sizeof(uint64_t))));
|
||||
+
|
||||
+#define RSEQ_SIG 0x53053053
|
||||
+
|
||||
+#endif
|
||||
+
|
||||
+#ifndef __NR_rseq
|
||||
+#define __NR_rseq 334
|
||||
+#endif
|
||||
+/* EOF */
|
||||
+
|
||||
+static volatile struct rseq *rseq_ptr;
|
||||
+static __thread volatile struct rseq __rseq_abi;
|
||||
+
|
||||
+static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len, int flags, uint32_t sig)
|
||||
+{
|
||||
+ return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
|
||||
+}
|
||||
+
|
||||
+static void register_thread(void)
|
||||
+{
|
||||
+ int rc;
|
||||
+ unregister_old_rseq();
|
||||
+ rc = sys_rseq(rseq_ptr, sizeof(struct rseq), 0, RSEQ_SIG);
|
||||
+ if (rc) {
|
||||
+ fail("Failed to register rseq");
|
||||
+ exit(1);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void check_thread(void)
|
||||
+{
|
||||
+ int rc;
|
||||
+ rc = sys_rseq(rseq_ptr, sizeof(struct rseq), 0, RSEQ_SIG);
|
||||
+ if (!(rc && errno == EBUSY)) {
|
||||
+ fail("Failed to check rseq %d", rc);
|
||||
+ exit(1);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+#define RSEQ_ACCESS_ONCE(x) (*(__volatile__ __typeof__(x) *)&(x))
|
||||
+
|
||||
+static int rseq_addv(intptr_t *v, intptr_t count, int cpu)
|
||||
+{
|
||||
+ double a = 10000000000000000.0;
|
||||
+ double b = -1;
|
||||
+ /*test_msg("enter %f %f\n", a, b);*/
|
||||
+
|
||||
+ /* clang-format off */
|
||||
+ __asm__ __volatile__ goto(
|
||||
+ ".pushsection __rseq_table, \"aw\"\n\t"
|
||||
+ ".balign 32\n\t"
|
||||
+ "cs_obj:\n\t"
|
||||
+ /* version, flags */
|
||||
+ ".long 0, 0\n\t"
|
||||
+ /* start_ip, post_commit_offset, abort_ip */
|
||||
+ ".quad 1f, (2f-1f), 4f\n\t"
|
||||
+ ".popsection\n\t"
|
||||
+ "1:\n\t"
|
||||
+ "leaq cs_obj(%%rip), %%rax\n\t"
|
||||
+ "movq %%rax, %[rseq_cs]\n\t"
|
||||
+ "cmpl %[cpu_id], %[current_cpu_id]\n\t"
|
||||
+ "jnz 4f\n\t"
|
||||
+ "addq %[count], %[v]\n\t" /* final store */
|
||||
+ "mov $10000000, %%rcx\n\t"
|
||||
+ "fldl %[x]\n\t" /* we have st clobbered */
|
||||
+ "5:\n\t"
|
||||
+ "fsqrt\n\t" /* heavy instruction */
|
||||
+ "dec %%rcx\n\t"
|
||||
+ "jnz 5b\n\t"
|
||||
+ "fstpl %[y]\n\t"
|
||||
+ "2:\n\t"
|
||||
+ ".pushsection __rseq_failure, \"ax\"\n\t"
|
||||
+ /* Disassembler-friendly signature: nopl <sig>(%rip). */
|
||||
+ ".byte 0x0f, 0xb9, 0x3d\n\t"
|
||||
+ ".long 0x53053053\n\t" /* RSEQ_SIG */
|
||||
+ "4:\n\t"
|
||||
+ /*"fstpl %[y]\n\t"*/
|
||||
+ "jmp %l[abort]\n\t"
|
||||
+ /*"jmp 1b\n\t"*/
|
||||
+ ".popsection\n\t"
|
||||
+ : /* gcc asm goto does not allow outputs */
|
||||
+ : [cpu_id] "r" (cpu),
|
||||
+ [current_cpu_id] "m" (rseq_ptr->cpu_id),
|
||||
+ [rseq_cs] "m" (rseq_ptr->rseq_cs),
|
||||
+ /* final store input */
|
||||
+ [v] "m" (*v),
|
||||
+ [count] "er" (count),
|
||||
+ [x] "m" (a),
|
||||
+ [y] "m" (b)
|
||||
+ : "memory", "cc", "rax", "rcx", "st"
|
||||
+ : abort
|
||||
+ );
|
||||
+ /* clang-format on */
|
||||
+ /*test_msg("exit %f %f\n", a, b);*/
|
||||
+ return 0;
|
||||
+abort:
|
||||
+ /*test_msg("abort %f %f\n", a, b);*/
|
||||
+ return -1;
|
||||
+}
|
||||
+
|
||||
+int main(int argc, char *argv[])
|
||||
+{
|
||||
+ int cpu = 0;
|
||||
+ int ret;
|
||||
+ intptr_t *cpu_data;
|
||||
+ long nr_cpus;
|
||||
+
|
||||
+ rseq_ptr = &__rseq_abi;
|
||||
+ memset((void *)rseq_ptr, 0, sizeof(struct rseq));
|
||||
+
|
||||
+ test_init(argc, argv);
|
||||
+ nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
|
||||
+
|
||||
+ cpu_data = calloc(nr_cpus, sizeof(*cpu_data));
|
||||
+ if (!cpu_data) {
|
||||
+ fail("calloc");
|
||||
+ exit(EXIT_FAILURE);
|
||||
+ }
|
||||
+ register_thread();
|
||||
+
|
||||
+ test_daemon();
|
||||
+
|
||||
+ while (test_go()) {
|
||||
+ cpu = RSEQ_ACCESS_ONCE(rseq_ptr->cpu_id_start);
|
||||
+ ret = rseq_addv(&cpu_data[cpu], 2, cpu);
|
||||
+ if (ret)
|
||||
+ fail("Failed to increment per-cpu counter");
|
||||
+ }
|
||||
+
|
||||
+ test_waitsig();
|
||||
+
|
||||
+ check_thread();
|
||||
+ pass();
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+#else
|
||||
+
|
||||
+int main(int argc, char *argv[])
|
||||
+{
|
||||
+ test_init(argc, argv);
|
||||
+ skip("Unsupported arch");
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+#endif
|
||||
diff --git a/test/zdtm/transition/rseq01.desc b/test/zdtm/transition/rseq01.desc
|
||||
new file mode 100644
|
||||
index 0000000..0324fa3
|
||||
--- /dev/null
|
||||
+++ b/test/zdtm/transition/rseq01.desc
|
||||
@@ -0,0 +1 @@
|
||||
+{'flavor': 'h', 'arch': 'x86_64', 'feature': 'get_rseq_conf'}
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,330 +0,0 @@
|
||||
From 50f04f06eb3ecbdd465e417e8c5c8b19d43ec2f4 Mon Sep 17 00:00:00 2001
|
||||
From: bb-cat <ningyu9@huawei.com>
|
||||
Date: Wed, 2 Mar 2022 15:09:44 +0800
|
||||
Subject: [PATCH 15/72] cr-dump: handle rseq flags field Userspace may
|
||||
configure rseq critical section by def
|
||||
|
||||
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
|
||||
---
|
||||
criu/cr-dump.c | 86 +++++++++++++++++++++++++++----------------
|
||||
criu/cr-restore.c | 63 +++++++++++++++++++++++++++++++
|
||||
criu/include/pstree.h | 1 +
|
||||
images/rseq.proto | 1 +
|
||||
4 files changed, 119 insertions(+), 32 deletions(-)
|
||||
|
||||
diff --git a/criu/cr-dump.c b/criu/cr-dump.c
|
||||
index a3f8973..79387fb 100644
|
||||
--- a/criu/cr-dump.c
|
||||
+++ b/criu/cr-dump.c
|
||||
@@ -1047,13 +1047,13 @@ static int dump_task_signals(pid_t pid, struct pstree_item *item)
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static int read_rseq_cs(pid_t tid, struct __ptrace_rseq_configuration *rseq, struct rseq_cs *rseq_cs)
|
||||
+static int read_rseq_cs(pid_t tid, struct __ptrace_rseq_configuration *rseqc,
|
||||
+ struct rseq_cs *rseq_cs, struct rseq *rseq)
|
||||
{
|
||||
int ret;
|
||||
- uint64_t addr;
|
||||
|
||||
/* rseq is not registered */
|
||||
- if (!rseq->rseq_abi_pointer)
|
||||
+ if (!rseqc->rseq_abi_pointer)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
@@ -1068,22 +1068,21 @@ static int read_rseq_cs(pid_t tid, struct __ptrace_rseq_configuration *rseq, str
|
||||
* then rseq_ip_fixup() -> clear_rseq_cs() and user space memory with struct rseq
|
||||
* will be cleared. So, let's use ptrace(PTRACE_PEEKDATA).
|
||||
*/
|
||||
- ret = ptrace_peek_area(tid, &addr, decode_pointer(rseq->rseq_abi_pointer + offsetof(struct rseq, rseq_cs)),
|
||||
- sizeof(uint64_t));
|
||||
+ ret = ptrace_peek_area(tid, rseq, decode_pointer(rseqc->rseq_abi_pointer),
|
||||
+ sizeof(struct rseq));
|
||||
if (ret) {
|
||||
- pr_err("ptrace_peek_area(%d, %lx, %lx, %lx): fail to read rseq_cs addr\n", tid, (unsigned long)&addr,
|
||||
- (unsigned long)(rseq->rseq_abi_pointer + offsetof(struct rseq, rseq_cs)), sizeof(uint64_t));
|
||||
+ pr_err("ptrace_peek_area(%d, %lx, %lx, %lx): fail to read rseq struct\n", tid, (unsigned long)rseq,
|
||||
+ (unsigned long)(rseqc->rseq_abi_pointer), sizeof(uint64_t));
|
||||
return -1;
|
||||
}
|
||||
|
||||
- /* (struct rseq)->rseq_cs is NULL */
|
||||
- if (!addr)
|
||||
+ if (!rseq->rseq_cs.ptr64)
|
||||
return 0;
|
||||
|
||||
- ret = ptrace_peek_area(tid, rseq_cs, decode_pointer(addr), sizeof(struct rseq_cs));
|
||||
+ ret = ptrace_peek_area(tid, rseq_cs, decode_pointer(rseq->rseq_cs.ptr64), sizeof(struct rseq_cs));
|
||||
if (ret) {
|
||||
pr_err("ptrace_peek_area(%d, %lx, %lx, %lx): fail to read rseq_cs struct\n", tid,
|
||||
- (unsigned long)rseq_cs, (unsigned long)addr, sizeof(struct rseq_cs));
|
||||
+ (unsigned long)rseq_cs, (unsigned long)rseq->rseq_cs.ptr64, sizeof(struct rseq_cs));
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -1092,11 +1091,12 @@ static int read_rseq_cs(pid_t tid, struct __ptrace_rseq_configuration *rseq, str
|
||||
|
||||
static int dump_thread_rseq(struct pstree_item *item, int i)
|
||||
{
|
||||
- struct __ptrace_rseq_configuration rseq;
|
||||
+ struct __ptrace_rseq_configuration rseqc;
|
||||
RseqEntry *rseqe = NULL;
|
||||
int ret;
|
||||
CoreEntry *core = item->core[i];
|
||||
RseqEntry **rseqep = &core->thread_core->rseq_entry;
|
||||
+ struct rseq rseq;
|
||||
struct rseq_cs *rseq_cs = &dmpi(item)->thread_rseq_cs[i];
|
||||
pid_t tid = item->threads[i].real;
|
||||
|
||||
@@ -1111,20 +1111,20 @@ static int dump_thread_rseq(struct pstree_item *item, int i)
|
||||
if (!kdat.has_ptrace_get_rseq_conf)
|
||||
return 0;
|
||||
|
||||
- ret = ptrace(PTRACE_GET_RSEQ_CONFIGURATION, tid, sizeof(rseq), &rseq);
|
||||
- if (ret != sizeof(rseq)) {
|
||||
+ ret = ptrace(PTRACE_GET_RSEQ_CONFIGURATION, tid, sizeof(rseqc), &rseqc);
|
||||
+ if (ret != sizeof(rseqc)) {
|
||||
pr_perror("ptrace(PTRACE_GET_RSEQ_CONFIGURATION, %d) = %d", tid, ret);
|
||||
return -1;
|
||||
}
|
||||
|
||||
- if (rseq.flags != 0) {
|
||||
+ if (rseqc.flags != 0) {
|
||||
pr_err("something wrong with ptrace(PTRACE_GET_RSEQ_CONFIGURATION, %d) flags = 0x%x\n", tid,
|
||||
- rseq.flags);
|
||||
+ rseqc.flags);
|
||||
return -1;
|
||||
}
|
||||
|
||||
- pr_info("Dump rseq of %d: ptr = 0x%lx sign = 0x%x\n", tid, (unsigned long)rseq.rseq_abi_pointer,
|
||||
- rseq.signature);
|
||||
+ pr_info("Dump rseq of %d: ptr = 0x%lx sign = 0x%x\n", tid, (unsigned long)rseqc.rseq_abi_pointer,
|
||||
+ rseqc.signature);
|
||||
|
||||
rseqe = xmalloc(sizeof(*rseqe));
|
||||
if (!rseqe)
|
||||
@@ -1132,13 +1132,22 @@ static int dump_thread_rseq(struct pstree_item *item, int i)
|
||||
|
||||
rseq_entry__init(rseqe);
|
||||
|
||||
- rseqe->rseq_abi_pointer = rseq.rseq_abi_pointer;
|
||||
- rseqe->rseq_abi_size = rseq.rseq_abi_size;
|
||||
- rseqe->signature = rseq.signature;
|
||||
+ rseqe->rseq_abi_pointer = rseqc.rseq_abi_pointer;
|
||||
+ rseqe->rseq_abi_size = rseqc.rseq_abi_size;
|
||||
+ rseqe->signature = rseqc.signature;
|
||||
|
||||
- if (read_rseq_cs(tid, &rseq, rseq_cs))
|
||||
+ if (read_rseq_cs(tid, &rseqc, rseq_cs, &rseq))
|
||||
goto err;
|
||||
|
||||
+ rseqe->has_rseq_cs_pointer = true;
|
||||
+ rseqe->rseq_cs_pointer = rseq.rseq_cs.ptr64;
|
||||
+ pr_err("cs pointer %lx\n", rseqe->rseq_cs_pointer);
|
||||
+ /* we won't save rseq_cs to the image (only pointer),
|
||||
+ * so let's combine flags from both struct rseq and struct rseq_cs
|
||||
+ * (kernel does the same when interpreting RSEQ_CS_FLAG_*)
|
||||
+ */
|
||||
+ rseq_cs->flags |= rseq.flags;
|
||||
+
|
||||
/* save rseq entry to the image */
|
||||
*rseqep = rseqe;
|
||||
|
||||
@@ -1188,11 +1197,11 @@ static int fixup_thread_rseq(struct pstree_item *item, int i)
|
||||
struct rseq_cs *rseq_cs = &dmpi(item)->thread_rseq_cs[i];
|
||||
pid_t tid = item->threads[i].real;
|
||||
|
||||
- /* (struct rseq)->rseq_cs is NULL */
|
||||
+ /* equivalent to (struct rseq)->rseq_cs is NULL */
|
||||
if (!rseq_cs->start_ip)
|
||||
return 0;
|
||||
|
||||
- pr_info("fixup_thread_rseq for %d: rseq_cs start_ip = %llx abort_ip = %llx post_commit_offset = %llx flags = %x version = %x; IP = %lx\n",
|
||||
+ pr_debug("fixup_thread_rseq for %d: rseq_cs start_ip = %llx abort_ip = %llx post_commit_offset = %llx flags = %x version = %x; IP = %lx\n",
|
||||
tid, rseq_cs->start_ip, rseq_cs->abort_ip, rseq_cs->post_commit_offset, rseq_cs->flags,
|
||||
rseq_cs->version, (unsigned long)TI_IP(core));
|
||||
|
||||
@@ -1204,25 +1213,38 @@ static int fixup_thread_rseq(struct pstree_item *item, int i)
|
||||
if (task_in_rseq(rseq_cs, TI_IP(core))) {
|
||||
struct pid *tid = &item->threads[i];
|
||||
|
||||
- pr_info("The %d task is in rseq critical section. IP will be set to rseq abort handler addr\n",
|
||||
- tid->real);
|
||||
-
|
||||
/*
|
||||
* We need to fixup task instruction pointer from
|
||||
* the original one (which lays inside rseq critical section)
|
||||
- * to rseq abort handler address.
|
||||
+ * to rseq abort handler address. But we need to look at rseq_cs->flags
|
||||
+ * (please refer to struct rseq -> flags field description).
|
||||
+ * Naive idea of flags support may be like... let's change instruction pointer (IP)
|
||||
+ * to rseq_cs->abort_ip if !(rseq_cs->flags & RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL).
|
||||
+ * But unfortunately, it doesn't work properly, because the kernel does
|
||||
+ * clean up of rseq_cs field in the struct rseq (modifies userspace memory).
|
||||
+ * So, we need to preserve original value of (struct rseq)->rseq_cs field in the
|
||||
+ * image and restore its value before releasing threads.
|
||||
*
|
||||
* It's worth to mention that we need to fixup IP in CoreEntry
|
||||
* (used when full dump/restore is performed) and also in
|
||||
* the parasite regs storage (used if --leave-running option is used,
|
||||
* or if dump error occured and process execution is resumed).
|
||||
*/
|
||||
- TI_IP(core) = rseq_cs->abort_ip;
|
||||
|
||||
- if (item->pid->real == tid->real) {
|
||||
- compel_set_leader_ip(dmpi(item)->parasite_ctl, rseq_cs->abort_ip);
|
||||
+ if (rseq_cs->flags & RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL) {
|
||||
+ pr_err("The %d task is in rseq critical section.!!! IP will be set to rseq abort handler addr\n",
|
||||
+ tid->real);
|
||||
} else {
|
||||
- compel_set_thread_ip(dmpi(item)->thread_ctls[i], rseq_cs->abort_ip);
|
||||
+ pr_warn("The %d task is in rseq critical section. IP will be set to rseq abort handler addr\n",
|
||||
+ tid->real);
|
||||
+
|
||||
+ TI_IP(core) = rseq_cs->abort_ip;
|
||||
+
|
||||
+ if (item->pid->real == tid->real) {
|
||||
+ compel_set_leader_ip(dmpi(item)->parasite_ctl, rseq_cs->abort_ip);
|
||||
+ } else {
|
||||
+ compel_set_thread_ip(dmpi(item)->thread_ctls[i], rseq_cs->abort_ip);
|
||||
+ }
|
||||
}
|
||||
}
|
||||
|
||||
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
|
||||
index b2bd044..864140f 100644
|
||||
--- a/criu/cr-restore.c
|
||||
+++ b/criu/cr-restore.c
|
||||
@@ -23,6 +23,7 @@
|
||||
#include "common/compiler.h"
|
||||
|
||||
#include "linux/mount.h"
|
||||
+#include "linux/rseq.h"
|
||||
|
||||
#include "clone-noasan.h"
|
||||
#include "cr_options.h"
|
||||
@@ -779,6 +780,7 @@ static int open_cores(int pid, CoreEntry *leader_core)
|
||||
{
|
||||
int i, tpid;
|
||||
CoreEntry **cores = NULL;
|
||||
+ //RseqEntry *rseqs;
|
||||
|
||||
cores = xmalloc(sizeof(*cores) * current->nr_threads);
|
||||
if (!cores)
|
||||
@@ -812,6 +814,19 @@ static int open_cores(int pid, CoreEntry *leader_core)
|
||||
}
|
||||
}
|
||||
|
||||
+
|
||||
+ pr_err("item %lx\n", (uint64_t)current);
|
||||
+
|
||||
+ for (i = 0; i < current->nr_threads; i++) {
|
||||
+ ThreadCoreEntry *tc = cores[i]->thread_core;
|
||||
+
|
||||
+ /* compatibility with older CRIU versions */
|
||||
+ if (!tc->rseq_entry)
|
||||
+ continue;
|
||||
+
|
||||
+ current->rseqe[i] = *tc->rseq_entry;
|
||||
+ }
|
||||
+
|
||||
return 0;
|
||||
err:
|
||||
xfree(cores);
|
||||
@@ -868,8 +883,15 @@ static int restore_one_alive_task(int pid, CoreEntry *core)
|
||||
{
|
||||
unsigned args_len;
|
||||
struct task_restore_args *ta;
|
||||
+ RseqEntry *rseqs;
|
||||
pr_info("Restoring resources\n");
|
||||
|
||||
+ rseqs = shmalloc(sizeof(*rseqs) * current->nr_threads);
|
||||
+ if (!rseqs)
|
||||
+ return -1;
|
||||
+
|
||||
+ current->rseqe = rseqs;
|
||||
+
|
||||
rst_mem_switch_to_private();
|
||||
|
||||
args_len = round_up(sizeof(*ta) + sizeof(struct thread_restore_args) * current->nr_threads, page_size());
|
||||
@@ -1966,6 +1988,44 @@ static int attach_to_tasks(bool root_seized)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int restore_rseq_cs(void)
|
||||
+{
|
||||
+ struct pstree_item *item;
|
||||
+
|
||||
+ for_each_pstree_item(item) {
|
||||
+ int i;
|
||||
+
|
||||
+ if (!task_alive(item))
|
||||
+ continue;
|
||||
+
|
||||
+ if (item->nr_threads == 1) {
|
||||
+ item->threads[0].real = item->pid->real;
|
||||
+ } else {
|
||||
+ if (parse_threads(item->pid->real, &item->threads, &item->nr_threads))
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ for (i = 0; i < item->nr_threads; i++) {
|
||||
+ pid_t pid = item->threads[i].real;
|
||||
+
|
||||
+ if (!item->rseqe[i].rseq_cs_pointer || !item->rseqe[i].rseq_abi_pointer) {
|
||||
+ pr_err("item %lx rseqe %lx\n", (uint64_t)item, (uint64_t)item->rseqe);
|
||||
+ pr_err("nothing to do with cs_pointer\n");
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ pr_err("restoring cs ... %lx \n", item->rseqe[i].rseq_cs_pointer);
|
||||
+
|
||||
+ if (ptrace_poke_area(pid, &item->rseqe[i].rseq_cs_pointer, (void *)(item->rseqe[i].rseq_abi_pointer + offsetof(struct rseq, rseq_cs)), sizeof(uint64_t))) {
|
||||
+ pr_err("Can't restore memfd args (pid: %d)\n", pid);
|
||||
+ return -1;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static int catch_tasks(bool root_seized, enum trace_flags *flag)
|
||||
{
|
||||
struct pstree_item *item;
|
||||
@@ -2400,6 +2460,9 @@ skip_ns_bouncing:
|
||||
if (restore_freezer_state())
|
||||
pr_err("Unable to restore freezer state\n");
|
||||
|
||||
+ /* just before releasing threads we have to restore rseq_cs */
|
||||
+ restore_rseq_cs();
|
||||
+
|
||||
/* Detaches from processes and they continue run through sigreturn. */
|
||||
if (finalize_restore_detach())
|
||||
goto out_kill_network_unlocked;
|
||||
diff --git a/criu/include/pstree.h b/criu/include/pstree.h
|
||||
index 458e5f9..97bef11 100644
|
||||
--- a/criu/include/pstree.h
|
||||
+++ b/criu/include/pstree.h
|
||||
@@ -25,6 +25,7 @@ struct pstree_item {
|
||||
int nr_threads; /* number of threads */
|
||||
struct pid *threads; /* array of threads */
|
||||
CoreEntry **core;
|
||||
+ RseqEntry *rseqe;
|
||||
TaskKobjIdsEntry *ids;
|
||||
union {
|
||||
futex_t task_st;
|
||||
diff --git a/images/rseq.proto b/images/rseq.proto
|
||||
index be28004..45cb847 100644
|
||||
--- a/images/rseq.proto
|
||||
+++ b/images/rseq.proto
|
||||
@@ -6,4 +6,5 @@ message rseq_entry {
|
||||
required uint64 rseq_abi_pointer = 1;
|
||||
required uint32 rseq_abi_size = 2;
|
||||
required uint32 signature = 3;
|
||||
+ optional uint64 rseq_cs_pointer = 4;
|
||||
}
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,177 +0,0 @@
|
||||
From dc5f32571e66ab72842e735259d0c442ed1c603b Mon Sep 17 00:00:00 2001
|
||||
From: bb-cat <ningyu9@huawei.com>
|
||||
Date: Wed, 2 Mar 2022 15:10:24 +0800
|
||||
Subject: [PATCH 16/72] zdtm: add rseq02 transition test with NO_RESTART CS
|
||||
flag Signed-off-by: Alexander Mikhalitsyn
|
||||
<alexander.mikhalitsyn@virtuozzo.com>
|
||||
|
||||
---
|
||||
test/zdtm/transition/Makefile | 2 ++
|
||||
test/zdtm/transition/rseq01.c | 61 ++++++++++++++++++++++++++++++--
|
||||
test/zdtm/transition/rseq02.c | 1 +
|
||||
test/zdtm/transition/rseq02.desc | 1 +
|
||||
4 files changed, 63 insertions(+), 2 deletions(-)
|
||||
create mode 120000 test/zdtm/transition/rseq02.c
|
||||
create mode 120000 test/zdtm/transition/rseq02.desc
|
||||
|
||||
diff --git a/test/zdtm/transition/Makefile b/test/zdtm/transition/Makefile
|
||||
index fae4e27..378a4fc 100644
|
||||
--- a/test/zdtm/transition/Makefile
|
||||
+++ b/test/zdtm/transition/Makefile
|
||||
@@ -24,6 +24,7 @@ TST_NOFILE = \
|
||||
pid_reuse \
|
||||
pidfd_store_sk \
|
||||
rseq01 \
|
||||
+ rseq02 \
|
||||
|
||||
|
||||
TST_FILE = \
|
||||
@@ -82,6 +83,7 @@ ptrace: LDFLAGS += -pthread
|
||||
fork2: CFLAGS += -D FORK2
|
||||
thread-bomb.o: CFLAGS += -pthread
|
||||
thread-bomb: LDFLAGS += -pthread
|
||||
+rseq02: CFLAGS += -D NOABORT
|
||||
|
||||
%: %.sh
|
||||
cp $< $@
|
||||
diff --git a/test/zdtm/transition/rseq01.c b/test/zdtm/transition/rseq01.c
|
||||
index 5fac5a6..25e1d61 100644
|
||||
--- a/test/zdtm/transition/rseq01.c
|
||||
+++ b/test/zdtm/transition/rseq01.c
|
||||
@@ -53,6 +53,18 @@ enum rseq_flags {
|
||||
RSEQ_FLAG_UNREGISTER = (1 << 0),
|
||||
};
|
||||
|
||||
+enum rseq_cs_flags_bit {
|
||||
+ RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT = 0,
|
||||
+ RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT = 1,
|
||||
+ RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT = 2,
|
||||
+};
|
||||
+
|
||||
+enum rseq_cs_flags {
|
||||
+ RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT = (1U << RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT),
|
||||
+ RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL = (1U << RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT),
|
||||
+ RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE = (1U << RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT),
|
||||
+};
|
||||
+
|
||||
struct rseq {
|
||||
uint32_t cpu_id_start;
|
||||
uint32_t cpu_id;
|
||||
@@ -104,6 +116,7 @@ static int rseq_addv(intptr_t *v, intptr_t count, int cpu)
|
||||
{
|
||||
double a = 10000000000000000.0;
|
||||
double b = -1;
|
||||
+ uint64_t rseq_cs1, rseq_cs2;
|
||||
/*test_msg("enter %f %f\n", a, b);*/
|
||||
|
||||
/* clang-format off */
|
||||
@@ -129,6 +142,9 @@ static int rseq_addv(intptr_t *v, intptr_t count, int cpu)
|
||||
"dec %%rcx\n\t"
|
||||
"jnz 5b\n\t"
|
||||
"fstpl %[y]\n\t"
|
||||
+ "movq %%rax, %[rseq_cs_check2]\n\t"
|
||||
+ "movq %[rseq_cs], %%rax\n\t"
|
||||
+ "movq %%rax, %[rseq_cs_check1]\n\t"
|
||||
"2:\n\t"
|
||||
".pushsection __rseq_failure, \"ax\"\n\t"
|
||||
/* Disassembler-friendly signature: nopl <sig>(%rip). */
|
||||
@@ -143,6 +159,8 @@ static int rseq_addv(intptr_t *v, intptr_t count, int cpu)
|
||||
: [cpu_id] "r" (cpu),
|
||||
[current_cpu_id] "m" (rseq_ptr->cpu_id),
|
||||
[rseq_cs] "m" (rseq_ptr->rseq_cs),
|
||||
+ [rseq_cs_check1] "m" (rseq_cs1),
|
||||
+ [rseq_cs_check2] "m" (rseq_cs2),
|
||||
/* final store input */
|
||||
[v] "m" (*v),
|
||||
[count] "er" (count),
|
||||
@@ -153,8 +171,20 @@ static int rseq_addv(intptr_t *v, intptr_t count, int cpu)
|
||||
);
|
||||
/* clang-format on */
|
||||
/*test_msg("exit %f %f\n", a, b);*/
|
||||
+ test_msg("%lx %lx\n", rseq_cs1, rseq_cs2);
|
||||
+ if (rseq_cs1 != rseq_cs2) {
|
||||
+ /*
|
||||
+ * It means that we finished critical section
|
||||
+ * *normally* (haven't jumped to abort) but the kernel had cleaned up
|
||||
+ * rseq_ptr->rseq_cs before we left critical section
|
||||
+ * and CRIU hasn't restored it correctly.
|
||||
+ * That's a bug picture.
|
||||
+ */
|
||||
+ return -1;
|
||||
+ }
|
||||
return 0;
|
||||
abort:
|
||||
+ test_msg("%lx %lx\n", rseq_cs1, rseq_cs2);
|
||||
/*test_msg("abort %f %f\n", a, b);*/
|
||||
return -1;
|
||||
}
|
||||
@@ -177,21 +207,48 @@ int main(int argc, char *argv[])
|
||||
fail("calloc");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
+
|
||||
register_thread();
|
||||
|
||||
+ /*
|
||||
+ * We want to test that RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL
|
||||
+ * is handled properly by CRIU, but that flag can be used
|
||||
+ * only with all the other flags set.
|
||||
+ * Please, refer to
|
||||
+ * https://github.com/torvalds/linux/blob/master/kernel/rseq.c#L192
|
||||
+ */
|
||||
+#ifdef NOABORT
|
||||
+ rseq_ptr->flags = RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT |
|
||||
+ RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL |
|
||||
+ RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE;
|
||||
+#endif
|
||||
+
|
||||
test_daemon();
|
||||
|
||||
while (test_go()) {
|
||||
cpu = RSEQ_ACCESS_ONCE(rseq_ptr->cpu_id_start);
|
||||
ret = rseq_addv(&cpu_data[cpu], 2, cpu);
|
||||
- if (ret)
|
||||
+#ifndef NOABORT
|
||||
+ /* just ignore abort */
|
||||
+ ret = 0;
|
||||
+#else
|
||||
+ if (ret) {
|
||||
fail("Failed to increment per-cpu counter");
|
||||
+ break;
|
||||
+ } else {
|
||||
+ //test_msg("cpu_data[%d] == %ld\n", cpu, (long int)cpu_data[cpu]);
|
||||
+ }
|
||||
+#endif
|
||||
}
|
||||
|
||||
test_waitsig();
|
||||
|
||||
check_thread();
|
||||
- pass();
|
||||
+
|
||||
+ if (ret)
|
||||
+ fail();
|
||||
+ else
|
||||
+ pass();
|
||||
|
||||
return 0;
|
||||
}
|
||||
diff --git a/test/zdtm/transition/rseq02.c b/test/zdtm/transition/rseq02.c
|
||||
new file mode 120000
|
||||
index 0000000..d564917
|
||||
--- /dev/null
|
||||
+++ b/test/zdtm/transition/rseq02.c
|
||||
@@ -0,0 +1 @@
|
||||
+rseq01.c
|
||||
\ No newline at end of file
|
||||
diff --git a/test/zdtm/transition/rseq02.desc b/test/zdtm/transition/rseq02.desc
|
||||
new file mode 120000
|
||||
index 0000000..b888f0d
|
||||
--- /dev/null
|
||||
+++ b/test/zdtm/transition/rseq02.desc
|
||||
@@ -0,0 +1 @@
|
||||
+rseq01.desc
|
||||
\ No newline at end of file
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,56 +0,0 @@
|
||||
From 1f760a8bbb539e81b1ef48aeedbebb792d7b74b2 Mon Sep 17 00:00:00 2001
|
||||
From: "fu.lin" <fulin10@huawei.com>
|
||||
Date: Fri, 14 Jan 2022 16:39:32 +0800
|
||||
Subject: [PATCH 17/72] zdtm: fix zdtm/static/maps00 case in arm64
|
||||
|
||||
This case sometimes causes a SIGILL signal on the arm64 platform.
|
||||
|
||||
<<ARM Cortex-A Series Programmer's Guide for ARMv8-A>> notes:
|
||||
The ARM architecture does not require the hardware to ensure coherency
|
||||
between instruction caches and memory, even for locations of shared
|
||||
memory.
|
||||
|
||||
Therefore, we need to flush the dcache and icache for self-modifying code.
|
||||
|
||||
- https://developer.arm.com/documentation/den0024/a/Caches/Point-of-coherency-and-unification
|
||||
|
||||
Signed-off-by: fu.lin <fulin10@huawei.com>
|
||||
---
|
||||
test/zdtm/static/maps00.c | 8 +++++---
|
||||
1 file changed, 5 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/test/zdtm/static/maps00.c b/test/zdtm/static/maps00.c
|
||||
index 10a4cac..5ef8f1a 100644
|
||||
--- a/test/zdtm/static/maps00.c
|
||||
+++ b/test/zdtm/static/maps00.c
|
||||
@@ -158,7 +158,8 @@ static int check_map(struct map *map)
|
||||
|
||||
if (!sigsetjmp(segv_ret, 1)) {
|
||||
if (map->prot & PROT_WRITE) {
|
||||
- memcpy(map->ptr, test_func, getpagesize());
|
||||
+ memcpy(map->ptr,test_func, ONE_MAP_SIZE);
|
||||
+ __builtin___clear_cache(map->ptr, map->ptr+ONE_MAP_SIZE);
|
||||
} else {
|
||||
if (!(map->flag & MAP_ANONYMOUS)) {
|
||||
uint8_t funlen = (uint8_t *)check_map - (uint8_t *)test_func;
|
||||
@@ -169,14 +170,15 @@ static int check_map(struct map *map)
|
||||
}
|
||||
}
|
||||
}
|
||||
- if (!(map->flag & MAP_ANONYMOUS) || map->prot & PROT_WRITE)
|
||||
+ if (!(map->flag & MAP_ANONYMOUS) || (map->prot & PROT_WRITE))
|
||||
/* Function body has been copied into the mapping */
|
||||
((int (*)(void))map->ptr)(); /* perform exec access */
|
||||
- else
|
||||
+ else {
|
||||
/* No way to copy function body into mapping,
|
||||
* clear exec bit from effective protection
|
||||
*/
|
||||
prot &= PROT_WRITE | PROT_READ | !PROT_EXEC;
|
||||
+ }
|
||||
} else
|
||||
prot &= PROT_WRITE | PROT_READ | !PROT_EXEC;
|
||||
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,52 +0,0 @@
|
||||
From 003edcab5c2dc1a3f00dba7f4b7bcdd017eb34b5 Mon Sep 17 00:00:00 2001
|
||||
From: "fu.lin" <fulin10@huawei.com>
|
||||
Date: Thu, 20 Jan 2022 19:45:14 +0800
|
||||
Subject: [PATCH 18/72] test: flush ipt rules after program exits
|
||||
|
||||
Signed-off-by: fu.lin <fulin10@huawei.com>
|
||||
---
|
||||
test/zdtm/static/socket-tcp-nfconntrack.desc | 2 +-
|
||||
test/zdtm/static/socket-tcp.c | 13 +++++++++++++
|
||||
2 files changed, 14 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/test/zdtm/static/socket-tcp-nfconntrack.desc b/test/zdtm/static/socket-tcp-nfconntrack.desc
|
||||
index add2513..05bdb49 100644
|
||||
--- a/test/zdtm/static/socket-tcp-nfconntrack.desc
|
||||
+++ b/test/zdtm/static/socket-tcp-nfconntrack.desc
|
||||
@@ -1 +1 @@
|
||||
-{'flavor': 'h', 'opts': '--tcp-established', 'flags': 'suid'}
|
||||
+{'flavor': 'h', 'opts': '--tcp-established', 'flags': 'suid excl'}
|
||||
diff --git a/test/zdtm/static/socket-tcp.c b/test/zdtm/static/socket-tcp.c
|
||||
index f6ef473..29b0fce 100644
|
||||
--- a/test/zdtm/static/socket-tcp.c
|
||||
+++ b/test/zdtm/static/socket-tcp.c
|
||||
@@ -57,6 +57,13 @@ int write_data(int fd, const unsigned char *buf, int size)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+#ifdef ZDTM_CONNTRACK
|
||||
+static void ipt_flush(void)
|
||||
+{
|
||||
+ system("iptables -w --flush");
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
unsigned char buf[BUF_SIZE];
|
||||
@@ -72,6 +79,12 @@ int main(int argc, char **argv)
|
||||
pr_perror("unshare");
|
||||
return 1;
|
||||
}
|
||||
+
|
||||
+ if (atexit(ipt_flush) != 0) {
|
||||
+ pr_perror("atexit");
|
||||
+ return 1;
|
||||
+ }
|
||||
+
|
||||
if (system("ip link set up dev lo"))
|
||||
return 1;
|
||||
if (system("iptables -w -A INPUT -i lo -p tcp -m state --state NEW,ESTABLISHED -j ACCEPT"))
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,48 +0,0 @@
|
||||
From 5e68ba283e442467baef762bfcf87910d84e01ae Mon Sep 17 00:00:00 2001
|
||||
From: "fu.lin" <fulin10@huawei.com>
|
||||
Date: Wed, 19 Jan 2022 10:01:25 +0800
|
||||
Subject: [PATCH 19/72] zdtm: fix cleaning step of zdtm_netns
|
||||
|
||||
Signed-off-by: fu.lin <fulin10@huawei.com>
|
||||
---
|
||||
test/zdtm.py | 10 +++++++---
|
||||
1 file changed, 7 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/test/zdtm.py b/test/zdtm.py
|
||||
index 0a52e1b..0feece0 100755
|
||||
--- a/test/zdtm.py
|
||||
+++ b/test/zdtm.py
|
||||
@@ -1,4 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
+# -*- coding: utf-8 -*-
|
||||
+
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
import argparse
|
||||
@@ -2110,7 +2112,8 @@ class Launcher:
|
||||
|
||||
if self.__fail:
|
||||
print_sep("FAIL", "#")
|
||||
- sys.exit(1)
|
||||
+
|
||||
+ return self.__fail
|
||||
|
||||
|
||||
def all_tests(opts):
|
||||
@@ -2375,10 +2378,11 @@ def run_tests(opts):
|
||||
else:
|
||||
launcher.skip(t, "no flavors")
|
||||
finally:
|
||||
- launcher.finish()
|
||||
+ fail = launcher.finish()
|
||||
if opts['join_ns']:
|
||||
subprocess.Popen(["ip", "netns", "delete", "zdtm_netns"]).wait()
|
||||
-
|
||||
+ if fail:
|
||||
+ sys.exit(1)
|
||||
|
||||
sti_fmt = "%-40s%-10s%s"
|
||||
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,453 +0,0 @@
|
||||
From 3858f7e228b15d0e1ce553f530fda4da9aa4efab Mon Sep 17 00:00:00 2001
|
||||
From: Jingxian He <hejingxian@huawei.com>
|
||||
Date: Fri, 23 Apr 2021 21:22:08 +0800
|
||||
Subject: [PATCH 20/72] mm: add pin memory method for criu
|
||||
|
||||
Add pin memory method for criu to improve memory recovery
|
||||
speed and avoid saving user private data to files.
|
||||
|
||||
Signed-off-by: Jingxian He <hejingxian@huawei.com>
|
||||
Signed-off-by: fu.lin <fulin10@huawei.com>
|
||||
---
|
||||
criu/Makefile.crtools | 1 +
|
||||
criu/config.c | 1 +
|
||||
criu/cr-dump.c | 9 +++
|
||||
criu/cr-restore.c | 2 +
|
||||
criu/crtools.c | 1 +
|
||||
criu/include/cr_options.h | 1 +
|
||||
criu/include/pin-mem.h | 49 +++++++++++++
|
||||
criu/include/restorer.h | 1 +
|
||||
criu/mem.c | 16 +++++
|
||||
criu/pie/restorer.c | 26 ++++++-
|
||||
criu/pin-mem.c | 146 ++++++++++++++++++++++++++++++++++++++
|
||||
criu/seize.c | 6 ++
|
||||
12 files changed, 258 insertions(+), 1 deletion(-)
|
||||
create mode 100644 criu/include/pin-mem.h
|
||||
create mode 100644 criu/pin-mem.c
|
||||
|
||||
diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools
|
||||
index 50a2fa9..98c4135 100644
|
||||
--- a/criu/Makefile.crtools
|
||||
+++ b/criu/Makefile.crtools
|
||||
@@ -90,6 +90,7 @@ obj-y += servicefd.o
|
||||
obj-y += pie-util-vdso.o
|
||||
obj-y += vdso.o
|
||||
obj-y += timens.o
|
||||
+obj-y += pin-mem.o
|
||||
obj-$(CONFIG_HAS_LIBBPF) += bpfmap.o
|
||||
obj-$(CONFIG_COMPAT) += pie-util-vdso-elf32.o
|
||||
CFLAGS_pie-util-vdso-elf32.o += -DCONFIG_VDSO_32
|
||||
diff --git a/criu/config.c b/criu/config.c
|
||||
index 71f99c9..53a5cfd 100644
|
||||
--- a/criu/config.c
|
||||
+++ b/criu/config.c
|
||||
@@ -696,6 +696,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd,
|
||||
{ "pre-dump-mode", required_argument, 0, 1097 },
|
||||
{ "file-validation", required_argument, 0, 1098 },
|
||||
BOOL_OPT("with-cpu-affinity", &opts.with_cpu_affinity),
|
||||
+ BOOL_OPT("pin-memory", &opts.pin_memory),
|
||||
{ "lsm-mount-context", required_argument, 0, 1099 },
|
||||
{ "network-lock", required_argument, 0, 1100 },
|
||||
{},
|
||||
diff --git a/criu/cr-dump.c b/criu/cr-dump.c
|
||||
index 79387fb..5fac9ce 100644
|
||||
--- a/criu/cr-dump.c
|
||||
+++ b/criu/cr-dump.c
|
||||
@@ -86,6 +86,7 @@
|
||||
#include "pidfd-store.h"
|
||||
#include "apparmor.h"
|
||||
#include "asm/dump.h"
|
||||
+#include "pin-mem.h"
|
||||
|
||||
/*
|
||||
* Architectures can overwrite this function to restore register sets that
|
||||
@@ -2058,6 +2059,14 @@ static int cr_dump_finish(int ret)
|
||||
close_service_fd(CR_PROC_FD_OFF);
|
||||
close_image_dir();
|
||||
|
||||
+ if (ret == 0 && opts.pin_memory) {
|
||||
+ pr_info("start restore_task_special_pages\n");
|
||||
+ restore_task_special_pages(0);
|
||||
+ } else if (ret != 0 && opts.pin_memory) {
|
||||
+ pr_info("clear pin mem info\n");
|
||||
+ clear_pin_mem(0);
|
||||
+ }
|
||||
+
|
||||
if (ret) {
|
||||
pr_err("Dumping FAILED.\n");
|
||||
} else {
|
||||
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
|
||||
index 864140f..5514c29 100644
|
||||
--- a/criu/cr-restore.c
|
||||
+++ b/criu/cr-restore.c
|
||||
@@ -3885,6 +3885,8 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
|
||||
task_args, task_args->t->pid, task_args->nr_threads, task_args->clone_restore_fn,
|
||||
task_args->thread_args);
|
||||
|
||||
+ task_args->pin_memory = opts.pin_memory;
|
||||
+
|
||||
/*
|
||||
* An indirect call to task_restore, note it never returns
|
||||
* and restoring core is extremely destructive.
|
||||
diff --git a/criu/crtools.c b/criu/crtools.c
|
||||
index b5a36b9..1b90481 100644
|
||||
--- a/criu/crtools.c
|
||||
+++ b/criu/crtools.c
|
||||
@@ -447,6 +447,7 @@ usage:
|
||||
" can be 'filesize' or 'buildid' (default).\n"
|
||||
" --with-cpu-affinity Allow to restore cpu affinity. Only for hosts with\n"
|
||||
" same cpu quantity.\n"
|
||||
+ " --pin-memory Use pin memory method for checkpoint and restore.\n"
|
||||
"\n"
|
||||
"Check options:\n"
|
||||
" Without options, \"criu check\" checks availability of absolutely required\n"
|
||||
diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h
|
||||
index 3b50e59..61898fd 100644
|
||||
--- a/criu/include/cr_options.h
|
||||
+++ b/criu/include/cr_options.h
|
||||
@@ -190,6 +190,7 @@ struct cr_options {
|
||||
int file_validation_method;
|
||||
/* restore cpu affinity */
|
||||
int with_cpu_affinity;
|
||||
+ int pin_memory;
|
||||
};
|
||||
|
||||
extern struct cr_options opts;
|
||||
diff --git a/criu/include/pin-mem.h b/criu/include/pin-mem.h
|
||||
new file mode 100644
|
||||
index 0000000..7e53b12
|
||||
--- /dev/null
|
||||
+++ b/criu/include/pin-mem.h
|
||||
@@ -0,0 +1,49 @@
|
||||
+#ifndef __CRIU_PIN_MEM_H__
|
||||
+#define __CRIU_PIN_MEM_H__
|
||||
+
|
||||
+#include <stdbool.h>
|
||||
+
|
||||
+#include "vma.pb-c.h"
|
||||
+
|
||||
+#if __has_include("linux/pin_memory.h")
|
||||
+# include <linux/pin_memory.h>
|
||||
+#else
|
||||
+
|
||||
+#define PIN_MEM_MAGIC 0x59
|
||||
+#define _SET_PIN_MEM_AREA 1
|
||||
+#define _CLEAR_PIN_MEM_AREA 2
|
||||
+#define _REMAP_PIN_MEM_AREA 3
|
||||
+#define _DUMP_SEPCIAL_PAGES 6
|
||||
+#define _RETORE_SEPCIAL_PAGES 7
|
||||
+
|
||||
+#define SET_PIN_MEM_AREA _IOW(PIN_MEM_MAGIC, _SET_PIN_MEM_AREA, struct pin_mem_area_set)
|
||||
+#define CLEAR_PIN_MEM_AREA _IOW(PIN_MEM_MAGIC, _CLEAR_PIN_MEM_AREA, int)
|
||||
+#define REMAP_PIN_MEM_AREA _IOW(PIN_MEM_MAGIC, _REMAP_PIN_MEM_AREA, int)
|
||||
+#define DUMP_SPECIAL_PAGES _IOW(PIN_MEM_MAGIC, _DUMP_SEPCIAL_PAGES, int)
|
||||
+#define RETORE_SPECIAL_PAGES _IOW(PIN_MEM_MAGIC, _RETORE_SEPCIAL_PAGES, int)
|
||||
+
|
||||
+#define MAX_PIN_MEM_AREA_NUM 16
|
||||
+
|
||||
+struct _pin_mem_area {
|
||||
+ unsigned long virt_start;
|
||||
+ unsigned long virt_end;
|
||||
+};
|
||||
+
|
||||
+struct pin_mem_area_set {
|
||||
+ unsigned int pid;
|
||||
+ unsigned int area_num;
|
||||
+ struct _pin_mem_area mem_area[MAX_PIN_MEM_AREA_NUM];
|
||||
+};
|
||||
+
|
||||
+#endif /* __has_include("linux/pin_memory.h") */
|
||||
+
|
||||
+#define PIN_MEM_FILE "/dev/pinmem"
|
||||
+#define ONCE_PIN_MEM_SIZE_LIMIT (32 * 1024 * 1024)
|
||||
+
|
||||
+bool should_pin_vmae(VmaEntry *vmae);
|
||||
+int pin_vmae(VmaEntry *vmae, struct pstree_item *item);
|
||||
+int dump_task_special_pages(int pid);
|
||||
+int restore_task_special_pages(int pid);
|
||||
+int clear_pin_mem(int pid);
|
||||
+
|
||||
+#endif /* __CRIU_PIN_MEM_H__ */
|
||||
diff --git a/criu/include/restorer.h b/criu/include/restorer.h
|
||||
index c29d869..e0bdc04 100644
|
||||
--- a/criu/include/restorer.h
|
||||
+++ b/criu/include/restorer.h
|
||||
@@ -232,6 +232,7 @@ struct task_restore_args {
|
||||
int lsm_type;
|
||||
int child_subreaper;
|
||||
bool has_clone3_set_tid;
|
||||
+ bool pin_memory;
|
||||
} __aligned(64);
|
||||
|
||||
/*
|
||||
diff --git a/criu/mem.c b/criu/mem.c
|
||||
index ca74bfb..07efdbe 100644
|
||||
--- a/criu/mem.c
|
||||
+++ b/criu/mem.c
|
||||
@@ -31,6 +31,7 @@
|
||||
#include "prctl.h"
|
||||
#include "compel/infect-util.h"
|
||||
#include "pidfd-store.h"
|
||||
+#include "pin-mem.h"
|
||||
|
||||
#include "protobuf.h"
|
||||
#include "images/pagemap.pb-c.h"
|
||||
@@ -500,6 +501,17 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, struct parasit
|
||||
goto out_xfer;
|
||||
}
|
||||
|
||||
+ if (opts.pin_memory) {
|
||||
+ /* pin memory before dump pages */
|
||||
+ list_for_each_entry(vma_area, &vma_area_list->h, list) {
|
||||
+ if (should_pin_vmae(vma_area->e)
|
||||
+ && pin_vmae(vma_area->e, item) != 0) {
|
||||
+ exit_code = -1;
|
||||
+ goto out_xfer;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
/*
|
||||
* Step 1 -- generate the pagemap
|
||||
*/
|
||||
@@ -509,6 +521,10 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, struct parasit
|
||||
parent_predump_mode = mdc->parent_ie->pre_dump_mode;
|
||||
|
||||
list_for_each_entry(vma_area, &vma_area_list->h, list) {
|
||||
+ if (opts.pin_memory && should_pin_vmae(vma_area->e)) {
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
ret = generate_vma_iovs(item, vma_area, pp, &xfer, args, ctl, &pmc, has_parent, mdc->pre_dump,
|
||||
parent_predump_mode);
|
||||
if (ret < 0)
|
||||
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
|
||||
index 368b5a0..db01ba5 100644
|
||||
--- a/criu/pie/restorer.c
|
||||
+++ b/criu/pie/restorer.c
|
||||
@@ -49,6 +49,7 @@
|
||||
|
||||
#include "shmem.h"
|
||||
#include "restorer.h"
|
||||
+#include "pin-mem.h"
|
||||
|
||||
#ifndef PR_SET_PDEATHSIG
|
||||
#define PR_SET_PDEATHSIG 1
|
||||
@@ -1408,6 +1409,24 @@ int cleanup_current_inotify_events(struct task_restore_args *task_args)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+int remap_vmas(int pid)
|
||||
+{
|
||||
+ int fd, ret = 0;
|
||||
+
|
||||
+ fd = sys_open(PIN_MEM_FILE, O_RDWR, 0);
|
||||
+ if (fd == -1) {
|
||||
+ pr_err("open file: %s fail.\n", PIN_MEM_FILE);
|
||||
+ return -1;;
|
||||
+ }
|
||||
+
|
||||
+ ret = sys_ioctl(fd, REMAP_PIN_MEM_AREA, (unsigned long) &pid);
|
||||
+ if (ret < 0)
|
||||
+ pr_err("remap pin mem fail for pid: %d\n", pid);
|
||||
+ sys_close(fd);
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+
|
||||
/*
|
||||
* The main routine to restore task via sigreturn.
|
||||
* This one is very special, we never return there
|
||||
@@ -1577,7 +1596,12 @@ long __export_restore_task(struct task_restore_args *args)
|
||||
goto core_restore_end;
|
||||
}
|
||||
}
|
||||
-
|
||||
+ if (args->pin_memory) {
|
||||
+ if (remap_vmas(my_pid) < 0) {
|
||||
+ pr_err("Remap vmas fail\n");
|
||||
+ goto core_restore_end;
|
||||
+ }
|
||||
+ }
|
||||
/*
|
||||
* Now read the contents (if any)
|
||||
*/
|
||||
diff --git a/criu/pin-mem.c b/criu/pin-mem.c
|
||||
new file mode 100644
|
||||
index 0000000..b18db97
|
||||
--- /dev/null
|
||||
+++ b/criu/pin-mem.c
|
||||
@@ -0,0 +1,146 @@
|
||||
+#include <fcntl.h>
|
||||
+#include <stdbool.h>
|
||||
+#include <sys/ioctl.h>
|
||||
+
|
||||
+#include "pstree.h"
|
||||
+#include "mem.h"
|
||||
+#include "vma.h"
|
||||
+#include "pin-mem.h"
|
||||
+
|
||||
+bool should_pin_vmae(VmaEntry *vmae)
|
||||
+{
|
||||
+ /*
|
||||
+ * vDSO area must be always dumped because on restore
|
||||
+ * we might need to generate a proxy.
|
||||
+ */
|
||||
+ if (vma_entry_is(vmae, VMA_AREA_VDSO))
|
||||
+ return false;
|
||||
+ /*
|
||||
+ * In turn VVAR area is special and referenced from
|
||||
+ * vDSO area by IP addressing (at least on x86) thus
|
||||
+ * never ever dump its content but always use one provided
|
||||
+ * by the kernel on restore, ie runtime VVAR area must
|
||||
+ * be remapped into proper place..
|
||||
+ */
|
||||
+ if (vma_entry_is(vmae, VMA_AREA_VVAR))
|
||||
+ return false;
|
||||
+
|
||||
+ if (vma_entry_is(vmae, VMA_AREA_AIORING))
|
||||
+ return false;
|
||||
+ if (vma_entry_is(vmae, VMA_ANON_PRIVATE))
|
||||
+ return true;
|
||||
+
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+static int pin_one_pmas(int fd, unsigned long start,
|
||||
+ unsigned long *pend, struct pstree_item *item)
|
||||
+{
|
||||
+ int ret;
|
||||
+ unsigned int index = 0;
|
||||
+ unsigned long end;
|
||||
+ unsigned long next = start;
|
||||
+ struct pin_mem_area_set pmas;
|
||||
+ struct _pin_mem_area *pma;
|
||||
+
|
||||
+ end = *pend;
|
||||
+ while (start < end) {
|
||||
+ next = (start + ONCE_PIN_MEM_SIZE_LIMIT > end)
|
||||
+ ? end : (start + ONCE_PIN_MEM_SIZE_LIMIT);
|
||||
+ pma = &(pmas.mem_area[index]);
|
||||
+ pma->virt_start = start;
|
||||
+ pma->virt_end = next;
|
||||
+ index++;
|
||||
+ start += ONCE_PIN_MEM_SIZE_LIMIT;
|
||||
+ if (index >= MAX_PIN_MEM_AREA_NUM)
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ *pend = next;
|
||||
+ pmas.area_num = index;
|
||||
+ pmas.pid = vpid(item);
|
||||
+
|
||||
+ ret = ioctl(fd, SET_PIN_MEM_AREA, &pmas);
|
||||
+ if (ret < 0)
|
||||
+ pr_err("pin mem fail, errno: %s\n", strerror(errno));
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+int pin_vmae(VmaEntry *vmae, struct pstree_item *item)
|
||||
+{
|
||||
+ int fd;
|
||||
+ int ret = 0;
|
||||
+ unsigned long start, end;
|
||||
+
|
||||
+ fd = open(PIN_MEM_FILE, O_RDWR);
|
||||
+ if (fd < 0) {
|
||||
+ pr_err("open file: %s fail.\n", PIN_MEM_FILE);
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ start = vmae->start;
|
||||
+ while (start < vmae->end) {
|
||||
+ end = vmae->end;
|
||||
+ ret = pin_one_pmas(fd, start, &end, item);
|
||||
+ if (ret < 0)
|
||||
+ break;
|
||||
+ start = end;
|
||||
+ }
|
||||
+ close(fd);
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+int dump_task_special_pages(int pid)
|
||||
+{
|
||||
+ int fd, ret;
|
||||
+
|
||||
+ fd = open(PIN_MEM_FILE, O_RDWR, 0);
|
||||
+ if (fd < 0) {
|
||||
+ pr_warn("error open file: %s\n", PIN_MEM_FILE);
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ ret = ioctl(fd, DUMP_SPECIAL_PAGES, (unsigned long) &pid);
|
||||
+ if (ret < 0)
|
||||
+ pr_warn("No need DUMP_SPECIAL_PAGES for %d\n", pid);
|
||||
+
|
||||
+ close(fd);
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+int restore_task_special_pages(int pid)
|
||||
+{
|
||||
+ int fd, ret;
|
||||
+
|
||||
+ fd = open(PIN_MEM_FILE, O_RDWR, 0);
|
||||
+ if (fd < 0) {
|
||||
+ pr_warn("error open file: %s\n", PIN_MEM_FILE);
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ ret = ioctl(fd, RETORE_SPECIAL_PAGES, (unsigned long) &pid);
|
||||
+ if (ret < 0)
|
||||
+ pr_warn("No need RETORE_SPECIAL_PAGES for %d\n", pid);
|
||||
+
|
||||
+ close(fd);
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+int clear_pin_mem(int pid)
|
||||
+{
|
||||
+ int fd, ret;
|
||||
+
|
||||
+ fd = open(PIN_MEM_FILE, O_RDWR, 0);
|
||||
+ if (fd < 0) {
|
||||
+ pr_warn("error open file: %s\n", PIN_MEM_FILE);
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ ret = ioctl(fd, CLEAR_PIN_MEM_AREA, (unsigned long) &pid);
|
||||
+ if (ret < 0) {
|
||||
+ pr_warn("clear pin mem fail: %d\n", pid);
|
||||
+ }
|
||||
+
|
||||
+ close(fd);
|
||||
+ return ret;
|
||||
+}
|
||||
diff --git a/criu/seize.c b/criu/seize.c
|
||||
index 95bf9ef..8a35c3c 100644
|
||||
--- a/criu/seize.c
|
||||
+++ b/criu/seize.c
|
||||
@@ -23,6 +23,7 @@
|
||||
#include "string.h"
|
||||
#include "xmalloc.h"
|
||||
#include "util.h"
|
||||
+#include "pin-mem.h"
|
||||
|
||||
#define NR_ATTEMPTS 5
|
||||
|
||||
@@ -640,6 +641,11 @@ static void unseize_task_and_threads(const struct pstree_item *item, int st)
|
||||
if (item->pid->state == TASK_DEAD)
|
||||
return;
|
||||
|
||||
+ if (opts.pin_memory) {
|
||||
+ for (i = 0; i < item->nr_threads; i++)
|
||||
+ dump_task_special_pages(item->threads[i].real);
|
||||
+ }
|
||||
+
|
||||
/*
|
||||
* The st is the state we want to switch tasks into,
|
||||
* the item->state is the state task was in when we seized one.
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,213 +0,0 @@
|
||||
From 2911f505eefcfaea582d457c1fa18df34d151954 Mon Sep 17 00:00:00 2001
|
||||
From: Jingxian He <hejingxian@huawei.com>
|
||||
Date: Wed, 19 May 2021 21:33:22 +0800
|
||||
Subject: [PATCH 21/72] pid: add pid recover method for criu
|
||||
|
||||
The default pid recover method cannot recover the task
|
||||
pid every time.
|
||||
We add a new pid recover method by setting the fork_pid of
|
||||
the parent task struct, and the kernel will alloc pid by
|
||||
the fork_pid.
|
||||
The new pid recover method can also avoid other tasks using
|
||||
the dumping task pids.
|
||||
|
||||
Signed-off-by: Jingxian He <hejingxian@huawei.com>
|
||||
---
|
||||
criu/config.c | 1 +
|
||||
criu/cr-restore.c | 27 ++++++++++++++++++++++++++-
|
||||
criu/crtools.c | 1 +
|
||||
criu/include/cr_options.h | 1 +
|
||||
criu/include/pin-mem.h | 4 ++++
|
||||
criu/include/restorer.h | 1 +
|
||||
criu/pie/restorer.c | 25 ++++++++++++++++++++++++-
|
||||
7 files changed, 58 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/criu/config.c b/criu/config.c
|
||||
index 53a5cfd..6dfbb01 100644
|
||||
--- a/criu/config.c
|
||||
+++ b/criu/config.c
|
||||
@@ -699,6 +699,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd,
|
||||
BOOL_OPT("pin-memory", &opts.pin_memory),
|
||||
{ "lsm-mount-context", required_argument, 0, 1099 },
|
||||
{ "network-lock", required_argument, 0, 1100 },
|
||||
+ BOOL_OPT("use-fork-pid", &opts.use_fork_pid),
|
||||
{},
|
||||
};
|
||||
|
||||
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
|
||||
index 5514c29..497dd14 100644
|
||||
--- a/criu/cr-restore.c
|
||||
+++ b/criu/cr-restore.c
|
||||
@@ -80,6 +80,7 @@
|
||||
#include "timens.h"
|
||||
#include "bpfmap.h"
|
||||
#include "apparmor.h"
|
||||
+#include "pin-mem.h"
|
||||
|
||||
#include "parasite-syscall.h"
|
||||
#include "files-reg.h"
|
||||
@@ -1340,6 +1341,23 @@ static int set_next_pid(void *arg)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int write_fork_pid(int pid)
|
||||
+{
|
||||
+ int fd, ret;
|
||||
+
|
||||
+ fd = open(PIN_MEM_FILE, O_RDWR);
|
||||
+ if (fd < 0) {
|
||||
+ pr_warn("error open file: %s\n", PIN_MEM_FILE);
|
||||
+ return -1;
|
||||
+ }
|
||||
+ ret = ioctl(fd, SET_FORK_PID, &pid);
|
||||
+ if (ret < 0) {
|
||||
+ pr_warn("write fork pid fail, errno: %s\n", strerror(errno));
|
||||
+ }
|
||||
+ close(fd);
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
static inline int fork_with_pid(struct pstree_item *item)
|
||||
{
|
||||
struct cr_clone_arg ca;
|
||||
@@ -1424,7 +1442,7 @@ static inline int fork_with_pid(struct pstree_item *item)
|
||||
if (!(ca.clone_flags & CLONE_NEWPID)) {
|
||||
lock_last_pid();
|
||||
|
||||
- if (!kdat.has_clone3_set_tid) {
|
||||
+ if (!kdat.has_clone3_set_tid && !opts.use_fork_pid) {
|
||||
if (external_pidns) {
|
||||
/*
|
||||
* Restoring into another namespace requires a helper
|
||||
@@ -1434,6 +1452,12 @@ static inline int fork_with_pid(struct pstree_item *item)
|
||||
*/
|
||||
ret = call_in_child_process(set_next_pid, (void *)&pid);
|
||||
} else {
|
||||
+ if (opts.use_fork_pid) {
|
||||
+ ret = write_fork_pid(pid);
|
||||
+ if (ret < 0)
|
||||
+ goto err_unlock;
|
||||
+ }
|
||||
+
|
||||
ret = set_next_pid((void *)&pid);
|
||||
}
|
||||
if (ret != 0) {
|
||||
@@ -3886,6 +3910,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
|
||||
task_args->thread_args);
|
||||
|
||||
task_args->pin_memory = opts.pin_memory;
|
||||
+ task_args->use_fork_pid = opts.use_fork_pid;
|
||||
|
||||
/*
|
||||
* An indirect call to task_restore, note it never returns
|
||||
diff --git a/criu/crtools.c b/criu/crtools.c
|
||||
index 1b90481..502acdf 100644
|
||||
--- a/criu/crtools.c
|
||||
+++ b/criu/crtools.c
|
||||
@@ -448,6 +448,7 @@ usage:
|
||||
" --with-cpu-affinity Allow to restore cpu affinity. Only for hosts with\n"
|
||||
" same cpu quantity.\n"
|
||||
" --pin-memory Use pin memory method for checkpoint and restore.\n"
|
||||
+ " --use-fork-pid Allow to restore task pid by setting fork pid of task struct.\n"
|
||||
"\n"
|
||||
"Check options:\n"
|
||||
" Without options, \"criu check\" checks availability of absolutely required\n"
|
||||
diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h
|
||||
index 61898fd..923cc5f 100644
|
||||
--- a/criu/include/cr_options.h
|
||||
+++ b/criu/include/cr_options.h
|
||||
@@ -191,6 +191,7 @@ struct cr_options {
|
||||
/* restore cpu affinity */
|
||||
int with_cpu_affinity;
|
||||
int pin_memory;
|
||||
+ int use_fork_pid;
|
||||
};
|
||||
|
||||
extern struct cr_options opts;
|
||||
diff --git a/criu/include/pin-mem.h b/criu/include/pin-mem.h
|
||||
index 7e53b12..2b54996 100644
|
||||
--- a/criu/include/pin-mem.h
|
||||
+++ b/criu/include/pin-mem.h
|
||||
@@ -6,6 +6,7 @@
|
||||
#include "vma.pb-c.h"
|
||||
|
||||
#if __has_include("linux/pin_memory.h")
|
||||
+# define CONFIG_PID_RESERVE
|
||||
# include <linux/pin_memory.h>
|
||||
#else
|
||||
|
||||
@@ -35,6 +36,9 @@ struct pin_mem_area_set {
|
||||
struct _pin_mem_area mem_area[MAX_PIN_MEM_AREA_NUM];
|
||||
};
|
||||
|
||||
+#define _SET_FORK_PID 8
|
||||
+#define SET_FORK_PID _IOW(PIN_MEM_MAGIC, _SET_FORK_PID, int)
|
||||
+
|
||||
#endif /* __has_include("linux/pin_memory.h") */
|
||||
|
||||
#define PIN_MEM_FILE "/dev/pinmem"
|
||||
diff --git a/criu/include/restorer.h b/criu/include/restorer.h
|
||||
index e0bdc04..93f87f4 100644
|
||||
--- a/criu/include/restorer.h
|
||||
+++ b/criu/include/restorer.h
|
||||
@@ -233,6 +233,7 @@ struct task_restore_args {
|
||||
int child_subreaper;
|
||||
bool has_clone3_set_tid;
|
||||
bool pin_memory;
|
||||
+ bool use_fork_pid;
|
||||
} __aligned(64);
|
||||
|
||||
/*
|
||||
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
|
||||
index db01ba5..1317582 100644
|
||||
--- a/criu/pie/restorer.c
|
||||
+++ b/criu/pie/restorer.c
|
||||
@@ -1426,6 +1426,22 @@ int remap_vmas(int pid)
|
||||
return ret;
|
||||
}
|
||||
|
||||
+int write_fork_pid(int pid)
|
||||
+{
|
||||
+ int fd, ret;
|
||||
+
|
||||
+ fd = sys_open(PIN_MEM_FILE, O_RDWR, 0);
|
||||
+ if (fd < 0) {
|
||||
+ pr_warn("error open file: %s\n", PIN_MEM_FILE);
|
||||
+ return -1;
|
||||
+ }
|
||||
+ ret = sys_ioctl(fd, SET_FORK_PID, (unsigned long) &pid);
|
||||
+ if (ret < 0) {
|
||||
+ pr_warn("write fork pid fail fail: %d\n", pid);
|
||||
+ }
|
||||
+ sys_close(fd);
|
||||
+ return ret;
|
||||
+}
|
||||
|
||||
/*
|
||||
* The main routine to restore task via sigreturn.
|
||||
@@ -1815,7 +1831,7 @@ long __export_restore_task(struct task_restore_args *args)
|
||||
long parent_tid;
|
||||
int i, fd = -1;
|
||||
|
||||
- if (!args->has_clone3_set_tid) {
|
||||
+ if (!args->has_clone3_set_tid && !args->use_fork_pid) {
|
||||
/* One level pid ns hierarhy */
|
||||
fd = sys_openat(args->proc_fd, LAST_PID_PATH, O_RDWR, 0);
|
||||
if (fd < 0) {
|
||||
@@ -1847,6 +1863,13 @@ long __export_restore_task(struct task_restore_args *args)
|
||||
pr_debug("Using clone3 to restore the process\n");
|
||||
RUN_CLONE3_RESTORE_FN(ret, c_args, sizeof(c_args), &thread_args[i],
|
||||
args->clone_restore_fn);
|
||||
+ } else if (args->use_fork_pid) {
|
||||
+ if (write_fork_pid(thread_args[i].pid) < 0) {
|
||||
+ pr_err("Clone fail with fork pid\n");
|
||||
+ mutex_unlock(&task_entries_local->last_pid_mutex);
|
||||
+ goto core_restore_end;
|
||||
+ }
|
||||
+ RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid, thread_args, args->clone_restore_fn);
|
||||
} else {
|
||||
last_pid_len =
|
||||
std_vprint_num(last_pid_buf, sizeof(last_pid_buf), thread_args[i].pid - 1, &s);
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,621 +0,0 @@
|
||||
From 33c351e18eddc2517f799c1cac20790ebabddbc8 Mon Sep 17 00:00:00 2001
|
||||
From: Jingxian He <hejingxian@huawei.com>
|
||||
Date: Wed, 19 May 2021 21:45:03 +0800
|
||||
Subject: [PATCH 22/72] notifier: add notifier calling method for checkpoint
|
||||
and restore
|
||||
|
||||
Add notifier calling method for checkpoint and restore during kernel module upgrading.
|
||||
|
||||
Signed-off-by: Xiaoguang Li <lixiaoguang2@huawei.com>
|
||||
Signed-off-by: He Jingxian <hejingxian@huawei.com>
|
||||
Signed-off-by: fu.lin <fu.lin10@huawei.com>
|
||||
---
|
||||
criu/config.c | 1 +
|
||||
criu/cr-dump.c | 34 +++++++++++
|
||||
criu/cr-restore.c | 18 +++++-
|
||||
criu/crtools.c | 2 +
|
||||
criu/include/cr_options.h | 1 +
|
||||
criu/include/notifier.h | 44 +++++++++++++++
|
||||
criu/include/restorer.h | 1 +
|
||||
criu/include/util.h | 2 +
|
||||
criu/pie/restorer.c | 116 ++++++++++++++++++++++++++++++++++----
|
||||
criu/pie/util.c | 91 ++++++++++++++++++++++++++++++
|
||||
10 files changed, 297 insertions(+), 13 deletions(-)
|
||||
create mode 100644 criu/include/notifier.h
|
||||
|
||||
diff --git a/criu/config.c b/criu/config.c
|
||||
index 6dfbb01..5d1cff6 100644
|
||||
--- a/criu/config.c
|
||||
+++ b/criu/config.c
|
||||
@@ -700,6 +700,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd,
|
||||
{ "lsm-mount-context", required_argument, 0, 1099 },
|
||||
{ "network-lock", required_argument, 0, 1100 },
|
||||
BOOL_OPT("use-fork-pid", &opts.use_fork_pid),
|
||||
+ BOOL_OPT("with-notifier", &opts.with_notifier_kup),
|
||||
{},
|
||||
};
|
||||
|
||||
diff --git a/criu/cr-dump.c b/criu/cr-dump.c
|
||||
index 5fac9ce..50a2f9b 100644
|
||||
--- a/criu/cr-dump.c
|
||||
+++ b/criu/cr-dump.c
|
||||
@@ -87,6 +87,7 @@
|
||||
#include "apparmor.h"
|
||||
#include "asm/dump.h"
|
||||
#include "pin-mem.h"
|
||||
+#include "notifier.h"
|
||||
|
||||
/*
|
||||
* Architectures can overwrite this function to restore register sets that
|
||||
@@ -1981,6 +1982,8 @@ static int cr_lazy_mem_dump(void)
|
||||
return ret;
|
||||
}
|
||||
|
||||
+static enum notifier_state notifier_state = NOTHING_COMPLETE;
|
||||
+
|
||||
static int cr_dump_finish(int ret)
|
||||
{
|
||||
int post_dump_ret = 0;
|
||||
@@ -2067,6 +2070,20 @@ static int cr_dump_finish(int ret)
|
||||
clear_pin_mem(0);
|
||||
}
|
||||
|
||||
+ if (ret != 0 && opts.with_notifier_kup) {
|
||||
+ pr_info("call notifier rollback\n");
|
||||
+ switch (notifier_state) {
|
||||
+ case PRE_FREEZE_COMPLETE:
|
||||
+ notifier_kup(PRE_FREEZE, ROLLBACK, true);
|
||||
+ break;
|
||||
+ case FREEZE_TO_KILL_COMPLETE:
|
||||
+ notifier_kup(FREEZE_TO_KILL, ROLLBACK, true);
|
||||
+ break;
|
||||
+ default:
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
if (ret) {
|
||||
pr_err("Dumping FAILED.\n");
|
||||
} else {
|
||||
@@ -2100,6 +2117,14 @@ int cr_dump_tasks(pid_t pid)
|
||||
goto err;
|
||||
root_item->pid->real = pid;
|
||||
|
||||
+ if (notifier_kup(PRE_FREEZE, PREPARE, opts.with_notifier_kup)) {
|
||||
+ /* disable the rollback function because we have already rolled back. */
|
||||
+ opts.with_notifier_kup = false;
|
||||
+ pr_err("call notifier: %d err\n", PRE_FREEZE);
|
||||
+ goto err;
|
||||
+ } else
|
||||
+ notifier_state = PRE_FREEZE_COMPLETE;
|
||||
+
|
||||
pre_dump_ret = run_scripts(ACT_PRE_DUMP);
|
||||
if (pre_dump_ret != 0) {
|
||||
pr_err("Pre dump script failed with %d!\n", pre_dump_ret);
|
||||
@@ -2258,6 +2283,15 @@ int cr_dump_tasks(pid_t pid)
|
||||
ret = write_img_inventory(&he);
|
||||
if (ret)
|
||||
goto err;
|
||||
+
|
||||
+ ret = notifier_kup(FREEZE_TO_KILL, PREPARE, opts.with_notifier_kup);
|
||||
+ if (ret) {
|
||||
+ opts.with_notifier_kup = false;
|
||||
+ pr_err("call notifier:%d err\n", FREEZE_TO_KILL);
|
||||
+ goto err;
|
||||
+ } else
|
||||
+ notifier_state = FREEZE_TO_KILL_COMPLETE;
|
||||
+
|
||||
err:
|
||||
if (parent_ie)
|
||||
inventory_entry__free_unpacked(parent_ie, NULL);
|
||||
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
|
||||
index 497dd14..03511b6 100644
|
||||
--- a/criu/cr-restore.c
|
||||
+++ b/criu/cr-restore.c
|
||||
@@ -81,6 +81,7 @@
|
||||
#include "bpfmap.h"
|
||||
#include "apparmor.h"
|
||||
#include "pin-mem.h"
|
||||
+#include "notifier.h"
|
||||
|
||||
#include "parasite-syscall.h"
|
||||
#include "files-reg.h"
|
||||
@@ -1951,6 +1952,7 @@ static int restore_task_with_children(void *_arg)
|
||||
return 0;
|
||||
|
||||
err:
|
||||
+ do_notifier_rollback(opts.with_notifier_kup, POST_UPDATE_KERNEL_COMPLETE);
|
||||
if (current->parent == NULL)
|
||||
futex_abort_and_wake(&task_entries->nr_in_progress);
|
||||
exit(1);
|
||||
@@ -2451,8 +2453,10 @@ skip_ns_bouncing:
|
||||
*/
|
||||
attach_to_tasks(root_seized);
|
||||
|
||||
- if (restore_switch_stage(CR_STATE_RESTORE_CREDS))
|
||||
+ if (restore_switch_stage(CR_STATE_RESTORE_CREDS)) {
|
||||
+ pr_err("Can't switch to CR_STATE_RESTORE_CREDS stage\n");
|
||||
goto out_kill_network_unlocked;
|
||||
+ }
|
||||
|
||||
timing_stop(TIME_RESTORE);
|
||||
|
||||
@@ -2631,6 +2635,15 @@ int cr_restore_tasks(void)
|
||||
goto clean_cgroup;
|
||||
|
||||
ret = restore_root_task(root_item);
|
||||
+ if (ret)
|
||||
+ goto err;
|
||||
+
|
||||
+ ret = notifier_kup(POST_RUN, PREPARE, opts.with_notifier_kup);
|
||||
+ if (ret < 0) {
|
||||
+ opts.with_notifier_kup = false;
|
||||
+ pr_err("calling POST_RUN notifier list return err\n");
|
||||
+ }
|
||||
+
|
||||
clean_cgroup:
|
||||
fini_cgroup();
|
||||
err:
|
||||
@@ -3922,6 +3935,9 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
|
||||
err:
|
||||
free_mappings(&self_vmas);
|
||||
err_nv:
|
||||
+ if (current->parent == NULL && opts.with_notifier_kup)
|
||||
+ do_notifier_rollback(true, POST_UPDATE_KERNEL_COMPLETE);
|
||||
+
|
||||
/* Just to be sure */
|
||||
exit(1);
|
||||
return -1;
|
||||
diff --git a/criu/crtools.c b/criu/crtools.c
|
||||
index 502acdf..1d08620 100644
|
||||
--- a/criu/crtools.c
|
||||
+++ b/criu/crtools.c
|
||||
@@ -449,6 +449,8 @@ usage:
|
||||
" same cpu quantity.\n"
|
||||
" --pin-memory Use pin memory method for checkpoint and restore.\n"
|
||||
" --use-fork-pid Allow to restore task pid by setting fork pid of task struct.\n"
|
||||
+ " --with-notifier Allow to checkpoint/restore kup notifier chain.\n"
|
||||
+ " This feature needs the kernel assistance.\n"
|
||||
"\n"
|
||||
"Check options:\n"
|
||||
" Without options, \"criu check\" checks availability of absolutely required\n"
|
||||
diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h
|
||||
index 923cc5f..039edba 100644
|
||||
--- a/criu/include/cr_options.h
|
||||
+++ b/criu/include/cr_options.h
|
||||
@@ -192,6 +192,7 @@ struct cr_options {
|
||||
int with_cpu_affinity;
|
||||
int pin_memory;
|
||||
int use_fork_pid;
|
||||
+ int with_notifier_kup;
|
||||
};
|
||||
|
||||
extern struct cr_options opts;
|
||||
diff --git a/criu/include/notifier.h b/criu/include/notifier.h
|
||||
new file mode 100644
|
||||
index 0000000..e4972a7
|
||||
--- /dev/null
|
||||
+++ b/criu/include/notifier.h
|
||||
@@ -0,0 +1,44 @@
|
||||
+#ifndef __CRIU_NOTIFIER_H__
|
||||
+#define __CRIU_NOTIFIER_H__
|
||||
+
|
||||
+#define NOTIFY_PROC_PATH "/sys/kernel/modrestore/nvwa_notifier"
|
||||
+
|
||||
+#if __has_include("linux/modrestore.h")
|
||||
+# define CONFIG_EULEROS_MODRESTORE_NOTIFY /* useless, historical factors */
|
||||
+# include <linux/modrestore.h>
|
||||
+#else
|
||||
+enum KUP_HOOK_POINT {
|
||||
+ PRE_FREEZE,
|
||||
+ FREEZE_TO_KILL,
|
||||
+ PRE_UPDATE_KERNEL,
|
||||
+ POST_UPDATE_KERNEL,
|
||||
+ UNFREEZE_TO_RUN,
|
||||
+ POST_RUN,
|
||||
+
|
||||
+ KUP_HOOK_MAX,
|
||||
+};
|
||||
+
|
||||
+enum nvwa_cmd {
|
||||
+ PREPARE = 0,
|
||||
+ ROLLBACK,
|
||||
+
|
||||
+ NVWA_CMD_MAX,
|
||||
+};
|
||||
+#endif
|
||||
+
|
||||
+enum notifier_state {
|
||||
+ NOTHING_COMPLETE,
|
||||
+ PRE_FREEZE_COMPLETE,
|
||||
+ FREEZE_TO_KILL_COMPLETE,
|
||||
+ PRE_UPDATE_KERNEL_COMPLETE,
|
||||
+ POST_UPDATE_KERNEL_COMPLETE,
|
||||
+ UNFREEZE_TO_RUN_COMPLETE,
|
||||
+ POST_RUN_COMPLETE,
|
||||
+
|
||||
+ NOTIFIER_ROLLBACK_DONE = 0xfc17173b, /* has done rollback */
|
||||
+};
|
||||
+
|
||||
+int notifier_kup(enum KUP_HOOK_POINT, enum nvwa_cmd, bool);
|
||||
+void do_notifier_rollback(bool, enum notifier_state);
|
||||
+
|
||||
+#endif /* __CRIU_NOTIFIER_H__ */
|
||||
diff --git a/criu/include/restorer.h b/criu/include/restorer.h
|
||||
index 93f87f4..2f7345b 100644
|
||||
--- a/criu/include/restorer.h
|
||||
+++ b/criu/include/restorer.h
|
||||
@@ -234,6 +234,7 @@ struct task_restore_args {
|
||||
bool has_clone3_set_tid;
|
||||
bool pin_memory;
|
||||
bool use_fork_pid;
|
||||
+ bool with_notifier_kup;
|
||||
} __aligned(64);
|
||||
|
||||
/*
|
||||
diff --git a/criu/include/util.h b/criu/include/util.h
|
||||
index 1c0b3c7..e0049a6 100644
|
||||
--- a/criu/include/util.h
|
||||
+++ b/criu/include/util.h
|
||||
@@ -13,6 +13,8 @@
|
||||
#include <sys/sysmacros.h>
|
||||
#include <dirent.h>
|
||||
#include <poll.h>
|
||||
+#include <sys/stat.h>
|
||||
+#include <fcntl.h>
|
||||
|
||||
#include "int.h"
|
||||
#include "common/compiler.h"
|
||||
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
|
||||
index 1317582..4a1d38d 100644
|
||||
--- a/criu/pie/restorer.c
|
||||
+++ b/criu/pie/restorer.c
|
||||
@@ -36,6 +36,7 @@
|
||||
#include "vma.h"
|
||||
#include "uffd.h"
|
||||
#include "sched.h"
|
||||
+#include "notifier.h"
|
||||
|
||||
#include "common/lock.h"
|
||||
#include "common/page.h"
|
||||
@@ -77,6 +78,7 @@
|
||||
|
||||
static struct task_entries *task_entries_local;
|
||||
static futex_t thread_inprogress;
|
||||
+static futex_t thread_start;
|
||||
static pid_t *helpers;
|
||||
static int n_helpers;
|
||||
static pid_t *zombies;
|
||||
@@ -118,10 +120,28 @@ void parasite_cleanup(void)
|
||||
|
||||
extern void cr_restore_rt(void) asm("__cr_restore_rt") __attribute__((visibility("hidden")));
|
||||
|
||||
+static int args_with_notifier_kup;
|
||||
+static enum notifier_state notifier_state = POST_UPDATE_KERNEL_COMPLETE;
|
||||
+static futex_t notifier_done;
|
||||
+
|
||||
static void sigchld_handler(int signal, siginfo_t *siginfo, void *data)
|
||||
{
|
||||
char *r;
|
||||
int i;
|
||||
+ rt_sigaction_t act;
|
||||
+
|
||||
+ if (signal == SIGSEGV || signal == SIGBUS || signal == SIGILL) {
|
||||
+ /* Make sure we exit with the right signal at the end. So for instance
|
||||
+ * the core will be dumped if enabled. */
|
||||
+ pr_info("recv signal: %d\n", signal);
|
||||
+ do_notifier_rollback(args_with_notifier_kup, notifier_state);
|
||||
+ ksigemptyset (&act.rt_sa_mask);
|
||||
+ act.rt_sa_flags = SA_SIGINFO | SA_RESTART;
|
||||
+ act.rt_sa_handler = (rt_sighandler_t)SIG_DFL;
|
||||
+ sys_sigaction(signal, &act, NULL, sizeof(k_rtsigset_t));
|
||||
+ sys_kill(sys_getpid(),signal);
|
||||
+ return;
|
||||
+ }
|
||||
|
||||
/* We can ignore helpers that die, we expect them to after
|
||||
* CR_STATE_RESTORE is finished. */
|
||||
@@ -148,10 +168,14 @@ static void sigchld_handler(int signal, siginfo_t *siginfo, void *data)
|
||||
|
||||
pr_info("Task %d %s %d\n", siginfo->si_pid, r, siginfo->si_status);
|
||||
|
||||
+ pr_info("%s: trace do_notifier_rollback\n", __func__);
|
||||
+ do_notifier_rollback(args_with_notifier_kup, notifier_state);
|
||||
futex_abort_and_wake(&task_entries_local->nr_in_progress);
|
||||
/* sa_restorer may be unmaped, so we can't go back to userspace*/
|
||||
sys_kill(sys_getpid(), SIGSTOP);
|
||||
sys_exit_group(1);
|
||||
+
|
||||
+ /* for notifier, do nothing when receiving SIGCHLD signal */
|
||||
}
|
||||
|
||||
static int lsm_set_label(char *label, char *type, int procfd)
|
||||
@@ -616,6 +640,27 @@ static void noinline rst_sigreturn(unsigned long new_sp, struct rt_sigframe *sig
|
||||
ARCH_RT_SIGRETURN(new_sp, sigframe);
|
||||
}
|
||||
|
||||
+/* Notice: only one task, so it isn't necessary to consider concurrency. */
|
||||
+static int do_notifier(bool *notify)
|
||||
+{
|
||||
+ int retval = 0;
|
||||
+
|
||||
+ if (!*notify)
|
||||
+ return 0;
|
||||
+
|
||||
+ pr_info("unfreeze_to_run restore notifier\n");
|
||||
+ retval = notifier_kup(UNFREEZE_TO_RUN, PREPARE, true);
|
||||
+ if (retval) {
|
||||
+ *notify = false;
|
||||
+ notifier_state = NOTIFIER_ROLLBACK_DONE;
|
||||
+ pr_err("call notifier: %d err\n", UNFREEZE_TO_RUN);
|
||||
+ }
|
||||
+
|
||||
+ notifier_state = UNFREEZE_TO_RUN_COMPLETE;
|
||||
+
|
||||
+ return retval;
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Threads restoration via sigreturn. Note it's locked
|
||||
* routine and calls for unlock at the end.
|
||||
@@ -654,12 +699,18 @@ long __export_restore_thread(struct thread_restore_args *args)
|
||||
|
||||
pr_info("%ld: Restored\n", sys_gettid());
|
||||
|
||||
- restore_finish_stage(task_entries_local, CR_STATE_RESTORE);
|
||||
+ if (!!(restore_finish_stage(task_entries_local, CR_STATE_RESTORE) & FUTEX_ABORT_FLAG)) {
|
||||
+ pr_err("%s: abort by CR_STATE_RESTORE\n", __func__);
|
||||
+ goto core_restore_end;
|
||||
+ }
|
||||
|
||||
if (restore_signals(args->siginfo, args->siginfo_n, false))
|
||||
goto core_restore_end;
|
||||
|
||||
- restore_finish_stage(task_entries_local, CR_STATE_RESTORE_SIGCHLD);
|
||||
+ if (!!(restore_finish_stage(task_entries_local, CR_STATE_RESTORE_SIGCHLD) & FUTEX_ABORT_FLAG)) {
|
||||
+ pr_err("%s: abort by CR_STATE_RESTORE_SIGCHLD\n", __func__);
|
||||
+ goto core_restore_end;
|
||||
+ }
|
||||
|
||||
/*
|
||||
* Make sure it's before creds, since it's privileged
|
||||
@@ -674,16 +725,29 @@ long __export_restore_thread(struct thread_restore_args *args)
|
||||
if (ret)
|
||||
BUG();
|
||||
|
||||
- restore_finish_stage(task_entries_local, CR_STATE_RESTORE_CREDS);
|
||||
+ if (!!(restore_finish_stage(task_entries_local, CR_STATE_RESTORE_CREDS) & FUTEX_ABORT_FLAG)) {
|
||||
+ pr_err("%s: abort by CR_STATE_RESTORE_CREDS\n", __func__);
|
||||
+ goto core_restore_end;
|
||||
+ }
|
||||
|
||||
futex_dec_and_wake(&thread_inprogress);
|
||||
+ futex_wait_while(&thread_start, 0);
|
||||
+ if (!!(futex_get(&thread_start) & FUTEX_ABORT_FLAG)) {
|
||||
+ pr_err("%s: abort by thread_start\n", __func__);
|
||||
+ goto wait_notifier;
|
||||
+ }
|
||||
|
||||
new_sp = (long)rt_sigframe + RT_SIGFRAME_OFFSET(rt_sigframe);
|
||||
rst_sigreturn(new_sp, rt_sigframe);
|
||||
|
||||
core_restore_end:
|
||||
- pr_err("Restorer abnormal termination for %ld\n", sys_getpid());
|
||||
- futex_abort_and_wake(&task_entries_local->nr_in_progress);
|
||||
+ futex_abort_and_wake(&thread_start);
|
||||
+ futex_abort_and_wake(&task_entries_local->start);
|
||||
+
|
||||
+wait_notifier:
|
||||
+ pr_err("%s: Restorer abnormal termination for %ld\n", __func__, sys_getpid());
|
||||
+ futex_wait_while(&notifier_done, 0);
|
||||
+
|
||||
sys_exit_group(1);
|
||||
return -1;
|
||||
}
|
||||
@@ -1465,6 +1529,10 @@ long __export_restore_task(struct task_restore_args *args)
|
||||
rt_sigaction_t act;
|
||||
bool has_vdso_proxy;
|
||||
|
||||
+ futex_set(&thread_inprogress, 1);
|
||||
+ futex_set(&thread_start, 0);
|
||||
+ futex_set(&notifier_done, 0);
|
||||
+
|
||||
bootstrap_start = args->bootstrap_start;
|
||||
bootstrap_len = args->bootstrap_len;
|
||||
|
||||
@@ -1481,6 +1549,7 @@ long __export_restore_task(struct task_restore_args *args)
|
||||
#ifdef ARCH_HAS_LONG_PAGES
|
||||
__page_size = args->page_size;
|
||||
#endif
|
||||
+ args_with_notifier_kup = args->with_notifier_kup;
|
||||
|
||||
ksigfillset(&act.rt_sa_mask);
|
||||
act.rt_sa_handler = sigchld_handler;
|
||||
@@ -1895,7 +1964,8 @@ long __export_restore_task(struct task_restore_args *args)
|
||||
pr_err("Unable to create a thread: %ld\n", ret);
|
||||
mutex_unlock(&task_entries_local->last_pid_mutex);
|
||||
goto core_restore_end;
|
||||
- }
|
||||
+ } else
|
||||
+ futex_inc(&thread_inprogress);
|
||||
}
|
||||
|
||||
mutex_unlock(&task_entries_local->last_pid_mutex);
|
||||
@@ -1919,7 +1989,14 @@ long __export_restore_task(struct task_restore_args *args)
|
||||
|
||||
pr_info("%ld: Restored\n", sys_getpid());
|
||||
|
||||
- restore_finish_stage(task_entries_local, CR_STATE_RESTORE);
|
||||
+ if (!!(restore_finish_stage(task_entries_local, CR_STATE_RESTORE) & FUTEX_ABORT_FLAG)) {
|
||||
+ pr_err("%s: abort by CR_STATE_RESTORE\n", __func__);
|
||||
+ goto core_restore_end;
|
||||
+ }
|
||||
+
|
||||
+ ret = do_notifier(&args->with_notifier_kup);
|
||||
+ if (ret)
|
||||
+ goto core_restore_end;
|
||||
|
||||
if (wait_helpers(args) < 0)
|
||||
goto core_restore_end;
|
||||
@@ -1965,7 +2042,8 @@ long __export_restore_task(struct task_restore_args *args)
|
||||
if (ret)
|
||||
goto core_restore_end;
|
||||
|
||||
- restore_finish_stage(task_entries_local, CR_STATE_RESTORE_SIGCHLD);
|
||||
+ if (!!(restore_finish_stage(task_entries_local, CR_STATE_RESTORE_SIGCHLD) & FUTEX_ABORT_FLAG))
|
||||
+ goto core_restore_end;
|
||||
|
||||
rst_tcp_socks_all(args);
|
||||
|
||||
@@ -1986,15 +2064,20 @@ long __export_restore_task(struct task_restore_args *args)
|
||||
ret = ret || restore_pdeath_sig(args->t);
|
||||
ret = ret || restore_child_subreaper(args->child_subreaper);
|
||||
|
||||
- futex_set_and_wake(&thread_inprogress, args->nr_threads);
|
||||
-
|
||||
- restore_finish_stage(task_entries_local, CR_STATE_RESTORE_CREDS);
|
||||
+ if (!!(restore_finish_stage(task_entries_local, CR_STATE_RESTORE_CREDS) & FUTEX_ABORT_FLAG))
|
||||
+ goto core_restore_end;
|
||||
|
||||
if (ret)
|
||||
BUG();
|
||||
|
||||
/* Wait until children stop to use args->task_entries */
|
||||
futex_wait_while_gt(&thread_inprogress, 1);
|
||||
+ if (!!(futex_get(&thread_start) & FUTEX_ABORT_FLAG)) {
|
||||
+ pr_err("%s: terminate by main thread futex_start\n", __func__);
|
||||
+ goto handle_notifier;
|
||||
+ }
|
||||
+
|
||||
+ futex_set_and_wake(&thread_start, 1);
|
||||
|
||||
sys_close(args->proc_fd);
|
||||
std_log_set_fd(-1);
|
||||
@@ -2030,8 +2113,17 @@ long __export_restore_task(struct task_restore_args *args)
|
||||
rst_sigreturn(new_sp, rt_sigframe);
|
||||
|
||||
core_restore_end:
|
||||
- futex_abort_and_wake(&task_entries_local->nr_in_progress);
|
||||
+ futex_abort_and_wake(&thread_start);
|
||||
+ futex_abort_and_wake(&task_entries_local->start);
|
||||
+
|
||||
+handle_notifier:
|
||||
+ do_notifier_rollback(args->with_notifier_kup, notifier_state);
|
||||
+
|
||||
+ futex_abort_and_wake(&task_entries_local->nr_in_progress); /* notify the criu main process */
|
||||
pr_err("Restorer fail %ld\n", sys_getpid());
|
||||
+
|
||||
+ futex_set_and_wake(&notifier_done, 1); /* wake all other threads to exit */
|
||||
+
|
||||
sys_exit_group(1);
|
||||
return -1;
|
||||
}
|
||||
diff --git a/criu/pie/util.c b/criu/pie/util.c
|
||||
index e7a5a9f..9871db7 100644
|
||||
--- a/criu/pie/util.c
|
||||
+++ b/criu/pie/util.c
|
||||
@@ -11,6 +11,7 @@
|
||||
#include "fcntl.h"
|
||||
#include "log.h"
|
||||
#include "util-pie.h"
|
||||
+#include "notifier.h"
|
||||
|
||||
#ifdef CR_NOGLIBC
|
||||
#include <compel/plugins/std/syscall.h>
|
||||
@@ -52,3 +53,93 @@ err_close:
|
||||
__sys(close)(fd);
|
||||
return -1;
|
||||
}
|
||||
+
|
||||
+#define KUP_BUF_SIZE 256
|
||||
+
|
||||
+static int int_to_string(unsigned number, char *buf, size_t total) {
|
||||
+ unsigned remainder, quotient, i, len;
|
||||
+
|
||||
+ quotient = number;
|
||||
+ len = 0;
|
||||
+ do {
|
||||
+ quotient /= 10;
|
||||
+ len += 1;
|
||||
+ } while (quotient > 0);
|
||||
+
|
||||
+ if (len > total - 1)
|
||||
+ return -1;
|
||||
+
|
||||
+ quotient = number;
|
||||
+ i = 1;
|
||||
+ do {
|
||||
+ remainder = quotient % 10;
|
||||
+ quotient = quotient / 10;
|
||||
+ buf[len-i] = '0' + remainder;
|
||||
+ i++;
|
||||
+ } while (quotient > 0);
|
||||
+ buf[len] = '\0';
|
||||
+
|
||||
+ return len == 0 ? -1 : len;
|
||||
+}
|
||||
+
|
||||
+int notifier_kup(enum KUP_HOOK_POINT action, enum nvwa_cmd cmd, bool enable)
|
||||
+{
|
||||
+ int fd, count = 0, retval = 0;
|
||||
+ char buf[KUP_BUF_SIZE] = {0};
|
||||
+
|
||||
+ if (!enable)
|
||||
+ return 0;
|
||||
+
|
||||
+ fd = __sys(open)(NOTIFY_PROC_PATH, O_WRONLY, 0);
|
||||
+ if (fd == -EACCES) {
|
||||
+ /* there is no privilege to open the file, ignore this condition. */
|
||||
+ pr_info("%s: open %s failed, retval: %d (-EACCES)\n",
|
||||
+ __func__, NOTIFY_PROC_PATH, -EACCES);
|
||||
+ return 0;
|
||||
+ } else if (fd < 0) {
|
||||
+ __pr_perror("%s: Can't open %s: %d\n", __func__, NOTIFY_PROC_PATH, fd);
|
||||
+ return fd;
|
||||
+ }
|
||||
+
|
||||
+ retval = int_to_string(action, buf, sizeof(buf)-count);
|
||||
+ if (retval <= 0) {
|
||||
+ __pr_perror("%s: int_to_string error\n", __func__);
|
||||
+ goto err_close;
|
||||
+ }
|
||||
+
|
||||
+ buf[retval] = ':';
|
||||
+ count = retval + 1;
|
||||
+
|
||||
+ retval = int_to_string(cmd, buf+count, sizeof(buf)-count);
|
||||
+ if (retval <= 0) {
|
||||
+ __pr_perror("%s: int_to_string error\n", __func__);
|
||||
+ goto err_close;
|
||||
+ }
|
||||
+
|
||||
+ count += retval;
|
||||
+ retval = __sys(write)(fd, buf, count);
|
||||
+ if (retval < 0)
|
||||
+ __pr_perror("%s: Can't write to %s\n", __func__, NOTIFY_PROC_PATH);
|
||||
+
|
||||
+err_close:
|
||||
+ __sys(close)(fd);
|
||||
+
|
||||
+ return retval < 0 ? -1 : 0;
|
||||
+}
|
||||
+
|
||||
+void do_notifier_rollback(bool rollback, enum notifier_state status)
|
||||
+{
|
||||
+ if (!rollback)
|
||||
+ return;
|
||||
+
|
||||
+ switch (status) {
|
||||
+ case POST_UPDATE_KERNEL_COMPLETE:
|
||||
+ notifier_kup(POST_UPDATE_KERNEL, ROLLBACK, true);
|
||||
+ break;
|
||||
+ case UNFREEZE_TO_RUN_COMPLETE:
|
||||
+ notifier_kup(UNFREEZE_TO_RUN, ROLLBACK, true);
|
||||
+ break;
|
||||
+ default:
|
||||
+ break;
|
||||
+ }
|
||||
+}
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,62 +0,0 @@
|
||||
From 48c6f11d0b3c5f0549ff52cce0c8ce31ad67518f Mon Sep 17 00:00:00 2001
|
||||
From: Jingxian He <hejingxian@huawei.com>
|
||||
Date: Wed, 19 May 2021 21:49:15 +0800
|
||||
Subject: [PATCH 23/72] block-device: dump block device as regular file
|
||||
|
||||
Add block device dump and restore method for kernel module upgrading.
|
||||
|
||||
Conflict:NA
|
||||
Reference:https://gitee.com/src-openeuler/criu/pulls/21
|
||||
Signed-off-by: Xiaoguang Li <lixiaoguang2@huawei.com>
|
||||
---
|
||||
criu/files.c | 27 +++++++++++++++++++++++++++
|
||||
1 file changed, 27 insertions(+)
|
||||
|
||||
diff --git a/criu/files.c b/criu/files.c
|
||||
index 93754fb..f262d80 100644
|
||||
--- a/criu/files.c
|
||||
+++ b/criu/files.c
|
||||
@@ -442,6 +442,30 @@ static const struct fdtype_ops *get_mem_dev_ops(struct fd_parms *p, int minor)
|
||||
return ops;
|
||||
}
|
||||
|
||||
+static int dump_blkdev(struct fd_parms *p, int lfd, FdinfoEntry *e)
|
||||
+{
|
||||
+ struct fd_link *link_old = p->link;
|
||||
+ int maj = major(p->stat.st_rdev);
|
||||
+ const struct fdtype_ops *ops;
|
||||
+ int err;
|
||||
+
|
||||
+ switch (maj) {
|
||||
+ case SCSI_DISK0_MAJOR:
|
||||
+ ops = &regfile_dump_ops;
|
||||
+ break;
|
||||
+ default: {
|
||||
+ char more[32] = "block_dev";
|
||||
+
|
||||
+ err = dump_unsupp_fd(p, lfd, "blk", more, e);
|
||||
+ p->link = link_old;
|
||||
+ return err;
|
||||
+ }
|
||||
+ }
|
||||
+ err = do_dump_gen_file(p, lfd, ops, e);
|
||||
+ p->link = link_old;
|
||||
+ return err;
|
||||
+}
|
||||
+
|
||||
static int dump_chrdev(struct fd_parms *p, int lfd, FdinfoEntry *e)
|
||||
{
|
||||
struct fd_link *link_old = p->link;
|
||||
@@ -508,6 +532,9 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts,
|
||||
p.fd_ctl = ctl; /* Some dump_opts require this to talk to parasite */
|
||||
p.dfds = dfds; /* epoll needs to verify if target fd exist */
|
||||
|
||||
+ if (S_ISBLK(p.stat.st_mode))
|
||||
+ return dump_blkdev(&p, lfd, e);
|
||||
+
|
||||
if (S_ISSOCK(p.stat.st_mode))
|
||||
return dump_socket(&p, lfd, e);
|
||||
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,316 +0,0 @@
|
||||
From 9bb9af3189ae8a7eadf975befa2aa30b7227259e Mon Sep 17 00:00:00 2001
|
||||
From: Jingxian He <hejingxian@huawei.com>
|
||||
Date: Wed, 19 May 2021 21:52:49 +0800
|
||||
Subject: [PATCH 24/72] anon-inode: add support for anon inode fd
|
||||
|
||||
Add support for anon inode fd dump and restore during module upgrade.
|
||||
|
||||
Signed-off-by: Xiaoguang Li <lixiaoguang2@huawei.com>
|
||||
Signed-off-by: Jingxian He <hejingxian@huawei.com>
|
||||
Signed-off-by: fu.lin <fu.lin10@huawei.com>
|
||||
---
|
||||
criu/cr-restore.c | 3 +++
|
||||
criu/files-reg.c | 3 ++-
|
||||
criu/include/image.h | 1 +
|
||||
criu/include/mem.h | 1 +
|
||||
criu/include/restorer.h | 6 ++++++
|
||||
criu/mem.c | 23 +++++++++++++++++++++++
|
||||
criu/pie/restorer.c | 37 +++++++++++++++++++++++++++++++++++++
|
||||
criu/proc_parse.c | 31 ++++++++++++++++++++++++++++---
|
||||
images/vma.proto | 1 +
|
||||
9 files changed, 102 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
|
||||
index 03511b6..b805265 100644
|
||||
--- a/criu/cr-restore.c
|
||||
+++ b/criu/cr-restore.c
|
||||
@@ -971,6 +971,8 @@ static int restore_one_alive_task(int pid, CoreEntry *core)
|
||||
if (prepare_vmas(current, ta))
|
||||
return -1;
|
||||
|
||||
+ if (prepare_vma_names(current, ta))
|
||||
+ return -1;
|
||||
/*
|
||||
* Sockets have to be restored in their network namespaces,
|
||||
* so a task namespace has to be restored after sockets.
|
||||
@@ -3733,6 +3735,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
|
||||
#endif
|
||||
|
||||
RST_MEM_FIXUP_PPTR(task_args->vmas);
|
||||
+ RST_MEM_FIXUP_PPTR(task_args->vma_names);
|
||||
RST_MEM_FIXUP_PPTR(task_args->rings);
|
||||
RST_MEM_FIXUP_PPTR(task_args->tcp_socks);
|
||||
RST_MEM_FIXUP_PPTR(task_args->timerfd);
|
||||
diff --git a/criu/files-reg.c b/criu/files-reg.c
|
||||
index ee54d1d..fbdf811 100644
|
||||
--- a/criu/files-reg.c
|
||||
+++ b/criu/files-reg.c
|
||||
@@ -2137,7 +2137,7 @@ int do_open_reg_noseek_flags(int ns_root_fd, struct reg_file_info *rfi, void *ar
|
||||
|
||||
/* unnamed temporary files are restored as ghost files */
|
||||
flags &= ~O_TMPFILE;
|
||||
-
|
||||
+ pr_info("openat path is: %s\n", rfi->path);
|
||||
fd = openat(ns_root_fd, rfi->path, flags);
|
||||
if (fd < 0) {
|
||||
pr_perror("Can't open file %s on restore", rfi->path);
|
||||
@@ -2307,6 +2307,7 @@ int collect_filemap(struct vma_area *vma)
|
||||
if (!fd)
|
||||
return -1;
|
||||
|
||||
+ pr_info("find fd for %lx, shmid: %lx\n", vma->e->start, vma->e->shmid);
|
||||
vma->vmfd = fd;
|
||||
vma->vm_open = open_filemap;
|
||||
return 0;
|
||||
diff --git a/criu/include/image.h b/criu/include/image.h
|
||||
index 14659db..f598de7 100644
|
||||
--- a/criu/include/image.h
|
||||
+++ b/criu/include/image.h
|
||||
@@ -84,6 +84,7 @@
|
||||
#define VMA_AREA_VVAR (1 << 12)
|
||||
#define VMA_AREA_AIORING (1 << 13)
|
||||
#define VMA_AREA_MEMFD (1 << 14)
|
||||
+#define VMA_AREA_ANON_INODE (1 << 15)
|
||||
|
||||
#define VMA_CLOSE (1 << 28)
|
||||
#define VMA_NO_PROT_WRITE (1 << 29)
|
||||
diff --git a/criu/include/mem.h b/criu/include/mem.h
|
||||
index 03574ea..ccf8da6 100644
|
||||
--- a/criu/include/mem.h
|
||||
+++ b/criu/include/mem.h
|
||||
@@ -45,6 +45,7 @@ extern int parasite_dump_pages_seized(struct pstree_item *item, struct vm_area_l
|
||||
struct task_restore_args;
|
||||
int open_vmas(struct pstree_item *t);
|
||||
int prepare_vmas(struct pstree_item *t, struct task_restore_args *ta);
|
||||
+int prepare_vma_names(struct pstree_item *t, struct task_restore_args *ta);
|
||||
int unmap_guard_pages(struct pstree_item *t);
|
||||
int prepare_mappings(struct pstree_item *t);
|
||||
bool should_dump_page(VmaEntry *vmae, u64 pme);
|
||||
diff --git a/criu/include/restorer.h b/criu/include/restorer.h
|
||||
index 2f7345b..a81cc1b 100644
|
||||
--- a/criu/include/restorer.h
|
||||
+++ b/criu/include/restorer.h
|
||||
@@ -134,6 +134,10 @@ struct restore_vma_io {
|
||||
|
||||
#define RIO_SIZE(niovs) (sizeof(struct restore_vma_io) + (niovs) * sizeof(struct iovec))
|
||||
|
||||
+struct vma_names {
|
||||
+ char name[PATH_MAX];
|
||||
+};
|
||||
+
|
||||
struct task_restore_args {
|
||||
struct thread_restore_args *t; /* thread group leader */
|
||||
|
||||
@@ -157,6 +161,8 @@ struct task_restore_args {
|
||||
VmaEntry *vmas;
|
||||
unsigned int vmas_n;
|
||||
|
||||
+ struct vma_names *vma_names;
|
||||
+
|
||||
int vma_ios_fd;
|
||||
struct restore_vma_io *vma_ios;
|
||||
unsigned int vma_ios_n;
|
||||
diff --git a/criu/mem.c b/criu/mem.c
|
||||
index 07efdbe..00965f0 100644
|
||||
--- a/criu/mem.c
|
||||
+++ b/criu/mem.c
|
||||
@@ -525,6 +525,9 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, struct parasit
|
||||
continue;
|
||||
}
|
||||
|
||||
+ if (vma_entry_is(vma_area->e, VMA_AREA_ANON_INODE))
|
||||
+ continue;
|
||||
+
|
||||
ret = generate_vma_iovs(item, vma_area, pp, &xfer, args, ctl, &pmc, has_parent, mdc->pre_dump,
|
||||
parent_predump_mode);
|
||||
if (ret < 0)
|
||||
@@ -1355,6 +1358,9 @@ int open_vmas(struct pstree_item *t)
|
||||
filemap_ctx_init(false);
|
||||
|
||||
list_for_each_entry(vma, &vmas->h, list) {
|
||||
+ if (vma_area_is(vma, VMA_AREA_ANON_INODE))
|
||||
+ continue;
|
||||
+
|
||||
if (!vma_area_is(vma, VMA_AREA_REGULAR) || !vma->vm_open)
|
||||
continue;
|
||||
|
||||
@@ -1437,3 +1443,20 @@ int prepare_vmas(struct pstree_item *t, struct task_restore_args *ta)
|
||||
|
||||
return prepare_vma_ios(t, ta);
|
||||
}
|
||||
+
|
||||
+int prepare_vma_names(struct pstree_item *t, struct task_restore_args *ta)
|
||||
+{
|
||||
+ struct vma_area *vma;
|
||||
+ struct vm_area_list *vmas = &rsti(t)->vmas;
|
||||
+ ta->vma_names = (struct vma_names *)rst_mem_align_cpos(RM_PRIVATE);
|
||||
+
|
||||
+ list_for_each_entry(vma, &vmas->h, list) {
|
||||
+ struct vma_names *vma_names;
|
||||
+ vma_names = rst_mem_alloc(sizeof(*vma_names), RM_PRIVATE);
|
||||
+ if (!vma_names)
|
||||
+ return -1;
|
||||
+
|
||||
+ memcpy(vma_names->name, vma->e->name, strlen(vma->e->name) + 1);
|
||||
+ }
|
||||
+ return 0;
|
||||
+}
|
||||
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
|
||||
index 4a1d38d..549bbd6 100644
|
||||
--- a/criu/pie/restorer.c
|
||||
+++ b/criu/pie/restorer.c
|
||||
@@ -68,6 +68,27 @@
|
||||
#define FALLOC_FL_PUNCH_HOLE 0x02
|
||||
#endif
|
||||
|
||||
+#define ANON_PROC_PATH "/sys/kernel/modrestore/anon_state_restore"
|
||||
+
|
||||
+static int restore_anon_mapping(VmaEntry *vma_entry, struct vma_names *vma_name)
|
||||
+{
|
||||
+ int fd;
|
||||
+
|
||||
+ fd = sys_open(ANON_PROC_PATH, O_WRONLY, 0);
|
||||
+ if (fd < 0) {
|
||||
+ pr_info("anon sys fs open fail:%s\n", ANON_PROC_PATH);
|
||||
+ return fd;
|
||||
+ }
|
||||
+ pr_info("restore anon mapping: %s\n", vma_name->name);
|
||||
+
|
||||
+ if (sys_write(fd, vma_name->name, 4096) < 0) {
|
||||
+ sys_close(fd);
|
||||
+ return -1;
|
||||
+ }
|
||||
+ sys_close(fd);
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
#define sys_prctl_safe(opcode, val1, val2, val3) \
|
||||
({ \
|
||||
long __ret = sys_prctl(opcode, val1, val2, val3, 0); \
|
||||
@@ -1348,6 +1369,10 @@ static bool can_restore_vdso(struct task_restore_args *args)
|
||||
}
|
||||
|
||||
/*
|
||||
+ * pr_info("anon vma name:%s\n", vma_name->name);
|
||||
+ * if (restore_anon_mapping(vma_entry, vma_name) < 0)
|
||||
+ * goto core_restore_end;
|
||||
+ * continue;
|
||||
* There is a use-case for restoring vvar alone: valgrind (see #488).
|
||||
* On the other side, we expect that vvar is touched by application
|
||||
* only from vdso. So, we can put a stale page and proceed restore
|
||||
@@ -1528,6 +1553,7 @@ long __export_restore_task(struct task_restore_args *args)
|
||||
pid_t my_pid = sys_getpid();
|
||||
rt_sigaction_t act;
|
||||
bool has_vdso_proxy;
|
||||
+ struct vma_names *vma_name;
|
||||
|
||||
futex_set(&thread_inprogress, 1);
|
||||
futex_set(&thread_start, 0);
|
||||
@@ -1667,6 +1693,14 @@ long __export_restore_task(struct task_restore_args *args)
|
||||
*/
|
||||
for (i = 0; i < args->vmas_n; i++) {
|
||||
vma_entry = args->vmas + i;
|
||||
+ vma_name = args->vma_names + i;
|
||||
+
|
||||
+ if (vma_entry_is(vma_entry, VMA_AREA_ANON_INODE)) {
|
||||
+ pr_info("anon vma name:%s\n", vma_name->name);
|
||||
+ if (restore_anon_mapping(vma_entry, vma_name) < 0)
|
||||
+ goto core_restore_end;
|
||||
+ continue;
|
||||
+ }
|
||||
|
||||
if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR) && !vma_entry_is(vma_entry, VMA_AREA_AIORING))
|
||||
continue;
|
||||
@@ -1784,6 +1818,9 @@ long __export_restore_task(struct task_restore_args *args)
|
||||
if (!vma_entry->has_madv || !vma_entry->madv)
|
||||
continue;
|
||||
|
||||
+ if (vma_entry_is(vma_entry, VMA_AREA_ANON_INODE))
|
||||
+ continue;
|
||||
+
|
||||
for (m = 0; m < sizeof(vma_entry->madv) * 8; m++) {
|
||||
if (vma_entry->madv & (1ul << m)) {
|
||||
ret = sys_madvise(vma_entry->start, vma_entry_len(vma_entry), m);
|
||||
diff --git a/criu/proc_parse.c b/criu/proc_parse.c
|
||||
index f3491e7..e41d43a 100644
|
||||
--- a/criu/proc_parse.c
|
||||
+++ b/criu/proc_parse.c
|
||||
@@ -76,6 +76,7 @@ static char *buf = __buf.buf;
|
||||
*/
|
||||
|
||||
#define AIO_FNAME "/[aio]"
|
||||
+#define ANON_FNAME "anon_inode"
|
||||
|
||||
/* check the @line starts with "%lx-%lx" format */
|
||||
static bool __is_vma_range_fmt(char *line)
|
||||
@@ -171,8 +172,17 @@ static void parse_vma_vmflags(char *buf, struct vma_area *vma_area)
|
||||
* only exception is VVAR area that mapped by the kernel as
|
||||
* VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP
|
||||
*/
|
||||
- if (io_pf && !vma_area_is(vma_area, VMA_AREA_VVAR) && !vma_entry_is(vma_area->e, VMA_FILE_SHARED))
|
||||
- vma_area->e->status |= VMA_UNSUPP;
|
||||
+ /* There are many types of io/pf vm_map, not only vvar, but also
|
||||
+ * anon_inode, and char device.
|
||||
+ * For anon_inode and char device, we use anon_notifier to restore
|
||||
+ * status. Therefore, we disable the broken code here.
|
||||
+ */
|
||||
+// if (io_pf && !vma_area_is(vma_area, VMA_AREA_VVAR) &&
|
||||
+// !vma_area_is(vma_area, VMA_AREA_ANON_INODE))
|
||||
+// {
|
||||
+// pr_info("set current status tp VMA_UNSUPP\n");
|
||||
+// vma_area->e->status |= VMA_UNSUPP;
|
||||
+// }
|
||||
|
||||
if (vma_area->e->madv)
|
||||
vma_area->e->has_madv = true;
|
||||
@@ -437,6 +447,21 @@ static int vma_get_mapfile(const char *fname, struct vma_area *vma, DIR *mfd, st
|
||||
return 0;
|
||||
}
|
||||
|
||||
+ if (!strncmp(fname, ANON_FNAME, sizeof(ANON_FNAME) - 1)) {
|
||||
+ /*anon_inode*/
|
||||
+ close_safe(vm_file_fd);
|
||||
+ vma->e->status = VMA_AREA_ANON_INODE;
|
||||
+ vma->e->name = xmalloc(PATH_MAX);
|
||||
+ if (!vma->e->name) {
|
||||
+ pr_err("alloc vma name of anon-inode fail.\n");
|
||||
+ return -1;
|
||||
+ }
|
||||
+ snprintf(vma->e->name, PATH_MAX - 1, "%"PRIx64"-%"PRIx64 " %s", vma->e->start, vma->e->end, fname);
|
||||
+ vma->e->name[PATH_MAX - 1] = 0;
|
||||
+ pr_info("set vma_area status to: %d, name:%s\n", vma->e->status, vma->e->name);
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
pr_err("Unknown shit %o (%s)\n", buf.st_mode, fname);
|
||||
return -1;
|
||||
}
|
||||
@@ -566,6 +591,7 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area, const char *file_pat
|
||||
vma_area->e->shmid = prev->e->shmid;
|
||||
vma_area->vmst = prev->vmst;
|
||||
vma_area->mnt_id = prev->mnt_id;
|
||||
+ vma_area->e->name = prev->e->name;
|
||||
|
||||
if (!(vma_area->e->status & VMA_AREA_SYSVIPC)) {
|
||||
vma_area->e->status &= ~(VMA_FILE_PRIVATE | VMA_FILE_SHARED);
|
||||
@@ -728,7 +754,6 @@ int parse_smaps(pid_t pid, struct vm_area_list *vma_area_list, dump_filemap_t du
|
||||
if (IS_ERR(str))
|
||||
goto err;
|
||||
eof = (str == NULL);
|
||||
-
|
||||
if (!eof && !__is_vma_range_fmt(str)) {
|
||||
if (!strncmp(str, "Nonlinear", 9)) {
|
||||
BUG_ON(!vma_area);
|
||||
diff --git a/images/vma.proto b/images/vma.proto
|
||||
index 0c07d51..1aa30f9 100644
|
||||
--- a/images/vma.proto
|
||||
+++ b/images/vma.proto
|
||||
@@ -24,4 +24,5 @@ message vma_entry {
|
||||
|
||||
/* file status flags */
|
||||
optional uint32 fdflags = 10 [(criu).hex = true];
|
||||
+ required string name = 11;
|
||||
}
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,784 +0,0 @@
|
||||
From 2eebb9de411333628ce8fc5894f072b6ed6179e0 Mon Sep 17 00:00:00 2001
|
||||
From: Jingxian He <hejingxian@huawei.com>
|
||||
Date: Wed, 19 May 2021 21:55:34 +0800
|
||||
Subject: [PATCH 25/72] char_dev: add support for char device dump and restore
|
||||
|
||||
Add support for char device dump and restore during module upgrade.
|
||||
|
||||
`/sys/kernel/repairing_device` provides the char device whitelist
|
||||
with `IOCTL_CMD_{NEEDREPAIR, REPAIR}` command besides the internal
|
||||
device list.
|
||||
The device modules could use `mures_{add, del}_devname()` to add, or
|
||||
delete the char device whitelist dynamically.
|
||||
|
||||
Signed-off-by: Xiaoguang Li <lixiaoguang2@huawei.com>
|
||||
Signed-off-by: Jingxian He <hejingxian@huawei.com>
|
||||
Signed-off-by: fu.lin <fulin10@huawei.com>
|
||||
---
|
||||
criu/Makefile.crtools | 2 +
|
||||
criu/config.c | 1 +
|
||||
criu/cr-dump.c | 4 ++
|
||||
criu/cr-restore.c | 4 +-
|
||||
criu/crtools.c | 2 +
|
||||
criu/devname.c | 130 +++++++++++++++++++++++++++++++++++
|
||||
criu/files-chr.c | 104 ++++++++++++++++++++++++++++
|
||||
criu/files-reg.c | 6 +-
|
||||
criu/files.c | 93 ++++++++++++++++++++++++-
|
||||
criu/include/cr_options.h | 1 +
|
||||
criu/include/files-chr.h | 25 +++++++
|
||||
criu/include/files.h | 6 ++
|
||||
criu/include/image-desc.h | 1 +
|
||||
criu/include/image.h | 1 +
|
||||
criu/include/protobuf-desc.h | 1 +
|
||||
criu/mem.c | 7 +-
|
||||
criu/proc_parse.c | 21 +++++-
|
||||
images/Makefile | 1 +
|
||||
images/chr.proto | 12 ++++
|
||||
images/fdinfo.proto | 3 +
|
||||
20 files changed, 417 insertions(+), 8 deletions(-)
|
||||
create mode 100644 criu/devname.c
|
||||
create mode 100644 criu/files-chr.c
|
||||
create mode 100644 criu/include/files-chr.h
|
||||
create mode 100644 images/chr.proto
|
||||
|
||||
diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools
|
||||
index 98c4135..2e82912 100644
|
||||
--- a/criu/Makefile.crtools
|
||||
+++ b/criu/Makefile.crtools
|
||||
@@ -91,6 +91,8 @@ obj-y += pie-util-vdso.o
|
||||
obj-y += vdso.o
|
||||
obj-y += timens.o
|
||||
obj-y += pin-mem.o
|
||||
+obj-y += devname.o
|
||||
+obj-y += files-chr.o
|
||||
obj-$(CONFIG_HAS_LIBBPF) += bpfmap.o
|
||||
obj-$(CONFIG_COMPAT) += pie-util-vdso-elf32.o
|
||||
CFLAGS_pie-util-vdso-elf32.o += -DCONFIG_VDSO_32
|
||||
diff --git a/criu/config.c b/criu/config.c
|
||||
index 5d1cff6..03cad66 100644
|
||||
--- a/criu/config.c
|
||||
+++ b/criu/config.c
|
||||
@@ -701,6 +701,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd,
|
||||
{ "network-lock", required_argument, 0, 1100 },
|
||||
BOOL_OPT("use-fork-pid", &opts.use_fork_pid),
|
||||
BOOL_OPT("with-notifier", &opts.with_notifier_kup),
|
||||
+ BOOL_OPT("dump-char-dev", &opts.dump_char_dev),
|
||||
{},
|
||||
};
|
||||
|
||||
diff --git a/criu/cr-dump.c b/criu/cr-dump.c
|
||||
index 50a2f9b..fd17413 100644
|
||||
--- a/criu/cr-dump.c
|
||||
+++ b/criu/cr-dump.c
|
||||
@@ -88,6 +88,7 @@
|
||||
#include "asm/dump.h"
|
||||
#include "pin-mem.h"
|
||||
#include "notifier.h"
|
||||
+#include "files-chr.h"
|
||||
|
||||
/*
|
||||
* Architectures can overwrite this function to restore register sets that
|
||||
@@ -1880,6 +1881,9 @@ int cr_pre_dump_tasks(pid_t pid)
|
||||
*/
|
||||
rlimit_unlimit_nofile();
|
||||
|
||||
+ if (opts.dump_char_dev && parse_devname() < 0)
|
||||
+ goto err;
|
||||
+
|
||||
root_item = alloc_pstree_item();
|
||||
if (!root_item)
|
||||
goto err;
|
||||
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
|
||||
index b805265..2904a75 100644
|
||||
--- a/criu/cr-restore.c
|
||||
+++ b/criu/cr-restore.c
|
||||
@@ -332,11 +332,11 @@ static int root_prepare_shared(void)
|
||||
if (pi->pid->state == TASK_HELPER)
|
||||
continue;
|
||||
|
||||
- ret = prepare_mm_pid(pi);
|
||||
+ ret = prepare_fd_pid(pi);
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
||||
- ret = prepare_fd_pid(pi);
|
||||
+ ret = prepare_mm_pid(pi);
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
||||
diff --git a/criu/crtools.c b/criu/crtools.c
|
||||
index 1d08620..dc6d603 100644
|
||||
--- a/criu/crtools.c
|
||||
+++ b/criu/crtools.c
|
||||
@@ -451,6 +451,8 @@ usage:
|
||||
" --use-fork-pid Allow to restore task pid by setting fork pid of task struct.\n"
|
||||
" --with-notifier Allow to checkpoint/restore kup notifier chain.\n"
|
||||
" This feature needs the kernel assistance.\n"
|
||||
+ " --dump-char-dev Dump char dev files as normal file with repair cmd\n"
|
||||
+ \
|
||||
"\n"
|
||||
"Check options:\n"
|
||||
" Without options, \"criu check\" checks availability of absolutely required\n"
|
||||
diff --git a/criu/devname.c b/criu/devname.c
|
||||
new file mode 100644
|
||||
index 0000000..5f6fbed
|
||||
--- /dev/null
|
||||
+++ b/criu/devname.c
|
||||
@@ -0,0 +1,130 @@
|
||||
+#include <stdbool.h>
|
||||
+#include <stdio.h>
|
||||
+#include <stdlib.h>
|
||||
+#include <string.h>
|
||||
+
|
||||
+#include "log.h"
|
||||
+#include "common/xmalloc.h"
|
||||
+
|
||||
+#define REPAIRING_DEVICE_FILE "/sys/kernel/repairing_device"
|
||||
+#define ASCII_SIZE 128
|
||||
+
|
||||
+static void *root_bucket[ASCII_SIZE];
|
||||
+
|
||||
+static int insert_devname_internal(void *bucket[], const char *name)
|
||||
+{
|
||||
+ void *new = NULL;
|
||||
+ int idx = *name;
|
||||
+
|
||||
+ if (bucket[idx] != NULL)
|
||||
+ return insert_devname_internal(bucket[idx], name+1);
|
||||
+ else if (idx == '\0') {
|
||||
+ new = xmalloc(sizeof(void *));
|
||||
+ if (!new) {
|
||||
+ pr_perror("alloc devname failed\n");
|
||||
+ return -1;
|
||||
+ }
|
||||
+ bucket[idx] = new;
|
||||
+ return 0;
|
||||
+ } else {
|
||||
+ new = xmalloc(sizeof(void *) * ASCII_SIZE);
|
||||
+ if (!new) {
|
||||
+ pr_perror("alloc devname failed\n");
|
||||
+ return -1;
|
||||
+ }
|
||||
+ memset(new, 0, sizeof(void *) * ASCII_SIZE);
|
||||
+ bucket[idx] = new;
|
||||
+ return insert_devname_internal(bucket[idx], name+1);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+int insert_devname(const char *devname)
|
||||
+{
|
||||
+ if (devname == NULL || *devname == '\0') // ignore
|
||||
+ return 0;
|
||||
+
|
||||
+ pr_debug("insert device '%s'\n", devname);
|
||||
+ return insert_devname_internal(root_bucket, devname);
|
||||
+}
|
||||
+
|
||||
+int parse_devname(void)
|
||||
+{
|
||||
+ int retval = -1;
|
||||
+ char *line = NULL;
|
||||
+ size_t len = 0;
|
||||
+ ssize_t nread = 0;
|
||||
+ FILE *fp = NULL;
|
||||
+
|
||||
+ fp = fopen(REPAIRING_DEVICE_FILE, "r");
|
||||
+ if (fp == NULL) {
|
||||
+ pr_info("Unable to open %s, downgrade to use internal whitelist\n",
|
||||
+ REPAIRING_DEVICE_FILE);
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
+ while ((nread = getline(&line, &len, fp)) != -1) {
|
||||
+ if (nread <= 1) // ignore empty string
|
||||
+ continue;
|
||||
+
|
||||
+ line[nread-1] = '\0'; // drop '\n'
|
||||
+ retval = insert_devname(line);
|
||||
+ if (retval != 0)
|
||||
+ goto out;
|
||||
+ }
|
||||
+ retval = 0;
|
||||
+
|
||||
+out:
|
||||
+ free(line);
|
||||
+ fclose(fp);
|
||||
+ return retval;
|
||||
+}
|
||||
+
|
||||
+static const char *steal_devname(const char *name, ssize_t len)
|
||||
+{
|
||||
+ ssize_t off = len;
|
||||
+
|
||||
+ for (off -= 1; off > 0; off--) {
|
||||
+ if (name[off] == '/')
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ return name + off + 1;
|
||||
+}
|
||||
+
|
||||
+static bool find_devname_internal(void *bucket[], const char *name)
|
||||
+{
|
||||
+ int idx = *name;
|
||||
+
|
||||
+ if (*name == '\0' && bucket[idx] != NULL)
|
||||
+ return true;
|
||||
+ else if (bucket[idx] == NULL)
|
||||
+ return false;
|
||||
+ else {
|
||||
+ return find_devname_internal(bucket[idx], name+1);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+bool find_devname(const char *name)
|
||||
+{
|
||||
+ const char *devname;
|
||||
+ size_t len = 0;
|
||||
+ bool found = false;
|
||||
+
|
||||
+ if (name == NULL)
|
||||
+ return false;
|
||||
+ else if ((len = strlen(name)) == 0)
|
||||
+ return false;
|
||||
+
|
||||
+ devname = steal_devname(name, len);
|
||||
+ found = find_devname_internal(root_bucket, devname);
|
||||
+
|
||||
+ pr_debug("device '%s' (original name '%s') %s found in %s\n",
|
||||
+ devname, name, found ? "is" : "isn't", REPAIRING_DEVICE_FILE);
|
||||
+
|
||||
+ /* Compatible with the old version, there are still `strstr` branch in the following */
|
||||
+ found |= (strstr(name, "uverbs") != NULL
|
||||
+ || strstr(name, "rdma_cm") != NULL
|
||||
+ || strstr(name, "umad") != NULL);
|
||||
+
|
||||
+ return found;
|
||||
+}
|
||||
diff --git a/criu/files-chr.c b/criu/files-chr.c
|
||||
new file mode 100644
|
||||
index 0000000..2eb023e
|
||||
--- /dev/null
|
||||
+++ b/criu/files-chr.c
|
||||
@@ -0,0 +1,104 @@
|
||||
+#include <sys/ioctl.h>
|
||||
+
|
||||
+#include "imgset.h"
|
||||
+#include "pstree.h"
|
||||
+#include "files-chr.h"
|
||||
+#include "log.h"
|
||||
+
|
||||
+#include "protobuf.h"
|
||||
+
|
||||
+/* Checks if file descriptor @lfd is infinibandevent */
|
||||
+int is_infiniband_link(char *link)
|
||||
+{
|
||||
+ return is_anon_link_type(link, "[infinibandevent]");
|
||||
+}
|
||||
+
|
||||
+static int chrfile_open(struct file_desc *d, int *new_fd)
|
||||
+{
|
||||
+ int fd, mntns_root;
|
||||
+ int ret = 0;
|
||||
+ struct chrfile_info *ci;
|
||||
+
|
||||
+ ci = container_of(d, struct chrfile_info, d);
|
||||
+
|
||||
+ if (ci->cfe->repair)
|
||||
+ ci->cfe->flags |= O_REPAIR;
|
||||
+
|
||||
+ mntns_root = open_pid_proc(getpid());
|
||||
+ fd = openat(mntns_root, ci->path, ci->cfe->flags);
|
||||
+ if (fd < 0){
|
||||
+ pr_err("open chr file failed\n");
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ if (ci->cfe->repair) {
|
||||
+ ret = ioctl(fd, IOCTL_CMD_REPAIR , ci->cfe->index);
|
||||
+ pr_info("repair ioctl return: %d, index: %d\n", ret, ci->cfe->index);
|
||||
+ if (ret)
|
||||
+ goto err;
|
||||
+ }
|
||||
+
|
||||
+ *new_fd = fd;
|
||||
+ return ret;
|
||||
+err:
|
||||
+ close(fd);
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+static struct file_desc_ops chrfile_desc_ops = {
|
||||
+ .type = FD_TYPES__CHR,
|
||||
+ .open = chrfile_open,
|
||||
+};
|
||||
+
|
||||
+static int collect_one_chrfile(void *o, ProtobufCMessage *base, struct cr_img *i)
|
||||
+{
|
||||
+ struct chrfile_info *ci = o;
|
||||
+ static char dot[] = ".";
|
||||
+
|
||||
+ ci->cfe = pb_msg(base, ChrfileEntry);
|
||||
+ if (ci->cfe->name[1] == '\0')
|
||||
+ ci->path = dot;
|
||||
+ else
|
||||
+ ci->path = ci->cfe->name;
|
||||
+
|
||||
+ pr_info("Collected chr file: %#x, name: %s\n", ci->cfe->id, ci->path);
|
||||
+ file_desc_add(&ci->d, ci->cfe->id, &chrfile_desc_ops);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+struct collect_image_info chrfile_cinfo = {
|
||||
+ .fd_type = CR_FD_CHRFILE,
|
||||
+ .pb_type = PB_CHRFILE,
|
||||
+ .priv_size = sizeof(struct chrfile_info),
|
||||
+ .collect = collect_one_chrfile,
|
||||
+};
|
||||
+
|
||||
+int collect_chr_map(struct pstree_item *me, struct vma_area *vma)
|
||||
+{
|
||||
+ struct list_head *list = &rsti(me)->fds;
|
||||
+ struct fdinfo_list_entry *fle, *tmp;
|
||||
+ struct chrfile_info *ci;
|
||||
+ bool exist_fd;
|
||||
+
|
||||
+
|
||||
+ list_for_each_entry_safe(fle, tmp, list, ps_list) {
|
||||
+ struct file_desc *d = fle->desc;
|
||||
+
|
||||
+ if (d->ops->type != FD_TYPES__CHR)
|
||||
+ continue;
|
||||
+
|
||||
+ ci = container_of(d, struct chrfile_info, d);
|
||||
+ if (!strcmp(vma->e->name, ci->path)) {
|
||||
+ vma->vmfd = d;
|
||||
+ vma->e->fd = fle->fe->fd;
|
||||
+ exist_fd = true;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (!exist_fd)
|
||||
+ return -EEXIST;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
diff --git a/criu/files-reg.c b/criu/files-reg.c
|
||||
index fbdf811..b9576a4 100644
|
||||
--- a/criu/files-reg.c
|
||||
+++ b/criu/files-reg.c
|
||||
@@ -45,6 +45,7 @@
|
||||
#include "fault-injection.h"
|
||||
#include "external.h"
|
||||
#include "memfd.h"
|
||||
+#include "files-chr.h"
|
||||
|
||||
#include "protobuf.h"
|
||||
#include "util.h"
|
||||
@@ -1640,7 +1641,8 @@ int dump_one_reg_file(int lfd, u32 id, const struct fd_parms *p)
|
||||
rfe.has_mnt_id = true;
|
||||
}
|
||||
|
||||
- pr_info("Dumping path for %d fd via self %d [%s]\n", p->fd, lfd, &link->name[1]);
|
||||
+ pr_info("Dumping path for %d fd via self %d [%s], id: %d\n",
|
||||
+ p->fd, lfd, &link->name[1], id);
|
||||
|
||||
/*
|
||||
* The regular path we can handle should start with slash.
|
||||
@@ -2373,7 +2375,7 @@ static int collect_one_regfile(void *o, ProtobufCMessage *base, struct cr_img *i
|
||||
rfi->remap = NULL;
|
||||
rfi->size_mode_checked = false;
|
||||
|
||||
- pr_info("Collected [%s] ID %#x\n", rfi->path, rfi->rfe->id);
|
||||
+ pr_info("Collected regfile [%s] ID %#x\n", rfi->path, rfi->rfe->id);
|
||||
return file_desc_add(&rfi->d, rfi->rfe->id, ®_desc_ops);
|
||||
}
|
||||
|
||||
diff --git a/criu/files.c b/criu/files.c
|
||||
index f262d80..e1681a1 100644
|
||||
--- a/criu/files.c
|
||||
+++ b/criu/files.c
|
||||
@@ -49,6 +49,7 @@
|
||||
#include "kerndat.h"
|
||||
#include "fdstore.h"
|
||||
#include "bpfmap.h"
|
||||
+#include "files-chr.h"
|
||||
|
||||
#include "protobuf.h"
|
||||
#include "util.h"
|
||||
@@ -325,10 +326,32 @@ int do_dump_gen_file(struct fd_parms *p, int lfd, const struct fdtype_ops *ops,
|
||||
e->fd = p->fd;
|
||||
e->flags = p->fd_flags;
|
||||
|
||||
+ pr_info("fdinfoEntry fd: %d\n", e->fd);
|
||||
ret = fd_id_generate(p->pid, e, p);
|
||||
if (ret == 1) /* new ID generated */
|
||||
ret = ops->dump(lfd, e->id, p);
|
||||
- else
|
||||
+ else if (ops->type == FD_TYPES__CHR) {
|
||||
+ /*
|
||||
+ * Sometimes the app_data subprocess may inherit the fd from
|
||||
+ * app_data. Those fds may result the unconditional oops during
|
||||
+ * the restoration of app_data. Therefore, prevent the dump in
|
||||
+ * those condition.
|
||||
+ */
|
||||
+ struct fd_link _link, *link;
|
||||
+
|
||||
+ if (!p->link) {
|
||||
+ if (fill_fdlink(lfd, p, &_link))
|
||||
+ return -1;
|
||||
+ link = &_link;
|
||||
+ } else
|
||||
+ link = p->link;
|
||||
+
|
||||
+ if (find_devname(link->name)) {
|
||||
+ pr_err("char dev '%s' fd %d is owned by multi-processes\n",
|
||||
+ link->name, e->fd);
|
||||
+ ret = -1;
|
||||
+ }
|
||||
+ } else
|
||||
/* Remove locks generated by the fd before going to the next */
|
||||
discard_dup_locks_tail(p->pid, e->fd);
|
||||
|
||||
@@ -466,6 +489,58 @@ static int dump_blkdev(struct fd_parms *p, int lfd, FdinfoEntry *e)
|
||||
return err;
|
||||
}
|
||||
|
||||
+static int dump_chr_file(int lfd, u32 id, const struct fd_parms *p)
|
||||
+{
|
||||
+ int ret;
|
||||
+ struct fd_link _link, *link;
|
||||
+ struct cr_img *img;
|
||||
+ FileEntry fe = FILE_ENTRY__INIT;
|
||||
+ ChrfileEntry cfe = CHRFILE_ENTRY__INIT;
|
||||
+
|
||||
+ if (!p->link) {
|
||||
+ if (fill_fdlink(lfd, p, &_link))
|
||||
+ return -1;
|
||||
+ link = &_link;
|
||||
+ } else
|
||||
+ link = p->link;
|
||||
+
|
||||
+ pr_info("Dumping chr-file fd %d with lfd %d with id %d, name: %s\n", p->fd, lfd, id, link->name);
|
||||
+
|
||||
+ if (strstr(link->name, "(deleted)") != NULL) {
|
||||
+ pr_err("char device '%s' is deleted\n", link->name);
|
||||
+ return -ENXIO;
|
||||
+ }
|
||||
+
|
||||
+ cfe.repair = false;
|
||||
+ if (find_devname(link->name)) {
|
||||
+ ret = ioctl(lfd, IOCTL_CMD_NEEDREPAIR, 0);
|
||||
+ if (ret <= 0) {
|
||||
+ pr_err("ioctl cmd needrepair failed, errno: %d, %s\n", ret, strerror(errno));
|
||||
+ return -1;
|
||||
+ } else {
|
||||
+ pr_info("char device needrepair cmd return: %d\n", ret);
|
||||
+ cfe.index = ret;
|
||||
+ cfe.repair = true;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ cfe.id = id;
|
||||
+ cfe.name = &link->name[1];
|
||||
+ cfe.flags = p->flags;
|
||||
+ fe.type = FD_TYPES__CHR;
|
||||
+ fe.id = cfe.id;
|
||||
+ fe.chr = &cfe;
|
||||
+
|
||||
+ img = img_from_set(glob_imgset, CR_FD_FILES);
|
||||
+ ret = pb_write_one(img, &fe, PB_FILE);
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+const struct fdtype_ops chr_dump_ops = {
|
||||
+ .type = FD_TYPES__CHR,
|
||||
+ .dump = dump_chr_file,
|
||||
+};
|
||||
+
|
||||
static int dump_chrdev(struct fd_parms *p, int lfd, FdinfoEntry *e)
|
||||
{
|
||||
struct fd_link *link_old = p->link;
|
||||
@@ -493,6 +568,10 @@ static int dump_chrdev(struct fd_parms *p, int lfd, FdinfoEntry *e)
|
||||
ops = &tty_dump_ops;
|
||||
break;
|
||||
}
|
||||
+ if (opts.dump_char_dev) {
|
||||
+ ops = &chr_dump_ops;
|
||||
+ break;
|
||||
+ }
|
||||
|
||||
sprintf(more, "%d:%d", maj, minor(p->stat.st_rdev));
|
||||
err = dump_unsupp_fd(p, lfd, "chr", more, e);
|
||||
@@ -559,6 +638,8 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts,
|
||||
ops = &signalfd_dump_ops;
|
||||
else if (is_timerfd_link(link))
|
||||
ops = &timerfd_dump_ops;
|
||||
+ else if (is_infiniband_link(link))
|
||||
+ return 1;
|
||||
#ifdef CONFIG_HAS_LIBBPF
|
||||
else if (is_bpfmap_link(link))
|
||||
ops = &bpfmap_dump_ops;
|
||||
@@ -663,6 +744,11 @@ int dump_task_files_seized(struct parasite_ctl *ctl, struct pstree_item *item, s
|
||||
ret = dump_one_file(item->pid, dfds->fds[i + off], lfds[i], opts + i, ctl, &e, dfds);
|
||||
if (ret)
|
||||
break;
|
||||
+ /* infiniband link file */
|
||||
+ if (ret > 0) {
|
||||
+ ret = 0;
|
||||
+ continue;
|
||||
+ }
|
||||
|
||||
ret = pb_write_one(img, &e, PB_FDINFO);
|
||||
if (ret)
|
||||
@@ -917,6 +1003,7 @@ int prepare_fd_pid(struct pstree_item *item)
|
||||
if (!img)
|
||||
return -1;
|
||||
|
||||
+ pr_info("prepare_fd_pid\n");
|
||||
while (1) {
|
||||
FdinfoEntry *e;
|
||||
|
||||
@@ -1125,6 +1212,7 @@ int setup_and_serve_out(struct fdinfo_list_entry *fle, int new_fd)
|
||||
if (reopen_fd_as(fle->fe->fd, new_fd))
|
||||
return -1;
|
||||
|
||||
+ pr_info("*******flags: %d",fle->fe->flags);
|
||||
if (fcntl(fle->fe->fd, F_SETFD, fle->fe->flags) == -1) {
|
||||
pr_perror("Unable to set file descriptor flags");
|
||||
return -1;
|
||||
@@ -1761,6 +1849,9 @@ static int collect_one_file(void *o, ProtobufCMessage *base, struct cr_img *i)
|
||||
ret = collect_one_file_entry(fe, fe->bpf->id, &fe->bpf->base, &bpfmap_cinfo);
|
||||
break;
|
||||
#endif
|
||||
+ case FD_TYPES__CHR:
|
||||
+ ret = collect_one_file_entry(fe, fe->chr->id, &fe->chr->base, &chrfile_cinfo);
|
||||
+ break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h
|
||||
index 039edba..226acb2 100644
|
||||
--- a/criu/include/cr_options.h
|
||||
+++ b/criu/include/cr_options.h
|
||||
@@ -193,6 +193,7 @@ struct cr_options {
|
||||
int pin_memory;
|
||||
int use_fork_pid;
|
||||
int with_notifier_kup;
|
||||
+ int dump_char_dev;
|
||||
};
|
||||
|
||||
extern struct cr_options opts;
|
||||
diff --git a/criu/include/files-chr.h b/criu/include/files-chr.h
|
||||
new file mode 100644
|
||||
index 0000000..5be11f5
|
||||
--- /dev/null
|
||||
+++ b/criu/include/files-chr.h
|
||||
@@ -0,0 +1,25 @@
|
||||
+#ifndef __CRIU_FILES_CHR_H__
|
||||
+#define __CRIU_FILES_CHR_H__
|
||||
+
|
||||
+#include "files.h"
|
||||
+
|
||||
+#include "images/chr.pb-c.h"
|
||||
+
|
||||
+struct chrfile_info {
|
||||
+ struct file_desc d;
|
||||
+ ChrfileEntry *cfe;
|
||||
+ char *path;
|
||||
+};
|
||||
+
|
||||
+extern struct collect_image_info chrfile_cinfo;
|
||||
+
|
||||
+extern const struct fdtype_ops chr_dump_ops;
|
||||
+extern int collect_chr_map(struct pstree_item *me, struct vma_area *);
|
||||
+
|
||||
+int parse_devname(void);
|
||||
+bool find_devname(const char *name);
|
||||
+
|
||||
+int collect_chr_map(struct pstree_item *me, struct vma_area *vma);
|
||||
+int is_infiniband_link(char *link);
|
||||
+
|
||||
+#endif /* __CRIU_FILES_CHR_H__ */
|
||||
diff --git a/criu/include/files.h b/criu/include/files.h
|
||||
index 96face7..1d979a9 100644
|
||||
--- a/criu/include/files.h
|
||||
+++ b/criu/include/files.h
|
||||
@@ -15,6 +15,12 @@
|
||||
#include "images/fown.pb-c.h"
|
||||
#include "images/vma.pb-c.h"
|
||||
|
||||
+#ifndef IOCTL_CMD_NEEDREPAIR
|
||||
+#define IOCTL_CMD_NEEDREPAIR 0x00100000UL
|
||||
+#define IOCTL_CMD_REPAIR 0x00200000UL
|
||||
+#define O_REPAIR 040000000
|
||||
+#endif
|
||||
+
|
||||
struct parasite_drain_fd;
|
||||
struct pstree_item;
|
||||
struct file_desc;
|
||||
diff --git a/criu/include/image-desc.h b/criu/include/image-desc.h
|
||||
index 5045bae..e35f8b2 100644
|
||||
--- a/criu/include/image-desc.h
|
||||
+++ b/criu/include/image-desc.h
|
||||
@@ -115,6 +115,7 @@ enum {
|
||||
CR_FD_MEMFD_FILE,
|
||||
|
||||
CR_FD_AUTOFS,
|
||||
+ CR_FD_CHRFILE,
|
||||
|
||||
CR_FD_MAX
|
||||
};
|
||||
diff --git a/criu/include/image.h b/criu/include/image.h
|
||||
index f598de7..66492c0 100644
|
||||
--- a/criu/include/image.h
|
||||
+++ b/criu/include/image.h
|
||||
@@ -85,6 +85,7 @@
|
||||
#define VMA_AREA_AIORING (1 << 13)
|
||||
#define VMA_AREA_MEMFD (1 << 14)
|
||||
#define VMA_AREA_ANON_INODE (1 << 15)
|
||||
+#define VMA_AREA_CHR (1 << 16)
|
||||
|
||||
#define VMA_CLOSE (1 << 28)
|
||||
#define VMA_NO_PROT_WRITE (1 << 29)
|
||||
diff --git a/criu/include/protobuf-desc.h b/criu/include/protobuf-desc.h
|
||||
index 3824de1..2468e8f 100644
|
||||
--- a/criu/include/protobuf-desc.h
|
||||
+++ b/criu/include/protobuf-desc.h
|
||||
@@ -70,6 +70,7 @@ enum {
|
||||
PB_BPFMAP_FILE,
|
||||
PB_BPFMAP_DATA,
|
||||
PB_APPARMOR,
|
||||
+ PB_CHRFILE,
|
||||
|
||||
/* PB_AUTOGEN_STOP */
|
||||
|
||||
diff --git a/criu/mem.c b/criu/mem.c
|
||||
index 00965f0..b955d66 100644
|
||||
--- a/criu/mem.c
|
||||
+++ b/criu/mem.c
|
||||
@@ -32,6 +32,7 @@
|
||||
#include "compel/infect-util.h"
|
||||
#include "pidfd-store.h"
|
||||
#include "pin-mem.h"
|
||||
+#include "files-chr.h"
|
||||
|
||||
#include "protobuf.h"
|
||||
#include "images/pagemap.pb-c.h"
|
||||
@@ -717,7 +718,9 @@ int prepare_mm_pid(struct pstree_item *i)
|
||||
|
||||
pr_info("vma 0x%" PRIx64 " 0x%" PRIx64 "\n", vma->e->start, vma->e->end);
|
||||
|
||||
- if (vma_area_is(vma, VMA_ANON_SHARED))
|
||||
+ if (vma_area_is(vma, VMA_AREA_CHR))
|
||||
+ ret = collect_chr_map(i, vma);
|
||||
+ else if (vma_area_is(vma, VMA_ANON_SHARED))
|
||||
ret = collect_shmem(pid, vma);
|
||||
else if (vma_area_is(vma, VMA_FILE_PRIVATE) || vma_area_is(vma, VMA_FILE_SHARED))
|
||||
ret = collect_filemap(vma);
|
||||
@@ -1358,7 +1361,7 @@ int open_vmas(struct pstree_item *t)
|
||||
filemap_ctx_init(false);
|
||||
|
||||
list_for_each_entry(vma, &vmas->h, list) {
|
||||
- if (vma_area_is(vma, VMA_AREA_ANON_INODE))
|
||||
+ if (vma_area_is(vma, VMA_AREA_ANON_INODE) || vma_area_is(vma, VMA_AREA_CHR))
|
||||
continue;
|
||||
|
||||
if (!vma_area_is(vma, VMA_AREA_REGULAR) || !vma->vm_open)
|
||||
diff --git a/criu/proc_parse.c b/criu/proc_parse.c
|
||||
index e41d43a..8913d93 100644
|
||||
--- a/criu/proc_parse.c
|
||||
+++ b/criu/proc_parse.c
|
||||
@@ -603,11 +603,30 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area, const char *file_pat
|
||||
} else if (*vm_file_fd >= 0) {
|
||||
struct stat *st_buf = vma_area->vmst;
|
||||
|
||||
+ pr_info("file mode is: %x, st_ino: %ld\n",
|
||||
+ st_buf->st_mode, st_buf->st_ino);
|
||||
if (S_ISREG(st_buf->st_mode))
|
||||
/* regular file mapping -- supported */;
|
||||
else if (S_ISCHR(st_buf->st_mode) && (st_buf->st_rdev == DEVZERO))
|
||||
/* devzero mapping -- also makes sense */;
|
||||
- else {
|
||||
+ else if (S_ISCHR(st_buf->st_mode) && opts.dump_char_dev) {
|
||||
+ /* NOTICE: if `--dump-char-dev` option is set, permmit
|
||||
+ * all char device memory area dumping.
|
||||
+ */
|
||||
+ if (strstr(file_path, "uverbs") != NULL) {
|
||||
+ int len = strlen(file_path) + 1;
|
||||
+
|
||||
+ vma_area->e->status |= VMA_AREA_CHR;
|
||||
+ vma_area->e->name = xmalloc(len);
|
||||
+ if (!vma_area->e->name) {
|
||||
+ pr_err("alloc vma area name failed\n");
|
||||
+ goto err;
|
||||
+ strncpy(vma_area->e->name, file_path, len);
|
||||
+ pr_info("vma name content is: %s\n",
|
||||
+ vma_area->e->name);
|
||||
+ }
|
||||
+ }
|
||||
+ } else {
|
||||
pr_err("Can't handle non-regular mapping on %d's map %" PRIx64 "\n", pid, vma_area->e->start);
|
||||
goto err;
|
||||
}
|
||||
diff --git a/images/Makefile b/images/Makefile
|
||||
index 004e22e..37dff9a 100644
|
||||
--- a/images/Makefile
|
||||
+++ b/images/Makefile
|
||||
@@ -72,6 +72,7 @@ proto-obj-y += bpfmap-file.o
|
||||
proto-obj-y += bpfmap-data.o
|
||||
proto-obj-y += apparmor.o
|
||||
proto-obj-y += rseq.o
|
||||
+proto-obj-y += chr.o
|
||||
|
||||
CFLAGS += -iquote $(obj)/
|
||||
|
||||
diff --git a/images/chr.proto b/images/chr.proto
|
||||
new file mode 100644
|
||||
index 0000000..67929db
|
||||
--- /dev/null
|
||||
+++ b/images/chr.proto
|
||||
@@ -0,0 +1,12 @@
|
||||
+syntax = "proto2";
|
||||
+
|
||||
+import "opts.proto";
|
||||
+
|
||||
+message chrfile_entry {
|
||||
+ required uint32 id = 1;
|
||||
+ required uint32 flags = 2 [(criu).flags = "rfile.flags"];
|
||||
+ required uint32 index = 3;
|
||||
+ required string name = 4;
|
||||
+ required bool repair = 5;
|
||||
+};
|
||||
+
|
||||
diff --git a/images/fdinfo.proto b/images/fdinfo.proto
|
||||
index 88f1c11..6549472 100644
|
||||
--- a/images/fdinfo.proto
|
||||
+++ b/images/fdinfo.proto
|
||||
@@ -20,6 +20,7 @@ import "pipe.proto";
|
||||
import "tty.proto";
|
||||
import "memfd.proto";
|
||||
import "bpfmap-file.proto";
|
||||
+import "chr.proto";
|
||||
|
||||
enum fd_types {
|
||||
UND = 0;
|
||||
@@ -42,6 +43,7 @@ enum fd_types {
|
||||
TIMERFD = 17;
|
||||
MEMFD = 18;
|
||||
BPFMAP = 19;
|
||||
+ CHR = 21;
|
||||
|
||||
/* Any number above the real used. Not stored to image */
|
||||
CTL_TTY = 65534;
|
||||
@@ -78,4 +80,5 @@ message file_entry {
|
||||
optional tty_file_entry tty = 19;
|
||||
optional memfd_file_entry memfd = 20;
|
||||
optional bpfmap_file_entry bpf = 21;
|
||||
+ optional chrfile_entry chr = 23;
|
||||
}
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,74 +0,0 @@
|
||||
From 539add7149df575d6d8cdce60ad6fb2c2300e27d Mon Sep 17 00:00:00 2001
|
||||
From: Jingxian He <hejingxian@huawei.com>
|
||||
Date: Sun, 24 Oct 2021 15:20:27 +0800
|
||||
Subject: [PATCH 26/72] improve char dev fd check and repair method
|
||||
|
||||
Some special char dev cannot work in child processes, we make dump fail
|
||||
when the special char dev fd is in child processes.
|
||||
In the char dev repair process, user may need recover fd. We should
|
||||
make the repair process run after the char dev fd is reopened as the dumped fd.
|
||||
|
||||
Conflict:NA
|
||||
Reference:https://gitee.com/src-openeuler/criu/pulls/21
|
||||
Signed-off-by: Jingxian He <hejingxian@huawei.com>
|
||||
---
|
||||
criu/files-chr.c | 11 +----------
|
||||
criu/files.c | 12 ++++++++++++
|
||||
2 files changed, 13 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/criu/files-chr.c b/criu/files-chr.c
|
||||
index 2eb023e..315e9c6 100644
|
||||
--- a/criu/files-chr.c
|
||||
+++ b/criu/files-chr.c
|
||||
@@ -31,17 +31,8 @@ static int chrfile_open(struct file_desc *d, int *new_fd)
|
||||
return -1;
|
||||
}
|
||||
|
||||
- if (ci->cfe->repair) {
|
||||
- ret = ioctl(fd, IOCTL_CMD_REPAIR , ci->cfe->index);
|
||||
- pr_info("repair ioctl return: %d, index: %d\n", ret, ci->cfe->index);
|
||||
- if (ret)
|
||||
- goto err;
|
||||
- }
|
||||
-
|
||||
*new_fd = fd;
|
||||
- return ret;
|
||||
-err:
|
||||
- close(fd);
|
||||
+
|
||||
return ret;
|
||||
}
|
||||
|
||||
diff --git a/criu/files.c b/criu/files.c
|
||||
index e1681a1..7b688f5 100644
|
||||
--- a/criu/files.c
|
||||
+++ b/criu/files.c
|
||||
@@ -1231,6 +1231,7 @@ static int open_fd(struct fdinfo_list_entry *fle)
|
||||
struct file_desc *d = fle->desc;
|
||||
struct fdinfo_list_entry *flem;
|
||||
int new_fd = -1, ret;
|
||||
+ struct chrfile_info *ci;
|
||||
|
||||
flem = file_master(d);
|
||||
if (fle != flem) {
|
||||
@@ -1258,6 +1259,17 @@ static int open_fd(struct fdinfo_list_entry *fle)
|
||||
if (ret != -1 && new_fd >= 0) {
|
||||
if (setup_and_serve_out(fle, new_fd) < 0)
|
||||
return -1;
|
||||
+ if (d->ops->type == FD_TYPES__CHR) {
|
||||
+ ci = container_of(d, struct chrfile_info, d);
|
||||
+ if (ci->cfe->repair) {
|
||||
+ ret = ioctl(fle->fe->fd, IOCTL_CMD_REPAIR , ci->cfe->index);
|
||||
+ pr_info("repair ioctl return: %d, index: %d\n", ret, ci->cfe->index);
|
||||
+ if (ret) {
|
||||
+ close(fle->fe->fd);
|
||||
+ return -1;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
}
|
||||
out:
|
||||
if (ret == 0)
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,472 +0,0 @@
|
||||
From fe19a2639373175c134fa51a7c1c26ca5306d22c Mon Sep 17 00:00:00 2001
|
||||
From: "fu.lin" <fulin10@huawei.com>
|
||||
Date: Fri, 10 Sep 2021 16:06:55 +0800
|
||||
Subject: [PATCH 27/72] mmap: restore /dev/hisi_sec2* device vma
|
||||
|
||||
There are two kinds of vmas: anonymous vma and file-based vma. For
|
||||
anonymous vma, criu just map area and fill content to it; for file-based
|
||||
vma, criu preprocess it, such as setting `open_vm()` callback function.
|
||||
|
||||
`/dev/hisi_sec2*` char device is different from the normal. The `open`,
|
||||
`mmap`, and `close` syscall actions have a special meaning.
|
||||
- `open`: allocate physical resource of the device
|
||||
- `mmap`: create instance
|
||||
- `close`: release physical resource
|
||||
The vma means the instance in this device. One fd may be associated with
|
||||
a group instances: one mmio (vma size is 2 pages, pgoff is 0), one dus
|
||||
(vma size is 37 pages, pgoff is 0x2000). As for dus vma, it's split into two
|
||||
vmas by `mprotect(addr, 0x5000, PROT_READ)`: one size is 0x20000, one
|
||||
size is 0x5000.
|
||||
|
||||
This patch makes the /dev/hisi_sec* restore possible. Idea:
|
||||
It's impossible for criu to know the relationship between vma and the
|
||||
mapped file fd. Therefore, just collect the total fds number during
|
||||
collecting /dev/hisi_sec* files, then the fd is tagged that which
|
||||
function is used during vma restoration, and assign the unused fd to the
|
||||
specific vma. And during `mmap()` process, dus vma is split by `mprotect`.
|
||||
|
||||
Note:
|
||||
- criu use ino to index the fd.
|
||||
- the driver of this physical device is hisi_sec2.ko, which is located in
|
||||
`drivers/crypto/hisilicon/sec2/` of linux kernel.
|
||||
- this device name has prefix "hisi_sec2" that is found from
|
||||
`drivers/crypto/hisilicon/sec2/sec_main.c`.
|
||||
|
||||
Conflict:NA
|
||||
Reference:https://gitee.com/src-openeuler/criu/pulls/21
|
||||
Signed-off-by: fu.lin <fulin10@huawei.com>
|
||||
---
|
||||
criu/files-chr.c | 130 +++++++++++++++++++++++++++++++++++++--
|
||||
criu/include/files-chr.h | 16 +++++
|
||||
criu/include/vma.h | 12 ++++
|
||||
criu/pie/restorer.c | 130 ++++++++++++++++++++++++++++++++++++++-
|
||||
criu/proc_parse.c | 4 +-
|
||||
5 files changed, 284 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/criu/files-chr.c b/criu/files-chr.c
|
||||
index 315e9c6..95d93e1 100644
|
||||
--- a/criu/files-chr.c
|
||||
+++ b/criu/files-chr.c
|
||||
@@ -6,6 +6,9 @@
|
||||
#include "log.h"
|
||||
|
||||
#include "protobuf.h"
|
||||
+#include "rst-malloc.h"
|
||||
+
|
||||
+static unsigned hisi_sec_fds_n;
|
||||
|
||||
/* Checks if file descriptor @lfd is infinibandevent */
|
||||
int is_infiniband_link(char *link)
|
||||
@@ -16,11 +19,14 @@ int is_infiniband_link(char *link)
|
||||
static int chrfile_open(struct file_desc *d, int *new_fd)
|
||||
{
|
||||
int fd, mntns_root;
|
||||
- int ret = 0;
|
||||
+ int ret = -1;
|
||||
struct chrfile_info *ci;
|
||||
|
||||
ci = container_of(d, struct chrfile_info, d);
|
||||
|
||||
+ pr_info("charfile: Opening %s (repair %d index %d)\n",
|
||||
+ ci->path, ci->cfe->repair, ci->cfe->index);
|
||||
+
|
||||
if (ci->cfe->repair)
|
||||
ci->cfe->flags |= O_REPAIR;
|
||||
|
||||
@@ -32,6 +38,7 @@ static int chrfile_open(struct file_desc *d, int *new_fd)
|
||||
}
|
||||
|
||||
*new_fd = fd;
|
||||
+ ret = 0;
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -52,10 +59,12 @@ static int collect_one_chrfile(void *o, ProtobufCMessage *base, struct cr_img *i
|
||||
else
|
||||
ci->path = ci->cfe->name;
|
||||
|
||||
- pr_info("Collected chr file: %#x, name: %s\n", ci->cfe->id, ci->path);
|
||||
- file_desc_add(&ci->d, ci->cfe->id, &chrfile_desc_ops);
|
||||
+ /* collect `/dev/hisi_sec2*` fds */
|
||||
+ if (strstr(ci->path, HISI_SEC_DEV) != NULL)
|
||||
+ hisi_sec_fds_n += 1;
|
||||
|
||||
- return 0;
|
||||
+ pr_info("Collected chr file: %#x, name: %s\n", ci->cfe->id, ci->path);
|
||||
+ return file_desc_add(&ci->d, ci->cfe->id, &chrfile_desc_ops);
|
||||
}
|
||||
|
||||
struct collect_image_info chrfile_cinfo = {
|
||||
@@ -65,6 +74,7 @@ struct collect_image_info chrfile_cinfo = {
|
||||
.collect = collect_one_chrfile,
|
||||
};
|
||||
|
||||
+static int handle_hisi_vma(struct list_head *fds, struct vma_area *vma);
|
||||
int collect_chr_map(struct pstree_item *me, struct vma_area *vma)
|
||||
{
|
||||
struct list_head *list = &rsti(me)->fds;
|
||||
@@ -72,6 +82,12 @@ int collect_chr_map(struct pstree_item *me, struct vma_area *vma)
|
||||
struct chrfile_info *ci;
|
||||
bool exist_fd;
|
||||
|
||||
+ if (strstr(vma->e->name, HISI_SEC_DEV) != NULL) {
|
||||
+ if (handle_hisi_vma(list, vma) != 0) {
|
||||
+ return -1;
|
||||
+ } else
|
||||
+ goto out;
|
||||
+ }
|
||||
|
||||
list_for_each_entry_safe(fle, tmp, list, ps_list) {
|
||||
struct file_desc *d = fle->desc;
|
||||
@@ -91,5 +107,111 @@ int collect_chr_map(struct pstree_item *me, struct vma_area *vma)
|
||||
if (!exist_fd)
|
||||
return -EEXIST;
|
||||
|
||||
+out:
|
||||
+ pr_info(" `- find fd %ld for dev %s at this vma\n", vma->e->fd, vma->e->name);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+#define MAX_HISI_SEC_SIZE 3 /* one physical device expose three char dev */
|
||||
+static struct hlist_head hisi_sec_fds_hash[MAX_HISI_SEC_SIZE];
|
||||
+
|
||||
+static int collect_hisi_sec_fds(struct list_head *list)
|
||||
+{
|
||||
+ struct fdinfo_list_entry *fle, *tmp;
|
||||
+ struct chrfile_info *ci;
|
||||
+ struct file_desc *d;
|
||||
+ struct hisi_sec_desc *desc;
|
||||
+ int idx;
|
||||
+ int nr = 0;
|
||||
+
|
||||
+ for (idx = 0; idx < MAX_HISI_SEC_SIZE; idx++)
|
||||
+ INIT_HLIST_HEAD(&hisi_sec_fds_hash[idx]);
|
||||
+
|
||||
+ list_for_each_entry_safe(fle, tmp, list, ps_list) {
|
||||
+ d = fle->desc;
|
||||
+
|
||||
+ if (d->ops->type != FD_TYPES__CHR)
|
||||
+ continue;
|
||||
+
|
||||
+ ci = container_of(d, struct chrfile_info, d);
|
||||
+
|
||||
+ if (strstr(ci->path, HISI_SEC_DEV) != NULL) {
|
||||
+ desc = shmalloc(sizeof(*desc));
|
||||
+ if (desc == NULL)
|
||||
+ return -ENOMEM;
|
||||
+
|
||||
+ desc->name = ci->path;
|
||||
+ desc->fd = fle->fe->fd;
|
||||
+ desc->mmio = desc->dus = 0;
|
||||
+
|
||||
+ idx = (ci->path[strlen(ci->path)-1] - '0') % MAX_HISI_SEC_SIZE;
|
||||
+ hlist_add_head(&desc->hash, &hisi_sec_fds_hash[idx]);
|
||||
+
|
||||
+ nr += 1;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return nr;
|
||||
+}
|
||||
+
|
||||
+static long delivery_hisi_sec_fd(struct list_head *fds, struct vma_area *vma)
|
||||
+{
|
||||
+ extern unsigned hisi_sec_fds_n; /* defined in criu/files.c */
|
||||
+ static bool initialized = false;
|
||||
+ struct hisi_sec_desc *desc;
|
||||
+ int fd = -1, idx;
|
||||
+
|
||||
+ if (!initialized) {
|
||||
+ int nr;
|
||||
+
|
||||
+ pr_info("find %d fds for hisi_sec char device\n", hisi_sec_fds_n);
|
||||
+
|
||||
+ nr = collect_hisi_sec_fds(fds);
|
||||
+ if (nr != hisi_sec_fds_n) {
|
||||
+ pr_err("Collected fds(%d) aren't equal opened(%d)\n",
|
||||
+ nr, hisi_sec_fds_n);
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ initialized = true;
|
||||
+ } else if (vma->e->pgoff != HISI_SEC_MMIO && vma->e->pgoff != HISI_SEC_DUS) {
|
||||
+ /* It's impossible value for fd, just as a tag to show it's a
|
||||
+ * vma by `mprotect` syscall.
|
||||
+ */
|
||||
+ return LONG_MAX;
|
||||
+ }
|
||||
+
|
||||
+ idx = (vma->e->name[strlen(vma->e->name)-1] - '0') % MAX_HISI_SEC_SIZE;
|
||||
+ hlist_for_each_entry(desc, &hisi_sec_fds_hash[idx], hash) {
|
||||
+ if (strcmp(desc->name, vma->e->name) != 0)
|
||||
+ continue;
|
||||
+
|
||||
+ if (vma->e->pgoff == HISI_SEC_MMIO && !desc->mmio) {
|
||||
+ fd = desc->fd;
|
||||
+ desc->mmio = true;
|
||||
+ break;
|
||||
+ } else if (vma->e->pgoff == HISI_SEC_DUS && !desc->dus) {
|
||||
+ fd = desc->fd;
|
||||
+ desc->dus = true;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return fd;
|
||||
+}
|
||||
+
|
||||
+static int handle_hisi_vma(struct list_head *fds, struct vma_area *vma)
|
||||
+{
|
||||
+ long fd = delivery_hisi_sec_fd(fds, vma);
|
||||
+
|
||||
+ if (fd < 0) {
|
||||
+ pr_err("find fd for char dev vma pgoff %lx named %s failed.\n",
|
||||
+ vma->e->pgoff, vma->e->name);
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ vma->e->fd = fd;
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
diff --git a/criu/include/files-chr.h b/criu/include/files-chr.h
|
||||
index 5be11f5..26b8fb2 100644
|
||||
--- a/criu/include/files-chr.h
|
||||
+++ b/criu/include/files-chr.h
|
||||
@@ -22,4 +22,20 @@ bool find_devname(const char *name);
|
||||
int collect_chr_map(struct pstree_item *me, struct vma_area *vma);
|
||||
int is_infiniband_link(char *link);
|
||||
|
||||
+struct hisi_sec_desc {
|
||||
+ struct hlist_node hash;
|
||||
+ char *name;
|
||||
+ bool mmio;
|
||||
+ bool dus;
|
||||
+ int fd;
|
||||
+};
|
||||
+
|
||||
+#define HISI_SEC_DEV "hisi_sec2" /* `/dev/hisi_sec2*` char device */
|
||||
+
|
||||
+/* here is the selection of offset in `mmap`, they're from drivers */
|
||||
+enum hisi_sec_dev {
|
||||
+ HISI_SEC_MMIO = 0x0,
|
||||
+ HISI_SEC_DUS = 0x2000,
|
||||
+};
|
||||
+
|
||||
#endif /* __CRIU_FILES_CHR_H__ */
|
||||
diff --git a/criu/include/vma.h b/criu/include/vma.h
|
||||
index ed9f31e..2b6e86f 100644
|
||||
--- a/criu/include/vma.h
|
||||
+++ b/criu/include/vma.h
|
||||
@@ -125,4 +125,16 @@ static inline bool vma_entry_can_be_lazy(VmaEntry *e)
|
||||
!(vma_entry_is(e, VMA_AREA_VDSO)) && !(vma_entry_is(e, VMA_AREA_VSYSCALL)));
|
||||
}
|
||||
|
||||
+struct vma_attr {
|
||||
+ int prot;
|
||||
+ int flags;
|
||||
+};
|
||||
+
|
||||
+enum ALIEN_MAP_METHOD {
|
||||
+ PGOFF_IS_ZERO,
|
||||
+ MAP_THEN_PROTECT,
|
||||
+
|
||||
+ MAX_ALIEN_MAP_METHOD,
|
||||
+};
|
||||
+
|
||||
#endif /* __CR_VMA_H__ */
|
||||
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
|
||||
index 549bbd6..dcc922e 100644
|
||||
--- a/criu/pie/restorer.c
|
||||
+++ b/criu/pie/restorer.c
|
||||
@@ -37,6 +37,7 @@
|
||||
#include "uffd.h"
|
||||
#include "sched.h"
|
||||
#include "notifier.h"
|
||||
+#include "files-chr.h"
|
||||
|
||||
#include "common/lock.h"
|
||||
#include "common/page.h"
|
||||
@@ -861,6 +862,129 @@ static unsigned long restore_mapping(VmaEntry *vma_entry)
|
||||
return addr;
|
||||
}
|
||||
|
||||
+static unsigned long restore_map_then_protect_mapping(VmaEntry *curr,
|
||||
+ struct vma_attr *curr_attr,
|
||||
+ VmaEntry *next,
|
||||
+ struct vma_attr *next_attr)
|
||||
+{
|
||||
+ int retval;
|
||||
+ unsigned long addr;
|
||||
+
|
||||
+ if (next->fd != LONG_MAX
|
||||
+ || curr->end != next->start
|
||||
+ || (vma_entry_len(curr) + curr->pgoff) != next->pgoff
|
||||
+ || curr->prot == next->prot
|
||||
+ || curr->flags != next->flags) {
|
||||
+ pr_err("They looks not currect:\n");
|
||||
+ pr_err(" `- vma A: (%x %x %d %lx)\n",
|
||||
+ curr_attr->prot, curr_attr->flags,
|
||||
+ (int)curr->fd, curr->pgoff);
|
||||
+ pr_err(" `- vma B: (%x %x %d %lx)\n",
|
||||
+ next_attr->prot, next_attr->flags,
|
||||
+ (int)next->fd, next->pgoff);
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ pr_info("\tmmap(%x %x %d %lx) in map then protect mapping\n",
|
||||
+ curr_attr->prot, curr_attr->flags,
|
||||
+ (int)curr->fd, curr->pgoff);
|
||||
+
|
||||
+ addr = sys_mmap(decode_pointer(curr->start),
|
||||
+ vma_entry_len(curr) + vma_entry_len(next),
|
||||
+ curr_attr->prot, curr_attr->flags, curr->fd, curr->pgoff);
|
||||
+ if (addr != curr->start) {
|
||||
+ pr_err("%s: mmap failed with code %ld\n", __func__, addr);
|
||||
+ goto out;
|
||||
+ }
|
||||
+
|
||||
+ pr_info("\t mprotect(%x)\n", next_attr->prot);
|
||||
+ retval = sys_mprotect(decode_pointer(next->start),
|
||||
+ vma_entry_len(next), next_attr->prot);
|
||||
+ if (retval != 0) {
|
||||
+ addr = retval;
|
||||
+ pr_err("%s: mprotect failed with code %d\n", __func__, retval);
|
||||
+ }
|
||||
+
|
||||
+out:
|
||||
+ return addr;
|
||||
+}
|
||||
+
|
||||
+static unsigned long restore_pgoff_is_zero_mapping(VmaEntry *curr, struct vma_attr *attr)
|
||||
+{
|
||||
+ unsigned long addr;
|
||||
+
|
||||
+ pr_debug("\tmmap(%x %x %d %lx) in pgoff is zero mapping\n",
|
||||
+ attr->prot, attr->flags, (int)curr->fd, curr->pgoff);
|
||||
+
|
||||
+ addr = sys_mmap(decode_pointer(curr->start),
|
||||
+ vma_entry_len(curr),
|
||||
+ attr->prot, attr->flags,
|
||||
+ curr->fd, curr->pgoff);
|
||||
+
|
||||
+ return addr;
|
||||
+}
|
||||
+
|
||||
+static unsigned long restore_hisi_sec_mapping(struct task_restore_args *args,
|
||||
+ int i, int *step)
|
||||
+{
|
||||
+ VmaEntry *curr = args->vmas + i;
|
||||
+ VmaEntry *next = args->vmas + i + 1;
|
||||
+ struct vma_attr curr_attr = {
|
||||
+ .prot = curr->prot,
|
||||
+ .flags = curr->flags | MAP_FIXED,
|
||||
+ };
|
||||
+ struct vma_attr next_attr = {
|
||||
+ .prot = next->prot,
|
||||
+ .flags = next->flags | MAP_FIXED,
|
||||
+ };
|
||||
+ unsigned long addr;
|
||||
+
|
||||
+ switch (curr->pgoff) {
|
||||
+ case HISI_SEC_MMIO:
|
||||
+ addr = restore_pgoff_is_zero_mapping(curr, &curr_attr);
|
||||
+ break;
|
||||
+ case HISI_SEC_DUS:
|
||||
+ *step = 2;
|
||||
+ addr = restore_map_then_protect_mapping(curr, &curr_attr, next, &next_attr);
|
||||
+ break;
|
||||
+ default:
|
||||
+ pr_err("invalid pgoff %lx for vma\n", curr->pgoff);
|
||||
+ return -1;
|
||||
+ }
|
||||
+ return addr;
|
||||
+}
|
||||
+
|
||||
+static bool find(const char *s1, const char *s2)
|
||||
+{
|
||||
+ if (s1 == NULL || s2 == NULL)
|
||||
+ return NULL;
|
||||
+
|
||||
+ while (*s1 != '\0' && *s2 != '\0') {
|
||||
+ if (*s1 == *s2) {
|
||||
+ s1 += 1;
|
||||
+ s2 += 1;
|
||||
+ } else
|
||||
+ s1 += 1;
|
||||
+
|
||||
+ if (*s2 == '\0')
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+static unsigned long distribute_restore_mapping(struct task_restore_args *args,
|
||||
+ int i, int *step)
|
||||
+{
|
||||
+ VmaEntry *vma = args->vmas + i;
|
||||
+ struct vma_names *vma_name = args->vma_names + i;
|
||||
+
|
||||
+ if (vma_entry_is(vma, VMA_AREA_CHR) && find(vma_name->name, HISI_SEC_DEV))
|
||||
+ return restore_hisi_sec_mapping(args, i, step);
|
||||
+ else
|
||||
+ return restore_mapping(vma);
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* This restores aio ring header, content, head and in-kernel position
|
||||
* of tail. To set tail, we write to /dev/null and use the fact this
|
||||
@@ -1542,7 +1666,7 @@ int write_fork_pid(int pid)
|
||||
long __export_restore_task(struct task_restore_args *args)
|
||||
{
|
||||
long ret = -1;
|
||||
- int i;
|
||||
+ int i, step;
|
||||
VmaEntry *vma_entry;
|
||||
unsigned long va;
|
||||
struct restore_vma_io *rio;
|
||||
@@ -1691,7 +1815,7 @@ long __export_restore_task(struct task_restore_args *args)
|
||||
/*
|
||||
* OK, lets try to map new one.
|
||||
*/
|
||||
- for (i = 0; i < args->vmas_n; i++) {
|
||||
+ for (i = 0, step = 1; i < args->vmas_n; i += step, step = 1) {
|
||||
vma_entry = args->vmas + i;
|
||||
vma_name = args->vma_names + i;
|
||||
|
||||
@@ -1708,7 +1832,7 @@ long __export_restore_task(struct task_restore_args *args)
|
||||
if (vma_entry_is(vma_entry, VMA_PREMMAPED))
|
||||
continue;
|
||||
|
||||
- va = restore_mapping(vma_entry);
|
||||
+ va = distribute_restore_mapping(args, i, &step);
|
||||
|
||||
if (va != vma_entry->start) {
|
||||
pr_err("Can't restore %" PRIx64 " mapping with %lx\n", vma_entry->start, va);
|
||||
diff --git a/criu/proc_parse.c b/criu/proc_parse.c
|
||||
index 8913d93..daa54d9 100644
|
||||
--- a/criu/proc_parse.c
|
||||
+++ b/criu/proc_parse.c
|
||||
@@ -41,6 +41,7 @@
|
||||
#include "path.h"
|
||||
#include "fault-injection.h"
|
||||
#include "memfd.h"
|
||||
+#include "files-chr.h"
|
||||
|
||||
#include "protobuf.h"
|
||||
#include "images/fdinfo.pb-c.h"
|
||||
@@ -613,7 +614,8 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area, const char *file_pat
|
||||
/* NOTICE: if `--dump-char-dev` option is set, permmit
|
||||
* all char device memory area dumping.
|
||||
*/
|
||||
- if (strstr(file_path, "uverbs") != NULL) {
|
||||
+ if (strstr(file_path, "uverbs") != NULL
|
||||
+ || strstr(file_path, HISI_SEC_DEV) != NULL) {
|
||||
int len = strlen(file_path) + 1;
|
||||
|
||||
vma_area->e->status |= VMA_AREA_CHR;
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,223 +0,0 @@
|
||||
From 5ff0e810f04de4b31f605ba3179dec3b3777978a Mon Sep 17 00:00:00 2001
|
||||
From: "fu.lin" <fulin10@huawei.com>
|
||||
Date: Mon, 8 Nov 2021 15:08:12 +0800
|
||||
Subject: [PATCH 28/72] infiniband: fix the infiniband fd conflict
|
||||
|
||||
Phenomenon:
|
||||
Operating uverbs device will generate anonymous fd named
|
||||
`anon_inode:[infinibandevent]`. When `anon_inode:[infinibandevent]` fd
|
||||
is the last opened fd, and some kind of unix socket fd exist, which is
|
||||
generated by syscalls like `socketpair()` at the same time,
|
||||
`anon_inode:[infinibandevent]` will fail to restore probabilistically.
|
||||
|
||||
log as the following:
|
||||
|
||||
```
|
||||
(00.254523) 63959: open file flags:1
|
||||
(00.254526) 63959: unix: Opening standalone (stage 0 id 0x1ff ino 1019605 peer 0)
|
||||
(00.254571) 63959: *******flags: 0
|
||||
(00.254575) 63959: Create fd for 1408 # the fake fd
|
||||
(00.254578) 63959: *******flags: 1
|
||||
(00.254580) 63959: Create fd for 445 # the restoration fd
|
||||
```
|
||||
|
||||
Reason:
|
||||
During the restoration of unix socket, `socketpair()` will generate
|
||||
two fds, one is used to the current restoration, another is called fake
|
||||
fd which fd nr is owned by `find_unused_fd()`. When
|
||||
`anon_inode:[infinibandevent]` fd is the last one, criu don't dump the
|
||||
fd information for `anon_inode:[infinibandevent]` in original
|
||||
implementation, and criu think the fd nr which should belong to
|
||||
`anon_inode:[infinibandevent]` isn't used. Therefore, it cause the
|
||||
`anon_inode:[infinibandevent]` restoration fail.
|
||||
|
||||
This patch fix the above problem. Core: dump
|
||||
`anon_inode:[infinibandevent]` fd information, making criu aware
|
||||
that the fd nr is used.
|
||||
|
||||
Conflict:NA
|
||||
Reference:https://gitee.com/src-openeuler/criu/pulls/21
|
||||
Signed-off-by: fu.lin <fulin10@huawei.com>
|
||||
---
|
||||
criu/files-chr.c | 57 ++++++++++++++++++++++++++++++++++++
|
||||
criu/files.c | 10 +++----
|
||||
criu/include/files-chr.h | 8 +++++
|
||||
criu/include/image-desc.h | 1 +
|
||||
criu/include/protobuf-desc.h | 1 +
|
||||
images/chr.proto | 3 ++
|
||||
images/fdinfo.proto | 2 ++
|
||||
7 files changed, 76 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/criu/files-chr.c b/criu/files-chr.c
|
||||
index 95d93e1..6d87c33 100644
|
||||
--- a/criu/files-chr.c
|
||||
+++ b/criu/files-chr.c
|
||||
@@ -215,3 +215,60 @@ static int handle_hisi_vma(struct list_head *fds, struct vma_area *vma)
|
||||
|
||||
return 0;
|
||||
}
|
||||
+
|
||||
+static void pr_info_infiniband(char *action, InfinibandEntry *infiniband)
|
||||
+{
|
||||
+ pr_info("%sinfiniband: id %#08x\n", action, infiniband->id);
|
||||
+}
|
||||
+
|
||||
+static int dump_one_infiniband(int lfd, u32 id, const struct fd_parms *p)
|
||||
+{
|
||||
+ FileEntry fe = FILE_ENTRY__INIT;
|
||||
+ InfinibandEntry infiniband = INFINIBAND_ENTRY__INIT;
|
||||
+
|
||||
+ infiniband.id = id;
|
||||
+
|
||||
+ fe.type = FD_TYPES__INFINIBAND;
|
||||
+ fe.id = infiniband.id;
|
||||
+ fe.infiniband = &infiniband;
|
||||
+
|
||||
+ pr_info_infiniband("Dumping ", &infiniband);
|
||||
+
|
||||
+ return pb_write_one(img_from_set(glob_imgset, CR_FD_FILES), &fe, PB_FILE);
|
||||
+}
|
||||
+
|
||||
+const struct fdtype_ops infiniband_dump_ops = {
|
||||
+ .type = FD_TYPES__INFINIBAND,
|
||||
+ .dump = dump_one_infiniband,
|
||||
+};
|
||||
+
|
||||
+static int infiniband_open(struct file_desc *d, int *new_fd) {
|
||||
+ /*
|
||||
+ * `*new_fd == -1` at this time, it means this open operation shouldn't
|
||||
+ * be served out, which is why this function does nothing here.
|
||||
+ */
|
||||
+ return 0;
|
||||
+};
|
||||
+
|
||||
+static struct file_desc_ops infiniband_desc_ops = {
|
||||
+ .type = FD_TYPES__INFINIBAND,
|
||||
+ .open = infiniband_open,
|
||||
+};
|
||||
+
|
||||
+static int collect_one_infiniband(void *o, ProtobufCMessage *base, struct cr_img *i)
|
||||
+{
|
||||
+ struct infiniband_file_info *info = o;
|
||||
+
|
||||
+ info->infiniband = pb_msg(base, InfinibandEntry);
|
||||
+ pr_info_infiniband("Collected ", info->infiniband);
|
||||
+
|
||||
+ /* add the fd to `file_desc_hash` list to prevent from NULL pointer */
|
||||
+ return file_desc_add(&info->d, info->infiniband->id, &infiniband_desc_ops);
|
||||
+}
|
||||
+
|
||||
+struct collect_image_info infiniband_cinfo = {
|
||||
+ .fd_type = CR_FD_INFINIBAND,
|
||||
+ .pb_type = PB_INFINIBAND,
|
||||
+ .priv_size = sizeof(struct infiniband_file_info),
|
||||
+ .collect = collect_one_infiniband,
|
||||
+};
|
||||
diff --git a/criu/files.c b/criu/files.c
|
||||
index 7b688f5..1ec5281 100644
|
||||
--- a/criu/files.c
|
||||
+++ b/criu/files.c
|
||||
@@ -639,7 +639,7 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts,
|
||||
else if (is_timerfd_link(link))
|
||||
ops = &timerfd_dump_ops;
|
||||
else if (is_infiniband_link(link))
|
||||
- return 1;
|
||||
+ ops = &infiniband_dump_ops;
|
||||
#ifdef CONFIG_HAS_LIBBPF
|
||||
else if (is_bpfmap_link(link))
|
||||
ops = &bpfmap_dump_ops;
|
||||
@@ -744,11 +744,6 @@ int dump_task_files_seized(struct parasite_ctl *ctl, struct pstree_item *item, s
|
||||
ret = dump_one_file(item->pid, dfds->fds[i + off], lfds[i], opts + i, ctl, &e, dfds);
|
||||
if (ret)
|
||||
break;
|
||||
- /* infiniband link file */
|
||||
- if (ret > 0) {
|
||||
- ret = 0;
|
||||
- continue;
|
||||
- }
|
||||
|
||||
ret = pb_write_one(img, &e, PB_FDINFO);
|
||||
if (ret)
|
||||
@@ -1864,6 +1859,9 @@ static int collect_one_file(void *o, ProtobufCMessage *base, struct cr_img *i)
|
||||
case FD_TYPES__CHR:
|
||||
ret = collect_one_file_entry(fe, fe->chr->id, &fe->chr->base, &chrfile_cinfo);
|
||||
break;
|
||||
+ case FD_TYPES__INFINIBAND:
|
||||
+ ret = collect_one_file_entry(fe, fe->infiniband->id, &fe->infiniband->base, &infiniband_cinfo);
|
||||
+ break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
diff --git a/criu/include/files-chr.h b/criu/include/files-chr.h
|
||||
index 26b8fb2..261c4b2 100644
|
||||
--- a/criu/include/files-chr.h
|
||||
+++ b/criu/include/files-chr.h
|
||||
@@ -38,4 +38,12 @@ enum hisi_sec_dev {
|
||||
HISI_SEC_DUS = 0x2000,
|
||||
};
|
||||
|
||||
+struct infiniband_file_info {
|
||||
+ InfinibandEntry *infiniband;
|
||||
+ struct file_desc d;
|
||||
+};
|
||||
+
|
||||
+extern const struct fdtype_ops infiniband_dump_ops;
|
||||
+extern struct collect_image_info infiniband_cinfo;
|
||||
+
|
||||
#endif /* __CRIU_FILES_CHR_H__ */
|
||||
diff --git a/criu/include/image-desc.h b/criu/include/image-desc.h
|
||||
index e35f8b2..9ad5fa0 100644
|
||||
--- a/criu/include/image-desc.h
|
||||
+++ b/criu/include/image-desc.h
|
||||
@@ -116,6 +116,7 @@ enum {
|
||||
|
||||
CR_FD_AUTOFS,
|
||||
CR_FD_CHRFILE,
|
||||
+ CR_FD_INFINIBAND,
|
||||
|
||||
CR_FD_MAX
|
||||
};
|
||||
diff --git a/criu/include/protobuf-desc.h b/criu/include/protobuf-desc.h
|
||||
index 2468e8f..72a9e1d 100644
|
||||
--- a/criu/include/protobuf-desc.h
|
||||
+++ b/criu/include/protobuf-desc.h
|
||||
@@ -71,6 +71,7 @@ enum {
|
||||
PB_BPFMAP_DATA,
|
||||
PB_APPARMOR,
|
||||
PB_CHRFILE,
|
||||
+ PB_INFINIBAND,
|
||||
|
||||
/* PB_AUTOGEN_STOP */
|
||||
|
||||
diff --git a/images/chr.proto b/images/chr.proto
|
||||
index 67929db..ed65005 100644
|
||||
--- a/images/chr.proto
|
||||
+++ b/images/chr.proto
|
||||
@@ -10,3 +10,6 @@ message chrfile_entry {
|
||||
required bool repair = 5;
|
||||
};
|
||||
|
||||
+message infiniband_entry {
|
||||
+ required uint32 id = 1;
|
||||
+};
|
||||
diff --git a/images/fdinfo.proto b/images/fdinfo.proto
|
||||
index 6549472..eb52f35 100644
|
||||
--- a/images/fdinfo.proto
|
||||
+++ b/images/fdinfo.proto
|
||||
@@ -44,6 +44,7 @@ enum fd_types {
|
||||
MEMFD = 18;
|
||||
BPFMAP = 19;
|
||||
CHR = 21;
|
||||
+ INFINIBAND = 22;
|
||||
|
||||
/* Any number above the real used. Not stored to image */
|
||||
CTL_TTY = 65534;
|
||||
@@ -81,4 +82,5 @@ message file_entry {
|
||||
optional memfd_file_entry memfd = 20;
|
||||
optional bpfmap_file_entry bpf = 21;
|
||||
optional chrfile_entry chr = 23;
|
||||
+ optional infiniband_entry infiniband = 25;
|
||||
}
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,255 +0,0 @@
|
||||
From e522deb5680840e878b8f05c66f040cfd3b49d90 Mon Sep 17 00:00:00 2001
|
||||
From: Jingxian He <hejingxian@huawei.com>
|
||||
Date: Wed, 19 May 2021 21:47:28 +0800
|
||||
Subject: [PATCH 29/72] cred: provide cred checkpoint restore method
|
||||
|
||||
When criu checkpoints/restores the task, it only restores the context instead of
|
||||
the memory address storing the context.
|
||||
|
||||
To handle the problem resulted by CVE bugfix, details:
|
||||
- https://nvd.nist.gov/vuln/detail/CVE-2016-4565
|
||||
- https://openfabrics.org/images/2018workshop/presentations/113_MRuhl_JourneytoVerbsIOCTL.pdf
|
||||
|
||||
Brief:
|
||||
Refresh the security context address of file. The infiniband code use
|
||||
`write()` as bi-directional `ioctl()`, there is `struct cred` address
|
||||
during `write()` process. However, criu uses some syscalls, such as
|
||||
`capset()` and `setgroups()`, to regenerate the new cred, the file
|
||||
cred is fixed by `fcntl(F_SETOWN)`, then the address of new cred is
|
||||
different from the file.
|
||||
This patch fix the `struct cred` address checking problem resulted by
|
||||
CVE fixed in infiniband drivers.
|
||||
|
||||
Conflict:NA
|
||||
Reference:https://gitee.com/src-openeuler/criu/pulls/21
|
||||
Signed-off-by: luolongjun <luolongjun@huawei.com>
|
||||
Signed-off-by: fu.lin <fu.lin10@huawei.com>
|
||||
---
|
||||
criu/config.c | 1 +
|
||||
criu/cr-restore.c | 35 +++++++++++++++++++++++++++++++++++
|
||||
criu/crtools.c | 1 +
|
||||
criu/include/cr_options.h | 1 +
|
||||
criu/include/fcntl.h | 4 ++++
|
||||
criu/include/prctl.h | 4 ++++
|
||||
criu/include/restorer.h | 3 +++
|
||||
criu/pie/restorer.c | 38 ++++++++++++++++++++++++++++++++++++++
|
||||
8 files changed, 87 insertions(+)
|
||||
|
||||
diff --git a/criu/config.c b/criu/config.c
|
||||
index 03cad66..cf99fb1 100644
|
||||
--- a/criu/config.c
|
||||
+++ b/criu/config.c
|
||||
@@ -702,6 +702,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd,
|
||||
BOOL_OPT("use-fork-pid", &opts.use_fork_pid),
|
||||
BOOL_OPT("with-notifier", &opts.with_notifier_kup),
|
||||
BOOL_OPT("dump-char-dev", &opts.dump_char_dev),
|
||||
+ BOOL_OPT("with-fd-cred", &opts.with_fd_cred),
|
||||
{},
|
||||
};
|
||||
|
||||
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
|
||||
index 2904a75..ac677a1 100644
|
||||
--- a/criu/cr-restore.c
|
||||
+++ b/criu/cr-restore.c
|
||||
@@ -692,6 +692,28 @@ static int __collect_child_pids(struct pstree_item *p, int state, unsigned int *
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int collect_child_fds(int state, unsigned int *n, struct pstree_item *me)
|
||||
+{
|
||||
+ struct list_head *list = &rsti(me)->fds;
|
||||
+ struct fdinfo_list_entry *fle, *tmp;
|
||||
+
|
||||
+ *n = 0;
|
||||
+ list_for_each_entry_safe(fle, tmp, list, ps_list) {
|
||||
+ if (fle->fe->type == state) {
|
||||
+ int *child;
|
||||
+
|
||||
+ child = rst_mem_alloc(sizeof(*child), RM_PRIVATE);
|
||||
+ if (!child)
|
||||
+ return -1;
|
||||
+
|
||||
+ (*n)++;
|
||||
+ *child = fle->fe->fd;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static int collect_child_pids(int state, unsigned int *n)
|
||||
{
|
||||
struct pstree_item *pi;
|
||||
@@ -715,6 +737,12 @@ static int collect_child_pids(int state, unsigned int *n)
|
||||
return __collect_child_pids(current, state, n);
|
||||
}
|
||||
|
||||
+static int collect_chr_fds(struct pstree_item *me, struct task_restore_args *ta)
|
||||
+{
|
||||
+ ta->setcred_pids = (int *)rst_mem_align_cpos(RM_PRIVATE);
|
||||
+ return collect_child_fds(FD_TYPES__CHR, &ta->setcred_pids_n, me);
|
||||
+}
|
||||
+
|
||||
static int collect_helper_pids(struct task_restore_args *ta)
|
||||
{
|
||||
ta->helpers = (pid_t *)rst_mem_align_cpos(RM_PRIVATE);
|
||||
@@ -939,6 +967,9 @@ static int restore_one_alive_task(int pid, CoreEntry *core)
|
||||
if (collect_zombie_pids(ta) < 0)
|
||||
return -1;
|
||||
|
||||
+ if (opts.with_fd_cred && collect_chr_fds(current, ta) < 0)
|
||||
+ return -1;
|
||||
+
|
||||
if (collect_inotify_fds(ta) < 0)
|
||||
return -1;
|
||||
|
||||
@@ -3746,6 +3777,10 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
|
||||
RST_MEM_FIXUP_PPTR(task_args->helpers);
|
||||
RST_MEM_FIXUP_PPTR(task_args->zombies);
|
||||
RST_MEM_FIXUP_PPTR(task_args->vma_ios);
|
||||
+ if (opts.with_fd_cred)
|
||||
+ RST_MEM_FIXUP_PPTR(task_args->setcred_pids);
|
||||
+ else
|
||||
+ task_args->setcred_pids_n = UINT_MAX;
|
||||
RST_MEM_FIXUP_PPTR(task_args->inotify_fds);
|
||||
|
||||
task_args->compatible_mode = core_is_compat(core);
|
||||
diff --git a/criu/crtools.c b/criu/crtools.c
|
||||
index dc6d603..ed7bd99 100644
|
||||
--- a/criu/crtools.c
|
||||
+++ b/criu/crtools.c
|
||||
@@ -453,6 +453,7 @@ usage:
|
||||
" This feature needs the kernel assistance.\n"
|
||||
" --dump-char-dev Dump char dev files as normal file with repair cmd\n"
|
||||
\
|
||||
+ " --with-fd-cred Allow to make the restored process has the same cred\n"
|
||||
"\n"
|
||||
"Check options:\n"
|
||||
" Without options, \"criu check\" checks availability of absolutely required\n"
|
||||
diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h
|
||||
index 226acb2..1d6ddcf 100644
|
||||
--- a/criu/include/cr_options.h
|
||||
+++ b/criu/include/cr_options.h
|
||||
@@ -194,6 +194,7 @@ struct cr_options {
|
||||
int use_fork_pid;
|
||||
int with_notifier_kup;
|
||||
int dump_char_dev;
|
||||
+ int with_fd_cred;
|
||||
};
|
||||
|
||||
extern struct cr_options opts;
|
||||
diff --git a/criu/include/fcntl.h b/criu/include/fcntl.h
|
||||
index 35f8805..568977c 100644
|
||||
--- a/criu/include/fcntl.h
|
||||
+++ b/criu/include/fcntl.h
|
||||
@@ -19,6 +19,10 @@ struct f_owner_ex {
|
||||
#define F_GETOWNER_UIDS 17
|
||||
#endif
|
||||
|
||||
+#ifndef F_SETCRED
|
||||
+#define F_SETCRED 18
|
||||
+#endif
|
||||
+
|
||||
/*
|
||||
* These things are required to compile on CentOS-6
|
||||
*/
|
||||
diff --git a/criu/include/prctl.h b/criu/include/prctl.h
|
||||
index c843f40..81dda9d 100644
|
||||
--- a/criu/include/prctl.h
|
||||
+++ b/criu/include/prctl.h
|
||||
@@ -82,4 +82,8 @@ struct prctl_mm_map {
|
||||
#define PR_GET_THP_DISABLE 42
|
||||
#endif
|
||||
|
||||
+#ifndef PR_DEFAULT_CRED
|
||||
+#define PR_DEFAULT_CRED 54
|
||||
+#endif
|
||||
+
|
||||
#endif /* __CR_PRCTL_H__ */
|
||||
diff --git a/criu/include/restorer.h b/criu/include/restorer.h
|
||||
index a81cc1b..60c1dab 100644
|
||||
--- a/criu/include/restorer.h
|
||||
+++ b/criu/include/restorer.h
|
||||
@@ -193,6 +193,9 @@ struct task_restore_args {
|
||||
pid_t *zombies;
|
||||
unsigned int zombies_n;
|
||||
|
||||
+ int *setcred_pids;
|
||||
+ unsigned int setcred_pids_n;
|
||||
+
|
||||
int *inotify_fds; /* fds to cleanup inotify events at CR_STATE_RESTORE_SIGCHLD stage */
|
||||
unsigned int inotify_fds_n;
|
||||
|
||||
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
|
||||
index dcc922e..fde6e30 100644
|
||||
--- a/criu/pie/restorer.c
|
||||
+++ b/criu/pie/restorer.c
|
||||
@@ -101,6 +101,7 @@ static int restore_anon_mapping(VmaEntry *vma_entry, struct vma_names *vma_name)
|
||||
static struct task_entries *task_entries_local;
|
||||
static futex_t thread_inprogress;
|
||||
static futex_t thread_start;
|
||||
+static futex_t cred_set;
|
||||
static pid_t *helpers;
|
||||
static int n_helpers;
|
||||
static pid_t *zombies;
|
||||
@@ -365,6 +366,41 @@ static int restore_creds(struct thread_creds_args *args, int procfd, int lsm_typ
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int update_cred_ref(struct task_restore_args *ta)
|
||||
+{
|
||||
+ int i;
|
||||
+ int ret;
|
||||
+ int pid = sys_getpid();
|
||||
+ long int tid = sys_gettid();
|
||||
+
|
||||
+ if (ta->setcred_pids_n == UINT_MAX) {
|
||||
+ pr_info("no need to keep the same cred \n");
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
+ if (pid == tid) {
|
||||
+ /* let main thread finish cred update first */
|
||||
+ ret = sys_prctl(PR_DEFAULT_CRED, 0, 0, 0, 0);
|
||||
+ pr_info("main cred restore \n");
|
||||
+ futex_set_and_wake(&cred_set, 1);
|
||||
+ } else {
|
||||
+ futex_wait_until(&cred_set, 1);
|
||||
+ pr_info("other cred restore \n");
|
||||
+ ret = sys_prctl(PR_DEFAULT_CRED, 0, 0, 0, 0);
|
||||
+ }
|
||||
+
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+
|
||||
+ pr_info("%ld (%d) is going to update current cred \n", tid, pid);
|
||||
+
|
||||
+ for (i = 0; i < ta->setcred_pids_n; i++) {
|
||||
+ sys_fcntl(ta->setcred_pids[i], F_SETCRED, 0);
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* This should be done after creds restore, as
|
||||
* some creds changes might drop the value back
|
||||
@@ -742,6 +778,7 @@ long __export_restore_thread(struct thread_restore_args *args)
|
||||
BUG();
|
||||
|
||||
ret = restore_creds(args->creds_args, args->ta->proc_fd, args->ta->lsm_type);
|
||||
+ ret = ret || update_cred_ref(args->ta);
|
||||
ret = ret || restore_dumpable_flag(&args->ta->mm);
|
||||
ret = ret || restore_pdeath_sig(args);
|
||||
if (ret)
|
||||
@@ -2221,6 +2258,7 @@ long __export_restore_task(struct task_restore_args *args)
|
||||
* thus restore* creds _after_ all of the above.
|
||||
*/
|
||||
ret = restore_creds(args->t->creds_args, args->proc_fd, args->lsm_type);
|
||||
+ ret = ret || update_cred_ref(args);
|
||||
ret = ret || restore_dumpable_flag(&args->mm);
|
||||
ret = ret || restore_pdeath_sig(args->t);
|
||||
ret = ret || restore_child_subreaper(args->child_subreaper);
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,93 +0,0 @@
|
||||
From 8afde209d2a9245d902eabe40ca7c514aeb6ee9a Mon Sep 17 00:00:00 2001
|
||||
From: Jingxian He <hejingxian@huawei.com>
|
||||
Date: Wed, 19 May 2021 21:56:16 +0800
|
||||
Subject: [PATCH 30/72] socket: fix connect error of invalid param
|
||||
|
||||
Fix connect error of invalid param during module upgrade.
|
||||
|
||||
Conflict:NA
|
||||
Reference:https://gitee.com/src-openeuler/criu/pulls/21
|
||||
Signed-off-by: Xiaoguang Li <lixiaoguang2@huawei.com>
|
||||
Signed-off-by: fu.lin <fu.lin10@huawei.com>
|
||||
---
|
||||
criu/include/sockets.h | 1 +
|
||||
criu/sk-inet.c | 13 +++++++++++--
|
||||
criu/sockets.c | 5 ++++-
|
||||
3 files changed, 16 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/criu/include/sockets.h b/criu/include/sockets.h
|
||||
index 3e8f3d6..2391b48 100644
|
||||
--- a/criu/include/sockets.h
|
||||
+++ b/criu/include/sockets.h
|
||||
@@ -27,6 +27,7 @@ struct socket_desc {
|
||||
extern int dump_socket(struct fd_parms *p, int lfd, FdinfoEntry *);
|
||||
extern int dump_socket_opts(int sk, SkOptsEntry *soe);
|
||||
extern int restore_socket_opts(int sk, SkOptsEntry *soe);
|
||||
+extern int restore_bound_opts(int sk, SkOptsEntry *soe);
|
||||
extern void release_skopts(SkOptsEntry *);
|
||||
extern int restore_prepare_socket(int sk);
|
||||
extern void preload_socket_modules(void);
|
||||
diff --git a/criu/sk-inet.c b/criu/sk-inet.c
|
||||
index e52b198..05048c8 100644
|
||||
--- a/criu/sk-inet.c
|
||||
+++ b/criu/sk-inet.c
|
||||
@@ -100,15 +100,20 @@ static void show_one_inet(const char *act, const struct inet_sk_desc *sk)
|
||||
static void show_one_inet_img(const char *act, const InetSkEntry *e)
|
||||
{
|
||||
char src_addr[INET_ADDR_LEN] = "<unknown>";
|
||||
+ char dst_addr[INET_ADDR_LEN] = "<unknown>";
|
||||
|
||||
if (inet_ntop(e->family, (void *)e->src_addr, src_addr, INET_ADDR_LEN) == NULL) {
|
||||
pr_perror("Failed to translate address");
|
||||
}
|
||||
+ if (inet_ntop(e->family, (void *)e->dst_addr, dst_addr,
|
||||
+ INET_ADDR_LEN) == NULL) {
|
||||
+ pr_perror("Failed to translate address");
|
||||
+ }
|
||||
|
||||
pr_debug("\t%s: family %-10s type %-14s proto %-16s port %d "
|
||||
- "state %-16s src_addr %s\n",
|
||||
+ "state %-16s src_addr %s dst_addr %s\n",
|
||||
act, ___socket_family_name(e->family), ___socket_type_name(e->type), ___socket_proto_name(e->proto),
|
||||
- e->src_port, ___tcp_state_name(e->state), src_addr);
|
||||
+ e->src_port, ___tcp_state_name(e->state), src_addr, dst_addr);
|
||||
}
|
||||
|
||||
static int can_dump_ipproto(unsigned int ino, int proto, int type)
|
||||
@@ -852,6 +857,10 @@ static int open_inet_sk(struct file_desc *d, int *new_fd)
|
||||
if (restore_opt(sk, SOL_SOCKET, SO_REUSEPORT, &yes))
|
||||
goto err;
|
||||
|
||||
+ if(restore_bound_opts(sk, ie->opts) < 0){
|
||||
+ goto err;
|
||||
+ }
|
||||
+
|
||||
if (tcp_connection(ie)) {
|
||||
if (!opts.tcp_established_ok && !opts.tcp_close) {
|
||||
pr_err("Connected TCP socket in image\n");
|
||||
diff --git a/criu/sockets.c b/criu/sockets.c
|
||||
index 9426b5b..2ddf85e 100644
|
||||
--- a/criu/sockets.c
|
||||
+++ b/criu/sockets.c
|
||||
@@ -586,7 +586,6 @@ int restore_socket_opts(int sk, SkOptsEntry *soe)
|
||||
tv.tv_usec = soe->so_rcv_tmo_usec;
|
||||
ret |= restore_opt(sk, SOL_SOCKET, SO_RCVTIMEO, &tv);
|
||||
|
||||
- ret |= restore_bound_dev(sk, soe);
|
||||
ret |= restore_socket_filter(sk, soe);
|
||||
|
||||
/* The restore of SO_REUSEADDR depends on type of socket */
|
||||
@@ -594,6 +593,10 @@ int restore_socket_opts(int sk, SkOptsEntry *soe)
|
||||
return ret;
|
||||
}
|
||||
|
||||
+int restore_bound_opts(int sk, SkOptsEntry *soe){
|
||||
+ return restore_bound_dev(sk, soe);
|
||||
+}
|
||||
+
|
||||
int do_dump_opt(int sk, int level, int name, void *val, int len)
|
||||
{
|
||||
socklen_t aux = len;
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,99 +0,0 @@
|
||||
From 89eb9deee6da8acc7747e103ee591f299fec2043 Mon Sep 17 00:00:00 2001
|
||||
From: Jingxian He <hejingxian@huawei.com>
|
||||
Date: Wed, 19 May 2021 21:56:38 +0800
|
||||
Subject: [PATCH 31/72] criu: eventpollfd fix for improper usage in appdata
|
||||
|
||||
Fix eventpollfd problem of improper usage in appdata.
|
||||
|
||||
Conflict:NA
|
||||
Reference:https://gitee.com/src-openeuler/criu/pulls/21
|
||||
Signed-off-by: Jingxian He <hejingxian@huawei.com>
|
||||
Signed-off-by: fu.lin <fu.lin10@huawei.com>
|
||||
---
|
||||
criu/eventpoll.c | 16 +++++++++++-----
|
||||
criu/proc_parse.c | 2 ++
|
||||
images/eventpoll.proto | 3 +++
|
||||
3 files changed, 16 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/criu/eventpoll.c b/criu/eventpoll.c
|
||||
index 978dca5..8900d50 100644
|
||||
--- a/criu/eventpoll.c
|
||||
+++ b/criu/eventpoll.c
|
||||
@@ -67,8 +67,8 @@ int is_eventpoll_link(char *link)
|
||||
|
||||
static void pr_info_eventpoll_tfd(char *action, uint32_t id, EventpollTfdEntry *e)
|
||||
{
|
||||
- pr_info("%seventpoll-tfd: id %#08x tfd %8d events %#08x data %#016" PRIx64 "\n", action, id, e->tfd, e->events,
|
||||
- e->data);
|
||||
+ pr_info("%seventpoll-tfd: id %#08x tfd %8d events %#08x data %#016" PRIx64 " ignore %d\n",
|
||||
+ action, id, e->tfd, e->events, e->data, e->ignore);
|
||||
}
|
||||
|
||||
static void pr_info_eventpoll(char *action, EventpollFileEntry *e)
|
||||
@@ -144,9 +144,9 @@ int flush_eventpoll_dinfo_queue(void)
|
||||
};
|
||||
struct kid_elem *t = kid_lookup_epoll_tfd(&fd_tree, &ke, &slot);
|
||||
if (!t) {
|
||||
- pr_debug("kid_lookup_epoll: no match pid %d efd %d tfd %d toff %u\n", dinfo->pid,
|
||||
- dinfo->efd, tfde->tfd, dinfo->toff[i].off);
|
||||
- goto err;
|
||||
+ pr_info("Drop tfd entry, pid %d efd %d tfd %d toff %u\n",
|
||||
+ dinfo->pid, dinfo->efd, tfde->tfd, dinfo->toff[i].off);
|
||||
+ continue;
|
||||
}
|
||||
|
||||
pr_debug("kid_lookup_epoll: rbsearch match pid %d efd %d tfd %d toff %u -> %d\n", dinfo->pid,
|
||||
@@ -159,6 +159,7 @@ int flush_eventpoll_dinfo_queue(void)
|
||||
goto err;
|
||||
}
|
||||
|
||||
+ pr_info("Change tfd: %d -> %d @ efd=%d\n", tfde->tfd, t->idx, slot.efd);
|
||||
tfde->tfd = t->idx;
|
||||
}
|
||||
|
||||
@@ -409,6 +410,11 @@ static int eventpoll_retore_tfd(int fd, int id, EventpollTfdEntry *tdefe)
|
||||
{
|
||||
struct epoll_event event;
|
||||
|
||||
+ if (tdefe->ignore) {
|
||||
+ pr_info_eventpoll_tfd("Ignore ", id, tdefe);
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
pr_info_eventpoll_tfd("Restore ", id, tdefe);
|
||||
|
||||
event.events = tdefe->events;
|
||||
diff --git a/criu/proc_parse.c b/criu/proc_parse.c
|
||||
index daa54d9..d13589c 100644
|
||||
--- a/criu/proc_parse.c
|
||||
+++ b/criu/proc_parse.c
|
||||
@@ -1895,10 +1895,12 @@ static int parse_fdinfo_pid_s(int pid, int fd, int type, void *arg)
|
||||
e->has_dev = false;
|
||||
e->has_inode = false;
|
||||
e->has_pos = false;
|
||||
+ e->has_ignore = false;
|
||||
} else if (ret == 6) {
|
||||
e->has_dev = true;
|
||||
e->has_inode = true;
|
||||
e->has_pos = true;
|
||||
+ e->has_ignore = true;
|
||||
} else if (ret < 6) {
|
||||
eventpoll_tfd_entry__free_unpacked(e, NULL);
|
||||
goto parse_err;
|
||||
diff --git a/images/eventpoll.proto b/images/eventpoll.proto
|
||||
index 0f3e8a8..2fd9598 100644
|
||||
--- a/images/eventpoll.proto
|
||||
+++ b/images/eventpoll.proto
|
||||
@@ -14,6 +14,9 @@ message eventpoll_tfd_entry {
|
||||
optional uint32 dev = 5;
|
||||
optional uint64 inode = 6;
|
||||
optional uint64 pos = 7;
|
||||
+
|
||||
+ /* entry validation */
|
||||
+ optional uint32 ignore = 8;
|
||||
}
|
||||
|
||||
message eventpoll_file_entry {
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,193 +0,0 @@
|
||||
From 58a8c9eb07c2cff6232c20f9a59edc634bb1e5e0 Mon Sep 17 00:00:00 2001
|
||||
From: Jingxian He <hejingxian@huawei.com>
|
||||
Date: Wed, 19 May 2021 21:59:24 +0800
|
||||
Subject: [PATCH 32/72] task_exit_notify: add task exit notify mask method for
|
||||
criu
|
||||
|
||||
Add task exit notify mask method for criu during kernel module upgrade.
|
||||
|
||||
Signed-off-by: Jingxian He <hejingxian@huawei.com>
|
||||
Signed-off-by: fu.lin <fulin10@huawei.com>
|
||||
---
|
||||
criu/Makefile.crtools | 1 +
|
||||
criu/config.c | 1 +
|
||||
criu/cr-restore.c | 10 ++++++++++
|
||||
criu/crtools.c | 1 +
|
||||
criu/exit-notify.c | 34 ++++++++++++++++++++++++++++++++++
|
||||
criu/include/cr_options.h | 1 +
|
||||
criu/include/exit-notify.h | 10 ++++++++++
|
||||
criu/seize.c | 10 +++++++++-
|
||||
8 files changed, 67 insertions(+), 1 deletion(-)
|
||||
create mode 100644 criu/exit-notify.c
|
||||
create mode 100644 criu/include/exit-notify.h
|
||||
|
||||
diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools
|
||||
index 2e82912..65cc215 100644
|
||||
--- a/criu/Makefile.crtools
|
||||
+++ b/criu/Makefile.crtools
|
||||
@@ -93,6 +93,7 @@ obj-y += timens.o
|
||||
obj-y += pin-mem.o
|
||||
obj-y += devname.o
|
||||
obj-y += files-chr.o
|
||||
+obj-y += exit-notify.o
|
||||
obj-$(CONFIG_HAS_LIBBPF) += bpfmap.o
|
||||
obj-$(CONFIG_COMPAT) += pie-util-vdso-elf32.o
|
||||
CFLAGS_pie-util-vdso-elf32.o += -DCONFIG_VDSO_32
|
||||
diff --git a/criu/config.c b/criu/config.c
|
||||
index cf99fb1..bd0f84d 100644
|
||||
--- a/criu/config.c
|
||||
+++ b/criu/config.c
|
||||
@@ -703,6 +703,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd,
|
||||
BOOL_OPT("with-notifier", &opts.with_notifier_kup),
|
||||
BOOL_OPT("dump-char-dev", &opts.dump_char_dev),
|
||||
BOOL_OPT("with-fd-cred", &opts.with_fd_cred),
|
||||
+ BOOL_OPT("mask-exit-notify", &opts.mask_exit_notify),
|
||||
{},
|
||||
};
|
||||
|
||||
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
|
||||
index ac677a1..09f135b 100644
|
||||
--- a/criu/cr-restore.c
|
||||
+++ b/criu/cr-restore.c
|
||||
@@ -82,6 +82,7 @@
|
||||
#include "apparmor.h"
|
||||
#include "pin-mem.h"
|
||||
#include "notifier.h"
|
||||
+#include "exit-notify.h"
|
||||
|
||||
#include "parasite-syscall.h"
|
||||
#include "files-reg.h"
|
||||
@@ -1542,6 +1543,15 @@ static inline int fork_with_pid(struct pstree_item *item)
|
||||
pr_debug("PID: real %d virt %d\n", item->pid->real, vpid(item));
|
||||
}
|
||||
|
||||
+ if (opts.mask_exit_notify) {
|
||||
+ int pid = ret;
|
||||
+
|
||||
+ pr_info("Start unmask exit notifier for pid %d\n", pid);
|
||||
+ ret = mask_task_exit_notify(pid, false);
|
||||
+ if (ret)
|
||||
+ pr_err("Can't unmask exit notifier for pid %d\n", pid);
|
||||
+ }
|
||||
+
|
||||
err_unlock:
|
||||
if (!(ca.clone_flags & CLONE_NEWPID))
|
||||
unlock_last_pid();
|
||||
diff --git a/criu/crtools.c b/criu/crtools.c
|
||||
index ed7bd99..1a41be4 100644
|
||||
--- a/criu/crtools.c
|
||||
+++ b/criu/crtools.c
|
||||
@@ -454,6 +454,7 @@ usage:
|
||||
" --dump-char-dev Dump char dev files as normal file with repair cmd\n"
|
||||
\
|
||||
" --with-fd-cred Allow to make the restored process has the same cred\n"
|
||||
+ " --mask-exit-notify Mask task exit notify during dump and restore\n"
|
||||
"\n"
|
||||
"Check options:\n"
|
||||
" Without options, \"criu check\" checks availability of absolutely required\n"
|
||||
diff --git a/criu/exit-notify.c b/criu/exit-notify.c
|
||||
new file mode 100644
|
||||
index 0000000..5c86724
|
||||
--- /dev/null
|
||||
+++ b/criu/exit-notify.c
|
||||
@@ -0,0 +1,34 @@
|
||||
+#include <fcntl.h>
|
||||
+#include <stdbool.h>
|
||||
+#include <stdio.h>
|
||||
+#include <unistd.h>
|
||||
+
|
||||
+#include "exit-notify.h"
|
||||
+#include "log.h"
|
||||
+
|
||||
+int mask_task_exit_notify(int pid, bool mask)
|
||||
+{
|
||||
+ int fd, retval;
|
||||
+ char buf[PID_BUF_SIZE] = {0};
|
||||
+
|
||||
+ if (pid <= 0)
|
||||
+ return -1;
|
||||
+
|
||||
+ snprintf(buf, PID_BUF_SIZE - 1, "%d", pid);
|
||||
+ if (mask)
|
||||
+ fd = open(MASK_EXIT_NOTIFY_DIR, O_WRONLY, 0);
|
||||
+ else
|
||||
+ fd = open(UNMASK_EXIT_NOTIFY_DIR, O_WRONLY, 0);
|
||||
+
|
||||
+ if (fd < 0) {
|
||||
+ pr_err("open mask exit notify file fail\n");
|
||||
+ return fd;
|
||||
+ }
|
||||
+
|
||||
+ retval = write(fd, buf, PID_BUF_SIZE);
|
||||
+ if (retval < 0)
|
||||
+ pr_err("Write mask exit pid: %s fail\n", buf);
|
||||
+ close(fd);
|
||||
+
|
||||
+ return retval < 0 ? -1 : 0;
|
||||
+}
|
||||
diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h
|
||||
index 1d6ddcf..26ae5b6 100644
|
||||
--- a/criu/include/cr_options.h
|
||||
+++ b/criu/include/cr_options.h
|
||||
@@ -195,6 +195,7 @@ struct cr_options {
|
||||
int with_notifier_kup;
|
||||
int dump_char_dev;
|
||||
int with_fd_cred;
|
||||
+ int mask_exit_notify;
|
||||
};
|
||||
|
||||
extern struct cr_options opts;
|
||||
diff --git a/criu/include/exit-notify.h b/criu/include/exit-notify.h
|
||||
new file mode 100644
|
||||
index 0000000..34f2c8d
|
||||
--- /dev/null
|
||||
+++ b/criu/include/exit-notify.h
|
||||
@@ -0,0 +1,10 @@
|
||||
+#ifndef __CRIU_EXIT_NOTIFY_H__
|
||||
+#define __CRIU_EXIT_NOTIFY_H__
|
||||
+
|
||||
+#define PID_BUF_SIZE 32
|
||||
+#define MASK_EXIT_NOTIFY_DIR "/sys/kernel/mask_exit_notify"
|
||||
+#define UNMASK_EXIT_NOTIFY_DIR "/sys/kernel/unmask_exit_notify"
|
||||
+
|
||||
+int mask_task_exit_notify(int pid, bool mask);
|
||||
+
|
||||
+#endif /* __CRIU_EXIT_NOTIFY_H__ */
|
||||
diff --git a/criu/seize.c b/criu/seize.c
|
||||
index 8a35c3c..1e127ff 100644
|
||||
--- a/criu/seize.c
|
||||
+++ b/criu/seize.c
|
||||
@@ -24,6 +24,8 @@
|
||||
#include "xmalloc.h"
|
||||
#include "util.h"
|
||||
#include "pin-mem.h"
|
||||
+#include "mem.h"
|
||||
+#include "exit-notify.h"
|
||||
|
||||
#define NR_ATTEMPTS 5
|
||||
|
||||
@@ -636,7 +638,7 @@ free:
|
||||
|
||||
static void unseize_task_and_threads(const struct pstree_item *item, int st)
|
||||
{
|
||||
- int i;
|
||||
+ int i, ret;
|
||||
|
||||
if (item->pid->state == TASK_DEAD)
|
||||
return;
|
||||
@@ -646,6 +648,12 @@ static void unseize_task_and_threads(const struct pstree_item *item, int st)
|
||||
dump_task_special_pages(item->threads[i].real);
|
||||
}
|
||||
|
||||
+ if (opts.mask_exit_notify && (st == TASK_DEAD)) {
|
||||
+ ret = mask_task_exit_notify(item->threads[0].real, true);
|
||||
+ if (ret)
|
||||
+ pr_err("mask exit notify for %d fail.\n", item->threads[0].real);
|
||||
+ }
|
||||
+
|
||||
/*
|
||||
* The st is the state we want to switch tasks into,
|
||||
* the item->state is the state task was in when we seized one.
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,403 +0,0 @@
|
||||
From fe39f73462e84a1a59d9b2b81a97e26cd1f2d20c Mon Sep 17 00:00:00 2001
|
||||
From: Luo Longjun <luolongjun@huawei.com>
|
||||
Date: Mon, 7 Jun 2021 11:50:42 +0800
|
||||
Subject: [PATCH 33/72] unix socket: add support for unix stream socket
|
||||
|
||||
When dumping a unix stream socket with external connections,
|
||||
we will tell the kernel to turn repair mode on for this socket.
|
||||
The kernel will then keep this socket until it is restored.
|
||||
During this process, the peer socket that communicates with
|
||||
this sock in repair mode will get EAGAIN or be blocked.
|
||||
|
||||
Signed-off-by: Luo Longjun <luolongjun@huawei.com>
|
||||
|
||||
fix unix socket dump and restore err
|
||||
Fix name-less unix socket dump and restore problem.
|
||||
|
||||
Signed-off-by: Jingxian He <hejingxian@huawei.com>
|
||||
|
||||
unix socket:ignore repair error from kernel
|
||||
Leave the error for applications to deal with.
|
||||
|
||||
Signed-off-by: Luo Longjun <luolongjun@huawei.com>
|
||||
|
||||
- enable this feature by checking the cmdline parameter `unix_stream_restore_enable`
|
||||
- don't set repair mode for non-external socket
|
||||
|
||||
Signed-off-by: fu.lin <fulin10@huawei.com>
|
||||
---
|
||||
criu/cr-dump.c | 1 +
|
||||
criu/include/kerndat.h | 1 +
|
||||
criu/include/sockets.h | 1 +
|
||||
criu/kerndat.c | 33 +++++++++
|
||||
criu/sk-unix.c | 150 ++++++++++++++++++++++++++++++++++++++---
|
||||
images/sk-unix.proto | 1 +
|
||||
6 files changed, 178 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/criu/cr-dump.c b/criu/cr-dump.c
|
||||
index fd17413..e0e11cc 100644
|
||||
--- a/criu/cr-dump.c
|
||||
+++ b/criu/cr-dump.c
|
||||
@@ -2002,6 +2002,7 @@ static int cr_dump_finish(int ret)
|
||||
|
||||
cr_plugin_fini(CR_PLUGIN_STAGE__DUMP, ret);
|
||||
cgp_fini();
|
||||
+ unix_stream_unlock(ret);
|
||||
|
||||
if (!ret) {
|
||||
/*
|
||||
diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h
|
||||
index 05abeda..3979939 100644
|
||||
--- a/criu/include/kerndat.h
|
||||
+++ b/criu/include/kerndat.h
|
||||
@@ -76,6 +76,7 @@ struct kerndat_s {
|
||||
bool has_nftables_concat;
|
||||
bool has_rseq;
|
||||
bool has_ptrace_get_rseq_conf;
|
||||
+ bool has_unix_sk_repair;
|
||||
};
|
||||
|
||||
extern struct kerndat_s kdat;
|
||||
diff --git a/criu/include/sockets.h b/criu/include/sockets.h
|
||||
index 2391b48..e43a760 100644
|
||||
--- a/criu/include/sockets.h
|
||||
+++ b/criu/include/sockets.h
|
||||
@@ -43,6 +43,7 @@ extern int add_fake_unix_queuers(void);
|
||||
extern int fix_external_unix_sockets(void);
|
||||
extern int prepare_scms(void);
|
||||
extern int unix_note_scm_rights(int id_for, uint32_t *file_ids, int *fds, int n_ids);
|
||||
+extern void unix_stream_unlock(int ret);
|
||||
|
||||
extern struct collect_image_info netlink_sk_cinfo;
|
||||
|
||||
diff --git a/criu/kerndat.c b/criu/kerndat.c
|
||||
index af7113a..6d6aac1 100644
|
||||
--- a/criu/kerndat.c
|
||||
+++ b/criu/kerndat.c
|
||||
@@ -1259,6 +1259,36 @@ static int kerndat_has_nftables_concat(void)
|
||||
#endif
|
||||
}
|
||||
|
||||
+#define UNIX_STREAM_RESTORE_ENABLE_FILE "/sys/module/kernel/parameters/unix_stream_restore_enable"
|
||||
+
|
||||
+static void kerndat_has_unix_sk_repair(void)
|
||||
+{
|
||||
+ FILE *fp;
|
||||
+ char ch = 'N';
|
||||
+
|
||||
+ if (access(UNIX_STREAM_RESTORE_ENABLE_FILE, F_OK) < 0) {
|
||||
+ pr_debug("C/R external unix stream socket is not support\n");
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ fp = fopen(UNIX_STREAM_RESTORE_ENABLE_FILE, "r");
|
||||
+ if (fp == NULL) {
|
||||
+ pr_err("failed to open '%s': %s\n",
|
||||
+ UNIX_STREAM_RESTORE_ENABLE_FILE, strerror(errno));
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ fscanf(fp, "%c", &ch);
|
||||
+ if (ch == 'Y') {
|
||||
+ pr_debug("enable C/R external unix stream socket support\n");
|
||||
+ kdat.has_unix_sk_repair = true;
|
||||
+ }
|
||||
+
|
||||
+ fclose(fp);
|
||||
+
|
||||
+ return;
|
||||
+}
|
||||
+
|
||||
int kerndat_init(void)
|
||||
{
|
||||
int ret;
|
||||
@@ -1419,6 +1449,9 @@ int kerndat_init(void)
|
||||
pr_err("kerndat_has_ptrace_get_rseq_conf failed when initializing kerndat.\n");
|
||||
ret = -1;
|
||||
}
|
||||
+
|
||||
+ kerndat_has_unix_sk_repair();
|
||||
+
|
||||
kerndat_lsm();
|
||||
kerndat_mmap_min_addr();
|
||||
kerndat_files_stat();
|
||||
diff --git a/criu/sk-unix.c b/criu/sk-unix.c
|
||||
index f3fe60c..86bfa18 100644
|
||||
--- a/criu/sk-unix.c
|
||||
+++ b/criu/sk-unix.c
|
||||
@@ -72,6 +72,7 @@ struct unix_sk_desc {
|
||||
char *name;
|
||||
unsigned int nr_icons;
|
||||
unsigned int *icons;
|
||||
+ int repair_ino;
|
||||
|
||||
unsigned int vfs_dev;
|
||||
unsigned int vfs_ino;
|
||||
@@ -89,9 +90,18 @@ struct unix_sk_desc {
|
||||
struct list_head peer_list;
|
||||
struct list_head peer_node;
|
||||
|
||||
+ struct list_head repair_list;
|
||||
+ struct list_head repair_node;
|
||||
+ struct unix_stream_extern_socket_desc *ext_node;
|
||||
+
|
||||
UnixSkEntry *ue;
|
||||
};
|
||||
|
||||
+struct unix_stream_extern_socket_desc {
|
||||
+ struct list_head list;
|
||||
+ int fd;
|
||||
+};
|
||||
+
|
||||
/*
|
||||
* The mutex_ghost is accessed from different tasks,
|
||||
* so make sure it is in shared memory.
|
||||
@@ -100,6 +110,7 @@ static mutex_t *mutex_ghost;
|
||||
|
||||
static LIST_HEAD(unix_sockets);
|
||||
static LIST_HEAD(unix_ghost_addr);
|
||||
+static LIST_HEAD(unix_stream_external_sockets);
|
||||
|
||||
static int unix_resolve_name(int lfd, uint32_t id, struct unix_sk_desc *d, UnixSkEntry *ue, const struct fd_parms *p);
|
||||
|
||||
@@ -116,6 +127,26 @@ struct unix_sk_listen_icon {
|
||||
|
||||
static struct unix_sk_listen_icon *unix_listen_icons[SK_HASH_SIZE];
|
||||
|
||||
+static int unix_stream_repair_on(int fd)
|
||||
+{
|
||||
+ int ret, aux = 1;
|
||||
+ ret = setsockopt(fd, SOL_TCP, TCP_REPAIR_OPTIONS, &aux, sizeof(aux));
|
||||
+ if (ret < 0)
|
||||
+ pr_err("Can't turn repair mod for unix stream on. \n");
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+static int unix_stream_repair_off(int fd)
|
||||
+{
|
||||
+ int ret, aux = 0;
|
||||
+ ret = setsockopt(fd, SOL_TCP, TCP_REPAIR_OPTIONS, &aux, sizeof(aux));
|
||||
+ if (ret < 0)
|
||||
+ pr_err("Can't turn repair mod for unix stream off. \n");
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
static struct unix_sk_listen_icon *lookup_unix_listen_icons(unsigned int peer_ino)
|
||||
{
|
||||
struct unix_sk_listen_icon *ic;
|
||||
@@ -331,6 +362,8 @@ static int dump_one_unix_fd(int lfd, uint32_t id, const struct fd_parms *p)
|
||||
FilePermsEntry *perms;
|
||||
FownEntry *fown;
|
||||
void *m;
|
||||
+ unsigned int len;
|
||||
+ int ret;
|
||||
|
||||
m = xmalloc(sizeof(UnixSkEntry) + sizeof(SkOptsEntry) + sizeof(FilePermsEntry) + sizeof(FownEntry));
|
||||
if (!m)
|
||||
@@ -372,6 +405,7 @@ static int dump_one_unix_fd(int lfd, uint32_t id, const struct fd_parms *p)
|
||||
ue->fown = fown;
|
||||
ue->opts = skopts;
|
||||
ue->uflags = 0;
|
||||
+ ue->repair_ino = 0;
|
||||
|
||||
if (unix_resolve_name(lfd, id, sk, ue, p))
|
||||
goto err;
|
||||
@@ -419,6 +453,41 @@ static int dump_one_unix_fd(int lfd, uint32_t id, const struct fd_parms *p)
|
||||
goto err;
|
||||
}
|
||||
|
||||
+ /*
|
||||
+ * Don't handle non-external unix socket, criu will restore it.
|
||||
+ *
|
||||
+ * use `sk->name != NULL || peer->name != NULL` to prevent
|
||||
+ * `socketpair()` sk condition.
|
||||
+ */
|
||||
+ if (kdat.has_unix_sk_repair && !sk->sd.already_dumped
|
||||
+ && (sk->name != NULL || peer->name != NULL)
|
||||
+ && ue->type == SOCK_STREAM) {
|
||||
+ struct unix_stream_extern_socket_desc *d;
|
||||
+
|
||||
+ d = xzalloc(sizeof(*d));
|
||||
+ if (!d)
|
||||
+ goto err;
|
||||
+
|
||||
+ /* Attention: used for upgrade in the same machine
|
||||
+ * May in conflict with original usage
|
||||
+ */
|
||||
+ pr_info("set %d(fd %d) unix stream repair on \n", sk->sd.ino, lfd);
|
||||
+ ret = unix_stream_repair_on(lfd);
|
||||
+ if (ret < 0)
|
||||
+ goto err;
|
||||
+
|
||||
+ d->fd = dup(lfd);
|
||||
+ pr_info("add %d into unix_stream_external_sockets\n", sk->sd.ino);
|
||||
+ list_add_tail(&d->list, &unix_stream_external_sockets);
|
||||
+ list_add(&sk->repair_node, &peer->repair_list);
|
||||
+ sk->ext_node = d;
|
||||
+
|
||||
+ len = sizeof(ue->repair_ino);
|
||||
+ ret = getsockopt(lfd, SOL_TCP, TCP_REPAIR_OPTIONS, &ue->repair_ino, &len);
|
||||
+ if (ret < 0)
|
||||
+ goto err;
|
||||
+ }
|
||||
+
|
||||
/*
|
||||
* Peer should have us as peer or have a name by which
|
||||
* we can access one.
|
||||
@@ -520,6 +589,26 @@ dump:
|
||||
|
||||
sk->sd.already_dumped = 1;
|
||||
|
||||
+ while (!list_empty(&sk->repair_list)) {
|
||||
+ struct unix_sk_desc *psk;
|
||||
+ struct unix_stream_extern_socket_desc *d;
|
||||
+
|
||||
+ psk = list_first_entry(&sk->repair_list, struct unix_sk_desc, repair_node);
|
||||
+ list_del_init(&psk->repair_node);
|
||||
+
|
||||
+ pr_info("delete ino %d into unix_stream_external_sockets\n", psk->sd.ino);
|
||||
+
|
||||
+ d = psk->ext_node;
|
||||
+ list_del_init(&d->list);
|
||||
+ psk->ext_node = NULL;
|
||||
+ /* ino start from 1, using 0 to tag the non-repairing socket is safe. */
|
||||
+ psk->ue->repair_ino = 0;
|
||||
+
|
||||
+ unix_stream_repair_off(d->fd);
|
||||
+ close_safe(&d->fd);
|
||||
+ xfree(d);
|
||||
+ }
|
||||
+
|
||||
while (!list_empty(&sk->peer_list)) {
|
||||
struct unix_sk_desc *psk;
|
||||
psk = list_first_entry(&sk->peer_list, struct unix_sk_desc, peer_node);
|
||||
@@ -754,6 +843,8 @@ static int unix_collect_one(const struct unix_diag_msg *m, struct nlattr **tb, s
|
||||
|
||||
INIT_LIST_HEAD(&d->peer_list);
|
||||
INIT_LIST_HEAD(&d->peer_node);
|
||||
+ INIT_LIST_HEAD(&d->repair_list);
|
||||
+ INIT_LIST_HEAD(&d->repair_node);
|
||||
d->fd = -1;
|
||||
|
||||
if (tb[UNIX_DIAG_SHUTDOWN])
|
||||
@@ -866,16 +957,18 @@ static int __dump_external_socket(struct unix_sk_desc *sk, struct unix_sk_desc *
|
||||
return -1;
|
||||
}
|
||||
|
||||
- if (peer->type != SOCK_DGRAM) {
|
||||
- show_one_unix("Ext stream not supported", peer);
|
||||
- pr_err("Can't dump half of stream unix connection.\n");
|
||||
+ if (peer->type != SOCK_DGRAM &&
|
||||
+ peer->type != SOCK_STREAM) {
|
||||
+ show_one_unix("Ext unix type not supported", peer);
|
||||
+ pr_err("Can't dump this kind of unix connection.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
- if (!peer->name) {
|
||||
+ /* part 1: prevent NULL pointer oops */
|
||||
+ if (!peer->name && !sk->name) {
|
||||
show_one_unix("Ext dgram w/o name", peer);
|
||||
+ show_one_unix("Ext dgram w/o name", sk);
|
||||
pr_err("Can't dump name-less external socket.\n");
|
||||
- pr_err("%d\n", sk->fd);
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -921,7 +1014,7 @@ int fix_external_unix_sockets(void)
|
||||
|
||||
fd_id_generate_special(NULL, &e.id);
|
||||
e.ino = sk->sd.ino;
|
||||
- e.type = SOCK_DGRAM;
|
||||
+ e.type = sk->type;
|
||||
e.state = TCP_LISTEN;
|
||||
e.name.data = (void *)sk->name;
|
||||
e.name.len = (size_t)sk->namelen;
|
||||
@@ -948,6 +1041,20 @@ err:
|
||||
return -1;
|
||||
}
|
||||
|
||||
+void unix_stream_unlock(int ret)
|
||||
+{
|
||||
+ struct unix_stream_extern_socket_desc *d;
|
||||
+ pr_debug("Unlocking unix stream sockets\n");
|
||||
+
|
||||
+ list_for_each_entry(d, &unix_stream_external_sockets, list) {
|
||||
+ if (ret) {
|
||||
+ pr_debug("unlock fd %d \n", d->fd);
|
||||
+ unix_stream_repair_off(d->fd);
|
||||
+ }
|
||||
+ close_safe(&d->fd);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
struct unix_sk_info {
|
||||
UnixSkEntry *ue;
|
||||
struct list_head list;
|
||||
@@ -1335,6 +1442,7 @@ static int post_open_standalone(struct file_desc *d, int fd)
|
||||
struct unix_sk_info *peer;
|
||||
struct sockaddr_un addr;
|
||||
int cwd_fd = -1, root_fd = -1, ns_fd = -1;
|
||||
+ int ret, value;
|
||||
|
||||
ui = container_of(d, struct unix_sk_info, d);
|
||||
BUG_ON((ui->flags & (USK_PAIR_MASTER | USK_PAIR_SLAVE)) || (ui->ue->uflags & (USK_CALLBACK | USK_INHERIT)));
|
||||
@@ -1391,7 +1499,28 @@ static int post_open_standalone(struct file_desc *d, int fd)
|
||||
* while we're connecting in sake of ghost sockets.
|
||||
*/
|
||||
mutex_lock(mutex_ghost);
|
||||
- if (connect(fd, (struct sockaddr *)&addr, sizeof(addr.sun_family) + len) < 0) {
|
||||
+
|
||||
+ /* we handle unix stream with external connections here.
|
||||
+ *
|
||||
+ * use `sk->name != NULL || peer->name != NULL` to prevent
|
||||
+ * `socketpair()` sk condition.
|
||||
+ */
|
||||
+ if (kdat.has_unix_sk_repair && peer->name
|
||||
+ && (ui->name != NULL || peer->name != NULL)
|
||||
+ && ui->ue->type == SOCK_STREAM && ui->ue->repair_ino != 0) {
|
||||
+ value = ui->ue->repair_ino;
|
||||
+ ret = setsockopt(fd, SOL_TCP, TCP_REPAIR, &value, sizeof(value));
|
||||
+ if (ret < 0) {
|
||||
+ /* permit the unix sk resume successfully when the peer has been
|
||||
+ * closed, just warn here */
|
||||
+ pr_warn("Can't repair %d socket\n", value);
|
||||
+ }
|
||||
+
|
||||
+ ret = unix_stream_repair_off(fd);
|
||||
+ if (ret < 0) {
|
||||
+ goto err_revert_and_exit;
|
||||
+ }
|
||||
+ } else if (connect(fd, (struct sockaddr *)&addr, sizeof(addr.sun_family) + len) < 0) {
|
||||
pr_perror("Can't connect %d socket", ui->ue->ino);
|
||||
goto err_revert_and_exit;
|
||||
}
|
||||
@@ -2068,8 +2197,11 @@ static int init_unix_sk_info(struct unix_sk_info *ui, UnixSkEntry *ue)
|
||||
}
|
||||
|
||||
ui->name = (void *)ue->name.data;
|
||||
- } else
|
||||
- ui->name = NULL;
|
||||
+ } else {
|
||||
+ /* part 2: prevent NULL pointer oops */
|
||||
+ ui->name = "";
|
||||
+ }
|
||||
+
|
||||
ui->name_dir = (void *)ue->name_dir;
|
||||
|
||||
ui->flags = 0;
|
||||
diff --git a/images/sk-unix.proto b/images/sk-unix.proto
|
||||
index 8ddbccd..3f77718 100644
|
||||
--- a/images/sk-unix.proto
|
||||
+++ b/images/sk-unix.proto
|
||||
@@ -54,4 +54,5 @@ message unix_sk_entry {
|
||||
optional uint32 ns_id = 16;
|
||||
optional sint32 mnt_id = 17 [default = -1];
|
||||
/* Please, don't use field with number 18. */
|
||||
+ required sint32 repair_ino = 19;
|
||||
}
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,104 +0,0 @@
|
||||
From 9b556899d67d7b20c64422fbde6292528772094d Mon Sep 17 00:00:00 2001
|
||||
From: Xiaoguang Li <lixiaoguang2@huawei.com>
|
||||
Date: Mon, 29 Mar 2021 20:58:28 -0400
|
||||
Subject: [PATCH 34/72] netlink: add repair modes and clear resource when
|
||||
failure
|
||||
|
||||
Signed-off-by: Jingxian He <hejingxian@huawei.com>
|
||||
---
|
||||
criu/cr-dump.c | 3 +++
|
||||
criu/include/net.h | 1 +
|
||||
criu/sk-netlink.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
|
||||
3 files changed, 49 insertions(+)
|
||||
|
||||
diff --git a/criu/cr-dump.c b/criu/cr-dump.c
|
||||
index e0e11cc..b7e0214 100644
|
||||
--- a/criu/cr-dump.c
|
||||
+++ b/criu/cr-dump.c
|
||||
@@ -2073,6 +2073,9 @@ static int cr_dump_finish(int ret)
|
||||
} else if (ret != 0 && opts.pin_memory) {
|
||||
pr_info("clear pin mem info\n");
|
||||
clear_pin_mem(0);
|
||||
+ } else if (ret != 0 && opts.with_notifier_kup) {
|
||||
+ pr_info("repair off netlink fd\n");
|
||||
+ netlink_repair_off();
|
||||
}
|
||||
|
||||
if (ret != 0 && opts.with_notifier_kup) {
|
||||
diff --git a/criu/include/net.h b/criu/include/net.h
|
||||
index 0da4cad..718cc45 100644
|
||||
--- a/criu/include/net.h
|
||||
+++ b/criu/include/net.h
|
||||
@@ -55,5 +55,6 @@ extern void check_has_netns_ioc(int fd, bool *kdat_val, const char *name);
|
||||
extern int net_set_ext(struct ns_id *ns);
|
||||
extern struct ns_id *get_root_netns(void);
|
||||
extern int read_net_ns_img(void);
|
||||
+extern int netlink_repair_off(void);
|
||||
|
||||
#endif /* __CR_NET_H__ */
|
||||
diff --git a/criu/sk-netlink.c b/criu/sk-netlink.c
|
||||
index 754eed9..d4b3b7b 100644
|
||||
--- a/criu/sk-netlink.c
|
||||
+++ b/criu/sk-netlink.c
|
||||
@@ -68,6 +68,47 @@ int netlink_receive_one(struct nlmsghdr *hdr, struct ns_id *ns, void *arg)
|
||||
return sk_collect_one(m->ndiag_ino, PF_NETLINK, &sd->sd, ns);
|
||||
}
|
||||
|
||||
+struct netlink_repair_fd {
|
||||
+ int netlink_fd;
|
||||
+ struct list_head nlist;
|
||||
+};
|
||||
+
|
||||
+static LIST_HEAD(netlink_repair_fds);
|
||||
+
|
||||
+static int netlink_repair_on(int fd)
|
||||
+{
|
||||
+ int ret, aux = 1;
|
||||
+ struct netlink_repair_fd *nrf;
|
||||
+
|
||||
+ ret = setsockopt(fd, SOL_NETLINK, TCP_REPAIR, &aux, sizeof(aux));
|
||||
+ if (ret < 0) {
|
||||
+ pr_err("Can't turn netlink repair mode ON, error: %d\n", ret);
|
||||
+ return ret;
|
||||
+ }
|
||||
+ nrf = malloc(sizeof(*nrf));
|
||||
+ if (!nrf)
|
||||
+ return -ENOMEM;
|
||||
+ nrf->netlink_fd = dup(fd);
|
||||
+ list_add_tail(&nrf->nlist, &netlink_repair_fds);
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+int netlink_repair_off(void)
|
||||
+{
|
||||
+ int aux = 0, ret;
|
||||
+ struct netlink_repair_fd *nrf, *n;
|
||||
+
|
||||
+ list_for_each_entry_safe(nrf, n, &netlink_repair_fds, nlist) {
|
||||
+ ret = setsockopt(nrf->netlink_fd, SOL_NETLINK, TCP_REPAIR, &aux, sizeof(aux));
|
||||
+ if (ret < 0)
|
||||
+ pr_err("Failed to turn off repair mode on netlink\n");
|
||||
+ close(nrf->netlink_fd);
|
||||
+ list_del(&nrf->nlist);
|
||||
+ free(nrf);
|
||||
+ }
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static bool can_dump_netlink_sk(int lfd)
|
||||
{
|
||||
int ret;
|
||||
@@ -90,6 +131,10 @@ static int dump_one_netlink_fd(int lfd, u32 id, const struct fd_parms *p)
|
||||
if (IS_ERR(sk))
|
||||
goto err;
|
||||
|
||||
+ if (netlink_repair_on(lfd) < 0) {
|
||||
+ goto err;
|
||||
+ }
|
||||
+
|
||||
ne.id = id;
|
||||
ne.ino = p->stat.st_ino;
|
||||
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,114 +0,0 @@
|
||||
From 852b4db35a06ed382e287d88cd055fdf20fc031f Mon Sep 17 00:00:00 2001
|
||||
From: "fu.lin" <fu.lin10@huawei.com>
|
||||
Date: Sat, 26 Jun 2021 15:18:15 +0800
|
||||
Subject: [PATCH 35/72] sysvshm: add dump/restore sysv-shm in host ipc ns
|
||||
|
||||
In the original criu design, a SysVIPC memory segment that belongs
|
||||
to the host ipcns cannot be dumped, because criu requires the
|
||||
whole ipcns to be dumped. When the ipcns is restored, new
|
||||
shared memory is created and filled with the original page
|
||||
data.
|
||||
|
||||
This patch makes it possible to restore shared memory in the host ipcns.
|
||||
Idea:
|
||||
SysVIPC memory does not disappear after the task exits. Its basic
|
||||
information can be read from `/proc/sysvipc/shm` as long as the
|
||||
system doesn't reboot. Compared with restoring the whole ipcns,
|
||||
the steps of creating the shared memory and filling in page data
|
||||
are skipped.
|
||||
|
||||
Reference:
|
||||
- https://www.criu.org/What_cannot_be_checkpointed
|
||||
|
||||
Conflict:NA
|
||||
Reference:https://gitee.com/src-openeuler/criu/pulls/21
|
||||
Signed-off-by: fu.lin <fu.lin10@huawei.com>
|
||||
---
|
||||
criu/cr-dump.c | 8 ++++----
|
||||
criu/cr-restore.c | 43 +++++++++++++++++++++++++++++++++++++++++++
|
||||
2 files changed, 47 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/criu/cr-dump.c b/criu/cr-dump.c
|
||||
index b7e0214..e7b5787 100644
|
||||
--- a/criu/cr-dump.c
|
||||
+++ b/criu/cr-dump.c
|
||||
@@ -463,11 +463,11 @@ static int dump_filemap(struct vma_area *vma_area, int fd)
|
||||
|
||||
static int check_sysvipc_map_dump(pid_t pid, VmaEntry *vma)
|
||||
{
|
||||
- if (root_ns_mask & CLONE_NEWIPC)
|
||||
- return 0;
|
||||
+ if (!(root_ns_mask & CLONE_NEWIPC))
|
||||
+ pr_info("Task %d with SysVIPC shmem map @%" PRIx64 " doesn't live in IPC ns\n",
|
||||
+ pid, vma->start);
|
||||
|
||||
- pr_err("Task %d with SysVIPC shmem map @%" PRIx64 " doesn't live in IPC ns\n", pid, vma->start);
|
||||
- return -1;
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
static int get_task_auxv(pid_t pid, MmEntry *mm)
|
||||
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
|
||||
index 09f135b..152bace 100644
|
||||
--- a/criu/cr-restore.c
|
||||
+++ b/criu/cr-restore.c
|
||||
@@ -1818,6 +1818,46 @@ static int create_children_and_session(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int prepare_rootns_sysv_shm(unsigned long clone_flags)
|
||||
+{
|
||||
+ int retval = 0;
|
||||
+ char *line = NULL;
|
||||
+ size_t len = 0;
|
||||
+ FILE *fp;
|
||||
+ key_t key;
|
||||
+ int shmid;
|
||||
+ mode_t mode;
|
||||
+ size_t size;
|
||||
+
|
||||
+ /* This is completed by `prepare_namespace()` */
|
||||
+ if (!!(clone_flags & CLONE_NEWIPC))
|
||||
+ return 0;
|
||||
+
|
||||
+ pr_info("Restoring SYSV shm in host namespace\n");
|
||||
+
|
||||
+ fp = fopen("/proc/sysvipc/shm", "r");
|
||||
+ if (fp == NULL) {
|
||||
+ pr_err("Can't open '/proc/sysvipc/shm', errno(%d): %s\n", errno, strerror(errno));
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ while (getline(&line, &len, fp) != -1) {
|
||||
+ if (sscanf(line, "%d %d %o %lu", &key, &shmid, &mode, &size) != 4)
|
||||
+ continue;
|
||||
+
|
||||
+ pr_debug("sscanf key: %d shmid: %d mode %o size %lu\n",
|
||||
+ key, shmid, mode, size);
|
||||
+
|
||||
+ retval = collect_sysv_shmem(shmid, size);
|
||||
+ if (retval != 0)
|
||||
+ goto out;
|
||||
+ }
|
||||
+
|
||||
+out:
|
||||
+ fclose(fp);
|
||||
+ return retval;
|
||||
+}
|
||||
+
|
||||
static int restore_task_with_children(void *_arg)
|
||||
{
|
||||
struct cr_clone_arg *ca = _arg;
|
||||
@@ -1924,6 +1964,9 @@ static int restore_task_with_children(void *_arg)
|
||||
if (prepare_namespace(current, ca->clone_flags))
|
||||
goto err;
|
||||
|
||||
+ if (prepare_rootns_sysv_shm(ca->clone_flags))
|
||||
+ goto err;
|
||||
+
|
||||
if (restore_finish_ns_stage(CR_STATE_PREPARE_NAMESPACES, CR_STATE_FORKING) < 0)
|
||||
goto err;
|
||||
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,47 +0,0 @@
|
||||
From 92fd13a21e52343b532eb1a163a159303107a6e2 Mon Sep 17 00:00:00 2001
|
||||
From: Jingxian He <hejingxian@huawei.com>
|
||||
Date: Thu, 24 Jun 2021 16:56:02 +0800
|
||||
Subject: [PATCH 36/72] add O_REPAIR flag to vma fd
|
||||
|
||||
Add O_REPAIR flag when opening vma fd.
|
||||
|
||||
Conflict:NA
|
||||
Reference:https://gitee.com/src-openeuler/criu/pulls/21
|
||||
Signed-off-by: Jingxian He <hejingxian@huawei.com>
|
||||
---
|
||||
criu/files-reg.c | 7 +++++--
|
||||
1 file changed, 5 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/criu/files-reg.c b/criu/files-reg.c
|
||||
index b9576a4..7bd8592 100644
|
||||
--- a/criu/files-reg.c
|
||||
+++ b/criu/files-reg.c
|
||||
@@ -2255,6 +2255,7 @@ void filemap_ctx_fini(void)
|
||||
}
|
||||
}
|
||||
|
||||
+#define O_REPAIR 040000000
|
||||
static int open_filemap(int pid, struct vma_area *vma)
|
||||
{
|
||||
u32 flags;
|
||||
@@ -2267,13 +2268,15 @@ static int open_filemap(int pid, struct vma_area *vma)
|
||||
*/
|
||||
|
||||
BUG_ON((vma->vmfd == NULL) || !vma->e->has_fdflags);
|
||||
- flags = vma->e->fdflags;
|
||||
+ flags = vma->e->fdflags | O_REPAIR;
|
||||
|
||||
if (ctx.flags != flags || ctx.desc != vma->vmfd) {
|
||||
if (vma->e->status & VMA_AREA_MEMFD)
|
||||
ret = memfd_open(vma->vmfd, &flags);
|
||||
- else
|
||||
+ else {
|
||||
+
|
||||
ret = open_path(vma->vmfd, do_open_reg_noseek_flags, &flags);
|
||||
+ }
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,90 +0,0 @@
|
||||
From bb60f8e71ec85dd11666bbb395508fac4403c251 Mon Sep 17 00:00:00 2001
|
||||
From: Jingxian He <hejingxian@huawei.com>
|
||||
Date: Sat, 26 Jun 2021 11:41:18 +0800
|
||||
Subject: [PATCH 37/72] looser file mode and size check
|
||||
|
||||
When the file mode and size are larger than in the dumped data,
|
||||
allow the restoring process to succeed.
|
||||
|
||||
Conflict:NA
|
||||
Reference:https://gitee.com/src-openeuler/criu/pulls/21
|
||||
Signed-off-by: Jingxian He <hejingxian@huawei.com>
|
||||
Signed-off-by: fu.lin <fulin10@huawei.com>
|
||||
---
|
||||
criu/config.c | 1 +
|
||||
criu/crtools.c | 1 +
|
||||
criu/files-reg.c | 14 +++++++++++---
|
||||
criu/include/cr_options.h | 1 +
|
||||
4 files changed, 14 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/criu/config.c b/criu/config.c
|
||||
index bd0f84d..a9eb699 100644
|
||||
--- a/criu/config.c
|
||||
+++ b/criu/config.c
|
||||
@@ -704,6 +704,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd,
|
||||
BOOL_OPT("dump-char-dev", &opts.dump_char_dev),
|
||||
BOOL_OPT("with-fd-cred", &opts.with_fd_cred),
|
||||
BOOL_OPT("mask-exit-notify", &opts.mask_exit_notify),
|
||||
+ BOOL_OPT("weak-file-check", &opts.weak_file_check),
|
||||
{},
|
||||
};
|
||||
|
||||
diff --git a/criu/crtools.c b/criu/crtools.c
|
||||
index 1a41be4..e1afeca 100644
|
||||
--- a/criu/crtools.c
|
||||
+++ b/criu/crtools.c
|
||||
@@ -455,6 +455,7 @@ usage:
|
||||
\
|
||||
" --with-fd-cred Allow to make the restored process has the same cred\n"
|
||||
" --mask-exit-notify Mask task exit notify during dump and restore\n"
|
||||
+ " --weak-file-check Allow file size and mod larger than dumping value\n"
|
||||
"\n"
|
||||
"Check options:\n"
|
||||
" Without options, \"criu check\" checks availability of absolutely required\n"
|
||||
diff --git a/criu/files-reg.c b/criu/files-reg.c
|
||||
index 7bd8592..1a3b836 100644
|
||||
--- a/criu/files-reg.c
|
||||
+++ b/criu/files-reg.c
|
||||
@@ -1991,7 +1991,10 @@ static bool validate_file(const int fd, const struct stat *fd_status, const stru
|
||||
{
|
||||
int result = 1;
|
||||
|
||||
- if (rfi->rfe->has_size && (fd_status->st_size != rfi->rfe->size)) {
|
||||
+ /* NOTICE: customize for the storage module upgrade feature */
|
||||
+ if (rfi->rfe->has_size
|
||||
+ && ((!opts.weak_file_check && fd_status->st_size != rfi->rfe->size)
|
||||
+ || (fd_status->st_size < rfi->rfe->size))) {
|
||||
pr_err("File %s has bad size %" PRIu64 " (expect %" PRIu64 ")\n", rfi->path, fd_status->st_size,
|
||||
rfi->rfe->size);
|
||||
return false;
|
||||
@@ -2102,8 +2105,13 @@ ext:
|
||||
if (!validate_file(tmp, &st, rfi))
|
||||
return -1;
|
||||
|
||||
- if (rfi->rfe->has_mode && (st.st_mode != rfi->rfe->mode)) {
|
||||
- pr_err("File %s has bad mode 0%o (expect 0%o)\n", rfi->path, (int)st.st_mode, rfi->rfe->mode);
|
||||
+ /* NOTICE: customize for the storage module upgrade feature */
|
||||
+ if (rfi->rfe->has_mode
|
||||
+ && ((!opts.weak_file_check && st.st_mode != rfi->rfe->mode)
|
||||
+ || (st.st_mode < rfi->rfe->mode))) {
|
||||
+ pr_err("File %s has bad mode 0%o (expect 0%o), weak check %d\n",
|
||||
+ rfi->path, (int)st.st_mode, rfi->rfe->mode,
|
||||
+ opts.weak_file_check);
|
||||
return -1;
|
||||
}
|
||||
|
||||
diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h
|
||||
index 26ae5b6..dec0082 100644
|
||||
--- a/criu/include/cr_options.h
|
||||
+++ b/criu/include/cr_options.h
|
||||
@@ -196,6 +196,7 @@ struct cr_options {
|
||||
int dump_char_dev;
|
||||
int with_fd_cred;
|
||||
int mask_exit_notify;
|
||||
+ int weak_file_check;
|
||||
};
|
||||
|
||||
extern struct cr_options opts;
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,308 +0,0 @@
|
||||
From 61ca95f5434573e89151d3557185c517cd69447a Mon Sep 17 00:00:00 2001
|
||||
From: Sang Yan <sangyan@huawei.com>
|
||||
Date: Thu, 8 Jul 2021 14:12:42 +0800
|
||||
Subject: [PATCH 38/72] file-lock: add repair mode to dump file locks
|
||||
|
||||
Add a new option "--file-locks-repair" to enable repair mode
|
||||
while dumping file locks.
|
||||
Repair mode keeps locks held while the process is killed during the
|
||||
dumping operation. The locks are then resumed from repair mode when
|
||||
the process resumes.
|
||||
|
||||
Signed-off-by: Sang Yan <sangyan@huawei.com>
|
||||
Signed-off-by: fu.lin <fulin10@huawei.com>
|
||||
---
|
||||
criu/config.c | 1 +
|
||||
criu/cr-dump.c | 8 ++++++
|
||||
criu/crtools.c | 1 +
|
||||
criu/file-lock.c | 10 +++++++
|
||||
criu/include/cr_options.h | 1 +
|
||||
criu/include/fcntl.h | 16 +++++++++++
|
||||
criu/include/parasite-syscall.h | 2 ++
|
||||
criu/include/parasite.h | 10 +++++++
|
||||
criu/parasite-syscall.c | 33 +++++++++++++++++++++++
|
||||
criu/pie/parasite.c | 48 +++++++++++++++++++++++++++++++++
|
||||
10 files changed, 130 insertions(+)
|
||||
|
||||
diff --git a/criu/config.c b/criu/config.c
|
||||
index a9eb699..0a0623a 100644
|
||||
--- a/criu/config.c
|
||||
+++ b/criu/config.c
|
||||
@@ -705,6 +705,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd,
|
||||
BOOL_OPT("with-fd-cred", &opts.with_fd_cred),
|
||||
BOOL_OPT("mask-exit-notify", &opts.mask_exit_notify),
|
||||
BOOL_OPT("weak-file-check", &opts.weak_file_check),
|
||||
+ BOOL_OPT("file-locks-repair", &opts.file_locks_repair),
|
||||
{},
|
||||
};
|
||||
|
||||
diff --git a/criu/cr-dump.c b/criu/cr-dump.c
|
||||
index e7b5787..607eac2 100644
|
||||
--- a/criu/cr-dump.c
|
||||
+++ b/criu/cr-dump.c
|
||||
@@ -1679,6 +1679,14 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie)
|
||||
goto err_cure;
|
||||
}
|
||||
|
||||
+ if (opts.file_locks_repair) {
|
||||
+ ret = parasite_dump_file_locks(parasite_ctl, pid);
|
||||
+ if (ret) {
|
||||
+ pr_err("Can't parasite dump file locks (pid: %d)\n", pid);
|
||||
+ goto err_cure;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
ret = dump_task_core_all(parasite_ctl, item, &pps_buf, cr_imgset, &misc);
|
||||
if (ret) {
|
||||
pr_err("Dump core (pid: %d) failed with %d\n", pid, ret);
|
||||
diff --git a/criu/crtools.c b/criu/crtools.c
|
||||
index e1afeca..7358918 100644
|
||||
--- a/criu/crtools.c
|
||||
+++ b/criu/crtools.c
|
||||
@@ -456,6 +456,7 @@ usage:
|
||||
" --with-fd-cred Allow to make the restored process has the same cred\n"
|
||||
" --mask-exit-notify Mask task exit notify during dump and restore\n"
|
||||
" --weak-file-check Allow file size and mod larger than dumping value\n"
|
||||
+ " --file-locks-repair Use repair mode to dump and restore file locks\n"
|
||||
"\n"
|
||||
"Check options:\n"
|
||||
" Without options, \"criu check\" checks availability of absolutely required\n"
|
||||
diff --git a/criu/file-lock.c b/criu/file-lock.c
|
||||
index 6334462..c893083 100644
|
||||
--- a/criu/file-lock.c
|
||||
+++ b/criu/file-lock.c
|
||||
@@ -424,6 +424,8 @@ void discard_dup_locks_tail(pid_t pid, int fd)
|
||||
list_for_each_entry_safe_reverse(fl, p, &file_lock_list, list) {
|
||||
if (fl->owners_fd != fd || pid != fl->fl_holder)
|
||||
break;
|
||||
+ if (fl->fl_kind == FL_POSIX)
|
||||
+ continue;
|
||||
|
||||
list_del(&fl->list);
|
||||
xfree(fl);
|
||||
@@ -611,8 +613,12 @@ static int restore_file_lock(FileLockEntry *fle)
|
||||
cmd = fle->type;
|
||||
} else if (fle->type == F_RDLCK) {
|
||||
cmd = LOCK_SH;
|
||||
+ if (opts.file_locks_repair)
|
||||
+ cmd = LOCK_REPAIR;
|
||||
} else if (fle->type == F_WRLCK) {
|
||||
cmd = LOCK_EX;
|
||||
+ if (opts.file_locks_repair)
|
||||
+ cmd = LOCK_REPAIR;
|
||||
} else if (fle->type == F_UNLCK) {
|
||||
cmd = LOCK_UN;
|
||||
} else {
|
||||
@@ -638,6 +644,10 @@ static int restore_file_lock(FileLockEntry *fle)
|
||||
flk.l_pid = fle->pid;
|
||||
flk.l_type = fle->type;
|
||||
|
||||
+ if (opts.file_locks_repair
|
||||
+ && (fle->type == F_RDLCK || fle->type == F_WRLCK))
|
||||
+ flk.l_type = F_REPAIR;
|
||||
+
|
||||
pr_info("(posix)flag: %d, type: %d, pid: %d, fd: %d, "
|
||||
"start: %8" PRIx64 ", len: %8" PRIx64 "\n",
|
||||
fle->flag, fle->type, fle->pid, fle->fd, fle->start, fle->len);
|
||||
diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h
|
||||
index dec0082..9ec8034 100644
|
||||
--- a/criu/include/cr_options.h
|
||||
+++ b/criu/include/cr_options.h
|
||||
@@ -197,6 +197,7 @@ struct cr_options {
|
||||
int with_fd_cred;
|
||||
int mask_exit_notify;
|
||||
int weak_file_check;
|
||||
+ int file_locks_repair;
|
||||
};
|
||||
|
||||
extern struct cr_options opts;
|
||||
diff --git a/criu/include/fcntl.h b/criu/include/fcntl.h
|
||||
index 568977c..0627818 100644
|
||||
--- a/criu/include/fcntl.h
|
||||
+++ b/criu/include/fcntl.h
|
||||
@@ -23,6 +23,22 @@ struct f_owner_ex {
|
||||
#define F_SETCRED 18
|
||||
#endif
|
||||
|
||||
+#ifndef F_NEED_REPAIR
|
||||
+#define F_NEED_REPAIR 16
|
||||
+#endif
|
||||
+
|
||||
+#ifndef F_REPAIR
|
||||
+#define F_REPAIR 32
|
||||
+#endif
|
||||
+
|
||||
+#ifndef LOCK_NEED_REPAIR
|
||||
+#define LOCK_NEED_REPAIR 256 /* REPAIRING lock */
|
||||
+#endif
|
||||
+
|
||||
+#ifndef LOCK_REPAIR
|
||||
+#define LOCK_REPAIR 512 /* REPAIR lock */
|
||||
+#endif
|
||||
+
|
||||
/*
|
||||
* These things are required to compile on CentOS-6
|
||||
*/
|
||||
diff --git a/criu/include/parasite-syscall.h b/criu/include/parasite-syscall.h
|
||||
index 4540e11..9f2d3e0 100644
|
||||
--- a/criu/include/parasite-syscall.h
|
||||
+++ b/criu/include/parasite-syscall.h
|
||||
@@ -48,4 +48,6 @@ extern int parasite_dump_cgroup(struct parasite_ctl *ctl, struct parasite_dump_c
|
||||
|
||||
extern struct parasite_tty_args *parasite_dump_tty(struct parasite_ctl *ctl, int fd, int type);
|
||||
|
||||
+extern int parasite_dump_file_locks(struct parasite_ctl *ctl, int pid);
|
||||
+
|
||||
#endif /* __CR_PARASITE_SYSCALL_H__ */
|
||||
diff --git a/criu/include/parasite.h b/criu/include/parasite.h
|
||||
index d2a0688..230c453 100644
|
||||
--- a/criu/include/parasite.h
|
||||
+++ b/criu/include/parasite.h
|
||||
@@ -37,6 +37,7 @@ enum {
|
||||
PARASITE_CMD_CHECK_VDSO_MARK,
|
||||
PARASITE_CMD_CHECK_AIOS,
|
||||
PARASITE_CMD_DUMP_CGROUP,
|
||||
+ PARASITE_CMD_DUMP_FILELOCKS,
|
||||
|
||||
PARASITE_CMD_MAX,
|
||||
};
|
||||
@@ -244,6 +245,15 @@ struct parasite_dump_cgroup_args {
|
||||
char contents[1 << 12];
|
||||
};
|
||||
|
||||
+struct parasite_dump_filelocks_args {
|
||||
+ short kind;
|
||||
+ short type;
|
||||
+ long start;
|
||||
+ long len;
|
||||
+ int pid;
|
||||
+ int fd;
|
||||
+};
|
||||
+
|
||||
#endif /* !__ASSEMBLY__ */
|
||||
|
||||
#endif /* __CR_PARASITE_H__ */
|
||||
diff --git a/criu/parasite-syscall.c b/criu/parasite-syscall.c
|
||||
index ee4fa86..c57f854 100644
|
||||
--- a/criu/parasite-syscall.c
|
||||
+++ b/criu/parasite-syscall.c
|
||||
@@ -32,6 +32,7 @@
|
||||
#include <compel/plugins/std/syscall-codes.h>
|
||||
#include "signal.h"
|
||||
#include "sigframe.h"
|
||||
+#include "file-lock.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
@@ -654,3 +655,35 @@ struct parasite_ctl *parasite_infect_seized(pid_t pid, struct pstree_item *item,
|
||||
|
||||
return ctl;
|
||||
}
|
||||
+
|
||||
+int parasite_dump_file_locks(struct parasite_ctl *ctl, int pid)
|
||||
+{
|
||||
+ struct parasite_dump_filelocks_args *args;
|
||||
+ struct file_lock *fl;
|
||||
+ int ret;
|
||||
+
|
||||
+ args = compel_parasite_args(ctl, struct parasite_dump_filelocks_args);
|
||||
+
|
||||
+ list_for_each_entry(fl, &file_lock_list, list) {
|
||||
+ if (fl->real_owner != pid)
|
||||
+ continue;
|
||||
+
|
||||
+ args->pid = fl->real_owner;
|
||||
+ args->fd = fl->owners_fd;
|
||||
+ args->kind = fl->fl_kind;
|
||||
+ args->type = fl->fl_ltype;
|
||||
+ args->start = fl->start;
|
||||
+ if (!strncmp(fl->end, "EOF", 3))
|
||||
+ args->len = 0;
|
||||
+ else
|
||||
+ args->len = (atoll(fl->end) + 1) - fl->start;
|
||||
+
|
||||
+ ret = compel_rpc_call_sync(PARASITE_CMD_DUMP_FILELOCKS, ctl);
|
||||
+ if (ret < 0) {
|
||||
+ pr_err("Parasite dump file lock failed! (pid: %d)\n", pid);
|
||||
+ return ret;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
diff --git a/criu/pie/parasite.c b/criu/pie/parasite.c
|
||||
index e49958b..c781303 100644
|
||||
--- a/criu/pie/parasite.c
|
||||
+++ b/criu/pie/parasite.c
|
||||
@@ -22,6 +22,7 @@
|
||||
#include "criu-log.h"
|
||||
#include "tty.h"
|
||||
#include "aio.h"
|
||||
+#include "file-lock.h"
|
||||
|
||||
#include "asm/parasite.h"
|
||||
#include "restorer.h"
|
||||
@@ -769,6 +770,50 @@ static int parasite_dump_cgroup(struct parasite_dump_cgroup_args *args)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int set_filelocks_needrepair(struct parasite_dump_filelocks_args *args)
|
||||
+{
|
||||
+ int ret;
|
||||
+
|
||||
+ if (args->kind == FL_FLOCK) {
|
||||
+ if (args->type == F_RDLCK || args->type == F_WRLCK) {
|
||||
+ int cmd = LOCK_NEED_REPAIR;
|
||||
+
|
||||
+ pr_info("Need Repair flock kind: %d, type: %d, cmd: %d, pid: %d, fd: %d\n",
|
||||
+ args->kind, args->type, cmd, args->pid, args->fd);
|
||||
+
|
||||
+ ret = sys_flock(args->fd, cmd);
|
||||
+ if (ret < 0) {
|
||||
+ pr_err("Can not set NEED_REPAIR flock!\n");
|
||||
+ return ret;
|
||||
+ }
|
||||
+ }
|
||||
+ } else if (args->kind == FL_POSIX) {
|
||||
+ if (args->type == F_RDLCK || args->type == F_WRLCK) {
|
||||
+ struct flock flk;
|
||||
+ memset(&flk, 0, sizeof(flk));
|
||||
+
|
||||
+ flk.l_whence = SEEK_SET;
|
||||
+ flk.l_start = args->start;
|
||||
+ flk.l_len = args->len;
|
||||
+ flk.l_pid = args->pid;
|
||||
+ flk.l_type = F_NEED_REPAIR;
|
||||
+
|
||||
+ pr_info("Need Repair posix lock kind: %d, type: %d, cmd: %d, pid: %d, fd: %d, "
|
||||
+ "start: %8"PRIx64", len: %8"PRIx64"\n",
|
||||
+ args->kind, args->type, flk.l_type, args->pid, args->fd,
|
||||
+ args->start, args->len);
|
||||
+
|
||||
+ ret = sys_fcntl(args->fd, F_SETLKW, (long)&flk);
|
||||
+ if (ret < 0) {
|
||||
+ pr_err("Can not set NEED_REPAIR posix lock!\n");
|
||||
+ return ret;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
void parasite_cleanup(void)
|
||||
{
|
||||
if (mprotect_args) {
|
||||
@@ -821,6 +866,9 @@ int parasite_daemon_cmd(int cmd, void *args)
|
||||
case PARASITE_CMD_DUMP_CGROUP:
|
||||
ret = parasite_dump_cgroup(args);
|
||||
break;
|
||||
+ case PARASITE_CMD_DUMP_FILELOCKS:
|
||||
+ ret = set_filelocks_needrepair(args);
|
||||
+ break;
|
||||
default:
|
||||
pr_err("Unknown command in parasite daemon thread leader: %d\n", cmd);
|
||||
ret = -1;
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,60 +0,0 @@
|
||||
From 5421245cf87bac71cbe999f257ba5b3a96c8733b Mon Sep 17 00:00:00 2001
|
||||
From: Liu Chao <liuchao173@huawei.com>
|
||||
Date: Fri, 9 Jul 2021 07:32:20 +0000
|
||||
Subject: [PATCH 39/72] unlock network when restore fails
|
||||
|
||||
Conflict:NA
|
||||
Reference:https://gitee.com/src-openeuler/criu/pulls/21
|
||||
Signed-off-by: fu.lin <fu.lin10@huawei.com>
|
||||
---
|
||||
criu/cr-restore.c | 13 +++++++++++++
|
||||
1 file changed, 13 insertions(+)
|
||||
|
||||
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
|
||||
index 152bace..d19768d 100644
|
||||
--- a/criu/cr-restore.c
|
||||
+++ b/criu/cr-restore.c
|
||||
@@ -115,6 +115,9 @@
|
||||
#endif
|
||||
|
||||
struct pstree_item *current;
|
||||
+#define NETWORK_COLLECTED 0x1
|
||||
+#define NETWORK_UNLOCK 0x2
|
||||
+static int network_status = 0;
|
||||
|
||||
static int restore_task_with_children(void *);
|
||||
static int sigreturn_restore(pid_t pid, struct task_restore_args *ta, unsigned long alen, CoreEntry *core);
|
||||
@@ -249,6 +252,7 @@ static int crtools_prepare_shared(void)
|
||||
/* Connections are unlocked from criu */
|
||||
if (!files_collected() && collect_image(&inet_sk_cinfo))
|
||||
return -1;
|
||||
+ network_status |= NETWORK_COLLECTED;
|
||||
|
||||
if (collect_binfmt_misc())
|
||||
return -1;
|
||||
@@ -2525,6 +2529,7 @@ skip_ns_bouncing:
|
||||
|
||||
/* Unlock network before disabling repair mode on sockets */
|
||||
network_unlock();
|
||||
+ network_status |= NETWORK_UNLOCK;
|
||||
|
||||
/*
|
||||
* Stop getting sigchld, after we resume the tasks they
|
||||
@@ -2734,6 +2739,14 @@ clean_cgroup:
|
||||
fini_cgroup();
|
||||
err:
|
||||
cr_plugin_fini(CR_PLUGIN_STAGE__RESTORE, ret);
|
||||
+ if (ret < 0) {
|
||||
+ if (!!(network_status & NETWORK_COLLECTED)
|
||||
+ && !files_collected() && collect_image(&inet_sk_cinfo))
|
||||
+ pr_err("collect inet sk cinfo fail\n");
|
||||
+
|
||||
+ if (!!(network_status & NETWORK_UNLOCK))
|
||||
+ network_unlock();
|
||||
+ }
|
||||
return ret;
|
||||
}
|
||||
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,332 +0,0 @@
|
||||
From a22542173083d2eeb5dde627c47452ea641c98c1 Mon Sep 17 00:00:00 2001
|
||||
From: Sang Yan <sangyan@huawei.com>
|
||||
Date: Mon, 12 Jul 2021 16:14:45 +0800
|
||||
Subject: [PATCH 40/72] net: add shared socket recover method for criu
|
||||
|
||||
When the socket file is shared with another process,
|
||||
it will not be freed during the dumping process.
|
||||
We can repair the socket file by installing it to
|
||||
the old fd number.
|
||||
|
||||
Add new options: "--share-dst-ports" and "--share-src-ports"
|
||||
for the user to tell criu which socket ports are shared.
|
||||
|
||||
Conflict:NA
|
||||
Reference:https://gitee.com/src-openeuler/criu/pulls/21
|
||||
Signed-off-by: Jingxian He <hejingxian@huawei.com>
|
||||
---
|
||||
criu/config.c | 8 ++
|
||||
criu/crtools.c | 3 +
|
||||
criu/files.c | 18 ++++-
|
||||
criu/include/cr_options.h | 2 +
|
||||
criu/include/files.h | 4 +
|
||||
criu/include/net.h | 1 +
|
||||
criu/include/sk-inet.h | 3 +
|
||||
criu/sk-inet.c | 151 ++++++++++++++++++++++++++++++++++++++
|
||||
8 files changed, 189 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/criu/config.c b/criu/config.c
|
||||
index 0a0623a..7e92731 100644
|
||||
--- a/criu/config.c
|
||||
+++ b/criu/config.c
|
||||
@@ -706,6 +706,8 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd,
|
||||
BOOL_OPT("mask-exit-notify", &opts.mask_exit_notify),
|
||||
BOOL_OPT("weak-file-check", &opts.weak_file_check),
|
||||
BOOL_OPT("file-locks-repair", &opts.file_locks_repair),
|
||||
+ { "share-dst-ports", required_argument, 0, 2000 },
|
||||
+ { "share-src-ports", required_argument, 0, 2001 },
|
||||
{},
|
||||
};
|
||||
|
||||
@@ -1041,6 +1043,12 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd,
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
+ case 2000:
|
||||
+ SET_CHAR_OPTS(share_dst_ports, optarg);
|
||||
+ break;
|
||||
+ case 2001:
|
||||
+ SET_CHAR_OPTS(share_src_ports, optarg);
|
||||
+ break;
|
||||
case 'V':
|
||||
pr_msg("Version: %s\n", CRIU_VERSION);
|
||||
if (strcmp(CRIU_GITID, "0"))
|
||||
diff --git a/criu/crtools.c b/criu/crtools.c
|
||||
index 7358918..cfa149a 100644
|
||||
--- a/criu/crtools.c
|
||||
+++ b/criu/crtools.c
|
||||
@@ -104,6 +104,9 @@ int main(int argc, char *argv[], char *envp[])
|
||||
goto usage;
|
||||
}
|
||||
|
||||
+ if (parse_share_ports())
|
||||
+ goto usage;
|
||||
+
|
||||
log_set_loglevel(opts.log_level);
|
||||
|
||||
if (optind < argc && !strcmp(argv[optind], "swrk")) {
|
||||
diff --git a/criu/files.c b/criu/files.c
|
||||
index 1ec5281..1c52cf4 100644
|
||||
--- a/criu/files.c
|
||||
+++ b/criu/files.c
|
||||
@@ -705,6 +705,8 @@ int dump_my_file(int lfd, u32 *id, int *type)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+int dst_pid;
|
||||
+
|
||||
int dump_task_files_seized(struct parasite_ctl *ctl, struct pstree_item *item, struct parasite_drain_fd *dfds)
|
||||
{
|
||||
int *lfds = NULL;
|
||||
@@ -728,7 +730,7 @@ int dump_task_files_seized(struct parasite_ctl *ctl, struct pstree_item *item, s
|
||||
img = open_image(CR_FD_FDINFO, O_DUMP, item->ids->files_id);
|
||||
if (!img)
|
||||
goto err;
|
||||
-
|
||||
+ dst_pid = item->pid->real;
|
||||
ret = 0; /* Don't fail if nr_fds == 0 */
|
||||
for (off = 0; ret == 0 && off < dfds->nr_fds; off += nr_fds) {
|
||||
if (nr_fds + off > dfds->nr_fds)
|
||||
@@ -1237,6 +1239,20 @@ static int open_fd(struct fdinfo_list_entry *fle)
|
||||
goto out;
|
||||
}
|
||||
|
||||
+ if (d->ops->type == FD_TYPES__INETSK) {
|
||||
+ if (check_need_repair(d)) {
|
||||
+ ret = repair_share_socket(d->id);
|
||||
+ if (!ret) {
|
||||
+ new_fd = get_share_socket();
|
||||
+ pr_info("get share socket:%d\n", new_fd);
|
||||
+ if (new_fd <= 0 || setup_and_serve_out(fle, new_fd) < 0)
|
||||
+ return -1;
|
||||
+ fle->stage = FLE_RESTORED;
|
||||
+ return 0;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
/*
|
||||
* Open method returns the following values:
|
||||
* 0 -- restore is successfully finished;
|
||||
diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h
|
||||
index 9ec8034..b7c1e34 100644
|
||||
--- a/criu/include/cr_options.h
|
||||
+++ b/criu/include/cr_options.h
|
||||
@@ -198,6 +198,8 @@ struct cr_options {
|
||||
int mask_exit_notify;
|
||||
int weak_file_check;
|
||||
int file_locks_repair;
|
||||
+ char *share_dst_ports;
|
||||
+ char *share_src_ports;
|
||||
};
|
||||
|
||||
extern struct cr_options opts;
|
||||
diff --git a/criu/include/files.h b/criu/include/files.h
|
||||
index 1d979a9..0521c7e 100644
|
||||
--- a/criu/include/files.h
|
||||
+++ b/criu/include/files.h
|
||||
@@ -201,4 +201,8 @@ extern int open_transport_socket(void);
|
||||
extern int set_fds_event(pid_t virt);
|
||||
extern void wait_fds_event(void);
|
||||
|
||||
+extern int repair_share_socket(int id);
|
||||
+extern int check_need_repair(struct file_desc *d);
|
||||
+extern int get_share_socket(void);
|
||||
+
|
||||
#endif /* __CR_FILES_H__ */
|
||||
diff --git a/criu/include/net.h b/criu/include/net.h
|
||||
index 718cc45..ec47b61 100644
|
||||
--- a/criu/include/net.h
|
||||
+++ b/criu/include/net.h
|
||||
@@ -16,6 +16,7 @@ extern int dump_net_ns(struct ns_id *ns);
|
||||
extern int prepare_net_namespaces(void);
|
||||
extern void fini_net_namespaces(void);
|
||||
extern int netns_keep_nsfd(void);
|
||||
+extern int parse_share_ports(void);
|
||||
|
||||
struct pstree_item;
|
||||
extern int restore_task_net_ns(struct pstree_item *current);
|
||||
diff --git a/criu/include/sk-inet.h b/criu/include/sk-inet.h
|
||||
index c832d63..27deceb 100644
|
||||
--- a/criu/include/sk-inet.h
|
||||
+++ b/criu/include/sk-inet.h
|
||||
@@ -101,4 +101,7 @@ struct rst_tcp_sock {
|
||||
union libsoccr_addr;
|
||||
int restore_sockaddr(union libsoccr_addr *sa, int family, u32 pb_port, u32 *pb_addr, u32 ifindex);
|
||||
|
||||
+#define MAX_SHARE_PORT_NUM 64
|
||||
+extern int dst_pid;
|
||||
+
|
||||
#endif /* __CR_SK_INET_H__ */
|
||||
diff --git a/criu/sk-inet.c b/criu/sk-inet.c
|
||||
index 05048c8..c7de793 100644
|
||||
--- a/criu/sk-inet.c
|
||||
+++ b/criu/sk-inet.c
|
||||
@@ -431,6 +431,152 @@ static bool needs_scope_id(uint32_t *src_addr)
|
||||
return false;
|
||||
}
|
||||
|
||||
+#define ADD_SHARE_SOCKET_PATH "/sys/kernel/add_share_socket"
|
||||
+#define REPAIR_SHARE_SOCKET_PATH "/sys/kernel/repair_share_socket"
|
||||
+#define SHARE_SOCKET_PATH "/sys/kernel/share_socket"
|
||||
+
|
||||
+int add_share_socket(u32 id, int fd, int pid, int port)
|
||||
+{
|
||||
+ int retval;
|
||||
+ char buf[256] = {0};
|
||||
+
|
||||
+ retval = snprintf(buf, 256, "%u,%d,%d,%d", id, fd, pid, port);
|
||||
+ if (retval <= 0)
|
||||
+ return -EFAULT;
|
||||
+
|
||||
+ fd = open(ADD_SHARE_SOCKET_PATH, O_WRONLY, 0);
|
||||
+ if (fd < 0) {
|
||||
+ pr_err("open file:%s fail\n", ADD_SHARE_SOCKET_PATH);
|
||||
+ return fd;
|
||||
+ }
|
||||
+
|
||||
+ retval = write(fd, buf, strlen(buf));
|
||||
+ close(fd);
|
||||
+ return retval < 0 ? -1 : 0;
|
||||
+}
|
||||
+
|
||||
+
|
||||
+int repair_share_socket(int id)
|
||||
+{
|
||||
+ int retval, fd;
|
||||
+ char buf[256] = {0};
|
||||
+
|
||||
+ retval = snprintf(buf, 256, "%u", id);
|
||||
+ if (retval <= 0)
|
||||
+ return -EFAULT;
|
||||
+
|
||||
+ fd = open(REPAIR_SHARE_SOCKET_PATH, O_WRONLY, 0);
|
||||
+ if (fd < 0) {
|
||||
+ pr_err("open file:%s fail\n", REPAIR_SHARE_SOCKET_PATH);
|
||||
+ return fd;
|
||||
+ }
|
||||
+ retval = write(fd, buf, strlen(buf));
|
||||
+
|
||||
+ close(fd);
|
||||
+ return retval < 0 ? -1 : 0;
|
||||
+}
|
||||
+
|
||||
+int get_share_socket(void)
|
||||
+{
|
||||
+ int fd;
|
||||
+ ssize_t count;
|
||||
+ int retval = -1;
|
||||
+ char buf[32] = {0};
|
||||
+
|
||||
+ fd = open(SHARE_SOCKET_PATH, O_RDONLY, 0);
|
||||
+ if (fd < 0) {
|
||||
+ pr_err("open file:%s fail\n", SHARE_SOCKET_PATH);
|
||||
+ return fd;
|
||||
+ }
|
||||
+
|
||||
+ count = read(fd, buf, sizeof(buf));
|
||||
+ if (count > 0)
|
||||
+ retval = atoi(buf);
|
||||
+
|
||||
+ close(fd);
|
||||
+ return retval;
|
||||
+}
|
||||
+
|
||||
+int g_share_dst_ports[MAX_SHARE_PORT_NUM];
|
||||
+int g_share_dst_port_num;
|
||||
+int g_share_src_ports[MAX_SHARE_PORT_NUM];
|
||||
+int g_share_src_port_num;
|
||||
+
|
||||
+int parse_share_ports(void)
|
||||
+{
|
||||
+ char *save, *p;
|
||||
+
|
||||
+ if (opts.share_dst_ports) {
|
||||
+ p = strtok_r(opts.share_dst_ports, ",", &save);
|
||||
+ while (p != NULL) {
|
||||
+ if (g_share_dst_port_num >= MAX_SHARE_PORT_NUM)
|
||||
+ return -1;
|
||||
+ g_share_dst_ports[g_share_dst_port_num] = atoi(p);
|
||||
+ if (!g_share_dst_ports[g_share_dst_port_num])
|
||||
+ return -1;
|
||||
+ g_share_dst_port_num++;
|
||||
+ p = strtok_r(NULL, ",", &save);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (opts.share_src_ports) {
|
||||
+ p = strtok_r(opts.share_src_ports, ",", &save);
|
||||
+ while (p != NULL) {
|
||||
+ if (g_share_src_port_num >= MAX_SHARE_PORT_NUM)
|
||||
+ return -1;
|
||||
+ g_share_src_ports[g_share_src_port_num] = atoi(p);
|
||||
+ if (!g_share_src_ports[g_share_src_port_num])
|
||||
+ return -1;
|
||||
+ g_share_src_port_num++;
|
||||
+ p = strtok_r(NULL, ",", &save);
|
||||
+ }
|
||||
+ }
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+int check_share_dst_port(int dst_port)
|
||||
+{
|
||||
+ int i;
|
||||
+ int ret = 0;
|
||||
+
|
||||
+ for (i = 0; i < g_share_dst_port_num; i++) {
|
||||
+ if (dst_port == g_share_dst_ports[i]) {
|
||||
+ ret = 1;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+int check_share_src_port(int src_port)
|
||||
+{
|
||||
+ int i;
|
||||
+ int ret = 0;
|
||||
+
|
||||
+ for (i = 0; i < g_share_src_port_num; i++) {
|
||||
+ if (src_port == g_share_src_ports[i]) {
|
||||
+ ret = 1;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+int check_need_repair(struct file_desc *d)
|
||||
+{
|
||||
+ struct inet_sk_info *ii;
|
||||
+ InetSkEntry *ie;
|
||||
+
|
||||
+ ii = container_of(d, struct inet_sk_info, d);
|
||||
+ ie = ii->ie;
|
||||
+ if (check_share_dst_port(ie->dst_port) ||
|
||||
+ check_share_src_port(ie->src_port))
|
||||
+ return 1;
|
||||
+ else
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int family)
|
||||
{
|
||||
struct inet_sk_desc *sk;
|
||||
@@ -488,6 +634,11 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa
|
||||
|
||||
BUG_ON(sk->sd.already_dumped);
|
||||
|
||||
+ if (check_share_dst_port(sk->dst_port) || check_share_src_port(sk->src_port)) {
|
||||
+ pr_info("Start add share prot:%d src %d\n", sk->dst_port, sk->src_port);
|
||||
+ add_share_socket(id, lfd, dst_pid, sk->src_port);
|
||||
+ }
|
||||
+
|
||||
ie.id = id;
|
||||
ie.ino = sk->sd.ino;
|
||||
if (sk->sd.sk_ns) {
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,273 +0,0 @@
|
||||
From aac63cee766bb6840326d008ed1b1993bb7c629a Mon Sep 17 00:00:00 2001
|
||||
From: Liu Chao <liuchao173@huawei.com>
|
||||
Date: Mon, 19 Jul 2021 03:19:30 +0000
|
||||
Subject: [PATCH 41/72] tcp: save src ports to ip_local_reserved_ports when
|
||||
dump tasks and restore them when restoring tasks
|
||||
|
||||
Signed-off-by: Liu Chao <liuchao173@huawei.com>
|
||||
Signed-off-by: fu.lin <fulin10@huawei.com>
|
||||
---
|
||||
criu/Makefile.crtools | 1 +
|
||||
criu/config.c | 8 ++-
|
||||
criu/cr-dump.c | 4 ++
|
||||
criu/crtools.c | 1 +
|
||||
criu/include/cr_options.h | 1 +
|
||||
criu/include/reserved-ports.h | 10 ++++
|
||||
criu/net.c | 6 +++
|
||||
criu/reserved-ports.c | 98 +++++++++++++++++++++++++++++++++++
|
||||
criu/sk-tcp.c | 2 +-
|
||||
9 files changed, 129 insertions(+), 2 deletions(-)
|
||||
create mode 100644 criu/include/reserved-ports.h
|
||||
create mode 100644 criu/reserved-ports.c
|
||||
|
||||
diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools
|
||||
index 65cc215..3e522b4 100644
|
||||
--- a/criu/Makefile.crtools
|
||||
+++ b/criu/Makefile.crtools
|
||||
@@ -94,6 +94,7 @@ obj-y += pin-mem.o
|
||||
obj-y += devname.o
|
||||
obj-y += files-chr.o
|
||||
obj-y += exit-notify.o
|
||||
+obj-y += reserved-ports.o
|
||||
obj-$(CONFIG_HAS_LIBBPF) += bpfmap.o
|
||||
obj-$(CONFIG_COMPAT) += pie-util-vdso-elf32.o
|
||||
CFLAGS_pie-util-vdso-elf32.o += -DCONFIG_VDSO_32
|
||||
diff --git a/criu/config.c b/criu/config.c
|
||||
index 7e92731..ae5f81e 100644
|
||||
--- a/criu/config.c
|
||||
+++ b/criu/config.c
|
||||
@@ -615,7 +615,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd,
|
||||
"no-" OPT_NAME, no_argument, SAVE_TO, false \
|
||||
}
|
||||
|
||||
- static const char short_opts[] = "dSsRt:hD:o:v::x::Vr:jJ:lW:L:M:";
|
||||
+ static const char short_opts[] = "dSsRt:hD:o:v::x::Vr:jJ:lW:L:M:P:";
|
||||
static struct option long_opts[] = {
|
||||
{ "tree", required_argument, 0, 't' },
|
||||
{ "leave-stopped", no_argument, 0, 's' },
|
||||
@@ -708,6 +708,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd,
|
||||
BOOL_OPT("file-locks-repair", &opts.file_locks_repair),
|
||||
{ "share-dst-ports", required_argument, 0, 2000 },
|
||||
{ "share-src-ports", required_argument, 0, 2001 },
|
||||
+ { "reserve-ports", required_argument, 0, 'P' },
|
||||
{},
|
||||
};
|
||||
|
||||
@@ -1057,6 +1058,11 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd,
|
||||
case 'h':
|
||||
*usage_error = false;
|
||||
return 2;
|
||||
+ case 'P':
|
||||
+ opts.reserve_ports = atoi(optarg);
|
||||
+ if (opts.reserve_ports < 0)
|
||||
+ goto bad_arg;
|
||||
+ break;
|
||||
default:
|
||||
return 2;
|
||||
}
|
||||
diff --git a/criu/cr-dump.c b/criu/cr-dump.c
|
||||
index 607eac2..a8ab61e 100644
|
||||
--- a/criu/cr-dump.c
|
||||
+++ b/criu/cr-dump.c
|
||||
@@ -89,6 +89,7 @@
|
||||
#include "pin-mem.h"
|
||||
#include "notifier.h"
|
||||
#include "files-chr.h"
|
||||
+#include "reserved-ports.h"
|
||||
|
||||
/*
|
||||
* Architectures can overwrite this function to restore register sets that
|
||||
@@ -2223,6 +2224,9 @@ int cr_dump_tasks(pid_t pid)
|
||||
goto err;
|
||||
}
|
||||
|
||||
+ if (opts.reserve_ports > 0)
|
||||
+ set_reserved_ports();
|
||||
+
|
||||
if (parent_ie) {
|
||||
inventory_entry__free_unpacked(parent_ie, NULL);
|
||||
parent_ie = NULL;
|
||||
diff --git a/criu/crtools.c b/criu/crtools.c
|
||||
index cfa149a..ae858e8 100644
|
||||
--- a/criu/crtools.c
|
||||
+++ b/criu/crtools.c
|
||||
@@ -460,6 +460,7 @@ usage:
|
||||
" --mask-exit-notify Mask task exit notify during dump and restore\n"
|
||||
" --weak-file-check Allow file size and mod larger than dumping value\n"
|
||||
" --file-locks-repair Use repair mode to dump and restore file locks\n"
|
||||
+ " --reserve-ports Reserve src ports in kernel\n"
|
||||
"\n"
|
||||
"Check options:\n"
|
||||
" Without options, \"criu check\" checks availability of absolutely required\n"
|
||||
diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h
|
||||
index b7c1e34..3b61c6b 100644
|
||||
--- a/criu/include/cr_options.h
|
||||
+++ b/criu/include/cr_options.h
|
||||
@@ -200,6 +200,7 @@ struct cr_options {
|
||||
int file_locks_repair;
|
||||
char *share_dst_ports;
|
||||
char *share_src_ports;
|
||||
+ int reserve_ports;
|
||||
};
|
||||
|
||||
extern struct cr_options opts;
|
||||
diff --git a/criu/include/reserved-ports.h b/criu/include/reserved-ports.h
|
||||
new file mode 100644
|
||||
index 0000000..b614482
|
||||
--- /dev/null
|
||||
+++ b/criu/include/reserved-ports.h
|
||||
@@ -0,0 +1,10 @@
|
||||
+#ifndef __CRIU_RESERVED_PORTS_H__
|
||||
+#define __CRIU_RESERVED_PORTS_H__
|
||||
+
|
||||
+#define RESERVED_PORTS_PATH "/proc/sys/net/ipv4/ip_local_reserved_ports"
|
||||
+
|
||||
+extern void read_reserved_ports(char *path);
|
||||
+extern void write_reserved_ports(char *path);
|
||||
+extern void set_reserved_ports(void);
|
||||
+
|
||||
+#endif /* __CRIU_RESERVED_PORTS_H__ */
|
||||
diff --git a/criu/net.c b/criu/net.c
|
||||
index 7b45f06..fff4c85 100644
|
||||
--- a/criu/net.c
|
||||
+++ b/criu/net.c
|
||||
@@ -46,6 +46,7 @@
|
||||
#include "external.h"
|
||||
#include "fdstore.h"
|
||||
#include "netfilter.h"
|
||||
+#include "reserved-ports.h"
|
||||
|
||||
#include "protobuf.h"
|
||||
#include "images/netdev.pb-c.h"
|
||||
@@ -3193,6 +3194,11 @@ void network_unlock(void)
|
||||
{
|
||||
pr_info("Unlock network\n");
|
||||
|
||||
+ if (opts.reserve_ports) {
|
||||
+ read_reserved_ports("ip_local_reserved_ports");
|
||||
+ write_reserved_ports(RESERVED_PORTS_PATH);
|
||||
+ }
|
||||
+
|
||||
cpt_unlock_tcp_connections();
|
||||
rst_unlock_tcp_connections();
|
||||
|
||||
diff --git a/criu/reserved-ports.c b/criu/reserved-ports.c
|
||||
new file mode 100644
|
||||
index 0000000..b4996ab
|
||||
--- /dev/null
|
||||
+++ b/criu/reserved-ports.c
|
||||
@@ -0,0 +1,98 @@
|
||||
+#include <fcntl.h>
|
||||
+#include <stdio.h>
|
||||
+#include <stdlib.h>
|
||||
+#include <string.h>
|
||||
+#include <linux/limits.h>
|
||||
+
|
||||
+#include "log.h"
|
||||
+#include "cr_options.h"
|
||||
+#include "util.h"
|
||||
+#include "sk-inet.h"
|
||||
+#include "reserved-ports.h"
|
||||
+
|
||||
+#include "common/list.h"
|
||||
+
|
||||
+static char* reserved_ports;
|
||||
+static int reserved_ports_num;
|
||||
+extern struct list_head cpt_tcp_repair_sockets;
|
||||
+
|
||||
+void read_reserved_ports(char *path)
|
||||
+{
|
||||
+ FILE *file = NULL;
|
||||
+ char *ch = NULL;
|
||||
+ size_t size = 0;
|
||||
+
|
||||
+ if (reserved_ports) {
|
||||
+ free(reserved_ports);
|
||||
+ reserved_ports = NULL;
|
||||
+ }
|
||||
+
|
||||
+ file = fopen(path, "r");
|
||||
+ if (!file) {
|
||||
+ pr_err("Cannot fopen %s\n", path);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ if (getline(&reserved_ports, &size, file) <= 0)
|
||||
+ pr_err("Cannot getline from %s\n", path);
|
||||
+ fclose(file);
|
||||
+
|
||||
+ if (!reserved_ports)
|
||||
+ return;
|
||||
+
|
||||
+ ch = strstr(reserved_ports, "\n");
|
||||
+ if (ch)
|
||||
+ *ch = '\0';
|
||||
+}
|
||||
+
|
||||
+void write_reserved_ports(char *path)
|
||||
+{
|
||||
+ int fd = -1;
|
||||
+ char buf[PATH_MAX];
|
||||
+
|
||||
+ fd = open(path, O_RDWR | O_CREAT, 0640);
|
||||
+ if (fd < 0) {
|
||||
+ pr_err("Cannot open %s ret %d cwd: %s\n", path, fd, buf);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ cr_system(-1, fd, -1, "/usr/bin/echo",
|
||||
+ (char *[]) { "echo", reserved_ports, NULL}, 0);
|
||||
+ close(fd);
|
||||
+}
|
||||
+
|
||||
+static int add_reserved_ports(struct inet_sk_desc *sk)
|
||||
+{
|
||||
+ if (reserved_ports_num >= opts.reserve_ports)
|
||||
+ return -1;
|
||||
+
|
||||
+ if (strlen(reserved_ports) == 0)
|
||||
+ snprintf(reserved_ports, 6, "%u", sk->src_port);
|
||||
+ else
|
||||
+ snprintf(reserved_ports + strlen(reserved_ports), 7, ",%u", sk->src_port);
|
||||
+ reserved_ports_num++;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+void set_reserved_ports(void)
|
||||
+{
|
||||
+ struct inet_sk_desc *sk = NULL;
|
||||
+ size_t size = 0;
|
||||
+
|
||||
+ read_reserved_ports(RESERVED_PORTS_PATH);
|
||||
+
|
||||
+ write_reserved_ports("ip_local_reserved_ports");
|
||||
+
|
||||
+ size = strlen(reserved_ports) + 6 * opts.reserve_ports + 1;
|
||||
+ if (xrealloc_safe(&reserved_ports, size))
|
||||
+ exit(1);
|
||||
+
|
||||
+ list_for_each_entry(sk, &cpt_tcp_repair_sockets, rlist)
|
||||
+ add_reserved_ports(sk);
|
||||
+
|
||||
+ write_reserved_ports(RESERVED_PORTS_PATH);
|
||||
+
|
||||
+ free(reserved_ports);
|
||||
+ reserved_ports = NULL;
|
||||
+}
|
||||
diff --git a/criu/sk-tcp.c b/criu/sk-tcp.c
|
||||
index 0afecd2..38889d7 100644
|
||||
--- a/criu/sk-tcp.c
|
||||
+++ b/criu/sk-tcp.c
|
||||
@@ -30,7 +30,7 @@
|
||||
#undef LOG_PREFIX
|
||||
#define LOG_PREFIX "tcp: "
|
||||
|
||||
-static LIST_HEAD(cpt_tcp_repair_sockets);
|
||||
+LIST_HEAD(cpt_tcp_repair_sockets);
|
||||
static LIST_HEAD(rst_tcp_repair_sockets);
|
||||
|
||||
static int lock_connection(struct inet_sk_desc *sk)
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,45 +0,0 @@
|
||||
From 06a0277c2aab1442c724217957fd5f915ace2753 Mon Sep 17 00:00:00 2001
|
||||
From: Zhuling <zhuling8@huawei.com>
|
||||
Date: Thu, 22 Jul 2021 10:15:15 +0800
|
||||
Subject: [PATCH 42/72] reg-file: fix dump fail problem with null seek op
|
||||
|
||||
Some customizing `struct file_operations` implementation has
|
||||
no `llseek`, therefore ignore the no-implementation errno.
|
||||
|
||||
Fix file dumping fail problem when the file seek op is null.
|
||||
|
||||
Signed-off-by: Jingxian He <hejingxian@huawei.com>
|
||||
Signed-off-by: fu.lin <fulin10@huawei.com>
|
||||
---
|
||||
criu/files-reg.c | 15 ++++++++++++---
|
||||
1 file changed, 12 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/criu/files-reg.c b/criu/files-reg.c
|
||||
index 1a3b836..6dc8745 100644
|
||||
--- a/criu/files-reg.c
|
||||
+++ b/criu/files-reg.c
|
||||
@@ -2176,9 +2176,18 @@ static int do_open_reg(int ns_root_fd, struct reg_file_info *rfi, void *arg)
|
||||
*/
|
||||
if (!(rfi->rfe->flags & O_PATH)) {
|
||||
if (rfi->rfe->pos != -1ULL && lseek(fd, rfi->rfe->pos, SEEK_SET) < 0) {
|
||||
- pr_perror("Can't restore file pos");
|
||||
- close(fd);
|
||||
- return -1;
|
||||
+ /*
|
||||
+ * Some customizing `struct file_operations`
|
||||
+ * implementation has no `llseek`, therefore
|
||||
+ * ignore the no-implementation errno.
|
||||
+ */
|
||||
+ if (errno == ESPIPE) {
|
||||
+ pr_warn("No ability to restore file ops\n");
|
||||
+ } else {
|
||||
+ pr_perror("Can't restore file pos");
|
||||
+ close(fd);
|
||||
+ return -1;
|
||||
+ }
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,39 +0,0 @@
|
||||
From 88274e29aaaec4a53df996ae84c37ad20f36395f Mon Sep 17 00:00:00 2001
|
||||
From: Zhuling <zhuling8@huawei.com>
|
||||
Date: Sat, 24 Jul 2021 16:37:17 +0800
|
||||
Subject: [PATCH 43/72] fix dump fail problem with no access to get socket
|
||||
filter
|
||||
|
||||
Someone uses bpf hook by writing the kernel function instead
|
||||
of the bpf code, it causes the error here.
|
||||
|
||||
Fix socket dumping fail problem when user space has no access
|
||||
to getting socket filter.
|
||||
|
||||
Signed-off-by: Jingxian He <hejingxian@huawei.com>
|
||||
Signed-off-by: fu.lin <fulin10@huawei.com>
|
||||
---
|
||||
criu/sockets.c | 7 ++++++-
|
||||
1 file changed, 6 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/criu/sockets.c b/criu/sockets.c
|
||||
index 2ddf85e..e412a1d 100644
|
||||
--- a/criu/sockets.c
|
||||
+++ b/criu/sockets.c
|
||||
@@ -355,7 +355,12 @@ static int dump_socket_filter(int sk, SkOptsEntry *soe)
|
||||
|
||||
ret = getsockopt(sk, SOL_SOCKET, SO_GET_FILTER, NULL, &len);
|
||||
if (ret) {
|
||||
- pr_perror("Can't get socket filter len");
|
||||
+ pr_warn("Can't get socket filter len");
|
||||
+ /* Someone uses bpf hook by writing the kernel function
|
||||
+ * instead of the bpf code, it causes the error here.
|
||||
+ */
|
||||
+ if (errno == EACCES)
|
||||
+ return 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,139 +0,0 @@
|
||||
From c7f9888e234a626a4d7bf31b89d66b91607f9785 Mon Sep 17 00:00:00 2001
|
||||
From: "fu.lin" <fu.lin10@huawei.com>
|
||||
Date: Tue, 27 Jul 2021 11:40:34 +0800
|
||||
Subject: [PATCH 44/72] proc parse: fix vma offset value for the sysfs file of
|
||||
pci devices
|
||||
|
||||
Some pci devices create bin sysfs file which permit to use `mmap()`
|
||||
syscall, the 6th parameter `offset` is always 0 when those kinds of
|
||||
files create file mapping. The value of `offset` will be assigned to
|
||||
`vma->vm_pgoff` in kernel. However, it will be changed to pci address
|
||||
automatically during mmap callback function `pci_mmap_resource_range()`,
|
||||
and the offset in `/proc/<pid>/maps` will show non-zero. This causes
|
||||
criu restore to fail.
|
||||
|
||||
There are many of those files. Just retry the mmap action.
|
||||
|
||||
NOTICE: the strategy is best-effort, not a whitelist.
|
||||
|
||||
Signed-off-by: He Jingxian <hejingxian@huawei.com>
|
||||
Signed-off-by: fu.lin <fu.lin10@huawei.com>
|
||||
Signed-off-by: fu.lin <fulin10@huawei.com>
|
||||
---
|
||||
criu/include/image.h | 1 +
|
||||
criu/pie/restorer.c | 22 +++++++++++++++++++---
|
||||
criu/proc_parse.c | 32 ++++++++++++++++++++++++++++++++
|
||||
3 files changed, 52 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/criu/include/image.h b/criu/include/image.h
|
||||
index 66492c0..0156314 100644
|
||||
--- a/criu/include/image.h
|
||||
+++ b/criu/include/image.h
|
||||
@@ -86,6 +86,7 @@
|
||||
#define VMA_AREA_MEMFD (1 << 14)
|
||||
#define VMA_AREA_ANON_INODE (1 << 15)
|
||||
#define VMA_AREA_CHR (1 << 16)
|
||||
+#define VMA_AREA_DEV_SHARE (1 << 17)
|
||||
|
||||
#define VMA_CLOSE (1 << 28)
|
||||
#define VMA_NO_PROT_WRITE (1 << 29)
|
||||
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
|
||||
index fde6e30..67b0d4c 100644
|
||||
--- a/criu/pie/restorer.c
|
||||
+++ b/criu/pie/restorer.c
|
||||
@@ -883,8 +883,9 @@ static unsigned long restore_mapping(VmaEntry *vma_entry)
|
||||
* that mechanism as it causes the process to be charged for memory
|
||||
* immediately upon mmap, not later upon preadv().
|
||||
*/
|
||||
- pr_debug("\tmmap(%" PRIx64 " -> %" PRIx64 ", %x %x %d)\n", vma_entry->start, vma_entry->end, prot, flags,
|
||||
- (int)vma_entry->fd);
|
||||
+ pr_debug("\tmmap(%" PRIx64 " -> %" PRIx64 ", %x %x %d %lx)\n",
|
||||
+ vma_entry->start, vma_entry->end, prot, flags,
|
||||
+ (int)vma_entry->fd, vma_entry->pgoff);
|
||||
/*
|
||||
* Should map memory here. Note we map them as
|
||||
* writable since we're going to restore page
|
||||
@@ -892,6 +893,20 @@ static unsigned long restore_mapping(VmaEntry *vma_entry)
|
||||
*/
|
||||
addr = sys_mmap(decode_pointer(vma_entry->start), vma_entry_len(vma_entry), prot, flags, vma_entry->fd,
|
||||
vma_entry->pgoff);
|
||||
+ /* Some drivers implements its own mmap callback, the `mmap()` argument
|
||||
+ * `offset` has the differet semantic with POSIX standard. Therefore,
|
||||
+ * try to re-mmap with offset 0.
|
||||
+ *
|
||||
+ * NOTICE: the stragy is try best, not whitelist.
|
||||
+ */
|
||||
+ if (addr == -EINVAL && vma_entry->pgoff != 0) {
|
||||
+ pr_info("try mmap with offset 0\n");
|
||||
+ addr = sys_mmap(decode_pointer(vma_entry->start),
|
||||
+ vma_entry_len(vma_entry),
|
||||
+ prot, flags,
|
||||
+ vma_entry->fd,
|
||||
+ 0);
|
||||
+ }
|
||||
|
||||
if ((vma_entry->fd != -1) && (vma_entry->status & VMA_CLOSE))
|
||||
sys_close(vma_entry->fd);
|
||||
@@ -1979,7 +1994,8 @@ long __export_restore_task(struct task_restore_args *args)
|
||||
if (!vma_entry->has_madv || !vma_entry->madv)
|
||||
continue;
|
||||
|
||||
- if (vma_entry_is(vma_entry, VMA_AREA_ANON_INODE))
|
||||
+ if (vma_entry_is(vma_entry, VMA_AREA_ANON_INODE) ||
|
||||
+ vma_entry_is(vma_entry, VMA_AREA_DEV_SHARE))
|
||||
continue;
|
||||
|
||||
for (m = 0; m < sizeof(vma_entry->madv) * 8; m++) {
|
||||
diff --git a/criu/proc_parse.c b/criu/proc_parse.c
|
||||
index d13589c..282a2e9 100644
|
||||
--- a/criu/proc_parse.c
|
||||
+++ b/criu/proc_parse.c
|
||||
@@ -552,6 +552,35 @@ static inline int handle_vvar_vma(struct vma_area *vma)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static bool is_sysfs_resource(const char *path)
|
||||
+{
|
||||
+ char *sub = NULL;
|
||||
+ const char *prefix = "resource";
|
||||
+ const char *suffix = "_wc";
|
||||
+
|
||||
+ if (strstr(path, "devices/") == NULL)
|
||||
+ return false;
|
||||
+
|
||||
+ sub = rindex(path, '/');
|
||||
+ if (sub == NULL)
|
||||
+ return false;
|
||||
+
|
||||
+ sub += 1;
|
||||
+ if (strncmp(sub, prefix, strlen(prefix)) != 0)
|
||||
+ return false;
|
||||
+
|
||||
+ sub += strlen(prefix);
|
||||
+ while (*sub != '\0' && (*sub >= '0' && *sub <= '9'))
|
||||
+ sub += 1;
|
||||
+
|
||||
+ if (*sub == '\0')
|
||||
+ return true;
|
||||
+ if (!strcmp(sub, suffix))
|
||||
+ return true;
|
||||
+ else
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
static int handle_vma(pid_t pid, struct vma_area *vma_area, const char *file_path, DIR *map_files_dir,
|
||||
struct vma_file_info *vfi, struct vma_file_info *prev_vfi, int *vm_file_fd)
|
||||
{
|
||||
@@ -571,6 +600,9 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area, const char *file_pat
|
||||
goto err;
|
||||
} else if (!strcmp(file_path, "[heap]")) {
|
||||
vma_area->e->status |= VMA_AREA_REGULAR | VMA_AREA_HEAP;
|
||||
+ } else if (is_sysfs_resource(file_path)) {
|
||||
+ pr_info("find sys device module share memory\n");
|
||||
+ vma_area->e->status |= VMA_AREA_REGULAR | VMA_AREA_DEV_SHARE;
|
||||
} else {
|
||||
vma_area->e->status = VMA_AREA_REGULAR;
|
||||
}
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,244 +0,0 @@
|
||||
From 1328e32ee05c59f7168039211c9d96176ff22791 Mon Sep 17 00:00:00 2001
|
||||
From: Jingxian He <hejingxian@huawei.com>
|
||||
Date: Sat, 14 Aug 2021 16:45:40 +0800
|
||||
Subject: [PATCH 45/72] add reuse file method for recover deleted file state
|
||||
|
||||
An orphan inode may exist in the checkpoint process. Sometimes it can't be
|
||||
re-linked by `linkat()` syscall, e.g. sysfs.
|
||||
|
||||
Therefore, add a reuse-file method to recover the file state of deleted
|
||||
files.
|
||||
|
||||
Signed-off-by: Jingxian He <hejingxian@huawei.com>
|
||||
Signed-off-by: fu.lin <fulin10@huawei.com>
|
||||
---
|
||||
criu/Makefile.crtools | 1 +
|
||||
criu/files-reg.c | 10 ++++--
|
||||
criu/files.c | 22 +++++++++++-
|
||||
criu/include/orphan-inode.h | 16 +++++++++
|
||||
criu/orphan-inode.c | 71 +++++++++++++++++++++++++++++++++++++
|
||||
5 files changed, 116 insertions(+), 4 deletions(-)
|
||||
create mode 100644 criu/include/orphan-inode.h
|
||||
create mode 100644 criu/orphan-inode.c
|
||||
|
||||
diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools
|
||||
index 3e522b4..7fee749 100644
|
||||
--- a/criu/Makefile.crtools
|
||||
+++ b/criu/Makefile.crtools
|
||||
@@ -95,6 +95,7 @@ obj-y += devname.o
|
||||
obj-y += files-chr.o
|
||||
obj-y += exit-notify.o
|
||||
obj-y += reserved-ports.o
|
||||
+obj-y += orphan-inode.o
|
||||
obj-$(CONFIG_HAS_LIBBPF) += bpfmap.o
|
||||
obj-$(CONFIG_COMPAT) += pie-util-vdso-elf32.o
|
||||
CFLAGS_pie-util-vdso-elf32.o += -DCONFIG_VDSO_32
|
||||
diff --git a/criu/files-reg.c b/criu/files-reg.c
|
||||
index 6dc8745..ed46764 100644
|
||||
--- a/criu/files-reg.c
|
||||
+++ b/criu/files-reg.c
|
||||
@@ -46,6 +46,7 @@
|
||||
#include "external.h"
|
||||
#include "memfd.h"
|
||||
#include "files-chr.h"
|
||||
+#include "orphan-inode.h"
|
||||
|
||||
#include "protobuf.h"
|
||||
#include "util.h"
|
||||
@@ -1260,8 +1261,10 @@ static int check_path_remap(struct fd_link *link, const struct fd_parms *parms,
|
||||
*/
|
||||
|
||||
if (errno == ENOENT) {
|
||||
- link_strip_deleted(link);
|
||||
- return dump_linked_remap(rpath + 1, plen - 1, ost, lfd, id, nsid);
|
||||
+ pr_info("Start add no exist file: %s\n", rpath+1);
|
||||
+ add_reuse_file(id, lfd, dst_pid);
|
||||
+ need_reuse_flag = O_REUSE;
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
pr_perror("Can't stat path");
|
||||
@@ -1663,7 +1666,8 @@ ext:
|
||||
rfe.has_mode = true;
|
||||
rfe.mode = p->stat.st_mode;
|
||||
|
||||
- if (S_ISREG(p->stat.st_mode) && should_check_size(rfe.flags) && !store_validation_data(&rfe, p, lfd))
|
||||
+ if (S_ISREG(p->stat.st_mode) && should_check_size(rfe.flags)
|
||||
+ && (need_reuse_flag != O_REUSE) && !store_validation_data(&rfe, p, lfd))
|
||||
return -1;
|
||||
|
||||
fe.type = FD_TYPES__REG;
|
||||
diff --git a/criu/files.c b/criu/files.c
|
||||
index 1c52cf4..e79052e 100644
|
||||
--- a/criu/files.c
|
||||
+++ b/criu/files.c
|
||||
@@ -50,6 +50,7 @@
|
||||
#include "fdstore.h"
|
||||
#include "bpfmap.h"
|
||||
#include "files-chr.h"
|
||||
+#include "orphan-inode.h"
|
||||
|
||||
#include "protobuf.h"
|
||||
#include "util.h"
|
||||
@@ -706,6 +707,7 @@ int dump_my_file(int lfd, u32 *id, int *type)
|
||||
}
|
||||
|
||||
int dst_pid;
|
||||
+int need_reuse_flag;
|
||||
|
||||
int dump_task_files_seized(struct parasite_ctl *ctl, struct pstree_item *item, struct parasite_drain_fd *dfds)
|
||||
{
|
||||
@@ -743,10 +745,13 @@ int dump_task_files_seized(struct parasite_ctl *ctl, struct pstree_item *item, s
|
||||
for (i = 0; i < nr_fds; i++) {
|
||||
FdinfoEntry e = FDINFO_ENTRY__INIT;
|
||||
|
||||
+ need_reuse_flag = 0;
|
||||
ret = dump_one_file(item->pid, dfds->fds[i + off], lfds[i], opts + i, ctl, &e, dfds);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
+ e.flags |= need_reuse_flag;
|
||||
+ pr_info("write fdinfoEntry fd=%d id=%d\n", (&e)->fd, (&e)->id);
|
||||
ret = pb_write_one(img, &e, PB_FDINFO);
|
||||
if (ret)
|
||||
break;
|
||||
@@ -939,7 +944,8 @@ int collect_fd(int pid, FdinfoEntry *e, struct rst_info *rst_info, bool fake)
|
||||
{
|
||||
struct file_desc *fdesc;
|
||||
|
||||
- pr_info("Collect fdinfo pid=%d fd=%d id=%#x\n", pid, e->fd, e->id);
|
||||
+ pr_info("Collect fdinfo pid=%d fd=%d id=%#x flags: %#x\n",
|
||||
+ pid, e->fd, e->id, e->flags);
|
||||
|
||||
fdesc = find_file_desc(e);
|
||||
if (fdesc == NULL) {
|
||||
@@ -1230,6 +1236,7 @@ static int open_fd(struct fdinfo_list_entry *fle)
|
||||
int new_fd = -1, ret;
|
||||
struct chrfile_info *ci;
|
||||
|
||||
+ pr_info("open file flags: %#x\n", fle->fe->flags);
|
||||
flem = file_master(d);
|
||||
if (fle != flem) {
|
||||
BUG_ON(fle->stage != FLE_INITIALIZED);
|
||||
@@ -1251,6 +1258,19 @@ static int open_fd(struct fdinfo_list_entry *fle)
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
+ } else if (fle->fe->flags & O_REUSE) {
|
||||
+ pr_info("find reuse file:%d\n", d->id);
|
||||
+ ret = repair_reuse_file(d->id);
|
||||
+ if (!ret) {
|
||||
+ new_fd = get_reuse_file();
|
||||
+ pr_info("get reuse file:%d\n", new_fd);
|
||||
+ if (new_fd <= 0 || setup_and_serve_out(fle, new_fd) < 0) {
|
||||
+ pr_err("setup reuse file fail\n");
|
||||
+ return -1;
|
||||
+ }
|
||||
+ fle->stage = FLE_RESTORED;
|
||||
+ return 0;
|
||||
+ }
|
||||
}
|
||||
|
||||
/*
|
||||
diff --git a/criu/include/orphan-inode.h b/criu/include/orphan-inode.h
|
||||
new file mode 100644
|
||||
index 0000000..bc3b6ae
|
||||
--- /dev/null
|
||||
+++ b/criu/include/orphan-inode.h
|
||||
@@ -0,0 +1,16 @@
|
||||
+#ifndef __CRIU_ORPHAN_INODE_H__
|
||||
+#define __CRIU_ORPHAN_INODE_H__
|
||||
+
|
||||
+#define ADD_REUSE_FILE_PATH "/sys/kernel/add_reuse_file"
|
||||
+#define REPAIR_REUSE_FILE_PATH "/sys/kernel/repair_reuse_file"
|
||||
+#define REUSE_FILE_PATH "/sys/kernel/reuse_file"
|
||||
+#define O_REUSE 0100000000
|
||||
+
|
||||
+extern int dst_pid;
|
||||
+extern int need_reuse_flag;
|
||||
+
|
||||
+int add_reuse_file(u32 id, int fd, int pid);
|
||||
+int repair_reuse_file(int id);
|
||||
+int get_reuse_file(void);
|
||||
+
|
||||
+#endif /* __CRIU_ORPHAN_INODE_H__ */
|
||||
diff --git a/criu/orphan-inode.c b/criu/orphan-inode.c
|
||||
new file mode 100644
|
||||
index 0000000..c4e38dc
|
||||
--- /dev/null
|
||||
+++ b/criu/orphan-inode.c
|
||||
@@ -0,0 +1,71 @@
|
||||
+#include <fcntl.h>
|
||||
+#include <stdio.h>
|
||||
+#include <stdlib.h>
|
||||
+#include <string.h>
|
||||
+#include <unistd.h>
|
||||
+
|
||||
+#include "int.h"
|
||||
+#include "log.h"
|
||||
+#include "orphan-inode.h"
|
||||
+
|
||||
+int add_reuse_file(u32 id, int fd, int pid)
|
||||
+{
|
||||
+ int retval;
|
||||
+ char buf[256] = {0};
|
||||
+
|
||||
+ retval = snprintf(buf, 256, "%u,%d,%d", id, fd, pid);
|
||||
+ if (retval <= 0)
|
||||
+ return -EFAULT;
|
||||
+
|
||||
+ fd = open(ADD_REUSE_FILE_PATH, O_WRONLY, 0);
|
||||
+ if (fd < 0) {
|
||||
+ pr_err("open file:%s fail\n", ADD_REUSE_FILE_PATH);
|
||||
+ return fd;
|
||||
+ }
|
||||
+
|
||||
+ retval = write(fd, buf, strlen(buf));
|
||||
+ close(fd);
|
||||
+
|
||||
+ return retval < 0 ? -1 : 0;
|
||||
+}
|
||||
+
|
||||
+int repair_reuse_file(int id)
|
||||
+{
|
||||
+ int retval, fd;
|
||||
+ char buf[256] = {0};
|
||||
+
|
||||
+ retval = snprintf(buf, 256, "%u", id);
|
||||
+ if (retval <= 0)
|
||||
+ return -EFAULT;
|
||||
+
|
||||
+ fd = open(REPAIR_REUSE_FILE_PATH, O_WRONLY, 0);
|
||||
+ if (fd < 0) {
|
||||
+ pr_err("open file:%s fail\n", REPAIR_REUSE_FILE_PATH);
|
||||
+ return fd;
|
||||
+ }
|
||||
+ retval = write(fd, buf, strlen(buf));
|
||||
+
|
||||
+ close(fd);
|
||||
+ return retval < 0 ? -1 : 0;
|
||||
+}
|
||||
+
|
||||
+int get_reuse_file(void)
|
||||
+{
|
||||
+ int fd;
|
||||
+ ssize_t count;
|
||||
+ int retval = -1;
|
||||
+ char buf[32] = {0};
|
||||
+
|
||||
+ fd = open(REUSE_FILE_PATH, O_RDONLY , 0);
|
||||
+ if (fd < 0) {
|
||||
+ pr_err("open file:%s fail\n", REUSE_FILE_PATH);
|
||||
+ return fd;
|
||||
+ }
|
||||
+
|
||||
+ count = read(fd, buf, sizeof(buf));
|
||||
+ if (count > 0)
|
||||
+ retval = atoi(buf);
|
||||
+
|
||||
+ close(fd);
|
||||
+ return retval;
|
||||
+}
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,133 +0,0 @@
|
||||
From 8b1856d5c72c6870c04a87158718d2df62591a6c Mon Sep 17 00:00:00 2001
|
||||
From: Jingxian He <hejingxian@huawei.com>
|
||||
Date: Wed, 11 Aug 2021 15:01:27 +0800
|
||||
Subject: [PATCH 46/72] sk: fix share sockets repair problem
|
||||
|
||||
Repair off the share sockets after reusing them
|
||||
to recover the share socket state.
|
||||
|
||||
Signed-off-by: Jingxian He <hejingxian@huawei.com>
|
||||
Signed-off-by: fu.lin <fulin10@huawei.com>
|
||||
---
|
||||
criu/files.c | 33 ++++++++++++++++++++++++++++++++-
|
||||
criu/sk-inet.c | 7 +++++--
|
||||
criu/sk-netlink.c | 5 +++--
|
||||
3 files changed, 40 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/criu/files.c b/criu/files.c
|
||||
index e79052e..24ed219 100644
|
||||
--- a/criu/files.c
|
||||
+++ b/criu/files.c
|
||||
@@ -51,6 +51,7 @@
|
||||
#include "bpfmap.h"
|
||||
#include "files-chr.h"
|
||||
#include "orphan-inode.h"
|
||||
+#include "sk-inet.h"
|
||||
|
||||
#include "protobuf.h"
|
||||
#include "util.h"
|
||||
@@ -1215,7 +1216,7 @@ int setup_and_serve_out(struct fdinfo_list_entry *fle, int new_fd)
|
||||
if (reopen_fd_as(fle->fe->fd, new_fd))
|
||||
return -1;
|
||||
|
||||
- pr_info("*******flags: %d",fle->fe->flags);
|
||||
+ pr_info("*******flags: %d\n",fle->fe->flags);
|
||||
if (fcntl(fle->fe->fd, F_SETFD, fle->fe->flags) == -1) {
|
||||
pr_perror("Unable to set file descriptor flags");
|
||||
return -1;
|
||||
@@ -1229,6 +1230,30 @@ int setup_and_serve_out(struct fdinfo_list_entry *fle, int new_fd)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+#define MAX_SHARE_SOCKETS_NUM 25000
|
||||
+int repair_share_sockets[MAX_SHARE_SOCKETS_NUM];
|
||||
+int repair_share_num;
|
||||
+
|
||||
+int add_repair_share_socket(int fd)
|
||||
+{
|
||||
+ if (repair_share_num >= MAX_SHARE_SOCKETS_NUM)
|
||||
+ return -1;
|
||||
+ repair_share_sockets[repair_share_num] = fd;
|
||||
+ repair_share_num++;
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+void repair_off_share_sockets(void)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ for (i = 0; i < repair_share_num; i++) {
|
||||
+ tcp_repair_off(repair_share_sockets[i]);
|
||||
+ pr_info("repair off socket:%d\n", repair_share_sockets[i]);
|
||||
+ }
|
||||
+ repair_share_num = 0;
|
||||
+}
|
||||
+
|
||||
static int open_fd(struct fdinfo_list_entry *fle)
|
||||
{
|
||||
struct file_desc *d = fle->desc;
|
||||
@@ -1248,6 +1273,7 @@ static int open_fd(struct fdinfo_list_entry *fle)
|
||||
|
||||
if (d->ops->type == FD_TYPES__INETSK) {
|
||||
if (check_need_repair(d)) {
|
||||
+ pr_info("start repair for:%d\n", d->id);
|
||||
ret = repair_share_socket(d->id);
|
||||
if (!ret) {
|
||||
new_fd = get_share_socket();
|
||||
@@ -1255,6 +1281,10 @@ static int open_fd(struct fdinfo_list_entry *fle)
|
||||
if (new_fd <= 0 || setup_and_serve_out(fle, new_fd) < 0)
|
||||
return -1;
|
||||
fle->stage = FLE_RESTORED;
|
||||
+ if (add_repair_share_socket(fle->fe->fd)) {
|
||||
+ pr_perror("add repair share socket fail\n");
|
||||
+ return -1;
|
||||
+ }
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
@@ -1379,6 +1409,7 @@ static int open_fdinfos(struct pstree_item *me)
|
||||
wait_fds_event();
|
||||
} while (again || progress);
|
||||
|
||||
+ repair_off_share_sockets();
|
||||
BUG_ON(!list_empty(list));
|
||||
/*
|
||||
* Fake fles may be used for restore other
|
||||
diff --git a/criu/sk-inet.c b/criu/sk-inet.c
|
||||
index c7de793..c0251db 100644
|
||||
--- a/criu/sk-inet.c
|
||||
+++ b/criu/sk-inet.c
|
||||
@@ -635,8 +635,11 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa
|
||||
BUG_ON(sk->sd.already_dumped);
|
||||
|
||||
if (check_share_dst_port(sk->dst_port) || check_share_src_port(sk->src_port)) {
|
||||
- pr_info("Start add share prot:%d src %d\n", sk->dst_port, sk->src_port);
|
||||
- add_share_socket(id, lfd, dst_pid, sk->src_port);
|
||||
+ pr_info("Start add share prot:%d-%d dst_pid %d id %d\n",
|
||||
+ sk->dst_port, sk->src_port, dst_pid, id);
|
||||
+ ret = add_share_socket(id, lfd, dst_pid, sk->src_port);
|
||||
+ if (ret)
|
||||
+ pr_warn("add share socket ret %d\n", ret);
|
||||
}
|
||||
|
||||
ie.id = id;
|
||||
diff --git a/criu/sk-netlink.c b/criu/sk-netlink.c
|
||||
index d4b3b7b..2832060 100644
|
||||
--- a/criu/sk-netlink.c
|
||||
+++ b/criu/sk-netlink.c
|
||||
@@ -115,9 +115,10 @@ static bool can_dump_netlink_sk(int lfd)
|
||||
|
||||
ret = fd_has_data(lfd);
|
||||
if (ret == 1)
|
||||
- pr_err("The socket has data to read\n");
|
||||
+ pr_warn("The socket has data to read\n");
|
||||
|
||||
- return ret == 0;
|
||||
+ /* ignore netlink socket data */
|
||||
+ return true;
|
||||
}
|
||||
|
||||
static int dump_one_netlink_fd(int lfd, u32 id, const struct fd_parms *p)
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,107 +0,0 @@
|
||||
From 1cb92fe0a930cf862f8a3ecd9a812d5b2e3aea60 Mon Sep 17 00:00:00 2001
|
||||
From: root <root@localhost.localdomain>
|
||||
Date: Wed, 8 Sep 2021 08:23:11 +0000
|
||||
Subject: [PATCH 47/72] mm: add clear pin mem and init page map option
|
||||
|
||||
Add 'clear-pin-memory' option for clearing pin memory data,
|
||||
and 'init-pagemap-read' option for initializing the buffer for
|
||||
reading page map info.
|
||||
|
||||
Signed-off-by: Jingxian He <hejingxian@huawei.com>
|
||||
Signed-off-by: fu.lin <fulin10@huawei.com>
|
||||
---
|
||||
criu/crtools.c | 13 ++++++++++++-
|
||||
criu/include/pin-mem.h | 4 ++++
|
||||
criu/pin-mem.c | 20 ++++++++++++++++++++
|
||||
3 files changed, 36 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/criu/crtools.c b/criu/crtools.c
|
||||
index ae858e8..cc0a18f 100644
|
||||
--- a/criu/crtools.c
|
||||
+++ b/criu/crtools.c
|
||||
@@ -43,6 +43,7 @@
|
||||
#include "fault-injection.h"
|
||||
#include "proc_parse.h"
|
||||
#include "kerndat.h"
|
||||
+#include "pin-mem.h"
|
||||
|
||||
#include "setproctitle.h"
|
||||
#include "sysctl.h"
|
||||
@@ -169,6 +170,14 @@ int main(int argc, char *argv[], char *envp[])
|
||||
goto usage;
|
||||
}
|
||||
|
||||
+ if (!strcmp(argv[optind], "clear-pin-memory")) {
|
||||
+ return clear_pin_mem(0);
|
||||
+ }
|
||||
+
|
||||
+ if (!strcmp(argv[optind], "init-pagemap-read")) {
|
||||
+ return init_pagemap_read(0);
|
||||
+ }
|
||||
+
|
||||
/* We must not open imgs dir, if service is called */
|
||||
if (strcmp(argv[optind], "service")) {
|
||||
ret = open_image_dir(opts.imgs_dir, image_dir_mode(argv, optind));
|
||||
@@ -320,7 +329,9 @@ usage:
|
||||
" service launch service\n"
|
||||
" dedup remove duplicates in memory dump\n"
|
||||
" cpuinfo dump writes cpu information into image file\n"
|
||||
- " cpuinfo check validates cpu information read from image file\n");
|
||||
+ " cpuinfo check validates cpu information read from image file\n"
|
||||
+ " clear-pin-memory clear pin memory manage data\n"
|
||||
+ " init-pagemap-read init data buffer for reading page map info\n");
|
||||
|
||||
if (usage_error) {
|
||||
pr_msg("\nTry -h|--help for more info\n");
|
||||
diff --git a/criu/include/pin-mem.h b/criu/include/pin-mem.h
|
||||
index 2b54996..b28ef3d 100644
|
||||
--- a/criu/include/pin-mem.h
|
||||
+++ b/criu/include/pin-mem.h
|
||||
@@ -39,6 +39,9 @@ struct pin_mem_area_set {
|
||||
#define _SET_FORK_PID 8
|
||||
#define SET_FORK_PID _IOW(PIN_MEM_MAGIC, _SET_FORK_PID, int)
|
||||
|
||||
+#define _INIT_PAGEMAP_READ 5
|
||||
+#define INIT_PAGEMAP_READ _IOW(PIN_MEM_MAGIC, _INIT_PAGEMAP_READ, int)
|
||||
+
|
||||
#endif /* __has_include("linux/pin_memory.h") */
|
||||
|
||||
#define PIN_MEM_FILE "/dev/pinmem"
|
||||
@@ -49,5 +52,6 @@ int pin_vmae(VmaEntry *vmae, struct pstree_item *item);
|
||||
int dump_task_special_pages(int pid);
|
||||
int restore_task_special_pages(int pid);
|
||||
int clear_pin_mem(int pid);
|
||||
+int init_pagemap_read(int para);
|
||||
|
||||
#endif /* __CRIU_PIN_MEM_H__ */
|
||||
diff --git a/criu/pin-mem.c b/criu/pin-mem.c
|
||||
index b18db97..96ca2c5 100644
|
||||
--- a/criu/pin-mem.c
|
||||
+++ b/criu/pin-mem.c
|
||||
@@ -144,3 +144,23 @@ int clear_pin_mem(int pid)
|
||||
close(fd);
|
||||
return ret;
|
||||
}
|
||||
+
|
||||
+int init_pagemap_read(int para)
|
||||
+{
|
||||
+ int fd, ret;
|
||||
+
|
||||
+ fd = open(PIN_MEM_FILE, O_RDWR, 0);
|
||||
+ if (fd < 0) {
|
||||
+ pr_warn("error open file: %s\n", PIN_MEM_FILE);
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ ret = ioctl(fd, INIT_PAGEMAP_READ, (unsigned long) ¶);
|
||||
+ if (ret < 0) {
|
||||
+ pr_warn("Init pagemap read fail, errno: %s\n", strerror(errno));
|
||||
+ }
|
||||
+
|
||||
+ close(fd);
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,37 +0,0 @@
|
||||
From 803ee02298e0a71b07cf611eee68e23f702d259e Mon Sep 17 00:00:00 2001
|
||||
From: Jingxian He <hejingxian@huawei.com>
|
||||
Date: Thu, 16 Sep 2021 13:50:46 +0000
|
||||
Subject: [PATCH 48/72] fds: fix fds list restore
|
||||
|
||||
When multiple processes need to be dumped, the child process may
|
||||
have the same fds as parent process. During the restore processing,
|
||||
criu chooses the process which has the min pid value to be the master
|
||||
process to recover fds. However, choosing the parent process as the
|
||||
master process is more suitable.
|
||||
|
||||
Signed-off-by: Jingxian He <hejingxian@huawei.com>
|
||||
---
|
||||
criu/files.c | 7 +------
|
||||
1 file changed, 1 insertion(+), 6 deletions(-)
|
||||
|
||||
diff --git a/criu/files.c b/criu/files.c
|
||||
index 24ed219..6d8b812 100644
|
||||
--- a/criu/files.c
|
||||
+++ b/criu/files.c
|
||||
@@ -906,12 +906,7 @@ static struct fdinfo_list_entry *alloc_fle(int pid, FdinfoEntry *fe)
|
||||
|
||||
static void __collect_desc_fle(struct fdinfo_list_entry *new_le, struct file_desc *fdesc)
|
||||
{
|
||||
- struct fdinfo_list_entry *le;
|
||||
-
|
||||
- list_for_each_entry_reverse(le, &fdesc->fd_info_head, desc_list)
|
||||
- if (pid_rst_prio_eq(le->pid, new_le->pid))
|
||||
- break;
|
||||
- list_add(&new_le->desc_list, &le->desc_list);
|
||||
+ list_add(&new_le->desc_list, &fdesc->fd_info_head);
|
||||
}
|
||||
|
||||
static void collect_desc_fle(struct fdinfo_list_entry *new_le, struct file_desc *fdesc, bool force_master)
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,88 +0,0 @@
|
||||
From bec1445fd5dcfffb24918d725163f3be35f8b634 Mon Sep 17 00:00:00 2001
|
||||
From: "fu.lin" <fulin10@huawei.com>
|
||||
Date: Tue, 19 Oct 2021 20:53:19 +0800
|
||||
Subject: [PATCH 49/72] log: print error log to /dev/kmsg
|
||||
|
||||
The criu log can't be flushed to disk when the OS crashes in a storage
|
||||
environment, therefore, output high level msg to /dev/kmsg.
|
||||
|
||||
Signed-off-by: fu.lin <fulin10@huawei.com>
|
||||
---
|
||||
criu/Makefile.crtools | 1 +
|
||||
criu/include/log.h | 3 +++
|
||||
criu/kmsg.c | 16 ++++++++++++++++
|
||||
criu/log.c | 4 ++++
|
||||
4 files changed, 24 insertions(+)
|
||||
create mode 100644 criu/kmsg.c
|
||||
|
||||
diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools
|
||||
index 7fee749..3bb7c19 100644
|
||||
--- a/criu/Makefile.crtools
|
||||
+++ b/criu/Makefile.crtools
|
||||
@@ -96,6 +96,7 @@ obj-y += files-chr.o
|
||||
obj-y += exit-notify.o
|
||||
obj-y += reserved-ports.o
|
||||
obj-y += orphan-inode.o
|
||||
+obj-y += kmsg.o
|
||||
obj-$(CONFIG_HAS_LIBBPF) += bpfmap.o
|
||||
obj-$(CONFIG_COMPAT) += pie-util-vdso-elf32.o
|
||||
CFLAGS_pie-util-vdso-elf32.o += -DCONFIG_VDSO_32
|
||||
diff --git a/criu/include/log.h b/criu/include/log.h
|
||||
index 85e6dc2..aafea95 100644
|
||||
--- a/criu/include/log.h
|
||||
+++ b/criu/include/log.h
|
||||
@@ -2,6 +2,7 @@
|
||||
#define __CR_LOG_H__
|
||||
|
||||
#include <inttypes.h>
|
||||
+#include <stddef.h>
|
||||
|
||||
#ifndef CR_NOGLIBC
|
||||
|
||||
@@ -62,4 +63,6 @@ void flush_early_log_buffer(int fd);
|
||||
|
||||
#endif /* CR_NOGLIBC */
|
||||
|
||||
+void write_kmsg(const void *buf, size_t count);
|
||||
+
|
||||
#endif /* __CR_LOG_H__ */
|
||||
diff --git a/criu/kmsg.c b/criu/kmsg.c
|
||||
new file mode 100644
|
||||
index 0000000..c956dfb
|
||||
--- /dev/null
|
||||
+++ b/criu/kmsg.c
|
||||
@@ -0,0 +1,16 @@
|
||||
+#include <fcntl.h>
|
||||
+#include <unistd.h>
|
||||
+
|
||||
+#define SYSLOG_DEV "/dev/kmsg"
|
||||
+
|
||||
+void write_kmsg(const void *buf, size_t count)
|
||||
+{
|
||||
+ int fd;
|
||||
+
|
||||
+ fd = open(SYSLOG_DEV, O_CLOEXEC | O_WRONLY);
|
||||
+ if (fd < 0)
|
||||
+ return;
|
||||
+
|
||||
+ write(fd, buf, count);
|
||||
+ close(fd);
|
||||
+}
|
||||
diff --git a/criu/log.c b/criu/log.c
|
||||
index c4ce90e..ba208f7 100644
|
||||
--- a/criu/log.c
|
||||
+++ b/criu/log.c
|
||||
@@ -373,6 +373,10 @@ static void vprint_on_level(unsigned int loglevel, const char *format, va_list p
|
||||
size += buf_off;
|
||||
|
||||
while (off < size) {
|
||||
+ if (loglevel <= LOG_WARN) {
|
||||
+ write_kmsg(buffer + off, size - off);
|
||||
+ }
|
||||
+
|
||||
ret = write(fd, buffer + off, size - off);
|
||||
if (ret <= 0)
|
||||
break;
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,83 +0,0 @@
|
||||
From 6dde331da8e28e129010aee391e7ef3d757490cd Mon Sep 17 00:00:00 2001
|
||||
From: "fu.lin" <fulin10@huawei.com>
|
||||
Date: Tue, 26 Oct 2021 11:13:27 +0800
|
||||
Subject: [PATCH 50/72] unix sk: improve dgram robustness
|
||||
|
||||
We should try our best to ensure the success of criu. As for unix dgram
|
||||
socket, criu uses re-connect instead of repair, unlike the unix stream
|
||||
socket. Therefore, this patch does the following things:
|
||||
|
||||
- detect unix dgram unix sock file when criu dumps unix dgram socket
|
||||
- add the fault tolerance of unix dgram socket connecting (focus on the
|
||||
condition of `/dev/log` disappearance when rsyslog restart)
|
||||
|
||||
Conflict:NA
|
||||
Reference:https://gitee.com/src-openeuler/criu/pulls/21
|
||||
Signed-off-by: fu.lin <fulin10@huawei.com>
|
||||
---
|
||||
criu/sk-unix.c | 35 +++++++++++++++++++++++++++++++++--
|
||||
1 file changed, 33 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/criu/sk-unix.c b/criu/sk-unix.c
|
||||
index 86bfa18..de75425 100644
|
||||
--- a/criu/sk-unix.c
|
||||
+++ b/criu/sk-unix.c
|
||||
@@ -11,6 +11,7 @@
|
||||
#include <stdlib.h>
|
||||
#include <dlfcn.h>
|
||||
#include <libgen.h>
|
||||
+#include <time.h>
|
||||
|
||||
#include "libnetlink.h"
|
||||
#include "cr_options.h"
|
||||
@@ -1435,6 +1436,33 @@ err:
|
||||
return -1;
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Sometimes, `/dev/log` will disappear because of the restart of rsyslog when
|
||||
+ * rotating, and criu's attempt to connect to `/dev/log` fails at that time. We
|
||||
+ * should try our best to ensure the success of criu restoration. Therefore,
|
||||
+ * retry three times here.
|
||||
+ */
|
||||
+static int unix_dgram_reconnect(int fd, struct sockaddr_un *addr, int len)
|
||||
+{
|
||||
+ int retval = 0;
|
||||
+ struct timespec tim = {
|
||||
+ .tv_sec = 0,
|
||||
+ .tv_nsec = 5e+8,
|
||||
+ };
|
||||
+
|
||||
+ for (int i = 0; i < 3; i++) {
|
||||
+ nanosleep(&tim, NULL);
|
||||
+ pr_warn("Can't connect unix socket(%s), %d retry\n",
|
||||
+ addr->sun_path, i);
|
||||
+ retval = connect(fd, (struct sockaddr *)addr,
|
||||
+ sizeof(addr->sun_family) + len);
|
||||
+ if (retval == 0)
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ return retval;
|
||||
+}
|
||||
+
|
||||
static int post_open_standalone(struct file_desc *d, int fd)
|
||||
{
|
||||
int fdstore_fd = -1, procfs_self_dir = -1, len;
|
||||
@@ -1521,8 +1549,11 @@ static int post_open_standalone(struct file_desc *d, int fd)
|
||||
goto err_revert_and_exit;
|
||||
}
|
||||
} else if (connect(fd, (struct sockaddr *)&addr, sizeof(addr.sun_family) + len) < 0) {
|
||||
- pr_perror("Can't connect %d socket", ui->ue->ino);
|
||||
- goto err_revert_and_exit;
|
||||
+ if (ui->ue->type != SOCK_DGRAM || errno != ENOENT
|
||||
+ || unix_dgram_reconnect(fd, &addr, len) != 0) {
|
||||
+ pr_perror("Can't connect %d socket", ui->ue->ino);
|
||||
+ goto err_revert_and_exit;
|
||||
+ }
|
||||
}
|
||||
mutex_unlock(mutex_ghost);
|
||||
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,46 +0,0 @@
|
||||
From a7d5401953c548c9479c386b52fffcba6b49c0e3 Mon Sep 17 00:00:00 2001
|
||||
From: "fu.lin" <fulin10@huawei.com>
|
||||
Date: Wed, 27 Oct 2021 11:57:43 +0800
|
||||
Subject: [PATCH 51/72] sk: ignore the bind error for icmp socket
|
||||
|
||||
Conflict:NA
|
||||
Reference:https://gitee.com/src-openeuler/criu/pulls/21
|
||||
Signed-off-by: fu.lin <fulin10@huawei.com>
|
||||
---
|
||||
criu/sk-inet.c | 20 ++++++++++++++++++--
|
||||
1 file changed, 18 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/criu/sk-inet.c b/criu/sk-inet.c
|
||||
index c0251db..96c2d09 100644
|
||||
--- a/criu/sk-inet.c
|
||||
+++ b/criu/sk-inet.c
|
||||
@@ -1160,8 +1160,24 @@ int inet_bind(int sk, struct inet_sk_info *ii)
|
||||
}
|
||||
|
||||
if (bind(sk, (struct sockaddr *)&addr, addr_size) == -1) {
|
||||
- pr_perror("Can't bind inet socket (id %d)", ii->ie->id);
|
||||
- return -1;
|
||||
+ InetSkEntry *ie = ii->ie;
|
||||
+
|
||||
+ /*
|
||||
+ * Sometimes the ping-like program restoration may appear
|
||||
+ * `bind()` error when it is specified the address. In view
|
||||
+ * of the principle that we should try our best to restore the
|
||||
+ * process, and ping-like program works abnormal can tolerate,
|
||||
+ * just warn here instead of report error.
|
||||
+ */
|
||||
+ if (ie->proto == IPPROTO_ICMP || ie->proto == IPPROTO_ICMPV6) {
|
||||
+ pr_warn("Can't bind inet socket (id %d) proto %s\n",
|
||||
+ ie->id,
|
||||
+ ie->proto == IPPROTO_ICMP ?
|
||||
+ "IPPROTO_ICMP" : "IPPROTO_ICMPV6");
|
||||
+ } else {
|
||||
+ pr_perror("Can't bind inet socket (id %d)", ii->ie->id);
|
||||
+ return -1;
|
||||
+ }
|
||||
}
|
||||
|
||||
if (rst_freebind) {
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,505 +0,0 @@
|
||||
From ade879e6ccdc4c74a1c153f0750d2cd87ec8a4ec Mon Sep 17 00:00:00 2001
|
||||
From: Jingxian He <hejingxian@huawei.com>
|
||||
Date: Tue, 30 Nov 2021 10:26:10 +0800
|
||||
Subject: [PATCH 52/72] optimization: parallel collecting vmas
|
||||
|
||||
In order to improve criu dump performance, make the collecting vmas
|
||||
operation parallel run with the other collecting operations.
|
||||
|
||||
In order to prevent the concurrency problem by `find_unused_fd`, only
|
||||
the main root task will run in parallel.
|
||||
|
||||
Usage:
|
||||
criu --parallel
|
||||
|
||||
Note:
|
||||
Ensure criu can use multi-core, otherwise the performance will
|
||||
deteriorate.
|
||||
|
||||
Signed-off-by: fu.lin <fulin10@huawei.com>
|
||||
Signed-off-by: hewenliang <hewenliang4@huawei.com>
|
||||
Signed-off-by: Jingxian He <hejingxian@huawei.com>
|
||||
---
|
||||
criu/Makefile.crtools | 1 +
|
||||
criu/Makefile.packages | 1 +
|
||||
criu/config.c | 1 +
|
||||
criu/cr-dump.c | 53 +++++++++++-----
|
||||
criu/crtools.c | 1 +
|
||||
criu/include/cr_options.h | 1 +
|
||||
criu/include/pstree.h | 3 +
|
||||
criu/include/taskqueue.h | 50 +++++++++++++++
|
||||
criu/namespaces.c | 9 ++-
|
||||
criu/taskqueue.c | 124 ++++++++++++++++++++++++++++++++++++++
|
||||
10 files changed, 228 insertions(+), 16 deletions(-)
|
||||
create mode 100644 criu/include/taskqueue.h
|
||||
create mode 100644 criu/taskqueue.c
|
||||
|
||||
diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools
|
||||
index 3bb7c19..2ad0207 100644
|
||||
--- a/criu/Makefile.crtools
|
||||
+++ b/criu/Makefile.crtools
|
||||
@@ -97,6 +97,7 @@ obj-y += exit-notify.o
|
||||
obj-y += reserved-ports.o
|
||||
obj-y += orphan-inode.o
|
||||
obj-y += kmsg.o
|
||||
+obj-y += taskqueue.o
|
||||
obj-$(CONFIG_HAS_LIBBPF) += bpfmap.o
|
||||
obj-$(CONFIG_COMPAT) += pie-util-vdso-elf32.o
|
||||
CFLAGS_pie-util-vdso-elf32.o += -DCONFIG_VDSO_32
|
||||
diff --git a/criu/Makefile.packages b/criu/Makefile.packages
|
||||
index 13c346f..851489b 100644
|
||||
--- a/criu/Makefile.packages
|
||||
+++ b/criu/Makefile.packages
|
||||
@@ -31,6 +31,7 @@ REQ-RPM-PKG-TEST-NAMES += $(PYTHON)-pyyaml
|
||||
endif
|
||||
|
||||
export LIBS += -lprotobuf-c -ldl -lnl-3 -lsoccr -Lsoccr/ -lnet
|
||||
+export LIBS += -lpthread
|
||||
|
||||
check-packages-failed:
|
||||
$(warning Can not find some of the required libraries)
|
||||
diff --git a/criu/config.c b/criu/config.c
|
||||
index ae5f81e..fdbc5eb 100644
|
||||
--- a/criu/config.c
|
||||
+++ b/criu/config.c
|
||||
@@ -709,6 +709,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd,
|
||||
{ "share-dst-ports", required_argument, 0, 2000 },
|
||||
{ "share-src-ports", required_argument, 0, 2001 },
|
||||
{ "reserve-ports", required_argument, 0, 'P' },
|
||||
+ BOOL_OPT("parallel", &opts.parallel),
|
||||
{},
|
||||
};
|
||||
|
||||
diff --git a/criu/cr-dump.c b/criu/cr-dump.c
|
||||
index a8ab61e..ee826c0 100644
|
||||
--- a/criu/cr-dump.c
|
||||
+++ b/criu/cr-dump.c
|
||||
@@ -17,6 +17,7 @@
|
||||
|
||||
#include <sched.h>
|
||||
#include <sys/resource.h>
|
||||
+#include <sys/sysinfo.h>
|
||||
|
||||
#include "types.h"
|
||||
#include "protobuf.h"
|
||||
@@ -90,6 +91,7 @@
|
||||
#include "notifier.h"
|
||||
#include "files-chr.h"
|
||||
#include "reserved-ports.h"
|
||||
+#include "taskqueue.h"
|
||||
|
||||
/*
|
||||
* Architectures can overwrite this function to restore register sets that
|
||||
@@ -424,7 +426,7 @@ static int dump_pid_misc(pid_t pid, TaskCoreEntry *tc)
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static int dump_filemap(struct vma_area *vma_area, int fd)
|
||||
+int dump_filemap(struct vma_area *vma_area, int fd)
|
||||
{
|
||||
struct fd_parms p = FD_PARMS_INIT;
|
||||
VmaEntry *vma = vma_area->e;
|
||||
@@ -1504,7 +1506,7 @@ err_cure:
|
||||
static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie)
|
||||
{
|
||||
pid_t pid = item->pid->real;
|
||||
- struct vm_area_list vmas;
|
||||
+ struct vm_area_list *vmas = NULL;
|
||||
struct parasite_ctl *parasite_ctl;
|
||||
int ret, exit_code = -1;
|
||||
struct parasite_dump_misc misc;
|
||||
@@ -1513,8 +1515,6 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie)
|
||||
struct proc_posix_timers_stat proc_args;
|
||||
struct mem_dump_ctl mdc;
|
||||
|
||||
- vm_area_list_init(&vmas);
|
||||
-
|
||||
pr_info("========================================\n");
|
||||
pr_info("Dumping task (pid: %d)\n", pid);
|
||||
pr_info("========================================\n");
|
||||
@@ -1525,12 +1525,23 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie)
|
||||
*/
|
||||
return 0;
|
||||
|
||||
+ if (!opts.parallel || root_item->pid->real != item->pid->real ) {
|
||||
+ vmas = xmalloc(sizeof(struct vm_area_list));
|
||||
+ if (vmas == NULL) {
|
||||
+ pr_err("xmalloc no memory\n");
|
||||
+ return -1;
|
||||
+ }
|
||||
+ vm_area_list_init(vmas);
|
||||
+ } else
|
||||
+ vmas = item->maps_info.vmas;
|
||||
+
|
||||
pr_info("Obtaining task stat ... \n");
|
||||
ret = parse_pid_stat(pid, &pps_buf);
|
||||
if (ret < 0)
|
||||
goto err;
|
||||
|
||||
- ret = collect_mappings(pid, &vmas, dump_filemap);
|
||||
+ ret = (opts.parallel && root_item->pid->real == item->pid->real) ?
|
||||
+ 0 : collect_mappings(pid, vmas, dump_filemap);
|
||||
if (ret) {
|
||||
pr_err("Collect mappings (pid: %d) failed with %d\n", pid, ret);
|
||||
goto err;
|
||||
@@ -1570,7 +1581,10 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie)
|
||||
goto err;
|
||||
}
|
||||
|
||||
- parasite_ctl = parasite_infect_seized(pid, item, &vmas);
|
||||
+ if (opts.parallel && end_collect_mappings_thread(item))
|
||||
+ goto err;
|
||||
+
|
||||
+ parasite_ctl = parasite_infect_seized(pid, item, vmas);
|
||||
if (!parasite_ctl) {
|
||||
pr_err("Can't infect (pid: %d) with parasite\n", pid);
|
||||
goto err;
|
||||
@@ -1600,13 +1614,13 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie)
|
||||
goto err_cure_imgset;
|
||||
}
|
||||
|
||||
- ret = parasite_fixup_vdso(parasite_ctl, pid, &vmas);
|
||||
+ ret = parasite_fixup_vdso(parasite_ctl, pid, vmas);
|
||||
if (ret) {
|
||||
pr_err("Can't fixup vdso VMAs (pid: %d)\n", pid);
|
||||
goto err_cure_imgset;
|
||||
}
|
||||
|
||||
- ret = parasite_collect_aios(parasite_ctl, &vmas); /* FIXME -- merge with above */
|
||||
+ ret = parasite_collect_aios(parasite_ctl, vmas); /* FIXME -- merge with above */
|
||||
if (ret) {
|
||||
pr_err("Failed to check aio rings (pid: %d)\n", pid);
|
||||
goto err_cure_imgset;
|
||||
@@ -1658,7 +1672,7 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie)
|
||||
mdc.stat = &pps_buf;
|
||||
mdc.parent_ie = parent_ie;
|
||||
|
||||
- ret = parasite_dump_pages_seized(item, &vmas, &mdc, parasite_ctl);
|
||||
+ ret = parasite_dump_pages_seized(item, vmas, &mdc, parasite_ctl);
|
||||
if (ret)
|
||||
goto err_cure;
|
||||
|
||||
@@ -1719,7 +1733,7 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie)
|
||||
goto err;
|
||||
}
|
||||
|
||||
- ret = dump_task_mm(pid, &pps_buf, &misc, &vmas, cr_imgset);
|
||||
+ ret = dump_task_mm(pid, &pps_buf, &misc, vmas, cr_imgset);
|
||||
if (ret) {
|
||||
pr_err("Dump mappings (pid: %d) failed with %d\n", pid, ret);
|
||||
goto err;
|
||||
@@ -1735,7 +1749,8 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie)
|
||||
exit_code = 0;
|
||||
err:
|
||||
close_pid_proc();
|
||||
- free_mappings(&vmas);
|
||||
+ free_mappings(vmas);
|
||||
+ free(vmas);
|
||||
xfree(dfds);
|
||||
return exit_code;
|
||||
|
||||
@@ -1893,6 +1908,9 @@ int cr_pre_dump_tasks(pid_t pid)
|
||||
if (opts.dump_char_dev && parse_devname() < 0)
|
||||
goto err;
|
||||
|
||||
+ if (opts.parallel && init_parallel_env() != 0)
|
||||
+ goto err;
|
||||
+
|
||||
root_item = alloc_pstree_item();
|
||||
if (!root_item)
|
||||
goto err;
|
||||
@@ -2107,6 +2125,13 @@ static int cr_dump_finish(int ret)
|
||||
write_stats(DUMP_STATS);
|
||||
pr_info("Dumping finished successfully\n");
|
||||
}
|
||||
+
|
||||
+ /*
|
||||
+ * Don't care threads' status and ignore unfree resources, use
|
||||
+ * `exit_group()` to ensure exit all threads.
|
||||
+ */
|
||||
+ syscall(SYS_exit_group, post_dump_ret ? : (ret != 0));
|
||||
+
|
||||
return post_dump_ret ?: (ret != 0);
|
||||
}
|
||||
|
||||
@@ -2203,13 +2228,13 @@ int cr_dump_tasks(pid_t pid)
|
||||
if (collect_file_locks())
|
||||
goto err;
|
||||
|
||||
- if (collect_namespaces(true) < 0)
|
||||
- goto err;
|
||||
-
|
||||
glob_imgset = cr_glob_imgset_open(O_DUMP);
|
||||
if (!glob_imgset)
|
||||
goto err;
|
||||
|
||||
+ if (collect_namespaces(true) < 0)
|
||||
+ goto err;
|
||||
+
|
||||
if (seccomp_collect_dump_filters() < 0)
|
||||
goto err;
|
||||
|
||||
diff --git a/criu/crtools.c b/criu/crtools.c
|
||||
index cc0a18f..c20b3b7 100644
|
||||
--- a/criu/crtools.c
|
||||
+++ b/criu/crtools.c
|
||||
@@ -472,6 +472,7 @@ usage:
|
||||
" --weak-file-check Allow file size and mod larger than dumping value\n"
|
||||
" --file-locks-repair Use repair mode to dump and restore file locks\n"
|
||||
" --reserve-ports Reserve src ports in kernel\n"
|
||||
+ " --parallel Collect smaps parallel to accellrate dumping speed\n"
|
||||
"\n"
|
||||
"Check options:\n"
|
||||
" Without options, \"criu check\" checks availability of absolutely required\n"
|
||||
diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h
|
||||
index 3b61c6b..6478d4d 100644
|
||||
--- a/criu/include/cr_options.h
|
||||
+++ b/criu/include/cr_options.h
|
||||
@@ -201,6 +201,7 @@ struct cr_options {
|
||||
char *share_dst_ports;
|
||||
char *share_src_ports;
|
||||
int reserve_ports;
|
||||
+ int parallel;
|
||||
};
|
||||
|
||||
extern struct cr_options opts;
|
||||
diff --git a/criu/include/pstree.h b/criu/include/pstree.h
|
||||
index 97bef11..87e4c47 100644
|
||||
--- a/criu/include/pstree.h
|
||||
+++ b/criu/include/pstree.h
|
||||
@@ -1,6 +1,8 @@
|
||||
#ifndef __CR_PSTREE_H__
|
||||
#define __CR_PSTREE_H__
|
||||
|
||||
+#include "taskqueue.h"
|
||||
+
|
||||
#include "common/list.h"
|
||||
#include "common/lock.h"
|
||||
#include "pid.h"
|
||||
@@ -31,6 +33,7 @@ struct pstree_item {
|
||||
futex_t task_st;
|
||||
unsigned long task_st_le_bits;
|
||||
};
|
||||
+ struct mappings_info maps_info;
|
||||
};
|
||||
|
||||
static inline pid_t vpid(const struct pstree_item *i)
|
||||
diff --git a/criu/include/taskqueue.h b/criu/include/taskqueue.h
|
||||
new file mode 100644
|
||||
index 0000000..16f9e3d
|
||||
--- /dev/null
|
||||
+++ b/criu/include/taskqueue.h
|
||||
@@ -0,0 +1,50 @@
|
||||
+#ifndef __CR_TASKQUEUE_H__
|
||||
+#define __CR_TASKQUEUE_H__
|
||||
+
|
||||
+#include <stdbool.h>
|
||||
+#include <pthread.h>
|
||||
+#include <semaphore.h>
|
||||
+
|
||||
+#include "vma.h"
|
||||
+#include "pstree.h"
|
||||
+
|
||||
+#include "common/list.h"
|
||||
+
|
||||
+#define TASKQUEUE_HASH_SIZE 8
|
||||
+
|
||||
+struct taskqueue {
|
||||
+ pthread_t task;
|
||||
+ void *(*routine)(void *);
|
||||
+ void *arg;
|
||||
+ int result;
|
||||
+};
|
||||
+#define queue_task queue.task
|
||||
+#define queue_routine queue.routine
|
||||
+#define queue_arg queue.arg
|
||||
+#define queue_result queue.result
|
||||
+
|
||||
+int init_parallel_env(void);
|
||||
+
|
||||
+static inline int taskqueue_create(struct taskqueue *queue)
|
||||
+{
|
||||
+ return pthread_create(&queue->task, NULL, queue->routine, queue->arg);
|
||||
+}
|
||||
+
|
||||
+static inline int taskqueue_join(struct taskqueue *queue)
|
||||
+{
|
||||
+ return pthread_join(queue->task, NULL);
|
||||
+}
|
||||
+
|
||||
+/* parallel collect smaps */
|
||||
+struct mappings_info {
|
||||
+ struct hlist_node hash;
|
||||
+ pid_t pid;
|
||||
+ struct vm_area_list *vmas;
|
||||
+ dump_filemap_t dump_file;
|
||||
+ struct taskqueue queue;
|
||||
+};
|
||||
+
|
||||
+int start_collect_mappings_thread(void);
|
||||
+int end_collect_mappings_thread(struct pstree_item *item);
|
||||
+
|
||||
+#endif /* __CR_TASKQUEUE_H__ */
|
||||
diff --git a/criu/namespaces.c b/criu/namespaces.c
|
||||
index 7fa5868..05e6732 100644
|
||||
--- a/criu/namespaces.c
|
||||
+++ b/criu/namespaces.c
|
||||
@@ -28,6 +28,7 @@
|
||||
#include "cgroup.h"
|
||||
#include "fdstore.h"
|
||||
#include "kerndat.h"
|
||||
+#include "taskqueue.h"
|
||||
|
||||
#include "protobuf.h"
|
||||
#include "util.h"
|
||||
@@ -1607,11 +1608,15 @@ int collect_namespaces(bool for_dump)
|
||||
{
|
||||
int ret;
|
||||
|
||||
- ret = collect_user_namespaces(for_dump);
|
||||
+ ret = collect_mnt_namespaces(for_dump);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
- ret = collect_mnt_namespaces(for_dump);
|
||||
+ /* need mnt info provided by `mntinfo` */
|
||||
+ if (opts.parallel && start_collect_mappings_thread())
|
||||
+ return -1;
|
||||
+
|
||||
+ ret = collect_user_namespaces(for_dump);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
diff --git a/criu/taskqueue.c b/criu/taskqueue.c
|
||||
new file mode 100644
|
||||
index 0000000..1196a5e
|
||||
--- /dev/null
|
||||
+++ b/criu/taskqueue.c
|
||||
@@ -0,0 +1,124 @@
|
||||
+/*
|
||||
+ * Target:
|
||||
+ * parallel dump process
|
||||
+ */
|
||||
+
|
||||
+#include <string.h>
|
||||
+#include <errno.h>
|
||||
+#include <pthread.h>
|
||||
+#include <sys/sysinfo.h>
|
||||
+
|
||||
+#include "pstree.h"
|
||||
+#include "log.h"
|
||||
+#include "taskqueue.h"
|
||||
+
|
||||
+/*
|
||||
+ * Sometimes, only one cpu can be used which is bad for parallel routine.
|
||||
+ * Therefore, set cpu affinity for criu routine.
|
||||
+ */
|
||||
+static int set_cpuaffinity(void)
|
||||
+{
|
||||
+ cpu_set_t *set;
|
||||
+ int num_cpus = get_nprocs_conf();
|
||||
+ size_t cpusetsize = CPU_ALLOC_SIZE(num_cpus);
|
||||
+ int retval;
|
||||
+
|
||||
+ set = CPU_ALLOC(num_cpus);
|
||||
+ memset(set, 0xff, cpusetsize);
|
||||
+
|
||||
+ retval = sched_setaffinity(getpid(), cpusetsize, set);
|
||||
+ if (retval != 0)
|
||||
+ pr_err("sched_setaffinity failed: %s\n", strerror(errno));
|
||||
+
|
||||
+ CPU_FREE(set);
|
||||
+
|
||||
+ return retval;
|
||||
+}
|
||||
+
|
||||
+int init_parallel_env(void)
|
||||
+{
|
||||
+ return set_cpuaffinity();
|
||||
+}
|
||||
+
|
||||
+static void *collect_mappings_routine(void *_arg)
|
||||
+{
|
||||
+ struct mappings_info *info = _arg;
|
||||
+
|
||||
+ info->queue_result = collect_mappings(info->pid, info->vmas, info->dump_file);
|
||||
+ return NULL;
|
||||
+}
|
||||
+
|
||||
+int dump_filemap(struct vma_area *vma_area, int fd); /* defined in criu/cr-dump.c */
|
||||
+
|
||||
+int start_collect_mappings_thread(void)
|
||||
+{
|
||||
+ struct pstree_item *pi;
|
||||
+ struct mappings_info *info;
|
||||
+
|
||||
+ for_each_pstree_item(pi) {
|
||||
+ /* disable parallel collect for non-root item because of the
|
||||
+ * concurrence.
|
||||
+ */
|
||||
+ if (pi->pid->real != root_item->pid->real)
|
||||
+ continue;
|
||||
+
|
||||
+ info = &pi->maps_info;
|
||||
+
|
||||
+ info->vmas = xmalloc(sizeof(struct vm_area_list));
|
||||
+ if (info->vmas == NULL) {
|
||||
+ pr_err("xzalloc vmas no memory\n");
|
||||
+ return -1;
|
||||
+ }
|
||||
+ vm_area_list_init(info->vmas);
|
||||
+
|
||||
+ info->pid = pi->pid->real;
|
||||
+ info->dump_file = dump_filemap;
|
||||
+ info->queue_routine = collect_mappings_routine;
|
||||
+ info->queue_arg = info;
|
||||
+
|
||||
+ pr_info("Start thread to collect %d mappings\n", info->pid);
|
||||
+
|
||||
+ if (taskqueue_create(&info->queue) < 0) {
|
||||
+ pr_err("parallel_collect_mappings failed: %s\n", strerror(errno));
|
||||
+ free(info->vmas);
|
||||
+ /*
|
||||
+ * Don't care other threads status, use `exit_group()`
|
||||
+ * to ensure all threads exit.
|
||||
+ */
|
||||
+ return -1;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+int end_collect_mappings_thread(struct pstree_item *item)
|
||||
+{
|
||||
+ struct mappings_info *info = &item->maps_info;
|
||||
+ int retval;
|
||||
+
|
||||
+ /* disable parallel collect for non-root item because of the
|
||||
+ * concurrence.
|
||||
+ */
|
||||
+ if (root_item->pid->real != item->pid->real)
|
||||
+ return 0;
|
||||
+
|
||||
+ retval = taskqueue_join(&info->queue);
|
||||
+ if (retval != 0 || info->queue_result != 0) {
|
||||
+ pr_err("taskqueue_join failed, retval %d(errno %d: %s),"
|
||||
+ " queue_result: %d\n",
|
||||
+ retval,
|
||||
+ retval == 0 ? 0 : errno,
|
||||
+ retval == 0 ? "nil" : strerror(errno),
|
||||
+ info->queue_result);
|
||||
+ retval = -1;
|
||||
+ }
|
||||
+
|
||||
+ pr_info("End thread to collect %d mappings\n", info->pid);
|
||||
+
|
||||
+ /*
|
||||
+ * Don't care other threads status, use `exit_group()` to ensure all
|
||||
+ * threads exit.
|
||||
+ */
|
||||
+ return retval;
|
||||
+}
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,120 +0,0 @@
|
||||
From 5acbfc773177797d954645e40ba8f7ed94a55d60 Mon Sep 17 00:00:00 2001
|
||||
From: Jingxian He <hejingxian@huawei.com>
|
||||
Date: Tue, 30 Nov 2021 11:38:18 +0800
|
||||
Subject: [PATCH 53/72] mm: add exec file mapping pin method
|
||||
|
||||
In order to improve criu dump and restore performance,
|
||||
enable pin method for exec file mapping.
|
||||
|
||||
Signed-off-by: Jingxian He <hejingxian@huawei.com>
|
||||
---
|
||||
criu/config.c | 4 ++++
|
||||
criu/crtools.c | 1 +
|
||||
criu/include/cr_options.h | 1 +
|
||||
criu/mem.c | 12 +++++++++++-
|
||||
criu/pin-mem.c | 4 ++++
|
||||
5 files changed, 21 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/criu/config.c b/criu/config.c
|
||||
index fdbc5eb..c0358e5 100644
|
||||
--- a/criu/config.c
|
||||
+++ b/criu/config.c
|
||||
@@ -710,6 +710,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd,
|
||||
{ "share-src-ports", required_argument, 0, 2001 },
|
||||
{ "reserve-ports", required_argument, 0, 'P' },
|
||||
BOOL_OPT("parallel", &opts.parallel),
|
||||
+ { "exec-pin-start", required_argument, 0, 2002 },
|
||||
{},
|
||||
};
|
||||
|
||||
@@ -1051,6 +1052,9 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd,
|
||||
case 2001:
|
||||
SET_CHAR_OPTS(share_src_ports, optarg);
|
||||
break;
|
||||
+ case 2002:
|
||||
+ opts.exec_pin_start = atoi(optarg);
|
||||
+ break;
|
||||
case 'V':
|
||||
pr_msg("Version: %s\n", CRIU_VERSION);
|
||||
if (strcmp(CRIU_GITID, "0"))
|
||||
diff --git a/criu/crtools.c b/criu/crtools.c
|
||||
index c20b3b7..40e2d51 100644
|
||||
--- a/criu/crtools.c
|
||||
+++ b/criu/crtools.c
|
||||
@@ -473,6 +473,7 @@ usage:
|
||||
" --file-locks-repair Use repair mode to dump and restore file locks\n"
|
||||
" --reserve-ports Reserve src ports in kernel\n"
|
||||
" --parallel Collect smaps parallel to accellrate dumping speed\n"
|
||||
+ " --exec-pin-start Exec file map's pin start index\n"
|
||||
"\n"
|
||||
"Check options:\n"
|
||||
" Without options, \"criu check\" checks availability of absolutely required\n"
|
||||
diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h
|
||||
index 6478d4d..a64e977 100644
|
||||
--- a/criu/include/cr_options.h
|
||||
+++ b/criu/include/cr_options.h
|
||||
@@ -202,6 +202,7 @@ struct cr_options {
|
||||
char *share_src_ports;
|
||||
int reserve_ports;
|
||||
int parallel;
|
||||
+ int exec_pin_start;
|
||||
};
|
||||
|
||||
extern struct cr_options opts;
|
||||
diff --git a/criu/mem.c b/criu/mem.c
|
||||
index b955d66..ccb6ae6 100644
|
||||
--- a/criu/mem.c
|
||||
+++ b/criu/mem.c
|
||||
@@ -448,6 +448,7 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, struct parasit
|
||||
int possible_pid_reuse = 0;
|
||||
bool has_parent;
|
||||
int parent_predump_mode = -1;
|
||||
+ int dump_iov;
|
||||
|
||||
pr_info("\n");
|
||||
pr_info("Dumping pages (type: %d pid: %d)\n", CR_FD_PAGES, item->pid->real);
|
||||
@@ -521,9 +522,18 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, struct parasit
|
||||
if (mdc->parent_ie)
|
||||
parent_predump_mode = mdc->parent_ie->pre_dump_mode;
|
||||
|
||||
+ dump_iov = 0;
|
||||
list_for_each_entry(vma_area, &vma_area_list->h, list) {
|
||||
if (opts.pin_memory && should_pin_vmae(vma_area->e)) {
|
||||
- continue;
|
||||
+ if (opts.exec_pin_start
|
||||
+ && vma_entry_is(vma_area->e, VMA_FILE_PRIVATE)
|
||||
+ && ((vma_area->e->prot & PROT_WRITE)
|
||||
+ || !(vma_area->e->prot & PROT_EXEC))) {
|
||||
+ dump_iov += 1;
|
||||
+ if (dump_iov > opts.exec_pin_start + 1)
|
||||
+ continue;
|
||||
+ } else
|
||||
+ continue;
|
||||
}
|
||||
|
||||
if (vma_entry_is(vma_area->e, VMA_AREA_ANON_INODE))
|
||||
diff --git a/criu/pin-mem.c b/criu/pin-mem.c
|
||||
index 96ca2c5..686217f 100644
|
||||
--- a/criu/pin-mem.c
|
||||
+++ b/criu/pin-mem.c
|
||||
@@ -2,6 +2,7 @@
|
||||
#include <stdbool.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
+#include "cr_options.h"
|
||||
#include "pstree.h"
|
||||
#include "mem.h"
|
||||
#include "vma.h"
|
||||
@@ -30,6 +31,9 @@ bool should_pin_vmae(VmaEntry *vmae)
|
||||
if (vma_entry_is(vmae, VMA_ANON_PRIVATE))
|
||||
return true;
|
||||
|
||||
+ if (opts.exec_pin_start && vma_entry_is(vmae, VMA_FILE_PRIVATE))
|
||||
+ return true;
|
||||
+
|
||||
return false;
|
||||
}
|
||||
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,774 +0,0 @@
|
||||
From 47412ba0d9ce6283071973387bf5b34bf876bb9a Mon Sep 17 00:00:00 2001
|
||||
From: "fu.lin" <fulin10@huawei.com>
|
||||
Date: Wed, 1 Dec 2021 09:44:07 +0800
|
||||
Subject: [PATCH 54/72] ptrace: trace specific syscall
|
||||
|
||||
criu uses `ptrace(PTRACE_SYSCALL)` to watch whether the tracee steps into
|
||||
the correct state; it isn't necessary to stop the tracee at every syscall.
|
||||
Therefore, customizing `ptrace(PTRACE_SYSCALL_NR)` to make the tracee stop at
|
||||
the specific syscall can save time (1000 threads consume about 140ms).
|
||||
|
||||
ptrace syntax:
|
||||
long ptrace(PTRACE_SYSCALL_NR, pid_t pid, void *addr, void *data);
|
||||
|
||||
The argument `addr` is unused in the original `ptrace(PTRACE_SYSCALL)`.
|
||||
Here `ptrace(PTRACE_SYSCALL_NR)` uses the `addr` parameter to pass the
|
||||
specific syscall number that should be traced.
|
||||
|
||||
Use `criu check` to generate `/run/criu.kdat` before the first use of
|
||||
criu, or let it be checked automatically during `criu {dump, restore}`.
|
||||
|
||||
Conflict:NA
|
||||
Reference:https://gitee.com/src-openeuler/criu/pulls/25
|
||||
|
||||
Signed-off-by: fu.lin <fulin10@huawei.com>
|
||||
---
|
||||
compel/Makefile | 1 +
|
||||
compel/include/uapi/bisect.h | 30 +++++++
|
||||
compel/include/uapi/infect.h | 15 +++-
|
||||
compel/src/lib/bisect.c | 92 +++++++++++++++++++
|
||||
compel/src/lib/infect.c | 167 ++++++++++++++++++++++++++++++++---
|
||||
criu/cr-dump.c | 2 +-
|
||||
criu/cr-restore.c | 97 +++++++++++++++++++-
|
||||
criu/include/kerndat.h | 1 +
|
||||
criu/kerndat.c | 61 +++++++++++++
|
||||
9 files changed, 450 insertions(+), 16 deletions(-)
|
||||
create mode 100644 compel/include/uapi/bisect.h
|
||||
create mode 100644 compel/src/lib/bisect.c
|
||||
|
||||
diff --git a/compel/Makefile b/compel/Makefile
|
||||
index b79aee6..2168a26 100644
|
||||
--- a/compel/Makefile
|
||||
+++ b/compel/Makefile
|
||||
@@ -27,6 +27,7 @@ lib-y += src/lib/infect-rpc.o
|
||||
lib-y += src/lib/infect-util.o
|
||||
lib-y += src/lib/infect.o
|
||||
lib-y += src/lib/ptrace.o
|
||||
+lib-y += src/lib/bisect.o
|
||||
|
||||
ifeq ($(ARCH),x86)
|
||||
lib-y += arch/$(ARCH)/src/lib/thread_area.o
|
||||
diff --git a/compel/include/uapi/bisect.h b/compel/include/uapi/bisect.h
|
||||
new file mode 100644
|
||||
index 0000000..55ebcbd
|
||||
--- /dev/null
|
||||
+++ b/compel/include/uapi/bisect.h
|
||||
@@ -0,0 +1,30 @@
|
||||
+#ifndef __COMPEL_BISECT_H__
|
||||
+#define __COMPEL_BISECT_H__
|
||||
+
|
||||
+#include <sys/types.h>
|
||||
+
|
||||
+enum tf {
|
||||
+ TRACE_INTERRUPT,
|
||||
+ TRACE_SYSCALL_ENTER,
|
||||
+ TRACE_SYSCALL_EXIT,
|
||||
+};
|
||||
+
|
||||
+struct trace_flag {
|
||||
+ pid_t key;
|
||||
+ enum tf flag;
|
||||
+};
|
||||
+
|
||||
+struct bisect_meta {
|
||||
+ int size;
|
||||
+ int used;
|
||||
+ void *data; /* data pointer array */
|
||||
+ void *__data; /* data array */
|
||||
+};
|
||||
+
|
||||
+struct trace_flag *tf_bisect(struct bisect_meta *meta, pid_t key);
|
||||
+struct trace_flag *tf_insert(struct bisect_meta *meta, pid_t key);
|
||||
+int tf_create(struct bisect_meta *meta, int len);
|
||||
+void tf_destroy(struct bisect_meta *meta);
|
||||
+void tf_clear(struct bisect_meta *meta);
|
||||
+
|
||||
+#endif /* __COMPEL_BISECT_H__ */
|
||||
diff --git a/compel/include/uapi/infect.h b/compel/include/uapi/infect.h
|
||||
index 389878e..a23782e 100644
|
||||
--- a/compel/include/uapi/infect.h
|
||||
+++ b/compel/include/uapi/infect.h
|
||||
@@ -8,11 +8,16 @@
|
||||
#include <compel/ksigset.h>
|
||||
#include <compel/handle-elf.h>
|
||||
#include <compel/task-state.h>
|
||||
+#include <compel/bisect.h>
|
||||
|
||||
#include "common/compiler.h"
|
||||
|
||||
#define PARASITE_START_AREA_MIN (4096)
|
||||
|
||||
+#ifndef PTRACE_SYSCALL_NR
|
||||
+# define PTRACE_SYSCALL_NR 0xff00
|
||||
+#endif
|
||||
+
|
||||
extern int __must_check compel_interrupt_task(int pid);
|
||||
|
||||
struct seize_task_status {
|
||||
@@ -41,7 +46,7 @@ extern int __must_check compel_infect(struct parasite_ctl *ctl, unsigned long nr
|
||||
extern struct parasite_thread_ctl __must_check *compel_prepare_thread(struct parasite_ctl *ctl, int pid);
|
||||
extern void compel_release_thread(struct parasite_thread_ctl *);
|
||||
|
||||
-extern int __must_check compel_stop_daemon(struct parasite_ctl *ctl);
|
||||
+extern int __must_check compel_stop_daemon(struct parasite_ctl *ctl, bool customize);
|
||||
extern int __must_check compel_cure_remote(struct parasite_ctl *ctl);
|
||||
extern int __must_check compel_cure_local(struct parasite_ctl *ctl);
|
||||
extern int __must_check compel_cure(struct parasite_ctl *ctl);
|
||||
@@ -83,6 +88,14 @@ extern int __must_check compel_stop_pie(pid_t pid, void *addr, enum trace_flags
|
||||
|
||||
extern int __must_check compel_unmap(struct parasite_ctl *ctl, unsigned long addr);
|
||||
|
||||
+extern int __must_check compel_stop_on_syscall_customize(int tasks,
|
||||
+ const int sys_nr, const int exit_sys_nr, struct bisect_meta *meta);
|
||||
+
|
||||
+extern int __must_check compel_stop_pie_customize(pid_t pid,
|
||||
+ const int sys_nr, struct trace_flag *tf);
|
||||
+
|
||||
+extern int __must_check compel_unmap_customize(struct parasite_ctl *ctl, unsigned long addr);
|
||||
+
|
||||
extern int compel_mode_native(struct parasite_ctl *ctl);
|
||||
|
||||
extern k_rtsigset_t *compel_task_sigmask(struct parasite_ctl *ctl);
|
||||
diff --git a/compel/src/lib/bisect.c b/compel/src/lib/bisect.c
|
||||
new file mode 100644
|
||||
index 0000000..807a5a9
|
||||
--- /dev/null
|
||||
+++ b/compel/src/lib/bisect.c
|
||||
@@ -0,0 +1,92 @@
|
||||
+#include <stddef.h>
|
||||
+
|
||||
+#include "log.h"
|
||||
+#include "common/xmalloc.h"
|
||||
+#include "bisect.h"
|
||||
+
|
||||
+struct trace_flag *tf_bisect(struct bisect_meta *meta, pid_t key)
|
||||
+{
|
||||
+ struct trace_flag **tfs = meta->data;
|
||||
+ int lo = 0, hi = meta->used, mid;
|
||||
+
|
||||
+ if (meta->used <= 0)
|
||||
+ return NULL;
|
||||
+
|
||||
+ while (lo < hi) {
|
||||
+ mid = (int)((lo + hi) / 2);
|
||||
+ if (tfs[mid]->key == key) {
|
||||
+ return tfs[mid];
|
||||
+ } else if (tfs[mid]->key > key) {
|
||||
+ hi = mid;
|
||||
+ } else {
|
||||
+ lo = mid + 1;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return NULL;
|
||||
+}
|
||||
+
|
||||
+/* used in cr-restore */
|
||||
+struct trace_flag *tf_insert(struct bisect_meta *meta, pid_t key)
|
||||
+{
|
||||
+ struct trace_flag **tfs = meta->data;
|
||||
+ struct trace_flag *tf = &((struct trace_flag *)meta->__data)[meta->used];
|
||||
+ int i = 0, j = 0;
|
||||
+
|
||||
+ if (meta->used == meta->size)
|
||||
+ return NULL;
|
||||
+
|
||||
+ for (i = 0; i < meta->used; i++) {
|
||||
+ if (tfs[i]->key >= key) /* impossible condition: `tfs[i]->key == key` */
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ j = meta->used;
|
||||
+ meta->used += 1;
|
||||
+
|
||||
+ while (j > i) {
|
||||
+ tfs[j] = tfs[j-1];
|
||||
+ j -= 1;
|
||||
+ }
|
||||
+
|
||||
+ tfs[i] = tf;
|
||||
+ tf->key = key;
|
||||
+
|
||||
+ return tf;
|
||||
+}
|
||||
+
|
||||
+int tf_create(struct bisect_meta *meta, int len)
|
||||
+{
|
||||
+ struct trace_flag *tfs;
|
||||
+ struct trace_flag **tfs_ptr;
|
||||
+
|
||||
+ tfs = xzalloc(sizeof(*tfs) * len);
|
||||
+ if (tfs == NULL)
|
||||
+ return -1;
|
||||
+
|
||||
+ tfs_ptr = xmalloc(sizeof(*tfs_ptr) * len);
|
||||
+ if (tfs_ptr == NULL)
|
||||
+ goto err;
|
||||
+
|
||||
+ meta->size = len;
|
||||
+ meta->used = 0;
|
||||
+ meta->__data = tfs;
|
||||
+ meta->data = tfs_ptr;
|
||||
+
|
||||
+ return 0;
|
||||
+err:
|
||||
+ xfree(tfs);
|
||||
+ return -1;
|
||||
+}
|
||||
+
|
||||
+void tf_destroy(struct bisect_meta *meta)
|
||||
+{
|
||||
+ xfree(meta->__data);
|
||||
+ xfree(meta->data);
|
||||
+}
|
||||
+
|
||||
+void tf_clear(struct bisect_meta *meta)
|
||||
+{
|
||||
+ meta->used = 0;
|
||||
+ __builtin_memset(meta->data, 0, sizeof(struct trace_flag **)*meta->size);
|
||||
+}
|
||||
diff --git a/compel/src/lib/infect.c b/compel/src/lib/infect.c
|
||||
index 6a13cc1..f9b8832 100644
|
||||
--- a/compel/src/lib/infect.c
|
||||
+++ b/compel/src/lib/infect.c
|
||||
@@ -449,7 +449,7 @@ static int restore_child_handler(struct parasite_ctl *ctl)
|
||||
}
|
||||
|
||||
static int parasite_run(pid_t pid, int cmd, unsigned long ip, void *stack, user_regs_struct_t *regs,
|
||||
- struct thread_ctx *octx)
|
||||
+ struct thread_ctx *octx, void *addr)
|
||||
{
|
||||
k_rtsigset_t block;
|
||||
|
||||
@@ -470,7 +470,7 @@ static int parasite_run(pid_t pid, int cmd, unsigned long ip, void *stack, user_
|
||||
goto err_regs;
|
||||
}
|
||||
|
||||
- if (ptrace(cmd, pid, NULL, NULL)) {
|
||||
+ if (ptrace(cmd, pid, addr, NULL)) {
|
||||
pr_perror("Can't run parasite at %d", pid);
|
||||
goto err_cont;
|
||||
}
|
||||
@@ -575,7 +575,7 @@ int compel_execute_syscall(struct parasite_ctl *ctl, user_regs_struct_t *regs, c
|
||||
return -1;
|
||||
}
|
||||
|
||||
- err = parasite_run(pid, PTRACE_CONT, ctl->ictx.syscall_ip, 0, regs, &ctl->orig);
|
||||
+ err = parasite_run(pid, PTRACE_CONT, ctl->ictx.syscall_ip, 0, regs, &ctl->orig, NULL);
|
||||
if (!err)
|
||||
err = parasite_trap(ctl, pid, regs, &ctl->orig, false);
|
||||
|
||||
@@ -592,7 +592,7 @@ int compel_run_at(struct parasite_ctl *ctl, unsigned long ip, user_regs_struct_t
|
||||
user_regs_struct_t regs = ctl->orig.regs;
|
||||
int ret;
|
||||
|
||||
- ret = parasite_run(ctl->rpid, PTRACE_CONT, ip, 0, ®s, &ctl->orig);
|
||||
+ ret = parasite_run(ctl->rpid, PTRACE_CONT, ip, 0, ®s, &ctl->orig, NULL);
|
||||
if (!ret)
|
||||
ret = parasite_trap(ctl, ctl->rpid, ret_regs ? ret_regs : ®s, &ctl->orig, false);
|
||||
return ret;
|
||||
@@ -641,7 +641,7 @@ static int parasite_init_daemon(struct parasite_ctl *ctl)
|
||||
goto err;
|
||||
|
||||
regs = ctl->orig.regs;
|
||||
- if (parasite_run(pid, PTRACE_CONT, ctl->parasite_ip, ctl->rstack, ®s, &ctl->orig))
|
||||
+ if (parasite_run(pid, PTRACE_CONT, ctl->parasite_ip, ctl->rstack, ®s, &ctl->orig, NULL))
|
||||
goto err;
|
||||
|
||||
futex_wait_while_eq(&args->daemon_connected, 0);
|
||||
@@ -1303,7 +1303,7 @@ static bool task_in_parasite(struct parasite_ctl *ctl, user_regs_struct_t *regs)
|
||||
return addr >= ctl->remote_map && addr < ctl->remote_map + ctl->map_length;
|
||||
}
|
||||
|
||||
-static int parasite_fini_seized(struct parasite_ctl *ctl)
|
||||
+static int parasite_fini_seized(struct parasite_ctl *ctl, bool customize)
|
||||
{
|
||||
pid_t pid = ctl->rpid;
|
||||
user_regs_struct_t regs;
|
||||
@@ -1348,6 +1348,34 @@ static int parasite_fini_seized(struct parasite_ctl *ctl)
|
||||
if (ret)
|
||||
return -1;
|
||||
|
||||
+ /* use customize ptrace */
|
||||
+ if (customize) {
|
||||
+ struct trace_flag tf = { .key = pid, .flag = TRACE_SYSCALL_ENTER };
|
||||
+ struct trace_flag *tf_ptr[] = { &tf };
|
||||
+ struct bisect_meta meta = {
|
||||
+ .size = 1,
|
||||
+ .used = 1,
|
||||
+ .__data = &tf,
|
||||
+ .data = tf_ptr,
|
||||
+ };
|
||||
+
|
||||
+ ret = compel_stop_pie_customize(pid, __NR(rt_sigreturn, 0), &tf);
|
||||
+ if (ret < 0)
|
||||
+ return ret;
|
||||
+
|
||||
+ /* The process is going to execute the required syscall, the
|
||||
+ * original syscall should be forgot(set `-1`) in
|
||||
+ * `syscall_trace_enter()` handler in kernel when no other
|
||||
+ * else operation in tracer.
|
||||
+ *
|
||||
+ * Note: -1 means NO_SYSCALL which is defined in
|
||||
+ * `arch/arm64/include/asm/ptrace.h`.
|
||||
+ */
|
||||
+ return compel_stop_on_syscall_customize(1,
|
||||
+ __NR(rt_sigreturn, 0),
|
||||
+ -1, &meta);
|
||||
+ }
|
||||
+
|
||||
/* Go to sigreturn as closer as we can */
|
||||
ret = compel_stop_pie(pid, ctl->sigreturn_addr, &flag, ctl->ictx.flags & INFECT_NO_BREAKPOINTS);
|
||||
if (ret < 0)
|
||||
@@ -1368,7 +1396,7 @@ static int parasite_fini_seized(struct parasite_ctl *ctl)
|
||||
return 0;
|
||||
}
|
||||
|
||||
-int compel_stop_daemon(struct parasite_ctl *ctl)
|
||||
+int compel_stop_daemon(struct parasite_ctl *ctl, bool customize)
|
||||
{
|
||||
if (ctl->daemonized) {
|
||||
/*
|
||||
@@ -1378,7 +1406,7 @@ int compel_stop_daemon(struct parasite_ctl *ctl)
|
||||
if (ctl->tsock < 0)
|
||||
return -1;
|
||||
|
||||
- if (parasite_fini_seized(ctl)) {
|
||||
+ if (parasite_fini_seized(ctl, customize)) {
|
||||
close_safe(&ctl->tsock);
|
||||
return -1;
|
||||
}
|
||||
@@ -1394,7 +1422,7 @@ int compel_cure_remote(struct parasite_ctl *ctl)
|
||||
long ret;
|
||||
int err;
|
||||
|
||||
- if (compel_stop_daemon(ctl))
|
||||
+ if (compel_stop_daemon(ctl, false))
|
||||
return -1;
|
||||
|
||||
if (!ctl->remote_map)
|
||||
@@ -1461,7 +1489,7 @@ int compel_run_in_thread(struct parasite_thread_ctl *tctl, unsigned int cmd)
|
||||
|
||||
*ctl->cmd = cmd;
|
||||
|
||||
- ret = parasite_run(pid, PTRACE_CONT, ctl->parasite_ip, stack, ®s, octx);
|
||||
+ ret = parasite_run(pid, PTRACE_CONT, ctl->parasite_ip, stack, ®s, octx, NULL);
|
||||
if (ret == 0)
|
||||
ret = parasite_trap(ctl, pid, ®s, octx, true);
|
||||
if (ret == 0)
|
||||
@@ -1484,7 +1512,7 @@ int compel_unmap(struct parasite_ctl *ctl, unsigned long addr)
|
||||
pid_t pid = ctl->rpid;
|
||||
int ret = -1;
|
||||
|
||||
- ret = parasite_run(pid, PTRACE_SYSCALL, addr, ctl->rstack, ®s, &ctl->orig);
|
||||
+ ret = parasite_run(pid, PTRACE_SYSCALL, addr, ctl->rstack, ®s, &ctl->orig, NULL);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
@@ -1500,6 +1528,45 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
+int compel_unmap_customize(struct parasite_ctl *ctl, unsigned long addr)
|
||||
+{
|
||||
+ user_regs_struct_t regs = ctl->orig.regs;
|
||||
+ pid_t pid = ctl->rpid;
|
||||
+ int ret = -1;
|
||||
+ struct trace_flag tf = { .key = pid, .flag = TRACE_SYSCALL_ENTER };
|
||||
+ struct trace_flag *tf_ptr[] = { &tf };
|
||||
+ struct bisect_meta meta = {
|
||||
+ .size = 1,
|
||||
+ .used = 1,
|
||||
+ .__data = &tf,
|
||||
+ .data = tf_ptr,
|
||||
+ };
|
||||
+
|
||||
+ /*
|
||||
+ * Here it parasite code. Unlike trap code `compel_stop_pie()`, it
|
||||
+ * won't let tracee forget the original syscall. In such way, tracer
|
||||
+ * just trace the syscall called by tracee. The log likes the following
|
||||
+ * if tracee forget syscall:
|
||||
+ *
|
||||
+ * [ 817.638332] set pid 1877 ptrace sysno 215
|
||||
+ * [ 817.638343] syscall_trace_enter: pid 1877 ptrace_sysno 0 current_sysno 215
|
||||
+ * [ 817.638363] (00.006280) Error (compel/src/lib/infect.c:1582): 1877 (native) is going to execute the syscall 215, required is 215
|
||||
+ * [ 817.638368] set pid 1877 ptrace sysno 0
|
||||
+ * [ 817.638402] syscall_trace_exit: pid 1877 ptrace_sysno 0 current_sysno 215
|
||||
+ */
|
||||
+ ret = parasite_run(pid, PTRACE_SYSCALL_NR, addr, ctl->rstack, ®s,
|
||||
+ &ctl->orig, (void *)(long)__NR(munmap, 0));
|
||||
+ if (ret)
|
||||
+ goto err;
|
||||
+
|
||||
+ ret = compel_stop_on_syscall_customize(1, __NR(munmap, 0), 0, &meta);
|
||||
+
|
||||
+ if (restore_thread_ctx(pid, &ctl->orig, false))
|
||||
+ ret = -1;
|
||||
+err:
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
int compel_stop_pie(pid_t pid, void *addr, enum trace_flags *tf, bool no_bp)
|
||||
{
|
||||
int ret;
|
||||
@@ -1535,6 +1602,17 @@ int compel_stop_pie(pid_t pid, void *addr, enum trace_flags *tf, bool no_bp)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+int compel_stop_pie_customize(pid_t pid, const int sys_nr, struct trace_flag *tf)
|
||||
+{
|
||||
+ if (ptrace(PTRACE_SYSCALL_NR, pid, sys_nr, NULL)) {
|
||||
+ pr_perror("Unable to restart the %d process", pid);
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ tf->flag = TRACE_SYSCALL_ENTER;
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static bool task_is_trapped(int status, pid_t pid)
|
||||
{
|
||||
if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP)
|
||||
@@ -1642,6 +1720,73 @@ int compel_stop_on_syscall(int tasks, const int sys_nr, const int sys_nr_compat,
|
||||
return 0;
|
||||
}
|
||||
|
||||
+int compel_stop_on_syscall_customize(int tasks, const int sys_nr,
|
||||
+ const int exit_sys_nr, struct bisect_meta *meta)
|
||||
+{
|
||||
+ struct trace_flag *tf;
|
||||
+ user_regs_struct_t regs;
|
||||
+ int status, ret;
|
||||
+ pid_t pid;
|
||||
+
|
||||
+ while (tasks) {
|
||||
+ pid = wait4(-1, &status, __WALL, NULL);
|
||||
+ if (pid == -1) {
|
||||
+ pr_perror("wait4 failed");
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ tf = tf_bisect(meta, pid);
|
||||
+ if (tf == NULL) {
|
||||
+ pr_warn("Unexpected task %d, state %d signal %d: %s\n",
|
||||
+ pid, WEXITSTATUS(status),
|
||||
+ WTERMSIG(status), strsignal(WTERMSIG(status)));
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ if (!task_is_trapped(status, pid))
|
||||
+ return -1;
|
||||
+
|
||||
+ switch (tf->flag) {
|
||||
+ case TRACE_SYSCALL_ENTER:
|
||||
+ pr_debug("%d was trapped\n", pid);
|
||||
+ pr_debug("`- Expecting exit\n");
|
||||
+
|
||||
+ ret = ptrace_get_regs(pid, ®s);
|
||||
+ if (ret) {
|
||||
+ pr_perror("ptrace");
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ if (is_required_syscall(®s, pid, sys_nr, sys_nr)) {
|
||||
+ ret = ptrace(PTRACE_SYSCALL_NR, pid, exit_sys_nr, NULL);
|
||||
+ if (ret) {
|
||||
+ pr_perror("ptrace");
|
||||
+ return -1;
|
||||
+ }
|
||||
+ tf->flag = TRACE_SYSCALL_EXIT;
|
||||
+ } else {
|
||||
+ pr_warn("Impossible condition, check the system, try our best to restore...\n");
|
||||
+ ret = ptrace(PTRACE_SYSCALL_NR, pid, sys_nr, NULL);
|
||||
+ if (ret) {
|
||||
+ pr_perror("ptrace");
|
||||
+ return -1;
|
||||
+ }
|
||||
+ }
|
||||
+ break;
|
||||
+ case TRACE_SYSCALL_EXIT:
|
||||
+ pr_debug("%d was stopped\n", pid);
|
||||
+ tasks--;
|
||||
+ break;
|
||||
+
|
||||
+ default:
|
||||
+ pr_err("pid %d invalid status: %d\n", pid, tf->flag);
|
||||
+ return -1;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
int compel_mode_native(struct parasite_ctl *ctl)
|
||||
{
|
||||
return user_regs_native(&ctl->orig.regs);
|
||||
diff --git a/criu/cr-dump.c b/criu/cr-dump.c
|
||||
index ee826c0..9253e91 100644
|
||||
--- a/criu/cr-dump.c
|
||||
+++ b/criu/cr-dump.c
|
||||
@@ -1708,7 +1708,7 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie)
|
||||
goto err_cure;
|
||||
}
|
||||
|
||||
- ret = compel_stop_daemon(parasite_ctl);
|
||||
+ ret = compel_stop_daemon(parasite_ctl, kdat.has_customize_ptrace);
|
||||
if (ret) {
|
||||
pr_err("Can't stop daemon in parasite (pid: %d)\n", pid);
|
||||
goto err_cure;
|
||||
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
|
||||
index d19768d..b0b3d30 100644
|
||||
--- a/criu/cr-restore.c
|
||||
+++ b/criu/cr-restore.c
|
||||
@@ -2181,6 +2181,64 @@ static int catch_tasks(bool root_seized, enum trace_flags *flag)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int cache_tasks_customize(bool root_seized, struct bisect_meta *meta)
|
||||
+{
|
||||
+ struct pstree_item *item;
|
||||
+ struct trace_flag *tf;
|
||||
+
|
||||
+ for_each_pstree_item(item) {
|
||||
+ int status, i, ret;
|
||||
+ pid_t pid;
|
||||
+
|
||||
+ if (!task_alive(item))
|
||||
+ continue;
|
||||
+
|
||||
+ if (item->nr_threads == 1) {
|
||||
+ item->threads[0].real = item->pid->real;
|
||||
+ } else {
|
||||
+ if (parse_threads(item->pid->real, &item->threads, &item->nr_threads))
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ for (i = 0; i < item->nr_threads; i++) {
|
||||
+ pid = item->threads[i].real;
|
||||
+
|
||||
+ if (ptrace(PTRACE_INTERRUPT, pid, 0, 0)) {
|
||||
+ pr_perror("Can't interrupt the %d task", pid);
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ tf = tf_insert(meta, pid);
|
||||
+ if (tf == NULL) {
|
||||
+ pr_err("Can't find trace flag for %d, used %d\n",
|
||||
+ pid, meta->used);
|
||||
+ return -1;
|
||||
+ }
|
||||
+ tf->flag = TRACE_INTERRUPT;
|
||||
+ }
|
||||
+
|
||||
+ for (i = 0; i < item->nr_threads; i++) {
|
||||
+ pid = wait4(-1, &status, __WALL, NULL);
|
||||
+
|
||||
+ tf = tf_bisect(meta, pid);
|
||||
+ if (tf == NULL) {
|
||||
+ pr_err("Can't find trace flag for %d, used %d\n",
|
||||
+ pid, meta->used);
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ ret = compel_stop_pie_customize(pid,
|
||||
+ __NR(rt_sigreturn, 0),
|
||||
+ tf);
|
||||
+ if (ret < 0)
|
||||
+ return -1;
|
||||
+
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static int clear_breakpoints(void)
|
||||
{
|
||||
struct pstree_item *item;
|
||||
@@ -2207,6 +2265,7 @@ static void finalize_restore(void)
|
||||
pid_t pid = item->pid->real;
|
||||
struct parasite_ctl *ctl;
|
||||
unsigned long restorer_addr;
|
||||
+ int retval;
|
||||
|
||||
if (!task_alive(item))
|
||||
continue;
|
||||
@@ -2217,7 +2276,12 @@ static void finalize_restore(void)
|
||||
continue;
|
||||
|
||||
restorer_addr = (unsigned long)rsti(item)->munmap_restorer;
|
||||
- if (compel_unmap(ctl, restorer_addr))
|
||||
+ if (!kdat.has_customize_ptrace)
|
||||
+ retval = compel_unmap(ctl, restorer_addr);
|
||||
+ else
|
||||
+ retval = compel_unmap_customize(ctl, restorer_addr);
|
||||
+
|
||||
+ if (retval)
|
||||
pr_err("Failed to unmap restorer from %d\n", pid);
|
||||
|
||||
xfree(ctl);
|
||||
@@ -2333,11 +2397,18 @@ static void reap_zombies(void)
|
||||
|
||||
static int restore_root_task(struct pstree_item *init)
|
||||
{
|
||||
+ struct bisect_meta tfs_meta;
|
||||
enum trace_flags flag = TRACE_ALL;
|
||||
int ret, fd, mnt_ns_fd = -1;
|
||||
int root_seized = 0;
|
||||
struct pstree_item *item;
|
||||
|
||||
+ if (kdat.has_customize_ptrace
|
||||
+ && tf_create(&tfs_meta, task_entries->nr_threads) != 0) {
|
||||
+ pr_err("Can't alloc memory, tf_create failed\n");
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
ret = run_scripts(ACT_PRE_RESTORE);
|
||||
if (ret != 0) {
|
||||
pr_err("Aborting restore due to pre-restore script ret code %d\n", ret);
|
||||
@@ -2551,7 +2622,12 @@ skip_ns_bouncing:
|
||||
|
||||
timing_stop(TIME_RESTORE);
|
||||
|
||||
- if (catch_tasks(root_seized, &flag)) {
|
||||
+ if (!kdat.has_customize_ptrace)
|
||||
+ ret = catch_tasks(root_seized, &flag);
|
||||
+ else
|
||||
+ ret = cache_tasks_customize(root_seized, &tfs_meta);
|
||||
+
|
||||
+ if (ret) {
|
||||
pr_err("Can't catch all tasks\n");
|
||||
goto out_kill_network_unlocked;
|
||||
}
|
||||
@@ -2561,7 +2637,15 @@ skip_ns_bouncing:
|
||||
|
||||
__restore_switch_stage(CR_STATE_COMPLETE);
|
||||
|
||||
- ret = compel_stop_on_syscall(task_entries->nr_threads, __NR(rt_sigreturn, 0), __NR(rt_sigreturn, 1), flag);
|
||||
+ if (!kdat.has_customize_ptrace) {
|
||||
+ ret = compel_stop_on_syscall(task_entries->nr_threads,
|
||||
+ __NR(rt_sigreturn, 0),
|
||||
+ __NR(rt_sigreturn, 1), flag);
|
||||
+ } else {
|
||||
+ ret = compel_stop_on_syscall_customize(task_entries->nr_threads,
|
||||
+ __NR(rt_sigreturn, 0),
|
||||
+ -1, &tfs_meta);
|
||||
+ }
|
||||
if (ret) {
|
||||
pr_err("Can't stop all tasks on rt_sigreturn\n");
|
||||
goto out_kill_network_unlocked;
|
||||
@@ -2600,6 +2684,9 @@ skip_ns_bouncing:
|
||||
reap_zombies();
|
||||
}
|
||||
|
||||
+ if (kdat.has_customize_ptrace)
|
||||
+ tf_destroy(&tfs_meta);
|
||||
+
|
||||
return 0;
|
||||
|
||||
out_kill_network_unlocked:
|
||||
@@ -2631,6 +2718,10 @@ out:
|
||||
stop_usernsd();
|
||||
__restore_switch_stage(CR_STATE_FAIL);
|
||||
pr_err("Restoring FAILED.\n");
|
||||
+
|
||||
+ if (kdat.has_customize_ptrace)
|
||||
+ tf_destroy(&tfs_meta);
|
||||
+
|
||||
return -1;
|
||||
}
|
||||
|
||||
diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h
|
||||
index 3979939..8034db9 100644
|
||||
--- a/criu/include/kerndat.h
|
||||
+++ b/criu/include/kerndat.h
|
||||
@@ -77,6 +77,7 @@ struct kerndat_s {
|
||||
bool has_rseq;
|
||||
bool has_ptrace_get_rseq_conf;
|
||||
bool has_unix_sk_repair;
|
||||
+ bool has_customize_ptrace;
|
||||
};
|
||||
|
||||
extern struct kerndat_s kdat;
|
||||
diff --git a/criu/kerndat.c b/criu/kerndat.c
|
||||
index 6d6aac1..630814e 100644
|
||||
--- a/criu/kerndat.c
|
||||
+++ b/criu/kerndat.c
|
||||
@@ -1289,6 +1289,66 @@ static void kerndat_has_unix_sk_repair(void)
|
||||
return;
|
||||
}
|
||||
|
||||
+static void kerndat_has_customize_ptrace(void)
|
||||
+{
|
||||
+ pid_t tracee = fork();
|
||||
+ int status;
|
||||
+ int retval;
|
||||
+
|
||||
+ if (tracee == 0) {
|
||||
+ /* ensure */
|
||||
+ prctl(PR_SET_PDEATHSIG, SIGKILL);
|
||||
+
|
||||
+ while (true)
|
||||
+ sleep(1);
|
||||
+ } else if (tracee > 0) {
|
||||
+ pr_debug("fork task %d as tracee\n", tracee);
|
||||
+ retval = ptrace(PTRACE_ATTACH, tracee, 0, 0);
|
||||
+ if (retval < 0) {
|
||||
+ pr_perror("Unexpect error from ptrace(PTRACE_ATTACH)");
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ retval = wait4(-1, &status, __WALL, NULL);
|
||||
+ if (retval == -1)
|
||||
+ pr_perror("Unexpect error from wait");
|
||||
+ else if (retval != tracee || !(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP))
|
||||
+ pr_err("Task %d (expect %d) is unexpect, status: %d,"
|
||||
+ " stoped: %d signal: %d(%s)\n",
|
||||
+ retval, tracee, status,
|
||||
+ WIFSTOPPED(status), WSTOPSIG(status),
|
||||
+ strsignal(WTERMSIG(status)));
|
||||
+ else {
|
||||
+ retval = ptrace(PTRACE_SYSCALL_NR, tracee, 0, 0);
|
||||
+ if (retval == 0)
|
||||
+ kdat.has_customize_ptrace = true;
|
||||
+ else
|
||||
+ pr_perror("Unexpect error from ptrace(PTRACE_SYSCALL_NR)");
|
||||
+ }
|
||||
+
|
||||
+ if (kill(tracee, SIGKILL) != 0) {
|
||||
+ pr_perror("kill tracee %d failed", tracee);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * To prevent wait4 unexpect task when criu.kdat is generated
|
||||
+ * in dump process.
|
||||
+ */
|
||||
+ retval = waitpid(tracee, &status, 0);
|
||||
+ if (retval == -1)
|
||||
+ pr_err("waitpid() failed");
|
||||
+ else
|
||||
+ pr_debug("tracee %d exited, status %d, signal %d(%s)\n",
|
||||
+ WEXITSTATUS(status), WTERMSIG(status),
|
||||
+ WTERMSIG(status), strsignal(WTERMSIG(status)));
|
||||
+ } else {
|
||||
+ pr_perror("Unexpected error from fork\n");
|
||||
+ }
|
||||
+
|
||||
+ return;
|
||||
+}
|
||||
+
|
||||
int kerndat_init(void)
|
||||
{
|
||||
int ret;
|
||||
@@ -1451,6 +1511,7 @@ int kerndat_init(void)
|
||||
}
|
||||
|
||||
kerndat_has_unix_sk_repair();
|
||||
+ kerndat_has_customize_ptrace();
|
||||
|
||||
kerndat_lsm();
|
||||
kerndat_mmap_min_addr();
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,150 +0,0 @@
|
||||
From c79a274b378173ac64d42d1c72df1ec594085d66 Mon Sep 17 00:00:00 2001
|
||||
From: "fu.lin" <fulin10@huawei.com>
|
||||
Date: Mon, 27 Dec 2021 21:34:39 +0800
|
||||
Subject: [PATCH 55/72] notifier: rollback when open img failed
|
||||
|
||||
Conflict:NA
|
||||
Reference:https://gitee.com/src-openeuler/criu/pulls/26
|
||||
|
||||
Signed-off-by: fu.lin <fulin10@huawei.com>
|
||||
---
|
||||
criu/cr-restore.c | 69 +++++++++++++++++++++++++++++++++++++++++++
|
||||
criu/include/pstree.h | 1 +
|
||||
criu/pstree.c | 8 +++++
|
||||
3 files changed, 78 insertions(+)
|
||||
|
||||
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
|
||||
index b0b3d30..13f0a93 100644
|
||||
--- a/criu/cr-restore.c
|
||||
+++ b/criu/cr-restore.c
|
||||
@@ -1542,6 +1542,9 @@ static inline int fork_with_pid(struct pstree_item *item)
|
||||
goto err_unlock;
|
||||
}
|
||||
|
||||
+ /* disable criu rollback capability. */
|
||||
+ criu_roll = false;
|
||||
+
|
||||
if (item == root_item) {
|
||||
item->pid->real = ret;
|
||||
pr_debug("PID: real %d virt %d\n", item->pid->real, vpid(item));
|
||||
@@ -2757,6 +2760,71 @@ int prepare_dummy_task_state(struct pstree_item *pi)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int criu_rollback_internal(void *_arg)
|
||||
+{
|
||||
+ bool unmask = *(int *)_arg;
|
||||
+ pid_t pid = getpid();
|
||||
+
|
||||
+ if (unmask && mask_task_exit_notify(pid, false) != 0)
|
||||
+ pr_err("unmask exit notify failed for %d\n", pid);
|
||||
+
|
||||
+ do_notifier_rollback(true, POST_UPDATE_KERNEL_COMPLETE);
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static void criu_rollback(void)
|
||||
+{
|
||||
+ pid_t pid;
|
||||
+ unsigned long clone_flags;
|
||||
+ int retval = 0;
|
||||
+
|
||||
+ if (!criu_roll || !opts.with_notifier_kup)
|
||||
+ return;
|
||||
+
|
||||
+ pid = vpid(root_item);
|
||||
+ clone_flags = rsti(root_item)->clone_flags;
|
||||
+
|
||||
+ pr_info("do criu rollback\n");
|
||||
+
|
||||
+ /* Some rollback notifiers must be called in the specific task context. */
|
||||
+ if (opts.use_fork_pid)
|
||||
+ retval = write_fork_pid(vpid(root_item));
|
||||
+ else if (!kdat.has_clone3_set_tid)
|
||||
+ retval = set_next_pid((void *)&pid);
|
||||
+
|
||||
+ if (retval < 0) {
|
||||
+ pr_err("set next pid %d failed, can't do rollback.", pid);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ if (!kdat.has_clone3_set_tid) {
|
||||
+ retval = clone_noasan(criu_rollback_internal,
|
||||
+ clone_flags | SIGCHLD,
|
||||
+ &opts.mask_exit_notify);
|
||||
+ } else {
|
||||
+ retval = clone3_with_pid_noasan(criu_rollback_internal,
|
||||
+ &opts.mask_exit_notify,
|
||||
+ clone_flags,
|
||||
+ SIGCHLD, pid);
|
||||
+ }
|
||||
+
|
||||
+ if (retval < 0) {
|
||||
+ pr_err("Can't fork for %d to do rollback: %s.\n",
|
||||
+ pid, strerror(errno));
|
||||
+ } else {
|
||||
+ int status;
|
||||
+
|
||||
+ if (retval != pid)
|
||||
+ pr_err("clone pid %d isn't equal with %d\n",
|
||||
+ retval, pid);
|
||||
+
|
||||
+ if (waitpid(pid, &status, 0) < 0) {
|
||||
+ pr_warn("Unable to wait %d: %s\n",
|
||||
+ pid, strerror(errno));
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
int cr_restore_tasks(void)
|
||||
{
|
||||
int ret = -1;
|
||||
@@ -2831,6 +2899,7 @@ clean_cgroup:
|
||||
err:
|
||||
cr_plugin_fini(CR_PLUGIN_STAGE__RESTORE, ret);
|
||||
if (ret < 0) {
|
||||
+ criu_rollback();
|
||||
if (!!(network_status & NETWORK_COLLECTED)
|
||||
&& !files_collected() && collect_image(&inet_sk_cinfo))
|
||||
pr_err("collect inet sk cinfo fail\n");
|
||||
diff --git a/criu/include/pstree.h b/criu/include/pstree.h
|
||||
index 87e4c47..6c0765b 100644
|
||||
--- a/criu/include/pstree.h
|
||||
+++ b/criu/include/pstree.h
|
||||
@@ -46,6 +46,7 @@ enum {
|
||||
};
|
||||
#define FDS_EVENT (1 << FDS_EVENT_BIT)
|
||||
|
||||
+extern bool criu_roll;
|
||||
extern struct pstree_item *current;
|
||||
|
||||
struct rst_info;
|
||||
diff --git a/criu/pstree.c b/criu/pstree.c
|
||||
index 778c884..8992155 100644
|
||||
--- a/criu/pstree.c
|
||||
+++ b/criu/pstree.c
|
||||
@@ -20,6 +20,11 @@
|
||||
#include "images/pstree.pb-c.h"
|
||||
#include "crtools.h"
|
||||
|
||||
+/*
|
||||
+ * Sometimes the image may be broken; set this flag to enable the rollback capability
|
||||
+ * before forking restorer.
|
||||
+ */
|
||||
+bool criu_roll;
|
||||
struct pstree_item *root_item;
|
||||
static struct rb_root pid_root_rb;
|
||||
|
||||
@@ -638,6 +643,9 @@ static int read_pstree_image(pid_t *pid_max)
|
||||
if (!img)
|
||||
return -1;
|
||||
|
||||
+ /* enable rollback capability once the image has been opened successfully. */
|
||||
+ criu_roll = true;
|
||||
+
|
||||
do {
|
||||
ret = read_one_pstree_item(img, pid_max);
|
||||
} while (ret > 0);
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,38 +0,0 @@
|
||||
From 389a410ddfbca241bf724a4e4751fa96499ff6f1 Mon Sep 17 00:00:00 2001
|
||||
From: "fu.lin" <fulin10@huawei.com>
|
||||
Date: Thu, 30 Dec 2021 10:45:16 +0800
|
||||
Subject: [PATCH 56/72] detach: don't kill task when `ptrace(PTRACE_DETACH)`
|
||||
return ESRCH
|
||||
|
||||
Conflict:NA
|
||||
Reference:https://gitee.com/src-openeuler/criu/pulls/26
|
||||
|
||||
Signed-off-by: fu.lin <fulin10@huawei.com>
|
||||
---
|
||||
criu/cr-restore.c | 10 ++++++++++
|
||||
1 file changed, 10 insertions(+)
|
||||
|
||||
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
|
||||
index 13f0a93..c3ff65d 100644
|
||||
--- a/criu/cr-restore.c
|
||||
+++ b/criu/cr-restore.c
|
||||
@@ -2317,6 +2317,16 @@ static int finalize_restore_detach(void)
|
||||
return -1;
|
||||
}
|
||||
if (ptrace(PTRACE_DETACH, pid, NULL, 0)) {
|
||||
+ /*
|
||||
+ * There is a window between task resume and
|
||||
+ * `ptrace(PTRACE_DETACH)`; the task may exit
|
||||
+ * on its own during this time.
|
||||
+ */
|
||||
+ if (errno == ESRCH) {
|
||||
+ pr_warn("Unable to detach %d, task has dead\n", pid);
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
pr_perror("Unable to detach %d", pid);
|
||||
return -1;
|
||||
}
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,114 +0,0 @@
|
||||
From 4a3b351a69083567392a70bfb8d91c3f666e0aff Mon Sep 17 00:00:00 2001
|
||||
From: Jingxian He <hejingxian@huawei.com>
|
||||
Date: Wed, 19 May 2021 22:49:57 +0800
|
||||
Subject: [PATCH 57/72] build: add secure compilation options
|
||||
|
||||
Add secure compilation options:
|
||||
-fstack-protector -fstack-protector-all
|
||||
-Wl,-z,relro,-z,now,-z,noexecstack
|
||||
|
||||
Conflict:NA
|
||||
Reference:https://gitee.com/src-openeuler/criu/pulls/21
|
||||
Signed-off-by: Fu Lin <fulin10@huawei.com>
|
||||
---
|
||||
Makefile | 4 ++++
|
||||
criu/Makefile | 2 +-
|
||||
criu/pie/Makefile | 1 +
|
||||
criu/pie/Makefile.library | 2 ++
|
||||
lib/Makefile | 1 +
|
||||
lib/c/Makefile | 2 +-
|
||||
scripts/nmk/scripts/build.mk | 5 +++--
|
||||
7 files changed, 13 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/Makefile b/Makefile
|
||||
index 08761ef..c1eafdd 100644
|
||||
--- a/Makefile
|
||||
+++ b/Makefile
|
||||
@@ -80,6 +80,10 @@ ifeq ($(ARCH),mips)
|
||||
DEFINES := -DCONFIG_MIPS
|
||||
endif
|
||||
|
||||
+# secure compilation options
|
||||
+CFLAGS += -fstack-protector-all -fPIE
|
||||
+LDFLAGS += -pie
|
||||
+
|
||||
#
|
||||
# CFLAGS_PIE:
|
||||
#
|
||||
diff --git a/criu/Makefile b/criu/Makefile
|
||||
index db4e9d8..3b4d69f 100644
|
||||
--- a/criu/Makefile
|
||||
+++ b/criu/Makefile
|
||||
@@ -85,7 +85,7 @@ $(obj)/%: pie
|
||||
|
||||
$(obj)/criu: $(PROGRAM-BUILTINS)
|
||||
$(call msg-link, $@)
|
||||
- $(Q) $(CC) $(CFLAGS) $^ $(LIBS) $(WRAPFLAGS) $(LDFLAGS) $(GMONLDOPT) -rdynamic -o $@
|
||||
+ $(Q) $(CC) $(CFLAGS) $^ $(LIBS) $(WRAPFLAGS) $(LDFLAGS) $(GMONLDOPT) -rdynamic -o $@ -Wl,-z,relro,-z,now,-z,noexecstack -fPIE -pie
|
||||
|
||||
UNIT-BUILTINS += $(obj)/config.o
|
||||
UNIT-BUILTINS += $(obj)/log.o
|
||||
diff --git a/criu/pie/Makefile b/criu/pie/Makefile
|
||||
index 265dcf8..40b5804 100644
|
||||
--- a/criu/pie/Makefile
|
||||
+++ b/criu/pie/Makefile
|
||||
@@ -6,6 +6,7 @@ target := parasite restorer
|
||||
|
||||
CFLAGS := $(filter-out -pg $(CFLAGS-GCOV) $(CFLAGS-ASAN),$(CFLAGS))
|
||||
CFLAGS += $(CFLAGS_PIE)
|
||||
+CFLAGS := $(filter-out -fstack-protector -fstack-protector-all,$(CFLAGS))
|
||||
ccflags-y += -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0
|
||||
ccflags-y += -Wp,-U_FORTIFY_SOURCE -Wp,-D_FORTIFY_SOURCE=0
|
||||
|
||||
diff --git a/criu/pie/Makefile.library b/criu/pie/Makefile.library
|
||||
index da2a2fa..c022d06 100644
|
||||
--- a/criu/pie/Makefile.library
|
||||
+++ b/criu/pie/Makefile.library
|
||||
@@ -27,3 +27,5 @@ CFLAGS += $(CFLAGS_PIE)
|
||||
ifeq ($(ARCH),mips)
|
||||
CFLAGS += -fno-stack-protector -DCR_NOGLIBC -mno-abicalls -fno-pic
|
||||
endif
|
||||
+
|
||||
+CFLAGS := $(filter-out -fstack-protector -fstack-protector-all,$(CFLAGS))
|
||||
diff --git a/lib/Makefile b/lib/Makefile
|
||||
index 575a7ba..729c298 100644
|
||||
--- a/lib/Makefile
|
||||
+++ b/lib/Makefile
|
||||
@@ -14,6 +14,7 @@ lib/c/Makefile: ;
|
||||
lib/c/%: .FORCE
|
||||
$(Q) $(MAKE) $(build)=lib/c $@
|
||||
|
||||
+CFLAGS := $(filter-out -fPIE,$(CFLAGS))
|
||||
cflags-so += $(CFLAGS) -rdynamic -Wl,-soname,$(CRIU_SO).$(CRIU_SO_VERSION_MAJOR)
|
||||
ldflags-so += -lprotobuf-c
|
||||
|
||||
diff --git a/lib/c/Makefile b/lib/c/Makefile
|
||||
index af01467..d7f6491 100644
|
||||
--- a/lib/c/Makefile
|
||||
+++ b/lib/c/Makefile
|
||||
@@ -4,5 +4,5 @@ obj-y += ./images/rpc.pb-c.o
|
||||
ccflags-y += -iquote criu/$(ARCH_DIR)/include
|
||||
ccflags-y += -iquote criu/include
|
||||
ccflags-y += -iquote images
|
||||
-ccflags-y += -fPIC -fno-stack-protector
|
||||
+ccflags-y += -fPIC
|
||||
ldflags-y += -r -z noexecstack
|
||||
diff --git a/scripts/nmk/scripts/build.mk b/scripts/nmk/scripts/build.mk
|
||||
index d01d2b7..6f366d7 100644
|
||||
--- a/scripts/nmk/scripts/build.mk
|
||||
+++ b/scripts/nmk/scripts/build.mk
|
||||
@@ -15,8 +15,9 @@ lib-name :=
|
||||
lib-target :=
|
||||
hostprogs-y :=
|
||||
libso-y :=
|
||||
-ld_flags :=
|
||||
-ldflags-so :=
|
||||
+ld_flags := -Wl,-z,relro,-z,now,-z,noexecstack
|
||||
+ldflags-so := -Wl,-z,relro,-z,now,-z,noexecstack
|
||||
+ldflags-y := -z relro -z now -z noexecstack
|
||||
arflags-y :=
|
||||
target :=
|
||||
deps-y :=
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,283 +0,0 @@
|
||||
From e6dea32c64dfae3a6d06512b45f66416fc974556 Mon Sep 17 00:00:00 2001
|
||||
From: "fu.lin" <fu.lin10@huawei.com>
|
||||
Date: Wed, 11 Aug 2021 16:50:49 +0800
|
||||
Subject: [PATCH 58/72] nftables: add mnl api
|
||||
|
||||
libmnl provides the communication between userspace and kernelspace for
|
||||
netfilter netlink. Abstract it here for later use.
|
||||
|
||||
Signed-off-by: fu.lin <fulin10@huawei.com>
|
||||
---
|
||||
criu/Makefile | 2 +
|
||||
criu/Makefile.crtools | 1 +
|
||||
criu/Makefile.packages | 6 ++
|
||||
criu/include/nftables.h | 28 +++++++
|
||||
criu/mnl.c | 165 ++++++++++++++++++++++++++++++++++++++++
|
||||
5 files changed, 202 insertions(+)
|
||||
create mode 100644 criu/include/nftables.h
|
||||
create mode 100644 criu/mnl.c
|
||||
|
||||
diff --git a/criu/Makefile b/criu/Makefile
|
||||
index 3b4d69f..8d11bd5 100644
|
||||
--- a/criu/Makefile
|
||||
+++ b/criu/Makefile
|
||||
@@ -28,6 +28,8 @@ CFLAGS += -iquote images
|
||||
CFLAGS += -iquote $(ARCH_DIR)/include
|
||||
CFLAGS += -iquote .
|
||||
CFLAGS += $(shell $(PKG_CONFIG) --cflags libnl-3.0)
|
||||
+CFLAGS += $(shell $(PKG_CONFIG) --cflags libnftnl)
|
||||
+CFLAGS += $(shell $(PKG_CONFIG) --cflags libmnl)
|
||||
CFLAGS += $(CONFIG-DEFINES)
|
||||
|
||||
ifeq ($(GMON),1)
|
||||
diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools
|
||||
index 2ad0207..a132810 100644
|
||||
--- a/criu/Makefile.crtools
|
||||
+++ b/criu/Makefile.crtools
|
||||
@@ -98,6 +98,7 @@ obj-y += reserved-ports.o
|
||||
obj-y += orphan-inode.o
|
||||
obj-y += kmsg.o
|
||||
obj-y += taskqueue.o
|
||||
+obj-y += mnl.o
|
||||
obj-$(CONFIG_HAS_LIBBPF) += bpfmap.o
|
||||
obj-$(CONFIG_COMPAT) += pie-util-vdso-elf32.o
|
||||
CFLAGS_pie-util-vdso-elf32.o += -DCONFIG_VDSO_32
|
||||
diff --git a/criu/Makefile.packages b/criu/Makefile.packages
|
||||
index 851489b..76e59ca 100644
|
||||
--- a/criu/Makefile.packages
|
||||
+++ b/criu/Makefile.packages
|
||||
@@ -7,6 +7,8 @@ REQ-RPM-PKG-NAMES += protobuf-python
|
||||
REQ-RPM-PKG-NAMES += libnl3-devel
|
||||
REQ-RPM-PKG-NAMES += libcap-devel
|
||||
REQ-RPM-PKG-NAMES += $(PYTHON)-future
|
||||
+REQ-RPM-PKG-NAMES += libmnl-devel
|
||||
+REQ-RPM-PKG-NAMES += libnftnl-devel
|
||||
|
||||
REQ-RPM-PKG-TEST-NAMES += libaio-devel
|
||||
|
||||
@@ -18,6 +20,8 @@ REQ-DEB-PKG-NAMES += $(PYTHON)-protobuf
|
||||
REQ-DEB-PKG-NAMES += $(PYTHON)-future
|
||||
REQ-DEB-PKG-NAMES += libnl-3-dev
|
||||
REQ-DEB-PKG-NAMES += libcap-dev
|
||||
+REQ-DEB-PKG-NAMES += libmnl-dev
|
||||
+REQ-DEB-PKG-NAMES += libnftnl-dev
|
||||
|
||||
REQ-DEB-PKG-TEST-NAMES += $(PYTHON)-yaml
|
||||
REQ-DEB-PKG-TEST-NAMES += libaio-dev
|
||||
@@ -32,6 +36,8 @@ endif
|
||||
|
||||
export LIBS += -lprotobuf-c -ldl -lnl-3 -lsoccr -Lsoccr/ -lnet
|
||||
export LIBS += -lpthread
|
||||
+export LIBS += $(shell $(PKG_CONFIG) --libs libmnl)
|
||||
+export LIBS += $(shell $(PKG_CONFIG) --libs libnftnl)
|
||||
|
||||
check-packages-failed:
|
||||
$(warning Can not find some of the required libraries)
|
||||
diff --git a/criu/include/nftables.h b/criu/include/nftables.h
|
||||
new file mode 100644
|
||||
index 0000000..0bdab31
|
||||
--- /dev/null
|
||||
+++ b/criu/include/nftables.h
|
||||
@@ -0,0 +1,28 @@
|
||||
+#ifndef __CR_NFTABLES_H__
|
||||
+#define __CR_NFTABLES_H__
|
||||
+
|
||||
+#include <libmnl/libmnl.h>
|
||||
+
|
||||
+struct mnl_params {
|
||||
+ struct mnl_socket *nl;
|
||||
+ char *buf;
|
||||
+ struct mnl_nlmsg_batch *batch;
|
||||
+ uint32_t seq;
|
||||
+};
|
||||
+
|
||||
+typedef struct nlmsghdr * (*buf_func_t)(struct mnl_params *mnl_params, void *args);
|
||||
+typedef int (*batch_func_t)(struct mnl_params *mnl_params, void *args);
|
||||
+typedef int (*mnl_func_t)(struct mnl_params *mnl, batch_func_t cb, void *args);
|
||||
+
|
||||
+struct mnl_cb_params {
|
||||
+ pid_t tree_id;
|
||||
+ bool create;
|
||||
+ bool ipv6;
|
||||
+};
|
||||
+
|
||||
+int mnl_sendmsg(batch_func_t batch_cb, void *args);
|
||||
+int mnl_common(mnl_func_t mnl_cb, void *arg1, void *arg2);
|
||||
+int mnl_batch_send_and_recv(struct mnl_params *mnl_params, batch_func_t cb, void *args, int *result);
|
||||
+int mnl_buf_send_and_recv(struct mnl_params *mnl_params, buf_func_t cb, void *args, int *result);
|
||||
+
|
||||
+#endif /* __CR_NFTABLES_H__ */
|
||||
diff --git a/criu/mnl.c b/criu/mnl.c
|
||||
new file mode 100644
|
||||
index 0000000..3a03202
|
||||
--- /dev/null
|
||||
+++ b/criu/mnl.c
|
||||
@@ -0,0 +1,165 @@
|
||||
+#include <string.h>
|
||||
+#include <time.h>
|
||||
+#include <errno.h>
|
||||
+
|
||||
+#include <libnftnl/common.h>
|
||||
+
|
||||
+#include "nftables.h"
|
||||
+#include "log.h"
|
||||
+
|
||||
+int mnl_common(mnl_func_t mnl_cb, void *arg1, void *arg2)
|
||||
+{
|
||||
+ char buf[MNL_SOCKET_BUFFER_SIZE];
|
||||
+ struct mnl_params mnl = {
|
||||
+ .seq = time(NULL),
|
||||
+ };
|
||||
+ int retval = -1;
|
||||
+
|
||||
+ mnl.nl = mnl_socket_open(NETLINK_NETFILTER);
|
||||
+ if (mnl.nl == NULL) {
|
||||
+ pr_err("mnl_socket_open failed with %d: %s\n", errno, strerror(errno));
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ if (mnl_socket_bind(mnl.nl, 0, MNL_SOCKET_AUTOPID) < 0) {
|
||||
+ pr_err("mnl_socket_bind wailed with %d: %s\n", errno, strerror(errno));
|
||||
+ goto err_mnl;
|
||||
+ }
|
||||
+
|
||||
+ mnl.buf = buf;
|
||||
+ mnl.batch = mnl_nlmsg_batch_start(buf, sizeof(buf));
|
||||
+ if (mnl.batch == NULL)
|
||||
+ goto err_mnl;
|
||||
+
|
||||
+ if (mnl_cb(&mnl, arg1, arg2) < 0)
|
||||
+ goto err_batch;
|
||||
+
|
||||
+ retval = 0;
|
||||
+
|
||||
+err_batch:
|
||||
+ mnl_nlmsg_batch_stop(mnl.batch);
|
||||
+err_mnl:
|
||||
+ mnl_socket_close(mnl.nl);
|
||||
+
|
||||
+ return retval;
|
||||
+}
|
||||
+
|
||||
+static int mnl_sendmsg_internal(struct mnl_params *mnl, batch_func_t cb, void *args)
|
||||
+{
|
||||
+ int retval = -1;
|
||||
+
|
||||
+ nftnl_batch_begin(mnl_nlmsg_batch_current(mnl->batch), mnl->seq++);
|
||||
+ mnl_nlmsg_batch_next(mnl->batch);
|
||||
+
|
||||
+ if (cb(mnl, args) < 0)
|
||||
+ goto err_batch;
|
||||
+
|
||||
+ nftnl_batch_end(mnl_nlmsg_batch_current(mnl->batch), mnl->seq++);
|
||||
+ mnl_nlmsg_batch_next(mnl->batch);
|
||||
+
|
||||
+ if (mnl_socket_sendto(mnl->nl, mnl_nlmsg_batch_head(mnl->batch),
|
||||
+ mnl_nlmsg_batch_size(mnl->batch)) < 0) {
|
||||
+ pr_err("%s: mnl_socket_sendto failed with %d: %s\n",
|
||||
+ __func__, errno, strerror(errno));
|
||||
+ goto err_batch;
|
||||
+ }
|
||||
+
|
||||
+ retval = 0;
|
||||
+
|
||||
+err_batch:
|
||||
+ return retval;
|
||||
+}
|
||||
+
|
||||
+int mnl_sendmsg(batch_func_t batch_cb, void *args)
|
||||
+{
|
||||
+ return mnl_common(mnl_sendmsg_internal, batch_cb, args);
|
||||
+}
|
||||
+
|
||||
+int mnl_batch_send_and_recv(struct mnl_params *mnl_params, batch_func_t cb,
|
||||
+ void *args, int *result)
|
||||
+{
|
||||
+ struct mnl_socket *nl = mnl_params->nl;
|
||||
+ struct mnl_nlmsg_batch *batch = mnl_params->batch;
|
||||
+ uint32_t *seq = &mnl_params->seq;
|
||||
+ char buf[MNL_SOCKET_BUFFER_SIZE];
|
||||
+ int retval;
|
||||
+
|
||||
+ mnl_nlmsg_batch_reset(batch);
|
||||
+ nftnl_batch_begin(mnl_nlmsg_batch_current(batch), (*seq)++);
|
||||
+ mnl_nlmsg_batch_next(batch);
|
||||
+
|
||||
+ if (cb(mnl_params, args) < 0)
|
||||
+ return -1;
|
||||
+
|
||||
+ nftnl_batch_end(mnl_nlmsg_batch_current(batch), (*seq)++);
|
||||
+ mnl_nlmsg_batch_next(batch);
|
||||
+
|
||||
+ if (mnl_socket_sendto(nl, mnl_nlmsg_batch_head(batch),
|
||||
+ mnl_nlmsg_batch_size(batch)) < 0) {
|
||||
+ pr_err("%s: mnl_socket_sendto failed with %d: %s\n",
|
||||
+ __func__, errno, strerror(errno));
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ /* the caller doesn't care about the netlink retval, and the nlmsg hdr flags have no `NLM_F_ACK` */
|
||||
+ if (result == NULL)
|
||||
+ return 0;
|
||||
+
|
||||
+ retval = mnl_socket_recvfrom(nl, buf, sizeof(buf));
|
||||
+ while (retval > 0) {
|
||||
+ retval = mnl_cb_run(buf, retval, 0, mnl_socket_get_portid(nl), NULL, NULL);
|
||||
+ if (retval <= 0)
|
||||
+ break;
|
||||
+ retval = mnl_socket_recvfrom(nl, buf, sizeof(buf));
|
||||
+ }
|
||||
+
|
||||
+ if (retval < 0) {
|
||||
+ pr_err("%s: mnl batch socket recv errno with %d: %s\n",
|
||||
+ __func__, errno, strerror(errno));
|
||||
+ *result = errno;
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ *result = 0;
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+int mnl_buf_send_and_recv(struct mnl_params *mnl_params, buf_func_t cb,
|
||||
+ void *args, int *result)
|
||||
+{
|
||||
+ struct mnl_socket *nl = mnl_params->nl;
|
||||
+ char buf[MNL_SOCKET_BUFFER_SIZE];
|
||||
+ struct nlmsghdr *nlh;
|
||||
+ int retval = 0;
|
||||
+
|
||||
+ if ((nlh = cb(mnl_params, args)) == NULL)
|
||||
+ return -1;
|
||||
+
|
||||
+ if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
|
||||
+ pr_err("%s: mnl_socket_sendto failed with %d: %s\n",
|
||||
+ __func__, errno, strerror(errno));
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ /* the caller doesn't care about the netlink retval, and the nlmsg hdr flags have no `NLM_F_ACK` */
|
||||
+ if (result == NULL)
|
||||
+ return 0;
|
||||
+
|
||||
+ retval = mnl_socket_recvfrom(nl, buf, sizeof(buf));
|
||||
+ while (retval > 0) {
|
||||
+ retval = mnl_cb_run(buf, retval, 0, mnl_socket_get_portid(nl), NULL, NULL);
|
||||
+ if (retval <= 0)
|
||||
+ break;
|
||||
+ retval = mnl_socket_recvfrom(nl, buf, sizeof(buf));
|
||||
+ }
|
||||
+
|
||||
+ if (retval < 0) {
|
||||
+ pr_info("%s: mnl buf socket recv errno with %d: %s\n",
|
||||
+ __func__, errno, strerror(errno));
|
||||
+ *result = errno;
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ *result = 0;
|
||||
+ return 0;
|
||||
+}
|
||||
--
|
||||
2.34.1
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,55 +0,0 @@
|
||||
From 073ed2ef448fb073aa3c6f0552e120e3e98a8906 Mon Sep 17 00:00:00 2001
|
||||
From: "fu.lin" <fulin10@huawei.com>
|
||||
Date: Wed, 13 Apr 2022 14:30:54 +0800
|
||||
Subject: [PATCH 60/72] net: switch to nftables API
|
||||
|
||||
This is a fake patch
|
||||
|
||||
Signed-off-by: fu.lin <fulin10@huawei.com>
|
||||
---
|
||||
criu/config.c | 2 ++
|
||||
criu/crtools.c | 1 +
|
||||
criu/include/cr_options.h | 2 ++
|
||||
3 files changed, 5 insertions(+)
|
||||
|
||||
diff --git a/criu/config.c b/criu/config.c
|
||||
index c0358e5..7c4e230 100644
|
||||
--- a/criu/config.c
|
||||
+++ b/criu/config.c
|
||||
@@ -711,6 +711,8 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd,
|
||||
{ "reserve-ports", required_argument, 0, 'P' },
|
||||
BOOL_OPT("parallel", &opts.parallel),
|
||||
{ "exec-pin-start", required_argument, 0, 2002 },
|
||||
+ BOOL_OPT("use-nft", &opts.use_nft),
|
||||
+ BOOL_OPT("async-clear-nft", &opts.async_clear_nft),
|
||||
{},
|
||||
};
|
||||
|
||||
diff --git a/criu/crtools.c b/criu/crtools.c
|
||||
index 40e2d51..c555213 100644
|
||||
--- a/criu/crtools.c
|
||||
+++ b/criu/crtools.c
|
||||
@@ -474,6 +474,7 @@ usage:
|
||||
" --reserve-ports Reserve src ports in kernel\n"
|
||||
" --parallel Collect smaps parallel to accellrate dumping speed\n"
|
||||
" --exec-pin-start Exec file map's pin start index\n"
|
||||
+ " --use Use nft API instead of iptables cmd in network locking\n"
|
||||
"\n"
|
||||
"Check options:\n"
|
||||
" Without options, \"criu check\" checks availability of absolutely required\n"
|
||||
diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h
|
||||
index a64e977..6dadaba 100644
|
||||
--- a/criu/include/cr_options.h
|
||||
+++ b/criu/include/cr_options.h
|
||||
@@ -203,6 +203,8 @@ struct cr_options {
|
||||
int reserve_ports;
|
||||
int parallel;
|
||||
int exec_pin_start;
|
||||
+ int use_nft;
|
||||
+ int async_clear_nft;
|
||||
};
|
||||
|
||||
extern struct cr_options opts;
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,46 +0,0 @@
|
||||
From 926affe76a99871f9a95f3381190bd3fb601e6ec Mon Sep 17 00:00:00 2001
|
||||
From: "fu.lin" <fulin10@huawei.com>
|
||||
Date: Fri, 21 Jan 2022 14:46:21 +0800
|
||||
Subject: [PATCH 61/72] zdtm: unlink kdat before testing
|
||||
|
||||
Signed-off-by: fu.lin <fulin10@huawei.com>
|
||||
---
|
||||
test/zdtm.py | 8 ++++++++
|
||||
1 file changed, 8 insertions(+)
|
||||
|
||||
diff --git a/test/zdtm.py b/test/zdtm.py
|
||||
index 0feece0..1b2c7da 100755
|
||||
--- a/test/zdtm.py
|
||||
+++ b/test/zdtm.py
|
||||
@@ -24,6 +24,7 @@ import sys
|
||||
import tempfile
|
||||
import time
|
||||
import socket
|
||||
+import pathlib
|
||||
from builtins import (input, int, open, range, str, zip)
|
||||
|
||||
import pycriu as crpc
|
||||
@@ -2662,6 +2663,9 @@ rp.add_argument("--pre-dump-mode",
|
||||
help="Use splice or read mode of pre-dumping",
|
||||
choices=['splice', 'read'],
|
||||
default='splice')
|
||||
+rp.add_argument("--kdat",
|
||||
+ help="Path to criu.kdat, default '/run/criu.kdat'",
|
||||
+ default="/run/criu.kdat")
|
||||
|
||||
lp = sp.add_parser("list", help="List tests")
|
||||
lp.set_defaults(action=list_tests)
|
||||
@@ -2692,6 +2696,10 @@ if opts['debug']:
|
||||
|
||||
if opts['action'] == 'run':
|
||||
criu.available()
|
||||
+ # remove kdat file before testing
|
||||
+ kdat = pathlib.Path(opts['kdat'])
|
||||
+ if kdat.exists():
|
||||
+ kdat.unlink()
|
||||
for tst in test_classes.values():
|
||||
tst.available()
|
||||
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,302 +0,0 @@
|
||||
From 3d945368250958f5ebf3b4053e07c816adafba33 Mon Sep 17 00:00:00 2001
|
||||
From: "fu.lin" <fulin10@huawei.com>
|
||||
Date: Fri, 21 Jan 2022 17:20:05 +0800
|
||||
Subject: [PATCH 62/72] zdtm: add host ns sysvshm ipc case
|
||||
|
||||
---
|
||||
test/zdtm/Makefile | 2 +-
|
||||
test/zdtm/customization/Makefile | 53 ++++++++
|
||||
test/zdtm/customization/ipc.c | 202 +++++++++++++++++++++++++++++++
|
||||
test/zdtm/customization/ipc.desc | 1 +
|
||||
4 files changed, 257 insertions(+), 1 deletion(-)
|
||||
create mode 100644 test/zdtm/customization/Makefile
|
||||
create mode 100644 test/zdtm/customization/ipc.c
|
||||
create mode 100644 test/zdtm/customization/ipc.desc
|
||||
|
||||
diff --git a/test/zdtm/Makefile b/test/zdtm/Makefile
|
||||
index 24a33f2..8f9857b 100644
|
||||
--- a/test/zdtm/Makefile
|
||||
+++ b/test/zdtm/Makefile
|
||||
@@ -1,4 +1,4 @@
|
||||
-SUBDIRS := lib static transition
|
||||
+SUBDIRS := lib static transition customization
|
||||
|
||||
all: $(SUBDIRS)
|
||||
.PHONY: all $(SUBDIRS)
|
||||
diff --git a/test/zdtm/customization/Makefile b/test/zdtm/customization/Makefile
|
||||
new file mode 100644
|
||||
index 0000000..563b7b1
|
||||
--- /dev/null
|
||||
+++ b/test/zdtm/customization/Makefile
|
||||
@@ -0,0 +1,53 @@
|
||||
+LIBDIR := ../lib
|
||||
+LIB := $(LIBDIR)/libzdtmtst.a
|
||||
+LDLIBS += $(LIB)
|
||||
+CPPFLAGS += -I$(LIBDIR)
|
||||
+
|
||||
+TST = \
|
||||
+ ipc
|
||||
+
|
||||
+SRC = $(TST:%=%.c)
|
||||
+OBJ = $(SRC:%.c=%.o)
|
||||
+DEP = $(SRC:%.c=%.d)
|
||||
+PID = $(TST:%=%.pid)
|
||||
+OUT = $(TST:%=%.out)
|
||||
+
|
||||
+include ../Makefile.inc
|
||||
+
|
||||
+all: $(TST)
|
||||
+install: all
|
||||
+.PHONY: all install
|
||||
+
|
||||
+$(TST:%=%.pid): %.pid: %
|
||||
+ $(<D)/$(<F) --pidfile=$@ --outfile=$<.out
|
||||
+
|
||||
+%.out: %.pid %
|
||||
+ -kill -TERM `cat $<`
|
||||
+
|
||||
+start: $(PID)
|
||||
+
|
||||
+%.is_running: %.pid
|
||||
+ kill -0 `cat $<`
|
||||
+
|
||||
+check_start: $(PID:%.pid=%.is_running)
|
||||
+
|
||||
+stop:
|
||||
+ -kill -TERM `awk '{print}' *.pid`
|
||||
+
|
||||
+WAIT_TIME=10
|
||||
+wait_stop:
|
||||
+ -for i in `seq 1 $(WAIT_TIME)`; do \
|
||||
+ kill -0 `awk '{print}' *.pid 2>/dev/null` 2>/dev/null || break; \
|
||||
+ sleep 1; \
|
||||
+ done
|
||||
+
|
||||
+$(TST): | $(LIB)
|
||||
+
|
||||
+%: %.sh
|
||||
+ cp $< $@
|
||||
+ chmod +x $@
|
||||
+
|
||||
+$(LIB): force
|
||||
+ $(Q) $(MAKE) -C $(LIBDIR)
|
||||
+
|
||||
+.PHONY: force start check_start stop wait_stop
|
||||
diff --git a/test/zdtm/customization/ipc.c b/test/zdtm/customization/ipc.c
|
||||
new file mode 100644
|
||||
index 0000000..2b3c2b1
|
||||
--- /dev/null
|
||||
+++ b/test/zdtm/customization/ipc.c
|
||||
@@ -0,0 +1,202 @@
|
||||
+#include <sched.h>
|
||||
+
|
||||
+#include <stdio.h>
|
||||
+#include <string.h>
|
||||
+#include <stdlib.h>
|
||||
+#include <unistd.h>
|
||||
+#include <sys/types.h>
|
||||
+#include <sys/wait.h>
|
||||
+#include <sys/sem.h>
|
||||
+#include <sys/ipc.h>
|
||||
+#include <sys/shm.h>
|
||||
+#include <signal.h>
|
||||
+#include <errno.h>
|
||||
+
|
||||
+#include "zdtmtst.h"
|
||||
+
|
||||
+const char *test_doc="Tests ipc sems and shmems migrate fine";
|
||||
+const char *test_author="Pavel Emelianov <xemul@parallels.com>";
|
||||
+
|
||||
+static struct sembuf unlock = {
|
||||
+ .sem_op = 1,
|
||||
+ .sem_num = 0,
|
||||
+ .sem_flg = 0,
|
||||
+};
|
||||
+
|
||||
+static struct sembuf lock = {
|
||||
+ .sem_op = -1,
|
||||
+ .sem_num = 0,
|
||||
+ .sem_flg = 0,
|
||||
+};
|
||||
+
|
||||
+#define DEF_MEM_SIZE (40960)
|
||||
+unsigned int shmem_size = DEF_MEM_SIZE;
|
||||
+TEST_OPTION(shmem_size, uint, "Size of shared memory segment", 0);
|
||||
+
|
||||
+#define INIT_CRC (~0)
|
||||
+
|
||||
+#define POISON 0xac
|
||||
+static inline void poison_area(int *mem)
|
||||
+{
|
||||
+ memset(mem, POISON, shmem_size);
|
||||
+}
|
||||
+
|
||||
+static int child(key_t key)
|
||||
+{
|
||||
+ int sem, shm, ret, res = 0;
|
||||
+ uint8_t *mem;
|
||||
+ uint32_t crc;
|
||||
+
|
||||
+ sem = semget(key, 1, 0777);
|
||||
+ if (sem == -1)
|
||||
+ return -1;
|
||||
+ shm = shmget(key, shmem_size, 0777);
|
||||
+ if (shm == -1)
|
||||
+ return -2;
|
||||
+ mem = shmat(shm, NULL, 0);
|
||||
+ if (mem == (uint8_t *)-1)
|
||||
+ return -3;
|
||||
+
|
||||
+ while (test_go()) {
|
||||
+ ret = semop(sem, &lock, 1);
|
||||
+ if (ret) {
|
||||
+ if (errno == EINTR)
|
||||
+ continue;
|
||||
+ fail("Error in semop lock");
|
||||
+ res = errno;
|
||||
+ break;
|
||||
+ }
|
||||
+ crc = INIT_CRC;
|
||||
+ datagen(mem, shmem_size, &crc);
|
||||
+ while ((ret = semop(sem, &unlock, 1)) && (errno == EINTR));
|
||||
+ if (ret) {
|
||||
+ fail("Error in semop unlock");
|
||||
+ res = errno;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ shmdt(mem);
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+int main(int argc, char **argv)
|
||||
+{
|
||||
+ key_t key;
|
||||
+ int sem, shm, pid1, pid2;
|
||||
+ int fail_count = 0;
|
||||
+ uint8_t *mem;
|
||||
+ uint32_t crc;
|
||||
+ int ret;
|
||||
+
|
||||
+ test_init(argc, argv);
|
||||
+
|
||||
+ /* using the large number to fill string length */
|
||||
+ key = ftok(argv[0], 1822155650);
|
||||
+ if (key == -1) {
|
||||
+ pr_perror("Can't make key");
|
||||
+ goto out;
|
||||
+ }
|
||||
+
|
||||
+ sem = semget(key, 1, 0777 | IPC_CREAT | IPC_EXCL);
|
||||
+ if (sem == -1) {
|
||||
+ pr_perror("Can't get sem");
|
||||
+ goto out;
|
||||
+ }
|
||||
+
|
||||
+ if (semctl(sem, 0, SETVAL, 1) == -1) {
|
||||
+ pr_perror("Can't init sem");
|
||||
+ fail_count++;
|
||||
+ goto out_sem;
|
||||
+ }
|
||||
+
|
||||
+ shm = shmget(key, shmem_size, 0777 | IPC_CREAT | IPC_EXCL);
|
||||
+ if (shm == -1) {
|
||||
+ pr_perror("Can't get shm");
|
||||
+ fail_count++;
|
||||
+ goto out_sem;
|
||||
+ }
|
||||
+
|
||||
+ mem = shmat(shm, NULL, 0);
|
||||
+ if (mem == (void *)-1) {
|
||||
+ pr_perror("Can't attach shm");
|
||||
+ fail_count++;
|
||||
+ goto out_shm;
|
||||
+ }
|
||||
+
|
||||
+ poison_area((int *)mem);
|
||||
+
|
||||
+ pid1 = test_fork();
|
||||
+ if (pid1 == -1) {
|
||||
+ pr_perror("Can't fork 1st time");
|
||||
+ goto out_shdt;
|
||||
+ } else if (pid1 == 0)
|
||||
+ exit(child(key));
|
||||
+
|
||||
+ pid2 = test_fork();
|
||||
+ if (pid2 == -1) {
|
||||
+ pr_perror("Can't fork 2nd time");
|
||||
+ fail_count++;
|
||||
+ goto out_child;
|
||||
+ } else if (pid2 == 0)
|
||||
+ exit(child(key));
|
||||
+
|
||||
+ test_daemon();
|
||||
+ while (test_go()) {
|
||||
+ ret = semop(sem, &lock, 1);
|
||||
+ if (ret) {
|
||||
+ if (errno == EINTR)
|
||||
+ continue;
|
||||
+ fail_count++;
|
||||
+ fail("Error in semop lock");
|
||||
+ break;
|
||||
+ }
|
||||
+ if (mem[0] != POISON) {
|
||||
+ crc = INIT_CRC;
|
||||
+ if (datachk(mem, shmem_size, &crc)) {
|
||||
+ fail_count++;
|
||||
+ fail("Semaphore protection is broken or "
|
||||
+ "shmem pages are messed");
|
||||
+ semop(sem, &unlock, 1);
|
||||
+ break;
|
||||
+ }
|
||||
+ poison_area((int *)mem);
|
||||
+ }
|
||||
+ while ((ret = semop(sem, &unlock, 1)) && (errno == EINTR));
|
||||
+ if (ret) {
|
||||
+ fail_count++;
|
||||
+ fail("Error in semop unlock");
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ test_waitsig();
|
||||
+
|
||||
+ kill(pid2, SIGTERM);
|
||||
+ waitpid(pid2, &ret, 0);
|
||||
+ if (!WIFEXITED(ret)) {
|
||||
+ fail_count++;
|
||||
+ pr_perror("Child 2 was killed");
|
||||
+ } else if (WEXITSTATUS(ret)) {
|
||||
+ fail_count++;
|
||||
+ pr_perror("Child 2 couldn't inititalise");
|
||||
+ }
|
||||
+out_child:
|
||||
+ kill(pid1, SIGTERM);
|
||||
+ waitpid(pid1, &ret, 0);
|
||||
+ if (!WIFEXITED(ret)) {
|
||||
+ fail_count++;
|
||||
+ pr_perror("Child 1 was killed");
|
||||
+ } else if (WEXITSTATUS(ret)) {
|
||||
+ fail_count++;
|
||||
+ pr_perror("Child 1 couldn't inititalise");
|
||||
+ }
|
||||
+out_shdt:
|
||||
+ shmdt(mem);
|
||||
+out_shm:
|
||||
+ shmctl(shm, IPC_RMID, NULL);
|
||||
+out_sem:
|
||||
+ semctl(sem, 1, IPC_RMID);
|
||||
+ if (fail_count == 0)
|
||||
+ pass();
|
||||
+out:
|
||||
+ return 0;
|
||||
+}
|
||||
diff --git a/test/zdtm/customization/ipc.desc b/test/zdtm/customization/ipc.desc
|
||||
new file mode 100644
|
||||
index 0000000..63df42a
|
||||
--- /dev/null
|
||||
+++ b/test/zdtm/customization/ipc.desc
|
||||
@@ -0,0 +1 @@
|
||||
+{'flavor': 'h'}
|
||||
--
|
||||
2.34.1
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,620 +0,0 @@
|
||||
From 8c7cfce7c9f90af9314b96c6ec34c97fb6f9be8a Mon Sep 17 00:00:00 2001
|
||||
From: "fu.lin" <fulin10@huawei.com>
|
||||
Date: Mon, 14 Feb 2022 19:11:15 +0800
|
||||
Subject: [PATCH 64/72] zdtm: init notifier testcase
|
||||
|
||||
Signed-off-by: fu.lin <fulin10@huawei.com>
|
||||
---
|
||||
test/zdtm.py | 70 ++++++++--
|
||||
test/zdtm/customization/Makefile | 3 +-
|
||||
test/zdtm/customization/notifier00.c | 68 ++++++++++
|
||||
test/zdtm/customization/notifier00.desc | 1 +
|
||||
test/zdtm/mod/.gitignore | 163 ++++++++++++++++++++++++
|
||||
test/zdtm/mod/Makefile | 28 ++++
|
||||
test/zdtm/mod/notifier.c | 145 +++++++++++++++++++++
|
||||
7 files changed, 466 insertions(+), 12 deletions(-)
|
||||
create mode 100644 test/zdtm/customization/notifier00.c
|
||||
create mode 100644 test/zdtm/customization/notifier00.desc
|
||||
create mode 100644 test/zdtm/mod/.gitignore
|
||||
create mode 100644 test/zdtm/mod/Makefile
|
||||
create mode 100644 test/zdtm/mod/notifier.c
|
||||
|
||||
diff --git a/test/zdtm.py b/test/zdtm.py
|
||||
index d3b146f..d64a683 100755
|
||||
--- a/test/zdtm.py
|
||||
+++ b/test/zdtm.py
|
||||
@@ -25,6 +25,7 @@ import tempfile
|
||||
import time
|
||||
import socket
|
||||
import pathlib
|
||||
+import platform
|
||||
from builtins import (input, int, open, range, str, zip)
|
||||
|
||||
import pycriu as crpc
|
||||
@@ -1466,6 +1467,13 @@ class criu:
|
||||
return True
|
||||
return False
|
||||
|
||||
+ @staticmethod
|
||||
def check_sysfs(pathes):
    """Tell whether any required sysfs path is missing.

    `pathes` is a whitespace-separated string of paths.  Returns True as
    soon as one of them does not exist and False when every listed path
    exists (or when the string names no path at all).
    """
    return any(not pathlib.Path(entry).exists() for entry in pathes.split())
|
||||
+
|
||||
@staticmethod
|
||||
def available():
|
||||
if not os.access(opts['criu_bin'], os.X_OK):
|
||||
@@ -1991,21 +1999,49 @@ class Launcher:
|
||||
testline = u"ok %d - %s # SKIP %s" % (self.__runtest, name, reason)
|
||||
print(testline, file=self.__file_report)
|
||||
|
||||
def check_module(self, mod):
    """Report whether the kernel module named `mod` is currently loaded.

    Scans /proc/modules and compares the first whitespace-separated
    field of every line with `mod`.  Returns True on an exact match,
    False otherwise.
    """
    with open("/proc/modules") as f:
        for line in f:
            fields = line.split()
            # Compare with the requested name; the previous code always
            # looked for "pin_memory" regardless of the argument.
            if fields and fields[0] == mod:
                return True
    return False
|
||||
+
|
||||
def modprobe_pin_memory(self, load):
|
||||
+ mod = "pin_memory"
|
||||
if not load:
|
||||
return
|
||||
- else:
|
||||
- found = False
|
||||
- with open("/proc/modules") as f:
|
||||
- for line in f.readlines():
|
||||
- if "pin_memory" == line.split()[0]:
|
||||
- found = True
|
||||
- if not found:
|
||||
- subprocess.check_call(["modprobe", "pin_memory"])
|
||||
+ elif not self.check_module(mod):
|
||||
+ subprocess.check_call(["modprobe", mod])
|
||||
|
||||
cmd = [opts["criu_bin"], "init-pagemap-read"]
|
||||
subprocess.check_call(cmd, shell=False)
|
||||
|
||||
def build_and_load_mod(self, target, kdir):
    """Build the test kernel module `target` under zdtm/mod and insmod it.

    `target` is the module file name (e.g. "notifier.ko"); an empty value
    or a machine other than aarch64 makes this a no-op.  `kdir`, when set,
    is forwarded to make as `KDIR=<kdir>`.  Raises
    subprocess.CalledProcessError when the build or the insmod fails.
    """
    if platform.machine() != "aarch64" or not target:
        return

    if not os.access("zdtm/mod", os.R_OK):
        print("should be executed in the test subdir")
        # This is an error path, so do not report success to the caller.
        sys.exit(1)

    build_mod = ["make", "-C", "zdtm/mod", f"MOD={os.getcwd()}/zdtm/mod", target]
    if kdir:
        build_mod.append(f"KDIR={kdir}")
    subprocess.check_call(build_mod)

    # str.rstrip(".ko") would strip any trailing '.', 'k' or 'o' characters
    # (e.g. "look.ko" -> "l"), so remove the suffix explicitly.
    suffix = ".ko"
    modname = target[:-len(suffix)] if target.endswith(suffix) else target

    # ensure a stale copy of the module has been unloaded
    if self.check_module(modname):
        subprocess.run(["rmmod", modname], check=False)

    subprocess.check_call(["insmod", f"zdtm/mod/{target}"])
|
||||
+
|
||||
def unload_mod(self, mod):
    """Remove kernel module `mod` with rmmod; an empty `mod` is a no-op."""
    if not mod:
        return
    subprocess.check_call(["rmmod", mod])
|
||||
+
|
||||
def run_test(self, name, desc, flavor):
|
||||
|
||||
if len(self.__subs) >= self.__max:
|
||||
@@ -2014,9 +2050,9 @@ class Launcher:
|
||||
with open("/proc/sys/kernel/tainted") as taintfd:
|
||||
taint = taintfd.read()
|
||||
# 0x1000 means the out of tree module has been loaded
|
||||
- if self.__taint != taint and (int(self.__taint) | 0x1000) != int(taint):
|
||||
+ if self.__taint != taint and (int(self.__taint) | 0x3000) != int(taint):
|
||||
raise Exception("The kernel is tainted: %r (%r)" %
|
||||
- (taint, self.__taint))
|
||||
+ (taint, str(int(self.__taint) | 0x3000)))
|
||||
|
||||
if test_flag(desc, 'excl'):
|
||||
self.wait_all()
|
||||
@@ -2045,6 +2081,8 @@ class Launcher:
|
||||
# `--use-fork-pid`, so don't care `--pin-memory` option
|
||||
self.modprobe_pin_memory(no_pid_ns)
|
||||
|
||||
+ self.build_and_load_mod(desc.get("mod", ""), opts["kdir"])
|
||||
+
|
||||
sub = subprocess.Popen(["./zdtm_ct", "zdtm.py"],
|
||||
env=dict(os.environ, CR_CT_TEST_INFO=arg,
|
||||
ZDTM_NO_PID_NS=zdtm_no_pid_ns),
|
||||
@@ -2059,9 +2097,11 @@ class Launcher:
|
||||
}
|
||||
|
||||
# pin memory function don't support concurrency
|
||||
- if test_flag(desc, 'excl') or test_value(desc, "opts", "--pin-memory"):
|
||||
+ if test_flag(desc, 'excl') or test_value(desc, "opts", "--pin-memory") or desc.get("mod", ""):
|
||||
self.wait()
|
||||
|
||||
+ self.unload_mod(desc.get("mod", ""))
|
||||
+
|
||||
def __wait_one(self, flags):
|
||||
pid = -1
|
||||
status = -1
|
||||
@@ -2412,6 +2452,11 @@ def run_tests(opts):
|
||||
t, f"cmdline '{cmdline}' isn't support, or don't set")
|
||||
continue
|
||||
|
||||
+ sysfs = tdesc.get('sysfs', '')
|
||||
+ if sysfs and criu.check_sysfs(sysfs):
|
||||
+ launcher.skip(t, f"sysfs file {sysfs} don't exist")
|
||||
+ continue
|
||||
+
|
||||
test_flavs = tdesc.get('flavor', 'h ns uns').split()
|
||||
opts_flavs = (opts['flavor'] or 'h,ns,uns').split(',')
|
||||
if opts_flavs != ['best']:
|
||||
@@ -2434,6 +2479,7 @@ def run_tests(opts):
|
||||
launcher.run_test(t, tdesc, run_flavs)
|
||||
else:
|
||||
launcher.skip(t, "no flavors")
|
||||
+
|
||||
finally:
|
||||
fail = launcher.finish()
|
||||
if opts['join_ns']:
|
||||
@@ -2723,6 +2769,8 @@ rp.add_argument("--pre-dump-mode",
|
||||
rp.add_argument("--kdat",
|
||||
help="Path to criu.kdat, default '/run/criu.kdat'",
|
||||
default="/run/criu.kdat")
|
||||
+rp.add_argument(
|
||||
+ "--kdir", help="specific kernel devel path, the default value is `/lib/modules/$(uname -r)/build`")
|
||||
|
||||
lp = sp.add_parser("list", help="List tests")
|
||||
lp.set_defaults(action=list_tests)
|
||||
diff --git a/test/zdtm/customization/Makefile b/test/zdtm/customization/Makefile
|
||||
index 82348f2..93922c7 100644
|
||||
--- a/test/zdtm/customization/Makefile
|
||||
+++ b/test/zdtm/customization/Makefile
|
||||
@@ -10,7 +10,8 @@ TST_NOFILE = \
|
||||
maps04 \
|
||||
maps05 \
|
||||
maps007 \
|
||||
- maps008
|
||||
+ maps008 \
|
||||
+ notifier00
|
||||
|
||||
TST_FILE = \
|
||||
maps00 \
|
||||
diff --git a/test/zdtm/customization/notifier00.c b/test/zdtm/customization/notifier00.c
|
||||
new file mode 100644
|
||||
index 0000000..5fc3d54
|
||||
--- /dev/null
|
||||
+++ b/test/zdtm/customization/notifier00.c
|
||||
@@ -0,0 +1,68 @@
|
||||
+#include <stdio.h>
|
||||
+/* Historical reasons: in order to compatible with R10 */
|
||||
+#define CONFIG_EULEROS_MODRESTORE_NOTIFY
|
||||
+#include <linux/modrestore.h>
|
||||
+
|
||||
+#include "zdtmtst.h"
|
||||
+
|
||||
+const char *test_doc = "Tests the basic function of the notifiers";
|
||||
+static char *nvwa_notifiers[] = {
|
||||
+ "PRE_FREEZE",
|
||||
+ "FREEZE_TO_KILL",
|
||||
+ "PRE_UPDATE_KERNEL",
|
||||
+ "POST_UPDATE_KERNEL",
|
||||
+ "UNFREEZE_TO_RUN",
|
||||
+ "POST_RUN"
|
||||
+};
|
||||
+
|
||||
+_Static_assert(sizeof(nvwa_notifiers)/sizeof(nvwa_notifiers[0]) == KUP_HOOK_MAX, "nvwa_notifiers number is wrong!");
|
||||
+
|
||||
+int main(int argc, char *argv[])
|
||||
+{
|
||||
+ int orig_values[KUP_HOOK_MAX] = {0};
|
||||
+ bool failure = false;
|
||||
+ FILE *fp;
|
||||
+
|
||||
+ test_init(argc, argv);
|
||||
+
|
||||
+ fp = fopen("/sys/kernel/criu_notifier", "r");
|
||||
+ if (fp == NULL) {
|
||||
+ pr_perror("fopen");
|
||||
+ return 1;
|
||||
+ }
|
||||
+
|
||||
+ for (int i = 0; i < KUP_HOOK_MAX; i++)
|
||||
+ fscanf(fp, "%d ", orig_values+i);
|
||||
+
|
||||
+ test_daemon();
|
||||
+ test_waitsig();
|
||||
+
|
||||
+ if (fseek(fp, 0, SEEK_SET) != 0) {
|
||||
+ pr_perror("fseek");
|
||||
+ return 2;
|
||||
+ }
|
||||
+
|
||||
+ for (int i = 0; i < KUP_HOOK_MAX; i++) {
|
||||
+ int val = 0;
|
||||
+ int should = orig_values[i]+1;
|
||||
+
|
||||
+ fscanf(fp, "%d ", &val);
|
||||
+
|
||||
+ /* those are not called in criu */
|
||||
+ if (i == PRE_UPDATE_KERNEL || i == POST_UPDATE_KERNEL)
|
||||
+ continue;
|
||||
+
|
||||
+ if (val != should) {
|
||||
+ pr_err("%s notifier is abnormal, it should be %d, but %d.\n",
|
||||
+ nvwa_notifiers[i], should, val);
|
||||
+ failure = true;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (failure)
|
||||
+ fail("notifier is abnormal.");
|
||||
+ else
|
||||
+ pass();
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
diff --git a/test/zdtm/customization/notifier00.desc b/test/zdtm/customization/notifier00.desc
|
||||
new file mode 100644
|
||||
index 0000000..1c6b512
|
||||
--- /dev/null
|
||||
+++ b/test/zdtm/customization/notifier00.desc
|
||||
@@ -0,0 +1 @@
|
||||
+{'arch': 'aarch64', 'opts': '--with-notifier', 'flavor': 'h', 'flags': 'suid', 'sysfs': '/sys/kernel/modrestore/nvwa_notifier', 'mod': 'notifier.ko'}
|
||||
diff --git a/test/zdtm/mod/.gitignore b/test/zdtm/mod/.gitignore
|
||||
new file mode 100644
|
||||
index 0000000..7afd412
|
||||
--- /dev/null
|
||||
+++ b/test/zdtm/mod/.gitignore
|
||||
@@ -0,0 +1,163 @@
|
||||
+# SPDX-License-Identifier: GPL-2.0-only
|
||||
+#
|
||||
+# NOTE! Don't add files that are generated in specific
|
||||
+# subdirectories here. Add them in the ".gitignore" file
|
||||
+# in that subdirectory instead.
|
||||
+#
|
||||
+# NOTE! Please use 'git ls-files -i --exclude-standard'
|
||||
+# command after changing this file, to see if there are
|
||||
+# any tracked files which get ignored after the change.
|
||||
+#
|
||||
+# Normal rules (sorted alphabetically)
|
||||
+#
|
||||
+.*
|
||||
+*.a
|
||||
+*.asn1.[ch]
|
||||
+*.bin
|
||||
+*.bz2
|
||||
+*.c.[012]*.*
|
||||
+*.dt.yaml
|
||||
+*.dtb
|
||||
+*.dtbo
|
||||
+*.dtb.S
|
||||
+*.dwo
|
||||
+*.elf
|
||||
+*.gcno
|
||||
+*.gz
|
||||
+*.i
|
||||
+*.ko
|
||||
+*.lex.c
|
||||
+*.ll
|
||||
+*.lst
|
||||
+*.lz4
|
||||
+*.lzma
|
||||
+*.lzo
|
||||
+*.mod
|
||||
+*.mod.c
|
||||
+*.o
|
||||
+*.o.*
|
||||
+*.patch
|
||||
+*.s
|
||||
+*.so
|
||||
+*.so.dbg
|
||||
+*.su
|
||||
+*.symtypes
|
||||
+*.symversions
|
||||
+*.tab.[ch]
|
||||
+*.tar
|
||||
+*.xz
|
||||
+*.zst
|
||||
+Module.symvers
|
||||
+modules.order
|
||||
+
|
||||
+#
|
||||
+# Top-level generic files
|
||||
+#
|
||||
+/linux
|
||||
+/modules-only.symvers
|
||||
+/vmlinux
|
||||
+/vmlinux.32
|
||||
+/vmlinux.map
|
||||
+/vmlinux.symvers
|
||||
+/vmlinux-gdb.py
|
||||
+/vmlinuz
|
||||
+/System.map
|
||||
+/Module.markers
|
||||
+/modules.builtin
|
||||
+/modules.builtin.modinfo
|
||||
+/modules.nsdeps
|
||||
+
|
||||
+#
|
||||
+# RPM spec file (make rpm-pkg)
|
||||
+#
|
||||
+/*.spec
|
||||
+
|
||||
+#
|
||||
+# Debian directory (make deb-pkg)
|
||||
+#
|
||||
+/debian/
|
||||
+
|
||||
+#
|
||||
+# Snap directory (make snap-pkg)
|
||||
+#
|
||||
+/snap/
|
||||
+
|
||||
+#
|
||||
+# tar directory (make tar*-pkg)
|
||||
+#
|
||||
+/tar-install/
|
||||
+
|
||||
+#
|
||||
+# We don't want to ignore the following even if they are dot-files
|
||||
+#
|
||||
+!.clang-format
|
||||
+!.cocciconfig
|
||||
+!.get_maintainer.ignore
|
||||
+!.gitattributes
|
||||
+!.gitignore
|
||||
+!.mailmap
|
||||
+
|
||||
+#
|
||||
+# Generated include files
|
||||
+#
|
||||
+/include/config/
|
||||
+/include/generated/
|
||||
+/include/ksym/
|
||||
+/arch/*/include/generated/
|
||||
+
|
||||
+# stgit generated dirs
|
||||
+patches-*
|
||||
+
|
||||
+# quilt's files
|
||||
+patches
|
||||
+series
|
||||
+
|
||||
+# ctags files
|
||||
+tags
|
||||
+TAGS
|
||||
+
|
||||
+# cscope files
|
||||
+cscope.*
|
||||
+ncscope.*
|
||||
+
|
||||
+# gnu global files
|
||||
+GPATH
|
||||
+GRTAGS
|
||||
+GSYMS
|
||||
+GTAGS
|
||||
+
|
||||
+# id-utils files
|
||||
+ID
|
||||
+
|
||||
+*.orig
|
||||
+*~
|
||||
+\#*#
|
||||
+
|
||||
+#
|
||||
+# Leavings from module signing
|
||||
+#
|
||||
+extra_certificates
|
||||
+signing_key.pem
|
||||
+signing_key.priv
|
||||
+signing_key.x509
|
||||
+x509.genkey
|
||||
+
|
||||
+# Kconfig presets
|
||||
+/all.config
|
||||
+/alldef.config
|
||||
+/allmod.config
|
||||
+/allno.config
|
||||
+/allrandom.config
|
||||
+/allyes.config
|
||||
+
|
||||
+# Kconfig savedefconfig output
|
||||
+/defconfig
|
||||
+
|
||||
+# Kdevelop4
|
||||
+*.kdev4
|
||||
+
|
||||
+# Clang's compilation database file
|
||||
+/compile_commands.json
|
||||
+
|
||||
+# Documentation toolchain
|
||||
+sphinx_*/
|
||||
diff --git a/test/zdtm/mod/Makefile b/test/zdtm/mod/Makefile
|
||||
new file mode 100644
|
||||
index 0000000..10c9c9a
|
||||
--- /dev/null
|
||||
+++ b/test/zdtm/mod/Makefile
|
||||
@@ -0,0 +1,28 @@
|
||||
+# notice:
|
||||
+# `ARCH` var is used in both criu and kernel, but they have the different value
|
||||
+# for the same architecture(e.g. arm64). Therefore, this Makefile can't be
|
||||
+# included in the criu Makefile.
|
||||
+obj-m += notifier.o
|
||||
+
|
||||
+# specific the kernel devel path
|
||||
+# example (use `/home/me/kernel` as `KDIR`):
|
||||
+# $ export KDIR="/home/me/kernel"
|
||||
+ifeq ($(KDIR),)
|
||||
+ KDIR := /lib/modules/$(shell uname -r)/build
|
||||
+endif
|
||||
+
|
||||
+# specific the mod src path
|
||||
+ifeq ($(MOD),)
|
||||
+ MOD := $(PWD)
|
||||
+endif
|
||||
+
|
||||
+all:
|
||||
+ $(MAKE) -C $(KDIR) M=$(MOD) modules
|
||||
+
|
||||
+clean:
|
||||
+ $(MAKE) -C $(KDIR) M=$(MOD) clean
|
||||
+
|
||||
+.PHONY: all clean
|
||||
+
|
||||
+notifier.ko:
|
||||
+ $(MAKE) -C $(KDIR) M=$(MOD) notifier.ko
|
||||
diff --git a/test/zdtm/mod/notifier.c b/test/zdtm/mod/notifier.c
|
||||
new file mode 100644
|
||||
index 0000000..70a5b33
|
||||
--- /dev/null
|
||||
+++ b/test/zdtm/mod/notifier.c
|
||||
@@ -0,0 +1,145 @@
|
||||
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||
+
|
||||
+#include <linux/init.h>
|
||||
+#include <linux/module.h>
|
||||
+#include <linux/modrestore.h>
|
||||
+
|
||||
+static int values[KUP_HOOK_MAX];
|
||||
+static char *nvwa_actions[] = {
|
||||
+ "PREPARE",
|
||||
+ "ROLLBACK",
|
||||
+};
|
||||
+static char *nvwa_notifiers[] = {
|
||||
+ "PRE_FREEZE",
|
||||
+ "FREEZE_TO_KILL",
|
||||
+ "PRE_UPDATE_KERNEL",
|
||||
+ "POST_UPDATE_KERNEL",
|
||||
+ "UNFREEZE_TO_RUN",
|
||||
+ "POST_RUN"
|
||||
+};
|
||||
+
|
||||
+static int nvwa_notifier_func(struct notifier_block *nb, unsigned long val, void *data)
|
||||
+{
|
||||
+ struct nvwa_action *action = data;
|
||||
+
|
||||
+ switch (action->cmd) {
|
||||
+ case PREPARE:
|
||||
+ values[val] += 1;
|
||||
+ break;
|
||||
+ case ROLLBACK:
|
||||
+ values[val] -= 1;
|
||||
+ break;
|
||||
+ default:
|
||||
+ pr_err("invalid cmd: %d", action->cmd);
|
||||
+ return NOTIFY_BAD;
|
||||
+ }
|
||||
+
|
||||
+ pr_info("nvwa notifier action %s", nvwa_actions[action->cmd]);
|
||||
+
|
||||
+ return NOTIFY_DONE;
|
||||
+}
|
||||
+
|
||||
+#define DEFINE_NVWA_NB(name) \
|
||||
+ static struct notifier_block nvwa_##name##_nb = { \
|
||||
+ .notifier_call = nvwa_notifier_func, \
|
||||
+ }
|
||||
+
|
||||
+DEFINE_NVWA_NB(pre_freeze);
|
||||
+DEFINE_NVWA_NB(freeze_to_kill);
|
||||
+DEFINE_NVWA_NB(pre_update_kernel);
|
||||
+DEFINE_NVWA_NB(post_update_kernel);
|
||||
+DEFINE_NVWA_NB(unfreeze_to_run);
|
||||
+DEFINE_NVWA_NB(post_run);
|
||||
+
|
||||
+static struct notifier_block *nvwa_nbs[] = {
|
||||
+ &nvwa_pre_freeze_nb,
|
||||
+ &nvwa_freeze_to_kill_nb,
|
||||
+ &nvwa_pre_update_kernel_nb,
|
||||
+ &nvwa_post_update_kernel_nb,
|
||||
+ &nvwa_unfreeze_to_run_nb,
|
||||
+ &nvwa_post_run_nb,
|
||||
+};
|
||||
+
|
||||
+static int register_nvwa_notifiers(void)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ BUILD_BUG_ON_MSG(ARRAY_SIZE(nvwa_nbs) != KUP_HOOK_MAX,
|
||||
+ "wrong nvwa notifier block size!");
|
||||
+
|
||||
+ for (i = 0; i < ARRAY_SIZE(nvwa_nbs); i++) {
|
||||
+ if (register_nvwa_notifier(i, nvwa_nbs[i]) != 0) {
|
||||
+ pr_err("register nvwa %s notifier failed!", nvwa_notifiers[i]);
|
||||
+ goto error;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+
|
||||
+error:
|
||||
+
|
||||
+ for (i -= 1; i >= 0; i -= 1)
|
||||
+ unregister_nvwa_notifier(i, nvwa_nbs[i]);
|
||||
+
|
||||
+ return -1;
|
||||
+}
|
||||
+
|
||||
+static void unregister_nvwa_notifiers(void)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ for (i = 0; i < ARRAY_SIZE(nvwa_nbs); i++)
|
||||
+ unregister_nvwa_notifier(i, nvwa_nbs[i]);
|
||||
+}
|
||||
+
|
||||
+static ssize_t criu_notifier_store(struct kobject *kobj,
|
||||
+ struct kobj_attribute *attr,
|
||||
+ const char *buf, size_t count)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ for (i = 0; i < ARRAY_SIZE(values); i++)
|
||||
+ values[i] = 0;
|
||||
+
|
||||
+ return count;
|
||||
+}
|
||||
+
|
||||
+static ssize_t criu_notifier_show(struct kobject *kobj,
|
||||
+ struct kobj_attribute *attr,
|
||||
+ char *buf)
|
||||
+{
|
||||
+ int i;
|
||||
+ ssize_t count = 0;
|
||||
+
|
||||
+ for (i = 0; i < ARRAY_SIZE(values); i++)
|
||||
+ count += sprintf(buf+count, "%d ", values[i]);
|
||||
+
|
||||
+ buf[count-1] = '\n';
|
||||
+
|
||||
+ return count;
|
||||
+}
|
||||
+
|
||||
+static struct kobj_attribute notifier_file = __ATTR_RW(criu_notifier);
|
||||
+
|
||||
+static int __init notifier_init(void)
|
||||
+{
|
||||
+ if (register_nvwa_notifiers() != 0)
|
||||
+ return -1;
|
||||
+
|
||||
+ if (sysfs_create_file(kernel_kobj, ¬ifier_file.attr) != 0) {
|
||||
+ unregister_nvwa_notifiers();
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static void __exit notifier_exit(void)
|
||||
+{
|
||||
+ sysfs_remove_file(kernel_kobj, ¬ifier_file.attr);
|
||||
+ unregister_nvwa_notifiers();
|
||||
+}
|
||||
+
|
||||
+module_init(notifier_init);
|
||||
+module_exit(notifier_exit);
|
||||
+MODULE_LICENSE("GPL");
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,35 +0,0 @@
|
||||
From d17aedda384cfe6940b9948f4db36643495e0375 Mon Sep 17 00:00:00 2001
|
||||
From: "fu.lin" <fulin10@huawei.com>
|
||||
Date: Tue, 15 Feb 2022 11:31:27 +0800
|
||||
Subject: [PATCH 65/72] zdtm: print errno info when accessing *.out failure
|
||||
|
||||
The line `Output file *.out appears to exist, aborting` is confusing.
|
||||
One common cause is a permission-denied error because the test desc
|
||||
lacks the suid flag. zdtm.py sets `ZDTM_UID` and `ZDTM_GID`, and
|
||||
the function `test_init()` (in `zdtm/lib/test.c`) switches the tester
|
||||
itself to that uid and gid when the suid flag is absent.
|
||||
|
||||
Print the errno description when accessing *.out fails.
|
||||
|
||||
Signed-off-by: fu.lin <fulin10@huawei.com>
|
||||
---
|
||||
test/zdtm/lib/test.c | 3 ++-
|
||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/test/zdtm/lib/test.c b/test/zdtm/lib/test.c
|
||||
index 81da81e..471980d 100644
|
||||
--- a/test/zdtm/lib/test.c
|
||||
+++ b/test/zdtm/lib/test.c
|
||||
@@ -74,7 +74,8 @@ static void test_fini(void)
|
||||
static void setup_outfile(void)
|
||||
{
|
||||
if (!access(outfile, F_OK) || errno != ENOENT) {
|
||||
- fprintf(stderr, "Output file %s appears to exist, aborting\n", outfile);
|
||||
+ fprintf(stderr, "Output file %s appears to exist, aborting: %s\n",
|
||||
+ outfile, strerror(errno));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,43 +0,0 @@
|
||||
From af97bc76b1dc1e6ca2b924d7e5666dd04a1847b2 Mon Sep 17 00:00:00 2001
|
||||
From: "fu.lin" <fulin10@huawei.com>
|
||||
Date: Wed, 16 Feb 2022 10:39:06 +0800
|
||||
Subject: [PATCH 66/72] zdtm: print more info for fs.c
|
||||
|
||||
---
|
||||
test/zdtm/lib/fs.c | 11 ++++++++---
|
||||
1 file changed, 8 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/test/zdtm/lib/fs.c b/test/zdtm/lib/fs.c
|
||||
index 7b8be5f..a716b40 100644
|
||||
--- a/test/zdtm/lib/fs.c
|
||||
+++ b/test/zdtm/lib/fs.c
|
||||
@@ -4,6 +4,7 @@
|
||||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
#include <limits.h>
|
||||
+#include <sys/stat.h>
|
||||
|
||||
#include "zdtmtst.h"
|
||||
#include "fs.h"
|
||||
@@ -103,11 +104,15 @@ int get_cwd_check_perm(char **result)
|
||||
}
|
||||
|
||||
if (access(cwd, X_OK)) {
|
||||
- pr_err("access check for bit X for current dir path '%s' "
|
||||
- "failed for uid:%d,gid:%d, error: %d(%s). "
|
||||
+ struct stat sb;
|
||||
+
|
||||
+ stat(cwd, &sb);
|
||||
+ pr_err("access check for bit X for current dir path '%s'(uid:%d,gid:%d,mode:%o) "
|
||||
+ "failed for uid:%d,gid:%d,euid:%d, error: %d(%s). "
|
||||
"Bit 'x' should be set in all path components of "
|
||||
"this directory\n",
|
||||
- cwd, getuid(), getgid(), errno, strerror(errno));
|
||||
+ cwd, sb.st_uid, sb.st_gid, sb.st_mode, getuid(), getgid(),
|
||||
+ geteuid(), errno, strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,288 +0,0 @@
|
||||
From c44c68028f22751ef12fac02567008a16e992fea Mon Sep 17 00:00:00 2001
|
||||
From: "fu.lin" <fulin10@huawei.com>
|
||||
Date: Thu, 17 Feb 2022 14:30:03 +0800
|
||||
Subject: [PATCH 67/72] zdtm: add chardev testcase
|
||||
|
||||
- char dev `ioctl({IOCTL_CMD_NEEDREPAIR, IOCTL_CMD_REPAIR})`
|
||||
checkpoint/restore test
|
||||
- anonymous inode checkpoint/restore test
|
||||
---
|
||||
test/zdtm/customization/Makefile | 3 +-
|
||||
test/zdtm/customization/chardev00.c | 65 +++++++++++
|
||||
test/zdtm/customization/chardev00.desc | 1 +
|
||||
test/zdtm/mod/Makefile | 5 +-
|
||||
test/zdtm/mod/anon_inode.c | 148 +++++++++++++++++++++++++
|
||||
5 files changed, 220 insertions(+), 2 deletions(-)
|
||||
create mode 100644 test/zdtm/customization/chardev00.c
|
||||
create mode 100644 test/zdtm/customization/chardev00.desc
|
||||
create mode 100644 test/zdtm/mod/anon_inode.c
|
||||
|
||||
diff --git a/test/zdtm/customization/Makefile b/test/zdtm/customization/Makefile
|
||||
index 93922c7..7d08db3 100644
|
||||
--- a/test/zdtm/customization/Makefile
|
||||
+++ b/test/zdtm/customization/Makefile
|
||||
@@ -11,7 +11,8 @@ TST_NOFILE = \
|
||||
maps05 \
|
||||
maps007 \
|
||||
maps008 \
|
||||
- notifier00
|
||||
+ notifier00 \
|
||||
+ chardev00
|
||||
|
||||
TST_FILE = \
|
||||
maps00 \
|
||||
diff --git a/test/zdtm/customization/chardev00.c b/test/zdtm/customization/chardev00.c
|
||||
new file mode 100644
|
||||
index 0000000..c708699
|
||||
--- /dev/null
|
||||
+++ b/test/zdtm/customization/chardev00.c
|
||||
@@ -0,0 +1,65 @@
|
||||
+#include <stdio.h>
|
||||
+#include <stdlib.h>
|
||||
+#include <sys/types.h>
|
||||
+#include <sys/stat.h>
|
||||
+#include <fcntl.h>
|
||||
+#include <sys/ioctl.h>
|
||||
+#include "zdtmtst.h"
|
||||
+
|
||||
+#define CHARDEV_PATH "/dev/anon_test"
|
||||
+
|
||||
+const char *test_doc="Tests char dev and anonmous inode map checkpoint/restore";
|
||||
+
|
||||
/*
 * Look for a mapping that starts at `addr` in /proc/self/maps, logging every
 * line that is read.  Returns 0 when such a mapping exists and -1 when it
 * does not or when the maps file cannot be opened.
 */
static int check_maps(unsigned long addr)
{
	FILE *fp = fopen("/proc/self/maps", "r");
	char *line = NULL;
	size_t n = 0;
	int ret = -1;

	if (fp == NULL) {
		pr_perror("open self maps failed");
		return -1;
	}

	while (getline(&line, &n, fp) != -1) {
		unsigned long start;

		test_msg("%s", line);
		/* only compare when the start address was actually parsed */
		if (sscanf(line, "%lx-", &start) == 1 && start == addr) {
			ret = 0;
			break;
		}
	}

	/* getline() allocates the buffer; release it and the stream on every path */
	free(line);
	fclose(fp);

	return ret;
}
|
||||
+
|
||||
+int main(int argc, char *argv[])
|
||||
+{
|
||||
+ int fd, retval = 0;
|
||||
+ unsigned long addr;
|
||||
+
|
||||
+ test_init(argc, argv);
|
||||
+
|
||||
+ fd = open(CHARDEV_PATH, O_RDWR);
|
||||
+ if (fd < 0) {
|
||||
+ pr_perror("open '%s' failed", CHARDEV_PATH);
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ retval = ioctl(fd, 0, &addr);
|
||||
+ if (retval < 0) {
|
||||
+ pr_perror("create anonymous map failed");
|
||||
+ retval = -1;
|
||||
+ goto out;
|
||||
+ }
|
||||
+ test_msg("create anonymous vma start 0x%lx\n", addr);
|
||||
+
|
||||
+ test_daemon();
|
||||
+ test_waitsig();
|
||||
+
|
||||
+ retval = check_maps(addr);
|
||||
+ if (retval == 0)
|
||||
+ pass();
|
||||
+ else
|
||||
+ fail("anonymous inode map don't restore");
|
||||
+out:
|
||||
+ return retval;
|
||||
+}
|
||||
diff --git a/test/zdtm/customization/chardev00.desc b/test/zdtm/customization/chardev00.desc
|
||||
new file mode 100644
|
||||
index 0000000..9c51ba8
|
||||
--- /dev/null
|
||||
+++ b/test/zdtm/customization/chardev00.desc
|
||||
@@ -0,0 +1 @@
|
||||
+{'arch': 'aarch64', 'opts': '--dump-char-dev', 'flavor': 'h', 'flags': 'suid excl', 'sysfs': '/sys/kernel/modrestore/anon_state_restore /sys/kernel/repairing_device', 'mod': 'anon_inode.ko'}
|
||||
diff --git a/test/zdtm/mod/Makefile b/test/zdtm/mod/Makefile
|
||||
index 10c9c9a..0bc89f7 100644
|
||||
--- a/test/zdtm/mod/Makefile
|
||||
+++ b/test/zdtm/mod/Makefile
|
||||
@@ -2,7 +2,7 @@
|
||||
# `ARCH` var is used in both criu and kernel, but they have the different value
|
||||
# for the same architecture(e.g. arm64). Therefore, this Makefile can't be
|
||||
# included in the criu Makefile.
|
||||
-obj-m += notifier.o
|
||||
+obj-m += notifier.o anon_inode.o
|
||||
|
||||
# specific the kernel devel path
|
||||
# example (use `/home/me/kernel` as `KDIR`):
|
||||
@@ -26,3 +26,6 @@ clean:
|
||||
|
||||
notifier.ko:
|
||||
$(MAKE) -C $(KDIR) M=$(MOD) notifier.ko
|
||||
+
|
||||
+anon_inode.ko:
|
||||
+ $(MAKE) -C $(KDIR) M=$(MOD) anon_inode.ko
|
||||
diff --git a/test/zdtm/mod/anon_inode.c b/test/zdtm/mod/anon_inode.c
|
||||
new file mode 100644
|
||||
index 0000000..d9c7d2a
|
||||
--- /dev/null
|
||||
+++ b/test/zdtm/mod/anon_inode.c
|
||||
@@ -0,0 +1,148 @@
|
||||
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||
+
|
||||
+#include <linux/init.h>
|
||||
+#include <linux/module.h>
|
||||
+#include <linux/miscdevice.h>
|
||||
+#include <linux/fs.h>
|
||||
+#include <linux/mm.h>
|
||||
+#include <linux/mman.h>
|
||||
+#include <linux/anon_inodes.h>
|
||||
+#include <linux/file.h>
|
||||
+#include <linux/modrestore.h>
|
||||
+
|
||||
+static int anon_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
+{
|
||||
+ pr_info("call %s\n", __func__);
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static const struct file_operations none_fops = {
|
||||
+ .owner = THIS_MODULE,
|
||||
+ .mmap = anon_mmap,
|
||||
+};
|
||||
+
|
||||
+static unsigned long create_mmap(void)
|
||||
+{
|
||||
+ struct file *filp;
|
||||
+ unsigned long start;
|
||||
+
|
||||
+ pr_info("call %s\n", __func__);
|
||||
+ filp = anon_inode_getfile("test", &none_fops, NULL, O_RDWR);
|
||||
+ if (IS_ERR(filp)) {
|
||||
+ pr_warn("anon_inode_getfile('test') failed: %d\n", (int)PTR_ERR(filp));
|
||||
+ return PTR_ERR(filp);
|
||||
+ }
|
||||
+
|
||||
+ start = vm_mmap(filp, 0, 1<<20, PROT_READ | PROT_WRITE, MAP_SHARED, 0);
|
||||
+ if (IS_ERR_VALUE(start)) {
|
||||
+ pr_warn("vm_mmap failed with: %d\n", (int)PTR_ERR((void *)start));
|
||||
+ }
|
||||
+
|
||||
+ fput(filp);
|
||||
+
|
||||
+ return start;
|
||||
+}
|
||||
+
|
||||
+static int anon_inode_notifier(struct notifier_block *nb,
|
||||
+ unsigned long action, void *data)
|
||||
+{
|
||||
+ struct vma_anon_entry *vma_entry = data;
|
||||
+ struct file *filp;
|
||||
+ unsigned long start;
|
||||
+
|
||||
+ filp = anon_inode_getfile("test", &none_fops, NULL, O_RDWR);
|
||||
+ if (IS_ERR(filp)) {
|
||||
+ pr_warn("anon_inode_getfile('test') failed: %d\n", (int)PTR_ERR(filp));
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
+ start = vm_mmap(filp, vma_entry->start, vma_entry -> end-vma_entry->start,
|
||||
+ PROT_READ | PROT_WRITE, MAP_SHARED, 0);
|
||||
+ if (start != vma_entry->start)
|
||||
+ pr_warn("vm_mmap() failed: %#lx\n", start);
|
||||
+
|
||||
+ fput(filp);
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static long anon_ioctl(struct file *file, unsigned int cmd, unsigned long argp)
|
||||
+{
|
||||
+ unsigned long start;
|
||||
+
|
||||
+ switch (cmd) {
|
||||
+ case 0:
|
||||
+ start = create_mmap();
|
||||
+ if (IS_ERR_VALUE(start))
|
||||
+ return -EINVAL;
|
||||
+ if (put_user(start, (unsigned long __user *)argp))
|
||||
+ return -EFAULT;
|
||||
+ break;
|
||||
+ case IOCTL_CMD_NEEDREPAIR:
|
||||
+ pr_info("call IOCTL_CMD_NEEDREPAIR");
|
||||
+ /* do nothing, just a request slot */
|
||||
+ return 17173;
|
||||
+ case IOCTL_CMD_REPAIR:
|
||||
+ pr_info("call IOCTL_CMD_REPAIR");
|
||||
+ /* do nothing, just a request slot */
|
||||
+ break;
|
||||
+ default:
|
||||
+ pr_warn("wrong cmd\n");
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static const struct file_operations anon_fops = {
|
||||
+ .owner = THIS_MODULE,
|
||||
+ .unlocked_ioctl = anon_ioctl,
|
||||
+ .compat_ioctl = anon_ioctl,
|
||||
+};
|
||||
+
|
||||
+static struct miscdevice anon_dev = {
|
||||
+ .minor = MISC_DYNAMIC_MINOR,
|
||||
+ .name = "anon_test",
|
||||
+ .fops = &anon_fops,
|
||||
+};
|
||||
+
|
||||
+static struct notifier_block anon_inode_nb = {
|
||||
+ .notifier_call = anon_inode_notifier,
|
||||
+};
|
||||
+
|
||||
+static int __init anon_init(void)
|
||||
+{
|
||||
+ int retval;
|
||||
+
|
||||
+ retval = mures_add_devname(anon_dev.name);
|
||||
+ if (retval != 0)
|
||||
+ goto out;
|
||||
+
|
||||
+ retval = register_anon_notifier(&anon_inode_nb);
|
||||
+ if (retval != 0)
|
||||
+ goto del_devname;
|
||||
+
|
||||
+ retval = misc_register(&anon_dev);
|
||||
+ if (retval != 0)
|
||||
+ goto del_notifier;
|
||||
+
|
||||
+ return 0;
|
||||
+
|
||||
+del_notifier:
|
||||
+ unregister_anon_notifier(&anon_inode_nb);
|
||||
+del_devname:
|
||||
+ mures_del_devname(anon_dev.name);
|
||||
+out:
|
||||
+ return retval;
|
||||
+}
|
||||
+
|
||||
+static void __exit anon_exit(void)
|
||||
+{
|
||||
+ mures_del_devname(anon_dev.name);
|
||||
+ unregister_anon_notifier(&anon_inode_nb);
|
||||
+ misc_deregister(&anon_dev);
|
||||
+ return;
|
||||
+}
|
||||
+
|
||||
+module_init(anon_init);
|
||||
+module_exit(anon_exit);
|
||||
+MODULE_LICENSE("GPL");
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,256 +0,0 @@
|
||||
From f7e452ffc5318b2aac8aabde5dd8b7bee910c6f7 Mon Sep 17 00:00:00 2001
|
||||
From: "fu.lin" <fulin10@huawei.com>
|
||||
Date: Thu, 17 Feb 2022 14:59:37 +0800
|
||||
Subject: [PATCH 68/72] zdtm: add infiniband testcase
|
||||
|
||||
---
|
||||
test/zdtm/customization/Makefile | 4 +-
|
||||
.../customization/infiniband_with_unix_sk.c | 55 ++++++++
|
||||
.../infiniband_with_unix_sk.desc | 1 +
|
||||
test/zdtm/mod/Makefile | 5 +-
|
||||
test/zdtm/mod/infiniband_kern.c | 121 ++++++++++++++++++
|
||||
5 files changed, 184 insertions(+), 2 deletions(-)
|
||||
create mode 100644 test/zdtm/customization/infiniband_with_unix_sk.c
|
||||
create mode 100644 test/zdtm/customization/infiniband_with_unix_sk.desc
|
||||
create mode 100644 test/zdtm/mod/infiniband_kern.c
|
||||
|
||||
diff --git a/test/zdtm/customization/Makefile b/test/zdtm/customization/Makefile
|
||||
index 7d08db3..728646b 100644
|
||||
--- a/test/zdtm/customization/Makefile
|
||||
+++ b/test/zdtm/customization/Makefile
|
||||
@@ -12,7 +12,8 @@ TST_NOFILE = \
|
||||
maps007 \
|
||||
maps008 \
|
||||
notifier00 \
|
||||
- chardev00
|
||||
+ chardev00 \
|
||||
+ infiniband_with_unix_sk
|
||||
|
||||
TST_FILE = \
|
||||
maps00 \
|
||||
@@ -61,6 +62,7 @@ wait_stop:
|
||||
$(TST): | $(LIB)
|
||||
|
||||
maps02: get_smaps_bits.o
|
||||
+infiniband_with_unix_sk: LDFLAGS += -lpthread
|
||||
|
||||
%: %.sh
|
||||
cp $< $@
|
||||
diff --git a/test/zdtm/customization/infiniband_with_unix_sk.c b/test/zdtm/customization/infiniband_with_unix_sk.c
|
||||
new file mode 100644
|
||||
index 0000000..4a9e108
|
||||
--- /dev/null
|
||||
+++ b/test/zdtm/customization/infiniband_with_unix_sk.c
|
||||
@@ -0,0 +1,55 @@
|
||||
+#include <sys/types.h>
|
||||
+#include <sys/stat.h>
|
||||
+#include <sys/socket.h>
|
||||
+#include <stdio.h>
|
||||
+#include <stdbool.h>
|
||||
+#include <fcntl.h>
|
||||
+#include <unistd.h>
|
||||
+#include <pthread.h>
|
||||
+#include "zdtmtst.h"
|
||||
+
|
||||
+#define DEV_PATH "/dev/infiniband_test"
|
||||
+
|
||||
+const char *test_doc = "test infiniband fd checkpoint/restore, and the conflict condition with the half-closing anonymous unix socket";
|
||||
+
|
||||
+static int fd;
|
||||
+static int sv[2];
|
||||
+
|
||||
/* Thread body: log a message once per second, forever. `arg` is unused. */
static void *wait(void *arg) {
	for (;;) {
		test_msg("sleep...\n");
		sleep(1);
	}

	return NULL;
}
|
||||
+
|
||||
+int main(int argc, char *argv[]) {
|
||||
+ pthread_t thread;
|
||||
+
|
||||
+ test_init(argc, argv);
|
||||
+
|
||||
+ if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sv) < 0) {
|
||||
+ pr_perror("socketpair");
|
||||
+ return -1;
|
||||
+ }
|
||||
+ printf("sv[0]: %d sv[1]: %d\n", sv[0], sv[1]);
|
||||
+
|
||||
+ if ((fd = open(DEV_PATH, O_RDWR)) < 0) {
|
||||
+ pr_perror("open");
|
||||
+ return -1;
|
||||
+ }
|
||||
+ if (close(sv[1]) < 0) {
|
||||
+ pr_perror("close");
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ pthread_create(&thread, NULL, wait, NULL);
|
||||
+
|
||||
+ test_daemon();
|
||||
+ test_waitsig();
|
||||
+
|
||||
+ pass();
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
diff --git a/test/zdtm/customization/infiniband_with_unix_sk.desc b/test/zdtm/customization/infiniband_with_unix_sk.desc
|
||||
new file mode 100644
|
||||
index 0000000..43a93e6
|
||||
--- /dev/null
|
||||
+++ b/test/zdtm/customization/infiniband_with_unix_sk.desc
|
||||
@@ -0,0 +1 @@
|
||||
+{'arch': 'aarch64', 'opts': '--dump-char-dev', 'flavor': 'h', 'flags': 'suid excl', 'sysfs': '/sys/kernel/repairing_device', 'mod': 'infiniband_kern.ko'}
|
||||
diff --git a/test/zdtm/mod/Makefile b/test/zdtm/mod/Makefile
|
||||
index 0bc89f7..58f9a27 100644
|
||||
--- a/test/zdtm/mod/Makefile
|
||||
+++ b/test/zdtm/mod/Makefile
|
||||
@@ -2,7 +2,7 @@
|
||||
# `ARCH` var is used in both criu and kernel, but they have the different value
|
||||
# for the same architecture(e.g. arm64). Therefore, this Makefile can't be
|
||||
# included in the criu Makefile.
|
||||
-obj-m += notifier.o anon_inode.o
|
||||
+obj-m += notifier.o anon_inode.o infiniband_kern.o
|
||||
|
||||
# specific the kernel devel path
|
||||
# example (use `/home/me/kernel` as `KDIR`):
|
||||
@@ -29,3 +29,6 @@ notifier.ko:
|
||||
|
||||
anon_inode.ko:
|
||||
$(MAKE) -C $(KDIR) M=$(MOD) anon_inode.ko
|
||||
+
|
||||
+infiniband_kern.ko:
|
||||
+ $(MAKE) -C $(KDIR) M=$(MOD) infiniband_kern.ko
|
||||
diff --git a/test/zdtm/mod/infiniband_kern.c b/test/zdtm/mod/infiniband_kern.c
|
||||
new file mode 100644
|
||||
index 0000000..a61df3a
|
||||
--- /dev/null
|
||||
+++ b/test/zdtm/mod/infiniband_kern.c
|
||||
@@ -0,0 +1,121 @@
|
||||
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||
+
|
||||
+#include <linux/init.h>
|
||||
+#include <linux/module.h>
|
||||
+#include <linux/miscdevice.h>
|
||||
+#include <linux/fs.h>
|
||||
+#include <linux/mm.h>
|
||||
+#include <linux/mman.h>
|
||||
+#include <linux/anon_inodes.h>
|
||||
+#include <linux/file.h>
|
||||
+#include <linux/modrestore.h>
|
||||
+#include <linux/uaccess.h>
|
||||
+
|
||||
+/* File operations table with only .owner set; no handlers are installed. */
|
||||
+static const struct file_operations none_fops = {
|
||||
+ .owner = THIS_MODULE,
|
||||
+};
|
||||
+
|
||||
+/*
|
||||
+ * File operations used for the "[infinibandevent]" anonymous inodes created
|
||||
+ * by infiniband_open() and infiniband_repair(); only .owner is set.
|
||||
+ */
|
||||
+static const struct file_operations anonfd_fops = {
|
||||
+ .owner = THIS_MODULE,
|
||||
+};
|
||||
+
|
||||
+/*
+ * open() handler of the test device.
+ *
+ * If the file was opened with O_REPAIR, nothing is created and the open
+ * succeeds immediately.  Otherwise an "[infinibandevent]" anonymous-inode fd
+ * is created and its number is stored in filp->private_data.
+ *
+ * Returns 0 on success or the negative value from anon_inode_getfd().
+ */
+static int infiniband_open(struct inode *inode, struct file *filp)
+{
+	long anon_fd;
+
+	if (!!(filp->f_flags & O_REPAIR)) {
+		pr_info("reuse\n");
+		return 0;
+	}
+
+	anon_fd = anon_inode_getfd("[infinibandevent]", &anonfd_fops, NULL, 0);
+	if (anon_fd < 0)
+		return anon_fd;
+
+	filp->private_data = (void *)anon_fd;
+	return 0;
+}
|
||||
+
|
||||
+/*
+ * IOCTL_CMD_REPAIR helper.
+ *
+ * Creates a new "[infinibandevent]" anonymous file and passes it, together
+ * with @from, to mures_f_dupfd().
+ * NOTE(review): presumably mures_f_dupfd() installs the file at descriptor
+ * number @from -- confirm against its definition.
+ *
+ * Whatever mures_f_dupfd() returns is stored in filp->private_data.  Returns
+ * 0 when that value equals @from, -EEXIST when it differs, or the error from
+ * anon_inode_getfile().
+ */
+static int infiniband_repair(struct file *filp, int from)
+{
+	struct file *anon;
+	long new_fd;
+	int ret;
+
+	anon = anon_inode_getfile("[infinibandevent]", &anonfd_fops, NULL, 0);
+	if (IS_ERR(anon))
+		return PTR_ERR(anon);
+
+	new_fd = mures_f_dupfd(from, anon, 0);
+	if (new_fd == from) {
+		ret = 0;
+	} else {
+		pr_err("different fd, old: %d, dup: %ld\n", from, new_fd);
+		ret = -EEXIST;
+	}
+
+	/* Drop the reference taken by anon_inode_getfile(). */
+	fput(anon);
+	filp->private_data = (long *)new_fd;
+
+	return ret;
+}
|
||||
+
|
||||
+/*
+ * ioctl dispatcher of the test device.
+ *
+ * IOCTL_CMD_NEEDREPAIR returns the value stored in filp->private_data,
+ * IOCTL_CMD_REPAIR runs infiniband_repair() with @argp as the fd number, and
+ * any other command is rejected with -EINVAL.
+ */
+static long infiniband_ioctl(struct file *filp, unsigned int cmd, unsigned long argp)
+{
+	switch (cmd) {
+	case IOCTL_CMD_NEEDREPAIR:
+		return (long )filp->private_data;
+	case IOCTL_CMD_REPAIR:
+		return infiniband_repair(filp, argp);
+	default:
+		pr_warn("wrong cmd\n");
+		return -EINVAL;
+	}
+}
|
||||
+
|
||||
+/*
|
||||
+ * File operations of the misc device: open goes to infiniband_open(), and
|
||||
+ * both the native and the compat ioctl entry points go to infiniband_ioctl().
|
||||
+ */
|
||||
+static const struct file_operations infiniband_fops = {
|
||||
+ .owner = THIS_MODULE,
|
||||
+ .open = infiniband_open,
|
||||
+ .unlocked_ioctl = infiniband_ioctl,
|
||||
+ .compat_ioctl = infiniband_ioctl,
|
||||
+};
|
||||
+
|
||||
+/*
|
||||
+ * Misc device descriptor: dynamically assigned minor, named
|
||||
+ * "infiniband_test", served by infiniband_fops.
|
||||
+ */
|
||||
+static struct miscdevice infiniband_dev = {
|
||||
+ .minor = MISC_DYNAMIC_MINOR,
|
||||
+ .name = "infiniband_test",
|
||||
+ .fops = &infiniband_fops,
|
||||
+};
|
||||
+
|
||||
+/*
+ * Module init: register the device name with mures_add_devname(), then
+ * register the misc device.  If misc_register() fails, the device name is
+ * removed again.  Returns 0 on success or the first non-zero error.
+ */
+static int __init infiniband_init(void)
+{
+	int err;
+
+	err = mures_add_devname(infiniband_dev.name);
+	if (err != 0)
+		return err;
+
+	err = misc_register(&infiniband_dev);
+	if (err != 0) {
+		mures_del_devname(infiniband_dev.name);
+		return err;
+	}
+
+	return 0;
+}
|
||||
+
|
||||
+/* Module exit: remove the device name, then deregister the misc device. */
+static void __exit infiniband_exit(void)
+{
+	mures_del_devname(infiniband_dev.name);
+	misc_deregister(&infiniband_dev);
+}
|
||||
+
|
||||
+module_init(infiniband_init);
|
||||
+module_exit(infiniband_exit);
|
||||
+MODULE_LICENSE("GPL");
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,145 +0,0 @@
|
||||
From b766a8d6b04e9c358cd221b68405a205156c1fe2 Mon Sep 17 00:00:00 2001
|
||||
From: "fu.lin" <fulin10@huawei.com>
|
||||
Date: Thu, 17 Feb 2022 17:19:46 +0800
|
||||
Subject: [PATCH 69/72] zdtm: add share port testcase
|
||||
|
||||
---
|
||||
test/zdtm/customization/Makefile | 3 +-
|
||||
test/zdtm/customization/tcp00.c | 101 +++++++++++++++++++++++++++++
|
||||
test/zdtm/customization/tcp00.desc | 1 +
|
||||
3 files changed, 104 insertions(+), 1 deletion(-)
|
||||
create mode 100644 test/zdtm/customization/tcp00.c
|
||||
create mode 100644 test/zdtm/customization/tcp00.desc
|
||||
|
||||
diff --git a/test/zdtm/customization/Makefile b/test/zdtm/customization/Makefile
|
||||
index 728646b..1111908 100644
|
||||
--- a/test/zdtm/customization/Makefile
|
||||
+++ b/test/zdtm/customization/Makefile
|
||||
@@ -13,7 +13,8 @@ TST_NOFILE = \
|
||||
maps008 \
|
||||
notifier00 \
|
||||
chardev00 \
|
||||
- infiniband_with_unix_sk
|
||||
+ infiniband_with_unix_sk \
|
||||
+ tcp00
|
||||
|
||||
TST_FILE = \
|
||||
maps00 \
|
||||
diff --git a/test/zdtm/customization/tcp00.c b/test/zdtm/customization/tcp00.c
|
||||
new file mode 100644
|
||||
index 0000000..d1ead82
|
||||
--- /dev/null
|
||||
+++ b/test/zdtm/customization/tcp00.c
|
||||
@@ -0,0 +1,101 @@
|
||||
+#include <stdio.h>
|
||||
+#include <stdbool.h>
|
||||
+#include <unistd.h>
|
||||
+#include <string.h>
|
||||
+#include <arpa/inet.h>
|
||||
+#include <sys/socket.h>
|
||||
+#include <netinet/in.h>
|
||||
+#include <netinet/tcp.h>
|
||||
+#include "zdtmtst.h"
|
||||
+
|
||||
+#define PORT 17173
|
||||
+
|
||||
+const char *test_doc = "Test TCP SO_REUSEADDR checkpoint/restore using `share_{src,dst}_ports`";
|
||||
+
|
||||
+/*
+ * Create a TCP socket with SO_REUSEADDR set, bind it to INADDR_ANY:PORT and
+ * put it into listening state with a backlog of 5.
+ *
+ * Returns the listening socket.  Any failure is logged and terminates the
+ * process with exit status 1.
+ */
+static int sock_bind_and_listen(void)
+{
+	struct sockaddr_in addr = {
+		.sin_family = AF_INET,
+		.sin_addr.s_addr = htonl(INADDR_ANY),
+		.sin_port = htons(PORT),
+	};
+	int reuse = 1;
+	int sk;
+
+	sk = socket(AF_INET, SOCK_STREAM, 0);
+	if (sk < 0) {
+		pr_perror("server socket failed");
+		exit(1);
+	}
+
+	if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse)) < 0) {
+		pr_perror("setsockopt");
+		exit(1);
+	}
+
+	if (bind(sk, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
+		pr_perror("bind");
+		exit(1);
+	}
+
+	if (listen(sk, 5) != 0) {
+		pr_perror("listen");
+		exit(1);
+	}
+
+	return sk;
+}
|
||||
+
|
||||
+/*
+ * Connect a TCP client to 127.0.0.1:PORT and close it again right away.
+ * Any failure is logged and terminates the process with exit status 1.
+ */
+static void client_connect(void)
+{
+	struct sockaddr_in peer = {
+		.sin_family = AF_INET,
+	};
+	int cli;
+
+	cli = socket(AF_INET, SOCK_STREAM, 0);
+	if (cli < 0) {
+		pr_perror("client socket failed");
+		exit(1);
+	}
+
+	peer.sin_port = htons(PORT);
+	peer.sin_addr.s_addr = inet_addr("127.0.0.1");
+
+	if (connect(cli, (struct sockaddr *)&peer, sizeof(peer)) < 0) {
+		pr_perror("connect failed");
+		exit(1);
+	}
+
+	close(cli);
+}
|
||||
+
|
||||
+/*
+ * Test entry point.
+ *
+ * Sets up a listening socket with SO_REUSEADDR, connects and closes one
+ * client without ever accepting it, waits for the harness signal, and then
+ * checks that SO_REUSEADDR still reads back as 1 on the listening socket.
+ *
+ * Returns 0 after pass(), 1 after fail(), or -1 if getsockopt() fails.
+ */
+int main(int argc, char *argv[])
+{
+	int serv_sk;
+	int optval = 0;
+	socklen_t len = sizeof(optval);
+
+	test_init(argc, argv);
+
+	serv_sk = sock_bind_and_listen();
+
+	test_msg("listen 0.0.0.0: %d\n", PORT);
+	/* create CLOSE-WAIT status socket */
+	client_connect();
+
+	test_daemon();
+	test_waitsig();
+
+	if (getsockopt(serv_sk, SOL_SOCKET, SO_REUSEADDR, &optval, &len) != 0) {
+		pr_perror("getsockopt failed");
+		return -1;
+	}
+
+	/*
+	 * Report the mismatch through fail() and a non-zero exit status so
+	 * the harness sees an explicit failure, not just a missing PASS.
+	 */
+	if (optval != 1) {
+		fail("SO_REUSEADDR flag is %d, should be 1", optval);
+		return 1;
+	}
+
+	pass();
+
+	return 0;
+}
|
||||
\ No newline at end of file
|
||||
diff --git a/test/zdtm/customization/tcp00.desc b/test/zdtm/customization/tcp00.desc
|
||||
new file mode 100644
|
||||
index 0000000..bc3b4a8
|
||||
--- /dev/null
|
||||
+++ b/test/zdtm/customization/tcp00.desc
|
||||
@@ -0,0 +1 @@
|
||||
+{'arch': 'aarch64', 'opts': '--use-fork-pid --share-src-ports=17173 --share-dst-ports=17173 --skip-in-flight', 'flavor': 'h', 'sysfs': '/sys/kernel/repair_share_socket'}
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,59 +0,0 @@
|
||||
From a4f00a225ebfed401aed49956eefad391071d0ce Mon Sep 17 00:00:00 2001
|
||||
From: "fu.lin" <fulin10@huawei.com>
|
||||
Date: Thu, 17 Feb 2022 11:02:08 +0800
|
||||
Subject: [PATCH 70/72] zdtm: tmp test script
|
||||
|
||||
---
|
||||
test/jenkins/criu-lib.sh | 2 +-
|
||||
test/jenkins/criu-test.sh | 26 ++++++++++++++++++++++++++
|
||||
2 files changed, 27 insertions(+), 1 deletion(-)
|
||||
create mode 100644 test/jenkins/criu-test.sh
|
||||
|
||||
diff --git a/test/jenkins/criu-lib.sh b/test/jenkins/criu-lib.sh
|
||||
index 72d41b5..89dc936 100644
|
||||
--- a/test/jenkins/criu-lib.sh
|
||||
+++ b/test/jenkins/criu-lib.sh
|
||||
@@ -15,7 +15,7 @@ function prep()
|
||||
|
||||
ulimit -c unlimited &&
|
||||
export CFLAGS=-g
|
||||
- git clean -dfx &&
|
||||
+# git clean -dfx &&
|
||||
make -j 4 &&
|
||||
make -j 4 -C test/zdtm/ &&
|
||||
make -C test zdtm_ct &&
|
||||
diff --git a/test/jenkins/criu-test.sh b/test/jenkins/criu-test.sh
|
||||
new file mode 100644
|
||||
index 0000000..3035f21
|
||||
--- /dev/null
|
||||
+++ b/test/jenkins/criu-test.sh
|
||||
@@ -0,0 +1,26 @@
|
||||
+#!/bin/bash
+
+# Temporary Jenkins driver: run prep() from criu-lib.sh, build the zdtm
+# target, make sure the zdtm group and user exist, then run a hand-picked set
+# of customization tests.
+
+set -e
+source "$(dirname "$0")/criu-lib.sh"
+prep
+
+rm -rf /var/run/criu.kdat
+
+make zdtm
+
+# Test grep's exit status directly. An unquoted $(grep ...) inside
+# `[ -z ... ]` fails with "too many arguments" as soon as a matching line
+# contains whitespace or more than one line matches.
+if ! grep -q 58467 /etc/group; then
+	groupadd -g 58467 zdtm
+fi
+if ! grep -q 58467 /etc/passwd; then
+	useradd -u 18943 -g 58467 zdtm
+fi
+
+#./test/zdtm.py run --all --keep-going --report report -f h --ignore-taint --parallel 1 --load-pinmem-dev || fail
+
+#./test/zdtm.py run -t zdtm/static/del_standalone_un --keep-going -f h --ignore-taint --parallel 1 --load-pinmem-dev --keep-img always
+
+./test/zdtm.py run -t zdtm/customization/chardev00 -t zdtm/customization/notifier00 --keep-going -f h --ignore-taint --parallel 1 --load-pinmem-dev --keep-img always
+
+#./test/zdtm.py run -t zdtm/static/socket-tcp-nfconntrack --join-ns --keep-going --ignore-taint --parallel 1 --load-pinmem-dev --keep-img always
+
+./test/zdtm.py run -t zdtm/customization/tcp00 --keep-going -f h --ignore-taint --parallel 1 --load-pinmem-dev --keep-img always
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,512 +0,0 @@
|
||||
From 03d188c492efe079a520319ca48e40843367ddcf Mon Sep 17 00:00:00 2001
|
||||
From: "fu.lin" <fulin10@huawei.com>
|
||||
Date: Fri, 18 Feb 2022 16:22:00 +0800
|
||||
Subject: [PATCH 71/72] mod: add criu-independent test
|
||||
|
||||
Signed-off-by: fu.lin <fulin10@huawei.com>
|
||||
---
|
||||
test/modules/Makefile | 21 ++++++
|
||||
test/modules/idr.c | 79 +++++++++++++++++++++
|
||||
test/modules/jump_table.c | 107 ++++++++++++++++++++++++++++
|
||||
test/modules/var_kern.c | 72 +++++++++++++++++++
|
||||
test/modules/var_user.py | 40 +++++++++++
|
||||
test/modules/workqueue_kern.c | 130 ++++++++++++++++++++++++++++++++++
|
||||
6 files changed, 449 insertions(+)
|
||||
create mode 100644 test/modules/Makefile
|
||||
create mode 100644 test/modules/idr.c
|
||||
create mode 100644 test/modules/jump_table.c
|
||||
create mode 100644 test/modules/var_kern.c
|
||||
create mode 100644 test/modules/var_user.py
|
||||
create mode 100644 test/modules/workqueue_kern.c
|
||||
|
||||
diff --git a/test/modules/Makefile b/test/modules/Makefile
|
||||
new file mode 100644
|
||||
index 0000000..9458aa7
|
||||
--- /dev/null
|
||||
+++ b/test/modules/Makefile
|
||||
@@ -0,0 +1,21 @@
|
||||
+obj-m := var_kern.o workqueue_kern.o jump_table.o idr.o
+
+KDIR := /lib/modules/`uname -r`/build
+
+.PHONY: all clean
+
+# Recipes call $(MAKE) rather than a literal `make` so that -j, -n and the
+# jobserver are handed down to the kernel build.
+all:
+	$(MAKE) -C $(KDIR) M=$(PWD) modules
+
+clean:
+	$(MAKE) -C $(KDIR) M=$(PWD) clean
+
+var_kern.ko:
+	$(MAKE) -C $(KDIR) M=$(PWD) var_kern.ko
+
+workqueue_kern.ko:
+	$(MAKE) -C $(KDIR) M=$(PWD) workqueue_kern.ko
+
+jump_table.ko:
+	$(MAKE) -C $(KDIR) M=$(PWD) jump_table.ko
+
+idr.ko:
+	$(MAKE) -C $(KDIR) M=$(PWD) idr.ko
|
||||
diff --git a/test/modules/idr.c b/test/modules/idr.c
|
||||
new file mode 100644
|
||||
index 0000000..67f248e
|
||||
--- /dev/null
|
||||
+++ b/test/modules/idr.c
|
||||
@@ -0,0 +1,79 @@
|
||||
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||
+
|
||||
+#include <linux/init.h>
|
||||
+#include <linux/module.h>
|
||||
+#include <linux/idr.h>
|
||||
+#include <linux/modrestore.h>
|
||||
+
|
||||
+DEFINE_IDR(idr_head);
|
||||
+const int placeholder = 0;
|
||||
+static int idr_uid = 0;
|
||||
+
|
||||
+/*
+ * idr_for_each() callback: log the entry and append "<id>\n" to the output
+ * buffer passed in @data.
+ *
+ * @data is the buffer handed to idr_test_show() and must already hold a
+ * NUL-terminated string.  The write is bounded by PAGE_SIZE (the size sysfs
+ * guarantees for a show() buffer), so ids that no longer fit are truncated
+ * instead of running past the end of the page.
+ *
+ * Always returns 0.
+ */
+static int idr_test_show_internal(int id, void *p, void *data)
+{
+	char *buf = data;
+	size_t used = strlen(buf);
+
+	pr_info("id: %d p %pK\n", id, p);
+	scnprintf(buf + used, PAGE_SIZE - used, "%d\n", id);
+	return 0;
+}
|
||||
+
|
||||
+/*
+ * sysfs show(): run idr_test_show_internal() over every entry of idr_head
+ * with @buf as the output buffer, and return the resulting string length.
+ */
+static ssize_t idr_test_show(struct kobject *kobj,
+			     struct kobj_attribute *attr,
+			     char *buf)
+{
+	ssize_t len;
+
+	idr_for_each(&idr_head, idr_test_show_internal, buf);
+	len = strlen(buf);
+
+	return len;
+}
|
||||
+
|
||||
+/*
+ * sysfs store(): parse an unsigned id from @buf and allocate it in idr_head
+ * (upper bound 65536) with &placeholder as the stored pointer.
+ *
+ * Returns @count on success, -EINVAL if no number could be parsed, or the
+ * negative error from idr_alloc_u32().
+ */
+static ssize_t idr_test_store(struct kobject *kobj,
+			      struct kobj_attribute *attr,
+			      const char *buf, size_t count)
+{
+	const unsigned long max = 65536;
+	unsigned id = 0;
+	int err;
+
+	if (sscanf(buf, "%u", &id) != 1) {
+		pr_err("sscanf empty\n");
+		return -EINVAL;
+	}
+
+	err = idr_alloc_u32(&idr_head, (void *)&placeholder, &id, max, GFP_KERNEL);
+	pr_info("alloc idr id %u, errno %d\n", id, err);
+	if (err < 0)
+		return err;
+
+	return count;
+}
|
||||
+
|
||||
+static struct kobj_attribute idr_test = __ATTR_RW(idr_test);
|
||||
+
|
||||
+/* Module init: expose the idr_test attribute under kernel_kobj. */
+static int __init mod_init(void)
+{
+	int err = sysfs_create_file(kernel_kobj, &idr_test.attr);
+
+	return err;
+}
|
||||
+
|
||||
+/* Module exit: remove the sysfs attribute, then destroy idr_head. */
+static void __exit mod_exit(void)
+{
+	sysfs_remove_file(kernel_kobj, &idr_test.attr);
+	idr_destroy(&idr_head);
+}
|
||||
+
|
||||
+/*
+ * Resume hook: call mures_restore_idr() for idr_head and, if that returns 0,
+ * re-create the sysfs attribute.  Returns the first non-zero result.
+ */
+static int __init mod_resume(void)
+{
+	int err;
+
+	err = mures_restore_idr(idr_uid, &idr_head);
+	if (err != 0)
+		return err;
+
+	return sysfs_create_file(kernel_kobj, &idr_test.attr);
+}
|
||||
+
|
||||
+/*
+ * Suspend hook: remove the sysfs attribute, then return the result of
+ * mures_save_idr() for idr_head.
+ */
+static int __exit mod_suspend(void)
+{
+	int err;
+
+	sysfs_remove_file(kernel_kobj, &idr_test.attr);
+	err = mures_save_idr(idr_uid, &idr_head);
+
+	return err;
+}
|
||||
+
|
||||
+module_init(mod_init);
|
||||
+module_exit(mod_exit);
|
||||
+module_resume(mod_resume);
|
||||
+module_suspend(mod_suspend);
|
||||
+
|
||||
+MODULE_LICENSE("GPL");
|
||||
\ No newline at end of file
|
||||
diff --git a/test/modules/jump_table.c b/test/modules/jump_table.c
|
||||
new file mode 100644
|
||||
index 0000000..8648c2a
|
||||
--- /dev/null
|
||||
+++ b/test/modules/jump_table.c
|
||||
@@ -0,0 +1,107 @@
|
||||
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||
+
|
||||
+#include <linux/init.h>
|
||||
+#include <linux/module.h>
|
||||
+#include <linux/hashtable.h>
|
||||
+#include <linux/sysfs.h>
|
||||
+#include <linux/modrestore.h>
|
||||
+
|
||||
+/*
|
||||
+ * One entry of the `cache` hash table: find_func() matches on `key` and
|
||||
+ * returns `value`, which build_cache() fills with mures_vcall(key).
|
||||
+ */
|
||||
+struct func_node {
|
||||
+ struct hlist_node hash;
|
||||
+ unsigned long key;
|
||||
+ unsigned long value;
|
||||
+};
|
||||
+
|
||||
+static int status __attribute__((section(".resume_0")));
|
||||
+
|
||||
+/*
|
||||
+ * `mures_vcall()` cannot be used in IRQ context because of how it is implemented.
|
||||
+ * Therefore, its results must be cached up front.
|
||||
+ */
|
||||
+DEFINE_HASHTABLE(__ro_after_init cache, 2);
|
||||
+
|
||||
+/* Increment the persistent `status` counter and return its new value. */
+static int foo(void)
+{
+	return ++status;
+}
|
||||
+
|
||||
+static void *find_func(unsigned long addr);
|
||||
+
|
||||
+/*
+ * sysfs show(): look up foo's address through find_func().  If an entry is
+ * found, call it and print its return value; otherwise print "Not Found".
+ * Returns the number of characters written to @buf.
+ */
+static ssize_t jp_test_show(struct kobject *kobj,
+			    struct kobj_attribute *attr,
+			    char *buf)
+{
+	int (*fn)(void) = find_func((unsigned long)foo);
+
+	if (fn == NULL)
+		return sprintf(buf, "Not Found\n");
+
+	return sprintf(buf, "%d", fn());
+}
|
||||
+
|
||||
+static struct kobj_attribute jp_test = __ATTR_RO(jp_test);
|
||||
+
|
||||
+/* Entries that build_cache() resolves and adds to `cache`; keyed by foo's address. */
|
||||
+struct func_node nodes[] __ro_after_init = {
|
||||
+ { .key = (unsigned long)foo, },
|
||||
+};
|
||||
+
|
||||
+/*
+ * Walk every entry in `cache`, logging each one, and return the cached
+ * `value` of the first entry whose `key` equals @addr.
+ *
+ * Returns NULL when no entry matches.
+ */
+static void *find_func(unsigned long addr)
+{
+	struct func_node *obj;
+	int i;
+
+	pr_info("finding addr: %lx\n", addr);
+	hash_for_each(cache, i, obj, hash) {
+		pr_info("found key: %lx, val: %lx\n", obj->key, obj->value);
+		if (obj->key == addr)
+			return (void *)obj->value;
+	}
+
+	return NULL;
+}
|
||||
+
|
||||
+/*
+ * For every entry of nodes[], store mures_vcall(key) in `value` and add the
+ * entry to the `cache` hash table under its key.
+ */
+static void __init build_cache(void)
+{
+	struct func_node *node;
+	int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(nodes); idx++) {
+		node = &nodes[idx];
+		node->value = mures_vcall(node->key);
+		hash_add(cache, &node->hash, node->key);
+	}
+}
|
||||
+
|
||||
+/* Module init: fill the lookup cache, then expose the jp_test attribute. */
+static int __init mod_init(void)
+{
+	int err;
+
+	build_cache();
+	err = sysfs_create_file(kernel_kobj, &jp_test.attr);
+
+	return err;
+}
|
||||
+
|
||||
+/* Module exit: remove the jp_test attribute. */
+static void __exit mod_exit(void)
+{
+	sysfs_remove_file(kernel_kobj, &jp_test.attr);
+}
|
||||
+
|
||||
+/* Resume hook: rebuild the lookup cache, then re-create the jp_test attribute. */
+static int __init mod_resume(void)
+{
+	int err;
+
+	build_cache();
+	err = sysfs_create_file(kernel_kobj, &jp_test.attr);
+
+	return err;
+}
|
||||
+
|
||||
+/* Suspend hook: remove the jp_test attribute; always returns 0. */
+static int __exit mod_suspend(void)
+{
+	const int ok = 0;
+
+	sysfs_remove_file(kernel_kobj, &jp_test.attr);
+
+	return ok;
+}
|
||||
+
|
||||
+module_init(mod_init);
|
||||
+module_exit(mod_exit);
|
||||
+module_resume(mod_resume);
|
||||
+module_suspend(mod_suspend);
|
||||
+
|
||||
+MODULE_LICENSE("GPL");
|
||||
\ No newline at end of file
|
||||
diff --git a/test/modules/var_kern.c b/test/modules/var_kern.c
|
||||
new file mode 100644
|
||||
index 0000000..4321e3b
|
||||
--- /dev/null
|
||||
+++ b/test/modules/var_kern.c
|
||||
@@ -0,0 +1,72 @@
|
||||
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||
+
|
||||
+#include <linux/init.h>
|
||||
+#include <linux/module.h>
|
||||
+#include <linux/sysfs.h>
|
||||
+
|
||||
+/* test variable persistence */
|
||||
+
|
||||
+static int mod_int __attribute__((section(".resume_0")));
|
||||
+static char *mod_str1 __attribute__((section(".resume_1"))) = "init";
|
||||
+static char *mod_str2 __attribute__((section(".resume_2"))) = "upgrade";
|
||||
+static char *mod_str __attribute__((section(".resume_3")));
|
||||
+
|
||||
+/*
+ * sysfs show(): print mod_int, then a space and mod_str, into @buf and
+ * return the total number of characters written.
+ */
+static ssize_t var_test_show(struct kobject *kobj,
+			     struct kobj_attribute *attr,
+			     char *buf)
+{
+	char *pos = buf;
+
+	pos += sprintf(pos, "%d", mod_int);
+	pos += sprintf(pos, " %s", mod_str);
+
+	return pos - buf;
+}
|
||||
+
|
||||
+static struct kobj_attribute sysfs_var = __ATTR_RO(var_test);
|
||||
+
|
||||
+/*
+ * Resume hook: increment mod_int, point mod_str at mod_str2, log the state
+ * and re-create the sysfs attribute.
+ */
+static __init int mod1_resume(void)
+{
+	int err;
+
+	mod_str = mod_str2;
+	mod_int++;
+
+	pr_info("This is %s, index %d\n", __func__, mod_int);
+
+	err = sysfs_create_file(kernel_kobj, &sysfs_var.attr);
+	return err;
+}
|
||||
+
|
||||
+/*
+ * Suspend hook: increment mod_int, log the state and remove the sysfs
+ * attribute.  Always returns 0.
+ */
+static __exit int mod1_suspend(void)
+{
+	mod_int++;
+
+	pr_info("This is %s, index %d\n", __func__, mod_int);
+	sysfs_remove_file(kernel_kobj, &sysfs_var.attr);
+
+	return 0;
+}
|
||||
+
|
||||
+/*
+ * Module init: reset mod_int to 0, point mod_str at mod_str1, log the state
+ * and create the sysfs attribute.
+ */
+static __init int mod1_init(void)
+{
+	int err;
+
+	mod_str = mod_str1;
+	mod_int = 0;
+
+	pr_info("This is %s, index %d\n", __func__, mod_int);
+
+	err = sysfs_create_file(kernel_kobj, &sysfs_var.attr);
+	return err;
+}
|
||||
+
|
||||
+/* Module exit: increment mod_int, log the state and remove the sysfs attribute. */
+static __exit void mod1_exit(void)
+{
+	mod_int++;
+
+	pr_info("This is %s, index %d\n", __func__, mod_int);
+	sysfs_remove_file(kernel_kobj, &sysfs_var.attr);
+}
|
||||
+
|
||||
+module_resume(mod1_resume);
|
||||
+module_suspend(mod1_suspend);
|
||||
+module_init(mod1_init);
|
||||
+module_exit(mod1_exit);
|
||||
+MODULE_LICENSE("GPL");
|
||||
diff --git a/test/modules/var_user.py b/test/modules/var_user.py
|
||||
new file mode 100644
|
||||
index 0000000..98c5193
|
||||
--- /dev/null
|
||||
+++ b/test/modules/var_user.py
|
||||
@@ -0,0 +1,40 @@
|
||||
+import unittest
|
||||
+import subprocess
|
||||
+
|
||||
+
|
||||
+class TestVarMethods(unittest.TestCase):
+    """Checks the /sys/kernel/var_test output of the var_kern module.
+
+    The module is loaded with a plain ``insmod`` and then taken through
+    ``rmmod -r`` / ``rmmod`` / ``insmod -r``; the attribute is expected to
+    read "0 init" after the first load and "2 upgrade" after the last one.
+    """
+
+    mod_name = "var_kern"
+
+    def unload_mod(self):
+        """Run ``rmmod`` on the module if /proc/modules lists it."""
+        with open("/proc/modules") as modules:
+            for entry in modules:
+                if entry.split()[0] != self.mod_name:
+                    continue
+                subprocess.check_call(["rmmod", self.mod_name])
+                break
+
+    def setUp(self):
+        subprocess.check_call(["make", "var_kern.ko"])
+        self.unload_mod()
+
+    def tearDown(self):
+        self.unload_mod()
+
+    def _read_var(self):
+        """Return the first line of /sys/kernel/var_test."""
+        with open("/sys/kernel/var_test") as attr:
+            return attr.readline()
+
+    def test_var(self):
+        ko = f"{self.mod_name}.ko"
+
+        subprocess.check_call(["insmod", ko])
+        self.assertEqual(self._read_var(), "0 init")
+
+        subprocess.check_call(["rmmod", "-r", ko])
+        subprocess.check_call(["rmmod", ko])
+        subprocess.check_call(["insmod", "-r", ko])
+        self.assertEqual(self._read_var(), "2 upgrade")
+
+        subprocess.check_call(["rmmod", ko])
|
||||
+
|
||||
+
|
||||
+if __name__ == '__main__':
|
||||
+ unittest.main()
|
||||
diff --git a/test/modules/workqueue_kern.c b/test/modules/workqueue_kern.c
|
||||
new file mode 100644
|
||||
index 0000000..cecfb8c
|
||||
--- /dev/null
|
||||
+++ b/test/modules/workqueue_kern.c
|
||||
@@ -0,0 +1,130 @@
|
||||
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||
+
|
||||
+#include <linux/init.h>
|
||||
+#include <linux/module.h>
|
||||
+#include <linux/slab.h>
|
||||
+#include <linux/sysfs.h>
|
||||
+#include <linux/delay.h>
|
||||
+#include <linux/modrestore.h>
|
||||
+
|
||||
+/*
|
||||
+ * Module state carried across suspend/resume: mod_suspend() stores the
|
||||
+ * workqueue pointer here and mod_resume() reads it back.
|
||||
+ */
|
||||
+struct mod_status {
|
||||
+ struct workqueue_struct *wq;
|
||||
+};
|
||||
+
|
||||
+static struct workqueue_struct *wq;
|
||||
+static int wq_status __attribute__((section(".resume_0")));
|
||||
+
|
||||
+/*
+ * Work handler: increment wq_status, busy-wait for 100 ms and free the work
+ * item.
+ *
+ * The item is queued by queue_worker() as a struct delayed_work, so the
+ * containing object is recovered with to_delayed_work() before kfree().
+ * Freeing @work directly only works while `work` is the first member of
+ * struct delayed_work.
+ */
+static void worker_func(struct work_struct *work)
+{
+	wq_status += 1;
+	pr_info("worker run...\n");
+	mdelay(100);
+	pr_info("worker end.\n");
+	kfree(to_delayed_work(work));
+}
|
||||
+
|
||||
+/*
+ * sysfs show(): flush the workqueue, then print its pointer and the current
+ * wq_status value.  Returns the number of characters written to @buf.
+ */
+static ssize_t wq_test_show(struct kobject *kobj,
+			    struct kobj_attribute *attr,
+			    char *buf)
+{
+	ssize_t len;
+
+	flush_workqueue(wq);
+	len = sprintf(buf, "%pK %d", wq, wq_status);
+
+	return len;
+}
|
||||
+
|
||||
+static struct kobj_attribute wq_test = __ATTR_RO(wq_test);
|
||||
+
|
||||
+/*
+ * Module init: allocate the unbound test workqueue, then expose the wq_test
+ * attribute.
+ *
+ * The workqueue is allocated first because wq_test_show() flushes `wq` as
+ * soon as the attribute can be read; creating the file first would let a
+ * reader hit a NULL workqueue.
+ *
+ * Returns 0 on success, -ENOMEM if the workqueue cannot be allocated, or the
+ * error from sysfs_create_file() (the workqueue is destroyed in that case).
+ */
+static int __init mod_init(void)
+{
+	int retval;
+
+	wq = alloc_workqueue("workqueue_kern_test", WQ_UNBOUND, 0);
+	if (wq == NULL) {
+		pr_err("unable to allocate workqueue\n");
+		return -ENOMEM;
+	}
+
+	retval = sysfs_create_file(kernel_kobj, &wq_test.attr);
+	if (retval != 0) {
+		pr_err("sysfs_create_file failed.\n");
+		destroy_workqueue(wq);
+		wq = NULL;
+		return retval;
+	}
+
+	return 0;
+}
|
||||
+
|
||||
+/*
+ * Module exit: remove the wq_test attribute, then destroy the workqueue.
+ *
+ * The attribute goes first because wq_test_show() flushes `wq`; destroying
+ * the workqueue while the file is still readable would let a reader flush a
+ * freed workqueue.
+ */
+static void __exit mod_exit(void)
+{
+	sysfs_remove_file(kernel_kobj, &wq_test.attr);
+	destroy_workqueue(wq);
+}
|
||||
+
|
||||
+/*
+ * Resume hook: fetch the saved module state, restore `wq` from it, re-create
+ * the wq_test attribute and call resume_workqueue().
+ *
+ * Returns 0 on success, -ENOMEM if no saved state is available, or the error
+ * from sysfs_create_file() / resume_workqueue().  If resume_workqueue()
+ * fails, the attribute is removed again so that a failed resume does not
+ * leave a sysfs file behind.
+ */
+static int __init mod_resume(void)
+{
+	struct mod_status *data;
+	int retval;
+
+	data = get_module_state_space(KBUILD_MODNAME, NULL);
+	if (!data) {
+		pr_info("get_module_state_space failure\n");
+		return -ENOMEM;
+	}
+	wq = data->wq;
+
+	retval = sysfs_create_file(kernel_kobj, &wq_test.attr);
+	if (retval != 0) {
+		pr_err("sysfs_create_file failed.\n");
+		return retval;
+	}
+
+	retval = resume_workqueue(wq);
+	if (retval != 0)
+		sysfs_remove_file(kernel_kobj, &wq_test.attr);
+
+	return retval;
+}
|
||||
+
|
||||
+/*
+ * Allocate one delayed work item and queue it on `wq` with a delay of 100
+ * jiffies; worker_func() frees the item when it runs.
+ *
+ * Returns 0 on success or -ENOMEM if the allocation fails.
+ */
+static int __exit queue_worker(void)
+{
+	/*
+	 * Size the allocation from the pointer's own type.  Allocating only
+	 * sizeof(struct work_struct) is too small for a struct delayed_work,
+	 * so INIT_DELAYED_WORK() would write past the end of the buffer.
+	 */
+	struct delayed_work *worker = kzalloc(sizeof(*worker), GFP_KERNEL);
+
+	if (worker == NULL) {
+		pr_err("alloc worker space failed\n");
+		return -ENOMEM;
+	}
+
+	INIT_DELAYED_WORK(worker, worker_func);
+	queue_delayed_work(wq, worker, 100);
+	return 0;
+}
|
||||
+
|
||||
+/*
+ * Suspend hook: allocate the module state space and record `wq` in it, queue
+ * one delayed work item, suspend the workqueue and finally remove the
+ * wq_test attribute.
+ *
+ * Returns 0 on success, -ENOMEM if the state space or the work item cannot
+ * be allocated, or the error from suspend_workqueue().
+ */
+static int __exit mod_suspend(void)
+{
+	struct mod_status *state;
+	int err;
+
+	state = alloc_module_state_space(KBUILD_MODNAME, sizeof(*state));
+	if (state == NULL) {
+		pr_err("alloc_module_state_space failed\n");
+		return -ENOMEM;
+	}
+	state->wq = wq;
+
+	if (queue_worker() != 0)
+		return -ENOMEM;
+
+	err = suspend_workqueue(wq);
+	if (err != 0) {
+		pr_err("suspend workqueue failed\n");
+		return err;
+	}
+
+	sysfs_remove_file(kernel_kobj, &wq_test.attr);
+	return 0;
+}
|
||||
+
|
||||
+module_init(mod_init);
|
||||
+module_exit(mod_exit);
|
||||
+module_resume(mod_resume);
|
||||
+module_suspend(mod_suspend);
|
||||
+
|
||||
+MODULE_LICENSE("GPL");
|
||||
\ No newline at end of file
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,611 +0,0 @@
|
||||
From 57f1017a9c971d8c3a5ef82d04e6c4bc584e9f00 Mon Sep 17 00:00:00 2001
|
||||
From: "fu.lin" <fulin10@huawei.com>
|
||||
Date: Fri, 8 Apr 2022 16:14:40 +0800
|
||||
Subject: [PATCH 72/72] kabichk: add KABI check code
|
||||
|
||||
Theory:
|
||||
* The export symbol CRCs source:
|
||||
- /boot/symvers-$(uname -r).gz for Image and in tree modules: the
|
||||
IMA mechanism can ensure that the file is trustworthy and has not been tampered with.
|
||||
- ELF section `.symtab` for out of tree modules: the export symbols
|
||||
have a `__crc_` prefix, and their `st_shndx` is `SHN_ABS`
|
||||
* compare CRC value between the known and the module
|
||||
|
||||
Design Details:
|
||||
- collect export symbols from
|
||||
* collect in tree symbols from `/boot/symvers-<release>.gz`
|
||||
* collect out of tree module symbols from the module self
|
||||
- compare external symbols stored in `__versions` section for each module
|
||||
|
||||
Usage:
|
||||
python3 -m upgchk.kabichk \
|
||||
[[-r <kernel release>],...] \
|
||||
[[-m <modname>],...] \
|
||||
-c <modname>
|
||||
Example:
|
||||
python3 -m upgchk.kabichk -c /lib/modules/$(uname -r)/kernel/fs/mbcache.ko
|
||||
python3 -m upgchk.kabichk -m notify.ko -c osp_proc.ko
|
||||
|
||||
Note:
|
||||
The pyelftools library cannot be imported, so an elfutils wrapper is
|
||||
used in its place.
|
||||
|
||||
Signed-off-by: fu.lin <fulin10@huawei.com>
|
||||
---
|
||||
upgchk/Makefile | 23 ++++
|
||||
upgchk/lib/modsym.c | 268 ++++++++++++++++++++++++++++++++++++++
|
||||
upgchk/lib/modsym.h | 39 ++++++
|
||||
upgchk/setup.py | 20 +++
|
||||
upgchk/upgchk/__init__.py | 11 ++
|
||||
upgchk/upgchk/kabichk.py | 163 +++++++++++++++++++++++
|
||||
6 files changed, 524 insertions(+)
|
||||
create mode 100644 upgchk/Makefile
|
||||
create mode 100644 upgchk/lib/modsym.c
|
||||
create mode 100644 upgchk/lib/modsym.h
|
||||
create mode 100644 upgchk/setup.py
|
||||
create mode 100644 upgchk/upgchk/__init__.py
|
||||
create mode 100644 upgchk/upgchk/kabichk.py
|
||||
|
||||
diff --git a/upgchk/Makefile b/upgchk/Makefile
|
||||
new file mode 100644
|
||||
index 0000000..df6b60e
|
||||
--- /dev/null
|
||||
+++ b/upgchk/Makefile
|
||||
@@ -0,0 +1,23 @@
|
||||
+# `dist` must be phony as well: setup.py creates a dist/ directory, and
+# without the declaration make would treat the target as up to date and skip
+# `make dist` from then on.
+.PHONY: build dist install clean
+
+PYTHON=/usr/bin/python3
+TEST=
+PARAMETERS=
+
+build:
+	${PYTHON} setup.py build
+
+dist:
+	${PYTHON} setup.py sdist
+
+install:
+	${PYTHON} setup.py install
+
+clean:
+	${PYTHON} setup.py clean
+	rm -rf \
+		build \
+		dist \
+		upgchk/__pycache__ \
+		upgchk/*.so \
+		upgchk.egg-info
|
||||
diff --git a/upgchk/lib/modsym.c b/upgchk/lib/modsym.c
|
||||
new file mode 100644
|
||||
index 0000000..eb75f68
|
||||
--- /dev/null
|
||||
+++ b/upgchk/lib/modsym.c
|
||||
@@ -0,0 +1,268 @@
|
||||
+#define PY_SSIZE_T_CLEAN
|
||||
+#include <Python.h>
|
||||
+
|
||||
+#include <fcntl.h>
|
||||
+#include <stdio.h>
|
||||
+#include <string.h>
|
||||
+#include <gelf.h>
|
||||
+
|
||||
+#include "modsym.h"
|
||||
+
|
||||
+static Elf_Data *get_elf_sec_data(Elf *elf, const char *sec_name)
|
||||
+{
|
||||
+ Elf_Scn *scn = NULL;
|
||||
+ size_t strndx;
|
||||
+ GElf_Shdr mem;
|
||||
+ GElf_Shdr *shdr;
|
||||
+ const char *name;
|
||||
+
|
||||
+ /* To get the section names. */
|
||||
+ if (elf_getshdrstrndx(elf, &strndx) != 0)
|
||||
+ return NULL;
|
||||
+
|
||||
+ while ((scn = elf_nextscn(elf, scn)) != NULL) {
|
||||
+ shdr = gelf_getshdr(scn, &mem);
|
||||
+ name = elf_strptr (elf, strndx, shdr->sh_name);
|
||||
+
|
||||
+ if (strcmp(name, sec_name) == 0)
|
||||
+ return elf_getdata(scn, NULL);
|
||||
+ }
|
||||
+
|
||||
+ return NULL;
|
||||
+}
|
||||
+
|
||||
+static void modvers_dealloc(PyObject *obj)
|
||||
+{
|
||||
+ ModVersState *mvgstate = (ModVersState *)obj;
|
||||
+
|
||||
+ elf_end(mvgstate->elf);
|
||||
+ return;
|
||||
+}
|
||||
+
|
||||
+static PyObject *modvers_iternext(PyObject *obj)
|
||||
+{
|
||||
+ ModVersState *mvgstate = (ModVersState *)obj;
|
||||
+ struct modversion_info *info = mvgstate->d->d_buf;
|
||||
+ PyObject *elem = NULL;
|
||||
+
|
||||
+ if (mvgstate->seq_index >= 0) {
|
||||
+ size_t i = mvgstate->enum_index;
|
||||
+ /* seq_index < 0 means that the generator is exhausted.
|
||||
+ * Returning NULL in this case is enough. The next() builtin
|
||||
+ * will raise the StopIteration error for us.
|
||||
+ */
|
||||
+ elem = Py_BuildValue("(sk)", info[i].name, info[i].crc);
|
||||
+ mvgstate->seq_index -= 1;
|
||||
+ mvgstate->enum_index += 1;
|
||||
+ } else {
|
||||
+ /* The reference to the sequence is cleared in the first
|
||||
+ * generator call after its exhaustion (after the call that
|
||||
+ * returned the last element).
|
||||
+ * Py_CLEAR will be harmless for subsequent calls since it's
|
||||
+ * idempotent on NULL.
|
||||
+ */
|
||||
+ mvgstate->seq_index = -1;
|
||||
+ }
|
||||
+
|
||||
+ return elem;
|
||||
+}
|
||||
+
|
||||
+static PyObject *modvers_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
|
||||
+{
|
||||
+ ModVersState *mvgstate = NULL;
|
||||
+ PyObject *file;
|
||||
+ int fd;
|
||||
+ Py_ssize_t len;
|
||||
+
|
||||
+ if (!PyArg_ParseTuple(args, "O", &file))
|
||||
+ return NULL;
|
||||
+
|
||||
+ fd = PyObject_AsFileDescriptor(file);
|
||||
+ if (fd < 0)
|
||||
+ return NULL;
|
||||
+
|
||||
+ mvgstate = (ModVersState *)type->tp_alloc(type, 0);
|
||||
+ if (mvgstate == NULL)
|
||||
+ return NULL;
|
||||
+
|
||||
+ elf_version(EV_CURRENT);
|
||||
+ mvgstate->elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
|
||||
+ if (mvgstate->elf == NULL) {
|
||||
+ PyErr_Format(PyExc_TypeError, "File not usable: %s\n", elf_errmsg(-1));
|
||||
+ goto free;
|
||||
+ }
|
||||
+
|
||||
+ mvgstate->d = get_elf_sec_data(mvgstate->elf, VERS_SEC_NAME);
|
||||
+ if (mvgstate->d == NULL) {
|
||||
+ PyErr_Format(PyExc_TypeError, "Can't find ELF section `%s`\n", VERS_SEC_NAME);
|
||||
+ goto elf_end;
|
||||
+ }
|
||||
+
|
||||
+ len = mvgstate->d->d_size / sizeof(struct modversion_info);
|
||||
+ mvgstate->seq_index = len - 1;
|
||||
+ mvgstate->enum_index = 0;
|
||||
+
|
||||
+ return (PyObject *)mvgstate;
|
||||
+
|
||||
+elf_end:
|
||||
+ elf_end(mvgstate->elf);
|
||||
+free:
|
||||
+ type->tp_free(mvgstate);
|
||||
+ return NULL;
|
||||
+}
|
||||
+
|
||||
+PyTypeObject PyModVersGen_Type = {
|
||||
+ PyVarObject_HEAD_INIT(NULL, 0)
|
||||
+ .tp_name = "modvers",
|
||||
+ .tp_basicsize = sizeof(PyModVersGen_Type),
|
||||
+ .tp_itemsize = 0,
|
||||
+ .tp_dealloc = modvers_dealloc,
|
||||
+ .tp_flags = Py_TPFLAGS_DEFAULT,
|
||||
+ .tp_iter = PyObject_SelfIter,
|
||||
+ .tp_iternext = modvers_iternext,
|
||||
+ .tp_alloc = PyType_GenericAlloc,
|
||||
+ .tp_new = modvers_new,
|
||||
+};
|
||||
+
|
||||
+static void modcrcs_dealloc(PyObject *obj)
|
||||
+{
|
||||
+ ModCRCsState *mcgstate = (ModCRCsState *)obj;
|
||||
+
|
||||
+ elf_end(mcgstate->elf);
|
||||
+ return;
|
||||
+}
|
||||
+
|
||||
+static PyObject *modcrcs_iternext(PyObject *obj)
|
||||
+{
|
||||
+ ModCRCsState *mcgstate = (ModCRCsState *)obj;
|
||||
+ const char *strtab = mcgstate->strtab->d_buf;
|
||||
+ GElf_Sym *sym = mcgstate->symtab->d_buf;
|
||||
+ PyObject *elem = NULL;
|
||||
+
|
||||
+ while (mcgstate->seq_index >= 0) {
|
||||
+ size_t i = mcgstate->enum_index;
|
||||
+ const char *name = strtab + sym[i].st_name;
|
||||
+
|
||||
+ mcgstate->seq_index -= 1;
|
||||
+ mcgstate->enum_index += 1;
|
||||
+
|
||||
+ /*
|
||||
+ * If the symbol has '__crc_' prefix and absolute value,
|
||||
+ * it's export symbol, and has CRC.
|
||||
+ */
|
||||
+ if (strncmp(name, CRC_SYM_PREFIX, strlen(CRC_SYM_PREFIX)) == 0
|
||||
+ && sym[i].st_shndx == SHN_ABS) {
|
||||
+ elem = Py_BuildValue("(sk)",
|
||||
+ name+strlen(CRC_SYM_PREFIX),
|
||||
+ sym[i].st_value);
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return elem;
|
||||
+}
|
||||
+
|
||||
+static PyObject *modcrcs_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
|
||||
+{
|
||||
+ ModCRCsState *mcgstate = NULL;
|
||||
+ PyObject *file;
|
||||
+ Elf_Data *d;
|
||||
+ int fd;
|
||||
+ Py_ssize_t len;
|
||||
+
|
||||
+ if (!PyArg_ParseTuple(args, "O", &file))
|
||||
+ return NULL;
|
||||
+
|
||||
+ fd = PyObject_AsFileDescriptor(file);
|
||||
+ if (fd < 0)
|
||||
+ return NULL;
|
||||
+
|
||||
+ mcgstate = (ModCRCsState *)type->tp_alloc(type, 0);
|
||||
+ if (mcgstate == NULL)
|
||||
+ return NULL;
|
||||
+
|
||||
+ elf_version(EV_CURRENT);
|
||||
+ mcgstate->elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
|
||||
+ if (mcgstate->elf == NULL) {
|
||||
+ PyErr_Format(PyExc_TypeError, "File not usable: %s\n", elf_errmsg(-1));
|
||||
+ goto free;
|
||||
+ }
|
||||
+
|
||||
+ mcgstate->strtab = get_elf_sec_data(mcgstate->elf, STRT_SEC_NAME);
|
||||
+ if (mcgstate->strtab == NULL) {
|
||||
+ PyErr_Format(PyExc_TypeError, "Can't find ELF section `%s`\n", STRT_SEC_NAME);
|
||||
+ goto elf_end;
|
||||
+ }
|
||||
+
|
||||
+ mcgstate->symtab = get_elf_sec_data(mcgstate->elf, SYMT_SEC_NAME);
|
||||
+ if (mcgstate->symtab == NULL) {
|
||||
+ PyErr_Format(PyExc_TypeError, "Can't find ELF section `%s`\n", SYMT_SEC_NAME);
|
||||
+ goto elf_end;
|
||||
+ }
|
||||
+
|
||||
+ len = mcgstate->symtab->d_size / sizeof(GElf_Sym);
|
||||
+ mcgstate->seq_index = len - 1;
|
||||
+ mcgstate->enum_index = 0;
|
||||
+
|
||||
+ return (PyObject *)mcgstate;
|
||||
+
|
||||
+elf_end:
|
||||
+ elf_end(mcgstate->elf);
|
||||
+free:
|
||||
+ type->tp_free(mcgstate);
|
||||
+ return NULL;
|
||||
+}
|
||||
+
|
||||
+PyTypeObject PyModCRCsGen_Type = {
|
||||
+ PyVarObject_HEAD_INIT(NULL, 0)
|
||||
+ .tp_name = "modcrcs",
|
||||
+ .tp_basicsize = sizeof(PyModCRCsGen_Type),
|
||||
+ .tp_itemsize = 0,
|
||||
+ .tp_dealloc = modcrcs_dealloc,
|
||||
+ .tp_flags = Py_TPFLAGS_DEFAULT,
|
||||
+ .tp_iter = PyObject_SelfIter,
|
||||
+ .tp_iternext = modcrcs_iternext,
|
||||
+ .tp_alloc = PyType_GenericAlloc,
|
||||
+ .tp_new = modcrcs_new,
|
||||
+};
|
||||
+
|
||||
+/* Module structure */
|
||||
+/* Module structure */
|
||||
+static struct PyModuleDef modvers_module = {
|
||||
+ PyModuleDef_HEAD_INIT,
|
||||
+ .m_name = "modsym",
|
||||
+ .m_doc = "iter `" VERS_SEC_NAME "` section items",
|
||||
+ .m_size = -1,
|
||||
+};
|
||||
+
|
||||
+/* Module initialization function */
|
||||
+PyMODINIT_FUNC PyInit_modsym(void)
|
||||
+{
|
||||
+ PyObject *m = PyModule_Create(&modvers_module);
|
||||
+ if (m == NULL)
|
||||
+ return NULL;
|
||||
+
|
||||
+ if (PyType_Ready(&PyModVersGen_Type) < 0)
|
||||
+ return NULL;
|
||||
+
|
||||
+ Py_INCREF(&PyModVersGen_Type);
|
||||
+ if (PyModule_AddObject(m, PyModVersGen_Type.tp_name,
|
||||
+ (PyObject *)&PyModVersGen_Type) < 0)
|
||||
+ goto free_vers;
|
||||
+
|
||||
+ if (PyType_Ready(&PyModCRCsGen_Type) < 0)
|
||||
+ goto free_vers;
|
||||
+
|
||||
+ Py_INCREF(&PyModCRCsGen_Type);
|
||||
+ if (PyModule_AddObject(m, PyModCRCsGen_Type.tp_name,
|
||||
+ (PyObject *)&PyModCRCsGen_Type) < 0)
|
||||
+ goto free_crcs;
|
||||
+
|
||||
+ return m;
|
||||
+free_crcs:
|
||||
+ Py_DECREF(&PyModCRCsGen_Type);
|
||||
+free_vers:
|
||||
+ Py_DECREF(&PyModVersGen_Type);
|
||||
+ Py_DECREF(m);
|
||||
+ return NULL;
|
||||
+}
|
||||
diff --git a/upgchk/lib/modsym.h b/upgchk/lib/modsym.h
|
||||
new file mode 100644
|
||||
index 0000000..b8069c3
|
||||
--- /dev/null
|
||||
+++ b/upgchk/lib/modsym.h
|
||||
@@ -0,0 +1,39 @@
|
||||
+#ifndef __PYTHON_MODSYM_H__
|
||||
+#define __PYTHON_MODSYM_H__
|
||||
+
|
||||
+#include <libelf.h>
|
||||
+
|
||||
+typedef struct {
|
||||
+ PyObject_HEAD
|
||||
+ Py_ssize_t seq_index;
|
||||
+ Py_ssize_t enum_index;
|
||||
+ Elf *elf;
|
||||
+ Elf_Data *d;
|
||||
+} ModVersState;
|
||||
+
|
||||
+#define VERS_SEC_NAME "__versions"
|
||||
+
|
||||
+/* --- the following is copied from linux src --- */
|
||||
+#define MAX_PARAM_PREFIX_LEN (64 - sizeof(unsigned long))
|
||||
+#define MODULE_NAME_LEN MAX_PARAM_PREFIX_LEN
|
||||
+
|
||||
+struct modversion_info {
|
||||
+ unsigned long crc;
|
||||
+ char name[MODULE_NAME_LEN];
|
||||
+};
|
||||
+/* --- end --- */
|
||||
+
|
||||
+typedef struct {
|
||||
+ PyObject_HEAD
|
||||
+ Py_ssize_t seq_index;
|
||||
+ Py_ssize_t enum_index;
|
||||
+ Elf *elf;
|
||||
+ Elf_Data *strtab;
|
||||
+ Elf_Data *symtab;
|
||||
+} ModCRCsState;
|
||||
+
|
||||
+#define STRT_SEC_NAME ".strtab"
|
||||
+#define SYMT_SEC_NAME ".symtab"
|
||||
+#define CRC_SYM_PREFIX "__crc_"
|
||||
+
|
||||
+#endif /* __PYTHON_MODSYM_H__ */
|
||||
diff --git a/upgchk/setup.py b/upgchk/setup.py
|
||||
new file mode 100644
|
||||
index 0000000..6758c95
|
||||
--- /dev/null
|
||||
+++ b/upgchk/setup.py
|
||||
@@ -0,0 +1,20 @@
|
||||
+#!/usr/bin/python3
|
||||
+# -*- coding: utf-8 -*-
|
||||
+
|
||||
+from setuptools import setup, Extension
|
||||
+
|
||||
+if __name__ == "__main__":
|
||||
+
|
||||
+ setup(name="upgchk",
|
||||
+ version="0.1",
|
||||
+ description="Check the kernel upgrading environment",
|
||||
+
|
||||
+ packages=["upgchk"],
|
||||
+ ext_modules=[
|
||||
+ Extension("modsym",
|
||||
+ sources=["lib/modsym.c"],
|
||||
+ libraries=["elf"])
|
||||
+ ],
|
||||
+
|
||||
+ python_requires='>=3.6',
|
||||
+ )
|
||||
diff --git a/upgchk/upgchk/__init__.py b/upgchk/upgchk/__init__.py
|
||||
new file mode 100644
|
||||
index 0000000..c831e1d
|
||||
--- /dev/null
|
||||
+++ b/upgchk/upgchk/__init__.py
|
||||
@@ -0,0 +1,11 @@
|
||||
+# -*- coding: utf-8 -*-
|
||||
+
|
||||
+"""
|
||||
+.. module:: upgchk
|
||||
+ :synopsis: Check the kernel upgrading environment
|
||||
+"""
|
||||
+
|
||||
+__title = "upgchk"
|
||||
+__description = "Check the upgrade environment"
|
||||
+__license__ = "GPL-2.0-or-later or LGPL-2.1-only"
|
||||
+__version__ = "0.1"
|
||||
diff --git a/upgchk/upgchk/kabichk.py b/upgchk/upgchk/kabichk.py
|
||||
new file mode 100644
|
||||
index 0000000..cccacf3
|
||||
--- /dev/null
|
||||
+++ b/upgchk/upgchk/kabichk.py
|
||||
@@ -0,0 +1,163 @@
|
||||
+#!/usr/bin/python3
|
||||
+# -*- coding: utf-8 -*-
|
||||
+
|
||||
+'''
|
||||
+Theory:
|
||||
+- compare CRC value between the known and the module
|
||||
+- The export symbols CRC source:
|
||||
+ * `/boot/symvers-<release>.gz` for in tree modules and Image
|
||||
+ - the ima mechanism could ensure the file credibility and non-tamper
|
||||
+ * The `.symtab` section for out of tree modules
|
||||
+ - name format: `__crc_<symbol name>`
|
||||
+ - it's absolute value, means: `sym->st_shndx == SHN_ABS`
|
||||
+
|
||||
+Design Details:
|
||||
+- collect export symbols from
|
||||
+ * collect in tree symbols from `/boot/symvers-<release>.gz`
|
||||
+ * collect out of tree module symbols from the module self
|
||||
+- compare external symbols stored in `__versions` section for each module
|
||||
+
|
||||
+`__versions` section data format:
|
||||
+
|
||||
+ # define MAX_PARAM_PREFIX_LEN (64 - sizeof(unsigned long))
|
||||
+ # define MODULE_NAME_LEN MAX_PARAM_PREFIX_LEN
|
||||
+
|
||||
+ struct modversion_info {
|
||||
+ unsigned long crc;
|
||||
+ char name[MODULE_NAME_LEN];
|
||||
+ };
|
||||
+
|
||||
+Usage:
|
||||
+ python3 -m upgchk.kabichk \
|
||||
+ [[-r <kernel release>],...] \
|
||||
+ [[-m <modname>],...] \
|
||||
+ -c <modname>
|
||||
+Example:
|
||||
+ python3 -m upgchk.kabichk -c /lib/modules/$(uname -r)/kernel/fs/mbcache.ko
|
||||
+ python3 -m upgchk.kabichk -m notify.ko -c osp_proc.ko
|
||||
+'''
|
||||
+
|
||||
+import argparse
|
||||
+import gzip
|
||||
+import pathlib
|
||||
+import platform
|
||||
+from typing import Tuple
|
||||
+
|
||||
+import modsym
|
||||
+
|
||||
+__all__ = ["KABI"]
|
||||
+
|
||||
+ELF_SELFMAG = 4
|
||||
+ELF_ELFMAG = b"\177ELF"
|
||||
+
|
||||
+
|
||||
+class KABI:
|
||||
+ def __init__(self, version: str):
|
||||
+ """
|
||||
+ read all symbols of the specific kernel
|
||||
+ """
|
||||
+ self._symbols = dict()
|
||||
+ filename = f"symvers-{version}.gz"
|
||||
+ filepath = pathlib.Path("/boot/").joinpath(filename)
|
||||
+
|
||||
+ with gzip.open(filepath, "rt") as f:
|
||||
+ for line in f.readlines():
|
||||
+ # (crc, sym, loc, type)
|
||||
+ (_crc, sym, loc, _) = line.split()
|
||||
+ crc = int(_crc, 16) # convert hex crc to integer
|
||||
+ self._insert(sym, (crc, sym, loc))
|
||||
+
|
||||
+ def _insert(self, key: str, val: Tuple[int, str, str]):
|
||||
+ inst = self._symbols.get(key)
|
||||
+ if inst is None:
|
||||
+ self._symbols[key] = val
|
||||
+ elif inst != val:
|
||||
+ raise KeyError(
|
||||
+ f"{key} already exits value {self._symbols[key]}, can't insert new value {val}")
|
||||
+
|
||||
+ def _get(self, key: str) -> Tuple[int, str, str]:
|
||||
+ return self._symbols.get(key)
|
||||
+
|
||||
+ def _parse_mod_vers(self, filepath: pathlib.Path) -> Tuple[int, str]:
|
||||
+ with open(filepath, "rb") as f:
|
||||
+ magic = f.read(ELF_SELFMAG)
|
||||
+ if magic != ELF_ELFMAG:
|
||||
+ raise TypeError(f"{filepath} isn't an ELF file")
|
||||
+
|
||||
+ for sym, crc in modsym.modvers(f):
|
||||
+ yield (sym, crc)
|
||||
+
|
||||
+ def check_mod_syms(self, filepath: pathlib.Path) -> Tuple[bool, str]:
|
||||
+ if not filepath.exists():
|
||||
+ raise FileNotFoundError(f"{filepath} isn't found")
|
||||
+
|
||||
+ for sym, crc in self._parse_mod_vers(filepath):
|
||||
+ val = self._get(sym)
|
||||
+ if val is None:
|
||||
+ msg = f"symbol {sym} isn't known"
|
||||
+ return (False, msg)
|
||||
+ elif val[0] != crc:
|
||||
+ msg = f"symbol {sym} CRC should be {hex(crc)}, but {hex(val[0])}"
|
||||
+ return (False, msg)
|
||||
+
|
||||
+ return (True, "")
|
||||
+
|
||||
+ def _parse_mod_crcs(self, filepath: pathlib.Path) -> Tuple[int, str]:
|
||||
+ with open(filepath, "rb") as f:
|
||||
+ magic = f.read(ELF_SELFMAG)
|
||||
+ if magic != ELF_ELFMAG:
|
||||
+ raise TypeError(f"{filepath} isn't an ELF file")
|
||||
+
|
||||
+ for inst in modsym.modcrcs(f):
|
||||
+ yield inst
|
||||
+
|
||||
+ def add_mod_crcs(self, filepath: pathlib.Path):
|
||||
+ if not filepath.exists():
|
||||
+ raise FileNotFoundError(f"{filepath} isn't found")
|
||||
+
|
||||
+ modname = filepath.name[:-3]
|
||||
+ for (sym, crc) in self._parse_mod_crcs(filepath):
|
||||
+ self._insert(sym, (crc, sym, modname))
|
||||
+
|
||||
+
|
||||
+def parse_argument() -> argparse.Namespace:
|
||||
+ parser = argparse.ArgumentParser()
|
||||
+ parser.add_argument("-r", "--release", action="store",
|
||||
+ required=False, default=platform.release(),
|
||||
+ help="specific the kernel release version")
|
||||
+ parser.add_argument("-m", "--module", action="append",
|
||||
+ required=False, default=[],
|
||||
+ help="specific the out of tree modules")
|
||||
+ parser.add_argument("-c", "--check", action="append",
|
||||
+ required=True,
|
||||
+ help="specific the checked module, e.g. -c a.ko -c b.ko")
|
||||
+ options = parser.parse_args()
|
||||
+ return (options.release, options.module, options.check)
|
||||
+
|
||||
+
|
||||
+def main():
|
||||
+ release, modules, checks = parse_argument()
|
||||
+ kabi = KABI(release)
|
||||
+
|
||||
+ for mod in modules:
|
||||
+ filepath = pathlib.Path(mod)
|
||||
+ kabi.add_mod_crcs(filepath)
|
||||
+
|
||||
+ print("-------------- start check --------------")
|
||||
+ passed = 0
|
||||
+ failed = 0
|
||||
+ for mod in checks:
|
||||
+ filepath = pathlib.Path(mod)
|
||||
+ modname = filepath.name
|
||||
+ result, msg = kabi.check_mod_syms(filepath)
|
||||
+ if not result:
|
||||
+ print(f"module {modname} fail: {msg}")
|
||||
+ failed += 1
|
||||
+ else:
|
||||
+ print(f"module {modname} pass")
|
||||
+ passed += 1
|
||||
+ print(f"-------------- {passed} pass, {failed} failed --------------")
|
||||
+
|
||||
+
|
||||
+if __name__ == '__main__':
|
||||
+ main()
|
||||
--
|
||||
2.34.1
|
||||
|
||||
@ -1,269 +0,0 @@
|
||||
From 9e512890a5858431acb42a2a685b445e7111dfc5 Mon Sep 17 00:00:00 2001
|
||||
From: z00557007 <zhoujie133@huawei.com>
|
||||
Date: Wed, 4 Jan 2023 16:26:16 +0800
|
||||
Subject: [PATCH] criu: fix conflicting headers There are several changes in
|
||||
glibc 2.36 that make sys/mount.h header incompatible with kernel headers:
|
||||
|
||||
https://sourceware.org/glibc/wiki/Release/2.36#Usage_of_.3Clinux.2Fmount.h.3E_and_.3Csys.2Fmount.h.3E
|
||||
|
||||
This patch removes conflicting includes for `<linux/mount.h>` and
|
||||
updates the content of `criu/include/linux/mount.h` to match
|
||||
`/usr/include/sys/mount.h`. In addition, the inline definitions of sys_*()
|
||||
functions have been moved from "linux/mount.h" to "syscall.h" to
|
||||
avoid conflicts with `uapi/compel/plugins/std/syscall.h` and
|
||||
`<unistd.h>`. The include for `<linux/aio_abi.h>` has been replaced
|
||||
with local include to avoid conflicts with `<sys/mount.h>`.
|
||||
|
||||
Signed-off-by: Radostin Stoyanov <rstoyanov@fedoraproject.org>
|
||||
---
|
||||
Makefile.config | 2 +-
|
||||
criu/cgroup.c | 1 +
|
||||
criu/cr-check.c | 2 +-
|
||||
criu/cr-restore.c | 3 ++-
|
||||
criu/include/aio.h | 2 +-
|
||||
criu/include/linux/aio_abi.h | 14 ++++++++++++++
|
||||
criu/include/linux/mount.h | 34 ++++++++++++++++++----------------
|
||||
criu/include/syscall.h | 17 +++++++++++++++++
|
||||
criu/pie/parasite.c | 2 +-
|
||||
criu/util.c | 1 +
|
||||
scripts/feature-tests.mak | 13 -------------
|
||||
11 files changed, 57 insertions(+), 34 deletions(-)
|
||||
create mode 100644 criu/include/linux/aio_abi.h
|
||||
create mode 100644 criu/include/syscall.h
|
||||
|
||||
diff --git a/Makefile.config b/Makefile.config
|
||||
index 6e3e1b0..0b4ccd4 100644
|
||||
--- a/Makefile.config
|
||||
+++ b/Makefile.config
|
||||
@@ -70,7 +70,7 @@ export DEFINES += $(FEATURE_DEFINES)
|
||||
export CFLAGS += $(FEATURE_DEFINES)
|
||||
|
||||
FEATURES_LIST := TCP_REPAIR STRLCPY STRLCAT PTRACE_PEEKSIGINFO \
|
||||
- SETPROCTITLE_INIT MEMFD TCP_REPAIR_WINDOW FSCONFIG MEMFD_CREATE
|
||||
+ SETPROCTITLE_INIT MEMFD TCP_REPAIR_WINDOW MEMFD_CREATE
|
||||
|
||||
# $1 - config name
|
||||
define gen-feature-test
|
||||
diff --git a/criu/cgroup.c b/criu/cgroup.c
|
||||
index ccac37f..3874b65 100644
|
||||
--- a/criu/cgroup.c
|
||||
+++ b/criu/cgroup.c
|
||||
@@ -27,6 +27,7 @@
|
||||
#include "images/cgroup.pb-c.h"
|
||||
#include "kerndat.h"
|
||||
#include "linux/mount.h"
|
||||
+#include "syscall.h"
|
||||
|
||||
/*
|
||||
* This structure describes set of controller groups
|
||||
diff --git a/criu/cr-check.c b/criu/cr-check.c
|
||||
index ba87511..951e71d 100644
|
||||
--- a/criu/cr-check.c
|
||||
+++ b/criu/cr-check.c
|
||||
@@ -21,7 +21,6 @@
|
||||
#include <sys/prctl.h>
|
||||
#include <sched.h>
|
||||
#include <sys/mount.h>
|
||||
-#include <linux/aio_abi.h>
|
||||
|
||||
#include "../soccr/soccr.h"
|
||||
|
||||
@@ -52,6 +51,7 @@
|
||||
#include "net.h"
|
||||
#include "restorer.h"
|
||||
#include "uffd.h"
|
||||
+#include "linux/aio_abi.h"
|
||||
|
||||
#include "images/inventory.pb-c.h"
|
||||
|
||||
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
|
||||
index c3ff65d..10be969 100644
|
||||
--- a/criu/cr-restore.c
|
||||
+++ b/criu/cr-restore.c
|
||||
@@ -22,7 +22,6 @@
|
||||
#include <compel/ptrace.h>
|
||||
#include "common/compiler.h"
|
||||
|
||||
-#include "linux/mount.h"
|
||||
#include "linux/rseq.h"
|
||||
|
||||
#include "clone-noasan.h"
|
||||
@@ -89,6 +88,8 @@
|
||||
#include <compel/plugins/std/syscall-codes.h>
|
||||
#include "compel/include/asm/syscall.h"
|
||||
|
||||
+#include "linux/mount.h"
|
||||
+
|
||||
#include "protobuf.h"
|
||||
#include "images/sa.pb-c.h"
|
||||
#include "images/timer.pb-c.h"
|
||||
diff --git a/criu/include/aio.h b/criu/include/aio.h
|
||||
index f8a59df..715a45c 100644
|
||||
--- a/criu/include/aio.h
|
||||
+++ b/criu/include/aio.h
|
||||
@@ -1,7 +1,7 @@
|
||||
#ifndef __CR_AIO_H__
|
||||
#define __CR_AIO_H__
|
||||
|
||||
-#include <linux/aio_abi.h>
|
||||
+#include "linux/aio_abi.h"
|
||||
#include "images/mm.pb-c.h"
|
||||
unsigned int aio_estimate_nr_reqs(unsigned int size);
|
||||
int dump_aio_ring(MmEntry *mme, struct vma_area *vma);
|
||||
diff --git a/criu/include/linux/aio_abi.h b/criu/include/linux/aio_abi.h
|
||||
new file mode 100644
|
||||
index 0000000..d9ce787
|
||||
--- /dev/null
|
||||
+++ b/criu/include/linux/aio_abi.h
|
||||
@@ -0,0 +1,14 @@
|
||||
+#ifndef __LINUX__AIO_ABI_H
|
||||
+#define __LINUX__AIO_ABI_H
|
||||
+
|
||||
+typedef __kernel_ulong_t aio_context_t;
|
||||
+
|
||||
+/* read() from /dev/aio returns these structures. */
|
||||
+struct io_event {
|
||||
+ __u64 data; /* the data field from the iocb */
|
||||
+ __u64 obj; /* what iocb this event came from */
|
||||
+ __s64 res; /* result code for this event */
|
||||
+ __s64 res2; /* secondary result */
|
||||
+};
|
||||
+
|
||||
+#endif /* __LINUX__AIO_ABI_H */
|
||||
diff --git a/criu/include/linux/mount.h b/criu/include/linux/mount.h
|
||||
index 840d627..0d55a58 100644
|
||||
--- a/criu/include/linux/mount.h
|
||||
+++ b/criu/include/linux/mount.h
|
||||
@@ -4,32 +4,34 @@
|
||||
#include "common/config.h"
|
||||
#include "compel/plugins/std/syscall-codes.h"
|
||||
|
||||
-#ifdef CONFIG_HAS_FSCONFIG
|
||||
-#include <linux/mount.h>
|
||||
-#else
|
||||
+/* Copied from /usr/include/sys/mount.h */
|
||||
+
|
||||
+#ifndef FSCONFIG_CMD_CREATE
|
||||
+/* The type of fsconfig call made. */
|
||||
enum fsconfig_command {
|
||||
FSCONFIG_SET_FLAG = 0, /* Set parameter, supplying no value */
|
||||
+#define FSCONFIG_SET_FLAG FSCONFIG_SET_FLAG
|
||||
FSCONFIG_SET_STRING = 1, /* Set parameter, supplying a string value */
|
||||
+#define FSCONFIG_SET_STRING FSCONFIG_SET_STRING
|
||||
FSCONFIG_SET_BINARY = 2, /* Set parameter, supplying a binary blob value */
|
||||
+#define FSCONFIG_SET_BINARY FSCONFIG_SET_BINARY
|
||||
FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */
|
||||
+#define FSCONFIG_SET_PATH FSCONFIG_SET_PATH
|
||||
FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */
|
||||
+#define FSCONFIG_SET_PATH_EMPTY FSCONFIG_SET_PATH_EMPTY
|
||||
FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */
|
||||
+#define FSCONFIG_SET_FD FSCONFIG_SET_FD
|
||||
FSCONFIG_CMD_CREATE = 6, /* Invoke superblock creation */
|
||||
+#define FSCONFIG_CMD_CREATE FSCONFIG_CMD_CREATE
|
||||
FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */
|
||||
+#define FSCONFIG_CMD_RECONFIGURE FSCONFIG_CMD_RECONFIGURE
|
||||
};
|
||||
-#endif
|
||||
+#endif // FSCONFIG_CMD_CREATE
|
||||
|
||||
-static inline int sys_fsopen(const char *fsname, unsigned int flags)
|
||||
-{
|
||||
- return syscall(__NR_fsopen, fsname, flags);
|
||||
-}
|
||||
-static inline int sys_fsconfig(int fd, unsigned int cmd, const char *key, const char *value, int aux)
|
||||
-{
|
||||
- return syscall(__NR_fsconfig, fd, cmd, key, value, aux);
|
||||
-}
|
||||
-static inline int sys_fsmount(int fd, unsigned int flags, unsigned int attr_flags)
|
||||
-{
|
||||
- return syscall(__NR_fsmount, fd, flags, attr_flags);
|
||||
-}
|
||||
+#ifndef MS_MGC_VAL
|
||||
+/* Magic mount flag number. Has to be or-ed to the flag values. */
|
||||
+#define MS_MGC_VAL 0xc0ed0000 /* Magic flag number to indicate "new" flags */
|
||||
+#define MS_MGC_MSK 0xffff0000 /* Magic flag number mask */
|
||||
+#endif
|
||||
|
||||
#endif
|
||||
diff --git a/criu/include/syscall.h b/criu/include/syscall.h
|
||||
new file mode 100644
|
||||
index 0000000..3c0b3a4
|
||||
--- /dev/null
|
||||
+++ b/criu/include/syscall.h
|
||||
@@ -0,0 +1,17 @@
|
||||
+#ifndef __CR_SYSCALL_H__
|
||||
+#define __CR_SYSCALL_H__
|
||||
+
|
||||
+static inline int sys_fsopen(const char *fsname, unsigned int flags)
|
||||
+{
|
||||
+ return syscall(__NR_fsopen, fsname, flags);
|
||||
+}
|
||||
+static inline int sys_fsconfig(int fd, unsigned int cmd, const char *key, const char *value, int aux)
|
||||
+{
|
||||
+ return syscall(__NR_fsconfig, fd, cmd, key, value, aux);
|
||||
+}
|
||||
+static inline int sys_fsmount(int fd, unsigned int flags, unsigned int attr_flags)
|
||||
+{
|
||||
+ return syscall(__NR_fsmount, fd, flags, attr_flags);
|
||||
+}
|
||||
+
|
||||
+#endif /* __CR_SYSCALL_H__ */
|
||||
diff --git a/criu/pie/parasite.c b/criu/pie/parasite.c
|
||||
index c781303..9f8fbf8 100644
|
||||
--- a/criu/pie/parasite.c
|
||||
+++ b/criu/pie/parasite.c
|
||||
@@ -3,7 +3,6 @@
|
||||
#include <signal.h>
|
||||
#include <linux/limits.h>
|
||||
#include <linux/capability.h>
|
||||
-#include <sys/mount.h>
|
||||
#include <stdarg.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/uio.h>
|
||||
@@ -14,6 +13,7 @@
|
||||
#include "int.h"
|
||||
#include "types.h"
|
||||
#include <compel/plugins/std/syscall.h>
|
||||
+#include "linux/mount.h"
|
||||
#include "parasite.h"
|
||||
#include "fcntl.h"
|
||||
#include "prctl.h"
|
||||
diff --git a/criu/util.c b/criu/util.c
|
||||
index e682161..915a043 100644
|
||||
--- a/criu/util.c
|
||||
+++ b/criu/util.c
|
||||
@@ -37,6 +37,7 @@
|
||||
#include "mem.h"
|
||||
#include "namespaces.h"
|
||||
#include "criu-log.h"
|
||||
+#include "syscall.h"
|
||||
|
||||
#include "clone-noasan.h"
|
||||
#include "cr_options.h"
|
||||
diff --git a/scripts/feature-tests.mak b/scripts/feature-tests.mak
|
||||
index 8df20af..eecefa3 100644
|
||||
--- a/scripts/feature-tests.mak
|
||||
+++ b/scripts/feature-tests.mak
|
||||
@@ -137,19 +137,6 @@ ENTRY(main)
|
||||
END(main)
|
||||
endef
|
||||
|
||||
-define FEATURE_TEST_FSCONFIG
|
||||
-
|
||||
-#include <linux/mount.h>
|
||||
-
|
||||
-int main(void)
|
||||
-{
|
||||
- if (FSCONFIG_CMD_CREATE > 0)
|
||||
- return 0;
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-endef
|
||||
-
|
||||
define FEATURE_TEST_NFTABLES_LIB_API_0
|
||||
|
||||
#include <string.h>
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,104 +0,0 @@
|
||||
From ae4b0ff2b9c91859513d841ebb71a67bed8a0d7c Mon Sep 17 00:00:00 2001
|
||||
From: z00557007 <zhoujie133@huawei.com>
|
||||
Date: Wed, 4 Jan 2023 17:22:29 +0800
|
||||
Subject: [PATCH] mount: add definition for FSOPEN_CLOEXEC A recent change in
|
||||
glibc introduced `enum fsconfig_command` [1] and as a result the compilation
|
||||
of criu fails with the following errors
|
||||
|
||||
In file included from criu/pie/util.c:3:
|
||||
/usr/include/sys/mount.h:240:6: error: redeclaration of 'enum fsconfig_command'
|
||||
240 | enum fsconfig_command
|
||||
| ^~~~~~~~~~~~~~~~
|
||||
In file included from /usr/include/sys/mount.h:32:
|
||||
criu/include/linux/mount.h:11:6: note: originally defined here
|
||||
11 | enum fsconfig_command {
|
||||
| ^~~~~~~~~~~~~~~~
|
||||
/usr/include/sys/mount.h:242:3: error: redeclaration of enumerator 'FSCONFIG_SET_FLAG'
|
||||
242 | FSCONFIG_SET_FLAG = 0, /* Set parameter, supplying no value */
|
||||
| ^~~~~~~~~~~~~~~~~
|
||||
criu/include/linux/mount.h:12:9: note: previous definition of 'FSCONFIG_SET_FLAG' with type 'enum fsconfig_command'
|
||||
12 | FSCONFIG_SET_FLAG = 0, /* Set parameter, supplying no value */
|
||||
| ^~~~~~~~~~~~~~~~~
|
||||
/usr/include/sys/mount.h:244:3: error: redeclaration of enumerator 'FSCONFIG_SET_STRING'
|
||||
244 | FSCONFIG_SET_STRING = 1, /* Set parameter, supplying a string value */
|
||||
| ^~~~~~~~~~~~~~~~~~~
|
||||
criu/include/linux/mount.h:14:9: note: previous definition of 'FSCONFIG_SET_STRING' with type 'enum fsconfig_command'
|
||||
14 | FSCONFIG_SET_STRING = 1, /* Set parameter, supplying a string value */
|
||||
| ^~~~~~~~~~~~~~~~~~~
|
||||
/usr/include/sys/mount.h:246:3: error: redeclaration of enumerator 'FSCONFIG_SET_BINARY'
|
||||
246 | FSCONFIG_SET_BINARY = 2, /* Set parameter, supplying a binary blob value */
|
||||
| ^~~~~~~~~~~~~~~~~~~
|
||||
criu/include/linux/mount.h:16:9: note: previous definition of 'FSCONFIG_SET_BINARY' with type 'enum fsconfig_command'
|
||||
16 | FSCONFIG_SET_BINARY = 2, /* Set parameter, supplying a binary blob value */
|
||||
| ^~~~~~~~~~~~~~~~~~~
|
||||
/usr/include/sys/mount.h:248:3: error: redeclaration of enumerator 'FSCONFIG_SET_PATH'
|
||||
248 | FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */
|
||||
| ^~~~~~~~~~~~~~~~~
|
||||
criu/include/linux/mount.h:18:9: note: previous definition of 'FSCONFIG_SET_PATH' with type 'enum fsconfig_command'
|
||||
18 | FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */
|
||||
| ^~~~~~~~~~~~~~~~~
|
||||
/usr/include/sys/mount.h:250:3: error: redeclaration of enumerator 'FSCONFIG_SET_PATH_EMPTY'
|
||||
250 | FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */
|
||||
| ^~~~~~~~~~~~~~~~~~~~~~~
|
||||
criu/include/linux/mount.h:20:9: note: previous definition of 'FSCONFIG_SET_PATH_EMPTY' with type 'enum fsconfig_command'
|
||||
20 | FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */
|
||||
| ^~~~~~~~~~~~~~~~~~~~~~~
|
||||
/usr/include/sys/mount.h:252:3: error: redeclaration of enumerator 'FSCONFIG_SET_FD'
|
||||
252 | FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */
|
||||
| ^~~~~~~~~~~~~~~
|
||||
criu/include/linux/mount.h:22:9: note: previous definition of 'FSCONFIG_SET_FD' with type 'enum fsconfig_command'
|
||||
22 | FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */
|
||||
| ^~~~~~~~~~~~~~~
|
||||
/usr/include/sys/mount.h:254:3: error: redeclaration of enumerator 'FSCONFIG_CMD_CREATE'
|
||||
254 | FSCONFIG_CMD_CREATE = 6, /* Invoke superblock creation */
|
||||
| ^~~~~~~~~~~~~~~~~~~
|
||||
criu/include/linux/mount.h:24:9: note: previous definition of 'FSCONFIG_CMD_CREATE' with type 'enum fsconfig_command'
|
||||
24 | FSCONFIG_CMD_CREATE = 6, /* Invoke superblock creation */
|
||||
| ^~~~~~~~~~~~~~~~~~~
|
||||
/usr/include/sys/mount.h:256:3: error: redeclaration of enumerator 'FSCONFIG_CMD_RECONFIGURE'
|
||||
256 | FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */
|
||||
| ^~~~~~~~~~~~~~~~~~~~~~~~
|
||||
criu/include/linux/mount.h:26:9: note: previous definition of 'FSCONFIG_CMD_RECONFIGURE' with type 'enum fsconfig_command'
|
||||
26 | FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */
|
||||
|
||||
This patch adds a definition for FSOPEN_CLOEXEC to solve this problem. In particular,
|
||||
sys/mount.h includes an #ifndef check for FSOPEN_CLOEXEC surrounding `enum fsconfig_command`.
|
||||
|
||||
[1] https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=7eae6a91e9b1670330c9f15730082c91c0b1d570
|
||||
|
||||
Reported-by: Younes Manton (@ymanton)
|
||||
Signed-off-by: Radostin Stoyanov <rstoyanov@fedoraproject.org>
|
||||
---
|
||||
criu/include/linux/mount.h | 9 +++++++--
|
||||
1 file changed, 7 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/criu/include/linux/mount.h b/criu/include/linux/mount.h
|
||||
index 0d55a58..ee9386c 100644
|
||||
--- a/criu/include/linux/mount.h
|
||||
+++ b/criu/include/linux/mount.h
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
/* Copied from /usr/include/sys/mount.h */
|
||||
|
||||
-#ifndef FSCONFIG_CMD_CREATE
|
||||
+#ifndef FSOPEN_CLOEXEC
|
||||
/* The type of fsconfig call made. */
|
||||
enum fsconfig_command {
|
||||
FSCONFIG_SET_FLAG = 0, /* Set parameter, supplying no value */
|
||||
@@ -26,7 +26,12 @@ enum fsconfig_command {
|
||||
FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */
|
||||
#define FSCONFIG_CMD_RECONFIGURE FSCONFIG_CMD_RECONFIGURE
|
||||
};
|
||||
-#endif // FSCONFIG_CMD_CREATE
|
||||
+#endif // FSOPEN_CLOEXEC
|
||||
+
|
||||
+/* fsopen flags. With the redundant definition, we check if the kernel,
|
||||
+ * glibc value and our value still match.
|
||||
+ */
|
||||
+#define FSOPEN_CLOEXEC 0x00000001
|
||||
|
||||
#ifndef MS_MGC_VAL
|
||||
/* Magic mount flag number. Has to be or-ed to the flag values. */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,43 +0,0 @@
|
||||
From 0568889ee368c2bc2682aae5c69d67ac16eac675 Mon Sep 17 00:00:00 2001
|
||||
From: Adrian Reber <areber@redhat.com>
|
||||
Date: Tue, 18 Jan 2022 17:22:46 +0000
|
||||
Subject: [PATCH] compel: fix parasite with GCC 12
|
||||
|
||||
Parasite creation started to fail with GCC 12:
|
||||
|
||||
On x86_64 with:
|
||||
./compel/compel-host hgen -f criu/pie/restorer.built-in.o -o criu/pie/restorer-blob.h
|
||||
Error (compel/src/lib/handle-elf-host.c:337): Unexpected undefined symbol: `strlen'. External symbol in PIE?
|
||||
|
||||
On aarch64 with:
|
||||
ld: criu/pie/restorer.o: in function `lsm_set_label':
|
||||
/drone/src/criu/pie/restorer.c:174: undefined reference to `strlen'
|
||||
|
||||
Line 174 is: "for (len = 0; label[len]; len++)"
|
||||
|
||||
Adding '-ffreestanding' to parasite compilation fixes these errors
|
||||
because, according to GCC developers:
|
||||
|
||||
"strlen is a standard C function, so I don't see any bug in that being used
|
||||
unless you do a freestanding compilation (-nostdlib isn't that)."
|
||||
|
||||
Signed-off-by: Adrian Reber <areber@redhat.com>
|
||||
---
|
||||
compel/src/main.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/compel/src/main.c b/compel/src/main.c
|
||||
index a9a50959f..f461ff04d 100644
|
||||
--- a/compel/src/main.c
|
||||
+++ b/compel/src/main.c
|
||||
@@ -19,6 +19,7 @@
|
||||
|
||||
#define CFLAGS_DEFAULT_SET \
|
||||
"-Wstrict-prototypes " \
|
||||
+ "-ffreestanding " \
|
||||
"-fno-stack-protector -nostdlib -fomit-frame-pointer "
|
||||
|
||||
#define COMPEL_CFLAGS_PIE CFLAGS_DEFAULT_SET "-fpie"
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -1,59 +0,0 @@
|
||||
From db2a18df9d47b7511120bc48a614c5abb0d67c16 Mon Sep 17 00:00:00 2001
|
||||
From: luofeng <luofeng13@huawei.com>
|
||||
Date: Wed, 6 Sep 2023 14:15:57 +0000
|
||||
Subject: [PATCH] support build with clang
|
||||
|
||||
---
|
||||
Makefile | 1 -
|
||||
criu/arch/aarch64/include/asm/restorer.h | 10 +++++-----
|
||||
2 files changed, 5 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/Makefile b/Makefile
|
||||
index c1eafdd..14c0008 100644
|
||||
--- a/Makefile
|
||||
+++ b/Makefile
|
||||
@@ -82,7 +82,6 @@ endif
|
||||
|
||||
# secure compilation options
|
||||
CFLAGS += -fstack-protector-all -fPIE
|
||||
-LDFLAGS += -pie
|
||||
|
||||
#
|
||||
# CFLAGS_PIE:
|
||||
diff --git a/criu/arch/aarch64/include/asm/restorer.h b/criu/arch/aarch64/include/asm/restorer.h
|
||||
index 64a9c24..f12f89d 100644
|
||||
--- a/criu/arch/aarch64/include/asm/restorer.h
|
||||
+++ b/criu/arch/aarch64/include/asm/restorer.h
|
||||
@@ -13,7 +13,7 @@
|
||||
#define RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid, \
|
||||
thread_args, clone_restore_fn) \
|
||||
asm volatile( \
|
||||
- "clone_emul: \n" \
|
||||
+ "clone_emul_%=: \n" \
|
||||
"ldr x1, %2 \n" \
|
||||
"and x1, x1, #~15 \n" \
|
||||
"sub x1, x1, #16 \n" \
|
||||
@@ -24,16 +24,16 @@
|
||||
"mov x8, #"__stringify(__NR_clone)" \n" \
|
||||
"svc #0 \n" \
|
||||
\
|
||||
- "cbz x0, thread_run \n" \
|
||||
+ "cbz x0, thread_run_%= \n" \
|
||||
\
|
||||
"mov %0, x0 \n" \
|
||||
- "b clone_end \n" \
|
||||
+ "b clone_end_%= \n" \
|
||||
\
|
||||
- "thread_run: \n" \
|
||||
+ "thread_run_%=: \n" \
|
||||
"ldp x1, x0, [sp] \n" \
|
||||
"br x1 \n" \
|
||||
\
|
||||
- "clone_end: \n" \
|
||||
+ "clone_end_%=: \n" \
|
||||
: "=r"(ret) \
|
||||
: "r"(clone_flags), \
|
||||
"m"(new_sp), \
|
||||
--
|
||||
2.39.1
|
||||
|
||||
@ -1,168 +0,0 @@
|
||||
diff -u -r criu-3.16.1/compel/test/fdspy/Makefile criu-3.16.1/compel/test/fdspy/Makefile
|
||||
--- criu-3.16.1/compel/test/fdspy/Makefile 2021-10-14 13:44:30.000000000 +0800
|
||||
+++ criu-3.16.1/compel/test/fdspy/Makefile 2023-07-14 12:04:56.000000000 +0800
|
||||
@@ -1,4 +1,4 @@
|
||||
-CC := gcc
|
||||
+CC ?= gcc
|
||||
CFLAGS ?= -O2 -g -Wall -Werror
|
||||
|
||||
COMPEL := ../../../compel/compel-host
|
||||
diff -u -r criu-3.16.1/compel/test/infect/Makefile criu-3.16.1/compel/test/infect/Makefile
|
||||
--- criu-3.16.1/compel/test/infect/Makefile 2021-10-14 13:44:30.000000000 +0800
|
||||
+++ criu-3.16.1/compel/test/infect/Makefile 2023-07-14 12:05:03.000000000 +0800
|
||||
@@ -1,4 +1,4 @@
|
||||
-CC := gcc
|
||||
+CC ?= gcc
|
||||
CFLAGS ?= -O2 -g -Wall -Werror
|
||||
|
||||
COMPEL := ../../../compel/compel-host
|
||||
diff -u -r criu-3.16.1/compel/test/rsys/Makefile criu-3.16.1/compel/test/rsys/Makefile
|
||||
--- criu-3.16.1/compel/test/rsys/Makefile 2021-10-14 13:44:30.000000000 +0800
|
||||
+++ criu-3.16.1/compel/test/rsys/Makefile 2023-07-14 12:04:49.000000000 +0800
|
||||
@@ -1,4 +1,4 @@
|
||||
-CC := gcc
|
||||
+CC ?= gcc
|
||||
CFLAGS ?= -O2 -g -Wall -Werror
|
||||
|
||||
COMPEL := ../../../compel/compel-host
|
||||
diff -u -r criu-3.16.1/scripts/ci/docker.env criu-3.16.1/scripts/ci/docker.env
|
||||
--- criu-3.16.1/scripts/ci/docker.env 2021-10-14 13:44:30.000000000 +0800
|
||||
+++ criu-3.16.1/scripts/ci/docker.env 2023-07-14 11:51:52.000000000 +0800
|
||||
@@ -1,4 +1,4 @@
|
||||
SKIP_CI_PREP=1
|
||||
ZDTM_OPTS=-x zdtm/static/binfmt_misc -x zdtm/static/sched_policy00
|
||||
-CC=gcc
|
||||
+CC=$(CC)
|
||||
SKIP_EXT_DEV_TEST=1
|
||||
diff -u -r criu-3.16.1/scripts/ci/run-ci-tests.sh criu-3.16.1/scripts/ci/run-ci-tests.sh
|
||||
--- criu-3.16.1/scripts/ci/run-ci-tests.sh 2021-10-14 13:44:30.000000000 +0800
|
||||
+++ criu-3.16.1/scripts/ci/run-ci-tests.sh 2023-07-14 12:06:23.000000000 +0800
|
||||
@@ -36,7 +36,7 @@
|
||||
# This can fail on aarch64 travis
|
||||
service apport stop || :
|
||||
|
||||
- if [ "$CLANG" = "1" ]; then
|
||||
+ if [ "$CC" = "clang" ]; then
|
||||
# clang support
|
||||
CC=clang
|
||||
# If this is running in an environment without gcc installed
|
||||
diff -u -r criu-3.16.1/scripts/nmk/scripts/tools.mk criu-3.16.1/scripts/nmk/scripts/tools.mk
|
||||
--- criu-3.16.1/scripts/nmk/scripts/tools.mk 2021-10-14 13:44:30.000000000 +0800
|
||||
+++ criu-3.16.1/scripts/nmk/scripts/tools.mk 2023-07-14 11:52:23.000000000 +0800
|
||||
@@ -7,7 +7,7 @@
|
||||
ifeq ($(origin LD), default)
|
||||
LD := $(CROSS_COMPILE)$(HOSTLD)
|
||||
endif
|
||||
-HOSTCC ?= gcc
|
||||
+HOSTCC ?= $(CC)
|
||||
ifeq ($(origin CC), default)
|
||||
CC := $(CROSS_COMPILE)$(HOSTCC)
|
||||
endif
|
||||
diff -u -r criu-3.16.1/test/others/app-emu/job/Makefile criu-3.16.1/test/others/app-emu/job/Makefile
|
||||
--- criu-3.16.1/test/others/app-emu/job/Makefile 2021-10-14 13:44:30.000000000 +0800
|
||||
+++ criu-3.16.1/test/others/app-emu/job/Makefile 2023-07-14 12:01:22.000000000 +0800
|
||||
@@ -2,10 +2,10 @@
|
||||
.PHONY: all
|
||||
|
||||
%.o: %.c
|
||||
- gcc -c $< -o $@
|
||||
+ $(CC) -c $< -o $@
|
||||
|
||||
job: job.o
|
||||
- gcc -o $@ job.o
|
||||
+ $(CC) -o $@ job.o
|
||||
|
||||
clean:
|
||||
rm -f *.o job
|
||||
diff -u -r criu-3.16.1/test/others/app-emu/make/Makefile criu-3.16.1/test/others/app-emu/make/Makefile
|
||||
--- criu-3.16.1/test/others/app-emu/make/Makefile 2021-10-14 13:44:30.000000000 +0800
|
||||
+++ criu-3.16.1/test/others/app-emu/make/Makefile 2023-07-14 12:03:31.000000000 +0800
|
||||
@@ -3,7 +3,7 @@
|
||||
.PHONY: all
|
||||
|
||||
%.o: %.c
|
||||
- gcc -c $< -o $@
|
||||
+ $(CC) -c $< -o $@
|
||||
|
||||
foo%.c: tmpl.c
|
||||
cp $< $@
|
||||
diff -u -r criu-3.16.1/test/others/ext-links/Makefile criu-3.16.1/test/others/ext-links/Makefile
|
||||
--- criu-3.16.1/test/others/ext-links/Makefile 2021-10-14 13:44:30.000000000 +0800
|
||||
+++ criu-3.16.1/test/others/ext-links/Makefile 2023-07-14 12:03:28.000000000 +0800
|
||||
@@ -1,4 +1,4 @@
|
||||
all: mvlink.so
|
||||
|
||||
mvlink.so: mvlink.c
|
||||
- gcc -g -Werror -Wall -shared -nostartfiles mvlink.c -o mvlink.so -iquote ../../../criu/include -fPIC
|
||||
+ $(CC) -g -Werror -Wall -shared -nostartfiles mvlink.c -o mvlink.so -iquote ../../../criu/include -fPIC
|
||||
diff -u -r criu-3.16.1/test/others/libcriu/Makefile criu-3.16.1/test/others/libcriu/Makefile
|
||||
--- criu-3.16.1/test/others/libcriu/Makefile 2021-10-14 13:44:30.000000000 +0800
|
||||
+++ criu-3.16.1/test/others/libcriu/Makefile 2023-07-14 12:03:37.000000000 +0800
|
||||
@@ -16,13 +16,13 @@
|
||||
|
||||
define genb
|
||||
$(1): $(1).o lib.o
|
||||
- gcc $$^ -L ../../../../criu/lib/c/ -L ../../../../criu/images/ -lcriu -o $$@
|
||||
+ $(CC) $$^ -L ../../../../criu/lib/c/ -L ../../../../criu/images/ -lcriu -o $$@
|
||||
endef
|
||||
|
||||
$(foreach t, $(TESTS), $(eval $(call genb, $(t))))
|
||||
|
||||
%.o: %.c
|
||||
- gcc -c $^ -iquote ../../../../criu/criu/include -I../../../../criu/lib/c/ -I../../../../criu/images/ -o $@ -Werror
|
||||
+ $(CC) -c $^ -iquote ../../../../criu/criu/include -I../../../../criu/lib/c/ -I../../../../criu/images/ -o $@ -Werror
|
||||
|
||||
clean: libcriu_clean
|
||||
rm -rf $(TESTS) $(TESTS:%=%.o) lib.o
|
||||
diff -u -r criu-3.16.1/test/others/Makefile criu-3.16.1/test/others/Makefile
|
||||
--- criu-3.16.1/test/others/Makefile 2021-10-14 13:44:30.000000000 +0800
|
||||
+++ criu-3.16.1/test/others/Makefile 2023-07-14 12:03:25.000000000 +0800
|
||||
@@ -1,2 +1,2 @@
|
||||
loop:
|
||||
- gcc -Wall loop.c -o loop
|
||||
+ $(CC) -Wall loop.c -o loop
|
||||
diff -u -r criu-3.16.1/test/others/mounts/ext/Makefile criu-3.16.1/test/others/mounts/ext/Makefile
|
||||
--- criu-3.16.1/test/others/mounts/ext/Makefile 2021-10-14 13:44:30.000000000 +0800
|
||||
+++ criu-3.16.1/test/others/mounts/ext/Makefile 2023-07-14 12:01:34.000000000 +0800
|
||||
@@ -1,13 +1,13 @@
|
||||
all: ext-mount.so ns_init
|
||||
|
||||
ext-mount.so: ext-mount.c
|
||||
- gcc -g -Werror -Wall -shared -nostartfiles ext-mount.c -o ext-mount.so -iquote ../../../include -fPIC
|
||||
+ $(CC) -g -Werror -Wall -shared -nostartfiles ext-mount.c -o ext-mount.so -iquote ../../../include -fPIC
|
||||
|
||||
ns_init: ns_init.o
|
||||
- gcc -static $< -o $@
|
||||
+ $(CC) -static $< -o $@
|
||||
|
||||
ns_init.o: ns_init.c
|
||||
- gcc -c $< -o $@
|
||||
+ $(CC) -c $< -o $@
|
||||
|
||||
run: all
|
||||
./run.sh
|
||||
diff -u -r criu-3.16.1/test/others/unix-callback/Makefile criu-3.16.1/test/others/unix-callback/Makefile
|
||||
--- criu-3.16.1/test/others/unix-callback/Makefile 2021-10-14 13:44:30.000000000 +0800
|
||||
+++ criu-3.16.1/test/others/unix-callback/Makefile 2023-07-14 12:01:53.000000000 +0800
|
||||
@@ -7,16 +7,16 @@
|
||||
protoc-c --proto_path=. --c_out=. unix.proto
|
||||
|
||||
unix-lib.so: unix-lib.c unix.pb-c.c
|
||||
- gcc -g -Werror -Wall -shared -nostartfiles unix-lib.c unix.pb-c.c -o unix-lib.so -iquote ../../../criu/include -fPIC
|
||||
+ $(CC) -g -Werror -Wall -shared -nostartfiles unix-lib.c unix.pb-c.c -o unix-lib.so -iquote ../../../criu/include -fPIC
|
||||
|
||||
syslog-lib.so: syslog-lib.c
|
||||
- gcc -g -Werror -Wall -shared -nostartfiles syslog-lib.c -o syslog-lib.so -iquote ../../../criu/include -fPIC
|
||||
+ $(CC) -g -Werror -Wall -shared -nostartfiles syslog-lib.c -o syslog-lib.so -iquote ../../../criu/include -fPIC
|
||||
|
||||
unix-server: unix-server.c
|
||||
- gcc -Werror -Wall -o unix-server unix-server.c
|
||||
+ $(CC) -Werror -Wall -o unix-server unix-server.c
|
||||
|
||||
unix-client: unix-client.c
|
||||
- gcc -Werror -Wall -o unix-client unix-client.c
|
||||
+ $(CC) -Werror -Wall -o unix-client unix-client.c
|
||||
|
||||
clean:
|
||||
rm -rf data unix-lib.so unix-server unix-client syslog-lib.so output pid unix.pb-c.*
|
||||
|
||||
36
1000-backport-page-pipe-fix-limiting-a-pipe-size.patch
Normal file
36
1000-backport-page-pipe-fix-limiting-a-pipe-size.patch
Normal file
@ -0,0 +1,36 @@
|
||||
From 51533d98ac389711a704266a1a5d7afc9b267f2d Mon Sep 17 00:00:00 2001
|
||||
From: Andrei Vagin <avagin@gmail.com>
|
||||
Date: Wed, 27 Apr 2022 06:51:47 +0300
|
||||
Subject: [PATCH] page-pipe: fix limiting a pipe size
|
||||
|
||||
But actually, 5a92f100b88e probably has to be reverted as a whole.
|
||||
PIPE_MAX_SIZE is the hard limit to avoid PAGE_ALLOC_COSTLY_ORDER
|
||||
allocations in the kernel. But F_SETPIPE_SZ rounds up a requested pipe
|
||||
size to a power-of-2 pages. It means that when we request PIPE_MAX_SIZE
|
||||
that isn't a power-of-2 number, we actually request a pipe size greater
|
||||
than PIPE_MAX_SIZE.
|
||||
|
||||
Fixes: 5a92f100b88e ("page-pipe: Resize up to PIPE_MAX_SIZE")
|
||||
|
||||
Signed-off-by: Andrei Vagin <avagin@gmail.com>
|
||||
Signed-off-by: He Wenliang <hewenliang4@huawei.com>
|
||||
---
|
||||
criu/page-pipe.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/criu/page-pipe.c b/criu/page-pipe.c
|
||||
index 5a7e50bc1..54dc3ccc4 100644
|
||||
--- a/criu/page-pipe.c
|
||||
+++ b/criu/page-pipe.c
|
||||
@@ -56,7 +56,7 @@ static inline int ppb_resize_pipe(struct page_pipe_buf *ppb)
|
||||
|
||||
if (new_size > PIPE_MAX_SIZE) {
|
||||
if (ppb->pipe_size < PIPE_MAX_SIZE)
|
||||
- ppb->pipe_size = PIPE_MAX_SIZE;
|
||||
+ new_size = PIPE_MAX_SIZE;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,35 @@
|
||||
From 485a83c110bef1b2700acec0bd63bad4518aa62f Mon Sep 17 00:00:00 2001
|
||||
From: "fu.lin" <fulin10@huawei.com>
|
||||
Date: Fri, 17 Sep 2021 17:16:48 +0800
|
||||
Subject: [PATCH] tty: fix the null pointer of get_tty_driver
|
||||
|
||||
v2: split error checking from index variable initialization
|
||||
v3: use PRIx64 for printing dev_t
|
||||
|
||||
Signed-off-by: fu.lin <fulin10@huawei.com>
|
||||
Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
|
||||
Signed-off-by: He Wenliang <hewenliang4@huawei.com>
|
||||
---
|
||||
criu/tty.c | 6 ++++++
|
||||
1 file changed, 6 insertions(+)
|
||||
|
||||
diff --git a/criu/tty.c b/criu/tty.c
|
||||
index 1598ad956..1462193c5 100644
|
||||
--- a/criu/tty.c
|
||||
+++ b/criu/tty.c
|
||||
@@ -1977,6 +1977,12 @@ static int dump_one_tty(int lfd, u32 id, const struct fd_parms *p)
|
||||
pr_info("Dumping tty %d with id %#x\n", lfd, id);
|
||||
|
||||
driver = get_tty_driver(p->stat.st_rdev, p->stat.st_dev);
|
||||
+ if (driver == NULL) {
|
||||
+ pr_err("Unable to find a tty driver (rdev %#" PRIx64 " dev %#" PRIx64 ")\n", p->stat.st_rdev,
|
||||
+ p->stat.st_dev);
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
if (driver->fd_get_index)
|
||||
index = driver->fd_get_index(lfd, p);
|
||||
else
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,42 @@
|
||||
From 74d1233b596c52ae8dc5da4730e6e3e48152023e Mon Sep 17 00:00:00 2001
|
||||
From: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
|
||||
Date: Tue, 27 Apr 2021 19:08:57 -0400
|
||||
Subject: [PATCH] criu/files: Don't cache fd ids for device files
|
||||
|
||||
Restore operation fails when we perform CR operation of multiple
|
||||
independent processes that have device files because criu caches
|
||||
the ids for the device files with same mnt_ids, inode pair. This
|
||||
change ensures that even in case of a cached id found for a device, a
|
||||
unique subid is generated and returned which is used for dumping.
|
||||
|
||||
Suggested-by: Andrei Vagin <avagin@gmail.com>
|
||||
Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
|
||||
Signed-off-by: He Wenliang <hewenliang4@huawei.com>
|
||||
---
|
||||
criu/file-ids.c | 10 ++++++++--
|
||||
1 file changed, 8 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/criu/file-ids.c b/criu/file-ids.c
|
||||
index 1b9d68888..772bd92cf 100644
|
||||
--- a/criu/file-ids.c
|
||||
+++ b/criu/file-ids.c
|
||||
@@ -77,8 +77,14 @@ int fd_id_generate_special(struct fd_parms *p, u32 *id)
|
||||
|
||||
fi = fd_id_cache_lookup(p);
|
||||
if (fi) {
|
||||
- *id = fi->id;
|
||||
- return 0;
|
||||
+ if (p->stat.st_mode & (S_IFCHR | S_IFBLK)) {
|
||||
+ /* Don't cache the id for mapped devices */
|
||||
+ *id = fd_tree.subid++;
|
||||
+ return 1;
|
||||
+ } else {
|
||||
+ *id = fi->id;
|
||||
+ return 0;
|
||||
+ }
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,28 @@
|
||||
From efeedf3912df4a9a13d5ac719700ca06a9dad327 Mon Sep 17 00:00:00 2001
|
||||
From: Andrei Vagin <avagin@gmail.com>
|
||||
Date: Wed, 27 Apr 2022 07:02:58 +0300
|
||||
Subject: [PATCH] pre-dump: call vmsplice with SPLICE_F_GIFT
|
||||
|
||||
In this case, vmsplice attaches pages without copying them.
|
||||
|
||||
Signed-off-by: Andrei Vagin <avagin@gmail.com>
|
||||
---
|
||||
criu/page-xfer.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/criu/page-xfer.c b/criu/page-xfer.c
|
||||
index 3d29fbf78..2a9f6e2cc 100644
|
||||
--- a/criu/page-xfer.c
|
||||
+++ b/criu/page-xfer.c
|
||||
@@ -822,7 +822,7 @@ int page_xfer_predump_pages(int pid, struct page_xfer *xfer, struct page_pipe *p
|
||||
|
||||
bufvec.iov_base = userbuf;
|
||||
bufvec.iov_len = bytes_read;
|
||||
- ret = vmsplice(ppb->p[1], &bufvec, 1, SPLICE_F_NONBLOCK);
|
||||
+ ret = vmsplice(ppb->p[1], &bufvec, 1, SPLICE_F_NONBLOCK | SPLICE_F_GIFT);
|
||||
|
||||
if (ret == -1 || ret != bytes_read) {
|
||||
pr_err("vmsplice: Failed to splice user buffer to pipe %ld\n", ret);
|
||||
--
|
||||
2.33.0
|
||||
|
||||
36
README.en.md
Normal file
36
README.en.md
Normal file
@ -0,0 +1,36 @@
|
||||
# criu
|
||||
|
||||
#### Description
|
||||
A tool of Checkpoint/Restore in User-space
|
||||
|
||||
#### Software Architecture
|
||||
Software architecture description
|
||||
|
||||
#### Installation
|
||||
|
||||
1. xxxx
|
||||
2. xxxx
|
||||
3. xxxx
|
||||
|
||||
#### Instructions
|
||||
|
||||
1. xxxx
|
||||
2. xxxx
|
||||
3. xxxx
|
||||
|
||||
#### Contribution
|
||||
|
||||
1. Fork the repository
|
||||
2. Create Feat_xxx branch
|
||||
3. Commit your code
|
||||
4. Create Pull Request
|
||||
|
||||
|
||||
#### Gitee Feature
|
||||
|
||||
1. You can use Readme\_XXX.md to support different languages, such as Readme\_en.md, Readme\_zh.md
|
||||
2. Gitee blog [blog.gitee.com](https://blog.gitee.com)
|
||||
3. Explore open source project [https://gitee.com/explore](https://gitee.com/explore)
|
||||
4. The most valuable open source project [GVP](https://gitee.com/gvp)
|
||||
5. The manual of Gitee [https://gitee.com/help](https://gitee.com/help)
|
||||
6. The most popular members [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/)
|
||||
37
README.md
Normal file
37
README.md
Normal file
@ -0,0 +1,37 @@
|
||||
# criu
|
||||
|
||||
#### 介绍
|
||||
A tool of Checkpoint/Restore in User-space
|
||||
|
||||
#### 软件架构
|
||||
软件架构说明
|
||||
|
||||
|
||||
#### 安装教程
|
||||
|
||||
1. xxxx
|
||||
2. xxxx
|
||||
3. xxxx
|
||||
|
||||
#### 使用说明
|
||||
|
||||
1. xxxx
|
||||
2. xxxx
|
||||
3. xxxx
|
||||
|
||||
#### 参与贡献
|
||||
|
||||
1. Fork 本仓库
|
||||
2. 新建 Feat_xxx 分支
|
||||
3. 提交代码
|
||||
4. 新建 Pull Request
|
||||
|
||||
|
||||
#### 码云特技
|
||||
|
||||
1. 使用 Readme\_XXX.md 来支持不同的语言,例如 Readme\_en.md, Readme\_zh.md
|
||||
2. 码云官方博客 [blog.gitee.com](https://blog.gitee.com)
|
||||
3. 你可以 [https://gitee.com/explore](https://gitee.com/explore) 这个地址来了解码云上的优秀开源项目
|
||||
4. [GVP](https://gitee.com/gvp) 全称是码云最有价值开源项目,是码云综合评定出的优秀开源项目
|
||||
5. 码云官方提供的使用手册 [https://gitee.com/help](https://gitee.com/help)
|
||||
6. 码云封面人物是一档用来展示码云会员风采的栏目 [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/)
|
||||
143
criu.spec
143
criu.spec
@ -5,97 +5,25 @@ Provides: crtools = %{version}-%{release}
|
||||
Obsoletes: crtools <= 1.0-2
|
||||
Summary: A tool of Checkpoint/Restore in User-space
|
||||
License: GPL-2.0-or-later or LGPL-2.1-only
|
||||
URL: https://criu.org/
|
||||
Source0: https://github.com/checkpoint-restore/criu/archive/v%{version}/%{name}-%{version}.tar.gz
|
||||
URL: http://criu.org/
|
||||
Source0: http://github.com/checkpoint-restore/criu/archive/v%{version}/%{name}-%{version}.tar.gz
|
||||
BuildRequires: systemd libnet-devel asciidoc xmlto perl-interpreter libselinux-devel gcc
|
||||
BuildRequires: protobuf-devel protobuf-c-devel python3-devel libnl3-devel libcap-devel
|
||||
BuildRequires: libmnl-devel libnftnl-devel
|
||||
Recommends: tar
|
||||
ExclusiveArch: x86_64 %{arm} ppc64le aarch64 s390x
|
||||
Requires: %{name} = %{version}-%{release}
|
||||
Provides: %{name}-libs = %{version}-%{release}
|
||||
Obsoletes: %{name}-libs < %{version}-%{release}
|
||||
|
||||
Patch: 0001-criu-dump-and-restore-cpu-affinity-of-each-thread.patch
|
||||
Patch: 0002-compel-add-rseq-syscall-into-compel-std-plugin-sysca.patch
|
||||
Patch: 0003-kerndat-check-for-rseq-syscall-support-Signed-off-by.patch
|
||||
Patch: 0004-util-move-fork_and_ptrace_attach-helper-from-cr-chec.patch
|
||||
Patch: 0005-cr-check-Add-ptrace-rseq-conf-dump-feature-Add-get_r.patch
|
||||
Patch: 0006-rseq-initial-support-TODO-1.-properly-handle-case-wh.patch
|
||||
Patch: 0007-zdtm-add-simple-test-for-rseq-C-R-Signed-off-by-Alex.patch
|
||||
Patch: 0008-ci-add-Fedora-Rawhide-based-test-on-Cirrus-We-have-a.patch
|
||||
Patch: 0009-include-add-thread_pointer.h-from-Glibc-Implementati.patch
|
||||
Patch: 0010-clone-noasan-unregister-rseq-at-the-thread-start-for.patch
|
||||
Patch: 0011-zdtm-static-rseq00-fix-rseq-test-when-linking-with-a.patch
|
||||
Patch: 0012-compel-add-helpers-to-get-set-instruction-pointer-Si.patch
|
||||
Patch: 0013-cr-dump-fixup-thread-IP-when-inside-rseq-cs-Signed-o.patch
|
||||
Patch: 0014-zdtm-add-rseq-transition-test-for-amd64-Signed-off-b.patch
|
||||
Patch: 0015-cr-dump-handle-rseq-flags-field-Userspace-may-config.patch
|
||||
Patch: 0016-zdtm-add-rseq02-transition-test-with-NO_RESTART-CS-f.patch
|
||||
Patch: 0017-zdtm-fix-zdtm-static-maps00-case-in-arm64.patch
|
||||
Patch: 0018-test-flush-ipt-rules-after-program-exits.patch
|
||||
Patch: 0019-zdtm-fix-cleaning-step-of-zdtm_netns.patch
|
||||
%ifarch aarch64
|
||||
Patch: 0020-mm-add-pin-memory-method-for-criu.patch
|
||||
Patch: 0021-pid-add-pid-recover-method-for-criu.patch
|
||||
Patch: 0022-notifier-add-notifier-calling-method-for-checkpoint-.patch
|
||||
Patch: 0023-block-device-dump-block-device-as-reguler-file.patch
|
||||
Patch: 0024-anon-inode-add-support-for-anon-inode-fd.patch
|
||||
Patch: 0025-char_dev-add-support-for-char-device-dump-and-restor.patch
|
||||
Patch: 0026-improve-char-dev-fd-check-and-repair-method.patch
|
||||
Patch: 0027-mmap-restore-dev-hisi_sec2-deivce-vma.patch
|
||||
Patch: 0028-infiniband-fix-the-infiniband-fd-conflict.patch
|
||||
Patch: 0029-cred-provide-cred-checkpoint-restore-method.patch
|
||||
Patch: 0030-socket-fix-connect-error-of-invalid-param.patch
|
||||
Patch: 0031-criu-eventpollfd-fix-for-improper-usage-in-appdata.patch
|
||||
Patch: 0032-task_exit_notify-add-task-exit-notify-mask-method-fo.patch
|
||||
Patch: 0033-unix-socket-add-support-for-unix-stream-socket.patch
|
||||
Patch: 0034-netlink-add-repair-modes-and-clear-resource-when-fai.patch
|
||||
Patch: 0035-sysvshm-add-dump-restore-sysv-shm-in-host-ipc-ns.patch
|
||||
Patch: 0036-add-O_REPAIR-flag-to-vma-fd.patch
|
||||
Patch: 0037-looser-file-mode-and-size-check.patch
|
||||
Patch: 0038-file-lock-add-repair-mode-to-dump-file-locks.patch
|
||||
Patch: 0039-unlock-network-when-restore-fails.patch
|
||||
Patch: 0040-net-add-shared-socket-recover-method-for-criu.patch
|
||||
Patch: 0041-tcp-save-src-ports-to-ip_local_reserved_ports-when-d.patch
|
||||
Patch: 0042-reg-file-fix-dump-fail-problem-with-null-seek-op.patch
|
||||
Patch: 0043-fix-dump-fail-problem-with-no-access-to-get-socket-f.patch
|
||||
Patch: 0044-proc-parse-fix-vma-offset-value-for-the-sysfs-file-o.patch
|
||||
Patch: 0045-add-reuse-file-method-for-recover-deleted-file-state.patch
|
||||
Patch: 0046-sk-fix-share-sockets-repair-problem.patch
|
||||
Patch: 0047-mm-add-clear-pin-mem-and-init-page-map-option.patch
|
||||
Patch: 0048-fds-fix-fds-list-restore.patch
|
||||
Patch: 0049-log-print-error-log-to-dev-kmsg.patch
|
||||
Patch: 0050-unix-sk-improve-dgram-robustness.patch
|
||||
Patch: 0051-sk-ignore-the-bind-error-for-icmp-socket.patch
|
||||
Patch: 0052-optimization-parallel-collecting-vmas.patch
|
||||
Patch: 0053-mm-add-exec-file-mapping-pin-method.patch
|
||||
Patch: 0054-ptrace-trace-specific-syscall.patch
|
||||
Patch: 0055-notifier-rollback-when-open-img-failed.patch
|
||||
Patch: 0056-detach-don-t-kill-task-when-ptrace-PTRACE_DETACH-ret.patch
|
||||
Patch: 0057-build-add-secure-compilation-options.patch
|
||||
Patch: 0058-nftables-add-mnl-api.patch
|
||||
Patch: 0059-nftables-implement-nft-api-for-tcp.patch
|
||||
Patch: 0060-net-switch-to-nftables-API.patch
|
||||
Patch: 0061-zdtm-unlink-kdat-before-testing.patch
|
||||
Patch: 0062-zdtm-add-host-ns-sysvshm-ipc-case.patch
|
||||
Patch: 0063-zdtm-add-pinmem-testcase.patch
|
||||
Patch: 0064-zdtm-init-notifier-testcase.patch
|
||||
Patch: 0065-zdtm-print-errno-info-when-accessing-.out-failure.patch
|
||||
Patch: 0066-zdtm-print-more-info-for-fs.c.patch
|
||||
Patch: 0067-zdtm-add-chardev-testcase.patch
|
||||
Patch: 0068-zdtm-add-infiniband-testcase.patch
|
||||
Patch: 0069-zdtm-add-share-port-testcase.patch
|
||||
Patch: 0070-zdtm-tmp-test-script.patch
|
||||
Patch: 0071-mod-add-criu-indepent-test.patch
|
||||
Patch: 0072-kabichk-add-KABI-check-code.patch
|
||||
Patch: 0076-support-build-with-clang.patch
|
||||
%endif
|
||||
Patch: 0073-criu-fix-conflicting-headers.patch
|
||||
Patch: 0074-mount-add-definition-for-FSOPEN_CLOEXEC.patch
|
||||
Patch: 0075-compel-fix-parasite-with-GCC-12.patch
|
||||
Patch: 0077-fix-clang.patch
|
||||
Patch1: 0001-criu-dump-and-restore-cpu-affinity-of-each-thread.patch
|
||||
Patch2: 0002-mm-add-pin-memory-method-for-criu.patch
|
||||
Patch3: revert-fix-BUG-at-criu-pstree.c-452.patch
|
||||
|
||||
Patch1000: 1000-backport-page-pipe-fix-limiting-a-pipe-size.patch
|
||||
Patch1001: 1001-backport-tty-fix-the-null-pointer-of-get_tty_driver.patch
|
||||
Patch1002: 1002-backport-criu-files-Don-t-cache-fd-ids-for-device-files.patch
|
||||
Patch1003: 1003-backport-pre-dump-call-vmsplice-with-SPLICE_F_GIFT.patch
|
||||
|
||||
%description
|
||||
Checkpoint/Restore in Userspace (CRIU) is a software tool for the Linux operating system.
|
||||
Using this tool, it is possible to freeze a running application (or part of it) and
|
||||
@ -163,7 +91,7 @@ chmod 0755 %{buildroot}/run/%{name}/
|
||||
%exclude %{_libdir}/libcriu.a
|
||||
|
||||
%files -n python3-criu
|
||||
%{python3_sitelib}/crit-0.0.1-py%{python3_version}.egg
|
||||
%{python3_sitelib}/{pycriu/*,*egg-info}
|
||||
|
||||
%files -n crit
|
||||
%{_bindir}/crit
|
||||
@ -172,39 +100,38 @@ chmod 0755 %{buildroot}/run/%{name}/
|
||||
%{_sbindir}/criu-ns
|
||||
|
||||
%files help
|
||||
%doc COPYING
|
||||
%doc README.md COPYING
|
||||
%doc %{_mandir}/man8/criu.8*
|
||||
%doc %{_mandir}/man1/{compel.1*,crit.1*,criu-ns.1*}
|
||||
|
||||
%changelog
|
||||
* Mon Apr 1 2024 Liu Chao <liuchao173@huawei.com> -3.16.1-9
|
||||
* Mon Jun 19 2023 hewenliang <314264452@qq.com> - 3.16.1-9
|
||||
- revert: fix BUG at criu/pstree.c:452
|
||||
|
||||
* Mon Jun 19 2023 hewenliang <314264452@qq.com> - 3.16.1-8
|
||||
- revert "rseq c/r support"
|
||||
|
||||
* Tue Nov 22 2022 Hewenliang <hewenliang4@huawei.com> - 3.16.1-7
|
||||
- fix the null pointer of get_tty_driver.
|
||||
- criu files Dont cache fd ids for device files.
|
||||
- pre dump call vmsplice with SPLICE_F_GIFT.
|
||||
|
||||
* Tue Nov 22 2022 Hewenliang <hewenliang4@huawei.com> - 3.16.1-6
|
||||
- page-pipe:fix-limiting a pipe size.
|
||||
|
||||
* Thu Nov 10 2022 caodongxia <caodongxia@h-partners.com> - 3.16.1-5
|
||||
- Modify invalid source0
|
||||
|
||||
* Wed Oct 19 2022 fu.lin <fulin10@huawei.com> -3.16.1-4
|
||||
- bump the version
|
||||
|
||||
* Fri Nov 10 2023 wangqing <wangqing@uniontech.com> - 3.16.1-8
|
||||
- Fix Source0 URL errors
|
||||
|
||||
* Tue Aug 22 2023 feng luo<luofeng13@huawei.com> - 3.16.1-7
|
||||
- Support build with clang
|
||||
|
||||
* Thu Jul 27 2023 zhoujie <zhoujie133@huawei.com> - 3.16.1-6
|
||||
- compel fix parasite with GCC 12
|
||||
|
||||
* Wed Jan 4 2023 zhoujie <zhoujie133@huawei.com> - 3.16.1-5
|
||||
- Fix compilation problems caused by glibc upgrade
|
||||
|
||||
* Fri Jul 22 2022 tenglei <tenglei@kylinos.cn> - 3.16.1-4
|
||||
- Remove non-compliant README files
|
||||
- fix files not found egg-info
|
||||
- move changelog into spec file
|
||||
|
||||
* Wed Apr 13 2022 fu.lin <fulin10@huawei.com> - 3.16.1-3
|
||||
- backport kinds of feature/bugfix
|
||||
- spec: split changelog
|
||||
|
||||
* Fri Mar 4 2022 ningyu <ningyu9@huawei.com> - 3.16.1-2
|
||||
* Fri Mar 4 2022 ningyu <ningyu9@huawei.com> - 3.16.1-3
|
||||
- rseq c/r support
|
||||
|
||||
* Thu Dec 2 2021 zhouwenpei <zhouwenpei11@huawei.com> - 3.16.1-1
|
||||
* Sat Feb 26 2022 luolongjun <luolongjuna@gmail.com> - 3.16.1-2
|
||||
- add support for pin memory
|
||||
|
||||
* Fri Dec 24 2021 zhouwenpei <zhouwenpei11@huawei.com> - 3.16.1-1
|
||||
- upgrade criu version to 3.16.1
|
||||
|
||||
* Tue Sep 07 2021 chenchen <chen_aka_jan@163.com> - 3.15-4
|
||||
|
||||
90
revert-fix-BUG-at-criu-pstree.c-452.patch
Normal file
90
revert-fix-BUG-at-criu-pstree.c-452.patch
Normal file
@ -0,0 +1,90 @@
|
||||
Subject: [PATCH 1/1] revert: fix BUG at criu/pstree.c:452
|
||||
|
||||
Not every process dumped by criu is a session leader, so the
|
||||
stricter verification is troublesome because it causes many problems in
|
||||
some testcases. Therefore, revert this bugfix.
|
||||
|
||||
If the bugfix is resumed, using `setsid` to start process is necessary,
|
||||
and using `stdbuf -oL` to redirect standard output at the same time.
|
||||
|
||||
For details of this bug, see #1332.
|
||||
|
||||
Revert "pstree: don't change sid/gid-s if current sid/gid is the same"
|
||||
This reverts commit 90e03b1a1142ca40fb78de9eb04944ab51d06eeb.
|
||||
|
||||
Revert "pstree: check for pid collision before switching to new sid/gid"
|
||||
This reverts commit 7e6a1a7011b404fbf0108b062bda118e9a696b60.
|
||||
---
|
||||
criu/pstree.c | 37 ++++++++++---------------------------
|
||||
1 file changed, 10 insertions(+), 27 deletions(-)
|
||||
|
||||
diff --git a/criu/pstree.c b/criu/pstree.c
|
||||
index d5080e515..bf09c761c 100644
|
||||
--- a/criu/pstree.c
|
||||
+++ b/criu/pstree.c
|
||||
@@ -340,7 +340,6 @@ static int prepare_pstree_for_shell_job(pid_t pid)
|
||||
pid_t current_gid = getpgid(pid);
|
||||
|
||||
struct pstree_item *pi;
|
||||
- struct pid *tmp;
|
||||
|
||||
pid_t old_sid;
|
||||
pid_t old_gid;
|
||||
@@ -348,7 +347,6 @@ static int prepare_pstree_for_shell_job(pid_t pid)
|
||||
if (!opts.shell_job)
|
||||
return 0;
|
||||
|
||||
- /* root_item is a session leader */
|
||||
if (root_item->sid == vpid(root_item))
|
||||
return 0;
|
||||
|
||||
@@ -370,37 +368,22 @@ static int prepare_pstree_for_shell_job(pid_t pid)
|
||||
*/
|
||||
|
||||
old_sid = root_item->sid;
|
||||
- if (old_sid != current_sid) {
|
||||
- pr_info("Migrating process tree (SID %d->%d)\n", old_sid, current_sid);
|
||||
|
||||
- tmp = pstree_pid_by_virt(current_sid);
|
||||
- if (tmp) {
|
||||
- pr_err("Current sid %d intersects with pid (%d) in images\n", current_sid, tmp->state);
|
||||
- return -1;
|
||||
- }
|
||||
+ pr_info("Migrating process tree (SID %d->%d)\n",
|
||||
+ old_sid, current_sid);
|
||||
|
||||
- for_each_pstree_item(pi) {
|
||||
- if (pi->sid == old_sid)
|
||||
- pi->sid = current_sid;
|
||||
- }
|
||||
-
|
||||
- if (lookup_create_item(current_sid) == NULL)
|
||||
- return -1;
|
||||
+ for_each_pstree_item(pi) {
|
||||
+ if (pi->sid == old_sid)
|
||||
+ pi->sid = current_sid;
|
||||
}
|
||||
|
||||
- /* root_item is a group leader */
|
||||
- if (root_item->pgid == vpid(root_item))
|
||||
- return 0;
|
||||
-
|
||||
old_gid = root_item->pgid;
|
||||
- if (old_gid != current_gid) {
|
||||
- pr_info("Migrating process tree (GID %d->%d)\n", old_gid, current_gid);
|
||||
-
|
||||
- tmp = pstree_pid_by_virt(current_gid);
|
||||
- if (tmp) {
|
||||
- pr_err("Current gid %d intersects with pid (%d) in images\n", current_gid, tmp->state);
|
||||
+ if (old_gid != vpid(root_item)) {
|
||||
+ if (lookup_create_item(current_sid) == NULL)
|
||||
return -1;
|
||||
- }
|
||||
+
|
||||
+ pr_info("Migrating process tree (GID %d->%d)\n",
|
||||
+ old_gid, current_gid);
|
||||
|
||||
for_each_pstree_item(pi) {
|
||||
if (pi->pgid == old_gid)
|
||||
--
|
||||
2.35.1
|
||||
Loading…
x
Reference in New Issue
Block a user