From fe19a2639373175c134fa51a7c1c26ca5306d22c Mon Sep 17 00:00:00 2001 From: "fu.lin" Date: Fri, 10 Sep 2021 16:06:55 +0800 Subject: [PATCH 27/72] mmap: restore /dev/hisi_sec2* device vma There are two kinds of vmas: anonymous vmas and file-based vmas. For an anonymous vma, criu just maps the area and fills in its content; for a file-based vma, criu preprocesses it, e.g. by setting the `open_vm()` callback function. The `/dev/hisi_sec2*` char device differs from a normal char device. The `open`, `mmap`, and `close` syscalls each have a special meaning. - `open`: allocate the physical resource of the device - `mmap`: create an instance - `close`: release the physical resource A vma represents an instance in this device. One fd may be associated with a group of instances: one mmio (vma size is 2 pages, pgoff is 0), one dus (vma size is 37 pages, pgoff is 0x2000). As for the dus vma, it's split into two vmas by `mprotect(addr, 0x5000, PROT_READ)`: one of size 0x20000, one of size 0x5000. This patch makes restoring /dev/hisi_sec2* possible. Idea: It's impossible for criu to know the relationship between a vma and the mapped file fd. Therefore, just count the total number of fds while collecting /dev/hisi_sec2* files; then, during vma restoration, each fd is tagged with the functions (mmio, dus) it has already been used for, and an unused fd is assigned to the specific vma. During the `mmap()` process, the dus vma is split by `mprotect`. Note: - criu uses ino to index the fd. - the driver of this physical device is hisi_sec2.ko, which is located in `drivers/crypto/hisilicon/sec2/` of the Linux kernel. - the device name has the prefix "hisi_sec2", which can be found in `drivers/crypto/hisilicon/sec2/sec_main.c`. 
Conflict:NA Reference:https://gitee.com/src-openeuler/criu/pulls/21 Signed-off-by: fu.lin --- criu/files-chr.c | 130 +++++++++++++++++++++++++++++++++++++-- criu/include/files-chr.h | 16 +++++ criu/include/vma.h | 12 ++++ criu/pie/restorer.c | 130 ++++++++++++++++++++++++++++++++++++++- criu/proc_parse.c | 4 +- 5 files changed, 284 insertions(+), 8 deletions(-) diff --git a/criu/files-chr.c b/criu/files-chr.c index 315e9c6..95d93e1 100644 --- a/criu/files-chr.c +++ b/criu/files-chr.c @@ -6,6 +6,9 @@ #include "log.h" #include "protobuf.h" +#include "rst-malloc.h" + +static unsigned hisi_sec_fds_n; /* Checks if file descriptor @lfd is infinibandevent */ int is_infiniband_link(char *link) @@ -16,11 +19,14 @@ int is_infiniband_link(char *link) static int chrfile_open(struct file_desc *d, int *new_fd) { int fd, mntns_root; - int ret = 0; + int ret = -1; struct chrfile_info *ci; ci = container_of(d, struct chrfile_info, d); + pr_info("charfile: Opening %s (repair %d index %d)\n", + ci->path, ci->cfe->repair, ci->cfe->index); + if (ci->cfe->repair) ci->cfe->flags |= O_REPAIR; @@ -32,6 +38,7 @@ static int chrfile_open(struct file_desc *d, int *new_fd) } *new_fd = fd; + ret = 0; return ret; } @@ -52,10 +59,12 @@ static int collect_one_chrfile(void *o, ProtobufCMessage *base, struct cr_img *i else ci->path = ci->cfe->name; - pr_info("Collected chr file: %#x, name: %s\n", ci->cfe->id, ci->path); - file_desc_add(&ci->d, ci->cfe->id, &chrfile_desc_ops); + /* collect `/dev/hisi_sec2*` fds */ + if (strstr(ci->path, HISI_SEC_DEV) != NULL) + hisi_sec_fds_n += 1; - return 0; + pr_info("Collected chr file: %#x, name: %s\n", ci->cfe->id, ci->path); + return file_desc_add(&ci->d, ci->cfe->id, &chrfile_desc_ops); } struct collect_image_info chrfile_cinfo = { @@ -65,6 +74,7 @@ struct collect_image_info chrfile_cinfo = { .collect = collect_one_chrfile, }; +static int handle_hisi_vma(struct list_head *fds, struct vma_area *vma); int collect_chr_map(struct pstree_item *me, struct vma_area 
*vma) { struct list_head *list = &rsti(me)->fds; @@ -72,6 +82,12 @@ int collect_chr_map(struct pstree_item *me, struct vma_area *vma) struct chrfile_info *ci; bool exist_fd; + if (strstr(vma->e->name, HISI_SEC_DEV) != NULL) { + if (handle_hisi_vma(list, vma) != 0) { + return -1; + } else + goto out; + } list_for_each_entry_safe(fle, tmp, list, ps_list) { struct file_desc *d = fle->desc; @@ -91,5 +107,111 @@ int collect_chr_map(struct pstree_item *me, struct vma_area *vma) if (!exist_fd) return -EEXIST; +out: + pr_info(" `- find fd %ld for dev %s at this vma\n", vma->e->fd, vma->e->name); + + return 0; +} + +#define MAX_HISI_SEC_SIZE 3 /* one physical device expose three char dev */ +static struct hlist_head hisi_sec_fds_hash[MAX_HISI_SEC_SIZE]; + +static int collect_hisi_sec_fds(struct list_head *list) +{ + struct fdinfo_list_entry *fle, *tmp; + struct chrfile_info *ci; + struct file_desc *d; + struct hisi_sec_desc *desc; + int idx; + int nr = 0; + + for (idx = 0; idx < MAX_HISI_SEC_SIZE; idx++) + INIT_HLIST_HEAD(&hisi_sec_fds_hash[idx]); + + list_for_each_entry_safe(fle, tmp, list, ps_list) { + d = fle->desc; + + if (d->ops->type != FD_TYPES__CHR) + continue; + + ci = container_of(d, struct chrfile_info, d); + + if (strstr(ci->path, HISI_SEC_DEV) != NULL) { + desc = shmalloc(sizeof(*desc)); + if (desc == NULL) + return -ENOMEM; + + desc->name = ci->path; + desc->fd = fle->fe->fd; + desc->mmio = desc->dus = 0; + + idx = (ci->path[strlen(ci->path)-1] - '0') % MAX_HISI_SEC_SIZE; + hlist_add_head(&desc->hash, &hisi_sec_fds_hash[idx]); + + nr += 1; + } + } + + return nr; +} + +static long delivery_hisi_sec_fd(struct list_head *fds, struct vma_area *vma) +{ + extern unsigned hisi_sec_fds_n; /* defined in criu/files.c */ + static bool initialized = false; + struct hisi_sec_desc *desc; + int fd = -1, idx; + + if (!initialized) { + int nr; + + pr_info("find %d fds for hisi_sec char device\n", hisi_sec_fds_n); + + nr = collect_hisi_sec_fds(fds); + if (nr != hisi_sec_fds_n) 
{ + pr_err("Collected fds(%d) aren't equal opened(%d)\n", + nr, hisi_sec_fds_n); + return -1; + } + + initialized = true; + } else if (vma->e->pgoff != HISI_SEC_MMIO && vma->e->pgoff != HISI_SEC_DUS) { + /* It's impossible value for fd, just as a tag to show it's a + * vma by `mprotect` syscall. + */ + return LONG_MAX; + } + + idx = (vma->e->name[strlen(vma->e->name)-1] - '0') % MAX_HISI_SEC_SIZE; + hlist_for_each_entry(desc, &hisi_sec_fds_hash[idx], hash) { + if (strcmp(desc->name, vma->e->name) != 0) + continue; + + if (vma->e->pgoff == HISI_SEC_MMIO && !desc->mmio) { + fd = desc->fd; + desc->mmio = true; + break; + } else if (vma->e->pgoff == HISI_SEC_DUS && !desc->dus) { + fd = desc->fd; + desc->dus = true; + break; + } + } + + return fd; +} + +static int handle_hisi_vma(struct list_head *fds, struct vma_area *vma) +{ + long fd = delivery_hisi_sec_fd(fds, vma); + + if (fd < 0) { + pr_err("find fd for char dev vma pgoff %lx named %s failed.\n", + vma->e->pgoff, vma->e->name); + return -1; + } + + vma->e->fd = fd; + return 0; } diff --git a/criu/include/files-chr.h b/criu/include/files-chr.h index 5be11f5..26b8fb2 100644 --- a/criu/include/files-chr.h +++ b/criu/include/files-chr.h @@ -22,4 +22,20 @@ bool find_devname(const char *name); int collect_chr_map(struct pstree_item *me, struct vma_area *vma); int is_infiniband_link(char *link); +struct hisi_sec_desc { + struct hlist_node hash; + char *name; + bool mmio; + bool dus; + int fd; +}; + +#define HISI_SEC_DEV "hisi_sec2" /* `/dev/hisi_sec2*` char device */ + +/* here is the selection of offset in `mmap`, they're from drivers */ +enum hisi_sec_dev { + HISI_SEC_MMIO = 0x0, + HISI_SEC_DUS = 0x2000, +}; + #endif /* __CRIU_FILES_CHR_H__ */ diff --git a/criu/include/vma.h b/criu/include/vma.h index ed9f31e..2b6e86f 100644 --- a/criu/include/vma.h +++ b/criu/include/vma.h @@ -125,4 +125,16 @@ static inline bool vma_entry_can_be_lazy(VmaEntry *e) !(vma_entry_is(e, VMA_AREA_VDSO)) && !(vma_entry_is(e, 
VMA_AREA_VSYSCALL))); } +struct vma_attr { + int prot; + int flags; +}; + +enum ALIEN_MAP_METHOD { + PGOFF_IS_ZERO, + MAP_THEN_PROTECT, + + MAX_ALIEN_MAP_METHOD, +}; + #endif /* __CR_VMA_H__ */ diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index 549bbd6..dcc922e 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -37,6 +37,7 @@ #include "uffd.h" #include "sched.h" #include "notifier.h" +#include "files-chr.h" #include "common/lock.h" #include "common/page.h" @@ -861,6 +862,129 @@ static unsigned long restore_mapping(VmaEntry *vma_entry) return addr; } +static unsigned long restore_map_then_protect_mapping(VmaEntry *curr, + struct vma_attr *curr_attr, + VmaEntry *next, + struct vma_attr *next_attr) +{ + int retval; + unsigned long addr; + + if (next->fd != LONG_MAX + || curr->end != next->start + || (vma_entry_len(curr) + curr->pgoff) != next->pgoff + || curr->prot == next->prot + || curr->flags != next->flags) { + pr_err("They looks not currect:\n"); + pr_err(" `- vma A: (%x %x %d %lx)\n", + curr_attr->prot, curr_attr->flags, + (int)curr->fd, curr->pgoff); + pr_err(" `- vma B: (%x %x %d %lx)\n", + next_attr->prot, next_attr->flags, + (int)next->fd, next->pgoff); + return -1; + } + + pr_info("\tmmap(%x %x %d %lx) in map then protect mapping\n", + curr_attr->prot, curr_attr->flags, + (int)curr->fd, curr->pgoff); + + addr = sys_mmap(decode_pointer(curr->start), + vma_entry_len(curr) + vma_entry_len(next), + curr_attr->prot, curr_attr->flags, curr->fd, curr->pgoff); + if (addr != curr->start) { + pr_err("%s: mmap failed with code %ld\n", __func__, addr); + goto out; + } + + pr_info("\t mprotect(%x)\n", next_attr->prot); + retval = sys_mprotect(decode_pointer(next->start), + vma_entry_len(next), next_attr->prot); + if (retval != 0) { + addr = retval; + pr_err("%s: mprotect failed with code %d\n", __func__, retval); + } + +out: + return addr; +} + +static unsigned long restore_pgoff_is_zero_mapping(VmaEntry *curr, struct vma_attr *attr) +{ + unsigned 
long addr; + + pr_debug("\tmmap(%x %x %d %lx) in pgoff is zero mapping\n", + attr->prot, attr->flags, (int)curr->fd, curr->pgoff); + + addr = sys_mmap(decode_pointer(curr->start), + vma_entry_len(curr), + attr->prot, attr->flags, + curr->fd, curr->pgoff); + + return addr; +} + +static unsigned long restore_hisi_sec_mapping(struct task_restore_args *args, + int i, int *step) +{ + VmaEntry *curr = args->vmas + i; + VmaEntry *next = args->vmas + i + 1; + struct vma_attr curr_attr = { + .prot = curr->prot, + .flags = curr->flags | MAP_FIXED, + }; + struct vma_attr next_attr = { + .prot = next->prot, + .flags = next->flags | MAP_FIXED, + }; + unsigned long addr; + + switch (curr->pgoff) { + case HISI_SEC_MMIO: + addr = restore_pgoff_is_zero_mapping(curr, &curr_attr); + break; + case HISI_SEC_DUS: + *step = 2; + addr = restore_map_then_protect_mapping(curr, &curr_attr, next, &next_attr); + break; + default: + pr_err("invalid pgoff %lx for vma\n", curr->pgoff); + return -1; + } + return addr; +} + +static bool find(const char *s1, const char *s2) +{ + if (s1 == NULL || s2 == NULL) + return NULL; + + while (*s1 != '\0' && *s2 != '\0') { + if (*s1 == *s2) { + s1 += 1; + s2 += 1; + } else + s1 += 1; + + if (*s2 == '\0') + return true; + } + + return false; +} + +static unsigned long distribute_restore_mapping(struct task_restore_args *args, + int i, int *step) +{ + VmaEntry *vma = args->vmas + i; + struct vma_names *vma_name = args->vma_names + i; + + if (vma_entry_is(vma, VMA_AREA_CHR) && find(vma_name->name, HISI_SEC_DEV)) + return restore_hisi_sec_mapping(args, i, step); + else + return restore_mapping(vma); +} + /* * This restores aio ring header, content, head and in-kernel position * of tail. 
To set tail, we write to /dev/null and use the fact this @@ -1542,7 +1666,7 @@ int write_fork_pid(int pid) long __export_restore_task(struct task_restore_args *args) { long ret = -1; - int i; + int i, step; VmaEntry *vma_entry; unsigned long va; struct restore_vma_io *rio; @@ -1691,7 +1815,7 @@ long __export_restore_task(struct task_restore_args *args) /* * OK, lets try to map new one. */ - for (i = 0; i < args->vmas_n; i++) { + for (i = 0, step = 1; i < args->vmas_n; i += step, step = 1) { vma_entry = args->vmas + i; vma_name = args->vma_names + i; @@ -1708,7 +1832,7 @@ long __export_restore_task(struct task_restore_args *args) if (vma_entry_is(vma_entry, VMA_PREMMAPED)) continue; - va = restore_mapping(vma_entry); + va = distribute_restore_mapping(args, i, &step); if (va != vma_entry->start) { pr_err("Can't restore %" PRIx64 " mapping with %lx\n", vma_entry->start, va); diff --git a/criu/proc_parse.c b/criu/proc_parse.c index 8913d93..daa54d9 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -41,6 +41,7 @@ #include "path.h" #include "fault-injection.h" #include "memfd.h" +#include "files-chr.h" #include "protobuf.h" #include "images/fdinfo.pb-c.h" @@ -613,7 +614,8 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area, const char *file_pat /* NOTICE: if `--dump-char-dev` option is set, permmit * all char device memory area dumping. */ - if (strstr(file_path, "uverbs") != NULL) { + if (strstr(file_path, "uverbs") != NULL + || strstr(file_path, HISI_SEC_DEV) != NULL) { int len = strlen(file_path) + 1; vma_area->e->status |= VMA_AREA_CHR; -- 2.34.1