From a3cc0e70fa60f315c3bfc79c040bd603ed6317f3 Mon Sep 17 00:00:00 2001 From: liuzh Date: Thu, 12 Dec 2024 02:23:00 +0000 Subject: [PATCH] Publish k1 x86 code on 6.6.0-67.0.0 internal branch: ptdesc-test4 internal commit: 1aaccdb --- .gitignore | 4 + Makefile | 7 + arch/arm64/Kconfig | 35 + arch/arm64/configs/openeuler_defconfig | 9 +- arch/arm64/include/asm/assembler.h | 67 + arch/arm64/include/asm/daifflags.h | 16 + arch/arm64/include/asm/efi.h | 4 + arch/arm64/include/asm/fixmap.h | 3 + arch/arm64/include/asm/hw_breakpoint.h | 12 + arch/arm64/include/asm/iee-access.h | 46 + arch/arm64/include/asm/iee-cred.h | 150 ++ arch/arm64/include/asm/iee-def.h | 130 ++ arch/arm64/include/asm/iee-key.h | 149 ++ arch/arm64/include/asm/iee-koi.h | 13 + arch/arm64/include/asm/iee-selinuxp.h | 27 + arch/arm64/include/asm/iee-si.h | 61 + arch/arm64/include/asm/iee-slab.h | 23 + arch/arm64/include/asm/iee-token.h | 33 + arch/arm64/include/asm/iee.h | 10 + arch/arm64/include/asm/kernel-pgtable.h | 21 + arch/arm64/include/asm/koi.h | 532 +++++ arch/arm64/include/asm/memory.h | 28 + arch/arm64/include/asm/mmu_context.h | 20 + arch/arm64/include/asm/pgalloc.h | 4 + arch/arm64/include/asm/pgtable-hwdef.h | 14 + arch/arm64/include/asm/pgtable.h | 191 +- arch/arm64/include/asm/pgtable_slab.h | 8 + arch/arm64/include/asm/pointer_auth.h | 5 + arch/arm64/include/asm/stack_slab.h | 8 + arch/arm64/include/asm/sysreg.h | 58 + arch/arm64/include/asm/tlb.h | 20 + arch/arm64/include/asm/tlbflush.h | 56 +- arch/arm64/kernel/Makefile | 3 + arch/arm64/kernel/armv8_deprecated.c | 16 + arch/arm64/kernel/asm-offsets.c | 13 + arch/arm64/kernel/cpu_errata.c | 12 + arch/arm64/kernel/cpufeature.c | 80 + arch/arm64/kernel/debug-monitors.c | 4 + arch/arm64/kernel/entry-common.c | 4 + arch/arm64/kernel/entry.S | 623 +++++- arch/arm64/kernel/fpsimd.c | 4 + arch/arm64/kernel/head.S | 5 + arch/arm64/kernel/hibernate.c | 9 + arch/arm64/kernel/hw_breakpoint.c | 99 + arch/arm64/kernel/iee/Makefile | 5 + arch/arm64/kernel/iee/iee-func.c | 722 +++++++ arch/arm64/kernel/iee/iee-gate.S | 314 +++ arch/arm64/kernel/iee/iee-pgtable.c | 402 ++++ arch/arm64/kernel/iee/iee-selinuxp.c | 36 + arch/arm64/kernel/iee/iee.c | 1720 +++++++++++++++ arch/arm64/kernel/iee/pgtable_slab.c | 107 + arch/arm64/kernel/iee/stack_slab.c | 19 + arch/arm64/kernel/irq.c | 4 +- arch/arm64/kernel/koi/Makefile | 1 + arch/arm64/kernel/koi/koi.c | 1688 +++++++++++++++ arch/arm64/kernel/mte.c | 5 + arch/arm64/kernel/process.c | 13 +- arch/arm64/kernel/proton-pack.c | 8 + arch/arm64/kernel/setup.c | 34 + arch/arm64/kernel/sfi_bpf_arch.c | 85 + arch/arm64/kernel/traps.c | 26 + arch/arm64/kernel/vmlinux.lds.S | 55 + arch/arm64/mm/context.c | 125 +- arch/arm64/mm/fault.c | 28 + arch/arm64/mm/fixmap.c | 75 +- arch/arm64/mm/init.c | 40 + arch/arm64/mm/mmu.c | 1916 +++++++++++++++-- arch/arm64/mm/pgd.c | 32 + arch/arm64/mm/proc.S | 8 + arch/arm64/mm/trans_pgd.c | 25 +- arch/arm64/net/bpf_jit_comp.c | 105 + arch/x86/Kconfig | 25 + arch/x86/boot/compressed/ident_map_64.c | 34 + arch/x86/boot/compressed/pgtable_64.c | 4 + arch/x86/entry/entry_64.S | 32 + arch/x86/include/asm/desc.h | 18 + arch/x86/include/asm/fixmap.h | 5 + arch/x86/include/asm/iee-access.h | 39 + arch/x86/include/asm/iee-cred.h | 148 ++ arch/x86/include/asm/iee-def.h | 124 ++ arch/x86/include/asm/iee-key.h | 147 ++ arch/x86/include/asm/iee-koi.h | 5 + arch/x86/include/asm/iee-selinuxp.h | 26 + arch/x86/include/asm/iee-si.h | 25 + arch/x86/include/asm/iee-slab.h | 21 + arch/x86/include/asm/iee-token.h | 32 + 
arch/x86/include/asm/iee.h | 5 + arch/x86/include/asm/koi.h | 432 ++++ arch/x86/include/asm/page.h | 20 + arch/x86/include/asm/page_64.h | 4 + arch/x86/include/asm/page_types.h | 7 + arch/x86/include/asm/pgalloc.h | 73 + arch/x86/include/asm/pgtable.h | 77 +- arch/x86/include/asm/pgtable_64.h | 119 + arch/x86/include/asm/pgtable_64_types.h | 5 + arch/x86/include/asm/pgtable_slab.h | 13 + arch/x86/include/asm/special_insns.h | 15 + arch/x86/include/asm/stack_slab.h | 8 + arch/x86/include/asm/tlb.h | 6 + arch/x86/kernel/Makefile | 5 + arch/x86/kernel/asm-offsets.c | 6 + arch/x86/kernel/cpu/common.c | 12 + arch/x86/kernel/espfix_64.c | 14 + arch/x86/kernel/head64.c | 17 + arch/x86/kernel/idt.c | 8 +- arch/x86/kernel/iee/Makefile | 4 + arch/x86/kernel/iee/iee-func.c | 365 ++++ arch/x86/kernel/iee/iee-gate.S | 209 ++ arch/x86/kernel/iee/iee-selinuxp.c | 36 + arch/x86/kernel/iee/iee.c | 1048 +++++++++ arch/x86/kernel/iee/pgtable-slab.c | 169 ++ arch/x86/kernel/iee/stack-slab.c | 25 + arch/x86/kernel/koi/Makefile | 1 + arch/x86/kernel/koi/koi.c | 1414 ++++++++++++ arch/x86/kernel/ldt.c | 4 + arch/x86/kernel/machine_kexec_64.c | 40 +- arch/x86/kernel/paravirt.c | 4 + arch/x86/kernel/setup.c | 174 ++ arch/x86/kernel/sfi_bpf_arch.c | 85 + arch/x86/kernel/vmlinux.lds.S | 78 + arch/x86/mm/fault.c | 3 + arch/x86/mm/ident_map_for_iee.c | 197 ++ arch/x86/mm/init.c | 659 ++++++ arch/x86/mm/init_64.c | 341 +++ arch/x86/mm/ioremap.c | 37 + arch/x86/mm/kaslr.c | 17 + arch/x86/mm/mm_internal.h | 9 + arch/x86/mm/pat/set_memory.c | 72 +- arch/x86/mm/pgtable.c | 75 + arch/x86/mm/pti.c | 52 + arch/x86/net/bpf_jit_comp.c | 62 + arch/x86/platform/efi/efi_64.c | 25 + arch/x86/power/hibernate_64.c | 18 + arch/x86/xen/mmu_pv.c | 6 + block/sed-opal.c | 8 + certs/blacklist.c | 8 + certs/system_keyring.c | 18 + crypto/af_alg.c | 16 + crypto/asymmetric_keys/asymmetric_type.c | 20 + crypto/asymmetric_keys/public_key.c | 16 + crypto/asymmetric_keys/signature.c | 10 + drivers/firmware/efi/arm-runtime.c | 4 + drivers/firmware/efi/memmap.c | 20 + drivers/iommu/amd/io_pgtable.c | 4 + drivers/md/Makefile | 1 + drivers/md/dm-crypt.c | 20 + drivers/md/dm-verity-verify-sig.c | 8 + drivers/md/dm-zero.c | 2 +- drivers/nvdimm/security.c | 24 + drivers/rtc/rtc-test.c | 2 +- drivers/rtc/rtc-test_glue.h | 13 + drivers/tty/serial/earlycon.c | 4 + drivers/usb/early/ehci-dbgp.c | 4 + fs/coredump.c | 8 + fs/crypto/keyring.c | 13 + fs/crypto/keysetup_v1.c | 12 + fs/ecryptfs/ecryptfs_kernel.h | 4 + fs/ecryptfs/keystore.c | 52 + fs/ecryptfs/main.c | 4 + fs/exec.c | 23 + fs/nfs/flexfilelayout/flexfilelayout.c | 9 + fs/nfs/nfs4idmap.c | 24 + fs/nfsd/auth.c | 38 + fs/nfsd/nfs4callback.c | 12 +- fs/nfsd/nfs4recover.c | 9 + fs/nfsd/nfsfh.c | 9 + fs/open.c | 26 + fs/overlayfs/dir.c | 9 + fs/overlayfs/super.c | 12 + fs/smb/client/cifs_spnego.c | 29 + fs/smb/client/cifsacl.c | 50 + fs/smb/client/connect.c | 8 + fs/smb/client/sess.c | 4 + fs/smb/client/smb2pdu.c | 4 + fs/ubifs/auth.c | 8 + fs/verity/signature.c | 4 + include/asm-generic/early_ioremap.h | 3 + include/asm-generic/fixmap.h | 18 + include/asm-generic/memory_model.h | 20 + include/asm-generic/pgalloc.h | 7 + include/asm-generic/pgtable-nop4d.h | 5 + include/asm-generic/tlb.h | 26 +- include/asm-generic/vmlinux.lds.h | 34 +- include/keys/asymmetric-subtype.h | 4 + include/keys/asymmetric-type.h | 8 + include/keys/request_key_auth-type.h | 4 + include/linux/bpf.h | 18 + include/linux/bpf_verifier.h | 19 + include/linux/cred.h | 45 +- include/linux/efi.h | 9 + include/linux/filter.h | 3 + 
include/linux/iee-func.h | 31 + include/linux/key.h | 86 + include/linux/mm.h | 59 + include/linux/mm_types.h | 39 + include/linux/module.h | 1 + include/linux/pgtable.h | 32 + include/linux/sched.h | 19 + include/linux/sfi_bpf.h | 86 + include/linux/skbuff.h | 3 + include/uapi/linux/bpf.h | 1 + include/uapi/linux/bpf_common.h | 1 + init/main.c | 32 +- kernel/bpf/Kconfig | 11 + kernel/bpf/Makefile | 2 + kernel/bpf/arraymap.c | 194 +- kernel/bpf/hashtab.c | 323 ++- kernel/bpf/sfi_bpf.c | 1387 ++++++++++++ kernel/bpf/verifier.c | 360 +++- kernel/cred.c | 184 ++ kernel/exit.c | 8 + kernel/fork.c | 284 ++- kernel/groups.c | 7 + kernel/kthread.c | 12 + kernel/module/main.c | 105 +- kernel/sys.c | 107 + kernel/umh.c | 10 + kernel/user_namespace.c | 18 + lib/digsig.c | 8 + mm/Kconfig | 5 + mm/damon/ops-common.c | 1 + mm/debug_vm_pgtable.c | 32 + mm/early_ioremap.c | 57 + mm/huge_memory.c | 26 +- mm/hugetlb_vmemmap.c | 2 +- mm/init-mm.c | 16 + mm/memory.c | 14 + mm/mmap.c | 14 + mm/mmu_gather.c | 176 ++ mm/pgtable-generic.c | 29 + mm/slab.h | 39 +- mm/slab_common.c | 39 + mm/slub.c | 608 +++++- mm/sparse-vmemmap.c | 12 + mm/vmalloc.c | 2 +- net/ceph/ceph_common.c | 4 + net/ceph/crypto.c | 4 + net/core/filter.c | 18 +- net/dns_resolver/dns_key.c | 24 + net/dns_resolver/dns_query.c | 18 + net/rxrpc/af_rxrpc.c | 4 + net/rxrpc/conn_event.c | 5 + net/rxrpc/key.c | 16 + net/rxrpc/rxkad.c | 40 + net/rxrpc/security.c | 8 + net/rxrpc/sendmsg.c | 4 + net/rxrpc/server_key.c | 8 + net/sched/cls_bpf.c | 78 + net/sunrpc/auth.c | 14 + security/commoncap.c | 169 ++ security/integrity/evm/evm_crypto.c | 12 + security/keys/big_key.c | 16 + security/keys/dh.c | 8 + security/keys/encrypted-keys/encrypted.c | 28 + .../keys/encrypted-keys/masterkey_trusted.c | 5 + security/keys/gc.c | 52 + security/keys/internal.h | 8 + security/keys/key.c | 237 +- security/keys/keyctl.c | 91 + security/keys/keyring.c | 269 +++ security/keys/proc.c | 28 + security/keys/process_keys.c | 88 + security/keys/request_key.c | 16 + security/keys/request_key_auth.c | 20 + security/keys/trusted-keys/trusted_core.c | 8 + security/keys/user_defined.c | 12 + security/security.c | 15 + security/selinux/hooks.c | 60 + security/selinux/ima.c | 20 + security/selinux/include/security.h | 8 + security/selinux/selinuxfs.c | 66 + security/selinux/ss/services.c | 56 + security/selinux/status.c | 32 + security/smack/smack_lsm.c | 12 + 274 files changed, 23480 insertions(+), 373 deletions(-) create mode 100644 arch/arm64/include/asm/iee-access.h create mode 100644 arch/arm64/include/asm/iee-cred.h create mode 100644 arch/arm64/include/asm/iee-def.h create mode 100644 arch/arm64/include/asm/iee-key.h create mode 100644 arch/arm64/include/asm/iee-koi.h create mode 100644 arch/arm64/include/asm/iee-selinuxp.h create mode 100644 arch/arm64/include/asm/iee-si.h create mode 100644 arch/arm64/include/asm/iee-slab.h create mode 100644 arch/arm64/include/asm/iee-token.h create mode 100644 arch/arm64/include/asm/iee.h create mode 100644 arch/arm64/include/asm/koi.h create mode 100644 arch/arm64/include/asm/pgtable_slab.h create mode 100644 arch/arm64/include/asm/stack_slab.h create mode 100644 arch/arm64/kernel/iee/Makefile create mode 100644 arch/arm64/kernel/iee/iee-func.c create mode 100644 arch/arm64/kernel/iee/iee-gate.S create mode 100644 arch/arm64/kernel/iee/iee-pgtable.c create mode 100644 arch/arm64/kernel/iee/iee-selinuxp.c create mode 100644 arch/arm64/kernel/iee/iee.c create mode 100644 arch/arm64/kernel/iee/pgtable_slab.c create mode 100644 
arch/arm64/kernel/iee/stack_slab.c create mode 100644 arch/arm64/kernel/koi/Makefile create mode 100644 arch/arm64/kernel/koi/koi.c create mode 100644 arch/arm64/kernel/sfi_bpf_arch.c create mode 100644 arch/x86/include/asm/iee-access.h create mode 100644 arch/x86/include/asm/iee-cred.h create mode 100644 arch/x86/include/asm/iee-def.h create mode 100644 arch/x86/include/asm/iee-key.h create mode 100644 arch/x86/include/asm/iee-koi.h create mode 100644 arch/x86/include/asm/iee-selinuxp.h create mode 100644 arch/x86/include/asm/iee-si.h create mode 100644 arch/x86/include/asm/iee-slab.h create mode 100644 arch/x86/include/asm/iee-token.h create mode 100644 arch/x86/include/asm/iee.h create mode 100644 arch/x86/include/asm/koi.h create mode 100644 arch/x86/include/asm/pgtable_slab.h create mode 100644 arch/x86/include/asm/stack_slab.h create mode 100644 arch/x86/kernel/iee/Makefile create mode 100644 arch/x86/kernel/iee/iee-func.c create mode 100644 arch/x86/kernel/iee/iee-gate.S create mode 100644 arch/x86/kernel/iee/iee-selinuxp.c create mode 100644 arch/x86/kernel/iee/iee.c create mode 100644 arch/x86/kernel/iee/pgtable-slab.c create mode 100644 arch/x86/kernel/iee/stack-slab.c create mode 100644 arch/x86/kernel/koi/Makefile create mode 100644 arch/x86/kernel/koi/koi.c create mode 100644 arch/x86/kernel/sfi_bpf_arch.c create mode 100644 arch/x86/mm/ident_map_for_iee.c create mode 100644 drivers/rtc/rtc-test_glue.h create mode 100644 include/linux/iee-func.h create mode 100644 include/linux/sfi_bpf.h create mode 100644 kernel/bpf/sfi_bpf.c diff --git a/.gitignore b/.gitignore index d1a8ab3f98aa..f87d7b8380ab 100644 --- a/.gitignore +++ b/.gitignore @@ -51,6 +51,7 @@ *.tar *.xz *.zst +*.log Module.symvers modules.order @@ -169,3 +170,6 @@ sphinx_*/ # Rust analyzer configuration /rust-project.json + +#command +command.txt diff --git a/Makefile b/Makefile index ee377cec01f3..7b354f051344 100644 --- a/Makefile +++ b/Makefile @@ -555,6 +555,9 @@ LINUXINCLUDE := \ $(USERINCLUDE) KBUILD_AFLAGS := -D__ASSEMBLY__ -fno-PIE +ifeq ($(ARCH), arm64) + KBUILD_AFLAGS += -march=armv8.1-a +endif KBUILD_CFLAGS := KBUILD_CFLAGS += -std=gnu11 @@ -563,6 +566,10 @@ KBUILD_CFLAGS += -funsigned-char KBUILD_CFLAGS += -fno-common KBUILD_CFLAGS += -fno-PIE KBUILD_CFLAGS += -fno-strict-aliasing +ifeq ($(ARCH), arm64) + KBUILD_CFLAGS += -march=armv8.1-a +endif +#KBUILD_CFLAGS += -fPIC KBUILD_CPPFLAGS := -D__KERNEL__ KBUILD_RUSTFLAGS := $(rust_common_flags) \ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 9da9d58f1c02..cd15ba85864b 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1776,6 +1776,41 @@ config UNMAP_KERNEL_AT_EL0 If unsure, say Y. 
+# Config for iee +config IEE + depends on ARM64 + depends on ARM64_PAN + depends on ARM64_VA_BITS_48 + depends on ARM64_4K_PAGES + def_bool y + +# Config for support of interruption of iee +config IEE_INTERRUPTABLE + depends on IEE + def_bool n + +# Config for credentials isolation +config CREDP + depends on IEE + def_bool y + +# Config for kernel module isolation +config KOI + depends on ARM64 + depends on ARM64_VA_BITS_48 + depends on ARM64_4K_PAGES + def_bool y + +config IEE_SELINUX_P + depends on IEE + depends on SECURITY_SELINUX + def_bool n + +# Config for key isolation +config KEYP + depends on IEE + def_bool y + config MITIGATE_SPECTRE_BRANCH_HISTORY bool "Mitigate Spectre style attacks against branch history" if EXPERT default y diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index b152d1ffb547..5e68208c0a72 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -89,6 +89,7 @@ CONFIG_BPF_JIT_DEFAULT_ON=y # CONFIG_BPF_PRELOAD is not set CONFIG_BPF_LSM=y CONFIG_BPF_SCHED=y +CONFIG_HIVE=y # end of BPF subsystem CONFIG_PREEMPT_NONE_BUILD=y @@ -1388,7 +1389,7 @@ CONFIG_NETFILTER_NETLINK_ACCT=m CONFIG_NETFILTER_NETLINK_QUEUE=m CONFIG_NETFILTER_NETLINK_LOG=m CONFIG_NETFILTER_NETLINK_OSF=m -CONFIG_NF_CONNTRACK=m +CONFIG_NF_CONNTRACK=y CONFIG_NF_LOG_SYSLOG=m CONFIG_NETFILTER_CONNCOUNT=m CONFIG_NF_CONNTRACK_MARK=y @@ -1419,7 +1420,7 @@ CONFIG_NF_CT_NETLINK=m CONFIG_NF_CT_NETLINK_TIMEOUT=m CONFIG_NF_CT_NETLINK_HELPER=m CONFIG_NETFILTER_NETLINK_GLUE_CT=y -CONFIG_NF_NAT=m +CONFIG_NF_NAT=y CONFIG_NF_NAT_AMANDA=m CONFIG_NF_NAT_FTP=m CONFIG_NF_NAT_IRC=m @@ -1623,7 +1624,7 @@ CONFIG_IP_VS_PE_SIP=m # # IP: Netfilter Configuration # -CONFIG_NF_DEFRAG_IPV4=m +CONFIG_NF_DEFRAG_IPV4=y CONFIG_NF_SOCKET_IPV4=m CONFIG_NF_TPROXY_IPV4=m CONFIG_NF_TABLES_IPV4=y @@ -1695,7 +1696,7 @@ CONFIG_IP6_NF_TARGET_MASQUERADE=m CONFIG_IP6_NF_TARGET_NPT=m # end of IPv6: Netfilter Configuration -CONFIG_NF_DEFRAG_IPV6=m +CONFIG_NF_DEFRAG_IPV6=y CONFIG_NF_TABLES_BRIDGE=m # CONFIG_NFT_BRIDGE_META is not set CONFIG_NFT_BRIDGE_REJECT=m diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 38b23786aeb4..6af10d509c2e 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -26,6 +26,41 @@ #include #include +#ifdef CONFIG_IEE + .macro iee_si_restore_daif, flags:req + msr daifclr, #0xf + tbnz \flags, #6, 114221f + tbnz \flags, #7, 114210f + tbnz \flags, #8, 114100f + msr daifset, #0b000 + b 114514f +114221: + tbnz \flags, #7, 114211f + tbnz \flags, #8, 114101f + msr daifset, #0b001 + b 114514f +114211: + tbnz \flags, #8, 114111f + msr daifset, #0b011 + b 114514f +114210: + tbnz \flags, #8, 114110f + msr daifset, #0b010 + b 114514f +114100: + msr daifset, #0b100 + b 114514f +114101: + msr daifset, #0b101 + b 114514f +114110: + msr daifset, #0b110 + b 114514f +114111: + msr daifset, #0b111 +114514: + .endm +#endif /* * Provide a wxN alias for each wN register so what we can paste a xN * reference after a 'w' to obtain the 32-bit version. 
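/*
 * Note on the iee_si_restore_daif macro added above: in the saved PSTATE
 * flags F is bit 6, I is bit 7 and A is bit 8, while the MSR DAIFSet
 * immediate encodes F/I/A/D in bits 0-3.  The tbnz chain simply maps the
 * three saved bits onto the matching DAIFSet immediate (0b000-0b111); the
 * numeric labels are only local branch targets.  The D bit is never
 * re-set, so debug exceptions appear to be left unmasked deliberately,
 * matching the commented-out "msr daifset, #0x7" variants further down.
 */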
@@ -52,7 +87,11 @@ alternative_else_nop_endif .macro disable_daif disable_allint +// #ifdef CONFIG_IEE +// msr daifset, #0x7 +// #else msr daifset, #0xf +// #endif .endm .macro enable_daif @@ -69,7 +108,11 @@ alternative_else_nop_endif .endm .macro restore_irq, flags +// #ifdef CONFIG_IEE +// iee_si_restore_daif \flags +// #else msr daif, \flags +// #endif .endm .macro enable_dbg @@ -77,20 +120,44 @@ alternative_else_nop_endif .endm .macro disable_step_tsk, flgs, tmp +// #ifdef CONFIG_IEE +// 1145: +// tbz \flgs, #TIF_SINGLESTEP, 9990f +// mrs \tmp, mdscr_el1 +// bic \tmp, \tmp, #DBG_MDSCR_SS +// orr \tmp, \tmp, #DBG_MDSCR_MDE +// msr mdscr_el1, \tmp +// isb // Synchronise with enable_dbg +// mrs \tmp, mdscr_el1 +// tbz \tmp, #15, 1145b +// #else tbz \flgs, #TIF_SINGLESTEP, 9990f mrs \tmp, mdscr_el1 bic \tmp, \tmp, #DBG_MDSCR_SS msr mdscr_el1, \tmp isb // Synchronise with enable_dbg +// #endif 9990: .endm /* call with daif masked */ .macro enable_step_tsk, flgs, tmp +// #ifdef CONFIG_IEE +// 1146: +// tbz \flgs, #TIF_SINGLESTEP, 9990f +// mrs \tmp, mdscr_el1 +// orr \tmp, \tmp, #DBG_MDSCR_SS +// orr \tmp, \tmp, #DBG_MDSCR_MDE +// msr mdscr_el1, \tmp +// isb // Synchronise with enable_dbg +// mrs \tmp, mdscr_el1 +// tbz \tmp, #15, 1146b +// #else tbz \flgs, #TIF_SINGLESTEP, 9990f mrs \tmp, mdscr_el1 orr \tmp, \tmp, #DBG_MDSCR_SS msr mdscr_el1, \tmp +// #endif 9990: .endm diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 2417cc6b1631..cb5b4c2e03b8 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -26,11 +26,19 @@ static inline void local_daif_mask(void) (read_sysreg_s(SYS_ICC_PMR_EL1) == (GIC_PRIO_IRQOFF | GIC_PRIO_PSR_I_SET))); +// #ifdef CONFIG_IEE +// asm volatile( +// "msr daifset, #0x7 // local_daif_mask\n" +// : +// : +// : "memory"); +// #else asm volatile( "msr daifset, #0xf // local_daif_mask\n" : : : "memory"); +// #endif /* Don't really care for a dsb here, we don't intend to enable IRQs */ if (system_uses_irq_prio_masking()) @@ -118,7 +126,11 @@ static inline void local_daif_restore(unsigned long flags) gic_write_pmr(pmr); } +// #ifdef CONFIG_IEE +// iee_si_write_daif(flags); +// #else write_sysreg(flags, daif); +// #endif /* If we can take asynchronous errors we can take NMIs */ if (system_uses_nmi()) { @@ -151,7 +163,11 @@ static inline void local_daif_inherit(struct pt_regs *regs) * system_has_prio_mask_debugging() won't restore the I bit if it can * use the pmr instead. 
*/ +// #ifdef CONFIG_IEE +// iee_si_write_daif(flags); +// #else write_sysreg(flags, daif); +// #endif /* The ALLINT field is at the same position in pstate and ALLINT */ if (system_uses_nmi()) { diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index bcd5622aa096..76c4bd6c2b20 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -58,7 +58,11 @@ void arch_efi_call_virt_teardown(void); #define arch_efi_save_flags(state_flags) \ ((void)((state_flags) = read_sysreg(daif))) +// #ifdef CONFIG_IEE +// #define arch_efi_restore_flags(state_flags) iee_si_write_daif(state_flags) +// #else #define arch_efi_restore_flags(state_flags) write_sysreg(state_flags, daif) +// #endif /* arch specific definitions used by the stub code */ diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 58c294a96676..095a0731dce3 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -108,6 +108,9 @@ void __init fixmap_copy(pgd_t *pgdir); #define __late_clear_fixmap(idx) __set_fixmap((idx), 0, FIXMAP_PAGE_CLEAR) extern void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot); +#ifdef CONFIG_PTP +extern void __iee_set_fixmap_pre_init(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot); +#endif #include diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h index 84055329cd8b..f72d89bb9a32 100644 --- a/arch/arm64/include/asm/hw_breakpoint.h +++ b/arch/arm64/include/asm/hw_breakpoint.h @@ -104,6 +104,18 @@ static inline void decode_ctrl_reg(u32 reg, write_sysreg(VAL, dbg##REG##N##_el1);\ } while (0) +#ifdef CONFIG_IEE +#define IEE_SI_AARCH64_DBG_READ(N, REG, VAL) do{\ + VAL = this_cpu_read(iee_si_user_##REG##N);\ +} while (0) + +#define IEE_SI_AARCH64_DBG_WRITE(N, REG, VAL) do{\ + u64 __val = (u64)(VAL); \ + this_cpu_write(iee_si_user_##REG##N, __val);\ + iee_rwx_gate_entry(IEE_WRITE_AFSR0);\ +} while (0) +#endif + struct task_struct; struct notifier_block; struct perf_event_attr; diff --git a/arch/arm64/include/asm/iee-access.h b/arch/arm64/include/asm/iee-access.h new file mode 100644 index 000000000000..9be519b707c5 --- /dev/null +++ b/arch/arm64/include/asm/iee-access.h @@ -0,0 +1,46 @@ +#ifndef _LINUX_IEE_ACCESS_H +#define _LINUX_IEE_ACCESS_H + +#include +#include + +extern unsigned long long iee_rw_gate(int flag, ...); + +#ifdef CONFIG_IEE +static inline void iee_write_in_byte(void *ptr, u64 data, int length) +{ + iee_rw_gate(IEE_WRITE_IN_BYTE, ptr, data, length); +} + +static inline void iee_memset(void *ptr, int data, size_t n) +{ + iee_rw_gate(IEE_MEMSET, ptr, data, n); +} + +static inline void iee_memcpy(void *dst, const void *src, size_t n) +{ + iee_rw_gate(IEE_MEMCPY, dst, src, n); +} + +static inline void iee_set_track(struct track *ptr, struct track *data) +{ + iee_rw_gate(IEE_OP_SET_TRACK, ptr, data); +} + +static inline void iee_set_freeptr(freeptr_t *pptr, freeptr_t ptr) +{ + iee_rw_gate(IEE_OP_SET_FREEPTR, pptr, ptr); +} + +static inline void iee_copy_pte_range(pte_t *new_dst, pte_t *old_dst, pte_t *src_pte, struct vm_area_struct *src_vma, unsigned long dst_vm_flags, pte_t *end_pte) +{ + iee_rw_gate(IEE_COPY_PTE_RANGE, new_dst, old_dst, src_pte, src_vma, dst_vm_flags, end_pte); +} + +static inline void iee_split_huge_pmd(pmd_t *pmdp, pte_t *pgtable) +{ + iee_rw_gate(IEE_SPLIT_HUGE_PMD, pmdp, pgtable); +} +#endif + +#endif \ No newline at end of file diff --git a/arch/arm64/include/asm/iee-cred.h b/arch/arm64/include/asm/iee-cred.h new 
file mode 100644 index 000000000000..b8c3bb53f98a --- /dev/null +++ b/arch/arm64/include/asm/iee-cred.h @@ -0,0 +1,150 @@ +#ifndef _LINUX_IEE_CRED_H +#define _LINUX_IEE_CRED_H + +#include +#include + +extern unsigned long long iee_rw_gate(int flag, ...); + +#ifdef CONFIG_CREDP +static void __maybe_unused iee_copy_cred(const struct cred *old, struct cred *new) +{ + iee_rw_gate(IEE_OP_COPY_CRED,old,new); +} + +static void __maybe_unused iee_set_cred_uid(struct cred *cred, kuid_t uid) +{ + iee_rw_gate(IEE_OP_SET_CRED_UID,cred,uid); +} + +static void __maybe_unused iee_set_cred_gid(struct cred *cred, kgid_t gid) +{ + iee_rw_gate(IEE_OP_SET_CRED_GID,cred,gid); +} + +static void __maybe_unused iee_set_cred_suid(struct cred *cred, kuid_t suid) +{ + iee_rw_gate(IEE_OP_SET_CRED_SUID,cred,suid); +} + +static void __maybe_unused iee_set_cred_sgid(struct cred *cred, kgid_t sgid) +{ + iee_rw_gate(IEE_OP_SET_CRED_SGID,cred,sgid); +} + +static void __maybe_unused iee_set_cred_euid(struct cred *cred, kuid_t euid) +{ + iee_rw_gate(IEE_OP_SET_CRED_EUID,cred,euid); +} + +static void __maybe_unused iee_set_cred_egid(struct cred *cred, kgid_t egid) +{ + iee_rw_gate(IEE_OP_SET_CRED_EGID,cred,egid); +} + +static void __maybe_unused iee_set_cred_fsuid(struct cred *cred, kuid_t fsuid) +{ + iee_rw_gate(IEE_OP_SET_CRED_FSUID,cred,fsuid); +} + +static void __maybe_unused iee_set_cred_fsgid(struct cred *cred, kgid_t fsgid) +{ + iee_rw_gate(IEE_OP_SET_CRED_FSGID,cred,fsgid); +} + +static void __maybe_unused iee_set_cred_user(struct cred *cred, struct user_struct *user) +{ + iee_rw_gate(IEE_OP_SET_CRED_USER,cred,user); +} + +static void __maybe_unused iee_set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) +{ + iee_rw_gate(IEE_OP_SET_CRED_USER_NS,cred,user_ns); +} + +static void __maybe_unused iee_set_cred_ucounts(struct cred *cred, struct ucounts *ucounts) +{ + iee_rw_gate(IEE_OP_SET_CRED_UCOUNTS,cred,ucounts); +} + +static void __maybe_unused iee_set_cred_group_info(struct cred *cred, struct group_info *group_info) +{ + iee_rw_gate(IEE_OP_SET_CRED_GROUP_INFO,cred,group_info); +} + +static void __maybe_unused iee_set_cred_securebits(struct cred *cred, unsigned securebits) +{ + iee_rw_gate(IEE_OP_SET_CRED_SECUREBITS,cred,securebits); +} + +static void __maybe_unused iee_set_cred_cap_inheritable(struct cred *cred, kernel_cap_t cap_inheritable) +{ + iee_rw_gate(IEE_OP_SET_CRED_CAP_INHER,cred,cap_inheritable); +} + +static void __maybe_unused iee_set_cred_cap_permitted(struct cred *cred, kernel_cap_t cap_permitted) +{ + iee_rw_gate(IEE_OP_SET_CRED_CAP_PERM,cred,cap_permitted); +} + +static void __maybe_unused iee_set_cred_cap_effective(struct cred *cred, kernel_cap_t cap_effective) +{ + iee_rw_gate(IEE_OP_SET_CRED_CAP_EFFECT,cred,cap_effective); +} + +static void __maybe_unused iee_set_cred_cap_bset(struct cred *cred, kernel_cap_t cap_bset) +{ + iee_rw_gate(IEE_OP_SET_CRED_CAP_BSET,cred,cap_bset); +} + +static void __maybe_unused iee_set_cred_cap_ambient(struct cred *cred, kernel_cap_t cap_ambient) +{ + iee_rw_gate(IEE_OP_SET_CRED_CAP_AMBIENT,cred,cap_ambient); +} + +#ifdef CONFIG_KEYS +static void __maybe_unused iee_set_cred_jit_keyring(struct cred *cred, unsigned char jit_keyring) +{ + iee_rw_gate(IEE_OP_SET_CRED_JIT_KEYRING,cred,jit_keyring); +} + +static void __maybe_unused iee_set_cred_session_keyring(struct cred *cred, struct key *session_keyring) +{ + iee_rw_gate(IEE_OP_SET_CRED_SESS_KEYRING,cred,session_keyring); +} + +static void __maybe_unused iee_set_cred_process_keyring(struct cred *cred, struct 
key *process_keyring) +{ + iee_rw_gate(IEE_OP_SET_CRED_PROC_KEYRING,cred,process_keyring); +} + +static void __maybe_unused iee_set_cred_thread_keyring(struct cred *cred, struct key *thread_keyring) +{ + iee_rw_gate(IEE_OP_SET_CRED_THREAD_KEYRING,cred,thread_keyring); +} + +static void __maybe_unused iee_set_cred_request_key_auth(struct cred *cred, struct key *request_key_auth) +{ + iee_rw_gate(IEE_OP_SET_CRED_REQ_KEYRING,cred,request_key_auth); +} +#endif + +static void __maybe_unused iee_set_cred_atomic_set_usage(struct cred *cred, int i) +{ + iee_rw_gate(IEE_OP_SET_CRED_ATSET_USAGE,cred,i); +} + +#ifdef CONFIG_SECURITY +static void __maybe_unused iee_set_cred_security(struct cred *cred, void *security) +{ + iee_rw_gate(IEE_OP_SET_CRED_SECURITY,cred,security); +} +#endif + +static void __maybe_unused iee_set_cred_rcu(struct cred *cred, struct rcu_head *rcu) +{ + iee_rw_gate(IEE_OP_SET_CRED_RCU,cred,rcu); +} +#endif + +#endif \ No newline at end of file diff --git a/arch/arm64/include/asm/iee-def.h b/arch/arm64/include/asm/iee-def.h new file mode 100644 index 000000000000..d33d8570a546 --- /dev/null +++ b/arch/arm64/include/asm/iee-def.h @@ -0,0 +1,130 @@ +#ifndef _LINUX_IEE_DEF_H +#define _LINUX_IEE_DEF_H + +/* Add new IEE ops here */ + +#define AT_ADD 1 +#define AT_INC_NOT_ZERO 2 +#define AT_SUB_AND_TEST 3 +/* Atomic ops for atomic_t */ + +#define REFCOUNT_INC 1 +#define REFCOUNT_SET 2 +#define REFCOUNT_DEC_AND_TEST 3 +#define REFCOUNT_INC_NOT_ZERO 4 + +#define SET_BIT_OP 1 +#define TEST_AND_CLEAR_BIT 2 +#define TEST_AND_SET_BIT 3 + +// Function Identifiers with Parameters Description +enum { + IEE_WRITE_IN_BYTE=0, // Parameters: void *ptr, __u64 data, int length + IEE_OP_SET_PTE, // Parameters: pte_t *ptep, pte_t pte + IEE_OP_SET_PMD, // Parameters: pmd_t *pmdp, pmd_t pmd + IEE_OP_SET_PUD, // Parameters: pud_t *pudp, pud_t pud + IEE_OP_SET_P4D, // Parameters: p4d_t *p4dp, p4d_t p4d + IEE_OP_SET_BM_PTE, // Parameters: pte_t *ptep, pte_t pte + IEE_OP_SET_SWAPPER_PGD, // Parameters: pgd_t *pgdp, pgd_t pgd + IEE_OP_SET_TRAMP_PGD, // Parameters: pgd_t *pgdp, pgd_t pgd + IEE_MEMSET, // Parameters: void *ptr, int data, size_t n + IEE_OP_SET_TRACK, // Parameters: struct track *ptr, struct track *data + IEE_OP_SET_FREEPTR, // Parameters: void **pptr, void *ptr + IEE_OP_SET_PTE_U, // Parameters: pte_t *ptep + IEE_OP_SET_PTE_P, // Parameters: pte_t *ptep + IEE_SET_TOKEN_PGD, // Parameters: struct task_token *token, pgd_t *pgd + IEE_INIT_TOKEN, // Parameters: struct task_struct *tsk, void *kernel_stack, void *iee_stack + IEE_INVALIDATE_TOKEN, // Parameters: struct task_struct *tsk + IEE_SET_SENSITIVE_PTE, // Parameters: pte_t *lm_ptep, pte_t *iee_ptep + IEE_UNSET_SENSITIVE_PTE, // Parameters: pte_t *lm_ptep, pte_t *iee_ptep + IEE_SET_TOKEN, // Parameters: pte_t *ptep, void *new, unsigned long order + IEE_UNSET_TOKEN, // Parameters: pte_t *ptep, void *token_addr, void *token_page, unsigned long order + IEE_COPY_PTE_RANGE, // Parameters: pte_t *new_dst, pte_t *old_dst, pte_t *src_pte, struct vm_area_struct *src_vma, + // unsigned long dst_vm_flags, pte_t *end_pte + IEE_SPLIT_HUGE_PMD, + IEE_VALIDATE_TOKEN, + IEE_MEMCPY, +#ifdef CONFIG_KOI + _IEE_READ_KOI_STACK, // Parameters: struct task_struct *tsk + _IEE_WRITE_KOI_STACK, // Parameters: struct task_struct *tsk, unsigned long koi_stack + _IEE_READ_TOKEN_TTBR1, // Parameters: struct task_struct *tsk + _IEE_WRITE_TOKEN_TTBR1, // Parameters: struct task_struct *tsk, unsigned long current_ttbr1 + _IEE_READ_KOI_KERNEL_STACK, // Parameters: struct 
task_struct *tsk + _IEE_WRITE_KOI_KERNEL_STACK, // Parameters: struct task_struct *tsk, unsigned long kernel_stack + _IEE_READ_KOI_STACK_BASE, // Parameters: struct task_struct *tsk + _IEE_WRITE_KOI_STACK_BASE, // Parameters: struct task_struct *tsk, unsigned long koi_stack_base + _IEE_SET_KOI_PGD, // Parameters: unsigned long koi_pgd_addr +#endif +#ifdef CONFIG_CREDP + IEE_OP_COPY_CRED, // Parameters: struct cred *old, struct cred *new + IEE_OP_SET_CRED_UID, // Parameters: struct cred *cred, kuid_t uid + IEE_OP_SET_CRED_GID, // Parameters: struct cred *cred, kgid_t gid + IEE_OP_SET_CRED_SUID, // Parameters: struct cred *cred, kuid_t suid + IEE_OP_SET_CRED_SGID, // Parameters: struct cred *cred, kgid_t sgid + IEE_OP_SET_CRED_EUID, // Parameters: struct cred *cred, kuid_t euid + IEE_OP_SET_CRED_EGID, // Parameters: struct cred *cred, kgid_t egid + IEE_OP_SET_CRED_FSUID, // Parameters: struct cred *cred, kuid_t fsuid + IEE_OP_SET_CRED_FSGID, // Parameters: struct cred *cred, kgid_t fsgid + IEE_OP_SET_CRED_USER, // Parameters: struct cred *cred, struct user_struct *user + IEE_OP_SET_CRED_USER_NS, // Parameters: struct cred *cred, struct user_namespace *user_ns + IEE_OP_SET_CRED_GROUP_INFO, // Parameters: struct cred *cred, struct group_info *group_info + IEE_OP_SET_CRED_SECUREBITS, // Parameters: struct cred *cred, unsigned securebits + IEE_OP_SET_CRED_CAP_INHER, // Parameters: struct cred *cred, kernel_cap_t cap_inheritable + IEE_OP_SET_CRED_CAP_PERM, // Parameters: struct cred *cred, kernel_cap_t cap_permitted + IEE_OP_SET_CRED_CAP_EFFECT, // Parameters: struct cred *cred, kernel_cap_t cap_effective + IEE_OP_SET_CRED_CAP_BSET, // Parameters: struct cred *cred, kernel_cap_t cap_bset + IEE_OP_SET_CRED_CAP_AMBIENT, // Parameters: struct cred *cred, kernel_cap_t cap_ambient + IEE_OP_SET_CRED_JIT_KEYRING, // Parameters: struct cred *cred, unsigned char jit_keyring + IEE_OP_SET_CRED_SESS_KEYRING, // Parameters: struct cred *cred, struct key *session_keyring + IEE_OP_SET_CRED_PROC_KEYRING, // Parameters: struct cred *cred, struct key *process_keyring + IEE_OP_SET_CRED_THREAD_KEYRING, // Parameters: struct cred *cred, struct key *thread_keyring + IEE_OP_SET_CRED_REQ_KEYRING, // Parameters: struct cred *cred, struct key *request_key_auth + IEE_OP_SET_CRED_NON_RCU, // Parameters: struct cred *cred, int non_rcu + IEE_OP_SET_CRED_ATSET_USAGE, // Parameters: struct cred *cred, int i + IEE_OP_SET_CRED_ATOP_USAGE, // Parameters: struct cred *cred, int flag + IEE_OP_SET_CRED_SECURITY, // Parameters: struct cred *cred, void *security + IEE_OP_SET_CRED_RCU, // Parameters: struct cred *cred, struct rcu_head *rcu + IEE_OP_SET_CRED_UCOUNTS, // Parameters: struct cred *cred, struct ucounts *ucounts +// IEE_OP_COMMIT_CRED, // Parameters: struct cred *cred +// IEE_OP_COPY_CRED_KERNEL, // Parameters: struct cred *old, struct cred *new +#endif +#ifdef CONFIG_KEYP + IEE_OP_SET_KEY_UNION, + IEE_OP_SET_KEY_STRUCT, + IEE_OP_SET_KEY_PAYLOAD, + IEE_OP_SET_KEY_USAGE, + IEE_OP_SET_KEY_SERIAL, + IEE_OP_SET_KEY_WATCHERS, + IEE_OP_SET_KEY_USERS, + IEE_OP_SET_KEY_SECURITY, + IEE_OP_SET_KEY_EXPIRY, + IEE_OP_SET_KEY_REVOKED_AT, + IEE_OP_SET_KEY_LAST_USED_AT, + IEE_OP_SET_KEY_UID, + IEE_OP_SET_KEY_GID, + IEE_OP_SET_KEY_PERM, + IEE_OP_SET_KEY_QUOTALEN, + IEE_OP_SET_KEY_DATALEN, + IEE_OP_SET_KEY_STATE, + IEE_OP_SET_KEY_MAGIC, + IEE_OP_SET_KEY_FLAGS, + IEE_OP_SET_KEY_INDEX_KEY, + IEE_OP_SET_KEY_HASH, + IEE_OP_SET_KEY_LEN_DESC, + IEE_OP_SET_KEY_TYPE, + IEE_OP_SET_KEY_TAG, + IEE_OP_SET_KEY_DESCRIPTION, + IEE_OP_SET_KEY_RESTRICT_LINK, + 
IEE_OP_SET_KEY_FLAG_BIT, +#endif +#ifdef CONFIG_IEE_SELINUX_P + IEE_SEL_SET_STATUS_PG, // Parameters: struct page* new_page + IEE_SEL_SET_ENFORCING, // Parameters: bool value + IEE_SEL_SET_INITIALIZED, + IEE_SEL_SET_POLICY_CAP, // Parameters: unsigned int idx, int cap + IEE_SEL_RCU_ASSIGN_POLICY, // Parameters: struct selinux_policy* new_policy, struct selinux_policy* iee_new_policy +#endif + IEE_FLAG_END +}; + +#endif \ No newline at end of file diff --git a/arch/arm64/include/asm/iee-key.h b/arch/arm64/include/asm/iee-key.h new file mode 100644 index 000000000000..5653720badb6 --- /dev/null +++ b/arch/arm64/include/asm/iee-key.h @@ -0,0 +1,149 @@ +#ifndef _LINUX_IEE_KEY_H +#define _LINUX_IEE_KEY_H + +#include +#include + +extern unsigned long long iee_rw_gate(int flag, ...); + +#ifdef CONFIG_KEYP +static void __maybe_unused iee_set_key_union(struct key *key, struct key_union *key_union) +{ + iee_rw_gate(IEE_OP_SET_KEY_UNION, key, key_union); +} + +static void __maybe_unused iee_set_key_struct(struct key *key, struct key_struct *key_struct) +{ + iee_rw_gate(IEE_OP_SET_KEY_STRUCT, key, key_struct); +} + +static void __maybe_unused iee_set_key_payload(struct key *key, union key_payload *key_payload) +{ + iee_rw_gate(IEE_OP_SET_KEY_PAYLOAD, key, key_payload); +} + +extern bool iee_set_key_usage(struct key *key, int n, int flag); + +static void __maybe_unused iee_set_key_serial(struct key *key, key_serial_t serial) +{ + iee_rw_gate(IEE_OP_SET_KEY_SERIAL, key, serial); +} + +#ifdef CONFIG_KEY_NOTIFICATIONS +static void __maybe_unused iee_set_key_watchers(struct key *key, struct watch_list *watchers) +{ + iee_rw_gate(IEE_OP_SET_KEY_WATCHERS, key, watchers); +} +#endif + +static void __maybe_unused iee_set_key_user(struct key *key, struct key_user *user) +{ + iee_rw_gate(IEE_OP_SET_KEY_USERS, key, user); +} + +static void __maybe_unused iee_set_key_security(struct key *key, void *security) +{ + iee_rw_gate(IEE_OP_SET_KEY_SECURITY, key, security); +} + +static void __maybe_unused iee_set_key_expiry(struct key *key, time64_t expiry) +{ + iee_rw_gate(IEE_OP_SET_KEY_EXPIRY, key, expiry); +} + +static void __maybe_unused iee_set_key_revoked_at(struct key *key, time64_t revoked_at) +{ + iee_rw_gate(IEE_OP_SET_KEY_REVOKED_AT, key, revoked_at); +} + +static void __maybe_unused iee_set_key_last_used_at(struct key *key, time64_t last_used_at) +{ + iee_rw_gate(IEE_OP_SET_KEY_LAST_USED_AT, key, last_used_at); +} + +static void __maybe_unused iee_set_key_uid(struct key *key, kuid_t uid) +{ + iee_rw_gate(IEE_OP_SET_KEY_UID, key, uid); +} + +static void __maybe_unused iee_set_key_gid(struct key *key, kgid_t gid) +{ + iee_rw_gate(IEE_OP_SET_KEY_GID, key, gid); +} + +static void __maybe_unused iee_set_key_perm(struct key *key, key_perm_t perm) +{ + iee_rw_gate(IEE_OP_SET_KEY_PERM, key, perm); +} + +static void __maybe_unused iee_set_key_quotalen(struct key *key, unsigned short quotalen) +{ + iee_rw_gate(IEE_OP_SET_KEY_QUOTALEN, key, quotalen); +} + +static void __maybe_unused iee_set_key_datalen(struct key *key, unsigned short datalen) +{ + iee_rw_gate(IEE_OP_SET_KEY_DATALEN, key, datalen); +} + +static void __maybe_unused iee_set_key_state(struct key *key, short state) +{ + iee_rw_gate(IEE_OP_SET_KEY_STATE, key, state); +} + +#ifdef KEY_DEBUGGING +static void __maybe_unused iee_set_key_magic(struct key *key, unsigned magic) +{ + iee_rw_gate(IEE_OP_SET_KEY_MAGIC, key, magic); +} +#endif + +static void __maybe_unused iee_set_key_flags(struct key *key, unsigned long flags) +{ + iee_rw_gate(IEE_OP_SET_KEY_FLAGS, 
key, flags); +} + +static void __maybe_unused iee_set_key_index_key(struct key *key, struct keyring_index_key *index_key) +{ + iee_rw_gate(IEE_OP_SET_KEY_INDEX_KEY, key, index_key); +} + +static void __maybe_unused iee_set_key_hash(struct key *key, unsigned long hash) +{ + iee_rw_gate(IEE_OP_SET_KEY_HASH, key, hash); +} + +static void __maybe_unused iee_set_key_len_desc(struct key *key, unsigned long len_desc) +{ + iee_rw_gate(IEE_OP_SET_KEY_LEN_DESC, key, len_desc); +} + +static void __maybe_unused iee_set_key_type(struct key *key, struct key_type *type) +{ + iee_rw_gate(IEE_OP_SET_KEY_TYPE, key, type); +} + +static void __maybe_unused iee_set_key_domain_tag(struct key *key, struct key_tag *domain_tag) +{ + iee_rw_gate(IEE_OP_SET_KEY_TAG, key, domain_tag); +} + +static void __maybe_unused iee_set_key_description(struct key *key, char *description) +{ + iee_rw_gate(IEE_OP_SET_KEY_DESCRIPTION, key, description); +} + +static void __maybe_unused iee_set_key_restrict_link(struct key *key, struct key_restriction *restrict_link) +{ + iee_rw_gate(IEE_OP_SET_KEY_RESTRICT_LINK, key, restrict_link); +} + +static bool __maybe_unused iee_set_key_flag_bit(struct key *key, long nr, int flag) +{ + bool ret; + ret = iee_rw_gate(IEE_OP_SET_KEY_FLAG_BIT, key, nr, flag); + return ret; +} +#endif + +#endif \ No newline at end of file diff --git a/arch/arm64/include/asm/iee-koi.h b/arch/arm64/include/asm/iee-koi.h new file mode 100644 index 000000000000..8f7bfcc7e581 --- /dev/null +++ b/arch/arm64/include/asm/iee-koi.h @@ -0,0 +1,13 @@ +#if defined(CONFIG_KOI) && defined(CONFIG_IEE) +#define IEE_SWITCH_TO_KERNEL 7 +#define IEE_SWITCH_TO_KOI 8 +#define IEE_READ_KOI_STACK 24 +#define IEE_WRITE_KOI_STACK 25 +#define IEE_READ_TOKEN_TTBR1 26 +#define IEE_WRITE_TOKEN_TTBR1 27 +#define IEE_READ_KOI_KERNEL_STACK 28 +#define IEE_WRITE_KOI_KERNEL_STACK 29 +#define IEE_READ_KOI_STACK_BASE 30 +#define IEE_WRITE_KOI_STACK_BASE 31 +#define IEE_SET_KOI_PGD 32 +#endif \ No newline at end of file diff --git a/arch/arm64/include/asm/iee-selinuxp.h b/arch/arm64/include/asm/iee-selinuxp.h new file mode 100644 index 000000000000..b1cf52d8c2d8 --- /dev/null +++ b/arch/arm64/include/asm/iee-selinuxp.h @@ -0,0 +1,27 @@ +#ifndef _LINUX_IEE_SELINUX_P_H +#define _LINUX_IEE_SELINUX_P_H + +#include +#include +#include "security.h" +#include "ss/services.h" + +static inline struct mutex* iee_get_selinux_policy_lock(void) +{ + return (struct mutex*)(selinux_state.policy_mutex.owner.counter); +} + +static inline struct mutex* iee_get_selinux_status_lock(void) +{ + return (struct mutex*)(selinux_state.status_lock.owner.counter); +} + +/* APIs for modifying selinux_state */ +extern void iee_set_selinux_status_pg(struct page* new_page); +extern void iee_set_sel_policy_cap(unsigned int idx, int cap); +extern void iee_sel_rcu_assign_policy(struct selinux_policy* new_policy, + struct selinux_policy* iee_new_policy); + +extern struct kmem_cache *policy_jar; + +#endif \ No newline at end of file diff --git a/arch/arm64/include/asm/iee-si.h b/arch/arm64/include/asm/iee-si.h new file mode 100644 index 000000000000..02b77547b29c --- /dev/null +++ b/arch/arm64/include/asm/iee-si.h @@ -0,0 +1,61 @@ +#ifndef _LINUX_IEE_SI_H +#define _LINUX_IEE_SI_H + +#include +#define __iee_si_code __section(".iee.si_text") +#define __iee_si_data __section(".iee.si_data") + +/* Used for copying globals that iee rwx gate needs. 
*/ +extern unsigned long iee_base_swapper_pg_dir; +extern unsigned long iee_base_idmap_pg_dir; +extern unsigned long iee_base_reserved_pg_dir; +extern unsigned long iee_base__bp_harden_el1_vectors; +extern bool iee_init_done; +extern unsigned long iee_si_tcr; + +/* The following are __init functions used for iee si initialization. */ +extern void iee_si_prepare_data(void); + +extern unsigned long __iee_si_text_start[]; +// Handler function for sensitive inst +u64 iee_si_handler(int flag, ...); +/* + * TODO: scan a page to check whether it contains sensitive instructions + * return 1 when finding sensitive inst, 0 on safe page. + */ +extern int iee_si_scan_page(unsigned long addr); + + +#define DBG_MDSCR_SS (1 << 0) +#define DBG_MDSCR_MDE (1 << 15) + +#define IEE_SI_TEST 0 +#define IEE_WRITE_SCTLR 1 +#define IEE_WRITE_TTBR0 2 +#define IEE_WRITE_VBAR 3 +#define IEE_WRITE_TCR 4 +#define IEE_WRITE_MDSCR 5 +#define IEE_CONTEXT_SWITCH 6 +// #define IEE_WRITE_AFSR0 10 +/* Provide ttbr1 switch gate for KOI */ +/* MASK modify-permitted bits on IEE protected sys registers */ +#define IEE_SCTLR_MASK (SCTLR_EL1_CP15BEN | SCTLR_EL1_SED | SCTLR_EL1_UCT | SCTLR_EL1_UCI |\ + SCTLR_EL1_BT0 | SCTLR_EL1_BT1 | SCTLR_EL1_TCF0_MASK | SCTLR_ELx_DSSBS |\ + SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | SCTLR_ELx_ENDA | SCTLR_ELx_ENDB|\ + SCTLR_EL1_SPINTMASK | SCTLR_EL1_NMI | SCTLR_EL1_TIDCP | SCTLR_EL1_MSCEn|\ + SCTLR_ELx_ENTP2 | SCTLR_EL1_TCF_MASK) +#define IEE_TTBR0_MASK ~0 +#define IEE_TTBR1_MASK ~0 +#define IEE_TCR_MASK (TCR_HD | TCR_T0SZ_MASK | TCR_E0PD1) +#define IEE_MDSCR_MASK (DBG_MDSCR_SS | DBG_MDSCR_MDE) + +#define IEE_DBGBCR_BT 0b0000 << 20 +#define IEE_DBGBCR_SSC 0b00 << 14 +#define IEE_DBGBCR_HMC 0b1 << 13 +#define IEE_DBGBCR_BAS 0b1111 << 5 +#define IEE_DBGBCR_PMC 0b11 << 1 +#define IEE_DBGBCR_E 0b1 +#define IEE_DBGBCR IEE_DBGBCR_BT | IEE_DBGBCR_SSC | IEE_DBGBCR_HMC | IEE_DBGBCR_BAS \ + | IEE_DBGBCR_PMC | IEE_DBGBCR_E + +#endif \ No newline at end of file diff --git a/arch/arm64/include/asm/iee-slab.h b/arch/arm64/include/asm/iee-slab.h new file mode 100644 index 000000000000..4f3c17c7da00 --- /dev/null +++ b/arch/arm64/include/asm/iee-slab.h @@ -0,0 +1,23 @@ +#ifndef _LINUX_IEE_SLAB_H +#define _LINUX_IEE_SLAB_H +/* + * Tracking user of a slab. 
+ */ +#include + +#define TRACK_ADDRS_COUNT 16 +struct track { + unsigned long addr; /* Called from address */ +#ifdef CONFIG_STACKDEPOT + depot_stack_handle_t handle; +#endif + int cpu; /* Was running on cpu */ + int pid; /* Pid context */ + unsigned long when; /* When did the operation occur */ +}; + +enum track_item { TRACK_ALLOC, TRACK_FREE }; + +typedef struct { unsigned long v; } freeptr_t; + +#endif \ No newline at end of file diff --git a/arch/arm64/include/asm/iee-token.h b/arch/arm64/include/asm/iee-token.h new file mode 100644 index 000000000000..25ebf08faf8f --- /dev/null +++ b/arch/arm64/include/asm/iee-token.h @@ -0,0 +1,33 @@ +#ifndef _LINUX_IEE_TOKEN_H +#define _LINUX_IEE_TOKEN_H + +#include + +extern unsigned long long iee_rw_gate(int flag, ...); +struct task_token; +struct task_struct; +struct mm_struct; + +#ifdef CONFIG_IEE +static inline void iee_set_token_pgd(struct task_struct *tsk, pgd_t *pgd) +{ + iee_rw_gate(IEE_SET_TOKEN_PGD, tsk, pgd); +} + +static inline void iee_init_token(struct task_struct *tsk, void *iee_stack, void *tmp_page) +{ + iee_rw_gate(IEE_INIT_TOKEN, tsk, iee_stack, tmp_page); +} + +static inline void iee_invalidate_token(struct task_struct *tsk) +{ + iee_rw_gate(IEE_INVALIDATE_TOKEN, tsk); +} + +static inline void iee_validate_token(struct task_struct *tsk) +{ + iee_rw_gate(IEE_VALIDATE_TOKEN, tsk); +} +#endif + +#endif \ No newline at end of file diff --git a/arch/arm64/include/asm/iee.h b/arch/arm64/include/asm/iee.h new file mode 100644 index 000000000000..598f6d0b2626 --- /dev/null +++ b/arch/arm64/include/asm/iee.h @@ -0,0 +1,10 @@ +#ifndef _LINUX_IEE_H +#define _LINUX_IEE_H +#define __iee_code __section(".iee.text") +#define __iee_header __section(".iee.text.header") + +u64 iee_dispatch(int flag, ...); + +#include + +#endif diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 85d26143faa5..e7a3081ce285 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -118,4 +118,25 @@ #define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PTE_RDONLY) #endif +#ifdef CONFIG_IEE + +#ifdef CONFIG_ARM64_4K_PAGES // zgcXXX: it has been deleted in 6.6. +#define ARM64_SWAPPER_USES_SECTION_MAPS 1 +#else +#define ARM64_SWAPPER_USES_SECTION_MAPS 0 +#endif + +#define SWAPPER_MM_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS) // zgcXXX: warning: 6.6 delete this macro. should delete this line later. 
+ +#define SWAPPER_PTE_FLAGS_IDMAP (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED | PTE_RDONLY) +#define SWAPPER_PMD_FLAGS_IDMAP (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S | PMD_SECT_RDONLY) + +#if ARM64_SWAPPER_USES_SECTION_MAPS +#define SWAPPER_MM_MMUFLAGS_IDMAP (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS_IDMAP) +#else +#define SWAPPER_MM_MMUFLAGS_IDMAP (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS_IDMAP) +#endif + +#endif + #endif /* __ASM_KERNEL_PGTABLE_H */ diff --git a/arch/arm64/include/asm/koi.h b/arch/arm64/include/asm/koi.h new file mode 100644 index 000000000000..c9afe886bef8 --- /dev/null +++ b/arch/arm64/include/asm/koi.h @@ -0,0 +1,532 @@ +#include "linux/mm.h" +#include "asm/current.h" +#include "asm/pgtable-hwdef.h" +#include "asm/pgtable-types.h" +#include "asm/pgtable.h" +#include "linux/mm_types.h" +#include "linux/pgtable.h" +#include "linux/printk.h" +#include "linux/slab.h" +#include "linux/string.h" +#include +#include +#include "linux/hashtable.h" +#include "linux/module.h" +#include "linux/vmalloc.h" +#include "stacktrace.h" +#include "asm/mmu.h" +#ifdef CONFIG_IEE +#include "asm/iee-si.h" +#include "asm/iee-def.h" +#include +#endif + +#ifdef CONFIG_KOI + +#define HASH_TABLE_BIT 10 +#define HASH_TABLE_LEN (1 << HASH_TABLE_BIT) +#define HASH_KEY_MASK ((1 << HASH_TABLE_BIT) - 1) + +#define MAX_VAR_NAME 64 +#define DRIVER_ISOLATION_VAR_ARRAY_SIZE 32 +#define DRIVER_ISOLATION_MAX_VAL 256 + +extern struct hlist_head koi_mem_htbl[1024]; +extern spinlock_t koi_mem_htbl_spin_lock; +extern unsigned long koi_swapper_ttbr1; +extern s64 koi_offset; + +#ifdef CONFIG_IEE +extern unsigned long long iee_rw_gate(int flag, ...); +#endif + +DECLARE_PER_CPU(unsigned long[PAGE_SIZE / sizeof(unsigned long)], + koi_irq_current_ttbr1); + +/** +* struct koi_mem_hash_node - +*@mod:pointer to driver module +*@mem_list_head:free memory list head +*@ko_mm: mm_struct in each driver +*@pgdp:entry to Page Global Directory :pgd +*@node:hash linked list node +*@addr_htbl[1 << (HASH_TABLE_BIT)]: +*@rcu: +*/ +struct koi_mem_hash_node { + struct module *mod; + struct list_head mem_list_head; + struct mm_struct *ko_mm; + pgd_t *pgdp; + unsigned long ko_ttbr1; + struct hlist_node node; + struct hlist_head addr_htbl[1 << (HASH_TABLE_BIT)]; + struct rcu_head rcu; + // used to protect free mem list + spinlock_t spin_lock; + // used to protect addr hashtable + spinlock_t addr_htbl_spin_lock; + bool is_valid; + spinlock_t mod_lock; +}; +//describe the global shared var +struct shared_variable_descriptor { + unsigned int id; + unsigned int type; + char name[MAX_VAR_NAME]; + unsigned long offset; + unsigned int size; + unsigned int self_ptr_ids[DRIVER_ISOLATION_VAR_ARRAY_SIZE]; +}; + +int koi_do_switch_to_kernel_pgtbl(void); + +int koi_share_kstack(struct module *mod); + +int koi_copy_pagetable(struct mm_struct *ko_mm, pgd_t *koi_pg_dir, + unsigned long addr, unsigned long end, pteval_t prot); + +void koi_create_pagetable(struct module *mod); +void koi_destroy_pagetable(struct module *mod); + +void koi_map_kostack(struct module *mod); +unsigned long koi_mem_alloc(struct module *mod, unsigned long orig_addr, + unsigned long size); +void koi_mem_free(struct module *mod, unsigned long addr, unsigned long size, + bool is_const, int count, ...); +void *koi_mem_lookup(struct module *mod, unsigned long addr); +void koi_mem_free_callback(struct module *mod, unsigned long addr, + unsigned long size, void (*func)(void *)); +void koi_map_mem(struct module *mod, unsigned long addr, unsigned long size); +void koi_unmap_mem(struct 
module *mod, unsigned long addr, unsigned long size); +void koi_mem_free_to_user(struct module *mod, unsigned long addr, + unsigned long size); + +unsigned long koi_ttbr_ctor(struct module *mod); + +// unsigned long koi_get_token_addr(struct task_struct *tsk); + +#define switch_pgtable(ttbr1) \ + do { \ + write_sysreg((ttbr1), ttbr1_el1); \ + isb(); \ + asm volatile(ALTERNATIVE("nop; nop; nop", \ + "ic iallu; dsb nsh; isb", \ + ARM64_WORKAROUND_CAVIUM_27456)); \ + } while (0); + +#ifndef CONFIG_IEE +#define koi_switch_to_ko() \ + do { \ + unsigned long flags, ko_ttbr1, cur_sp; \ + unsigned long *ptr; \ + struct task_token *token; \ + asm volatile("mrs %0, daif\n" \ + "msr daifset, #2\n" \ + "isb\n" \ + "mov %1, sp\n" \ + : "=r"(flags), "=r"(cur_sp) \ + :); \ + if (!on_irq_stack(cur_sp, (unsigned long)NULL)) { \ + ko_ttbr1 = koi_ttbr_ctor(THIS_MODULE); \ + token = (struct task_token *)((unsigned long)current + \ + (unsigned long) \ + koi_offset); \ + token->current_ttbr1 = ko_ttbr1 & (~TTBR_ASID_MASK); \ + } else { \ + ko_ttbr1 = koi_ttbr_ctor(THIS_MODULE); \ + ptr = SHIFT_PERCPU_PTR(koi_irq_current_ttbr1, \ + __kern_my_cpu_offset()); \ + *ptr = ko_ttbr1 & ~(TTBR_ASID_MASK); \ + } \ + switch_pgtable(ko_ttbr1); \ + asm volatile("msr daif, %0\n" \ + "isb\n" \ + : \ + : "r"(flags)); \ + } while (0); + +#define koi_switch_to_kernel() \ + do { \ + unsigned long cur_sp, flags, asid; \ + unsigned long *ptr; \ + struct task_token *token; \ + asm volatile("mrs %0, daif\n" \ + "msr daifset, #2\n" \ + "isb\n" \ + "mov %1, sp\n" \ + : "=r"(flags), "=r"(cur_sp) \ + :); \ + asid = read_sysreg(ttbr0_el1) & (~USER_ASID_FLAG); \ + asid &= TTBR_ASID_MASK; \ + switch_pgtable((koi_swapper_ttbr1 | asid)); \ + if (!on_irq_stack(cur_sp, (unsigned long)NULL)) { \ + token = (struct task_token *)((unsigned long)current + \ + (unsigned long)koi_offset); \ + token->current_ttbr1 = koi_swapper_ttbr1; \ + } else { \ + ptr = SHIFT_PERCPU_PTR(koi_irq_current_ttbr1, \ + __kern_my_cpu_offset()); \ + *ptr = koi_swapper_ttbr1; \ + } \ + asm volatile("msr daif, %0\n" \ + "isb\n" \ + : \ + : "r"(flags)); \ + } while (0); +#else +#define koi_switch_to_ko() \ + do { \ + unsigned long cur_sp, flags, ko_ttbr1; \ + unsigned long *ptr; \ + asm volatile("mrs %0, daif\n" \ + "msr daifset, #2\n" \ + "isb\n" \ + "mov %1, sp\n" \ + : "=r"(flags), "=r"(cur_sp) \ + :); \ + if (!on_irq_stack(cur_sp, (unsigned long)NULL)) { \ + ko_ttbr1 = koi_ttbr_ctor(THIS_MODULE); \ + iee_rw_gate(IEE_WRITE_TOKEN_TTBR1, current, \ + ko_ttbr1 &(~TTBR_ASID_MASK)); \ + } else { \ + ko_ttbr1 = koi_ttbr_ctor(THIS_MODULE); \ + ptr = SHIFT_PERCPU_PTR(koi_irq_current_ttbr1, \ + __kern_my_cpu_offset()); \ + *ptr = ko_ttbr1 & (~TTBR_ASID_MASK); \ + } \ + iee_rwx_gate_entry(IEE_SWITCH_TO_KOI, ko_ttbr1); \ + asm volatile("msr daif, %0\n" \ + "isb\n" \ + : \ + : "r"(flags)); \ + } while (0); + +#define koi_switch_to_kernel() \ + do { \ + unsigned long flags, cur_sp; \ + unsigned long *ptr; \ + asm volatile("mrs %0, daif\n" \ + "msr daifset, #2\n" \ + "isb\n" \ + "mov %1, sp\n" \ + : "=r"(flags), "=r"(cur_sp) \ + :); \ + iee_rwx_gate_entry(IEE_SWITCH_TO_KERNEL); \ + if (!on_irq_stack(cur_sp, (unsigned long)NULL)) { \ + iee_rw_gate(IEE_WRITE_TOKEN_TTBR1, current, \ + koi_swapper_ttbr1); \ + } else { \ + ptr = SHIFT_PERCPU_PTR(koi_irq_current_ttbr1, \ + __kern_my_cpu_offset()); \ + *ptr = koi_swapper_ttbr1; \ + } \ + asm volatile("msr daif, %0\n" \ + "isb\n" \ + : \ + : "r"(flags)); \ + } while (0); +#endif +//kzalloc function in driver space +static __maybe_unused noinline 
void * +koi_kzalloc_wrapper(struct module *mod, size_t size, gfp_t flags) +{ + int cnt = (size + PAGE_SIZE - 1) / PAGE_SIZE; + void *addr; + struct koi_mem_hash_node *target = NULL; + koi_switch_to_kernel(); + rcu_read_lock(); + hash_for_each_possible_rcu (koi_mem_htbl, target, node, + (unsigned long)mod) { + if (target->mod == mod) { + break; + } + } + rcu_read_unlock(); + if (target == NULL) { + printk("mem node for module: %s not found\n", mod->name); + return NULL; + } + + addr = kzalloc(size, flags); + koi_copy_pagetable(target->ko_mm, target->pgdp, (unsigned long)addr, + (unsigned long)addr + PAGE_SIZE * cnt, 0); + koi_switch_to_ko(); + return addr; +} + +static __maybe_unused noinline void *koi_kzalloc_node_wrapper(struct module *mod, size_t size, gfp_t flags, int node) { + int cnt = (size + PAGE_SIZE - 1) / PAGE_SIZE; + void *addr = NULL; + struct koi_mem_hash_node *target = NULL; + koi_switch_to_kernel(); + + rcu_read_lock(); + hash_for_each_possible_rcu(koi_mem_htbl, target, node, (unsigned long)mod) { + if (target->mod == mod) + break; + } + rcu_read_unlock(); + if (target == NULL) { + printk(KERN_ERR "mem node for module: %s not found\n", mod->name); + goto ret; + } + addr = kzalloc_node(cnt * PAGE_SIZE, flags, node); + koi_copy_pagetable(target->ko_mm, target->pgdp, (unsigned long)addr, + (unsigned long)addr + PAGE_SIZE * cnt, 0); +ret: + koi_switch_to_ko(); + return (void *)addr; +} + +//kmalloc function in driver space +static __maybe_unused void * +koi_kmalloc_wrapper(struct module *mod, size_t size, gfp_t flags) +{ + int cnt = (size + PAGE_SIZE - 1) / PAGE_SIZE; + void *addr = NULL; + struct koi_mem_hash_node *target = NULL; + koi_switch_to_kernel(); + + rcu_read_lock(); + hash_for_each_possible_rcu (koi_mem_htbl, target, node, + (unsigned long)mod) { + if (target->mod == mod) { + break; + } + } + rcu_read_unlock(); + if (target == NULL) { + printk(KERN_ERR"mem node for module: %s not found\n", mod->name); + goto ret; + } + + addr = kmalloc(cnt * PAGE_SIZE, flags); + koi_copy_pagetable(target->ko_mm, target->pgdp, (unsigned long)addr, + (unsigned long)addr + PAGE_SIZE * cnt, 0); +ret: + koi_switch_to_ko(); + return (void *)addr; +} +//vmalloc function in driver space +static __maybe_unused void *koi_vmalloc_wrapper(struct module *mod, + unsigned long size) +{ + int cnt = (size + PAGE_SIZE - 1) / PAGE_SIZE; + void *addr; + struct koi_mem_hash_node *target = NULL; + koi_switch_to_kernel(); + rcu_read_lock(); + hash_for_each_possible_rcu (koi_mem_htbl, target, node, + (unsigned long)mod) { + if (target->mod == mod) { + break; + } + } + rcu_read_unlock(); + if (target == NULL) { + printk("mem node for module: %s not found\n", mod->name); + koi_switch_to_ko(); + return 0; + } + addr = vmalloc(cnt * PAGE_SIZE); + koi_copy_pagetable(target->ko_mm, target->pgdp, (unsigned long)addr, + (unsigned long)addr + PAGE_SIZE * cnt, 0); + koi_switch_to_ko(); + return addr; +} +//kmalloc_array function in driver space +static __maybe_unused void *koi_kmalloc_array_wrapper(struct module *mod, + size_t n, size_t size, + gfp_t flags) +{ + int kpage; + void *addr; + struct koi_mem_hash_node *target = NULL; + koi_switch_to_kernel(); + rcu_read_lock(); + hash_for_each_possible_rcu (koi_mem_htbl, target, node, + (unsigned long)mod) { + if (target->mod == mod) { + break; + } + } + rcu_read_unlock(); + if (target == NULL) { + printk("mem node for module: %s not found\n", mod->name); + koi_switch_to_ko(); + return 0; + } + kpage = (n * size + PAGE_SIZE - 1) / PAGE_SIZE; + n = (kpage * PAGE_SIZE) / 
size; + addr = kmalloc_array(n, size, flags); + koi_copy_pagetable(target->ko_mm, target->pgdp, (unsigned long)addr, + (unsigned long)addr + PAGE_SIZE * kpage, 0); + koi_switch_to_ko(); + return addr; +} + +static __maybe_unused noinline void *koi_kcalloc_wrapper(struct module *mod, size_t n, size_t size, gfp_t flags) { + return koi_kmalloc_array_wrapper(mod, n, size, flags | __GFP_ZERO); +} +#endif + +#ifdef CONFIG_KOI + +#define koi_copy_to_user_wrapper(to, from, n) \ +({ \ + koi_switch_to_kernel(); \ + long long ret = copy_to_user(to, from, n); \ + koi_switch_to_ko(); \ + ret; \ +}) + +#define koi_copy_from_user_wrapper(to, from, n) \ +({ \ + koi_switch_to_kernel(); \ + long long ret = copy_from_user(to, from, n); \ + koi_switch_to_ko(); \ + ret; \ +}) + +#define koi_kasprintf_wrapper(gfp, fmt, args...)\ + ({ \ + koi_switch_to_kernel(); \ + void *ret = kasprintf(gfp, fmt, ##args); \ + koi_map_mem(THIS_MODULE, (unsigned long)ret, sizeof(void *)); \ + koi_switch_to_ko(); \ + ret;\ + }) + +#define koi_scnprintf_wrapper(buf, size, fmt, args...) \ + ({ \ + int ret; \ + koi_switch_to_kernel(); \ + ret = scnprintf(buf, size, fmt, ##args); \ + koi_switch_to_ko(); \ + ret; \ + }) + +#define koi_sscanf_wrapper(buf, fmt, args...) \ + ({ \ + int ret; \ + koi_switch_to_kernel(); \ + ret = sscanf(buf, fmt, ##args); \ + koi_switch_to_ko(); \ + ret; \ + }) + +#define koi_rcu_read_lock_wrapper() \ + do { \ + koi_switch_to_kernel(); \ + rcu_read_lock(); \ + koi_switch_to_ko(); \ + } while(0); + +#define koi_rcu_read_unlock_wrapper() \ + do { \ + koi_switch_to_kernel(); \ + rcu_read_unlock(); \ + koi_switch_to_ko(); \ + } while(0); + +#define koi_mutex_lock_wrapper(lock) \ + do { \ + koi_switch_to_kernel(); \ + mutex_lock(lock); \ + koi_switch_to_ko(); \ + } while(0); + +#define koi_mutex_unlock_wrapper(lock) \ + do { \ + koi_switch_to_kernel(); \ + mutex_unlock(lock); \ + koi_switch_to_ko(); \ + } while(0); + +#define koi_mutex_init_wrapper(lock) \ + do { \ + koi_switch_to_kernel(); \ + mutex_init(lock); \ + koi_switch_to_ko(); \ + } while(0); + +#define koi_spin_lock_wrapper(lock) \ + do { \ + koi_switch_to_kernel(); \ + spin_lock(lock); \ + koi_switch_to_ko(); \ + } while(0); + +#define koi_spin_unlock_wrapper(lock) \ + do { \ + koi_switch_to_kernel(); \ + spin_unlock(lock); \ + koi_switch_to_ko(); \ + } while(0); + +#define koi_spin_lock_irq_wrapper(lock) \ + do { \ + koi_switch_to_kernel(); \ + spin_lock_irq(lock); \ + koi_switch_to_ko(); \ + } while(0); + + +#define koi_spin_unlock_irq_wrapper(lock) \ + do { \ + koi_switch_to_kernel(); \ + spin_unlock_irq(lock); \ + koi_switch_to_ko(); \ + } while(0); + +#define koi_spin_lock_irqsave_wrapper(lock, flags) \ + do { \ + koi_switch_to_kernel(); \ + spin_lock_irqsave(lock, flags); \ + koi_switch_to_ko(); \ + } while(0); + + +#define koi_spin_lock_irqrestore_wrapper(lock, flags) \ + do { \ + koi_switch_to_kernel(); \ + spin_lock_irqrestore(lock, flags); \ + koi_switch_to_ko(); \ + } while(0); + +#else + +#define koi_copy_to_user_wrapper copy_to_user + +#define koi_copy_from_user_wrapper copy_from_user + +#define koi_kasprintf_wrapper kasprintf + +#define koi_scnprintf_wrapper scnprintf + +#define koi_sscanf_wrapper sscanf + +#define koi_rcu_read_lock_wrapper rcu_read_lock + +#define koi_rcu_read_unlock_wrapper rcu_read_unlock + +#define koi_mutex_lock_wrapper mutex_lock + +#define koi_mutex_unlock_wrapper mutex_unlock + +#define koi_mutex_init_wrapper mutex_init + +#define koi_spin_lock_irq_wrapper spin_lock_irq + +#define koi_spin_unlock_irq_wrapper 
spin_unlock_irq + +#define koi_spin_lock_wrapper spin_lock + +#define koi_spin_unlock_wrapper spin_unlock + +#define koi_spin_lock_irqsave_wrapper spin_lock_irqsave + +#define koi_spin_lock_irqrestore_wrapper spin_lock_irqrestore + +#endif \ No newline at end of file diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index fde4186cc387..c9e9a5a288bc 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -190,6 +190,13 @@ extern u64 vabits_actual; #endif extern s64 memstart_addr; + +#if defined(CONFIG_IEE) || defined(CONFIG_KOI) +extern s64 memstart_addr_init; +extern s64 iee_offset; +#define LOGICAL_RANDOM (long long int)((long unsigned int)__va(memstart_addr_init) & (~PAGE_OFFSET)) +#endif + /* PHYS_OFFSET - the physical address of the start of memory. */ #define PHYS_OFFSET ({ VM_BUG_ON(memstart_addr & 1); memstart_addr; }) @@ -310,6 +317,27 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x); #define __phys_to_virt(x) ((unsigned long)((x) - PHYS_OFFSET) | PAGE_OFFSET) #define __phys_to_kimg(x) ((unsigned long)((x) + kimage_voffset)) +#ifdef CONFIG_KOI +#define KOI_OFFSET (((unsigned long)BIT(vabits_actual - 2)) - LOGICAL_RANDOM) +#endif + +#ifdef CONFIG_IEE +#ifdef CONFIG_IEE_OFFSET +#define IEE_OFFSET ((CONFIG_IEE_OFFSET) - LOGICAL_RANDOM) +#else +#define IEE_OFFSET (((unsigned long)BIT(vabits_actual - 2)) - LOGICAL_RANDOM) +#endif +#define __phys_to_iee(x) (__phys_to_virt(x) + IEE_OFFSET) +#define SET_UPAGE(x) __pgprot(pgprot_val(x) | PTE_USER) +#define SET_PPAGE(x) __pgprot(pgprot_val(x) & (~PTE_USER)) +#define SET_INVALID(x) __pgprot(pgprot_val(x) & (~PTE_VALID)) +#define SET_NG(x) __pgprot(pgprot_val(x) | PTE_NG) +#endif + +#if defined(CONFIG_IEE) || defined (CONFIG_KOI) +#define SET_NG(x) __pgprot(pgprot_val(x) | PTE_NG) +#define SET_INVALID(x) __pgprot(pgprot_val(x) & (~PTE_VALID)) +#endif /* * Convert a page to/from a physical address */ diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index a6fb325424e7..cca5994dabfb 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -24,6 +24,9 @@ #include #include #include +#ifdef CONFIG_IEE +#define INIT_ASID 0x2 +#endif extern bool rodata_full; @@ -43,7 +46,12 @@ static inline void cpu_set_reserved_ttbr0_nosync(void) { unsigned long ttbr = phys_to_ttbr(__pa_symbol(reserved_pg_dir)); +#ifdef CONFIG_IEE + ttbr |= FIELD_PREP(TTBR_ASID_MASK, 1); + iee_rwx_gate_entry(IEE_WRITE_ttbr0_el1, ttbr); +#else write_sysreg(ttbr, ttbr0_el1); +#endif } static inline void cpu_set_reserved_ttbr0(void) @@ -79,7 +87,11 @@ static inline void __cpu_set_tcr_t0sz(unsigned long t0sz) tcr &= ~TCR_T0SZ_MASK; tcr |= t0sz << TCR_T0SZ_OFFSET; +#ifdef CONFIG_IEE + iee_rwx_gate_entry(IEE_WRITE_tcr_el1, tcr); +#else write_sysreg(tcr, tcr_el1); +#endif isb(); } @@ -144,7 +156,11 @@ static inline void cpu_install_ttbr0(phys_addr_t ttbr0, unsigned long t0sz) __cpu_set_tcr_t0sz(t0sz); /* avoid cpu_switch_mm() and its SW-PAN and CNP interactions */ + #ifdef CONFIG_IEE + iee_rwx_gate_entry(IEE_WRITE_ttbr0_el1, ttbr0); + #else write_sysreg(ttbr0, ttbr0_el1); + #endif isb(); } @@ -174,6 +190,10 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap) ttbr1 |= TTBR_CNP_BIT; } + #ifdef CONFIG_IEE + ttbr1 |= FIELD_PREP(TTBR_ASID_MASK, ASID(current->active_mm)); + #endif + replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1); __cpu_install_idmap(idmap); diff --git a/arch/arm64/include/asm/pgalloc.h 
b/arch/arm64/include/asm/pgalloc.h index 661964e99b9d..6e40b14db920 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -63,6 +63,10 @@ static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot) extern pgd_t *pgd_alloc(struct mm_struct *mm); extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp); +#ifdef CONFIG_KOI +pgd_t *koi_pgd_alloc(void); +#endif + static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep, pmdval_t prot) { diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index f736a4222190..babc366acc51 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -84,6 +84,13 @@ #define CONT_PMD_SIZE (CONT_PMDS * PMD_SIZE) #define CONT_PMD_MASK (~(CONT_PMD_SIZE - 1)) +#ifdef CONFIG_IEE +#define PGD_APT_RO (_AT(pudval_t, 1) << 62) +#endif +#define PGD_APT (_AT(pudval_t, 1) << 61) +#define PGD_PXN (_AT(pudval_t, 1) << 59) +#define PGD_UXN (_AT(pudval_t, 1) << 60) + /* * Hardware page table definitions. * @@ -157,6 +164,9 @@ #define PTE_CONT (_AT(pteval_t, 1) << 52) /* Contiguous range */ #define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */ #define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */ +#ifdef CONFIG_HIVE +#define PTE_BPF_SFI_GP (_AT(pteval_t, 1) << 55) /* BPF_SFI guarded */ +#endif #define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (48 - PAGE_SHIFT)) - 1) << PAGE_SHIFT) #ifdef CONFIG_ARM64_PA_BITS_52 @@ -288,6 +298,10 @@ #define TCR_TCMA0 (UL(1) << 57) #define TCR_TCMA1 (UL(1) << 58) +#ifdef CONFIG_IEE +#define TCR_HPD1 (UL(1) << 42) +#endif + /* * TTBR. */ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index d457dd74f534..89206b1a517b 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -34,6 +34,9 @@ #include #include #include +#ifdef CONFIG_PTP +#include +#endif #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE @@ -156,6 +159,14 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys) #define pud_access_permitted(pud, write) \ (pte_access_permitted(pud_pte(pud), (write))) +#ifdef CONFIG_PTP +extern bool in_tramp_pgdir(void *addr); +extern unsigned long long iee_rw_gate(int flag, ...); +extern void iee_set_tramp_pgd_pre_init(pgd_t *pgdp, pgd_t pgd); +extern pteval_t iee_set_xchg_relaxed(pte_t *ptep, pteval_t pteval); +extern pteval_t iee_set_cmpxchg_relaxed(pte_t *ptep, pteval_t old_pteval, pteval_t new_pteval); +#endif + static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot) { pte_val(pte) &= ~pgprot_val(prot); @@ -260,19 +271,35 @@ static inline pte_t pte_mkdevmap(pte_t pte) return set_pte_bit(pte, __pgprot(PTE_DEVMAP | PTE_SPECIAL)); } +#ifdef CONFIG_PTP +extern void iee_set_bm_pte(pte_t *ptep, pte_t pte); +extern void iee_set_fixmap_pte_pre_init(pte_t *ptep, pte_t pte); +#endif + +#ifdef CONFIG_IEE +extern void __set_pte(pte_t *ptep, pte_t pte); +#else static inline void __set_pte(pte_t *ptep, pte_t pte) { +#ifdef CONFIG_KOI + if (pte_valid(pte)) { + pte = __pte(pte_val(pte) | PTE_NG); + } +#endif WRITE_ONCE(*ptep, pte); - - /* - * Only if the new pte is valid and kernel, otherwise TLB maintenance - * or update_mmu_cache() have the necessary barriers. 
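 *
 * (KOI aside, added for orientation rather than taken from the change above:
 *  forcing PTE_NG on every valid kernel PTE makes kernel mappings non-global,
 *  which is what lets the kernel and an isolated driver run on different
 *  TTBR1 page tables distinguished only by ASID -- the MMU tags just
 *  non-global entries with an ASID, so a global mapping would remain visible
 *  from both contexts no matter which table is installed.)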
- */ if (pte_valid_not_user(pte)) { dsb(ishst); isb(); } } +#endif + +#ifdef CONFIG_IEE +extern void iee_set_stack_pte(pte_t *ptep, int order, int use_block_pmd, unsigned long lm_addr); +extern void iee_unset_stack_pte(pte_t *ptep, int order, int use_block_pmd, unsigned long lm_addr); +extern void iee_set_sensitive_pte(pte_t *lm_ptep, pte_t *iee_ptep, int order, int use_block_pmd); +extern void iee_unset_sensitive_pte(pte_t *lm_ptep, pte_t *iee_ptep, int order, int use_block_pmd); +#endif static inline pte_t __ptep_get(pte_t *ptep) { @@ -361,8 +388,15 @@ static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr) return pfn_pte(pte_pfn(pte) + nr, pte_pgprot(pte)); } -static inline void __set_ptes(struct mm_struct *mm, - unsigned long __always_unused addr, +#ifdef CONFIG_PTP +extern void iee_set_pte_pre_init(pte_t *ptep, pte_t pte); +extern void iee_set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte); +extern void iee_set_pte_at_delayed(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte); +#endif + +static inline void __set_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte, unsigned int nr) { page_table_check_ptes_set(mm, ptep, pte, nr); @@ -546,6 +580,44 @@ static inline void __set_pte_at(struct mm_struct *mm, __set_pte(ptep, pte); } +#ifdef CONFIG_PTP +extern void set_pmd(pmd_t *pmdp, pmd_t pmd); +extern void set_pud(pud_t *pudp, pud_t pud); + +static inline void __set_pmd_at(struct mm_struct *mm, + unsigned long __always_unused addr, + pmd_t *pmdp, pmd_t pmd, unsigned int nr) +{ + __sync_cache_and_tags(pmd_pte(pmd), nr); + __check_safe_pte_update(mm, (pte_t *)pmdp, pmd_pte(pmd)); + set_pmd(pmdp, pmd); +} + +static inline void __set_pud_at(struct mm_struct *mm, + unsigned long __always_unused addr, + pud_t *pudp, pud_t pud, unsigned int nr) +{ + __sync_cache_and_tags(pud_pte(pud), nr); + __check_safe_pte_update(mm, (pte_t *)pudp, pud_pte(pud)); + set_pud(pudp, pud); +} + +static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd) +{ + page_table_check_pmd_set(mm, pmdp, pmd); + return __set_pmd_at(mm, addr, pmdp, pmd, + PMD_SIZE >> PAGE_SHIFT); +} + +static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, + pud_t *pudp, pud_t pud) +{ + page_table_check_pud_set(mm, pudp, pud); + return __set_pud_at(mm, addr, pudp, pud, + PUD_SIZE >> PAGE_SHIFT); +} +#else static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd) { @@ -561,6 +633,7 @@ static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud), PUD_SIZE >> PAGE_SHIFT); } +#endif #define __p4d_to_phys(p4d) __pte_to_phys(p4d_pte(p4d)) #define __phys_to_p4d_val(phys) __phys_to_pte_val(phys) @@ -640,6 +713,14 @@ static inline bool in_swapper_pgdir(void *addr) ((unsigned long)swapper_pg_dir & PAGE_MASK); } +#ifdef CONFIG_PTP +extern bool in_tramp_pgdir(void *addr); +extern void iee_set_fixmap_pmd_pre_init(pmd_t *pmdp, pmd_t pmd); +#endif + +#ifdef CONFIG_IEE +extern void set_pmd(pmd_t *pmdp, pmd_t pmd); +#else static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { #ifdef __PAGETABLE_PMD_FOLDED @@ -648,14 +729,19 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) return; } #endif /* __PAGETABLE_PMD_FOLDED */ - +#ifdef CONFIG_KOI + pmdval_t val = pmd_val(pmd); + if (pmd_valid(pmd) && !(val & PMD_TABLE_BIT)) { + pmd = __pmd(val | PMD_SECT_NG); + } +#endif WRITE_ONCE(*pmdp, pmd); - if (pmd_valid(pmd)) { dsb(ishst); isb(); 
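		/*
		 * The dsb(ishst)/isb() pair above publishes the table update to
		 * the page-table walkers of all CPUs before execution continues.
		 * Under CONFIG_IEE this helper is replaced by the out-of-line
		 * set_pmd() declared earlier; a minimal sketch of the expected
		 * shape, assuming a gate selector such as IEE_SET_PMD (only
		 * iee_rw_gate() itself is declared in this header -- the
		 * selector name here is hypothetical):
		 *
		 *	void set_pmd(pmd_t *pmdp, pmd_t pmd)
		 *	{
		 *		iee_rw_gate(IEE_SET_PMD, pmdp, pmd); // store done inside IEE
		 *		if (pmd_valid(pmd)) {
		 *			dsb(ishst);
		 *			isb();
		 *		}
		 *	}
		 */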
} } +#endif static inline void pmd_clear(pmd_t *pmdp) { @@ -675,6 +761,12 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) /* Find an entry in the third-level page table. */ #define pte_offset_phys(dir,addr) (pmd_page_paddr(READ_ONCE(*(dir))) + pte_index(addr) * sizeof(pte_t)) +#ifdef CONFIG_PTP +#define pte_set_fixmap_init(addr) ((pte_t *)iee_set_fixmap_offset_pre_init(FIX_PTE, addr)) +#define pte_set_fixmap_offset_init(pmd, addr) pte_set_fixmap_init(pte_offset_phys(pmd, addr)) +#define pte_clear_fixmap_init() clear_fixmap_init(FIX_PTE) +#endif + #define pte_set_fixmap(addr) ((pte_t *)set_fixmap_offset(FIX_PTE, addr)) #define pte_set_fixmap_offset(pmd, addr) pte_set_fixmap(pte_offset_phys(pmd, addr)) #define pte_clear_fixmap() clear_fixmap(FIX_PTE) @@ -703,6 +795,14 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) #define pud_user(pud) pte_user(pud_pte(pud)) #define pud_user_exec(pud) pte_user_exec(pud_pte(pud)) + +#ifdef CONFIG_PTP +extern void iee_set_fixmap_pud_pre_init(pud_t *pudp, pud_t pud); +#endif + +#ifdef CONFIG_IEE +extern void set_pud(pud_t *pudp, pud_t pud); +#else static inline void set_pud(pud_t *pudp, pud_t pud) { #ifdef __PAGETABLE_PUD_FOLDED @@ -711,14 +811,20 @@ static inline void set_pud(pud_t *pudp, pud_t pud) return; } #endif /* __PAGETABLE_PUD_FOLDED */ - +#ifdef CONFIG_KOI + pudval_t val = pud_val(pud); + if (pud_valid(pud) && !(val & PUD_TABLE_BIT)) { + // There is no PUD_SEC_NG, so we use PMD_SECT_NG instead. + pud = __pud(val | PMD_SECT_NG); + } +#endif WRITE_ONCE(*pudp, pud); - if (pud_valid(pud)) { dsb(ishst); isb(); } } +#endif static inline void pud_clear(pud_t *pudp) { @@ -738,6 +844,12 @@ static inline pmd_t *pud_pgtable(pud_t pud) /* Find an entry in the second-level page table. */ #define pmd_offset_phys(dir, addr) (pud_page_paddr(READ_ONCE(*(dir))) + pmd_index(addr) * sizeof(pmd_t)) +#ifdef CONFIG_PTP +#define pmd_set_fixmap_init(addr) ((pmd_t *)iee_set_fixmap_offset_pre_init(FIX_PMD, addr)) +#define pmd_set_fixmap_offset_init(pud, addr) pmd_set_fixmap_init(pmd_offset_phys(pud, addr)) +#define pmd_clear_fixmap_init() clear_fixmap_init(FIX_PMD) +#endif + #define pmd_set_fixmap(addr) ((pmd_t *)set_fixmap_offset(FIX_PMD, addr)) #define pmd_set_fixmap_offset(pud, addr) pmd_set_fixmap(pmd_offset_phys(pud, addr)) #define pmd_clear_fixmap() clear_fixmap(FIX_PMD) @@ -769,10 +881,15 @@ static inline pmd_t *pud_pgtable(pud_t pud) #define p4d_none(p4d) (!p4d_val(p4d)) #define p4d_bad(p4d) (!(p4d_val(p4d) & 2)) #define p4d_present(p4d) (p4d_val(p4d)) +#define p4d_valid(p4d) pte_valid(p4d_pte(p4d)) +#ifdef CONFIG_IEE +extern void set_p4d(p4d_t *p4dp, p4d_t p4d); +#else static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) { - if (in_swapper_pgdir(p4dp)) { + if (in_swapper_pgdir(p4dp)) + { set_swapper_pgd((pgd_t *)p4dp, __pgd(p4d_val(p4d))); return; } @@ -781,6 +898,7 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) dsb(ishst); isb(); } +#endif static inline void p4d_clear(p4d_t *p4dp) { @@ -800,6 +918,12 @@ static inline pud_t *p4d_pgtable(p4d_t p4d) /* Find an entry in the first-level page table. 
*/ #define pud_offset_phys(dir, addr) (p4d_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t)) +#ifdef CONFIG_PTP +#define pud_set_fixmap_init(addr) ((pud_t *)iee_set_fixmap_offset_pre_init(FIX_PUD, addr)) +#define pud_set_fixmap_offset_init(p4d, addr) pud_set_fixmap_init(pud_offset_phys(p4d, addr)) +#define pud_clear_fixmap_init() clear_fixmap_init(FIX_PUD) +#endif + #define pud_set_fixmap(addr) ((pud_t *)set_fixmap_offset(FIX_PUD, addr)) #define pud_set_fixmap_offset(p4d, addr) pud_set_fixmap(pud_offset_phys(p4d, addr)) #define pud_clear_fixmap() clear_fixmap(FIX_PUD) @@ -826,6 +950,10 @@ static inline pud_t *p4d_pgtable(p4d_t p4d) #define pgd_ERROR(e) \ pr_err("%s:%d: bad pgd %016llx.\n", __FILE__, __LINE__, pgd_val(e)) +#ifdef CONFIG_PTP +#define pgd_set_fixmap_init(addr) ((pgd_t *)iee_set_fixmap_offset_pre_init(FIX_PGD, addr)) +#define pgd_clear_fixmap_init() clear_fixmap_init(FIX_PGD) +#endif #define pgd_set_fixmap(addr) ((pgd_t *)set_fixmap_offset(FIX_PGD, addr)) #define pgd_clear_fixmap() clear_fixmap(FIX_PGD) @@ -910,10 +1038,19 @@ static inline int __ptep_test_and_clear_young(struct vm_area_struct *vma, pte = __ptep_get(ptep); do { + #ifdef CONFIG_KOI + if (pte_valid(pte)) + pte = __pte(pte_val(pte) | PTE_NG); + #endif old_pte = pte; pte = pte_mkold(pte); + #ifdef CONFIG_PTP + pte_val(pte) = iee_set_cmpxchg_relaxed(ptep, + pte_val(old_pte), pte_val(pte)); + #else pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), pte_val(old_pte), pte_val(pte)); + #endif } while (pte_val(pte) != pte_val(old_pte)); return pte_young(pte); @@ -954,8 +1091,12 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, static inline pte_t __ptep_get_and_clear(struct mm_struct *mm, unsigned long address, pte_t *ptep) { + #ifdef CONFIG_PTP + pteval_t pteval= iee_set_xchg_relaxed((pte_t *)&pte_val(*ptep), (pteval_t)0); + pte_t pte = __pte(pteval); + #else pte_t pte = __pte(xchg_relaxed(&pte_val(*ptep), 0)); - + #endif page_table_check_pte_clear(mm, pte); return pte; @@ -997,7 +1138,12 @@ static inline pte_t __get_and_clear_full_ptes(struct mm_struct *mm, static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long address, pmd_t *pmdp) { + #ifdef CONFIG_PTP + pteval_t pteval= iee_set_xchg_relaxed((pte_t *)&pmd_val(*pmdp), (pteval_t)0); + pmd_t pmd = __pmd(pteval); + #else pmd_t pmd = __pmd(xchg_relaxed(&pmd_val(*pmdp), 0)); + #endif page_table_check_pmd_clear(mm, pmd); @@ -1012,10 +1158,19 @@ static inline void ___ptep_set_wrprotect(struct mm_struct *mm, pte_t old_pte; do { + #ifdef CONFIG_KOI + if (pte_valid(pte)) { + pte = __pte(pte_val(pte) | PTE_NG); + } + #endif old_pte = pte; pte = pte_wrprotect(pte); + #ifdef CONFIG_PTP + pte_val(pte) = iee_set_cmpxchg_relaxed(ptep,pte_val(old_pte), pte_val(pte)); + #else pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), pte_val(old_pte), pte_val(pte)); + #endif } while (pte_val(pte) != pte_val(old_pte)); } @@ -1091,7 +1246,17 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t pmd) { page_table_check_pmd_set(vma->vm_mm, pmdp, pmd); + #ifdef CONFIG_KOI + pmdval_t val = pmd_val(pmd); + if (pmd_valid(pmd) && !(val & PMD_TABLE_BIT)) { + pmd = __pmd(val | PMD_SECT_NG); + } + #endif + #ifdef CONFIG_PTP + return __pmd((pmdval_t)iee_set_xchg_relaxed((pte_t *)&pmd_val(*pmdp), (pmdval_t)pmd_val(pmd))); + #else return __pmd(xchg_relaxed(&pmd_val(*pmdp), pmd_val(pmd))); + #endif } #endif diff --git a/arch/arm64/include/asm/pgtable_slab.h b/arch/arm64/include/asm/pgtable_slab.h 
new file mode 100644 index 000000000000..0674582a1948 --- /dev/null +++ b/arch/arm64/include/asm/pgtable_slab.h @@ -0,0 +1,8 @@ +#ifndef _LINUX_PGTABLE_SLAB_H +#define _LINUX_PGTABLE_SLAB_H + +extern void __init iee_pgtable_init(void); +extern void *get_iee_pgtable_page(gfp_t gfpflags); +extern void free_iee_pgtable_page(void *obj); + +#endif \ No newline at end of file diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h index d2e0306e65d3..8352e92d4536 100644 --- a/arch/arm64/include/asm/pointer_auth.h +++ b/arch/arm64/include/asm/pointer_auth.h @@ -108,8 +108,13 @@ static __always_inline void ptrauth_enable(void) { if (!system_supports_address_auth()) return; + #ifdef CONFIG_IEE + sysreg_clear_set_iee_si(sctlr_el1, 0, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | + SCTLR_ELx_ENDA | SCTLR_ELx_ENDB)); + #else sysreg_clear_set(sctlr_el1, 0, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | SCTLR_ELx_ENDA | SCTLR_ELx_ENDB)); + #endif isb(); } diff --git a/arch/arm64/include/asm/stack_slab.h b/arch/arm64/include/asm/stack_slab.h new file mode 100644 index 000000000000..0a478828421e --- /dev/null +++ b/arch/arm64/include/asm/stack_slab.h @@ -0,0 +1,8 @@ +#ifndef _LINUX_STACK_SLAB_H +#define _LINUX_STACK_SLAB_H + +extern void __init iee_stack_init(void); +extern void *get_iee_stack(void); +extern void free_iee_stack(void *obj); + +#endif diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 435634a703c6..c214643777a6 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1140,6 +1140,64 @@ write_sysreg_s(__scs_new, sysreg); \ } while (0) + +#ifdef CONFIG_IEE + +#define SYS_TCR_IEE_SI TCR_HPD1 | TCR_A1 + +extern void iee_rwx_gate_entry(int flag, ...); +#define IEE_SI_TEST 0 +#define IEE_WRITE_sctlr_el1 1 +#define IEE_WRITE_ttbr0_el1 2 +#define IEE_WRITE_vbar_el1 3 +#define IEE_WRITE_tcr_el1 4 +#define IEE_WRITE_mdscr_el1 5 +#define IEE_WRITE_AFSR0 10 + +#define sysreg_clear_set_iee_si(sysreg, clear, set) do { \ + u64 __scs_val = read_sysreg(sysreg); \ + u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set); \ + if (__scs_new != __scs_val) \ + iee_rwx_gate_entry(IEE_WRITE_##sysreg, __scs_new); \ +} while (0) + +#define IEE_SI_WRITE_DAIF_SEL "msr daifclr, #0xf\n\t" \ + "tbnz %x0, #6, 114221f\n\t" \ + "tbnz %x0, #7, 114210f\n\t" \ + "tbnz %x0, #8, 114100f\n\t" \ + "msr daifset, #0b000\n\t" \ + "b 114514f\n\t" \ +"114221:\n\t" \ + "tbnz %x0, #7, 114211f\n\t" \ + "tbnz %x0, #8, 114101f\n\t" \ + "msr daifset, #0b001\n\t" \ + "b 114514f\n\t" \ +"114211:\n\t" \ + "tbnz %x0, #8, 114111f\n\t" \ + "msr daifset, #0b011\n\t" \ + "b 114514f\n\t" \ +"114210:\n\t" \ + "tbnz %x0, #8, 114110f\n\t" \ + "msr daifset, #0b010\n\t" \ + "b 114514f\n\t" \ +"114100:\n\t" \ + "msr daifset, #0b100\n\t" \ + "b 114514f\n\t" \ +"114101:\n\t" \ + "msr daifset, #0b101\n\t" \ + "b 114514f\n\t" \ +"114110:\n\t" \ + "msr daifset, #0b110\n\t" \ + "b 114514f\n\t" \ +"114111:\n\t" \ + "msr daifset, #0b111\n\t" \ +"114514:\n\t" + +#define iee_si_write_daif(v) do { \ + u64 __val = (u64)(v); \ + asm volatile(IEE_SI_WRITE_DAIF_SEL: : "rZ" (__val));} while (0) +#endif + #define read_sysreg_par() ({ \ u64 par; \ asm(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412)); \ diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index 2c29239d05c3..a68ca9e784cf 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -11,11 +11,31 @@ #include #include +#ifdef CONFIG_PTP +#include +#include "pgtable_slab.h" +#endif + static 
inline void __tlb_remove_table(void *_table) { free_page_and_swap_cache((struct page *)_table); } +#ifdef CONFIG_PTP +static inline void __iee_tlb_remove_table(void *_table) +{ + struct page *page = (struct page *)_table; + + // if (!is_huge_zero_page(page)) + // { + // if (page_ref_dec_return(page) == 1) + // { + free_iee_pgtable_page((void *)page_to_virt(page)); + // } + // } +} +#endif + #define tlb_flush tlb_flush static void tlb_flush(struct mmu_gather *tlb); diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 4bbd9ed591f2..ecd8e35ab777 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -49,6 +49,7 @@ #define __tlbi(op, ...) __TLBI_N(op, ##__VA_ARGS__, 1, 0) + #define __tlbi_user(op, arg) do { \ if (arm64_kernel_unmapped_at_el0()) \ __tlbi(op, (arg) | USER_ASID_FLAG); \ @@ -258,6 +259,10 @@ static inline void flush_tlb_mm(struct mm_struct *mm) asid = __TLBI_VADDR(0, ASID(mm)); __tlbi(aside1is, asid); __tlbi_user(aside1is, asid); + #if defined(CONFIG_IEE) || defined (CONFIG_KOI) + if (!arm64_kernel_unmapped_at_el0()) + __tlbi(aside1is, asid | USER_ASID_FLAG); + #endif dsb(ish); mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL); } @@ -273,6 +278,10 @@ static inline void __flush_tlb_page_nosync(struct mm_struct *mm, __tlbi_user(vale1is, addr); mmu_notifier_arch_invalidate_secondary_tlbs(mm, uaddr & PAGE_MASK, (uaddr & PAGE_MASK) + PAGE_SIZE); + #if defined(CONFIG_IEE) || defined(CONFIG_KOI) + if (!arm64_kernel_unmapped_at_el0()) + __tlbi(vale1is, addr | USER_ASID_FLAG); + #endif } static inline void flush_tlb_page_nosync(struct vm_area_struct *vma, @@ -366,6 +375,7 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) * 2. If there is 1 page remaining, flush it through non-range operations. Range * operations can only span an even number of pages. 
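 *
 * IEE/KOI note (a sketch for orientation, mirroring flush_tlb_mm() above):
 * each kernel ASID gains a twin tagged with USER_ASID_FLAG for the IEE/driver
 * view of the address space, so kernel-side invalidations are issued for both
 * whenever KPTI is not already flushing the flagged ASID, e.g.
 *
 *	asid = __TLBI_VADDR(0, ASID(mm));
 *	__tlbi(aside1is, asid);
 *	if (!arm64_kernel_unmapped_at_el0())
 *		__tlbi(aside1is, asid | USER_ASID_FLAG);
 *
 * The duplicated __flush_tlb_range_op() below applies the same rule to
 * range-based invalidation.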
*/ +#if defined(CONFIG_IEE) || defined(CONFIG_KOI) #define __flush_tlb_range_op(op, start, pages, stride, \ asid, tlb_level, tlbi_user) \ do { \ @@ -378,6 +388,8 @@ do { \ pages == 1) { \ addr = __TLBI_VADDR(start, asid); \ __tlbi_level(op, addr, tlb_level); \ + if (!arm64_kernel_unmapped_at_el0()) /* added for IEE */ \ + __tlbi_level(op, addr | USER_ASID_FLAG, tlb_level); \ if (tlbi_user) \ __tlbi_user_level(op, addr, tlb_level); \ start += stride; \ @@ -390,6 +402,8 @@ do { \ addr = __TLBI_VADDR_RANGE(start, asid, scale, \ num, tlb_level); \ __tlbi(r##op, addr); \ + if (!arm64_kernel_unmapped_at_el0()) /* added for IEE */ \ + __tlbi(r##op, addr | USER_ASID_FLAG); \ if (tlbi_user) \ __tlbi_user(r##op, addr); \ start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \ @@ -399,6 +413,42 @@ do { \ } \ } while (0) +#else +#define __flush_tlb_range_op(op, start, pages, stride, \ + asid, tlb_level, tlbi_user) \ +do { \ + int num = 0; \ + int scale = 0; \ + unsigned long addr; \ + \ + while (pages > 0) { \ + if (!system_supports_tlb_range() || \ + pages % 2 == 1) { \ + addr = __TLBI_VADDR(start, asid); \ + __tlbi_level(op, addr, tlb_level); \ + if (tlbi_user) \ + __tlbi_user_level(op, addr, tlb_level); \ + start += stride; \ + pages -= stride >> PAGE_SHIFT; \ + continue; \ + } \ + \ + num = __TLBI_RANGE_NUM(pages, scale); \ + if (num >= 0) { \ + addr = __TLBI_VADDR_RANGE(start, asid, scale, \ + num, tlb_level); \ + __tlbi(r##op, addr); \ + if (tlbi_user) \ + __tlbi_user(r##op, addr); \ + start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \ + pages -= __TLBI_RANGE_PAGES(num, scale); \ + } \ + scale++; \ + } \ +} while (0) + +#endif //if defined(CONFIG_IEE) || defined(CONFIG_KOI) + #define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \ __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false) @@ -493,9 +543,9 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end */ static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr) { - unsigned long addr = __TLBI_VADDR(kaddr, 0); - - dsb(ishst); + unsigned long addr = __TLBI_VADDR(kaddr, 0); + + dsb(ishst); __tlbi(vaae1is, addr); dsb(ish); isb(); diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 4ce58887302a..579b1f713849 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -36,6 +36,8 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ syscall.o proton-pack.o idreg-override.o idle.o \ patching.o +obj-y += iee/ +obj-$(CONFIG_KOI) += koi/ obj-$(CONFIG_AARCH32_EL0) += binfmt_elf32.o sys32.o signal32.o \ sys_compat.o obj-$(CONFIG_AARCH32_EL0) += sigreturn32.o @@ -84,6 +86,7 @@ obj-$(CONFIG_IPI_AS_NMI) += ipi_nmi.o obj-$(CONFIG_HISI_VIRTCCA_GUEST) += virtcca_cvm_guest.o virtcca_cvm_tsi.o obj-$(CONFIG_HISI_VIRTCCA_HOST) += virtcca_cvm_host.o CFLAGS_patch-scs.o += -mbranch-protection=none +obj-$(CONFIG_HIVE) += sfi_bpf_arch.o # Force dependency (vdso*-wrap.S includes vdso.so through incbin) $(obj)/vdso-wrap.o: $(obj)/vdso/vdso.so diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 87ac0b9c0b4f..f135db1d9965 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -306,11 +306,19 @@ static int cp15barrier_handler(struct pt_regs *regs, u32 instr) static int cp15_barrier_set_hw_mode(bool enable) { +#ifdef CONFIG_IEE + if (enable) + sysreg_clear_set_iee_si(sctlr_el1, 0, SCTLR_EL1_CP15BEN); + else + sysreg_clear_set_iee_si(sctlr_el1, SCTLR_EL1_CP15BEN, 0); + return 0; 
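	/*
	 * Pattern used throughout this patch, shown once here for orientation:
	 * the read side of the read-modify-write stays a plain read_sysreg(),
	 *
	 *	u64 new = (read_sysreg(sctlr_el1) & ~clear) | set;
	 *
	 * but the write is delegated to the rwx gate,
	 *
	 *	iee_rwx_gate_entry(IEE_WRITE_sctlr_el1, new);
	 *
	 * (see sysreg_clear_set_iee_si() in sysreg.h), so no code outside the
	 * gate issues an MSR to a protected system register directly.
	 */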
+#else if (enable) sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_CP15BEN); else sysreg_clear_set(sctlr_el1, SCTLR_EL1_CP15BEN, 0); return 0; +#endif } static bool try_emulate_cp15_barrier(struct pt_regs *regs, u32 insn) @@ -341,11 +349,19 @@ static int setend_set_hw_mode(bool enable) if (!cpu_supports_mixed_endian_el0()) return -EINVAL; +#ifdef CONFIG_IEE + if (enable) + sysreg_clear_set_iee_si(sctlr_el1, 0, SCTLR_EL1_CP15BEN); + else + sysreg_clear_set_iee_si(sctlr_el1, SCTLR_EL1_CP15BEN, 0); + return 0; +#else if (enable) sysreg_clear_set(sctlr_el1, SCTLR_EL1_SED, 0); else sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_SED); return 0; +#endif } static int __a32_setend_handler(struct pt_regs *regs, u32 big_endian) diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index f20918eb36bc..68c08adc86c9 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -102,6 +102,19 @@ int main(void) DEFINE(FREGS_SIZE, sizeof(struct ftrace_regs)); BLANK(); #endif +#ifdef CONFIG_IEE + DEFINE(iee_from_token_offset, offsetof(struct task_token, iee_stack)); + DEFINE(tmp_page_from_token_offset, offsetof(struct task_token, tmp_page)); + DEFINE(kernel_from_token_offset, offsetof(struct task_token, kernel_stack)); + DEFINE(mm_from_task_offset, offsetof(struct task_struct, mm)); +#endif +#ifdef CONFIG_KOI + DEFINE(koi_kernel_from_token_offset, offsetof(struct task_token, koi_kernel_stack)); + DEFINE(koi_from_token_offset, offsetof(struct task_token, koi_stack)); + DEFINE(ttbr1_from_token_offset, offsetof(struct task_token, current_ttbr1)); + DEFINE(koi_stack_base_from_token_offset, offsetof(struct task_token, koi_stack_base)); +#endif + BLANK(); #ifdef CONFIG_AARCH32_EL0 DEFINE(COMPAT_SIGFRAME_REGS_OFFSET, offsetof(struct a32_sigframe, uc.uc_mcontext.arm_r0)); DEFINE(COMPAT_RT_SIGFRAME_REGS_OFFSET, offsetof(struct a32_rt_sigframe, sig.uc.uc_mcontext.arm_r0)); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 2e5e4052a182..d27d11d7b7bb 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -80,7 +80,11 @@ hisilicon_1980005_enable(const struct arm64_cpu_capabilities *__unused) __set_bit(ARM64_HAS_CACHE_IDC, system_cpucaps); arm64_ftr_reg_ctrel0.sys_val |= BIT(CTR_EL0_IDC_SHIFT); arm64_ftr_reg_ctrel0.strict_mask &= ~BIT(CTR_EL0_IDC_SHIFT); +#ifdef CONFIG_IEE + sysreg_clear_set_iee_si(sctlr_el1, SCTLR_EL1_UCT, 0); +#else sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); +#endif } #endif @@ -132,7 +136,11 @@ cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *cap) enable_uct_trap = true; if (enable_uct_trap) +#ifdef CONFIG_IEE + sysreg_clear_set_iee_si(sctlr_el1, SCTLR_EL1_UCT, 0); +#else sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); +#endif } #ifdef CONFIG_ARM64_ERRATUM_1463225 @@ -147,7 +155,11 @@ has_cortex_a76_erratum_1463225(const struct arm64_cpu_capabilities *entry, static void __maybe_unused cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) { +#ifdef CONFIG_IEE + sysreg_clear_set_iee_si(sctlr_el1, SCTLR_EL1_UCI, 0); +#else sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCI, 0); +#endif } #ifdef CONFIG_HISILICON_ERRATUM_HIP08_RU_PREFETCH diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index a1736e9044da..81aaca96478c 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -94,6 +94,11 @@ #include #include +#ifdef CONFIG_IEE +#include +#include +#endif + /* Kernel representation of AT_HWCAP and AT_HWCAP2 */ static 
DECLARE_BITMAP(elf_hwcap, MAX_CPU_FEATURES) __read_mostly; @@ -1616,7 +1621,11 @@ static void cpu_emulate_effective_ctr(const struct arm64_cpu_capabilities *__unu * value. */ if (!(read_cpuid_cachetype() & BIT(CTR_EL0_IDC_SHIFT))) +#ifdef CONFIG_IEE + sysreg_clear_set_iee_si(sctlr_el1, SCTLR_EL1_UCT, 0); +#else sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); +#endif } static bool has_cache_dic(const struct arm64_cpu_capabilities *entry, @@ -1877,7 +1886,11 @@ static inline void __cpu_enable_hw_dbm(void) { u64 tcr = read_sysreg(tcr_el1) | TCR_HD; +#ifdef CONFIG_IEE + iee_rwx_gate_entry(IEE_WRITE_tcr_el1, tcr); +#else write_sysreg(tcr, tcr_el1); +#endif isb(); local_flush_tlb_all(); } @@ -2060,7 +2073,9 @@ static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) */ WARN_ON_ONCE(in_interrupt()); + #ifndef CONFIG_IEE sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0); + #endif set_pstate_pan(1); } #endif /* CONFIG_ARM64_PAN */ @@ -2125,7 +2140,11 @@ static bool has_generic_auth(const struct arm64_cpu_capabilities *entry, static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap) { if (this_cpu_has_cap(ARM64_HAS_E0PD)) +#ifdef CONFIG_IEE + sysreg_clear_set_iee_si(tcr_el1, 0, TCR_E0PD1); +#else sysreg_clear_set(tcr_el1, 0, TCR_E0PD1); +#endif } #endif /* CONFIG_ARM64_E0PD */ @@ -2220,7 +2239,11 @@ static void nmi_enable(const struct arm64_cpu_capabilities *__unused) * avoid leaving things masked. */ _allint_clear(); + #ifdef CONFIG_IEE + sysreg_clear_set_iee_si(sctlr_el1, SCTLR_EL1_SPINTMASK, SCTLR_EL1_NMI); + #else sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPINTMASK, SCTLR_EL1_NMI); + #endif isb(); } #endif @@ -2235,7 +2258,11 @@ static void bti_enable(const struct arm64_cpu_capabilities *__unused) * So, be strict and forbid other BRs using other registers to * jump onto a PACIxSP instruction: */ +#ifdef CONFIG_IEE + sysreg_clear_set_iee_si(sctlr_el1, 0, SCTLR_EL1_BT0 | SCTLR_EL1_BT1); +#else sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_BT0 | SCTLR_EL1_BT1); +#endif isb(); } #endif /* CONFIG_ARM64_BTI */ @@ -2243,7 +2270,11 @@ static void bti_enable(const struct arm64_cpu_capabilities *__unused) #ifdef CONFIG_ARM64_MTE static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) { + #ifdef CONFIG_IEE + sysreg_clear_set_iee_si(sctlr_el1, 0, SCTLR_ELx_ATA | SCTLR_EL1_ATA0); + #else sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_ATA | SCTLR_EL1_ATA0); + #endif mte_cpu_setup(); @@ -2288,7 +2319,11 @@ static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, in static void cpu_trap_el0_impdef(const struct arm64_cpu_capabilities *__unused) { + #ifdef CONFIG_IEE + sysreg_clear_set_iee_si(sctlr_el1, 0, SCTLR_EL1_TIDCP); + #else sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_TIDCP); + #endif } static void cpu_enable_dit(const struct arm64_cpu_capabilities *__unused) @@ -2298,7 +2333,11 @@ static void cpu_enable_dit(const struct arm64_cpu_capabilities *__unused) static void cpu_enable_mops(const struct arm64_cpu_capabilities *__unused) { + #ifdef CONFIG_IEE + sysreg_clear_set_iee_si(sctlr_el1, 0, SCTLR_EL1_MSCEn); + #else sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_MSCEn); + #endif } /* Internal helper functions to match cpu capability type */ @@ -3593,6 +3632,43 @@ static void __init setup_system_capabilities(void) enable_cpu_capabilities(SCOPE_ALL & ~SCOPE_BOOT_CPU); } +#ifdef CONFIG_IEE + +static void iee_si_test_end(void) +{ + pr_info("IEE: testing iee_exec_entry sctlr...\n"); + iee_rwx_gate_entry(IEE_WRITE_SCTLR, read_sysreg(sctlr_el1)& ~SCTLR_ELx_M); + pr_info("IEE: 
testing iee_exec_entry ttbr0_el1...\n"); + iee_rwx_gate_entry(IEE_WRITE_TTBR0, read_sysreg(ttbr0_el1)); + pr_info("IEE: testing iee_exec_entry vbar...\n"); + iee_rwx_gate_entry(IEE_WRITE_VBAR, read_sysreg(vbar_el1)); + pr_info("IEE: testing iee_exec_entry tcr...\n"); + iee_rwx_gate_entry(IEE_WRITE_TCR, read_sysreg(tcr_el1)); + // pr_info("IEE: testing iee_exec_entry mdscr...\n"); + // iee_rwx_gate_entry(IEE_WRITE_MDSCR, read_sysreg(mdscr_el1)); + // pr_info("IEE: testing iee_exec_entry afsr0...\n"); + // iee_rwx_gate_entry(IEE_WRITE_AFSR0); + #ifdef CONFIG_KOI + pr_info("IEE: current TTBR1_EL1:%llx, TTBR0:%llx\n", read_sysreg(ttbr1_el1), read_sysreg(ttbr0_el1)); + pr_info("IEE: testing iee_exec_entry switch to koi...\n"); + iee_rwx_gate_entry(IEE_SWITCH_TO_KOI, phys_to_ttbr(__pa_symbol(swapper_pg_dir)) | 3UL << 48); + pr_info("IEE: current TTBR1_EL1:%llx, TTBR0:%llx\n", read_sysreg(ttbr1_el1), read_sysreg(ttbr0_el1)); + pr_info("IEE: testing iee_exec_entry switch to kernel...\n"); + iee_rwx_gate_entry(IEE_SWITCH_TO_KERNEL); + pr_info("IEE: current TTBR1_EL1:%llx, TTBR0:%llx\n", read_sysreg(ttbr1_el1), read_sysreg(ttbr0_el1)); + #endif +} + +/* Finish iee rwx gate initializations. */ +static void __init iee_si_init_done(void) +{ + // Prepare data for iee rwx gate + iee_si_prepare_data(); + // All initialization is done. Do some simple tests. + iee_si_test_end(); +} +#endif + void __init setup_cpu_features(void) { u32 cwg; @@ -3621,6 +3697,10 @@ void __init setup_cpu_features(void) if (!cwg) pr_warn("No Cache Writeback Granule information, assuming %d\n", ARCH_DMA_MINALIGN); + + #ifdef CONFIG_IEE + iee_si_init_done(); + #endif } static int enable_mismatched_32bit_el0(unsigned int cpu) diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 745aefddd9a3..265417e0ad81 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -36,10 +36,14 @@ u8 debug_monitors_arch(void) */ static void mdscr_write(u32 mdscr) { +// #ifdef CONFIG_IEE +// iee_rwx_gate_entry(IEE_WRITE_mdscr_el1, mdscr); +// #else unsigned long flags; flags = local_daif_save(); write_sysreg(mdscr, mdscr_el1); local_daif_restore(flags); +// #endif } NOKPROBE_SYMBOL(mdscr_write); diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 4602c107c40a..73aa0aad07b1 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -212,7 +212,11 @@ static __always_inline void fast_enter_from_user_mode(struct pt_regs *regs) * mode. Before this function is called it is not safe to call regular kernel * code, instrumentable code, or any code which may trigger an exception. 
*/ +#ifdef CONFIG_IEE +void noinstr arm64_enter_nmi(struct pt_regs *regs) +#else static void noinstr arm64_enter_nmi(struct pt_regs *regs) +#endif { regs->lockdep_hardirqs = lockdep_hardirqs_enabled(); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index da3809632f0f..40c279f562f0 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -29,12 +29,399 @@ #include #include +#ifdef CONFIG_IEE +#include +#define BAD_SP_EL0 0 +#define BAD_ELR_EL1 1 +#define BAD_TCR_EL1 2 +#define BAD_IEE_SI 4 +#endif + .macro clear_gp_regs .irp n,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29 mov x\n, xzr .endr .endm + +#ifdef CONFIG_KOI +#ifdef CONFIG_IEE +/* + * This function is used to switch to ko stack in glue code + */ +SYM_FUNC_START(koi_do_switch_to_ko_stack) + sub sp, sp, #48 + stp x29, x30, [sp] + str x2, [sp, #16] + stp x0, x1, [sp, #32] + + // iee_rw_gate(IEE_WRITE_KERNEL_STACK, current, sp) + mov x0, #IEE_WRITE_KOI_KERNEL_STACK + mrs x1, sp_el0 + add x2, sp, #48 + + bl iee_rw_gate + + // iee_rw_gate(IEE_READ_KOI_STACK, current) + mov x0, #IEE_READ_KOI_STACK + mrs x1, sp_el0 + bl iee_rw_gate + + ldp x29, x30, [sp] + ldr x2, [sp, #16] + add x1, sp, #32 + mov sp, x0 + ldp x0, x1, [x1] + + isb + ret +SYM_FUNC_END(koi_do_switch_to_ko_stack) + +/* + * This fucntion is used to switch to kernel stack in glue code + */ +SYM_FUNC_START(koi_do_switch_to_kernel_stack) + sub sp, sp, #48 + stp x29, x30, [sp] + str x2, [sp, #16] + stp x0, x1, [sp, #32] + // iee_rw_gate(IEE_WRITE_KOI_STACK, current, sp) + mov x0, #IEE_WRITE_KOI_STACK + mrs x1, sp_el0 + add x2, sp, #48 + bl iee_rw_gate + + // iee_rw_gate(IEE_READ_KOI_KERNEL_STACK, current) + mov x0, #IEE_READ_KOI_KERNEL_STACK + mrs x1, sp_el0 + bl iee_rw_gate + + ldp x29, x30, [sp] + ldr x2, [sp, #16] + add x1, sp, #32 + mov sp, x0 + ldp x0, x1, [x1] + isb + ret +SYM_FUNC_END(koi_do_switch_to_kernel_stack) + +/* + * Before switch to ko's pgtable, we must switch current stack to ko's stack. 
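 * (In rough C-like pseudocode the routine below amounts to -- a sketch only,
 *  with FRAME being the 176 bytes of registers already spilled by the caller:
 *
 *	token->koi_kernel_stack = caller_sp + FRAME;  // via _iee_write_koi_kernel_stack
 *	ko_sp = token->koi_stack;                     // via _iee_read_koi_stack
 *	memcpy(ko_sp - FRAME, sp, FRAME);             // carry the spilled frame across
 *	sp = ko_sp - FRAME;
 *
 *  PAN is cleared around the token accesses, presumably because the task
 *  token lives in IEE memory mapped with user permissions.)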
+ * We have stored registers to kernel stack, and we need to restore them from ko's stack after switching, + * so we need to copy from kernel stack to ko stack + * the memory region to copy is [sp, stack_top) + * void koi_switch_to_ko_stack(void); + */ +SYM_FUNC_START(koi_switch_to_ko_stack) + mrs x17, pan + msr pan, 0x0 + + sub sp, sp, #32 + str x17, [sp, #16] + stp x30, x29, [sp] + + // current sp stores in x2 + add x2, x1, #176 + // current sp_el0 stores in x0 + mov x1, x0 + adrp x0, iee_offset + ldr x0, [x0, #:lo12:iee_offset] + bl _iee_write_koi_kernel_stack + + adrp x0, iee_offset + ldr x0, [x0, #:lo12:iee_offset] + mrs x1, sp_el0 + bl _iee_read_koi_stack + + ldr x17, [sp, #16] + ldp x30, x29, [sp] + add sp, sp, #32 + + msr pan, x17 + + sub x0, x0, #176 + mov x1, sp + mov x2, #176 + + // memcpy(current->driver_stack, current->kernel_stack, 176) + mov x16, lr + bl memcpy + mov lr, x16 + + mov sp, x0 + isb + ret +SYM_FUNC_END(koi_switch_to_ko_stack) + +SYM_FUNC_START(koi_switch_to_kernel_stack) + /* + * current sp belongs to driver stack, and the bottom 160 bytes saves registers when exception occurred, + * so we should add 160 to current sp, and store it in task_struct + * also, fetch kernel sp from task_struct, copy the bottom 160 bytes from driver stack to kernel stack + */ + mrs x17, pan + msr pan, 0x0 + + sub sp, sp, #32 + stp x30, x29, [sp] + str x17, [sp, #16] + + adrp x0, iee_offset + ldr x0, [x0, #:lo12:iee_offset] + mrs x1, sp_el0 + add x2, sp, #192 + bl _iee_write_koi_stack + + adrp x0, iee_offset + ldr x0, [x0, #:lo12:iee_offset] + mrs x1, sp_el0 + bl _iee_read_koi_kernel_stack + + ldr x17, [sp, #16] + ldp x30, x29, [sp] + add sp, sp, #32 + + msr pan, x17 + + // x0 = kernel_stack + sub x0, x0, #160 + mov x1, sp + // x2 = 160 + mov x2, #160 + + mov x16, lr + bl memcpy + mov lr, x16 + + mov sp, x0 + isb + ret +SYM_FUNC_END(koi_switch_to_kernel_stack) +#else +/* + * This function is used to switch to ko stack in glue code + */ +SYM_FUNC_START(koi_do_switch_to_ko_stack) + sub sp, sp, #16 + stp x16, x17, [sp] + mrs x17, sp_el0 + adrp x16, koi_offset + ldr x16, [x16, #:lo12:koi_offset] + add x17, x17, x16 + add x16, sp, #16 + str x16, [x17, #koi_kernel_from_token_offset] + ldr x16, [x17, #koi_from_token_offset] + mov x17, sp + mov sp, x16 + ldp x16, x17, [x17] + isb + ret +SYM_FUNC_END(koi_do_switch_to_ko_stack) + +/* + * This fucntion is used to switch to kernel stack in glue code + */ +SYM_FUNC_START(koi_do_switch_to_kernel_stack) + sub sp, sp, #16 + stp x16, x17, [sp] + mrs x17, sp_el0 + adrp x16, koi_offset + ldr x16, [x16, #:lo12:koi_offset] + add x17, x17, x16 + add x16, sp, #16 + str x16, [x17, #koi_from_token_offset] + ldr x16, [x17, #koi_kernel_from_token_offset] + mov x17, sp + mov sp, x16 + ldp x16, x17, [x17] + isb + ret +SYM_FUNC_END(koi_do_switch_to_kernel_stack) + +/* + * Before switch to ko's pgtable, we must switch current stack to ko's stack. 
+ * We have stored registers to kernel stack, and we need to restore them from ko's stack after switching, + * so we need to copy from kernel stack to ko stack + * the memory region to copy is [sp, stack_top) + * void koi_switch_to_ko_stack(unsigned long stack_top); + */ +SYM_FUNC_START(koi_switch_to_ko_stack) + // current sp stores in x1 + add x3, x1, #176 + adrp x4, koi_offset + ldr x4, [x4, #:lo12:koi_offset] + add x4, x0, x4 + // current sp_el0 stores in x0 + str x3, [x4, #koi_kernel_from_token_offset] + ldr x0, [x4, #koi_from_token_offset] + sub x0, x0, #176 + mov x2, #176 + + // memcpy(current->driver_stack, current->kernel_stack, 176) + mov x16, lr + bl memcpy + mov lr, x16 + + mov sp, x0 + isb + ret +SYM_FUNC_END(koi_switch_to_ko_stack) + +SYM_FUNC_START(koi_switch_to_kernel_stack) + /* + * current sp belongs to driver stack, and the bottom 176 bytes saves registers when exception occurred, + * so we should add 176 to current sp, and store it in task_struct + * also, fetch kernel sp from task_struct, copy the bottom 176 bytes from driver stack to kernel stack + */ + mov x1, sp + add x3, sp, #160 + + mrs x16, sp_el0 + adrp x2, koi_offset + ldr x2, [x2, #:lo12:koi_offset] + add x16, x16, x2 + str x3, [x16, #koi_from_token_offset] + // sp points to kernel_stack + ldr x0, [x16, #koi_kernel_from_token_offset] + + // x0 = kernel_stack + sub x0, x0, #160 + // x2 = 160 + mov x2, #160 + mov x16, lr + // memcpy(kernel_stack, driver_stack, 160) + bl memcpy + mov lr, x16 + mov sp, x0 + isb + ret +SYM_FUNC_END(koi_switch_to_kernel_stack) +#endif +.pushsection ".koi.text", "ax" +SYM_FUNC_START(koi_switch_to_ko_pgtbl) + stp x0, x1, [sp, #16 * 1] + stp x2, x3, [sp, #16 * 2] + stp x4, x5, [sp, #16 * 3] + stp x6, x7, [sp, #16 * 4] + stp x8, x9, [sp, #16 * 5] + stp x10, x11, [sp, #16 * 6] + stp x12, x13, [sp, #16 * 7] + stp x14, x15, [sp, #16 * 8] + stp x16, x17, [sp, #16 * 9] + stp x18, x30, [sp, #16 * 10] + + adrp x0, koi_swapper_ttbr1 + ldr x0, [x0, #:lo12:koi_swapper_ttbr1] + cbz x0, 0f + bl koi_do_switch_to_ko_pgtbl + // if x0 == 0, don't need to switch pgtable and stack, jump to 0 + cbz x0, 0f + mov x19, x0 + // if current on task's kernel stack, switch to ko stack + mrs x0, sp_el0 + mov x1, sp + ldr x2, [x0, TSK_STACK] + eor x2, x2, x1 + and x2, x2, #~(THREAD_SIZE - 1) + cbnz x2, 1f + +1: +#ifndef CONFIG_IEE + msr ttbr1_el1, x19 + isb + nop + nop + nop +#else + mov x0, #IEE_SWITCH_TO_KOI + mov x1, x19 + bl iee_rwx_gate_entry +#endif +0: + + ldp x0, x1, [sp, #16 * 1] + ldp x2, x3, [sp, #16 * 2] + ldp x4, x5, [sp, #16 * 3] + ldp x6, x7, [sp, #16 * 4] + ldp x8, x9, [sp, #16 * 5] + ldp x10, x11, [sp, #16 * 6] + ldp x12, x13, [sp, #16 * 7] + ldp x14, x15, [sp, #16 * 8] + ldp x16, x17, [sp, #16 * 9] + ldp x18, x30, [sp, #16 * 10] + ret +SYM_FUNC_END(koi_switch_to_ko_pgtbl) + +SYM_FUNC_START(koi_switch_to_kernel_pgtbl) + sub sp, sp, #160 + stp x0, x1, [sp, #16 * 0] + stp x2, x3, [sp, #16 * 1] + stp x4, x5, [sp, #16 * 2] + stp x6, x7, [sp, #16 * 3] + stp x8, x9, [sp, #16 * 4] + stp x10, x11, [sp, #16 * 5] + + stp x12, x13, [sp, #16 * 6] + stp x14, x15, [sp, #16 * 7] + stp x16, x17, [sp, #16 * 8] + stp x18, x30, [sp, #16 * 9] + // check whether paging init finished + adrp x0, koi_swapper_ttbr1 + ldr x0, [x0, #:lo12:koi_swapper_ttbr1] + cbz x0, 0f + + bl koi_do_switch_to_kernel_pgtbl + /* + * koi_do_switch_to_kernel_pgtbl return 0 indicates + * that when exception occurred, the isolated ko is executing under koi pgtbl, + * so we need to switch stack to kernel stack after switch pgtbl back to 
koi_swapper_ttbr1. + */ + cbz x0, 0f +#ifndef CONFIG_IEE + mrs x0, sp_el0 + adrp x1, koi_offset + ldr x1, [x1, #:lo12:koi_offset] + add x0, x0, x1 + mov x16, sp + ldr x17, [x0, #koi_stack_base_from_token_offset] + eor x17, x17, x16 + and x17, x17, #~(THREAD_SIZE - 1) + cbnz x17, 0f +#else + // save current pan + mrs x17, pan + // disable pan + msr pan, 0x0 + adrp x0, iee_offset + ldr x0, [x0, #:lo12:iee_offset] + mrs x1, sp_el0 + bl _iee_read_koi_stack_base + // restore pan + msr pan, x17 + + mov x16, sp + eor x0, x0, x16 + and x0, x0, #~(THREAD_SIZE - 1) + cbnz x0, 0f +#endif +0: + + ldp x0, x1, [sp, #16 * 0] + ldp x2, x3, [sp, #16 * 1] + ldp x4, x5, [sp, #16 * 2] + ldp x6, x7, [sp, #16 * 3] + ldp x8, x9, [sp, #16 * 4] + ldp x10, x11, [sp, #16 * 5] + ldp x12, x13, [sp, #16 * 6] + ldp x14, x15, [sp, #16 * 7] + ldp x16, x17, [sp, #16 * 8] + ldp x18, x30, [sp, #16 * 9] + add sp, sp, #160 + ret +SYM_FUNC_END(koi_switch_to_kernel_pgtbl) +.popsection +#endif + .macro kernel_ventry, el:req, ht:req, regsize:req, label:req .align 7 .Lventry_start\@: @@ -53,6 +440,15 @@ .Lskip_tramp_vectors_cleanup\@: .endif +#ifdef CONFIG_KOI + .if \el == 1 + msr tpidrro_el0, x30 + bl koi_switch_to_kernel_pgtbl + mrs x30, tpidrro_el0 + msr tpidrro_el0, xzr + .endif +#endif + sub sp, sp, #PT_REGS_SIZE #ifdef CONFIG_VMAP_STACK /* @@ -332,6 +728,17 @@ alternative_else_nop_endif msr_s SYS_ICC_PMR_EL1, x20 .Lskip_pmr_save\@: +#endif + +#ifndef CONFIG_IEE +#ifdef CONFIG_KOI + // set tcr_el1 to choose asid from ttbr1_el1 or ttbr0_el1 + .if \el == 0 + mrs x0, tcr_el1 + orr x0, x0 ,#0x0000000000400000 + msr tcr_el1,x0 + .endif +#endif #endif /* @@ -345,9 +752,11 @@ alternative_else_nop_endif .endm .macro kernel_exit, el, fast_mode = std + #ifndef CONFIG_IEE .if \el != 0 disable_daif .endif + #endif #ifdef CONFIG_ARM64_PSEUDO_NMI alternative_if_not ARM64_HAS_GIC_PRIO_MASKING @@ -435,6 +844,40 @@ alternative_else_nop_endif msr elr_el1, x21 // set up the return data msr spsr_el1, x22 + +#ifdef CONFIG_IEE + + .if \el == 0 + + /* Skip TCR settings if User\Kernel isolation is already enforced by KPTI.*/ + alternative_insn nop, "b 6f", ARM64_UNMAP_KERNEL_AT_EL0 + // SET hpd1 = 0 start + mrs x0, tcr_el1 + and x0, x0, #0xFFFFFBFFFFFFFFFF + and x0, x0, #0xFFFFFFFFFFBFFFFF + msr tcr_el1, x0 + // SET hpd1 = 0 end + + // Check ELR_EL1 + mrs x0, elr_el1 + lsr x0, x0, #48 + tst x0, #0xffff + b.ne 5f +6: + + .endif + +#else +#ifdef CONFIG_KOI + .if \el==0 + mrs x0, tcr_el1 + and x0, x0, #0xFFFFFFFFFFBFFFFF + msr tcr_el1,x0 + .endif +#endif + +#endif + ldp x0, x1, [sp, #16 * 0] ldp x2, x3, [sp, #16 * 1] ldp x4, x5, [sp, #16 * 2] @@ -480,6 +923,17 @@ alternative_if ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD alternative_else_nop_endif .endif eret + +#ifdef CONFIG_IEE +5: + // ELR_EL1 check fail + mov x0, sp + mov x1, #BAD_ELR_EL1 + mrs x2, esr_el1 + bl iee_bad_mode + ASM_BUG() +#endif + .else ldr lr, [sp, #S_LR] add sp, sp, #PT_REGS_SIZE // restore sp @@ -488,7 +942,13 @@ alternative_else_nop_endif .if \fast_mode == std alternative_insn nop, "dmb sy", ARM64_WORKAROUND_1508412 .endif - +#ifdef CONFIG_KOI + sub sp, sp, #176 + stp x30, x19, [sp, #16 * 0] + bl koi_switch_to_ko_pgtbl + ldp x30, x19, [sp, #16 * 0] + add sp, sp, #176 +#endif eret .endif sb @@ -600,6 +1060,151 @@ SYM_CODE_START_LOCAL(__bad_stack) SYM_CODE_END(__bad_stack) #endif /* CONFIG_VMAP_STACK */ +/* + * iee exception entry + */ + .macro iee_exception_entry, el + + /* Check whether exception is permmited. 
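 * (x22 still holds the ELR_EL1 snapshot taken by kernel_entry; if it points
 *  into the [__iee_si_no_irq, __iee_si_text_end] window -- presumably the
 *  part of the rwx-gate text that must run with exceptions masked -- the
 *  exception is treated as a violation and routed to iee_bad_mode() below.)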
*/ + ldr x1, =__iee_si_no_irq + cmp x1, x22 + b.hi 1148f + ldr x1, =__iee_si_text_end + cmp x1, x22 + b.lo 1148f + /* ELR check fail */ + mov x0, sp + mov x1, #BAD_IEE_SI + mrs x2, esr_el1 + bl iee_bad_mode + ASM_BUG() +1148: + + /* el0 set hpds */ + .if \el == 0 + + /* Skip TCR settings if User\Kernel isolation is already enforced by KPTI.*/ + alternative_insn nop, "b 6f", ARM64_UNMAP_KERNEL_AT_EL0 + /* SET hpd1 = 1 start */ + mrs x0, tcr_el1 + orr x0, x0, #0x0000040000000000 + orr x0, x0, #0x0000000000400000 + msr tcr_el1, x0 + /* SET hpd1 = 1 end */ + + disable_daif + + /* Check TCR_EL1 */ + mrs x0, tcr_el1 + tst x0, #0x0000040000000000 + b.eq 5f + tst x0, #0x0000000000400000 + b.ne 6f + +5: + /* TCR_EL1 check fail */ + mov x0, sp + mov x1, #BAD_TCR_EL1 + mrs x2, esr_el1 + bl iee_bad_mode + ASM_BUG() + +6: + nop + + .else +#ifdef CONFIG_IEE_INTERRUPTABLE + /* el1 save elr_el1 and set pan */ + /* Check ELR_EL1 */ + ldr x1, =__iee_code_start + cmp x1, x22 + b.hi 7f + ldr x1, =__iee_code_end + cmp x1, x22 + b.lo 7f + /* Exception from iee code */ + /* Switch to kernel stack */ + mrs x0, sp_el0 /* x0 -> task_struct(VA) */ + adrp x2, iee_offset + ldr x2, [x2, #:lo12:iee_offset] + add x1, x0, x2 /* x1 -> task_token(IEE) */ + // store iee stack + mov x3, sp + str x3, [x1, #iee_from_token_offset] + // load kernel stack + ldr x3, [x1, #kernel_from_token_offset] + mov sp, x3 + sub sp, sp, #PT_REGS_SIZE + /* Enable PAN */ + msr pan, #0x1 + +7: + /* Exception from kernel code */ + mov x0, #0x0 + mov x1, #0x0 + mov x2, #0x0 + mov x3, #0x0 +#endif + .endif + .endm + +/* + * iee exception exit + */ + .macro iee_exception_exit, el + // Disable daif + disable_daif + + .if \el == 1 +#ifdef CONFIG_IEE_INTERRUPTABLE + /* el1 pop elr_el1 and set pan */ + /* Check ELR_EL1 */ + ldr x1, =__iee_code_start + cmp x1, x22 + b.hi 9f + ldr x1, =__iee_code_end + cmp x1, x22 + b.lo 9f + /* Eret iee code */ + /* Disable PAN */ + msr pan, #0x0 + /* Switch to iee stack */ + add sp, sp, #PT_REGS_SIZE + mrs x0, sp_el0 /* x0 -> task_struct */ + adrp x2, iee_offset + ldr x2, [x2, #:lo12:iee_offset] + add x1, x0, x2 /* x1 -> task_token(IEE) */ + // store kernel stack + mov x3, sp + str x3, [x1, #kernel_from_token_offset] + // load iee stack + ldr x2, [x1, #iee_from_token_offset] + mov sp, x2 + /* Load ELR_EL1 from iee stack */ + ldr x21, [sp, #S_PC] + /* Check the modify of ELR_EL1 */ + cmp x21, x22 + b.ne 8f + /* ELR_EL1 not modified */ + b 9f + +8: + // ELR_EL1 modified + mov x0, sp + mov x1, #BAD_ELR_EL1 + mrs x2, esr_el1 + bl iee_bad_mode + ASM_BUG() + +9: + // Eret kernel code + mov x0, #0x0 + mov x1, #0x0 + mov x2, #0x0 + mov x3, #0x0 +#endif + .endif + .endm #ifdef CONFIG_FAST_SYSCALL .macro check_esr_el1_ec_svc64 /* Only support SVC64 for now */ @@ -731,8 +1336,18 @@ SYM_CODE_START_LOCAL(el\el\ht\()_\regsize\()_\label) .endif #endif kernel_entry \el, \regsize + + #ifdef CONFIG_IEE + iee_exception_entry \el + #endif + mov x0, sp bl el\el\ht\()_\regsize\()_\label\()_handler + + #ifdef CONFIG_IEE + iee_exception_exit \el + #endif + .if \el == 0 b ret_to_user .else @@ -764,9 +1379,15 @@ SYM_CODE_END(el\el\ht\()_\regsize\()_\label) entry_handler 0, t, 32, fiq entry_handler 0, t, 32, error +#ifdef CONFIG_KOI +.pushsection ".koi.text", "ax" +#endif SYM_CODE_START_LOCAL(ret_to_kernel) kernel_exit 1 SYM_CODE_END(ret_to_kernel) +#ifdef CONFIG_KOI +.popsection +#endif SYM_CODE_START_LOCAL(ret_to_user) ldr x19, [tsk, #TSK_TI_FLAGS] // re-check for single-step diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c 
index 0137d987631e..fbb543bcdb4a 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1309,7 +1309,11 @@ void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) isb(); /* Allow EL0 to access TPIDR2 */ + #ifdef CONFIG_IEE + iee_rwx_gate_entry(IEE_WRITE_sctlr_el1, read_sysreg(SCTLR_EL1) | SCTLR_ELx_ENTP2); + #else write_sysreg(read_sysreg(SCTLR_EL1) | SCTLR_ELx_ENTP2, SCTLR_EL1); + #endif isb(); } diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 6517bf2644a0..3d0716d59c0b 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -661,6 +661,7 @@ SYM_FUNC_START_LOCAL(secondary_startup) SYM_FUNC_END(secondary_startup) .text + SYM_FUNC_START_LOCAL(__secondary_switched) mov x0, x20 bl set_cpu_boot_mode_flag @@ -746,6 +747,10 @@ SYM_FUNC_START(__enable_mmu) cmp x3, #ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX b.gt __no_granule_support phys_to_ttbr x2, x2 +#ifdef CONFIG_IEE + mov x3, #1 + bfi x2, x3, #48, #16 // ASID 1 is used by IEE rwx gate. +#endif msr ttbr0_el1, x2 // load TTBR0 load_ttbr1 x1, x1, x3 diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 02870beb271e..76d86b3d71b1 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -34,6 +34,10 @@ #include #include +#ifdef CONFIG_PTP +#include +#endif + /* * Hibernate core relies on this value being 0 on resume, and marks it * __nosavedata assuming it will keep the resume kernel's '0' value. This @@ -203,6 +207,11 @@ static int create_safe_exec_page(void *src_start, size_t length, memcpy(page, src_start, length); caches_clean_inval_pou((unsigned long)page, (unsigned long)page + length); + + #ifdef CONFIG_PTP + set_iee_page((unsigned long)page_address(page),0); + #endif + rc = trans_pgd_idmap_page(&trans_info, &trans_ttbr0, &t0sz, page); if (rc) return rc; diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index d39a8787edf2..b5ac4b7670bc 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -26,6 +26,10 @@ #include #include +#ifdef CONFIG_IEE +#include +#endif + /* Breakpoint currently in use for each BRP. 
*/ static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]); @@ -102,13 +106,68 @@ int hw_breakpoint_slots(int type) WRITE_WB_REG_CASE(OFF, 14, REG, VAL); \ WRITE_WB_REG_CASE(OFF, 15, REG, VAL) +#ifdef CONFIG_IEE + +#define IEE_SI_READ_WB_REG_CASE(OFF, N, REG, VAL) \ + case (OFF + N): \ + IEE_SI_AARCH64_DBG_READ(N, REG, VAL); \ + break + +#define IEE_SI_WRITE_WB_REG_CASE(OFF, N, REG, VAL) \ + case (OFF + N): \ + IEE_SI_AARCH64_DBG_WRITE(N, REG, VAL); \ + break + +#define IEE_SI_GEN_READ_REG_CASES(OFF, REG, VAL) \ + IEE_SI_READ_WB_REG_CASE(OFF, 0, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 1, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 2, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 3, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 4, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 5, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 6, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 7, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 8, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 9, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 10, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 11, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 12, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 13, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 14, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 15, REG, VAL) + +#define IEE_SI_GEN_WRITE_REG_CASES(OFF, REG, VAL) \ + IEE_SI_WRITE_WB_REG_CASE(OFF, 0, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 1, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 2, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 3, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 4, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 5, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 6, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 7, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 8, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 9, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 10, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 11, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 12, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 13, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 14, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 15, REG, VAL) + +#endif + static u64 read_wb_reg(int reg, int n) { u64 val = 0; switch (reg + n) { +// #ifdef CONFIG_IEE +// IEE_SI_GEN_READ_REG_CASES(AARCH64_DBG_REG_BVR, AARCH64_DBG_REG_NAME_BVR, val); +// IEE_SI_GEN_READ_REG_CASES(AARCH64_DBG_REG_BCR, AARCH64_DBG_REG_NAME_BCR, val); +// #else GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_BVR, AARCH64_DBG_REG_NAME_BVR, val); GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_BCR, AARCH64_DBG_REG_NAME_BCR, val); +// #endif GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_WVR, AARCH64_DBG_REG_NAME_WVR, val); GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_WCR, AARCH64_DBG_REG_NAME_WCR, val); default: @@ -122,8 +181,13 @@ NOKPROBE_SYMBOL(read_wb_reg); static void write_wb_reg(int reg, int n, u64 val) { switch (reg + n) { +// #ifdef CONFIG_IEE +// IEE_SI_GEN_WRITE_REG_CASES(AARCH64_DBG_REG_BVR, AARCH64_DBG_REG_NAME_BVR, val); +// IEE_SI_GEN_WRITE_REG_CASES(AARCH64_DBG_REG_BCR, AARCH64_DBG_REG_NAME_BCR, val); +// #else GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_BVR, AARCH64_DBG_REG_NAME_BVR, val); GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_BCR, AARCH64_DBG_REG_NAME_BCR, val); +// #endif GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_WVR, AARCH64_DBG_REG_NAME_WVR, val); GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_WCR, AARCH64_DBG_REG_NAME_WCR, val); default: @@ -171,6 +235,10 @@ static int is_a32_compat_bp(struct perf_event *bp) return tsk && is_a32_compat_thread(task_thread_info(tsk)); } +#ifdef CONFIG_IEE +int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw); +#endif + /** * hw_breakpoint_slot_setup - Find and setup a perf slot according to * operations @@ -191,6 +259,37 @@ static int hw_breakpoint_slot_setup(struct perf_event 
**slots, int max_slots, { int i; struct perf_event **slot; +// reserve hw breakpoint 0 for iee rwx gate in kernel sapce. +// #ifdef CONFIG_IEE +// struct arch_hw_breakpoint *info = counter_arch_bp(bp); +// if (arch_check_bp_in_kernelspace(info)){ +// for (i = 1; i < max_slots; ++i) { // search from hw breakpoint 1 +// slot = &slots[i]; +// switch (ops) { +// case HW_BREAKPOINT_INSTALL: +// if (!*slot) { +// *slot = bp; +// return i; +// } +// break; +// case HW_BREAKPOINT_UNINSTALL: +// if (*slot == bp) { +// *slot = NULL; +// return i; +// } +// break; +// case HW_BREAKPOINT_RESTORE: +// if (*slot == bp) +// return i; +// break; +// default: +// pr_warn_once("Unhandled hw breakpoint ops %d\n", ops); +// return -EINVAL; +// } +// } +// return -ENOSPC; +// } +// #endif for (i = 0; i < max_slots; ++i) { slot = &slots[i]; diff --git a/arch/arm64/kernel/iee/Makefile b/arch/arm64/kernel/iee/Makefile new file mode 100644 index 000000000000..c62a1cc4f03b --- /dev/null +++ b/arch/arm64/kernel/iee/Makefile @@ -0,0 +1,5 @@ +ccflags-$(CONFIG_IEE_SELINUX_P) := -I$(srctree)/security/selinux -I$(srctree)/security/selinux/include + +obj-$(CONFIG_IEE) += iee.o iee-gate.o iee-func.o iee-pgtable.o stack_slab.o pgtable_slab.o + +obj-$(CONFIG_IEE_SELINUX_P) += iee-selinuxp.o \ No newline at end of file diff --git a/arch/arm64/kernel/iee/iee-func.c b/arch/arm64/kernel/iee/iee-func.c new file mode 100644 index 000000000000..29035c96a4f2 --- /dev/null +++ b/arch/arm64/kernel/iee/iee-func.c @@ -0,0 +1,722 @@ +#include "asm/pgtable.h" +#include +#include +#include +#include +#include +#include + +extern s64 iee_offset; +extern void iee_split_huge_pmd(pmd_t *pmdp, pte_t *pgtable); +#ifdef CONFIG_PTP +extern phys_addr_t __init early_pgtable_alloc(int shift); +#endif + +static inline void iee_set_token(pte_t *ptep, void *new, unsigned long order) +{ +#ifdef CONFIG_PTP + pgd_t *pgdir; + pgd_t *pgdp; + p4d_t *p4dp; + pud_t *pudp; + pmd_t *pmdp; + int use_block_pmd = 0; + + pgdir = swapper_pg_dir; + pgdp = pgd_offset_pgd(pgdir, (unsigned long)new); + p4dp = p4d_offset(pgdp, (unsigned long)new); + pudp = pud_offset(p4dp, (unsigned long)new); + pmdp = pmd_offset(pudp, (unsigned long)new); + + // Handling cont mapping. + if(pmd_val(*pmdp) & PTE_CONT) + { + // The beginning of cont mapping. + int i; + pmd_t *pmdp = pmd_offset(pudp, (unsigned long)new & CONT_PMD_MASK); + for(i = 0; i < CONT_PMDS; i++) + { + set_pmd(pmdp,__pmd(pmd_val(*pmdp) & ~PTE_CONT)); + pmdp++; + } + } + + // Use Block Descriptor. + if(pmd_leaf(*pmdp)) + { + #ifndef CONFIG_PTP + struct page *page = pmd_page(*pmdp); + #endif + pte_t *pgtable = pte_alloc_one_kernel(&init_mm); + + if (!pgtable) + panic("Alloc pgtable error.\n"); + + iee_split_huge_pmd(pmdp, pgtable); + + spinlock_t *ptl = pmd_lock(&init_mm, pmdp); + if(pmd_leaf(READ_ONCE(*pmdp))) + { + smp_wmb(); + pmd_populate_kernel(&init_mm, pmdp, pgtable); + pgtable = NULL; + } + spin_unlock(ptl); + + if(pgtable) + { + #ifdef CONFIG_PTP + iee_memset(pgtable, 0, PAGE_SIZE); + #endif + pte_free_kernel(&init_mm, pgtable); + } + } + else if(pmd_leaf(*pmdp)) + { + use_block_pmd = 1; + } + + iee_rw_gate(IEE_SET_TOKEN, ptep, new, order, use_block_pmd); +#else + int i; + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, new); + p4d_t *p4dp = p4d_offset(pgdp, new); + pud_t *pudp = pud_offset(p4dp, new); + pmd_t *pmdp; + pte_t *lm_ptep; + pte_t *iee_ptep; + unsigned long iee_addr; + int use_block_pmd = 0; + + pmdp = pmd_offset(pudp, new); + + // Handling cont mapping. 
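+	// A single entry in this contiguous run is about to change, and mixed
+	// attributes inside one contiguous block are not architecturally allowed,
+	// so drop PTE_CONT across the whole CONT_PMD-aligned range first.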
+ if(pmd_val(*pmdp) & PTE_CONT) + { + // The beginning of cont mapping. + int i; + pmd_t *pmdp = pmd_offset(pudp, (unsigned long)new & CONT_PMD_MASK); + for(i = 0; i < CONT_PMDS; i++) + { + set_pmd(pmdp,__pmd(pmd_val(*pmdp) & ~PTE_CONT)); + pmdp++; + } + } + + // Use Block Descriptor. + if(pmd_leaf(*pmdp) && order < 9) + { + struct page *page = pmd_page(*pmdp); + pte_t *pgtable = pte_alloc_one_kernel(&init_mm); + int i; + pte_t *ptep = pgtable; + + if (!pgtable) + panic("Alloc pgtable error.\n"); + + for (i = 0; i < PMD_SIZE / PAGE_SIZE; i++, ptep++) { + pte_t entry; + pgprot_t pgprot = PAGE_KERNEL; + pgprot = __pgprot(pgprot_val(pgprot) | PTE_CONT); + + entry = mk_pte(page + i, pgprot); + set_pte(ptep, entry); + } + + spin_lock(&init_mm.page_table_lock); + if(pmd_leaf(READ_ONCE(*pmdp))) + { + smp_wmb(); + pmd_populate_kernel(&init_mm, pmdp, pgtable); + pgtable = NULL; + } + spin_unlock(&init_mm.page_table_lock); + + if(pgtable) + { + pte_free_kernel(&init_mm, pgtable); + } + } + else if(pmd_leaf(*pmdp)) + { + use_block_pmd = 1; + } + + if(use_block_pmd) + lm_ptep = (pte_t *)pmdp; + else + lm_ptep = pte_offset_kernel(pmdp, new); + + // Handling cont mapping. + if(pte_val(*lm_ptep) & PTE_CONT) + { + // The beginning of cont mapping. + int i; + pte_t *ptep = pte_offset_kernel(pmdp, (unsigned long)new & CONT_PTE_MASK); + if(order < CONFIG_ARM64_CONT_PTE_SHIFT) + { + for(i = 0; i < CONT_PTES; i++) + { + set_pte(ptep,__pte(pte_val(*ptep) & ~PTE_CONT)); + ptep++; + } + } + } + + iee_addr = ((unsigned long)new + (unsigned long)iee_offset); + pgdp = pgd_offset_pgd(pgdir, iee_addr); + p4dp = p4d_offset(pgdp, iee_addr); + pudp = pud_offset(p4dp, iee_addr); + pmdp = pmd_offset(pudp, iee_addr); + iee_ptep = pte_offset_kernel(pmdp, iee_addr); + + if(use_block_pmd) + { + pmd_t *pmdp = (pmd_t *)lm_ptep; + pmd_t pmd = READ_ONCE(*pmdp); + pmd = __pmd((pmd_val(pmd) | PMD_SECT_RDONLY) & ~PTE_DBM); + WRITE_ONCE(*pmdp, pmd); + for(i = 0; i < (0x1 << order); i++) + { + pte_t pte = READ_ONCE(*ptep); + pte = __pte(((pte_val(pte) | 0x1) & ~PTE_ADDR_MASK) | __phys_to_pte_val(__pa(new))); + WRITE_ONCE(*ptep, pte); + pte = READ_ONCE(*iee_ptep); + pte = __pte(pte_val(pte) | 0x1); + WRITE_ONCE(*iee_ptep, pte); + ptep++; + iee_ptep++; + new += PAGE_SIZE; + } + } + else + { + for(i = 0; i < (0x1 << order); i++) + { + pte_t pte = READ_ONCE(*ptep); + pte = __pte(((pte_val(pte) | 0x1) & ~PTE_ADDR_MASK) | __phys_to_pte_val(__pa(new))); + WRITE_ONCE(*ptep, pte); + pte = READ_ONCE(*lm_ptep); + pte = __pte((pte_val(pte) | PTE_RDONLY) & ~PTE_DBM); + WRITE_ONCE(*lm_ptep, pte); + pte = READ_ONCE(*iee_ptep); + pte = __pte(pte_val(pte) | 0x1); + WRITE_ONCE(*iee_ptep, pte); + ptep++; + lm_ptep++; + iee_ptep++; + new += PAGE_SIZE; + } + } +#endif + dsb(ishst); + isb(); +} + +static inline void iee_unset_token(pte_t *ptep, void *token_addr, void *token_page, unsigned long order) +{ +#ifdef CONFIG_PTP + iee_rw_gate(IEE_UNSET_TOKEN, ptep, token_addr, token_page, order); +#else + int i; + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, token_page); + p4d_t *p4dp = p4d_offset(pgdp, token_page); + pud_t *pudp = pud_offset(p4dp, token_page); + pmd_t *pmdp = pmd_offset(pudp, token_page); + pte_t *lm_ptep; + pte_t *iee_ptep; + unsigned long iee_addr; + int use_block_pmd = 0; + + // Use Block Descriptor. 
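+	// The token page may still be covered by a PMD block mapping; in that case
+	// the block entry itself is rewritten below (lm_ptep aliases the pmd),
+	// otherwise the individual linear-map PTE is located and updated.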
+ if(pmd_leaf(*pmdp)) + { + use_block_pmd = 1; + lm_ptep = (pte_t *)pmdp; + } + else + lm_ptep = pte_offset_kernel(pmdp, token_page); + + iee_addr = ((unsigned long)token_page + (unsigned long)iee_offset); + pgdp = pgd_offset_pgd(pgdir, iee_addr); + p4dp = p4d_offset(pgdp, iee_addr); + pudp = pud_offset(p4dp, iee_addr); + pmdp = pmd_offset(pudp, iee_addr); + iee_ptep = pte_offset_kernel(pmdp, iee_addr); + + if(use_block_pmd) + { + pmd_t *pmdp = (pmd_t *)lm_ptep; + pmd_t pmd = READ_ONCE(*pmdp); + pmd = __pmd(pmd_val(pmd) | PTE_DBM); + WRITE_ONCE(*pmdp, pmd); + for(i = 0; i < (0x1 << order); i++) + { + pte_t pte = READ_ONCE(*ptep); + pte = __pte(((pte_val(pte) & ~((unsigned long)0x1)) & ~PTE_ADDR_MASK) | __phys_to_pte_val(__pa(token_addr - IEE_OFFSET))); + WRITE_ONCE(*ptep, pte); + pte = READ_ONCE(*iee_ptep); + pte = __pte(pte_val(pte) & ~0x1); + WRITE_ONCE(*iee_ptep, pte); + ptep++; + iee_ptep++; + token_addr += PAGE_SIZE; + token_page += PAGE_SIZE; + } + } + else + { + for(i = 0; i < (0x1 << order); i++) + { + pte_t pte = READ_ONCE(*ptep); + pte = __pte(((pte_val(pte) & ~((unsigned long)0x1)) & ~PTE_ADDR_MASK) | __phys_to_pte_val(__pa(token_addr - IEE_OFFSET))); + WRITE_ONCE(*ptep, pte); + pte = READ_ONCE(*lm_ptep); + pte = __pte(pte_val(pte) | PTE_DBM); + WRITE_ONCE(*lm_ptep, pte); + pte = READ_ONCE(*iee_ptep); + pte = __pte(pte_val(pte) & ~0x1); + WRITE_ONCE(*iee_ptep, pte); + ptep++; + lm_ptep++; + iee_ptep++; + token_addr += PAGE_SIZE; + token_page += PAGE_SIZE; + } + } +#endif + dsb(ishst); + isb(); +} + +// Input is the lm vaddr of sensitive data. +void set_iee_page(unsigned long addr, int order) +{ + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); + p4d_t *p4dp = p4d_offset(pgdp, addr); + pud_t *pudp = pud_offset(p4dp, addr); + pmd_t *pmdp; + pte_t *lm_ptep; + pte_t *iee_ptep; + unsigned long iee_addr; + int use_block_pmd = 0; + + pmdp = pmd_offset(pudp, addr); + + // Handling cont mapping. + if(pmd_val(*pmdp) & PTE_CONT) + { + // The beginning of cont mapping. + int i; + pmd_t *pmdp = pmd_offset(pudp, addr & CONT_PMD_MASK); + for(i = 0; i < CONT_PMDS; i++) + { + set_pmd(pmdp,__pmd(pmd_val(*pmdp) & ~PTE_CONT)); + pmdp++; + } + } + + // Use Block Descriptor. + if(pmd_leaf(*pmdp) && order < 9) + { + #ifndef CONFIG_PTP + struct page *page = pmd_page(*pmdp); + #endif + pte_t *pgtable = pte_alloc_one_kernel(&init_mm); + + if (!pgtable) + panic("Alloc pgtable error.\n"); + + #ifdef CONFIG_PTP + iee_split_huge_pmd(pmdp, pgtable); + #else + { + int i; + pte_t *ptep = pgtable; + for (i = 0; i < PMD_SIZE / PAGE_SIZE; i++, ptep++) { + pte_t entry; + pgprot_t pgprot = PAGE_KERNEL; + pgprot = __pgprot(pgprot_val(pgprot) | PTE_CONT); + + entry = mk_pte(page + i, pgprot); + set_pte(ptep, entry); + } + } + #endif + + spinlock_t *ptl = pmd_lock(&init_mm, pmdp); + if(pmd_leaf(READ_ONCE(*pmdp))) + { + smp_wmb(); + pmd_populate_kernel(&init_mm, pmdp, pgtable); + pgtable = NULL; + } + spin_unlock(ptl); + + if(pgtable) + { + #ifdef CONFIG_PTP + iee_memset(pgtable, 0, PAGE_SIZE); + #endif + pte_free_kernel(&init_mm, pgtable); + } + } + else if(pmd_leaf(*pmdp)) + { + use_block_pmd = 1; + } + + if(use_block_pmd) + lm_ptep = (pte_t *)pmdp; + else + lm_ptep = pte_offset_kernel(pmdp, addr); + + // Handling cont mapping. + if(pte_val(*lm_ptep) & PTE_CONT) + { + // The beginning of cont mapping. 
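+	// Rewind to the CONT_PTE-aligned start of the run so every PTE sharing the
+	// contiguous hint loses it together.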
+ int i; + pte_t *ptep = pte_offset_kernel(pmdp, addr & CONT_PTE_MASK); + if(order < CONFIG_ARM64_CONT_PTE_SHIFT) + { + for(i = 0; i < CONT_PTES; i++) + { + set_pte(ptep,__pte(pte_val(*ptep) & ~PTE_CONT)); + ptep++; + } + } + } + + iee_addr = ((unsigned long)addr + (unsigned long)iee_offset); + pgdp = pgd_offset_pgd(pgdir, iee_addr); + p4dp = p4d_offset(pgdp, iee_addr); + pudp = pud_offset(p4dp, iee_addr); + pmdp = pmd_offset(pudp, iee_addr); + iee_ptep = pte_offset_kernel(pmdp, iee_addr); + iee_set_sensitive_pte(lm_ptep, iee_ptep, order, use_block_pmd); + flush_tlb_kernel_range(addr, addr+PAGE_SIZE*(1 << order)); + isb(); +} + +// Input is the lm vaddr of sensitive data. +void unset_iee_page(unsigned long addr, int order) +{ + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); + p4d_t *p4dp = p4d_offset(pgdp, addr); + pud_t *pudp = pud_offset(p4dp, addr); + pmd_t *pmdp = pmd_offset(pudp, addr); + pte_t *lm_ptep; + pte_t *iee_ptep; + unsigned long iee_addr; + int use_block_pmd = 0; + + // Use Block Descriptor. + if(pmd_leaf(*pmdp)) + { + use_block_pmd = 1; + lm_ptep = (pte_t *)pmdp; + } + else + lm_ptep = pte_offset_kernel(pmdp, addr); + + iee_addr = ((unsigned long)addr + (unsigned long)iee_offset); + pgdp = pgd_offset_pgd(pgdir, iee_addr); + p4dp = p4d_offset(pgdp, iee_addr); + pudp = pud_offset(p4dp, iee_addr); + pmdp = pmd_offset(pudp, iee_addr); + iee_ptep = pte_offset_kernel(pmdp, iee_addr); + iee_unset_sensitive_pte(lm_ptep, iee_ptep, order, use_block_pmd); + flush_tlb_kernel_range(addr, addr+PAGE_SIZE*(1 << order)); + flush_tlb_kernel_range(iee_addr, iee_addr+PAGE_SIZE*(1 << order)); + isb(); +} + +void set_iee_page_valid(unsigned long addr) +{ + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); + + p4d_t *p4dp = p4d_offset(pgdp, addr); + + pud_t *pudp = pud_offset(p4dp, addr); + + pmd_t *pmdp = pmd_offset(pudp, addr); + + pte_t *ptep = pte_offset_kernel(pmdp, addr); + pte_t pte = READ_ONCE(*ptep); + + if((addr < (PAGE_OFFSET + IEE_OFFSET)) | (addr > (PAGE_OFFSET + BIT(vabits_actual - 1)))) + return; + + pte = __pte(pte_val(pte) | PTE_VALID); + set_pte(ptep, pte); +} + +void iee_set_logical_mem_ro(unsigned long addr) +{ + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); + p4d_t *p4dp = p4d_offset(pgdp, addr); + pud_t *pudp = pud_offset(p4dp, addr); + pmd_t *pmdp; + pte_t *ptep; + pte_t pte; + + pmdp = pmd_offset(pudp, addr); + + // Handling cont mapping. + if(pmd_val(*pmdp) & PTE_CONT) + { + // The beginning of cont mapping. + int i; + pmd_t *pmdp = pmd_offset(pudp, addr & CONT_PMD_MASK); + for(i = 0; i < CONT_PMDS; i++) + { + set_pmd(pmdp,__pmd(pmd_val(*pmdp) & ~PTE_CONT)); + pmdp++; + } + } + + // Use Block Descriptor. 
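+	// A leaf (block) PMD maps far more than the single page being protected, so
+	// split it into a full PTE table first: under CONFIG_PTP the split happens
+	// inside the gate via iee_split_huge_pmd(), otherwise the table is filled
+	// here and hooked up with pmd_populate_kernel().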
+ if(pmd_leaf(*pmdp)) + { + #ifndef CONFIG_PTP + struct page *page = pmd_page(*pmdp); + #endif + pte_t *pgtable = pte_alloc_one_kernel(&init_mm); + + if (!pgtable) + panic("Alloc pgtable error.\n"); + + #ifdef CONFIG_PTP + iee_split_huge_pmd(pmdp, pgtable); + #else + { + int i; + pte_t *ptep = pgtable; + for (i = 0; i < PMD_SIZE / PAGE_SIZE; i++, ptep++) { + pte_t entry; + pgprot_t pgprot = PAGE_KERNEL; + pgprot = __pgprot(pgprot_val(pgprot) | PTE_CONT); + + entry = mk_pte(page + i, pgprot); + set_pte(ptep, entry); + } + } + #endif + + spinlock_t *ptl = pmd_lock(&init_mm, pmdp); + if(pmd_leaf(READ_ONCE(*pmdp))) + { + smp_wmb(); + pmd_populate_kernel(&init_mm, pmdp, pgtable); + pgtable = NULL; + } + spin_unlock(ptl); + + if(pgtable) + { + #ifdef CONFIG_PTP + iee_memset(pgtable, 0, PAGE_SIZE); + #endif + pte_free_kernel(&init_mm, pgtable); + } + } + + ptep = pte_offset_kernel(pmdp, addr); + + // Handling cont mapping. + if(pte_val(*ptep) & PTE_CONT) + { + // The beginning of cont mapping. + int i; + pte_t *ptep = pte_offset_kernel(pmdp, addr & CONT_PTE_MASK); + for(i = 0; i < CONT_PTES; i++) + { + set_pte(ptep,__pte(pte_val(*ptep) & ~PTE_CONT)); + ptep++; + } + } + + pte = READ_ONCE(*ptep); + + if(addr < PAGE_OFFSET) + return; + + pte = __pte((pte_val(pte) | PTE_RDONLY) & ~PTE_DBM); + set_pte(ptep, pte); + flush_tlb_kernel_range(addr, addr+PAGE_SIZE); + isb(); +} + +void iee_set_token_page_valid(void *token, void *new, unsigned int order) +{ + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, (unsigned long)token); + + p4d_t *p4dp = p4d_offset(pgdp, (unsigned long)token); + + pud_t *pudp = pud_offset(p4dp, (unsigned long)token); + + pmd_t *pmdp = pmd_offset(pudp, (unsigned long)token); + + pte_t *ptep = pte_offset_kernel(pmdp, (unsigned long)token); + iee_set_token(ptep, new, order); + + flush_tlb_kernel_range((unsigned long)token, (unsigned long)(token + (PAGE_SIZE * (1 << order)))); + flush_tlb_kernel_range((unsigned long)new, (unsigned long)(new + (PAGE_SIZE * (1 << order)))); + + isb(); +} + +void iee_set_token_page_invalid(void *token_addr, void *token_page, unsigned long order) +{ + unsigned long iee_addr = ((unsigned long)token_page + (unsigned long)iee_offset); + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, (unsigned long)token_addr); + p4d_t *p4dp = p4d_offset(pgdp, (unsigned long)token_addr); + pud_t *pudp = pud_offset(p4dp, (unsigned long)token_addr); + pmd_t *pmdp = pmd_offset(pudp, (unsigned long)token_addr); + pte_t *ptep = pte_offset_kernel(pmdp, (unsigned long)token_addr); + + if(token_page == NULL) + panic("Token of task_struct was unset.\n"); + + iee_unset_token(ptep, token_addr, token_page, order); + + flush_tlb_kernel_range((unsigned long)token_addr, (unsigned long)(token_addr + (PAGE_SIZE * (1 << order)))); + flush_tlb_kernel_range((unsigned long)token_page, (unsigned long)(token_page + (PAGE_SIZE * (1 << order)))); + flush_tlb_kernel_range(iee_addr, iee_addr + (PAGE_SIZE * (1 << order))); + isb(); +} + +void unset_iee_stack_page(unsigned long addr, int order) +{ + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); + p4d_t *p4dp = p4d_offset(pgdp, addr); + pud_t *pudp = pud_offset(p4dp, addr); + pmd_t *pmdp = pmd_offset(pudp, addr); + pte_t *ptep; + int use_block_pmd = 0; + + // Use Block Descriptor. 
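+	// If the stack pages are still covered by a block mapping, hand the PMD
+	// entry itself to iee_unset_stack_pte(); no split is attempted on this path.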
+ if(pmd_leaf(*pmdp)) + { + use_block_pmd = 1; + ptep = (pte_t *)pmdp; + } + else + ptep = pte_offset_kernel(pmdp, addr); + + iee_unset_stack_pte(ptep, order, use_block_pmd, addr); + flush_tlb_kernel_range(addr+iee_offset, addr+iee_offset+(1 << order)*PAGE_SIZE); + isb(); +} + +void set_iee_stack_page(unsigned long addr, int order) +{ + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); + p4d_t *p4dp = p4d_offset(pgdp, addr); + pud_t *pudp = pud_offset(p4dp, addr); + pmd_t *pmdp; + pte_t *ptep; + int use_block_pmd = 0; + + pmdp = pmd_offset(pudp, addr); + + // Handling cont mapping. + if(pmd_val(*pmdp) & PTE_CONT) + { + // The beginning of cont mapping. + int i; + pmd_t *pmdp = pmd_offset(pudp, addr & CONT_PMD_MASK); + for(i = 0; i < CONT_PMDS; i++) + { + set_pmd(pmdp,__pmd(pmd_val(*pmdp) & ~PTE_CONT)); + pmdp++; + } + } + + // Use Block Descriptor. + if(pmd_leaf(*pmdp) && order < 9) + { + #ifndef CONFIG_PTP + struct page *page = pmd_page(*pmdp); + #endif + pte_t *pgtable = pte_alloc_one_kernel(&init_mm); + + if (!pgtable) + panic("Alloc pgtable error.\n"); + + #ifdef CONFIG_PTP + iee_split_huge_pmd(pmdp, pgtable); + #else + { + int i; + pte_t *ptep = pgtable; + for (i = 0; i < PMD_SIZE / PAGE_SIZE; i++, ptep++) { + pte_t entry; + pgprot_t pgprot = PAGE_KERNEL; + pgprot = __pgprot(pgprot_val(pgprot) | PTE_CONT); + + entry = mk_pte(page + i, pgprot); + set_pte(ptep, entry); + } + } + #endif + + spinlock_t *ptl = pmd_lock(&init_mm, pmdp); + if(pmd_leaf(READ_ONCE(*pmdp))) + { + smp_wmb(); + pmd_populate_kernel(&init_mm, pmdp, pgtable); + pgtable = NULL; + } + spin_unlock(ptl); + + if(pgtable) + { + #ifdef CONFIG_PTP + iee_memset(pgtable, 0, PAGE_SIZE); + #endif + pte_free_kernel(&init_mm, pgtable); + } + } + else if(pmd_leaf(*pmdp)) + { + use_block_pmd = 1; + } + + if(use_block_pmd) + ptep = (pte_t *)pmdp; + else + ptep = pte_offset_kernel(pmdp, addr); + + // Handling cont mapping. + if(pte_val(*ptep) & PTE_CONT) + { + // The beginning of cont mapping. 
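+	// PTE_CONT is only cleared when the stack covers less than a full contiguous
+	// span (order < CONFIG_ARM64_CONT_PTE_SHIFT); otherwise every PTE in the run
+	// is updated identically and the hint can stay.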
+ int i; + pte_t *ptep = pte_offset_kernel(pmdp, addr & CONT_PTE_MASK); + if(order < CONFIG_ARM64_CONT_PTE_SHIFT) + { + for(i = 0; i < CONT_PTES; i++) + { + set_pte(ptep,__pte(pte_val(*ptep) & ~PTE_CONT)); + ptep++; + } + } + } + + iee_set_stack_pte(ptep, order, use_block_pmd, addr); + flush_tlb_kernel_range(addr, addr+(1 << order)*PAGE_SIZE); + isb(); +} + +void __init iee_rest_init(void) {} \ No newline at end of file diff --git a/arch/arm64/kernel/iee/iee-gate.S b/arch/arm64/kernel/iee/iee-gate.S new file mode 100644 index 000000000000..3b039e1dfbc4 --- /dev/null +++ b/arch/arm64/kernel/iee/iee-gate.S @@ -0,0 +1,314 @@ +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_IEE + +SYM_FUNC_START(iee_read_tmp_page) + /* save daif, close irq */ + mrs x13, daif + msr daifset, #0x2 + isb + /* disable PAN */ + msr pan, #0x0 + adrp x12, iee_offset + ldr x12, [x12, #:lo12:iee_offset] + add x11, x0, x12 /* x11 -> task_token(IEE) */ + ldr x0, [x11, #tmp_page_from_token_offset] + /* enable PAN */ + msr pan, #0x1 + /* restore daif */ + msr daif, x13 + ret +SYM_FUNC_END(iee_read_tmp_page) + +SYM_FUNC_START(iee_read_freeptr) + /* save daif, close irq */ + mrs x13, daif + msr daifset, #0x2 + isb + /* disable PAN */ + msr pan, #0x0 + adrp x12, iee_offset + ldr x12, [x12, #:lo12:iee_offset] + add x0, x0, x12 + ldr x0, [x0] + /* enable PAN */ + msr pan, #0x1 + /* restore daif */ + msr daif, x13 + ret +SYM_FUNC_END(iee_read_freeptr) + +SYM_FUNC_START(iee_read_token_stack) + /* save daif, close irq */ + mrs x13, daif + msr daifset, #0x2 + isb + /* disable PAN */ + msr pan, #0x0 + adrp x12, iee_offset + ldr x12, [x12, #:lo12:iee_offset] + add x11, x0, x12 /* x11 -> task_token(IEE) */ + ldr x0, [x11, #iee_from_token_offset] + /* enable PAN */ + msr pan, #0x1 + /* restore daif */ + msr daif, x13 + ret +SYM_FUNC_END(iee_read_token_stack) + +SYM_FUNC_START(iee_set_xchg_relaxed) + /* save daif, close irq */ + mrs x13, daif + msr daifset, #0x2 + isb + /* disable PAN */ + msr pan, #0x0 + /* begin */ + adrp x2, iee_offset + ldr x2, [x2, #:lo12:iee_offset] + add x0, x0, x2 + prfm pstl1strm, [x0] +12: + ldxr x3, [x0] + stxr w4, x1, [x0] + cbnz w4, 12b + mov x0, x3 + /* end */ + /* enable PAN */ + msr pan, #0x1 + /* restore daif */ + msr daif, x13 + ret +SYM_FUNC_END(iee_set_xchg_relaxed) + +SYM_FUNC_START(iee_set_cmpxchg_relaxed) + /* save daif, close irq */ + mrs x13, daif + msr daifset, #0x2 + isb + /* disable PAN */ + msr pan, #0x0 + /* begin */ + adrp x9, iee_offset + ldr x9, [x9, #:lo12:iee_offset] + add x3, x0, x9 + prfm pstl1strm, [x3] +11: + ldxr x0, [x3] + eor x4, x0, x1 + cbnz w4, 13f + stxr w4, x2, [x3] + cbnz w4, 11b +13: + /* end */ + /* enable PAN */ + msr pan, #0x1 + /* restore daif */ + msr daif, x13 + ret +SYM_FUNC_END(iee_set_cmpxchg_relaxed) + +SYM_FUNC_START(iee_rw_gate) + /* save daif, close irq */ + mrs x13, daif + msr daifset, #0x2 + isb + /* save lr */ + sub sp, sp, #16 + stp x29, x30, [sp] + bl iee_protected_rw_gate + /* restore lr */ + ldp x29, x30, [sp] + add sp, sp, #16 + /* restore daif */ + msr daif, x13 + ret +SYM_FUNC_END(iee_rw_gate) +#if defined(CONFIG_CREDP) || defined(CONFIG_KOI) || (CONFIG_KEYP) +EXPORT_SYMBOL(iee_rw_gate) +#endif + + .pushsection ".iee.text.header", "ax" + +SYM_FUNC_START(iee_protected_rw_gate) + /* disable PAN */ + msr pan, #0x0 + /* switch to iee stack */ + mrs x9, sp_el0 /* x9 -> task_struct */ + adrp x7, iee_offset + ldr x7, [x7, #:lo12:iee_offset] + add x11, x9, x7 /* x11 -> task_token(IEE) */ + // store kernel stack + mov x10, sp + str 
x10, [x11, #kernel_from_token_offset] + // load iee stack + ldr x10, [x11, #iee_from_token_offset] + mov sp, x10 +#ifdef CONFIG_IEE_INTERRUPTABLE + isb + /* restore daif */ + msr daif, x13 + sub sp, sp, #16 + stp x11, x30, [sp] +#else + sub sp, sp, #32 + stp x11, x7, [sp, #16] + stp x13, x30, [sp] +#endif + /* call iee func */ + adrp x12, iee_funcs + add x12, x12, x0, lsl #3 + ldr x12, [x12, #:lo12:iee_funcs] + mov x0, x7 + blr x12 +#ifdef CONFIG_IEE_INTERRUPTABLE + ldp x11, x30, [sp] + add sp, sp, #16 + /* store and disable daif */ + mrs x13, daif + msr daifset, #0x2 + isb +#else + ldp x13, x30, [sp] + ldp x11, x7, [sp, #16] + add sp, sp, #32 +#endif + /* switch to kernel stack */ + // load kernel stack + ldr x10, [x11, #kernel_from_token_offset] + mov sp, x10 + /* enable PAN */ + msr pan, #0x1 + ret +SYM_FUNC_END(iee_protected_rw_gate) + + .popsection + +#include +#define BAD_IEE 4 +#define BAD_IEE_SI 5 + +#define SYS_TCR_EL1_HPD1 0x40000000000 +#define SYS_TCR_EL1_A1 0x400000 + + .pushsection ".iee.exec_entry", "ax" + +SYM_FUNC_START(iee_rwx_gate_entry) + /* Disable irq first. */ + mrs x15, daif // use x15 to restore daif + msr DAIFSet, #0xf + isb + + /* Set HPD1 = 0 to exec follwing codes in U RWX page */ + mrs x9, tcr_el1 + bic x9, x9, #SYS_TCR_EL1_HPD1 + bic x9, x9, #SYS_TCR_EL1_A1 + msr tcr_el1, x9 + isb + + b iee_rwx_gate_tramp +SYM_FUNC_END(iee_rwx_gate_entry) +#ifdef CONFIG_KOI +EXPORT_SYMBOL(iee_rwx_gate_entry) +#endif + .popsection + +#define USER_ASID_FLAG (UL(1) << 48) + + .pushsection ".iee.si_text", "awx" + +SYM_FUNC_START(iee_rwx_gate_tramp) + /* Check tcr val. */ + mrs x10, tcr_el1 + adrp x12, iee_si_tcr // tcr value shall be const after init + ldr x12, [x12, #:lo12:iee_si_tcr] + cbz x12, 1f + cmp x12, x10 + b.ne 3f +1: + mov x13, sp + /* if called by koi, skip stack switch */ +#ifdef CONFIG_KOI + cmp x0, #IEE_SWITCH_TO_KERNEL + b.eq 4f + cmp x0, #IEE_SWITCH_TO_KOI + b.eq 5f +#endif + + /* If iee hasn't been initialized, skip stack switch. */ + adrp x11, iee_init_done + ldr x10, [x11, #:lo12:iee_init_done] + cbz x10, 2f + + /* Switch to iee stack */ + mrs x9, sp_el0 // x9 -> task_struct + adrp x12, iee_si_offset + ldr x12, [x12, #:lo12:iee_si_offset] + add x11, x9, x12 // x11 -> task_token(IEE) + // load iee stack + ldr x10, [x11, #iee_from_token_offset] + mov sp, x10 + + /* x15 stores daif and x13 stores previous sp */ +2: + stp x15, x13, [sp, #-32]! + stp x29, x30, [sp, #16] + bl iee_si_handler // enter actual handler + ldp x29, x30, [sp, #16] + + b iee_rwx_gate_exit // jump to iee exit +3: + mov x0, sp + mov x1, #BAD_IEE_SI + mrs x2, esr_el1 + bl iee_bad_mode + ASM_BUG() +#ifdef CONFIG_KOI +/* Handle KOI switch gates. */ +4: + /* IEE_SWITCH_TO_KERNEL */ + mrs x9, ttbr1_el1 + bic x9, x9, #USER_ASID_FLAG // Set new ASID to even + adrp x11, iee_base_swapper_pg_dir + ldr x11, [x11, #:lo12:iee_base_swapper_pg_dir] + ubfx x10, x9, #48, #16 // x10 -> new TTBR1 ASID + bfi x11, x10, #48, #16 + b 6f +5: + /* IEE_SWITCH_TO_KOI */ + tbz x1, #48, 3b // KOI ASID shall be odd. + mov x11, x1 +6: + msr ttbr1_el1, x11 + isb + stp x15, x13, [sp, #-32]! 
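+	// Same frame layout as the push at label 2, so iee_rwx_gate_exit can restore
+	// DAIF (x15) and the caller's stack pointer (x13) on this path as well.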
+ b iee_rwx_gate_exit +#endif +SYM_FUNC_END(iee_rwx_gate_tramp) + + .popsection + + .pushsection ".iee.exec_exit", "ax" + +SYM_FUNC_START(iee_rwx_gate_exit) + ldp x15, x13, [sp], #32 + mov sp, x13 // switch to kernel stack + mrs x9, tcr_el1 + orr x9, x9, #SYS_TCR_EL1_HPD1 + orr x9, x9, #SYS_TCR_EL1_A1 + msr tcr_el1, x9 +/* --------Page boundary-------- */ + isb + msr daif, x15 + isb + ret +SYM_FUNC_END(iee_rwx_gate_exit) + + .popsection + +#endif diff --git a/arch/arm64/kernel/iee/iee-pgtable.c b/arch/arm64/kernel/iee/iee-pgtable.c new file mode 100644 index 000000000000..1369f00f410d --- /dev/null +++ b/arch/arm64/kernel/iee/iee-pgtable.c @@ -0,0 +1,402 @@ +#include +#include +#include +#include + +#ifdef CONFIG_PTP + +void iee_set_tramp_pgd_pre_init(pgd_t *pgdp, pgd_t pgd) +{ + iee_rw_gate(IEE_OP_SET_TRAMP_PGD, pgdp, pgd); +} + +inline void iee_set_bm_pte(pte_t *ptep, pte_t pte) +{ + // If it is pre init, write once. + // Else, write once will cause exception. So it is safe. + unsigned long flags; + unsigned long res; + local_irq_save(flags); + asm volatile("at s1e1r, %0"::"r"(__phys_to_iee(__pa_symbol(ptep)))); + isb(); + res = read_sysreg(par_el1); + local_irq_restore(flags); +#ifdef CONFIG_KOI + if (pte_valid(pte)) + pte = __pte(pte_val(pte) | PTE_NG); +#endif + if(res & 0x1) + WRITE_ONCE(*ptep,pte); + else + iee_rw_gate(IEE_OP_SET_BM_PTE, ptep, pte); + + /* + * Only if the new pte is valid and kernel, otherwise TLB maintenance + * or update_mmu_cache() have the necessary barriers. + */ + if (pte_valid_not_user(pte)) { + dsb(ishst); + isb(); + } +} + +inline void iee_set_fixmap_pte_pre_init(pte_t *ptep, pte_t pte) +{ +#ifdef CONFIG_KOI + if (pte_valid(pte)) { + pte = __pte(pte_val(pte) | PTE_NG); + } +#endif + WRITE_ONCE(*ptep, pte); + + /* + * Only if the new pte is valid and kernel, otherwise TLB maintenance + * or update_mmu_cache() have the necessary barriers. + */ + if (pte_valid_not_user(pte)) { + dsb(ishst); + isb(); + } +} + +inline void iee_set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + __sync_cache_and_tags(pte, 1); + __check_safe_pte_update(mm, ptep, pte); + + iee_set_pte_pre_init(ptep, pte); +} + +inline void iee_set_pte_at_delayed(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + __sync_cache_and_tags(pte, 1); + __check_safe_pte_update(mm, ptep, pte); +} + +inline bool in_tramp_pgdir(void *addr) +{ + return ((unsigned long)addr & PAGE_MASK) == + ((unsigned long)tramp_pg_dir & PAGE_MASK); +} + +inline void iee_set_fixmap_pmd_pre_init(pmd_t *pmdp, pmd_t pmd) +{ +#ifdef __PAGETABLE_PMD_FOLDED + if (in_swapper_pgdir(pmdp)) { + set_swapper_pgd((pgd_t *)pmdp, __pgd(pmd_val(pmd))); + return; + } +#endif /* __PAGETABLE_PMD_FOLDED */ +#ifdef CONFIG_KOI + pmdval_t val = pmd_val(pmd); + if (pmd_valid(pmd) && !(val & PMD_TABLE_BIT)) { + pmd = __pmd(val | PMD_SECT_NG); + } +#endif + WRITE_ONCE(*pmdp, pmd); + + if (pmd_valid(pmd)) { + dsb(ishst); + isb(); + } +} + +inline void iee_set_fixmap_pud_pre_init(pud_t *pudp, pud_t pud) +{ +#ifdef __PAGETABLE_PUD_FOLDED + if (in_swapper_pgdir(pudp)) { + set_swapper_pgd((pgd_t *)pudp, __pgd(pud_val(pud))); + return; + } +#endif /* __PAGETABLE_PUD_FOLDED */ +#ifdef CONFIG_KOI + pudval_t val = pud_val(pud); + if (pud_valid(pud) && !(val & PUD_TABLE_BIT)) { + // There is no PUD_SEC_NG, so we use PMD_SECT_NG instead. 
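+		// (block descriptors share the same lower-attribute layout at PUD and
+		// PMD level, so the nG bit ends up in the right position)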
+ pud = __pud(val | PMD_SECT_NG); + } +#endif + WRITE_ONCE(*pudp, pud); + + if (pud_valid(pud)) { + dsb(ishst); + isb(); + } +} + +#endif //#ifdef CONFIG_PTP + +inline void __set_pte(pte_t *ptep, pte_t pte) +{ +#ifdef CONFIG_KOI + if (pte_valid(pte)) { + pte = __pte(pte_val(pte) | PTE_NG); + } +#endif +#ifdef CONFIG_PTP + iee_rw_gate(IEE_OP_SET_PTE, ptep, pte); + dsb(ishst); + isb(); +#else + WRITE_ONCE(*ptep, pte); + if (pte_valid_not_user(pte)) { + dsb(ishst); + isb(); + } +#endif +} + +inline void set_pmd(pmd_t *pmdp, pmd_t pmd) +{ +#ifdef __PAGETABLE_PMD_FOLDED + if (in_swapper_pgdir(pmdp)) { + set_swapper_pgd((pgd_t *)pmdp, __pgd(pmd_val(pmd))); + return; + } +#endif /* __PAGETABLE_PMD_FOLDED */ +#ifdef CONFIG_KOI + pmdval_t val = pmd_val(pmd); + if (pmd_valid(pmd) && !(val & PMD_TABLE_BIT)) { + pmd = __pmd(val | PMD_SECT_NG); + } +#endif +#ifdef CONFIG_PTP + iee_rw_gate(IEE_OP_SET_PMD, pmdp, pmd); +#else + WRITE_ONCE(*pmdp, pmd); +#endif + if (pmd_valid(pmd)) { + dsb(ishst); + isb(); + } +} + +inline void set_pud(pud_t *pudp, pud_t pud) +{ +#ifdef __PAGETABLE_PUD_FOLDED + if (in_swapper_pgdir(pudp)) { + set_swapper_pgd((pgd_t *)pudp, __pgd(pud_val(pud))); + return; + } +#endif /* __PAGETABLE_PUD_FOLDED */ +#ifdef CONFIG_KOI + pudval_t val = pud_val(pud); + if (pud_valid(pud) && !(val & PUD_TABLE_BIT)) { + // There is no PUD_SEC_NG, so we use PMD_SECT_NG instead. + pud = __pud(val | PMD_SECT_NG); + } +#endif +#ifdef CONFIG_PTP + iee_rw_gate(IEE_OP_SET_PUD, pudp, pud); +#else + WRITE_ONCE(*pudp, pud); +#endif + if (pud_valid(pud)) { + dsb(ishst); + isb(); + } +} + +inline void set_p4d(p4d_t *p4dp, p4d_t p4d) +{ + if (in_swapper_pgdir(p4dp)) + { + set_swapper_pgd((pgd_t *)p4dp, __pgd(p4d_val(p4d))); + return; + } + +#ifdef CONFIG_PTP + if(in_tramp_pgdir(p4dp)) + { + iee_set_tramp_pgd_pre_init((pgd_t *)p4dp, __pgd(p4d_val(p4d))); + return; + } + iee_rw_gate(IEE_OP_SET_P4D, p4dp, p4d); +#else + WRITE_ONCE(*p4dp, p4d); +#endif + dsb(ishst); + isb(); +} + +#ifdef CONFIG_IEE +inline void iee_set_stack_pte(pte_t *ptep, int order, int use_block_pmd, unsigned long lm_addr) +{ +#ifdef CONFIG_PTP + iee_rw_gate(IEE_OP_SET_PTE_U, ptep, order, use_block_pmd, lm_addr); +#else + int i; + unsigned long iee_addr = lm_addr + iee_offset; + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, iee_addr); + p4d_t *p4dp = p4d_offset(pgdp, iee_addr); + pud_t *pudp = pud_offset(p4dp, iee_addr); + pmd_t *pmdp = pmd_offset(pudp, iee_addr); + pte_t *iee_ptep = pte_offset_kernel(pmdp, iee_addr); + for(i = 0; i < (1 << order); i++) + { + pte_t pte = READ_ONCE(*iee_ptep); + pte = __pte(pte_val(pte) | PTE_VALID); + WRITE_ONCE(*iee_ptep, pte); + iee_ptep++; + } + + if(use_block_pmd) + { + pmd_t *pmdp = (pmd_t *)ptep; + pmd_t pmd = READ_ONCE(*pmdp); + pmd = __pmd(pmd_val(pmd) & ~PTE_VALID); + WRITE_ONCE(*pmdp, pmd); + } + else + { + for(i = 0; i < (1 << order); i++) + { + pte_t pte = READ_ONCE(*ptep); + pte = __pte(pte_val(pte) & ~PTE_VALID); + WRITE_ONCE(*ptep, pte); + ptep++; + } + } +#endif + dsb(ishst); + isb(); +} + +inline void iee_unset_stack_pte(pte_t *ptep, int order, int use_block_pmd, unsigned long lm_addr) +{ +#ifdef CONFIG_PTP + iee_rw_gate(IEE_OP_SET_PTE_P, ptep, order, use_block_pmd, lm_addr); +#else + int i; + unsigned long iee_addr = lm_addr + iee_offset; + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, iee_addr); + p4d_t *p4dp = p4d_offset(pgdp, iee_addr); + pud_t *pudp = pud_offset(p4dp, iee_addr); + pmd_t *pmdp = pmd_offset(pudp, iee_addr); + pte_t *iee_ptep = 
pte_offset_kernel(pmdp, iee_addr); + for(i = 0; i < (1 << order); i++) + { + pte_t pte = READ_ONCE(*iee_ptep); + pte = __pte(pte_val(pte) & ~PTE_VALID); + WRITE_ONCE(*iee_ptep, pte); + iee_ptep++; + } + + if(use_block_pmd) + { + pmd_t *pmdp = (pmd_t *)ptep; + pmd_t pmd = READ_ONCE(*pmdp); + pmd = __pmd(pmd_val(pmd) | PTE_VALID); + WRITE_ONCE(*pmdp, pmd); + } + else + { + for(i = 0; i < (1 << order); i++) + { + pte_t pte = READ_ONCE(*ptep); + pte = __pte(pte_val(pte) | PTE_VALID); + #ifdef CONFIG_KOI + if (pte_valid(pte)) + pte = __pte(pte_val(pte) | PTE_NG); + #endif + WRITE_ONCE(*ptep, pte); + ptep++; + } + } +#endif + dsb(ishst); + isb(); +} + +inline void iee_set_sensitive_pte(pte_t *lm_ptep, pte_t *iee_ptep, int order, int use_block_pmd) +{ +#ifdef CONFIG_PTP + iee_rw_gate(IEE_SET_SENSITIVE_PTE, lm_ptep, iee_ptep, order, use_block_pmd); +#else + int i; + if(use_block_pmd) + { + pmd_t pmd = __pmd(pte_val(READ_ONCE(*lm_ptep))); + pmd = __pmd((pmd_val(pmd) | PMD_SECT_RDONLY) & ~PTE_DBM); + WRITE_ONCE(*lm_ptep, __pte(pmd_val(pmd))); + for(i = 0; i < (1 << order); i++) + { + pte_t pte = READ_ONCE(*iee_ptep); + #ifdef CONFIG_KOI + pte = __pte(pte_val(pte) | PTE_VALID | PTE_NG); + #else + pte = __pte(pte_val(pte) | PTE_VALID); + #endif + WRITE_ONCE(*iee_ptep, pte); + iee_ptep++; + } + } + else + { + for(i = 0; i < (1 << order); i++) + { + pte_t pte = READ_ONCE(*lm_ptep); + pte = __pte((pte_val(pte) | PTE_RDONLY) & ~PTE_DBM); + #ifdef CONFIG_KOI + if (pte_valid(pte)) + pte = __pte(pte_val(pte) | PTE_NG); + #endif + WRITE_ONCE(*lm_ptep, pte); + pte = READ_ONCE(*iee_ptep); + pte = __pte(pte_val(pte) | PTE_VALID); + #ifdef CONFIG_KOI + if (pte_valid(pte)) + pte = __pte(pte_val(pte) | PTE_NG); + #endif + WRITE_ONCE(*iee_ptep, pte); + lm_ptep++; + iee_ptep++; + } + } +#endif + dsb(ishst); + isb(); +} + +inline void iee_unset_sensitive_pte(pte_t *lm_ptep, pte_t *iee_ptep, int order, int use_block_pmd) +{ +#ifdef CONFIG_PTP + iee_rw_gate(IEE_UNSET_SENSITIVE_PTE, lm_ptep, iee_ptep, order, use_block_pmd); +#else + int i; + if(use_block_pmd) + { + pmd_t pmd = __pmd(pte_val(READ_ONCE(*lm_ptep))); + pmd = __pmd(pmd_val(pmd) | PTE_DBM); + WRITE_ONCE(*lm_ptep, __pte(pmd_val(pmd))); + for(i = 0; i < (1 << order); i++) + { + pte_t pte = READ_ONCE(*iee_ptep); + pte = __pte(pte_val(pte) & ~PTE_VALID); + WRITE_ONCE(*iee_ptep, pte); + iee_ptep++; + } + } + else + { + for(i = 0; i < (1 << order); i++) + { + pte_t pte = READ_ONCE(*lm_ptep); + pte = __pte(pte_val(pte) | PTE_DBM); + WRITE_ONCE(*lm_ptep, pte); + pte = READ_ONCE(*iee_ptep); + pte = __pte(pte_val(pte) & ~PTE_VALID); + WRITE_ONCE(*iee_ptep, pte); + lm_ptep++; + iee_ptep++; + } + } +#endif + dsb(ishst); + isb(); +} +#endif //#ifdef CONFIG_IEE diff --git a/arch/arm64/kernel/iee/iee-selinuxp.c b/arch/arm64/kernel/iee/iee-selinuxp.c new file mode 100644 index 000000000000..394f99f3f574 --- /dev/null +++ b/arch/arm64/kernel/iee/iee-selinuxp.c @@ -0,0 +1,36 @@ +#include + +extern unsigned long long iee_rw_gate(int flag, ...); + +#ifdef CONFIG_IEE_SELINUX_P +inline void iee_set_selinux_status_pg(struct page* new_page) +{ + iee_rw_gate(IEE_SEL_SET_STATUS_PG, new_page); +} + +inline void enforcing_set(bool value) +{ + iee_rw_gate(IEE_SEL_SET_ENFORCING, value); +} + +inline void selinux_mark_initialized(void) +{ + iee_rw_gate(IEE_SEL_SET_INITIALIZED); +} + +inline void iee_set_sel_policy_cap(unsigned int idx, int cap) +{ + iee_rw_gate(IEE_SEL_SET_POLICY_CAP, idx, cap); +} + +/* + * Please make sure param iee_new_policy is from policy_jar memcache. 
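+ * The gate copies *new_policy into the IEE view of iee_new_policy and then
+ * publishes iee_new_policy with rcu_assign_pointer(), so the caller's copy is
+ * never referenced again once this returns.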
+ * Need to free new_policy after calling this func as it's only used to + * trans data from kernel. + */ +inline void iee_sel_rcu_assign_policy(struct selinux_policy* new_policy, + struct selinux_policy* iee_new_policy) +{ + iee_rw_gate(IEE_SEL_RCU_ASSIGN_POLICY, new_policy, iee_new_policy); +} +#endif diff --git a/arch/arm64/kernel/iee/iee.c b/arch/arm64/kernel/iee/iee.c new file mode 100644 index 000000000000..1464c2e947e7 --- /dev/null +++ b/arch/arm64/kernel/iee/iee.c @@ -0,0 +1,1720 @@ +#include "linux/sched.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define is_cow_mapping(flags) (((flags) & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE) + +#ifdef CONFIG_IEE +extern struct cred init_cred; +extern s64 memstart_addr; +extern unsigned long highest_memmap_pfn; + +struct watch_list; + +void __iee_code _iee_set_swapper_pgd(unsigned long iee_offset, pgd_t *pgdp, pgd_t pgd); +void __iee_code _iee_set_tramp_pgd(unsigned long iee_offset, pgd_t *pgdp, pgd_t pgd); +void __iee_code _iee_set_pte(unsigned long iee_offset, pte_t *ptep, pte_t pte); +void __iee_code _iee_set_pmd(unsigned long iee_offset, pmd_t *pmdp, pmd_t pmd); +void __iee_code _iee_set_pud(unsigned long iee_offset, pud_t *pudp, pud_t pud); +void __iee_code _iee_set_p4d(unsigned long iee_offset, p4d_t *p4dp, p4d_t p4d); +void __iee_code _iee_set_bm_pte(unsigned long iee_offset, pte_t *ptep, pte_t pte); +void __iee_code _iee_write_in_byte(unsigned long iee_offset, void *ptr, __u64 data, int length); +void __iee_code _iee_set_cred_uid(unsigned long iee_offset, struct cred *cred, kuid_t uid); +void __iee_code _iee_set_cred_gid(unsigned long iee_offset, struct cred *cred, kgid_t gid); +void __iee_code _iee_copy_cred(unsigned long iee_offset, struct cred *old, struct cred *new); +void __iee_code _iee_set_cred_suid(unsigned long iee_offset, struct cred *cred, kuid_t suid); +void __iee_code _iee_set_cred_sgid(unsigned long iee_offset, struct cred *cred, kgid_t sgid); +void __iee_code _iee_set_cred_euid(unsigned long iee_offset, struct cred *cred, kuid_t euid); +void __iee_code _iee_set_cred_egid(unsigned long iee_offset, struct cred *cred, kgid_t egid); +void __iee_code _iee_set_cred_fsuid(unsigned long iee_offset, struct cred *cred, kuid_t fsuid); +void __iee_code _iee_set_cred_fsgid(unsigned long iee_offset, struct cred *cred, kgid_t fsgid); +void __iee_code _iee_set_cred_user(unsigned long iee_offset, struct cred *cred, struct user_struct *user); +void __iee_code _iee_set_cred_user_ns(unsigned long iee_offset, struct cred *cred, struct user_namespace *user_ns); +void __iee_code _iee_set_cred_group_info(unsigned long iee_offset, struct cred *cred, struct group_info *group_info); +void __iee_code _iee_set_cred_securebits(unsigned long iee_offset, struct cred *cred, unsigned securebits); +void __iee_code _iee_set_cred_cap_inheritable(unsigned long iee_offset, struct cred *cred, kernel_cap_t cap_inheritable); +void __iee_code _iee_set_cred_cap_permitted(unsigned long iee_offset, struct cred *cred, kernel_cap_t cap_permitted); +void __iee_code _iee_set_cred_cap_effective(unsigned long iee_offset, struct cred *cred, kernel_cap_t cap_effective); +void __iee_code _iee_set_cred_cap_bset(unsigned long iee_offset, struct cred *cred, kernel_cap_t cap_bset); +void __iee_code _iee_set_cred_cap_ambient(unsigned long iee_offset, struct cred *cred, kernel_cap_t cap_ambient); +void __iee_code _iee_set_cred_jit_keyring(unsigned long iee_offset, struct cred 
*cred, unsigned char jit_keyring); +void __iee_code _iee_set_cred_session_keyring(unsigned long iee_offset, struct cred *cred, struct key *session_keyring); +void __iee_code _iee_set_cred_process_keyring(unsigned long iee_offset, struct cred *cred, struct key *process_keyring); +void __iee_code _iee_set_cred_thread_keyring(unsigned long iee_offset, struct cred *cred, struct key *thread_keyring); +void __iee_code _iee_set_cred_request_key_auth(unsigned long iee_offset, struct cred *cred, struct key *request_key_auth); +void __iee_code _iee_set_cred_non_rcu(unsigned long iee_offset, struct cred *cred, int non_rcu); +void __iee_code _iee_set_cred_atomic_set_usage(unsigned long iee_offset, struct cred *cred, int i); +bool __iee_code _iee_set_cred_atomic_op_usage(unsigned long iee_offset, struct cred *cred, int flag, int nr); +void __iee_code _iee_set_cred_security(unsigned long iee_offset, struct cred *cred, void *security); +void __iee_code _iee_set_cred_rcu(unsigned long iee_offset, struct cred *cred, struct rcu_head *rcu); +void __iee_code _iee_memset(unsigned long iee_offset, void *ptr, int data, size_t n); +void __iee_code _iee_set_track(unsigned long iee_offset, struct track *ptr, struct track *data); +void __iee_code _iee_set_freeptr(unsigned long iee_offset, void **pptr, void *ptr); +void __iee_code _iee_set_stack_pte(unsigned long iee_offset, pte_t *ptep, int order, int use_block_pmd, unsigned long lm_addr); +void __iee_code _iee_unset_stack_pte(unsigned long iee_offset, pte_t *ptep, int order, int use_block_pmd, unsigned long lm_addr); +void __iee_code _iee_set_token_pgd(unsigned long iee_offset, struct task_struct *tsk, pgd_t *pgd); +void __iee_code _iee_init_token(unsigned long iee_offset, struct task_struct *tsk, void *iee_stack, void *tmp_page); +void __iee_code _iee_invalidate_token(unsigned long iee_offset, struct task_struct *tsk); +void __iee_code _iee_validate_token(unsigned long iee_offset, struct task_struct *tsk); +void __iee_code _iee_set_sensitive_pte(unsigned long iee_offset, pte_t *lm_ptep, pte_t *iee_ptep, int order, int use_block_pmd); +void __iee_code _iee_unset_sensitive_pte(unsigned long iee_offset, pte_t *lm_ptep, pte_t *iee_ptep, int order, int use_block_pmd); +void __iee_code _iee_set_token(unsigned long iee_offset, pte_t *ptep, void *new, unsigned long order, int use_block_pmd); +void __iee_code _iee_unset_token(unsigned long iee_offset, pte_t *ptep, void *token_addr, void *token_page, unsigned long order); +void __iee_code _iee_copy_pte_range(unsigned long iee_offset, pte_t *new_dst, pte_t *old_dst, pte_t *src_pte, struct vm_area_struct *src_vma, unsigned long dst_vm_flags, pte_t *end_pte); +void __iee_code _iee_split_huge_pmd(unsigned long iee_offset, pmd_t *pmdp, pte_t *pgtable); +void __iee_code _iee_set_cred_ucounts(unsigned long iee_offset, struct cred *cred, struct ucounts *ucounts); +void __iee_code _iee_set_key_union(unsigned long iee_offset, struct key *key, struct key_union *key_union); +void __iee_code _iee_set_key_struct(unsigned long iee_offset, struct key *key, struct key_struct *key_struct); +void __iee_code _iee_set_key_payload(unsigned long iee_offset, struct key *key, union key_payload *key_payload); +void __iee_code _iee_memcpy(unsigned long iee_offset, void *dst, void *src, size_t n); +bool __iee_code _iee_set_key_usage(unsigned long iee_offset, struct key *key, int n, int flag); +void __iee_code _iee_set_key_serial(unsigned long iee_offset, struct key *key, key_serial_t serial); +void __iee_code _iee_set_key_watchers(unsigned long 
iee_offset, struct key *key, struct watch_list *watchers); +void __iee_code _iee_set_key_user(unsigned long iee_offset, struct key *key, struct key_user *user); +void __iee_code _iee_set_key_security(unsigned long iee_offset, struct key *key, void *security); +void __iee_code _iee_set_key_expiry(unsigned long iee_offset, struct key *key, time64_t expiry); +void __iee_code _iee_set_key_revoked_at(unsigned long iee_offset, struct key *key, time64_t revoked_at); +void __iee_code _iee_set_key_last_used_at(unsigned long iee_offset, struct key *key, time64_t last_used_at); +void __iee_code _iee_set_key_uid(unsigned long iee_offset, struct key *key, kuid_t uid); +void __iee_code _iee_set_key_gid(unsigned long iee_offset, struct key *key, kgid_t gid); +void __iee_code _iee_set_key_perm(unsigned long iee_offset, struct key *key, key_perm_t perm); +void __iee_code _iee_set_key_quotalen(unsigned long iee_offset, struct key *key, unsigned short quotalen); +void __iee_code _iee_set_key_datalen(unsigned long iee_offset, struct key *key, unsigned short datalen); +void __iee_code _iee_set_key_state(unsigned long iee_offset, struct key *key, short state); +void __iee_code _iee_set_key_magic(unsigned long iee_offset, struct key *key, unsigned magic); +void __iee_code _iee_set_key_flags(unsigned long iee_offset, struct key *key, unsigned long flags); +void __iee_code _iee_set_key_index_key(unsigned long iee_offset, struct key *key, struct keyring_index_key *index_key); +void __iee_code _iee_set_key_hash(unsigned long iee_offset, struct key *key, unsigned long hash); +void __iee_code _iee_set_key_len_desc(unsigned long iee_offset, struct key *key, unsigned long len_desc); +void __iee_code _iee_set_key_type(unsigned long iee_offset, struct key *key, struct key_type *type); +void __iee_code _iee_set_key_domain_tag(unsigned long iee_offset, struct key *key, struct key_tag *domain_tag); +void __iee_code _iee_set_key_description(unsigned long iee_offset, struct key *key, char *description); +void __iee_code _iee_set_key_restrict_link(unsigned long iee_offset, struct key *key, struct key_restriction *restrict_link); +bool __iee_code _iee_set_key_flag_bit(unsigned long iee_offset, struct key *key, long nr, int flag); +#ifdef CONFIG_KOI +unsigned long __iee_code _iee_read_koi_stack(unsigned long iee_offset, struct task_struct *tsk); +void __iee_code _iee_write_koi_stack(unsigned long iee_offset, struct task_struct *tsk, unsigned long koi_stack); +unsigned long __iee_code _iee_read_token_ttbr1(unsigned long iee_offset, struct task_struct *tsk); +void __iee_code _iee_write_token_ttbr1(unsigned long iee_offset, struct task_struct *tsk, unsigned long current_ttbr1); +unsigned long __iee_code _iee_read_koi_kernel_stack(unsigned long iee_offset, struct task_struct *tsk); +void __iee_code _iee_write_koi_kernel_stack(unsigned long iee_offset, struct task_struct *tsk, unsigned long kernel_stack); +unsigned long __iee_code _iee_read_koi_stack_base(unsigned long iee_offset, struct task_struct *tsk); +void __iee_code _iee_write_koi_stack_base(unsigned long iee_offset, struct task_struct *tsk, unsigned long koi_stack_base); +void __iee_code _iee_set_koi_pgd(unsigned long iee_offset, unsigned long koi_pgd_addr); +#endif + +#ifdef CONFIG_IEE_SELINUX_P +#include +void __iee_code _iee_set_selinux_status_pg(unsigned long iee_offset, struct page* new_page); +void __iee_code _iee_set_selinux_enforcing(unsigned long iee_offset, bool value); +void __iee_code _iee_mark_selinux_initialized(unsigned long iee_offset); +void __iee_code 
_iee_set_sel_policy_cap(unsigned long iee_offset, unsigned int idx, int cap); +void __iee_code _iee_sel_rcu_assign_policy(unsigned long iee_offset, + struct selinux_policy* new_policy, struct selinux_policy* iee_new_policy); +#endif + +static void inline _iee_set_pte_single(pte_t *ptep, pte_t pte, unsigned long iee_offset); +static pteval_t inline _iee_set_cmpxchg_relaxed(pte_t *ptep, pteval_t old_pteval, pteval_t new_pteval, unsigned long iee_offset); + +// Define the function pointer type for wrapper functions. +// Each function pointer conforms to a standardized calling convention +// using a variable argument list (va_list) as its parameter. +// This allows dynamic invocation of different functions with various arguments. +typedef void (*iee_func)(void); +iee_func iee_funcs[] = { + (iee_func)_iee_write_in_byte, + (iee_func)_iee_set_pte, + (iee_func)_iee_set_pmd, + (iee_func)_iee_set_pud, + (iee_func)_iee_set_p4d, + (iee_func)_iee_set_bm_pte, + (iee_func)_iee_set_swapper_pgd, + (iee_func)_iee_set_tramp_pgd, + (iee_func)_iee_memset, + (iee_func)_iee_set_track, + (iee_func)_iee_set_freeptr, + (iee_func)_iee_set_stack_pte, + (iee_func)_iee_unset_stack_pte, + (iee_func)_iee_set_token_pgd, + (iee_func)_iee_init_token, + (iee_func)_iee_invalidate_token, + (iee_func)_iee_set_sensitive_pte, + (iee_func)_iee_unset_sensitive_pte, + (iee_func)_iee_set_token, + (iee_func)_iee_unset_token, + (iee_func)_iee_copy_pte_range, + (iee_func)_iee_split_huge_pmd, + (iee_func)_iee_validate_token, + (iee_func)_iee_memcpy, +#ifdef CONFIG_KOI + (iee_func)_iee_read_koi_stack, + (iee_func)_iee_write_koi_stack, + (iee_func)_iee_read_token_ttbr1, + (iee_func)_iee_write_token_ttbr1, + (iee_func)_iee_read_koi_kernel_stack, + (iee_func)_iee_write_koi_kernel_stack, + (iee_func)_iee_read_koi_stack_base, + (iee_func)_iee_write_koi_stack_base, + (iee_func)_iee_set_koi_pgd, +#endif +#ifdef CONFIG_CREDP + (iee_func)_iee_copy_cred, + (iee_func)_iee_set_cred_uid, + (iee_func)_iee_set_cred_gid, + (iee_func)_iee_set_cred_suid, + (iee_func)_iee_set_cred_sgid, + (iee_func)_iee_set_cred_euid, + (iee_func)_iee_set_cred_egid, + (iee_func)_iee_set_cred_fsuid, + (iee_func)_iee_set_cred_fsgid, + (iee_func)_iee_set_cred_user, + (iee_func)_iee_set_cred_user_ns, + (iee_func)_iee_set_cred_group_info, + (iee_func)_iee_set_cred_securebits, + (iee_func)_iee_set_cred_cap_inheritable, + (iee_func)_iee_set_cred_cap_permitted, + (iee_func)_iee_set_cred_cap_effective, + (iee_func)_iee_set_cred_cap_bset, + (iee_func)_iee_set_cred_cap_ambient, + (iee_func)_iee_set_cred_jit_keyring, + (iee_func)_iee_set_cred_session_keyring, + (iee_func)_iee_set_cred_process_keyring, + (iee_func)_iee_set_cred_thread_keyring, + (iee_func)_iee_set_cred_request_key_auth, + (iee_func)_iee_set_cred_non_rcu, + (iee_func)_iee_set_cred_atomic_set_usage, + (iee_func)_iee_set_cred_atomic_op_usage, + (iee_func)_iee_set_cred_security, + (iee_func)_iee_set_cred_rcu, + (iee_func)_iee_set_cred_ucounts, +#endif +#ifdef CONFIG_KEYP + (iee_func)_iee_set_key_union, + (iee_func)_iee_set_key_struct, + (iee_func)_iee_set_key_payload, + (iee_func)_iee_set_key_usage, + (iee_func)_iee_set_key_serial, + (iee_func)_iee_set_key_watchers, + (iee_func)_iee_set_key_user, + (iee_func)_iee_set_key_security, + (iee_func)_iee_set_key_expiry, + (iee_func)_iee_set_key_revoked_at, + (iee_func)_iee_set_key_last_used_at, + (iee_func)_iee_set_key_uid, + (iee_func)_iee_set_key_gid, + (iee_func)_iee_set_key_perm, + (iee_func)_iee_set_key_quotalen, + (iee_func)_iee_set_key_datalen, + 
(iee_func)_iee_set_key_state, + (iee_func)_iee_set_key_magic, + (iee_func)_iee_set_key_flags, + (iee_func)_iee_set_key_index_key, + (iee_func)_iee_set_key_hash, + (iee_func)_iee_set_key_len_desc, + (iee_func)_iee_set_key_type, + (iee_func)_iee_set_key_domain_tag, + (iee_func)_iee_set_key_description, + (iee_func)_iee_set_key_restrict_link, + (iee_func)_iee_set_key_flag_bit, +#endif +#ifdef CONFIG_IEE_SELINUX_P + (iee_func)_iee_set_selinux_status_pg, + (iee_func)_iee_set_selinux_enforcing, + (iee_func)_iee_mark_selinux_initialized, + (iee_func)_iee_set_sel_policy_cap, + (iee_func)_iee_sel_rcu_assign_policy, +#endif + NULL +}; + +#ifdef CONFIG_KOI +unsigned long __iee_code _iee_read_koi_stack(unsigned long iee_offset, struct task_struct *tsk) +{ + struct task_token *token = (struct task_token *)((unsigned long)tsk + (unsigned long)iee_offset); + return (unsigned long)token->koi_stack; +} + +void __iee_code _iee_write_koi_stack(unsigned long iee_offset, struct task_struct *tsk, unsigned long koi_stack) +{ + struct task_token *token = (struct task_token *)((unsigned long)tsk + (unsigned long)iee_offset); + token->koi_stack = (void *) koi_stack; +} + +unsigned long __iee_code _iee_read_token_ttbr1(unsigned long iee_offset, struct task_struct *tsk) +{ + struct task_token *token = (struct task_token *)((unsigned long)tsk + (unsigned long)iee_offset); + return token->current_ttbr1; +} + +void __iee_code _iee_write_token_ttbr1(unsigned long iee_offset, struct task_struct *tsk, unsigned long current_ttbr1) +{ + struct task_token *token = (struct task_token *)((unsigned long)tsk + (unsigned long)iee_offset); + token->current_ttbr1 = current_ttbr1; +} + +unsigned long __iee_code _iee_read_koi_kernel_stack(unsigned long iee_offset, struct task_struct *tsk) +{ + struct task_token *token = (struct task_token *)((unsigned long)tsk + (unsigned long)iee_offset); + return (unsigned long) token->koi_kernel_stack; +} + +void __iee_code _iee_write_koi_kernel_stack(unsigned long iee_offset, struct task_struct *tsk, unsigned long kernel_stack) +{ + struct task_token *token = (struct task_token *)((unsigned long)tsk + (unsigned long)iee_offset); + token->koi_kernel_stack = (void *) kernel_stack; +} + +unsigned long __iee_code _iee_read_koi_stack_base(unsigned long iee_offset, struct task_struct *tsk) +{ + struct task_token *token = (struct task_token *)((unsigned long)tsk + (unsigned long)iee_offset); + return (unsigned long)token->koi_stack_base; +} + +void __iee_code _iee_write_koi_stack_base(unsigned long iee_offset, struct task_struct *tsk, unsigned long koi_stack_base) +{ + struct task_token *token = (struct task_token *)((unsigned long)tsk + (unsigned long)iee_offset); + token->koi_stack_base = (void *) koi_stack_base; +} + +static inline void iee_set_koi_pgd_writeable(unsigned long koi_pgd_addr, unsigned long iee_si_addr) +{ + pgd_t *pgdir = (pgd_t *)koi_pgd_addr; + pgd_t *pgdp = pgd_offset_pgd(pgdir, iee_si_addr); + p4d_t *p4dp = p4d_offset(pgdp, iee_si_addr); + pud_t *pudp = pud_offset(p4dp, iee_si_addr); + pmd_t *pmdp = pmd_offset(pudp, iee_si_addr); + pte_t *ptep = pte_offset_kernel(pmdp, iee_si_addr); + pte_t pte = READ_ONCE(*ptep); + pte = __pte(pte_val(pte) | PTE_DBM); + WRITE_ONCE(*((pte_t *)(__phys_to_iee(__pa(ptep)))), pte); +} +/* + * Set IEE SI codes U RWX here to avoid IEE DEP checking fault. + * Mark koi pgd in the same time. + */ +void __iee_code _iee_set_koi_pgd(unsigned long iee_offset, unsigned long koi_pgd_addr) +{ + // IEE SI codes are 2 pages starting at __iee_si_text_start. 
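+	// Both pages get PTE_DBM set in the koi pgd; the PTE is written through the
+	// IEE alias of the page-table page (__phys_to_iee) rather than its normal
+	// kernel mapping.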
+ unsigned long iee_si_addr = (unsigned long)__iee_si_text_start; + iee_set_koi_pgd_writeable(koi_pgd_addr, iee_si_addr); + iee_si_addr += PAGE_SIZE; + iee_set_koi_pgd_writeable(koi_pgd_addr, iee_si_addr); + // Use DBM=0, AP[7]=0 to mark this page as a koi pgd in IEE. +} +#endif + +#ifdef CONFIG_IEE_SELINUX_P +void __iee_code _iee_set_selinux_status_pg(unsigned long iee_offset, struct page* new_page) +{ + struct page** iee_addr = (struct page**)__phys_to_iee(__pa_symbol(&(selinux_state.status_page))); + *iee_addr = new_page; +} + +void __iee_code _iee_set_selinux_enforcing(unsigned long iee_offset, bool value) +{ + *(bool*)__phys_to_iee(__pa_symbol(&(selinux_state.enforcing))) = value; +} + +void __iee_code _iee_mark_selinux_initialized(unsigned long iee_offset) +{ + smp_store_release(((bool*)__phys_to_iee(__pa_symbol(&(selinux_state.initialized)))), true); + printk("IEE: Mark selinux initialized."); +} + +void __iee_code _iee_set_sel_policy_cap(unsigned long iee_offset, unsigned int idx, int cap) +{ + *(bool*)__phys_to_iee(__pa_symbol(&(selinux_state.policycap[idx]))) = cap; +} + +/* + * Please make sure param iee_new_policy is from policy_jar memcache. + * Need to free new_policy after calling this func as it's only used to + * trans data from kernel. + */ +void __iee_code _iee_sel_rcu_assign_policy(unsigned long iee_offset, struct selinux_policy* new_policy, + struct selinux_policy* iee_new_policy) +{ + /* TODO: Verify information from the incoming policy. */ + // /* Make sure iee_new_policy is from policy_jar memcache. */ + // struct slab* policy_pg = (struct slab*)pfn_to_page(__pa(iee_new_policy) >> PAGE_SHIFT); + // if (policy_pg->slab_cache != policy_jar) + // printk("IEE SELINUXP ERROR: new policy is not from iee memcache."); + /* Copy data from the kernel to the newly allocated policy struct inside iee.
*/ + struct selinux_policy* iee_addr = (struct selinux_policy *)((unsigned long)iee_new_policy + iee_offset); + memcpy(iee_addr, new_policy, sizeof(struct selinux_policy)); + + rcu_assign_pointer(*((struct selinux_policy**)__phys_to_iee(__pa_symbol(&(selinux_state.policy)))), + iee_new_policy); + printk("IEE: assigned rcu pointer selinux_state.policy."); +} +#endif + +bool __iee_code _iee_set_key_flag_bit(unsigned long iee_offset, struct key *key, long nr, int flag) +{ + key = (struct key *)((unsigned long)key + iee_offset); + switch(flag) + { + case SET_BIT_OP: + { + set_bit(nr, &key->flags); + break; + } + case TEST_AND_CLEAR_BIT: + { + return test_and_clear_bit(nr, &key->flags); + } + case TEST_AND_SET_BIT: + { + return test_and_set_bit(nr, &key->flags); + } + } + return 0; +} + +void __iee_code _iee_set_key_restrict_link(unsigned long iee_offset, struct key *key, struct key_restriction *restrict_link) +{ + key = (struct key *)((unsigned long)key + iee_offset); + key->restrict_link = restrict_link; +} + +void __iee_code _iee_set_key_magic(unsigned long iee_offset, struct key *key, unsigned magic) +{ + #ifdef KEY_DEBUGGING + key = (struct key *)((unsigned long)key + iee_offset); + key->magic = magic; + #endif +} + +void __iee_code _iee_set_key_flags(unsigned long iee_offset, struct key *key, unsigned long flags) +{ + key = (struct key *)((unsigned long)key + iee_offset); + key->flags = flags; +} + +void __iee_code _iee_set_key_index_key(unsigned long iee_offset, struct key *key, struct keyring_index_key *index_key) +{ + key = (struct key *)((unsigned long)key + iee_offset); + key->index_key = *index_key; +} + +void __iee_code _iee_set_key_hash(unsigned long iee_offset, struct key *key, unsigned long hash) +{ + key = (struct key *)((unsigned long)key + iee_offset); + key->hash = hash; +} + +void __iee_code _iee_set_key_len_desc(unsigned long iee_offset, struct key *key, unsigned long len_desc) +{ + key = (struct key *)((unsigned long)key + iee_offset); + key->len_desc = len_desc; +} + +void __iee_code _iee_set_key_type(unsigned long iee_offset, struct key *key, struct key_type *type) +{ + key = (struct key *)((unsigned long)key + iee_offset); + key->type = type; +} + +void __iee_code _iee_set_key_domain_tag(unsigned long iee_offset, struct key *key, struct key_tag *domain_tag) +{ + key = (struct key *)((unsigned long)key + iee_offset); + key->domain_tag = domain_tag; +} + +void __iee_code _iee_set_key_description(unsigned long iee_offset, struct key *key, char *description) +{ + key = (struct key *)((unsigned long)key + iee_offset); + key->description = description; +} + +void __iee_code _iee_set_key_uid(unsigned long iee_offset, struct key *key, kuid_t uid) +{ + key = (struct key *)((unsigned long)key + iee_offset); + key->uid = uid; +} + +void __iee_code _iee_set_key_gid(unsigned long iee_offset, struct key *key, kgid_t gid) +{ + key = (struct key *)((unsigned long)key + iee_offset); + key->gid = gid; +} + +void __iee_code _iee_set_key_perm(unsigned long iee_offset, struct key *key, key_perm_t perm) +{ + key = (struct key *)((unsigned long)key + iee_offset); + key->perm = perm; +} + +void __iee_code _iee_set_key_quotalen(unsigned long iee_offset, struct key *key, unsigned short quotalen) +{ + key = (struct key *)((unsigned long)key + iee_offset); + key->quotalen = quotalen; +} + +void __iee_code _iee_set_key_datalen(unsigned long iee_offset, struct key *key, unsigned short datalen) +{ + key = (struct key *)((unsigned long)key + iee_offset); + key->datalen = datalen; +} + +void 
__iee_code _iee_set_key_state(unsigned long iee_offset, struct key *key, short state) +{ + key = (struct key *)((unsigned long)key + iee_offset); + WRITE_ONCE(key->state, state); +} + +void __iee_code _iee_set_key_user(unsigned long iee_offset, struct key *key, struct key_user *user) +{ + key = (struct key *)((unsigned long)key + iee_offset); + key->user = user; +} + +void __iee_code _iee_set_key_security(unsigned long iee_offset, struct key *key, void *security) +{ + key = (struct key *)((unsigned long)key + iee_offset); + key->security = security; +} + +void __iee_code _iee_set_key_expiry(unsigned long iee_offset, struct key *key, time64_t expiry) +{ + key = (struct key *)((unsigned long)key + iee_offset); + key->expiry = expiry; +} + +void __iee_code _iee_set_key_revoked_at(unsigned long iee_offset, struct key *key, time64_t revoked_at) +{ + key = (struct key *)((unsigned long)key + iee_offset); + key->revoked_at = revoked_at; +} + +void __iee_code _iee_set_key_last_used_at(unsigned long iee_offset, struct key *key, time64_t last_used_at) +{ + key = (struct key *)((unsigned long)key + iee_offset); + key->last_used_at = last_used_at; +} + +bool __iee_code _iee_set_key_usage(unsigned long iee_offset, struct key *key, int n, int flag) +{ + key = (struct key *)((unsigned long)key + iee_offset); + switch(flag) + { + case REFCOUNT_INC: + { + refcount_inc(&key->usage); + break; + } + case REFCOUNT_SET: + { + refcount_set(&key->usage, n); + break; + } + case REFCOUNT_DEC_AND_TEST: + { + return refcount_dec_and_test(&key->usage); + } + case REFCOUNT_INC_NOT_ZERO: + { + return refcount_inc_not_zero(&key->usage); + } + } + return 0; +} + +void __iee_code _iee_set_key_serial(unsigned long iee_offset, struct key *key, key_serial_t serial) +{ + key = (struct key *)((unsigned long)key + iee_offset); + key->serial = serial; +} + +void __iee_code _iee_set_key_watchers(unsigned long iee_offset, struct key *key, struct watch_list *watchers) +{ + #ifdef CONFIG_KEY_NOTIFICATIONS + key = (struct key *)((unsigned long)key + iee_offset); + key->watchers = watchers; + #endif +} + +void __iee_code _iee_set_key_union(unsigned long iee_offset, struct key *key, struct key_union *key_union) +{ + key = (struct key *)((unsigned long)key + iee_offset); + key->graveyard_link.next = (struct list_head *)key_union; +} + +void __iee_code _iee_set_key_struct(unsigned long iee_offset, struct key *key, struct key_struct *key_struct) +{ + key = (struct key *)((unsigned long)key + iee_offset); + key->name_link.prev = (struct list_head *)key_struct; +} + +void __iee_code _iee_set_key_payload(unsigned long iee_offset, struct key *key, union key_payload *key_payload) +{ + key = (struct key *)((unsigned long)key + iee_offset); + key->name_link.next = (struct list_head *)key_payload; +} + +void __iee_code _iee_split_huge_pmd(unsigned long iee_offset, pmd_t *pmdp, pte_t *pgtable) +{ + int i; + struct page *page = pmd_page(*pmdp); + pte_t *ptep = (pte_t *)((unsigned long)pgtable + (unsigned long)iee_offset); + + for (i = 0; i < PMD_SIZE / PAGE_SIZE; i++, ptep++) { + pte_t entry; + pgprot_t pgprot = PAGE_KERNEL; + #ifdef CONFIG_KOI + pgprot = __pgprot(pgprot_val(pgprot) | PTE_CONT | PTE_NG); + #else + pgprot = __pgprot(pgprot_val(pgprot) | PTE_CONT); + #endif + entry = mk_pte(page + i, pgprot); + WRITE_ONCE(*ptep, entry); + } +} + +void __iee_code _iee_copy_pte_range(unsigned long iee_offset, pte_t *new_dst, pte_t *old_dst, pte_t *src_pte, struct vm_area_struct *src_vma, unsigned long dst_vm_flags, pte_t *end_pte) +{ + pte_t pte, tmp; + 
swp_entry_t entry; + tmp = __pte(0); + while(src_pte < end_pte) + { + if (!pte_none(*src_pte) && unlikely(!pte_present(*src_pte))) + { + pte = *src_pte; + entry = pte_to_swp_entry(pte); + if (likely(!non_swap_entry(entry))) + ; + else if(is_migration_entry(entry)) + { + if (is_readable_migration_entry(entry) && + is_cow_mapping(dst_vm_flags)) + { + entry = make_readable_migration_entry(swp_offset(entry)); + pte = swp_entry_to_pte(entry); + if (pte_swp_soft_dirty(*src_pte)) + pte = pte_swp_mksoft_dirty(pte); + if (pte_swp_uffd_wp(*src_pte)) + pte = pte_swp_mkuffd_wp(pte); + _iee_set_pte_single(src_pte, pte, iee_offset); + } + } + else if (is_device_private_entry(entry)) + { + if (is_writable_device_private_entry(entry) && + is_cow_mapping(dst_vm_flags)) { + entry = make_readable_device_private_entry( + swp_offset(entry)); + pte = swp_entry_to_pte(entry); + if (pte_swp_uffd_wp(*src_pte)) + pte = pte_swp_mkuffd_wp(pte); + _iee_set_pte_single(src_pte, pte, iee_offset); + } + } + } + else if(!pte_none(*src_pte) && likely(pte_present(*src_pte))) + { + struct page *page = NULL; + #ifdef CONFIG_ARCH_HAS_PTE_SPECIAL + if (likely(!pte_special(*src_pte))) + { + if (unlikely(pte_pfn(*src_pte) > highest_memmap_pfn)) + page = NULL; + else + page = pte_page(*src_pte); + } + #else + if((!unlikely(src_vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) || ((src_vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pte_pfn(*src_pte))) || (!(src_vma->vm_flags & VM_MIXEDMAP) && !(pfn == src_vma->vm_pgoff + off) && is_cow_mapping(src_vma->vm_flags))) && !is_zero_pfn(pte_pfn(*src_pte)) && !unlikely(pte_pfn(*src_pte) > highest_memmap_pfn)) + page = pte_page(*src_pte); + #endif + if(!(page && folio_test_anon(page_folio(page))) || likely(!PageAnonExclusive(page) || !(likely(!is_device_private_page(page) && unlikely(folio_needs_cow_for_dma(src_vma, page_folio(page))))))) + { + if (is_cow_mapping(src_vma->vm_flags) && pte_write(*src_pte)) + { + pte_t old_pte, new_pte; + + new_pte = READ_ONCE(*src_pte); + do { + old_pte = new_pte; + new_pte = pte_wrprotect(new_pte); + _iee_set_cmpxchg_relaxed(src_pte, pte_val(old_pte), pte_val(new_pte), iee_offset); + } while (pte_val(new_pte) != pte_val(old_pte)); + } + } + } + if(!pte_none(*new_dst)) + { + _iee_set_pte_single(old_dst, *new_dst, iee_offset); + WRITE_ONCE(*new_dst, __pte(0)); + } + old_dst++; + src_pte++; + new_dst++; + } +} + +void __iee_code _iee_set_sensitive_pte(unsigned long iee_offset, pte_t *lm_ptep, pte_t *iee_ptep, int order, int use_block_pmd) +{ + int i; + + lm_ptep = (pte_t *)((unsigned long)lm_ptep + iee_offset); + iee_ptep = (pte_t *)((unsigned long)iee_ptep + iee_offset); + if(use_block_pmd) + { + pmd_t pmd = __pmd(pte_val(READ_ONCE(*lm_ptep))); + pmd = __pmd((pmd_val(pmd) | PMD_SECT_RDONLY) & ~PTE_DBM); + WRITE_ONCE(*lm_ptep, __pte(pmd_val(pmd))); + for(i = 0; i < (1 << order); i++) + { + pte_t pte = READ_ONCE(*iee_ptep); + pte = __pte(pte_val(pte) | PTE_VALID); + WRITE_ONCE(*iee_ptep, pte); + iee_ptep++; + } + } + else + { + for(i = 0; i < (1 << order); i++) + { + pte_t pte = READ_ONCE(*lm_ptep); + pte = __pte((pte_val(pte) | PTE_RDONLY) & ~PTE_DBM); + WRITE_ONCE(*lm_ptep, pte); + pte = READ_ONCE(*iee_ptep); + pte = __pte(pte_val(pte) | PTE_VALID); + WRITE_ONCE(*iee_ptep, pte); + lm_ptep++; + iee_ptep++; + } + } +} + +void __iee_code _iee_unset_sensitive_pte(unsigned long iee_offset, pte_t *lm_ptep, pte_t *iee_ptep, int order, int use_block_pmd) +{ + int i; + + lm_ptep = (pte_t *)((unsigned long)lm_ptep + iee_offset); + iee_ptep = (pte_t *)((unsigned long)iee_ptep + 
iee_offset); + if(use_block_pmd) + { + pmd_t pmd = __pmd(pte_val(READ_ONCE(*lm_ptep))); + pmd = __pmd(pmd_val(pmd) | PTE_DBM); + WRITE_ONCE(*lm_ptep, __pte(pmd_val(pmd))); + for(i = 0; i < (1 << order); i++) + { + pte_t pte = READ_ONCE(*iee_ptep); + pte = __pte(pte_val(pte) & ~PTE_VALID); + WRITE_ONCE(*iee_ptep, pte); + iee_ptep++; + } + } + else + { + for(i = 0; i < (1 << order); i++) + { + pte_t pte = READ_ONCE(*lm_ptep); + pte = __pte(pte_val(pte) | PTE_DBM); + WRITE_ONCE(*lm_ptep, pte); + pte = READ_ONCE(*iee_ptep); + pte = __pte(pte_val(pte) & ~PTE_VALID); + WRITE_ONCE(*iee_ptep, pte); + lm_ptep++; + iee_ptep++; + } + } +} + +void __iee_code _iee_set_token(unsigned long iee_offset, pte_t *ptep, void *new, unsigned long order, int use_block_pmd) +{ + int i; + pgd_t *pgdir; + pgd_t *pgdp; + p4d_t *p4dp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *lm_ptep; + pte_t *iee_ptep; + unsigned long iee_addr; + + pgdir = swapper_pg_dir; + + pgdp = pgd_offset_pgd(pgdir, (unsigned long)new); + p4dp = p4d_offset(pgdp, (unsigned long)new); + pudp = pud_offset(p4dp, (unsigned long)new); + pmdp = pmd_offset(pudp, (unsigned long)new); + if(use_block_pmd) + lm_ptep = (pte_t *)pmdp; + else + lm_ptep = pte_offset_kernel(pmdp, (unsigned long)new); + // Handling cont mapping. + if(pte_val(*lm_ptep) & PTE_CONT) + { + // The beginning of cont mapping. + int i; + pte_t *ptep = (pte_t *)((unsigned long)pte_offset_kernel(pmdp, (unsigned long)new & CONT_PTE_MASK) + iee_offset); + if(order < CONFIG_ARM64_CONT_PTE_SHIFT) + { + for(i = 0; i < CONT_PTES; i++) + { + WRITE_ONCE(*ptep,__pte(pte_val(*ptep) & ~PTE_CONT)); + ptep++; + } + } + } + + iee_addr = ((unsigned long)new + (unsigned long)iee_offset); + pgdp = pgd_offset_pgd(pgdir, iee_addr); + p4dp = p4d_offset(pgdp, iee_addr); + pudp = pud_offset(p4dp, iee_addr); + pmdp = pmd_offset(pudp, iee_addr); + iee_ptep = pte_offset_kernel(pmdp, iee_addr); + + ptep = (pte_t *)((unsigned long)ptep + iee_offset); + lm_ptep = (pte_t *)((unsigned long)lm_ptep + iee_offset); + iee_ptep = (pte_t *)((unsigned long)iee_ptep + iee_offset); + if(use_block_pmd) + { + pmd_t *pmdp = (pmd_t *)lm_ptep; + pmd_t pmd = READ_ONCE(*pmdp); + pmd = __pmd((pmd_val(pmd) | PMD_SECT_RDONLY) & ~PTE_DBM); + WRITE_ONCE(*pmdp, pmd); + for(i = 0; i < (0x1 << order); i++) + { + pte_t pte = READ_ONCE(*ptep); + pte = __pte(((pte_val(pte) | PTE_VALID) & ~PTE_ADDR_MASK) | __phys_to_pte_val(__pa(new))); + WRITE_ONCE(*ptep, pte); + pte = READ_ONCE(*iee_ptep); + pte = __pte(pte_val(pte) | PTE_VALID); + WRITE_ONCE(*iee_ptep, pte); + ptep++; + iee_ptep++; + new += PAGE_SIZE; + } + } + else + { + for(i = 0; i < (0x1 << order); i++) + { + pte_t pte = READ_ONCE(*ptep); + pte = __pte(((pte_val(pte) | PTE_VALID) & ~PTE_ADDR_MASK) | __phys_to_pte_val(__pa(new))); + WRITE_ONCE(*ptep, pte); + pte = READ_ONCE(*lm_ptep); + pte = __pte((pte_val(pte) | PTE_RDONLY) & ~PTE_DBM); + WRITE_ONCE(*lm_ptep, pte); + pte = READ_ONCE(*iee_ptep); + pte = __pte(pte_val(pte) | PTE_VALID); + WRITE_ONCE(*iee_ptep, pte); + ptep++; + lm_ptep++; + iee_ptep++; + new += PAGE_SIZE; + } + } +} + +void __iee_code _iee_unset_token(unsigned long iee_offset, pte_t *ptep, void *token_addr, void *token_page, unsigned long order) +{ + int i; + pgd_t *pgdir; + pgd_t *pgdp; + p4d_t *p4dp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *lm_ptep; + pte_t *iee_ptep; + unsigned long iee_addr; + int use_block_pmd = 0; + + pgdir = swapper_pg_dir; + + pgdp = pgd_offset_pgd(pgdir, (unsigned long)token_page); + p4dp = p4d_offset(pgdp, (unsigned long)token_page); + pudp = 
pud_offset(p4dp, (unsigned long)token_page); + pmdp = pmd_offset(pudp, (unsigned long)token_page); + // Use Block Descriptor. + if(pmd_leaf(*pmdp)) + { + use_block_pmd = 1; + lm_ptep = (pte_t *)pmdp; + } + else + lm_ptep = pte_offset_kernel(pmdp, (unsigned long)token_page); + + iee_addr = ((unsigned long)token_page + (unsigned long)iee_offset); + pgdp = pgd_offset_pgd(pgdir, iee_addr); + p4dp = p4d_offset(pgdp, iee_addr); + pudp = pud_offset(p4dp, iee_addr); + pmdp = pmd_offset(pudp, iee_addr); + iee_ptep = pte_offset_kernel(pmdp, iee_addr); + + ptep = (pte_t *)((unsigned long)ptep + iee_offset); + lm_ptep = (pte_t *)((unsigned long)lm_ptep + iee_offset); + iee_ptep = (pte_t *)((unsigned long)iee_ptep + iee_offset); + if(use_block_pmd) + { + pmd_t *pmdp = (pmd_t *)lm_ptep; + pmd_t pmd = READ_ONCE(*pmdp); + pmd = __pmd(pmd_val(pmd) | PTE_DBM); + WRITE_ONCE(*pmdp, pmd); + for(i = 0; i < (0x1 << order); i++) + { + pte_t pte = READ_ONCE(*ptep); + pte = __pte(((pte_val(pte) & ~PTE_VALID) & ~PTE_ADDR_MASK) | __phys_to_pte_val(__pa(token_addr - IEE_OFFSET))); + WRITE_ONCE(*ptep, pte); + pte = READ_ONCE(*iee_ptep); + pte = __pte(pte_val(pte) & ~PTE_VALID); + WRITE_ONCE(*iee_ptep, pte); + ptep++; + iee_ptep++; + token_addr += PAGE_SIZE; + token_page += PAGE_SIZE; + } + } + else + { + for(i = 0; i < (0x1 << order); i++) + { + pte_t pte = READ_ONCE(*ptep); + pte = __pte(((pte_val(pte) & ~PTE_VALID) & ~PTE_ADDR_MASK) | __phys_to_pte_val(__pa(token_addr - IEE_OFFSET))); + WRITE_ONCE(*ptep, pte); + pte = READ_ONCE(*lm_ptep); + pte = __pte(pte_val(pte) | PTE_DBM); + WRITE_ONCE(*lm_ptep, pte); + pte = READ_ONCE(*iee_ptep); + pte = __pte(pte_val(pte) & ~PTE_VALID); + WRITE_ONCE(*iee_ptep, pte); + ptep++; + lm_ptep++; + iee_ptep++; + token_addr += PAGE_SIZE; + token_page += PAGE_SIZE; + } + } +} + +void __iee_code _iee_invalidate_token(unsigned long iee_offset, struct task_struct *tsk) +{ + struct task_token *token = (struct task_token *)((unsigned long)tsk + (unsigned long)iee_offset); + token->pgd = NULL; + token->valid = false; + token->kernel_stack = NULL; +} + +void __iee_code _iee_validate_token(unsigned long iee_offset, struct task_struct *tsk) +{ + struct task_token *token = (struct task_token *)((unsigned long)tsk + (unsigned long)iee_offset); + token->valid = true; +} + +#ifdef CONFIG_KOI +extern unsigned long koi_swapper_ttbr1; +#endif +void __iee_code _iee_init_token(unsigned long iee_offset, struct task_struct *tsk, void *iee_stack, void *tmp_page) +{ + struct task_token *token; + + token = (struct task_token *)((unsigned long)tsk + (unsigned long)iee_offset); + token->iee_stack = iee_stack; + token->tmp_page = tmp_page; +#ifdef CONFIG_KOI + token->koi_kernel_stack = NULL; + token->koi_stack = NULL; + token->koi_stack_base = NULL; + token->current_ttbr1 = 0; +#endif +} + +void __iee_code _iee_set_token_pgd(unsigned long iee_offset, struct task_struct *tsk, pgd_t *pgd) +{ + struct task_token *token; + + token = (struct task_token *)((unsigned long)tsk + (unsigned long)iee_offset); + token->pgd = pgd; +} + +void __iee_code _iee_set_freeptr(unsigned long iee_offset, void **pptr, void *ptr) +{ + pptr = (void **)((unsigned long)pptr + (unsigned long)iee_offset); + *pptr = ptr; +} + +#pragma GCC push_options +#pragma GCC optimize("O0") +void __iee_code _iee_memset(unsigned long iee_offset, void *ptr, int data, size_t n) +{ + char *_ptr; + + _ptr = (char *)((unsigned long)ptr + (unsigned long)iee_offset); + + while (n--) + *_ptr++ = data; +} + +void __iee_code _iee_memcpy(unsigned long iee_offset, 
void *dst, void *src, size_t n) +{ + char *_dst, *_src; + + _dst = (char *)((unsigned long)dst + (unsigned long)iee_offset); + _src = (char *)src; + + while(n--) + *_dst++ = *_src++; +} +#pragma GCC pop_options + +void __iee_code _iee_set_track(unsigned long iee_offset, struct track *ptr, struct track *data) +{ + _iee_memcpy(iee_offset, ptr, data, sizeof(struct track)); +} + +void __iee_code _iee_set_cred_rcu(unsigned long iee_offset, struct cred *cred, struct rcu_head *rcu) +{ + if(cred == &init_cred) + cred = (struct cred *)__phys_to_iee(__pa_symbol(cred)); + else + cred = (struct cred *)((unsigned long)cred + (unsigned long)iee_offset); + #ifdef CONFIG_CREDP + *((struct rcu_head **)(&(cred->rcu.func))) = rcu; + #endif +} + +void __iee_code _iee_set_cred_security(unsigned long iee_offset, struct cred *cred, void *security) +{ + if(cred == &init_cred) + cred = (struct cred *)__phys_to_iee(__pa_symbol(cred)); + else + cred = (struct cred *)((unsigned long)cred + (unsigned long)iee_offset); + cred->security = security; +} + +bool __iee_code _iee_set_cred_atomic_op_usage(unsigned long iee_offset, struct cred *cred, int flag, int nr) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + switch (flag) + { + case AT_ADD: { + atomic_long_add(nr, &cred->usage); + return 0; + } + case AT_INC_NOT_ZERO: { + return atomic_long_inc_not_zero(&cred->usage); + } + case AT_SUB_AND_TEST: { + return atomic_long_sub_and_test(nr, &cred->usage); + } + } + return 0; +} + +void __iee_code _iee_set_cred_atomic_set_usage(unsigned long iee_offset, struct cred *cred, int i) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + atomic_long_set(&cred->usage,i); +} + +void __iee_code _iee_set_cred_non_rcu(unsigned long iee_offset, struct cred *cred, int non_rcu) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->non_rcu = non_rcu; +} + +void __iee_code _iee_set_cred_session_keyring(unsigned long iee_offset, struct cred *cred, struct key *session_keyring) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->session_keyring = session_keyring; +} + +void __iee_code _iee_set_cred_process_keyring(unsigned long iee_offset, struct cred *cred, struct key *process_keyring) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->process_keyring = process_keyring; +} + +void __iee_code _iee_set_cred_thread_keyring(unsigned long iee_offset, struct cred *cred, struct key *thread_keyring) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->thread_keyring = thread_keyring; +} + +void __iee_code _iee_set_cred_request_key_auth(unsigned long iee_offset, struct cred *cred, struct key *request_key_auth) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->request_key_auth = request_key_auth; +} + +void __iee_code _iee_set_cred_jit_keyring(unsigned long iee_offset, struct cred *cred, unsigned char jit_keyring) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->jit_keyring = jit_keyring; +} + +void __iee_code _iee_set_cred_cap_inheritable(unsigned long iee_offset, struct cred *cred, kernel_cap_t cap_inheritable) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->cap_inheritable = cap_inheritable; +} + +void __iee_code _iee_set_cred_cap_permitted(unsigned long iee_offset, struct cred *cred, kernel_cap_t cap_permitted) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->cap_permitted = cap_permitted; +} + +void __iee_code _iee_set_cred_cap_effective(unsigned long iee_offset, struct cred *cred, kernel_cap_t cap_effective) +{ + cred = (struct cred 
*)__phys_to_iee(__pa(cred)); + cred->cap_effective = cap_effective; +} + +void __iee_code _iee_set_cred_cap_bset(unsigned long iee_offset, struct cred *cred, kernel_cap_t cap_bset) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->cap_bset = cap_bset; +} + +void __iee_code _iee_set_cred_cap_ambient(unsigned long iee_offset, struct cred *cred, kernel_cap_t cap_ambient) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->cap_ambient = cap_ambient; +} + +void __iee_code _iee_set_cred_securebits(unsigned long iee_offset, struct cred *cred, unsigned securebits) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->securebits = securebits; +} + +void __iee_code _iee_set_cred_group_info(unsigned long iee_offset, struct cred *cred, struct group_info *group_info) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->group_info = group_info; +} + +void __iee_code _iee_set_cred_ucounts(unsigned long iee_offset, struct cred *cred, struct ucounts *ucounts) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->ucounts = ucounts; +} + +void __iee_code _iee_set_cred_user_ns(unsigned long iee_offset, struct cred *cred, struct user_namespace *user_ns) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->user_ns = user_ns; +} + +void __iee_code _iee_set_cred_user(unsigned long iee_offset, struct cred *cred, struct user_struct *user) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->user = user; +} + +void __iee_code _iee_set_cred_fsgid(unsigned long iee_offset, struct cred *cred, kgid_t fsgid) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->fsgid = fsgid; +} + +void __iee_code _iee_set_cred_fsuid(unsigned long iee_offset, struct cred *cred, kuid_t fsuid) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->fsuid = fsuid; +} + +void __iee_code _iee_set_cred_egid(unsigned long iee_offset, struct cred *cred, kgid_t egid) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->egid = egid; +} + +void __iee_code _iee_set_cred_euid(unsigned long iee_offset, struct cred *cred, kuid_t euid) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->euid = euid; +} + +void __iee_code _iee_set_cred_sgid(unsigned long iee_offset, struct cred *cred, kgid_t sgid) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->sgid = sgid; +} + +void __iee_code _iee_set_cred_suid(unsigned long iee_offset, struct cred *cred, kuid_t suid) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->suid = suid; +} + +void __iee_code _iee_copy_cred(unsigned long iee_offset, struct cred *old, struct cred *new) +{ + #ifdef CONFIG_CREDP + struct rcu_head *rcu = (struct rcu_head *)(new->rcu.func); + struct cred *_new = (struct cred *)__phys_to_iee(__pa(new)); + _iee_memcpy(iee_offset, new, old, sizeof(struct cred)); + *(struct rcu_head **)(&(_new->rcu.func)) = rcu; + *(struct rcu_head *)(_new->rcu.func) = *(struct rcu_head *)(old->rcu.func); + #endif +} + +void __iee_code _iee_set_cred_gid(unsigned long iee_offset, struct cred *cred, kgid_t gid) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->gid = gid; +} + +void __iee_code _iee_set_cred_uid(unsigned long iee_offset, struct cred *cred, kuid_t uid) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->uid = uid; +} + +void __iee_code _iee_write_in_byte(unsigned long iee_offset, void *ptr, __u64 data, int length) +{ + ptr = (void *)((unsigned long)ptr + (unsigned long)iee_offset); + switch(length) { + case 8: { + *(__u64 *)ptr = data; + break; + } + case 4: { + 
*(__u32 *)ptr = (__u32)data; + break; + } + case 2: { + *(__u16 *)ptr = (__u16)data; + break; + } + case 1: { + *(__u8 *)ptr = (__u8)data; + break; + } + } +} + +static pteval_t inline _iee_set_cmpxchg_relaxed(pte_t *ptep, pteval_t old_pteval, pteval_t new_pteval, unsigned long iee_offset) +{ + pteval_t pteval = cmpxchg_relaxed((pteval_t *)((unsigned long)ptep + iee_offset), old_pteval, new_pteval); + return pteval; +} + +/* Check if addr is allocated in IEE page */ +static inline bool check_addr_in_iee_valid(unsigned long addr) +{ + pgd_t *pgdir = swapper_pg_dir; + + pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); + p4d_t *p4dp = p4d_offset(pgdp, addr); + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + + if(!(p4d_val(READ_ONCE(*p4dp)) & PTE_VALID)) + return false; + + pudp = pud_offset(p4dp, addr); + + if(!(pud_val(READ_ONCE(*pudp)) & PTE_VALID)) + return false; + + pmdp = pmd_offset(pudp, addr); + + if(!(pmd_val(READ_ONCE(*pmdp)) & PTE_VALID)) + return false; + + ptep = pte_offset_kernel(pmdp, addr); + + return (pte_val(READ_ONCE(*ptep)) & PTE_VALID); +} + +void __iee_code _iee_set_tramp_pgd(unsigned long iee_offset, pgd_t *pgdp, pgd_t pgd) +{ + WRITE_ONCE(*((pgd_t *)(__phys_to_iee(__pa_symbol(pgdp)))), pgd); +} + +void __iee_code _iee_set_swapper_pgd(unsigned long iee_offset, pgd_t *pgdp, pgd_t pgd) +{ + if(!(pgd_val(pgd) & PMD_SECT_VALID)) + { + WRITE_ONCE(*((pgd_t *)(__phys_to_iee(__pa_symbol(pgdp)))), pgd); + return; + } + + if ((pgd_val(pgd) & PMD_TABLE_BIT) && !check_addr_in_iee_valid(__phys_to_iee(__pgd_to_phys(pgd)))) + panic("You can't use non-iee-pgtable\n"); + + if((pgdp >= pgd_offset_pgd((pgd_t *)swapper_pg_dir, PAGE_OFFSET + BIT(vabits_actual - 2))) && (pgdp < pgd_offset_pgd((pgd_t *)swapper_pg_dir, PAGE_OFFSET + BIT(vabits_actual - 1))) && !(pgd_val(pgd) & PGD_APT)) + panic("Set IEE pgd U page.\n"); + + WRITE_ONCE(*((pgd_t *)(__phys_to_iee(__pa_symbol(pgdp)))), pgd); +} + +void __iee_code _iee_set_p4d(unsigned long iee_offset, p4d_t *p4dp, p4d_t p4d) +{ + if(!(p4d_val(p4d) & PMD_SECT_VALID)) + { + WRITE_ONCE(*((p4d_t *)((unsigned long)p4dp + (unsigned long)iee_offset)), p4d); + return; + } + + if ((p4d_val(p4d) & PMD_TABLE_BIT) && !check_addr_in_iee_valid(__phys_to_iee(__p4d_to_phys(p4d)))) + panic("You can't use non-iee-pgtable\n"); + + WRITE_ONCE(*((p4d_t *)((unsigned long)p4dp + (unsigned long)iee_offset)), p4d); +} + +void __iee_code _iee_set_pud(unsigned long iee_offset, pud_t *pudp, pud_t pud) +{ + if(!(pud_val(pud) & PMD_SECT_VALID)) + { + WRITE_ONCE(*((pud_t *)((unsigned long)pudp + (unsigned long)iee_offset)), pud); + return; + } + + if ((pud_val(pud) & PMD_TABLE_BIT) && !check_addr_in_iee_valid(__phys_to_iee(__pud_to_phys(pud)))) + panic("You can't use non-iee-pgtable\n"); + + WRITE_ONCE(*((pud_t *)((unsigned long)pudp + (unsigned long)iee_offset)), pud); +} + +// Return true if the modify does not break DEP. +static inline bool check_pmd_dep(char *addr, pmd_t pmd) +{ + // DEP for kernel code and readonly data + // _text: .text start addr, __init_begin: .rodata end addr + if (addr >= _stext && addr < _etext) + { + if ((PTE_WRITE & pmd_val(pmd)) || // DBM == 1 --> writable + !(PTE_RDONLY & pmd_val(pmd))) // DBM == 0 && AP[2] = 0 --> writable + { + panic("Can't make kernel's text/readonly page as writable!\n" + "addr = 0x%16llx, pmd_val = 0x%16llx", + (u64)addr, pmd_val(pmd)); + } + } + return true; +} + +// Return true if the pmd table is a part of kernel page table. +// TODO : Optimize to get lower overhead. 
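/*
 * Editorial sketch, not part of the patch: once IEE is up, the kernel is not
 * expected to write live page-table entries directly; a kernel-side wrapper
 * would hand the update to the IEE read/write gate, which dispatches to the
 * checked setters above (_iee_set_swapper_pgd/_iee_set_p4d/_iee_set_pud/...)
 * inside the isolated mapping.  iee_rw_gate() and IEE_OP_SET_FREEPTR do
 * appear later in this patch; the IEE_OP_SET_PUD opcode name below is an
 * assumption used only for illustration.
 */
static inline void example_set_pud(pud_t *pudp, pud_t pud)
{
	/* Hypothetical wrapper: trap into IEE instead of writing *pudp here. */
	iee_rw_gate(IEE_OP_SET_PUD, pudp, pud);
}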
+static inline bool is_kernel_pmd_table(pmd_t *pmdp, pmd_t pmd) +{ + int i = 0,j = 0; + for(i = 0; i < PAGE_SIZE/sizeof(pgd_t); i++) + { + pgd_t *pgdp = (pgd_t *)swapper_pg_dir + i; + if((pgd_val(*pgdp) & PMD_SECT_VALID) && (pgd_val(*pgdp) & PMD_TABLE_BIT)) + { + for(j = 0; j < PAGE_SIZE/sizeof(pud_t); j++) + { + pud_t *pudp = (pud_t *)__va(__pgd_to_phys(*pgdp)) + i; + if((pud_val(*pudp) & PMD_SECT_VALID) && (pud_val(*pudp) & PMD_TABLE_BIT)) + { + pmd_t *current_pmdp = __va(__pud_to_phys(*pudp)); + if((unsigned long)current_pmdp == ((unsigned long)pmdp & PAGE_MASK)) + return true; + } + } + } + } + return false; +} + +// Return true if it is mapped to a physical range containing IEE page. +// TODO : Optimize to get lower overhead. +static inline bool check_addr_range_in_iee_valid(pmd_t pmd) +{ + int i = 0; + unsigned long addr = __phys_to_iee(__pmd_to_phys(pmd)); + pgd_t *pgdir = swapper_pg_dir; + + pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); + p4d_t *p4dp = p4d_offset(pgdp, addr); + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + + if(!(p4d_val(READ_ONCE(*p4dp)) & PTE_VALID)) + return false; + + pudp = pud_offset(p4dp, addr); + + if(!(pud_val(READ_ONCE(*pudp)) & PTE_VALID)) + return false; + + pmdp = pmd_offset(pudp, addr); + + if(!(pmd_val(READ_ONCE(*pmdp)) & PTE_VALID)) + return false; + + ptep = pte_offset_kernel(pmdp, addr); + + for(i = 0; i < PAGE_SIZE/sizeof(pte_t); i++) + { + if(pte_val(READ_ONCE(*ptep)) & PTE_VALID) + return true; + ptep++; + } + return false; +} + +void __iee_code _iee_set_pmd(unsigned long iee_offset, pmd_t *pmdp, pmd_t pmd) +{ + char * addr = (char *)__phys_to_kimg(__pmd_to_phys(pmd)); + + if(!(pmd_val(pmd) & PMD_SECT_VALID)) + { + WRITE_ONCE(*((pmd_t *)((unsigned long)pmdp + (unsigned long)iee_offset)), pmd); + return; + } + + // Check if the pte table is legally allocated. + if ((pmd_val(pmd) & PMD_TABLE_BIT) && !check_addr_in_iee_valid(__phys_to_iee(__pmd_to_phys(pmd)))) + panic("You can't use non-iee-pgtable\n"); + + // Avoid mapping a huge pmd as U page. + // if(!(pmd_val(pmd) & PMD_TABLE_BIT) && (pmd_val(pmd) & PMD_SECT_USER) && is_kernel_pmd_table(pmdp, pmd)) + // panic("Set a block descriptor in kernel space U page.\n"); + + // Avoid mapping a huge pmd to IEE physical page. + // if(!(pmd_val(pmd) & PMD_TABLE_BIT) && check_addr_range_in_iee_valid(pmd)) + // panic("Mapping IEE physical page to a huge pmd.\n"); + + if(!check_pmd_dep(addr, pmd)) + return; + + WRITE_ONCE(*((pmd_t *)((unsigned long)pmdp + (unsigned long)iee_offset)), pmd); +} + +// Return true if the pte table is a part of kernel page table. +// TODO : Optimize to get lower overhead. +static inline bool is_kernel_pte_table(pte_t *ptep, pte_t pte) +{ + return false; +} + +// Return true if it does not change the privilage or add new U page in kernel. +static inline bool check_privilage_safe(pte_t *ptep, pte_t pte) +{ + if(!(pte_val(pte) & PTE_VALID)) + return true; + + if((pte_val(*ptep) & PTE_VALID)) + { + if((pte_val(*ptep) & PTE_USER) != (pte_val(pte) & PTE_USER)) + panic("Incorrectly change privilage.\n"); + } + else + { + if((pte_val(pte) & PTE_USER) && is_kernel_pte_table(ptep, pte)) + panic("Add new U page in kernel space.\n"); + } + return true; +} + +// TODO : When adding a new executable page, check it for DEP. +static inline bool safely_adding_new_exec_page(pte_t *ptep, pte_t pte) +{ + return true; +} + +// Return true if it is only changing prot of a pte. 
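/*
 * Editorial condensation, not part of the patch: the helper checks above can
 * be read as a single policy for pte updates, shown here as a standalone
 * predicate.  An update is accepted only if it does not flip the user/kernel
 * attribute of a valid entry (check_privilage_safe) and does not make kernel
 * text or rodata writable, i.e. DBM/PTE_WRITE set or AP[2]/PTE_RDONLY clear
 * (check_pte_dep).  Names old_pte/new_pte/maps_kernel_text are illustrative.
 */
static inline bool example_pte_update_allowed(pte_t old_pte, pte_t new_pte,
					      bool maps_kernel_text)
{
	if (!(pte_val(new_pte) & PTE_VALID))
		return true;	/* invalidations are always accepted */
	if ((pte_val(old_pte) & PTE_VALID) &&
	    ((pte_val(old_pte) ^ pte_val(new_pte)) & PTE_USER))
		return false;	/* privilege change on a live entry */
	if (maps_kernel_text &&
	    ((pte_val(new_pte) & PTE_WRITE) || !(pte_val(new_pte) & PTE_RDONLY)))
		return false;	/* would make text/rodata writable */
	return true;
}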
+static inline bool is_changing_pte_prot(pte_t *ptep, pte_t pte) +{ + if(((pte_val(*ptep) ^ pte_val(pte)) & PTE_ADDR_MASK) == 0) + return true; + else + return false; +} + +// Return true if the modify does not break DEP. +static inline bool check_pte_dep(char *addr, pte_t pte) +{ + // DEP for kernel code and readonly data + // _text: .text start addr, __init_begin: .rodata end addr + if (addr >= _stext && addr < _etext) + { + if ((PTE_WRITE & pte_val(pte)) // DBM == 1 --> writable + || !(PTE_RDONLY & pte_val(pte))) // DBM == 0 && AP[2] = 0 --> writable + { + panic("Can't make kernel's text/readonly page as writable!\n" + "addr = 0x%16llx, pte_val = 0x%16llx", + (u64)addr, pte_val(pte)); + } + } + return true; +} + +void __iee_code _iee_set_pte(unsigned long iee_offset, pte_t *ptep, pte_t pte) +{ + char * addr = (char *)__phys_to_kimg(__pte_to_phys(pte)); + + if(!(pte_val(pte) & PTE_VALID)) + { + WRITE_ONCE(*((pte_t *)((unsigned long)ptep + (unsigned long)iee_offset)), pte); + return; + } + + // Avoid modify privilage unsafely. + if(!check_privilage_safe(ptep, pte)) + panic("You are modify privilage unsafely.\n"); + + // Avoid mapping a new executable page. + if(!safely_adding_new_exec_page(ptep, pte)) + panic("You are adding a new executable page unsafely.\n"); + + // Avoid mapping a new VA to IEE PA. + if(!is_changing_pte_prot(ptep, pte) && + check_addr_in_iee_valid(__phys_to_iee(__pte_to_phys(pte)))) + panic("You are remmaping IEE page to other VA.\n"); + + // Avoid mapping a writable VA to kernel code PA. + if(!check_pte_dep(addr, pte)) + return; +#ifdef CONFIG_KOI + if (pte_valid(pte)) + pte = __pte(pte_val(pte) | PTE_NG); +#endif + WRITE_ONCE(*((pte_t *)((unsigned long)ptep + (unsigned long)iee_offset)), pte); +} + +static void inline _iee_set_pte_single(pte_t *ptep, pte_t pte, unsigned long iee_offset) +{ + char * addr = (char *)__phys_to_kimg(__pte_to_phys(pte)); + + if(!(pte_val(pte) & PTE_VALID)) + { + WRITE_ONCE(*((pte_t *)((unsigned long)ptep + (unsigned long)iee_offset)), pte); + return; + } + + // Avoid modify privilage unsafely. + if(!check_privilage_safe(ptep, pte)) + panic("You are modify privilage unsafely.\n"); + + // Avoid mapping a new executable page. + if(!safely_adding_new_exec_page(ptep, pte)) + panic("You are adding a new executable page unsafely.\n"); + + // Avoid mapping a new VA to IEE PA. + if(!is_changing_pte_prot(ptep, pte) && + check_addr_in_iee_valid(__phys_to_iee(__pte_to_phys(pte)))) + panic("You are remmaping IEE page to other VA.\n"); + + // Avoid mapping a writable VA to kernel code PA. 
+ if(!check_pte_dep(addr, pte)) + return; + + WRITE_ONCE(*((pte_t *)((unsigned long)ptep + (unsigned long)iee_offset)), pte); +} + +void __iee_code _iee_set_stack_pte(unsigned long iee_offset, pte_t *ptep, int order, int use_block_pmd, unsigned long lm_addr) +{ + int i; + unsigned long iee_addr = lm_addr + iee_offset; + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, iee_addr); + p4d_t *p4dp = p4d_offset(pgdp, iee_addr); + pud_t *pudp = pud_offset(p4dp, iee_addr); + pmd_t *pmdp = pmd_offset(pudp, iee_addr); + pte_t *iee_ptep = (pte_t *)(((unsigned long)pte_offset_kernel(pmdp, iee_addr)) + iee_offset); + for(i = 0; i < (1 << order); i++) + { + pte_t pte = READ_ONCE(*iee_ptep); + pte = __pte(pte_val(pte) | PTE_VALID); + WRITE_ONCE(*iee_ptep, pte); + iee_ptep++; + } + + ptep = (pte_t *)((unsigned long)ptep + iee_offset); + if(use_block_pmd) + { + pmd_t *pmdp = (pmd_t *)ptep; + pmd_t pmd = READ_ONCE(*pmdp); + pmd = __pmd(pmd_val(pmd) & ~PTE_VALID); + WRITE_ONCE(*pmdp, pmd); + } + else + { + for(i = 0; i < (1 << order); i++) + { + pte_t pte = READ_ONCE(*ptep); + pte = __pte(pte_val(pte) & ~PTE_VALID); + WRITE_ONCE(*ptep, pte); + ptep++; + } + } +} + +void __iee_code _iee_unset_stack_pte(unsigned long iee_offset, pte_t *ptep, int order, int use_block_pmd, unsigned long lm_addr) +{ + int i; + unsigned long iee_addr = lm_addr + iee_offset; + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, iee_addr); + p4d_t *p4dp = p4d_offset(pgdp, iee_addr); + pud_t *pudp = pud_offset(p4dp, iee_addr); + pmd_t *pmdp = pmd_offset(pudp, iee_addr); + pte_t *iee_ptep = (pte_t *)(((unsigned long)pte_offset_kernel(pmdp, iee_addr)) + iee_offset); + for(i = 0; i < (1 << order); i++) + { + pte_t pte = READ_ONCE(*iee_ptep); + pte = __pte(pte_val(pte) & ~PTE_VALID); + WRITE_ONCE(*iee_ptep, pte); + iee_ptep++; + } + + ptep = (pte_t *)((unsigned long)ptep + iee_offset); + if(use_block_pmd) + { + pmd_t *pmdp = (pmd_t *)ptep; + pmd_t pmd = READ_ONCE(*pmdp); + pmd = __pmd(pmd_val(pmd) | PTE_VALID); + WRITE_ONCE(*pmdp, pmd); + } + else + { + for(i = 0; i < (1 << order); i++) + { + pte_t pte = READ_ONCE(*ptep); + pte = __pte(pte_val(pte) | PTE_VALID); + WRITE_ONCE(*ptep, pte); + ptep++; + } + } +} + +void __iee_code _iee_set_bm_pte(unsigned long iee_offset, pte_t *ptep, pte_t pte) +{ +#ifdef CONFIG_KOI + if (pte_valid(pte)) + pte = __pte(pte_val(pte) | PTE_NG); +#endif + WRITE_ONCE(*((pte_t *)(__phys_to_iee(__pa_symbol(ptep)))), pte); +} + +/* Data in iee_si_base is visible to all pgd while iee_si_data is private. */ +unsigned long iee_base_swapper_pg_dir __iee_si_data; +unsigned long iee_base_idmap_pg_dir __iee_si_data; +unsigned long iee_base_reserved_pg_dir __iee_si_data; +unsigned long iee_base__bp_harden_el1_vectors __iee_si_data; +bool iee_init_done __iee_si_data; +unsigned long iee_si_tcr __iee_si_data; +s64 iee_si_offset __iee_si_data; + +static u64 __iee_si_code inline iee_si_mask(unsigned long mask, unsigned long new_val, unsigned long old_val) +{ + return (new_val & mask) | (old_val & ~mask); +} +/* + * handler function for requests of executing sensitive instrutions. + */ +u64 __iee_si_code iee_si_handler(int flag, ...) 
+{ + va_list pArgs; + u64 old_val, new_val; + + // BUG_ON(flag > IEE_WRITE_MDSCR); + va_start(pArgs, flag); + switch (flag) { + case IEE_SI_TEST: + break; + case IEE_WRITE_SCTLR: { + old_val = read_sysreg(sctlr_el1); + new_val = va_arg(pArgs, u64); + new_val = iee_si_mask(IEE_SCTLR_MASK, new_val, old_val); + write_sysreg(new_val, sctlr_el1); + break; + } + case IEE_WRITE_TTBR0: + case IEE_CONTEXT_SWITCH: { + u64 new_asid, new_phys, old_phys, token_phys; + struct task_struct *tsk; + struct task_token *token; + new_val = va_arg(pArgs, u64); + new_phys = (new_val & PAGE_MASK) & ~TTBR_ASID_MASK; + new_asid = new_val >> 48; + + // Check ASID first + if (new_phys == iee_base_reserved_pg_dir){ + if (new_asid != 1) + panic("IEE SI warning: reserved_pg_dir ASID invalid: %llx:%llx", new_asid, new_val); + } + // Already reserved asid 1 for iee rwx gate. + else if (new_asid == 0){ + new_val |= FIELD_PREP(TTBR_ASID_MASK, 1); + printk("IEE SI: Modify ASID of %llx to 1.", new_val); + } + // TO DO: operations to protect idmap_pg_dir + else if (new_phys == iee_base_idmap_pg_dir) + { + // printk("IEE SI: switch to idmap_pg_dir.); + } + else if (new_asid % 2 ==0) + panic("IEE SI warning: TTBR0 ASID invalid: %llx:%llx", new_asid, new_val); + + /* Skip verification if iee hasn't been initialized. */ + if (iee_init_done){ + // Verify current sp_el0 with iee token info + asm volatile("mrs %x0, sp_el0":"=r"(tsk)); + token = (struct task_token *)((unsigned long)tsk + (unsigned long)iee_offset); + + /* + * token->pgd != NULL means it is a user task, then we need to check whether current ttbr0 is correct. + */ + if (token->pgd){ + old_val = read_sysreg(ttbr0_el1); + // When TTBR0 is reserved_pg_dir then no checking is available. + if (old_val != iee_base_reserved_pg_dir){ + old_phys = (old_val & PAGE_MASK) & ~TTBR_ASID_MASK; + token_phys = __pa(token->pgd); + if (old_phys != token_phys) + panic("IEE SI warning: Pgd set error. old ttbr0:%lx, token ttbr0:%lx, token pgd:%lx", + (unsigned long)old_phys, (unsigned long)token_phys, (unsigned long)(token->pgd)); + } + } + } + // all checks are done. + write_sysreg(new_val, ttbr0_el1); + + // SET ASID in TTBR1 when context switch + if (flag == IEE_CONTEXT_SWITCH){ + new_val = (read_sysreg(ttbr1_el1) & ~TTBR_ASID_MASK) | FIELD_PREP(TTBR_ASID_MASK, new_asid-1); + write_sysreg(new_val, ttbr1_el1); + } + break; + } + case IEE_WRITE_VBAR: { + u64 el1_vector; + new_val = va_arg(pArgs, u64); + el1_vector = iee_base__bp_harden_el1_vectors; + if(new_val == el1_vector || new_val == el1_vector+SZ_2K || + new_val == el1_vector+SZ_2K*2 || new_val == el1_vector+SZ_2K*3) + write_sysreg(new_val, vbar_el1); + break; + } + case IEE_WRITE_TCR: { + old_val = read_sysreg(tcr_el1); + new_val = va_arg(pArgs, u64); + new_val = iee_si_mask(IEE_TCR_MASK, new_val, old_val); + write_sysreg(new_val, tcr_el1); + break; + } + } + va_end(pArgs); + return 0; +} +/* + * TODO: scan a page to check whether it contains sensitive instructions + * return 1 when finding sensitive inst, 0 on safe page. 
+ */ +int iee_si_scan_page(unsigned long addr); +#endif \ No newline at end of file diff --git a/arch/arm64/kernel/iee/pgtable_slab.c b/arch/arm64/kernel/iee/pgtable_slab.c new file mode 100644 index 000000000000..abd3b3988250 --- /dev/null +++ b/arch/arm64/kernel/iee/pgtable_slab.c @@ -0,0 +1,107 @@ +#include +#include +#include + +#define PGTABLE_INIT_ORDER 7 +struct kmem_cache *pgtable_jar; +struct kmem_cache *ptdesc_jar; +struct kmem_cache *pgd_jar; +unsigned long pgtable_jar_offset; +unsigned long pgd_jar_offset; + +#ifdef CONFIG_PTP +static inline void iee_ptdesc_init(struct page *page) +{ + /* Alloc struct ptdesc from iee memory pool. */ + struct ptdesc *tmp = kmem_cache_alloc(ptdesc_jar, GFP_KERNEL | __GFP_ZERO); + spin_lock_init(&tmp->ptl); + /* Fill the iee ptdesc pointer array. */ + page_to_iee_ptdesc(page) = tmp; + /* We need a ptr point back to struct page for list operations. */ + ((struct ptdesc_t *)page_to_iee_ptdesc(page))->page = page; +} + +/* Free ptdesc and clear the iee ptdesc ptr. */ +static inline void iee_ptdesc_free(struct page *page) +{ + kmem_cache_free(ptdesc_jar, page_to_iee_ptdesc(page)); + page_to_iee_ptdesc(page) = NULL; +} + +extern void early_pgtable_jar_alloc(struct kmem_cache *pgtable_jar); +void __init iee_pmd_pgtable_init(pud_t *pud) { + struct page *page; + pmd_t *orig_pmd = pud_pgtable(*pud); + pmd_t *pmd; + int i; + + for (i = 0; i < PTRS_PER_PMD; i++) { + pmd = orig_pmd + i; + if (pmd_none(*pmd) || pmd_bad(*pmd)) + continue; + page = pmd_page(*pmd); + iee_ptdesc_init(page); + } +} + +void __init iee_pud_pgtable_init(p4d_t *p4d) { + struct page *page; + pud_t *orig_pud = p4d_pgtable(*p4d); + pud_t *pud; + int i; + + for (i = 0; i < PTRS_PER_PUD; i++) { + pud = orig_pud + i; + if (pud_none(*pud) || pud_bad(*pud)) + continue; + iee_pmd_pgtable_init(pud); + page = pud_page(*pud); + iee_ptdesc_init(page); + } +} + +void __init iee_pgtable_init(void) +{ + int i; + pgd_t *pgd; + struct page* page; + + ptdesc_jar = kmem_cache_create("ptdesc_jar", sizeof(struct ptdesc_t), 0, SLAB_PANIC|SLAB_RED_ZONE, NULL); + pgtable_jar = kmem_cache_create("pgtable_jar", PAGE_SIZE, PAGE_SIZE, + SLAB_PANIC, NULL); + pgd_jar = NULL; + for(i = 0; i < ((1 << (PGTABLE_INIT_ORDER))/nr_cpu_ids); i++) + { + early_pgtable_jar_alloc(pgtable_jar); + } + for (i = 0; i < PTRS_PER_PGD; i++) { + pgd = swapper_pg_dir + i; + if (p4d_none_or_clear_bad((p4d_t *)pgd)) + continue; + iee_pud_pgtable_init((p4d_t *)pgd); + page = pgd_page(*pgd); + iee_ptdesc_init(page); + } +} +#endif + +void *get_iee_pgtable_page(gfp_t gfpflags) +{ + struct page *page; + void *res = kmem_cache_alloc(pgtable_jar, gfpflags); + if (!res) { + panic("IEE PTP: alloc pgtable failed."); + return res; + } + iee_rw_gate(IEE_OP_SET_FREEPTR, (void **)((unsigned long)res + pgtable_jar_offset), NULL); + page = virt_to_page(res); + // page_ref_inc(page); + iee_ptdesc_init(page); + return res; +} + +void free_iee_pgtable_page(void *obj) +{ + iee_ptdesc_free(virt_to_page(obj)); + kmem_cache_free(pgtable_jar, obj); +} \ No newline at end of file diff --git a/arch/arm64/kernel/iee/stack_slab.c b/arch/arm64/kernel/iee/stack_slab.c new file mode 100644 index 000000000000..c52a11d67415 --- /dev/null +++ b/arch/arm64/kernel/iee/stack_slab.c @@ -0,0 +1,19 @@ +#include +#include + +struct kmem_cache *iee_stack_jar; + +void __init iee_stack_init(void) +{ + iee_stack_jar = kmem_cache_create("iee_stack_jar", (PAGE_SIZE << 3), (PAGE_SIZE << 3), SLAB_PANIC, NULL); +} + +void *get_iee_stack(void) +{ + return kmem_cache_alloc(iee_stack_jar, 
GFP_KERNEL) + iee_offset; +} + +void free_iee_stack(void *obj) +{ + kmem_cache_free(iee_stack_jar, obj - iee_offset); +} diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index b1f2a9b49039..7b86a1047278 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -31,7 +31,9 @@ DEFINE_PER_CPU(struct nmi_ctx, nmi_contexts); DEFINE_PER_CPU(unsigned long *, irq_stack_ptr); - +#ifdef CONFIG_KOI +EXPORT_SYMBOL(irq_stack_ptr); +#endif DECLARE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr); diff --git a/arch/arm64/kernel/koi/Makefile b/arch/arm64/kernel/koi/Makefile new file mode 100644 index 000000000000..9be8710b714a --- /dev/null +++ b/arch/arm64/kernel/koi/Makefile @@ -0,0 +1 @@ +obj-y += koi.o \ No newline at end of file diff --git a/arch/arm64/kernel/koi/koi.c b/arch/arm64/kernel/koi/koi.c new file mode 100644 index 000000000000..20daf4889ca6 --- /dev/null +++ b/arch/arm64/kernel/koi/koi.c @@ -0,0 +1,1688 @@ +#include "asm/koi.h" +#include "linux/compiler_attributes.h" +#include "linux/compiler_types.h" +#include "asm/barrier.h" +#include "asm-generic/bug.h" +#include "asm-generic/errno-base.h" +#include "asm-generic/memory_model.h" +#include "asm-generic/pgtable-nop4d.h" +#include "asm-generic/rwonce.h" +#include "asm/pgalloc.h" +#include "asm/memory.h" +#include "linux/bitfield.h" +#include "linux/compiler.h" +#include "linux/types.h" +#include "linux/module.h" +#include "linux/spinlock.h" +#include "linux/spinlock_types.h" +#include "linux/kernel.h" +#include "linux/rculist.h" +#include "linux/rcupdate.h" +#include "linux/list.h" +#include "asm/current.h" +#include "linux/compiler_types.h" +#include "asm-generic/barrier.h" +#include "asm-generic/rwonce.h" +#include "asm-generic/pgalloc.h" +#include "asm/cpufeature.h" +#include "asm/kvm_hyp.h" +#include "asm/mmu.h" +#include "asm/mmu_context.h" +#include "asm/page-def.h" +#include "asm/pgalloc.h" +#include "asm/pgtable-hwdef.h" +#include "asm/pgtable-types.h" +#include "asm/pgtable.h" +#include "asm/string.h" +#include "asm/sysreg.h" +#include "linux/bitfield.h" +#include "linux/compiler.h" +#include "linux/export.h" +#include "linux/gfp.h" +#include "linux/huge_mm.h" +#include "linux/kallsyms.h" +#include "linux/kconfig.h" +#include "linux/kern_levels.h" +#include "linux/kernel.h" +#include "linux/list.h" +#include "linux/lockdep.h" +#include "linux/mm.h" +#include "linux/mm_types.h" +#include "linux/pgtable.h" +#include "linux/printk.h" +#include "linux/rculist.h" +#include "linux/rcupdate.h" +#include "linux/rmap.h" +#include "linux/sched.h" +#include "linux/stddef.h" +#include "linux/string.h" +#include "linux/swap.h" +#include "linux/swapops.h" +#include "linux/types.h" +#include "linux/slab.h" +#include "linux/string.h" +#include "linux/hashtable.h" +#include +// #define DEBUG + +#ifdef DEBUG +#define debug_printk(...) printk(KERN_ERR __VA_ARGS__) +#else +#define debug_printk(...) 
+#endif + +#define __koi_code __section(".koi.text") +#define __koi_data __section(".data..koi") + +#define KOI_FLAG_MASK 0xffff000000000fff + +extern unsigned long __koi_code_start[]; +extern unsigned long __koi_code_end[]; +extern unsigned long __koi_data_start[]; +extern unsigned long __koi_data_end[]; +#ifdef CONFIG_IEE +extern unsigned long __iee_si_data_start[]; +extern unsigned long __iee_si_text_start[]; +extern unsigned long _iee_read_token_ttbr1(unsigned long iee_offset, + struct task_struct *tsk); +#endif + +__koi_data unsigned long koi_swapper_ttbr1 = 0; +EXPORT_SYMBOL(koi_swapper_ttbr1); +#define KOI_SWAPPER_MASK 0x0000fffffffffff0 + +__attribute__((aligned(PAGE_SIZE))) +DEFINE_PER_CPU(unsigned long[PAGE_SIZE / sizeof(unsigned long)], + koi_irq_current_ttbr1); +EXPORT_SYMBOL(koi_irq_current_ttbr1); + +extern void koi_switch_to_ko_stack(unsigned long stack_top); +extern void init_ko_mm(struct mm_struct *ko_mm, pgd_t *pgdp); +extern void koi_check_and_switch_context(struct mm_struct *mm); +extern void koi_add_page_mapping(void *token, void *new); +/** +*struct koi_mem_list - maintain a linked list of free memory in the kernel +*@addr: stating address of this memory +*@size: the size of the memory +*@list: the head of the koi_mem_list +*@rcu: for rcu +*/ +struct koi_mem_list { + unsigned long addr; + unsigned long size; + struct list_head list; + struct rcu_head rcu; +}; +//mapping parameter pointer to copy +struct koi_addr_map { + unsigned long buffer_addr; + unsigned long orig_addr; + int offset; + struct hlist_node node; + struct rcu_head rcu; +}; + +DEFINE_HASHTABLE(koi_mem_htbl, HASH_TABLE_BIT); +EXPORT_SYMBOL(koi_mem_htbl); +DEFINE_SPINLOCK(koi_mem_htbl_spin_lock); +EXPORT_SYMBOL(koi_mem_htbl_spin_lock); + +// EXPORT_SYMBOL(koi_do_switch_to_kernel_stack); +// EXPORT_SYMBOL(koi_do_switch_to_ko_stack); + +/** +* koi_ttbr_ctor - return ttbr1 for the given driver module +*/ +unsigned long koi_ttbr_ctor(struct module *mod) +{ + struct koi_mem_hash_node *ko; + struct mm_struct *ko_mm; + unsigned long ttbr1; + unsigned long asid; + int bkt; + rcu_read_lock(); + hash_for_each_rcu (koi_mem_htbl, bkt, ko, node) { + if (ko->mod == mod) { + ko_mm = ko->ko_mm; + break; + } + } + rcu_read_unlock(); + if (!ko_mm) { + printk(KERN_ERR "cannot found module %s in koi_mem_htbl", + mod->name); + return 0; + } + koi_check_and_switch_context(ko_mm); + asid = ASID(ko_mm) | USER_ASID_FLAG; + ttbr1 = ko->ko_ttbr1 | FIELD_PREP(TTBR_ASID_MASK, asid); + return ttbr1; +} +EXPORT_SYMBOL(koi_ttbr_ctor); +//release the hash node +static __maybe_unused void koi_mem_hash_node_free(struct rcu_head *rcu) +{ + struct koi_mem_hash_node *node = + container_of(rcu, struct koi_mem_hash_node, rcu); + kfree(node); +} +//release free memory linked list nodes +static void koi_mem_node_free(struct rcu_head *rcu) +{ + struct koi_mem_list *mem_node = + container_of(rcu, struct koi_mem_list, rcu); + kfree(mem_node); +} +//release the node in koi_addr_map +static void koi_addr_map_node_free(struct rcu_head *rcu) +{ + struct koi_addr_map *addr_map_node = + container_of(rcu, struct koi_addr_map, rcu); + kfree(addr_map_node); +} + +static __koi_code noinline unsigned int koi_ldtrh_wrapper(unsigned long src_addr) +{ + unsigned int ret; + asm volatile("ldtrh w0, [%1]\n" : "=r"(ret) : "r"(src_addr)); + return ret; +} + +static __koi_code noinline unsigned long koi_ldtr_wrapper(unsigned long src_addr) +{ + unsigned long ret; + asm volatile("ldtr x0, [%1]\n" : "=r"(ret) : "r"(src_addr)); + return ret; +} + +static __koi_code 
noinline void koi_sttr_wrapper(unsigned long src, + unsigned long dst_addr) +{ + asm volatile("sttr x0, [x1]\n" : :); +} + +static __koi_code noinline void koi_sttrh_wrapper(unsigned int src, + unsigned long dst_addr) +{ + asm volatile("sttrh w0, [x1]\n" : :); +} + +#ifndef CONFIG_IEE +/* + * This function is used to switch to ko's pgtable. + */ +__koi_code noinline unsigned long koi_do_switch_to_ko_pgtbl(void) +{ + struct koi_mem_hash_node *ko; + // struct mm_struct *ko_mm; + unsigned long addr; + unsigned long ttbr1, asid; + unsigned long *ptr; + struct task_token *token_addr = + (struct task_token *)((unsigned long)current + + (unsigned long)koi_offset); + int bkt; + unsigned long flags; + asm volatile(" mrs %0, elr_el1\n" : "=r"(addr)); + ptr = SHIFT_PERCPU_PTR(koi_irq_current_ttbr1, __kern_my_cpu_offset()); + rcu_read_lock(); + hash_for_each_rcu (koi_mem_htbl, bkt, ko, node) { + spin_lock_irqsave(&ko->mod_lock, flags); + if (!ko->is_valid) { + spin_unlock_irqrestore(&ko->mod_lock, flags); + goto out; + } + if (ko->mod != NULL && ko->mod->mem[MOD_INIT_TEXT].base != NULL) { + if (addr >= (unsigned long)ko->mod->mem[MOD_INIT_TEXT].base && + addr < (unsigned long)ko->mod->mem[MOD_INIT_TEXT].base + ko->mod->mem[MOD_INIT_TEXT].size) { + spin_unlock_irqrestore(&ko->mod_lock, flags); + if (token_addr->current_ttbr1 == ko->ko_ttbr1 || + *ptr == ko->ko_ttbr1) { + // ko_mm = ko->ko_mm; + // koi_check_and_switch_context(ko_mm); + // asid = ASID(ko_mm); + // ttbr1 = ko->ko_ttbr1; + // ttbr1 |= FIELD_PREP(TTBR_ASID_MASK, asid); + asm volatile("mrs %0, ttbr0_el1\n" + : "=r"(asid) + :); + asid &= TTBR_ASID_MASK; + ttbr1 = ko->ko_ttbr1 | asid; + rcu_read_unlock(); + return ttbr1; + } + goto out; + } + } + if (addr >= (unsigned long)ko->mod->mem[MOD_TEXT].base && + addr < (unsigned long)ko->mod->mem[MOD_TEXT].base + ko->mod->mem[MOD_TEXT].size) { + spin_unlock_irqrestore(&ko->mod_lock, flags); + if (token_addr->current_ttbr1 == ko->ko_ttbr1 || + *ptr == ko->ko_ttbr1) { + // ko_mm = ko->ko_mm; + // koi_check_and_switch_context(ko_mm); + // asid = ASID(ko_mm); + // ttbr1 = ko->ko_ttbr1; + // ttbr1 |= FIELD_PREP(TTBR_ASID_MASK, asid); + asm volatile("mrs %0, ttbr0_el1\n" + : "=r"(asid) + :); + asid &= TTBR_ASID_MASK; + ttbr1 = ko->ko_ttbr1 | asid; + rcu_read_unlock(); + return ttbr1; + } + goto out; + } + spin_unlock_irqrestore(&ko->mod_lock, flags); + } +out: + rcu_read_unlock(); + return 0; +} +/** +* koi_do_switch_to_kernel_pgtbl - switch to kernel pagetable +*/ +__koi_code noinline int koi_do_switch_to_kernel_pgtbl(void) +{ + unsigned long curr_ttbr1, asid; + // if (!cpu_online(smp_processor_id())) + // return 0; + asm volatile(" mrs %0, ttbr1_el1\n" : "=r"(curr_ttbr1)); + if ((curr_ttbr1 & KOI_SWAPPER_MASK) == + (koi_swapper_ttbr1 & KOI_SWAPPER_MASK)) { + return 0; + } + // if (((curr_ttbr1 & TTBR_ASID_MASK) >> 48) <= 1) { + // return 0; + // } + asm volatile("mrs %0, ttbr0_el1\n" : "=r"(asid) :); + asid &= ~USER_ASID_FLAG; + asid &= TTBR_ASID_MASK; + write_sysreg(koi_swapper_ttbr1 | asid, ttbr1_el1); + isb(); + asm volatile(ALTERNATIVE("nop; nop; nop", "ic iallu; dsb nsh; isb", + ARM64_WORKAROUND_CAVIUM_27456)); + return 1; +} +#else +__koi_code noinline unsigned long koi_do_switch_to_ko_pgtbl(void) +{ + struct koi_mem_hash_node *ko; + unsigned long addr, pan_flag, current_ttbr1, asid, ttbr1, flags; + unsigned long *ptr; + struct mm_struct *ko_mm; + int bkt; + asm volatile("mrs %0, pan\n" + "msr pan, 0x0\n" + : "=r"(pan_flag) + :); + current_ttbr1 = _iee_read_token_ttbr1(iee_offset, current); + asm 
volatile("msr pan, %0\n" : : "r"(pan_flag)); + ptr = SHIFT_PERCPU_PTR(koi_irq_current_ttbr1, __kern_my_cpu_offset()); + if (current_ttbr1 == 0 && *ptr == 0) + return 0; + asm volatile(" mrs %0, elr_el1\n" : "=r"(addr)); + rcu_read_lock(); + hash_for_each_rcu (koi_mem_htbl, bkt, ko, node) { + spin_lock_irqsave(&ko->mod_lock, flags); + if (!ko->is_valid) { + spin_unlock_irqrestore(&ko->mod_lock, flags); + goto out; + } + if (ko->mod != NULL && ko->mod->mem[MOD_INIT_TEXT].base != NULL) { + if (addr >= (unsigned long)ko->mod->mem[MOD_INIT_TEXT].base && + addr < (unsigned long)ko->mod->mem[MOD_INIT_TEXT].base + ko->mod->mem[MOD_INIT_TEXT].size) { + spin_unlock_irqrestore(&ko->mod_lock, flags); + if (current_ttbr1 == ko->ko_ttbr1 || *ptr == ko->ko_ttbr1) { + asm volatile("mrs %0, ttbr0_el1\n" + : "=r"(asid) + :); + asid &= TTBR_ASID_MASK; + ttbr1 = ko->ko_ttbr1 | asid; + rcu_read_unlock(); + return ttbr1; + } + goto out; + } + } + if (addr >= (unsigned long)ko->mod->mem[MOD_TEXT].base && + addr < (unsigned long)ko->mod->mem[MOD_TEXT].base + ko->mod->mem[MOD_TEXT].size) { + spin_unlock_irqrestore(&ko->mod_lock, flags); + if (current_ttbr1 == ko->ko_ttbr1 || *ptr == ko->ko_ttbr1) { + asm volatile("mrs %0, ttbr0_el1\n" + : "=r"(asid) + :); + asid &= TTBR_ASID_MASK; + ttbr1 = ko->ko_ttbr1 | asid; + rcu_read_unlock(); + return ttbr1; + } + goto out; + } + spin_unlock_irqrestore(&ko->mod_lock, flags); + } +out: + rcu_read_unlock(); + return 0; +} + +__koi_code noinline int koi_do_switch_to_kernel_pgtbl(void) +{ + unsigned long curr_ttbr1, asid, error_addr; + // if (!cpu_online(smp_processor_id())) + // return 0; + asm volatile(" mrs %0, ttbr1_el1\n" : "=r"(curr_ttbr1)); + if ((curr_ttbr1 & KOI_SWAPPER_MASK) == + (koi_swapper_ttbr1 & KOI_SWAPPER_MASK)) { + return 0; + } + // if (((curr_ttbr1 & TTBR_ASID_MASK) >> 48) <= 1) { + // return 0; + // } + error_addr = read_sysreg(elr_el1); + if ((error_addr >= (unsigned long)__iee_si_data_start) && + (error_addr <= (unsigned long)__iee_si_text_start)) { + unsigned long esr = read_sysreg(esr_el1); + asm volatile("mrs %0, ttbr1_el1\n" : "=r"(asid) :); + asid &= ~USER_ASID_FLAG; + asid &= TTBR_ASID_MASK; + write_sysreg(koi_swapper_ttbr1 | asid, ttbr1_el1); + isb(); + printk(KERN_ERR + "IEE SI: Error on switch to kernel. ELR_EL1:0x%llx, ESR_EL1:0x%llx, TTBR1:0x%llx", + error_addr, esr, curr_ttbr1); + } else { + iee_rwx_gate_entry(IEE_SWITCH_TO_KERNEL); + } + return 1; +} +#endif +/** +* koi_save_ttbr - save ttbr of each driver module +* @mod: driver module +* @pgdp:pointer to driver module top page table,pgd +*/ +static void koi_save_ttbr(struct module *mod, pgd_t *pgdp, + struct koi_mem_hash_node *node) +{ + phys_addr_t ttbr1 = phys_to_ttbr(virt_to_phys(pgdp)); + if (system_supports_cnp()) + ttbr1 |= TTBR_CNP_BIT; + node->ko_ttbr1 = ttbr1; +} + +/** + * Copy one pte. Returns 0 if succeeded, or -EAGAIN if one preallocated page + * is required to copy this pte. +*/ +static inline int koi_copy_present_pte(struct mm_struct *ko_mm, pte_t *dst_pte, + pte_t *src_pte, unsigned long addr) +{ + pte_t pte = *src_pte; + struct page *page; + + debug_printk("addr=0x%16llx, dst_pte=0x%16llx\n", addr, dst_pte); +#ifdef CONFIG_IEE + if ((pte_val(pte) & PTE_USER) && (pte_val(pte) & PTE_DBM) && + !(pte_val(pte) & PTE_PXN)) { + set_pte(dst_pte, __pte(pte_val(pte) & (~PTE_DBM) | PTE_RDONLY)); + } else { +#endif + set_pte(dst_pte, pte); +#ifdef CONFIG_IEE + } +#endif + return 0; +} +/** +* copy huge pmd from kernel space to driver space. 
+*/ +static int koi_copy_huge_pmd(struct mm_struct *ko_mm, pmd_t *dst_pmd, + pmd_t *src_pmd, unsigned long addr, pteval_t prot) +{ + spinlock_t *src_ptl, *dst_ptl; + pmd_t pmd; + int ret = -ENOMEM; + debug_printk( + "hugepmd: src_pmd=0x%16llx, dst_pmd=0x%16llx, src_pmd_val=0x%16llx, dst_pmd_val=0x%16llx, addr=0x%16llx\n", + src_pmd, dst_pmd, pmd_val(*src_pmd), pmd_val(*dst_pmd), addr); + dst_ptl = pmd_lockptr(ko_mm, dst_pmd); + src_ptl = pmd_lockptr(&init_mm, src_pmd); + spin_lock_bh(dst_ptl); + spin_lock_bh(src_ptl); + + set_pmd(dst_pmd, __pmd(pmd_val(*src_pmd) | prot)); + ret = 0; + spin_unlock_bh(src_ptl); + spin_unlock_bh(dst_ptl); + return ret; +} + +int __koi_pte_alloc(struct mm_struct *mm, pmd_t *pmd) +{ + spinlock_t *ptl; + // pgtable_t new = alloc_page(GFP_PGTABLE_KERNEL); + pte_t *new = pte_alloc_one_kernel(mm); + printk(KERN_ERR "alloc new=0x%16llx\n", new); + if (!new) + return -ENOMEM; + + /* + * Ensure all pte setup (eg. pte page lock and page clearing) are + * visible before the pte is made visible to other CPUs by being + * put into page tables. + * + * The other side of the story is the pointer chasing in the page + * table walking code (when walking the page table without locking; + * ie. most of the time). Fortunately, these data accesses consist + * of a chain of data-dependent loads, meaning most CPUs (alpha + * being the notable exception) will already guarantee loads are + * seen in-order. See the alpha page table accessors for the + * smp_rmb() barriers in page table walking code. + */ + smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */ + + ptl = pmd_lockptr(mm, pmd); + spin_lock_bh(ptl); + if (likely(pmd_none(*pmd))) { /* Has another populated it ? */ + mm_inc_nr_ptes(mm); + pmd_populate_kernel(mm, pmd, new); + new = NULL; + } + spin_unlock_bh(ptl); + if (new) { + pte_free_kernel(mm, new); + } + return 0; +} + +#define koi_pte_alloc(mm, pmd) \ + (unlikely(pmd_none(*(pmd))) && __koi_pte_alloc(mm, pmd)) + +#define koi_pte_offset_map_lock(mm, pmd, address, ptlp) \ + ({ \ + spinlock_t *__ptl = pte_lockptr(mm, pmd); \ + pte_t *__pte = pte_offset_kernel(pmd, address); \ + *(ptlp) = __ptl; \ + spin_lock_bh(__ptl); \ + __pte; \ + }) + +#define koi_pte_alloc_map_lock(mm, pmd, address, ptlp) \ + (koi_pte_alloc(mm, pmd) ? 
\ + NULL : \ + koi_pte_offset_map_lock(mm, pmd, address, ptlp)) + +/** +*koi_copy_pte_range - copy pte from kernel space to driver space +*/ +static int koi_copy_pte_range(struct mm_struct *ko_mm, pmd_t *dst_pmd, + pmd_t *src_pmd, unsigned long addr, + unsigned long end, pteval_t prot) +{ + pte_t *src_pte, *dst_pte; + spinlock_t *src_ptl, *dst_ptl; + + int ret = 0; +again: + dst_pte = koi_pte_alloc_map_lock(ko_mm, dst_pmd, addr, &dst_ptl); + if (!dst_pte) { + ret = -ENOMEM; + goto unlock; + } + src_pte = pte_offset_kernel(src_pmd, addr); + src_ptl = pte_lockptr(&init_mm, src_pmd); + spin_lock_bh(src_ptl); + do { + if (pte_none(*src_pte)) + continue; + if (unlikely(!pte_present(*src_pte))) { + printk(KERN_ERR + "present pte found: addr=0x%16llx, end=0x%16llx\n", + addr, end); + continue; + } + if (pte_valid(*dst_pte)) { + continue; + } + /* koi_copy_present_pte() will clear `*prealloc` if consumed */ + ret = koi_copy_present_pte(ko_mm, dst_pte, src_pte, addr); + debug_printk( + "dst_pte=0x%16llx, dst_pte_val=0x%16llx, src_pte=0x%16llx, src_pte_val=0x%16llx, addr=0x%16llx\n", + dst_pte, pte_val(*dst_pte), src_pte, pte_val(*src_pte), + addr); + if (unlikely(ret == -EAGAIN)) + break; + } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end); + spin_unlock_bh(src_ptl); + spin_unlock_bh(dst_ptl); + + if (ret) { + WARN_ON_ONCE(ret != -EAGAIN); + ret = 0; + } + if (addr != end) + goto again; + + return ret; +unlock: + spin_unlock_bh(dst_ptl); + return ret; +} + +int __koi_pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) +{ + spinlock_t *ptl; + // struct page* page = alloc_page(GFP_PGTABLE_KERNEL); + // pmd_t *new = (pmd_t *)page_address(page); + pmd_t *new = pmd_alloc_one(mm, address); + if (!new) + return -ENOMEM; + + smp_wmb(); /* See comment in __pte_alloc */ + + ptl = pud_lockptr(mm, pud); + spin_lock_bh(ptl); + if (!pud_present(*pud)) { + mm_inc_nr_pmds(mm); + pud_populate(mm, pud, new); + } else { /* Another has populated it */ + pmd_free(mm, new); + // free_page((unsigned long)new); + } + spin_unlock_bh(ptl); + return 0; +} + +static inline pmd_t *koi_pmd_alloc(struct mm_struct *mm, pud_t *pud, + unsigned long address) +{ + return (unlikely(pud_none(*pud)) && __koi_pmd_alloc(mm, pud, address)) ? 
+ NULL : + pmd_offset(pud, address); +} + +/** +*koi_copy_pmd_range - copy pmd from kernel to driver space +*/ +static inline int koi_copy_pmd_range(struct mm_struct *ko_mm, pud_t *dst_pud, + pud_t *src_pud, unsigned long addr, + unsigned long end, pteval_t prot) +{ + pmd_t *src_pmd, *dst_pmd; + unsigned long next, flag; + int err; + debug_printk( + "copy_pmd_range src_pud=0x%16llx, dst_pud=0x%16llx, addr=0x%16llx, end=0x%16llx\n", + src_pud, dst_pud, addr, end); + dst_pmd = koi_pmd_alloc(ko_mm, dst_pud, addr); + if (!dst_pmd) { + return -ENOMEM; + } + src_pmd = pmd_offset(src_pud, addr); + do { + next = pmd_addr_end(addr, end); + flag = pmd_val(*src_pmd) & KOI_FLAG_MASK; + // debug_printk("src_pmd=0x%16llx, dst_pmd=0x%16llx, addr=0x%16llx\n", src_pmd, dst_pmd, next); + // CONFIG_TRANSPARENT_HUGEPAGE is enabled, so we must add copy_huge_pmd + if (pmd_none(*src_pmd)) + continue; + if (!(pmd_val(*src_pmd) & PMD_TABLE_BIT)) { + // src_pmd maps a huge page + debug_printk( + "src_pmd=0x%16llx, dst_pmd=0x%16llx, addr=0x%16llx\n", + src_pmd, dst_pmd, addr); + err = koi_copy_huge_pmd(ko_mm, dst_pmd, src_pmd, addr, prot); + if (err == -ENOMEM) + return -ENOMEM; + continue; + } + if (koi_copy_pte_range(ko_mm, dst_pmd, src_pmd, addr, next, prot)) + return -ENOMEM; + set_pmd(dst_pmd, __pmd((pmd_val(*dst_pmd) & (~KOI_FLAG_MASK)) | flag)); + debug_printk( + "src_pmd_val=0x%16llx, dst_pmd_val=0x%16llx, addr=0x%16llx\n", + pmd_val(*src_pmd), pmd_val(*dst_pmd), addr); + } while (dst_pmd++, src_pmd++, addr = next, addr != end); + return 0; +} + +int __koi_pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address) +{ + // pud_t *new = pud_alloc_one(mm, address); + + // struct page *page = alloc_page(GFP_PGTABLE_KERNEL); + // pud_t *new = (pud_t *)page_address(page); + pud_t *new = pud_alloc_one(mm, address); + printk(KERN_ERR "pud alloc pud=0x%16llx\n", new); + if (!new) + return -ENOMEM; + + smp_wmb(); /* See comment in __pte_alloc */ + + spin_lock_bh(&mm->page_table_lock); + if (!p4d_present(*p4d)) { + mm_inc_nr_puds(mm); + p4d_populate(mm, p4d, new); + } else { /* Another has populated it */ + pud_free(mm, new); + // free_page((unsigned long)new); + } + spin_unlock_bh(&mm->page_table_lock); + return 0; +} + +static inline pud_t *koi_pud_alloc(struct mm_struct *mm, p4d_t *p4d, + unsigned long address) +{ + return (unlikely(p4d_none(*p4d)) && __koi_pud_alloc(mm, p4d, address)) ? 
+ NULL : + pud_offset(p4d, address); +} + +static int koi_copy_huge_pud(struct mm_struct *ko_mm, pud_t *dst_pud, + pud_t *src_pud, unsigned long addr, pteval_t prot) +{ + spinlock_t *src_ptl, *dst_ptl; + // pmd_t pmd; + int ret = -ENOMEM; + debug_printk("src_pud=0x%16llx, dst_pud=0x%16llx, addr=0x%16llx\n", + src_pud, dst_pud, addr); + dst_ptl = pud_lockptr(ko_mm, dst_pud); + src_ptl = pud_lockptr(&init_mm, src_pud); + spin_lock_bh(dst_ptl); + spin_lock_bh(src_ptl); + set_pte((pte_t *)dst_pud, __pte(pud_val(*src_pud) | prot)); + spin_unlock_bh(src_ptl); + spin_unlock_bh(dst_ptl); + ret = 0; + return ret; +} + +/** +*koi_copy_pud_range - copy pud from kernel to driver +*/ +static inline int koi_copy_pud_range(struct mm_struct *ko_mm, p4d_t *dst_p4d, + p4d_t *src_p4d, unsigned long addr, + unsigned long end, pteval_t prot) +{ + pud_t *src_pud, *dst_pud; + unsigned long next, flag; + dst_pud = koi_pud_alloc(ko_mm, dst_p4d, addr); + if (!dst_pud) + return -ENOMEM; + src_pud = pud_offset(src_p4d, addr); + do { + next = pud_addr_end(addr, end); + flag = pud_val(*src_pud) & KOI_FLAG_MASK; + debug_printk( + "src_pud=0x%16llx, dst_pud=0x%16llx, addr=0x%16llx\n", + src_pud, dst_pud, next); + if (pud_none(*src_pud)) { + continue; + } + if (!(pud_val(*src_pud) & PMD_TABLE_BIT)) { + // indicates that the src_pud maps to a huge page + koi_copy_huge_pud(ko_mm, dst_pud, src_pud, addr, prot); + continue; + } + // if (pud_trans_huge(*src_pud) || pud_devmap(*src_pud)) { + // debug_printk("pud_trans_huge=%d, pud_devmap=%d, src_pud=0x%16llx\n", pud_trans_huge(*src_pud) , pud_devmap(*src_pud), src_pud); + // continue; + // /* fall through */ + // } + if (koi_copy_pmd_range(ko_mm, dst_pud, src_pud, addr, next, prot)) + return -ENOMEM; + set_pud(dst_pud, __pud((pud_val(*dst_pud) & (~KOI_FLAG_MASK)) | flag)); + } while (dst_pud++, src_pud++, addr = next, addr != end); + return 0; +} + +/** +* koi_copy_p4d_range - map the kernel pagetable to the driver space level by level +* @ko_mm: the mm_struct of driver module +* @dst_pgd: destination pgd +* @src_pgd: source pgd +* @addr: the start of address +* @end: the end of address +*/ +static inline int koi_copy_p4d_range(struct mm_struct *ko_mm, pgd_t *dst_pgd, + pgd_t *src_pgd, unsigned long addr, + unsigned long end, pteval_t prot) +{ + p4d_t *src_p4d, *dst_p4d; + unsigned long next; + dst_p4d = p4d_alloc(ko_mm, dst_pgd, addr); + if (!dst_p4d) + return -ENOMEM; + src_p4d = p4d_offset(src_pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (p4d_none(*src_p4d) || p4d_bad(*src_p4d)) + continue; + debug_printk( + "dst_p4d=0x%16llx, dst_p4d_val=0x%16llx\n", + dst_p4d, p4d_val(*dst_p4d)); + if (koi_copy_pud_range(ko_mm, dst_p4d, src_p4d, addr, next, prot)) { + return -ENOMEM; + } + } while (dst_p4d++, src_p4d++, addr = next, addr != end); + return 0; +} +int koi_share_kstack(struct module *mod) +{ + unsigned long kstack_start; + struct koi_mem_hash_node *target = NULL; + hash_for_each_possible_rcu (koi_mem_htbl, target, node, + (unsigned long)mod) { + if (target->mod == mod) { + break; + } + } + kstack_start = (unsigned long)current->stack; + + return koi_copy_pagetable(target->ko_mm, target->pgdp, kstack_start, + kstack_start + THREAD_SIZE, (0)); +} +EXPORT_SYMBOL(koi_share_kstack); +/** +*int koi_copy_pagetable - map the address range from "addr" to "end" to the driver pagetable +*@ko_mm: the mm_struct of the driver module +*@koi_pg_dir: koi_pg_dir, related to the driver module, the entry for driver pagetable +*@addr: the starting address of mapping zone +*@end: the 
end address of mapping zone +*/ +int koi_copy_pagetable(struct mm_struct *ko_mm, pgd_t *koi_pg_dir, + unsigned long addr, unsigned long end, pteval_t prot) +{ + int ret = 0; + unsigned long next; + + pgd_t *src_pgd, *dst_pgd; + unsigned long flag; + src_pgd = pgd_offset_pgd(swapper_pg_dir, addr); + dst_pgd = pgd_offset_pgd(koi_pg_dir, addr); + do { + flag = pgd_val(*src_pgd) & KOI_FLAG_MASK; + next = pgd_addr_end(addr, end); + if (pgd_none(*src_pgd) || pgd_bad(*src_pgd)) + continue; + if (unlikely(koi_copy_p4d_range(ko_mm, dst_pgd, src_pgd, addr, + next, prot))) { + ret = -ENOMEM; + break; + } + set_pgd(dst_pgd, __pgd((pgd_val(*dst_pgd) & (~KOI_FLAG_MASK)) | flag)); + } while (dst_pgd++, src_pgd++, addr = next, addr != end); + + return ret; +} +EXPORT_SYMBOL(koi_copy_pagetable); +void koi_set_rdonly(unsigned long addr, pgd_t *pgdir) +{ + p4d_t *p4dp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); + if (pgd_none(*pgdp) || pgd_bad(*pgdp)) { + return; + } + + p4dp = p4d_offset(pgdp, addr); + if (p4d_none(*p4dp) || p4d_bad(*p4dp)) { + return; + } + + pudp = pud_offset(p4dp, addr); + if (pud_none(*pudp) || pud_bad(*pudp)) { + return; + } + pmdp = pmd_offset(pudp, addr); + if (pmd_none(*pmdp) || pmd_bad(*pmdp)) { + return; + } + + ptep = pte_offset_kernel(pmdp, addr); + if (pte_none(*ptep)) { + // printk(KERN_ERR "ptep 0x%16llx not available\n", ptep); + return; + } + set_pte(ptep, __pte(pte_val(*ptep) | PTE_RDONLY)); + // printk(KERN_ERR "set_readonly successfully\n"); + return; +} + +void koi_set_upage(struct module *mod, unsigned long addr, unsigned long size) { + struct koi_mem_hash_node *target = NULL; + hash_for_each_possible_rcu (koi_mem_htbl, target, node, + (unsigned long)mod) { + if (target->mod == mod) { + break; + } + } + + koi_copy_pagetable(target->ko_mm, target->pgdp, addr & PAGE_MASK, + (addr + size + PAGE_SIZE) & PAGE_MASK, PTE_USER); +} +EXPORT_SYMBOL(koi_set_upage); + +void koi_unmap_pte_table(struct mm_struct *ko_mm, pmd_t *pmd, + unsigned long addr, unsigned long end) +{ + pte_t *pte; + if (!pmd) + return; + debug_printk("pmd=0x%16llx, addr=0x%16llx, end=0x%16llx\n", pmd, addr, + end); + + // struct page *page = pte_page(*pte); + // printk(KERN_ERR "pte=0x%16llx, pte_val=0x%16llx\n", pte, pte_val(*pte)); + // debug_printk("free pte table 0x%16llx, pmd=0x%16llx, page=0x%16llx, pmd points to page=0x%16llx\n", pte, pmd, page, pte_page(pmd_pte(*pmd))); + // printk(KERN_ERR "pmd_pfn=0x%16llx, pte_pfn=0x%16llx\n", pmd_pfn(*pmd), pte_pfn(*pte)); + // pte_free(ko_mm, pte_page(pmd_pte(*pmd))); + do { + pte = pte_offset_kernel(pmd, addr); + debug_printk("pte=0x%16llx, pte_val=0x%16llx\n", pte, + pte_val(*pte)); + set_pte(pte, __pte(0)); + } while (addr += PAGE_SIZE, addr != end); +} + +void koi_unmap_pmd_range(struct mm_struct *ko_mm, pud_t *pud, + unsigned long addr, unsigned long end) +{ + pmd_t *pmd, *orig_pmd; + unsigned long next; + if (!pud) + return; + orig_pmd = pmd_offset(pud, addr); + pmd = orig_pmd; + debug_printk("pud=0x%16llx, addr=0x%16llx, end=0x%16llx\n", pud, addr, + end); + // printk(KERN_ERR "pud_pfn=0x%16llx, pmd_pfn=0x%16llx\n", pud_pfn(*pud), pmd_pfn(*pmd)); + do { + debug_printk(KERN_ERR "pmd=0x%16llx, pmd_val=0x%16llx\n", pmd, pmd_val(*pmd)); + next = pmd_addr_end(addr, end); + if (pmd_none(*pmd)) + continue; + if (pmd_bad(*pmd)) { + set_pmd(pmd, __pmd(0)); + continue; + } + koi_unmap_pte_table(ko_mm, pmd, addr, next); + } while (pmd++, addr = next, addr != end); +} + +void koi_unmap_pud_range(struct mm_struct *ko_mm, 
p4d_t *p4d, + unsigned long addr, unsigned long end) +{ + pud_t *pud, *orig_pud; + unsigned long next; + if (!p4d) + return; + orig_pud = pud_offset(p4d, addr); + pud = orig_pud; + debug_printk("p4d=0x%16llx, addr=0x%16llx, end=0x%16llx\n", p4d, addr, + end); + do { + // printk(KERN_ERR "pud=0x%16llx, pud_val=0x%16llx\n", pud, pud_val(*pud)); + next = pud_addr_end(addr, end); + if (pud_none(*pud)) + continue; + if (pud_bad(*pud)) { + set_pud(pud, __pud(0)); + continue; + } + koi_unmap_pmd_range(ko_mm, pud, addr, next); + } while (pud++, addr = next, addr != end); + debug_printk("free pud 0x%16llx, p4d=0x%16llx, orig_pud=0x%16llx\n", + pud, p4d, orig_pud); + // pud_free(ko_mm, (unsigned long)orig_pud & PAGE_MASK); +} + +void koi_unmap_p4d_range(struct mm_struct *ko_mm, pgd_t *pgd, + unsigned long addr, unsigned long end) +{ + p4d_t *p4d, *orig_p4d; + unsigned long next; + if (!pgd) + return; + debug_printk("pgd=0x%16llx, addr=0x%16llx, end=0x%16llx\n", pgd, addr, + end); + orig_p4d = p4d_offset(pgd, addr); + p4d = orig_p4d; + do { + next = p4d_addr_end(addr, end); + debug_printk(KERN_ERR "p4d=0x%16llx, p4d_val=0x%16llx, p4d_none=%d\n", p4d, p4d_val(*p4d), p4d_none(*p4d)); + if (p4d_none_or_clear_bad(p4d)) { + // printk(KERN_ERR "p4d none=%d, p4d_bad=%d\n", p4d_none(*p4d), p4d_bad(*p4d)); + continue; + } + koi_unmap_pud_range(ko_mm, p4d, addr, next); + } while (p4d++, addr = next, addr != end); +} + +void koi_unmap_pagetable(struct mm_struct *ko_mm, pgd_t *ko_pg_dir, + unsigned long addr, unsigned long end) +{ + unsigned long next; + pgd_t *pgd = pgd_offset_pgd(ko_pg_dir, addr); + debug_printk("freepagetable addr=0x%16llx, end=0x%16llx\n", addr, end); + do { + next = pgd_addr_end(addr, end); + // printk(KERN_ERR "pgd=0x%16llx, pgd_val=0x%16llx\n", pgd, pgd_val(*pgd)); + if (pgd_none_or_clear_bad(pgd)) { + // printk(KERN_ERR "pgd none\n"); + continue; + } + koi_unmap_p4d_range(ko_mm, pgd, addr, next); + } while (pgd++, addr = next, addr != end); +} + +void koi_remove_pte_range(struct mm_struct *ko_mm, pgd_t *ko_pg_dir, pmd_t *pmd) +{ + pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd); + debug_printk( + "pte=0x%16llx, page=0x%16llx, pmd=0x%16llx, pmd_val=0x%16llx\n", + pte, pte_page(pmd_pte(*pmd)), pmd, pmd_val(*pmd)); + // printk(KERN_ERR "pte=0x%16llx, pte_val=0x%16llx, pte_page=0x%16llx\n", pte, pte_val(*pte), pte_page(*pte)); + printk(KERN_ERR "free orig_pte=0x%16llx\n", pte); + pte_free_kernel(ko_mm, pte); + // __free_page((unsigned long)pte_page(pmd_pte(*pmd))); + // free_page((unsigned long)pte); +} + +void koi_remove_pmd_range(struct mm_struct *ko_mm, pgd_t *ko_pg_dir, pud_t *pud) +{ + pmd_t *orig_pmd = pud_pgtable(*pud); + pmd_t *pmd; + int i; + for (i = 0; i < PTRS_PER_PMD; i++) { + pmd = orig_pmd + i; + if (pmd_none(*pmd) || pmd_bad(*pmd)) + continue; + debug_printk("pmd=0x%16llx, pmd_val=0x%16llx\n", pmd, + pmd_val(*pmd)); + koi_remove_pte_range(ko_mm, ko_pg_dir, pmd); + } + debug_printk("free pmd=0x%16llx, page=0x%16llx\n", orig_pmd, + pte_page(pud_pte(*pud))); + printk(KERN_ERR "free orig_pmd=0x%16llx\n", orig_pmd); + pmd_free(ko_mm, orig_pmd); + // free_page((unsigned long)orig_pmd); +} + +void koi_remove_pud_range(struct mm_struct *ko_mm, pgd_t *ko_pg_dir, p4d_t *p4d) +{ + pud_t *orig_pud = p4d_pgtable(*p4d); + pud_t *pud; + int i; + for (i = 0; i < PTRS_PER_PUD; i++) { + pud = orig_pud + i; + if (pud_none(*pud) || pud_bad(*pud)) + continue; + debug_printk("pud=0x%16llx, pud_val=0x%16llx\n", pud, + pud_val(*pud)); + koi_remove_pmd_range(ko_mm, ko_pg_dir, pud); + } + 
debug_printk("free pud=0x%16llx, page=0x%16llx\n", orig_pud, + pte_page(p4d_pte(*p4d))); + printk(KERN_ERR "free orig_pud=0x%16llx\n", orig_pud); + pud_free(ko_mm, orig_pud); + // free_page((unsigned long)orig_pud); +} + +void koi_remove_pagetable(struct mm_struct *ko_mm, pgd_t *ko_pg_dir) +{ + pgd_t *pgd; + int i; + for (i = 0; i < PTRS_PER_PGD; i++) { + pgd = ko_pg_dir + i; + if (p4d_none(*(p4d_t *)pgd) || p4d_bad(*(p4d_t *)pgd)) + continue; + debug_printk("pgd=0x%16llx, pgd_val=0x%16llx\n", pgd, + pgd_val(*pgd)); + koi_remove_pud_range(ko_mm, ko_pg_dir, (p4d_t *)pgd); + } + debug_printk("free pgd=0x%16llx\n", ko_pg_dir); + pgd_free(ko_mm, ko_pg_dir); + flush_tlb_all(); +} + +void koi_destroy_pagetable(struct module *mod) +{ + // int cpu; + // unsigned long *ptr; + struct koi_mem_hash_node *target = NULL; + struct koi_mem_list *mem_node; + struct koi_addr_map *addr_map_node; + unsigned long bkt; + unsigned long vbar; + unsigned long flags; + asm volatile("mrs %0, VBAR_EL1\n" : "=r"(vbar) :); + rcu_read_lock(); + hash_for_each_possible_rcu (koi_mem_htbl, target, node, + (unsigned long)mod) { + if (target->mod == mod) { + break; + } + } + rcu_read_unlock(); + if (target == NULL) { + // printk(KERN_ERR "mem node for module: %s not found, maybe destroyed before?\n", + // mod->name); + return; + } + spin_lock_irqsave(&target->mod_lock, flags); + target->is_valid = false; + spin_unlock_irqrestore(&target->mod_lock, flags); + + spin_lock_irqsave(&koi_mem_htbl_spin_lock, flags); + hash_del_rcu(&target->node); + call_rcu(&target->rcu, koi_mem_hash_node_free); + spin_unlock_irqrestore(&koi_mem_htbl_spin_lock, flags); + + // free addr_htbl + spin_lock(&target->addr_htbl_spin_lock); + hash_for_each_rcu (target->addr_htbl, bkt, addr_map_node, node) { + hash_del_rcu(&addr_map_node->node); + call_rcu(&addr_map_node->rcu, koi_addr_map_node_free); + } + spin_unlock(&target->addr_htbl_spin_lock); + // free free mem list + spin_lock(&target->spin_lock); + list_for_each_entry_rcu (mem_node, &target->mem_list_head, list) { + list_del_rcu(&mem_node->list); + call_rcu(&mem_node->rcu, koi_mem_node_free); + } + spin_unlock(&target->spin_lock); + + koi_remove_pagetable(target->ko_mm, target->ko_mm->pgd); + kfree(target->ko_mm); +} + +/** +* koi_create_pagetable - create pagetable for driver +* @mod: driver module +* 1.create a new koi_mem_hash_node new_node +* 2.create page table return the pgd address, init the new_node->pgdp +* 3.create and init the new_node->ko_mm +* 4.map swapper_ttbr1 to the newly created pagetable +* 5.map the interrupt vector table to the newly created pagetable +* 6.map the init_layout of the module +* 7.map the core_layout of the module +* 8.map switch_to_kernel_pgtable into driver view +* 9.map share memory +*/ +void koi_create_pagetable(struct module *mod) +{ + int ret = 0, cpu; + unsigned long vbar, addr, ttbr1; + pgd_t *pgdp; + unsigned long *ptr; + struct koi_mem_list *new_mem_node; + struct koi_mem_hash_node *new_node = + kzalloc(sizeof(struct koi_mem_hash_node), GFP_KERNEL); + if (!new_node) { + // printk(KERN_ERR "NULL new_node\n"); + return; + }; + if (koi_swapper_ttbr1 == 0) { + pgdp = lm_alias(swapper_pg_dir); + ttbr1 = phys_to_ttbr(virt_to_phys(pgdp)); + if (system_supports_cnp() && + !WARN_ON(pgdp != lm_alias(swapper_pg_dir))) + ttbr1 |= TTBR_CNP_BIT; + + koi_swapper_ttbr1 = ttbr1; + // __WRITE_ONCE(koi_swapper_ttbr1, ttbr1); + // koi_set_rdonly(&koi_swapper_ttbr1, swapper_pg_dir); + } + new_node->pgdp = koi_pgd_alloc(); + new_node->ko_mm = + kzalloc(sizeof(struct mm_struct) 
+ + sizeof(unsigned long) * BITS_TO_LONGS(NR_CPUS), + GFP_KERNEL); + init_ko_mm(new_node->ko_mm, new_node->pgdp); + new_node->mod = mod; + koi_save_ttbr(mod, new_node->pgdp, new_node); + debug_printk("copying koi_data, start=0x%16llx, end=0x%16llx\n", + (unsigned long)__koi_data_start, + (unsigned long)__koi_data_end); + // copy koi_swapper_ttbr1, which records page dir base for kernel view + koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, + (unsigned long)__koi_data_start, + (unsigned long)__koi_data_end, (0)); + asm volatile("mrs %0, VBAR_EL1\n" : "=r"(vbar) :); + + // copy interrupt vectors + printk(KERN_ERR + "\033[33mcopying interrupt vectors, start=0x%16llx, end=0x%16llx, vbar=0x%16llx\033[0m\n", + vbar & PAGE_MASK, (vbar + PAGE_SIZE) & PAGE_MASK, vbar); + koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, vbar & PAGE_MASK, + (vbar + PAGE_SIZE) & PAGE_MASK, (0)); + + for_each_mod_mem_type(type) { + printk(KERN_ERR "\033[33mcopying mem range, start=0x%16llx, end=0x%16llx\033[0m\n", + (unsigned long)mod->mem[type].base, + (unsigned long)mod->mem[type].base + mod->mem[type].size); + if (!mod->mem[type].base || !mod->mem[type].size) { + continue; + } + ret = koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, (unsigned long)mod->mem[type].base, (unsigned long)mod->mem[type].base + mod->mem[type].size, (0)); + if (ret != 0) + printk(KERN_ERR + "\033[33mError occurred when copying range from 0x%llx to 0x%llx, Eno:%d\033[0m\n", + (unsigned long)mod->mem[type].base, + (unsigned long)mod->mem[type].base + mod->mem[type].size, + ret); + } + + // mapping switch_to_kernel_pgtable into driver view, which is used to switch to kernel view when entering INT + printk(KERN_ERR "\033[33mcopying koi_code_range, start=0x%16llx, end=0x%16llx\033[0m\n", + (unsigned long)__koi_code_start, + (unsigned long)__koi_code_end); + koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, + (unsigned long)__koi_code_start, + (unsigned long)__koi_code_end, (0)); + + for_each_possible_cpu (cpu) { + ptr = per_cpu(irq_stack_ptr, cpu); + debug_printk( + "\033[33mirq_stack_ptr on cpu %d addr=0x%16llx, end=0x%16llx\033[0m\n", + cpu, (unsigned long)ptr, + (unsigned long)ptr + IRQ_STACK_SIZE); + koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, + (unsigned long)ptr, + (unsigned long)ptr + IRQ_STACK_SIZE, (0)); + } + + for_each_possible_cpu (cpu) { + ptr = per_cpu(koi_irq_current_ttbr1, cpu); + debug_printk( + "\033[33mirq_current_ptr on cpu %d addr=0x%16llx, end=0x%16llx\033[0m\n", + cpu, (unsigned long)ptr, + (unsigned long)ptr + PAGE_SIZE); + koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, + (unsigned long)ptr, + (unsigned long)ptr + PAGE_SIZE, (0)); + } + +#ifdef CONFIG_IEE + debug_printk("\033[33miee_si addr=0x%16llx, end=0x%16llx\033[0m\n", + (unsigned long)__iee_si_data_start, + (unsigned long)__iee_si_text_start); + // mapping iee_rwx_gate_entry and iee_si_base to ko's pagetable + koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, + (unsigned long)__iee_si_data_start, + (unsigned long)__iee_si_text_start, (0)); + debug_printk("\033[33miee_si mapping finished\n"); +#endif + + // alloc 16KB memory for new ko, and add it into hashtable + addr = (unsigned long)kmalloc(THREAD_SIZE, GFP_KERNEL); + if ((void *)addr == NULL) { + printk(KERN_ERR "alloc buffer error\n"); + } + debug_printk( + "\033[33mcopying buffer, start=0x%16llx, end=0x%16llx\033[0m\n", + addr, addr + THREAD_SIZE); + koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, addr, + addr + THREAD_SIZE, (0)); +#ifdef CONFIG_IEE + iee_rw_gate(IEE_SET_KOI_PGD, 
new_node->ko_mm->pgd); +#endif + new_mem_node = kmalloc(sizeof(struct koi_mem_list), GFP_KERNEL); + if (new_mem_node == NULL) { + printk(KERN_ERR "alloc new_mem_node error\n"); + } + new_mem_node->addr = addr; + new_mem_node->size = THREAD_SIZE; + + new_node->mem_list_head = + (struct list_head)LIST_HEAD_INIT(new_node->mem_list_head); + hash_init(new_node->addr_htbl); + spin_lock_init(&new_node->addr_htbl_spin_lock); + spin_lock_init(&new_node->spin_lock); + spin_lock_init(&new_node->mod_lock); + new_node->is_valid = true; + + spin_lock(&new_node->spin_lock); + list_add_rcu(&new_mem_node->list, &new_node->mem_list_head); + spin_unlock(&new_node->spin_lock); + + spin_lock(&koi_mem_htbl_spin_lock); + hash_add_rcu(koi_mem_htbl, &new_node->node, + (unsigned long)new_node->mod); + spin_unlock(&koi_mem_htbl_spin_lock); + + // printk(KERN_DEBUG "mod=0x%16llx, end=0x16llx\n", mod, (unsigned long)mod + sizeof(struct module)); + printk(KERN_ERR "[KOI] create pagetable pgd=0x%16llx for module %s\n", new_node->pgdp, mod->name); + // printk(KERN_ERR "koi mm=0x%16llx\n", (unsigned long)(new_node->ko_mm)); +} +/** +* koi_mem_alloc +*@mod: driver module +*@orig_addr: the starting address of the parameter in kernel +*@size: the size of the parameter +*/ +unsigned long koi_mem_alloc(struct module *mod, unsigned long orig_addr, + unsigned long size) +{ + struct koi_mem_hash_node *target = NULL; + struct koi_mem_list *mem_node; + struct koi_addr_map *new_addr_node; + unsigned long addr = 0, flags; + struct koi_mem_list *new_mem_node; + rcu_read_lock(); + hash_for_each_possible_rcu (koi_mem_htbl, target, node, + (unsigned long)mod) { + if (target->mod == mod) { + break; + } + } + rcu_read_unlock(); + if (target == NULL) { + printk(KERN_ERR "mem node for module: %s not found\n", + mod->name); + return 0; + } + spin_lock_irqsave(&target->spin_lock, flags); + list_for_each_entry_rcu (mem_node, &target->mem_list_head, list) { + if (mem_node->size >= size) { + addr = mem_node->addr; + mem_node->size -= size; + if (mem_node->size == 0) { + list_del_rcu(&mem_node->list); + } else { + new_mem_node = + kmalloc(sizeof(struct koi_mem_list), + GFP_ATOMIC); + new_mem_node->addr = addr + size; + new_mem_node->size = mem_node->size; + list_replace_rcu(&mem_node->list, + &new_mem_node->list); + } + call_rcu(&mem_node->rcu, koi_mem_node_free); + } + } + spin_unlock_irqrestore(&target->spin_lock, flags); + if (!addr) { + addr = (unsigned long)kmalloc(THREAD_SIZE, GFP_KERNEL); + if ((void *)addr == NULL) { + return 0; + } + koi_copy_pagetable(target->ko_mm, target->pgdp, addr, + addr + THREAD_SIZE, (0)); + mem_node = kmalloc(sizeof(struct koi_mem_list), GFP_KERNEL); + if (!mem_node) { + printk(KERN_ERR "NULL mem_node\n"); + } + if (size > THREAD_SIZE) { + return 0; + } + mem_node->addr = addr + size; + mem_node->size = THREAD_SIZE - size; + spin_lock_irqsave(&target->spin_lock, flags); + list_add_tail_rcu(&mem_node->list, &target->mem_list_head); + spin_unlock_irqrestore(&target->spin_lock, flags); + } + + new_addr_node = kzalloc(sizeof(struct koi_addr_map), GFP_KERNEL); + new_addr_node->buffer_addr = addr; + new_addr_node->orig_addr = orig_addr; + spin_lock_irqsave(&target->addr_htbl_spin_lock, flags); + hash_add_rcu(target->addr_htbl, &new_addr_node->node, + new_addr_node->buffer_addr); + spin_unlock_irqrestore(&target->addr_htbl_spin_lock, flags); + return addr; +} +EXPORT_SYMBOL(koi_mem_alloc); +// find the parameter pointer corresponding to the copy +noinline void *koi_mem_lookup(struct module *mod, unsigned long addr) +{ + 
struct koi_mem_hash_node *target = NULL; + struct koi_addr_map *addr_map_node; + unsigned long orig_addr = addr; + rcu_read_lock(); + hash_for_each_possible_rcu (koi_mem_htbl, target, node, + (unsigned long)mod) { + if (target->mod == mod) { + break; + } + } + rcu_read_unlock(); + if (target == NULL) { + printk(KERN_ERR "mem node for module: %s not found\n", + mod->name); + return NULL; + } + + rcu_read_lock(); + hash_for_each_possible_rcu (target->addr_htbl, addr_map_node, node, + orig_addr) { + if (addr_map_node->buffer_addr == addr) { + break; + } + } + rcu_read_unlock(); + if (addr_map_node) { + return (void *)(addr_map_node->orig_addr); + } else { + return NULL; + } +} +EXPORT_SYMBOL(koi_mem_lookup); +/** +* koi_mem_free - reclaim the copy of the copied parameters and synchronize them back to the originals +* @mod: driver module +* @addr: the starting address of the parameter +* @size: the size of the parameter +* @is_const: whether the parameter pointer is const +* @count: the number of variadic offset arguments that follow +*/ +noinline void koi_mem_free(struct module *mod, unsigned long addr, + unsigned long size, bool is_const, int count, ...) +{ + struct koi_mem_hash_node *target = NULL; + struct koi_mem_list *mem_node; + struct list_head *pos = NULL; + struct koi_addr_map *addr_map_node; + unsigned long orig_size = size; + unsigned long orig_addr = addr; + va_list valist; + int i; + unsigned int offset; + unsigned long flags; + rcu_read_lock(); + hash_for_each_possible_rcu (koi_mem_htbl, target, node, + (unsigned long)mod) { + if (target->mod == mod) { + break; + } + } + rcu_read_unlock(); + if (target == NULL) { + printk(KERN_ERR "mem node for module: %s not found\n", + mod->name); + return; + } + + rcu_read_lock(); + hash_for_each_possible_rcu (target->addr_htbl, addr_map_node, node, + orig_addr) { + if (addr_map_node->buffer_addr == orig_addr) { + break; + } + } + rcu_read_unlock(); + va_start(valist, count); + for (i = 0; i < count; i++) { + offset = va_arg(valist, int); + *(unsigned long *)(addr_map_node->buffer_addr + offset) = + *(unsigned long *)(addr_map_node->orig_addr + offset); + } + va_end(valist); + memcpy((void *)addr_map_node->orig_addr, + (void *)addr_map_node->buffer_addr, orig_size); + + spin_lock_irqsave(&target->addr_htbl_spin_lock, flags); + hlist_del_init_rcu(&addr_map_node->node); + call_rcu(&addr_map_node->rcu, koi_addr_map_node_free); + spin_unlock_irqrestore(&target->addr_htbl_spin_lock, flags); + + spin_lock_irqsave(&target->spin_lock, flags); + list_for_each_entry_rcu (mem_node, &target->mem_list_head, list) { + if (mem_node->addr + mem_node->size == addr) { + pos = mem_node->list.prev; + addr = mem_node->addr; + size += mem_node->size; + list_del_rcu(&mem_node->list); + call_rcu(&mem_node->rcu, koi_mem_node_free); + } else if (addr + size == mem_node->addr) { + if (!pos) + pos = mem_node->list.prev; + size += mem_node->size; + list_del_rcu(&mem_node->list); + call_rcu(&mem_node->rcu, koi_mem_node_free); + } else if (addr + size < mem_node->addr) { + if (!pos) + pos = mem_node->list.prev; + break; + } + } + mem_node = kzalloc(sizeof(struct koi_mem_list), GFP_ATOMIC); + mem_node->addr = addr; + mem_node->size = size; + if (pos) + list_add_rcu(&mem_node->list, pos); + else + list_add_tail_rcu(&mem_node->list, &target->mem_list_head); + spin_unlock_irqrestore(&target->spin_lock, flags); +} +EXPORT_SYMBOL(koi_mem_free); +/** +* koi_mem_free_callback - used to recycle the copy of the parameter. 
+*@addr: the address of the parameter +*@(*func)(void*): callback func, used to release the copy of the parameter pointer +*/ +noinline void koi_mem_free_callback(struct module *mod, unsigned long addr, + unsigned long size, void (*func)(void *)) +{ + struct koi_mem_hash_node *target = NULL; + struct koi_mem_list *mem_node; + struct list_head *pos = NULL; + struct koi_addr_map *addr_map_node; + unsigned long flags; + unsigned long orig_size = size; + unsigned long orig_addr = addr; + rcu_read_lock(); + hash_for_each_possible_rcu (koi_mem_htbl, target, node, + (unsigned long)mod) { + if (target->mod == mod) { + break; + } + } + rcu_read_unlock(); + if (target == NULL) { + // printk("mem node for module: %s not found\n", mod->name); + return; + } + + rcu_read_lock(); + hash_for_each_possible_rcu (target->addr_htbl, addr_map_node, node, + orig_addr) { + if (addr_map_node->buffer_addr == orig_addr) { + break; + } + } + rcu_read_unlock(); + if (addr_map_node != NULL) { + memcpy((void *)addr_map_node->orig_addr, + (void *)addr_map_node->buffer_addr, orig_size); + func((void *)addr_map_node->orig_addr); + } else { + printk("Cannot find addr_map_node in addr_htbl, maybe addr is in kernel space!!\n"); + func((void *)orig_addr); + } + + spin_lock_irqsave(&target->addr_htbl_spin_lock, flags); + if (addr_map_node != NULL) { + hlist_del_init_rcu(&addr_map_node->node); + call_rcu(&addr_map_node->rcu, koi_addr_map_node_free); + } + spin_unlock_irqrestore(&target->addr_htbl_spin_lock, flags); + spin_lock_irqsave(&target->spin_lock, flags); + list_for_each_entry_rcu (mem_node, &target->mem_list_head, list) { + if (mem_node->addr + mem_node->size == addr) { + pos = mem_node->list.prev; + addr = mem_node->addr; + size += mem_node->size; + list_del_rcu(&mem_node->list); + call_rcu(&mem_node->rcu, koi_mem_node_free); + } else if (addr + size == mem_node->addr) { + if (!pos) + pos = mem_node->list.prev; + size += mem_node->size; + list_del_rcu(&mem_node->list); + call_rcu(&mem_node->rcu, koi_mem_node_free); + } else if (addr + size < mem_node->addr) { + if (!pos) + pos = mem_node->list.prev; + break; + } + } + mem_node = kzalloc(sizeof(struct koi_mem_list), GFP_ATOMIC); + mem_node->addr = addr; + mem_node->size = size; + if (pos) + list_add_rcu(&mem_node->list, pos); + else + list_add_tail_rcu(&mem_node->list, &target->mem_list_head); + spin_unlock_irqrestore(&target->spin_lock, flags); +} +EXPORT_SYMBOL(koi_mem_free_callback); + +void koi_map_mem(struct module *mod, unsigned long addr, unsigned long size) +{ + struct koi_mem_hash_node *target = NULL; + rcu_read_lock(); + hash_for_each_possible_rcu (koi_mem_htbl, target, node, + (unsigned long)mod) { + if (target->mod == mod) + break; + } + rcu_read_unlock(); + + if (target == NULL) { + printk(KERN_ERR "mem node for module: %s not found\n", + mod->name); + return; + } + koi_copy_pagetable(target->ko_mm, target->pgdp, addr & PAGE_MASK, + (addr + size + PAGE_SIZE) & PAGE_MASK, (0)); + flush_tlb_kernel_range(addr & PAGE_MASK, + (addr + size + PAGE_SIZE) & PAGE_MASK); +} +EXPORT_SYMBOL(koi_map_mem); + +void koi_unmap_mem(struct module *mod, unsigned long addr, unsigned long size) +{ + struct koi_mem_hash_node *target = NULL; + if (!addr || ! 
size) { + return; + } + rcu_read_lock(); + hash_for_each_possible_rcu (koi_mem_htbl, target, node, + (unsigned long)mod) { + if (target->mod == mod) + break; + } + rcu_read_unlock(); + + if (target == NULL) { + printk(KERN_ERR "[KOI UNMAP] mem node for module: %s not found\n", + mod->name); + return; + } + koi_unmap_pagetable(target->ko_mm, target->pgdp, addr & PAGE_MASK, + (addr + size + PAGE_SIZE) & PAGE_MASK); + flush_tlb_kernel_range(addr & PAGE_MASK, + (addr + size + PAGE_SIZE) & PAGE_MASK); +} +EXPORT_SYMBOL(koi_unmap_mem); +/** +* koi_mem_free_to_user - function 'copy_to_user' in driver space +*/ +void koi_mem_free_to_user(struct module *mod, unsigned long addr, + unsigned long size) +{ + struct koi_mem_hash_node *target = NULL; + struct koi_mem_list *mem_node; + struct list_head *pos = NULL; + struct koi_addr_map *addr_map_node; + unsigned long flags; + unsigned long orig_size = size; + unsigned long orig_addr = addr; + rcu_read_lock(); + hash_for_each_possible_rcu (koi_mem_htbl, target, node, + (unsigned long)mod) { + if (target->mod == mod) { + break; + } + } + rcu_read_unlock(); + if (target == NULL) { + printk(KERN_ERR "mem node for module: %s not found\n", + mod->name); + return; + } + + rcu_read_lock(); + hash_for_each_possible_rcu (target->addr_htbl, addr_map_node, node, + orig_addr) { + if (addr_map_node->buffer_addr == orig_addr) { + break; + } + } + rcu_read_unlock(); + if (copy_to_user((void *)addr_map_node->orig_addr, + (void *)addr_map_node->buffer_addr, orig_size)) { + return; + } + + spin_lock_irqsave(&target->addr_htbl_spin_lock, flags); + hlist_del_init_rcu(&addr_map_node->node); + call_rcu(&addr_map_node->rcu, koi_addr_map_node_free); + spin_unlock_irqrestore(&target->addr_htbl_spin_lock, flags); + spin_lock_irqsave(&target->spin_lock, flags); + list_for_each_entry_rcu (mem_node, &target->mem_list_head, list) { + if (mem_node->addr + mem_node->size == addr) { + pos = mem_node->list.prev; + addr = mem_node->addr; + size += mem_node->size; + list_del_rcu(&mem_node->list); + call_rcu(&mem_node->rcu, koi_mem_node_free); + } else if (addr + size == mem_node->addr) { + if (!pos) + pos = mem_node->list.prev; + size += mem_node->size; + list_del_rcu(&mem_node->list); + call_rcu(&mem_node->rcu, koi_mem_node_free); + } else if (addr + size < mem_node->addr) { + if (!pos) + pos = mem_node->list.prev; + break; + } + } + mem_node = kzalloc(sizeof(struct koi_mem_list), GFP_ATOMIC); + mem_node->addr = addr; + mem_node->size = size; + if (pos) + list_add_rcu(&mem_node->list, pos); + else + list_add_tail_rcu(&mem_node->list, &target->mem_list_head); + spin_unlock_irqrestore(&target->spin_lock, flags); +} +EXPORT_SYMBOL(koi_mem_free_to_user); +// map the driver stack to kernel +void koi_map_kostack(struct module *mod) +{ +} +EXPORT_SYMBOL(koi_map_kostack); + +#ifndef CONFIG_IEE +void koi_init_token(struct task_struct *tsk) +{ + struct task_token *token_addr = + (struct task_token *)(__phys_to_virt(__pa(tsk)) + KOI_OFFSET); + + token_addr->koi_kernel_stack = NULL; + // token_addr->koi_stack = NULL; + // token_addr->koi_stack_base = NULL; + token_addr->current_ttbr1 = 0; +} + +#endif \ No newline at end of file diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index c4999d2d7ce8..601b5bc6a2ae 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -79,8 +79,13 @@ int memcmp_pages(struct page *page1, struct page *page2) static inline void __mte_enable_kernel(const char *mode, unsigned long tcf) { /* Enable MTE Sync Mode for EL1. 
*/ +#ifdef CONFIG_IEE + sysreg_clear_set_iee_si(sctlr_el1, SCTLR_EL1_TCF_MASK, + SYS_FIELD_PREP(SCTLR_EL1, TCF, tcf)); +#else sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF_MASK, SYS_FIELD_PREP(SCTLR_EL1, TCF, tcf)); +#endif isb(); pr_info_once("MTE: enabled in %s mode at EL1\n", mode); diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 068e5bb2661b..e545a2df805f 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -471,7 +471,14 @@ DEFINE_PER_CPU(struct task_struct *, __entry_task); static void entry_task_switch(struct task_struct *next) { + #if defined(CONFIG_IEE) || defined (CONFIG_KOI) + if(next == &init_task) + __this_cpu_write(__entry_task, (struct task_struct *)__va(__pa_symbol(next))); + else + __this_cpu_write(__entry_task, next); + #else __this_cpu_write(__entry_task, next); + #endif } /* @@ -506,11 +513,15 @@ static void erratum_1418040_new_exec(void) */ void update_sctlr_el1(u64 sctlr) { - /* + /* * EnIA must not be cleared while in the kernel as this is necessary for * in-kernel PAC. It will be cleared on kernel exit if needed. */ + #ifdef CONFIG_IEE + sysreg_clear_set_iee_si(sctlr_el1, SCTLR_USER_MASK & ~SCTLR_ELx_ENIA, sctlr); + #else sysreg_clear_set(sctlr_el1, SCTLR_USER_MASK & ~SCTLR_ELx_ENIA, sctlr); + #endif /* ISB required for the kernel uaccess routines when setting TCF0. */ isb(); diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 4bca7ad7b5e3..a584557625f7 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -551,7 +551,11 @@ static enum mitigation_state spectre_v4_enable_hw_mitigation(void) return state; if (spectre_v4_mitigations_off()) { +#ifdef CONFIG_IEE + sysreg_clear_set_iee_si(sctlr_el1, 0, SCTLR_ELx_DSSBS); +#else sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS); +#endif set_pstate_ssbs(1); return SPECTRE_VULNERABLE; } @@ -987,7 +991,11 @@ static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot) if (arm64_kernel_unmapped_at_el0()) return; +#ifdef CONFIG_IEE + iee_rwx_gate_entry(IEE_WRITE_vbar_el1, v); +#else write_sysreg(v, vbar_el1); +#endif isb(); } diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index d82fd6902ea8..84f4fce0b8bc 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -33,6 +33,11 @@ #include #include +#ifdef CONFIG_IEE +#include +#include +#endif + #include #include #include @@ -336,10 +341,31 @@ u64 cpu_logical_map(unsigned int cpu) return __cpu_logical_map[cpu]; } +#ifdef CONFIG_IEE +/* used for secure modification of vbar*/ +extern char __bp_harden_el1_vectors[]; +/* prepare iee rwx gate for senario of ttbr1=init_pg_dir */ +static void __init iee_si_init_early(void) +{ + /* prepare data used for iee rwx gate. 
*/ + iee_base_swapper_pg_dir = phys_to_ttbr(__pa_symbol(swapper_pg_dir)); + iee_base_idmap_pg_dir = phys_to_ttbr(__pa_symbol(idmap_pg_dir)); + iee_base_reserved_pg_dir = phys_to_ttbr(__pa_symbol(reserved_pg_dir)) + | FIELD_PREP(TTBR_ASID_MASK, 1); + iee_base__bp_harden_el1_vectors = (unsigned long)__bp_harden_el1_vectors; + iee_si_tcr = 0; +} +#endif + void __init __no_sanitize_address setup_arch(char **cmdline_p) { setup_initial_init_mm(_stext, _etext, _edata, _end); + #ifdef CONFIG_IEE + init_new_context(&init_task, &init_mm); + atomic64_set(&init_mm.context.id, (1UL << get_cpu_asid_bits()) | INIT_ASID); + #endif + *cmdline_p = boot_command_line; kaslr_init(); @@ -375,6 +401,14 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p) */ local_daif_restore(DAIF_PROCCTX_NOIRQ); +#ifdef CONFIG_IEE + /* + * Map iee si codes to init_pg_dir to run the following + * cpu_uninstall_idmap() which writes ttbr0. + */ + iee_si_init_early(); +#endif + /* * TTBR0 is only used for the identity mapping at this stage. Make it * point to zero page to avoid speculatively fetching new entries. diff --git a/arch/arm64/kernel/sfi_bpf_arch.c b/arch/arm64/kernel/sfi_bpf_arch.c new file mode 100644 index 000000000000..1201cd11a5de --- /dev/null +++ b/arch/arm64/kernel/sfi_bpf_arch.c @@ -0,0 +1,85 @@ +#include +#include +#include + +pte_t *bpf_sfi_get_ptep(u64 addr) +{ + pgd_t *pgdp; + p4d_t *p4dp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + + /* DEBUG check pgd */ + // u64 ttbr1_phy, ttbr1_vir; + // __asm__ volatile( + // "mrs %0, ttbr1_el1\n\t" + // : "=r" (ttbr1_phy) + // :: + // ); + // pr_debug("phy = 0x%llx, after mask = 0x%llx\n", ttbr1_phy, (u64)(ttbr1_phy << 16) >> 16); + // ttbr1_vir = (u64)__phys_to_kimg((u64)(ttbr1_phy << 16) >> 16); + // pr_info("1, ttbr1_vir = 0x%llx, \n", ttbr1_vir); + // pr_info("2, init_mm.pgd = 0x%llx\n", (u64)init_mm.pgd); + // pr_info("3, swapper_pg_dir = 0x%llx\n", (u64)swapper_pg_dir); + + pgdp = pgd_offset(&init_mm, addr); + if (pgd_none(*pgdp) || pgd_bad(*pgdp)) { + pr_err("get pgdp of 0x%llx failed\n", addr); + return ERR_PTR(-ENOENT); + } + + p4dp = p4d_offset(pgdp, addr); + if (p4d_none(*p4dp) || p4d_bad(*p4dp)) { + pr_err("get p4dp of 0x%llx failed\n", addr); + return ERR_PTR(-ENOENT); + } + + /* IMPORTANT judge huge page first, then judge table */ + pudp = pud_offset(p4dp, addr); + if (pud_huge(*pudp)) { + // pud is huge page + pr_warn("pud of 0x%llx is huge page", addr); + // return (pte_t *)pudp; + return ERR_PTR(-ENOTSUPP); + } + if (pud_none(*pudp) || pud_bad(*pudp)) { + pr_err("get pudp of 0x%llx failed\n", addr); + return ERR_PTR(-ENOENT); + } + + pmdp = pmd_offset(pudp, addr); + if (pmd_huge(*pmdp)) { + // pmd is huge page + pr_warn("pmd of 0x%llx is huge page", addr); + // return (pte_t *)pmdp; + return ERR_PTR(-ENOTSUPP); + } + if (pmd_none(*pmdp) || pmd_bad(*pmdp)) { + pr_err("get pmdp of 0x%llx failed\n", addr); + return ERR_PTR(-ENOENT); + } + + ptep = pte_offset_kernel(pmdp, addr); + if (!ptep) { + pr_err("get ptep of 0x%llx failed\n", addr); + return ERR_PTR(-ENOENT); + } + + return ptep; +} + +int bpf_sfi_hook_kernel_fault(u64 addr) +{ + pte_t *ptep; + + ptep = bpf_sfi_get_ptep(addr); + if (IS_ERR(ptep)) + return PTR_ERR(ptep); + + if (pte_val(*ptep) & PTE_BPF_SFI_GP) { + return true; + } + else + return false; +} \ No newline at end of file diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index b7b7afb4a8c7..168a9390d6e9 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -902,6 +902,32 @@ const char 
*esr_get_class_string(unsigned long esr) return esr_class_str[ESR_ELx_EC(esr)]; } +#ifdef CONFIG_IEE +extern void arm64_enter_nmi(struct pt_regs *regs); +static const char *handler[]= { + "SP_EL0", + "ELR_EL1", + "TCR_EL1", + "TTBR0 ASID" + "IEE_SI" +}; + +asmlinkage void notrace iee_bad_mode(struct pt_regs *regs, int reason, unsigned int esr) +{ + arm64_enter_nmi(regs); + + console_verbose(); + + pr_crit("IEE : Bad mode in %s check detected on CPU%d, code 0x%08x -- %s\n", + handler[reason], smp_processor_id(), esr, + esr_get_class_string(esr)); + + __show_regs(regs); + local_daif_mask(); + panic("bad mode"); +} +#endif + /* * bad_el0_sync handles unexpected, but potentially recoverable synchronous * exceptions taken from EL0. diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 3cd7e76cc562..008b69a83b91 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -134,6 +134,51 @@ jiffies = jiffies_64; #define UNWIND_DATA_SECTIONS #endif +#ifdef CONFIG_IEE +#define IEE_TEXT \ + . = ALIGN(PAGE_SIZE); \ + __iee_code_start = .; \ + *(.iee.text.header) \ + *(.iee.text) \ + . = ALIGN(PAGE_SIZE); \ + __iee_code_end = .; +#else +#define IEE_TEXT +#endif + +#ifdef CONFIG_IEE +#define IEE_SI_TEXT \ + . = ALIGN(PAGE_SIZE); \ + __iee_si_data_start = .; \ + *(.iee.si_data) \ + . = ALIGN(PAGE_SIZE); \ + __iee_exec_entry_start = .; \ + __iee_si_no_irq = . + (16); \ + *(.iee.exec_entry) \ + . = ALIGN(PAGE_SIZE); \ + __iee_si_text_start = .; \ + *(.iee.si_text) \ + . = ALIGN(PAGE_SIZE); \ + . += PAGE_SIZE - (24); \ + __iee_si_text_end = . + (24); \ + *(.iee.exec_exit) \ + . = ALIGN(PAGE_SIZE); + +#else +#define IEE_SI_TEXT +#endif + +#ifdef CONFIG_KOI +#define KOI_TEXT \ + . = ALIGN(PAGE_SIZE); \ + __koi_code_start = .; \ + *(.koi.text) \ + . = ALIGN(PAGE_SIZE); \ + __koi_code_end = .; +#else +#define KOI_TEXT +#endif + /* * The size of the PE/COFF section that covers the kernel image, which * runs from _stext to _edata, must be a round multiple of the PE/COFF @@ -176,10 +221,13 @@ SECTIONS SOFTIRQENTRY_TEXT ENTRY_TEXT TEXT_TEXT + IEE_TEXT SCHED_TEXT LOCK_TEXT KPROBES_TEXT HYPERVISOR_TEXT + IEE_SI_TEXT + KOI_TEXT *(.gnu.warning) } @@ -318,6 +366,13 @@ SECTIONS . += INIT_DIR_SIZE; init_pg_end = .; + #ifdef CONFIG_IEE + . = ALIGN(PAGE_SIZE*8); + init_iee_stack_begin = .; + . += PAGE_SIZE*4; + init_iee_stack_end = .; + #endif + . = ALIGN(SEGMENT_ALIGN); __pecoff_data_size = ABSOLUTE(. 
- __initdata_begin); _end = .; diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 188197590fc9..b6d8a7d0aeca 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -17,6 +17,10 @@ #include #include +#ifdef CONFIG_IEE +#include +#endif + static u32 asid_bits; static DEFINE_RAW_SPINLOCK(cpu_asid_lock); @@ -39,7 +43,11 @@ static unsigned long *pinned_asid_map; #define asid2ctxid(asid, genid) ((asid) | (genid)) /* Get the ASIDBits supported by the current CPU */ +#ifdef CONFIG_IEE +u32 get_cpu_asid_bits(void) +#else static u32 get_cpu_asid_bits(void) +#endif { u32 asid; int fld = cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64MMFR0_EL1), @@ -96,6 +104,16 @@ static void set_reserved_asid_bits(void) set_kpti_asid_bits(asid_map); else bitmap_clear(asid_map, 0, NUM_USER_ASIDS); + #ifdef CONFIG_IEE + unsigned int len = BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(unsigned long); + memset(asid_map, 0xaa, len); + __set_bit(INIT_ASID, asid_map); + #else + #ifdef CONFIG_KOI + unsigned int len = BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(unsigned long); + memset(asid_map, 0xaa, len); + #endif + #endif } #define asid_gen_match(asid) \ @@ -212,6 +230,38 @@ static u64 new_context(struct mm_struct *mm) return asid2ctxid(asid, generation); } +#ifdef CONFIG_KOI +/* + * This function is used to check and allocate ASID for ko's pgd + * The mm MUST point to the isolated kos' mm_struct, other behaviours are undefined. + */ +void koi_check_and_switch_context(struct mm_struct *mm) { + u64 asid = atomic64_read(&mm->context.id); + u64 old_active_asid; + unsigned long flags; + unsigned int cpu; + + old_active_asid = atomic64_read(this_cpu_ptr(&active_asids)); + if (old_active_asid && asid_gen_match(asid) && atomic64_cmpxchg_relaxed(this_cpu_ptr(&active_asids), old_active_asid, asid)) { + return; + } + + raw_spin_lock_irqsave(&cpu_asid_lock, flags); + asid = atomic64_read(&mm->context.id); + if (!asid_gen_match(asid)) { + asid = new_context(mm); + atomic64_set(&mm->context.id, asid); + } + + cpu = smp_processor_id(); + if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) + local_flush_tlb_all(); + + atomic64_set(this_cpu_ptr(&active_asids), asid); + raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); +} +#endif + void check_and_switch_context(struct mm_struct *mm) { unsigned long flags; @@ -348,7 +398,9 @@ asmlinkage void post_ttbr_update_workaround(void) void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm) { + #ifndef CONFIG_IEE unsigned long ttbr1 = read_sysreg(ttbr1_el1); + #endif unsigned long asid = ASID(mm); unsigned long ttbr0 = phys_to_ttbr(pgd_phys); @@ -360,14 +412,28 @@ void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm) if (IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN)) ttbr0 |= FIELD_PREP(TTBR_ASID_MASK, asid); - /* Set ASID in TTBR1 since TCR.A1 is set */ + #ifdef CONFIG_IEE + ttbr0 |= FIELD_PREP(TTBR_ASID_MASK, asid+1); + iee_rwx_gate_entry(IEE_CONTEXT_SWITCH, ttbr0); + // TODO : if defined CONFIG_IEE and defined CONFIG_KOI + #else + /* Set ASID in TTBR0 since TCR.A1 is set 0*/ + + #ifdef CONFIG_KOI + ttbr0 |= FIELD_PREP(TTBR_ASID_MASK, asid+1); + ttbr1 &= ~TTBR_ASID_MASK; + ttbr1 |= FIELD_PREP(TTBR_ASID_MASK, asid); + #else ttbr1 &= ~TTBR_ASID_MASK; ttbr1 |= FIELD_PREP(TTBR_ASID_MASK, asid); - + + #endif cpu_set_reserved_ttbr0_nosync(); write_sysreg(ttbr1, ttbr1_el1); write_sysreg(ttbr0, ttbr0_el1); isb(); + #endif + post_ttbr_update_workaround(); } @@ -375,11 +441,28 @@ static int asids_update_limit(void) { unsigned long num_available_asids = 
NUM_USER_ASIDS; - if (arm64_kernel_unmapped_at_el0()) { - num_available_asids /= 2; - if (pinned_asid_map) - set_kpti_asid_bits(pinned_asid_map); + if (arm64_kernel_unmapped_at_el0()) { + num_available_asids /= 2; + if (pinned_asid_map) + set_kpti_asid_bits(pinned_asid_map); } + #if defined(CONFIG_IEE) + num_available_asids /= 2; + if (pinned_asid_map) { + unsigned int len = BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(unsigned long); + memset(pinned_asid_map, 0xaa, len); + __set_bit(INIT_ASID, pinned_asid_map); + } + #else + #ifdef CONFIG_KOI + num_available_asids /= 2; + if (pinned_asid_map) { + unsigned int len = BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(unsigned long); + memset(pinned_asid_map, 0xaa, len); + } + #endif + #endif + /* * Expect allocation after rollover to fail if we don't have at least * one more ASID than CPUs. ASID #0 is reserved for init_mm. @@ -400,6 +483,10 @@ arch_initcall(asids_update_limit); static int asids_init(void) { + #if defined(CONFIG_IEE) || defined(CONFIG_KOI) + unsigned int len; + #endif + asid_bits = get_cpu_asid_bits(); atomic64_set(&asid_generation, ASID_FIRST_VERSION); asid_map = bitmap_zalloc(NUM_USER_ASIDS, GFP_KERNEL); @@ -410,13 +497,25 @@ static int asids_init(void) pinned_asid_map = bitmap_zalloc(NUM_USER_ASIDS, GFP_KERNEL); nr_pinned_asids = 0; - /* - * We cannot call set_reserved_asid_bits() here because CPU - * caps are not finalized yet, so it is safer to assume KPTI - * and reserve kernel ASID's from beginning. - */ - if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) - set_kpti_asid_bits(asid_map); + #ifdef CONFIG_IEE + len = BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(unsigned long); + memset(asid_map, 0xaa, len); + __set_bit(INIT_ASID, asid_map); + #else + #ifdef CONFIG_KOI + len = BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(unsigned long); + memset(asid_map, 0xaa, len); + #else + /* + * We cannot call set_reserved_asid_bits() here because CPU + * caps are not finalized yet, so it is safer to assume KPTI + * and reserve kernel ASID's from beginning. 
+ */ + if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) + set_kpti_asid_bits(asid_map); + #endif + #endif + return 0; } early_initcall(asids_init); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 4ea07caba71c..00f85acfc671 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -26,6 +26,9 @@ #include #include #include +#ifdef CONFIG_HIVE +#include +#endif #include #include @@ -44,6 +47,10 @@ #include #include +#ifdef CONFIG_HIVE +extern int bpf_sfi_hook_kernel_fault(u64 addr); +#endif + static int sysctl_machine_check_safe = IS_ENABLED(CONFIG_ARCH_HAS_COPY_MC); #ifdef CONFIG_ARCH_HAS_COPY_MC @@ -261,7 +268,15 @@ int __ptep_set_access_flags(struct vm_area_struct *vma, pteval ^= PTE_RDONLY; pteval |= pte_val(entry); pteval ^= PTE_RDONLY; + #ifdef CONFIG_KOI + if (pteval & PTE_VALID) + pteval |= PTE_NG; + #endif + #ifdef CONFIG_PTP + pteval = iee_set_cmpxchg_relaxed(ptep, old_pteval, pteval); + #else pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval); + #endif } while (pteval != old_pteval); /* Invalidate a stale read-only entry */ @@ -335,6 +350,14 @@ static void die_kernel_fault(const char *msg, unsigned long addr, { bust_spinlocks(1); +// #ifdef CONFIG_HIVE +// if (bpf_sfi_hook_kernel_fault(addr)) { +// pr_err("detected bpf sfi guard page %lx access\n", addr); +// regs->pc += 4; +// return; +// } +// #endif + pr_alert("Unable to handle kernel %s at virtual address %016lx\n", msg, addr); @@ -376,8 +399,13 @@ static void do_tag_recovery(unsigned long addr, unsigned long esr, * It will be done lazily on the other CPUs when they will hit a * tag fault. */ + #ifdef CONFIG_IEE + sysreg_clear_set_iee_si(sctlr_el1, SCTLR_EL1_TCF_MASK, + SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF, NONE)); + #else sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF_MASK, SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF, NONE)); + #endif isb(); } diff --git a/arch/arm64/mm/fixmap.c b/arch/arm64/mm/fixmap.c index a55b36d04590..fb80a7251a8e 100644 --- a/arch/arm64/mm/fixmap.c +++ b/arch/arm64/mm/fixmap.c @@ -32,6 +32,22 @@ static pte_t bm_pte[NR_BM_PTE_TABLES][PTRS_PER_PTE] __page_aligned_bss; static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused; static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused; +#ifdef CONFIG_IEE +void *bm_pte_addr = (void *)bm_pte; +void *bm_pmd_addr = (void *)bm_pmd; +void *bm_pud_addr = (void *)bm_pud; +#endif + +#ifdef CONFIG_PTP +extern void __iee_p4d_populate_pre_init(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot); +extern void __iee_pud_populate_pre_init(pud_t *pudp, phys_addr_t pmdp, pudval_t prot); +extern void __iee_pmd_populate_pre_init(pmd_t *pmdp, phys_addr_t ptep, + pmdval_t prot); + +extern void iee_set_p4d_pre_init(p4d_t *p4dp, p4d_t p4d); +#define set_pgd_init(pgdptr, pgdval) iee_set_p4d_pre_init((p4d_t *)(pgdptr), (p4d_t) { pgdval }) +#endif + static inline pte_t *fixmap_pte(unsigned long addr) { return &bm_pte[BM_PTE_TABLE_IDX(addr)][pte_index(addr)]; @@ -44,8 +60,12 @@ static void __init early_fixmap_init_pte(pmd_t *pmdp, unsigned long addr) if (pmd_none(pmd)) { ptep = bm_pte[BM_PTE_TABLE_IDX(addr)]; + #ifdef CONFIG_PTP + __iee_pmd_populate_pre_init(pmdp, __pa_symbol(ptep), PMD_TYPE_TABLE | PMD_TABLE_AF); + #else __pmd_populate(pmdp, __pa_symbol(ptep), PMD_TYPE_TABLE | PMD_TABLE_AF); + #endif } } @@ -56,10 +76,14 @@ static void __init early_fixmap_init_pmd(pud_t *pudp, unsigned long addr, pud_t pud = READ_ONCE(*pudp); pmd_t *pmdp; - if (pud_none(pud)) + if (pud_none(pud)) { + #ifdef CONFIG_PTP + __iee_pud_populate_pre_init(pudp, 
__pa_symbol(bm_pmd), PUD_TYPE_TABLE | PUD_TABLE_AF); + #else __pud_populate(pudp, __pa_symbol(bm_pmd), PUD_TYPE_TABLE | PUD_TABLE_AF); - + #endif + } pmdp = pmd_offset_kimg(pudp, addr); do { next = pmd_addr_end(addr, end); @@ -84,9 +108,14 @@ static void __init early_fixmap_init_pud(p4d_t *p4dp, unsigned long addr, BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); } - if (p4d_none(p4d)) + if (p4d_none(p4d)) { + #ifdef CONFIG_PTP + __iee_p4d_populate_pre_init(p4dp, __pa_symbol(bm_pud), P4D_TYPE_TABLE | P4D_TABLE_AF); + #else __p4d_populate(p4dp, __pa_symbol(bm_pud), P4D_TYPE_TABLE | P4D_TABLE_AF); + #endif + } pudp = pud_offset_kimg(p4dp, addr); early_fixmap_init_pmd(pudp, addr, end); @@ -109,6 +138,27 @@ void __init early_fixmap_init(void) early_fixmap_init_pud(p4dp, addr, end); } +#ifdef CONFIG_PTP +extern void iee_set_pte_pre_init(pte_t *ptep, pte_t pte); +void __iee_set_fixmap_pre_init(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags) +{ + unsigned long addr = __fix_to_virt(idx); + pte_t *ptep; + + BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses); + + ptep = fixmap_pte(addr); + + if (pgprot_val(flags)) { + iee_set_pte_pre_init(ptep, pfn_pte(phys >> PAGE_SHIFT, flags)); + } else { + iee_set_pte_pre_init(ptep, __pte(0)); + flush_tlb_kernel_range(addr, addr+PAGE_SIZE); + } +} +#endif + /* * Unusually, this is also called in IRQ context (ghes_iounmap_irq) so if we * ever need to use IPIs for TLB broadcasting, then we're in trouble here. @@ -124,9 +174,17 @@ void __set_fixmap(enum fixed_addresses idx, ptep = fixmap_pte(addr); if (pgprot_val(flags)) { + #ifdef CONFIG_PTP + iee_set_bm_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags)); + #else __set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags)); + #endif } else { + #ifdef CONFIG_PTP + iee_set_bm_pte(ptep, __pte(0)); + #else __pte_clear(&init_mm, addr, ptep); + #endif flush_tlb_kernel_range(addr, addr+PAGE_SIZE); } } @@ -182,8 +240,13 @@ void __init fixmap_copy(pgd_t *pgdir) * live in the carveout for the swapper_pg_dir. We can simply * re-use the existing dir for the fixmap. */ + #ifdef CONFIG_PTP + set_pgd_init(pgd_offset_pgd(pgdir, FIXADDR_TOT_START), + READ_ONCE(*pgd_offset_k(FIXADDR_TOT_START))); + #else set_pgd(pgd_offset_pgd(pgdir, FIXADDR_TOT_START), READ_ONCE(*pgd_offset_k(FIXADDR_TOT_START))); + #endif } else if (CONFIG_PGTABLE_LEVELS > 3) { pgd_t *bm_pgdp; p4d_t *bm_p4dp; @@ -197,9 +260,15 @@ void __init fixmap_copy(pgd_t *pgdir) BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); bm_pgdp = pgd_offset_pgd(pgdir, FIXADDR_TOT_START); bm_p4dp = p4d_offset(bm_pgdp, FIXADDR_TOT_START); + #ifdef CONFIG_PTP + bm_pudp = pud_set_fixmap_offset_init(bm_p4dp, FIXADDR_TOT_START); + __iee_pud_populate_pre_init(bm_pudp, __pa(lm_alias(bm_pmd)), PMD_TYPE_TABLE); + pud_clear_fixmap_init(); + #else bm_pudp = pud_set_fixmap_offset(bm_p4dp, FIXADDR_TOT_START); pud_populate(&init_mm, bm_pudp, lm_alias(bm_pmd)); pud_clear_fixmap(); + #endif } else { BUG(); } diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 66a7fff9f373..fdd5ad51307d 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -56,8 +56,20 @@ * that cannot be mistaken for a real physical address. 
*/ s64 memstart_addr __ro_after_init = -1; +#if defined(CONFIG_IEE) || defined(CONFIG_KOI) +s64 memstart_addr_init __ro_after_init = -1; +#endif +#ifdef CONFIG_KOI +s64 koi_offset __ro_after_init = -1; +EXPORT_SYMBOL(koi_offset); +#endif +#ifdef CONFIG_IEE +s64 iee_offset __ro_after_init = -1; +extern s64 iee_si_offset; +#endif EXPORT_SYMBOL(memstart_addr); + /* * If the corresponding config options are enabled, we create both ZONE_DMA * and ZONE_DMA32. By default ZONE_DMA covers the 32-bit addressable memory @@ -422,7 +434,11 @@ early_param("memmap", parse_memmap_opt); void __init arm64_memblock_init(void) { + #if defined(CONFIG_IEE) || defined(CONFIG_KOI) + s64 linear_region_size = BIT(vabits_actual - 2); + #else s64 linear_region_size = PAGE_END - _PAGE_OFFSET(vabits_actual); + #endif /* * Corner case: 52-bit VA capable systems running KVM in nVHE mode may @@ -439,13 +455,24 @@ void __init arm64_memblock_init(void) } /* Remove memory above our supported physical address size */ + #ifdef CONFIG_IEE + // If config iee, phys size can not be above 0x400000000000 + if(__pa_symbol(_end) > BIT_ULL(vabits_actual - 2)) + panic("Image on too high phys mem.\n"); + else + memblock_remove(BIT_ULL(vabits_actual - 2), ULLONG_MAX); + #else memblock_remove(1ULL << PHYS_MASK_SHIFT, ULLONG_MAX); + #endif /* * Select a suitable value for the base of physical memory. */ memstart_addr = round_down(memblock_start_of_DRAM(), ARM64_MEMSTART_ALIGN); + #if defined(CONFIG_IEE) || defined(CONFIG_KOI) + memstart_addr_init = memstart_addr; + #endif if ((memblock_end_of_DRAM() - memstart_addr) > linear_region_size) pr_warn("Memory doesn't fit in the linear mapping, VA_BITS too small\n"); @@ -532,6 +559,15 @@ void __init arm64_memblock_init(void) ((range * memstart_offset_seed) >> 16); } } + + #ifdef CONFIG_KOI + koi_offset = memstart_addr - memstart_addr_init + ((unsigned long)BIT(vabits_actual - 2)); + #endif + #ifdef CONFIG_IEE + iee_offset = memstart_addr - memstart_addr_init + ((unsigned long)BIT(vabits_actual - 2)); + iee_si_offset = iee_offset; + #endif + //printk(KERN_ERR "koi_offset: 0x%16llx\n", koi_offset); /* * Register the kernel text, kernel data, initrd, and initial @@ -579,6 +615,10 @@ void __init bootmem_init(void) * done after the fixed reservations */ sparse_init(); +#ifdef CONFIG_PTP + extern void iee_ptdesc_sparse_init(void); + iee_ptdesc_sparse_init(); +#endif zone_sizes_init(); /* diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index c846cc54e9ce..1148d95e4888 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -6,6 +6,7 @@ * Copyright (C) 2012 ARM Ltd. */ +#include "asm/pgtable.h" #include #include #include @@ -41,6 +42,15 @@ #include #include #include +#ifdef CONFIG_IEE +#include +#include +#include +#include +#endif +#ifdef CONFIG_PTP +#include +#endif #define NO_BLOCK_MAPPINGS BIT(0) #define NO_CONT_MAPPINGS BIT(1) @@ -77,8 +87,236 @@ EXPORT_SYMBOL(empty_zero_page); static DEFINE_SPINLOCK(swapper_pgdir_lock); static DEFINE_MUTEX(fixmap_lock); +#ifdef CONFIG_IEE +extern struct cred init_cred; + +extern unsigned long __iee_si_data_start[]; +extern unsigned long __iee_exec_entry_start[]; +extern unsigned long __iee_si_text_start[]; +extern unsigned long __iee_si_text_end[]; + +extern void *bm_pte_addr; +extern void *bm_pmd_addr; +extern void *bm_pud_addr; + +void *init_token_page_vaddr; + +#ifdef CONFIG_PTP + +/* Funcs to set pgtable before iee initialized. 
*/ +static void iee_set_swapper_pgd_pre_init(pgd_t *pgdp, pgd_t pgd) +{ + pgd_t *fixmap_pgdp; + + spin_lock(&swapper_pgdir_lock); + fixmap_pgdp = pgd_set_fixmap_init(__pa_symbol(pgdp)); + WRITE_ONCE(*fixmap_pgdp, pgd); + /* + * We need dsb(ishst) here to ensure the page-table-walker sees + * our new entry before set_p?d() returns. The fixmap's + * flush_tlb_kernel_range() via clear_fixmap() does this for us. + */ + pgd_clear_fixmap_init(); + spin_unlock(&swapper_pgdir_lock); +} + +void iee_set_p4d_pre_init(p4d_t *p4dp, p4d_t p4d) +{ + if (in_swapper_pgdir(p4dp)) { + iee_set_swapper_pgd_pre_init((pgd_t *)p4dp, __pgd(p4d_val(p4d))); + return; + } + + WRITE_ONCE(*p4dp, p4d); + dsb(ishst); + isb(); +} + +static inline void iee_set_pud_pre_init(pud_t *pudp, pud_t pud) +{ +#ifdef __PAGETABLE_PUD_FOLDED + if (in_swapper_pgdir(pudp)) { + iee_set_swapper_pgd_pre_init((pgd_t *)pudp, __pgd(pud_val(pud))); + return; + } +#endif /* __PAGETABLE_PUD_FOLDED */ +#ifdef CONFIG_KOI + pudval_t val = pud_val(pud); + if (pud_valid(pud) && !(val & PUD_TABLE_BIT)) { + // There is no PUD_SEC_NG, so we use PMD_SECT_NG instead. + pud = __pud(val | PMD_SECT_NG); + } +#endif + WRITE_ONCE(*pudp, pud); + + if (pud_valid(pud)) { + dsb(ishst); + isb(); + } +} + +static inline void iee_set_pmd_pre_init(pmd_t *pmdp, pmd_t pmd) +{ +#ifdef __PAGETABLE_PMD_FOLDED + if (in_swapper_pgdir(pmdp)) { + iee_set_swapper_pgd_pre_init((pgd_t *)pmdp, __pgd(pmd_val(pmd))); + return; + } +#endif /* __PAGETABLE_PMD_FOLDED */ +#ifdef CONFIG_KOI + pmdval_t val = pmd_val(pmd); + if (pmd_valid(pmd) && !(val & PMD_TABLE_BIT)) { + pmd = __pmd(val | PMD_SECT_NG); + } +#endif + WRITE_ONCE(*pmdp, pmd); + + if (pmd_valid(pmd)) { + dsb(ishst); + isb(); + } +} + + +void __iee_p4d_populate_pre_init(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot) +{ + iee_set_p4d_pre_init(p4dp, __p4d(__phys_to_p4d_val(pudp) | prot)); +} + +void __iee_pud_populate_pre_init(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) +{ + iee_set_pud_pre_init(pudp, __pud(__phys_to_pud_val(pmdp) | prot)); +} + +void __iee_pmd_populate_pre_init(pmd_t *pmdp, phys_addr_t ptep, + pmdval_t prot) +{ + iee_set_pmd_pre_init(pmdp, __pmd(__phys_to_pmd_val(ptep) | prot)); +} + +/* Funcs to set fixmap before iee initialized. */ +bool pgattr_change_is_safe(u64 old, u64 new); + +static int iee_pmd_set_huge_fixmap(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) +{ + pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), mk_pmd_sect_prot(prot)); + + /* Only allow permission changes for now */ + if (!pgattr_change_is_safe(READ_ONCE(pmd_val(*pmdp)), + pmd_val(new_pmd))) + return 0; + + VM_BUG_ON(phys & ~PMD_MASK); + iee_set_fixmap_pmd_pre_init(pmdp, new_pmd); + return 1; +} + +static inline void __iee_pmd_populate_fixmap(pmd_t *pmdp, phys_addr_t ptep, + pmdval_t prot) +{ + iee_set_fixmap_pmd_pre_init(pmdp, __pmd(__phys_to_pmd_val(ptep) | prot)); +} + +static inline void __iee_pud_populate_fixmap(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) +{ + iee_set_fixmap_pud_pre_init(pudp, __pud(__phys_to_pud_val(pmdp) | prot)); +} +#endif /* END CONFIG_PTP*/ + +void iee_set_pte_pre_init(pte_t *ptep, pte_t pte) +{ +#ifdef CONFIG_KOI + if (pte_valid(pte)) { + pte = __pte(pte_val(pte) | PTE_NG); + } +#endif + WRITE_ONCE(*ptep, pte); + + /* + * Only if the new pte is valid and kernel, otherwise TLB maintenance + * or update_mmu_cache() have the necessary barriers. 
+ */ + if (pte_valid_not_user(pte)) { + dsb(ishst); + isb(); + } +} + +static void __init iee_set_token_page_valid_pre_init(void *token, void *new) +{ + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, (unsigned long)token); + + p4d_t *p4dp = p4d_offset(pgdp, (unsigned long)token); + + pud_t *pudp = pud_offset(p4dp, (unsigned long)token); + + pmd_t *pmdp = pmd_offset(pudp, (unsigned long)token); + + pte_t *ptep = pte_offset_kernel(pmdp, (unsigned long)token); + pte_t pte = READ_ONCE(*ptep); + pte = __pte(((pte_val(pte) | 0x1) & ~PTE_ADDR_MASK) | __phys_to_pte_val(__pa(new))); + #ifdef CONFIG_PTP + iee_set_pte_pre_init(ptep, pte); + #else + set_pte(ptep, pte); + #endif + flush_tlb_kernel_range((unsigned long)token, (unsigned long)(token+PAGE_SIZE)); + isb(); +} +#endif /* END CONFIG_IEE*/ + +#if defined(CONFIG_KOI) && !defined(CONFIG_IEE) + +void koi_add_page_mapping(void *token, void *new) +{ + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, (unsigned long)token); + + p4d_t *p4dp = p4d_offset(pgdp, (unsigned long)token); + + pud_t *pudp = pud_offset(p4dp, (unsigned long)token); + + pmd_t *pmdp = pmd_offset(pudp, (unsigned long)token); + + pte_t *ptep = pte_offset_kernel(pmdp, (unsigned long)token); + pte_t pte = READ_ONCE(*ptep); + pte = __pte(((pte_val(pte) | 0x1) & ~PTE_ADDR_MASK) | __phys_to_pte_val(__pa(new))); + set_pte(ptep, pte); + dsb(ishst); + isb(); + + flush_tlb_kernel_range((unsigned long)new, (unsigned long)new+PAGE_SIZE); + flush_tlb_kernel_range((unsigned long)token, (unsigned long)token+PAGE_SIZE); + isb(); +} + +void koi_remove_page_mapping(unsigned long token, void *__unused, unsigned long order) { + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, token); + + p4d_t *p4dp = p4d_offset(pgdp, token); + + pud_t *pudp = pud_offset(p4dp, token); + + pmd_t *pmdp = pmd_offset(pudp, token); + + pte_t *ptep = pte_offset_kernel(pmdp, token); + pte_t pte = READ_ONCE(*ptep); + pte = __pte(((pte_val(pte) & ~((unsigned long)0x1)) & ~PTE_ADDR_MASK) | __phys_to_pte_val(__pa(token - KOI_OFFSET))); + set_pte(ptep, pte); + flush_tlb_kernel_range(token, token+PAGE_SIZE); + isb(); +} +#endif + void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd) { + #ifdef CONFIG_PTP + spin_lock(&swapper_pgdir_lock); + iee_rw_gate(IEE_OP_SET_SWAPPER_PGD, pgdp, pgd); + spin_unlock(&swapper_pgdir_lock); + #else pgd_t *fixmap_pgdp; spin_lock(&swapper_pgdir_lock); @@ -91,6 +329,7 @@ void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd) */ pgd_clear_fixmap(); spin_unlock(&swapper_pgdir_lock); + #endif } pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, @@ -104,6 +343,34 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, } EXPORT_SYMBOL(phys_mem_access_prot); +#ifdef CONFIG_PTP +phys_addr_t __init early_pgtable_alloc(int shift) +{ + phys_addr_t phys; + void *ptr; + + phys = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); + if (!phys) + panic("Failed to allocate page table page\n"); + + /* + * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE + * slot will be free, so we can (ab)use the FIX_PTE slot to initialise + * any level of table. 
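The token helpers above splice a freshly allocated page into an existing kernel mapping: the resident PTE keeps its attribute bits, has its valid bit forced on (or cleared again on removal), and has its output address replaced before the TLB entry for that virtual address is flushed. A minimal user-space model of that bit manipulation follows, assuming the usual arm64 layout (bit 0 valid, bits 47:12 output address); the constants and the helper name are illustrative only.

/* Model of the PTE rewrite done by iee_set_token_page_valid_pre_init()
 * and koi_add_page_mapping(): keep attributes, force valid, swap the
 * output address for the new token page. */
#include <stdint.h>
#include <stdio.h>

#define PTE_VALID      0x1ULL
#define PTE_ADDR_MASK  0x0000fffffffff000ULL   /* bits [47:12] */

static uint64_t retarget_pte(uint64_t pte, uint64_t new_phys)
{
    return ((pte | PTE_VALID) & ~PTE_ADDR_MASK) | (new_phys & PTE_ADDR_MASK);
}

int main(void)
{
    uint64_t pte = 0x0000000081234000ULL | 0x7e2ULL;   /* old address plus attribute bits */
    uint64_t updated = retarget_pte(pte, 0x00000000c0de0000ULL);

    printf("old %#llx -> new %#llx\n",
           (unsigned long long)pte, (unsigned long long)updated);
    return 0;
}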
+ */ + ptr = pte_set_fixmap_init(phys); + + memset(ptr, 0, PAGE_SIZE); + + /* + * Implicit barriers also ensure the zeroed page is visible to the page + * table walker + */ + pte_clear_fixmap_init(); + + return phys; +} +#else static phys_addr_t __init early_pgtable_alloc(int shift) { phys_addr_t phys; @@ -119,7 +386,11 @@ static phys_addr_t __init early_pgtable_alloc(int shift) * slot will be free, so we can (ab)use the FIX_PTE slot to initialise * any level of table. */ + #ifdef CONFIG_PTP + ptr = pte_set_fixmap_init(phys); + #else ptr = pte_set_fixmap(phys); + #endif memset(ptr, 0, PAGE_SIZE); @@ -127,10 +398,15 @@ static phys_addr_t __init early_pgtable_alloc(int shift) * Implicit barriers also ensure the zeroed page is visible to the page * table walker */ + #ifdef CONFIG_PTP + pte_clear_fixmap_init(); + #else pte_clear_fixmap(); + #endif return phys; } +#endif bool pgattr_change_is_safe(u64 old, u64 new) { @@ -179,7 +455,11 @@ static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end, do { pte_t old_pte = __ptep_get(ptep); + #ifdef CONFIG_PTP + iee_set_fixmap_pte_pre_init(ptep, pfn_pte(__phys_to_pfn(phys), prot)); + #else __set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot)); + #endif /* * After the PTE entry has been populated once, we @@ -212,7 +492,11 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, pmdval |= PMD_TABLE_PXN; BUG_ON(!pgtable_alloc); pte_phys = pgtable_alloc(PAGE_SHIFT); + #ifdef CONFIG_PTP + __iee_pmd_populate_fixmap(pmdp, pte_phys, pmdval); + #else __pmd_populate(pmdp, pte_phys, pmdval); + #endif pmd = READ_ONCE(*pmdp); } BUG_ON(pmd_bad(pmd)); @@ -247,9 +531,17 @@ static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end, next = pmd_addr_end(addr, end); /* try section mapping first */ + #ifdef CONFIG_IEE + if (!((pmd_val(old_pmd) & PTE_VALID) && (pmd_val(old_pmd) & PTE_TABLE_BIT)) && (((addr | next | phys) & ~PMD_MASK) == 0 && (flags & NO_BLOCK_MAPPINGS) == 0)) { + #else if (((addr | next | phys) & ~PMD_MASK) == 0 && (flags & NO_BLOCK_MAPPINGS) == 0) { + #endif + #ifdef CONFIG_PTP + iee_pmd_set_huge_fixmap(pmdp, phys, prot); + #else pmd_set_huge(pmdp, phys, prot); + #endif /* * After the PMD entry has been populated once, we @@ -290,7 +582,11 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, pudval |= PUD_TABLE_PXN; BUG_ON(!pgtable_alloc); pmd_phys = pgtable_alloc(PMD_SHIFT); - __pud_populate(pudp, pmd_phys, pudval); + #ifdef CONFIG_PTP + __iee_pud_populate_fixmap(pudp, pmd_phys, PUD_TYPE_TABLE); + #else + __pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE); + #endif pud = READ_ONCE(*pudp); } BUG_ON(pud_bad(pud)); @@ -343,6 +639,13 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, /* * For 4K granule only, attempt to put down a 1GB block */ + #ifdef CONFIG_IEE + alloc_init_cont_pmd(pudp, addr, next, phys, prot, + pgtable_alloc, flags); + + BUG_ON(pud_val(old_pud) != 0 && + pud_val(old_pud) != READ_ONCE(pud_val(*pudp))); + #else if (pud_sect_supported() && ((addr | next | phys) & ~PUD_MASK) == 0 && (flags & NO_BLOCK_MAPPINGS) == 0) { @@ -361,6 +664,7 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, BUG_ON(pud_val(old_pud) != 0 && pud_val(old_pud) != READ_ONCE(pud_val(*pudp))); } + #endif phys += next - addr; } while (pudp++, addr = next, addr != end); @@ -375,6 +679,10 @@ static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, { unsigned long addr, end, next; pgd_t *pgdp = pgd_offset_pgd(pgdir, virt); + #ifdef CONFIG_IEE + p4d_t 
*p4dp; + p4d_t p4d; + #endif /* * If the virtual and physical address don't have the same offset @@ -391,10 +699,14 @@ static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, next = pgd_addr_end(addr, end); alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc, flags); + #ifdef CONFIG_IEE + p4dp = p4d_offset(pgdp, addr); + p4d = READ_ONCE(*p4dp); + __p4d_populate(p4dp, __p4d_to_phys(p4d), (PGD_APT | PUD_TYPE_TABLE)); + #endif phys += next - addr; } while (pgdp++, addr = next, addr != end); } - static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, @@ -414,103 +726,363 @@ void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, phys_addr_t (*pgtable_alloc)(int), int flags); #endif -static phys_addr_t __pgd_pgtable_alloc(int shift) +#ifdef CONFIG_PTP +static int __init iee_pmd_set_huge_pre_init(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) { - void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL); - BUG_ON(!ptr); + pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), mk_pmd_sect_prot(prot)); - /* Ensure the zeroed page is visible to the page table walker */ - dsb(ishst); - return __pa(ptr); + /* Only allow permission changes for now */ + if (!pgattr_change_is_safe(READ_ONCE(pmd_val(*pmdp)), + pmd_val(new_pmd))) + return 0; + + VM_BUG_ON(phys & ~PMD_MASK); + iee_set_pmd_pre_init(pmdp, new_pmd); + return 1; } -static phys_addr_t pgd_pgtable_alloc(int shift) +static __init void iee_init_pte_pre_init(pmd_t *pmdp, unsigned long addr, unsigned long end, + phys_addr_t phys, pgprot_t prot) { - phys_addr_t pa = __pgd_pgtable_alloc(shift); - struct ptdesc *ptdesc = page_ptdesc(phys_to_page(pa)); + pte_t *ptep; - /* - * Call proper page table ctor in case later we need to - * call core mm functions like apply_to_page_range() on - * this pre-allocated page table. - * - * We don't select ARCH_ENABLE_SPLIT_PMD_PTLOCK if pmd is - * folded, and if so pagetable_pte_ctor() becomes nop. - */ - if (shift == PAGE_SHIFT) - BUG_ON(!pagetable_pte_ctor(ptdesc)); - else if (shift == PMD_SHIFT) - BUG_ON(!pagetable_pmd_ctor(ptdesc)); + ptep = pte_set_fixmap_offset_init(pmdp, addr); + do { + pte_t old_pte = READ_ONCE(*ptep); - return pa; -} + iee_set_pte_pre_init(ptep, pfn_pte(__phys_to_pfn(phys), prot)); -/* - * This function can only be used to modify existing table entries, - * without allocating new levels of table. Note that this permits the - * creation of new section or page entries. - */ -void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt, - phys_addr_t size, pgprot_t prot) -{ - if (virt < PAGE_OFFSET) { - pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", - &phys, virt); - return; - } - __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, - NO_CONT_MAPPINGS); + /* + * After the PTE entry has been populated once, we + * only allow updates to the permission attributes. 
+ */ + BUG_ON(!pgattr_change_is_safe(pte_val(old_pte), + READ_ONCE(pte_val(*ptep)))); + + phys += PAGE_SIZE; + } while (ptep++, addr += PAGE_SIZE, addr != end); + + pte_clear_fixmap_init(); } -void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, - unsigned long virt, phys_addr_t size, - pgprot_t prot, bool page_mappings_only) +static __init void iee_alloc_init_cont_pte_pre_init(pmd_t *pmdp, unsigned long addr, + unsigned long end, phys_addr_t phys, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), + int flags) { - int flags = 0; + unsigned long next; + pmd_t pmd = READ_ONCE(*pmdp); - BUG_ON(mm == &init_mm); + BUG_ON(pmd_sect(pmd)); + if (pmd_none(pmd)) { + pmdval_t pmdval = PMD_TYPE_TABLE | PMD_TABLE_UXN; + phys_addr_t pte_phys; - if (page_mappings_only) - flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; + if (flags & NO_EXEC_MAPPINGS) + pmdval |= PMD_TABLE_PXN; + BUG_ON(!pgtable_alloc); + pte_phys = pgtable_alloc(PAGE_SHIFT); + __iee_pmd_populate_pre_init(pmdp, pte_phys, pmdval); + pmd = READ_ONCE(*pmdp); + } + BUG_ON(pmd_bad(pmd)); - __create_pgd_mapping(mm->pgd, phys, virt, size, prot, - pgd_pgtable_alloc, flags); -} + do { + pgprot_t __prot = prot; -static void update_mapping_prot(phys_addr_t phys, unsigned long virt, - phys_addr_t size, pgprot_t prot) -{ - if (virt < PAGE_OFFSET) { - pr_warn("BUG: not updating mapping for %pa at 0x%016lx - outside kernel range\n", - &phys, virt); - return; - } + next = pte_cont_addr_end(addr, end); - __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, - NO_CONT_MAPPINGS); + /* use a contiguous mapping if the range is suitably aligned */ + if ((((addr | next | phys) & ~CONT_PTE_MASK) == 0) && + (flags & NO_CONT_MAPPINGS) == 0) + __prot = __pgprot(pgprot_val(prot) | PTE_CONT); - /* flush the TLBs after updating live kernel mappings */ - flush_tlb_kernel_range(virt, virt + size); -} + iee_init_pte_pre_init(pmdp, addr, next, phys, __prot); -static void __init __map_memblock(pgd_t *pgdp, phys_addr_t start, - phys_addr_t end, pgprot_t prot, int flags) -{ - __create_pgd_mapping(pgdp, start, __phys_to_virt(start), end - start, - prot, early_pgtable_alloc, flags); + phys += next - addr; + } while (addr = next, addr != end); } -void __init mark_linear_text_alias_ro(void) +static __init void iee_init_pmd_pre_init(pud_t *pudp, unsigned long addr, unsigned long end, + phys_addr_t phys, pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), int flags) { - /* - * Remove the write permissions from the linear alias of .text/.rodata - */ - update_mapping_prot(__pa_symbol(_stext), (unsigned long)lm_alias(_stext), - (unsigned long)__init_begin - (unsigned long)_stext, - PAGE_KERNEL_RO); -} + unsigned long next; + pmd_t *pmdp; -#ifdef CONFIG_KFENCE + pmdp = pmd_set_fixmap_offset_init(pudp, addr); + do { + pmd_t old_pmd = READ_ONCE(*pmdp); + + next = pmd_addr_end(addr, end); + + /* try section mapping first */ + if (((addr | next | phys) & ~PMD_MASK) == 0 && + (flags & NO_BLOCK_MAPPINGS) == 0) { + iee_pmd_set_huge_pre_init(pmdp, phys, prot); + + /* + * After the PMD entry has been populated once, we + * only allow updates to the permission attributes. 
+ */ + BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd), + READ_ONCE(pmd_val(*pmdp)))); + } else { + iee_alloc_init_cont_pte_pre_init(pmdp, addr, next, phys, prot, + pgtable_alloc, flags); + + BUG_ON(pmd_val(old_pmd) != 0 && + pmd_val(old_pmd) != READ_ONCE(pmd_val(*pmdp))); + } + phys += next - addr; + } while (pmdp++, addr = next, addr != end); + + pmd_clear_fixmap_init(); +} + +static __init void iee_alloc_init_cont_pmd_pre_init(pud_t *pudp, unsigned long addr, + unsigned long end, phys_addr_t phys, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), int flags) +{ + unsigned long next; + pud_t pud = READ_ONCE(*pudp); + + /* + * Check for initial section mappings in the pgd/pud. + */ + BUG_ON(pud_sect(pud)); + if (pud_none(pud)) { + pudval_t pudval = PUD_TYPE_TABLE | PUD_TABLE_UXN; + phys_addr_t pmd_phys; + + if (flags & NO_EXEC_MAPPINGS) + pudval |= PUD_TABLE_PXN; + BUG_ON(!pgtable_alloc); + pmd_phys = pgtable_alloc(PMD_SHIFT); + __iee_pud_populate_pre_init(pudp, pmd_phys, pudval); + pud = READ_ONCE(*pudp); + } + BUG_ON(pud_bad(pud)); + + do { + pgprot_t __prot = prot; + + next = pmd_cont_addr_end(addr, end); + + /* use a contiguous mapping if the range is suitably aligned */ + if ((((addr | next | phys) & ~CONT_PMD_MASK) == 0) && + (flags & NO_CONT_MAPPINGS) == 0) + __prot = __pgprot(pgprot_val(prot) | PTE_CONT); + + iee_init_pmd_pre_init(pudp, addr, next, phys, __prot, pgtable_alloc, flags); + + phys += next - addr; + } while (addr = next, addr != end); +} + +static __init void iee_alloc_init_pud_pre_init(pgd_t *pgdp, unsigned long addr, unsigned long end, + phys_addr_t phys, pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), + int flags) +{ + unsigned long next; + pud_t *pudp; + p4d_t *p4dp = p4d_offset(pgdp, addr); + p4d_t p4d = READ_ONCE(*p4dp); + + if (p4d_none(p4d)) { + p4dval_t p4dval = P4D_TYPE_TABLE | P4D_TABLE_UXN; + phys_addr_t pud_phys; + + if (flags & NO_EXEC_MAPPINGS) + p4dval |= P4D_TABLE_PXN; + BUG_ON(!pgtable_alloc); + pud_phys = pgtable_alloc(PUD_SHIFT); + __iee_p4d_populate_pre_init(p4dp, pud_phys, p4dval); + p4d = READ_ONCE(*p4dp); + } + BUG_ON(p4d_bad(p4d)); + + pudp = pud_set_fixmap_offset_init(p4dp, addr); + do { + pud_t old_pud = READ_ONCE(*pudp); + + next = pud_addr_end(addr, end); + + /* + * For 4K granule only, attempt to put down a 1GB block + */ + iee_alloc_init_cont_pmd_pre_init(pudp, addr, next, phys, prot, + pgtable_alloc, flags); + + BUG_ON(pud_val(old_pud) != 0 && + pud_val(old_pud) != READ_ONCE(pud_val(*pudp))); + phys += next - addr; + } while (pudp++, addr = next, addr != end); + + pud_clear_fixmap_init(); +} + +static __init void __iee_create_pgd_mapping_locked_pre_init(pgd_t *pgdir, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), + int flags) +{ + unsigned long addr, end, next; + pgd_t *pgdp = pgd_offset_pgd(pgdir, virt); + p4d_t *p4dp; + p4d_t p4d; + + /* + * If the virtual and physical address don't have the same offset + * within a page, we cannot map the region as the caller expects. 
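The *_cont_* helpers above only switch from individual entries to a contiguous-hint run when the virtual start, the end of the sub-range and the physical start are all aligned to the contiguous block. A small sketch of that alignment test, assuming 4 KiB pages with 16-entry PTE runs (64 KiB); the sizes are illustrative, not taken from this build configuration.

/* Model of the "use a contiguous mapping" test in the cont helpers. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define CONT_PTE_SIZE  (16UL * 4096UL)            /* 64 KiB, example geometry */
#define CONT_PTE_MASK  (~(CONT_PTE_SIZE - 1))

static bool use_cont_mapping(uint64_t addr, uint64_t next, uint64_t phys)
{
    return ((addr | next | phys) & ~CONT_PTE_MASK) == 0;
}

int main(void)
{
    /* 1: all three values are 64 KiB aligned */
    printf("%d\n", use_cont_mapping(0xffff000010000000ULL,
                                    0xffff000010010000ULL, 0x80000000ULL));
    /* 0: virtual start is only page aligned */
    printf("%d\n", use_cont_mapping(0xffff000010001000ULL,
                                    0xffff000010010000ULL, 0x80000000ULL));
    return 0;
}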
+ */ + if (WARN_ON((phys ^ virt) & ~PAGE_MASK)) + return; + + phys &= PAGE_MASK; + addr = virt & PAGE_MASK; + end = PAGE_ALIGN(virt + size); + + do { + next = pgd_addr_end(addr, end); + iee_alloc_init_pud_pre_init(pgdp, addr, next, phys, prot, pgtable_alloc, + flags); + p4dp = p4d_offset(pgdp, addr); + p4d = READ_ONCE(*p4dp); + __iee_p4d_populate_pre_init(p4dp, __p4d_to_phys(p4d), (PGD_APT | PUD_TYPE_TABLE)); + phys += next - addr; + } while (pgdp++, addr = next, addr != end); +} + +static __init void __iee_create_pgd_mapping_pre_init(pgd_t *pgdir, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), + int flags) +{ + mutex_lock(&fixmap_lock); + __iee_create_pgd_mapping_locked_pre_init(pgdir, phys, virt, size, prot, + pgtable_alloc, flags); + mutex_unlock(&fixmap_lock); +} +#endif + +static phys_addr_t __pgd_pgtable_alloc(int shift) +{ + #ifdef CONFIG_PTP + void *ptr = get_iee_pgtable_page(GFP_PGTABLE_KERNEL); + #else + void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL); + #endif + BUG_ON(!ptr); + + /* Ensure the zeroed page is visible to the page table walker */ + dsb(ishst); + return __pa(ptr); +} + +static phys_addr_t pgd_pgtable_alloc(int shift) +{ + phys_addr_t pa = __pgd_pgtable_alloc(shift); + struct ptdesc *ptdesc = page_ptdesc(phys_to_page(pa)); + + /* + * Call proper page table ctor in case later we need to + * call core mm functions like apply_to_page_range() on + * this pre-allocated page table. + * + * We don't select ARCH_ENABLE_SPLIT_PMD_PTLOCK if pmd is + * folded, and if so pagetable_pte_ctor() becomes nop. + */ + if (shift == PAGE_SHIFT) + BUG_ON(!pagetable_pte_ctor(ptdesc)); + else if (shift == PMD_SHIFT) + BUG_ON(!pagetable_pmd_ctor(ptdesc)); + + return pa; +} + +/* + * This function can only be used to modify existing table entries, + * without allocating new levels of table. Note that this permits the + * creation of new section or page entries. 
+ */ +void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot) +{ + if (virt < PAGE_OFFSET) { + pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", + &phys, virt); + return; + } + + #ifdef CONFIG_PTP + __iee_create_pgd_mapping_pre_init(init_mm.pgd, phys, virt, size, prot, NULL, + NO_CONT_MAPPINGS); + #else + __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, + NO_CONT_MAPPINGS); + #endif +} + +void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot, bool page_mappings_only) +{ + int flags = 0; + + BUG_ON(mm == &init_mm); + + if (page_mappings_only) + flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; + + __create_pgd_mapping(mm->pgd, phys, virt, size, prot, + pgd_pgtable_alloc, flags); +} + +static void update_mapping_prot(phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot) +{ + if (virt < PAGE_OFFSET) { + pr_warn("BUG: not updating mapping for %pa at 0x%016lx - outside kernel range\n", + &phys, virt); + return; + } + + __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, + NO_CONT_MAPPINGS); + + /* flush the TLBs after updating live kernel mappings */ + flush_tlb_kernel_range(virt, virt + size); +} + +static void __init __map_memblock(pgd_t *pgdp, phys_addr_t start, + phys_addr_t end, pgprot_t prot, int flags) +{ + #ifdef CONFIG_PTP + __iee_create_pgd_mapping_pre_init(pgdp, start, __phys_to_virt(start), end - start, + prot, early_pgtable_alloc, flags); + #else + __create_pgd_mapping(pgdp, start, __phys_to_virt(start), end - start, + prot, early_pgtable_alloc, flags); + #endif +} + +void __init mark_linear_text_alias_ro(void) +{ + /* + * Remove the write permissions from the linear alias of .text/.rodata + */ + update_mapping_prot(__pa_symbol(_stext), (unsigned long)lm_alias(_stext), + (unsigned long)__init_begin - (unsigned long)_stext, + PAGE_KERNEL_RO); +} + +#ifdef CONFIG_KFENCE bool __ro_after_init kfence_early_init = !!CONFIG_KFENCE_SAMPLE_INTERVAL; @@ -655,147 +1227,808 @@ static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end, BUG_ON(!PAGE_ALIGNED(pa_start)); BUG_ON(!PAGE_ALIGNED(size)); - __create_pgd_mapping(pgdp, pa_start, (unsigned long)va_start, size, prot, - early_pgtable_alloc, flags); + #ifdef CONFIG_PTP + __iee_create_pgd_mapping_pre_init(pgdp, pa_start, (unsigned long)va_start, size, prot, + early_pgtable_alloc, flags); + #else + __create_pgd_mapping(pgdp, pa_start, (unsigned long)va_start, size, prot, + early_pgtable_alloc, flags); + #endif + + if (!(vm_flags & VM_NO_GUARD)) + size += PAGE_SIZE; + + vma->addr = va_start; + vma->phys_addr = pa_start; + vma->size = size; + vma->flags = VM_MAP | vm_flags; + vma->caller = __builtin_return_address(0); + + vm_area_add_early(vma); +} + +static pgprot_t kernel_exec_prot(void) +{ + return rodata_enabled ? 
PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; +} + +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +static int __init map_entry_trampoline(void) +{ + int i; + + pgprot_t prot = kernel_exec_prot(); + phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start); + + /* The trampoline is always mapped and can therefore be global */ + pgprot_val(prot) &= ~PTE_NG; + + /* Map only the text into the trampoline page table */ + memset(tramp_pg_dir, 0, PGD_SIZE); + // #ifdef CONFIG_PTP + // iee_set_logical_mem_ro((unsigned long)tramp_pg_dir); + // #endif + __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, + entry_tramp_text_size(), prot, + __pgd_pgtable_alloc, NO_BLOCK_MAPPINGS); + + /* Map both the text and data into the kernel page table */ + for (i = 0; i < DIV_ROUND_UP(entry_tramp_text_size(), PAGE_SIZE); i++) + __set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i, + pa_start + i * PAGE_SIZE, prot); + + if (IS_ENABLED(CONFIG_RELOCATABLE)) + __set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i, + pa_start + i * PAGE_SIZE, PAGE_KERNEL_RO); + + return 0; +} +core_initcall(map_entry_trampoline); +#endif + +/* + * Open coded check for BTI, only for use to determine configuration + * for early mappings for before the cpufeature code has run. + */ +static bool arm64_early_this_cpu_has_bti(void) +{ + u64 pfr1; + + if (!IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) + return false; + + pfr1 = __read_sysreg_by_encoding(SYS_ID_AA64PFR1_EL1); + return cpuid_feature_extract_unsigned_field(pfr1, + ID_AA64PFR1_EL1_BT_SHIFT); +} + +#ifdef CONFIG_IEE +/* Set PMD APTable of iee si codes as (1,1) to revert it to ROX P pages when HPD1=0. */ +static void __init iee_si_set_pmd_APtable(unsigned long addr, pgd_t *pgdir) +{ + pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); + + p4d_t *p4dp = p4d_offset(pgdp, addr); + + #ifdef CONFIG_PTP + pud_t *pudp = pud_set_fixmap_offset_init(p4dp, addr); + + pmd_t *pmdp = pmd_set_fixmap_offset_init(pudp, addr); + + pmd_t pmd = READ_ONCE(*pmdp); + + __iee_pmd_populate_pre_init(pmdp, __pmd_to_phys(pmd), PGD_APT_RO | PGD_APT | PMD_TYPE_TABLE); + + pud_clear_fixmap_init(); + pmd_clear_fixmap_init(); + #else + pud_t *pudp = pud_set_fixmap_offset(p4dp, addr); + + pmd_t *pmdp = pmd_set_fixmap_offset(pudp, addr); + + pmd_t pmd = READ_ONCE(*pmdp); + + __pmd_populate(pmdp, __pmd_to_phys(pmd), PGD_APT_RO | PGD_APT | PMD_TYPE_TABLE); + + pud_clear_fixmap(); + pmd_clear_fixmap(); + #endif +} +/* Set PMD APTable of iee si codes as (1,1) to revert it to ROX P pages when HPD1=0. */ +static void __init mark_iee_si_pmd_APtable(pgd_t *pgdir) +{ + unsigned long addr = (unsigned long)__iee_si_text_start; + iee_si_set_pmd_APtable(addr, pgdir); + // iee rwx gate exit may be mapped by another pmd. + iee_si_set_pmd_APtable(addr + PAGE_SIZE, pgdir); +} +#endif + +/* + * Create fine-grained mappings for the kernel. + */ +static void __init map_kernel(pgd_t *pgdp) +{ + static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext, + vmlinux_initdata, vmlinux_data; + + #ifdef CONFIG_IEE + static struct vm_struct vmlinux_iee_code, vmlinux_iee_data, vmlinux_iee_gate, vmlinux_text_end; + #endif + + /* + * External debuggers may need to write directly to the text + * mapping to install SW breakpoints. Allow this (only) when + * explicitly requested with rodata=off. + */ + pgprot_t text_prot = kernel_exec_prot(); + + /* + * If we have a CPU that supports BTI and a kernel built for + * BTI then mark the kernel executable text as guarded pages + * now so we don't have to rewrite the page tables later. 
+ */ + if (arm64_early_this_cpu_has_bti()) + text_prot = __pgprot_modify(text_prot, PTE_GP, PTE_GP); + + /* + * Only rodata will be remapped with different permissions later on, + * all other segments are allowed to use contiguous mappings. + */ + #ifdef CONFIG_IEE + map_kernel_segment(pgdp, _stext, __iee_si_data_start, text_prot, &vmlinux_text, + 0, VM_NO_GUARD); + /* Set iee si data RW. */ + map_kernel_segment(pgdp, __iee_si_data_start, __iee_exec_entry_start, SET_NG(PAGE_KERNEL), + &vmlinux_iee_data, NO_CONT_MAPPINGS | NO_BLOCK_MAPPINGS, VM_NO_GUARD); + /* Set iee entry codes NG. */ + map_kernel_segment(pgdp, __iee_exec_entry_start, __iee_si_text_start, SET_NG(text_prot), &vmlinux_iee_gate, + NO_CONT_MAPPINGS | NO_BLOCK_MAPPINGS, VM_NO_GUARD); + /* Map __iee_si_text_start - __iee_si_text_end as U RWX pages and set PMD APTABLE = (1,1). */ + map_kernel_segment(pgdp, __iee_si_text_start, __iee_si_text_end, SET_NG((PAGE_KERNEL_EXEC)), + &vmlinux_iee_code, NO_CONT_MAPPINGS | NO_BLOCK_MAPPINGS, VM_NO_GUARD); + mark_iee_si_pmd_APtable(pgdp); + + map_kernel_segment(pgdp, __iee_si_text_end, _etext, text_prot, &vmlinux_text_end, 0, + VM_NO_GUARD); + + map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL, + &vmlinux_rodata, NO_CONT_MAPPINGS | NO_BLOCK_MAPPINGS, VM_NO_GUARD); + map_kernel_segment(pgdp, __inittext_begin, __inittext_end, text_prot, + &vmlinux_inittext, 0, VM_NO_GUARD); + map_kernel_segment(pgdp, __initdata_begin, __initdata_end, PAGE_KERNEL, + &vmlinux_initdata, 0, VM_NO_GUARD); + map_kernel_segment(pgdp, _data, _end, PAGE_KERNEL, &vmlinux_data, NO_CONT_MAPPINGS | NO_BLOCK_MAPPINGS, 0); + #else + map_kernel_segment(pgdp, _stext, _etext, text_prot, &vmlinux_text, 0, + VM_NO_GUARD); + map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL, + &vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD); + map_kernel_segment(pgdp, __inittext_begin, __inittext_end, text_prot, + &vmlinux_inittext, 0, VM_NO_GUARD); + map_kernel_segment(pgdp, __initdata_begin, __initdata_end, PAGE_KERNEL, + &vmlinux_initdata, 0, VM_NO_GUARD); + map_kernel_segment(pgdp, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0); + #endif + + + fixmap_copy(pgdp); + kasan_copy_shadow(pgdp); +} + +static void __init create_idmap(void) +{ + u64 start = __pa_symbol(__idmap_text_start); + u64 size = __pa_symbol(__idmap_text_end) - start; + pgd_t *pgd = idmap_pg_dir; + u64 pgd_phys; + + /* check if we need an additional level of translation */ + if (VA_BITS < 48 && idmap_t0sz < (64 - VA_BITS_MIN)) { + pgd_phys = early_pgtable_alloc(PAGE_SHIFT); + set_pgd(&idmap_pg_dir[start >> VA_BITS], + __pgd(pgd_phys | P4D_TYPE_TABLE)); + pgd = __va(pgd_phys); + } + #ifdef CONFIG_PTP + __iee_create_pgd_mapping_pre_init(pgd, start, start, size, PAGE_KERNEL_ROX, + early_pgtable_alloc, 0); + #else + __create_pgd_mapping(pgd, start, start, size, PAGE_KERNEL_ROX, + early_pgtable_alloc, 0); + #endif + + if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) { + extern u32 __idmap_kpti_flag; + u64 pa = __pa_symbol(&__idmap_kpti_flag); + + /* + * The KPTI G-to-nG conversion code needs a read-write mapping + * of its synchronization flag in the ID map. 
+ */ + #ifdef CONFIG_PTP + __iee_create_pgd_mapping_pre_init(pgd, pa, pa, sizeof(u32), PAGE_KERNEL, + early_pgtable_alloc, 0); + #else + __create_pgd_mapping(pgd, pa, pa, sizeof(u32), PAGE_KERNEL, + early_pgtable_alloc, 0); + #endif + } +} + +#ifdef CONFIG_IEE +#ifndef CONFIG_PTP +static void set_init_iee_stack_page(unsigned long addr) +{ + unsigned long iee_addr = __phys_to_iee(__pa_symbol(addr)); + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); + + p4d_t *p4dp = p4d_offset(pgdp, addr); + p4d_t p4d = READ_ONCE(*p4dp); + + pud_t *pudp = pud_offset(p4dp, addr); + + pmd_t *pmdp = pmd_offset(pudp, addr); + + pte_t *ptep = pte_offset_kernel(pmdp, addr); + + int i; + for(i = 0; i < 4; i++) + { + pte_t pte = READ_ONCE(*ptep); + pte = __pte(pte_val(pte) & ~PTE_VALID); + set_pte(ptep, pte); + ptep++; + } + + pgdp = pgd_offset_pgd(pgdir, iee_addr); + + p4dp = p4d_offset(pgdp, iee_addr); + p4d = READ_ONCE(*p4dp); + + pudp = pud_offset(p4dp, iee_addr); + + pmdp = pmd_offset(pudp, iee_addr); + + ptep = pte_offset_kernel(pmdp, iee_addr); + + for(i = 0; i < 4; i++) + { + pte_t pte = READ_ONCE(*ptep); + pte = __pte(pte_val(pte) | PTE_VALID); + set_pte(ptep, pte); + ptep++; + } + flush_tlb_kernel_range(addr, addr+4*PAGE_SIZE); + isb(); +} +#endif + +static void __create_pgd_mapping_for_iee_locked(pgd_t *pgdir, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), + int flags) +{ + unsigned long addr, end, next; + pgd_t *pgdp = pgd_offset_pgd(pgdir, virt); + p4d_t *p4dp; + p4d_t p4d; + + /* + * If the virtual and physical address don't have the same offset + * within a page, we cannot map the region as the caller expects. + */ + if (WARN_ON((phys ^ virt) & ~PAGE_MASK)) + return; + + phys &= PAGE_MASK; + addr = virt & PAGE_MASK; + end = PAGE_ALIGN(virt + size); + + do { + next = pgd_addr_end(addr, end); + #ifdef CONFIG_PTP + iee_alloc_init_pud_pre_init(pgdp, addr, next, phys, prot, pgtable_alloc, + flags); + #else + alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc, + flags); + #endif + p4dp = p4d_offset(pgdp, addr); + p4d = READ_ONCE(*p4dp); + #ifdef CONFIG_PTP + __iee_p4d_populate_pre_init(p4dp, __p4d_to_phys(p4d), (PGD_APT | PGD_PXN | PGD_UXN | PUD_TYPE_TABLE)); + #else + __p4d_populate(p4dp, __p4d_to_phys(p4d), (PGD_APT | PGD_PXN | PGD_UXN | PUD_TYPE_TABLE)); + #endif + phys += next - addr; + } while (pgdp++, addr = next, addr != end); +} + +static void __create_pgd_mapping_for_iee(pgd_t *pgdir, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), + int flags) +{ + mutex_lock(&fixmap_lock); + __create_pgd_mapping_for_iee_locked(pgdir, phys, virt, size, prot, + pgtable_alloc, flags); + mutex_unlock(&fixmap_lock); +} + +static void __init __map_memblock_for_iee(pgd_t *pgdp, phys_addr_t start, + phys_addr_t end, pgprot_t prot, int flags) +{ + #ifdef CONFIG_PTP + __create_pgd_mapping_for_iee(pgdp, start, __phys_to_iee(start), end - start, + prot, early_pgtable_alloc, flags); + #else + __create_pgd_mapping_for_iee(pgdp, start, __phys_to_iee(start), end - start, + prot, early_pgtable_alloc, flags); + #endif +} + +static void __init map_iee(pgd_t *pgdp) +{ + static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN); + phys_addr_t kernel_start = __pa_symbol(_stext); + phys_addr_t kernel_end = __pa_symbol(__init_begin); + phys_addr_t start, end; + phys_addr_t early_kfence_pool; + int flags = NO_EXEC_MAPPINGS; + u64 i; + + flags |= NO_BLOCK_MAPPINGS | 
NO_CONT_MAPPINGS; + + /* + * Setting hierarchical PXNTable attributes on table entries covering + * the linear region is only possible if it is guaranteed that no table + * entries at any level are being shared between the linear region and + * the vmalloc region. Check whether this is true for the PGD level, in + * which case it is guaranteed to be true for all other levels as well. + */ + BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end)); + + early_kfence_pool = arm64_kfence_alloc_pool(); + + /* + * Take care not to create a writable alias for the + * read-only text and rodata sections of the kernel image. + * So temporarily mark them as NOMAP to skip mappings in + * the following for-loop + */ + memblock_mark_nomap(kernel_start, kernel_end - kernel_start); + + /* map all the memory banks */ + for_each_mem_range(i, &start, &end) { + if (start >= end) + break; + /* + * The linear map must allow allocation tags reading/writing + * if MTE is present. Otherwise, it has the same attributes as + * PAGE_KERNEL. + */ + __map_memblock_for_iee(pgdp, start, end, SET_NG(SET_INVALID(SET_UPAGE(PAGE_KERNEL))), flags); + } + + /* + * Map the linear alias of the [_text, __init_begin) interval + * as non-executable now, and remove the write permission in + * mark_linear_text_alias_ro() below (which will be called after + * alternative patching has completed). This makes the contents + * of the region accessible to subsystems such as hibernate, + * but protects it from inadvertent modification or execution. + * Note that contiguous mappings cannot be remapped in this way, + * so we should avoid them here. + */ + __map_memblock_for_iee(pgdp, kernel_start, kernel_end, + SET_NG(SET_INVALID(SET_UPAGE(PAGE_KERNEL))), flags); + memblock_clear_nomap(kernel_start, kernel_end - kernel_start); + arm64_kfence_map_pool(early_kfence_pool, pgdp); +} + +/* + * Change page access permission, whereas not handling huge pages. + * Only used on IEE init functions. + */ +static void __init iee_si_set_page_attr(unsigned long addr, pteval_t attr) +{ + unsigned long flag; + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); + + p4d_t *p4dp = p4d_offset(pgdp, addr); + + pud_t *pudp = pud_offset(p4dp, addr); + + pmd_t *pmdp = pmd_offset(pudp, addr); + + pte_t *ptep = pte_offset_kernel(pmdp, addr); + pte_t pte = READ_ONCE(*ptep); + + if(attr & PTE_RDONLY) + pte = __pte((pte_val(pte) | PTE_RDONLY) & ~PTE_DBM); + pte = __pte(pte_val(pte) | attr); + #ifdef CONFIG_PTP + // Write pgtable in IEE directly. + flag = local_daif_save(); + asm volatile ("msr pan, #0"); + WRITE_ONCE(*((pte_t *)(__phys_to_iee(__pa(ptep)))), pte); + asm volatile ("msr pan, #1"); + local_daif_restore(flag); + #else + WRITE_ONCE(*ptep, pte); + #endif +} + +/* Prepare data used for iee rwx gates. These data are setted only once. */ +void __init iee_si_prepare_data(void) +{ + unsigned long va; + // Record current TCR val after system init. + // iee_si_tcr = read_sysreg(tcr_el1) & ~(SYS_TCR_IEE_SI); + // CNP maybe enable. + if (system_supports_cnp()) { + iee_base_swapper_pg_dir |= TTBR_CNP_BIT; + } + // Mark iee data as RO and move it to iee after setting up. + va = (unsigned long)__iee_si_data_start; + iee_si_set_page_attr(va, PTE_RDONLY); + // iee_si_set_page_attr(lm_alias(va)+iee_offset, 0x1 | PTE_RDONLY); + // Set iee sensitive inst code page U RWX here to hide it from kernel. 
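map_iee() and iee_si_set_page_attr() above rely on a second mapping of page-table memory reached through __phys_to_iee(). The sketch below assumes that alias is simply the linear-map address plus a fixed offset, which is consistent with the iee_offset computation in arm64_memblock_init(); the exact formula is not visible in this hunk, so treat the helper as an assumption for illustration.

/* Sketch of the aliasing idea behind __phys_to_iee(): page tables stay
 * readable through the linear map but are written only through an alias
 * displaced by a fixed offset. Offset and formula are assumed. */
#include <stdint.h>
#include <stdio.h>

#define IEE_OFFSET 0x400000000000ULL   /* example: BIT(vabits_actual - 2) */

static inline uintptr_t iee_alias(uintptr_t linear_va)
{
    return linear_va + IEE_OFFSET;
}

int main(void)
{
    uintptr_t pte_lm = (uintptr_t)0xffff000081234000ULL;   /* made-up linear-map address */

    printf("write PTEs via %#lx, read via %#lx\n",
           (unsigned long)iee_alias(pte_lm), (unsigned long)pte_lm);
    return 0;
}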
+ va = (unsigned long)__iee_si_text_start; + iee_si_set_page_attr(va, PTE_USER); + va = (unsigned long)__iee_si_text_start + PAGE_SIZE; + iee_si_set_page_attr(va, PTE_USER); + flush_tlb_all(); +} + +#endif + +#ifdef CONFIG_PTP +// Attention : Using set_xxx without adding offset. +static void __init set_iee_valid_pre_init(unsigned long addr) +{ + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); + + p4d_t *p4dp = p4d_offset(pgdp, addr); + + pud_t *pudp = pud_offset(p4dp, addr); + + pmd_t *pmdp = pmd_offset(pudp, addr); + + pte_t *ptep = pte_offset_kernel(pmdp, addr); + pte_t pte = READ_ONCE(*ptep); + + if((addr < (PAGE_OFFSET + IEE_OFFSET)) | (addr > (PAGE_OFFSET + BIT(vabits_actual - 1)))) + return; + + pte = __pte(pte_val(pte) | 0x1); + iee_set_pte_pre_init(ptep, pte); + flush_tlb_kernel_range(addr, addr+PAGE_SIZE); + isb(); +} + +static void __init move_pte_table_into_iee(pmd_t *pmdp, unsigned long addr, unsigned long end) +{ + pmd_t pmd = READ_ONCE(*pmdp); + unsigned long iee_addr = __phys_to_iee(__pmd_to_phys(pmd)); + set_iee_valid_pre_init(iee_addr); +} + +static void __init move_pmd_table_into_iee(pud_t *pudp, unsigned long addr, unsigned long end) +{ + unsigned long next; + pud_t pud = READ_ONCE(*pudp); + pmd_t *pmdp; + pmd_t pmd; + unsigned long iee_addr = __phys_to_iee(__pud_to_phys(pud)); + set_iee_valid_pre_init(iee_addr); + + pmdp = pmd_offset(pudp, addr); + do { + next = pmd_addr_end(addr, end); + pmd = READ_ONCE(*pmdp); + if((pmd_val(pmd) & PMD_TABLE_BIT) == 0) + { + continue; + } + else + { + move_pte_table_into_iee(pmdp, addr, next); + } + } while (pmdp++, addr = next, addr != end); +} + +static void __init move_pud_table_into_iee(pgd_t *pgdp, unsigned long addr, unsigned long end) +{ + unsigned long next; + p4d_t *p4dp = p4d_offset(pgdp, addr); + p4d_t p4d = READ_ONCE(*p4dp); + pud_t *pudp; + pud_t pud; + unsigned long iee_addr = __phys_to_iee(__p4d_to_phys(p4d)); + set_iee_valid_pre_init(iee_addr); + + pudp = pud_offset(p4dp, addr); + do { + next = pud_addr_end(addr, end); + pud = READ_ONCE(*pudp); + if ((pud_val(pud) & PUD_TABLE_BIT) == 0) + { + continue; + } + else + { + move_pmd_table_into_iee(pudp, addr, next); + } + } while (pudp++, addr = next, addr != end); +} + +static void __init init_iee_for_one_region(pgd_t *pgdir, unsigned long va_start, unsigned long va_end) +{ + unsigned long addr, end, next; + pgd_t *pgdp = pgd_offset_pgd(pgdir, va_start); + + addr = va_start & PAGE_MASK; + end = PAGE_ALIGN(va_end); + + do { + next = pgd_addr_end(addr, end); + move_pud_table_into_iee(pgdp, addr, next); + } while (pgdp++, addr = next, addr != end); +} + +static void __init init_iee(void) +{ + unsigned long iee_addr; + phys_addr_t start, end; + u64 i; + pgd_t *pgdp; + + #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + // handling 1-level tramp page table tramp_pg_dir + iee_addr = (unsigned long)__phys_to_iee(__pa_symbol(tramp_pg_dir)); + set_iee_valid_pre_init(iee_addr); + #endif + // handling 1-level page table swapper_pg_dir + pgdp = swapper_pg_dir; + iee_addr = (unsigned long)__phys_to_iee(__pa_symbol(swapper_pg_dir)); + set_iee_valid_pre_init(iee_addr); + // handling 2/3/4-level page table for kernel + init_iee_for_one_region(pgdp, (unsigned long)_text, (unsigned long)_etext); + init_iee_for_one_region(pgdp, (unsigned long)__start_rodata, (unsigned long)__inittext_begin); + init_iee_for_one_region(pgdp, (unsigned long)__inittext_begin, (unsigned long)__inittext_end); + init_iee_for_one_region(pgdp, (unsigned long)__initdata_begin, (unsigned 
long)__initdata_end); + init_iee_for_one_region(pgdp, (unsigned long)_data, (unsigned long)_end); + // handling 2/3/4-level page table for fixmap i.e. remap bm_xxx + iee_addr = (unsigned long)__phys_to_iee(__pa_symbol(bm_pte_addr)); + set_iee_valid_pre_init(iee_addr); + iee_addr = (unsigned long)__phys_to_iee(__pa_symbol(bm_pmd_addr)); + set_iee_valid_pre_init(iee_addr); + iee_addr = (unsigned long)__phys_to_iee(__pa_symbol(bm_pud_addr)); + set_iee_valid_pre_init(iee_addr); + // handling 2/3/4-level page table for logical mem and iee + for_each_mem_range(i, &start, &end) { + if (start >= end) + break; + /* + * The linear map must allow allocation tags reading/writing + * if MTE is present. Otherwise, it has the same attributes as + * PAGE_KERNEL. + */ + init_iee_for_one_region(pgdp, (unsigned long)__va(start), (unsigned long)__va(end)); + init_iee_for_one_region(pgdp, (unsigned long)__phys_to_iee(start), (unsigned long)__phys_to_iee(end)); + } +} + +static void set_init_iee_stack_page_pre_init(unsigned long addr) +{ + unsigned long iee_addr = __phys_to_iee(__pa_symbol(addr)); + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); - if (!(vm_flags & VM_NO_GUARD)) - size += PAGE_SIZE; + p4d_t *p4dp = p4d_offset(pgdp, addr); + p4d_t p4d = READ_ONCE(*p4dp); - vma->addr = va_start; - vma->phys_addr = pa_start; - vma->size = size; - vma->flags = VM_MAP | vm_flags; - vma->caller = __builtin_return_address(0); + pud_t *pudp = pud_offset(p4dp, addr); - vm_area_add_early(vma); -} + pmd_t *pmdp = pmd_offset(pudp, addr); -static pgprot_t kernel_exec_prot(void) -{ - return rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; -} + pte_t *ptep = pte_offset_kernel(pmdp, addr); -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -static int __init map_entry_trampoline(void) -{ int i; + for(i = 0; i < 4; i++) + { + pte_t pte = READ_ONCE(*ptep); + pte = __pte(pte_val(pte) & ~PTE_VALID); + iee_set_pte_pre_init(ptep, pte); + ptep++; + } - pgprot_t prot = kernel_exec_prot(); - phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start); + pgdp = pgd_offset_pgd(pgdir, iee_addr); - /* The trampoline is always mapped and can therefore be global */ - pgprot_val(prot) &= ~PTE_NG; + p4dp = p4d_offset(pgdp, iee_addr); + p4d = READ_ONCE(*p4dp); - /* Map only the text into the trampoline page table */ - memset(tramp_pg_dir, 0, PGD_SIZE); - __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, - entry_tramp_text_size(), prot, - __pgd_pgtable_alloc, NO_BLOCK_MAPPINGS); + pudp = pud_offset(p4dp, iee_addr); - /* Map both the text and data into the kernel page table */ - for (i = 0; i < DIV_ROUND_UP(entry_tramp_text_size(), PAGE_SIZE); i++) - __set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i, - pa_start + i * PAGE_SIZE, prot); + pmdp = pmd_offset(pudp, iee_addr); - if (IS_ENABLED(CONFIG_RELOCATABLE)) - __set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i, - pa_start + i * PAGE_SIZE, PAGE_KERNEL_RO); + ptep = pte_offset_kernel(pmdp, iee_addr); - return 0; + for(i = 0; i < 4; i++) + { + pte_t pte = READ_ONCE(*ptep); + pte = __pte(pte_val(pte) | PTE_VALID); + iee_set_pte_pre_init(ptep, pte); + ptep++; + } + flush_tlb_kernel_range(addr, addr+4*PAGE_SIZE); + isb(); } -core_initcall(map_entry_trampoline); -#endif -/* - * Open coded check for BTI, only for use to determine configuration - * for early mappings for before the cpufeature code has run. 
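set_init_iee_stack_page_pre_init() above hides the four init IEE stack pages from the linear map and exposes them through the IEE alias by toggling the valid bit on the two sets of PTEs before flushing the TLB. A plain-integer model of that flip; the attribute values are made up, and only bit 0 matters here.

/* Model of the valid-bit swap for the init IEE stack pages. */
#include <stdint.h>
#include <stdio.h>

#define PTE_VALID 0x1ULL
#define NPAGES    4

int main(void)
{
    uint64_t lm_pte[NPAGES]  = { 0x81000703, 0x81001703, 0x81002703, 0x81003703 };
    uint64_t iee_pte[NPAGES] = { 0x81000702, 0x81001702, 0x81002702, 0x81003702 };

    for (int i = 0; i < NPAGES; i++) {
        lm_pte[i]  &= ~PTE_VALID;   /* stack no longer reachable via the linear map */
        iee_pte[i] |=  PTE_VALID;   /* but becomes reachable via the IEE alias */
    }

    for (int i = 0; i < NPAGES; i++)
        printf("page %d: lm=%#llx iee=%#llx\n", i,
               (unsigned long long)lm_pte[i], (unsigned long long)iee_pte[i]);
    return 0;
}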
- */ -static bool arm64_early_this_cpu_has_bti(void) +static void __init iee_set_pte_table_ro(pmd_t *pmdp, unsigned long addr, unsigned long end) { - u64 pfr1; - - if (!IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) - return false; - - pfr1 = __read_sysreg_by_encoding(SYS_ID_AA64PFR1_EL1); - return cpuid_feature_extract_unsigned_field(pfr1, - ID_AA64PFR1_EL1_BT_SHIFT); + pmd_t pmd = READ_ONCE(*pmdp); + unsigned long logical_addr = (unsigned long)__va(__pmd_to_phys(pmd)); + iee_set_logical_mem_ro(logical_addr); } -/* - * Create fine-grained mappings for the kernel. - */ -static void __init map_kernel(pgd_t *pgdp) +static void __init iee_set_pmd_table_ro(pud_t *pudp, unsigned long addr, unsigned long end) { - static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext, - vmlinux_initdata, vmlinux_data; - - /* - * External debuggers may need to write directly to the text - * mapping to install SW breakpoints. Allow this (only) when - * explicitly requested with rodata=off. - */ - pgprot_t text_prot = kernel_exec_prot(); + unsigned long next; + pud_t pud = READ_ONCE(*pudp); + pmd_t *pmdp; + pmd_t pmd; + unsigned long logical_addr = (unsigned long)__va(__pud_to_phys(pud)); + iee_set_logical_mem_ro(logical_addr); - /* - * If we have a CPU that supports BTI and a kernel built for - * BTI then mark the kernel executable text as guarded pages - * now so we don't have to rewrite the page tables later. - */ - if (arm64_early_this_cpu_has_bti()) - text_prot = __pgprot_modify(text_prot, PTE_GP, PTE_GP); + pmdp = pmd_offset(pudp, addr); + do { + next = pmd_addr_end(addr, end); + pmd = READ_ONCE(*pmdp); + if((pmd_val(pmd) & PMD_TABLE_BIT) == 0) + { + continue; + } + else + { + iee_set_pte_table_ro(pmdp, addr, next); + } + } while (pmdp++, addr = next, addr != end); +} - /* - * Only rodata will be remapped with different permissions later on, - * all other segments are allowed to use contiguous mappings. 
- */ - map_kernel_segment(pgdp, _stext, _etext, text_prot, &vmlinux_text, 0, - VM_NO_GUARD); - map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL, - &vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD); - map_kernel_segment(pgdp, __inittext_begin, __inittext_end, text_prot, - &vmlinux_inittext, 0, VM_NO_GUARD); - map_kernel_segment(pgdp, __initdata_begin, __initdata_end, PAGE_KERNEL, - &vmlinux_initdata, 0, VM_NO_GUARD); - map_kernel_segment(pgdp, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0); +static void __init iee_set_pud_table_ro(pgd_t *pgdp, unsigned long addr, unsigned long end) +{ + unsigned long next; + p4d_t *p4dp = p4d_offset(pgdp, addr); + p4d_t p4d = READ_ONCE(*p4dp); + pud_t *pudp; + pud_t pud; + unsigned long logical_addr = (unsigned long)__va(__p4d_to_phys(p4d)); + iee_set_logical_mem_ro(logical_addr); - fixmap_copy(pgdp); - kasan_copy_shadow(pgdp); + pudp = pud_offset(p4dp, addr); + do { + next = pud_addr_end(addr, end); + pud = READ_ONCE(*pudp); + if ((pud_val(pud) & PUD_TABLE_BIT) == 0) + { + continue; + } + else + { + iee_set_pmd_table_ro(pudp, addr, next); + } + } while (pudp++, addr = next, addr != end); } -static void __init create_idmap(void) +static void __init iee_mark_pgtable_for_one_region_ro(pgd_t *pgdir, unsigned long va_start, unsigned long va_end) { - u64 start = __pa_symbol(__idmap_text_start); - u64 size = __pa_symbol(__idmap_text_end) - start; - pgd_t *pgd = idmap_pg_dir; - u64 pgd_phys; + unsigned long addr, end, next; + pgd_t *pgdp = pgd_offset_pgd(pgdir, va_start); - /* check if we need an additional level of translation */ - if (VA_BITS < 48 && idmap_t0sz < (64 - VA_BITS_MIN)) { - pgd_phys = early_pgtable_alloc(PAGE_SHIFT); - set_pgd(&idmap_pg_dir[start >> VA_BITS], - __pgd(pgd_phys | P4D_TYPE_TABLE)); - pgd = __va(pgd_phys); - } - __create_pgd_mapping(pgd, start, start, size, PAGE_KERNEL_ROX, - early_pgtable_alloc, 0); + addr = va_start & PAGE_MASK; + end = PAGE_ALIGN(va_end); - if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) { - extern u32 __idmap_kpti_flag; - u64 pa = __pa_symbol(&__idmap_kpti_flag); + do { + next = pgd_addr_end(addr, end); + iee_set_pud_table_ro(pgdp, addr, next); + } while (pgdp++, addr = next, addr != end); +} +// Mark pgtable outside as RO. 
+void __init iee_mark_all_lm_pgtable_ro(void) +{ + extern void *bm_pte_addr; + iee_set_logical_mem_ro((unsigned long)bm_pte_addr); + unsigned long logical_addr; + phys_addr_t start, end; + u64 i; + pgd_t *pgdp; + + // handling static allocated page table + #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + // handling 1-level tramp page table tramp_pg_dir + // logical_addr = (unsigned long)__va(__pa_symbol(tramp_pg_dir)); + // iee_set_logical_mem_ro(logical_addr); + #endif + // handling 1-level page table swapper_pg_dir + pgdp = swapper_pg_dir; + // iee_set_logical_mem_ro((unsigned long)swapper_pg_dir); + // logical_addr = (unsigned long)__va(__pa_symbol(swapper_pg_dir)); + // iee_set_logical_mem_ro(logical_addr); + + // handling 2/3/4-level page table for kernel + iee_mark_pgtable_for_one_region_ro(pgdp, (unsigned long)_text, (unsigned long)_etext); + iee_mark_pgtable_for_one_region_ro(pgdp, (unsigned long)__start_rodata, (unsigned long)__inittext_begin); + iee_mark_pgtable_for_one_region_ro(pgdp, (unsigned long)__inittext_begin, (unsigned long)__inittext_end); + iee_mark_pgtable_for_one_region_ro(pgdp, (unsigned long)__initdata_begin, (unsigned long)__initdata_end); + iee_mark_pgtable_for_one_region_ro(pgdp, (unsigned long)_data, (unsigned long)_end); + + // handling 2/3/4-level page table for fixmap i.e. remap bm_xxx + logical_addr = (unsigned long)__va(__pa_symbol(bm_pte_addr)); + iee_set_logical_mem_ro(logical_addr); + + iee_set_logical_mem_ro((unsigned long)bm_pmd_addr); + logical_addr = (unsigned long)__va(__pa_symbol(bm_pmd_addr)); + iee_set_logical_mem_ro(logical_addr); + + iee_set_logical_mem_ro((unsigned long)bm_pud_addr); + logical_addr = (unsigned long)__va(__pa_symbol(bm_pud_addr)); + iee_set_logical_mem_ro(logical_addr); + + // handling 2/3/4-level page table for logical mem and iee + for_each_mem_range(i, &start, &end) { + if (start >= end) + break; /* - * The KPTI G-to-nG conversion code needs a read-write mapping - * of its synchronization flag in the ID map. + * The linear map must allow allocation tags reading/writing + * if MTE is present. Otherwise, it has the same attributes as + * PAGE_KERNEL. */ - __create_pgd_mapping(pgd, pa, pa, sizeof(u32), PAGE_KERNEL, - early_pgtable_alloc, 0); + iee_mark_pgtable_for_one_region_ro(pgdp, (unsigned long)__va(start), (unsigned long)__va(end)); + iee_mark_pgtable_for_one_region_ro(pgdp, (unsigned long)__phys_to_iee(start), (unsigned long)__phys_to_iee(end)); } } +#endif + +#ifdef CONFIG_KOI +extern s64 koi_offset; +#endif void __init paging_init(void) { + #ifdef CONFIG_IEE + unsigned long SP_EL0; + void *new; + void *init_token; + struct task_token *token; + unsigned long tcr; + + // Check if cpu has PAN and HPDS. + if(!cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64MMFR1_EL1), + ID_AA64MMFR1_EL1_PAN_SHIFT)) + panic("Architecture doesn't support PAN, please disable CONFIG_IEE.\n"); + + if(!cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64MMFR1_EL1), + ID_AA64MMFR1_EL1_HPDS_SHIFT)) + panic("Architecture doesn't support HPDS, please disable CONFIG_IEE.\n"); + #endif + + // Avoid using iee code to modify pgtable before iee initialized. 
+ #ifdef CONFIG_PTP + pgd_t *pgdp = pgd_set_fixmap_init(__pa_symbol(swapper_pg_dir)); + #else pgd_t *pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir)); + #endif + + extern pgd_t init_idmap_pg_dir[]; idmap_t0sz = 63UL - __fls(__pa_symbol(_end) | GENMASK(VA_BITS_MIN - 1, 0)); @@ -803,7 +2036,21 @@ void __init paging_init(void) map_kernel(pgdp); map_mem(pgdp); + // Map the whole physical mem into IEE, but set invalid. +#ifdef CONFIG_IEE + map_iee(pgdp); +#else +#ifdef CONFIG_KOI + map_koi(pgdp); +#endif +#endif + + // Avoid using iee code to modify pgtable before iee initialized. + #ifdef CONFIG_PTP + pgd_clear_fixmap_init(); + #else pgd_clear_fixmap(); + #endif cpu_replace_ttbr1(lm_alias(swapper_pg_dir), init_idmap_pg_dir); init_mm.pgd = swapper_pg_dir; @@ -814,6 +2061,83 @@ void __init paging_init(void) memblock_allow_resize(); create_idmap(); + + #ifdef CONFIG_IEE + // test iee_exec_entry + iee_rwx_gate_entry(IEE_SI_TEST); + // Initialize init iee stack. + #ifdef CONFIG_PTP + set_init_iee_stack_page_pre_init((unsigned long)init_iee_stack_begin); + #else + set_init_iee_stack_page((unsigned long)init_iee_stack_begin); + #endif + #endif + + // Init token for init_task. + #ifdef CONFIG_IEE + // Change SP_EL0 from Image VA to Logical VA. + SP_EL0 = (unsigned long)__va(__pa_symbol(&init_task)); + write_sysreg(SP_EL0, sp_el0); + init_task.cpus_ptr = &(((struct task_struct *)(__va(__pa_symbol(&init_task))))->cpus_mask); + init_task.children.prev = (__va(__pa_symbol(init_task.children.prev))); + init_task.children.next = (__va(__pa_symbol(init_task.children.next))); + // Alloc a page for init_token. + new = __va(early_pgtable_alloc(0)); + init_token_page_vaddr = new; + init_token = (void *)__phys_to_iee(__pa_symbol(&init_task)); + // Use lm to write token before IEE initialized. + token = (struct task_token *)((unsigned long)new + (((unsigned long)&init_task) & ~PAGE_MASK)); + token->pgd = NULL; + token->iee_stack = (void *)__phys_to_iee(__pa_symbol((void *)init_iee_stack_end)); + token->valid = true; + iee_set_token_page_valid_pre_init(init_token, new); + #endif + + #ifdef CONFIG_PTP + // Map the existing pgtable into IEE, set valid. + init_iee(); + #endif + + #ifdef CONFIG_IEE + sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_SPAN); + #endif + + // IEE ready. + // Pgtable writing before uses logical memory and after uses IEE memory. + + #ifdef CONFIG_IEE + // Set HPD1 as 1. + tcr = read_sysreg(tcr_el1); + tcr |= ((unsigned long)0x1 << 42); + write_sysreg(tcr, tcr_el1); + isb(); + + // Flush tlb to enable IEE. + flush_tlb_all(); + + // mark that iee is prepared. 
+ iee_init_done = true; +#else +#ifdef CONFIG_KOI + unsigned long SP_EL0 = __va(__pa_symbol(&init_task)); + write_sysreg(SP_EL0, sp_el0); + init_task.cpus_ptr = &(((struct task_struct *)(__va(__pa_symbol(&init_task))))->cpus_mask); + init_task.children.prev = (__va(__pa_symbol(init_task.children.prev))); + init_task.children.next = (__va(__pa_symbol(init_task.children.next))); + // create a new page for token + void *alloc_token = __va(early_pgtable_alloc(0)); + // get the address of token + void *token_addr = __phys_to_virt(__pa_symbol(&init_task)) + KOI_OFFSET; + // add memory mapping for token + koi_add_page_mapping(token_addr, (void *)alloc_token); + // printk(KERN_ERR "token_addr=0x%16llx, alloc_token=0x%16llx, init_task=0x%16llx, virt=0x%16llx\n", token_addr, alloc_token, &init_task, __phys_to_virt(__pa_symbol(&init_task))); + struct task_token *token = (struct task_token *)((unsigned long)alloc_token + (((unsigned long)&init_task) & ~PAGE_MASK)); + token->koi_kernel_stack = NULL; + token->koi_stack = NULL; + token->koi_stack_base = NULL; + token->current_ttbr1 = 0; +#endif +#endif } #ifdef CONFIG_MEMORY_HOTPLUG @@ -1202,6 +2526,139 @@ void vmemmap_free(unsigned long start, unsigned long end, } #endif /* CONFIG_MEMORY_HOTPLUG */ +#ifdef CONFIG_PTP + +static void * __init iee_ptdesc_alloc_block_zero(unsigned long size, int node, bool pt) +{ + void *p = memblock_alloc_try_nid_raw(size, size, __pa(MAX_DMA_ADDRESS), + MEMBLOCK_ALLOC_ACCESSIBLE, node); + + if (!p) + return NULL; + memset(p, 0, size); + + /* Vaild iee address to enable page table operations. */ + if (pt) + set_iee_page_valid((unsigned long)__phys_to_iee(__pa(p))); + + return p; +} + +static pte_t * __init iee_ptdesc_pte_populate(pmd_t *pmd, unsigned long addr, int node) +{ + pte_t *pte = pte_offset_kernel(pmd, addr); + if (pte_none(ptep_get(pte))) { + pte_t entry; + void *p = iee_ptdesc_alloc_block_zero(PAGE_SIZE, node, false); + if (!p) + return NULL; + + entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); + set_pte_at(&init_mm, addr, pte, entry); + } + return pte; +} + +static pmd_t * __init iee_ptdesc_pmd_populate(pud_t *pud, unsigned long addr, int node) +{ + pmd_t *pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + void *p = iee_ptdesc_alloc_block_zero(PAGE_SIZE, node, true); + if (!p) + return NULL; + pmd_populate_kernel(&init_mm, pmd, p); + } + return pmd; +} + +static pud_t * __init iee_ptdesc_pud_populate(p4d_t *p4d, unsigned long addr, int node) +{ + pud_t *pud = pud_offset(p4d, addr); + if (pud_none(*pud)) { + void *p = iee_ptdesc_alloc_block_zero(PAGE_SIZE, node, true); + if (!p) + return NULL; + pmd_init(p); + pud_populate(&init_mm, pud, p); + } + return pud; +} + +static p4d_t * __init iee_ptdesc_p4d_populate(pgd_t *pgd, unsigned long addr, int node) +{ + p4d_t *p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) { + void *p = iee_ptdesc_alloc_block_zero(PAGE_SIZE, node, true); + if (!p) + return NULL; + pud_init(p); + p4d_populate(&init_mm, p4d, p); + } + return p4d; +} + +static pgd_t * __init iee_ptdesc_pgd_populate(unsigned long addr, int node) +{ + pgd_t *pgd = pgd_offset_k(addr); + if (pgd_none(*pgd)) { + void *p = iee_ptdesc_alloc_block_zero(PAGE_SIZE, node, true); + if (!p) + return NULL; + pgd_populate(&init_mm, pgd, p); + } + return pgd; +} + +/* Create mappings if that address is not mapped. 
*/ +static pte_t * __init iee_ptdesc_populate_address(unsigned long addr) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + int node = NUMA_NO_NODE; + + pgd = iee_ptdesc_pgd_populate(addr, node); + if (!pgd) + return NULL; + p4d = iee_ptdesc_p4d_populate(pgd, addr, node); + if (!p4d) + return NULL; + pud = iee_ptdesc_pud_populate(p4d, addr, node); + if (!pud) + return NULL; + pmd = iee_ptdesc_pmd_populate(pud, addr, node); + if (!pmd) + return NULL; + pte = iee_ptdesc_pte_populate(pmd, addr, node); + if (!pte) + return NULL; + + return pte; +} + +/* Init ptdesc array used by iee. */ +int __init iee_ptdesc_sparse_init(void) +{ + unsigned long start_pfn, end_pfn; + int i, nid; + /* */ + for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { + struct ptdesc** ptdesc_start = ALIGN_DOWN((u64)(__pfn_to_ptdesc(start_pfn)), PAGE_SIZE); + struct ptdesc** end = ALIGN((u64)(__pfn_to_ptdesc(end_pfn)), PAGE_SIZE); + unsigned long addr = ptdesc_start; + pte_t *pte; + for (; addr < end; addr += PAGE_SIZE) { + pte = iee_ptdesc_populate_address(addr); + if (!pte) + return -ENOMEM; + } + } + return 0; +} +#endif /* CONFIG_PTP */ + int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) { pud_t new_pud = pfn_pud(__phys_to_pfn(phys), mk_pud_sect_prot(prot)); @@ -1261,6 +2718,9 @@ int pmd_free_pte_page(pmd_t *pmdp, unsigned long addr) table = pte_offset_kernel(pmdp, addr); pmd_clear(pmdp); __flush_tlb_kernel_pgtable(addr); + #ifdef CONFIG_PTP + iee_memset(table, 0, PAGE_SIZE); + #endif pte_free_kernel(NULL, table); return 1; } @@ -1521,3 +2981,93 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte { set_pte_at(vma->vm_mm, addr, ptep, pte); } + +#if !defined(CONFIG_IEE) && defined (CONFIG_KOI) +static void __create_pgd_mapping_for_koi(pgd_t *pgdir, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), + int flags) +{ + unsigned long addr, end, next; + pgd_t *pgdp = pgd_offset_pgd(pgdir, virt); + + /* + * If the virtual and physical address don't have the same offset + * within a page, we cannot map the region as the caller expects. + */ + if (WARN_ON((phys ^ virt) & ~PAGE_MASK)) + return; + + phys &= PAGE_MASK; + addr = virt & PAGE_MASK; + end = PAGE_ALIGN(virt + size); + + do { + next = pgd_addr_end(addr, end); + alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc, + flags); + p4d_t *p4dp = p4d_offset(pgdp, addr); + p4d_t p4d = READ_ONCE(*p4dp); + __p4d_populate(p4dp, __p4d_to_phys(p4d), PUD_TYPE_TABLE); + phys += next - addr; + } while (pgdp++, addr = next, addr != end); +} + +static void __init __map_memblock_for_koi(pgd_t *pgdp, phys_addr_t start, + phys_addr_t end, pgprot_t prot, int flags) +{ + __create_pgd_mapping_for_koi(pgdp, start, __phys_to_virt(start) + KOI_OFFSET, end - start, + prot, early_pgtable_alloc, flags); +} + +static void __init map_koi(pgd_t *pgdp) +{ + phys_addr_t kernel_start = __pa_symbol(_text); + phys_addr_t kernel_end = __pa_symbol(__init_begin); + phys_addr_t start, end; + int flags = 0; + u64 i; + + flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; + + /* + * Take care not to create a writable alias for the + * read-only text and rodata sections of the kernel image. 
+ * So temporarily mark them as NOMAP to skip mappings in + * the following for-loop + */ + memblock_mark_nomap(kernel_start, kernel_end - kernel_start); +#ifdef CONFIG_KEXEC_CORE + if (crashk_res.end) + memblock_mark_nomap(crashk_res.start, + resource_size(&crashk_res)); +#endif + + /* map all the memory banks */ + for_each_mem_range(i, &start, &end) { + if (start >= end) + break; + /* + * The linear map must allow allocation tags reading/writing + * if MTE is present. Otherwise, it has the same attributes as + * PAGE_KERNEL. + */ + __map_memblock_for_koi(pgdp, start, end, SET_NG(SET_INVALID(PAGE_KERNEL)), flags); + } +#ifdef CONFIG_KEXEC_CORE + /* + * Use page-level mappings here so that we can shrink the region + * in page granularity and put back unused memory to buddy system + * through /sys/kernel/kexec_crash_size interface. + */ + if (crashk_res.end) { + __map_memblock_for_koi(pgdp, crashk_res.start, crashk_res.end + 1, + SET_NG(SET_INVALID(PAGE_KERNEL)), + NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS); + memblock_clear_nomap(crashk_res.start, + resource_size(&crashk_res)); + } +#endif +} +#endif \ No newline at end of file diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index 4a64089e5771..20008c35d150 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -15,14 +15,38 @@ #include #include +#ifdef CONFIG_PTP +#include +#endif + static struct kmem_cache *pgd_cache __ro_after_init; +#ifdef CONFIG_KOI +pgd_t *koi_pgd_alloc(void) +{ + pgd_t *pgd; +#ifdef CONFIG_PTP + pgd = (pgd_t *)get_iee_pgtable_page(GFP_PGTABLE_KERNEL); +#else + pgd = (pgd_t *)__get_free_page(GFP_PGTABLE_KERNEL); +#endif + return pgd; +} +#endif + pgd_t *pgd_alloc(struct mm_struct *mm) { gfp_t gfp = GFP_PGTABLE_USER; if (PGD_SIZE == PAGE_SIZE) +#ifdef CONFIG_PTP + { + pgd_t* new = (pgd_t *)get_iee_pgtable_page(gfp); + return new; + } +#else return (pgd_t *)__get_free_page(gfp); +#endif else return kmem_cache_alloc(pgd_cache, gfp); } @@ -30,7 +54,15 @@ pgd_t *pgd_alloc(struct mm_struct *mm) void pgd_free(struct mm_struct *mm, pgd_t *pgd) { if (PGD_SIZE == PAGE_SIZE) +#ifdef CONFIG_PTP + { + struct page *page = virt_to_page((void *)pgd); + set_page_count(page, 1); + free_iee_pgtable_page((void *)pgd); + } +#else free_page((unsigned long)pgd); +#endif else kmem_cache_free(pgd_cache, pgd); } diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 9ef7b07349d7..b8561144a45a 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -22,6 +22,8 @@ #include #include + + #ifdef CONFIG_ARM64_64K_PAGES #define TCR_TG_FLAGS TCR_TG0_64K | TCR_TG1_64K #elif defined(CONFIG_ARM64_16K_PAGES) @@ -190,6 +192,7 @@ SYM_TYPED_FUNC_START(idmap_cpu_replace_ttbr1) __idmap_cpu_set_reserved_ttbr1 x1, x3 offset_ttbr1 x0, x3 + msr ttbr1_el1, x0 isb @@ -459,6 +462,11 @@ SYM_FUNC_START(__cpu_setup) #endif /* CONFIG_ARM64_HAFT */ 1: #endif /* CONFIG_ARM64_HW_AFDBM */ + +#ifdef CONFIG_IEE + orr tcr, tcr, #TCR_HPD1 // Hierarchical permission disables +#endif + msr mair_el1, mair msr tcr_el1, tcr diff --git a/arch/arm64/mm/trans_pgd.c b/arch/arm64/mm/trans_pgd.c index 5139a28130c0..23d8a4b91574 100644 --- a/arch/arm64/mm/trans_pgd.c +++ b/arch/arm64/mm/trans_pgd.c @@ -25,6 +25,9 @@ #include #include #include +#ifdef CONFIG_PTP +#include +#endif static void *trans_alloc(struct trans_pgd_info *info) { @@ -65,10 +68,12 @@ static int copy_pte(struct trans_pgd_info *info, pmd_t *dst_pmdp, pte_t *src_ptep; pte_t *dst_ptep; unsigned long addr = start; - dst_ptep = trans_alloc(info); if (!dst_ptep) return -ENOMEM; + #ifdef CONFIG_PTP + 
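+	/*
+	 * Hand the freshly allocated destination table to IEE before it is
+	 * populated; each copy_* level below does the same for its allocation.
+	 */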
set_iee_page((unsigned long)dst_ptep,0); + #endif pmd_populate_kernel(NULL, dst_pmdp, dst_ptep); dst_ptep = pte_offset_kernel(dst_pmdp, start); @@ -92,6 +97,9 @@ static int copy_pmd(struct trans_pgd_info *info, pud_t *dst_pudp, dst_pmdp = trans_alloc(info); if (!dst_pmdp) return -ENOMEM; + #ifdef CONFIG_PTP + set_iee_page((unsigned long)dst_pmdp,0); + #endif pud_populate(NULL, dst_pudp, dst_pmdp); } dst_pmdp = pmd_offset(dst_pudp, start); @@ -128,6 +136,9 @@ static int copy_pud(struct trans_pgd_info *info, p4d_t *dst_p4dp, dst_pudp = trans_alloc(info); if (!dst_pudp) return -ENOMEM; + #ifdef CONFIG_PTP + set_iee_page((unsigned long)dst_pudp,0); + #endif p4d_populate(NULL, dst_p4dp, dst_pudp); } dst_pudp = pud_offset(dst_p4dp, start); @@ -212,6 +223,10 @@ int trans_pgd_create_copy(struct trans_pgd_info *info, pgd_t **dst_pgdp, return -ENOMEM; } + #ifdef CONFIG_PTP + set_iee_page((unsigned long)trans_pgd,0); + #endif + rc = copy_page_tables(info, trans_pgd, start, end); if (!rc) *dst_pgdp = trans_pgd; @@ -247,12 +262,20 @@ int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0, if (!levels[this_level]) return -ENOMEM; + #ifdef CONFIG_PTP + set_iee_page((unsigned long)levels[this_level],0); + #endif + level_lsb = ARM64_HW_PGTABLE_LEVEL_SHIFT(this_level); level_msb = min(level_lsb + bits_mapped, max_msb); level_mask = GENMASK_ULL(level_msb, level_lsb); index = (dst_addr & level_mask) >> level_lsb; + #ifdef CONFIG_PTP + set_pte((pte_t *)(levels[this_level] + index), __pte(prev_level_entry)); + #else *(levels[this_level] + index) = prev_level_entry; + #endif pfn = virt_to_pfn(levels[this_level]); prev_level_entry = pte_val(pfn_pte(pfn, diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 76ae4a3131ba..b3b962fcc3f8 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -13,6 +13,9 @@ #include #include #include +#ifdef CONFIG_HIVE +#include +#endif #include #include @@ -67,6 +70,10 @@ static const int bpf2a64[] = { /* temporary register for blinding constants */ [BPF_REG_AX] = A64_R(9), [FP_BOTTOM] = A64_R(27), +#ifdef CONFIG_HIVE + /* fbpf base addr */ + [BPF_REG_BASE] = A64_R(28), +#endif }; struct jit_ctx { @@ -283,7 +290,11 @@ static bool is_lsi_offset(int offset, int scale) #define POKE_OFFSET (BTI_INSNS + 1) /* Tail call offset to jump into */ +#ifdef CONFIG_HIVE +#define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 8 + 6 - 1) +#else #define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 8) +#endif static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) { @@ -296,6 +307,9 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) const u8 fp = bpf2a64[BPF_REG_FP]; const u8 tcc = bpf2a64[TCALL_CNT]; const u8 fpb = bpf2a64[FP_BOTTOM]; +#ifdef CONFIG_HIVE + const u8 base = bpf2a64[BPF_REG_BASE]; +#endif const int idx0 = ctx->idx; int cur_offset; @@ -345,10 +359,35 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) emit(A64_PUSH(r6, r7, A64_SP), ctx); emit(A64_PUSH(r8, r9, A64_SP), ctx); emit(A64_PUSH(fp, tcc, A64_SP), ctx); +#ifdef CONFIG_HIVE + emit(A64_PUSH(fpb, base, A64_SP), ctx); +#else emit(A64_PUSH(fpb, A64_R(28), A64_SP), ctx); +#endif +#ifdef CONFIG_HIVE + /* Set up BPF prog stack base and sfi base register */ + u64 sfi_base = (u64)prog->shadow_region_addr; + u64 sfi_stack = (u64)(prog->shadow_stack_addr); + int idx_before; + + pr_info("fp=%016llx, base=%016llx\n", sfi_stack, sfi_base); + + idx_before = ctx->idx; + emit_a64_mov_i64(fp, sfi_stack, ctx); // 3 insns + 
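+	/*
+	 * emit_a64_mov_i64() may emit fewer instructions for some immediates;
+	 * pad with NOPs so the prologue length stays fixed and still matches
+	 * PROLOGUE_OFFSET.
+	 */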
while (ctx->idx - idx_before < 3) { + emit(A64_NOP, ctx); + } + idx_before = ctx->idx; + emit_a64_mov_i64(base, sfi_base, ctx); // 3 insns + while (ctx->idx - idx_before < 3) { + emit(A64_NOP, ctx); + } +#else /* Set up BPF prog stack base register */ emit(A64_MOV(1, fp, A64_SP), ctx); +#endif + if (!ebpf_from_cbpf && is_main_prog) { /* Initialize tail_call_cnt */ @@ -356,8 +395,13 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) cur_offset = ctx->idx - idx0; if (cur_offset != PROLOGUE_OFFSET) { + #ifdef CONFIG_HIVE + pr_err("PROLOGUE_OFFSET = %d, expected %d!\n", + cur_offset, PROLOGUE_OFFSET); + #else pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n", cur_offset, PROLOGUE_OFFSET); + #endif return -1; } @@ -662,12 +706,20 @@ static void build_epilogue(struct jit_ctx *ctx) const u8 r9 = bpf2a64[BPF_REG_9]; const u8 fp = bpf2a64[BPF_REG_FP]; const u8 fpb = bpf2a64[FP_BOTTOM]; +#ifdef CONFIG_HIVE + const u8 base = bpf2a64[BPF_REG_BASE]; +#endif /* We're done with BPF stack */ emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); +#ifdef CONFIG_HIVE + /* Restore x27 and base (x28) */ + emit(A64_POP(fpb, base, A64_SP), ctx); +#else /* Restore x27 and x28 */ emit(A64_POP(fpb, A64_R(28), A64_SP), ctx); +#endif /* Restore fs (x25) and x26 */ emit(A64_POP(fp, A64_R(26), A64_SP), ctx); @@ -769,6 +821,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, const u8 fp = bpf2a64[BPF_REG_FP]; const u8 fpb = bpf2a64[FP_BOTTOM]; const s16 off = insn->off; +#ifdef CONFIG_HIVE + u8 off_reg; +#endif const s32 imm = insn->imm; const int i = insn - ctx->prog->insnsi; const bool is64 = BPF_CLASS(code) == BPF_ALU64 || @@ -1237,6 +1292,33 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, return ret; break; +#ifdef CONFIG_HIVE + case BPF_LDX | BPF_REG | BPF_W: + case BPF_LDX | BPF_REG | BPF_H: + case BPF_LDX | BPF_REG | BPF_B: + case BPF_LDX | BPF_REG | BPF_DW: + off_reg = bpf2a64[off]; + switch (BPF_SIZE(code)) { + case BPF_W: + emit(A64_LDR32(dst, src, off_reg), ctx); + break; + case BPF_H: + emit(A64_LDRH(dst, src, off_reg), ctx); + break; + case BPF_B: + emit(A64_LDRB(dst, src, off_reg), ctx); + break; + case BPF_DW: + emit(A64_LDR64(dst, src, off_reg), ctx); + break; + } + + ret = add_exception_handler(insn, ctx, dst); + if (ret) + return ret; + break; +#endif + /* speculation barrier */ case BPF_ST | BPF_NOSPEC: /* @@ -1347,6 +1429,29 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, break; } break; + + #ifdef CONFIG_HIVE + case BPF_STX | BPF_REG | BPF_W: + case BPF_STX | BPF_REG | BPF_H: + case BPF_STX | BPF_REG | BPF_B: + case BPF_STX | BPF_REG | BPF_DW: + off_reg = bpf2a64[off]; + switch (BPF_SIZE(code)) { + case BPF_W: + emit(A64_STR32(src, dst, off_reg), ctx); + break; + case BPF_H: + emit(A64_STRH(src, dst, off_reg), ctx); + break; + case BPF_B: + emit(A64_STRB(src, dst, off_reg), ctx); + break; + case BPF_DW: + emit(A64_STR64(src, dst, off_reg), ctx); + break; + } + break; + #endif case BPF_STX | BPF_ATOMIC | BPF_W: case BPF_STX | BPF_ATOMIC | BPF_DW: diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index df023e1cb5dd..f791ba71b796 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1548,6 +1548,25 @@ config AMD_MEM_ENCRYPT This requires an AMD processor that supports Secure Memory Encryption (SME). 
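+# Isolated Execution Environment (IEE) and the isolation features built on it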
+config CREDP + bool "Config for credentials isolation" + depends on IEE + def_bool y + +config IEE + bool "Config for Isolated Executed Environment" + depends on X86_64 + def_bool y + +config KEYP + depends on IEE + def_bool y + +config IEE_SELINUX_P + depends on IEE + depends on SECURITY_SELINUX + def_bool n + # Common NUMA Features config NUMA bool "NUMA Memory Allocation and Scheduler Support" @@ -2466,6 +2485,12 @@ source "kernel/livepatch/Kconfig" endmenu +# Config for kernel module isolation +config KOI + bool "Config for Kernel Module Isolation" + depends on X86_64 + def_bool y + config CC_HAS_SLS def_bool $(cc-option,-mharden-sls=all) diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index a7b4148a943f..23e85a84e45b 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -23,7 +23,11 @@ /* Use the static base for this part of the boot process */ #undef __PAGE_OFFSET #define __PAGE_OFFSET __PAGE_OFFSET_BASE +#ifdef CONFIG_PTP +#include "../../mm/ident_map_for_iee.c" +#else #include "../../mm/ident_map.c" +#endif #define _SETUP #include /* For COMMAND_LINE_SIZE */ @@ -101,9 +105,15 @@ void kernel_add_identity_map(unsigned long start, unsigned long end) return; /* Build the mapping. */ + #ifdef CONFIG_PTP + ret = kernel_ident_mapping_init_for_iee(&mapping_info, (pgd_t *)top_level_pgt, start, end); + if (ret) + error("Error: kernel_ident_mapping_init_for_iee() failed\n"); + #else ret = kernel_ident_mapping_init(&mapping_info, (pgd_t *)top_level_pgt, start, end); if (ret) error("Error: kernel_ident_mapping_init() failed\n"); + #endif } /* Locates and clears a region for a new top level page table. */ @@ -179,7 +189,11 @@ void initialize_identity_maps(void *rmode) sev_prep_identity_maps(top_level_pgt); /* Load the new page-table. */ + #ifdef CONFIG_IEE + native_write_cr3_pre_init(top_level_pgt); + #else write_cr3(top_level_pgt); + #endif /* * Now that the required page table mappings are established and a @@ -207,7 +221,11 @@ static pte_t *split_large_pmd(struct x86_mapping_info *info, /* Populate the PTEs */ for (i = 0; i < PTRS_PER_PMD; i++) { + #ifdef CONFIG_PTP + iee_set_pte_pre_init(&pte[i], __pte(address | page_flags)); + #else set_pte(&pte[i], __pte(address | page_flags)); + #endif address += PAGE_SIZE; } @@ -221,9 +239,17 @@ static pte_t *split_large_pmd(struct x86_mapping_info *info, * of a TLB multihit. 
*/ pmd = __pmd((unsigned long)pte | info->kernpg_flag); + #ifdef CONFIG_PTP + iee_set_pmd_pre_init(pmdp, pmd); + #else set_pmd(pmdp, pmd); + #endif /* Flush TLB to establish the new PMD */ + #ifdef CONFIG_IEE + native_write_cr3_pre_init(top_level_pgt); + #else write_cr3(top_level_pgt); + #endif return pte + pte_index(__address); } @@ -313,7 +339,11 @@ static int set_clr_page_flags(struct x86_mapping_info *info, pte = *ptep; pte = pte_set_flags(pte, set); pte = pte_clear_flags(pte, clr); + #ifdef CONFIG_PTP + iee_set_pte_pre_init(ptep, pte); + #else set_pte(ptep, pte); + #endif /* * If the encryption attribute is being set, then change the page state to @@ -324,7 +354,11 @@ static int set_clr_page_flags(struct x86_mapping_info *info, snp_set_page_private(__pa(address & PAGE_MASK)); /* Flush TLB after changing encryption attribute */ + #ifdef CONFIG_IEE + native_write_cr3_pre_init(top_level_pgt); + #else write_cr3(top_level_pgt); + #endif return 0; } diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index 7939eb6e6ce9..c922aec0d88a 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -196,7 +196,11 @@ asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable) * Move the top level page table out of trampoline memory. */ memcpy(pgtable, trampoline_32bit, PAGE_SIZE); + #ifdef CONFIG_IEE + native_write_cr3_pre_init((unsigned long)pgtable); + #else native_write_cr3((unsigned long)pgtable); + #endif /* Restore trampoline memory */ memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE); diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 2192b6c33ea0..d7dd2d7b6138 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -663,6 +663,17 @@ SYM_INNER_LABEL(restore_regs_and_return_to_kernel, SYM_L_GLOBAL) jz 1f ud2 1: +#endif +#ifdef CONFIG_KOI + movq %cr4, %rcx + testq $(1 << 17), %rcx + jz 2f + # movq koi_kern_cr3(%rip), %rcx + # cmpq $0, %rcx + # je 2f + movq CS-8(%rsp), %rdi + call koi_error_return_to_ko +2: #endif POP_REGS addq $8, %rsp /* skip regs->orig_ax */ @@ -1072,6 +1083,27 @@ SYM_CODE_START(error_entry) * for these here too. 
*/ .Lerror_kernelspace: +#ifdef CONFIG_KOI + movq %cr3, %rcx + testq $(1 << 11), %rcx + jz 1f + pushq %rdx + pushq %rsi + pushq %rdi + pushq %r8 + pushq %r9 + pushq %r10 + pushq %r11 + call koi_error_entry_to_kernel + popq %r11 + popq %r10 + popq %r9 + popq %r8 + popq %rdi + popq %rsi + popq %rdx +1: +#endif leaq native_irq_return_iret(%rip), %rcx cmpq %rcx, RIP+8(%rsp) je .Lerror_bad_iret diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index ab97b22ac04a..cef8de1be0c0 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -13,6 +13,10 @@ #include #include +#ifdef CONFIG_IEE +#include +#endif + static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *info) { desc->limit0 = info->limit & 0x0ffff; @@ -210,9 +214,23 @@ static inline void native_load_gdt(const struct desc_ptr *dtr) asm volatile("lgdt %0"::"m" (*dtr)); } +#ifdef CONFIG_IEE +static __always_inline void iee_load_idt_pre_init(const struct desc_ptr *dtr) +{ + asm volatile("lidt %0"::"m" (*dtr)); +} +#endif + static __always_inline void native_load_idt(const struct desc_ptr *dtr) { + /* IEE note: load_idt in __restore_processor_state() is not triggered in qemu, + * but still hooked + */ + #ifdef CONFIG_IEE + iee_rwx_gate(IEE_LOAD_IDT, dtr); + #else asm volatile("lidt %0"::"m" (*dtr)); + #endif } static inline void native_store_gdt(struct desc_ptr *dtr) diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index d0dcefb5cc59..c6dbea6ab469 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -196,5 +196,10 @@ void __init *early_memremap_decrypted_wp(resource_size_t phys_addr, void __early_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags); +#ifdef CONFIG_PTP +void __iee_set_fixmap_pre_init(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags); +#endif + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_FIXMAP_H */ diff --git a/arch/x86/include/asm/iee-access.h b/arch/x86/include/asm/iee-access.h new file mode 100644 index 000000000000..3eb2346afd6a --- /dev/null +++ b/arch/x86/include/asm/iee-access.h @@ -0,0 +1,39 @@ +#ifndef _LINUX_IEE_ACCESS_H +#define _LINUX_IEE_ACCESS_H + +#include +#include + +extern unsigned long long iee_rw_gate(int flag, ...); + +static inline void iee_memcpy(void *dst, const void *src, size_t n) +{ + iee_rw_gate(IEE_OP_MEMCPY, dst, src, n); +} + +static inline void iee_memset(void *ptr, int data, size_t n) +{ + iee_rw_gate(IEE_OP_MEMSET, ptr, data, n); +} + +static inline void iee_set_track(struct track *ptr, struct track *data) +{ + iee_rw_gate(IEE_OP_SET_TRACK, ptr, data); +} + +static inline void iee_set_freeptr(freeptr_t *pptr, freeptr_t ptr) +{ + iee_rw_gate(IEE_OP_SET_FREEPTR, pptr, ptr); +} + +static inline void iee_split_huge_pmd(pmd_t *pmdp, pte_t *pgtable) +{ + iee_rw_gate(IEE_OP_SPLIT_HUGE_PMD, pmdp, pgtable); +} + +static inline unsigned long iee_test_and_clear_bit(long nr, volatile unsigned long *addr) +{ + return iee_rw_gate(IEE_OP_TEST_CLEAR_BIT, nr, addr); +} + +#endif \ No newline at end of file diff --git a/arch/x86/include/asm/iee-cred.h b/arch/x86/include/asm/iee-cred.h new file mode 100644 index 000000000000..44ae20ce05ad --- /dev/null +++ b/arch/x86/include/asm/iee-cred.h @@ -0,0 +1,148 @@ +#ifndef _LINUX_IEE_CRED_H +#define _LINUX_IEE_CRED_H + +#include +#include + +extern unsigned long long iee_rw_gate(int flag, ...); + +static void __maybe_unused iee_copy_cred(const struct cred *old, struct cred *new) +{ + iee_rw_gate(IEE_OP_COPY_CRED,old,new); 
+} + +static void __maybe_unused iee_set_cred_uid(struct cred *cred, kuid_t uid) +{ + iee_rw_gate(IEE_OP_SET_CRED_UID,cred,uid); +} + +static void __maybe_unused iee_set_cred_gid(struct cred *cred, kgid_t gid) +{ + iee_rw_gate(IEE_OP_SET_CRED_GID,cred,gid); +} + +static void __maybe_unused iee_set_cred_suid(struct cred *cred, kuid_t suid) +{ + iee_rw_gate(IEE_OP_SET_CRED_SUID,cred,suid); +} + +static void __maybe_unused iee_set_cred_sgid(struct cred *cred, kgid_t sgid) +{ + iee_rw_gate(IEE_OP_SET_CRED_SGID,cred,sgid); +} + +static void __maybe_unused iee_set_cred_euid(struct cred *cred, kuid_t euid) +{ + iee_rw_gate(IEE_OP_SET_CRED_EUID,cred,euid); +} + +static void __maybe_unused iee_set_cred_egid(struct cred *cred, kgid_t egid) +{ + iee_rw_gate(IEE_OP_SET_CRED_EGID,cred,egid); +} + +static void __maybe_unused iee_set_cred_fsuid(struct cred *cred, kuid_t fsuid) +{ + iee_rw_gate(IEE_OP_SET_CRED_FSUID,cred,fsuid); +} + +static void __maybe_unused iee_set_cred_fsgid(struct cred *cred, kgid_t fsgid) +{ + iee_rw_gate(IEE_OP_SET_CRED_FSGID,cred,fsgid); +} + +static void __maybe_unused iee_set_cred_user(struct cred *cred, struct user_struct *user) +{ + iee_rw_gate(IEE_OP_SET_CRED_USER,cred,user); +} + +static void __maybe_unused iee_set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) +{ + iee_rw_gate(IEE_OP_SET_CRED_USER_NS,cred,user_ns); +} + +static void __maybe_unused iee_set_cred_ucounts(struct cred *cred, struct ucounts *ucounts) +{ + iee_rw_gate(IEE_OP_SET_CRED_UCOUNTS,cred,ucounts); +} + +static void __maybe_unused iee_set_cred_group_info(struct cred *cred, struct group_info *group_info) +{ + iee_rw_gate(IEE_OP_SET_CRED_GROUP_INFO,cred,group_info); +} + +static void __maybe_unused iee_set_cred_securebits(struct cred *cred, unsigned securebits) +{ + iee_rw_gate(IEE_OP_SET_CRED_SECUREBITS,cred,securebits); +} + +static void __maybe_unused iee_set_cred_cap_inheritable(struct cred *cred, kernel_cap_t cap_inheritable) +{ + iee_rw_gate(IEE_OP_SET_CRED_CAP_INHER,cred,cap_inheritable); +} + +static void __maybe_unused iee_set_cred_cap_permitted(struct cred *cred, kernel_cap_t cap_permitted) +{ + iee_rw_gate(IEE_OP_SET_CRED_CAP_PERM,cred,cap_permitted); +} + +static void __maybe_unused iee_set_cred_cap_effective(struct cred *cred, kernel_cap_t cap_effective) +{ + iee_rw_gate(IEE_OP_SET_CRED_CAP_EFFECT,cred,cap_effective); +} + +static void __maybe_unused iee_set_cred_cap_bset(struct cred *cred, kernel_cap_t cap_bset) +{ + iee_rw_gate(IEE_OP_SET_CRED_CAP_BSET,cred,cap_bset); +} + +static void __maybe_unused iee_set_cred_cap_ambient(struct cred *cred, kernel_cap_t cap_ambient) +{ + iee_rw_gate(IEE_OP_SET_CRED_CAP_AMBIENT,cred,cap_ambient); +} + +#ifdef CONFIG_KEYS +static void __maybe_unused iee_set_cred_jit_keyring(struct cred *cred, unsigned char jit_keyring) +{ + iee_rw_gate(IEE_OP_SET_CRED_JIT_KEYRING,cred,jit_keyring); +} + +static void __maybe_unused iee_set_cred_session_keyring(struct cred *cred, struct key *session_keyring) +{ + iee_rw_gate(IEE_OP_SET_CRED_SESS_KEYRING,cred,session_keyring); +} + +static void __maybe_unused iee_set_cred_process_keyring(struct cred *cred, struct key *process_keyring) +{ + iee_rw_gate(IEE_OP_SET_CRED_PROC_KEYRING,cred,process_keyring); +} + +static void __maybe_unused iee_set_cred_thread_keyring(struct cred *cred, struct key *thread_keyring) +{ + iee_rw_gate(IEE_OP_SET_CRED_THREAD_KEYRING,cred,thread_keyring); +} + +static void __maybe_unused iee_set_cred_request_key_auth(struct cred *cred, struct key *request_key_auth) +{ + 
iee_rw_gate(IEE_OP_SET_CRED_REQ_KEYRING,cred,request_key_auth); +} +#endif + +static void __maybe_unused iee_set_cred_atomic_set_usage(struct cred *cred, int i) +{ + iee_rw_gate(IEE_OP_SET_CRED_ATSET_USAGE,cred,i); +} + +#ifdef CONFIG_SECURITY +static void __maybe_unused iee_set_cred_security(struct cred *cred, void *security) +{ + iee_rw_gate(IEE_OP_SET_CRED_SECURITY,cred,security); +} +#endif + +static void __maybe_unused iee_set_cred_rcu(struct cred *cred, struct rcu_head *rcu) +{ + iee_rw_gate(IEE_OP_SET_CRED_RCU,cred,rcu); +} + +#endif \ No newline at end of file diff --git a/arch/x86/include/asm/iee-def.h b/arch/x86/include/asm/iee-def.h new file mode 100644 index 000000000000..d9a712c2420e --- /dev/null +++ b/arch/x86/include/asm/iee-def.h @@ -0,0 +1,124 @@ +#ifndef _LINUX_IEE_DEF_H +#define _LINUX_IEE_DEF_H + +#ifdef CONFIG_CREDP +#define AT_ADD 1 +#define AT_INC_NOT_ZERO 2 +#define AT_SUB_AND_TEST 3 +#endif + +#ifdef CONFIG_KEYP +#define REFCOUNT_INC 1 +#define REFCOUNT_SET 2 +#define REFCOUNT_DEC_AND_TEST 3 +#define REFCOUNT_INC_NOT_ZERO 4 + +#define SET_BIT_OP 1 +#define TEST_AND_CLEAR_BIT 2 +#define TEST_AND_SET_BIT 3 +#endif + +enum { + #ifdef CONFIG_PTP + IEE_OP_SET_PTE, + IEE_OP_SET_PMD, + IEE_OP_SET_PUD, + IEE_OP_SET_P4D, + IEE_OP_SET_PGD, + #endif + IEE_OP_MEMCPY, + IEE_OP_MEMSET, + IEE_OP_SET_FREEPTR, + IEE_OP_SPLIT_HUGE_PMD, + IEE_OP_SET_TOKEN_PGD, + IEE_OP_INIT_TOKEN, + IEE_OP_INVALIDATE_TOKEN, + IEE_OP_VALIDATE_TOKEN, + IEE_OP_UNSET_TOKEN, + IEE_OP_SET_TOKEN, + IEE_OP_SET_TRACK, + IEE_OP_TEST_CLEAR_BIT, + IEE_SET_SENSITIVE_PTE, + IEE_UNSET_SENSITIVE_PTE, +#ifdef CONFIG_CREDP + IEE_OP_COPY_CRED, // Parameters: struct cred *old, struct cred *new + IEE_OP_SET_CRED_UID, // Parameters: struct cred *cred, kuid_t uid + IEE_OP_SET_CRED_GID, // Parameters: struct cred *cred, kgid_t gid + IEE_OP_SET_CRED_SUID, // Parameters: struct cred *cred, kuid_t suid + IEE_OP_SET_CRED_SGID, // Parameters: struct cred *cred, kgid_t sgid + IEE_OP_SET_CRED_EUID, // Parameters: struct cred *cred, kuid_t euid + IEE_OP_SET_CRED_EGID, // Parameters: struct cred *cred, kgid_t egid + IEE_OP_SET_CRED_FSUID, // Parameters: struct cred *cred, kuid_t fsuid + IEE_OP_SET_CRED_FSGID, // Parameters: struct cred *cred, kgid_t fsgid + IEE_OP_SET_CRED_USER, // Parameters: struct cred *cred, struct user_struct *user + IEE_OP_SET_CRED_USER_NS, // Parameters: struct cred *cred, struct user_namespace *user_ns + IEE_OP_SET_CRED_GROUP_INFO, // Parameters: struct cred *cred, struct group_info *group_info + IEE_OP_SET_CRED_SECUREBITS, // Parameters: struct cred *cred, unsigned securebits + IEE_OP_SET_CRED_CAP_INHER, // Parameters: struct cred *cred, kernel_cap_t cap_inheritable + IEE_OP_SET_CRED_CAP_PERM, // Parameters: struct cred *cred, kernel_cap_t cap_permitted + IEE_OP_SET_CRED_CAP_EFFECT, // Parameters: struct cred *cred, kernel_cap_t cap_effective + IEE_OP_SET_CRED_CAP_BSET, // Parameters: struct cred *cred, kernel_cap_t cap_bset + IEE_OP_SET_CRED_CAP_AMBIENT, // Parameters: struct cred *cred, kernel_cap_t cap_ambient + IEE_OP_SET_CRED_JIT_KEYRING, // Parameters: struct cred *cred, unsigned char jit_keyring + IEE_OP_SET_CRED_SESS_KEYRING, // Parameters: struct cred *cred, struct key *session_keyring + IEE_OP_SET_CRED_PROC_KEYRING, // Parameters: struct cred *cred, struct key *process_keyring + IEE_OP_SET_CRED_THREAD_KEYRING, // Parameters: struct cred *cred, struct key *thread_keyring + IEE_OP_SET_CRED_REQ_KEYRING, // Parameters: struct cred *cred, struct key *request_key_auth + IEE_OP_SET_CRED_NON_RCU, // 
Parameters: struct cred *cred, int non_rcu + IEE_OP_SET_CRED_ATSET_USAGE, // Parameters: struct cred *cred, int i + IEE_OP_SET_CRED_ATOP_USAGE, // Parameters: struct cred *cred, int flag + IEE_OP_SET_CRED_SECURITY, // Parameters: struct cred *cred, void *security + IEE_OP_SET_CRED_RCU, // Parameters: struct cred *cred, struct rcu_head *rcu + IEE_OP_SET_CRED_UCOUNTS, // Parameters: struct cred *cred, struct ucounts *ucounts +#endif +#ifdef CONFIG_KEYP + IEE_OP_SET_KEY_UNION, + IEE_OP_SET_KEY_STRUCT, + IEE_OP_SET_KEY_PAYLOAD, + IEE_OP_SET_KEY_USAGE, + IEE_OP_SET_KEY_SERIAL, + IEE_OP_SET_KEY_WATCHERS, + IEE_OP_SET_KEY_USERS, + IEE_OP_SET_KEY_SECURITY, + IEE_OP_SET_KEY_EXPIRY, + IEE_OP_SET_KEY_REVOKED_AT, + IEE_OP_SET_KEY_LAST_USED_AT, + IEE_OP_SET_KEY_UID, + IEE_OP_SET_KEY_GID, + IEE_OP_SET_KEY_PERM, + IEE_OP_SET_KEY_QUOTALEN, + IEE_OP_SET_KEY_DATALEN, + IEE_OP_SET_KEY_STATE, + IEE_OP_SET_KEY_MAGIC, + IEE_OP_SET_KEY_FLAGS, + IEE_OP_SET_KEY_INDEX_KEY, + IEE_OP_SET_KEY_HASH, + IEE_OP_SET_KEY_LEN_DESC, + IEE_OP_SET_KEY_TYPE, + IEE_OP_SET_KEY_TAG, + IEE_OP_SET_KEY_DESCRIPTION, + IEE_OP_SET_KEY_RESTRICT_LINK, + IEE_OP_SET_KEY_FLAG_BIT, +#endif +#ifdef CONFIG_IEE_SELINUX_P + IEE_SEL_SET_STATUS_PG, // Parameters: struct page* new_page + IEE_SEL_SET_ENFORCING, // Parameters: bool value + IEE_SEL_SET_INITIALIZED, + IEE_SEL_SET_POLICY_CAP, // Parameters: unsigned int idx, int cap + IEE_SEL_RCU_ASSIGN_POLICY, // Parameters: struct selinux_policy* new_policy, struct selinux_policy* iee_new_policy +#endif +#ifdef CONFIG_KOI + IEE_READ_KOI_STACK, // Parameters: struct task_struct *tsk + IEE_WRITE_KOI_STACK, // Parameters: struct task_struct *tsk, unsigned long koi_stack + IEE_READ_TOKEN_TTBR1, // Parameters: struct task_struct *tsk + IEE_WRITE_TOKEN_TTBR1, // Parameters: struct task_struct *tsk, unsigned long current_ttbr1 + IEE_READ_KOI_KERNEL_STACK, // Parameters: struct task_struct *tsk + IEE_WRITE_KOI_KERNEL_STACK, // Parameters: struct task_struct *tsk, unsigned long kernel_stack + IEE_READ_KOI_STACK_BASE, // Parameters: struct task_struct *tsk + IEE_WRITE_KOI_STACK_BASE, // Parameters: struct task_struct *tsk, unsigned long koi_stack_base + IEE_SET_KOI_PGD, // Parameters: unsigned long koi_pgd_addr +#endif + IEE_FLAG_END +}; + +#endif /* _LINUX_IEE_DEF_H */ \ No newline at end of file diff --git a/arch/x86/include/asm/iee-key.h b/arch/x86/include/asm/iee-key.h new file mode 100644 index 000000000000..fcac44558cce --- /dev/null +++ b/arch/x86/include/asm/iee-key.h @@ -0,0 +1,147 @@ +#ifndef _LINUX_IEE_KEY_H +#define _LINUX_IEE_KEY_H + +#include +#include + +extern unsigned long long iee_rw_gate(int flag, ...); + +static void __maybe_unused iee_set_key_union(struct key *key, struct key_union *key_union) +{ + iee_rw_gate(IEE_OP_SET_KEY_UNION, key, key_union); +} + +static void __maybe_unused iee_set_key_struct(struct key *key, struct key_struct *key_struct) +{ + iee_rw_gate(IEE_OP_SET_KEY_STRUCT, key, key_struct); +} + +static void __maybe_unused iee_set_key_payload(struct key *key, union key_payload *key_payload) +{ + iee_rw_gate(IEE_OP_SET_KEY_PAYLOAD, key, key_payload); +} + +extern bool iee_set_key_usage(struct key *key, int n, int flag); + +static void __maybe_unused iee_set_key_serial(struct key *key, key_serial_t serial) +{ + iee_rw_gate(IEE_OP_SET_KEY_SERIAL, key, serial); +} + +#ifdef CONFIG_KEY_NOTIFICATIONS +static void __maybe_unused iee_set_key_watchers(struct key *key, struct watch_list *watchers) +{ + iee_rw_gate(IEE_OP_SET_KEY_WATCHERS, key, watchers); +} +#endif + +static void 
__maybe_unused iee_set_key_user(struct key *key, struct key_user *user) +{ + iee_rw_gate(IEE_OP_SET_KEY_USERS, key, user); +} + +static void __maybe_unused iee_set_key_security(struct key *key, void *security) +{ + iee_rw_gate(IEE_OP_SET_KEY_SECURITY, key, security); +} + +static void __maybe_unused iee_set_key_expiry(struct key *key, time64_t expiry) +{ + iee_rw_gate(IEE_OP_SET_KEY_EXPIRY, key, expiry); +} + +static void __maybe_unused iee_set_key_revoked_at(struct key *key, time64_t revoked_at) +{ + iee_rw_gate(IEE_OP_SET_KEY_REVOKED_AT, key, revoked_at); +} + +static void __maybe_unused iee_set_key_last_used_at(struct key *key, time64_t last_used_at) +{ + iee_rw_gate(IEE_OP_SET_KEY_LAST_USED_AT, key, last_used_at); +} + +static void __maybe_unused iee_set_key_uid(struct key *key, kuid_t uid) +{ + iee_rw_gate(IEE_OP_SET_KEY_UID, key, uid); +} + +static void __maybe_unused iee_set_key_gid(struct key *key, kgid_t gid) +{ + iee_rw_gate(IEE_OP_SET_KEY_GID, key, gid); +} + +static void __maybe_unused iee_set_key_perm(struct key *key, key_perm_t perm) +{ + iee_rw_gate(IEE_OP_SET_KEY_PERM, key, perm); +} + +static void __maybe_unused iee_set_key_quotalen(struct key *key, unsigned short quotalen) +{ + iee_rw_gate(IEE_OP_SET_KEY_QUOTALEN, key, quotalen); +} + +static void __maybe_unused iee_set_key_datalen(struct key *key, unsigned short datalen) +{ + iee_rw_gate(IEE_OP_SET_KEY_DATALEN, key, datalen); +} + +static void __maybe_unused iee_set_key_state(struct key *key, short state) +{ + iee_rw_gate(IEE_OP_SET_KEY_STATE, key, state); +} + +#ifdef KEY_DEBUGGING +static void __maybe_unused iee_set_key_magic(struct key *key, unsigned magic) +{ + iee_rw_gate(IEE_OP_SET_KEY_MAGIC, key, magic); +} +#endif + +static void __maybe_unused iee_set_key_flags(struct key *key, unsigned long flags) +{ + iee_rw_gate(IEE_OP_SET_KEY_FLAGS, key, flags); +} + +static void __maybe_unused iee_set_key_index_key(struct key *key, struct keyring_index_key* index_key) +{ + iee_rw_gate(IEE_OP_SET_KEY_INDEX_KEY, key, index_key); +} + +static void __maybe_unused iee_set_key_hash(struct key *key, unsigned long hash) +{ + iee_rw_gate(IEE_OP_SET_KEY_HASH, key, hash); +} + +static void __maybe_unused iee_set_key_len_desc(struct key *key, unsigned long len_desc) +{ + iee_rw_gate(IEE_OP_SET_KEY_LEN_DESC, key, len_desc); +} + +static void __maybe_unused iee_set_key_type(struct key *key, struct key_type *type) +{ + iee_rw_gate(IEE_OP_SET_KEY_TYPE, key, type); +} + +static void __maybe_unused iee_set_key_domain_tag(struct key *key, struct key_tag *domain_tag) +{ + iee_rw_gate(IEE_OP_SET_KEY_TAG, key, domain_tag); +} + +static void __maybe_unused iee_set_key_description(struct key *key, char *description) +{ + iee_rw_gate(IEE_OP_SET_KEY_DESCRIPTION, key, description); +} + +static void __maybe_unused iee_set_key_restrict_link(struct key *key, struct key_restriction *restrict_link) +{ + iee_rw_gate(IEE_OP_SET_KEY_RESTRICT_LINK, key, restrict_link); +} + +static bool __maybe_unused iee_set_key_flag_bit(struct key *key, long nr, int flag) +{ + bool ret; + ret = iee_rw_gate(IEE_OP_SET_KEY_FLAG_BIT, key, nr, flag); + return ret; +} + +#endif \ No newline at end of file diff --git a/arch/x86/include/asm/iee-koi.h b/arch/x86/include/asm/iee-koi.h new file mode 100644 index 000000000000..f55526f8aa60 --- /dev/null +++ b/arch/x86/include/asm/iee-koi.h @@ -0,0 +1,5 @@ +#if defined(CONFIG_KOI) && defined(CONFIG_IEE) +#define IEE_SWITCH_TO_KERNEL 5 +#define IEE_SWITCH_TO_KOI 6 + +#endif \ No newline at end of file diff --git 
a/arch/x86/include/asm/iee-selinuxp.h b/arch/x86/include/asm/iee-selinuxp.h new file mode 100644 index 000000000000..29bcadb9979f --- /dev/null +++ b/arch/x86/include/asm/iee-selinuxp.h @@ -0,0 +1,26 @@ +#ifndef _LINUX_IEE_SELINUX_P_H +#define _LINUX_IEE_SELINUX_P_H + +#include +#include "security.h" +#include "ss/services.h" + +static inline struct mutex* iee_get_selinux_policy_lock(void) +{ + return (struct mutex*)(selinux_state.policy_mutex.owner.counter); +} + +static inline struct mutex* iee_get_selinux_status_lock(void) +{ + return (struct mutex*)(selinux_state.status_lock.owner.counter); +} + +/* APIs for modifying selinux_state */ +extern void iee_set_selinux_status_pg(struct page* new_page); +extern void iee_set_sel_policy_cap(unsigned int idx, int cap); +extern void iee_sel_rcu_assign_policy(struct selinux_policy* new_policy, + struct selinux_policy* iee_new_policy); + +extern struct kmem_cache *policy_jar; + +#endif \ No newline at end of file diff --git a/arch/x86/include/asm/iee-si.h b/arch/x86/include/asm/iee-si.h new file mode 100644 index 000000000000..19601dd5a6e4 --- /dev/null +++ b/arch/x86/include/asm/iee-si.h @@ -0,0 +1,25 @@ +#ifndef _LINUX_IEE_SI_H +#define _LINUX_IEE_SI_H +#define __iee_si_code __section(".iee.si_text") +#define __iee_si_base __section(".iee.si_base") +#define __iee_si_data __section(".iee.si_data") + +extern bool iee_pgt_jar_init; +extern bool iee_init_done; +extern unsigned long iee_base_swapper_pg_dir; +extern unsigned long __iee_si_text_start[]; +extern unsigned long __iee_si_text_end[]; +extern unsigned long __iee_si_data_start[]; +extern unsigned long __iee_si_data_end[]; +extern void iee_rwx_gate(int flag, ...); +// Handler function for sensitive inst +u64 iee_si_handler(int flag, ...); + +#define IEE_SI_TEST 0 +#define IEE_WRITE_CR0 1 +#define IEE_WRITE_CR3 2 +#define IEE_WRITE_CR4 3 +#define IEE_LOAD_IDT 4 +#define IEE_SWITCH_TO_KERNEL 5 +#define IEE_SWITCH_TO_KOI 6 +#endif \ No newline at end of file diff --git a/arch/x86/include/asm/iee-slab.h b/arch/x86/include/asm/iee-slab.h new file mode 100644 index 000000000000..6c79bea4406a --- /dev/null +++ b/arch/x86/include/asm/iee-slab.h @@ -0,0 +1,21 @@ +#ifndef _LINUX_IEE_SLAB_H +#define _LINUX_IEE_SLAB_H + +/* + * Tracking user of a slab. 
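+ * Duplicated from the slab allocator so the IEE side has the same layout
+ * available for iee_set_track() updates.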
+ */ +#include +#define TRACK_ADDRS_COUNT 16 +struct track { + unsigned long addr; /* Called from address */ +#ifdef CONFIG_STACKDEPOT + depot_stack_handle_t handle; +#endif + int cpu; /* Was running on cpu */ + int pid; /* Pid context */ + unsigned long when; /* When did the operation occur */ +}; +enum track_item { TRACK_ALLOC, TRACK_FREE }; +typedef struct { unsigned long v; } freeptr_t; + +#endif \ No newline at end of file diff --git a/arch/x86/include/asm/iee-token.h b/arch/x86/include/asm/iee-token.h new file mode 100644 index 000000000000..cd38b16beb25 --- /dev/null +++ b/arch/x86/include/asm/iee-token.h @@ -0,0 +1,32 @@ +#ifndef _LINUX_IEE_TOKEN_H +#define _LINUX_IEE_TOKEN_H + +#include +#include + +extern unsigned long long iee_rw_gate(int flag, ...); +struct task_token; +struct task_struct; +struct mm_struct; + +static inline void iee_set_token_pgd(struct task_struct *tsk, pgd_t *pgd) +{ + iee_rw_gate(IEE_OP_SET_TOKEN_PGD, tsk, pgd); +} + +static inline void iee_init_token(struct task_struct *tsk, void *iee_stack, void *tmp_page) +{ + iee_rw_gate(IEE_OP_INIT_TOKEN, tsk, iee_stack, tmp_page); +} + +static inline void iee_invalidate_token(struct task_struct *tsk) +{ + iee_rw_gate(IEE_OP_INVALIDATE_TOKEN, tsk); +} + +static inline void iee_validate_token(struct task_struct *tsk) +{ + iee_rw_gate(IEE_OP_VALIDATE_TOKEN, tsk); +} + +#endif \ No newline at end of file diff --git a/arch/x86/include/asm/iee.h b/arch/x86/include/asm/iee.h new file mode 100644 index 000000000000..9b116751df6e --- /dev/null +++ b/arch/x86/include/asm/iee.h @@ -0,0 +1,5 @@ +#ifndef _LINUX_IEE_H +#define _LINUX_IEE_H +#define __iee_code __section(".iee.text") +#define __iee_header __section(".iee.text.header") +#endif \ No newline at end of file diff --git a/arch/x86/include/asm/koi.h b/arch/x86/include/asm/koi.h new file mode 100644 index 000000000000..f0f3ae1623f8 --- /dev/null +++ b/arch/x86/include/asm/koi.h @@ -0,0 +1,432 @@ +#include "asm/pgtable_types.h" +#include "linux/percpu-defs.h" +#include "linux/spinlock_types.h" +#include "linux/hashtable.h" +#include "asm/tlbflush.h" + +#define MAX_VAR_NAME 64 +#define DRIVER_ISOLATION_VAR_ARRAY_SIZE 32 +#define DRIVER_ISOLATION_MAX_VAL 256 + +#define HASH_TABLE_BIT 10 + +#define PTI_USER_PCID_MASK (1 << X86_CR3_PTI_PCID_USER_BIT) + +DECLARE_PER_CPU_PAGE_ALIGNED(unsigned long, koi_kern_cr3); + +extern struct hlist_head koi_mem_htbl[1024]; + +extern unsigned long koi_offset; + +extern unsigned long koi_cr3_ctor(struct module *mod); + +extern int koi_share_kstack(struct module *mod); + +void koi_map_kostack(struct module *mod); + +#ifdef CONFIG_IEE +#include "asm/page.h" +extern unsigned long IEE_OFFSET; +#endif +/** +* struct koi_mem_hash_node - +*@mod:pointer to driver module +*@mem_list_head:free memory list head +*@ko_mm: mm_struct in each driver +*@pgdp:entry to Page Global Directory :pgd +*@node:hash linked list node +*@addr_htbl[1 << (HASH_TABLE_BIT)]: +*@rcu: +*/ +struct koi_mem_hash_node { + struct module *mod; + struct mm_struct *ko_mm; + pgd_t *pgdp; + unsigned long ko_cr3; + struct hlist_node node; + bool is_valid; + spinlock_t mod_lock; +}; + +struct shared_variable_descriptor { + unsigned int id; + unsigned int type; + char name[MAX_VAR_NAME]; + unsigned long offset; + unsigned int size; + unsigned int self_ptr_ids[DRIVER_ISOLATION_VAR_ARRAY_SIZE]; +}; + +int koi_copy_pagetable(struct mm_struct *ko_mm, pgd_t *koi_pg_dir, + unsigned long addr, unsigned long end, pteval_t prot); + +void koi_create_pagetable(struct module *mod); +void 
koi_destroy_pagetable(struct module *mod); +void koi_map_mem(struct module *mod, unsigned long addr, unsigned long size); +void koi_unmap_mem(struct module *mod, unsigned long addr, unsigned long size); + +#ifndef CONFIG_IEE +#define __koi_switch_to_ko(mod) \ + do { \ + unsigned long flags, new_cr3; \ + struct task_token *token = (struct task_token *)(__phys_to_koi(__pa(current))); \ + asm volatile( \ + "pushf\n\t" \ + "pop %0\n\t" \ + "cli\n\t" \ + : "=r"(flags) \ + : \ + : "memory" \ + ); \ + new_cr3 = __read_cr3(); \ + this_cpu_write(koi_kern_cr3, new_cr3); \ + new_cr3 = koi_cr3_ctor(mod); \ + token->current_ttbr1 = new_cr3 & (~X86_CR3_PCID_MASK); \ + native_write_cr3(new_cr3); \ + if (!arch_irqs_disabled_flags(flags)) \ + arch_local_irq_enable(); \ + } while (0); + +#define koi_switch_to_ko() \ + do { \ + unsigned long flags, new_cr3; \ + struct task_token *token = (struct task_token *)(__phys_to_koi(__pa(current))); \ + asm volatile( \ + "pushf\n\t" \ + "pop %0\n\t" \ + "cli\n\t" \ + : "=r"(flags) \ + : \ + : "memory" \ + ); \ + new_cr3 = __read_cr3(); \ + this_cpu_write(koi_kern_cr3, new_cr3); \ + new_cr3 = koi_cr3_ctor(THIS_MODULE); \ + token->current_ttbr1 = new_cr3 & (~X86_CR3_PCID_MASK); \ + native_write_cr3(new_cr3); \ + if (!arch_irqs_disabled_flags(flags)) \ + arch_local_irq_enable(); \ + } while (0); + +#define koi_switch_to_kernel() \ + do { \ + unsigned long flags, new_cr3; \ + asm volatile( \ + "pushf\n\t" \ + "pop %0\n\t" \ + "cli\n\t" \ + : "=r"(flags) \ + : \ + : "memory" \ + ); \ + new_cr3 = this_cpu_read(koi_kern_cr3); \ + /*pcid = __read_cr3(); \ + pcid &= X86_CR3_PCID_MASK; \ + pcid &= ~PTI_USER_PCID_MASK;*/ \ + native_write_cr3(new_cr3); \ + struct task_token *token = (struct task_token *)(__phys_to_koi(__pa(current))); \ + token->current_ttbr1 = new_cr3; \ + if (!arch_irqs_disabled_flags(flags)) \ + arch_local_irq_enable(); \ + } while (0); +#else +#define __koi_switch_to_ko(mod) \ + do { \ + unsigned long flags, new_cr3; \ + asm volatile( \ + "pushf\n\t" \ + "pop %0\n\t" \ + "cli\n\t" \ + : "=r"(flags) \ + : \ + : "memory" \ + ); \ + new_cr3 = __read_cr3(); \ + this_cpu_write(koi_kern_cr3, new_cr3); \ + new_cr3 = koi_cr3_ctor(mod); \ + iee_rw_gate(IEE_WRITE_TOKEN_TTBR1, current, new_cr3 & (~X86_CR3_PCID_MASK)); \ + /* iee_rwx_gate(IEE_SWITCH_TO_KOI, new_cr3); */ \ + asm volatile("mov %0,%%cr3": : "r" (new_cr3) : "memory"); \ + if (!arch_irqs_disabled_flags(flags)) \ + arch_local_irq_enable(); \ + } while (0); + +#define koi_switch_to_ko() \ + do { \ + unsigned long flags, new_cr3; \ + asm volatile( \ + "pushf\n\t" \ + "pop %0\n\t" \ + "cli\n\t" \ + : "=r"(flags) \ + : \ + : "memory" \ + ); \ + new_cr3 = __read_cr3(); \ + this_cpu_write(koi_kern_cr3, new_cr3); \ + new_cr3 = koi_cr3_ctor(THIS_MODULE); \ + iee_rw_gate(IEE_WRITE_TOKEN_TTBR1, current, new_cr3 & (~X86_CR3_PCID_MASK)); \ + /* iee_rwx_gate(IEE_SWITCH_TO_KOI, new_cr3); */ \ + asm volatile("mov %0,%%cr3": : "r" (new_cr3) : "memory"); \ + if (!arch_irqs_disabled_flags(flags)) \ + arch_local_irq_enable(); \ + } while (0); + +#define koi_switch_to_kernel() \ + do { \ + unsigned long flags, new_cr3; \ + asm volatile( \ + "pushf\n\t" \ + "pop %0\n\t" \ + "cli\n\t" \ + : "=r"(flags) \ + : \ + : "memory" \ + ); \ + new_cr3 = this_cpu_read(koi_kern_cr3); \ + /*pcid = __read_cr3(); \ + pcid &= X86_CR3_PCID_MASK; \ + pcid &= ~PTI_USER_PCID_MASK;*/ \ + asm volatile("mov %0,%%cr3": : "r" (new_cr3) : "memory"); \ + /* iee_rwx_gate(IEE_SWITCH_TO_KERNEL, new_cr3);*/ \ + iee_rw_gate(IEE_WRITE_TOKEN_TTBR1, current, 
new_cr3); \ + if (!arch_irqs_disabled_flags(flags)) \ + arch_local_irq_enable(); \ + } while (0); +#endif + + + + +#ifdef CONFIG_KOI + +extern void *koi_kcalloc_wrapper(struct module *mod, size_t n, size_t size, gfp_t flags); +extern void *koi_kmalloc_array_wrapper(struct module *mod, size_t n, size_t size, gfp_t flags); +extern void *koi_vmalloc_wrapper(struct module *mod, unsigned long size); +extern void *koi_kmalloc_wrapper(struct module *mod, size_t size, gfp_t flags); +extern void *koi_kzalloc_node_wrapper(struct module *mod, size_t size, gfp_t flags, int node); +extern void *koi_kzalloc_wrapper(struct module *mod, size_t size, gfp_t flags); + + +#define koi_copy_to_user_wrapper(to, from, n) \ +({ \ + koi_switch_to_kernel(); \ + long long ret = copy_to_user(to, from, n); \ + koi_switch_to_ko(); \ + ret; \ +}) + +#define koi_copy_from_user_wrapper(to, from, n) \ +({ \ + koi_switch_to_kernel(); \ + long long ret = copy_from_user(to, from, n); \ + koi_switch_to_ko(); \ + ret; \ +}) + +#define koi_kasprintf_wrapper(gfp, fmt, args...)\ + ({ \ + koi_switch_to_kernel(); \ + void *ret = kasprintf(gfp, fmt, ##args); \ + koi_map_mem(THIS_MODULE, (unsigned long)ret, sizeof(void *)); \ + koi_switch_to_ko(); \ + ret;\ + }) + +#define koi_sprintf_wrapper(buf, fmt, args...) \ +({ \ + koi_switch_to_kernel(); \ + int ret = sprintf(buf, fmt, ##args); \ + koi_switch_to_ko(); \ + ret; \ +}) + +#define koi_scnprintf_wrapper(buf, size, fmt, args...) \ + ({ \ + int ret; \ + koi_switch_to_kernel(); \ + ret = scnprintf(buf, size, fmt, ##args); \ + koi_switch_to_ko(); \ + ret; \ + }) + +#define koi_sscanf_wrapper(buf, fmt, args...) \ + ({ \ + int ret; \ + koi_switch_to_kernel(); \ + ret = sscanf(buf, fmt, ##args); \ + koi_switch_to_ko(); \ + ret; \ + }) + +#define koi_printk_wrapper(arg_0, args...) 
\ + ({ \ + int ret; \ + koi_switch_to_kernel(); \ + ret = printk(arg_0, ##args); \ + koi_switch_to_ko(); \ + ret; \ + }) + +#define PTR_ERR_wrapper(arg) \ +({ \ + long ret; \ + koi_switch_to_kernel(); \ + ret = PTR_ERR(arg); \ + koi_switch_to_ko(); \ + ret; \ +}) +#define koi_rcu_read_lock_wrapper() \ + do { \ + koi_switch_to_kernel(); \ + rcu_read_lock(); \ + koi_switch_to_ko(); \ + } while(0); + +#define koi_rcu_read_unlock_wrapper() \ + do { \ + koi_switch_to_kernel(); \ + rcu_read_unlock(); \ + koi_switch_to_ko(); \ + } while(0); + +#define koi_mutex_lock_wrapper(lock) \ + do { \ + koi_switch_to_kernel(); \ + mutex_lock(lock); \ + koi_switch_to_ko(); \ + } while(0); + +#define koi_mutex_unlock_wrapper(lock) \ + do { \ + koi_switch_to_kernel(); \ + mutex_unlock(lock); \ + koi_switch_to_ko(); \ + } while(0); + +#define koi_mutex_init_wrapper(lock) \ + do { \ + koi_switch_to_kernel(); \ + mutex_init(lock); \ + koi_switch_to_ko(); \ + } while(0); + +#define koi_spin_lock_wrapper(lock) \ + do { \ + koi_switch_to_kernel(); \ + spin_lock(lock); \ + koi_switch_to_ko(); \ + } while(0); + +#define koi_spin_unlock_wrapper(lock) \ + do { \ + koi_switch_to_kernel(); \ + spin_unlock(lock); \ + koi_switch_to_ko(); \ + } while(0); + +#define koi_spin_lock_irq_wrapper(lock) \ + do { \ + koi_switch_to_kernel(); \ + spin_lock_irq(lock); \ + koi_switch_to_ko(); \ + } while(0); + + +#define koi_spin_unlock_irq_wrapper(lock) \ + do { \ + koi_switch_to_kernel(); \ + spin_unlock_irq(lock); \ + koi_switch_to_ko(); \ + } while(0); + +#define koi_spin_lock_irqsave_wrapper(lock, flags) \ + do { \ + koi_switch_to_kernel(); \ + spin_lock_irqsave(lock, flags); \ + koi_switch_to_ko(); \ + } while(0); + + +#define koi_spin_unlock_irqrestore_wrapper(lock, flags) \ + do { \ + koi_switch_to_kernel(); \ + spin_unlock_irqrestore(lock, flags); \ + koi_switch_to_ko(); \ + } while(0); + + +#define koi_spin_lock_bh_wrapper(lock) \ + do { \ + koi_switch_to_kernel(); \ + spin_lock_bh(lock); \ + koi_switch_to_ko(); \ + } while(0); + +#define koi_spin_unlock_bh_wrapper(lock) \ + do { \ + koi_switch_to_kernel(); \ + spin_unlock_bh(lock); \ + koi_switch_to_ko(); \ + } while(0); + +#define koi_dev_err_wrapper(dev, fmt, args...) 
\ + ({ \ + koi_switch_to_kernel(); \ + dev_err(dev, fmt, ##args); \ + koi_switch_to_ko(); \ + }) + +#else + +#define koi_copy_to_user_wrapper copy_to_user + +#define koi_copy_from_user_wrapper copy_from_user + +#define koi_kasprintf_wrapper kasprintf + +#define koi_scnprintf_wrapper scnprintf + +#define koi_sscanf_wrapper sscanf + +#define koi_sprintf_wrapper sprintf + +#define koi_rcu_read_lock_wrapper rcu_read_lock + +#define koi_rcu_read_unlock_wrapper rcu_read_unlock + +#define koi_mutex_lock_wrapper mutex_lock + +#define koi_mutex_unlock_wrapper mutex_unlock + +#define koi_mutex_init_wrapper mutex_init + +#define koi_spin_lock_irq_wrapper spin_lock_irq + +#define koi_spin_unlock_irq_wrapper spin_unlock_irq + +#define koi_spin_lock_wrapper spin_lock + +#define koi_spin_unlock_wrapper spin_unlock + +#define koi_spin_lock_irqsave_wrapper spin_lock_irqsave + +#define koi_spin_unlock_irqrestore_wrapper spin_lock_irqrestore + +#define koi_spin_lock_bh_wrapper spin_lock_bh + +#define koi_spin_unlock_bh_wrapper spin_unlock_bh + +#define koi_kzalloc_wrapper(mod, size, flags) kzalloc(size, flags) + +#define koi_kzalloc_node_wrapper(mod, size, flags, node) kzalloc_node(size, flags, node) + +#define koi_kmalloc_wrapper(mod, size, flags) kmalloc(size, flags) + +#define koi_vmalloc_wrapper(mod, size) vmalloc(size) + +#define koi_kmalloc_array_wrapper(mod, n, size, flags) kmalloc_array(n, size, flags) + +#define koi_kcalloc_wrapper(mod, n, size, flags) kcalloc(n, size, flags) + +#endif \ No newline at end of file diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index 1b93ff80b43b..4b27230dff49 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -61,6 +61,26 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr, #define __boot_va(x) __va(x) #define __boot_pa(x) __pa(x) +#ifdef CONFIG_IEE +extern unsigned long IEE_OFFSET; +#ifndef __iee_pa +#define __iee_pa(x) (__pa(x - IEE_OFFSET)) +#endif +#ifndef __phys_to_iee +#define __phys_to_iee(x) ((void *)(__va(x) + IEE_OFFSET)) +#endif +#else +#ifdef CONFIG_KOI +extern unsigned long KOI_OFFSET; +#ifndef __koi_pa +#define __koi_pa(x) (__pa(x - KOI_OFFSET)) +#endif +#ifndef __phys_to_koi +#define __phys_to_koi(x) ((void *)(__va(x) + KOI_OFFSET)) +#endif +#endif +#endif /* CONFIG_IEE*/ + /* * virt_to_page(kaddr) returns a valid pointer if and only if * virt_addr_valid(kaddr) returns true. 
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index 9dab85aba7af..e14b0b574e0f 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -19,6 +19,10 @@ extern unsigned long vmalloc_base; extern unsigned long vmemmap_base; extern unsigned long physmem_end; +#ifdef CONFIG_PTP +extern unsigned long iee_ptdesc_base; +#endif + static __always_inline unsigned long __phys_addr_nodebug(unsigned long x) { unsigned long y = x - __START_KERNEL_map; diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 86bd4311daf8..c8656ccb03c1 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -29,6 +29,13 @@ #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) +#ifdef CONFIG_IEE +#define SET_UPAGE(x) __pg(pgprot_val(x) | _PAGE_USER) +#endif /* CONFIG_IEE*/ +#if defined (CONFIG_IEE) || defined (CONFIG_KOI) +#define SET_NG(x) __pg(pgprot_val(x) & (~_PAGE_GLOBAL)) +#endif + #define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #define __PHYSICAL_START ALIGN(CONFIG_PHYSICAL_START, \ diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index c7ec5bb88334..cd9505d0c769 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -12,6 +12,10 @@ static inline int __paravirt_pgd_alloc(struct mm_struct *mm) { return 0; } +#ifdef CONFIG_KOI +pgd_t *koi_pgd_alloc(void); +#endif + #ifdef CONFIG_PARAVIRT_XXL #include #else @@ -147,13 +151,78 @@ static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4 set_pgd_safe(pgd, __pgd(_PAGE_TABLE | __pa(p4d))); } +#ifdef CONFIG_PTP +#include + +static inline void iee_pmd_populate_kernel_pre_init(struct mm_struct *mm, + pmd_t *pmd, pte_t *pte) +{ + paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT); + iee_set_pmd_pre_init(pmd, __pmd(__pa(pte) | _PAGE_TABLE)); +} +static inline void iee_pmd_populate_kernel_safe_pre_init(struct mm_struct *mm, + pmd_t *pmd, pte_t *pte) +{ + paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT); + iee_set_pmd_safe_pre_init(pmd, __pmd(__pa(pte) | _PAGE_TABLE)); +} + +static inline void iee_pud_populate_pre_init(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) +{ + paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT); + iee_set_pud_pre_init(pud, __pud(_PAGE_TABLE | __pa(pmd))); +} + +static inline void iee_pud_populate_safe_pre_init(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) +{ + paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT); + iee_set_pud_safe_pre_init(pud, __pud(_PAGE_TABLE | __pa(pmd))); +} + +static inline void iee_p4d_populate_pre_init(struct mm_struct *mm, p4d_t *p4d, pud_t *pud) +{ + paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT); + iee_set_p4d_pre_init(p4d, __p4d(_PAGE_TABLE | __pa(pud))); +} + +static inline void iee_p4d_populate_safe_pre_init(struct mm_struct *mm, p4d_t *p4d, pud_t *pud) +{ + paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT); + iee_set_p4d_safe_pre_init(p4d, __p4d(_PAGE_TABLE | __pa(pud))); +} + +static inline void iee_pgd_populate_pre_init(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d) +{ + if (!pgtable_l5_enabled()) + return; + paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT); + iee_set_pgd_pre_init(pgd, __pgd(_PAGE_TABLE | __pa(p4d))); +} + +static inline void iee_pgd_populate_safe_pre_init(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d) +{ + if (!pgtable_l5_enabled()) + return; + paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT); + iee_set_pgd_safe_pre_init(pgd, __pgd(_PAGE_TABLE | __pa(p4d))); +} + +#endif + static inline p4d_t *p4d_alloc_one(struct 
mm_struct *mm, unsigned long addr) { gfp_t gfp = GFP_KERNEL_ACCOUNT; if (mm == &init_mm) gfp &= ~__GFP_ACCOUNT; + #ifdef CONFIG_PTP + if (iee_pgt_jar_init) + return (p4d_t *)get_iee_pgtable_page(gfp); + else + return (p4d_t *)get_zeroed_page(gfp); + #else return (p4d_t *)get_zeroed_page(gfp); + #endif } static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) @@ -162,7 +231,11 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) return; BUG_ON((unsigned long)p4d & (PAGE_SIZE-1)); + #ifdef CONFIG_PTP + free_iee_pgtable_page(p4d); + #else free_page((unsigned long)p4d); + #endif } extern void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d); diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 993d49cd379a..82fac64eccd5 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -94,6 +94,14 @@ extern pmdval_t early_pmd_flags; #define pud_clear(pud) native_pud_clear(pud) #endif +#ifdef CONFIG_PTP +#define iee_set_pte_pre_init(ptep, pte) iee_early_set_pte(ptep, pte) +#define iee_set_pmd_pre_init(pmdp, pmd) iee_early_set_pmd(pmdp, pmd) +#define iee_set_pgd_pre_init(pgdp, pgd) iee_early_set_pgd(pgdp, pgd) +#define iee_set_p4d_pre_init(p4dp, p4d) iee_early_set_p4d(p4dp, p4d) +#define iee_set_pud_pre_init(pudp, pud) iee_early_set_pud(pudp, pud) +#endif + #define pte_clear(mm, addr, ptep) native_pte_clear(mm, addr, ptep) #define pmd_clear(pmd) native_pmd_clear(pmd) @@ -251,6 +259,14 @@ static inline unsigned long pgd_pfn(pgd_t pgd) return (pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT; } +#ifdef CONFIG_IEE +#define __pte_to_phys(pte) (pte_pfn(pte) << PAGE_SHIFT) +#define __pmd_to_phys(pmd) (__pte_to_phys(__pte(pmd_val(pmd)))) +#define __pud_to_phys(pud) (__pte_to_phys(__pte(pud_val(pud)))) +#define __p4d_to_phys(p4d) (__pte_to_phys(__pte(p4d_val(p4d)))) +#define __pgd_to_phys(pgd) (__pte_to_phys(__pte(pgd_val(pgd)))) +#endif + #define p4d_leaf p4d_large static inline int p4d_large(p4d_t p4d) { @@ -928,6 +944,13 @@ static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) #include #include +#ifdef CONFIG_KOI +static inline int pte_valid(pte_t pte) +{ + return pte.pte & _PAGE_PRESENT; +} +#endif + static inline int pte_none(pte_t pte) { return !(pte.pte & ~(_PAGE_KNL_ERRATUM_MASK)); @@ -1189,6 +1212,20 @@ static inline int pgd_none(pgd_t pgd) extern int direct_gbpages; void init_mem_mapping(void); +#ifdef CONFIG_IEE +void init_iee_mapping(void); +unsigned long init_memory_mapping_for_iee(unsigned long start, + unsigned long end, pgprot_t prot); +#else +#ifdef CONFIG_KOI +void init_koi_mapping(void); +unsigned long init_memory_mapping_for_koi(unsigned long start, + unsigned long end, pgprot_t prot); +#endif +#endif /* CONFIG_IEE*/ +#ifdef CONFIG_PTP +void init_iee(void); +#endif void early_alloc_pgt_buf(void); extern void memblock_find_dma_reserve(void); void __init poking_init(void); @@ -1289,6 +1326,11 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, return pte; } +#ifdef CONFIG_PTP +extern pgprotval_t iee_set_try_cmpxchg(pgprotval_t *pgprotp, pgprotval_t old_pgprotval, pgprotval_t new_pgprotval); +extern pgprotval_t iee_set_xchg(pgprotval_t *pgprotp, pgprotval_t pgprotval); +#endif + #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) @@ -1303,7 +1345,12 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, old_pte = READ_ONCE(*ptep); do { new_pte = pte_wrprotect(old_pte); - } while (!try_cmpxchg((long *)&ptep->pte, 
(long *)&old_pte, *(long *)&new_pte)); + } + #ifdef CONFIG_PTP + while (!iee_set_try_cmpxchg(__phys_to_iee(__pa(ptep)), pte_val(old_pte), pte_val(new_pte))); + #else + while (!try_cmpxchg((long *)&ptep->pte, (long *)&old_pte, *(long *)&new_pte)); + #endif } #define flush_tlb_fix_spurious_fault(vma, address, ptep) do { } while (0) @@ -1365,7 +1412,12 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, old_pmd = READ_ONCE(*pmdp); do { new_pmd = pmd_wrprotect(old_pmd); - } while (!try_cmpxchg((long *)pmdp, (long *)&old_pmd, *(long *)&new_pmd)); + } + #ifdef CONFIG_PTP + while (!iee_set_try_cmpxchg(__phys_to_iee(__pa(pmdp)), pmd_val(old_pmd), pmd_val(new_pmd))); + #else + while (!try_cmpxchg((long *)pmdp, (long *)&old_pmd, *(long *)&new_pmd)); + #endif } #ifndef pmdp_establish @@ -1375,10 +1427,19 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, { page_table_check_pmd_set(vma->vm_mm, pmdp, pmd); if (IS_ENABLED(CONFIG_SMP)) { + #ifdef CONFIG_PTP + pmdval_t pmdval = iee_set_xchg(__phys_to_iee(__pa(pmdp)), pmd_val(pmd)); + return native_make_pmd(pmdval); + #else return xchg(pmdp, pmd); + #endif } else { pmd_t old = *pmdp; + #ifdef CONFIG_PTP + set_pmd(pmdp, pmd); + #else WRITE_ONCE(*pmdp, pmd); + #endif return old; } } @@ -1466,6 +1527,17 @@ static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp) */ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) { + #ifdef CONFIG_PTP + iee_rw_gate(IEE_OP_MEMCPY, dst, src, count * sizeof(pgd_t)); +#ifdef CONFIG_PAGE_TABLE_ISOLATION + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + /* Clone the user space pgd as well */ + iee_rw_gate(IEE_OP_MEMCPY, kernel_to_user_pgdp(dst), + kernel_to_user_pgdp(src), count * sizeof(pgd_t)); +#endif + #else + memcpy(dst, src, count * sizeof(pgd_t)); #ifdef CONFIG_PAGE_TABLE_ISOLATION if (!static_cpu_has(X86_FEATURE_PTI)) @@ -1474,6 +1546,7 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) memcpy(kernel_to_user_pgdp(dst), kernel_to_user_pgdp(src), count * sizeof(pgd_t)); #endif + #endif } #define PTE_SHIFT ilog2(PTRS_PER_PTE) diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index a629b1b9f65a..10134a82fa2c 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -16,6 +16,22 @@ #include #include +#ifdef CONFIG_KOI + +#ifndef __PAGETABLE_PUD_FOLDED +#define pud_val(x) native_pud_val(x) +#define __pud(x) native_make_pud(x) +#endif + +#ifndef __PAGETABLE_PMD_FOLDED +#define pmd_val(x) native_pmd_val(x) +#define __pmd(x) native_make_pmd(x) +#endif + +#define pte_val(x) native_pte_val(x) +#define __pte(x) native_make_pte(x) +#endif + extern p4d_t level4_kernel_pgt[512]; extern p4d_t level4_ident_pgt[512]; extern pud_t level3_kernel_pgt[512]; @@ -31,6 +47,12 @@ extern pgd_t init_top_pgt[]; extern void paging_init(void); static inline void sync_initial_page_table(void) { } +#ifdef CONFIG_PTP +#include +extern unsigned long long iee_rw_gate(int flag, ...); +extern pgprotval_t iee_set_xchg(pgprotval_t *pgprotp, pgprotval_t pgprotval); +#endif + #define pte_ERROR(e) \ pr_err("%s:%d: bad pte %p(%016lx)\n", \ __FILE__, __LINE__, &(e), pte_val(e)) @@ -62,9 +84,23 @@ static inline bool mm_p4d_folded(struct mm_struct *mm) void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte); void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte); + +#ifdef CONFIG_KOI +static int pmd_present(pmd_t pmd); +static int pud_present(pud_t pud); +#endif static inline void 
native_set_pte(pte_t *ptep, pte_t pte) { +#ifdef CONFIG_KOI + if (pte_flags(pte) & _PAGE_PRESENT) { + pte = __pte(pte_val(pte) & ~_PAGE_GLOBAL); + } +#endif + #ifdef CONFIG_PTP + iee_rw_gate(IEE_OP_SET_PTE, ptep, pte); + #else WRITE_ONCE(*ptep, pte); + #endif } static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, @@ -80,7 +116,16 @@ static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) { +#ifdef CONFIG_KOI + if (pmd_present(pmd) && pmd_leaf(pmd)) { + pmd = __pmd(pmd_val(pmd) & ~_PAGE_GLOBAL); + } +#endif + #ifdef CONFIG_PTP + iee_rw_gate(IEE_OP_SET_PMD, pmdp, pmd); + #else WRITE_ONCE(*pmdp, pmd); + #endif } static inline void native_pmd_clear(pmd_t *pmd) @@ -91,7 +136,12 @@ static inline void native_pmd_clear(pmd_t *pmd) static inline pte_t native_ptep_get_and_clear(pte_t *xp) { #ifdef CONFIG_SMP + #ifdef CONFIG_PTP + pteval_t pteval = iee_set_xchg(__phys_to_iee(__pa(xp)), 0); + return native_make_pte(pteval); + #else return native_make_pte(xchg(&xp->pte, 0)); + #endif #else /* native_local_ptep_get_and_clear, but duplicated because of cyclic dependency */ @@ -104,7 +154,12 @@ static inline pte_t native_ptep_get_and_clear(pte_t *xp) static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) { #ifdef CONFIG_SMP + #ifdef CONFIG_PTP + pmdval_t pmdval = iee_set_xchg(__phys_to_iee(__pa(xp)), 0); + return native_make_pmd(pmdval); + #else return native_make_pmd(xchg(&xp->pmd, 0)); + #endif #else /* native_local_pmdp_get_and_clear, but duplicated because of cyclic dependency */ @@ -116,7 +171,17 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) static inline void native_set_pud(pud_t *pudp, pud_t pud) { +#ifdef CONFIG_KOI + if ((pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) == + (_PAGE_PSE | _PAGE_PRESENT)) { + pud = __pud(pud_val(pud) & ~_PAGE_GLOBAL); + } +#endif + #ifdef CONFIG_PTP + iee_rw_gate(IEE_OP_SET_PUD, pudp, pud); + #else WRITE_ONCE(*pudp, pud); + #endif } static inline void native_pud_clear(pud_t *pud) @@ -127,7 +192,12 @@ static inline void native_pud_clear(pud_t *pud) static inline pud_t native_pudp_get_and_clear(pud_t *xp) { #ifdef CONFIG_SMP + #ifdef CONFIG_PTP + pudval_t pudval = iee_set_xchg(__phys_to_iee(__pa(xp)), 0); + return native_make_pud(pudval); + #else return native_make_pud(xchg(&xp->pud, 0)); + #endif #else /* native_local_pudp_get_and_clear, * but duplicated because of cyclic dependency @@ -144,13 +214,21 @@ static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) pgd_t pgd; if (pgtable_l5_enabled() || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) { + #ifdef CONFIG_PTP + iee_rw_gate(IEE_OP_SET_P4D, p4dp, p4d); + #else WRITE_ONCE(*p4dp, p4d); + #endif return; } pgd = native_make_pgd(native_p4d_val(p4d)); pgd = pti_set_user_pgtbl((pgd_t *)p4dp, pgd); + #ifdef CONFIG_PTP + iee_rw_gate(IEE_OP_SET_P4D, p4dp, native_make_p4d(native_pgd_val(pgd))); + #else WRITE_ONCE(*p4dp, native_make_p4d(native_pgd_val(pgd))); + #endif } static inline void native_p4d_clear(p4d_t *p4d) @@ -160,7 +238,12 @@ static inline void native_p4d_clear(p4d_t *p4d) static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) { + #ifdef CONFIG_PTP + iee_rw_gate(IEE_OP_SET_PGD, pgdp, + pti_set_user_pgtbl(pgdp, pgd)); + #else WRITE_ONCE(*pgdp, pti_set_user_pgtbl(pgdp, pgd)); + #endif } static inline void native_pgd_clear(pgd_t *pgd) @@ -168,6 +251,42 @@ static inline void native_pgd_clear(pgd_t *pgd) native_set_pgd(pgd, native_make_pgd(0)); } +#ifdef CONFIG_PTP +static inline void iee_early_set_pte(pte_t *ptep, 
pte_t pte) +{ + WRITE_ONCE(*ptep, pte); +} + +static inline void iee_early_set_pmd(pmd_t *pmdp, pmd_t pmd) +{ + WRITE_ONCE(*pmdp, pmd); +} + +static inline void iee_early_set_pud(pud_t *pudp, pud_t pud) +{ + WRITE_ONCE(*pudp, pud); +} + +static inline void iee_early_set_p4d(p4d_t *p4dp, p4d_t p4d) +{ + pgd_t pgd; + + if (pgtable_l5_enabled() || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) { + WRITE_ONCE(*p4dp, p4d); + return; + } + + pgd = native_make_pgd(native_p4d_val(p4d)); + pgd = pti_set_user_pgtbl((pgd_t *)p4dp, pgd); + WRITE_ONCE(*p4dp, native_make_p4d(native_pgd_val(pgd))); +} + +static inline void iee_early_set_pgd(pgd_t *pgdp, pgd_t pgd) +{ + WRITE_ONCE(*pgdp, pti_set_user_pgtbl(pgdp, pgd)); +} +#endif + /* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 35c416f06155..bd0d82bb9b18 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -140,6 +140,11 @@ extern unsigned int ptrs_per_p4d; # define VMEMMAP_START __VMEMMAP_BASE_L4 #endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */ +#ifdef CONFIG_PTP +#define __VPTDMAP_BASE_L4 0xffffeb0000000000UL +#define __VPTDMAP_BASE_L5 0xffd6000000000000UL +#endif + #ifdef CONFIG_RANDOMIZE_MEMORY # define PHYSMEM_END physmem_end #endif diff --git a/arch/x86/include/asm/pgtable_slab.h b/arch/x86/include/asm/pgtable_slab.h new file mode 100644 index 000000000000..03452a3d0569 --- /dev/null +++ b/arch/x86/include/asm/pgtable_slab.h @@ -0,0 +1,13 @@ +#ifndef _LINUX_PGTABLE_SLAB_H +#define _LINUX_PGTABLE_SLAB_H + +extern void __init iee_pgtable_init(void); +extern void iee_ptdesc_init(struct page *page); +extern void iee_ptdesc_free(struct page *page); + +extern void *get_iee_pgtable_page(gfp_t gfpflags); +extern void free_iee_pgtable_page(void *obj); +extern void *get_iee_pgd_page(gfp_t gfpflags); +extern void free_iee_pgd_page(void *obj); + +#endif \ No newline at end of file diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 48f8dd47cf68..47f697ce730c 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -10,6 +10,10 @@ #include #include +#ifdef CONFIG_IEE +#include +#endif + /* * The compiler should not reorder volatile asm statements with respect to each * other: they should execute in program order. 
However GCC 4.9.x and 5.x have @@ -51,9 +55,20 @@ static inline unsigned long __native_read_cr3(void) static inline void native_write_cr3(unsigned long val) { + #ifdef CONFIG_IEE + iee_rwx_gate(IEE_WRITE_CR3, val); + #else asm volatile("mov %0,%%cr3": : "r" (val) : "memory"); + #endif } +#ifdef CONFIG_IEE +static inline void native_write_cr3_pre_init(unsigned long val) +{ + asm volatile("mov %0,%%cr3": : "r" (val) : "memory"); +} +#endif + static inline unsigned long native_read_cr4(void) { unsigned long val; diff --git a/arch/x86/include/asm/stack_slab.h b/arch/x86/include/asm/stack_slab.h new file mode 100644 index 000000000000..514c09e1c415 --- /dev/null +++ b/arch/x86/include/asm/stack_slab.h @@ -0,0 +1,8 @@ +#ifndef _LINUX_STACK_SLAB_H +#define _LINUX_STACK_SLAB_H + +extern void __init iee_stack_init(void); +extern void *get_iee_stack(void); +extern void free_iee_stack(void *obj); + +#endif \ No newline at end of file diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h index 580636cdc257..f18a855dfa13 100644 --- a/arch/x86/include/asm/tlb.h +++ b/arch/x86/include/asm/tlb.h @@ -34,4 +34,10 @@ static inline void __tlb_remove_table(void *table) free_page_and_swap_cache(table); } +#ifdef CONFIG_PTP +static inline void __iee_tlb_remove_table(void *_table) { + struct page *page = (struct page *)_table; + free_iee_pgtable_page((void *)page_to_virt(page)); +} +#endif #endif /* _ASM_X86_TLB_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 2b86fa2d8c64..d8d4e64af538 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -151,6 +151,10 @@ obj-$(CONFIG_X86_CET) += cet.o obj-$(CONFIG_X86_USER_SHADOW_STACK) += shstk.o +obj-$(CONFIG_IEE) += iee/ + +obj-$(CONFIG_HIVE) += sfi_bpf_arch.o + ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) @@ -161,6 +165,7 @@ ifeq ($(CONFIG_X86_64),y) obj-$(CONFIG_MMCONF_FAM10H) += mmconf-fam10h_64.o obj-y += vsmp_64.o obj-$(CONFIG_INTEL_IOMMU) += zhaoxin_kh40000.o + obj-$(CONFIG_KOI) += koi/ endif obj-$(CONFIG_HYGON_CSV) += csv.o diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index dc3576303f1a..d528f34a0541 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -119,6 +119,12 @@ static void __used common(void) #ifdef CONFIG_CALL_DEPTH_TRACKING OFFSET(X86_call_depth, pcpu_hot, call_depth); #endif +#ifdef CONFIG_IEE + DEFINE(iee_from_token_offset, offsetof(struct task_token, iee_stack)); + DEFINE(tmp_page_from_token_offset, offsetof(struct task_token, tmp_page)); + DEFINE(kernel_from_token_offset,offsetof(struct task_token, kernel_stack)); + DEFINE(pgd_from_token_offset, offsetof(struct task_token, pgd)); +#endif #if IS_ENABLED(CONFIG_CRYPTO_ARIA_AESNI_AVX_X86_64) /* Offset for fields in aria_ctx */ BLANK(); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b66364429f98..563de5754f3d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -68,6 +68,10 @@ #include "cpu.h" +#ifdef CONFIG_IEE +#include +#endif + u32 elf_hwcap2 __read_mostly; /* Number of siblings per CPU package */ @@ -403,11 +407,18 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c) static const unsigned long cr4_pinned_mask = X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | X86_CR4_FSGSBASE | X86_CR4_CET; +#ifdef CONFIG_IEE +DEFINE_STATIC_KEY_FALSE_RO(cr_pinning); +#else static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning); +#endif static unsigned long cr4_pinned_bits __ro_after_init; void native_write_cr0(unsigned long val) { 
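+	/*
+	 * With CONFIG_IEE, the CR0 update is forwarded through the IEE rwx
+	 * gate (IEE_WRITE_CR0) instead of being written directly here; the
+	 * open-coded WP-pinning path below is only compiled when IEE is off.
+	 */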
+#ifdef CONFIG_IEE + iee_rwx_gate(IEE_WRITE_CR0, val); +#else unsigned long bits_missing = 0; set_register: @@ -422,6 +433,7 @@ void native_write_cr0(unsigned long val) /* Warn after we've set the missing bits. */ WARN_ONCE(bits_missing, "CR0 WP bit went missing!?\n"); } +#endif } EXPORT_SYMBOL(native_write_cr0); diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index 16f9814c9be0..729b426ab161 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -34,6 +34,9 @@ #include #include +#ifdef CONFIG_PTP +#include +#endif /* * Note: we only need 6*8 = 48 bytes for the espfix stack, but round * it up to a cache line to avoid unnecessary sharing. @@ -106,6 +109,11 @@ void __init init_espfix_bsp(void) pgd_t *pgd; p4d_t *p4d; + #ifdef CONFIG_PTP + iee_set_logical_mem_ro((unsigned long)espfix_pud_page); + set_iee_page((unsigned long)__va(__pa_symbol(espfix_pud_page)), 0); + #endif + /* Install the espfix pud into the kernel page directory */ pgd = &init_top_pgt[pgd_index(ESPFIX_BASE_ADDR)]; p4d = p4d_alloc(&init_mm, pgd, ESPFIX_BASE_ADDR); @@ -158,6 +166,9 @@ void init_espfix_ap(int cpu) pmd_p = (pmd_t *)page_address(page); pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask)); + #ifdef CONFIG_PTP + set_iee_page((unsigned long)pmd_p, 0); + #endif paravirt_alloc_pmd(&init_mm, __pa(pmd_p) >> PAGE_SHIFT); for (n = 0; n < ESPFIX_PUD_CLONES; n++) set_pud(&pud_p[n], pud); @@ -170,6 +181,9 @@ void init_espfix_ap(int cpu) pte_p = (pte_t *)page_address(page); pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask)); + #ifdef CONFIG_PTP + set_iee_page((unsigned long)pte_p, 0); + #endif paravirt_alloc_pte(&init_mm, __pa(pte_p) >> PAGE_SHIFT); for (n = 0; n < ESPFIX_PMD_CLONES; n++) set_pmd(&pmd_p[n], pmd); diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 1defe865de67..148ea7acf075 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -44,6 +44,10 @@ #include #include +#ifdef CONFIG_IEE +#include +#endif + /* * Manage page tables very early on. */ @@ -68,6 +72,11 @@ unsigned long vmemmap_base __ro_after_init = __VMEMMAP_BASE_L4; EXPORT_SYMBOL(vmemmap_base); #endif +#ifdef CONFIG_PTP +unsigned long iee_ptdesc_base __ro_after_init = __VPTDMAP_BASE_L4; +EXPORT_SYMBOL(iee_ptdesc_base); +#endif + /* * GDT used on the boot CPU before switching to virtual addresses. 
*/ @@ -118,6 +127,10 @@ static bool __head check_la57_support(unsigned long physaddr) *fixup_long(&vmalloc_base, physaddr) = __VMALLOC_BASE_L5; *fixup_long(&vmemmap_base, physaddr) = __VMEMMAP_BASE_L5; + #ifdef CONFIG_PTP + *fixup_long(&iee_ptdesc_base, physaddr) = __VPTDMAP_BASE_L5; + #endif + return true; } #else @@ -720,7 +733,11 @@ static void startup_64_load_idt(unsigned long physbase) } desc->address = (unsigned long)idt; + #ifdef CONFIG_IEE + iee_load_idt_pre_init(desc); + #else native_load_idt(desc); + #endif } /* This is used when running on kernel addresses */ diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index fc77a96040b7..378d35807da9 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -163,13 +163,17 @@ static const __initconst struct idt_data apic_idts[] = { }; /* Must be page-aligned because the real IDT is used in the cpu entry area */ -static gate_desc idt_table[IDT_ENTRIES] __page_aligned_bss; +gate_desc idt_table[IDT_ENTRIES] __page_aligned_bss; -static struct desc_ptr idt_descr __ro_after_init = { +struct desc_ptr idt_descr __ro_after_init = { .size = IDT_TABLE_SIZE - 1, .address = (unsigned long) idt_table, }; +#ifdef CONFIG_KOI +const unsigned long koi_idt_descr_addr = (unsigned long)&idt_descr; +#endif + void load_current_idt(void) { lockdep_assert_irqs_disabled(); diff --git a/arch/x86/kernel/iee/Makefile b/arch/x86/kernel/iee/Makefile new file mode 100644 index 000000000000..b4f188f159d8 --- /dev/null +++ b/arch/x86/kernel/iee/Makefile @@ -0,0 +1,4 @@ +obj-$(CONFIG_IEE) += iee.o iee-gate.o iee-func.o stack-slab.o +ccflags-$(CONFIG_IEE_SELINUX_P) := -I$(srctree)/security/selinux -I$(srctree)/security/selinux/include +obj-$(CONFIG_IEE_SELINUX_P) += iee-selinuxp.o +obj-$(CONFIG_PTP) += pgtable-slab.o diff --git a/arch/x86/kernel/iee/iee-func.c b/arch/x86/kernel/iee/iee-func.c new file mode 100644 index 000000000000..d742691e7e32 --- /dev/null +++ b/arch/x86/kernel/iee/iee-func.c @@ -0,0 +1,365 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static inline void iee_set_sensitive_pte(pte_t *lm_ptep, int order, int use_block_pmd) +{ +#ifdef CONFIG_PTP + iee_rw_gate(IEE_SET_SENSITIVE_PTE, lm_ptep, order, use_block_pmd); +#else + int i; + if (use_block_pmd) { + pmd_t pmd = __pmd(pte_val(READ_ONCE(*lm_ptep))); + pmd = __pmd((pmd_val(pmd) & (~__RW) & (~___D))); + WRITE_ONCE(*lm_ptep, __pte(pmd_val(pmd))); + } + else + { + for(i = 0; i < (1 << order); i++) + { + pte_t pte = READ_ONCE(*lm_ptep); + pte = __pte((pte_val(pte) & (~__RW) & (~___D))); + WRITE_ONCE(*lm_ptep, pte); + lm_ptep++; + } + } +#endif +} + +static inline void iee_unset_sensitive_pte(pte_t *lm_ptep, int order, int use_block_pmd) +{ +#ifdef CONFIG_PTP + iee_rw_gate(IEE_UNSET_SENSITIVE_PTE, lm_ptep, order, use_block_pmd); +#else + int i; + if (use_block_pmd) + { + pmd_t pmd = __pmd(pte_val(READ_ONCE(*lm_ptep))); + pmd = __pmd((pmd_val(pmd) | __RW | ___D)); + WRITE_ONCE(*lm_ptep, __pte(pmd_val(pmd))); + } + else + { + for(i = 0; i < (1 << order); i++) + { + pte_t pte = READ_ONCE(*lm_ptep); + pte = __pte(pte_val(pte) | __RW | ___D); + WRITE_ONCE(*lm_ptep, pte); + lm_ptep++; + } + } +#endif +} + +static void do_split_huge_pmd(pmd_t* pmdp) +{ + // pte_t *pgtable = pte_alloc_one_kernel(&init_mm); + gfp_t gfp = (GFP_PGTABLE_KERNEL & ~__GFP_HIGHMEM) | __GFP_COMP; + struct page *new_page = alloc_pages(gfp, 0); + if (!new_page) + panic("IEE: failed to alloc pgtable\n"); + pte_t *pgtable = (pte_t *)page_to_virt(new_page); + #ifdef 
CONFIG_PTP + if (iee_pgt_jar_init) + iee_ptdesc_init(new_page); + iee_split_huge_pmd(pmdp, pgtable); + #else + int i; + struct page *page = pmd_page(*pmdp); + pte_t *ptep = (pte_t *)((unsigned long)pgtable); + for (i = 0; i < PMD_SIZE / PAGE_SIZE; i++, ptep++) { + pte_t entry; + pgprot_t pgprot = pmd_pgprot(*pmdp); + entry = mk_pte(page + i, pgprot); + WRITE_ONCE(*ptep, entry); + } + #endif + spinlock_t *ptl = pmd_lock(&init_mm, pmdp); + if (pmd_leaf(READ_ONCE(*pmdp))) { + smp_wmb(); + pmd_populate_kernel(&init_mm, pmdp, pgtable); + pgtable = NULL; + } + spin_unlock(ptl); + if(pgtable) + { + memset(pgtable, 0, PAGE_SIZE); + #ifdef CONFIG_PTP + if (iee_pgt_jar_init) + iee_ptdesc_free(new_page); + #endif + __free_pages(new_page, compound_order(new_page)); + // pte_free_kernel(&init_mm, pgtable); + } +} + +// Input is the lm vaddr of sensitive data. +void set_iee_page(unsigned long addr, int order) +{ + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); + p4d_t *p4dp = p4d_offset(pgdp, addr); + pud_t *pudp = pud_offset(p4dp, addr); + pmd_t *pmdp = pmd_offset(pudp, addr); + int use_block_pmd = 0; + + if (pmd_leaf(*pmdp) && order < 9) { + do_split_huge_pmd(pmdp); + } else if (pmd_leaf(*pmdp)) { + use_block_pmd = 1; + } + pte_t *lm_ptep; + if (use_block_pmd) { + lm_ptep = (pte_t *)pmdp; + } else { + lm_ptep = pte_offset_kernel(pmdp, addr); + } + + iee_set_sensitive_pte(lm_ptep, order, use_block_pmd); + flush_tlb_kernel_range(addr, addr+PAGE_SIZE*(1 << order)); +} + +// Input is the lm vaddr of sensitive data. +void unset_iee_page(unsigned long addr, int order) +{ + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); + p4d_t *p4dp = p4d_offset(pgdp, addr); + pud_t *pudp = pud_offset(p4dp, addr); + pmd_t *pmdp = pmd_offset(pudp, addr); + pte_t *lm_ptep; + int use_block_pmd = 0; + // Use Block Descriptor. 
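+	// If the linear map still covers this address with a 2MiB leaf PMD,
+	// operate on the PMD entry itself (use_block_pmd); otherwise walk
+	// down to the 4KiB PTE for the page.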
+ if(pmd_leaf(*pmdp)) + { + use_block_pmd = 1; + lm_ptep = (pte_t *)pmdp; + } + else + lm_ptep = pte_offset_kernel(pmdp, addr); + + iee_unset_sensitive_pte(lm_ptep, order, use_block_pmd); + flush_tlb_kernel_range(addr, addr+PAGE_SIZE*(1 << order)); +} + +void iee_set_logical_mem_ro(unsigned long addr) +{ + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); + p4d_t *p4dp = p4d_offset(pgdp, addr); + pud_t *pudp = pud_offset(p4dp, addr); + pmd_t *pmdp = pmd_offset(pudp, addr); + if (pmd_leaf(*pmdp)) { + do_split_huge_pmd(pmdp); + } + + pte_t *ptep = pte_offset_kernel(pmdp, addr); + pte_t pte = READ_ONCE(*ptep); + pte = __pte((pte_val(pte) & (~__RW) & (~___D))); + set_pte(ptep, pte); + flush_tlb_kernel_range(addr, addr+PAGE_SIZE); +} + +void set_iee_page_valid(unsigned long addr) {} + +void set_iee_page_invalid(unsigned long addr) {} + +void iee_set_token_page_valid(void *token, void *token_page, unsigned int order) +{ + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, (unsigned long)token); + p4d_t *p4dp = p4d_offset(pgdp, (unsigned long)token); + pud_t *pudp = pud_offset(p4dp, (unsigned long)token); + pmd_t *token_pmdp = pmd_offset(pudp, (unsigned long)token); + pte_t *token_ptep = pte_offset_kernel(token_pmdp, (unsigned long)token); + + if(token_page == NULL) + panic("Token of task_struct was unset.\n"); + + pgdp = pgd_offset_pgd(pgdir, (unsigned long)token_page); + p4dp = p4d_offset(pgdp, (unsigned long)token_page); + pudp = pud_offset(p4dp, (unsigned long)token_page); + pmd_t *token_page_pmdp = pmd_offset(pudp, (unsigned long)token_page); + pte_t *token_page_ptep; + + int use_block_pmd = 0; + if (pmd_leaf(*token_page_pmdp) && order < 9) { + do_split_huge_pmd(token_page_pmdp); + } else if (pmd_leaf(*token_page_pmdp)) { + use_block_pmd = 1; + } + + if (use_block_pmd) { + token_page_ptep = (pte_t *)token_page_pmdp; + } else { + token_page_ptep = pte_offset_kernel(token_page_pmdp, (unsigned long)token_page); + } + +#ifdef CONFIG_PTP + iee_rw_gate(IEE_OP_SET_TOKEN, token_ptep, token_page_ptep, token_page, order, use_block_pmd); +#else + if (use_block_pmd) + { + pmd_t *pmdp = (pmd_t *)token_page_ptep; + pmd_t pmd = READ_ONCE(*pmdp); + pmd = __pmd((pmd_val(pmd) & ~__RW) & ~___D); + WRITE_ONCE(*pmdp, pmd); + for(int i = 0; i < (0x1 << order); i++) + { + pte_t pte = READ_ONCE(*token_ptep); + pte = __pte((pte_val(pte) & ~PTE_PFN_MASK) | (__phys_to_pfn(__pa(token_page + i*PAGE_SIZE)) << PAGE_SHIFT)); + WRITE_ONCE(*token_ptep, pte); + token_ptep++; + } + } + else { + for(int i = 0; i < (0x1 << order); i++) + { + pte_t pte = READ_ONCE(*token_ptep); + pte = __pte((pte_val(pte) & ~PTE_PFN_MASK) | (__phys_to_pfn(__pa(token_page + i*PAGE_SIZE)) << PAGE_SHIFT)); + WRITE_ONCE(*token_ptep, pte); + pte = READ_ONCE(*token_page_ptep); + pte = __pte((pte_val(pte) & ~__RW) & ~___D); + WRITE_ONCE(*token_page_ptep, pte); + token_ptep++; + token_page_ptep++; + } + } +#endif + flush_tlb_kernel_range((unsigned long)token, (unsigned long)(token + (PAGE_SIZE * (1 << order)))); + flush_tlb_kernel_range((unsigned long)token_page, (unsigned long)(token_page + (PAGE_SIZE * (1 << order)))); +} + +void iee_set_token_page_invalid(void *token, void *__unused, unsigned long order) +{ + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, (unsigned long)token); + p4d_t *p4dp = p4d_offset(pgdp, (unsigned long)token); + pud_t *pudp = pud_offset(p4dp, (unsigned long)token); + pmd_t *token_pmdp = pmd_offset(pudp, (unsigned long)token); + pte_t *token_ptep = 
pte_offset_kernel(token_pmdp, (unsigned long)token); + void *token_page = page_address(pte_page(*token_ptep)); + + if(token_page == NULL) + panic("Token of task_struct was unset.\n"); + + pgdp = pgd_offset_pgd(pgdir, (unsigned long)token_page); + p4dp = p4d_offset(pgdp, (unsigned long)token_page); + pudp = pud_offset(p4dp, (unsigned long)token_page); + pmd_t *token_page_pmdp = pmd_offset(pudp, (unsigned long)token_page); + pte_t *token_page_ptep; + int use_block_pmd = 0; + if (pmd_leaf(*token_page_pmdp)) { + use_block_pmd = 1; + token_page_ptep = (pte_t *)token_page_pmdp; + } else { + token_page_ptep = pte_offset_kernel(token_page_pmdp, (unsigned long)token_page); + } +#ifdef CONFIG_PTP + iee_rw_gate(IEE_OP_UNSET_TOKEN, token_ptep, token_page_ptep, token, token_page, order); +#else + if (use_block_pmd) + { + pmd_t *pmdp = (pmd_t *)token_page_ptep; + pmd_t pmd = READ_ONCE(*pmdp); + pmd = __pmd(pmd_val(pmd) | ___D | __RW); + WRITE_ONCE(*pmdp, pmd); + for(int i = 0; i < (0x1 << order); i++) + { + pte_t pte = READ_ONCE(*token_ptep); + pte = __pte((pte_val(pte) & ~PTE_PFN_MASK) | (__phys_to_pfn(__iee_pa(token + i*PAGE_SIZE)) << PAGE_SHIFT)); + WRITE_ONCE(*token_ptep, pte); + token_ptep++; + } + } + else + { + for(int i = 0; i < (0x1 << order); i++) + { + pte_t pte = READ_ONCE(*token_ptep); + pte = __pte((pte_val(pte) & ~PTE_PFN_MASK) | (__phys_to_pfn(__iee_pa(token + i*PAGE_SIZE)) << PAGE_SHIFT)); + WRITE_ONCE(*token_ptep, pte); + pte = READ_ONCE(*token_page_ptep); + pte = __pte(pte_val(pte) | ___D | __RW); + WRITE_ONCE(*token_page_ptep, pte); + token_ptep++; + token_page_ptep++; + } + } +#endif + free_pages((unsigned long)token_page, order); + flush_tlb_kernel_range((unsigned long)token, (unsigned long)(token + (PAGE_SIZE * (1 << order)))); + flush_tlb_kernel_range((unsigned long)token_page, (unsigned long)(token_page + (PAGE_SIZE * (1 << order)))); +} + +void __init iee_set_kernel_upage(unsigned long addr) +{ + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); + pgd_t pgd = READ_ONCE(*pgdp); + pgd = __pgd((pgd_val(pgd) | _USR) & ~___G); + set_pgd(pgdp, pgd); + + p4d_t *p4dp = p4d_offset(pgdp, addr); + p4d_t p4d = READ_ONCE(*p4dp); + p4d = __p4d((p4d_val(p4d) | _USR) & ~___G); + set_p4d(p4dp, p4d); + + pud_t *pudp = pud_offset(p4dp, addr); + if (pud_leaf(*pudp)) { + panic("Huge pud page set upage!\n"); + } + pud_t pud = READ_ONCE(*pudp); + pud = __pud((pud_val(pud) | _USR) & ~___G); + set_pud(pudp, pud); + + pmd_t *pmdp = pmd_offset(pudp, addr); + if (pmd_leaf(*pmdp)) { + do_split_huge_pmd(pmdp); + } + pmd_t pmd = READ_ONCE(*pmdp); + pmd = __pmd((pmd_val(pmd) | _USR) & ~___G); + set_pmd(pmdp, pmd); + + pte_t *ptep = pte_offset_kernel(pmdp, addr); + pte_t pte = READ_ONCE(*ptep); + pte = __pte((pte_val(pte) | _USR) & ~___G); + set_pte(ptep, pte); + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); +} + +void set_iee_stack_page(unsigned long addr, int order) +{ + set_iee_page(addr ,order); +} + +void unset_iee_stack_page(unsigned long addr, int order) +{ + unset_iee_page(addr, order); +} + +void __init iee_rest_init(void) +{ + // Prepare data for iee rwx gate + unsigned long addr; + /* Map .iee.text as U RWX pages */ + addr = (unsigned long)__iee_si_text_start; + for (; addr < (unsigned long)__iee_si_text_end; addr += PAGE_SIZE){ + iee_set_kernel_upage((unsigned long)addr); + } + iee_init_done = true; + iee_base_swapper_pg_dir = __sme_pa(swapper_pg_dir); + /* Map .iee.data as RO pages */ + set_memory_ro((unsigned long)__iee_si_data_start, ((unsigned long)__iee_si_data_end 
- (unsigned long)__iee_si_data_start) / PAGE_SIZE); + // All initialization is done. Do some simple tests. + pr_err("IEE: testing iee_exec_entry si_test..."); + iee_rwx_gate(IEE_SI_TEST); + pr_err("IEE: testing iee_exec_entry si_test..."); +} \ No newline at end of file diff --git a/arch/x86/kernel/iee/iee-gate.S b/arch/x86/kernel/iee/iee-gate.S new file mode 100644 index 000000000000..c80e3f6b45b3 --- /dev/null +++ b/arch/x86/kernel/iee/iee-gate.S @@ -0,0 +1,209 @@ +#include +#include +#include +#include +#include + +#define X86_CR4_SMEP_SMAP (X86_CR4_SMEP | X86_CR4_SMAP) + +#ifdef CONFIG_PTP +SYM_FUNC_START(iee_set_xchg) + /* save RFLAGS, close irq */ + pushfq + cli + /* stac, disable SMAP */ + stac + + xchg %rsi, (%rdi) + mov %rsi, %rax + + /* clac, enable SMAP */ + clac + /* restore RFLAGS*/ + popfq + jmp __x86_return_thunk /* ret */ +SYM_FUNC_END(iee_set_xchg) + +SYM_FUNC_START(iee_set_try_cmpxchg) + /* save RFLAGS, close irq */ + pushfq + cli + /* stac, disable SMAP */ + stac + + mov %rsi, %rax + lock cmpxchgq %rdx, (%rdi) + + /* clac, enable SMAP */ + clac + /* restore RFLAGS*/ + popfq + jmp __x86_return_thunk /* ret */ +SYM_FUNC_END(iee_set_try_cmpxchg) +#endif /* CONFIG_PTP */ + +SYM_FUNC_START(iee_rw_gate) + /* save Interrupt flag */ + pushf + /* close irq*/ + cli + + /* stac, disable SMAP */ + stac + + pushq %r12 + + /* switch to iee stack */ + movq PER_CPU_VAR(pcpu_hot) + X86_current_task, %r12 /* r12 -> task_struct */ + addq iee_offset(%rip), %r12 + movq %rsp, kernel_from_token_offset(%r12) + movq iee_from_token_offset(%r12), %rsp + + /* call iee func */ + leaq iee_funcs(%rip), %rax + call *(%rax, %rdi, 8) + + /* switch to kernel stack */ + movq kernel_from_token_offset(%r12), %rsp + + popq %r12 + + /* clac, enable SMAP */ + clac + + /* restore irq*/ + popf + + lfence /* Serializing instruction before the ret */ + jmp __x86_return_thunk /* ret */ +SYM_FUNC_END(iee_rw_gate) +#if defined(CONFIG_CREDP) || defined(CONFIG_KEYP) || defined(CONFIG_PTP) || defined(CONFIG_KOI) +EXPORT_SYMBOL(iee_rw_gate) +#endif + +SYM_FUNC_START(iee_read_token_stack) + /* save Interrupt flag */ + pushf + /* close irq*/ + cli + + push %r12 + + /* stac, disable SMAP */ + stac + + addq iee_offset(%rip), %rdi + movq iee_from_token_offset(%rdi), %rax + + /* clac, enable SMAP */ + clac + + pop %r12 + + /* restore irq*/ + popf + + lfence /* Serializing instruction before the ret */ + jmp __x86_return_thunk /* ret */ +SYM_FUNC_END(iee_read_token_stack) + +SYM_FUNC_START(iee_read_tmp_page) + /* save Interrupt flag */ + pushf + /* close irq*/ + cli + + push %r12 + + /* stac, disable SMAP */ + stac + + addq iee_offset(%rip), %rdi + movq tmp_page_from_token_offset(%rdi), %rax + + /* clac, enable SMAP */ + clac + + pop %r12 + + /* restore irq*/ + popf + lfence /* Serializing instruction before the ret */ + jmp __x86_return_thunk /* ret */ +SYM_FUNC_END(iee_read_tmp_page) + +SYM_FUNC_START(iee_read_freeptr) + /* save Interrupt flag */ + pushf + /* close irq*/ + cli + + push %r12 + + /* stac, disable SMAP */ + stac + + addq iee_offset(%rip), %rdi + movq (%rdi), %rax + + /* clac, enable SMAP */ + clac + + pop %r12 + + /* restore irq*/ + popf + lfence /* Serializing instruction before the ret */ + jmp __x86_return_thunk /* ret */ +SYM_FUNC_END(iee_read_freeptr) + +SYM_FUNC_START(iee_rwx_gate) + pushq %r12 + + /* save Interrupt flag*/ + pushfq + /* close irq */ + cli + + /* set SMEP=0 to enable supervisor-mode exec user-mode insn */ + movq %cr4, %rax /* rax -> cr4 */ + andq $(~X86_CR4_SMEP_SMAP), %rax + movq %rax, %cr4 
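+	/* SMEP and SMAP are both cleared at this point, so the call into
+	 * iee_si_handler below can execute and access the user-mapped IEE
+	 * section without faulting. */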
+ + movq %rsp, %r12 + + /* If iee hasn't been initialized, skip stack switch. */ + cmpb $0, iee_init_done(%rip) + jz 2f +#ifdef CONFIG_KOI + cmpq $IEE_SWITCH_TO_KOI, %rdi + jz 2f + cmpq $IEE_SWITCH_TO_KERNEL, %rdi + jz 2f +#endif + /* switch to iee stack */ + movq PER_CPU_VAR(pcpu_hot) + X86_current_task, %rax /* rax -> task_struct */ + addq iee_offset(%rip), %rax + movq %rsp, kernel_from_token_offset(%rax) + movq iee_from_token_offset(%rax), %rsp + +2: call iee_si_handler + + /* switch to kernel stack. If iee hasn't been initialized, skip switch*/ + movq %r12, %rsp + + /* set SMEP=1 to disable supervisor-mode exec user-mode insn */ + movq %cr4, %rax /* rax -> cr4 */ +1: orq $X86_CR4_SMEP_SMAP, %rax + movq %rax, %cr4 + andq $(X86_CR4_SMEP_SMAP), %rax + cmpq $(X86_CR4_SMEP_SMAP), %rax + jnz 1 + + /* restore irq*/ + popfq + + popq %r12 + jmp __x86_return_thunk /* ret */ +SYM_FUNC_END(iee_rwx_gate) +EXPORT_SYMBOL(iee_rwx_gate) \ No newline at end of file diff --git a/arch/x86/kernel/iee/iee-selinuxp.c b/arch/x86/kernel/iee/iee-selinuxp.c new file mode 100644 index 000000000000..47d6eeb7a784 --- /dev/null +++ b/arch/x86/kernel/iee/iee-selinuxp.c @@ -0,0 +1,36 @@ +#include +#include +extern unsigned long long iee_rw_gate(int flag, ...); + +#ifdef CONFIG_IEE_SELINUX_P +inline void iee_set_selinux_status_pg(struct page *new_page) +{ + iee_rw_gate(IEE_SEL_SET_STATUS_PG, new_page); +} + +inline void enforcing_set(bool value) +{ + iee_rw_gate(IEE_SEL_SET_ENFORCING, value); +} + +inline void selinux_mark_initialized(void) +{ + iee_rw_gate(IEE_SEL_SET_INITIALIZED); +} + +inline void iee_set_sel_policy_cap(unsigned int idx, int cap) +{ + iee_rw_gate(IEE_SEL_SET_POLICY_CAP, idx, cap); +} + +/* + * Please make sure param iee_new_policy is from policy_jar memcache. + * Need to free new_policy after calling this func as it's only used to + * trans data from kernel. 
+ */ +inline void iee_sel_rcu_assign_policy(struct selinux_policy *new_policy, + struct selinux_policy *iee_new_policy) +{ + iee_rw_gate(IEE_SEL_RCU_ASSIGN_POLICY, new_policy, iee_new_policy); +} +#endif diff --git a/arch/x86/kernel/iee/iee.c b/arch/x86/kernel/iee/iee.c new file mode 100644 index 000000000000..107167a0c5ea --- /dev/null +++ b/arch/x86/kernel/iee/iee.c @@ -0,0 +1,1048 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_PTP +void __iee_code _iee_set_pte(unsigned long __unused, pte_t *ptep, pte_t pte); +void __iee_code _iee_set_pmd(unsigned long __unused, pmd_t *pmdp, pmd_t pmd); +void __iee_code _iee_set_pud(unsigned long __unused, pud_t *pudp, pud_t pud); +void __iee_code _iee_set_p4d(unsigned long __unused, p4d_t *p4dp, p4d_t p4d); +void __iee_code _iee_set_pgd(unsigned long __unused, pgd_t *pgdp, pgd_t pgd); +#endif +void __iee_code _iee_memcpy(unsigned long __unused, void *dst, void *src, size_t n); +void __iee_code _iee_memset(unsigned long __unused, void *ptr, int data, size_t n); +void __iee_code _iee_set_freeptr(unsigned long __unused, void **pptr, void *ptr); +void __iee_code _iee_split_huge_pmd(unsigned long __unused, pmd_t *pmdp, pte_t *pgtable); +void __iee_code _iee_set_token_pgd(unsigned long __unused, struct task_struct *tsk, pgd_t *pgd); +void __iee_code _iee_init_token(unsigned long __unused, struct task_struct *tsk, void *iee_stack, void *tmp_page); +void __iee_code _iee_invalidate_token(unsigned long __unused, struct task_struct *tsk); +void __iee_code _iee_validate_token(unsigned long __unused, struct task_struct *tsk); +void __iee_code _iee_unset_token(unsigned long __unused, pte_t *token_ptep, pte_t *token_page_ptep, void *token, void *token_page, unsigned long order, int use_block_pmd); +void __iee_code _iee_set_token(unsigned long __unused, pte_t *token_ptep, pte_t *new_ptep, void *new, unsigned long order, int use_block_pmd); +void __iee_code _iee_set_track(unsigned long __unused, struct track *ptr, struct track *data); +unsigned long __iee_code _iee_test_and_clear_bit(unsigned long __unused, long nr, volatile unsigned long *addr); +void __iee_code _iee_set_sensitive_pte(unsigned long __unused, pte_t *lm_ptep, int order, int use_block_pmd); +void __iee_code _iee_unset_sensitive_pte(unsigned long __unused, pte_t *lm_ptep, int order, int use_block_pmd); + +#ifdef CONFIG_CREDP +extern struct cred init_cred; +void __iee_code _iee_set_cred_uid(unsigned long __unused, struct cred *cred, kuid_t uid); +void __iee_code _iee_set_cred_gid(unsigned long __unused, struct cred *cred, kgid_t gid); +void __iee_code _iee_copy_cred(unsigned long __unused, struct cred *old, struct cred *new); +void __iee_code _iee_set_cred_suid(unsigned long __unused, struct cred *cred, kuid_t suid); +void __iee_code _iee_set_cred_sgid(unsigned long __unused, struct cred *cred, kgid_t sgid); +void __iee_code _iee_set_cred_euid(unsigned long __unused, struct cred *cred, kuid_t euid); +void __iee_code _iee_set_cred_egid(unsigned long __unused, struct cred *cred, kgid_t egid); +void __iee_code _iee_set_cred_fsuid(unsigned long __unused, struct cred *cred, kuid_t fsuid); +void __iee_code _iee_set_cred_fsgid(unsigned long __unused, struct cred *cred, kgid_t fsgid); +void __iee_code _iee_set_cred_user(unsigned long __unused, struct cred *cred, struct user_struct *user); +void __iee_code _iee_set_cred_user_ns(unsigned long 
__unused, struct cred *cred, struct user_namespace *user_ns); +void __iee_code _iee_set_cred_group_info(unsigned long __unused, struct cred *cred, struct group_info *group_info); +void __iee_code _iee_set_cred_securebits(unsigned long __unused, struct cred *cred, unsigned securebits); +void __iee_code _iee_set_cred_cap_inheritable(unsigned long __unused, struct cred *cred, kernel_cap_t cap_inheritable); +void __iee_code _iee_set_cred_cap_permitted(unsigned long __unused, struct cred *cred, kernel_cap_t cap_permitted); +void __iee_code _iee_set_cred_cap_effective(unsigned long __unused, struct cred *cred, kernel_cap_t cap_effective); +void __iee_code _iee_set_cred_cap_bset(unsigned long __unused, struct cred *cred, kernel_cap_t cap_bset); +void __iee_code _iee_set_cred_cap_ambient(unsigned long __unused, struct cred *cred, kernel_cap_t cap_ambient); +void __iee_code _iee_set_cred_jit_keyring(unsigned long __unused, struct cred *cred, unsigned char jit_keyring); +void __iee_code _iee_set_cred_session_keyring(unsigned long __unused, struct cred *cred, struct key *session_keyring); +void __iee_code _iee_set_cred_process_keyring(unsigned long __unused, struct cred *cred, struct key *process_keyring); +void __iee_code _iee_set_cred_thread_keyring(unsigned long __unused, struct cred *cred, struct key *thread_keyring); +void __iee_code _iee_set_cred_request_key_auth(unsigned long __unused, struct cred *cred, struct key *request_key_auth); +void __iee_code _iee_set_cred_non_rcu(unsigned long __unused, struct cred *cred, int non_rcu); +void __iee_code _iee_set_cred_atomic_set_usage(unsigned long __unused, struct cred *cred, int i); +unsigned long __iee_code _iee_set_cred_atomic_op_usage(unsigned long __unused, struct cred *cred, int flag, int nr); +void __iee_code _iee_set_cred_security(unsigned long __unused, struct cred *cred, void *security); +void __iee_code _iee_set_cred_rcu(unsigned long __unused, struct cred *cred, struct rcu_head *rcu); +void __iee_code _iee_set_cred_ucounts(unsigned long __unused, struct cred *cred, struct ucounts *ucounts); +#endif + +#ifdef CONFIG_KEYP +struct watch_list; +void __iee_code _iee_set_key_union(unsigned long __unused, struct key *key, struct key_union *key_union); +void __iee_code _iee_set_key_struct(unsigned long __unused, struct key *key, struct key_struct *key_struct); +void __iee_code _iee_set_key_payload(unsigned long __unused, struct key *key, union key_payload *key_payload); +unsigned long __iee_code _iee_set_key_usage(unsigned long __unused, struct key *key, int n, int flag); +void __iee_code _iee_set_key_serial(unsigned long __unused, struct key *key, key_serial_t serial); +void __iee_code _iee_set_key_watchers(unsigned long __unused, struct key *key, struct watch_list *watchers); +void __iee_code _iee_set_key_user(unsigned long __unused, struct key *key, struct key_user *user); +void __iee_code _iee_set_key_security(unsigned long __unused, struct key *key, void *security); +void __iee_code _iee_set_key_expiry(unsigned long __unused, struct key *key, time64_t expiry); +void __iee_code _iee_set_key_revoked_at(unsigned long __unused, struct key *key, time64_t revoked_at); +void __iee_code _iee_set_key_last_used_at(unsigned long __unused, struct key *key, time64_t last_used_at); +void __iee_code _iee_set_key_uid(unsigned long __unused, struct key *key, kuid_t uid); +void __iee_code _iee_set_key_gid(unsigned long __unused, struct key *key, kgid_t gid); +void __iee_code _iee_set_key_perm(unsigned long __unused, struct key *key, key_perm_t perm); +void 
__iee_code _iee_set_key_quotalen(unsigned long __unused, struct key *key, unsigned short quotalen); +void __iee_code _iee_set_key_datalen(unsigned long __unused, struct key *key, unsigned short datalen); +void __iee_code _iee_set_key_state(unsigned long __unused, struct key *key, short state); +void __iee_code _iee_set_key_magic(unsigned long __unused, struct key *key, unsigned magic); +void __iee_code _iee_set_key_flags(unsigned long __unused, struct key *key, unsigned long flags); +void __iee_code _iee_set_key_index_key(unsigned long __unused, struct key *key, struct keyring_index_key* index_key); +void __iee_code _iee_set_key_hash(unsigned long __unused, struct key *key, unsigned long hash); +void __iee_code _iee_set_key_len_desc(unsigned long __unused, struct key *key, unsigned long len_desc); +void __iee_code _iee_set_key_type(unsigned long __unused, struct key *key, struct key_type *type); +void __iee_code _iee_set_key_domain_tag(unsigned long __unused, struct key *key, struct key_tag *domain_tag); +void __iee_code _iee_set_key_description(unsigned long __unused, struct key *key, char *description); +void __iee_code _iee_set_key_restrict_link(unsigned long __unused, struct key *key, struct key_restriction *restrict_link); +unsigned long __iee_code _iee_set_key_flag_bit(unsigned long __unused, struct key *key, long nr, int flag); +#endif +#ifdef CONFIG_KOI +unsigned long __iee_code _iee_read_koi_stack(unsigned long iee_offset, struct task_struct *tsk); +void __iee_code _iee_write_koi_stack(unsigned long iee_offset, struct task_struct *tsk, unsigned long koi_stack); +unsigned long __iee_code _iee_read_token_ttbr1(unsigned long iee_offset, struct task_struct *tsk); +void __iee_code _iee_write_token_ttbr1(unsigned long iee_offset, struct task_struct *tsk, unsigned long current_ttbr1); +unsigned long __iee_code _iee_read_koi_kernel_stack(unsigned long iee_offset, struct task_struct *tsk); +void __iee_code _iee_write_koi_kernel_stack(unsigned long iee_offset, struct task_struct *tsk, unsigned long kernel_stack); +unsigned long __iee_code _iee_read_koi_stack_base(unsigned long iee_offset, struct task_struct *tsk); +void __iee_code _iee_write_koi_stack_base(unsigned long iee_offset, struct task_struct *tsk, unsigned long koi_stack_base); +void __iee_code _iee_set_koi_pgd(unsigned long iee_offset, unsigned long koi_pgd_addr); +#endif + +#ifdef CONFIG_IEE_SELINUX_P +#include +void __iee_code _iee_set_selinux_status_pg(unsigned long __unused, struct page* new_page); +void __iee_code _iee_set_selinux_enforcing(unsigned long __unused, bool value); +void __iee_code _iee_mark_selinux_initialized(unsigned long __unused); +void __iee_code _iee_set_sel_policy_cap(unsigned long __unused, unsigned int idx, int cap); +void __iee_code _iee_sel_rcu_assign_policy(unsigned long __unused, struct selinux_policy* new_policy, struct selinux_policy* iee_new_policy); +#endif + +typedef void (*iee_func)(void); +iee_func iee_funcs[] = { + #ifdef CONFIG_PTP + (iee_func)_iee_set_pte, + (iee_func)_iee_set_pmd, + (iee_func)_iee_set_pud, + (iee_func)_iee_set_p4d, + (iee_func)_iee_set_pgd, + #endif + (iee_func)_iee_memcpy, + (iee_func)_iee_memset, + (iee_func)_iee_set_freeptr, + (iee_func)_iee_split_huge_pmd, + (iee_func)_iee_set_token_pgd, + (iee_func)_iee_init_token, + (iee_func)_iee_invalidate_token, + (iee_func)_iee_validate_token, + (iee_func)_iee_unset_token, + (iee_func)_iee_set_token, + (iee_func)_iee_set_track, + (iee_func)_iee_test_and_clear_bit, + (iee_func)_iee_set_sensitive_pte, + 
(iee_func)_iee_unset_sensitive_pte, +#ifdef CONFIG_CREDP + (iee_func)_iee_copy_cred, + (iee_func)_iee_set_cred_uid, + (iee_func)_iee_set_cred_gid, + (iee_func)_iee_set_cred_suid, + (iee_func)_iee_set_cred_sgid, + (iee_func)_iee_set_cred_euid, + (iee_func)_iee_set_cred_egid, + (iee_func)_iee_set_cred_fsuid, + (iee_func)_iee_set_cred_fsgid, + (iee_func)_iee_set_cred_user, + (iee_func)_iee_set_cred_user_ns, + (iee_func)_iee_set_cred_group_info, + (iee_func)_iee_set_cred_securebits, + (iee_func)_iee_set_cred_cap_inheritable, + (iee_func)_iee_set_cred_cap_permitted, + (iee_func)_iee_set_cred_cap_effective, + (iee_func)_iee_set_cred_cap_bset, + (iee_func)_iee_set_cred_cap_ambient, + (iee_func)_iee_set_cred_jit_keyring, + (iee_func)_iee_set_cred_session_keyring, + (iee_func)_iee_set_cred_process_keyring, + (iee_func)_iee_set_cred_thread_keyring, + (iee_func)_iee_set_cred_request_key_auth, + (iee_func)_iee_set_cred_non_rcu, + (iee_func)_iee_set_cred_atomic_set_usage, + (iee_func)_iee_set_cred_atomic_op_usage, + (iee_func)_iee_set_cred_security, + (iee_func)_iee_set_cred_rcu, + (iee_func)_iee_set_cred_ucounts, +#endif +#ifdef CONFIG_KEYP + (iee_func)_iee_set_key_union, + (iee_func)_iee_set_key_struct, + (iee_func)_iee_set_key_payload, + (iee_func)_iee_set_key_usage, + (iee_func)_iee_set_key_serial, + (iee_func)_iee_set_key_watchers, + (iee_func)_iee_set_key_user, + (iee_func)_iee_set_key_security, + (iee_func)_iee_set_key_expiry, + (iee_func)_iee_set_key_revoked_at, + (iee_func)_iee_set_key_last_used_at, + (iee_func)_iee_set_key_uid, + (iee_func)_iee_set_key_gid, + (iee_func)_iee_set_key_perm, + (iee_func)_iee_set_key_quotalen, + (iee_func)_iee_set_key_datalen, + (iee_func)_iee_set_key_state, + (iee_func)_iee_set_key_magic, + (iee_func)_iee_set_key_flags, + (iee_func)_iee_set_key_index_key, + (iee_func)_iee_set_key_hash, + (iee_func)_iee_set_key_len_desc, + (iee_func)_iee_set_key_type, + (iee_func)_iee_set_key_domain_tag, + (iee_func)_iee_set_key_description, + (iee_func)_iee_set_key_restrict_link, + (iee_func)_iee_set_key_flag_bit, +#endif +#ifdef CONFIG_IEE_SELINUX_P + (iee_func)_iee_set_selinux_status_pg, + (iee_func)_iee_set_selinux_enforcing, + (iee_func)_iee_mark_selinux_initialized, + (iee_func)_iee_set_sel_policy_cap, + (iee_func)_iee_sel_rcu_assign_policy, +#endif +#ifdef CONFIG_KOI + (iee_func)_iee_read_koi_stack, + (iee_func)_iee_write_koi_stack, + (iee_func)_iee_read_token_ttbr1, + (iee_func)_iee_write_token_ttbr1, + (iee_func)_iee_read_koi_kernel_stack, + (iee_func)_iee_write_koi_kernel_stack, + (iee_func)_iee_read_koi_stack_base, + (iee_func)_iee_write_koi_stack_base, + (iee_func)_iee_set_koi_pgd, +#endif + NULL +}; + +#ifdef CONFIG_KOI +unsigned long __iee_code _iee_read_koi_stack(unsigned long __unused, struct task_struct *tsk) +{ + struct task_token *token = (struct task_token *)(__phys_to_iee(__pa(tsk))); + return (unsigned long)token->koi_stack; +} + +void __iee_code _iee_write_koi_stack(unsigned long __unused, struct task_struct *tsk, unsigned long koi_stack) +{ + struct task_token *token = (struct task_token *)(__phys_to_iee(__pa(tsk))); + token->koi_stack = (void *) koi_stack; +} + +unsigned long __iee_code _iee_read_token_ttbr1(unsigned long __unused, struct task_struct *tsk) +{ + struct task_token *token = (struct task_token *)(__phys_to_iee(__pa(tsk))); + return token->current_ttbr1; +} + +void __iee_code _iee_write_token_ttbr1(unsigned long __unused, struct task_struct *tsk, unsigned long current_ttbr1) +{ + struct task_token *token = (struct task_token 
*)(__phys_to_iee(__pa(tsk))); + token->current_ttbr1 = current_ttbr1; +} + +unsigned long __iee_code _iee_read_koi_kernel_stack(unsigned long __unused, struct task_struct *tsk) +{ + struct task_token *token = (struct task_token *)(__phys_to_iee(__pa(tsk))); + return (unsigned long) token->koi_kernel_stack; +} + +void __iee_code _iee_write_koi_kernel_stack(unsigned long __unused, struct task_struct *tsk, unsigned long kernel_stack) +{ + struct task_token *token = (struct task_token *)(__phys_to_iee(__pa(tsk))); + token->koi_kernel_stack = (void *) kernel_stack; +} + +unsigned long __iee_code _iee_read_koi_stack_base(unsigned long __unused, struct task_struct *tsk) +{ + struct task_token *token = (struct task_token *)(__phys_to_iee(__pa(tsk))); + return (unsigned long)token->koi_stack_base; +} + +void __iee_code _iee_write_koi_stack_base(unsigned long __unused, struct task_struct *tsk, unsigned long koi_stack_base) +{ + struct task_token *token = (struct task_token *)(__phys_to_iee(__pa(tsk))); + token->koi_stack_base = (void *) koi_stack_base; +} + +static inline void iee_set_koi_pgd_writeable(unsigned long koi_pgd_addr, unsigned long iee_si_addr) +{ + return; +} +/* + * Set IEE SI codes U RWX here to avoid IEE DEP checking fault. + * Mark koi pgd in the same time. + */ +void __iee_code _iee_set_koi_pgd(unsigned long iee_offset, unsigned long koi_pgd_addr) +{ + return; +} +#endif + +#ifdef CONFIG_PTP +void __iee_code _iee_set_pte(unsigned long __unused, pte_t *ptep, pte_t pte) +{ + WRITE_ONCE(*(pte_t *)(__phys_to_iee(__pa(ptep))), pte); +} + +void __iee_code _iee_set_pmd(unsigned long __unused, pmd_t *pmdp, pmd_t pmd) +{ + WRITE_ONCE(*(pmd_t *)(__phys_to_iee(__pa(pmdp))), pmd); +} + +void __iee_code _iee_set_pud(unsigned long __unused, pud_t *pudp, pud_t pud) +{ + WRITE_ONCE(*(pud_t *)(__phys_to_iee(__pa(pudp))), pud); +} + +void __iee_code _iee_set_p4d(unsigned long __unused, p4d_t *p4dp, p4d_t p4d) +{ + WRITE_ONCE(*(p4d_t *)(__phys_to_iee(__pa(p4dp))), p4d); +} + +void __iee_code _iee_set_pgd(unsigned long __unused, pgd_t *pgdp, pgd_t pgd) +{ + WRITE_ONCE(*(pgd_t *)(__phys_to_iee(__pa(pgdp))), pgd); +} +#endif + +void __iee_code _iee_memcpy(unsigned long __unused, void *dst, void *src, size_t n) +{ + char *_dst, *_src; + + _dst = (char *)(__phys_to_iee(__pa(dst))); + _src = (char *)src; + + while (n--) + *_dst++ = *_src++; +} + +void __iee_code _iee_set_track(unsigned long __unused, struct track *ptr, struct track *data) +{ + _iee_memcpy(__unused, ptr, data, sizeof(struct track)); +} + +void __iee_code _iee_memset(unsigned long __unused, void *ptr, int data, size_t n) +{ + char *_ptr; + + _ptr = (char *)(__phys_to_iee(__pa(ptr))); + + while (n--) + *_ptr++ = data; +} + +void __iee_code _iee_set_freeptr(unsigned long __unused, void **pptr, void *ptr) +{ + pptr = (void **)(__phys_to_iee(__pa(pptr))); + *pptr = ptr; +} + +void __iee_code _iee_split_huge_pmd(unsigned long __unused, pmd_t *pmdp, pte_t *pgtable) +{ + int i; + struct page *page = pmd_page(*pmdp); + pte_t *ptep = (pte_t *)(__phys_to_iee(__pa(pgtable))); + for (i = 0; i < PMD_SIZE / PAGE_SIZE; i++, ptep++) { + pte_t entry; + pgprot_t pgprot = pmd_pgprot(*pmdp); + entry = mk_pte(page + i, pgprot); + WRITE_ONCE(*ptep, entry); + } +} + +void __iee_code _iee_set_token_pgd(unsigned long __unused, struct task_struct *tsk, pgd_t *pgd) +{ + struct task_token *token; + + token = (struct task_token *)(__phys_to_iee(__pa(tsk))); + token->pgd = pgd; +} + +void __iee_code _iee_init_token(unsigned long __unused, struct task_struct *tsk, void 
*iee_stack, void *tmp_page) +{ + struct task_token *token; + + token = (struct task_token *)(__phys_to_iee(__pa(tsk))); + token->iee_stack = iee_stack; + token->tmp_page = tmp_page; +} + +void __iee_code _iee_invalidate_token(unsigned long __unused, struct task_struct *tsk) +{ + struct task_token *token = (struct task_token *)(__phys_to_iee(__pa(tsk))); + token->pgd = NULL; + token->valid = false; + token->kernel_stack = NULL; +} + +void __iee_code _iee_validate_token(unsigned long __unused, struct task_struct *tsk) +{ + struct task_token *token = (struct task_token *)(__phys_to_iee(__pa(tsk))); + token->valid = true; +} + +void __iee_code _iee_unset_token(unsigned long __unused, pte_t *token_ptep, pte_t *token_page_ptep, void *token, void *token_page, unsigned long order, int use_block_pmd) +{ + token_ptep = (pte_t *)(__phys_to_iee(__pa(token_ptep))); + token_page_ptep = (pte_t *)(__phys_to_iee(__pa(token_page_ptep))); + if (use_block_pmd) + { + pmd_t *pmdp = (pmd_t *)token_page_ptep; + pmd_t pmd = READ_ONCE(*pmdp); + pmd = __pmd(pmd_val(pmd) | ___D | __RW); + WRITE_ONCE(*pmdp, pmd); + for(int i = 0; i < (0x1 << order); i++) + { + pte_t pte = READ_ONCE(*token_ptep); + pte = __pte((pte_val(pte) & ~PTE_PFN_MASK) | (__phys_to_pfn(__iee_pa(token + i*PAGE_SIZE)) << PAGE_SHIFT)); + WRITE_ONCE(*token_ptep, pte); + token_ptep++; + } + } + else + { + for(int i = 0; i < (0x1 << order); i++) + { + pte_t pte = READ_ONCE(*token_ptep); + pte = __pte((pte_val(pte) & ~PTE_PFN_MASK) | (__phys_to_pfn(__iee_pa(token + i*PAGE_SIZE)) << PAGE_SHIFT)); + WRITE_ONCE(*token_ptep, pte); + pte = READ_ONCE(*token_page_ptep); + pte = __pte(pte_val(pte) | ___D | __RW); + WRITE_ONCE(*token_page_ptep, pte); + token_ptep++; + token_page_ptep++; + } + } +} + +void __iee_code _iee_set_token(unsigned long __unused, pte_t *token_ptep, pte_t *token_page_ptep, void *token_page, unsigned long order, int use_block_pmd) +{ + token_ptep = (pte_t *)(__phys_to_iee(__pa(token_ptep))); + token_page_ptep = (pte_t *)(__phys_to_iee(__pa(token_page_ptep))); + if (use_block_pmd) + { + pmd_t *pmdp = (pmd_t *)token_page_ptep; + pmd_t pmd = READ_ONCE(*pmdp); + pmd = __pmd((pmd_val(pmd) & ~__RW) & ~___D); + WRITE_ONCE(*pmdp, pmd); + for(int i = 0; i < (0x1 << order); i++) + { + pte_t pte = READ_ONCE(*token_ptep); + pte = __pte(((pte_val(pte) & ~PTE_PFN_MASK)) | (__phys_to_pfn(__pa(token_page + i*PAGE_SIZE)) << PAGE_SHIFT)); + WRITE_ONCE(*token_ptep, pte); + token_ptep++; + } + } + else { + for(int i = 0; i < (0x1 << order); i++) + { + pte_t pte = READ_ONCE(*token_ptep); + pte = __pte(((pte_val(pte) & ~PTE_PFN_MASK)) | (__phys_to_pfn(__pa(token_page + i*PAGE_SIZE)) << PAGE_SHIFT)); + WRITE_ONCE(*token_ptep, pte); + pte = READ_ONCE(*token_page_ptep); + pte = __pte((pte_val(pte) & ~__RW) & ~___D); + WRITE_ONCE(*token_page_ptep, pte); + token_ptep++; + token_page_ptep++; + } + } +} + +unsigned long __iee_code _iee_test_and_clear_bit(unsigned long __unused, long nr, volatile unsigned long *addr) +{ + unsigned long *iee_addr = (unsigned long*)__phys_to_iee(__pa(addr)); + kcsan_mb(); + instrument_atomic_read_write(iee_addr + BIT_WORD(nr), sizeof(long)); + return arch_test_and_clear_bit(nr, iee_addr); +} + +void __iee_code _iee_set_sensitive_pte(unsigned long __unused, pte_t *lm_ptep, int order, int use_block_pmd) +{ + int i; + lm_ptep = (pte_t *)(__phys_to_iee(__pa(lm_ptep))); + if (use_block_pmd) { + pmd_t pmd = __pmd(pte_val(READ_ONCE(*lm_ptep))); + pmd = __pmd((pmd_val(pmd) & (~__RW) & (~___D))); + WRITE_ONCE(*lm_ptep, __pte(pmd_val(pmd))); + } + 
else + { + for(i = 0; i < (1 << order); i++) + { + pte_t pte = READ_ONCE(*lm_ptep); + pte = __pte((pte_val(pte) & (~__RW) & (~___D))); + WRITE_ONCE(*lm_ptep, pte); + lm_ptep++; + } + } +} + +void __iee_code _iee_unset_sensitive_pte(unsigned long __unused, pte_t *lm_ptep, int order, int use_block_pmd) +{ + int i; + lm_ptep = (pte_t *)(__phys_to_iee(__pa(lm_ptep))); + if (use_block_pmd) + { + pmd_t pmd = __pmd(pte_val(READ_ONCE(*lm_ptep))); + pmd = __pmd((pmd_val(pmd) | __RW | ___D)); + WRITE_ONCE(*lm_ptep, __pte(pmd_val(pmd))); + } + else + { + for(i = 0; i < (1 << order); i++) + { + pte_t pte = READ_ONCE(*lm_ptep); + pte = __pte(pte_val(pte) | __RW | ___D); + WRITE_ONCE(*lm_ptep, pte); + lm_ptep++; + } + } +} + +#ifdef CONFIG_CREDP +static struct cred* iee_cred(unsigned long __unused, struct cred *cred) { + if(cred == &init_cred) + cred = (struct cred *)__phys_to_iee(__pa_symbol(cred)); + else + cred = (struct cred *)(__phys_to_iee(__pa(cred))); + return cred; +} + +void __iee_code _iee_set_cred_rcu(unsigned long __unused, struct cred *cred, struct rcu_head *rcu) +{ + cred = iee_cred(__unused, cred); + *((struct rcu_head **)(&(cred->rcu.func))) = rcu; +} + +void __iee_code _iee_set_cred_security(unsigned long __unused, struct cred *cred, void *security) +{ + cred = iee_cred(__unused, cred); + cred->security = security; +} + +unsigned long __iee_code _iee_set_cred_atomic_op_usage(unsigned long __unused, struct cred *cred, int flag, int nr) +{ + cred = iee_cred(__unused, cred); + switch (flag) + { + case AT_ADD: { + atomic_long_add(nr, &cred->usage); + return 0; + } + case AT_INC_NOT_ZERO: { + return atomic_long_inc_not_zero(&cred->usage); + } + case AT_SUB_AND_TEST: { + return atomic_long_sub_and_test(nr, &cred->usage); + } + } + return 0; +} + +void __iee_code _iee_set_cred_atomic_set_usage(unsigned long __unused, struct cred *cred, int i) +{ + cred = iee_cred(__unused, cred); + atomic_long_set(&cred->usage,i); +} + +void __iee_code _iee_set_cred_non_rcu(unsigned long __unused, struct cred *cred, int non_rcu) +{ + cred = iee_cred(__unused, cred); + cred->non_rcu = non_rcu; +} + +void __iee_code _iee_set_cred_session_keyring(unsigned long __unused, struct cred *cred, struct key *session_keyring) +{ + cred = iee_cred(__unused, cred); + cred->session_keyring = session_keyring; +} + +void __iee_code _iee_set_cred_process_keyring(unsigned long __unused, struct cred *cred, struct key *process_keyring) +{ + cred = iee_cred(__unused, cred); + cred->process_keyring = process_keyring; +} + +void __iee_code _iee_set_cred_thread_keyring(unsigned long __unused, struct cred *cred, struct key *thread_keyring) +{ + cred = iee_cred(__unused, cred); + cred->thread_keyring = thread_keyring; +} + +void __iee_code _iee_set_cred_request_key_auth(unsigned long __unused, struct cred *cred, struct key *request_key_auth) +{ + cred = iee_cred(__unused, cred); + cred->request_key_auth = request_key_auth; +} + +void __iee_code _iee_set_cred_jit_keyring(unsigned long __unused, struct cred *cred, unsigned char jit_keyring) +{ + cred = iee_cred(__unused, cred); + cred->jit_keyring = jit_keyring; +} + +void __iee_code _iee_set_cred_cap_inheritable(unsigned long __unused, struct cred *cred, kernel_cap_t cap_inheritable) +{ + cred = iee_cred(__unused, cred); + cred->cap_inheritable = cap_inheritable; +} + +void __iee_code _iee_set_cred_cap_permitted(unsigned long __unused, struct cred *cred, kernel_cap_t cap_permitted) +{ + cred = iee_cred(__unused, cred); + cred->cap_permitted = cap_permitted; +} + +void __iee_code 
_iee_set_cred_cap_effective(unsigned long __unused, struct cred *cred, kernel_cap_t cap_effective) +{ + cred = iee_cred(__unused, cred); + cred->cap_effective = cap_effective; +} + +void __iee_code _iee_set_cred_cap_bset(unsigned long __unused, struct cred *cred, kernel_cap_t cap_bset) +{ + cred = iee_cred(__unused, cred); + cred->cap_bset = cap_bset; +} + +void __iee_code _iee_set_cred_cap_ambient(unsigned long __unused, struct cred *cred, kernel_cap_t cap_ambient) +{ + cred = iee_cred(__unused, cred); + cred->cap_ambient = cap_ambient; +} + +void __iee_code _iee_set_cred_securebits(unsigned long __unused, struct cred *cred, unsigned securebits) +{ + cred = iee_cred(__unused, cred); + cred->securebits = securebits; +} + +void __iee_code _iee_set_cred_group_info(unsigned long __unused, struct cred *cred, struct group_info *group_info) +{ + cred = iee_cred(__unused, cred); + cred->group_info = group_info; +} + +void __iee_code _iee_set_cred_ucounts(unsigned long __unused, struct cred *cred, struct ucounts *ucounts) +{ + cred = iee_cred(__unused, cred); + cred->ucounts = ucounts; +} + +void __iee_code _iee_set_cred_user_ns(unsigned long __unused, struct cred *cred, struct user_namespace *user_ns) +{ + cred = iee_cred(__unused, cred); + cred->user_ns = user_ns; +} + +void __iee_code _iee_set_cred_user(unsigned long __unused, struct cred *cred, struct user_struct *user) +{ + cred = iee_cred(__unused, cred); + cred->user = user; +} + +void __iee_code _iee_set_cred_fsgid(unsigned long __unused, struct cred *cred, kgid_t fsgid) +{ + cred = iee_cred(__unused, cred); + cred->fsgid = fsgid; +} + +void __iee_code _iee_set_cred_fsuid(unsigned long __unused, struct cred *cred, kuid_t fsuid) +{ + cred = iee_cred(__unused, cred); + cred->fsuid = fsuid; +} + +void __iee_code _iee_set_cred_egid(unsigned long __unused, struct cred *cred, kgid_t egid) +{ + cred = iee_cred(__unused, cred); + cred->egid = egid; +} + +void __iee_code _iee_set_cred_euid(unsigned long __unused, struct cred *cred, kuid_t euid) +{ + cred = iee_cred(__unused, cred); + cred->euid = euid; +} + +void __iee_code _iee_set_cred_sgid(unsigned long __unused, struct cred *cred, kgid_t sgid) +{ + cred = iee_cred(__unused, cred); + cred->sgid = sgid; +} + +void __iee_code _iee_set_cred_suid(unsigned long __unused, struct cred *cred, kuid_t suid) +{ + cred = iee_cred(__unused, cred); + cred->suid = suid; +} + +void __iee_code _iee_copy_cred(unsigned long __unused, struct cred *old, struct cred *new) +{ + if (new == &init_cred) { + panic("copy_cred for init_cred: %lx\n", (unsigned long)new); + } + struct rcu_head *rcu = (struct rcu_head *)(new->rcu.func); + struct cred *_new = (struct cred *)__phys_to_iee(__pa(new)); + _iee_memcpy(__unused, new, old, sizeof(struct cred)); + *(struct rcu_head **)(&(_new->rcu.func)) = rcu; + *(struct rcu_head *)(_new->rcu.func) = *(struct rcu_head *)(old->rcu.func); +} + +void __iee_code _iee_set_cred_gid(unsigned long __unused, struct cred *cred, kgid_t gid) +{ + cred = iee_cred(__unused, cred); + cred->gid = gid; +} + +void __iee_code _iee_set_cred_uid(unsigned long __unused, struct cred *cred, kuid_t uid) +{ + cred = iee_cred(__unused, cred); + cred->uid = uid; +} +#endif + +#ifdef CONFIG_KEYP +unsigned long __iee_code _iee_set_key_flag_bit(unsigned long __unused, struct key *key, + long nr, int flag) +{ + key = (struct key *)(__phys_to_iee(__pa(key))); + switch (flag) { + case SET_BIT_OP: { + set_bit(nr, &key->flags); + break; + } + case TEST_AND_CLEAR_BIT: { + return test_and_clear_bit(nr, &key->flags); + } + 
case TEST_AND_SET_BIT: { + return test_and_set_bit(nr, &key->flags); + } + } + return 0; +} + +void __iee_code _iee_set_key_restrict_link(unsigned long __unused, + struct key *key, + struct key_restriction *restrict_link) +{ + key = (struct key *)(__phys_to_iee(__pa(key))); + key->restrict_link = restrict_link; +} + +void __iee_code _iee_set_key_magic(unsigned long __unused, struct key *key, + unsigned magic) +{ +#ifdef KEY_DEBUGGING + key = (struct key *)(__phys_to_iee(__pa(key))); + key->magic = magic; +#endif +} + +void __iee_code _iee_set_key_flags(unsigned long __unused, struct key *key, + unsigned long flags) +{ + key = (struct key *)(__phys_to_iee(__pa(key))); + key->flags = flags; +} + +void __iee_code _iee_set_key_index_key(unsigned long __unused, + struct key *key, + struct keyring_index_key* index_key) +{ + key = (struct key *)(__phys_to_iee(__pa(key))); + key->index_key = *index_key; +} + +void __iee_code _iee_set_key_hash(unsigned long __unused, struct key *key, + unsigned long hash) +{ + key = (struct key *)(__phys_to_iee(__pa(key))); + key->hash = hash; +} + +void __iee_code _iee_set_key_len_desc(unsigned long __unused, struct key *key, + unsigned long len_desc) +{ + key = (struct key *)(__phys_to_iee(__pa(key))); + key->len_desc = len_desc; +} + +void __iee_code _iee_set_key_type(unsigned long __unused, struct key *key, + struct key_type *type) +{ + key = (struct key *)(__phys_to_iee(__pa(key))); + key->type = type; +} + +void __iee_code _iee_set_key_domain_tag(unsigned long __unused, + struct key *key, + struct key_tag *domain_tag) +{ + key = (struct key *)(__phys_to_iee(__pa(key))); + key->domain_tag = domain_tag; +} + +void __iee_code _iee_set_key_description(unsigned long __unused, + struct key *key, char *description) +{ + key = (struct key *)(__phys_to_iee(__pa(key))); + key->description = description; +} + +void __iee_code _iee_set_key_uid(unsigned long __unused, struct key *key, + kuid_t uid) +{ + key = (struct key *)(__phys_to_iee(__pa(key))); + key->uid = uid; +} + +void __iee_code _iee_set_key_gid(unsigned long __unused, struct key *key, + kgid_t gid) +{ + key = (struct key *)(__phys_to_iee(__pa(key))); + key->gid = gid; +} + +void __iee_code _iee_set_key_perm(unsigned long __unused, struct key *key, + key_perm_t perm) +{ + key = (struct key *)(__phys_to_iee(__pa(key))); + key->perm = perm; +} + +void __iee_code _iee_set_key_quotalen(unsigned long __unused, struct key *key, + unsigned short quotalen) +{ + key = (struct key *)(__phys_to_iee(__pa(key))); + key->quotalen = quotalen; +} + +void __iee_code _iee_set_key_datalen(unsigned long __unused, struct key *key, + unsigned short datalen) +{ + key = (struct key *)(__phys_to_iee(__pa(key))); + key->datalen = datalen; +} + +void __iee_code _iee_set_key_state(unsigned long __unused, struct key *key, + short state) +{ + key = (struct key *)(__phys_to_iee(__pa(key))); + WRITE_ONCE(key->state, state); +} + +void __iee_code _iee_set_key_user(unsigned long __unused, struct key *key, + struct key_user *user) +{ + key = (struct key *)(__phys_to_iee(__pa(key))); + key->user = user; +} + +void __iee_code _iee_set_key_security(unsigned long __unused, struct key *key, + void *security) +{ + key = (struct key *)(__phys_to_iee(__pa(key))); + key->security = security; +} + +void __iee_code _iee_set_key_expiry(unsigned long __unused, struct key *key, + time64_t expiry) +{ + key = (struct key *)(__phys_to_iee(__pa(key))); + key->expiry = expiry; +} + +void __iee_code _iee_set_key_revoked_at(unsigned long __unused, + struct key *key, 
time64_t revoked_at) +{ + key = (struct key *)(__phys_to_iee(__pa(key))); + key->revoked_at = revoked_at; +} + +void __iee_code _iee_set_key_last_used_at(unsigned long __unused, + struct key *key, + time64_t last_used_at) +{ + key = (struct key *)(__phys_to_iee(__pa(key))); + key->last_used_at = last_used_at; +} + +unsigned long __iee_code _iee_set_key_usage(unsigned long __unused, struct key *key, + int n, int flag) +{ + key = (struct key *)(__phys_to_iee(__pa(key))); + switch (flag) { + case REFCOUNT_INC: { + refcount_inc(&key->usage); + break; + } + case REFCOUNT_SET: { + refcount_set(&key->usage, n); + break; + } + case REFCOUNT_DEC_AND_TEST: { + return refcount_dec_and_test(&key->usage); + } + case REFCOUNT_INC_NOT_ZERO: { + return refcount_inc_not_zero(&key->usage); + } + } + return 0; +} + +void __iee_code _iee_set_key_serial(unsigned long __unused, struct key *key, + key_serial_t serial) +{ + key = (struct key *)(__phys_to_iee(__pa(key))); + key->serial = serial; +} + +void __iee_code _iee_set_key_watchers(unsigned long __unused, struct key *key, struct watch_list *watchers) +{ +#ifdef CONFIG_KEY_NOTIFICATIONS + key = (struct key *)(__phys_to_iee(__pa(key))); + key->watchers = watchers; +#endif +} + +void __iee_code _iee_set_key_union(unsigned long __unused, struct key *key, + struct key_union *key_union) +{ + key = (struct key *)(__phys_to_iee(__pa(key))); + key->graveyard_link.next = (struct list_head *)key_union; +} + +void __iee_code _iee_set_key_struct(unsigned long __unused, struct key *key, + struct key_struct *key_struct) +{ + key = (struct key *)(__phys_to_iee(__pa(key))); + key->name_link.prev = (struct list_head *)key_struct; +} + +void __iee_code _iee_set_key_payload(unsigned long __unused, struct key *key, + union key_payload *key_payload) +{ + key = (struct key *)(__phys_to_iee(__pa(key))); + key->name_link.next = (struct list_head *)key_payload; +} +#endif + +#ifdef CONFIG_IEE_SELINUX_P +void __iee_code _iee_set_selinux_status_pg(unsigned long __unused, struct page* new_page) +{ + struct page** iee_addr = (struct page**)__phys_to_iee(__pa_symbol(&(selinux_state.status_page))); + *iee_addr = new_page; +} + +void __iee_code _iee_set_selinux_enforcing(unsigned long __unused, bool value) +{ + *(bool*)__phys_to_iee(__pa_symbol(&(selinux_state.enforcing))) = value; +} + +void __iee_code _iee_mark_selinux_initialized(unsigned long __unused) +{ + smp_store_release(((bool*)__phys_to_iee(__pa_symbol(&(selinux_state.initialized)))), true); + printk("IEE: Mark selinux initialized."); +} + +void __iee_code _iee_set_sel_policy_cap(unsigned long __unused, unsigned int idx, int cap) +{ + *(bool*)__phys_to_iee(__pa_symbol(&(selinux_state.policycap[idx]))) = cap; +} + +/* + * Please make sure the iee_new_policy param comes from the policy_jar memcache. + * new_policy needs to be freed after calling this func, as it is only used to + * transfer data from the kernel. + */ +void __iee_code _iee_sel_rcu_assign_policy(unsigned long __unused, struct selinux_policy* new_policy, + struct selinux_policy* iee_new_policy) +{ + /* TODO: Verify information from the incoming policy. */ + // /* Make sure iee_new_policy is from policy_jar memcache. */ + // struct slab* policy_pg = (struct slab*)pfn_to_page(__pa(iee_new_policy) >> PAGE_SHIFT); + // if (policy_pg->slab_cache != policy_jar) + // printk("IEE SELINUXP ERROR: new policy is not from iee memcache."); + /* Copy data from the kernel into the newly allocated policy struct inside the iee. 
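+ * The destination is written through its __phys_to_iee() alias because the policy object lives in IEE-protected memory that only __iee_code is allowed to modify.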
*/ + struct selinux_policy* iee_addr = (struct selinux_policy *)(__phys_to_iee(__pa(iee_new_policy))); + memcpy(iee_addr, new_policy, sizeof(struct selinux_policy)); + + rcu_assign_pointer(*((struct selinux_policy**)__phys_to_iee(__pa_symbol(&(selinux_state.policy)))), + iee_new_policy); + printk("IEE: assigned rcu pointer selinux_state.policy."); +} +#endif + +/* iee si */ +bool iee_pgt_jar_init __iee_si_data; +bool iee_init_done __iee_si_data; +unsigned long iee_base_swapper_pg_dir __iee_si_data; +extern struct static_key_false cr_pinning; + +u64 __iee_si_code notrace iee_si_handler(int flag, ...) +{ + va_list pArgs; + u64 val; + + va_start(pArgs, flag); + switch (flag) { + case IEE_SI_TEST: + break; + case IEE_WRITE_CR0: { + val = va_arg(pArgs, u64); + unsigned long bits_missing = 0; + set_register_cr0: + asm volatile("mov %0,%%cr0" : "+r"(val) : : "memory"); + if (static_branch_likely(&cr_pinning)) { + if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) { + bits_missing = X86_CR0_WP; + val |= bits_missing; + goto set_register_cr0; + } + /* Warn after we've set the missing bits. */ + WARN_ONCE(bits_missing, "CR0 WP bit went missing!?\n"); + } + break; + } + #ifdef CONFIG_KOI + case IEE_SWITCH_TO_KOI: + #endif + case IEE_WRITE_CR3: { + val = va_arg(pArgs, u64); + asm volatile("mov %0,%%cr3" : : "r"(val) : "memory"); + break; + } + #ifdef CONFIG_KOI + case IEE_SWITCH_TO_KERNEL: { + val = iee_base_swapper_pg_dir; + asm volatile("mov %0,%%cr3" : : "r"(val) : "memory"); + break; + } + #endif + case IEE_WRITE_CR4: { + break; + } + case IEE_LOAD_IDT: { + const struct desc_ptr *new_val = va_arg(pArgs, const struct desc_ptr*); + asm volatile("lidt %0"::"m" (*new_val)); + break; + } + } + va_end(pArgs); + return 0; +} \ No newline at end of file diff --git a/arch/x86/kernel/iee/pgtable-slab.c b/arch/x86/kernel/iee/pgtable-slab.c new file mode 100644 index 000000000000..518bdfa5fc98 --- /dev/null +++ b/arch/x86/kernel/iee/pgtable-slab.c @@ -0,0 +1,169 @@ +#include +#include +#include +#include +#include + +#define PGTABLE_INIT_ORDER 7 +struct kmem_cache *pgtable_jar; +struct kmem_cache *ptdesc_jar; +struct kmem_cache *pgd_jar; +unsigned long pgtable_jar_offset; +unsigned long pgd_jar_offset; + +extern void early_pgtable_jar_alloc(struct kmem_cache *pgtable_jar); + +void iee_ptdesc_init(struct page *page) +{ + struct ptdesc_t *tmp = kmem_cache_alloc(ptdesc_jar, GFP_KERNEL | __GFP_ZERO); + if (!tmp) { + panic("IEE: failed to alloc ptdesc_jar"); + } + spin_lock_init(&tmp->ptl); + page_to_iee_ptdesc(page) = tmp; + + page_to_iee_ptdesc(page)->page = page; +} + +void iee_ptdesc_free(struct page *page) +{ + kmem_cache_free(ptdesc_jar, page_to_iee_ptdesc(page)); + page_to_iee_ptdesc(page) = NULL; +} + +void __init iee_pmd_pgtable_init(pud_t *pud) { + struct page *page; + pmd_t *orig_pmd = pud_pgtable(*pud); + pmd_t *pmd; + int i; + + for (i = 0; i < PTRS_PER_PMD; i++) { + pmd = orig_pmd + i; + if (pmd_none(*pmd) || pmd_bad(*pmd)) + continue; + page = pmd_page(*pmd); + iee_ptdesc_init(page); + } +} + +void __init iee_pud_pgtable_init(p4d_t *p4d) { + struct page *page; + pud_t *orig_pud = p4d_pgtable(*p4d); + pud_t *pud; + int i; + + for (i = 0; i < PTRS_PER_PUD; i++) { + pud = orig_pud + i; + if (pud_none(*pud) || pud_bad(*pud)) + continue; + iee_pmd_pgtable_init(pud); + page = pud_page(*pud); + iee_ptdesc_init(page); + } +} + +void __init iee_p4d_pgtable_init(pgd_t *pgd) { + struct page *page; + p4d_t *orig_p4d = (p4d_t *)pgd_page_vaddr(*pgd); + p4d_t *p4d; + int i; + + for (i = 0; i < PTRS_PER_P4D; i++) { + 
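+ /* For each present p4d entry, descend to the pud level and attach a ptdesc (allocated from ptdesc_jar) to the page-table page it references. */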
p4d = orig_p4d + i; + if (p4d_none(*p4d) || p4d_bad(*p4d)) + continue; + iee_pud_pgtable_init(p4d); + page = p4d_page(*p4d); + iee_ptdesc_init(page); + } +} + +void __init iee_pgtable_init(void) +{ + int i; + pgd_t *pgd; + struct page* page; + + pr_err("IEE: iee_ptdesc_base=%lx, vmemmap_base=%lx", iee_ptdesc_base, vmemmap_base); + + ptdesc_jar = kmem_cache_create("ptdesc_jar", sizeof(struct ptdesc_t), 0, SLAB_PANIC|SLAB_RED_ZONE, NULL); + pgtable_jar = kmem_cache_create("pgtable_jar", PAGE_SIZE, PAGE_SIZE, + SLAB_PANIC, NULL); + pgd_jar = kmem_cache_create("pgd_jar", (PAGE_SIZE << PGD_ALLOCATION_ORDER), + (PAGE_SIZE << PGD_ALLOCATION_ORDER), SLAB_PANIC, NULL); + + for(i = 0; i < ((1 << (PGTABLE_INIT_ORDER))/nr_cpu_ids); i++) { + early_pgtable_jar_alloc(pgtable_jar); + // early_pgtable_jar_alloc(pgd_jar); + } + for (i = 0; i < PTRS_PER_PGD; i++) { + pgd = swapper_pg_dir + i; + if (pgtable_l5_enabled()) { + if (pgd_none_or_clear_bad(pgd)) + continue; + iee_p4d_pgtable_init(pgd); + } + else { + if (p4d_none_or_clear_bad((p4d_t *)pgd)) + continue; + iee_pud_pgtable_init((p4d_t *)pgd); + } + page = pgd_page(*pgd); + iee_ptdesc_init(page); + } + iee_pgt_jar_init = true; +} + +void *get_iee_pgtable_page(gfp_t gfpflags) +{ + struct page *page; + void *res = kmem_cache_alloc(pgtable_jar, gfpflags); + if(!res) { + panic("IEE: failed to alloc pgtable_jar"); + } + iee_rw_gate(IEE_OP_SET_FREEPTR, (void **)((unsigned long)res + pgtable_jar_offset), NULL); + + page = virt_to_page(res); + iee_ptdesc_init(page); + return res; +} + +void free_iee_pgtable_page(void *obj) +{ + struct page *page = virt_to_page(obj); + iee_ptdesc_free(page); + + kmem_cache_free(pgtable_jar, obj); +} + +void *get_iee_pgd_page(gfp_t gfpflags) +{ + struct page *page; + void *res = kmem_cache_alloc(pgd_jar, gfpflags); + if(!res) { + panic("IEE: failed to alloc pgd_jar"); + } + iee_rw_gate(IEE_OP_SET_FREEPTR, (void **)((unsigned long)res + pgd_jar_offset), NULL); + + page = virt_to_page(res); + iee_ptdesc_init(page); + + #if PGD_ALLOCATION_ORDER == 1 + page = virt_to_page((unsigned long)res + PAGE_SIZE); + iee_ptdesc_init(page); + #endif + return res; +} + +void free_iee_pgd_page(void *obj) +{ + struct page *page = virt_to_page(obj); + iee_ptdesc_free(page); + + #if PGD_ALLOCATION_ORDER == 1 + page = virt_to_page((unsigned long)obj + PAGE_SIZE); + iee_ptdesc_free(page); + #endif + + kmem_cache_free(pgd_jar, obj); +} \ No newline at end of file diff --git a/arch/x86/kernel/iee/stack-slab.c b/arch/x86/kernel/iee/stack-slab.c new file mode 100644 index 000000000000..533cb7183d63 --- /dev/null +++ b/arch/x86/kernel/iee/stack-slab.c @@ -0,0 +1,25 @@ +#include +#include + +struct kmem_cache *iee_stack_jar; + +void __init iee_stack_init(void) +{ + iee_stack_jar = kmem_cache_create("iee_stack_jar", (PAGE_SIZE << 2), + (PAGE_SIZE << 2), SLAB_PANIC, NULL); +} + +void *get_iee_stack(void) +{ + void *obj = kmem_cache_alloc(iee_stack_jar, GFP_KERNEL); + if (!obj) { + pr_err("IEE: failed to alloc iee_stack_jar"); + return NULL; + } + return __phys_to_iee(__pa(obj)); +} + +void free_iee_stack(void *obj) +{ + kmem_cache_free(iee_stack_jar, __va(__iee_pa(obj))); +} diff --git a/arch/x86/kernel/koi/Makefile b/arch/x86/kernel/koi/Makefile new file mode 100644 index 000000000000..9be8710b714a --- /dev/null +++ b/arch/x86/kernel/koi/Makefile @@ -0,0 +1 @@ +obj-y += koi.o \ No newline at end of file diff --git a/arch/x86/kernel/koi/koi.c b/arch/x86/kernel/koi/koi.c new file mode 100644 index 000000000000..77500a7de3ef --- /dev/null +++ 
b/arch/x86/kernel/koi/koi.c @@ -0,0 +1,1414 @@ +#include "asm-generic/rwonce.h" +#include "asm/current.h" +#include "asm/desc_defs.h" +#include "asm/page.h" +#include "asm/page_64_types.h" +#include "asm/page_types.h" +#include "asm/percpu.h" +#include "asm/pgtable.h" +#include "asm/pgalloc.h" +#include "asm/pgtable_64.h" +#include "asm/pgtable_64_types.h" +#include "asm/pgtable_types.h" +#include "asm/processor-flags.h" +#include "asm/special_insns.h" +#include "linux/cpumask.h" +#include "linux/export.h" +#include "linux/mm_types.h" +#include "linux/percpu-defs.h" +#include "linux/pgtable.h" +#include "linux/mm.h" +#include "asm/pgalloc.h" +#include "linux/printk.h" +#include "linux/slab.h" +#include "asm/koi.h" +#include "linux/module.h" +#include "linux/spinlock.h" +#include "linux/vmalloc.h" +#include "asm-generic/sections.h" +#include "asm/desc.h" +#ifdef CONFIG_IEE +#include "asm/iee-si.h" +#endif + +// #define DEBUG + +#ifdef DEBUG +#define debug_printk(...) printk(KERN_ERR __VA_ARGS__) +#else +#define debug_printk(...) +#endif + +#define __koi_code __section(".koi.text") +#define __koi_data __section(".data..koi") + +extern unsigned long __koi_code_start[]; +extern unsigned long __koi_code_end[]; +extern unsigned long __koi_data_start[]; +extern unsigned long __koi_data_end[]; +#ifdef CONFIG_IEE +extern unsigned long __iee_si_data_start[]; +extern unsigned long __iee_si_text_start[]; +extern unsigned long iee_offset; +#endif + +// extern unsigned long koi_idt_descr_addr; +extern struct desc_ptr idt_descr; +extern gate_desc idt_table[IDT_ENTRIES]; + +#define IDT_TABLE_SIZE (IDT_ENTRIES * sizeof(gate_desc)) + +extern void init_ko_mm(struct mm_struct *ko_mm, pgd_t *pgdp); + +extern void error_entry(void); + +extern void error_return(void); + +DEFINE_PER_CPU_PAGE_ALIGNED(unsigned long, koi_kern_cr3); +EXPORT_SYMBOL(koi_kern_cr3); + +DEFINE_HASHTABLE(koi_mem_htbl, HASH_TABLE_BIT); + +DEFINE_SPINLOCK(koi_mem_htbl_spin_lock); +EXPORT_SYMBOL(koi_mem_htbl_spin_lock); + +#ifndef CONFIG_IEE +__koi_code void koi_error_entry_to_kernel(void) { + unsigned long cr3 = this_cpu_read(koi_kern_cr3); + asm volatile("mov %0,%%cr3": : "r" (cr3) : "memory"); +} + +__koi_code void koi_error_return_to_ko(unsigned long addr) { + struct koi_mem_hash_node *ko; + unsigned long cr3; + int bkt; + unsigned long flags; + struct task_token *token_addr = + (struct task_token *)((unsigned long)current + (unsigned long)KOI_OFFSET); + rcu_read_lock(); + hash_for_each_rcu (koi_mem_htbl, bkt, ko, node) { + spin_lock_irqsave(&ko->mod_lock, flags); + if (!ko->is_valid) { + spin_unlock_irqrestore(&ko->mod_lock, flags); + goto out; + } + if (ko->mod != NULL && ko->mod->mem[MOD_INIT_TEXT].base != NULL) { + if (addr >= (unsigned long)ko->mod->mem[MOD_INIT_TEXT].base && + addr < (unsigned long)ko->mod->mem[MOD_INIT_TEXT].base + ko->mod->mem[MOD_INIT_TEXT].size) { + spin_unlock_irqrestore(&ko->mod_lock, flags); + if (token_addr->current_ttbr1 == ko->ko_cr3) { + cr3 = __read_cr3(); + this_cpu_write(koi_kern_cr3, cr3); + cr3 = (cr3 & X86_CR3_PCID_MASK) | PTI_USER_PCID_MASK; + cr3 |= ko->ko_cr3; + rcu_read_unlock(); + native_write_cr3(cr3); + return; + } + goto out; + } + } + if (addr >= (unsigned long)ko->mod->mem[MOD_TEXT].base && + addr < (unsigned long)ko->mod->mem[MOD_TEXT].base + ko->mod->mem[MOD_TEXT].size) { + spin_unlock_irqrestore(&ko->mod_lock, flags); + if (token_addr->current_ttbr1 == ko->ko_cr3) { + cr3 = __read_cr3(); + this_cpu_write(koi_kern_cr3, cr3); + cr3 = (cr3 & X86_CR3_PCID_MASK) | PTI_USER_PCID_MASK; + cr3 
|= ko->ko_cr3; + rcu_read_unlock(); + native_write_cr3(cr3); + return; + } + goto out; + } + spin_unlock_irqrestore(&ko->mod_lock, flags); + } +out: + rcu_read_unlock(); + return; +} +#else +__koi_code void koi_error_entry_to_kernel(void) { + unsigned long cr3 = this_cpu_read(koi_kern_cr3); + asm volatile("mov %0,%%cr3": : "r" (cr3) : "memory"); +} + +__koi_code void koi_error_return_to_ko(unsigned long addr) { + struct koi_mem_hash_node *ko; + unsigned long cr3; + int bkt; + unsigned long flags; + rcu_read_lock(); + hash_for_each_rcu (koi_mem_htbl, bkt, ko, node) { + spin_lock_irqsave(&ko->mod_lock, flags); + if (!ko->is_valid) { + spin_unlock_irqrestore(&ko->mod_lock, flags); + goto out; + } + if (ko->mod != NULL && ko->mod->mem[MOD_INIT_TEXT].base != NULL) { + if (addr >= (unsigned long)ko->mod->mem[MOD_INIT_TEXT].base && + addr < (unsigned long)ko->mod->mem[MOD_INIT_TEXT].base + ko->mod->mem[MOD_INIT_TEXT].size) { + spin_unlock_irqrestore(&ko->mod_lock, flags); + unsigned long current_ttbr1 = iee_rw_gate(IEE_READ_TOKEN_TTBR1, current); + if (current_ttbr1 == ko->ko_cr3) { + cr3 = __read_cr3(); + this_cpu_write(koi_kern_cr3, cr3); + cr3 = (cr3 & X86_CR3_PCID_MASK) | PTI_USER_PCID_MASK; + cr3 |= ko->ko_cr3; + rcu_read_unlock(); + native_write_cr3(cr3); + return; + } + goto out; + } + } + if (addr >= (unsigned long)ko->mod->mem[MOD_TEXT].base && + addr < (unsigned long)ko->mod->mem[MOD_TEXT].base + ko->mod->mem[MOD_TEXT].size) { + spin_unlock_irqrestore(&ko->mod_lock, flags); + unsigned long current_ttbr1 = iee_rw_gate(IEE_READ_TOKEN_TTBR1, current); + if (current_ttbr1 == ko->ko_cr3) { + cr3 = __read_cr3(); + this_cpu_write(koi_kern_cr3, cr3); + cr3 = (cr3 & X86_CR3_PCID_MASK) | PTI_USER_PCID_MASK; + cr3 |= ko->ko_cr3; + rcu_read_unlock(); + native_write_cr3(cr3); + return; + } + goto out; + } + spin_unlock_irqrestore(&ko->mod_lock, flags); + } +out: + rcu_read_unlock(); + return; +} +#endif + +pte_t *find_pte(unsigned long addr, pgd_t *pgdir) +{ + p4d_t *p4dp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); + if (pgd_none(*pgdp) || pgd_bad(*pgdp)) { + debug_printk("pgdp 0x%lx not available\n", (unsigned long) pgdp); + return NULL; + } + debug_printk( "pgd=0x%lx, pgd_val=0x%lx\n", (unsigned long) pgdp, pgd_val(*pgdp)); + + p4dp = p4d_offset(pgdp, addr); + if (p4d_none(*p4dp) || p4d_bad(*p4dp)) { + debug_printk("p4dp 0x%lx not available\n", (unsigned long)p4dp); + return NULL; + } + debug_printk( "p4d=0x%lx, p4d_val=0x%lx\n", (unsigned long) p4dp, p4d_val(*p4dp)); + + pudp = pud_offset(p4dp, addr); + if (pud_none(*pudp) || pud_bad(*pudp)) { + debug_printk("pudp 0x%lx not available\n", (unsigned long)pudp); + return NULL; + } + debug_printk( "pud=0x%lx, pud_val=0x%lx\n", (unsigned long)pudp, pud_val(*pudp)); + pmdp = pmd_offset(pudp, addr); + debug_printk( "pmd=0x%lx, pmd_val=0x%lx\n",(unsigned long) pmdp, pmd_val(*pmdp)); + if (is_swap_pmd(*pmdp) || pmd_trans_huge(*pmdp) || + (pmd_devmap(*pmdp))) { + return (pte_t *)pmdp; + } + if (pmd_none(*pmdp)) { + debug_printk("pmdp 0x%lx not available none\n", (unsigned long)pmdp); + return NULL; + } + if (pmd_bad(*pmdp)) { + debug_printk("pmdp 0x%lx bad, pmd_val=0x%16lx\n",(unsigned long) pmdp, pmd_val(*pmdp)); + return NULL; + } + + ptep = pte_offset_kernel(pmdp, addr); + debug_printk( "pte=0x%lx, pte_val=0x%lx\n", (unsigned long)ptep, pte_val(*ptep)); + if (pte_none(*ptep)) { + debug_printk("ptep 0x%lx not available\n", (unsigned long)ptep); + return NULL; + } + return ptep; +} + +struct page 
*find_page(unsigned long addr, pgd_t *pgdir) +{ + struct page *curr_page; + pte_t *ptep; + debug_printk("addr=0x%lx\n", addr); + // pgd_t *pgdir = swapper_pg_dir; + ptep = find_pte(addr, pgdir); + if (!ptep) { + return NULL; + } + curr_page = pte_page(*ptep); + debug_printk("page=0x%16lx\n", (unsigned long) curr_page); + return curr_page; +} + +__attribute__((unused)) static void check_pgd(pgd_t *new_pgd, pgd_t *old_pgd, + unsigned long addr) +{ + pte_t *old_pte = find_pte(addr, old_pgd); + pte_t *new_pte = find_pte(addr, new_pgd); + if (old_pte == NULL || new_pte == NULL) { + debug_printk( "old_pte=0x%16lx, new_pte=0x%16lx\n", (unsigned long)old_pte, (unsigned long)new_pte); + return; + } + unsigned long old_pfn = pte_pfn(*old_pte); + unsigned long new_pfn = pte_pfn(*new_pte); + struct page *old_page, *new_page; + unsigned long *new_address, *old_address; + if (old_pfn != new_pfn) { + debug_printk( + "pfn different! old_pfn=0x%16lx, new_pfn=0x%16lx\n", + old_pfn, new_pfn); + } + old_page = find_page(addr, old_pgd); + debug_printk("swapper_pg_dir walk finished\n"); + new_page = find_page(addr, new_pgd); + debug_printk("new pgd walk finished\n"); + + if (new_page != old_page) + debug_printk("Wrong page walk\n"); + if (new_page == NULL) { + debug_printk("new page not available\n"); + } + if (old_page == NULL) { + debug_printk("old page not available\n"); + } + if (old_page == NULL || new_page == NULL) + return; + new_address = (unsigned long *)page_address(new_page); + old_address = (unsigned long *)page_address(old_page); + + debug_printk("addr=0x%lx, new=0x%lx, old=0x%lx\n", addr, (unsigned long)new_address, + (unsigned long)old_address); + if (new_address != old_address) { + debug_printk("Different page address!\n"); + } else { + debug_printk("check pgd pass successfully!\n"); + } +} + +// int __koi_pte_alloc(struct mm_struct *mm, pmd_t *pmd) +// { +// spinlock_t *ptl; +// // pgtable_t new = alloc_page(GFP_PGTABLE_KERNEL); +// pte_t *new = pte_alloc_one_kernel(mm); +// // debug_printk("alloc new=0x%16lx\n", (unsigned long)new); +// if (!new) +// return -ENOMEM; + +// /* +// * Ensure all pte setup (eg. pte page lock and page clearing) are +// * visible before the pte is made visible to other CPUs by being +// * put into page tables. +// * +// * The other side of the story is the pointer chasing in the page +// * table walking code (when walking the page table without locking; +// * ie. most of the time). Fortunately, these data accesses consist +// * of a chain of data-dependent loads, meaning most CPUs (alpha +// * being the notable exception) will already guarantee loads are +// * seen in-order. See the alpha page table accessors for the +// * smp_rmb() barriers in page table walking code. +// */ +// smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */ + +// ptl = pmd_lockptr(mm, pmd); +// debug_printk("pte_alloc ptl=0x%16lx\n", (unsigned long)ptl); +// spin_lock(ptl); +// if (likely(pmd_none(*pmd))) { /* Has another populated it ? 
*/ +// mm_inc_nr_ptes(mm); +// pmd_populate_kernel(mm, pmd, new); +// new = NULL; +// } +// spin_unlock(ptl); +// debug_printk("unlock pte_alloc ptl=0x%16lx\n", (unsigned long)ptl); +// if (new) { +// pte_free_kernel(mm, new); +// } +// return 0; +// } +/* +#define koi_pte_alloc(mm, pmd) \ + (unlikely(pmd_none(*(pmd))) && __koi_pte_alloc(mm, pmd)) + +#define koi_pte_offset_map_lock(mm, pmd, address, ptlp) \ + ({ \ + spinlock_t *__ptl = pte_lockptr(mm, pmd); \ + pte_t *__pte = pte_offset_kernel(pmd, address); \ + *(ptlp) = __ptl; \ + debug_printk("pre lock ptlp=0x%16lx\n", (unsigned long)__ptl); \ + __pte; \ + }) + +#define koi_pte_alloc_map_lock(mm, pmd, address, ptlp) \ + (koi_pte_alloc(mm, pmd) ? \ + NULL : \ + koi_pte_offset_map_lock(mm, pmd, address, ptlp)) +*/ +// static inline pud_t *koi_pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address) { +// return (unlikely(p4d_none(*p4d)) && __koi_pud_alloc(mm, p4d, address)) ? +// NULL : pud_offset(p4d, address); +// } + +// static inline pmd_t *koi_pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) +// { +// return (unlikely(pud_none(*pud)) && __koi_pmd_alloc(mm, pud, address))? +// NULL: pmd_offset(pud, address); +// } + +/** + * Copy one pte. Returns 0 if succeeded, or -EAGAIN if one preallocated page + * is required to copy this pte. +*/ +static inline int koi_copy_present_pte(struct mm_struct *ko_mm, pte_t *dst_pte, + pte_t *src_pte, unsigned long addr) +{ + pte_t pte = *src_pte; + + set_pte(dst_pte, pte); + return 0; +} + +/** +*koi_copy_pte_range - copy pte from kernel space to driver space +*/ +static int koi_copy_pte_range(struct mm_struct *ko_mm, pmd_t *dst_pmd, + pmd_t *src_pmd, unsigned long addr, + unsigned long end, pteval_t prot) +{ + pte_t *src_pte, *dst_pte; + pte_t *orig_src_pte, *orig_dst_pte; + spinlock_t *dst_ptl; + int ret = 0; + + dst_pte = pte_alloc_map_lock(ko_mm, dst_pmd, addr, &dst_ptl); + if (!dst_pte) { + ret = -ENOMEM; + goto unlock; + } + debug_printk("lock dst_ptl=0x%16lx\n", (unsigned long)dst_ptl); + src_pte = pte_offset_kernel(src_pmd, addr); + if (!src_pte) { + pte_unmap_unlock(dst_pte, dst_ptl); + debug_printk("unlock dst_ptl=0x%16lx\n", (unsigned long)dst_ptl); + goto unlock; + } + // spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); + orig_src_pte = src_pte; + orig_dst_pte = dst_pte; + + do { + if (pte_none(*src_pte)) + continue; + if (unlikely(!pte_present(*src_pte))) { + debug_printk( + "not present pte found: addr=0x%16lx, end=0x%16lx, pte_val=0x%16lx\n", + addr, end, pte_val(*src_pte)); + continue; + } + if (pte_valid(*dst_pte)) { + continue; + } + /* koi_copy_present_pte() will clear `*prealloc` if consumed */ + ret = koi_copy_present_pte(ko_mm, dst_pte, src_pte, addr); + debug_printk( + "dst_pte=0x%16lx, dst_pte_val=0x%16lx, src_pte=0x%16lx, src_pte_val=0x%16lx, addr=0x%16lx\n", + (unsigned long)dst_pte, pte_val(*dst_pte), (unsigned long)src_pte, pte_val(*src_pte), + addr); + debug_printk( "pte=0x%16lx, pte_val=0x%16lx\n", (unsigned long)dst_pte, (unsigned long)pte_val(*dst_pte)); + } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end); + pte_unmap_unlock(orig_dst_pte, dst_ptl); + debug_printk("unlock dst_ptl=0x%16lx\n", (unsigned long)dst_ptl); + +unlock: + // spin_unlock_irq(dst_ptl); + debug_printk( "unlock1 dst_ptl=0x%16lx\n", (unsigned long)dst_ptl); + return ret; +} + +/** +* copy huge pmd from kernel space to driver space. 
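+* The source pmd maps a huge page, so the whole range is shared by copying the pmd entry itself (with @prot OR-ed in) instead of walking a pte table.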
+*/ +static int koi_copy_huge_pmd(struct mm_struct *ko_mm, pmd_t *dst_pmd, + pmd_t *src_pmd, unsigned long addr, pteval_t prot) +{ + spinlock_t *dst_ptl; + int ret = -ENOMEM; + debug_printk( + "hugepmd: src_pmd=0x%16lx, dst_pmd=0x%16lx, src_pmd_val=0x%16lx, dst_pmd_val=0x%16lx, addr=0x%16lx\n", + (unsigned long)src_pmd, (unsigned long)dst_pmd, pmd_val(*src_pmd), pmd_val(*dst_pmd), addr); + dst_ptl = pmd_lockptr(ko_mm, dst_pmd); + debug_printk("pmd lock dst_ptl=0x%16lx\n", (unsigned long)dst_ptl); + spin_lock(dst_ptl); + + set_pmd(dst_pmd, __pmd(pmd_val(*src_pmd) | prot)); + ret = 0; + spin_unlock(dst_ptl); + debug_printk("pmd unlock dst_ptl=0x%16lx\n", (unsigned long)dst_ptl); + debug_printk( + "hugepmd: src_pmd=0x%16lx, dst_pmd=0x%16lx, src_pmd_val=0x%16lx, dst_pmd_val=0x%16lx, addr=0x%16lx\n", + (unsigned long)src_pmd, (unsigned long)dst_pmd, pmd_val(*src_pmd), pmd_val(*dst_pmd), addr); + return ret; +} + +/** +*koi_copy_pmd_range - copy pmd from kernel to driver space +*/ +static inline int koi_copy_pmd_range(struct mm_struct *ko_mm, pud_t *dst_pud, + pud_t *src_pud, unsigned long addr, + unsigned long end, pteval_t prot) +{ + pmd_t *src_pmd, *dst_pmd; + unsigned long next, flag; + int err; + debug_printk( + "koi_copy_pmd_range src_pud=0x%16lx, dst_pud=0x%16lx, addr=0x%16lx, end=0x%16lx\n", + (unsigned long)src_pud, (unsigned long)dst_pud, addr, end); + dst_pmd = pmd_alloc(ko_mm, dst_pud, addr); + if (!dst_pmd) { + return -ENOMEM; + } + src_pmd = pmd_offset(src_pud, addr); + do { + next = pmd_addr_end(addr, end); + flag = pmd_val(*src_pmd) & PTE_FLAGS_MASK; + // debug_printk("src_pmd=0x%16lx, dst_pmd=0x%16lx, addr=0x%16lx\n", src_pmd, dst_pmd, next); + // CONFIG_TRANSPARENT_HUGEPAGE is enabled, so we must add copy_huge_pmd + if (!pmd_present(*src_pmd)) + continue; + if (pmd_leaf(*src_pmd)) { + // if src_pmd is huge page + debug_printk( + "koi_copy_pmd_range src_pmd=0x%16lx, dst_pmd=0x%16lx, addr=0x%16lx\n", + (unsigned long)src_pmd, (unsigned long)dst_pmd, addr); + err = koi_copy_huge_pmd(ko_mm, dst_pmd, src_pmd, addr, prot); + if (err == -ENOMEM) + return -ENOMEM; + continue; + } + if (koi_copy_pte_range(ko_mm, dst_pmd, src_pmd, addr, next, prot)) + return -ENOMEM; + set_pmd(dst_pmd, __pmd((pmd_val(*dst_pmd) & (~PTE_FLAGS_MASK)) | flag)); + debug_printk( + "koi_copy_pmd_range src_pmd_val=0x%16lx, dst_pmd_val=0x%16lx, addr=0x%16lx\n", + pmd_val(*src_pmd), pmd_val(*dst_pmd), addr); + debug_printk("pmd=0x%16lx, pmd_val=0x%16lx\n", (unsigned long)dst_pmd, (unsigned long)pmd_val(*dst_pmd)); + } while (dst_pmd++, src_pmd++, addr = next, addr != end); + return 0; +} + + +static int koi_copy_huge_pud(struct mm_struct *ko_mm, pud_t *dst_pud, + pud_t *src_pud, unsigned long addr) +{ + spinlock_t *dst_ptl; + // pmd_t pmd; + int ret = -ENOMEM; + debug_printk("huge src_pud=0x%16lx, dst_pud=0x%16lx, addr=0x%16lx\n", + (unsigned long)src_pud, (unsigned long)dst_pud, addr); + dst_ptl = pud_lockptr(ko_mm, dst_pud); + + debug_printk("pud lock dst_ptl=0x%16lx\n", (unsigned long)dst_ptl); + spin_lock(dst_ptl); + set_pte((pte_t *)dst_pud, __pte(pud_val(*src_pud))); + spin_unlock(dst_ptl); + debug_printk("pud unlock dst_ptl=0x%16lx\n", (unsigned long)dst_ptl); + ret = 0; + return ret; +} + +/** +*koi_copy_pud_range - copy pud from kernel to driver +*/ +static inline int koi_copy_pud_range(struct mm_struct *ko_mm, p4d_t *dst_p4d, + p4d_t *src_p4d, unsigned long addr, + unsigned long end, pteval_t prot) +{ + pud_t *src_pud, *dst_pud; + unsigned long next, flag; + dst_pud = pud_alloc(ko_mm, dst_p4d, addr); + 
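+ /* pud_alloc() populates the destination p4d on demand; NULL here means a page-table page could not be allocated. */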
if (!dst_pud) + return -ENOMEM; + src_pud = pud_offset(src_p4d, addr); + do { + next = pud_addr_end(addr, end); + flag = pud_val(*src_pud) & PTE_FLAGS_MASK; + debug_printk( + "koi_copy_pud_range src_pud=0x%16lx, dst_pud=0x%16lx, addr=0x%16lx, next=0x%16lx\n", + (unsigned long)src_pud, (unsigned long)dst_pud, addr, next); + if (!pud_present(*src_pud)) { + continue; + } + if (pud_leaf(*src_pud)) { + // indicates that the src_pud maps to a huge page + koi_copy_huge_pud(ko_mm, dst_pud, src_pud, addr); + continue; + } + // if (pud_trans_huge(*src_pud) || pud_devmap(*src_pud)) { + // debug_printk("pud_trans_huge=%d, pud_devmap=%d, src_pud=0x%16lx\n", pud_trans_huge(*src_pud) , pud_devmap(*src_pud), src_pud); + // continue; + // /* fall through */ + // } + if (koi_copy_pmd_range(ko_mm, dst_pud, src_pud, addr, next, prot)) + return -ENOMEM; + debug_printk("koi_copy_pud_range dst_p4d=0x%16lx, dst_p4d_val=0x%16lx, pud=0x%16lx, pud_val=0x%16lx, addr=0x%16lx, next=0x%16lx\n", (unsigned long)dst_p4d, (unsigned long)p4d_val(*dst_p4d), (unsigned long)dst_pud,(unsigned long) pud_val(*dst_pud), addr, next); + set_pud(dst_pud, __pud((pud_val(*dst_pud) & (~PTE_FLAGS_MASK)) | flag)); + debug_printk("koi_copy_pud_range pud=0x%16lx, pud_val=0x%16lx\n", (unsigned long)dst_pud,(unsigned long) pud_val(*dst_pud)); + } while (dst_pud++, src_pud++, addr = next, addr != end); + return 0; +} + +/** +* koi_copy_p4d_range - map the kernel pagetable to the driver space level by level +* @ko_mm: the mm_struct of driver module +* @dst_pgd: destination pgd +* @src_pgd: source pgd +* @addr: the start of address +* @end: the end of address +*/ +static inline int koi_copy_p4d_range(struct mm_struct *ko_mm, pgd_t *dst_pgd, + pgd_t *src_pgd, unsigned long addr, + unsigned long end, pteval_t prot) +{ + p4d_t *src_p4d, *dst_p4d; + unsigned long next; + dst_p4d = p4d_alloc(ko_mm, dst_pgd, addr); + if (!dst_p4d) + return -ENOMEM; + src_p4d = p4d_offset(src_pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (p4d_none(*src_p4d) || p4d_bad(*src_p4d)) + continue; + unsigned long flag = p4d_val(*src_p4d) & PTE_FLAGS_MASK; + debug_printk( + "koi_copy_p4d_range dst_p4d=0x%16lx, dst_p4d_val=0x%16lx, src_p4d=0x%16lx, src_p4d_val=0x%16lx, addr=0x%16lx\n", + (unsigned long)dst_p4d, p4d_val(*dst_p4d), (unsigned long)src_p4d, p4d_val(*src_p4d), addr); + if (koi_copy_pud_range(ko_mm, dst_p4d, src_p4d, addr, next, prot)) { + return -ENOMEM; + } + set_p4d(dst_p4d, __p4d((p4d_val(*dst_p4d) & (~PTE_FLAGS_MASK)) | flag)); + debug_printk( "p4d=0x%16lx, p4d_val=0x%16lx\n", (unsigned long)dst_p4d, (unsigned long)p4d_val(*dst_p4d)); + } while (dst_p4d++, src_p4d++, addr = next, addr != end); + return 0; +} + +/** +*int koi_copy_pagetable - map the address range from "addr" to "end" to the driver pagetable +*@ko_mm: the mm_struct of the driver module +*@koi_pg_dir: koi_pg_dir, related to the driver module, the entry for driver pagetable +*@addr: the starting address of mapping zone +*@end: the end address of mapping zone +*/ +int koi_copy_pagetable(struct mm_struct *ko_mm, pgd_t *koi_pg_dir, + unsigned long addr, unsigned long end, pteval_t prot) +{ + int ret = 0; + unsigned long next; + + pgd_t *src_pgd, *dst_pgd; + unsigned long flag; + if (addr == 0 || end <= addr || (addr & (PAGE_SIZE - 1)) != 0) { + printk(KERN_INFO "Wrong Arguments! 
addr=0x%16lx, end=0x%16lx, %ld\n", addr, end, addr & (PAGE_SIZE - 1)); + return 0; + } + src_pgd = pgd_offset_pgd(swapper_pg_dir, addr); + dst_pgd = pgd_offset_pgd(koi_pg_dir, addr); + do { + flag = pgd_val(*src_pgd) & PTE_FLAGS_MASK; + next = pgd_addr_end(addr, end); + if (pgd_none(*src_pgd) || pgd_bad(*src_pgd)) + continue; + debug_printk("koi_copy_pagetable pgd=0x%16lx, pgd_val=0x%16lx, addr=0x%16lx, next=0x%16lx\n", (unsigned long)dst_pgd, (unsigned long)pgd_val(*dst_pgd), addr, next); + if (unlikely(koi_copy_p4d_range(ko_mm, dst_pgd, src_pgd, addr, + next, prot))) { + ret = -ENOMEM; + break; + } + set_pgd(dst_pgd, __pgd((pgd_val(*dst_pgd) & (~PTE_FLAGS_MASK)) | flag)); + debug_printk("koi_copy_pagetable set pgd=0x%16lx, pgd_val=0x%16lx, addr=0x%16lx, next=0x%16lx\n", (unsigned long)dst_pgd, (unsigned long)pgd_val(*dst_pgd), addr, next); + } while (dst_pgd++, src_pgd++, addr = next, addr != end); + return ret; +} + +void koi_create_pagetable(struct module *mod) +{ + int ret = 0; + int cpu; + int i; + unsigned long addr; + struct desc_ptr desc; + struct koi_mem_hash_node *new_node = kzalloc(sizeof(struct koi_mem_hash_node), GFP_KERNEL); + if (!new_node) { + debug_printk( "[NO MEM] KOI ALLOC new node failed\n"); + return; + } + // if (koi_kern_cr3 == 0) { + // // CONFIG_ADDRESS_MASKING is not set in openeuler_defconfig, so we don't take LAM into consideration. + // koi_kern_cr3 = __sme_pa(swapper_pg_dir) | CR3_NOFLUSH; + // } + new_node->pgdp = koi_pgd_alloc(); + new_node->ko_mm = kzalloc(sizeof(struct mm_struct) + sizeof(unsigned long) * BITS_TO_LONGS(NR_CPUS),GFP_KERNEL); + new_node->mod = mod; + init_ko_mm(new_node->ko_mm, new_node->pgdp); + new_node->ko_cr3 = __sme_pa(new_node->pgdp) /*| CR3_NOFLUSH */; + printk(KERN_ERR "ko_cr3=0x%16lx, pgdp=0x%16lx\n", new_node->ko_cr3, (unsigned long)new_node->pgdp); + // map module layout into module pagetable. + for_each_mod_mem_type(type) { + printk(KERN_ERR "\033[33mcopying mem range, start=0x%16lx, end=0x%16lx\033[0m\n", + (unsigned long)mod->mem[type].base, + (unsigned long)mod->mem[type].base + mod->mem[type].size); + if (!mod->mem[type].base || !mod->mem[type].size) { + continue; + } + ret = koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, (unsigned long)mod->mem[type].base, (unsigned long)mod->mem[type].base + mod->mem[type].size, 0); + if (ret != 0) + printk(KERN_ERR + "\033[33mError occurred when copying range from 0x%lx to 0x%lx, Eno:%d\033[0m\n", + (unsigned long)mod->mem[type].base, + (unsigned long)mod->mem[type].base + mod->mem[type].size, + ret); + } + + for_each_possible_cpu(cpu) { + addr = (unsigned long)per_cpu_ptr(&koi_kern_cr3, cpu); + + debug_printk( "cpu=%d, addr=0x%16lx, this_cpu_off=0x%16lx, this_cpu_off_addr=0x%16lx\n", cpu, addr, this_cpu_read(this_cpu_off), (unsigned long)per_cpu_ptr(&this_cpu_off, cpu)); + koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, addr, addr + PAGE_SIZE, 0); + koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, (unsigned long)per_cpu_ptr(&this_cpu_off, cpu) & PAGE_MASK, ((unsigned long)per_cpu_ptr(&this_cpu_off, cpu) & PAGE_MASK) + PAGE_SIZE, 0); + } + printk(KERN_ERR "mapping koi_data\n"); + // map koi_data into module pagetable. + koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, + (unsigned long)__koi_data_start, + (unsigned long)__koi_data_end, 0); + printk(KERN_ERR "mapping koi_text\n"); + // map koi_text into module pagetable. 
+ koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, (unsigned long)__koi_code_start, (unsigned long)__koi_code_end, 0); + + // map exception entry into module pagetable. + printk(KERN_ERR "idt_desct=0x%16lx, size =0x%16lx, end=0x%16lx\n", (unsigned long)&idt_descr & PAGE_MASK, sizeof(idt_descr), ((unsigned long)&idt_descr + sizeof(idt_descr) + PAGE_SIZE) & PAGE_MASK); + koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, (unsigned long)&idt_descr & PAGE_MASK, ((unsigned long)&idt_descr + sizeof(idt_descr) + PAGE_SIZE) & PAGE_MASK, 0); + printk(KERN_ERR "idt_base=0x%16lx, end=0x%16lx, size=0x%16lx\n", idt_descr.address, (unsigned long)idt_descr.address + IDT_TABLE_SIZE, IDT_TABLE_SIZE - 1); + koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, idt_descr.address, (unsigned long)idt_descr.address + IDT_TABLE_SIZE, 0); + printk(KERN_ERR "__entry_text_start=0x%16lx, __entry_text_end=0x%16lx, error_entry=0x%16lx\n", (unsigned long)__entry_text_start & PAGE_MASK, (unsigned long)(__entry_text_end + PAGE_SIZE) & PAGE_MASK, (unsigned long)error_entry); + koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, (unsigned long)__entry_text_start & PAGE_MASK, (unsigned long)(__entry_text_end + PAGE_SIZE) & PAGE_MASK, 0); + // struct desc_ptr dtr; + // asm volatile("sidt %0":"=m" (dtr)); + // printk(KERN_ERR "dtr.size=%d, address=0x%16lx\n", dtr.size, dtr.address); + for (i = 0; i < IDT_ENTRIES; i++) { + gate_desc desc = idt_table[i]; + addr = desc.offset_low | ((unsigned long)desc.offset_middle << 16) | ((unsigned long)desc.offset_high << 32); + printk(KERN_ERR "idt handler addr=0x%16lx, segment=%x, ist=%d, zero=%d, type=%d, dpl=%d, p=%d\n", + addr, desc.segment, desc.bits.ist, desc.bits.zero, desc.bits.type, desc.bits.dpl, desc.bits.p); + if (addr > (unsigned long)__entry_text_start && addr < (unsigned long)__entry_text_end) + continue; + koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, addr & PAGE_MASK, (addr + PAGE_SIZE) & PAGE_MASK, 0); + } + + // mapping gdt into module pagetable. + for_each_possible_cpu(cpu) { + native_store_gdt(&desc); + printk(KERN_ERR "address=0x%16lx, size=%d\n", desc.address, desc.size); + koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, desc.address, desc.address + PAGE_SIZE, 0); + + addr = (long)get_cpu_gdt_rw(cpu); // this address is logical address. + printk(KERN_ERR "gdt rw=0x%16lx, addr=0x%16lx\n", (unsigned long)per_cpu_ptr(gdt_page.gdt, cpu), addr); + koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, addr, addr + PAGE_SIZE, 0); + + addr = (long)get_cpu_gdt_ro(cpu); // this address is same to base stored in GDTR. + printk(KERN_ERR "gdtp=0x%16lx, addr=0x%16lx\n", (unsigned long)per_cpu_ptr(gdt_page.gdt, cpu), addr); + koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, addr, addr + PAGE_SIZE, 0); + // struct desc_struct desc = per_cpu(gdt_page.gdt[GDT_ENTRY_KERNEL_CS], cpu); + // debug_printk("base0=0x%x, base1=0x%x, base2=0x%x, type=0x%x, s=%d, dpl=%d, p=%d, avl=%d, l=%d, d=%d, g=%d\n", + // desc.base0, desc.base1, desc.base2, desc.type, desc.s, desc.dpl, desc.p, desc.avl, desc.l, desc.d, desc.g); + } + + // map irq stack into module pagetable. 
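+ // Keep each CPU's hardirq stack mapped so an interrupt taken while the driver page table is live still has a usable stack.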
+ for_each_possible_cpu(cpu) { + // unsigned long top_of_stack = per_cpu(pcpu_hot.top_of_stack, cpu); + unsigned long hard_irq_stack = (unsigned long)per_cpu(pcpu_hot.hardirq_stack_ptr, cpu) + 8 - IRQ_STACK_SIZE; + debug_printk("top_of_stack=0x%16lx, hard_irq_stack=0x%16lx, current_stack=0x%16lx\n", top_of_stack, hard_irq_stack, (unsigned long)current->stack); + koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, hard_irq_stack, hard_irq_stack + IRQ_STACK_SIZE, 0); + } + + for_each_possible_cpu(cpu) { + // map TSS pointed by TSS descriptor in GDT + // We can use segment selector in tr to get the descriptor, and the corresponding base address and limit stored in the GDT entry. + addr = (unsigned long) &get_cpu_entry_area(cpu)->tss.x86_tss; + debug_printk( "addr=0x%16lx\n",(unsigned long) &get_cpu_entry_area(cpu)->tss.x86_tss); + koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, addr, (addr + sizeof(struct tss_struct) + PAGE_SIZE) & PAGE_MASK, 0); + // map ist stack + struct tss_struct *tss = (struct tss_struct *)addr; + // printk(KERN_ERR "tss=0x%16lx, tss->ist=0x%16lx, DF=0x%16lx, NMI=0x%16lx, DB=0x%16lx, MCE=0x%16lx\n", + // (unsigned long)tss, (unsigned long)tss->x86_tss.ist, (unsigned long)tss->x86_tss.ist[IST_INDEX_DF], (unsigned long)tss->x86_tss.ist[IST_INDEX_NMI], (unsigned long)tss->x86_tss.ist[IST_INDEX_DB],(unsigned long) tss->x86_tss.ist[IST_INDEX_MCE] + // ); + koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, (unsigned long)tss->x86_tss.ist[IST_INDEX_DF], (unsigned long)tss->x86_tss.ist[IST_INDEX_DF] + PAGE_SIZE, 0); + koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, (unsigned long)tss->x86_tss.ist[IST_INDEX_NMI], (unsigned long)tss->x86_tss.ist[IST_INDEX_NMI] + PAGE_SIZE, 0); + koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, (unsigned long)tss->x86_tss.ist[IST_INDEX_DB], (unsigned long)tss->x86_tss.ist[IST_INDEX_DB] + PAGE_SIZE, 0); + koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, (unsigned long)tss->x86_tss.ist[IST_INDEX_MCE], (unsigned long)tss->x86_tss.ist[IST_INDEX_MCE] + PAGE_SIZE, 0); + koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, (unsigned long)tss->x86_tss.ist[IST_INDEX_VC], (unsigned long)tss->x86_tss.ist[IST_INDEX_VC] + PAGE_SIZE, 0); + } + spin_lock_init(&new_node->mod_lock); + new_node->is_valid = true; + + spin_lock(&koi_mem_htbl_spin_lock); + hash_add_rcu(koi_mem_htbl, &new_node->node, (unsigned long)new_node->mod); + spin_unlock(&koi_mem_htbl_spin_lock); + printk(KERN_ERR "create pagetable finished\n"); + unsigned long gs = 0; + asm volatile( + "mov %%gs, %0" + : "=r"(gs) + : + ); + printk(KERN_ERR "gs=0x%16lx\n", gs); +} + +void koi_map_mem(struct module *mod, unsigned long addr, unsigned long size) +{ + struct koi_mem_hash_node *target = NULL; + rcu_read_lock(); + hash_for_each_possible_rcu (koi_mem_htbl, target, node, + (unsigned long)mod) { + if (target->mod == mod) + break; + } + rcu_read_unlock(); + + if (target == NULL) { + printk(KERN_ERR "mem node for module: %s not found\n", + mod->name); + return; + } + debug_printk("mapping addr=0x%16lx, end=0x%16lx\n", addr & PAGE_MASK, (addr + size + PAGE_SIZE) & PAGE_MASK); + koi_copy_pagetable(target->ko_mm, target->pgdp, addr & PAGE_MASK, + (addr + size + PAGE_SIZE) & PAGE_MASK, 0); + // flush_tlb_one_kernel(addr & PAGE_MASK); +} +EXPORT_SYMBOL(koi_map_mem); + +void koi_unmap_pte_table(struct mm_struct *ko_mm, pmd_t *pmd, + unsigned long addr, unsigned long end) +{ + pte_t *pte; + if (!pmd) + return; + debug_printk("pmd=0x%16lx, addr=0x%16lx, end=0x%16lx\n", (unsigned long)pmd, addr, 
end); + + // struct page *page = pte_page(*pte); + // printk(KERN_ERR "pte=0x%16llx, pte_val=0x%16llx\n", pte, pte_val(*pte)); + // debug_printk("free pte table 0x%16llx, pmd=0x%16llx, page=0x%16llx, pmd points to page=0x%16llx\n", pte, pmd, page, pte_page(pmd_pte(*pmd))); + // printk(KERN_ERR "pmd_pfn=0x%16llx, pte_pfn=0x%16llx\n", pmd_pfn(*pmd), pte_pfn(*pte)); + // pte_free(ko_mm, pte_page(pmd_pte(*pmd))); + do { + pte = pte_offset_kernel(pmd, addr); + debug_printk("pte=0x%16lx, pte_val=0x%16lx\n", (unsigned long)pte, pte_val(*pte)); + set_pte(pte, __pte(0)); + } while (addr += PAGE_SIZE, addr != end); +} + +void koi_unmap_pmd_range(struct mm_struct *ko_mm, pud_t *pud, + unsigned long addr, unsigned long end) +{ + pmd_t *pmd, *orig_pmd; + unsigned long next; + if (!pud) + return; + orig_pmd = pmd_offset(pud, addr); + pmd = orig_pmd; + debug_printk("pud=0x%16lx, addr=0x%16lx, end=0x%16lx\n", (unsigned long) pud, addr, end); + // printk(KERN_ERR "pud_pfn=0x%16llx, pmd_pfn=0x%16llx\n", pud_pfn(*pud), pmd_pfn(*pmd)); + do { + debug_printk( "pmd=0x%16lx, pmd_val=0x%16lx\n", (unsigned long)pmd, pmd_val(*pmd)); + next = pmd_addr_end(addr, end); + if (pmd_none(*pmd)) + continue; + if (pmd_bad(*pmd)) { + set_pmd(pmd, __pmd(0)); + continue; + } + koi_unmap_pte_table(ko_mm, pmd, addr, next); + } while (pmd++, addr = next, addr != end); +} + +void koi_unmap_pud_range(struct mm_struct *ko_mm, p4d_t *p4d, + unsigned long addr, unsigned long end) +{ + pud_t *pud, *orig_pud; + unsigned long next; + if (!p4d) + return; + orig_pud = pud_offset(p4d, addr); + pud = orig_pud; + debug_printk("p4d=0x%16lx, addr=0x%16lx, end=0x%16lx\n", (unsigned long)p4d, addr, end); + do { + debug_printk( "pud=0x%16llx, pud_val=0x%16llx\n", pud, pud_val(*pud)); + next = pud_addr_end(addr, end); + if (pud_none(*pud)) + continue; + if (pud_bad(*pud)) { + set_pud(pud, __pud(0)); + continue; + } + koi_unmap_pmd_range(ko_mm, pud, addr, next); + } while (pud++, addr = next, addr != end); + debug_printk("free pud 0x%16lx, p4d=0x%16lx, orig_pud=0x%16lx\n", (unsigned long) pud, (unsigned long) p4d, (unsigned long) orig_pud); +} + +void koi_unmap_p4d_range(struct mm_struct *ko_mm, pgd_t *pgd, + unsigned long addr, unsigned long end) +{ + p4d_t *p4d, *orig_p4d; + unsigned long next; + if (!pgd) + return; + debug_printk("pgd=0x%16lx, addr=0x%16lx, end=0x%16lx\n", (unsigned long)pgd, addr, end); + orig_p4d = p4d_offset(pgd, addr); + p4d = orig_p4d; + do { + next = p4d_addr_end(addr, end); + debug_printk( "p4d=0x%16lx, p4d_val=0x%16lx, p4d_none=%d\n", (unsigned long)p4d, p4d_val(*p4d), p4d_none(*p4d)); + if (p4d_none_or_clear_bad(p4d)) { + continue; + } + koi_unmap_pud_range(ko_mm, p4d, addr, next); + } while (p4d++, addr = next, addr != end); +} + +void koi_unmap_pagetable(struct mm_struct *ko_mm, pgd_t *ko_pg_dir, + unsigned long addr, unsigned long end) +{ + unsigned long next; + pgd_t *pgd = pgd_offset_pgd(ko_pg_dir, addr); + debug_printk("freepagetable addr=0x%16lx, end=0x%16lx\n", addr, end); + do { + next = pgd_addr_end(addr, end); + debug_printk( "pgd=0x%16llx, pgd_val=0x%16llx\n", pgd, pgd_val(*pgd)); + if (pgd_none_or_clear_bad(pgd)) { + continue; + } + koi_unmap_p4d_range(ko_mm, pgd, addr, next); + } while (pgd++, addr = next, addr != end); +} + +void koi_unmap_mem(struct module *mod, unsigned long addr, unsigned long size) +{ + debug_printk( "koi_unmap_mem addr=0x%16lx, size=0x%16lx\n", addr, size); + struct koi_mem_hash_node *target = NULL; + if (!addr || ! 
size) { + return; + } + rcu_read_lock(); + hash_for_each_possible_rcu (koi_mem_htbl, target, node, + (unsigned long)mod) { + if (target->mod == mod) + break; + } + rcu_read_unlock(); + + if (target == NULL) { + printk(KERN_ERR "[KOI UNMAP] mem node for module: %s not found\n", + mod->name); + return; + } + koi_unmap_pagetable(target->ko_mm, target->pgdp, addr & PAGE_MASK, + (addr + size + PAGE_SIZE) & PAGE_MASK); + flush_tlb_kernel_range(addr & PAGE_MASK, + (addr + size + PAGE_SIZE) & PAGE_MASK); +} +EXPORT_SYMBOL(koi_unmap_mem); + +void koi_remove_pte_range(struct mm_struct *ko_mm, pgd_t *ko_pg_dir, pmd_t *pmd) +{ + pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd); + debug_printk( + "pte=0x%16lx, page=0x%16lx, pmd=0x%16lx, pmd_val=0x%16lx\n", + (unsigned long)pte, (unsigned long)pmd_page(*pmd), (unsigned long)pmd, (unsigned long)pmd_val(*pmd)); + debug_printk( "free orig_pte=0x%16lx\n", (unsigned long)pte); + pte_free_kernel(ko_mm, pte); +} + +void koi_remove_pmd_range(struct mm_struct *ko_mm, pgd_t *ko_pg_dir, pud_t *pud) +{ + pmd_t *orig_pmd = pud_pgtable(*pud); + pmd_t *pmd; + int i; + for (i = 0; i < PTRS_PER_PMD; i++) { + pmd = orig_pmd + i; + if (!pmd_present(*pmd) || pmd_bad(*pmd)) + continue; + debug_printk("pmd=0x%16lx, pmd_val=0x%16lx\n", (unsigned long)pmd, pmd_val(*pmd)); + koi_remove_pte_range(ko_mm, ko_pg_dir, pmd); + } + debug_printk( "free pmd=0x%16lx, page=0x%16lx\n",(unsigned long) orig_pmd, (unsigned long) pud_page(*pud)); + + pmd_free(ko_mm, orig_pmd); +} + +void koi_remove_pud_range(struct mm_struct *ko_mm, pgd_t *ko_pg_dir, p4d_t *p4d) +{ + pud_t *orig_pud = p4d_pgtable(*p4d); + pud_t *pud; + int i; + for (i = 0; i < PTRS_PER_PUD; i++) { + pud = orig_pud + i; + debug_printk( "p4d=0x%16lx, pud=0x%16lx, orig_pud=0x%16lx\n", (unsigned long)p4d, (unsigned long)pud, (unsigned long)orig_pud); + if (!pud_present(*pud) || pud_bad(*pud)) + continue; + debug_printk("pud=0x%16lx, pud_val=0x%16lx\n", (unsigned long)pud, pud_val(*pud)); + koi_remove_pmd_range(ko_mm, ko_pg_dir, pud); + } + debug_printk( "free pud=0x%16lx, page=0x%16lx\n", (unsigned long)orig_pud, (unsigned long) p4d_pgtable(*p4d)); + // free pud page dir + pud_free(ko_mm, orig_pud); +} + +void koi_remove_p4d_range(struct mm_struct *ko_mm, pgd_t *ko_pg_dir, pgd_t *pgd) +{ + p4d_t *orig_p4d = __va(pgd_val(*pgd) & PTE_PFN_MASK); + p4d_t *p4d; + int i; + for (i = 0; i < PTRS_PER_PGD; i++) { + p4d = orig_p4d + i; + if (!p4d_present(*p4d) || p4d_bad(*p4d)) + continue; + debug_printk( "p4d=0x%16lx, p4d_val=0x%16lx\n", (unsigned long)p4d, (unsigned long)p4d_val(*p4d)); + koi_remove_pud_range(ko_mm, ko_pg_dir, p4d); + } + debug_printk( "free orig_p4d=0x%16lx\n", (unsigned long) orig_p4d); + + // free p4d page dir. 
+ p4d_free(ko_mm, orig_p4d); +} + +void koi_remove_pagetable(struct mm_struct *ko_mm, pgd_t *ko_pg_dir) +{ + pgd_t *pgd; + int i; + for (i = 0; i < PTRS_PER_PGD; i++) { + pgd = ko_pg_dir + i; + if (pgd_none(*pgd) || pgd_bad(*pgd)) + continue; + debug_printk("pgd=0x%16lx, pgd_val=0x%16lx\n", (unsigned long)pgd, pgd_val(*pgd)); + koi_remove_p4d_range(ko_mm, ko_pg_dir, pgd); + } + debug_printk( "free pgd=0x%16lx\n", (unsigned long) ko_pg_dir); + // free pgd page dir + free_pages((unsigned long)ko_pg_dir, 0); +} + +void koi_destroy_pagetable(struct module *mod) +{ + // int cpu; + // unsigned long *ptr; + struct koi_mem_hash_node *target = NULL; + unsigned long flags; + rcu_read_lock(); + hash_for_each_possible_rcu (koi_mem_htbl, target, node, + (unsigned long)mod) { + if (target->mod == mod) { + break; + } + } + rcu_read_unlock(); + if (target == NULL) { + printk(KERN_ERR "mem node for module: %s not found, maybe destroyed before?\n", + mod->name); + return; + } + spin_lock_irqsave(&target->mod_lock, flags); + target->is_valid = false; + spin_unlock_irqrestore(&target->mod_lock, flags); + + koi_remove_pagetable(target->ko_mm, target->ko_mm->pgd); + kfree(target->ko_mm); +} + +/** +* koi_cr3_ctor - return the CR3 value for the given driver module +*/ +unsigned long koi_cr3_ctor(struct module *mod) +{ + struct koi_mem_hash_node *ko; + struct mm_struct *ko_mm = NULL; + unsigned long cr3; + + int bkt; + rcu_read_lock(); + hash_for_each_rcu (koi_mem_htbl, bkt, ko, node) { + if (ko->mod == mod) { + ko_mm = ko->ko_mm; + break; + } + } + rcu_read_unlock(); + if (!ko || !ko_mm) { + panic("cannot find module %s in koi_mem_htbl, mod=0x%16lx", + mod->name, (unsigned long)mod); + return 0; + } + asm volatile("mov %%cr3,%0\n\t" : "=r" (cr3) : __FORCE_ORDER); + this_cpu_write(koi_kern_cr3, cr3); + cr3 = (cr3 & X86_CR3_PCID_MASK) | PTI_USER_PCID_MASK | ko->ko_cr3; + + return cr3; +} +EXPORT_SYMBOL(koi_cr3_ctor); + +int koi_share_kstack(struct module *mod) +{ + unsigned long kstack_start, cur_sp; + struct koi_mem_hash_node *target = NULL; + asm volatile( + "mov %%rsp, %0\n" + : "=r"(cur_sp) + : + ); + kstack_start = (unsigned long)current->stack; + debug_printk("cur_sp=0x%16lx, kstack_start=0x%16lx\n", cur_sp, kstack_start); + rcu_read_lock(); + hash_for_each_possible_rcu (koi_mem_htbl, target, node, + (unsigned long)mod) { + if (target->mod == mod) { + break; + } + } + rcu_read_unlock(); + if (target == NULL) + return -ENOENT; + + return koi_copy_pagetable(target->ko_mm, target->pgdp, kstack_start, + kstack_start + THREAD_SIZE, 0); +} +EXPORT_SYMBOL(koi_share_kstack); + +// map the driver stack to kernel +void koi_map_kostack(struct module *mod) +{ +} +EXPORT_SYMBOL(koi_map_kostack); + +__koi_code void koi_set_upage(struct module *mod, unsigned long addr, unsigned long size) { + struct koi_mem_hash_node *target = NULL; + hash_for_each_possible_rcu (koi_mem_htbl, target, node, + (unsigned long)mod) { + if (target->mod == mod) { + break; + } + } + + koi_copy_pagetable(target->ko_mm, target->pgdp, addr & PAGE_MASK, + (addr + size + PAGE_SIZE) & PAGE_MASK, _PAGE_USER); +} +EXPORT_SYMBOL(koi_set_upage); + +//kzalloc function in driver space +__koi_code void *koi_kzalloc_wrapper(struct module *mod, size_t size, gfp_t flags) +{ + int cnt = (size + PAGE_SIZE - 1) / PAGE_SIZE; + void *addr; + struct koi_mem_hash_node *target = NULL; + koi_switch_to_kernel(); + rcu_read_lock(); + hash_for_each_possible_rcu (koi_mem_htbl, target, node, + (unsigned long)mod) { + if (target->mod == mod) { + break; + } + } + rcu_read_unlock(); + if (target == NULL) { + panic("mem node for module: %s not found\n", mod->name); + } + 
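+ /* We are back on the kernel page table here (koi_switch_to_kernel() above), so kzalloc() and the page-table copy below run with full kernel mappings; the new range is mapped into the driver's page table before __koi_switch_to_ko() switches back. */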
printk(KERN_ERR "kzalloc wrapper\n"); + addr = kzalloc(cnt * PAGE_SIZE, flags); + printk(KERN_ERR "kzalloc mapping addr=0x%16lx, end=0x%16lx\n", (unsigned long)addr, (unsigned long)addr + PAGE_SIZE * cnt); + koi_copy_pagetable(target->ko_mm, target->pgdp, (unsigned long)addr, + (unsigned long)addr + PAGE_SIZE * cnt, 0); + printk(KERN_ERR "kzalloc wrapper return\n"); + + __koi_switch_to_ko(mod); + return addr; +} +EXPORT_SYMBOL(koi_kzalloc_wrapper); + +__koi_code void *koi_kzalloc_node_wrapper(struct module *mod, size_t size, gfp_t flags, int node) { + int cnt = (size + PAGE_SIZE - 1) / PAGE_SIZE; + void *addr = NULL; + struct koi_mem_hash_node *target = NULL; + koi_switch_to_kernel(); + + rcu_read_lock(); + hash_for_each_possible_rcu(koi_mem_htbl, target, node, (unsigned long)mod) { + if (target->mod == mod) + break; + } + rcu_read_unlock(); + if (target == NULL) { + panic(KERN_ERR "mem node for module: %s not found\n", mod->name); + } + addr = kzalloc_node(cnt * PAGE_SIZE, flags, node); + koi_copy_pagetable(target->ko_mm, target->pgdp, (unsigned long)addr, + (unsigned long)addr + PAGE_SIZE * cnt, 0); + __koi_switch_to_ko(mod); + return (void *)addr; +} +EXPORT_SYMBOL(koi_kzalloc_node_wrapper); + +//kmalloc function in driver space +__koi_code void * +koi_kmalloc_wrapper(struct module *mod, size_t size, gfp_t flags) +{ + int cnt = (size + PAGE_SIZE - 1) / PAGE_SIZE; + void *addr = NULL; + struct koi_mem_hash_node *target = NULL; + koi_switch_to_kernel(); + + rcu_read_lock(); + hash_for_each_possible_rcu (koi_mem_htbl, target, node, + (unsigned long)mod) { + if (target->mod == mod) { + break; + } + } + rcu_read_unlock(); + if (target == NULL) { + panic(KERN_ERR"mem node for module: %s not found\n", mod->name); + } + + addr = kmalloc(cnt * PAGE_SIZE, flags); + koi_copy_pagetable(target->ko_mm, target->pgdp, (unsigned long)addr, + (unsigned long)addr + PAGE_SIZE * cnt, 0); + __koi_switch_to_ko(mod); + return (void *)addr; +} +EXPORT_SYMBOL(koi_kmalloc_wrapper); + +//vmalloc function in driver space +__koi_code void *koi_vmalloc_wrapper(struct module *mod, + unsigned long size) +{ + int cnt = (size + PAGE_SIZE - 1) / PAGE_SIZE; + void *addr; + struct koi_mem_hash_node *target = NULL; + koi_switch_to_kernel(); + rcu_read_lock(); + hash_for_each_possible_rcu (koi_mem_htbl, target, node, + (unsigned long)mod) { + if (target->mod == mod) { + break; + } + } + rcu_read_unlock(); + if (target == NULL) { + panic("mem node for module: %s not found\n", mod->name); + } + addr = vmalloc(cnt * PAGE_SIZE); + koi_copy_pagetable(target->ko_mm, target->pgdp, (unsigned long)addr, + (unsigned long)addr + PAGE_SIZE * cnt, 0); + __koi_switch_to_ko(mod); + return addr; +} +EXPORT_SYMBOL(koi_vmalloc_wrapper); + +//kmalloc_array function in driver space +__koi_code void *koi_kmalloc_array_wrapper(struct module *mod, + size_t n, size_t size, + gfp_t flags) +{ + int kpage; + void *addr; + struct koi_mem_hash_node *target = NULL; + koi_switch_to_kernel(); + rcu_read_lock(); + hash_for_each_possible_rcu (koi_mem_htbl, target, node, + (unsigned long)mod) { + if (target->mod == mod) { + break; + } + } + rcu_read_unlock(); + if (target == NULL) { + panic("mem node for module: %s not found\n", mod->name); + } + kpage = (n * size + PAGE_SIZE - 1) / PAGE_SIZE; + n = (kpage * PAGE_SIZE) / size; + addr = kmalloc_array(n, size, flags); + koi_copy_pagetable(target->ko_mm, target->pgdp, (unsigned long)addr, + (unsigned long)addr + PAGE_SIZE * kpage, 0); + __koi_switch_to_ko(mod); + return addr; +} 
+EXPORT_SYMBOL(koi_kmalloc_array_wrapper); + +__koi_code void *koi_kcalloc_wrapper(struct module *mod, size_t n, size_t size, gfp_t flags) { + return koi_kmalloc_array_wrapper(mod, n, size, flags | __GFP_ZERO); +} +EXPORT_SYMBOL(koi_kcalloc_wrapper); + +#ifndef CONFIG_IEE +void koi_init_token(struct task_struct *tsk) +{ + struct task_token *token_addr = + (struct task_token *)((unsigned long)tsk + KOI_OFFSET); + + token_addr->koi_kernel_stack = NULL; + // token_addr->koi_stack = NULL; + // token_addr->koi_stack_base = NULL; + token_addr->current_ttbr1 = 0; +} + +#endif + + +#if defined(CONFIG_KOI) && !defined(CONFIG_IEE) + + +// mapping the new page to token, whose address is new+KOI_OFFSET. + +static void do_split_huge_pmd(pmd_t* pmdp) +{ + pte_t *pgtable = pte_alloc_one_kernel(&init_mm); + int i; + struct page *page = pmd_page(*pmdp); + pte_t *ptep = (pte_t *)((unsigned long)pgtable); + for (i = 0; i < PMD_SIZE / PAGE_SIZE; i++, ptep++) { + pte_t entry; + pgprot_t pgprot = pmd_pgprot(*pmdp); + entry = mk_pte(page + i, pgprot); + WRITE_ONCE(*ptep, entry); + } + spinlock_t *ptl = pmd_lock(&init_mm, pmdp); + if (pmd_leaf(READ_ONCE(*pmdp))) { + smp_wmb(); + pmd_populate_kernel(&init_mm, pmdp, pgtable); + pgtable = NULL; + } + spin_unlock(ptl); + if(pgtable) + { + pte_free_kernel(&init_mm, pgtable); + } +} +void koi_add_page_mapping(void *token, void *token_page, unsigned int order) +{ + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, (unsigned long)token); + p4d_t *p4dp = p4d_offset(pgdp, (unsigned long)token); + pud_t *pudp = pud_offset(p4dp, (unsigned long)token); + pmd_t *token_pmdp = pmd_offset(pudp, (unsigned long)token); + pte_t *token_ptep; + + pgdp = pgd_offset_pgd(pgdir, (unsigned long)token_page); + p4dp = p4d_offset(pgdp, (unsigned long)token_page); + pudp = pud_offset(p4dp, (unsigned long)token_page); + pmd_t *token_page_pmdp = pmd_offset(pudp, (unsigned long)token_page); + pte_t *token_page_ptep; + + int use_block_pmd = 0; + if (pmd_leaf(*token_pmdp) && order < 9) { + do_split_huge_pmd(token_pmdp); + do_split_huge_pmd(token_page_pmdp); + } else if (pmd_leaf(*token_pmdp)) { + use_block_pmd = 1; + } + + if (use_block_pmd) { + token_ptep = (pte_t *)token_pmdp; + token_page_ptep = (pte_t *)token_page_pmdp; + } else { + token_ptep = pte_offset_kernel(token_pmdp, (unsigned long)token); + token_page_ptep = pte_offset_kernel(token_page_pmdp, (unsigned long)token_page); + } + + if (use_block_pmd) + { + pmd_t *pmdp = (pmd_t *)token_page_ptep; + pmd_t pmd = READ_ONCE(*pmdp); + pmd = __pmd((pmd_val(pmd) & ~__RW) & ~___D & ~__PP); + WRITE_ONCE(*pmdp, pmd); + pmdp = (pmd_t *)token_ptep; + pmd = READ_ONCE(*pmdp); + pmd = __pmd(((pmd_val(pmd) & ~PTE_PFN_MASK) | __PP) | (__phys_to_pfn(__pa(token_page)) << PAGE_SHIFT)); + WRITE_ONCE(*pmdp, pmd); + } + else { + for(int i = 0; i < (0x1 << order); i++) + { + pte_t pte = READ_ONCE(*token_ptep); + pte = __pte(((pte_val(pte) & ~PTE_PFN_MASK) | __PP) | (__phys_to_pfn(__pa(token_page) + i * PAGE_SIZE) << PAGE_SHIFT)); + WRITE_ONCE(*token_ptep, pte); + pte = READ_ONCE(*token_page_ptep); + pte = __pte((pte_val(pte) & ~__RW) & ~___D & ~__PP); + WRITE_ONCE(*token_page_ptep, pte); + token_ptep++; + token_page_ptep++; + } + } + flush_tlb_kernel_range((unsigned long)token, (unsigned long)(token + (PAGE_SIZE * (1 << order)))); + flush_tlb_kernel_range((unsigned long)token_page, (unsigned long)(token_page + (PAGE_SIZE * (1 << order)))); +} + +void koi_remove_page_mapping(unsigned long token, void *__unused, unsigned long order) { + pgd_t 
*pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, (unsigned long)token); + p4d_t *p4dp = p4d_offset(pgdp, (unsigned long)token); + pud_t *pudp = pud_offset(p4dp, (unsigned long)token); + pmd_t *token_pmdp = pmd_offset(pudp, (unsigned long)token); + pte_t *token_ptep; + void *token_page; + int use_block_pmd = 0; + if (pmd_leaf(*token_pmdp)) { + use_block_pmd = 1; + token_ptep = (pte_t *)token_pmdp; + token_page = page_address(pmd_page(*token_pmdp)); + } else { + token_ptep = pte_offset_kernel(token_pmdp, (unsigned long)token); + token_page = page_address(pte_page(*token_ptep)); + } + pgdp = pgd_offset_pgd(pgdir, (unsigned long)token_page); + p4dp = p4d_offset(pgdp, (unsigned long)token_page); + pudp = pud_offset(p4dp, (unsigned long)token_page); + pmd_t *token_page_pmdp = pmd_offset(pudp, (unsigned long)token_page); + pte_t *token_page_ptep; + if (use_block_pmd) { + token_page_ptep = (pte_t *)token_page_pmdp; + } else { + token_page_ptep = pte_offset_kernel(token_page_pmdp, (unsigned long)token_page); + } + if (use_block_pmd) + { + pmd_t *pmdp = (pmd_t *)token_page_ptep; + pmd_t pmd = READ_ONCE(*pmdp); + pmd = __pmd(pmd_val(pmd) | ___D | __RW | __PP); + WRITE_ONCE(*pmdp, pmd); + pmdp = (pmd_t *)token_ptep; + pmd = READ_ONCE(*pmdp); + pmd = __pmd((pmd_val(pmd) & ~PTE_PFN_MASK & ~__PP) | (__phys_to_pfn(__pa(token - KOI_OFFSET)) << PAGE_SHIFT)); + WRITE_ONCE(*pmdp, pmd); + } + else + { + for(int i = 0; i < (0x1 << order); i++) + { + pte_t pte = READ_ONCE(*token_ptep); + pte = __pte((pte_val(pte) & ~PTE_PFN_MASK & ~__PP) | (__phys_to_pfn(__pa(token - KOI_OFFSET) + i * PAGE_SIZE) << PAGE_SHIFT)); + WRITE_ONCE(*token_ptep, pte); + pte = READ_ONCE(*token_page_ptep); + pte = __pte(pte_val(pte) | ___D | __RW | __PP); + WRITE_ONCE(*token_page_ptep, pte); + token_ptep++; + token_page_ptep++; + } + } + free_pages((unsigned long)token_page, order); + flush_tlb_kernel_range((unsigned long)token, (unsigned long)(token + (PAGE_SIZE * (1 << order)))); + flush_tlb_kernel_range((unsigned long)token_page, (unsigned long)(token_page + (PAGE_SIZE * (1 << order)))); +} + +#endif \ No newline at end of file diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index adc67f98819a..5eda6c3a5fac 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -408,7 +408,11 @@ static void free_ldt_pgtables(struct mm_struct *mm) */ tlb_gather_mmu_fullmm(&tlb, mm); free_pgd_range(&tlb, start, end, start, end); + #ifdef CONFIG_PTP + iee_tlb_finish_mmu(&tlb); + #else tlb_finish_mmu(&tlb); + #endif #endif } diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index bb2af7bcc47d..a5cf20bbbc5f 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -30,6 +30,10 @@ #include #include +#ifdef CONFIG_PTP +#include +#endif + #ifdef CONFIG_ACPI /* * Used while adding mapping for ACPI tables. 
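The machine_kexec_64.c hunks that follow pair set_iee_page() at allocation time with unset_iee_page() just before the page is returned to the allocator, so the IEE view of the kexec transition tables stays consistent. A minimal sketch of that pairing, using hypothetical ptp_get_zeroed_page()/ptp_free_page() helpers that are not part of this patch:

#ifdef CONFIG_PTP
static inline void *ptp_get_zeroed_page(gfp_t gfp)
{
	unsigned long p = get_zeroed_page(gfp);

	if (p)
		set_iee_page(p, 0);	/* register the order-0 page with IEE */
	return (void *)p;
}

static inline void ptp_free_page(void *p)
{
	unset_iee_page((unsigned long)p, 0);	/* unregister before freeing */
	free_page((unsigned long)p);
}
#endif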
@@ -139,12 +143,27 @@ map_efi_systab(struct x86_mapping_info *info, pgd_t *level4p) static void free_transition_pgtable(struct kimage *image) { + #ifdef CONFIG_PTP + unset_iee_page((unsigned long)image->arch.p4d, 0); + #endif free_page((unsigned long)image->arch.p4d); image->arch.p4d = NULL; + + #ifdef CONFIG_PTP + unset_iee_page((unsigned long)image->arch.pud, 0); + #endif free_page((unsigned long)image->arch.pud); image->arch.pud = NULL; + + #ifdef CONFIG_PTP + unset_iee_page((unsigned long)image->arch.pmd, 0); + #endif free_page((unsigned long)image->arch.pmd); image->arch.pmd = NULL; + + #ifdef CONFIG_PTP + unset_iee_page((unsigned long)image->arch.pte, 0); + #endif free_page((unsigned long)image->arch.pte); image->arch.pte = NULL; } @@ -166,6 +185,9 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL); if (!p4d) goto err; + #ifdef CONFIG_PTP + set_iee_page((unsigned long)p4d, 0); + #endif image->arch.p4d = p4d; set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE)); } @@ -174,6 +196,9 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) pud = (pud_t *)get_zeroed_page(GFP_KERNEL); if (!pud) goto err; + #ifdef CONFIG_PTP + set_iee_page((unsigned long)pud, 0); + #endif image->arch.pud = pud; set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE)); } @@ -182,6 +207,9 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL); if (!pmd) goto err; + #ifdef CONFIG_PTP + set_iee_page((unsigned long)pmd, 0); + #endif image->arch.pmd = pmd; set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); } @@ -190,6 +218,9 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) pte = (pte_t *)get_zeroed_page(GFP_KERNEL); if (!pte) goto err; + #ifdef CONFIG_PTP + set_iee_page((unsigned long)pte, 0); + #endif image->arch.pte = pte; set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE)); } @@ -216,6 +247,10 @@ static void *alloc_pgt_page(void *data) clear_page(p); } + #ifdef CONFIG_PTP + set_iee_page((unsigned long)page_to_virt(page), 0); + #endif + return p; } @@ -234,7 +269,10 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable) level4p = (pgd_t *)__va(start_pgtable); clear_page(level4p); - + #ifdef CONFIG_PTP + set_iee_page((unsigned long)level4p, 0); + #endif + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) { info.page_flag |= _PAGE_ENC; info.kernpg_flag |= _PAGE_ENC; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 8d51c86caa41..c719f4dc4cdc 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -81,7 +81,11 @@ void __init native_pv_lock_init(void) static void native_tlb_remove_table(struct mmu_gather *tlb, void *table) { + #ifdef CONFIG_PTP + iee_tlb_remove_page(tlb, table); + #else tlb_remove_page(tlb, table); + #endif } unsigned int paravirt_patch(u8 type, void *insn_buff, unsigned long addr, diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index bd33c4f7c125..4241d0e712c2 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -57,6 +57,13 @@ #include #include +#ifdef CONFIG_IEE +#include +#endif +#if defined(CONFIG_IEE) || defined(CONFIG_KOI) +void *init_token_page_vaddr; +#endif + /* * max_low_pfn_mapped: highest directly mapped pfn < 4 GB * max_pfn_mapped: highest directly mapped pfn > 4 GB @@ -837,6 +844,101 @@ static void __init x86_report_nx(void) } } +#ifdef CONFIG_KOI +unsigned long KOI_OFFSET = 0x200000000000; // IEE_OFFSET = pgtable_l5_enabled() ? 
0x40000000000000 : 0x200000000000; +unsigned long koi_offset = 0x200000000000; +#endif + +#ifdef CONFIG_IEE +#include +unsigned long IEE_OFFSET = 0x200000000000; // IEE_OFFSET = pgtable_l5_enabled() ? 0x40000000000000 : 0x200000000000; +unsigned long iee_offset = 0x200000000000; +EXPORT_SYMBOL(IEE_OFFSET); +#ifdef CONFIG_X86_5LEVEL +void init_iee_offset(void) { + if(pgtable_l5_enabled()) { + IEE_OFFSET = 0x40000000000000; + iee_offset = IEE_OFFSET; + } +} +#endif /* CONFIG_X86_5LEVEL */ + +void __init iee_set_token_page_valid_pre_init(void *token, void *token_page) +{ + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, (unsigned long)token); + p4d_t *p4dp = p4d_offset(pgdp, (unsigned long)token); + pud_t *pudp = pud_offset(p4dp, (unsigned long)token); + pmd_t *pmdp = pmd_offset(pudp, (unsigned long)token); + if (pmd_leaf(*pmdp)) { + extern void *alloc_low_pages(unsigned int num); + pte_t* pgtable = alloc_low_pages(1); + struct page *page = pmd_page(*pmdp); + pte_t *ptep = (pte_t *)((unsigned long)pgtable); + for (int i = 0; i < PMD_SIZE / PAGE_SIZE; i++, ptep++) { + pte_t entry; + pgprot_t pgprot = pmd_pgprot(*pmdp); + entry = mk_pte(page + i, pgprot); + WRITE_ONCE(*ptep, entry); + } + #ifdef CONFIG_PTP + iee_pmd_populate_kernel_pre_init(&init_mm, pmdp, pgtable); + #else + pmd_populate_kernel(&init_mm, pmdp, pgtable); + #endif + } + pte_t *ptep = pte_offset_kernel(pmdp, (unsigned long)token); + pte_t pte = READ_ONCE(*ptep); + pte = __pte(((pte_val(pte) & ~PTE_PFN_MASK) | __PP) | (__phys_to_pfn(__pa(token_page)) << PAGE_SHIFT)); + #ifdef CONFIG_PTP + iee_set_pte_pre_init(ptep, pte); + #else + set_pte(ptep, pte); + #endif + flush_tlb_kernel_range((unsigned long)token, (unsigned long)(token+PAGE_SIZE)); +} +#endif +#ifdef CONFIG_KOI +#include +EXPORT_SYMBOL(KOI_OFFSET); +#ifdef CONFIG_X86_5LEVEL +void init_koi_offset(void) { + if(pgtable_l5_enabled()) { + KOI_OFFSET = 0x40000000000000; + koi_offset = KOI_OFFSET; + } +} +#endif /* CONFIG_X86_5LEVEL */ + + +void __init koi_set_token_page_valid_pre_init(void *token, void *token_page) +{ + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, (unsigned long)token); + p4d_t *p4dp = p4d_offset(pgdp, (unsigned long)token); + pud_t *pudp = pud_offset(p4dp, (unsigned long)token); + pmd_t *pmdp = pmd_offset(pudp, (unsigned long)token); + if (pmd_leaf(*pmdp)) { + extern void *alloc_low_pages(unsigned int num); + pte_t* pgtable = alloc_low_pages(1); + struct page *page = pmd_page(*pmdp); + pte_t *ptep = (pte_t *)((unsigned long)pgtable); + for (int i = 0; i < PMD_SIZE / PAGE_SIZE; i++, ptep++) { + pte_t entry; + pgprot_t pgprot = pmd_pgprot(*pmdp); + entry = mk_pte(page + i, pgprot); + WRITE_ONCE(*ptep, entry); + } + pmd_populate_kernel(&init_mm, pmdp, pgtable); + } + pte_t *ptep = pte_offset_kernel(pmdp, (unsigned long)token); + pte_t pte = READ_ONCE(*ptep); + pte = __pte(((pte_val(pte) & ~PTE_PFN_MASK) | __PP) | (__phys_to_pfn(__pa(token_page)) << PAGE_SHIFT)); + set_pte(ptep, pte); + flush_tlb_kernel_range((unsigned long)token, (unsigned long)(token+PAGE_SIZE)); +} +#endif /* CONFIG_KOI */ + /* * Determine if we were loaded by an EFI loader. If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures @@ -1078,6 +1180,13 @@ void __init setup_arch(char **cmdline_p) * Define random base addresses for memory sections after max_pfn is * defined and before each memory section base is used. 
*/ + #if defined(CONFIG_IEE) && defined(CONFIG_X86_5LEVEL) + init_iee_offset(); + #else + #if defined(CONFIG_KOI) && defined(CONFIG_X86_5LEVEL) + init_koi_offset(); + #endif + #endif kernel_randomize_memory(); #ifdef CONFIG_X86_32 @@ -1163,6 +1272,70 @@ void __init setup_arch(char **cmdline_p) init_mem_mapping(); + #ifdef CONFIG_IEE + printk(KERN_ERR "init_iee_mapping begin ...\n"); + init_iee_mapping(); + printk(KERN_ERR "init_iee_mapping done ...\n"); + + printk(KERN_ERR "init token begin ...\n"); + // Change init_task image va to Logival VA + unsigned long init_task_la = (unsigned long)__va(__pa_symbol(&init_task)); + raw_cpu_write(pcpu_hot.current_task, (struct task_struct *)init_task_la); + init_task.cpus_ptr = &(((struct task_struct *)(__va(__pa_symbol(&init_task))))->cpus_mask); + init_task.children.prev = (__va(__pa_symbol(init_task.children.prev))); + init_task.children.next = (__va(__pa_symbol(init_task.children.next))); + + void *new; + void *init_token; + struct task_token *token; + + // Alloc a page for init_token. + extern void *alloc_low_pages(unsigned int num); + new = alloc_low_pages(1); + init_token_page_vaddr = new; + init_token = (void *)__phys_to_iee(__pa_symbol(&init_task)); + // Use lm to write token before IEE initialized. + token = (struct task_token *)((unsigned long)new + (((unsigned long)&init_task) & ~PAGE_MASK)); + token->pgd = NULL; + token->iee_stack = (void *)__phys_to_iee(__pa_symbol(init_iee_stack_end)); + token->valid = true; + iee_set_token_page_valid_pre_init(init_token, new); + printk(KERN_ERR "init token end ...\n"); + #else + #ifdef CONFIG_KOI + printk(KERN_ERR "init_iee_mapping begin ...\n"); + init_koi_mapping(); + printk(KERN_ERR "init_iee_mapping done ...\n"); + + // Change init_task image va to Logival VA + unsigned long init_task_la = (unsigned long)__va(__pa_symbol(&init_task)); + raw_cpu_write(pcpu_hot.current_task, (struct task_struct *)init_task_la); + init_task.cpus_ptr = &(((struct task_struct *)(__va(__pa_symbol(&init_task))))->cpus_mask); + init_task.children.prev = (__va(__pa_symbol(init_task.children.prev))); + init_task.children.next = (__va(__pa_symbol(init_task.children.next))); + + void *new; + void *init_token; + struct task_token *token; + + // Alloc a page for init_token. + extern void *alloc_low_pages(unsigned int num); + new = alloc_low_pages(1); + init_token_page_vaddr = new; + init_token = (void *)__phys_to_koi(__pa_symbol(&init_task)); + // Use lm to write token before IEE initialized. 
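+	// The token lives at a fixed-offset alias of init_task
+	// (init_token = __phys_to_koi(pa(init_task))), but that alias is not
+	// mapped yet, so "token" points at init_task's slot inside the freshly
+	// allocated low page: "new" plus init_task's offset within its page.
+	// For example, if ((unsigned long)&init_task) & ~PAGE_MASK is 0x340,
+	// early writes to the token land at new + 0x340; the alias only becomes
+	// usable after koi_set_token_page_valid_pre_init() installs the mapping.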
+ token = (struct task_token *)((unsigned long)new + (((unsigned long)&init_task) & ~PAGE_MASK)); + koi_set_token_page_valid_pre_init(init_token, new); + printk(KERN_ERR "init token end ...\n"); + #endif + #endif /* CONFIG_IEE*/ + + #ifdef CONFIG_PTP + printk(KERN_ERR "mapping page table into iee begin ...\n"); + init_iee(); + printk(KERN_ERR "mapping page table into iee done ...\n"); + #endif + idt_setup_early_pf(); /* @@ -1355,3 +1528,4 @@ static int __init register_kernel_offset_dumper(void) return 0; } __initcall(register_kernel_offset_dumper); + diff --git a/arch/x86/kernel/sfi_bpf_arch.c b/arch/x86/kernel/sfi_bpf_arch.c new file mode 100644 index 000000000000..0021c3ed36f6 --- /dev/null +++ b/arch/x86/kernel/sfi_bpf_arch.c @@ -0,0 +1,85 @@ +#include +#include +#include + +pte_t *bpf_sfi_get_ptep(u64 addr) +{ + pgd_t *pgdp; + p4d_t *p4dp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + + /* DEBUG check pgd */ + // u64 ttbr1_phy, ttbr1_vir; + // __asm__ volatile( + // "mrs %0, ttbr1_el1\n\t" + // : "=r" (ttbr1_phy) + // :: + // ); + // pr_debug("phy = 0x%llx, after mask = 0x%llx\n", ttbr1_phy, (u64)(ttbr1_phy << 16) >> 16); + // ttbr1_vir = (u64)__phys_to_kimg((u64)(ttbr1_phy << 16) >> 16); + // pr_info("1, ttbr1_vir = 0x%llx, \n", ttbr1_vir); + // pr_info("2, init_mm.pgd = 0x%llx\n", (u64)init_mm.pgd); + // pr_info("3, swapper_pg_dir = 0x%llx\n", (u64)swapper_pg_dir); + + pgdp = pgd_offset(&init_mm, addr); + if (pgd_none(*pgdp) || pgd_bad(*pgdp)) { + pr_err("get pgdp of 0x%llx failed\n", addr); + return ERR_PTR(-ENOENT); + } + + p4dp = p4d_offset(pgdp, addr); + if (p4d_none(*p4dp) || p4d_bad(*p4dp)) { + pr_err("get p4dp of 0x%llx failed\n", addr); + return ERR_PTR(-ENOENT); + } + + /* IMPORTANT judge huge page first, then judge table */ + pudp = pud_offset(p4dp, addr); + if (pud_huge(*pudp)) { + // pud is huge page + pr_warn("pud of 0x%llx is huge page", addr); + // return (pte_t *)pudp; + return ERR_PTR(-ENOTSUPP); + } + if (pud_none(*pudp) || pud_bad(*pudp)) { + pr_err("get pudp of 0x%llx failed\n", addr); + return ERR_PTR(-ENOENT); + } + + pmdp = pmd_offset(pudp, addr); + if (pmd_huge(*pmdp)) { + // pmd is huge page + pr_warn("pmd of 0x%llx is huge page", addr); + // return (pte_t *)pmdp; + return ERR_PTR(-ENOTSUPP); + } + if (pmd_none(*pmdp) || pmd_bad(*pmdp)) { + pr_err("get pmdp of 0x%llx failed\n", addr); + return ERR_PTR(-ENOENT); + } + + ptep = pte_offset_kernel(pmdp, addr); + if (!ptep) { + pr_err("get ptep of 0x%llx failed\n", addr); + return ERR_PTR(-ENOENT); + } + + return ptep; +} + +// int bpf_sfi_hook_kernel_fault(u64 addr) +// { +// pte_t *ptep; + +// ptep = bpf_sfi_get_ptep(addr); +// if (IS_ERR(ptep)) +// return PTR_ERR(ptep); + +// if (pte_val(*ptep) & PTE_BPF_SFI_GP) { +// return true; +// } +// else +// return false; +// } \ No newline at end of file diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index fbd34ee394a7..3a60fb1bd88a 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -99,6 +99,60 @@ jiffies = jiffies_64; #endif +#ifdef CONFIG_IEE +#define IEE_TEXT \ + . = ALIGN(PAGE_SIZE); \ + __iee_code_start = .; \ + *(.iee.text.header) \ + *(.iee.text) \ + . = ALIGN(PAGE_SIZE); \ + __iee_code_end = .; +#else +#define IEE_TEXT +#endif +#ifdef CONFIG_IEE +#define IEE_SI_TEXT \ + . = ALIGN(PAGE_SIZE); \ + __iee_si_text_start = .; \ + *(.iee.si_text) \ + . = ALIGN(PAGE_SIZE); \ + __iee_trampoline_si_text_start = .; \ + *(.iee.trampoline.si_text) \ + __iee_trampoline_si_text_end = .; \ + . 
= ALIGN(PAGE_SIZE); \ + __iee_si_text_end = .; +#else +#define IEE_SI_TEXT +#endif +#ifdef CONFIG_IEE +#define IEE_SI_DATA \ + . = ALIGN(PAGE_SIZE); \ + __iee_si_data_start = .; \ + *(.iee.si_data) \ + . = ALIGN(PAGE_SIZE); \ + __iee_si_data_end = .; +#else +#define IEE_SI_DATA +#endif + +#ifdef CONFIG_CREDP + #define CRED_DATA \ + . = ALIGN(PAGE_SIZE); \ + *(.iee.cred) \ + . = ALIGN(PAGE_SIZE); +#else + #define CRED_DATA +#endif + +#ifdef CONFIG_IEE_SELINUX_P + #define IEE_SELINUX_DATA \ + . = ALIGN(PAGE_SIZE); \ + *(.iee.selinux) \ + . = ALIGN(PAGE_SIZE); +#else + #define IEE_SELINUX_DATA +#endif + PHDRS { text PT_LOAD FLAGS(5); /* R_E */ data PT_LOAD FLAGS(6); /* RW_ */ @@ -111,6 +165,17 @@ PHDRS { note PT_NOTE FLAGS(0); /* ___ */ } +#ifdef CONFIG_KOI +#define KOI_TEXT \ + . = ALIGN(PAGE_SIZE); \ + __koi_code_start = .; \ + *(.koi.text) \ + . = ALIGN(PAGE_SIZE); \ + __koi_code_end = .; +#else +#define KOI_TEXT +#endif + SECTIONS { #ifdef CONFIG_X86_32 @@ -127,10 +192,12 @@ SECTIONS _stext = .; /* bootstrapping code */ HEAD_TEXT + IEE_TEXT TEXT_TEXT SCHED_TEXT LOCK_TEXT KPROBES_TEXT + IEE_SI_TEXT SOFTIRQENTRY_TEXT #ifdef CONFIG_RETPOLINE *(.text..__x86.indirect_thunk) @@ -151,6 +218,7 @@ SECTIONS *(.text..__x86.rethunk_safe) #endif ALIGN_ENTRY_TEXT_END + KOI_TEXT *(.gnu.warning) } :text = 0xcccccccc @@ -181,6 +249,9 @@ SECTIONS CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES) DATA_DATA + IEE_SI_DATA + CRED_DATA + IEE_SELINUX_DATA CONSTRUCTORS /* rarely changed data like cpu maps */ @@ -410,6 +481,13 @@ SECTIONS __bss_stop = .; } +#ifdef CONFIG_IEE + . = ALIGN(PAGE_SIZE*4); + init_iee_stack_begin = .; + . += PAGE_SIZE*4; + init_iee_stack_end = .; +#endif + /* * The memory occupied from _text to here, __end_of_kernel_reserve, is * automatically reserved in setup_arch(). Anything after here must be diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index e568b64a2b6b..3320f4a9fe5b 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -272,6 +272,9 @@ void arch_sync_kernel_mappings(unsigned long start, unsigned long end) spin_lock(&pgd_lock); list_for_each_entry(page, &pgd_list, lru) { + #ifdef CONFIG_PTP + page = iee_ptdesc_to_page(page); + #endif spinlock_t *pgt_lock; /* the pgt_lock only for Xen */ diff --git a/arch/x86/mm/ident_map_for_iee.c b/arch/x86/mm/ident_map_for_iee.c new file mode 100644 index 000000000000..8b1f1ea52ec4 --- /dev/null +++ b/arch/x86/mm/ident_map_for_iee.c @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Helper routines for building identity mapping page tables. This is + * included by both the compressed kernel and the regular kernel. 
+ */ + +#ifdef CONFIG_PTP +static void ident_pmd_init_for_iee(struct x86_mapping_info *info, pmd_t *pmd_page, + unsigned long addr, unsigned long end) +{ + addr &= PMD_MASK; + for (; addr < end; addr += PMD_SIZE) { + pmd_t *pmd = pmd_page + pmd_index(addr); + + if (pmd_present(*pmd)) + continue; + + #ifdef CONFIG_PTP + iee_set_pmd_pre_init(pmd, __pmd((addr - info->offset) | info->page_flag)); + #else + set_pmd(pmd, __pmd((addr - info->offset) | info->page_flag)); + #endif + } +} + +static int ident_pud_init_for_iee(struct x86_mapping_info *info, pud_t *pud_page, + unsigned long addr, unsigned long end) +{ + unsigned long next; + + for (; addr < end; addr = next) { + pud_t *pud = pud_page + pud_index(addr); + pmd_t *pmd; + + next = (addr & PUD_MASK) + PUD_SIZE; + if (next > end) + next = end; + + if (info->direct_gbpages) { + pud_t pudval; + + if (pud_present(*pud)) + continue; + + addr &= PUD_MASK; + pudval = __pud((addr - info->offset) | info->page_flag); + #ifdef CONFIG_PTP + iee_set_pud_pre_init(pud, pudval); + #else + set_pud(pud, pudval); + #endif + continue; + } + + if (pud_present(*pud)) { + pmd = pmd_offset(pud, 0); + #ifdef CONFIG_PTP + ident_pmd_init_for_iee(info, pmd, addr, next); + #else + ident_pmd_init(info, pmd, addr, next); + #endif + continue; + } + pmd = (pmd_t *)info->alloc_pgt_page(info->context); + if (!pmd) + return -ENOMEM; + #ifdef CONFIG_PTP + ident_pmd_init_for_iee(info, pmd, addr, next); + #else + ident_pmd_init(info, pmd, addr, next); + #endif + #ifdef CONFIG_PTP + iee_set_pud_pre_init(pud, __pud(__pa(pmd) | info->kernpg_flag)); + #else + set_pud(pud, __pud(__pa(pmd) | info->kernpg_flag)); + #endif + } + + return 0; +} + +static int ident_p4d_init_for_iee(struct x86_mapping_info *info, p4d_t *p4d_page, + unsigned long addr, unsigned long end) +{ + unsigned long next; + int result; + + for (; addr < end; addr = next) { + p4d_t *p4d = p4d_page + p4d_index(addr); + pud_t *pud; + + next = (addr & P4D_MASK) + P4D_SIZE; + if (next > end) + next = end; + + if (p4d_present(*p4d)) { + pud = pud_offset(p4d, 0); + #ifdef CONFIG_PTP + result = ident_pud_init_for_iee(info, pud, addr, next); + #else + result = ident_pud_init(info, pud, addr, next); + #endif + if (result) + return result; + + continue; + } + pud = (pud_t *)info->alloc_pgt_page(info->context); + if (!pud) + return -ENOMEM; + + #ifdef CONFIG_PTP + result = ident_pud_init_for_iee(info, pud, addr, next); + #else + result = ident_pud_init(info, pud, addr, next); + #endif + if (result) + return result; + + #ifdef CONFIG_PTP + iee_set_p4d_pre_init(p4d, __p4d(__pa(pud) | info->kernpg_flag)); + #else + set_p4d(p4d, __p4d(__pa(pud) | info->kernpg_flag)); + #endif + } + + return 0; +} + +int kernel_ident_mapping_init_for_iee(struct x86_mapping_info *info, pgd_t *pgd_page, + unsigned long pstart, unsigned long pend) +{ + unsigned long addr = pstart + info->offset; + unsigned long end = pend + info->offset; + unsigned long next; + int result; + + /* Set the default pagetable flags if not supplied */ + if (!info->kernpg_flag) + info->kernpg_flag = _KERNPG_TABLE; + + /* Filter out unsupported __PAGE_KERNEL_* bits: */ + info->kernpg_flag &= __default_kernel_pte_mask; + + for (; addr < end; addr = next) { + pgd_t *pgd = pgd_page + pgd_index(addr); + p4d_t *p4d; + + next = (addr & PGDIR_MASK) + PGDIR_SIZE; + if (next > end) + next = end; + + if (pgd_present(*pgd)) { + p4d = p4d_offset(pgd, 0); + #ifdef CONFIG_PTP + result = ident_p4d_init_for_iee(info, p4d, addr, next); + #else + result = ident_p4d_init(info, p4d, addr, next); 
+ #endif + if (result) + return result; + continue; + } + + p4d = (p4d_t *)info->alloc_pgt_page(info->context); + if (!p4d) + return -ENOMEM; + #ifdef CONFIG_PTP + result = ident_p4d_init_for_iee(info, p4d, addr, next); + #else + result = ident_p4d_init(info, p4d, addr, next); + #endif + if (result) + return result; + if (pgtable_l5_enabled()) { + #ifdef CONFIG_PTP + iee_set_pgd_pre_init(pgd, __pgd(__pa(p4d) | info->kernpg_flag)); + #else + set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag)); + #endif + } else { + /* + * With p4d folded, pgd is equal to p4d. + * The pgd entry has to point to the pud page table in this case. + */ + pud_t *pud = pud_offset(p4d, 0); + #ifdef CONFIG_PTP + iee_set_pgd_pre_init(pgd, __pgd(__pa(pud) | info->kernpg_flag)); + #else + set_pgd(pgd, __pgd(__pa(pud) | info->kernpg_flag)); + #endif + } + } + + return 0; +} +#endif \ No newline at end of file diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 6215dfa23578..a13a5c41e44c 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -28,6 +28,10 @@ #include #include +#ifdef CONFIG_PTP +#include +#endif + /* * We need to define the tracepoints somewhere, and tlb.c * is only compiled when SMP=y. @@ -252,6 +256,8 @@ static void __init probe_page_size_mask(void) if (cpu_feature_enabled(X86_FEATURE_PTI)) __default_kernel_pte_mask &= ~_PAGE_GLOBAL; + #ifndef CONFIG_IEE + #ifndef CONFIG_KOI /* Enable 1 GB linear kernel mappings if available: */ if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) { printk(KERN_INFO "Using GB pages for direct mapping\n"); @@ -259,6 +265,8 @@ static void __init probe_page_size_mask(void) } else { direct_gbpages = 0; } + #endif + #endif } /* @@ -445,6 +453,8 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range, } #ifdef CONFIG_X86_64 +#ifndef CONFIG_IEE +#ifndef CONFIG_KOI /* big page (1G) range */ start_pfn = round_up(pfn, PFN_DOWN(PUD_SIZE)); end_pfn = round_down(limit_pfn, PFN_DOWN(PUD_SIZE)); @@ -463,6 +473,8 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range, page_size_mask & (1<= end) + continue; + /* + * if it is overlapping with brk pgt, we need to + * alloc pgt buf from memblock instead. + */ + can_use_brk_pgt = max(start, (u64)pgt_buf_end<= + min(end, (u64)pgt_buf_top<> PAGE_SHIFT; + last_start = real_end; + /* + * We start from the top (end of memory) and go to the bottom. + * The memblock_find_in_range() gets us a block of RAM from the + * end of RAM in [min_pfn_mapped, max_pfn_mapped) used as new pages + * for page table. + */ + while (last_start > map_start) { + unsigned long start; + if (last_start > step_size) { + start = round_down(last_start - 1, step_size); + if (start < map_start) + start = map_start; + } else + start = map_start; + mapped_ram_size += init_range_memory_mapping_for_iee(start, + last_start); + last_start = start; + min_pfn_mapped = last_start >> PAGE_SHIFT; + if (mapped_ram_size >= step_size) + step_size = get_new_step_size(step_size); + } + if (real_end < map_end) + init_range_memory_mapping_for_iee(real_end, map_end); +} +/** + * memory_map_bottom_up - Map [map_start, map_end) bottom up + * @map_start: start address of the target memory range + * @map_end: end address of the target memory range + * + * This function will setup direct mapping for memory range + * [map_start, map_end) in bottom-up. Since we have limited the + * bottom-up allocation above the kernel, the page tables will + * be allocated just above the kernel and we map the memory + * in [map_start, map_end) in bottom-up. 
+ */ +static void __init memory_map_bottom_up_for_iee(unsigned long map_start, + unsigned long map_end) +{ + unsigned long next, start; + unsigned long mapped_ram_size = 0; + /* step_size need to be small so pgt_buf from BRK could cover it */ + unsigned long step_size = PMD_SIZE; + start = map_start; + min_pfn_mapped = start >> PAGE_SHIFT; + /* + * We start from the bottom (@map_start) and go to the top (@map_end). + * The memblock_find_in_range() gets us a block of RAM from the + * end of RAM in [min_pfn_mapped, max_pfn_mapped) used as new pages + * for page table. + */ + while (start < map_end) { + if (step_size && map_end - start > step_size) { + next = round_up(start + 1, step_size); + if (next > map_end) + next = map_end; + } else { + next = map_end; + } + mapped_ram_size += init_range_memory_mapping_for_iee(start, next); + start = next; + if (mapped_ram_size >= step_size) + step_size = get_new_step_size(step_size); + } +} +unsigned long __ref init_memory_mapping_for_iee(unsigned long start, + unsigned long end, pgprot_t prot) +{ + struct map_range mr[NR_RANGE_MR]; + unsigned long ret = 0; + int nr_range, i; + pr_debug("init_memory_mapping_for_iee: [mem %#010lx-%#010lx]\n", + start, end - 1); + memset(mr, 0, sizeof(mr)); + nr_range = split_mem_range(mr, 0, start, end); + for (i = 0; i < nr_range; i++) + ret = kernel_physical_mapping_init_for_iee(mr[i].start, mr[i].end, + 0, + prot); + + add_pfn_range_mapped(start >> PAGE_SHIFT, ret >> PAGE_SHIFT); + return ret >> PAGE_SHIFT; +} +void __init init_iee_mapping(void) +{ + unsigned long end; +#ifdef CONFIG_X86_64 + end = max_pfn << PAGE_SHIFT; +#else + end = max_low_pfn << PAGE_SHIFT; +#endif + /* the ISA range is always mapped regardless of memory holes */ + init_memory_mapping_for_iee(0, ISA_END_ADDRESS, SET_NG(SET_UPAGE(PAGE_KERNEL))); + if(__pa_symbol(_end) > IEE_OFFSET) { + panic("Image on too high phys mem.\n"); + } + /* + * If the allocation is in bottom-up direction, we setup direct mapping + * in bottom-up, otherwise we setup direct mapping in top-down. + */ + if (memblock_bottom_up()) { + unsigned long kernel_end = __pa_symbol(_end); + + /* + * we need two separate calls here. This is because we want to + * allocate page tables above the kernel. So we first map + * [kernel_end, end) to make memory above the kernel be mapped + * as soon as possible. And then use page tables allocated above + * the kernel to map [ISA_END_ADDRESS, kernel_end). + */ + + printk("memory_map_bottom_up_for_iee...\n"); + memory_map_bottom_up_for_iee(kernel_end, end); + memory_map_bottom_up_for_iee(ISA_END_ADDRESS, kernel_end); + } else { + printk("memory_map_top_down_for_iee...\n"); + memory_map_top_down_for_iee(ISA_END_ADDRESS, end); + } +#ifdef CONFIG_X86_64 + if (max_pfn > max_low_pfn) { + /* can we preserve max_low_pfn ?*/ + max_low_pfn = max_pfn; + } +#else + early_ioremap_page_table_range_init(); +#endif + early_memtest(0, max_pfn_mapped << PAGE_SHIFT); +} +#else +#ifdef CONFIG_KOI +static unsigned long __init init_range_memory_mapping_for_koi( + unsigned long r_start, + unsigned long r_end) +{ + unsigned long start_pfn, end_pfn; + unsigned long mapped_ram_size = 0; + int i; + for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) { + u64 start = clamp_val(PFN_PHYS(start_pfn), r_start, r_end); + u64 end = clamp_val(PFN_PHYS(end_pfn), r_start, r_end); + if (start >= end) + continue; + /* + * if it is overlapping with brk pgt, we need to + * alloc pgt buf from memblock instead. 
+ */ + can_use_brk_pgt = max(start, (u64)pgt_buf_end<= + min(end, (u64)pgt_buf_top<> PAGE_SHIFT; + last_start = real_end; + /* + * We start from the top (end of memory) and go to the bottom. + * The memblock_find_in_range() gets us a block of RAM from the + * end of RAM in [min_pfn_mapped, max_pfn_mapped) used as new pages + * for page table. + */ + while (last_start > map_start) { + unsigned long start; + if (last_start > step_size) { + start = round_down(last_start - 1, step_size); + if (start < map_start) + start = map_start; + } else + start = map_start; + mapped_ram_size += init_range_memory_mapping_for_koi(start, + last_start); + last_start = start; + min_pfn_mapped = last_start >> PAGE_SHIFT; + if (mapped_ram_size >= step_size) + step_size = get_new_step_size(step_size); + } + if (real_end < map_end) + init_range_memory_mapping_for_koi(real_end, map_end); +} +/** + * memory_map_bottom_up - Map [map_start, map_end) bottom up + * @map_start: start address of the target memory range + * @map_end: end address of the target memory range + * + * This function will setup direct mapping for memory range + * [map_start, map_end) in bottom-up. Since we have limited the + * bottom-up allocation above the kernel, the page tables will + * be allocated just above the kernel and we map the memory + * in [map_start, map_end) in bottom-up. + */ +static void __init memory_map_bottom_up_for_koi(unsigned long map_start, + unsigned long map_end) +{ + unsigned long next, start; + unsigned long mapped_ram_size = 0; + /* step_size need to be small so pgt_buf from BRK could cover it */ + unsigned long step_size = PMD_SIZE; + start = map_start; + min_pfn_mapped = start >> PAGE_SHIFT; + /* + * We start from the bottom (@map_start) and go to the top (@map_end). + * The memblock_find_in_range() gets us a block of RAM from the + * end of RAM in [min_pfn_mapped, max_pfn_mapped) used as new pages + * for page table. + */ + while (start < map_end) { + if (step_size && map_end - start > step_size) { + next = round_up(start + 1, step_size); + if (next > map_end) + next = map_end; + } else { + next = map_end; + } + mapped_ram_size += init_range_memory_mapping_for_koi(start, next); + start = next; + if (mapped_ram_size >= step_size) + step_size = get_new_step_size(step_size); + } +} +unsigned long __ref init_memory_mapping_for_koi(unsigned long start, + unsigned long end, pgprot_t prot) +{ + struct map_range mr[NR_RANGE_MR]; + unsigned long ret = 0; + int nr_range, i; + pr_debug("init_memory_mapping_for_koi: [mem %#010lx-%#010lx]\n", + start, end - 1); + memset(mr, 0, sizeof(mr)); + nr_range = split_mem_range(mr, 0, start, end); + for (i = 0; i < nr_range; i++) + ret = kernel_physical_mapping_init_for_koi(mr[i].start, mr[i].end, + mr[i].page_size_mask, + prot); + + add_pfn_range_mapped(start >> PAGE_SHIFT, ret >> PAGE_SHIFT); + return ret >> PAGE_SHIFT; +} +void __init init_koi_mapping(void) +{ + unsigned long end; + end = max_pfn << PAGE_SHIFT; + /* the ISA range is always mapped regardless of memory holes */ + init_memory_mapping_for_koi(0, ISA_END_ADDRESS, SET_NG(PAGE_KERNEL)); + if(__pa_symbol(_end) > KOI_OFFSET) { + panic("Image on too high phys mem.\n"); + } + /* + * If the allocation is in bottom-up direction, we setup direct mapping + * in bottom-up, otherwise we setup direct mapping in top-down. + */ + if (memblock_bottom_up()) { + unsigned long kernel_end = __pa_symbol(_end); + + /* + * we need two separate calls here. This is because we want to + * allocate page tables above the kernel. 
So we first map + * [kernel_end, end) to make memory above the kernel be mapped + * as soon as possible. And then use page tables allocated above + * the kernel to map [ISA_END_ADDRESS, kernel_end). + */ + + printk("memory_map_bottom_up_for_iee...\n"); + memory_map_bottom_up_for_koi(kernel_end, end); + memory_map_bottom_up_for_koi(ISA_END_ADDRESS, kernel_end); + } else { + printk("memory_map_top_down_for_iee...\n"); + memory_map_top_down_for_koi(ISA_END_ADDRESS, end); + } + if (max_pfn > max_low_pfn) { + /* can we preserve max_low_pfn ?*/ + max_low_pfn = max_pfn; + } + early_memtest(0, max_pfn_mapped << PAGE_SHIFT); +} + +#endif /* CONFIG_KOI */ +#endif /* CONFIG_IEE */ + +#ifdef CONFIG_PTP +extern int DIRECT_MAP_SHIFT; +void __init set_iee_valid_pre_init(unsigned long addr) {} + +static void __init move_pte_table_into_iee(pmd_t *pmdp, unsigned long addr, unsigned long end) +{ + pmd_t pmd = READ_ONCE(*pmdp); + unsigned long iee_addr = (unsigned long)__phys_to_iee(__pmd_to_phys(pmd)); + // printk("move_pte_table_into_iee:\n"); + set_iee_valid_pre_init(iee_addr); +} + +static void __init move_pmd_table_into_iee(pud_t *pudp, unsigned long addr, unsigned long end) +{ + unsigned long next; + pud_t pud = READ_ONCE(*pudp); + pmd_t *pmdp; + pmd_t pmd; + + unsigned long iee_addr = (unsigned long)__phys_to_iee(__pud_to_phys(pud)); + // printk("move_pmd_table_into_iee:\n"); + set_iee_valid_pre_init(iee_addr); + + pmdp = pmd_offset(pudp, addr); + do { + next = pmd_addr_end(addr, end); + pmd = READ_ONCE(*pmdp); + if(pmd_val(pmd) & _PSE) { + continue; + } else { + move_pte_table_into_iee(pmdp, addr, next); + } + } while (pmdp++, addr = next, addr != end); +} + +static void __init move_pud_table_into_iee(p4d_t *p4dp, unsigned long addr, unsigned long end) +{ + unsigned long next; + p4d_t p4d = READ_ONCE(*p4dp); + pud_t *pudp; + pud_t pud; + + // printk("p4d_phys: 0x%16lx\n", __p4d_to_phys(p4d)); + unsigned long iee_addr = (unsigned long)__phys_to_iee(__p4d_to_phys(p4d)); + // printk("move_pud_table_into_iee:\n"); + set_iee_valid_pre_init(iee_addr); + + pudp = pud_offset(p4dp, addr); + do { + next = pud_addr_end(addr, end); + pud = READ_ONCE(*pudp); + if(pud_val(pud) & _PSE) { + // _PSE = 1 means a page, not a table + continue; + } else { + move_pmd_table_into_iee(pudp, addr, next); + } + } while (pudp++, addr = next, addr != end); +} + +static void __init move_p4d_table_into_iee(pgd_t *pgdp, unsigned long addr, unsigned long end) +{ + unsigned long next; + pgd_t pgd = READ_ONCE(*pgdp); + p4d_t *p4dp; + p4d_t p4d; + + // printk("pgdp: 0x%16lx\n", pgd.pgd); + unsigned long iee_addr = (unsigned long)__phys_to_iee(__pgd_to_phys(pgd)); + // printk("move_p4d_table_into_iee:\n"); + set_iee_valid_pre_init(iee_addr); + + p4dp = p4d_offset(pgdp, addr); + do { + next = p4d_addr_end(addr, end); + p4d = READ_ONCE(*p4dp); + /* No 512 GiB huge pages yet */ + move_pud_table_into_iee(p4dp, addr, next); + } while (p4dp++, addr = next, addr != end); +} + +static void __init init_iee_for_one_region(pgd_t *pgdir, unsigned long va_start, unsigned long va_end) +{ + unsigned long addr, end, next; + pgd_t *pgdp = pgd_offset_pgd(pgdir, va_start); + + addr = va_start & PAGE_MASK; + end = PAGE_ALIGN(va_end); + + do { + // printk("region start va: 0x%16lx\n", addr); + next = pgd_addr_end(addr, end); + move_p4d_table_into_iee(pgdp, addr, next); + } while (pgdp++, addr = next, addr != end); +} + +void __init init_iee(void) +{ + unsigned long iee_addr; + pgd_t *pgdp; + phys_addr_t start, end; + u64 i; + + // handling 1-level page table 
swapper_pg_dir + pgdp = swapper_pg_dir; + iee_addr = (unsigned long)__phys_to_iee(__pa_symbol(swapper_pg_dir)); + set_iee_valid_pre_init(iee_addr); + #if PGD_ALLOCATION_ORDER == 1 + set_iee_valid_pre_init(iee_addr + PAGE_SIZE); + #endif + + #ifdef CONFIG_X86_5LEVEL + iee_addr = (unsigned long)__phys_to_iee(__pa_symbol(level4_kernel_pgt)); + set_iee_valid_pre_init(iee_addr); + #endif + + iee_addr = (unsigned long)__phys_to_iee(__pa_symbol(level3_kernel_pgt)); + set_iee_valid_pre_init(iee_addr); + + iee_addr = (unsigned long)__phys_to_iee(__pa_symbol(level2_kernel_pgt)); + set_iee_valid_pre_init(iee_addr); + + iee_addr = (unsigned long)__phys_to_iee(__pa_symbol(level2_fixmap_pgt)); + set_iee_valid_pre_init(iee_addr); + + iee_addr = (unsigned long)__phys_to_iee(__pa_symbol(level1_fixmap_pgt)); + for (i = 0; i < FIXMAP_PMD_NUM; i++) { + set_iee_valid_pre_init(iee_addr + PAGE_SIZE * i); + } + + // handling 2/3/4-level page table for kernel + init_iee_for_one_region(pgdp, (unsigned long)_text, (unsigned long)_etext); + init_iee_for_one_region(pgdp, (unsigned long)__start_rodata, (unsigned long)__end_rodata); + init_iee_for_one_region(pgdp, (unsigned long)_sdata, (unsigned long)_edata); + init_iee_for_one_region(pgdp, (unsigned long)__bss_start, (unsigned long)__bss_stop); + + // handling page table for fixmap i.e. FIXADDR_START ~ FIXADDR_TOP + // printk("fixmap into iee:\n"); + init_iee_for_one_region(pgdp, FIXADDR_START, FIXADDR_TOP); + + // handling page table for %esp fixup stacks + // espfix_pud_page in espfix_64.c + + // handling 2/3/4-level page table for logical mem and iee + for_each_mem_range(i, &start, &end) { + if(start >= end) { + break; + } + init_iee_for_one_region(pgdp, (unsigned long)__va(start), (unsigned long)__va(end)); + init_iee_for_one_region(pgdp, (unsigned long)__phys_to_iee(start), (unsigned long)__phys_to_iee(end)); + } +} + +static void __init iee_set_pte_table_ro(pmd_t *pmdp, unsigned long addr, unsigned long end) +{ + pmd_t pmd = READ_ONCE(*pmdp); + unsigned long logical_addr = (unsigned long)__va(__pmd_to_phys(pmd)); + iee_set_logical_mem_ro(logical_addr); +} + +static void __init iee_set_pmd_table_ro(pud_t *pudp, unsigned long addr, unsigned long end) +{ + unsigned long next; + pud_t pud = READ_ONCE(*pudp); + pmd_t *pmdp; + pmd_t pmd; + unsigned long logical_addr = (unsigned long)__va(__pud_to_phys(pud)); + iee_set_logical_mem_ro(logical_addr); + + pmdp = pmd_offset(pudp, addr); + do { + next = pmd_addr_end(addr, end); + pmd = READ_ONCE(*pmdp); + if (pmd_val(pmd) & _PSE) { + continue; + } else { + iee_set_pte_table_ro(pmdp, addr, next); + } + } while (pmdp++, addr = next, addr != end); +} + +static void __init iee_set_pud_table_ro(p4d_t *p4dp, unsigned long addr, unsigned long end) +{ + unsigned long next; + p4d_t p4d = READ_ONCE(*p4dp); + pud_t *pudp; + pud_t pud; + unsigned long logical_addr = (unsigned long)__va(__p4d_to_phys(p4d)); + iee_set_logical_mem_ro(logical_addr); + + pudp = pud_offset(p4dp, addr); + do { + next = pud_addr_end(addr, end); + pud = READ_ONCE(*pudp); + if (pud_val(pud) & _PSE) { + // _PSE = 1 means a page, not a table + continue; + } else { + iee_set_pmd_table_ro(pudp, addr, next); + } + } while (pudp++, addr = next, addr != end); +} + +static void __init iee_set_p4d_table_ro(pgd_t *pgdp, unsigned long addr, unsigned long end) +{ + unsigned long next; + pgd_t pgd = READ_ONCE(*pgdp); + p4d_t *p4dp; + p4d_t p4d; + unsigned long logical_addr = (unsigned long)__va(__pgd_to_phys(pgd)); + iee_set_logical_mem_ro(logical_addr); + + p4dp = 
p4d_offset(pgdp, addr); + do { + next = p4d_addr_end(addr, end); + p4d = READ_ONCE(*p4dp); + /* No 512 GiB huge pages yet */ + iee_set_pud_table_ro(p4dp, addr, next); + } while (p4dp++, addr = next, addr != end); +} + +static void __init iee_mark_pgtable_for_one_region_ro(pgd_t *pgdir, unsigned long va_start, unsigned long va_end) +{ + unsigned long addr, end, next; + pgd_t *pgdp = pgd_offset_pgd(pgdir, va_start); + + addr = va_start & PAGE_MASK; + end = PAGE_ALIGN(va_end); + + do { + next = pgd_addr_end(addr, end); + iee_set_p4d_table_ro(pgdp, addr, next); + } while (pgdp++, addr = next, addr != end); +} + +// Mark pgtable outside as RO. +void __init iee_mark_all_lm_pgtable_ro(void) +{ + unsigned long logical_addr; + phys_addr_t start, end; + u64 i; + pgd_t *pgdp; + + // handling 1-level page table swapper_pg_dir + pgdp = swapper_pg_dir; + iee_set_logical_mem_ro((unsigned long)swapper_pg_dir); + logical_addr = (unsigned long)__va(__pa_symbol(swapper_pg_dir)); + iee_set_logical_mem_ro(logical_addr); + + // handling 2/3/4/5-level page table for kernel + iee_mark_pgtable_for_one_region_ro(pgdp, (unsigned long)_text, (unsigned long)_etext); + iee_mark_pgtable_for_one_region_ro(pgdp, (unsigned long)__start_rodata, (unsigned long)__end_rodata); + iee_mark_pgtable_for_one_region_ro(pgdp, (unsigned long)_sdata, (unsigned long)_edata); + iee_mark_pgtable_for_one_region_ro(pgdp, (unsigned long)__bss_start, (unsigned long)__bss_stop); + + // handling 2/3/4/5-level statically allocated page table + #ifdef CONFIG_X86_5LEVEL + iee_set_logical_mem_ro((unsigned long)level4_kernel_pgt); + logical_addr = (unsigned long)__va(__pa_symbol(level4_kernel_pgt)); + iee_set_logical_mem_ro(logical_addr); + #endif + + iee_set_logical_mem_ro((unsigned long)level3_kernel_pgt); + logical_addr = (unsigned long)__va(__pa_symbol(level3_kernel_pgt)); + iee_set_logical_mem_ro(logical_addr); + + iee_set_logical_mem_ro((unsigned long)level2_kernel_pgt); + logical_addr = (unsigned long)__va(__pa_symbol(level2_kernel_pgt)); + iee_set_logical_mem_ro(logical_addr); + + iee_set_logical_mem_ro((unsigned long)level2_fixmap_pgt); + logical_addr = (unsigned long)__va(__pa_symbol(level2_fixmap_pgt)); + iee_set_logical_mem_ro(logical_addr); + + iee_set_logical_mem_ro((unsigned long)level1_fixmap_pgt); + logical_addr = (unsigned long)__va(__pa_symbol(level1_fixmap_pgt)); + for (i = 0; i < FIXMAP_PMD_NUM; i++) { + iee_set_logical_mem_ro(logical_addr + PAGE_SIZE * i); + } + + // handling 2/3/4-level page table for logical mem and iee + for_each_mem_range(i, &start, &end) { + if (start >= end) + break; + /* + * The linear map must allow allocation tags reading/writing + * if MTE is present. Otherwise, it has the same attributes as + * PAGE_KERNEL. + */ + iee_mark_pgtable_for_one_region_ro(pgdp, (unsigned long)__va(start), (unsigned long)__va(end)); + iee_mark_pgtable_for_one_region_ro(pgdp, (unsigned long)__phys_to_iee(start), (unsigned long)__phys_to_iee(end)); + } +} +#endif /* CONFIG_PTP */ + /* * Initialize an mm_struct to be used during poking and a pointer to be used * during patching. 
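The init_iee() and iee_mark_all_lm_pgtable_ro() helpers added above share one traversal shape: for a kernel VA range, descend pgd -> p4d -> pud -> pmd, apply an action to each next-level table page, and stop at _PSE (leaf) entries. Below is a condensed sketch of that shape, with a hypothetical apply_to_table() callback standing in for set_iee_valid_pre_init() / iee_set_logical_mem_ro() (the real helpers hand those callbacks the table's __phys_to_iee() or __va() alias, respectively):

static void __init walk_p4d_level(pgd_t *pgdp, unsigned long addr,
				  unsigned long end,
				  void (*apply_to_table)(phys_addr_t table_pa))
{
	p4d_t *p4dp = p4d_offset(pgdp, addr);
	unsigned long next;

	/* Act on the p4d table page this pgd entry points to. */
	apply_to_table(__pgd_to_phys(READ_ONCE(*pgdp)));

	do {
		next = p4d_addr_end(addr, end);
		/*
		 * The pud and pmd levels repeat the same step: act on the
		 * next-level table, skipping entries with _PSE set (leaves).
		 */
	} while (p4dp++, addr = next, addr != end);
}

static void __init walk_region_tables(pgd_t *pgdir, unsigned long va_start,
				      unsigned long va_end,
				      void (*apply_to_table)(phys_addr_t table_pa))
{
	unsigned long addr = va_start & PAGE_MASK;
	unsigned long end = PAGE_ALIGN(va_end);
	unsigned long next;
	pgd_t *pgdp = pgd_offset_pgd(pgdir, addr);

	do {
		next = pgd_addr_end(addr, end);
		walk_p4d_level(pgdp, addr, next, apply_to_table);
	} while (pgdp++, addr = next, addr != end);
}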
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index aa69353da49f..2d9b860fd0a4 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -60,6 +60,10 @@ #include "ident_map.c" +#ifdef CONFIG_PTP +#include +#endif + #define DEFINE_POPULATE(fname, type1, type2, init) \ static inline void fname##_init(struct mm_struct *mm, \ type1##_t *arg1, type2##_t *arg2, bool init) \ @@ -90,6 +94,38 @@ DEFINE_ENTRY(pud, pud, init) DEFINE_ENTRY(pmd, pmd, init) DEFINE_ENTRY(pte, pte, init) +#ifdef CONFIG_PTP +#define DEFINE_IEE_POPULATE(fname, type1, type2, init) \ +static inline void iee_##fname##_init(struct mm_struct *mm, \ + type1##_t *arg1, type2##_t *arg2, bool init) \ +{ \ + if (init) \ + iee_##fname##_safe_pre_init(mm, arg1, arg2); \ + else \ + iee_##fname##_pre_init(mm, arg1, arg2); \ +} + +DEFINE_IEE_POPULATE(p4d_populate, p4d, pud, init) +DEFINE_IEE_POPULATE(pgd_populate, pgd, p4d, init) +DEFINE_IEE_POPULATE(pud_populate, pud, pmd, init) +DEFINE_IEE_POPULATE(pmd_populate_kernel, pmd, pte, init) + +#define DEFINE_IEE_ENTRY(type1, type2, init) \ +static inline void iee_set_##type1##_init(type1##_t *arg1, \ + type2##_t arg2, bool init) \ +{ \ + if (init) \ + iee_set_##type1##_safe_pre_init(arg1, arg2); \ + else \ + iee_set_##type1##_pre_init(arg1, arg2); \ +} + +DEFINE_IEE_ENTRY(p4d, p4d, init) +DEFINE_IEE_ENTRY(pud, pud, init) +DEFINE_IEE_ENTRY(pmd, pmd, init) +DEFINE_IEE_ENTRY(pte, pte, init) +#endif + static inline pgprot_t prot_sethuge(pgprot_t prot) { WARN_ON_ONCE(pgprot_val(prot) & _PAGE_PAT); @@ -147,6 +183,9 @@ static void sync_global_pgds_l5(unsigned long start, unsigned long end) spin_lock(&pgd_lock); list_for_each_entry(page, &pgd_list, lru) { + #ifdef CONFIG_PTP + page = iee_ptdesc_to_page(page); + #endif pgd_t *pgd; spinlock_t *pgt_lock; @@ -188,6 +227,9 @@ static void sync_global_pgds_l4(unsigned long start, unsigned long end) spin_lock(&pgd_lock); list_for_each_entry(page, &pgd_list, lru) { + #ifdef CONFIG_PTP + page = iee_ptdesc_to_page(page); + #endif pgd_t *pgd; p4d_t *p4d; spinlock_t *pgt_lock; @@ -243,6 +285,11 @@ static __ref void *spp_getpage(void) pr_debug("spp_getpage %p\n", ptr); + #ifdef CONFIG_PTP + unsigned long iee_addr = (unsigned long)__phys_to_iee(__pa(ptr)); + set_iee_page_valid(iee_addr); + #endif + return ptr; } @@ -442,7 +489,11 @@ void __init cleanup_highmap(void) if (pmd_none(*pmd)) continue; if (vaddr < (unsigned long) _text || vaddr > end) + #ifdef CONFIG_PTP + iee_set_pmd_pre_init(pmd, __pmd(0)); + #else set_pmd(pmd, __pmd(0)); + #endif } } @@ -470,7 +521,11 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end, E820_TYPE_RAM) && !e820__mapped_any(paddr & PAGE_MASK, paddr_next, E820_TYPE_RESERVED_KERN)) + #ifdef CONFIG_PTP + iee_set_pte_init(pte, __pte(0), init); + #else set_pte_init(pte, __pte(0), init); + #endif continue; } @@ -490,7 +545,11 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end, pr_info(" pte=%p addr=%lx pte=%016lx\n", pte, paddr, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL).pte); pages++; + #ifdef CONFIG_PTP + iee_set_pte_init(pte, pfn_pte(paddr >> PAGE_SHIFT, prot), init); + #else set_pte_init(pte, pfn_pte(paddr >> PAGE_SHIFT, prot), init); + #endif paddr_last = (paddr & PAGE_MASK) + PAGE_SIZE; } @@ -525,7 +584,11 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end, E820_TYPE_RAM) && !e820__mapped_any(paddr & PMD_MASK, paddr_next, E820_TYPE_RESERVED_KERN)) + #ifdef CONFIG_PTP + iee_set_pmd_init(pmd, __pmd(0), init); + #else set_pmd_init(pmd, 
__pmd(0), init); + #endif continue; } @@ -563,9 +626,15 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end, if (page_size_mask & (1<> PAGE_SHIFT, prot_sethuge(prot)), + init); + #else set_pmd_init(pmd, pfn_pmd(paddr >> PAGE_SHIFT, prot_sethuge(prot)), init); + #endif spin_unlock(&init_mm.page_table_lock); paddr_last = paddr_next; continue; @@ -575,7 +644,11 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end, paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot, init); spin_lock(&init_mm.page_table_lock); + #ifdef CONFIG_PTP + iee_pmd_populate_kernel_init(&init_mm, pmd, pte, init); + #else pmd_populate_kernel_init(&init_mm, pmd, pte, init); + #endif spin_unlock(&init_mm.page_table_lock); } update_page_count(PG_LEVEL_2M, pages); @@ -612,7 +685,11 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, E820_TYPE_RAM) && !e820__mapped_any(paddr & PUD_MASK, paddr_next, E820_TYPE_RESERVED_KERN)) + #ifdef CONFIG_PTP + iee_set_pud_init(pud, __pud(0), init); + #else set_pud_init(pud, __pud(0), init); + #endif continue; } @@ -649,9 +726,15 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, if (page_size_mask & (1<> PAGE_SHIFT, prot_sethuge(prot)), + init); + #else set_pud_init(pud, pfn_pud(paddr >> PAGE_SHIFT, prot_sethuge(prot)), init); + #endif spin_unlock(&init_mm.page_table_lock); paddr_last = paddr_next; continue; @@ -662,7 +745,11 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, page_size_mask, prot, init); spin_lock(&init_mm.page_table_lock); + #ifdef CONFIG_PTP + iee_pud_populate_init(&init_mm, pud, pmd, init); + #else pud_populate_init(&init_mm, pud, pmd, init); + #endif spin_unlock(&init_mm.page_table_lock); } @@ -715,7 +802,11 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, page_size_mask, prot, init); spin_lock(&init_mm.page_table_lock); + #ifdef CONFIG_PTP + iee_p4d_populate_init(&init_mm, p4d, pud, init); + #else p4d_populate_init(&init_mm, p4d, pud, init); + #endif spin_unlock(&init_mm.page_table_lock); } @@ -757,10 +848,19 @@ __kernel_physical_mapping_init(unsigned long paddr_start, spin_lock(&init_mm.page_table_lock); if (pgtable_l5_enabled()) + #ifdef CONFIG_PTP + iee_pgd_populate_init(&init_mm, pgd, p4d, init); + #else pgd_populate_init(&init_mm, pgd, p4d, init); + #endif else + #ifdef CONFIG_PTP + iee_p4d_populate_init(&init_mm, p4d_offset(pgd, vaddr), + (pud_t *) p4d, init); + #else p4d_populate_init(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d, init); + #endif spin_unlock(&init_mm.page_table_lock); pgd_changed = true; @@ -788,6 +888,118 @@ kernel_physical_mapping_init(unsigned long paddr_start, page_size_mask, prot, true); } +#ifdef CONFIG_IEE +static unsigned long __meminit +__kernel_physical_mapping_init_for_iee(unsigned long paddr_start, + unsigned long paddr_end, + unsigned long page_size_mask, + pgprot_t prot, bool init) +{ + bool pgd_changed = false; + unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last; + paddr_last = paddr_end; + vaddr = (unsigned long)__phys_to_iee(paddr_start); + vaddr_end = (unsigned long)__phys_to_iee(paddr_end); + vaddr_start = vaddr; + for (; vaddr < vaddr_end; vaddr = vaddr_next) { + pgd_t *pgd = pgd_offset_k(vaddr); + p4d_t *p4d; + vaddr_next = (vaddr & PGDIR_MASK) + PGDIR_SIZE; + if (pgd_val(*pgd)) { + p4d = (p4d_t *)pgd_page_vaddr(*pgd); + paddr_last = phys_p4d_init(p4d, __iee_pa(vaddr), + __iee_pa(vaddr_end), + page_size_mask, prot, init); + 
continue; + } + p4d = alloc_low_page(); + paddr_last = phys_p4d_init(p4d, __iee_pa(vaddr), + __iee_pa(vaddr_end), + page_size_mask, prot, init); + spin_lock(&init_mm.page_table_lock); + if (pgtable_l5_enabled()) + #ifdef CONFIG_PTP + iee_pgd_populate_init(&init_mm, pgd, p4d, init); + #else + pgd_populate_init(&init_mm, pgd, p4d, init); + #endif + else + #ifdef CONFIG_PTP + iee_p4d_populate_init(&init_mm, p4d_offset(pgd, vaddr), + (pud_t *) p4d, init); + #else + p4d_populate_init(&init_mm, p4d_offset(pgd, vaddr), + (pud_t *) p4d, init); + #endif + spin_unlock(&init_mm.page_table_lock); + pgd_changed = true; + } + if (pgd_changed) + sync_global_pgds(vaddr_start, vaddr_end - 1); + return paddr_last; +} +unsigned long __meminit +kernel_physical_mapping_init_for_iee(unsigned long paddr_start, + unsigned long paddr_end, + unsigned long page_size_mask, pgprot_t prot) +{ + return __kernel_physical_mapping_init_for_iee(paddr_start, paddr_end, + page_size_mask, prot, true); +} +#else +#ifdef CONFIG_KOI +static unsigned long __meminit +__kernel_physical_mapping_init_for_koi(unsigned long paddr_start, + unsigned long paddr_end, + unsigned long page_size_mask, + pgprot_t prot, bool init) +{ + bool pgd_changed = false; + unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last; + paddr_last = paddr_end; + vaddr = (unsigned long)__phys_to_koi(paddr_start); + vaddr_end = (unsigned long)__phys_to_koi(paddr_end); + vaddr_start = vaddr; + for (; vaddr < vaddr_end; vaddr = vaddr_next) { + pgd_t *pgd = pgd_offset_k(vaddr); + p4d_t *p4d; + vaddr_next = (vaddr & PGDIR_MASK) + PGDIR_SIZE; + if (pgd_val(*pgd)) { + p4d = (p4d_t *)pgd_page_vaddr(*pgd); + paddr_last = phys_p4d_init(p4d, __koi_pa(vaddr), + __koi_pa(vaddr_end), + page_size_mask, prot, init); + continue; + } + p4d = alloc_low_page(); + paddr_last = phys_p4d_init(p4d, __koi_pa(vaddr), + __koi_pa(vaddr_end), + page_size_mask, prot, init); + spin_lock(&init_mm.page_table_lock); + if (pgtable_l5_enabled()) + pgd_populate_init(&init_mm, pgd, p4d, init); + else + p4d_populate_init(&init_mm, p4d_offset(pgd, vaddr), + (pud_t *) p4d, init); + spin_unlock(&init_mm.page_table_lock); + pgd_changed = true; + } + if (pgd_changed) + sync_global_pgds(vaddr_start, vaddr_end - 1); + return paddr_last; +} +unsigned long __meminit +kernel_physical_mapping_init_for_koi(unsigned long paddr_start, + unsigned long paddr_end, + unsigned long page_size_mask, pgprot_t prot) +{ + return __kernel_physical_mapping_init_for_koi(paddr_start, paddr_end, + page_size_mask, prot, true); +} + +#endif +#endif /* CONFIG_IEE*/ + /* * This function is similar to kernel_physical_mapping_init() above with the * exception that it uses set_{pud,pmd}() instead of the set_{pud,pte}_safe() @@ -811,6 +1023,131 @@ void __init initmem_init(void) } #endif +#ifdef CONFIG_PTP +static void * __init iee_ptdesc_alloc_block_zero(unsigned long size, int node, bool is_pgtable) +{ + // void *p = memblock_alloc_try_nid_raw(size, size, __pa(MAX_DMA_ADDRESS), + // MEMBLOCK_ALLOC_ACCESSIBLE, node); + void *p = alloc_low_page(); + + if (!p) + return NULL; + memset(p, 0, size); + + if (is_pgtable) + set_iee_page_valid((unsigned long)__phys_to_iee(__pa(p))); + + return p; +} + +static pte_t * __init iee_ptdesc_pte_populate(pmd_t *pmd, unsigned long addr, int node) +{ + pte_t *pte = pte_offset_kernel(pmd, addr); + if (pte_none(*pte)) { + void *p = iee_ptdesc_alloc_block_zero(PAGE_SIZE, node, false); + if (!p) + return NULL; + + pte_t entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); + set_pte_at(&init_mm, 
addr, pte, entry); + } + return pte; +} + +static pmd_t * __init iee_ptdesc_pmd_populate(pud_t *pud, unsigned long addr, int node) +{ + pmd_t *pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + void *p = iee_ptdesc_alloc_block_zero(PAGE_SIZE, node, true); + if (!p) + return NULL; + pmd_populate_kernel(&init_mm, pmd, p); + } + return pmd; +} + +static pud_t * __init iee_ptdesc_pud_populate(p4d_t *p4d, unsigned long addr, int node) +{ + pud_t *pud = pud_offset(p4d, addr); + if (pud_none(*pud)) { + void *p = iee_ptdesc_alloc_block_zero(PAGE_SIZE, node, true); + if (!p) + return NULL; + pud_populate(&init_mm, pud, p); + } + return pud; +} + +static p4d_t * __init iee_ptdesc_p4d_populate(pgd_t *pgd, unsigned long addr, int node) +{ + p4d_t *p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) { + void *p = iee_ptdesc_alloc_block_zero(PAGE_SIZE, node, true); + if (!p) + return NULL; + p4d_populate(&init_mm, p4d, p); + } + return p4d; +} + +static pgd_t * __init iee_ptdesc_pgd_populate(unsigned long addr, int node) +{ + pgd_t *pgd = pgd_offset_k(addr); + if (pgd_none(*pgd)) { + void *p = iee_ptdesc_alloc_block_zero(PAGE_SIZE, node, true); + if (!p) + return NULL; + pgd_populate(&init_mm, pgd, p); + } + return pgd; +} + +static pte_t * __init iee_ptdesc_populate(unsigned long addr) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + int node = NUMA_NO_NODE; + + pgd = iee_ptdesc_pgd_populate(addr, node); + if (!pgd) + return NULL; + p4d = iee_ptdesc_p4d_populate(pgd, addr, node); + if (!p4d) + return NULL; + pud = iee_ptdesc_pud_populate(p4d, addr, node); + if (!pud) + return NULL; + pmd = iee_ptdesc_pmd_populate(pud, addr, node); + if (!pmd) + return NULL; + pte = iee_ptdesc_pte_populate(pmd, addr, node); + return pte; +} + +int __init iee_ptdesc_sparse_init(void) +{ + unsigned long start_pfn, end_pfn; + int i, nid; + for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { + // pr_err("IEE: start_pfn, end_pfn = %lx, %lx", start_pfn, end_pfn); + u64 ptdesc_start = ALIGN_DOWN((u64)(__pfn_to_ptdesc(start_pfn)), PAGE_SIZE); + u64 end = ALIGN_DOWN((u64)(__pfn_to_ptdesc(end_pfn)), PAGE_SIZE); + unsigned long addr = ptdesc_start; + pte_t *pte; + for (; addr < end; addr += PAGE_SIZE) { + // pr_err("IEE: addr = %lx", addr); + pte = iee_ptdesc_populate(addr); + if (!pte) + return -ENOMEM; + } + } + return 0; +} +#endif + void __init paging_init(void) { sparse_init(); @@ -824,6 +1161,10 @@ void __init paging_init(void) node_clear_state(0, N_MEMORY); node_clear_state(0, N_NORMAL_MEMORY); + #ifdef CONFIG_PTP + iee_ptdesc_sparse_init(); + #endif + zone_sizes_init(); } diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index aa7d279321ea..dc0f4f13a0c6 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -888,7 +888,11 @@ void __init early_ioremap_init(void) pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); memset(bm_pte, 0, sizeof(bm_pte)); + #ifdef CONFIG_PTP + iee_pmd_populate_kernel_pre_init(&init_mm, pmd, bm_pte); + #else pmd_populate_kernel(&init_mm, pmd, bm_pte); + #endif /* * The boot-ioremap range spans multiple pmds, for which @@ -929,8 +933,41 @@ void __init __early_set_fixmap(enum fixed_addresses idx, pgprot_val(flags) &= __supported_pte_mask; if (pgprot_val(flags)) + #ifdef CONFIG_PTP + iee_set_pte_pre_init(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); + #else set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); + #endif else + #ifdef CONFIG_PTP + iee_set_pte_pre_init(pte, __pte(0)); + #else pte_clear(&init_mm, addr, pte); + #endif 
flush_tlb_one_kernel(addr); } + +#ifdef CONFIG_PTP +void __init __iee_set_fixmap_pre_init(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags) +{ + unsigned long addr = __fix_to_virt(idx); + pte_t *pte; + + if (idx >= __end_of_fixed_addresses) { + BUG(); + return; + } + pte = early_ioremap_pte(addr); + + /* Sanitize 'prot' against any unsupported bits: */ + pgprot_val(flags) &= __supported_pte_mask; + + if (pgprot_val(flags)) + iee_set_pte_pre_init(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); + else + iee_set_pte_pre_init(pte, __pte(0)); + // pte_clear(&init_mm, addr, pte); + flush_tlb_one_kernel(addr); +} +#endif diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 230f1dee4f09..39d227daacfa 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -156,6 +156,23 @@ void __init kernel_randomize_memory(void) vaddr = round_up(vaddr + 1, PUD_SIZE); remain_entropy -= entropy; } + + #ifdef CONFIG_IEE + extern unsigned long iee_offset; + extern unsigned long IEE_OFFSET; + iee_offset = *kaslr_regions[0].base - vaddr_start + IEE_OFFSET; + #endif /* CONFIG_IEE*/ + #ifdef CONFIG_KOI + extern unsigned long koi_offset; + extern unsigned long KOI_OFFSET; + koi_offset = *kaslr_regions[0].base - vaddr_start + KOI_OFFSET; + #endif /* CONFIG_KOI*/ + + #ifdef CONFIG_PTP + unsigned long *iee_ptdesc_kaslr_base = &iee_ptdesc_base; + *iee_ptdesc_kaslr_base = *kaslr_regions[2].base + (kaslr_regions[2].size_tb << TB_SHIFT); + pr_err("IEE: ptdesc base with kaslr: %lx\n", iee_ptdesc_base); + #endif } void __meminit init_trampoline_kaslr(void) diff --git a/arch/x86/mm/mm_internal.h b/arch/x86/mm/mm_internal.h index 3f37b5c80bb3..2dc3f96fbc26 100644 --- a/arch/x86/mm/mm_internal.h +++ b/arch/x86/mm/mm_internal.h @@ -14,6 +14,15 @@ unsigned long kernel_physical_mapping_init(unsigned long start, unsigned long end, unsigned long page_size_mask, pgprot_t prot); +#ifdef CONFIG_IEE +unsigned long kernel_physical_mapping_init_for_iee(unsigned long paddr_start, + unsigned long paddr_end, + unsigned long page_size_mask, pgprot_t prot); +#else +unsigned long kernel_physical_mapping_init_for_koi(unsigned long paddr_start, + unsigned long paddr_end, + unsigned long page_size_mask, pgprot_t prot); +#endif /* CONFIG_IEE*/ unsigned long kernel_physical_mapping_change(unsigned long start, unsigned long end, unsigned long page_size_mask); diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 2d850f6bae70..067e3195782d 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -37,6 +37,10 @@ #include "../mm_internal.h" +#ifdef CONFIG_PTP +#include +#endif + /* * The current flushing context - we pass it instead of 5 arguments: */ @@ -841,6 +845,9 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) struct page *page; list_for_each_entry(page, &pgd_list, lru) { + #ifdef CONFIG_PTP + page = iee_ptdesc_to_page(page); + #endif pgd_t *pgd; p4d_t *p4d; pud_t *pud; @@ -1151,7 +1158,32 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address, * pagetable protections, the actual ptes set above control the * primary protection behavior: */ + #ifdef CONFIG_PTP + set_pmd((pmd_t *)kpte, __pmd(pte_val(mk_pte(base, __pgprot(_KERNPG_TABLE))))); + #ifdef CONFIG_X86_32 + if (!SHARED_KERNEL_PMD) { + struct page *page; + + list_for_each_entry(page, &pgd_list, lru) { + #ifdef CONFIG_PTP + page = iee_ptdesc_to_page(page); + #endif + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + pgd = (pgd_t *)page_address(page) + pgd_index(address); + 
p4d = p4d_offset(pgd, address); + pud = pud_offset(p4d, address); + pmd = pmd_offset(pud, address); + set_pmd(pmd, mk_pmd(base, __pgprot(_KERNPG_TABLE))); + } + } + #endif + #else __set_pmd_pte(kpte, address, mk_pte(base, __pgprot(_KERNPG_TABLE))); + #endif /* * Do a global flush tlb after splitting the large page @@ -1181,17 +1213,33 @@ static int split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address) { struct page *base; + #ifdef CONFIG_PTP + pte_t *pbase; + #endif if (!debug_pagealloc_enabled()) spin_unlock(&cpa_lock); + + #ifdef CONFIG_PTP + /* without gfp_zero mask, set_pte cannot pass privilege check*/ + base = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0); + pbase = (pte_t *)page_address(base); + set_iee_page((unsigned long)pbase, 0); + #else base = alloc_pages(GFP_KERNEL, 0); + #endif + if (!debug_pagealloc_enabled()) spin_lock(&cpa_lock); if (!base) return -ENOMEM; - if (__split_large_page(cpa, kpte, address, base)) + if (__split_large_page(cpa, kpte, address, base)) { + #ifdef CONFIG_PTP + unset_iee_page((unsigned long)pbase, 0); + #endif __free_page(base); + } return 0; } @@ -1204,7 +1252,11 @@ static bool try_to_free_pte_page(pte_t *pte) if (!pte_none(pte[i])) return false; + #ifdef CONFIG_PTP + free_iee_pgtable_page(pte); + #else free_page((unsigned long)pte); + #endif return true; } @@ -1339,6 +1391,10 @@ static int alloc_pte_page(pmd_t *pmd) if (!pte) return -1; + #ifdef CONFIG_PTP + set_iee_page((unsigned long)pte, 0); + #endif + set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE)); return 0; } @@ -1348,6 +1404,10 @@ static int alloc_pmd_page(pud_t *pud) pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL); if (!pmd) return -1; + + #ifdef CONFIG_PTP + set_iee_page((unsigned long)pmd, 0); + #endif set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); return 0; @@ -1542,6 +1602,10 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL); if (!p4d) return -1; + + #ifdef CONFIG_PTP + set_iee_page((unsigned long)p4d, 0); + #endif set_pgd(pgd_entry, __pgd(__pa(p4d) | _KERNPG_TABLE)); } @@ -1554,7 +1618,11 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) pud = (pud_t *)get_zeroed_page(GFP_KERNEL); if (!pud) return -1; - + + #ifdef CONFIG_PTP + set_iee_page((unsigned long)pud, 0); + #endif + set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE)); } diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 8e1ef5345b7a..5ee71c7282ed 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -7,6 +7,12 @@ #include #include +#ifdef CONFIG_PTP +#include +#include +#include +#endif + #ifdef CONFIG_DYNAMIC_PHYSICAL_MASK phys_addr_t physical_mask __ro_after_init = (1ULL << __PHYSICAL_MASK_SHIFT) - 1; EXPORT_SYMBOL(physical_mask); @@ -94,14 +100,22 @@ static inline void pgd_list_add(pgd_t *pgd) { struct ptdesc *ptdesc = virt_to_ptdesc(pgd); + #ifdef CONFIG_PTP + list_add(&(iee_ptdesc(ptdesc)->pt_list), &pgd_list); + #else list_add(&ptdesc->pt_list, &pgd_list); + #endif } static inline void pgd_list_del(pgd_t *pgd) { struct ptdesc *ptdesc = virt_to_ptdesc(pgd); + #ifdef CONFIG_PTP + list_del(&(iee_ptdesc(ptdesc)->pt_list)); + #else list_del(&ptdesc->pt_list); + #endif } #define UNSHARED_PTRS_PER_PGD \ @@ -112,12 +126,22 @@ static inline void pgd_list_del(pgd_t *pgd) static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm) { + #ifdef CONFIG_PTP + struct ptdesc *ptdesc = virt_to_ptdesc(pgd); + (iee_ptdesc(ptdesc))->pt_mm = mm; + #else virt_to_ptdesc(pgd)->pt_mm = mm; + #endif } struct mm_struct 
*pgd_page_get_mm(struct page *page) { + #ifdef CONFIG_PTP + struct ptdesc *ptdesc = page_ptdesc(page); + return (iee_ptdesc(ptdesc))->pt_mm; + #else return page_ptdesc(page)->pt_mm; + #endif } static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd) @@ -417,16 +441,44 @@ static inline void _pgd_free(pgd_t *pgd) static inline pgd_t *_pgd_alloc(void) { + #ifdef CONFIG_PTP + return (pgd_t *)get_iee_pgd_page(GFP_PGTABLE_USER); + #else return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER, PGD_ALLOCATION_ORDER); + #endif } static inline void _pgd_free(pgd_t *pgd) { + #ifdef CONFIG_PTP + struct page *page = virt_to_page((void *)pgd); + #if PGD_ALLOCATION_ORDER == 1 + page = virt_to_page((unsigned long)pgd + PAGE_SIZE); + #endif + free_iee_pgd_page(pgd); + #else free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER); + #endif } #endif /* CONFIG_X86_PAE */ +#ifdef CONFIG_KOI +pgd_t *koi_pgd_alloc(void) +{ + pgd_t *pgd; +#ifdef CONFIG_PTP + pgd = (pgd_t *)__get_free_page(GFP_PGTABLE_KERNEL); + unsigned long iee_addr = (unsigned long)__phys_to_iee(__pa(pgd)); + set_iee_page_valid(iee_addr); + iee_set_logical_mem_ro((unsigned long)pgd); +#else + pgd = (pgd_t *)__get_free_page(GFP_PGTABLE_KERNEL); +#endif + return pgd; +} +#endif + pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *pgd; @@ -557,8 +609,13 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma, int ret = 0; if (pte_young(*ptep)) + #ifdef CONFIG_PTP + ret = iee_test_and_clear_bit(_PAGE_BIT_ACCESSED, + (unsigned long *) &ptep->pte); + #else ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, (unsigned long *) &ptep->pte); + #endif return ret; } @@ -570,8 +627,13 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma, int ret = 0; if (pmd_young(*pmdp)) + #ifdef CONFIG_PTP + ret = iee_test_and_clear_bit(_PAGE_BIT_ACCESSED, + (unsigned long *)pmdp); + #else ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, (unsigned long *)pmdp); + #endif return ret; } @@ -584,8 +646,13 @@ int pudp_test_and_clear_young(struct vm_area_struct *vma, int ret = 0; if (pud_young(*pudp)) + #ifdef CONFIG_PTP + ret = iee_test_and_clear_bit(_PAGE_BIT_ACCESSED, + (unsigned long *)pudp); + #else ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, (unsigned long *)pudp); + #endif return ret; } @@ -833,7 +900,11 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr) for (i = 0; i < PTRS_PER_PMD; i++) { if (!pmd_none(pmd_sv[i])) { pte = (pte_t *)pmd_page_vaddr(pmd_sv[i]); + #ifdef CONFIG_PTP + free_iee_pgtable_page(pte); + #else free_page((unsigned long)pte); + #endif } } @@ -863,7 +934,11 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) /* INVLPG to clear all paging-structure caches */ flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1); + #ifdef CONFIG_PTP + free_iee_pgtable_page(pte); + #else free_page((unsigned long)pte); + #endif return 1; } diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 83a6bdf0b498..759da9856751 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -39,6 +39,10 @@ #include #include +#ifdef CONFIG_PTP +#include +#endif + #undef pr_fmt #define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt @@ -139,7 +143,11 @@ pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) * The user page tables get the full PGD, accessible from * userspace: */ + #ifdef CONFIG_PTP + iee_rw_gate(IEE_OP_SET_PGD, kernel_to_user_pgdp(pgdp), pgd); + #else kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd; + #endif /* * If this is normal user memory, make it NX in the kernel @@ -182,6 +190,10 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address) unsigned long new_p4d_page = 
__get_free_page(gfp); if (WARN_ON_ONCE(!new_p4d_page)) return NULL; + + #ifdef CONFIG_PTP + set_iee_page((unsigned long)new_p4d_page, 0); + #endif set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page))); } @@ -211,6 +223,10 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) unsigned long new_pud_page = __get_free_page(gfp); if (WARN_ON_ONCE(!new_pud_page)) return NULL; + + #ifdef CONFIG_PTP + set_iee_page((unsigned long)new_pud_page, 0); + #endif set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page))); } @@ -225,6 +241,10 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) unsigned long new_pmd_page = __get_free_page(gfp); if (WARN_ON_ONCE(!new_pmd_page)) return NULL; + + #ifdef CONFIG_PTP + set_iee_page((unsigned long)new_pmd_page, 0); + #endif set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page))); } @@ -266,6 +286,10 @@ static pte_t *pti_user_pagetable_walk_pte(unsigned long address, bool late_text) unsigned long new_pte_page = __get_free_page(gfp); if (!new_pte_page) return NULL; + + #ifdef CONFIG_PTP + set_iee_page((unsigned long)new_pte_page, 0); + #endif set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page))); } @@ -292,7 +316,11 @@ static void __init pti_setup_vsyscall(void) if (WARN_ON(!target_pte)) return; + #ifdef CONFIG_PTP + set_pte(target_pte, *pte); + #else *target_pte = *pte; + #endif set_vsyscall_pgtable_user_bits(kernel_to_user_pgdp(swapper_pg_dir)); } #else @@ -370,14 +398,22 @@ pti_clone_pgtable(unsigned long start, unsigned long end, * code that only set this bit when supported. */ if (boot_cpu_has(X86_FEATURE_PGE)) + #ifdef CONFIG_PTP + set_pmd(pmd, pmd_set_flags(*pmd, _PAGE_GLOBAL)); + #else *pmd = pmd_set_flags(*pmd, _PAGE_GLOBAL); + #endif /* * Copy the PMD. That is, the kernelmode and usermode * tables will share the last-level page tables of this * address range */ + #ifdef CONFIG_PTP + set_pmd(target_pmd, *pmd); + #else *target_pmd = *pmd; + #endif addr = round_up(addr + 1, PMD_SIZE); @@ -401,10 +437,18 @@ pti_clone_pgtable(unsigned long start, unsigned long end, /* Set GLOBAL bit in both PTEs */ if (boot_cpu_has(X86_FEATURE_PGE)) + #ifdef CONFIG_PTP + set_pte(pte, pte_set_flags(*pte, _PAGE_GLOBAL)); + #else *pte = pte_set_flags(*pte, _PAGE_GLOBAL); + #endif /* Clone the PTE */ + #ifdef CONFIG_PTP + set_pte(target_pte, *pte); + #else *target_pte = *pte; + #endif addr = round_up(addr + 1, PAGE_SIZE); @@ -430,7 +474,11 @@ static void __init pti_clone_p4d(unsigned long addr) kernel_pgd = pgd_offset_k(addr); kernel_p4d = p4d_offset(kernel_pgd, addr); + #ifdef CONFIG_PTP + set_p4d(user_p4d, *kernel_p4d); + #else *user_p4d = *kernel_p4d; + #endif } /* @@ -461,7 +509,11 @@ static void __init pti_clone_user_shared(void) if (WARN_ON(!target_pte)) return; + #ifdef CONFIG_PTP + set_pte(target_pte, pfn_pte(pa >> PAGE_SHIFT, PAGE_KERNEL)); + #else *target_pte = pfn_pte(pa >> PAGE_SHIFT, PAGE_KERNEL); + #endif } } diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index d61f04743aad..5196ddd91e22 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -16,6 +16,9 @@ #include #include #include +#ifdef CONFIG_HIVE +#include +#endif static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) { @@ -53,6 +56,11 @@ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) #define EMIT_ENDBR() #endif +#ifdef CONFIG_HIVE +static void emit_mov_imm64(u8 **pprog, u32 dst_reg, + const u32 imm32_hi, const u32 imm32_lo); +#endif + static bool is_imm8(int value) { return value <= 127 && value >= -128; @@ -183,6 
+191,9 @@ static const int reg2hex[] = { [BPF_REG_AX] = 2, /* R10 temp register */ [AUX_REG] = 3, /* R11 temp register */ [X86_REG_R9] = 1, /* R9 register, 6th function argument */ +#ifdef CONFIG_HIVE + [BPF_REG_BASE] = 4, /* R12 callee saved */ +#endif }; static const int reg2pt_regs[] = { @@ -205,6 +216,16 @@ static const int reg2pt_regs[] = { */ static bool is_ereg(u32 reg) { +#ifdef CONFIG_HIVE + return (1 << reg) & (BIT(BPF_REG_5) | + BIT(AUX_REG) | + BIT(BPF_REG_7) | + BIT(BPF_REG_8) | + BIT(BPF_REG_9) | + BIT(X86_REG_R9) | + BIT(BPF_REG_AX) | + BIT(BPF_REG_BASE)); +#else return (1 << reg) & (BIT(BPF_REG_5) | BIT(AUX_REG) | BIT(BPF_REG_7) | @@ -212,6 +233,7 @@ static bool is_ereg(u32 reg) BIT(BPF_REG_9) | BIT(X86_REG_R9) | BIT(BPF_REG_AX)); +#endif } /* @@ -371,6 +393,26 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf, *pprog = prog; } +#ifdef CONFIG_HIVE +static void emit_sfi_prologue(u8 **pprog, struct bpf_prog *bpf_prog) +{ + u8 *prog = *pprog; + u64 base_addr, stack_addr; + + /* assign addr to base and rsp */ + EMIT2(0x41, 0x54); /* push %r12 */ + EMIT1(0x55); /* push %rbp */ + + base_addr = (u64)bpf_prog->shadow_region_addr; + emit_mov_imm64(&prog, BPF_REG_BASE, (u32)(base_addr >> 32), (u32)base_addr); + + stack_addr = (u64)bpf_prog->shadow_stack_addr; + emit_mov_imm64(&prog, BPF_REG_FP, (u32)(stack_addr >> 32), (u32)stack_addr); + + *pprog = prog; +} +#endif + static int emit_patch(u8 **pprog, void *func, void *ip, u8 opcode) { u8 *prog = *pprog; @@ -503,6 +545,18 @@ static void emit_return(u8 **pprog, u8 *ip) *pprog = prog; } +#ifdef CONFIG_HIVE +static void emit_sfi_epilogue(u8 **pprog) +{ + u8 *prog = *pprog; + + EMIT1(0x5d); /* pop %rbp */ + EMIT2(0x41, 0x5c); /* pop %r12 */ + + *pprog = prog; +} +#endif + /* * Generate the following code: * @@ -1097,6 +1151,10 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image bpf_prog_was_classic(bpf_prog), tail_call_reachable, bpf_prog->aux->func_idx != 0); push_callee_regs(&prog, callee_regs_used); +#ifdef CONFIG_HIVE + if (bpf_prog->aux->func_idx == 0) // not subprog + emit_sfi_prologue(&prog, bpf_prog); +#endif ilen = prog - temp; if (rw_image) @@ -1945,6 +2003,10 @@ st: if (is_imm8(insn->off)) seen_exit = true; /* Update cleanup_addr */ ctx->cleanup_addr = proglen; +#ifdef CONFIG_HIVE + if (bpf_prog->aux->func_idx == 0) // not sub_prog + emit_sfi_epilogue(&prog); +#endif pop_callee_regs(&prog, callee_regs_used); EMIT1(0xC9); /* leave */ emit_return(&prog, image + addrs[i - 1] + (prog - temp)); diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 91d31ac422d6..e7c782f0cde5 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -49,6 +49,10 @@ #include #include +#ifdef CONFIG_PTP +#include +#endif + /* * We allocate runtime services regions top-down, starting from -4G, i.e. * 0xffff_ffff_0000_0000 and limit EFI VA mapping space to 64G. 
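Note: the CONFIG_PTP changes in the preceding mm/ hunks and in the efi_alloc_page_tables() hunk just below repeat one pattern: allocate the backing page, register it with the IEE via set_iee_page() (or set_iee_page_valid() for an already-mapped frame), and only then let page-table entries be written into it. A minimal sketch of that pattern, for illustration only; iee_alloc_table_pages() is an invented name, not a helper added by this series, while __get_free_pages() and set_iee_page() are the calls visible in the hunks:

/* Sketch only; assumes <linux/gfp.h> plus the IEE registration helper. */
static void *iee_alloc_table_pages(gfp_t gfp, unsigned int order)
{
	void *table = (void *)__get_free_pages(gfp | __GFP_ZERO, order);

	if (!table)
		return NULL;

	/* Register the frame(s) with the IEE before any entries are written. */
	set_iee_page((unsigned long)table, order);

	return table;
}

When CONFIG_PTP is off, the same call sites fall back to the stock allocator and the unwrapped set_*() helpers, which is why each change in these files is bracketed by #ifdef CONFIG_PTP / #else / #endif.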
@@ -74,6 +78,11 @@ int __init efi_alloc_page_tables(void) gfp_mask = GFP_KERNEL | __GFP_ZERO; efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER); + + #ifdef CONFIG_PTP + set_iee_page((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER); + #endif + if (!efi_pgd) goto fail; @@ -116,7 +125,11 @@ void efi_sync_low_kernel_mappings(void) pgd_k = pgd_offset_k(PAGE_OFFSET); num_entries = pgd_index(EFI_VA_END) - pgd_index(PAGE_OFFSET); + #ifdef CONFIG_PTP + iee_memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries); + #else memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries); + #endif pgd_efi = efi_pgd + pgd_index(EFI_VA_END); pgd_k = pgd_offset_k(EFI_VA_END); @@ -124,7 +137,11 @@ void efi_sync_low_kernel_mappings(void) p4d_k = p4d_offset(pgd_k, 0); num_entries = p4d_index(EFI_VA_END); + #ifdef CONFIG_PTP + iee_memcpy(p4d_efi, p4d_k, sizeof(p4d_t) * num_entries); + #else memcpy(p4d_efi, p4d_k, sizeof(p4d_t) * num_entries); + #endif /* * We share all the PUD entries apart from those that map the @@ -139,13 +156,21 @@ void efi_sync_low_kernel_mappings(void) pud_k = pud_offset(p4d_k, 0); num_entries = pud_index(EFI_VA_END); + #ifdef CONFIG_PTP + iee_memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); + #else memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); + #endif pud_efi = pud_offset(p4d_efi, EFI_VA_START); pud_k = pud_offset(p4d_k, EFI_VA_START); num_entries = PTRS_PER_PUD - pud_index(EFI_VA_START); + #ifdef CONFIG_PTP + iee_memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); + #else memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); + #endif } /* diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index a595953f1d6d..a83a51431b60 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -55,15 +55,24 @@ static int set_up_temporary_text_mapping(pgd_t *pgd) p4d = (p4d_t *)get_safe_page(GFP_ATOMIC); if (!p4d) return -ENOMEM; + #ifdef CONFIG_PTP + set_iee_page((unsigned long)p4d, 0); + #endif } pud = (pud_t *)get_safe_page(GFP_ATOMIC); if (!pud) return -ENOMEM; + #ifdef CONFIG_PTP + set_iee_page((unsigned long)pud, 0); + #endif pmd = (pmd_t *)get_safe_page(GFP_ATOMIC); if (!pmd) return -ENOMEM; + #ifdef CONFIG_PTP + set_iee_page((unsigned long)pmd, 0); + #endif set_pmd(pmd + pmd_index(restore_jump_address), __pmd((jump_address_phys & PMD_MASK) | pgprot_val(pmd_text_prot))); @@ -86,7 +95,13 @@ static int set_up_temporary_text_mapping(pgd_t *pgd) static void *alloc_pgt_page(void *context) { + #ifdef CONFIG_PTP + unsigned long new = get_safe_page(GFP_ATOMIC); + set_iee_page((unsigned long)new, 0); + return (void *)new; + #else return (void *)get_safe_page(GFP_ATOMIC); + #endif } static int set_up_temporary_mappings(void) @@ -104,6 +119,9 @@ static int set_up_temporary_mappings(void) pgd = (pgd_t *)get_safe_page(GFP_ATOMIC); if (!pgd) return -ENOMEM; + #ifdef CONFIG_PTP + set_iee_page((unsigned long)pgd, 0); + #endif /* Prepare a temporary mapping for the kernel text */ result = set_up_temporary_text_mapping(pgd); diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 6b201e64d8ab..ee450d16c687 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -785,6 +785,9 @@ void xen_mm_pin_all(void) spin_lock(&pgd_lock); list_for_each_entry(page, &pgd_list, lru) { + #ifdef CONFIG_PTP + page = iee_ptdesc_to_page(page); + #endif if (!PagePinned(page)) { __xen_pgd_pin(&init_mm, (pgd_t *)page_address(page)); SetPageSavePinned(page); @@ -891,6 +894,9 @@ void xen_mm_unpin_all(void) spin_lock(&pgd_lock); list_for_each_entry(page, 
&pgd_list, lru) { + #ifdef CONFIG_PTP + page = iee_ptdesc_to_page(page); + #endif if (PageSavePinned(page)) { BUG_ON(!PagePinned(page)); __xen_pgd_unpin(&init_mm, (pgd_t *)page_address(page)); diff --git a/block/sed-opal.c b/block/sed-opal.c index 1a1cb35bf4b7..7149485a808c 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -316,7 +316,11 @@ static int read_sed_opal_key(const char *key_name, u_char *buffer, int buflen) return PTR_ERR(kref); key = key_ref_to_ptr(kref); + #ifdef CONFIG_KEYP + down_read(&KEY_SEM(key)); + #else down_read(&key->sem); + #endif ret = key_validate(key); if (ret == 0) { if (buflen > key->datalen) @@ -324,7 +328,11 @@ static int read_sed_opal_key(const char *key_name, u_char *buffer, int buflen) ret = key->type->read(key, (char *)buffer, buflen); } + #ifdef CONFIG_KEYP + up_read(&KEY_SEM(key)); + #else up_read(&key->sem); + #endif key_ref_put(kref); diff --git a/certs/blacklist.c b/certs/blacklist.c index 675dd7a8f07a..4db9c6cb7732 100644 --- a/certs/blacklist.c +++ b/certs/blacklist.c @@ -19,6 +19,10 @@ #include #include "blacklist.h" +#ifdef CONFIG_KEYP +#include +#endif + /* * According to crypto/asymmetric_keys/x509_cert_parser.c:x509_note_pkey_algo(), * the size of the currently longest supported hash algorithm is 512 bits, @@ -91,7 +95,11 @@ static int blacklist_key_instantiate(struct key *key, #endif /* Sets safe default permissions for keys loaded by user space. */ + #ifdef CONFIG_KEYP + iee_set_key_perm(key, BLACKLIST_KEY_PERM); + #else key->perm = BLACKLIST_KEY_PERM; + #endif /* * Skips the authentication step for builtin hashes, they are not diff --git a/certs/system_keyring.c b/certs/system_keyring.c index a803524044bd..eab10ce55564 100644 --- a/certs/system_keyring.c +++ b/certs/system_keyring.c @@ -96,9 +96,15 @@ int restrict_link_by_builtin_and_secondary_trusted( /* If we have a secondary trusted keyring, then that contains a link * through to the builtin keyring and the search will follow that link. */ + #ifdef CONFIG_KEYP + if (type == &key_type_keyring && + dest_keyring == secondary_trusted_keys && + payload == (union key_payload *)(builtin_trusted_keys->name_link.next)) + #else if (type == &key_type_keyring && dest_keyring == secondary_trusted_keys && payload == &builtin_trusted_keys->payload) + #endif /* Allow the builtin keyring to be added to the secondary */ return 0; @@ -125,9 +131,15 @@ int restrict_link_by_digsig_builtin_and_secondary(struct key *dest_keyring, /* If we have a secondary trusted keyring, then that contains a link * through to the builtin keyring and the search will follow that link. 
*/ + #ifdef CONFIG_KEYP + if (type == &key_type_keyring && + dest_keyring == secondary_trusted_keys && + payload == (union key_payload *)(builtin_trusted_keys->name_link.next)) + #else if (type == &key_type_keyring && dest_keyring == secondary_trusted_keys && payload == &builtin_trusted_keys->payload) + #endif /* Allow the builtin keyring to be added to the secondary */ return 0; @@ -212,9 +224,15 @@ int restrict_link_by_builtin_secondary_and_machine( const union key_payload *payload, struct key *restrict_key) { + #ifdef CONFIG_KEYP + if (machine_trusted_keys && type == &key_type_keyring && + dest_keyring == secondary_trusted_keys && + payload == (union key_payload *)(machine_trusted_keys->name_link.next)) + #else if (machine_trusted_keys && type == &key_type_keyring && dest_keyring == secondary_trusted_keys && payload == &machine_trusted_keys->payload) + #endif /* Allow the machine keyring to be added to the secondary */ return 0; diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 68cc9290cabe..6d4415de9ee4 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -304,7 +304,11 @@ static int alg_setkey_by_key_serial(struct alg_sock *ask, sockptr_t optval, if (IS_ERR(key)) return PTR_ERR(key); + #ifdef CONFIG_KEYP + down_read(&KEY_SEM(key)); + #else down_read(&key->sem); + #endif ret = ERR_PTR(-ENOPROTOOPT); if (!strcmp(key->type->name, "user") || @@ -319,21 +323,33 @@ static int alg_setkey_by_key_serial(struct alg_sock *ask, sockptr_t optval, } if (IS_ERR(ret)) { + #ifdef CONFIG_KEYP + up_read(&KEY_SEM(key)); + #else up_read(&key->sem); + #endif key_put(key); return PTR_ERR(ret); } key_data = sock_kmalloc(&ask->sk, key_datalen, GFP_KERNEL); if (!key_data) { + #ifdef CONFIG_KEYP + up_read(&KEY_SEM(key)); + #else up_read(&key->sem); + #endif key_put(key); return -ENOMEM; } memcpy(key_data, ret, key_datalen); + #ifdef CONFIG_KEYP + up_read(&KEY_SEM(key)); + #else up_read(&key->sem); + #endif key_put(key); err = type->setkey(ask->private, key_data, key_datalen); diff --git a/crypto/asymmetric_keys/asymmetric_type.c b/crypto/asymmetric_keys/asymmetric_type.c index 43af5fa510c0..134f38bd1be9 100644 --- a/crypto/asymmetric_keys/asymmetric_type.c +++ b/crypto/asymmetric_keys/asymmetric_type.c @@ -16,6 +16,9 @@ #include #include #include "asymmetric_keys.h" +#ifdef CONFIG_KEYP +#include +#endif const char *const key_being_used_for[NR__KEY_BEING_USED_FOR] = { @@ -467,6 +470,17 @@ static void asymmetric_key_free_preparse(struct key_preparsed_payload *prep) static void asymmetric_key_destroy(struct key *key) { struct asymmetric_key_subtype *subtype = asymmetric_key_subtype(key); + #ifdef CONFIG_KEYP + struct asymmetric_key_ids *kids = ((union key_payload *)(key->name_link.next))->data[asym_key_ids]; + void *data = ((union key_payload *)(key->name_link.next))->data[asym_crypto]; + void *auth = ((union key_payload *)(key->name_link.next))->data[asym_auth]; + + union key_payload *key_payload = ((union key_payload *)(key->name_link.next)); + key_payload->data[asym_crypto] = NULL; + key_payload->data[asym_subtype] = NULL; + key_payload->data[asym_key_ids] = NULL; + key_payload->data[asym_auth] = NULL; + #else struct asymmetric_key_ids *kids = key->payload.data[asym_key_ids]; void *data = key->payload.data[asym_crypto]; void *auth = key->payload.data[asym_auth]; @@ -475,6 +489,7 @@ static void asymmetric_key_destroy(struct key *key) key->payload.data[asym_subtype] = NULL; key->payload.data[asym_key_ids] = NULL; key->payload.data[asym_auth] = NULL; + #endif if (subtype) { subtype->destroy(data, auth); @@ 
-580,8 +595,13 @@ int asymmetric_key_eds_op(struct kernel_pkey_params *params, if (key->type != &key_type_asymmetric) return -EINVAL; subtype = asymmetric_key_subtype(key); + #ifdef CONFIG_KEYP + if (!subtype || + !((union key_payload *)(key->name_link.next))->data[0]) + #else if (!subtype || !key->payload.data[0]) + #endif return -EINVAL; if (!subtype->eds_op) return -ENOTSUPP; diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c index 1dcab27986a6..490ba98995c1 100644 --- a/crypto/asymmetric_keys/public_key.c +++ b/crypto/asymmetric_keys/public_key.c @@ -30,7 +30,11 @@ MODULE_LICENSE("GPL"); static void public_key_describe(const struct key *asymmetric_key, struct seq_file *m) { + #ifdef CONFIG_KEYP + struct public_key *key = ((union key_payload *)(asymmetric_key->name_link.next))->data[asym_crypto]; + #else struct public_key *key = asymmetric_key->payload.data[asym_crypto]; + #endif if (key) seq_printf(m, "%s.%s", key->id_type, key->pkey_algo); @@ -158,7 +162,11 @@ static int software_key_query(const struct kernel_pkey_params *params, struct kernel_pkey_query *info) { struct crypto_akcipher *tfm; + #ifdef CONFIG_KEYP + struct public_key *pkey = ((union key_payload *)(params->key->name_link.next))->data[asym_crypto]; + #else struct public_key *pkey = params->key->payload.data[asym_crypto]; + #endif char alg_name[CRYPTO_MAX_ALG_NAME]; struct crypto_sig *sig; u8 *key, *ptr; @@ -273,7 +281,11 @@ static int software_key_query(const struct kernel_pkey_params *params, static int software_key_eds_op(struct kernel_pkey_params *params, const void *in, void *out) { + #ifdef CONFIG_KEYP + const struct public_key *pkey = ((union key_payload *)(params->key->name_link.next))->data[asym_crypto]; + #else const struct public_key *pkey = params->key->payload.data[asym_crypto]; + #endif char alg_name[CRYPTO_MAX_ALG_NAME]; struct crypto_akcipher *tfm; struct crypto_sig *sig; @@ -453,7 +465,11 @@ EXPORT_SYMBOL_GPL(public_key_verify_signature); static int public_key_verify_signature_2(const struct key *key, const struct public_key_signature *sig) { + #ifdef CONFIG_KEYP + const struct public_key *pk = ((union key_payload *)(key->name_link.next))->data[asym_crypto]; + #else const struct public_key *pk = key->payload.data[asym_crypto]; + #endif return public_key_verify_signature(pk, sig); } diff --git a/crypto/asymmetric_keys/signature.c b/crypto/asymmetric_keys/signature.c index 2deff81f8af5..696fa2c4e708 100644 --- a/crypto/asymmetric_keys/signature.c +++ b/crypto/asymmetric_keys/signature.c @@ -51,8 +51,13 @@ int query_asymmetric_key(const struct kernel_pkey_params *params, if (key->type != &key_type_asymmetric) return -EINVAL; subtype = asymmetric_key_subtype(key); + #ifdef CONFIG_KEYP + if (!subtype || + !((union key_payload *)(key->name_link.next))->data[0]) + #else if (!subtype || !key->payload.data[0]) + #endif return -EINVAL; if (!subtype->query) return -ENOTSUPP; @@ -145,8 +150,13 @@ int verify_signature(const struct key *key, if (key->type != &key_type_asymmetric) return -EINVAL; subtype = asymmetric_key_subtype(key); + #ifdef CONFIG_KEYP + if (!subtype || + !((union key_payload *)(key->name_link.next))->data[0]) + #else if (!subtype || !key->payload.data[0]) + #endif return -EINVAL; if (!subtype->verify_signature) return -ENOTSUPP; diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 83092d93f36a..fb12e7d0660a 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -94,7 +94,11 @@ static int 
__init arm_enable_runtime_services(void) return 0; } + #ifdef CONFIG_PTP + efi_memmap_unmap_after_init(); + #else efi_memmap_unmap(); + #endif mapsize = efi.memmap.desc_size * efi.memmap.nr_map; diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c index 77dd20f9df31..ea8ebecaa153 100644 --- a/drivers/firmware/efi/memmap.c +++ b/drivers/firmware/efi/memmap.c @@ -96,6 +96,26 @@ void __init efi_memmap_unmap(void) clear_bit(EFI_MEMMAP, &efi.flags); } +#ifdef CONFIG_PTP +void __init efi_memmap_unmap_after_init(void) +{ + if (!efi_enabled(EFI_MEMMAP)) + return; + + if (!(efi.memmap.flags & EFI_MEMMAP_LATE)) { + unsigned long size; + + size = efi.memmap.desc_size * efi.memmap.nr_map; + early_iounmap_after_init((__force void __iomem *)efi.memmap.map, size); + } else { + memunmap(efi.memmap.map); + } + + efi.memmap.map = NULL; + clear_bit(EFI_MEMMAP, &efi.flags); +} +#endif + /** * efi_memmap_init_late - Map efi.memmap with memremap() * @phys_addr: Physical address of the new EFI memory map diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 6c0621f6f572..0ab9f53a7841 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -187,7 +187,11 @@ static bool increase_address_space(struct protection_domain *domain, out: spin_unlock_irqrestore(&domain->lock, flags); + #ifdef CONFIG_PTP + free_iee_pgtable_page(pte); + #else free_page((unsigned long)pte); + #endif return ret; } diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 84291e38dca8..b3af9d419b6e 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -71,6 +71,7 @@ obj-$(CONFIG_DM_PERSISTENT_DATA) += persistent-data/ obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o dm-region-hash.o obj-$(CONFIG_DM_LOG_USERSPACE) += dm-log-userspace.o obj-$(CONFIG_DM_ZERO) += dm-zero.o +obj-$(CONFIG_DM_ZERO_KOI) += dm-zero_koi.o obj-$(CONFIG_DM_RAID) += dm-raid.o obj-$(CONFIG_DM_THIN_PROVISIONING) += dm-thin-pool.o obj-$(CONFIG_DM_VERITY) += dm-verity.o diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index aa6bb5b4704b..ecf777e5d176 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2512,7 +2512,11 @@ static int set_key_encrypted(struct crypt_config *cc, struct key *key) { const struct encrypted_key_payload *ekp; + #ifdef CONFIG_KEYP + ekp = ((union key_payload *)(key->name_link.next))->data[0]; + #else ekp = key->payload.data[0]; + #endif if (!ekp) return -EKEYREVOKED; @@ -2528,7 +2532,11 @@ static int set_key_trusted(struct crypt_config *cc, struct key *key) { const struct trusted_key_payload *tkp; + #ifdef CONFIG_KEYP + tkp = ((union key_payload *)(key->name_link.next))->data[0]; + #else tkp = key->payload.data[0]; + #endif if (!tkp) return -EKEYREVOKED; @@ -2590,17 +2598,29 @@ static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string return PTR_ERR(key); } + #ifdef CONFIG_KEYP + down_read(&KEY_SEM(key)); + #else down_read(&key->sem); + #endif ret = set_key(cc, key); if (ret < 0) { + #ifdef CONFIG_KEYP + up_read(&KEY_SEM(key)); + #else up_read(&key->sem); + #endif key_put(key); kfree_sensitive(new_key_string); return ret; } + #ifdef CONFIG_KEYP + up_read(&KEY_SEM(key)); + #else up_read(&key->sem); + #endif key_put(key); /* clear the flag since following operations may invalidate previously valid key */ diff --git a/drivers/md/dm-verity-verify-sig.c b/drivers/md/dm-verity-verify-sig.c index 4836508ea50c..d52d5bccc190 100644 --- a/drivers/md/dm-verity-verify-sig.c +++ b/drivers/md/dm-verity-verify-sig.c @@ -40,7 +40,11 @@ 
static int verity_verify_get_sig_from_key(const char *key_desc, if (IS_ERR(key)) return PTR_ERR(key); + #ifdef CONFIG_KEYP + down_read(&KEY_SEM(key)); + #else down_read(&key->sem); + #endif ukp = user_key_payload_locked(key); if (!ukp) { @@ -58,7 +62,11 @@ static int verity_verify_get_sig_from_key(const char *key_desc, memcpy(sig_opts->sig, ukp->data, sig_opts->sig_size); end: + #ifdef CONFIG_KEYP + up_read(&KEY_SEM(key)); + #else up_read(&key->sem); + #endif key_put(key); return ret; diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c index 3b13e6eb1aa4..a93ac50440c6 100644 --- a/drivers/md/dm-zero.c +++ b/drivers/md/dm-zero.c @@ -79,4 +79,4 @@ module_dm(zero); MODULE_AUTHOR("Jana Saout "); MODULE_DESCRIPTION(DM_NAME " dummy target returning zeros"); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL"); \ No newline at end of file diff --git a/drivers/nvdimm/security.c b/drivers/nvdimm/security.c index a03e3c45f297..71bccea085e9 100644 --- a/drivers/nvdimm/security.c +++ b/drivers/nvdimm/security.c @@ -28,7 +28,11 @@ static void *key_data(struct key *key) { struct encrypted_key_payload *epayload = dereference_key_locked(key); + #ifdef CONFIG_KEYP + lockdep_assert_held_read(&KEY_SEM(key)); + #else lockdep_assert_held_read(&key->sem); + #endif return epayload->decrypted_data; } @@ -38,7 +42,11 @@ static void nvdimm_put_key(struct key *key) if (!key) return; + #ifdef CONFIG_KEYP + up_read(&KEY_SEM(key)); + #else up_read(&key->sem); + #endif key_put(key); } @@ -65,10 +73,18 @@ static struct key *nvdimm_request_key(struct nvdimm *nvdimm) } else { struct encrypted_key_payload *epayload; + #ifdef CONFIG_KEYP + down_read(&KEY_SEM(key)); + #else down_read(&key->sem); + #endif epayload = dereference_key_locked(key); if (epayload->decrypted_datalen != NVDIMM_PASSPHRASE_LEN) { + #ifdef CONFIG_KEYP + up_read(&KEY_SEM(key)); + #else up_read(&key->sem); + #endif key_put(key); key = NULL; } @@ -107,10 +123,18 @@ static struct key *nvdimm_lookup_user_key(struct nvdimm *nvdimm, dev_dbg(dev, "%s: key found: %#x\n", __func__, key_serial(key)); + #ifdef CONFIG_KEYP + down_read_nested(&KEY_SEM(key), subclass); + #else down_read_nested(&key->sem, subclass); + #endif epayload = dereference_key_locked(key); if (epayload->decrypted_datalen != NVDIMM_PASSPHRASE_LEN) { + #ifdef CONFIG_KEYP + up_read(&KEY_SEM(key)); + #else up_read(&key->sem); + #endif key_put(key); key = NULL; } diff --git a/drivers/rtc/rtc-test.c b/drivers/rtc/rtc-test.c index 7e0d8fb26465..950ee6396659 100644 --- a/drivers/rtc/rtc-test.c +++ b/drivers/rtc/rtc-test.c @@ -198,4 +198,4 @@ MODULE_DESCRIPTION("RTC test driver/device"); MODULE_LICENSE("GPL v2"); module_init(test_init); -module_exit(test_exit); +module_exit(test_exit); \ No newline at end of file diff --git a/drivers/rtc/rtc-test_glue.h b/drivers/rtc/rtc-test_glue.h new file mode 100644 index 000000000000..aee28f9680b2 --- /dev/null +++ b/drivers/rtc/rtc-test_glue.h @@ -0,0 +1,13 @@ + +static int test_rtc_read_time_glue(struct device* dev, struct rtc_time* tm); +static int test_rtc_set_time_glue(struct device* dev, struct rtc_time* tm); +static int test_rtc_read_alarm_glue(struct device* dev, struct rtc_wkalrm* alrm); +static int test_rtc_set_alarm_glue(struct device* dev, struct rtc_wkalrm* alrm); +static int test_rtc_alarm_irq_enable_glue(struct device* dev, unsigned int enable); + + +static void add_timer_glue(struct timer_list* tim_0); +static time64_t rtc_tm_to_time64_glue(struct rtc_time* rtc_0); +static time64_t ktime_get_real_seconds_glue(void); +static void 
rtc_time64_to_tm_glue(time64_t val_0, struct rtc_time* rtc_1); +static noinline int del_timer_glue(struct timer_list *timer); \ No newline at end of file diff --git a/drivers/tty/serial/earlycon.c b/drivers/tty/serial/earlycon.c index a5fbb6ed38ae..81428783b9da 100644 --- a/drivers/tty/serial/earlycon.c +++ b/drivers/tty/serial/earlycon.c @@ -40,7 +40,11 @@ static void __iomem * __init earlycon_map(resource_size_t paddr, size_t size) { void __iomem *base; #ifdef CONFIG_FIX_EARLYCON_MEM + #ifdef CONFIG_PTP + __iee_set_fixmap_pre_init(FIX_EARLYCON_MEM_BASE, paddr & PAGE_MASK, FIXMAP_PAGE_IO); + #else set_fixmap_io(FIX_EARLYCON_MEM_BASE, paddr & PAGE_MASK); + #endif base = (void __iomem *)__fix_to_virt(FIX_EARLYCON_MEM_BASE); base += paddr & ~PAGE_MASK; #else diff --git a/drivers/usb/early/ehci-dbgp.c b/drivers/usb/early/ehci-dbgp.c index 45b42d8f6453..b71072d6957e 100644 --- a/drivers/usb/early/ehci-dbgp.c +++ b/drivers/usb/early/ehci-dbgp.c @@ -879,7 +879,11 @@ int __init early_dbgp_init(char *s) * FIXME I don't have the bar size so just guess PAGE_SIZE is more * than enough. 1K is the biggest I have seen. */ + #ifdef CONFIG_PTP + __iee_set_fixmap_pre_init(FIX_DBGP_BASE, bar_val & PAGE_MASK, FIXMAP_PAGE_NOCACHE); + #else set_fixmap_nocache(FIX_DBGP_BASE, bar_val & PAGE_MASK); + #endif ehci_bar = (void __iomem *)__fix_to_virt(FIX_DBGP_BASE); ehci_bar += bar_val & ~PAGE_MASK; dbgp_printk("ehci_bar: %p\n", ehci_bar); diff --git a/fs/coredump.c b/fs/coredump.c index 9d235fa14ab9..72be355903ca 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -53,6 +53,10 @@ #include +#ifdef CONFIG_CREDP +#include +#endif + static bool dump_vma_snapshot(struct coredump_params *cprm); static void free_vma_snapshot(struct coredump_params *cprm); @@ -564,7 +568,11 @@ void do_coredump(const kernel_siginfo_t *siginfo) */ if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) { /* Setuid core dump mode */ + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(cred,GLOBAL_ROOT_UID); + #else cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ + #endif need_suid_safe = true; } diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c index 7cbb1fd872ac..a8425a83aabf 100644 --- a/fs/crypto/keyring.c +++ b/fs/crypto/keyring.c @@ -590,8 +590,13 @@ static void fscrypt_provisioning_key_describe(const struct key *key, { seq_puts(m, key->description); if (key_is_positive(key)) { + #ifdef CONFIG_KEYP + const struct fscrypt_provisioning_key_payload *payload = + ((union key_payload *)(key->name_link.next))->data[0]; + #else const struct fscrypt_provisioning_key_payload *payload = key->payload.data[0]; + #endif seq_printf(m, ": %u [%u]", key->datalen, payload->type); } @@ -599,7 +604,11 @@ static void fscrypt_provisioning_key_describe(const struct key *key, static void fscrypt_provisioning_key_destroy(struct key *key) { + #ifdef CONFIG_KEYP + kfree_sensitive(((union key_payload *)(key->name_link.next))->data[0]); + #else kfree_sensitive(key->payload.data[0]); + #endif } static struct key_type key_type_fscrypt_provisioning = { @@ -641,7 +650,11 @@ static int get_keyring_key(u32 key_id, u32 type, if (key->type != &key_type_fscrypt_provisioning) goto bad_key; + #ifdef CONFIG_KEYP + payload = ((union key_payload *)(key->name_link.next))->data[0]; + #else payload = key->payload.data[0]; + #endif /* Don't allow fscrypt v1 keys to be used as v2 keys and vice versa. 
*/ if (payload->type != type) diff --git a/fs/crypto/keysetup_v1.c b/fs/crypto/keysetup_v1.c index 75dabd9b27f9..9d0cc6ef8e48 100644 --- a/fs/crypto/keysetup_v1.c +++ b/fs/crypto/keysetup_v1.c @@ -109,7 +109,11 @@ find_and_lock_process_key(const char *prefix, if (IS_ERR(key)) return key; + #ifdef CONFIG_KEYP + down_read(&KEY_SEM(key)); + #else down_read(&key->sem); + #endif ukp = user_key_payload_locked(key); if (!ukp) /* was the key revoked before we acquired its semaphore? */ @@ -136,7 +140,11 @@ find_and_lock_process_key(const char *prefix, return key; invalid: + #ifdef CONFIG_KEYP + up_read(&KEY_SEM(key)); + #else up_read(&key->sem); + #endif key_put(key); return ERR_PTR(-ENOKEY); } @@ -315,7 +323,11 @@ int fscrypt_setup_v1_file_key_via_subscribed_keyrings(struct fscrypt_info *ci) return PTR_ERR(key); err = fscrypt_setup_v1_file_key(ci, payload->raw); + #ifdef CONFIG_KEYP + up_read(&KEY_SEM(key)); + #else up_read(&key->sem); + #endif key_put(key); return err; } diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index f2ed0c0266cb..1a24f8dcff5c 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -82,7 +82,11 @@ ecryptfs_get_encrypted_key_payload_data(struct key *key) if (key->type != &key_type_encrypted) return NULL; + #ifdef CONFIG_KEYP + payload = ((union key_payload *)(key->name_link.next))->data[0]; + #else payload = key->payload.data[0]; + #endif if (!payload) return ERR_PTR(-EKEYREVOKED); diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 7f9f68c00ef6..a4859a7d3b5d 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -512,7 +512,11 @@ ecryptfs_find_global_auth_tok_for_sig( goto out_invalid_auth_tok; } + #ifdef CONFIG_KEYP + down_write(&(KEY_SEM(walker->global_auth_tok_key))); + #else down_write(&(walker->global_auth_tok_key->sem)); + #endif rc = ecryptfs_verify_auth_tok_from_key( walker->global_auth_tok_key, auth_tok); if (rc) @@ -525,7 +529,11 @@ ecryptfs_find_global_auth_tok_for_sig( rc = -ENOENT; goto out; out_invalid_auth_tok_unlock: + #ifdef CONFIG_KEYP + up_write(&(KEY_SEM(walker->global_auth_tok_key))); + #else up_write(&(walker->global_auth_tok_key->sem)); + #endif out_invalid_auth_tok: printk(KERN_WARNING "Invalidating auth tok with sig = [%s]\n", sig); walker->flags |= ECRYPTFS_AUTH_TOK_INVALID; @@ -846,7 +854,11 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes, mutex_unlock(s->tfm_mutex); out: if (auth_tok_key) { + #ifdef CONFIG_KEYP + up_write(&KEY_SEM(auth_tok_key)); + #else up_write(&(auth_tok_key->sem)); + #endif key_put(auth_tok_key); } skcipher_request_free(s->skcipher_req); @@ -1088,7 +1100,11 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size, (*filename) = NULL; } if (auth_tok_key) { + #ifdef CONFIG_KEYP + up_write(&KEY_SEM(auth_tok_key)); + #else up_write(&(auth_tok_key->sem)); + #endif key_put(auth_tok_key); } skcipher_request_free(s->skcipher_req); @@ -1625,10 +1641,18 @@ int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key, goto out; } } + #ifdef CONFIG_KEYP + down_write(&KEY_SEM((*auth_tok_key))); + #else down_write(&(*auth_tok_key)->sem); + #endif rc = ecryptfs_verify_auth_tok_from_key(*auth_tok_key, auth_tok); if (rc) { + #ifdef CONFIG_KEYP + up_write(&KEY_SEM((*auth_tok_key))); + #else up_write(&(*auth_tok_key)->sem); + #endif key_put(*auth_tok_key); (*auth_tok_key) = NULL; goto out; @@ -1901,7 +1925,11 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat, 
memcpy(&(candidate_auth_tok->token.private_key), &(matching_auth_tok->token.private_key), sizeof(struct ecryptfs_private_key)); + #ifdef CONFIG_KEYP + up_write(&KEY_SEM(auth_tok_key)); + #else up_write(&(auth_tok_key->sem)); + #endif key_put(auth_tok_key); rc = decrypt_pki_encrypted_session_key(candidate_auth_tok, crypt_stat); @@ -1909,12 +1937,20 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat, memcpy(&(candidate_auth_tok->token.password), &(matching_auth_tok->token.password), sizeof(struct ecryptfs_password)); + #ifdef CONFIG_KEYP + up_write(&KEY_SEM(auth_tok_key)); + #else up_write(&(auth_tok_key->sem)); + #endif key_put(auth_tok_key); rc = decrypt_passphrase_encrypted_session_key( candidate_auth_tok, crypt_stat); } else { + #ifdef CONFIG_KEYP + up_write(&KEY_SEM(auth_tok_key)); + #else up_write(&(auth_tok_key->sem)); + #endif key_put(auth_tok_key); rc = -EINVAL; } @@ -1976,7 +2012,11 @@ pki_encrypt_session_key(struct key *auth_tok_key, crypt_stat->cipher, crypt_stat->key_size), crypt_stat, &payload, &payload_len); + #ifdef CONFIG_KEYP + up_write(&KEY_SEM(auth_tok_key)); + #else up_write(&(auth_tok_key->sem)); + #endif key_put(auth_tok_key); if (rc) { ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet\n"); @@ -2040,7 +2080,11 @@ write_tag_1_packet(char *dest, size_t *remaining_bytes, memcpy(key_rec->enc_key, auth_tok->session_key.encrypted_key, auth_tok->session_key.encrypted_key_size); + #ifdef CONFIG_KEYP + up_write(&KEY_SEM(auth_tok_key)); + #else up_write(&(auth_tok_key->sem)); + #endif key_put(auth_tok_key); goto encrypted_session_key_set; } @@ -2438,7 +2482,11 @@ ecryptfs_generate_key_packet_set(char *dest_base, &max, auth_tok, crypt_stat, key_rec, &written); + #ifdef CONFIG_KEYP + up_write(&KEY_SEM(auth_tok_key)); + #else up_write(&(auth_tok_key->sem)); + #endif key_put(auth_tok_key); if (rc) { ecryptfs_printk(KERN_WARNING, "Error " @@ -2467,7 +2515,11 @@ ecryptfs_generate_key_packet_set(char *dest_base, } (*len) += written; } else { + #ifdef CONFIG_KEYP + up_write(&KEY_SEM(auth_tok_key)); + #else up_write(&(auth_tok_key->sem)); + #endif key_put(auth_tok_key); ecryptfs_printk(KERN_WARNING, "Unsupported " "authentication token type\n"); diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 2dc927ba067f..39c6f1d1438c 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -202,7 +202,11 @@ static int ecryptfs_init_global_auth_toks( goto out; } else { global_auth_tok->flags &= ~ECRYPTFS_AUTH_TOK_INVALID; + #ifdef CONFIG_KEYP + up_write(&KEY_SEM(global_auth_tok->global_auth_tok_key)); + #else up_write(&(global_auth_tok->global_auth_tok_key)->sem); + #endif } } out: diff --git a/fs/exec.c b/fs/exec.c index eaec57f79aa1..5688b11ec9d4 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -76,6 +76,14 @@ #include +#ifdef CONFIG_CREDP +#include +#endif + +#ifdef CONFIG_IEE +#include +#endif + static int bprm_creds_from_file(struct linux_binprm *bprm); int suid_dumpable = 0; @@ -733,7 +741,11 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) free_pgd_range(&tlb, old_start, old_end, new_end, next ? 
next->vm_start : USER_PGTABLES_CEILING); } + #ifdef CONFIG_PTP + iee_tlb_finish_mmu(&tlb); + #else tlb_finish_mmu(&tlb); + #endif vma_prev(&vmi); /* Shrink the vma to just the new range */ @@ -1023,6 +1035,9 @@ static int exec_mmap(struct mm_struct *mm) if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) local_irq_enable(); activate_mm(active_mm, mm); + #ifdef CONFIG_IEE + iee_set_token_pgd(tsk, mm->pgd); + #endif if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) local_irq_enable(); lru_gen_add_mm(mm); @@ -1678,12 +1693,20 @@ static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file) if (mode & S_ISUID) { bprm->per_clear |= PER_CLEAR_ON_SETID; + #ifdef CONFIG_CREDP + iee_set_cred_euid(bprm->cred, vfsuid_into_kuid(vfsuid)); + #else bprm->cred->euid = vfsuid_into_kuid(vfsuid); + #endif } if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { bprm->per_clear |= PER_CLEAR_ON_SETID; + #ifdef CONFIG_CREDP + iee_set_cred_egid(bprm->cred, vfsgid_into_kgid(vfsgid)); + #else bprm->cred->egid = vfsgid_into_kgid(vfsgid); + #endif } } diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 3e724cb7ef01..e32e136e4271 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -15,6 +15,10 @@ #include +#ifdef CONFIG_CREDP +#include +#endif + #include "flexfilelayout.h" #include "../nfs4session.h" #include "../nfs4idmap.h" @@ -502,8 +506,13 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, rc = -ENOMEM; if (!kcred) goto out_err_free; + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(kcred,uid); + iee_set_cred_fsgid(kcred,gid); + #else kcred->fsuid = uid; kcred->fsgid = gid; + #endif cred = RCU_INITIALIZER(kcred); if (lgr->range.iomode == IOMODE_READ) diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index 25a7c771cfd8..c11c2e78e782 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -48,6 +48,13 @@ #include #include +#ifdef CONFIG_CREDP +#include +#endif +#ifdef CONFIG_KEYP +#include +#endif + #include "internal.h" #include "netns.h" #include "nfs4idmap.h" @@ -225,9 +232,18 @@ int nfs_idmap_init(void) if (ret < 0) goto failed_reg_legacy; + #ifdef CONFIG_KEYP + iee_set_key_flag_bit(keyring, KEY_FLAG_ROOT_CAN_CLEAR, SET_BIT_OP); + #else set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags); + #endif + #ifdef CONFIG_CREDP + iee_set_cred_thread_keyring(cred,keyring); + iee_set_cred_jit_keyring(cred,KEY_REQKEY_DEFL_THREAD_KEYRING); + #else cred->thread_keyring = keyring; cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; + #endif id_resolver_cache = cred; return 0; @@ -296,7 +312,11 @@ static struct key *nfs_idmap_request_key(const char *name, size_t namelen, mutex_unlock(&idmap->idmap_mutex); } if (!IS_ERR(rkey)) + #ifdef CONFIG_KEYP + iee_set_key_flag_bit(rkey, KEY_FLAG_ROOT_CAN_INVAL, SET_BIT_OP); + #else set_bit(KEY_FLAG_ROOT_CAN_INVAL, &rkey->flags); + #endif kfree(desc); return rkey; @@ -321,7 +341,11 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen, } rcu_read_lock(); + #ifdef CONFIG_KEYP + iee_set_key_perm(rkey, rkey->perm | KEY_USR_VIEW); + #else rkey->perm |= KEY_USR_VIEW; + #endif ret = key_validate(rkey); if (ret < 0) diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index e6beaaf4f170..e89385fd81f1 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -2,6 +2,9 @@ /* Copyright (C) 1995, 1996 Olaf Kirch */ #include +#ifdef CONFIG_CREDP +#include +#endif #include "nfsd.h" #include "auth.h" @@ -32,22 +35,40 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) if 
(!new) return -ENOMEM; + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(new,rqstp->rq_cred.cr_uid); + iee_set_cred_fsgid(new,rqstp->rq_cred.cr_gid); + #else new->fsuid = rqstp->rq_cred.cr_uid; new->fsgid = rqstp->rq_cred.cr_gid; + #endif rqgi = rqstp->rq_cred.cr_group_info; if (flags & NFSEXP_ALLSQUASH) { + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(new,exp->ex_anon_uid); + iee_set_cred_fsgid(new,exp->ex_anon_gid); + #else new->fsuid = exp->ex_anon_uid; new->fsgid = exp->ex_anon_gid; + #endif gi = groups_alloc(0); if (!gi) goto oom; } else if (flags & NFSEXP_ROOTSQUASH) { if (uid_eq(new->fsuid, GLOBAL_ROOT_UID)) + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(new,exp->ex_anon_uid); + #else new->fsuid = exp->ex_anon_uid; + #endif if (gid_eq(new->fsgid, GLOBAL_ROOT_GID)) + #ifdef CONFIG_CREDP + iee_set_cred_fsgid(new,exp->ex_anon_gid); + #else new->fsgid = exp->ex_anon_gid; + #endif gi = groups_alloc(rqgi->ngroups); if (!gi) @@ -67,18 +88,35 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) } if (uid_eq(new->fsuid, INVALID_UID)) + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(new,exp->ex_anon_uid); + #else new->fsuid = exp->ex_anon_uid; + #endif if (gid_eq(new->fsgid, INVALID_GID)) + #ifdef CONFIG_CREDP + iee_set_cred_fsgid(new,exp->ex_anon_gid); + #else new->fsgid = exp->ex_anon_gid; + #endif set_groups(new, gi); put_group_info(gi); if (!uid_eq(new->fsuid, GLOBAL_ROOT_UID)) + #ifdef CONFIG_CREDP + iee_set_cred_cap_effective(new,cap_drop_nfsd_set(new->cap_effective)); + #else new->cap_effective = cap_drop_nfsd_set(new->cap_effective); + #endif else + #ifdef CONFIG_CREDP + iee_set_cred_cap_effective(new,cap_raise_nfsd_set(new->cap_effective, + new->cap_permitted)); + #else new->cap_effective = cap_raise_nfsd_set(new->cap_effective, new->cap_permitted); + #endif put_cred(override_creds(new)); put_cred(new); return 0; diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 49a88dde9631..4fde340b8471 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -41,6 +41,9 @@ #include "trace.h" #include "xdr4cb.h" #include "xdr4.h" +#ifdef CONFIG_CREDP +#include +#endif #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -946,8 +949,13 @@ static const struct cred *get_backchannel_cred(struct nfs4_client *clp, struct r if (!kcred) return NULL; - kcred->fsuid = ses->se_cb_sec.uid; - kcred->fsgid = ses->se_cb_sec.gid; + #ifdef CONFIG_CREDP + iee_set_cred_uid(kcred,ses->se_cb_sec.uid); + iee_set_cred_gid(kcred,ses->se_cb_sec.gid); + #else + kcred->uid = ses->se_cb_sec.uid; + kcred->gid = ses->se_cb_sec.gid; + #endif return kcred; } } diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 4395577825a7..8c34cfe01075 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -44,6 +44,10 @@ #include #include +#ifdef CONFIG_CREDP +#include +#endif + #include "nfsd.h" #include "state.h" #include "vfs.h" @@ -78,8 +82,13 @@ nfs4_save_creds(const struct cred **original_creds) if (!new) return -ENOMEM; + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(new,GLOBAL_ROOT_UID); + iee_set_cred_fsgid(new,GLOBAL_ROOT_GID); + #else new->fsuid = GLOBAL_ROOT_UID; new->fsgid = GLOBAL_ROOT_GID; + #endif *original_creds = override_creds(new); put_cred(new); return 0; diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index c2495d98c189..779471a0aa66 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -16,6 +16,10 @@ #include "auth.h" #include "trace.h" +#ifdef CONFIG_CREDP +#include +#endif + #define NFSDDBG_FACILITY NFSDDBG_FH @@ -223,9 +227,14 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, 
struct svc_fh *fhp) error = nfserrno(-ENOMEM); goto out; } + #ifdef CONFIG_CREDP + iee_set_cred_cap_effective(new,cap_raise_nfsd_set(new->cap_effective, + new->cap_permitted)); + #else new->cap_effective = cap_raise_nfsd_set(new->cap_effective, new->cap_permitted); + #endif put_cred(override_creds(new)); put_cred(new); } else { diff --git a/fs/open.c b/fs/open.c index f9ac703ec1b2..5561a7ece18d 100644 --- a/fs/open.c +++ b/fs/open.c @@ -35,6 +35,11 @@ #include #include +#ifdef CONFIG_CREDP +#include +#endif + + #include "internal.h" int do_truncate(struct mnt_idmap *idmap, struct dentry *dentry, @@ -414,17 +419,34 @@ static const struct cred *access_override_creds(void) * routine. */ + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(override_cred,override_cred->uid); + iee_set_cred_fsgid(override_cred,override_cred->gid); + #else override_cred->fsuid = override_cred->uid; override_cred->fsgid = override_cred->gid; + #endif if (!issecure(SECURE_NO_SETUID_FIXUP)) { /* Clear the capabilities if we switch to a non-root user */ kuid_t root_uid = make_kuid(override_cred->user_ns, 0); if (!uid_eq(override_cred->uid, root_uid)) + #ifdef CONFIG_CREDP + do { + kernel_cap_t tmp_cap = override_cred->cap_effective; + tmp_cap.val = 0; + iee_set_cred_cap_effective(override_cred, tmp_cap); + } while (0); + #else cap_clear(override_cred->cap_effective); + #endif else + #ifdef CONFIG_CREDP + iee_set_cred_cap_effective(override_cred,override_cred->cap_permitted); + #else override_cred->cap_effective = override_cred->cap_permitted; + #endif } /* @@ -444,7 +466,11 @@ static const struct cred *access_override_creds(void) * expecting RCU freeing. But normal thread-synchronous * cred accesses will keep things non-RCY. */ + #ifdef CONFIG_CREDP + iee_set_cred_non_rcu(override_cred,1); + #else override_cred->non_rcu = 1; + #endif old_cred = override_creds(override_cred); diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 54602f0bed8b..58534612dc64 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -16,6 +16,10 @@ #include #include "overlayfs.h" +#ifdef CONFIG_CREDP +#include +#endif + static unsigned short ovl_redirect_max = 256; module_param_named(redirect_max, ovl_redirect_max, ushort, 0644); MODULE_PARM_DESC(redirect_max, @@ -590,8 +594,13 @@ static int ovl_create_or_link(struct dentry *dentry, struct inode *inode, * create a new inode, so just use the ovl mounter's * fs{u,g}id. 
*/ + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(override_cred,inode->i_uid); + iee_set_cred_fsgid(override_cred,inode->i_gid); + #else override_cred->fsuid = inode->i_uid; override_cred->fsgid = inode->i_gid; + #endif err = security_dentry_create_files_as(dentry, attr->mode, &dentry->d_name, old_cred, override_cred); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 2c056d737c27..9ede99ddb04b 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -21,6 +21,10 @@ #include "overlayfs.h" #include "params.h" +#ifdef CONFIG_CREDP +#include +#endif + MODULE_AUTHOR("Miklos Szeredi "); MODULE_DESCRIPTION("Overlay filesystem"); MODULE_LICENSE("GPL"); @@ -1485,7 +1489,15 @@ int ovl_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_export_op = &ovl_export_fid_operations; /* Never override disk quota limits or use reserved space */ + #ifdef CONFIG_CREDP + { + kernel_cap_t tmp = cred->cap_effective; + cap_lower(tmp, CAP_SYS_RESOURCE); + iee_set_cred_cap_effective(cred, tmp); + } + #else cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); + #endif sb->s_magic = OVERLAYFS_SUPER_MAGIC; sb->s_xattr = ofs->config.userxattr ? ovl_user_xattr_handlers : diff --git a/fs/smb/client/cifs_spnego.c b/fs/smb/client/cifs_spnego.c index af7849e5974f..880e7386bfff 100644 --- a/fs/smb/client/cifs_spnego.c +++ b/fs/smb/client/cifs_spnego.c @@ -18,6 +18,13 @@ #include "cifs_spnego.h" #include "cifs_debug.h" #include "cifsproto.h" +#ifdef CONFIG_CREDP +#include +#endif +#ifdef CONFIG_KEYP +#include +#endif + static const struct cred *spnego_cred; /* create a new cifs key */ @@ -33,7 +40,12 @@ cifs_spnego_key_instantiate(struct key *key, struct key_preparsed_payload *prep) goto error; /* attach the data */ + #ifdef CONFIG_KEYP + union key_payload *key_payload = ((union key_payload *)(key->name_link.next)); + key_payload->data[0] = payload; + #else key->payload.data[0] = payload; + #endif ret = 0; error: @@ -43,7 +55,11 @@ cifs_spnego_key_instantiate(struct key *key, struct key_preparsed_payload *prep) static void cifs_spnego_key_destroy(struct key *key) { + #ifdef CONFIG_KEYP + kfree(((union key_payload *)(key->name_link.next))->data[0]); + #else kfree(key->payload.data[0]); + #endif } @@ -163,7 +179,11 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo, #ifdef CONFIG_CIFS_DEBUG2 if (cifsFYI && !IS_ERR(spnego_key)) { + #ifdef CONFIG_KEYP + struct cifs_spnego_msg *msg = ((union key_payload *)(spnego_key->name_link.next))->data[0]; + #else struct cifs_spnego_msg *msg = spnego_key->payload.data[0]; + #endif cifs_dump_mem("SPNEGO reply blob:", msg->data, min(1024U, msg->secblob_len + msg->sesskey_len)); } @@ -211,9 +231,18 @@ init_cifs_spnego(void) * instruct request_key() to use this special keyring as a cache for * the results it looks up */ + #ifdef CONFIG_KEYP + iee_set_key_flag_bit(keyring, KEY_FLAG_ROOT_CAN_CLEAR, SET_BIT_OP); + #else set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags); + #endif + #ifdef CONFIG_CREDP + iee_set_cred_thread_keyring(cred,keyring); + iee_set_cred_jit_keyring(cred,KEY_REQKEY_DEFL_THREAD_KEYRING); + #else cred->thread_keyring = keyring; cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; + #endif spnego_cred = cred; cifs_dbg(FYI, "cifs spnego keyring: %d\n", key_serial(keyring)); diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c index f5b6df82e857..a51a8b474d4d 100644 --- a/fs/smb/client/cifsacl.c +++ b/fs/smb/client/cifsacl.c @@ -26,6 +26,14 @@ #include "cifs_fs_sb.h" #include "cifs_unicode.h" +#ifdef CONFIG_CREDP +#include +#endif +#ifdef CONFIG_KEYP 
+#include +#include +#endif + /* security id for everyone/world system group */ static const struct cifs_sid sid_everyone = { 1, 1, {0, 0, 0, 0, 0, 1}, {0} }; @@ -78,16 +86,31 @@ cifs_idmap_key_instantiate(struct key *key, struct key_preparsed_payload *prep) * dereference payload.data! */ if (prep->datalen <= sizeof(key->payload)) { + #ifdef CONFIG_KEYP + union key_payload *key_payload = ((union key_payload *)(key->name_link.next)); + key_payload->data[0] = NULL; + memcpy(key_payload, prep->data, prep->datalen); + #else key->payload.data[0] = NULL; memcpy(&key->payload, prep->data, prep->datalen); + #endif } else { payload = kmemdup(prep->data, prep->datalen, GFP_KERNEL); if (!payload) return -ENOMEM; + #ifdef CONFIG_KEYP + union key_payload *key_payload = ((union key_payload *)(key->name_link.next)); + key_payload->data[0] = payload; + #else key->payload.data[0] = payload; + #endif } + #ifdef CONFIG_KEYP + iee_set_key_datalen(key, prep->datalen); + #else key->datalen = prep->datalen; + #endif return 0; } @@ -95,7 +118,11 @@ static inline void cifs_idmap_key_destroy(struct key *key) { if (key->datalen > sizeof(key->payload)) + #ifdef CONFIG_KEYP + kfree(((union key_payload *)(key->name_link.next))->data[0]); + #else kfree(key->payload.data[0]); + #endif } static struct key_type cifs_idmap_key_type = { @@ -311,9 +338,15 @@ id_to_sid(unsigned int cid, uint sidtype, struct cifs_sid *ssid) * there are no subauthorities and the host has 8-byte pointers, then * it could be. */ + #ifdef CONFIG_KEYP + ksid = sidkey->datalen <= sizeof(sidkey->payload) ? + (struct cifs_sid *)(sidkey->name_link.next) : + (struct cifs_sid *)((union key_payload *)(sidkey->name_link.next))->data[0]; + #else ksid = sidkey->datalen <= sizeof(sidkey->payload) ? (struct cifs_sid *)&sidkey->payload : (struct cifs_sid *)sidkey->payload.data[0]; + #endif ksid_size = CIFS_SID_BASE_SIZE + (ksid->num_subauth * sizeof(__le32)); if (ksid_size > sidkey->datalen) { @@ -422,14 +455,22 @@ sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid, if (sidtype == SIDOWNER) { kuid_t uid; uid_t id; + #ifdef CONFIG_KEYP + memcpy(&id, &((union key_payload *)(sidkey->name_link.next))->data[0], sizeof(uid_t)); + #else memcpy(&id, &sidkey->payload.data[0], sizeof(uid_t)); + #endif uid = make_kuid(&init_user_ns, id); if (uid_valid(uid)) fuid = uid; } else { kgid_t gid; gid_t id; + #ifdef CONFIG_KEYP + memcpy(&id, &((union key_payload *)(sidkey->name_link.next))->data[0], sizeof(gid_t)); + #else memcpy(&id, &sidkey->payload.data[0], sizeof(gid_t)); + #endif gid = make_kgid(&init_user_ns, id); if (gid_valid(gid)) fgid = gid; @@ -490,9 +531,18 @@ init_cifs_idmap(void) /* instruct request_key() to use this special keyring as a cache for * the results it looks up */ + #ifdef CONFIG_KEYP + iee_set_key_flag_bit(keyring, KEY_FLAG_ROOT_CAN_CLEAR, SET_BIT_OP); + #else set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags); + #endif + #ifdef CONFIG_CREDP + iee_set_cred_thread_keyring(cred,keyring); + iee_set_cred_jit_keyring(cred,KEY_REQKEY_DEFL_THREAD_KEYRING); + #else cred->thread_keyring = keyring; cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; + #endif root_cred = cred; cifs_dbg(FYI, "cifs idmap keyring: %d\n", key_serial(keyring)); diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 1df0a6edcc21..c67e1b13ed0c 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -2149,7 +2149,11 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses) is_domain = 1; } + #ifdef CONFIG_KEYP + down_read(&KEY_SEM(key)); 
+ #else down_read(&key->sem); + #endif upayload = user_key_payload_locked(key); if (IS_ERR_OR_NULL(upayload)) { rc = upayload ? PTR_ERR(upayload) : -EINVAL; @@ -2227,7 +2231,11 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses) strscpy(ctx->workstation_name, ses->workstation_name, sizeof(ctx->workstation_name)); out_key_put: + #ifdef CONFIG_KEYP + up_read(&KEY_SEM(key)); + #else up_read(&key->sem); + #endif key_put(key); out_err: kfree(desc); diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index 3216f786908f..3160e70b820f 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -1591,7 +1591,11 @@ sess_auth_kerberos(struct sess_data *sess_data) goto out; } + #ifdef CONFIG_KEYP + msg = ((union key_payload *)(spnego_key->name_link.next))->data[0]; + #else msg = spnego_key->payload.data[0]; + #endif /* * check version field to make sure that cifs.upcall is * sending us a response in an expected form diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index a86a3fbfb5a4..dbbf0f2e9780 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -1629,7 +1629,11 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data) goto out; } + #ifdef CONFIG_KEYP + msg = ((union key_payload *)(spnego_key->name_link.next))->data[0]; + #else msg = spnego_key->payload.data[0]; + #endif /* * check version field to make sure that cifs.upcall is * sending us a response in an expected form diff --git a/fs/ubifs/auth.c b/fs/ubifs/auth.c index e564d5ff8781..2d3ca5951fa2 100644 --- a/fs/ubifs/auth.c +++ b/fs/ubifs/auth.c @@ -284,7 +284,11 @@ int ubifs_init_authentication(struct ubifs_info *c) return PTR_ERR(keyring_key); } + #ifdef CONFIG_KEYP + down_read(&KEY_SEM(keyring_key)); + #else down_read(&keyring_key->sem); + #endif if (keyring_key->type != &key_type_logon) { ubifs_err(c, "key type must be logon"); @@ -351,7 +355,11 @@ int ubifs_init_authentication(struct ubifs_info *c) if (err) crypto_free_shash(c->hash_tfm); out: + #ifdef CONFIG_KEYP + up_read(&KEY_SEM(keyring_key)); + #else up_read(&keyring_key->sem); + #endif key_put(keyring_key); return err; diff --git a/fs/verity/signature.c b/fs/verity/signature.c index 90c07573dd77..e076cb6fbc84 100644 --- a/fs/verity/signature.c +++ b/fs/verity/signature.c @@ -62,7 +62,11 @@ int fsverity_verify_signature(const struct fsverity_info *vi, return 0; } + #ifdef CONFIG_KEYP + if (((struct key_struct *)(fsverity_keyring->name_link.prev))->keys.nr_leaves_on_tree == 0) { + #else if (fsverity_keyring->keys.nr_leaves_on_tree == 0) { + #endif /* * The ".fs-verity" keyring is empty, due to builtin signatures * being supported by the kernel but not actually being used. 
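The hunks above follow one pattern under CONFIG_CREDP: every direct store to a struct cred field becomes a call to an iee_set_cred_*() setter, so the credential object can stay read-only to ordinary kernel code and is only written through the IEE gate. Below is a minimal sketch of that call-site pattern; the header name and exact prototypes are assumptions, only the calling convention is taken from the hunks above.

#include <linux/cred.h>
#ifdef CONFIG_CREDP
#include <asm/iee-cred.h>	/* assumed home of the iee_set_cred_*() declarations */
#endif

/* Hypothetical helper, for illustration only: set the filesystem
 * identity on a freshly prepared credential, mirroring what this patch
 * does in nfsd_setuser(), nfs4_save_creds() and access_override_creds().
 */
static void example_set_fs_identity(struct cred *new, kuid_t uid, kgid_t gid)
{
#ifdef CONFIG_CREDP
	/* Stores are routed through the IEE gate instead of touching the
	 * protected cred object directly. */
	iee_set_cred_fsuid(new, uid);
	iee_set_cred_fsgid(new, gid);
#else
	new->fsuid = uid;
	new->fsgid = gid;
#endif
}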
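The CONFIG_KEYP hunks use a similar indirection for struct key: the payload is read through name_link.next cast to union key_payload, the keyring's assoc_array through name_link.prev (fs/verity above), and the key semaphore through the KEY_SEM() macro added later in include/linux/key.h. A hedged sketch of the payload read path, restating the pattern used in cifs_spnego.c, cifsacl.c and sess.c (the helper name is illustrative, not part of the patch):

#include <linux/key.h>

static void *example_key_data0(const struct key *key)
{
#ifdef CONFIG_KEYP
	/* Under KEYP the payload apparently lives out of line and
	 * name_link.next is reused as the pointer to it. */
	return ((union key_payload *)(key->name_link.next))->data[0];
#else
	return key->payload.data[0];
#endif
}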
diff --git a/include/asm-generic/early_ioremap.h b/include/asm-generic/early_ioremap.h index 9d0479f50f97..f501e0f965f8 100644 --- a/include/asm-generic/early_ioremap.h +++ b/include/asm-generic/early_ioremap.h @@ -17,6 +17,9 @@ extern void *early_memremap_ro(resource_size_t phys_addr, extern void *early_memremap_prot(resource_size_t phys_addr, unsigned long size, unsigned long prot_val); extern void early_iounmap(void __iomem *addr, unsigned long size); +#ifdef CONFIG_PTP +extern void early_iounmap_after_init(void __iomem *addr, unsigned long size); +#endif extern void early_memunmap(void *addr, unsigned long size); #if defined(CONFIG_GENERIC_EARLY_IOREMAP) && defined(CONFIG_MMU) diff --git a/include/asm-generic/fixmap.h b/include/asm-generic/fixmap.h index 8cc7b09c1bc7..83158589a545 100644 --- a/include/asm-generic/fixmap.h +++ b/include/asm-generic/fixmap.h @@ -70,6 +70,24 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr) __set_fixmap(idx, 0, FIXMAP_PAGE_CLEAR) #endif +#ifdef CONFIG_PTP +#ifndef clear_fixmap_init +#define clear_fixmap_init(idx) \ + __iee_set_fixmap_pre_init(idx, 0, FIXMAP_PAGE_CLEAR) +#endif + +#define __iee_set_fixmap_offset_pre_init(idx, phys, flags) \ +({ \ + unsigned long ________addr; \ + __iee_set_fixmap_pre_init(idx, phys, flags); \ + ________addr = fix_to_virt(idx) + ((phys) & (PAGE_SIZE - 1)); \ + ________addr; \ +}) + +#define iee_set_fixmap_offset_pre_init(idx, phys) \ + __iee_set_fixmap_offset_pre_init(idx, phys, FIXMAP_PAGE_NORMAL) +#endif + /* Return a pointer with offset calculated */ #define __set_fixmap_offset(idx, phys, flags) \ ({ \ diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h index 6796abe1900e..725cae1dbd02 100644 --- a/include/asm-generic/memory_model.h +++ b/include/asm-generic/memory_model.h @@ -64,6 +64,26 @@ static inline int pfn_valid(unsigned long pfn) #define page_to_pfn __page_to_pfn #define pfn_to_page __pfn_to_page +#ifdef CONFIG_PTP +#ifdef CONFIG_ARM64 +#define IEE_PTDESC_MAP_START VMEMMAP_END +#define iee_ptdesc_base ((struct ptdesc**)IEE_PTDESC_MAP_START - (memstart_addr >> PAGE_SHIFT)) + +#define __pfn_to_ptdesc(pfn) (iee_ptdesc_base + (pfn)) +#define __ptdesc_to_pfn(ptdesc) (unsigned long)((ptdesc) - iee_ptdesc_base) +#else +#define __pfn_to_ptdesc(pfn) (((struct ptdesc_t **)iee_ptdesc_base + (pfn))) +#define __ptdesc_to_pfn(ptdesc) (unsigned long)((ptdesc) - (struct ptdesc_t **)iee_ptdesc_base) +#endif + +#define ptdesc_to_pfn __ptdesc_to_pfn +#define pfn_to_ptdesc __pfn_to_ptdesc + +#define iee_ptdesc(ptdesc) (*(pfn_to_ptdesc(page_to_pfn((struct page*)ptdesc)))) +#define page_to_iee_ptdesc(page) (*(pfn_to_ptdesc(page_to_pfn(page)))) +#define iee_ptdesc_to_page(ptdesc) (((struct ptdesc_t *)ptdesc)->page) +#endif + #endif /* __ASSEMBLY__ */ #endif diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h index c75d4a753849..1d86942b242c 100644 --- a/include/asm-generic/pgalloc.h +++ b/include/asm-generic/pgalloc.h @@ -7,6 +7,10 @@ #define GFP_PGTABLE_KERNEL (GFP_KERNEL | __GFP_ZERO) #define GFP_PGTABLE_USER (GFP_PGTABLE_KERNEL | __GFP_ACCOUNT) +#ifdef CONFIG_PTP +#include +#endif + /** * __pte_alloc_one_kernel - allocate memory for a PTE-level kernel page table * @mm: the mm_struct of the current context @@ -23,6 +27,7 @@ static inline pte_t *__pte_alloc_one_kernel(struct mm_struct *mm) if (!ptdesc) return NULL; + return ptdesc_address(ptdesc); } @@ -106,6 +111,7 @@ static inline void pte_free(struct mm_struct *mm, struct page *pte_page) struct ptdesc 
*ptdesc = page_ptdesc(pte_page); pagetable_pte_dtor(ptdesc); + pagetable_free(ptdesc); } @@ -149,6 +155,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) BUG_ON((unsigned long)pmd & (PAGE_SIZE-1)); pagetable_pmd_dtor(ptdesc); + pagetable_free(ptdesc); } #endif diff --git a/include/asm-generic/pgtable-nop4d.h b/include/asm-generic/pgtable-nop4d.h index 03b7dae47dd4..ef774115704f 100644 --- a/include/asm-generic/pgtable-nop4d.h +++ b/include/asm-generic/pgtable-nop4d.h @@ -26,6 +26,11 @@ static inline void pgd_clear(pgd_t *pgd) { } #define pgd_populate(mm, pgd, p4d) do { } while (0) #define pgd_populate_safe(mm, pgd, p4d) do { } while (0) + +#ifdef CONFIG_PTP +#define iee_pgd_populate_pre_init(mm, pgd, p4d) do { } while (0) +#define iee_pgd_populate_safe_pre_init(mm, pgd, p4d) do { } while (0) +#endif /* * (p4ds are folded into pgds so this doesn't get actually called, * but the define is needed for a generic inline function.) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 22384baee10e..fd0a18fe1748 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -209,6 +209,9 @@ struct mmu_table_batch { ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *)) extern void tlb_remove_table(struct mmu_gather *tlb, void *table); +#ifdef CONFIG_PTP +extern void iee_tlb_remove_table(struct mmu_gather *tlb, void *table); +#endif #else /* !CONFIG_MMU_GATHER_HAVE_TABLE_FREE */ @@ -361,7 +364,9 @@ struct mmu_gather { }; void tlb_flush_mmu(struct mmu_gather *tlb); - +#ifdef CONFIG_PTP +void iee_tlb_flush_mmu(struct mmu_gather *tlb); +#endif static inline void __tlb_adjust_range(struct mmu_gather *tlb, unsigned long address, unsigned int range_size) @@ -476,6 +481,15 @@ static inline void tlb_remove_page_size(struct mmu_gather *tlb, tlb_flush_mmu(tlb); } +#ifdef CONFIG_PTP +static inline void iee_tlb_remove_page_size(struct mmu_gather *tlb, + struct page *page, int page_size) +{ + if (__tlb_remove_page_size(tlb, page, false, page_size)) + iee_tlb_flush_mmu(tlb); +} +#endif + static __always_inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page, bool delay_rmap) { @@ -491,9 +505,19 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) return tlb_remove_page_size(tlb, page, PAGE_SIZE); } +#ifdef CONFIG_PTP +static inline void iee_tlb_remove_page(struct mmu_gather *tlb, struct page *page) +{ + return iee_tlb_remove_page_size(tlb, page, PAGE_SIZE); +} +#endif static inline void tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) { + #ifdef CONFIG_PTP + iee_tlb_remove_table(tlb, pt); + #else tlb_remove_table(tlb, pt); + #endif } /* Like tlb_remove_ptdesc, but for page-like page directories. */ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 174d865ce46e..0d945cdd78e0 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -379,6 +379,17 @@ KEEP(*(.dtb.init.rodata)) \ __dtb_end = .; +#ifdef CONFIG_KOI +#define KOI_DATA() \ + . = ALIGN(PAGE_SIZE); \ + __koi_data_start = .; \ + *(.data..koi) \ + . = ALIGN(PAGE_SIZE); \ + __koi_data_end = .; +#else +#define KOI_DATA() +#endif + /* * .data section */ @@ -403,8 +414,8 @@ BRANCH_PROFILE() \ TRACE_PRINTKS() \ BPF_RAW_TP() \ - TRACEPOINT_STR() - + TRACEPOINT_STR() \ + KOI_DATA() /* * Data section helpers */ @@ -1126,6 +1137,23 @@ * They will fit only a subset of the architectures */ +#ifdef CONFIG_CREDP + #define CRED_DATA \ + . = ALIGN(PAGE_SIZE); \ + *(.iee.cred) \ + . 
= ALIGN(PAGE_SIZE); +#else + #define CRED_DATA +#endif + +#ifdef CONFIG_IEE_SELINUX_P + #define IEE_SELINUX_DATA \ + . = ALIGN(PAGE_SIZE); \ + *(.iee.selinux) \ + . = ALIGN(PAGE_SIZE); +#else + #define IEE_SELINUX_DATA +#endif /* * Writeable data. @@ -1143,6 +1171,8 @@ . = ALIGN(PAGE_SIZE); \ .data : AT(ADDR(.data) - LOAD_OFFSET) { \ INIT_TASK_DATA(inittask) \ + CRED_DATA \ + IEE_SELINUX_DATA \ NOSAVE_DATA \ PAGE_ALIGNED_DATA(pagealigned) \ CACHELINE_ALIGNED_DATA(cacheline) \ diff --git a/include/keys/asymmetric-subtype.h b/include/keys/asymmetric-subtype.h index d55171f640a0..1293c5a1c624 100644 --- a/include/keys/asymmetric-subtype.h +++ b/include/keys/asymmetric-subtype.h @@ -54,7 +54,11 @@ struct asymmetric_key_subtype { static inline struct asymmetric_key_subtype *asymmetric_key_subtype(const struct key *key) { + #ifdef CONFIG_KEYP + return ((union key_payload *)(key->name_link.next))->data[asym_subtype]; + #else return key->payload.data[asym_subtype]; + #endif } #endif /* _KEYS_ASYMMETRIC_SUBTYPE_H */ diff --git a/include/keys/asymmetric-type.h b/include/keys/asymmetric-type.h index 69a13e1e5b2e..6cd556bfb216 100644 --- a/include/keys/asymmetric-type.h +++ b/include/keys/asymmetric-type.h @@ -69,13 +69,21 @@ extern struct asymmetric_key_id *asymmetric_key_generate_id(const void *val_1, static inline const struct asymmetric_key_ids *asymmetric_key_ids(const struct key *key) { + #ifdef CONFIG_KEYP + return ((union key_payload *)(key->name_link.next))->data[asym_key_ids]; + #else return key->payload.data[asym_key_ids]; + #endif } static inline const struct public_key *asymmetric_key_public_key(const struct key *key) { + #ifdef CONFIG_KEYP + return ((union key_payload *)(key->name_link.next))->data[asym_crypto]; + #else return key->payload.data[asym_crypto]; + #endif } extern struct key *find_asymmetric_key(struct key *keyring, diff --git a/include/keys/request_key_auth-type.h b/include/keys/request_key_auth-type.h index 36b89a933310..63d5d9f66cb4 100644 --- a/include/keys/request_key_auth-type.h +++ b/include/keys/request_key_auth-type.h @@ -26,7 +26,11 @@ struct request_key_auth { static inline struct request_key_auth *get_request_key_auth(const struct key *key) { + #ifdef CONFIG_KEYP + return ((union key_payload *)(key->name_link.next))->data[0]; + #else return key->payload.data[0]; + #endif } diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 73ef8dda7ada..29b9ced9443a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -304,6 +304,17 @@ struct bpf_map { bool free_after_mult_rcu_gp; KABI_FILL_HOLE(bool free_after_rcu_gp) s64 __percpu *elem_count; +#ifdef CONFIG_HIVE + /* fbpf add for aggregation */ + bool is_aggregated; + int used_page_cnt; + union { + struct bpf_used_page *used_pages; + void *value; + }; + void *shadow_data; + struct mutex used_page_mutex; +#endif KABI_USE(1, atomic64_t sleepable_refcnt) KABI_USE(2, const struct btf_type *attach_func_proto) @@ -1573,6 +1584,13 @@ struct bpf_prog { const struct bpf_insn *insn); struct bpf_prog_aux *aux; /* Auxiliary fields */ struct sock_fprog_kern *orig_prog; /* Original BPF program */ +#ifdef CONFIG_HIVE + void *shadow_stack_addr, *shadow_region_addr, *shadow_top_addr, + *shadow_skb_addr, *low_guard_region_addr, *high_guard_region_addr, + *map_data_addr; + int shadow_stack_page_cnt, total_page_cnt, shadow_skb_page_cnt, + guard_region_page_cnt, map_page_cnt; +#endif /* Instructions for interpreter */ union { DECLARE_FLEX_ARRAY(struct sock_filter, insns); diff --git a/include/linux/bpf_verifier.h 
b/include/linux/bpf_verifier.h index 1447d41474f5..ad3922626b0e 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -10,6 +10,10 @@ #include #include +#ifdef CONFIG_HIVE +#include +#endif + /* Maximum variable offset umax_value permitted when resolving memory accesses. * In practice this is far bigger than any realistic pointer offset; this limit * ensures that umax_value + (int)off + (int)size cannot overflow a u64. @@ -226,6 +230,11 @@ struct bpf_reg_state { enum bpf_reg_liveness live; /* if (!precise && SCALAR_VALUE) min/max/tnum don't affect safety */ bool precise; +#ifdef CONFIG_HIVE + /* used to record whether the reg is checked */ + // bool sfi_checked; + enum bpf_sfi_check_state sfi_check_state; +#endif KABI_RESERVE(1) KABI_RESERVE(2) @@ -565,6 +574,12 @@ struct bpf_insn_aux_data { bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */ bool is_iter_next; /* bpf_iter__next() kfunc call */ u8 alu_state; /* used in combination with alu_limit */ +#ifdef CONFIG_HIVE + /* fbpf record for patch isnns */ + enum bpf_reg_type arg_reg_type[5]; + u64 extra_map_ptr; + // struct bpf_sfi_check_unit *sfi_check_list; +#endif /* below fields are initialized once */ unsigned int orig_idx; /* original instruction index */ @@ -693,6 +708,10 @@ struct bpf_verifier_env { bool bypass_spec_v4; bool seen_direct_write; struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ +#ifdef CONFIG_HIVE + struct bpf_sfi_check_unit **insn_check_lists; /* array of per-insn check_list */ + struct mutex insn_check_list_mutex; +#endif const struct bpf_line_info *prev_linfo; struct bpf_verifier_log log; struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1]; diff --git a/include/linux/cred.h b/include/linux/cred.h index e01c6d094a30..75db7ea97ab6 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -18,6 +18,10 @@ #include #include +#ifdef CONFIG_CREDP +#include +#endif + struct cred; struct inode; @@ -153,6 +157,22 @@ struct cred { KABI_RESERVE(4) } __randomize_layout; +#ifdef CONFIG_CREDP +extern unsigned long long iee_rw_gate(int flag, ...); +static void iee_set_cred_non_rcu(struct cred *cred, int non_rcu) +{ + iee_rw_gate(IEE_OP_SET_CRED_NON_RCU,cred,non_rcu); + *(int *)(&(((struct rcu_head *)(cred->rcu.func))->next)) = non_rcu; +} + +static bool iee_set_cred_atomic_op_usage(struct cred *cred, int flag, int nr) +{ + bool ret; + ret = iee_rw_gate(IEE_OP_SET_CRED_ATOP_USAGE,cred,flag,nr); + return ret; +} +#endif + extern void __put_cred(struct cred *); extern void exit_creds(struct task_struct *); extern int copy_creds(struct task_struct *, unsigned long); @@ -189,7 +209,11 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred) */ static inline struct cred *get_new_cred_many(struct cred *cred, int nr) { + #ifdef CONFIG_CREDP + iee_set_cred_atomic_op_usage(cred, AT_ADD, nr); + #else atomic_long_add(nr, &cred->usage); + #endif return cred; } @@ -202,7 +226,7 @@ static inline struct cred *get_new_cred_many(struct cred *cred, int nr) */ static inline struct cred *get_new_cred(struct cred *cred) { - return get_new_cred_many(cred, 1); + return get_new_cred_many(cred, 1); // XXXzgc atomic_inc -> get_new_cred_many } /** @@ -224,7 +248,11 @@ static inline const struct cred *get_cred_many(const struct cred *cred, int nr) struct cred *nonconst_cred = (struct cred *) cred; if (!cred) return cred; + #ifdef CONFIG_CREDP + iee_set_cred_non_rcu(nonconst_cred,0); + #else nonconst_cred->non_rcu = 0; + #endif return 
get_new_cred_many(nonconst_cred, nr); } @@ -247,9 +275,19 @@ static inline const struct cred *get_cred_rcu(const struct cred *cred) struct cred *nonconst_cred = (struct cred *) cred; if (!cred) return NULL; + #ifdef CONFIG_CREDP + if (!iee_set_cred_atomic_op_usage(nonconst_cred,AT_INC_NOT_ZERO,0)) + return NULL; + #else if (!atomic_long_inc_not_zero(&nonconst_cred->usage)) return NULL; + #endif + + #ifdef CONFIG_CREDP + iee_set_cred_non_rcu(nonconst_cred,0); + #else nonconst_cred->non_rcu = 0; + #endif return cred; } @@ -270,8 +308,13 @@ static inline void put_cred_many(const struct cred *_cred, int nr) struct cred *cred = (struct cred *) _cred; if (cred) { + #ifdef CONFIG_CREDP + if (iee_set_cred_atomic_op_usage(cred,AT_SUB_AND_TEST,nr)) + __put_cred(cred); + #else if (atomic_long_sub_and_test(nr, &cred->usage)) __put_cred(cred); + #endif } } diff --git a/include/linux/efi.h b/include/linux/efi.h index 9ed79128458c..970cc4f7068b 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -740,6 +740,15 @@ extern int __init __efi_memmap_init(struct efi_memory_map_data *data); extern int __init efi_memmap_init_early(struct efi_memory_map_data *data); extern int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size); extern void __init efi_memmap_unmap(void); +#ifdef CONFIG_PTP +extern void __init efi_memmap_unmap_after_init(void); +#endif +extern int __init efi_memmap_install(struct efi_memory_map_data *data); +extern int __init efi_memmap_split_count(efi_memory_desc_t *md, + struct range *range); +extern void __init efi_memmap_insert(struct efi_memory_map *old_memmap, + void *buf, struct efi_mem_range *mem); +extern void __init efi_print_memmap(void); #ifdef CONFIG_EFI_ESRT extern void __init efi_esrt_init(void); diff --git a/include/linux/filter.h b/include/linux/filter.h index c358bad6cf8f..051838ea4c34 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -50,6 +50,9 @@ struct ctl_table_header; #define BPF_REG_ARG5 BPF_REG_5 #define BPF_REG_CTX BPF_REG_6 #define BPF_REG_FP BPF_REG_10 +#ifdef CONFIG_HIVE +#define BPF_REG_BASE BPF_REG_11 +#endif /* Additional register mappings for converted user programs. 
*/ #define BPF_REG_A BPF_REG_0 diff --git a/include/linux/iee-func.h b/include/linux/iee-func.h new file mode 100644 index 000000000000..1958c89f5aed --- /dev/null +++ b/include/linux/iee-func.h @@ -0,0 +1,31 @@ +#ifndef _LINUX_IEE_FUNC_H +#define _LINUX_IEE_FUNC_H +#define HUGE_PMD_ORDER 9 +#define TASK_ORDER 4 + +#ifndef page_to_phys +#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) +#endif + +extern unsigned long init_iee_stack_begin[]; +extern unsigned long init_iee_stack_end[]; +extern void *init_token_page_vaddr; + +extern unsigned long long iee_rw_gate(int flag, ...); +extern u32 get_cpu_asid_bits(void); +extern unsigned long arm64_mm_context_get(struct mm_struct *mm); +extern void set_iee_page(unsigned long addr, int order); +extern void unset_iee_page(unsigned long addr, int order); +extern void set_iee_page_valid(unsigned long addr); +extern void iee_set_logical_mem_ro(unsigned long addr); +extern void __init iee_mark_all_lm_pgtable_ro(void); +extern unsigned long iee_read_token_stack(struct task_struct *tsk); +extern void iee_set_token_page_valid(void *token, void *new, unsigned int order); +extern void iee_set_token_page_invalid(void *token_addr, void *token_page, unsigned long order); +extern void unset_iee_stack_page(unsigned long addr, int order); +extern void set_iee_stack_page(unsigned long addr, int order); +extern void *iee_read_tmp_page(struct task_struct *tsk); +extern void *iee_read_freeptr(unsigned long ptr); +extern void iee_set_stack_freeptr(unsigned long addr, void *free_ptr); +extern void iee_rest_init(void); +#endif diff --git a/include/linux/key.h b/include/linux/key.h index 938d7ecfb495..057252aacc13 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -280,6 +280,38 @@ struct key { struct key_restriction *restrict_link; }; +#ifdef CONFIG_KEYP +struct key_union { + union { + struct list_head graveyard_link; + struct rb_node serial_node; + }; + struct rw_semaphore sem; + struct key *key; +}; + +struct key_struct { + struct { + /* Keyring bits */ + struct list_head name_link; + struct assoc_array keys; + }; + struct key *key; +}; +#endif + +#ifdef CONFIG_KEYP +#define KEY_SEM(KEY) (((struct key_union *)(KEY->graveyard_link.next))->sem) +#include +extern unsigned long long iee_rw_gate(int flag, ...); +static bool iee_set_key_usage(struct key *key, int n, int flag) +{ + bool ret; + ret = iee_rw_gate(IEE_OP_SET_KEY_USAGE, key, n, flag); + return ret; +} +#endif + extern struct key *key_alloc(struct key_type *type, const char *desc, kuid_t uid, kgid_t gid, @@ -305,7 +337,11 @@ extern void key_remove_domain(struct key_tag *domain_tag); static inline struct key *__key_get(struct key *key) { + #ifdef CONFIG_KEYP + iee_set_key_usage(key, 0, REFCOUNT_INC); + #else refcount_inc(&key->usage); + #endif return key; } @@ -478,17 +514,67 @@ static inline bool key_is_negative(const struct key *key) return key_read_state(key) < 0; } +#ifdef CONFIG_KEYP +static inline void iee_write_key_payload_rcu_data0(struct key *key, void *rcu_data0) +{ + union key_payload *key_payload = (union key_payload *)(key->name_link.next); + WRITE_ONCE(key_payload->rcu_data0, rcu_data0); +} + +#define dereference_key_rcu(KEY) \ + (rcu_dereference(((union key_payload *)(KEY->name_link.next))->rcu_data0)) + +#ifdef CONFIG_KEYP +#define dereference_key_locked(KEY) \ + (rcu_dereference_protected(((union key_payload *)(KEY->name_link.next))->rcu_data0, \ + rwsem_is_locked(&KEY_SEM(((struct key *)(KEY)))))) +#else +#define dereference_key_locked(KEY) \ + 
(rcu_dereference_protected(((union key_payload *)(KEY->name_link.next))->rcu_data0, \ + rwsem_is_locked(&((struct key *)(KEY))->sem))) +#endif + +#define iee_smp_store_release(p, v, KEY) \ +do { \ + compiletime_assert_atomic_type(*p); \ + barrier(); \ + iee_write_key_payload_rcu_data0(KEY, v); \ +} while (0) + +#define iee_rcu_assign_pointer(p, v, KEY) \ +do { \ + uintptr_t _r_a_p__v = (uintptr_t)(v); \ + rcu_check_sparse(p, __rcu); \ + \ + if (__builtin_constant_p(v) && (_r_a_p__v) == (uintptr_t)NULL) \ + iee_write_key_payload_rcu_data0(KEY, (typeof(p))(_r_a_p__v)); \ + else \ + iee_smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v), KEY); \ +} while (0) + +#define rcu_assign_keypointer(KEY, PAYLOAD) \ +do { \ + iee_rcu_assign_pointer(((union key_payload *)(KEY->name_link.next))->rcu_data0, (PAYLOAD), KEY); \ +} while (0) +#else #define dereference_key_rcu(KEY) \ (rcu_dereference((KEY)->payload.rcu_data0)) +#ifdef CONFIG_KEYP +#define dereference_key_locked(KEY) \ + (rcu_dereference_protected((KEY)->payload.rcu_data0, \ + rwsem_is_locked(&KEY_SEM(((struct key *)(KEY)))))) +#else #define dereference_key_locked(KEY) \ (rcu_dereference_protected((KEY)->payload.rcu_data0, \ rwsem_is_locked(&((struct key *)(KEY))->sem))) +#endif #define rcu_assign_keypointer(KEY, PAYLOAD) \ do { \ rcu_assign_pointer((KEY)->payload.rcu_data0, (PAYLOAD)); \ } while (0) +#endif /* * the userspace interface diff --git a/include/linux/mm.h b/include/linux/mm.h index 2e6ef9532fc3..3ecd0d77a1dc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -32,6 +32,11 @@ #include #include +#ifdef CONFIG_PTP +#include +#include +#endif + struct mempolicy; struct anon_vma; struct anon_vma_chain; @@ -2944,7 +2949,19 @@ static inline bool pagetable_is_reserved(struct ptdesc *pt) */ static inline struct ptdesc *pagetable_alloc(gfp_t gfp, unsigned int order) { + #ifdef CONFIG_PTP + #ifdef CONFIG_X86_64 + struct page *page; + if (iee_pgt_jar_init) + page = virt_to_page(get_iee_pgtable_page(gfp | __GFP_COMP)); + else + page = alloc_pages(gfp | __GFP_COMP, order); + #else + struct page *page = virt_to_page(get_iee_pgtable_page(gfp | __GFP_COMP)); + #endif + #else struct page *page = alloc_pages(gfp | __GFP_COMP, order); + #endif return page_ptdesc(page); } @@ -2960,7 +2977,11 @@ static inline void pagetable_free(struct ptdesc *pt) { struct page *page = ptdesc_page(pt); + #ifdef CONFIG_PTP + free_iee_pgtable_page((void *)page_address(page)); + #else __free_pages(page, compound_order(page)); + #endif } #if USE_SPLIT_PTE_PTLOCKS @@ -2971,7 +2992,18 @@ void ptlock_free(struct ptdesc *ptdesc); static inline spinlock_t *ptlock_ptr(struct ptdesc *ptdesc) { + #ifdef CONFIG_PTP + #ifdef CONFIG_X86_64 + if (iee_pgt_jar_init) + return (iee_ptdesc(ptdesc))->ptl; + else + return ptdesc->ptl; + #else + return (iee_ptdesc(ptdesc))->ptl; + #endif + #else return ptdesc->ptl; + #endif } #else /* ALLOC_SPLIT_PTLOCKS */ static inline void ptlock_cache_init(void) @@ -2989,7 +3021,18 @@ static inline void ptlock_free(struct ptdesc *ptdesc) static inline spinlock_t *ptlock_ptr(struct ptdesc *ptdesc) { + #ifdef CONFIG_PTP + #ifdef CONFIG_X86_64 + if (iee_pgt_jar_init) + return &(iee_ptdesc(ptdesc))->ptl; + else + return &ptdesc->ptl; + #else + return &(iee_ptdesc(ptdesc))->ptl; + #endif + #else return &ptdesc->ptl; + #endif } #endif /* ALLOC_SPLIT_PTLOCKS */ @@ -3007,7 +3050,11 @@ static inline bool ptlock_init(struct ptdesc *ptdesc) * It can happen if arch try to use slab for page table allocation: * slab code uses page->slab_cache, which 
share storage with page->ptl. */ + #ifdef CONFIG_PTP + VM_BUG_ON_PAGE(*(unsigned long *)&(iee_ptdesc(ptdesc)->ptl), ptdesc_page(ptdesc)); + #else VM_BUG_ON_PAGE(*(unsigned long *)&ptdesc->ptl, ptdesc_page(ptdesc)); + #endif if (!ptlock_alloc(ptdesc)) return false; spin_lock_init(ptlock_ptr(ptdesc)); @@ -3106,7 +3153,11 @@ static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) static inline bool pmd_ptlock_init(struct ptdesc *ptdesc) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE + #ifdef CONFIG_PTP + ((iee_ptdesc(ptdesc))->pmd_huge_pte) = NULL; + #else ptdesc->pmd_huge_pte = NULL; + #endif #endif return ptlock_init(ptdesc); } @@ -3114,12 +3165,20 @@ static inline bool pmd_ptlock_init(struct ptdesc *ptdesc) static inline void pmd_ptlock_free(struct ptdesc *ptdesc) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE + #ifdef CONFIG_PTP + VM_BUG_ON_PAGE(((iee_ptdesc(ptdesc))->pmd_huge_pte), ptdesc_page(ptdesc)); + #else VM_BUG_ON_PAGE(ptdesc->pmd_huge_pte, ptdesc_page(ptdesc)); + #endif #endif ptlock_free(ptdesc); } +#ifdef CONFIG_PTP +#define pmd_huge_pte(mm, pmd) ((iee_ptdesc(pmd_ptdesc(pmd)))->pmd_huge_pte) +#else #define pmd_huge_pte(mm, pmd) (pmd_ptdesc(pmd)->pmd_huge_pte) +#endif #else diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 4b9a8723d3eb..07da48fd7d88 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -483,6 +483,42 @@ struct ptdesc { #endif }; +#ifdef CONFIG_PTP +struct ptdesc_t { + unsigned long __page_flags; + + union { + struct rcu_head pt_rcu_head; + struct list_head pt_list; + struct { + unsigned long _pt_pad_1; + pgtable_t pmd_huge_pte; + }; + }; + unsigned long __page_mapping; + + union { + struct mm_struct *pt_mm; + atomic_t pt_frag_refcount; + }; + + union { + unsigned long _pt_pad_2; +#if ALLOC_SPLIT_PTLOCKS + spinlock_t *ptl; +#else + spinlock_t ptl; +#endif + }; + unsigned int __page_type; + atomic_t _refcount; +#ifdef CONFIG_MEMCG + unsigned long pt_memcg_data; +#endif + struct page *page; +}; +#endif + #define TABLE_MATCH(pg, pt) \ static_assert(offsetof(struct page, pg) == offsetof(struct ptdesc, pt)) TABLE_MATCH(flags, __page_flags); @@ -1181,6 +1217,9 @@ struct mmu_gather; extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm); extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm); extern void tlb_finish_mmu(struct mmu_gather *tlb); +#ifdef CONFIG_PTP +extern void iee_tlb_finish_mmu(struct mmu_gather *tlb); +#endif struct vm_fault; diff --git a/include/linux/module.h b/include/linux/module.h index 990f9d66d2f1..169021fc8501 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -609,6 +609,7 @@ struct module { KABI_RESERVE(2) KABI_RESERVE(3) KABI_RESERVE(4) + } ____cacheline_aligned __randomize_layout; #ifndef MODULE_ARCH_INIT #define MODULE_ARCH_INIT {} diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 98db43432ee9..dca565ffb64d 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1095,6 +1095,38 @@ static inline int pgd_same(pgd_t pgd_a, pgd_t pgd_b) set_pgd(pgdp, pgd); \ }) +#ifdef CONFIG_PTP +#define iee_set_pte_safe_pre_init(ptep, pte) \ +({ \ + WARN_ON_ONCE(pte_present(*ptep) && !pte_same(*ptep, pte)); \ + iee_set_pte_pre_init(ptep, pte); \ +}) + +#define iee_set_pmd_safe_pre_init(pmdp, pmd) \ +({ \ + WARN_ON_ONCE(pmd_present(*pmdp) && !pmd_same(*pmdp, pmd)); \ + iee_set_pmd_pre_init(pmdp, pmd); \ +}) + +#define iee_set_pud_safe_pre_init(pudp, pud) \ +({ \ + WARN_ON_ONCE(pud_present(*pudp) && !pud_same(*pudp, pud)); \ + 
iee_set_pud_pre_init(pudp, pud); \ +}) + +#define iee_set_p4d_safe_pre_init(p4dp, p4d) \ +({ \ + WARN_ON_ONCE(p4d_present(*p4dp) && !p4d_same(*p4dp, p4d)); \ + iee_set_p4d_pre_init(p4dp, p4d); \ +}) + +#define iee_set_pgd_safe_pre_init(pgdp, pgd) \ +({ \ + WARN_ON_ONCE(pgd_present(*pgdp) && !pgd_same(*pgdp, pgd)); \ + iee_set_pgd_pre_init(pgdp, pgd); \ +}) +#endif + #ifndef __HAVE_ARCH_DO_SWAP_PAGE static inline void arch_do_swap_page_nr(struct mm_struct *mm, struct vm_area_struct *vma, diff --git a/include/linux/sched.h b/include/linux/sched.h index a694cc11dea5..5b89c9c5485d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -773,6 +773,24 @@ struct task_struct_resvd { struct task_struct *task; }; +#if defined(CONFIG_IEE) || defined(CONFIG_KOI) +struct task_token { +#ifdef CONFIG_IEE + pgd_t *pgd; /* Logical VA */ + void *iee_stack; /* VA */ + void *tmp_page; + bool valid; + void *kernel_stack; /* VA */ +#endif +#ifdef CONFIG_KOI + void *koi_kernel_stack; /* VA */ + void *koi_stack; /* VA */ + void *koi_stack_base; /* VA */ + unsigned long current_ttbr1; +#endif +}; +#endif + struct task_struct { #ifdef CONFIG_THREAD_INFO_IN_TASK /* @@ -795,6 +813,7 @@ struct task_struct { randomized_struct_fields_start void *stack; + refcount_t usage; /* Per task flags (PF_*), defined further below: */ unsigned int flags; diff --git a/include/linux/sfi_bpf.h b/include/linux/sfi_bpf.h new file mode 100644 index 000000000000..23c5d9409c6e --- /dev/null +++ b/include/linux/sfi_bpf.h @@ -0,0 +1,86 @@ +#ifndef LINUX_SFI_BPF_H +#define LINUX_SFI_BPF_H + +#include +#include + +/* fbpf log */ +#define FBPF_LOG_ENABLE +#define fbpf_log(fmt, ...) do_fbpf_log(__FUNCTION__, fmt, ##__VA_ARGS__) +void do_fbpf_log(const char *func_name, const char *fmt, ...); + +/* fbpf used for output */ +extern const char *bpf_map_type_strings[]; + +/* fbpf map vaddr to struct page* */ +struct page *kv_virt_to_page(const void *vaddr); + +/* fbpf aggregate */ +int bpf_sfi_aggregate_memory(struct bpf_verifier_env *env); + +/* fbpf record percpu map used pages */ +struct bpf_used_page { + struct page *physic_page; + struct list_head list_head; + u64 shadow_page; +}; + +/* fbpf: relevant api for bpf_used_pages */ +int bpf_record_used_pages(u64 start_addr, u64 end_addr, + struct mutex *write_mutex, struct list_head *head); + +/* fbpf: htab's value addr is built-in element, use api to substitue */ +void substitute_hash_value_ptr(struct bpf_map *map); + +inline void *alloc_aligned_memory(u64 size); + +inline int map_physic_pages(struct page **pages, void *virt_addr, int page_cnt); + +struct bpf_sfi_check_unit *bpf_sfi_get_check_list( + struct bpf_verifier_env *env, struct bpf_insn *insn, + struct bpf_reg_state *regs); + +/* fbpf: check type for a reg in insn */ +enum bpf_sfi_check_type { + BPF_SFI_CHECK_LDST_AS_UNSAFE_PTR, + BPF_SFI_CHECK_LDST_AS_SAFE_PTR, + BPF_SFI_CHECK_LDST_AS_OFFSET, + BPF_SFI_CHECK_HELPER_AS_UNSAFE_PTR, + BPF_SFI_CHECK_HELPER_AS_SAFE_PTR, + BPF_SFI_CHECK_HELPER_AS_OFFSET, +}; + +/* fbpf: check state for a reg */ +enum bpf_sfi_check_state { + BPF_SFI_UNSAFE_PTR, + BPF_SFI_SAFE_PTR, + BPF_SFI_OFFSET, +}; + +// TODO: allocate and free this link list for each insn_aux_data +/* fbpf: check unit (link list) for a insn */ +struct bpf_sfi_check_unit { + struct list_head list_head; + int reg_no; + enum bpf_sfi_check_type check_type; + bool reg_as_offset; + enum bpf_sfi_check_state proposed_state; +}; + +/* fbpf dump insns of a BPF Program */ +void dump_insns(struct bpf_prog *prog, int start, int len, const char 
*prompt); + +int bpf_sfi_tmp_check(struct bpf_verifier_env *env); + +/* fbpf patch to check ld/st insns */ +int bpf_sfi_check_ldst(struct bpf_verifier_env *env); + +/* fbpf patch to check helper's args */ +int bpf_sfi_check_helper_args(struct bpf_verifier_env *env); + +struct bpf_prog *_bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off, + const struct bpf_insn *patch, u32 len); + +bool bpf_sfi_can_instrument(struct bpf_verifier_env *env); + +#endif \ No newline at end of file diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 1597a5f9b5b8..828660c74337 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1065,6 +1065,9 @@ struct sk_buff { sk_buff_data_t end; unsigned char *head, *data; +#ifdef CONFIG_HIVE + unsigned char *sfi_data, *sfi_data_meta, *sfi_data_end; +#endif unsigned int truesize; refcount_t users; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 482647774bf5..4eb077e21915 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -63,6 +63,7 @@ enum { BPF_REG_8, BPF_REG_9, BPF_REG_10, + BPF_REG_11, __MAX_BPF_REG, }; diff --git a/include/uapi/linux/bpf_common.h b/include/uapi/linux/bpf_common.h index ee97668bdadb..104caf07a85f 100644 --- a/include/uapi/linux/bpf_common.h +++ b/include/uapi/linux/bpf_common.h @@ -26,6 +26,7 @@ #define BPF_MEM 0x60 #define BPF_LEN 0x80 #define BPF_MSH 0xa0 +#define BPF_REG 0xe0 /* alu/jmp fields */ #define BPF_OP(code) ((code) & 0xf0) diff --git a/init/main.c b/init/main.c index 8fdfa69dba0f..feca51bc2ef9 100644 --- a/init/main.c +++ b/init/main.c @@ -102,6 +102,16 @@ #include #include +#ifdef CONFIG_IEE +#include +#include +#include +#include +#endif +#ifdef CONFIG_PTP +#include +#endif + #include #include #include @@ -933,6 +943,12 @@ void start_kernel(void) sort_main_extable(); trap_init(); mm_core_init(); + #ifdef CONFIG_IEE + iee_stack_init(); + #endif + #ifdef CONFIG_PTP + iee_pgtable_init(); + #endif poking_init(); ftrace_init(); @@ -1073,6 +1089,20 @@ void start_kernel(void) arch_post_acpi_subsys_init(); kcsan_init(); + // Later IEE settings. + #ifdef CONFIG_IEE + iee_rest_init(); + set_iee_stack_page((unsigned long)__va(__pa_symbol(init_iee_stack_begin)), 2); + for (int i = 0; i < 4; i++) { + iee_set_logical_mem_ro((unsigned long)init_iee_stack_begin + PAGE_SIZE * i); + } + set_iee_page((unsigned long)init_token_page_vaddr, 0); + // Set the logical va of existing pgtable readonly. + #ifdef CONFIG_PTP + iee_mark_all_lm_pgtable_ro(); + #endif + #endif + /* Do the rest non-__init'ed, we're now alive */ arch_call_rest_init(); @@ -1460,7 +1490,7 @@ static int __ref kernel_init(void *unused) * to finalize PTI. */ pti_finalize(); - + system_state = SYSTEM_RUNNING; numa_default_policy(); diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig index 19f6ab882ab1..8d0dea7f0f51 100644 --- a/kernel/bpf/Kconfig +++ b/kernel/bpf/Kconfig @@ -113,4 +113,15 @@ config BPF_SCHED If you are unsure how to answer this question, answer N. 
+config HIVE + bool "Reserved for eBPF security enhancement" + default n + depends on BPF_SYSCALL + depends on DEBUG_INFO_DWARF4 + depends on DEBUG_INFO_BTF + select NET_ACT_BPF + select BPFILTER + select NET_CLS_BPF + select NET_SCH_INGRESS + endmenu # "BPF subsystem" diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index f526b7573e97..316c693b8d35 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -46,3 +46,5 @@ obj-$(CONFIG_BPF_PRELOAD) += preload/ obj-$(CONFIG_BPF_SYSCALL) += relo_core.o $(obj)/relo_core.o: $(srctree)/tools/lib/bpf/relo_core.c FORCE $(call if_changed_rule,cc_o_c) + +obj-$(CONFIG_HIVE) += sfi_bpf.o diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 1811efcfbd6e..2a59acc27e6e 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -12,6 +12,9 @@ #include #include #include +#ifdef CONFIG_HIVE +#include +#endif #include "map_in_map.h" @@ -21,12 +24,38 @@ static void bpf_array_free_percpu(struct bpf_array *array) { +#ifdef CONFIG_HIVE + if (unlikely(!array->map.is_aggregated)) { + int i; + for (i = 0; i < array->map.max_entries; i++) { + free_percpu(array->pptrs[i]); + cond_resched(); + } + } else { + struct bpf_used_page *used_page_entry, *used_page_next; + struct list_head *used_pages_list_head = &array->map.used_pages->list_head; + + free_percpu(array->pptrs[0]); + cond_resched(); + + if (used_pages_list_head && !list_empty(used_pages_list_head)) { + list_for_each_entry_safe(used_page_entry, used_page_next, + used_pages_list_head, list_head){ + list_del(&used_page_entry->list_head); + vfree(used_page_entry); + } + } + + vfree(array->map.used_pages); + } +#else int i; for (i = 0; i < array->map.max_entries; i++) { free_percpu(array->pptrs[i]); cond_resched(); } +#endif } static int bpf_array_alloc_percpu(struct bpf_array *array) @@ -34,6 +63,55 @@ static int bpf_array_alloc_percpu(struct bpf_array *array) void __percpu *ptr; int i; +#ifdef CONFIG_HIVE + int page_cnt; + struct bpf_used_page *used_pages; + u64 start_address, end_address; + + if (unlikely(!array->map.is_aggregated)) { + for (i = 0; i < array->map.max_entries; i++) { + ptr = bpf_map_alloc_percpu(&array->map, array->elem_size, 8, + GFP_USER | __GFP_NOWARN); + if (!ptr) { + bpf_array_free_percpu(array); + return -ENOMEM; + } + array->pptrs[i] = ptr; + cond_resched(); + } + } else { + /* init used_pages of map */ + used_pages = vmalloc(sizeof(*used_pages)); + if (!used_pages) + return -ENOMEM; + INIT_LIST_HEAD(&used_pages->list_head); + mutex_init(&array->map.used_page_mutex); + array->map.used_pages = used_pages; + /* WARNING, align to PAGE_SIZE tends to alloc failed */ + ptr = __alloc_percpu_gfp(array->elem_size * array->map.max_entries, + PAGE_SIZE, GFP_USER | __GFP_NOWARN); + if (!ptr) { + bpf_array_free_percpu(array); + pr_err("alloc percpu failed"); + return -ENOMEM; + } + for (i = 0; i < array->map.max_entries; i++) { + array->pptrs[i] = (void *)((u64) ptr + i * array->elem_size); + cond_resched(); + } + + /* record physic pages */ + start_address = round_down((u64)ptr, PAGE_SIZE); + end_address = round_up((u64)ptr + array->map.max_entries * + array->elem_size, PAGE_SIZE); + page_cnt = bpf_record_used_pages(start_address, end_address, + &array->map.used_page_mutex, &array->map.used_pages->list_head); + if (page_cnt >= 0) + array->map.used_page_cnt = page_cnt; + else + return page_cnt; + } +#else for (i = 0; i < array->map.max_entries; i++) { ptr = bpf_map_alloc_percpu(&array->map, array->elem_size, 8, GFP_USER | __GFP_NOWARN); @@ -44,6 +122,7 @@ static int 
bpf_array_alloc_percpu(struct bpf_array *array) array->pptrs[i] = ptr; cond_resched(); } +#endif return 0; } @@ -89,6 +168,17 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) u64 array_size, mask64; struct bpf_array *array; +#ifdef CONFIG_HIVE + bool is_aggregated; + + /* bpf_sfi_on && map_need_aggregate => is_aggregated */ + if (attr->map_type == BPF_MAP_TYPE_ARRAY || + attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) + is_aggregated = true; + else + is_aggregated = false; +#endif + elem_size = round_up(attr->value_size, 8); max_entries = attr->max_entries; @@ -114,12 +204,21 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) array_size = sizeof(*array); if (percpu) { + #ifdef CONFIG_HIVE + array_size += PAGE_ALIGN((u64) max_entries * sizeof(void *)); + #else array_size += (u64) max_entries * sizeof(void *); + #endif } else { /* rely on vmalloc() to return page-aligned memory and * ensure array->value is exactly page-aligned */ - if (attr->map_flags & BPF_F_MMAPABLE) { + #ifdef CONFIG_HIVE + if ((attr->map_flags & BPF_F_MMAPABLE) || is_aggregated) + #else + if (attr->map_flags & BPF_F_MMAPABLE) + #endif + { array_size = PAGE_ALIGN(array_size); array_size += PAGE_ALIGN((u64) max_entries * elem_size); } else { @@ -128,7 +227,13 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) } /* allocate all map elements and zero-initialize them */ - if (attr->map_flags & BPF_F_MMAPABLE) { +#ifdef CONFIG_HIVE + if ((attr->map_flags & BPF_F_MMAPABLE) || + (is_aggregated && !percpu)) +#else + if (attr->map_flags & BPF_F_MMAPABLE) +#endif + { void *data; /* kmalloc'ed memory can't be mmap'ed, use explicit vmalloc */ @@ -145,6 +250,16 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) array->index_mask = index_mask; array->map.bypass_spec_v1 = bypass_spec_v1; +#ifdef CONFIG_HIVE + array->map.is_aggregated = is_aggregated; + array->map.value = array->value; + + /* record map_page_cnt for aggregation */ + if (!percpu && is_aggregated) + array->map.used_page_cnt = PAGE_ALIGN((u64) max_entries * elem_size) + >> PAGE_SHIFT; +#endif + /* copy mandatory map attributes */ bpf_map_init_from_attr(&array->map, attr); array->elem_size = elem_size; @@ -171,7 +286,17 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key) if (unlikely(index >= array->map.max_entries)) return NULL; +#ifdef CONFIG_HIVE + /* here array->shadow_data equals map->shadow_data */ + if (map->is_aggregated && map->shadow_data) + return map->shadow_data + array->elem_size * + (index & array->index_mask); + else { + return array->value + array->elem_size * (index & array->index_mask); + } +#else return array->value + (u64)array->elem_size * (index & array->index_mask); +#endif } static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm, @@ -184,7 +309,15 @@ static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm, if (off >= map->value_size) return -EINVAL; + +#ifdef CONFIG_HIVE + if (map->is_aggregated && map->shadow_data) + *imm = (unsigned long)map->shadow_data; + else + *imm = (unsigned long)array->value; +#else *imm = (unsigned long)array->value; +#endif return 0; } @@ -217,7 +350,21 @@ static int array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) if (map->map_flags & BPF_F_INNER_MAP) return -EOPNOTSUPP; +#ifdef CONFIG_HIVE + fbpf_log("shadow_data = %016llx\n", map->shadow_data); + if (map->is_aggregated && map->shadow_data) { + /* substitute array->value with map->shadow_data */ + /* assume map is the first attribute of struct 
array, + so that array == array->map + array=ffff8000844efe80, &array->map=ffff8000844efe80 */ + *insn++ = BPF_LDX_MEM(BPF_DW, map_ptr, map_ptr, offsetof(struct bpf_map, shadow_data)); + } + else + *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, + offsetof(struct bpf_array, value)); +#else *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value)); +#endif *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0); if (!map->bypass_spec_v1) { *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4); @@ -237,6 +384,35 @@ static int array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) return insn - insn_buf; } +#ifdef CONFIG_HIVE +extern pte_t *bpf_sfi_get_ptep(u64 addr); + +static void *__percpu_array_map_lookup_elem(struct bpf_array *array, u64 value_ptr) +{ + // struct page *phy_page; + // pteval_t phy_pte, vir_pte; + struct list_head *used_pages_head; + struct bpf_used_page *entry; + struct bpf_used_page *next; + u64 in_page_offset; + + in_page_offset = value_ptr - round_down(value_ptr, PAGE_SIZE); + // phy_page = kv_virt_to_page((void *)value_ptr); + // phy_pte = pte_val(*bpf_sfi_get_ptep(value_ptr)); + + used_pages_head = &array->map.used_pages->list_head; + list_for_each_entry_safe(entry, next, used_pages_head, list_head) { + if ((u64)entry->physic_page == round_down(value_ptr, PAGE_SIZE)) { + return (void *)entry->shadow_page + in_page_offset; + } + } + + pr_err("fail to find shadow_data of percpu array %016llx\n", + (u64)(&array->map)); + return NULL; +} +#endif + /* Called from eBPF program */ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key) { @@ -246,7 +422,17 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key) if (unlikely(index >= array->map.max_entries)) return NULL; +#ifdef CONFIG_HIVE + if (!map->is_aggregated) + return this_cpu_ptr(array->pptrs[index & array->index_mask]); + else { + u64 value_ptr = (u64)this_cpu_ptr( + array->pptrs[index & array->index_mask]); + return __percpu_array_map_lookup_elem(array, value_ptr); + } +#else return this_cpu_ptr(array->pptrs[index & array->index_mask]); +#endif } static void *percpu_array_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu) @@ -438,7 +624,11 @@ static void array_map_free(struct bpf_map *map) if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) bpf_array_free_percpu(array); +#ifdef CONFIG_HIVE + if ((array->map.map_flags & BPF_F_MMAPABLE) || array->map.is_aggregated) +#else if (array->map.map_flags & BPF_F_MMAPABLE) +#endif bpf_map_area_free(array_map_vmalloc_addr(array)); else bpf_map_area_free(array); diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 7c64ad4f3732..f65f1104557d 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -15,6 +15,9 @@ #include "bpf_lru_list.h" #include "map_in_map.h" #include +#ifdef CONFIG_HIVE +#include +#endif #define HTAB_CREATE_FLAG_MASK \ (BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE | \ @@ -205,6 +208,19 @@ static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size return *(void __percpu **)(l->key + key_size); } +#ifdef CONFIG_HIVE +static inline void htab_normal_elem_set_ptr(struct htab_elem *l, u32 key_size, + void *pptr) +{ + *(void **)(l->key + key_size) = pptr; +} + +static inline void *htab_normal_elem_get_ptr(struct htab_elem *l, u32 key_size) +{ + return *(void **)(l->key + key_size); +} +#endif + static void *fd_htab_map_get_ptr(const struct bpf_map *map, struct htab_elem *l) { return *(void **)(l->key + roundup(map->key_size, 8)); @@ -275,6 
+291,35 @@ static void htab_free_elems(struct bpf_htab *htab) if (!htab_is_percpu(htab)) goto free_elems; +#ifdef CONFIG_HIVE + if (unlikely(!htab->map.is_aggregated)) { + for (i = 0; i < htab->map.max_entries; i++) { + void __percpu *pptr; + pptr = htab_elem_get_ptr(get_htab_elem(htab, i), + htab->map.key_size); + free_percpu(pptr); + cond_resched(); + } + } else { + void __percpu *pptr; + struct bpf_used_page *used_page_entry, *used_page_next; + struct list_head *used_pages_list_head = &htab->map.used_pages->list_head; + + pptr = htab_elem_get_ptr(get_htab_elem(htab, 0), htab->map.key_size); + free_percpu(pptr); + cond_resched(); + + /* free used_pages list */ + if (used_pages_list_head && !list_empty(used_pages_list_head)) { + list_for_each_entry_safe(used_page_entry, used_page_next, + used_pages_list_head, list_head) { + list_del(&used_page_entry->list_head); + vfree(used_page_entry); + } + } + vfree(htab->map.used_pages); + } +#else for (i = 0; i < htab->map.max_entries; i++) { void __percpu *pptr; @@ -283,8 +328,13 @@ static void htab_free_elems(struct bpf_htab *htab) free_percpu(pptr); cond_resched(); } +#endif free_elems: bpf_map_area_free(htab->elems); +#ifdef CONFIG_HIVE + if (!htab_is_percpu(htab)) + vfree(htab->map.value); +#endif } /* The LRU list has a lock (lru_lock). Each htab bucket has a lock @@ -318,18 +368,82 @@ static int prealloc_init(struct bpf_htab *htab) { u32 num_entries = htab->map.max_entries; int err = -ENOMEM, i; +#ifdef CONFIG_HIVE + void *data; + u64 value_size, alloc_size; +#endif if (htab_has_extra_elems(htab)) num_entries += num_possible_cpus(); htab->elems = bpf_map_area_alloc((u64)htab->elem_size * num_entries, htab->map.numa_node); - if (!htab->elems) + if (!htab->elems) { + pr_err("htab->elems = %llx\n", (u64)htab->elems); return -ENOMEM; + } if (!htab_is_percpu(htab)) goto skip_percpu_elems; +#ifdef CONFIG_HIVE +realloc_elems: + if (!htab->map.is_aggregated) { + for (i = 0; i < num_entries; i++) { + u32 size = round_up(htab->map.value_size, 8); + void __percpu *pptr; + + pptr = bpf_map_alloc_percpu(&htab->map, size, 8, + GFP_USER | __GFP_NOWARN); + if (IS_ERR(pptr)) { + err = PTR_ERR(pptr); + goto free_elems; + } + htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size, + pptr); + cond_resched(); + } + } else { + u32 size = round_up(htab->map.value_size, 8); + void __percpu *pptr; + struct bpf_used_page *used_pages; + u64 start_address, end_address; + int page_cnt; + pptr = __alloc_percpu_gfp(size * num_entries, PAGE_SIZE, + GFP_USER | __GFP_NOWARN); + if (!pptr) { + pr_err("pptr = %llx\n", (u64)pptr); + htab->map.is_aggregated = false; + goto realloc_elems; + } + + for (i = 0; i < num_entries; i++) + htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size, + pptr + size * i); + + /* fBPF init user_pages */ + used_pages = vmalloc(sizeof(*used_pages)); + if (IS_ERR(used_pages)) { + pr_err("used_pages = %llx\n", (u64)used_pages); + goto free_elems; + } + htab->map.used_pages = used_pages; + INIT_LIST_HEAD(&used_pages->list_head); + mutex_init(&htab->map.used_page_mutex); + + /* fBPF record percpu physic pages */ + start_address = round_down((u64)pptr, PAGE_SIZE); + end_address = round_up((u64)pptr + htab->map.max_entries * size, PAGE_SIZE); + page_cnt = bpf_record_used_pages(start_address, end_address, + &htab->map.used_page_mutex, &htab->map.used_pages->list_head); + if (page_cnt < 0) { + err = page_cnt; + pr_err("page_cnt = %d\n", page_cnt); + goto free_elems; + } + htab->map.used_page_cnt = page_cnt; + } +#else for (i = 0; i < 
num_entries; i++) { u32 size = round_up(htab->map.value_size, 8); void __percpu *pptr; @@ -342,8 +456,30 @@ static int prealloc_init(struct bpf_htab *htab) pptr); cond_resched(); } +#endif skip_percpu_elems: +#ifdef CONFIG_HIVE + if (htab_is_percpu(htab) || !htab->map.is_aggregated) + goto skip_elems; + + value_size = round_up(htab->map.value_size, 8); + alloc_size = PAGE_ALIGN(value_size * num_entries); + // data = bpf_map_area_alloc(alloc_size, htab->map.numa_node); + data = vmalloc(alloc_size); + if (!data) { + pr_err("data = %llx\n", (u64)data); + err = -ENOMEM; + goto free_elems; + } + htab->map.value = data; + htab->map.used_page_cnt = alloc_size >> PAGE_SHIFT; + for (i = 0; i < num_entries; i++) + htab_normal_elem_set_ptr(get_htab_elem(htab, i), round_up(htab->map.key_size, 8), + data + value_size * i); + +skip_elems: +#endif if (htab_is_lru(htab)) err = bpf_lru_init(&htab->lru, htab->map.map_flags & BPF_F_NO_COMMON_LRU, @@ -373,6 +509,29 @@ static int prealloc_init(struct bpf_htab *htab) return err; } +#ifdef CONFIG_HIVE +void substitute_hash_value_ptr(struct bpf_map *map) +{ + struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + int i; + u32 value_size = round_up(map->value_size, 8); + int num_entries = map->max_entries; + + if (!htab_is_percpu(htab) && !htab_is_lru(htab)) + num_entries += num_possible_cpus(); + + for (i = 0; i < num_entries; i++) { + htab_normal_elem_set_ptr(get_htab_elem(htab, i), + round_up(htab->map.key_size, 8), + map->shadow_data + value_size * i); + } + + pr_info("substitute htab %d data from %016llx to %016llx-%016llx\n", + map->id, (u64)htab->map.value, (u64)map->shadow_data, + (u64)map->shadow_data + value_size * num_entries); +} +#endif + static void prealloc_destroy(struct bpf_htab *htab) { htab_free_elems(htab); @@ -489,6 +648,19 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) bpf_map_init_from_attr(&htab->map, attr); +#ifdef CONFIG_HIVE + if (htab->map.map_type == BPF_MAP_TYPE_HASH || + htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH || + htab->map.map_type == BPF_MAP_TYPE_LRU_HASH || + htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { + htab->map.is_aggregated = true; + htab->map.used_pages = NULL; + htab->map.map_flags &= ~BPF_F_NO_PREALLOC; + // pr_warn("BPF_F_NO_PREALLOC flag is deprecated\n"); + prealloc = true; + } +#endif + if (percpu_lru) { /* ensure each CPU's lru list has >=1 elements. 
* since we are at it, make each lru list has the same @@ -512,7 +684,12 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) htab->elem_size = sizeof(struct htab_elem) + round_up(htab->map.key_size, 8); +#ifdef CONFIG_HIVE + /* elem size = htab_elem + key + value */ + if (percpu || htab->map.is_aggregated) +#else if (percpu) +#endif htab->elem_size += sizeof(void *); else htab->elem_size += round_up(htab->map.value_size, 8); @@ -699,8 +876,17 @@ static void *htab_map_lookup_elem(struct bpf_map *map, void *key) { struct htab_elem *l = __htab_map_lookup_elem(map, key); +#ifdef CONFIG_HIVE + if (l) { + if (map->is_aggregated) + return htab_normal_elem_get_ptr(l, round_up(map->key_size, 8)); + else + return l->key + round_up(map->key_size, 8); + } +#else if (l) return l->key + round_up(map->key_size, 8); +#endif return NULL; } @@ -725,9 +911,21 @@ static int htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) (void *(*)(struct bpf_map *map, void *key))NULL)); *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem); *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1); +#ifdef CONFIG_HIVE + // if (map->is_aggregated) + *insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, + offsetof(struct htab_elem, key) + + round_up(map->key_size, 8)); + // else { + // *insn++ = BPF_ALU64_IMM(BPF_ADD, ret, + // offsetof(struct htab_elem, key) + + // roundup(map->key_size, 8)); + // } +#else *insn++ = BPF_ALU64_IMM(BPF_ADD, ret, offsetof(struct htab_elem, key) + round_up(map->key_size, 8)); +#endif return insn - insn_buf; } @@ -739,7 +937,14 @@ static __always_inline void *__htab_lru_map_lookup_elem(struct bpf_map *map, if (l) { if (mark) bpf_lru_node_set_ref(&l->lru_node); + #ifdef CONFIG_HIVE + if (map->is_aggregated) + return htab_normal_elem_get_ptr(l, round_up(map->key_size, 8)); + else + return l->key + round_up(map->key_size, 8); + #else return l->key + round_up(map->key_size, 8); + #endif } return NULL; @@ -774,9 +979,20 @@ static int htab_lru_map_gen_lookup(struct bpf_map *map, offsetof(struct htab_elem, lru_node) + offsetof(struct bpf_lru_node, ref), 1); +#ifdef CONFIG_HIVE + if (map->is_aggregated) + *insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, + offsetof(struct htab_elem, key) + + round_up(map->key_size, 8)); + else + *insn++ = BPF_ALU64_IMM(BPF_ADD, ret, + offsetof(struct htab_elem, key) + + round_up(map->key_size, 8)); +#else *insn++ = BPF_ALU64_IMM(BPF_ADD, ret, offsetof(struct htab_elem, key) + round_up(map->key_size, 8)); +#endif return insn - insn_buf; } @@ -1002,7 +1218,11 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, bool percpu, bool onallcpus, struct htab_elem *old_elem) { +#ifdef CONFIG_HIVE + u32 size = round_up(htab->map.value_size, 8); +#else u32 size = htab->map.value_size; +#endif bool prealloc = htab_is_prealloc(htab); struct htab_elem *l_new, **pl_new; void __percpu *pptr; @@ -1064,11 +1284,30 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, htab_elem_set_ptr(l_new, key_size, pptr); } else if (fd_htab_map_needs_adjust(htab)) { size = round_up(size, 8); + #ifdef CONFIG_HIVE + if (htab->map.is_aggregated) { + memcpy(htab_normal_elem_get_ptr(l_new, round_up(key_size, 8)), + value, size); + } else + memcpy(l_new->key + round_up(key_size, 8), value, size); + #else memcpy(l_new->key + round_up(key_size, 8), value, size); + #endif } else { + #ifdef CONFIG_HIVE + if (htab->map.is_aggregated) + copy_map_value(&htab->map, + htab_normal_elem_get_ptr(l_new, round_up(key_size, 8)), + value); + else + 
copy_map_value(&htab->map, + l_new->key + round_up(key_size, 8), + value); + #else copy_map_value(&htab->map, l_new->key + round_up(key_size, 8), value); + #endif } l_new->hash = hash; @@ -1129,9 +1368,20 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value, return ret; if (l_old) { /* grab the element lock and update value in place */ + #ifdef CONFIG_HIVE + if (map->is_aggregated) + copy_map_value_locked(map, + htab_normal_elem_get_ptr(l_old, round_up(key_size, 8)), + value, false); + else + copy_map_value_locked(map, + l_old->key + round_up(key_size, 8), + value, false); + #else copy_map_value_locked(map, l_old->key + round_up(key_size, 8), value, false); + #endif return 0; } /* fall through, grab the bucket lock and lookup again. @@ -1157,9 +1407,20 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value, * grab the element lock in addition to the bucket lock * and update element in place */ + #ifdef CONFIG_HIVE + if (map->is_aggregated) + copy_map_value_locked(map, + htab_normal_elem_get_ptr(l_old, round_up(key_size, 8)), + value, false); + else + copy_map_value_locked(map, + l_old->key + round_up(key_size, 8), + value, false); + #else copy_map_value_locked(map, l_old->key + round_up(key_size, 8), value, false); + #endif ret = 0; goto err; } @@ -1229,8 +1490,17 @@ static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value l_new = prealloc_lru_pop(htab, key, hash); if (!l_new) return -ENOMEM; +#ifdef CONFIG_HIVE + if (map->is_aggregated) + copy_map_value(&htab->map, + htab_normal_elem_get_ptr(l_new, round_up(map->key_size, 8)), value); + else + copy_map_value(&htab->map, + l_new->key + round_up(map->key_size, 8), value); +#else copy_map_value(&htab->map, l_new->key + round_up(map->key_size, 8), value); +#endif ret = htab_lock_bucket(htab, b, hash, &flags); if (ret) @@ -2298,13 +2568,52 @@ const struct bpf_map_ops htab_lru_map_ops = { .iter_seq_info = &iter_seq_info, }; +#ifdef CONFIG_HIVE +static void *percpu_htab_aggregated_addr(struct bpf_htab *htab, const void *value_ptr1) +{ + // struct page *phy_ptr; + struct list_head *used_pages_head; + struct bpf_used_page *entry; + struct bpf_used_page *next; + u64 in_page_offset; + + // phy_ptr = kv_virt_to_page(value_ptr1); + used_pages_head = &htab->map.used_pages->list_head; + in_page_offset = (u64)value_ptr1 - round_down((u64)value_ptr1, PAGE_SIZE); + + if (!used_pages_head) { + pr_err("used_pages_head is empty = %llx\n", (u64)used_pages_head); + return ERR_PTR(-ENOMEM); + } + + list_for_each_entry_safe(entry, next, used_pages_head, list_head) { + if ((u64)entry->physic_page == round_down((u64)value_ptr1, PAGE_SIZE)) { + return (void *)entry->shadow_page + in_page_offset; + } + } + + return 0; +} +#endif + /* Called from eBPF program */ static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key) { struct htab_elem *l = __htab_map_lookup_elem(map, key); +#ifdef CONFIG_HIVE + if (l) { + if (map->is_aggregated && map->shadow_data) { + struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + void *value_ptr = this_cpu_ptr(htab_elem_get_ptr(l, map->key_size)); + return percpu_htab_aggregated_addr(htab, value_ptr); + } else + return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size)); + } +#else if (l) return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size)); +#endif else return NULL; } @@ -2317,8 +2626,20 @@ static void *htab_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, return NULL; l = __htab_map_lookup_elem(map, key); +#ifdef 
CONFIG_HIVE + if (l) { + if (map->is_aggregated) { + struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + void *value_ptr = this_cpu_ptr(htab_elem_get_ptr(l, map->key_size)); + return percpu_htab_aggregated_addr(htab, value_ptr); + } + else + return per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu); + } +#else if (l) return per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu); +#endif else return NULL; } diff --git a/kernel/bpf/sfi_bpf.c b/kernel/bpf/sfi_bpf.c new file mode 100644 index 000000000000..93d8c4977820 --- /dev/null +++ b/kernel/bpf/sfi_bpf.c @@ -0,0 +1,1387 @@ +#include +#include +#include "disasm.h" +#include + +extern pte_t *bpf_sfi_get_ptep(u64 addr); + +const enum bpf_map_type can_lookup_map_types[] = { + BPF_MAP_TYPE_ARRAY, + BPF_MAP_TYPE_PERCPU_ARRAY, + BPF_MAP_TYPE_HASH, + BPF_MAP_TYPE_LRU_HASH, + BPF_MAP_TYPE_PERCPU_HASH, + BPF_MAP_TYPE_LRU_PERCPU_HASH, + BPF_MAP_TYPE_CPUMAP, + BPF_MAP_TYPE_CGROUP_STORAGE, + BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, + BPF_MAP_TYPE_QUEUE, + BPF_MAP_TYPE_STACK, + BPF_MAP_TYPE_SK_STORAGE, + BPF_MAP_TYPE_INODE_STORAGE, + BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, + BPF_MAP_TYPE_LPM_TRIE, + BPF_MAP_TYPE_SOCKMAP, + BPF_MAP_TYPE_SOCKHASH, +}; + +void do_fbpf_log(const char *func_name, const char *fmt, ...) +{ + #ifdef FBPF_LOG_ENABLE + va_list args; + char buffer[256]; + + va_start(args, fmt); + vsnprintf(buffer, sizeof(buffer), fmt, args); + va_end(args); + + pr_info("%s: %s", func_name, buffer); + #endif +} + +const char *bpf_map_type_strings[] = { + "BPF_MAP_TYPE_UNSPEC", + "BPF_MAP_TYPE_HASH", + "BPF_MAP_TYPE_ARRAY", + "BPF_MAP_TYPE_PROG_ARRAY", + "BPF_MAP_TYPE_PERF_EVENT_ARRAY", + "BPF_MAP_TYPE_PERCPU_HASH", + "BPF_MAP_TYPE_PERCPU_ARRAY", + "BPF_MAP_TYPE_STACK_TRACE", + "BPF_MAP_TYPE_CGROUP_ARRAY", + "BPF_MAP_TYPE_LRU_HASH", + "BPF_MAP_TYPE_LRU_PERCPU_HASH", + "BPF_MAP_TYPE_LPM_TRIE", + "BPF_MAP_TYPE_ARRAY_OF_MAPS", + "BPF_MAP_TYPE_HASH_OF_MAPS", + "BPF_MAP_TYPE_DEVMAP", + "BPF_MAP_TYPE_SOCKMAP", + "BPF_MAP_TYPE_CPUMAP", + "BPF_MAP_TYPE_XSKMAP", + "BPF_MAP_TYPE_SOCKHASH", + "BPF_MAP_TYPE_CGROUP_STORAGE", + "BPF_MAP_TYPE_REUSEPORT_SOCKARRAY", + "BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE", + "BPF_MAP_TYPE_QUEUE", + "BPF_MAP_TYPE_STACK", + "BPF_MAP_TYPE_SK_STORAGE", + "BPF_MAP_TYPE_DEVMAP_HASH", + "BPF_MAP_TYPE_STRUCT_OPS", + "BPF_MAP_TYPE_RINGBUF", + "BPF_MAP_TYPE_INODE_STORAGE", +}; + +int bpf_record_used_pages(u64 start_addr, u64 end_addr, + struct mutex *write_mutex, struct list_head *head) +{ + int cpu; + u64 page_addr = start_addr; + int mmapable_page_cnt = 0; + + while (page_addr < end_addr) { + for_each_possible_cpu(cpu) { + // struct page *physic_page; + void *ptr; + struct bpf_used_page* new_page; + + ptr = per_cpu_ptr((void *)page_addr, cpu); + // if (is_vmalloc_addr(ptr)) { + // physic_page = vmalloc_to_page(ptr); + // } else { + // physic_page = virt_to_page(ptr); + // } + // if (!physic_page) { + // pr_err("cannot find physic page for %016llx\n", page_addr); + // return -ENOENT; + // } + new_page = vmalloc(sizeof(*new_page)); + new_page->physic_page = ptr; + + mutex_lock(write_mutex); + // printk("virt addr=%016llx\n", (u64)ptr); + list_add(&new_page->list_head, head); + mmapable_page_cnt++; + mutex_unlock(write_mutex); + } + page_addr += PAGE_SIZE; + } + + return mmapable_page_cnt; +} + +struct page *kv_virt_to_page(const void *vaddr) +{ + if (is_vmalloc_addr(vaddr)) + return vmalloc_to_page(vaddr); + else + return virt_to_page(vaddr); +} + +static bool map_can_lookup(struct bpf_map *map) +{ + int i; + enum bpf_map_type type = 
map->map_type; + + for (i = 0; i < ARRAY_SIZE(can_lookup_map_types); i++) + if (type == can_lookup_map_types[i]) + return true; + + return false; +} + +static int get_env_map_cnt(struct bpf_verifier_env *env) +{ + int i; + struct bpf_map *map; + int page_cnt = 0; + + // calculate maps page count + for (i = 0; i < env->used_map_cnt; i++) { + map = env->used_maps[i]; + if (map->is_aggregated && map->used_page_cnt) { + page_cnt += map->used_page_cnt; + fbpf_log("map %d %s page_cnt = %d\n", map->id, + bpf_map_type_strings[map->map_type], map->used_page_cnt); + } else if (map_can_lookup(map)) { + /* if do not support this map, just return to normal bpf load */ + pr_err("unsupport map_type %s\n", bpf_map_type_strings[map->map_type]); + return -EPERM; + // continue; + } + } + + return page_cnt; +} + +static inline u64 count_page(struct page **pages, u64 start_addr, int *page_index) +{ + pages[*page_index] = (struct page *)start_addr; + + // pr_info("addr of page %d = %llx\n", *page_index, (u64)start_addr); + + start_addr += PAGE_SIZE; + (*page_index)++; + + return start_addr; +} + +static int count_percpu_pages(struct bpf_map *map, struct page **pages, int *page_index) +{ + struct list_head *used_pages_head = NULL; + struct bpf_used_page *entry; + struct bpf_used_page *next; + int i = 0; + + used_pages_head = &map->used_pages->list_head; + + /* should not list_del and free here, later other progs may reuse this map */ + list_for_each_entry_safe(entry, next, used_pages_head, list_head) { + // printk("count page %016llx\n", entry->physic_page); + // pages[*page_index] = entry->physic_page; + // pages[*page_index] = page_address(entry->physic_page); + // entry->shadow_page = *page_index; + // if (!pages[*page_index]) { + // pr_err("illegal address=%016llx\n", (u64)pages[*page_index]); + // return -EFAULT; + // } + // pr_info("addr of page %d = %llx\n", *page_index, (u64)page_address(entry->physic_page)); + // (*page_index)++; + count_page(pages, (u64)entry->physic_page, page_index); + entry->shadow_page = i; + i++; + } + + return 0; +} + +static int count_all_pages(struct bpf_verifier_env *env, struct page **pages, int *page_index) +{ + struct bpf_map *map; + int i, j; + u64 start_addr; + int err; + int page_index_before; + + /* 1. coutn pages in low guard region */ + // start_addr = PAGE_ALIGN((u64)env->prog->low_guard_region_addr); + // for (i = 0; i < env->prog->guard_region_page_cnt / 2; i++) { + // // pages[*page_index] = kv_virt_to_page((void *)start_addr); + // start_addr = count_page(pages, start_addr, page_index); + + // /* DEBUG check addr */ + // // pr_info("check page %d addr 0x%llx\n", i, start_addr); + // // *(u64 *)start_addr = 0; + // // pr_info("write into 0x%llx page pass\n", start_addr); + + // /* DEBUG check get_ptep */ + // // pte_t *ptep = bpf_sfi_get_ptep(start_addr); + // // if (IS_ERR(ptep)) + // // pr_err("get ptep of 0x%llx failed\n", start_addr); + // // else + // // pr_info("get ptep of 0x%llx pass\n", start_addr); + // } + + /* 2. 
count pages in map's data area */ + for (i = 0; i < env->used_map_cnt; i++) { + page_index_before = *page_index; + map = env->used_maps[i]; + + if (!map->is_aggregated) + continue; + + if (map->map_type == BPF_MAP_TYPE_ARRAY || + map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY) { + start_addr = (u64)map->value; + } else if (map->map_type == BPF_MAP_TYPE_HASH || + map->map_type == BPF_MAP_TYPE_LRU_HASH) { + start_addr = (u64)map->value; + } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY || + map->map_type == BPF_MAP_TYPE_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { + err = count_percpu_pages(map, pages, page_index); + if (err) + return err; + goto hook_end; + } else if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) { + pr_err("WARNING: ignore map %s\n", bpf_map_type_strings[map->map_type]); + goto hook_end; + } else { + pr_err("ERROR: miss map %s in count_all_pages()\n", + bpf_map_type_strings[map->map_type]); + return -ENOENT; + } + + /* assert start_addr is page_aligned */ + if (unlikely(!PAGE_ALIGNED(start_addr))) { + pr_err("ERROR: fbpf self, map_data should be PAGE_ALIGNED\n"); + pr_err("map %s, addr=%016llx", + bpf_map_type_strings[map->map_type], (u64)map); + return -EFAULT; + } + + /* for normal map with base addr */ + for (j = 0; j < map->used_page_cnt; j++) { + // pages[*page_index] = kv_virt_to_page((void *)start_addr); + start_addr = count_page(pages, start_addr, page_index); + } + +hook_end: + fbpf_log("hook aggregated area for map %d: %s, %016llx, \ +page_cnt: %d, should be: %d\n", + map->id, bpf_map_type_strings[map->map_type], (u64)map, + *page_index - page_index_before, map->used_page_cnt); + } + + /* 3. count pages in skb area */ + start_addr = PAGE_ALIGN((u64)env->prog->shadow_skb_addr); + for (i = 0; i < env->prog->shadow_skb_page_cnt; i++) { + // pages[*page_index] = kv_virt_to_page((void *)start_addr); + start_addr = count_page(pages, start_addr, page_index); + } + + /* 4. count pages in stack area */ + start_addr = PAGE_ALIGN((u64)env->prog->shadow_stack_addr); + for (i = 0; i < env->prog->shadow_stack_page_cnt; i++) { + // pages[*page_index] = kv_virt_to_page((void *)start_addr); + start_addr = count_page(pages, start_addr, page_index); + } + + /* 5. coutn pages in high guard region */ + // start_addr = PAGE_ALIGN((u64)env->prog->high_guard_region_addr); + // for (i = 0; i < env->prog->guard_region_page_cnt / 2; i++) { + // start_addr = count_page(pages, start_addr, page_index); + // } + + return 0; +} + +static int record_percpu_map_shadow_page(struct bpf_map *map) +{ + struct list_head *used_pages_head = NULL; + struct bpf_used_page *entry; + struct bpf_used_page *next; + void *shadow_base_addr = map->shadow_data; + + used_pages_head = &map->used_pages->list_head; + + /* fbpf should not list_del and free here, + later other progs may reuse this map. 
+ instead, map_free will do this job */ + list_for_each_entry_safe(entry, next, used_pages_head, list_head) { + entry->shadow_page = (u64)shadow_base_addr + PAGE_SIZE * + entry->shadow_page; /* shadow_page is the index recorded in count_all_pages() */ + pteval_t phy_pte, vir_pte; + phy_pte = pte_val(*bpf_sfi_get_ptep((u64)entry->physic_page)); + vir_pte = pte_val(*bpf_sfi_get_ptep((u64)entry->shadow_page)); + if (unlikely(phy_pte != vir_pte)) { + pr_err("ERROR: recorded shadow_page unequal actual shadow_page\n"); + return -EFAULT; + } + } + + return 0; +} + +// static inline int set_as_guard_page(unsigned long addr) +// { +// pte_t pte, *ptep; + +// ptep = bpf_sfi_get_ptep(addr); +// if (IS_ERR(ptep)) +// return PTR_ERR(ptep); + +// pte = __pte(pte_val(*ptep) & ~PTE_VALID); +// set_pte(ptep, __pte((pte_val(pte) | PTE_BPF_SFI_GP))); + +// flush_tlb_kernel_range(addr, addr + PAGE_SIZE); +// isb(); + +// return 0; +// } + +inline void *alloc_aligned_memory(u64 size) +{ + // unsigned long order = get_order(size); + // void *ptr = (void *)__get_free_pages(GFP_KERNEL, order); + int page_cnt = size >> PAGE_SHIFT; + void *base = vmalloc((2 * page_cnt - 1) * PAGE_SIZE); + if (IS_ERR(base)) { + printk(KERN_ERR "Failed to allocate memory\n"); + return ERR_PTR(-ENOMEM); + } + + void *ptr = (void *)(((unsigned long)base + page_cnt * PAGE_SIZE) & ~(size - 1)); + + // check alignment + if ((unsigned long)ptr & (size - 1)) { + // free_pages((unsigned long)ptr, order); + vfree(base); + pr_err("Memory not properly aligned\n"); + return ERR_PTR(-ENOMEM); + } + + return ptr; +} + +inline int map_physic_pages(struct page **pages, void *virt_addr, int page_cnt) +{ + int i; + + for (i = 0; i < page_cnt; i++) { + // u64 origin_addr = page_to_pfn(pages[i]) << PAGE_SHIFT; + // u64 origin_addr = (u64)page_address(pages[i]); + u64 origin_addr = (u64)pages[i]; + // pr_info("get origin ptep of page %d, addr = %llx\n", i, origin_addr); + pte_t *origin_ptep = bpf_sfi_get_ptep(origin_addr); + if (IS_ERR(origin_ptep)) { + // pr_err("get origin ptep of 0x%llx failed\n", origin_addr); + return PTR_ERR(origin_ptep); + } + + // pr_info("get cur ptep of page %d\n", i); + u64 cur_addr = (u64)virt_addr + i * PAGE_SIZE; + pte_t *ptep = bpf_sfi_get_ptep(cur_addr); + if (IS_ERR(ptep)) { + // pr_err("get sfi ptep of 0x%llx failed\n", cur_addr); + return PTR_ERR(ptep); + } + + #ifdef CONFIG_PTP + set_pte(ptep, *origin_ptep); + #else + *ptep = *origin_ptep; + #endif + } + + return 0; +} + +/** + * sfi_bpf space: + * guard page 8 pages + * map's data 0xffff80008457f000 - 0xffff80008457f000 + * skb 0xffff80008457f000 - 0xffff80008457f000 + * stack 0xffff80008457f000 - 0xffff800084580000 + * guard page 8 pages + */ +int bpf_sfi_aggregate_memory(struct bpf_verifier_env *env) +{ + struct page **pages = NULL; + struct bpf_map *map; + u64 start_addr; + void *shadow_base_addr = NULL, *shadow_high_addr = NULL; + int i, page_index = 0; + int total_page_cnt, map_page_cnt, stack_page_cnt, skb_page_cnt/*, guard_page_cnt*/; + int err; + void *shadow_stack_addr, *shadow_skb_addr; + + /* calculate page_cnt */ + env->prog->map_page_cnt = map_page_cnt = get_env_map_cnt(env); + stack_page_cnt = round_up(MAX_BPF_STACK, PAGE_SIZE) >> PAGE_SHIFT; + if (env->prog->type == BPF_PROG_TYPE_SCHED_CLS || + env->prog->type == BPF_PROG_TYPE_SCHED_ACT || + env->prog->type == BPF_PROG_TYPE_XDP || + env->prog->type == BPF_PROG_TYPE_LWT_XMIT || + env->prog->type == BPF_PROG_TYPE_SK_SKB || + env->prog->type == BPF_PROG_TYPE_SK_MSG) + skb_page_cnt = 16; + else + 
skb_page_cnt = 0; + // guard_page_cnt = 16; + + /* round up total page number to power of two for SFI consideration */ + env->prog->shadow_skb_page_cnt = skb_page_cnt; + env->prog->total_page_cnt = total_page_cnt = roundup_pow_of_two(stack_page_cnt + map_page_cnt + skb_page_cnt); + env->prog->shadow_stack_page_cnt = stack_page_cnt = total_page_cnt - map_page_cnt - skb_page_cnt; + pr_info("page_cnt: map=%d, skb=%d, stack=%d\n", + map_page_cnt, skb_page_cnt, stack_page_cnt); + + /* map map's value area to pages */ + pages = kzalloc(sizeof(struct page *) * (total_page_cnt), GFP_KERNEL | __GFP_NOWARN); + if (!pages) { + pr_err("ERROR: alloc tmp pages array failed\n"); + err = -ENOMEM; + goto ret; + } + + /* alloc tmp skb page to fill in pages */ + if (skb_page_cnt) { + shadow_skb_addr = vmalloc(skb_page_cnt * PAGE_SIZE); + if (!shadow_skb_addr) { + pr_err("alloc skb area failed\n"); + err = -ENOMEM; + goto ret; + } + env->prog->shadow_skb_addr = shadow_skb_addr; + } + + /* alloc shadow stack value area */ + if (stack_page_cnt) { + shadow_stack_addr = vmalloc(stack_page_cnt * PAGE_SIZE); + if (!shadow_stack_addr) { + pr_err("alloc stack area failed\n"); + err = -ENOMEM; + goto ret; + } + env->prog->shadow_stack_addr = shadow_stack_addr; + } else { + pr_err("no bpf stack is allocated!\n"); + err = -ENOMEM; + goto ret; + } + + /* alloc guard region */ + // if (guard_page_cnt) { + // void *guard_region_addr = vmalloc(guard_page_cnt * PAGE_SIZE); + // if (IS_ERR(guard_region_addr)) { + // pr_err("alloc guard region failed\n"); + // err = PTR_ERR(guard_region_addr); + // goto ret; + // } + // env->prog->low_guard_region_addr = guard_region_addr; + // env->prog->high_guard_region_addr = guard_region_addr + guard_page_cnt / 2 * PAGE_SIZE; + // } + + /* fill the array pages */ + err = count_all_pages(env, pages, &page_index); + if (err) + goto ret; + + /* check every page is counted */ + fbpf_log("page index=%d, page cnt=%d (should equal)\n", page_index, total_page_cnt); + if (unlikely(page_index != total_page_cnt)) { + pr_err("ERROR: page_index != page_cnt\n"); + err = -EFAULT; + goto ret; + } + + /* remap memory area to virtually contiguous space */ + // shadow_base_addr = vmap(pages, total_page_cnt, VM_MAP, PAGE_KERNEL); + shadow_base_addr = alloc_aligned_memory(total_page_cnt * PAGE_SIZE); + if (IS_ERR(shadow_base_addr)) { + pr_err("unable to allocate aligned shadow region for map values and stack\n"); + err = PTR_ERR(shadow_base_addr); + goto ret; + } + shadow_high_addr = shadow_base_addr + total_page_cnt * PAGE_SIZE; + env->prog->shadow_region_addr = shadow_base_addr; + env->prog->shadow_top_addr = shadow_high_addr; + + fbpf_log("shadow_base = 0x%llx, shadow_stack = 0x%llx\n", shadow_base_addr, shadow_high_addr); + + err = map_physic_pages(pages, shadow_base_addr, total_page_cnt); + if (err < 0) { + pr_err("remap failed, %d\n", err); + goto ret; + } + + /* substitute with SFI address space addrs */ + start_addr = (u64)shadow_base_addr; + + /* low guard region */ + // env->prog->low_guard_region_addr = (void *)start_addr; + // start_addr += guard_page_cnt / 2 * PAGE_SIZE; + + /* IMPORTANT: update shadow addr for maps */ + env->prog->map_data_addr = (void *)start_addr; + for (i = 0; i < env->used_map_cnt; i++) { + map = env->used_maps[i]; + + if (!map->is_aggregated) + continue; + + /* record addr mapping info */ + map->shadow_data = (void *)start_addr; + + if (map->map_type == BPF_MAP_TYPE_HASH || + map->map_type == BPF_MAP_TYPE_LRU_HASH) { + 
substitute_hash_value_ptr(map); + } else if (map->map_type == BPF_MAP_TYPE_ARRAY || + map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY) { + ; // only need to record map->shadow_data + } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY || + map->map_type == BPF_MAP_TYPE_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { + err = record_percpu_map_shadow_page(map); + if (err) + goto ret; + } else { + ; // no need to record or substitute + } + + start_addr += map->used_page_cnt * PAGE_SIZE; + } + + /* the space between map's data and stack is for skb_buff */ + env->prog->shadow_skb_addr = (void *)start_addr; + start_addr += skb_page_cnt * PAGE_SIZE; + + /* stack */ + /* NOTICE: stack's addr is high addr (sp), rather than low addr (fp) */ + start_addr += stack_page_cnt * PAGE_SIZE; + env->prog->shadow_stack_addr = (void *)start_addr; + + /* high guard region */ + // env->prog->high_guard_region_addr = (void *)start_addr; + // start_addr += guard_page_cnt / 2 * PAGE_SIZE; + + BUG_ON(unlikely(start_addr != (u64)(env->prog->shadow_top_addr))); + + // set guard region and skb region as guard page + // start_addr = (u64)env->prog->low_guard_region_addr; + // for (int i = 0; i < guard_page_cnt / 2; i++) { + // err = set_as_guard_page(start_addr); + // if (err < 0) { + // pr_err("set as guard page failed, %d\n", err); + // goto ret; + // } + // start_addr += PAGE_SIZE; + // } + // start_addr = (u64)env->prog->high_guard_region_addr; + // for (int i = 0; i < guard_page_cnt / 2; i++) { + // err = set_as_guard_page(start_addr); + // if (err < 0) { + // pr_err("set as guard page failed, %d\n", err); + // goto ret; + // } + // start_addr += PAGE_SIZE; + // } + // start_addr = (u64)env->prog->shadow_skb_addr; + // for (int i = 0; i < skb_page_cnt; i++) { + // err = set_as_guard_page(start_addr); + // if (err < 0) { + // pr_err("set as guard page failed, %d\n", err); + // goto ret; + // } + // start_addr += PAGE_SIZE; + // } + + // manually trigger page fault now + // pr_err("start probe now\n"); + // *(u64 *)(env->prog->low_guard_region_addr + 16) = 1; + // pr_err("oops, back from page fault!\n"); + + fbpf_log("sfi_bpf space:\n"); + // fbpf_log("low guard region\t0x%016llx - 0x%016llx\n", + // env->prog->low_guard_region_addr, env->prog->low_guard_region_addr + guard_page_cnt / 2 * PAGE_SIZE); + fbpf_log("map's data\t\t0x%016llx - 0x%016llx\n", + env->prog->map_data_addr, env->prog->map_data_addr + map_page_cnt * PAGE_SIZE); + fbpf_log("skb\t\t\t0x%016llx - 0x%016llx\n", + env->prog->shadow_skb_addr, env->prog->shadow_skb_addr + skb_page_cnt * PAGE_SIZE); + fbpf_log("stack\t\t\t0x%016llx - 0x%016llx\n", + env->prog->shadow_stack_addr - stack_page_cnt * PAGE_SIZE, env->prog->shadow_stack_addr); + // fbpf_log("high guard region\t0x%016llx - 0x%016llx\n", + // env->prog->high_guard_region_addr, env->prog->high_guard_region_addr + guard_page_cnt / 2 * PAGE_SIZE); + + fbpf_log("prog [%s] func_id = %d: 0x%016llx - 0x%016llx, size = 0x%llx\n", + env->prog->aux->name, env->prog->aux->func_idx, + (u64)env->prog->shadow_region_addr, + (u64)env->prog->shadow_stack_addr, + (u64)(total_page_cnt * PAGE_SIZE)); + +ret: + if (pages) + kfree(pages); + return err; +} + +static inline bool is_LDST_insn(struct bpf_insn *insn) +{ + u8 class = BPF_CLASS(insn->code); + + if (class == BPF_LD || class == BPF_LDX || class == BPF_ST || class == BPF_STX) + return true; + else + return false; +} + +static inline bool is_HELPER_insn(struct bpf_insn *insn) +{ + u8 class = BPF_CLASS(insn->code); + u8 op = BPF_OP(insn->code); + 
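/*
 * Illustrative aside (not part of the patch): is_LDST_insn()/is_HELPER_insn()
 * above only look at the opcode fields of insn->code.  The standalone,
 * userspace sketch below mirrors that decoding; the DEMO_* names and the
 * sample opcodes are hypothetical, and the mask values are copied from
 * include/uapi/linux/bpf_common.h and bpf.h.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_BPF_CLASS(code)	((code) & 0x07)	/* mirrors BPF_CLASS() */
#define DEMO_BPF_OP(code)	((code) & 0xf0)	/* mirrors BPF_OP()    */
#define DEMO_BPF_LD		0x00
#define DEMO_BPF_LDX		0x01
#define DEMO_BPF_ST		0x02
#define DEMO_BPF_STX		0x03
#define DEMO_BPF_JMP		0x05
#define DEMO_BPF_JMP32		0x06
#define DEMO_BPF_CALL		0x80
#define DEMO_BPF_PSEUDO_CALL	1	/* src_reg tag used by bpf-to-bpf calls */

static bool demo_is_ldst(uint8_t code)
{
	uint8_t class = DEMO_BPF_CLASS(code);

	return class == DEMO_BPF_LD || class == DEMO_BPF_LDX ||
	       class == DEMO_BPF_ST || class == DEMO_BPF_STX;
}

static bool demo_is_helper(uint8_t code, uint8_t src_reg)
{
	uint8_t class = DEMO_BPF_CLASS(code);

	/* a JMP/JMP32 CALL whose src_reg is not BPF_PSEUDO_CALL is a helper call */
	return (class == DEMO_BPF_JMP || class == DEMO_BPF_JMP32) &&
	       DEMO_BPF_OP(code) == DEMO_BPF_CALL &&
	       src_reg != DEMO_BPF_PSEUDO_CALL;
}

int main(void)
{
	/* 0x85 = BPF_JMP | BPF_CALL: helper call when src_reg == 0 */
	printf("0x85/src0 helper? %d\n", demo_is_helper(0x85, 0));
	/* same opcode with src_reg == BPF_PSEUDO_CALL is a bpf-to-bpf call */
	printf("0x85/src1 helper? %d\n", demo_is_helper(0x85, 1));
	/* 0x79 = BPF_LDX | BPF_MEM | BPF_DW: a load, so an ld/st candidate */
	printf("0x79 ld/st? %d\n", demo_is_ldst(0x79));
	return 0;
}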
+ if ((class == BPF_JMP || class == BPF_JMP32) && + op == BPF_CALL && + insn->src_reg != BPF_PSEUDO_CALL) + return true; + else + return false; +} + +__attribute__((unused)) static char *bpf_sfi_check_type_strs[] = { + [BPF_SFI_CHECK_HELPER_AS_UNSAFE_PTR] = "BPF_SFI_CHECK_HELPER_AS_UNSAFE_PTR", + [BPF_SFI_CHECK_HELPER_AS_SAFE_PTR] = "BPF_SFI_CHECK_HELPER_AS_SAFE_PTR", + [BPF_SFI_CHECK_HELPER_AS_OFFSET] = "BPF_SFI_CHECK_HELPER_AS_OFFSET", + [BPF_SFI_CHECK_LDST_AS_UNSAFE_PTR] = "BPF_SFI_CHECK_LDST_AS_UNSAFE_PTR", + [BPF_SFI_CHECK_LDST_AS_SAFE_PTR] = "BPF_SFI_CHECK_LDST_AS_SAFE_PTR", + [BPF_SFI_CHECK_LDST_AS_OFFSET] = "BPF_SFI_CHECK_LDST_AS_OFFSET", +}; + +/** + * Return a linked list; each element marks a check that needs to be done on a reg. + * Ex: + * - { reg_0, CHECK_LDST } -> NULL + * - { reg_3, CHECK_HELPER } -> { reg_2, CHECK_MODIFIED } -> { reg_1, CHECK_NONE } + * + */ +struct bpf_sfi_check_unit *bpf_sfi_get_check_list(struct bpf_verifier_env *env, struct bpf_insn *insn, struct bpf_reg_state *regs) +{ + if (is_LDST_insn(insn)) { + enum bpf_reg_type ptr_type; + int reg_no; + struct bpf_sfi_check_unit *check_list, *new_unit; + bool reg_as_offset; + u8 mode; + + if (BPF_CLASS(insn->code) == BPF_LD || + BPF_CLASS(insn->code) == BPF_LDX) + reg_no = insn->src_reg; + else + reg_no = insn->dst_reg; + + if (reg_no == BPF_REG_FP) + return NULL; + + ptr_type = base_type(regs[reg_no].type); + if (ptr_type != PTR_TO_STACK && + ptr_type != PTR_TO_MAP_VALUE && + ptr_type != PTR_TO_PACKET) { + // fbpf_log("%d: ignore ptr=%d, r%d", i, ptr_type, reg); + return NULL; + } + + mode = BPF_MODE(insn->code); + switch (mode) { + case BPF_MEM: + reg_as_offset = true; + break; + case BPF_ATOMIC: + case BPF_IMM: + reg_as_offset = false; + break; + default: + pr_err("unprocessed mode %d\n", mode); + return ERR_PTR(-ENOTSUPP); + } + + reg_as_offset = false; + + check_list = (struct bpf_sfi_check_unit *)kzalloc(sizeof(struct bpf_sfi_check_unit), GFP_KERNEL); + INIT_LIST_HEAD(&check_list->list_head); + + new_unit = (struct bpf_sfi_check_unit *)kzalloc(sizeof(struct bpf_sfi_check_unit), GFP_KERNEL); + INIT_LIST_HEAD(&new_unit->list_head); + new_unit->reg_no = reg_no; + new_unit->reg_as_offset = reg_as_offset; + + if (regs[reg_no].sfi_check_state == BPF_SFI_UNSAFE_PTR) { + new_unit->check_type = BPF_SFI_CHECK_LDST_AS_UNSAFE_PTR; + } + else if (regs[reg_no].sfi_check_state == BPF_SFI_SAFE_PTR) { + new_unit->check_type = BPF_SFI_CHECK_LDST_AS_SAFE_PTR; + } + else { + new_unit->check_type = BPF_SFI_CHECK_LDST_AS_OFFSET; + } + + if (insn->off == 0) { + if (new_unit->check_type == BPF_SFI_CHECK_LDST_AS_UNSAFE_PTR || + new_unit->check_type == BPF_SFI_CHECK_LDST_AS_OFFSET) { + if (reg_as_offset) + new_unit->proposed_state = BPF_SFI_OFFSET; + else + new_unit->proposed_state = BPF_SFI_SAFE_PTR; + } + else + new_unit->proposed_state = BPF_SFI_SAFE_PTR; + } else { + new_unit->proposed_state = BPF_SFI_SAFE_PTR; + } + + list_add(&new_unit->list_head, &check_list->list_head); + // fbpf_log("%d: add %s check point\n", insn - env->prog->insnsi, + // bpf_sfi_check_type_strs[new_unit->check_type]); + + return check_list; + } + else if (is_HELPER_insn(insn)) { + const struct bpf_func_proto *fn; + int arg, reg_no; + enum bpf_reg_type arg_reg_type; + struct bpf_sfi_check_unit *bpf_sfi_check_list; + + if (unlikely(!env->ops->get_func_proto)) { + pr_err("ERROR: unsupported helper\n"); + return ERR_PTR(-ENOENT); + } + fn = env->ops->get_func_proto(insn->imm, env->prog); + if (!fn) { + pr_err("ERROR: unknown helper type\n"); + return ERR_PTR(-ENOENT); + } 
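/*
 * Illustrative aside (not part of the patch): shape of the list that
 * bpf_sfi_get_check_list() returns.  The first kzalloc'ed unit acts only as a
 * list anchor; one extra unit is linked per register that needs a check and
 * is later walked with list_for_each_entry().  The helper below is a
 * hypothetical, simplified sketch using the patch's struct bpf_sfi_check_unit
 * fields and the standard <linux/list.h> primitives; it deliberately skips
 * cleanup of earlier allocations on failure.
 */
static struct bpf_sfi_check_unit *demo_make_check_list(const int *reg_nos, int n)
{
	struct bpf_sfi_check_unit *head, *unit;
	int i;

	head = kzalloc(sizeof(*head), GFP_KERNEL);	/* dummy anchor node */
	if (!head)
		return NULL;
	INIT_LIST_HEAD(&head->list_head);

	for (i = 0; i < n; i++) {
		unit = kzalloc(sizeof(*unit), GFP_KERNEL);
		if (!unit)
			return NULL;			/* sketch only: leaks on failure */
		INIT_LIST_HEAD(&unit->list_head);
		unit->reg_no = reg_nos[i];
		unit->check_type = BPF_SFI_CHECK_HELPER_AS_UNSAFE_PTR;
		unit->proposed_state = BPF_SFI_SAFE_PTR;
		list_add(&unit->list_head, &head->list_head);
	}

	return head;	/* callers iterate: list_for_each_entry(p, &head->list_head, list_head) */
}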
+ + bpf_sfi_check_list = (struct bpf_sfi_check_unit *)kzalloc(sizeof(struct bpf_sfi_check_unit), GFP_KERNEL); + INIT_LIST_HEAD(&bpf_sfi_check_list->list_head); + + for (arg = 0; arg < 5; arg++) { + reg_no = arg + 1; + /* here use mapped actual reg type, not formal arg type */ + arg_reg_type = base_type(regs[reg_no].type); + + // fbpf_log("%d: arg%d_type = %d\n", insn - env->prog->insnsi, reg_no, arg_reg_type); + + if (arg_reg_type == PTR_TO_STACK || + arg_reg_type == PTR_TO_MAP_VALUE || + arg_reg_type == PTR_TO_PACKET) { + struct bpf_sfi_check_unit *check_unit = (struct bpf_sfi_check_unit *)kzalloc(sizeof(struct bpf_sfi_check_unit), GFP_KERNEL); + INIT_LIST_HEAD(&check_unit->list_head); + check_unit->reg_no = reg_no; + + if (regs[reg_no].sfi_check_state == BPF_SFI_UNSAFE_PTR) { + check_unit->check_type = BPF_SFI_CHECK_HELPER_AS_UNSAFE_PTR; + } + else if (regs[reg_no].sfi_check_state == BPF_SFI_SAFE_PTR) { + check_unit->check_type = BPF_SFI_CHECK_HELPER_AS_SAFE_PTR; + } + else { + check_unit->check_type = BPF_SFI_CHECK_HELPER_AS_OFFSET; + } + + check_unit->proposed_state = BPF_SFI_SAFE_PTR; + + list_add(&check_unit->list_head, &bpf_sfi_check_list->list_head); + // fbpf_log("%d: add %s check point\n", insn - env->prog->insnsi, + // bpf_sfi_check_type_strs[check_unit->check_type]); + } + } + + return bpf_sfi_check_list; + } + else { + return NULL; + } +} + +__printf(2, 3) static void fake_verbose(void *private_data, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + vprintk(fmt, args); + va_end(args); +} + +void dump_insns(struct bpf_prog *prog, int start, int len, const char *prompt) +{ + #ifdef FBPF_LOG_ENABLE + int insn_cnt; + struct bpf_insn *insn; + int i; + struct bpf_insn_cbs cbs = { + .cb_print = fake_verbose, + .private_data = NULL, + }; + + if (unlikely(!prog)) { + fbpf_log("ERROR: prog is NULL\n"); + return; + } + + if (start + len > prog->len) { + fbpf_log("ERROR: print insn from %d to %d is out of bound\n", start, start + len - 1); + return; + } + insn_cnt = len; + insn = prog->insnsi + start; + + if (prompt) + printk("[dump insns] --- %s\n", prompt); + for (i = 0; i < insn_cnt; i++) { + print_bpf_insn(&cbs, insn, false); + insn++; + } + #endif +} + +int bpf_sfi_tmp_check(struct bpf_verifier_env *env) +{ + struct bpf_prog *prog; + struct bpf_insn *insn; + int insn_cnt; + struct bpf_insn_aux_data *aux; + int i; + struct bpf_sfi_check_unit *check_list; + + if (!env) { + fbpf_log("env is NULL\n"); + return -EFAULT; + } + prog = env->prog; + aux = env->insn_aux_data; + + if (!prog) { + fbpf_log("prog is NULL\n"); + return -EFAULT; + } + insn_cnt = prog->len; + + insn = prog->insnsi; + if (!insn) + return 0; + + for (i = 0; i < insn_cnt; i++) { + insn = &prog->insnsi[i]; + aux = &env->insn_aux_data[i]; + check_list = env->insn_check_lists[i]; + + if (!aux) { + dump_insns(prog, i, 1, "empty aux"); + fbpf_log("aux is empty for insn %d", i); + return -EFAULT; + } + + /* check optimization */ + if (check_list) { + struct bpf_sfi_check_unit *p; + char check_type[128]; + + mutex_lock(&env->insn_check_list_mutex); + list_for_each_entry(p, &check_list->list_head, list_head) { + switch (p->check_type) { + case BPF_SFI_CHECK_LDST_AS_UNSAFE_PTR: + if (insn->off == 0) + strcpy(check_type, "BPF_SFI_CHECK_LDST_AS_UNSAFE_PTR"); + else + strcpy(check_type, "BPF_SFI_CHECK_LDST_OFFSET_AS_UNSAFE_PTR");; + break; + case BPF_SFI_CHECK_LDST_AS_SAFE_PTR: + if (insn->off == 0) + strcpy(check_type, "BPF_SFI_CHECK_LDST_AS_SAFE_PTR"); + else + strcpy(check_type, 
"BPF_SFI_CHECK_LDST_OFFSET_AS_SAFE_PTR");; + break; + case BPF_SFI_CHECK_LDST_AS_OFFSET: + if (insn->off == 0) + strcpy(check_type, "BPF_SFI_CHECK_LDST_AS_OFFSET"); + else + strcpy(check_type, "BPF_SFI_CHECK_LDST_OFFSET_AS_OFFSET");; + break; + case BPF_SFI_CHECK_HELPER_AS_UNSAFE_PTR: + strcpy(check_type, "BPF_SFI_CHECK_HELPER_AS_UNSAFE_PTR"); + break; + case BPF_SFI_CHECK_HELPER_AS_SAFE_PTR: + strcpy(check_type, "BPF_SFI_CHECK_HELPER_AS_SAFE_PTR"); + break; + case BPF_SFI_CHECK_HELPER_AS_OFFSET: + strcpy(check_type, "BPF_SFI_CHECK_HELPER_AS_OFFSET"); + break; + } + fbpf_log("%d: reg %d, check type %s\n", i, p->reg_no, check_type); + } + mutex_unlock(&env->insn_check_list_mutex); + } + } + + return 0; +} + +/** + * return: + * > 0: patch cnt + * = 0: no need to patch + * < 0: errno + */ +static inline int form_check_ldst_as_unsafe_ptr(struct bpf_insn *origin_insn, struct bpf_insn *patch, bool reg_as_offset, + u64 sfi_region_size, int reg, int *origin_insn_no) +{ + int patch_len; + struct bpf_insn modified_origin_insn = *origin_insn; + u8 size = BPF_SIZE(origin_insn->code); + u8 class = BPF_CLASS(origin_insn->code); + + pr_info("reg_as_offset %d\n", reg_as_offset); + + /* lock *(u64 *)(r0 + 0) += 1 */ + /* lock *(u64 *)(r0 + 8) += 1 */ + if (!reg_as_offset) { + patch[0] = BPF_ALU64_IMM(BPF_AND, reg, sfi_region_size - 1); + patch[1] = BPF_ALU64_REG(BPF_OR, reg, BPF_REG_BASE); + patch[2] = *origin_insn; + + *origin_insn_no = 2; + patch_len = 3; + } else { + /* *(u64 *)(r0 + 0) = 0 */ + if (origin_insn->off == 0) { + patch[0] = BPF_ALU64_IMM(BPF_AND, reg, sfi_region_size - 1); + modified_origin_insn.code = class | BPF_REG | size; + if (class == BPF_LD || class == BPF_LDX) + modified_origin_insn.src_reg = BPF_REG_BASE; + if (class == BPF_ST || class == BPF_STX) + modified_origin_insn.dst_reg = BPF_REG_BASE; + modified_origin_insn.off = reg; + patch[1] = modified_origin_insn; + + *origin_insn_no = 1; + patch_len = 2; + } + /* *(u64 *)(r0 + 8) = 0 */ + else { + patch[0] = BPF_ALU64_IMM(BPF_AND, reg, sfi_region_size - 1); + patch[1] = BPF_ALU64_REG(BPF_OR, reg, BPF_REG_BASE); + patch[2] = *origin_insn; + + *origin_insn_no = 2; + patch_len = 3; + } + } + + return patch_len; +} + +/** + * return: + * > 0: patch cnt + * = 0: no need to patch + * < 0: errno + */ +static inline int form_check_ldst_as_offset(struct bpf_insn *origin_insn, struct bpf_insn *patch, bool reg_as_offset, + u64 sfi_region_size, int reg, int *origin_insn_no) +{ + int patch_len; + struct bpf_insn modified_origin_insn = *origin_insn; + u8 size = BPF_SIZE(origin_insn->code); + u8 class = BPF_CLASS(origin_insn->code); + + if (!reg_as_offset) { + /* lock *(u64 *)(r0 + 0) += 1 */ + /* lock *(u64 *)(r0 + 8) += 1 */ + patch[0] = BPF_ALU64_REG(BPF_OR, reg, BPF_REG_BASE); + patch[1] = *origin_insn; + + *origin_insn_no = 1; + patch_len = 2; + + } else { + /* *(u64 *)(r0 + 0) = 0 */ + if (origin_insn->off == 0) { + modified_origin_insn.code = class | BPF_REG | size; + if (class == BPF_LD || class == BPF_LDX) + modified_origin_insn.src_reg = BPF_REG_BASE; + if (class == BPF_ST || class == BPF_STX) + modified_origin_insn.dst_reg = BPF_REG_BASE; + modified_origin_insn.off = reg; + patch[0] = modified_origin_insn; + + *origin_insn_no = 0; + patch_len = 1; + } + /* *(u64 *)(r0 + 8) = 0 */ + else { + patch[0] = BPF_ALU64_REG(BPF_OR, reg, BPF_REG_BASE); + patch[1] = *origin_insn; + + *origin_insn_no = 1; + patch_len = 2; + } + } + + return patch_len; +} + +/** + * form a patch according to the insn + * v3 means this is optimized twice + * 
@origin_insn: where the instrument point locates at + * @check_unit: the sfi check unit + * @patch_array: patch will be stored in this array if return code is 0 + * @origin_insn_no: the index of origin insn + * @sfi_region_size: the size of sfi region + * return: + * < 0: errno + * >= 0: patch_cnt + */ +int form_sfi_check_patch_v3(struct bpf_insn *origin_insn, struct bpf_sfi_check_unit *check_unit, + struct bpf_insn *patch_array, int* origin_insn_no, u64 sfi_region_size) +{ + struct bpf_insn patch[16]; + int patch_len = 0; + enum bpf_sfi_check_type type = check_unit->check_type; + int reg = check_unit->reg_no; + + /** normal case, + * 1. and with SFI_REGION_SIZE + * 2. if not reg_as_offset + * 2.1 or with SFI_REGION_BASE + * 2.2 if has off, add off + * 2.3 origin insn + * 2.4 if has off, sub off + * 3. if reg_as_offset + * 3.1 if has off, add off + * 3.2 replace BPF_LD/BPF_ST to BPF_LDR/BPF_STR + * 3.3 if has off, sub off + */ + if (type == BPF_SFI_CHECK_LDST_AS_UNSAFE_PTR) { + bool reg_as_offset = check_unit->reg_as_offset; + patch_len = form_check_ldst_as_unsafe_ptr(origin_insn, patch, + reg_as_offset, sfi_region_size, reg, origin_insn_no); + } + /** second case + * 1. no need to and again + * 2. but need to process off + * 3. and replace insn to BPF_LDR/BPF_STR + */ + else if (type == BPF_SFI_CHECK_LDST_AS_OFFSET) { + bool reg_as_offset = check_unit->reg_as_offset; + patch_len = form_check_ldst_as_offset(origin_insn, patch, + reg_as_offset, sfi_region_size, reg, origin_insn_no); + } + /** after check helper, ptr is still ptr, but safe + * 1. and with SFI_REGION_SIZE + * 2. or with SFI_REGION_BASE + */ + else if (type == BPF_SFI_CHECK_HELPER_AS_UNSAFE_PTR) { + patch[0] = BPF_ALU64_IMM(BPF_AND, reg, sfi_region_size - 1); + patch[1] = BPF_ALU64_REG(BPF_OR, reg, BPF_REG_BASE); + patch[2] = *origin_insn; + + *origin_insn_no = 2; + patch_len = 3; + } + /** second case + * 1. no need to and + * 2. just or + */ + else if (type == BPF_SFI_CHECK_HELPER_AS_OFFSET) { + patch[0] = BPF_ALU64_REG(BPF_OR, reg, BPF_REG_BASE); + patch[1] = *origin_insn; + + *origin_insn_no = 1; + patch_len = 2; + } + /* no need to check again */ + else if (type == BPF_SFI_CHECK_HELPER_AS_SAFE_PTR || + type == BPF_SFI_CHECK_LDST_AS_SAFE_PTR) { + *origin_insn_no = 0; + patch_len = 0; + } + else { + pr_err("unresolved case\n"); + BUG_ON(1); + } + + if (patch_len > sizeof(patch_array)) + return -ENOMEM; + + for (int i = 0; i < patch_len; i++) + patch_array[i] = patch[i]; + + return patch_len; +} + +int bpf_sfi_check_ldst(struct bpf_verifier_env *env) +{ + struct bpf_prog *prog; + struct bpf_insn *insn; + int insn_cnt; + struct bpf_insn_aux_data *aux; + struct bpf_prog *new_prog; + int i, cnt, delta = 0; + u8 reg; + int origin_insn_no; + enum bpf_reg_type ptr_type; + struct bpf_sfi_check_unit *check_list, *origin_check_list; + + if (!env) { + fbpf_log("env is NULL\n"); + return -EFAULT; + } + prog = env->prog; + aux = env->insn_aux_data; + + if (!prog) { + fbpf_log("prog is NULL\n"); + return -EFAULT; + } + insn_cnt = prog->len; + + insn = prog->insnsi; + if (!insn) + return 0; + + /* use i + delta to traverse: insn, aux. 
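/*
 * Illustrative aside (not part of the patch): the arithmetic behind the
 * AND/OR patches formed above.  If the shadow region is 'size' bytes long,
 * 'size' is a power of two and 'base' is size-aligned (which is what
 * alloc_aligned_memory() arranges for the shadow region), then
 * base | (ptr & (size - 1)) always lands inside [base, base + size), whatever
 * value 'ptr' holds.  Standalone userspace sketch with hypothetical numbers:
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint64_t size = 32 * 4096;		/* 32 pages, power of two   */
	const uint64_t base = 0xffff800084000000;	/* size-aligned shadow base */
	uint64_t ptrs[] = { 0x0, base + 0x123, 0xdeadbeefdeadbeef };

	for (int i = 0; i < 3; i++) {
		/* same effect as BPF_AND with (size - 1) followed by BPF_OR with base */
		uint64_t confined = base | (ptrs[i] & (size - 1));

		assert(confined >= base && confined < base + size);
		printf("%#llx -> %#llx\n",
		       (unsigned long long)ptrs[i],
		       (unsigned long long)confined);
	}
	return 0;
}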
*/ + for (i = 0; i < insn_cnt; i++) { + struct bpf_insn_aux_data origin_insn_aux; + + insn = &prog->insnsi[i + delta]; + aux = &env->insn_aux_data[i + delta]; + check_list = env->insn_check_lists[i + delta]; + + if (!aux) { + dump_insns(prog, i, 1, "empty aux"); + fbpf_log("aux is empty for insn %d", i); + return -EFAULT; + } + origin_insn_aux = *aux; + origin_check_list = env->insn_check_lists[i + delta]; + + /* check optimization */ + if (check_list) { + struct bpf_sfi_check_unit *p; + // u64 offset_bound_mask = (roundup_pow_of_two( + // env->prog->total_page_cnt) << PAGE_SHIFT) - 1; + u64 sfi_region_size = env->prog->total_page_cnt << PAGE_SHIFT; + + list_for_each_entry(p, &check_list->list_head, list_head) { + if (p->check_type != BPF_SFI_CHECK_LDST_AS_UNSAFE_PTR && + p->check_type != BPF_SFI_CHECK_LDST_AS_SAFE_PTR && + p->check_type != BPF_SFI_CHECK_LDST_AS_OFFSET) + continue; + + reg = p->reg_no; + ptr_type = base_type(aux->ptr_type); + + fbpf_log("%d: check ptr=%d, r%d", i, ptr_type, reg); + dump_insns(prog, i + delta, 1, "Ld/st insn to rewrite"); + + struct bpf_insn patch[16]; + cnt = form_sfi_check_patch_v3(insn, p, patch, &origin_insn_no, + sfi_region_size); + + /* NOTICE: use cnt to decide whether to patch */ + if (cnt == 0) + continue; + + new_prog = _bpf_patch_insn_data(env, i + delta, patch, cnt); + if (!new_prog) { + fbpf_log("patch failed\n"); + return -ENOMEM; + } + + dump_insns(new_prog, i + delta, cnt, "restore ldst"); + + env->insn_aux_data[i + delta + origin_insn_no] = origin_insn_aux; + env->insn_check_lists[i + delta + origin_insn_no] = origin_check_list; + env->prog = prog = new_prog; + delta += cnt - 1; + } + + // if (!old_off) { + // struct bpf_insn zero_off_patch[] = { + // BPF_ALU64_IMM(BPF_AND, reg, offset_bound_mask), + // BPF_ALU64_REG(BPF_OR, reg, BPF_REG_BASE), + // *insn, + // }; + + // cnt = ARRAY_SIZE(zero_off_patch); + // origin_insn_no = find_origin_insn(zero_off_patch, cnt, insn); + + // new_prog = bpf_patch_insn_data(env, i + delta, zero_off_patch, cnt); + // if (!new_prog) { + // fbpf_log("patch failed\n"); + // return -ENOMEM; + // } + + // // dump_insns(new_prog, i + delta, cnt, "restore ldst"); + + // env->insn_aux_data[i + delta + origin_insn_no] = + // origin_insn_aux; + // env->prog = prog = new_prog; + // delta += cnt - 1; + // continue; + // } else { + // struct bpf_insn non_zero_off_patch[] = { + // BPF_ALU64_IMM(BPF_ADD, reg, old_off), + // BPF_ALU64_IMM(BPF_AND, reg, offset_bound_mask), + // BPF_ALU64_REG(BPF_OR, reg, BPF_REG_BASE), + // *insn, + // BPF_ALU64_IMM(BPF_SUB, reg, old_off), + // }; + + // cnt = ARRAY_SIZE(non_zero_off_patch); + // origin_insn_no = find_origin_insn(non_zero_off_patch, cnt, insn); + + // new_prog = bpf_patch_insn_data(env, i + delta, non_zero_off_patch, cnt); + // if (!new_prog) { + // fbpf_log("patch failed\n"); + // return -ENOMEM; + // } + + // // dump_insns(new_prog, i + delta, cnt, "restore ldst"); + + // env->insn_aux_data[i + delta + origin_insn_no] = + // origin_insn_aux; + // env->prog = prog = new_prog; + // delta += cnt - 1; + // continue; + // } + } + } + + // dump_insns(env->prog, 0, env->prog->len, "all after restore ldst"); + + return 0; +} + +int bpf_sfi_check_helper_args(struct bpf_verifier_env *env) +{ + struct bpf_prog *prog; + int insn_cnt; + struct bpf_prog *new_prog; + int i, cnt, delta = 0; + struct bpf_sfi_check_unit *check_list, *origin_check_list; + + if (!env) { + fbpf_log("ERROR: env is NULL\n"); + return -EFAULT; + } + prog = env->prog; + + if (!prog) { + fbpf_log("ERROR: prog is 
NULL\n"); + return -EFAULT; + } + insn_cnt = prog->len; + + // dump_insns(prog, 0, prog->len, "before restore_helper_args"); + + /** + * use i + delta to traverse: insn, aux. + * in inner loop, do not update the insn and aux. + */ + for (i = 0; i < insn_cnt; i++) { + struct bpf_insn_aux_data origin_insn_aux; + int sub_delta = 0; + int alpha = 0; + struct bpf_insn_aux_data *aux; + struct bpf_insn *insn; + + insn = &prog->insnsi[i + delta + alpha]; + aux = &env->insn_aux_data[i + delta + alpha]; + check_list = env->insn_check_lists[i + delta + alpha]; + if (!aux) { + dump_insns(prog, i, 1, "empty aux"); + fbpf_log("ERROR: aux is empty for insn %d\n", i); + return -EFAULT; + } + origin_insn_aux = *aux; + origin_check_list = env->insn_check_lists[i + delta + alpha]; + + /* check optimization */ + if (check_list) { + int func_id = insn->imm; + struct bpf_sfi_check_unit *p; + // u64 offset_bound_mask = (roundup_pow_of_two( + // env->prog->total_page_cnt) << PAGE_SHIFT) - 1; + u64 sfi_region_size = env->prog->total_page_cnt << PAGE_SHIFT; + + list_for_each_entry(p, &check_list->list_head, list_head) { + int origin_insn_no; + int reg_no = p->reg_no; + enum bpf_reg_type arg_type = aux->arg_reg_type[reg_no - 1]; + insn = &prog->insnsi[i + delta + alpha]; + aux = &env->insn_aux_data[i + delta + alpha]; + struct bpf_insn patch[16]; + + // struct bpf_insn patch[] = { + // BPF_ALU64_IMM(BPF_AND, reg_no, offset_bound_mask), + // BPF_ALU64_REG(BPF_OR, reg_no, BPF_REG_BASE), + // *insn, + // }; + + if (p->check_type != BPF_SFI_CHECK_HELPER_AS_UNSAFE_PTR && + p->check_type != BPF_SFI_CHECK_HELPER_AS_SAFE_PTR && + p->check_type != BPF_SFI_CHECK_HELPER_AS_OFFSET) + break; + + cnt = form_sfi_check_patch_v3(insn, p, patch, &origin_insn_no, + sfi_region_size); + + /* NOTICE: use cnt to decide whether to patch */ + if (cnt == 0) + continue; + + // cnt = ARRAY_SIZE(patch); + // origin_insn_no = find_origin_insn(patch, cnt, insn); + + fbpf_log("patch %d(%d): helper func=%d, \ +restore arg r%d, type=%d, cnt=%d, alpha=%d\n", + i + delta + alpha, i, func_id, reg_no, + arg_type, cnt, alpha); + + new_prog = _bpf_patch_insn_data(env, i + delta + alpha, + patch, cnt); + if (!new_prog) { + fbpf_log("ERROR: patch failed\n"); + return -ENOMEM; + } + + dump_insns(new_prog, i + delta + alpha, + cnt, "restore stack ptr arg"); + + env->insn_aux_data[i + delta + alpha + origin_insn_no] = origin_insn_aux; + env->insn_check_lists[i + delta + alpha + origin_insn_no] = origin_check_list; + env->prog = prog = new_prog; + + sub_delta += cnt - 1; + alpha += origin_insn_no; + } + + delta += sub_delta; + } + } + + return 0; +} + +bool bpf_sfi_can_instrument(struct bpf_verifier_env *env) +{ + for (int i = 0; i < env->used_map_cnt; i++) { + struct bpf_map *map = env->used_maps[i]; + if (map_can_lookup(map) && !map->is_aggregated) { + return false; + } + } + + return true; +} \ No newline at end of file diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8fa7b37152c0..f305e2fdce82 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -32,6 +32,10 @@ #include #endif +#ifdef CONFIG_HIVE +#include +#endif + #include "disasm.h" static const struct bpf_verifier_ops * const bpf_verifier_ops[] = { @@ -10032,6 +10036,9 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn int insn_idx = *insn_idx_p; bool changes_data; int i, err, func_id; +#ifdef CONFIG_HIVE + struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx]; +#endif /* find function prototype */ func_id = insn->imm; @@ -10122,6 
+10129,13 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn regs = cur_regs(env); +#ifdef CONFIG_HIVE + /* fbpf record real arg type of helper call */ + for (i = 0; i < 5; i++) { + aux->arg_reg_type[i] = regs[i + 1].type; + } +#endif + if (meta.release_regno) { err = -EINVAL; /* This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot @@ -16468,7 +16482,7 @@ static int propagate_liveness(struct bpf_verifier_env *env, return -EFAULT; } /* Propagate read liveness of registers... */ - BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG); + BUILD_BUG_ON(BPF_REG_FP + 2 != MAX_BPF_REG); for (frame = 0; frame <= vstate->curframe; frame++) { parent = vparent->frame[frame]; state = vstate->frame[frame]; @@ -17076,15 +17090,30 @@ static int do_check(struct bpf_verifier_env *env) bool pop_log = !(env->log.level & BPF_LOG_LEVEL2); struct bpf_verifier_state *state = env->cur_state; struct bpf_insn *insns = env->prog->insnsi; +#ifdef CONFIG_HIVE + struct bpf_reg_state *regs = NULL, *regs_before = NULL; + struct bpf_sfi_check_unit *check_list; + int i; +#else struct bpf_reg_state *regs; +#endif int insn_cnt = env->prog->len; bool do_print_state = false; int prev_insn_idx = -1; +#ifdef CONFIG_HIVE + regs_before = vmalloc(sizeof(struct bpf_reg_state) * MAX_BPF_REG); + if (IS_ERR(regs_before)) + return PTR_ERR(regs_before); +#endif + for (;;) { struct bpf_insn *insn; u8 class; int err; + #ifdef CONFIG_HIVE + bool updated[MAX_BPF_REG] = {false}; + #endif /* reset current history entry on each new instruction */ env->cur_hist_ent = NULL; @@ -17177,6 +17206,16 @@ static int do_check(struct bpf_verifier_env *env) sanitize_mark_insn_seen(env); prev_insn_idx = env->insn_idx; + #ifdef CONFIG_HIVE + /* fbpf: compare regs_before and regs to get which regs are changed */ + memcpy(regs_before, regs, sizeof(struct bpf_reg_state) * MAX_BPF_REG); + + /* fbpf: used to form check list of vulnerable insns */ + check_list = bpf_sfi_get_check_list(env, insn, regs); + if (IS_ERR(check_list)) + return PTR_ERR(check_list); + #endif + if (class == BPF_ALU || class == BPF_ALU64) { err = check_alu_op(env, insn); if (err) @@ -17218,8 +17257,17 @@ static int do_check(struct bpf_verifier_env *env) err = check_atomic(env, env->insn_idx, insn); if (err) return err; + #ifdef CONFIG_HIVE + err = save_aux_ptr_type(env, regs[insn->dst_reg].type, true); + if (err) + return err; + #endif env->insn_idx++; + #ifdef CONFIG_HIVE + goto next_insn; + #else continue; + #endif } if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) { @@ -17323,7 +17371,12 @@ static int do_check(struct bpf_verifier_env *env) env->insn_idx += insn->off + 1; else env->insn_idx += insn->imm + 1; + + #ifdef CONFIG_HIVE + goto next_insn; + #else continue; + #endif } else if (opcode == BPF_EXIT) { if (BPF_SRC(insn->code) != BPF_K || @@ -17363,7 +17416,11 @@ static int do_check(struct bpf_verifier_env *env) if (err) return err; do_print_state = true; + #ifdef CONFIG_HIVE + goto next_insn; + #else continue; + #endif } err = check_return_code(env); @@ -17380,7 +17437,11 @@ static int do_check(struct bpf_verifier_env *env) break; } else { do_print_state = true; + #ifdef CONFIG_HIVE + goto next_insn; + #else continue; + #endif } } else { err = check_cond_jmp_op(env, insn, &env->insn_idx); @@ -17406,12 +17467,52 @@ static int do_check(struct bpf_verifier_env *env) verbose(env, "invalid BPF_LD mode\n"); return -EINVAL; } + #ifdef CONFIG_HIVE + err = save_aux_ptr_type(env, regs[insn->src_reg].type, true); + if (err) + return err; + #endif } 
else { verbose(env, "unknown insn class %d\n", class); return -EINVAL; } env->insn_idx++; + +#ifdef CONFIG_HIVE +next_insn: + if (check_list) { + struct bpf_sfi_check_unit *p; + list_for_each_entry(p, &check_list->list_head, list_head) { + int reg_no = p->reg_no; + regs[reg_no].sfi_check_state = p->proposed_state; + updated[reg_no] = true; + } + + if (!env->insn_check_lists[prev_insn_idx]) { + // fbpf_log("update into %d\n", prev_insn_idx); + env->insn_check_lists[prev_insn_idx] = check_list; + } + } + + for (i = 0; i < MAX_BPF_REG; i++) { + // if (base_type(regs_before[i].type) == PTR_TO_STACK) + // continue; + /* reg move from src to dst, use src_reg's check_state */ + if (insn->code == (BPF_ALU64 | BPF_MOV | BPF_X)) + continue; + /* use check_list proposed state if have */ + if (updated[i]) + continue; + /* if reg changed, reset sfi_check state */ + if (memcmp(regs_before + i, regs + i, sizeof(struct bpf_reg_state))) { + regs[i].sfi_check_state = BPF_SFI_UNSAFE_PTR; + } + } + + // fbpf_log("debugging insn %d", prev_insn_idx); + // bpf_sfi_tmp_check(env); +#endif } return 0; @@ -17866,6 +17967,168 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env) return 0; } + + +#ifdef CONFIG_HIVE +static int resolve_pseudo_ldimm64_again(struct bpf_verifier_env *env) +{ + struct bpf_insn *insn = env->prog->insnsi; + int insn_cnt = env->prog->len; + int i, j, err; + + err = bpf_prog_calc_tag(env->prog); + if (err) + return err; + + for (i = 0; i < insn_cnt; i++, insn++) { + if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) { + struct bpf_insn_aux_data *aux; + struct bpf_map *map; + struct fd f; + u64 addr; + u32 fd; + + if (insn[0].src_reg == 0) + /* valid generic load 64-bit imm */ + goto next_insn; + + if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) { + aux = &env->insn_aux_data[i]; + err = check_pseudo_btf_id(env, insn, aux); + if (err) + return err; + goto next_insn; + } + + if (insn[0].src_reg == BPF_PSEUDO_FUNC) { + aux = &env->insn_aux_data[i]; + aux->ptr_type = PTR_TO_FUNC; + goto next_insn; + } + + /* In final convert_pseudo_ld_imm64() step, this is + * converted into regular 64-bit imm load insn. 
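The guard shape described by the commented-out template in bpf_sfi_check_helper_args() above (mask the pointer down to an in-region offset, then OR in the region base) is what the SFI instrumentation relies on; the concrete instructions come from form_sfi_check_patch_v3(), which is not part of this hunk. A minimal sketch of that shape, assuming the aggregated region size rounds to a power of two that fits in 32 bits and that BPF_REG_BASE is the extra register this series reserves past BPF_REG_FP (hence the BPF_REG_FP + 2 BUILD_BUG_ON change above):

#include <linux/filter.h>
#include <linux/log2.h>

/* Hypothetical stand-in for the sequence emitted by form_sfi_check_patch_v3(). */
static u32 sfi_guard_reg(struct bpf_insn *patch, u8 reg_no,
			 u32 total_page_cnt, const struct bpf_insn *orig)
{
	u32 mask = (roundup_pow_of_two(total_page_cnt) << PAGE_SHIFT) - 1;

	/* Keep only the in-region offset bits of the pointer. */
	patch[0] = BPF_ALU64_IMM(BPF_AND, reg_no, mask);
	/* Rebase that offset onto the aggregated SFI region. */
	patch[1] = BPF_ALU64_REG(BPF_OR, reg_no, BPF_REG_BASE);
	/* Re-emit the original, now guarded, instruction. */
	patch[2] = *orig;

	return 3;	/* becomes "cnt" in the caller above */
}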
+ */ + switch (insn[0].src_reg) { + case BPF_PSEUDO_MAP_VALUE: + break; + default: + goto next_insn; + } + + fd = insn[0].imm; + + f = fdget(fd); + map = __bpf_map_get(f); + if (IS_ERR(map)) { + verbose(env, "fd %d is not pointing to valid bpf_map\n", + insn[0].imm); + return PTR_ERR(map); + } + + err = check_map_prog_compatibility(env, map, env->prog); + if (err) { + fdput(f); + return err; + } + + aux = &env->insn_aux_data[i]; + if (insn[0].src_reg == BPF_PSEUDO_MAP_FD || + insn[0].src_reg == BPF_PSEUDO_MAP_IDX) { + addr = (unsigned long)map; + goto next_insn; + } else { + u32 off = insn[1].imm; + + /* do this after aggregation */ + if (!env->prog->shadow_region_addr) { + pr_err("bpf sfi aggregate memory failed\n"); + return -EINVAL; + } + + if (off >= BPF_MAX_VAR_OFF) { + verbose(env, "direct value offset of %u is not allowed\n", off); + fdput(f); + return -EINVAL; + } + + if (!map->ops->map_direct_value_addr) { + verbose(env, "no direct value access support for this map type\n"); + fdput(f); + return -EINVAL; + } + + err = map->ops->map_direct_value_addr(map, &addr, off); + if (err) { + verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n", + map->value_size, off); + fdput(f); + return err; + } + + aux->map_off = off; + addr += off; + aux->extra_map_ptr = (u64)map; + } + + insn[0].imm = (u32)addr; + insn[1].imm = addr >> 32; + + /* check whether we recorded this map already */ + for (j = 0; j < env->used_map_cnt; j++) { + if (env->used_maps[j] == map) { + aux->map_index = j; + fdput(f); + goto next_insn; + } + } + + if (env->used_map_cnt >= MAX_USED_MAPS) { + fdput(f); + return -E2BIG; + } + + if (env->prog->aux->sleepable) + atomic64_inc(&map->sleepable_refcnt); + /* hold the map. If the program is rejected by verifier, + * the map will be released by release_maps() or it + * will be used by the valid program until it's unloaded + * and all maps are released in bpf_free_used_maps() + */ + bpf_map_inc(map); + + aux->map_index = env->used_map_cnt; + env->used_maps[env->used_map_cnt++] = map; + + if (bpf_map_is_cgroup_storage(map) && + bpf_cgroup_storage_assign(env->prog->aux, map)) { + verbose(env, "only one cgroup storage of each type is allowed\n"); + fdput(f); + return -EBUSY; + } + + fdput(f); +next_insn: + insn++; + i++; + continue; + } + + /* Basic sanity check before we invest more work here. */ + if (!bpf_opcode_in_insntable(insn->code)) { + verbose(env, "unknown opcode %02x\n", insn->code); + return -EINVAL; + } + } + + /* now all pseudo BPF_LD_IMM64 instructions load valid + * 'struct bpf_map *' into a register instead of user map_fd. + * These pointers will be used later by verifier to validate map access. 
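For reference, the two insn->imm stores above are the standard way a BPF_LD_IMM64 pair carries a 64-bit constant: the low half goes into the first instruction and the high half into the second. A small sketch (illustrative helper names) of writing and reading that encoding:

#include <linux/bpf.h>

static void ld_imm64_set_addr(struct bpf_insn insn[2], u64 addr)
{
	insn[0].imm = (u32)addr;		/* low 32 bits */
	insn[1].imm = (u32)(addr >> 32);	/* high 32 bits */
}

static u64 ld_imm64_get_addr(const struct bpf_insn insn[2])
{
	return (u64)(u32)insn[0].imm | ((u64)(u32)insn[1].imm << 32);
}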
+ */ + return 0; +} +#endif + /* drop refcnt of maps used by the rejected program */ static void release_maps(struct bpf_verifier_env *env) { @@ -17932,6 +18195,26 @@ static void adjust_insn_aux_data(struct bpf_verifier_env *env, vfree(old_data); } +#ifdef CONFIG_HIVE +static void adjust_insn_check_lists(struct bpf_verifier_env *env, + struct bpf_sfi_check_unit **new_check_lists, + struct bpf_prog *new_prog, u32 off, u32 cnt) +{ + struct bpf_sfi_check_unit **old_check_lists = env->insn_check_lists; + u32 prog_len; + + if (cnt == 1) + return; + prog_len = new_prog->len; + + memcpy(new_check_lists, old_check_lists, sizeof(struct bpf_insn_check_unit *) * off); + memcpy(new_check_lists + off + cnt - 1, old_check_lists + off, + sizeof(struct bpf_insn_check_unit *) * (prog_len - off - cnt + 1)); + env->insn_check_lists = new_check_lists; + vfree(old_check_lists); +} +#endif + static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len) { int i; @@ -17965,12 +18248,21 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of { struct bpf_prog *new_prog; struct bpf_insn_aux_data *new_data = NULL; +#ifdef CONFIG_HIVE + struct bpf_sfi_check_unit **new_check_lists = NULL; +#endif if (len > 1) { new_data = vzalloc(array_size(env->prog->len + len - 1, sizeof(struct bpf_insn_aux_data))); if (!new_data) return NULL; + #ifdef CONFIG_HIVE + new_check_lists = vzalloc(array_size(env->prog->len + len - 1, + sizeof(struct bpf_sfi_check_unit *))); + if (!new_check_lists) + return NULL; + #endif } new_prog = bpf_patch_insn_single(env->prog, off, patch, len); @@ -17983,11 +18275,22 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of return NULL; } adjust_insn_aux_data(env, new_data, new_prog, off, len); +#ifdef CONFIG_HIVE + adjust_insn_check_lists(env, new_check_lists, new_prog, off, len); +#endif adjust_subprog_starts(env, off, len); adjust_poke_descs(new_prog, off, len); return new_prog; } +#ifdef CONFIG_HIVE +struct bpf_prog *_bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off, + const struct bpf_insn *patch, u32 len) +{ + return bpf_patch_insn_data(env, off, patch, len); +} +#endif + static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env, u32 off, u32 cnt) { @@ -18643,6 +18946,10 @@ static int jit_subprogs(struct bpf_verifier_env *env) func[i]->aux->name[0] = 'F'; func[i]->aux->stack_depth = env->subprog_info[i].stack_depth; func[i]->jit_requested = 1; + #ifdef CONFIG_HIVE + func[i]->shadow_stack_addr = env->prog->shadow_stack_addr; + func[i]->shadow_region_addr = env->prog->shadow_region_addr; + #endif func[i]->blinding_requested = prog->blinding_requested; func[i]->aux->kfunc_tab = prog->aux->kfunc_tab; func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab; @@ -20473,6 +20780,12 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 len = (*prog)->len; env->insn_aux_data = vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len)); +#ifdef CONFIG_HIVE + env->insn_check_lists = vzalloc(array_size(sizeof(struct bpf_sfi_check_unit *), len)); + for (i = 0; i < len; i++) + env->insn_check_lists[i] = NULL; + mutex_init(&env->insn_check_list_mutex); +#endif ret = -ENOMEM; if (!env->insn_aux_data) goto err_free_env; @@ -20564,6 +20877,48 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 if (ret == 0 && bpf_prog_is_offloaded(env->prog->aux)) ret = bpf_prog_offload_finalize(env); + +#ifdef CONFIG_HIVE + /* bpf_sfi aggregate and check is after 
bpf_check, if skip full_check, + bpf_sfi checks are skipped either */ + fbpf_log("bpf_check finished\n"); + // dump_insns(env->prog, 0, env->prog->len, "after do_check"); + + // if (ret == 0) + // ret = bpf_sfi_tmp_check(env); + + /* if not support map or mode, just return to normal bpf */ + if (!bpf_sfi_can_instrument(env)) { + struct bpf_map *map; + for (i = 0; i < env->used_map_cnt; i++) { + map = env->used_maps[i]; + map->is_aggregated = false; + } + fbpf_log("skip sfi instrument\n"); + goto skip_sfi_instrument; + } + + // TODO unmap shadow data + /* fbpf aggregate values and caculate shadow stack addr */ + if (ret == 0) + ret = bpf_sfi_aggregate_memory(env); + + if (ret == 0) + ret = resolve_pseudo_ldimm64_again(env); + + if (ret == 0) + ret = bpf_sfi_check_ldst(env); + + if (ret == 0) + ret = bpf_sfi_check_helper_args(env); + +skip_sfi_instrument: + // dump_insns(env->prog, 0, env->prog->len, "after bpf_sfi_checks"); + + fbpf_log("bpf_sfi_check finished\n"); + + pr_info("[%s] bpf insn len = %d\n", env->prog->aux->name, env->prog->len); +#endif skip_full_check: kvfree(env->explored_states); @@ -20685,5 +21040,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 vfree(env->insn_aux_data); err_free_env: kvfree(env); +#ifdef CONFIG_HIVE + pr_info("prog [%s] bpf_check's ret = %d\n", env->prog->aux->name, ret); +#endif return ret; } diff --git a/kernel/cred.c b/kernel/cred.c index c033a201c808..813db29f35df 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -20,6 +20,11 @@ #include #include +#ifdef CONFIG_CREDP +#include +#include +#endif + #if 0 #define kdebug(FMT, ...) \ printk("[%-5.5s%5u] " FMT "\n", \ @@ -33,7 +38,12 @@ do { \ } while (0) #endif +#ifdef CONFIG_CREDP +struct kmem_cache *cred_jar; +static struct kmem_cache *rcu_jar; +#else static struct kmem_cache *cred_jar; +#endif /* init to 2 - one for init_task, one to ensure it is never freed */ static struct group_info init_groups = { .usage = REFCOUNT_INIT(2) }; @@ -41,6 +51,32 @@ static struct group_info init_groups = { .usage = REFCOUNT_INIT(2) }; /* * The initial credentials for the initial task */ +#ifdef CONFIG_CREDP +struct cred init_cred __section(".iee.cred") = { + .usage = ATOMIC_INIT(4), +#ifdef CONFIG_DEBUG_CREDENTIALS + .subscribers = ATOMIC_INIT(2), + .magic = CRED_MAGIC, +#endif + .uid = GLOBAL_ROOT_UID, + .gid = GLOBAL_ROOT_GID, + .suid = GLOBAL_ROOT_UID, + .sgid = GLOBAL_ROOT_GID, + .euid = GLOBAL_ROOT_UID, + .egid = GLOBAL_ROOT_GID, + .fsuid = GLOBAL_ROOT_UID, + .fsgid = GLOBAL_ROOT_GID, + .securebits = SECUREBITS_DEFAULT, + .cap_inheritable = CAP_EMPTY_SET, + .cap_permitted = CAP_FULL_SET, + .cap_effective = CAP_FULL_SET, + .cap_bset = CAP_FULL_SET, + .user = INIT_USER, + .user_ns = &init_user_ns, + .group_info = &init_groups, + .ucounts = &init_ucounts, +}; +#else struct cred init_cred = { .usage = ATOMIC_INIT(4), .uid = GLOBAL_ROOT_UID, @@ -61,13 +97,43 @@ struct cred init_cred = { .group_info = &init_groups, .ucounts = &init_ucounts, }; +#endif + +static inline void set_cred_subscribers(struct cred *cred, int n) +{ +#ifdef CONFIG_DEBUG_CREDENTIALS + atomic_set(&cred->subscribers, n); +#endif +} + +static inline int read_cred_subscribers(const struct cred *cred) +{ +#ifdef CONFIG_DEBUG_CREDENTIALS + return atomic_read(&cred->subscribers); +#else + return 0; +#endif +} + +static inline void alter_cred_subscribers(const struct cred *_cred, int n) +{ +#ifdef CONFIG_DEBUG_CREDENTIALS + struct cred *cred = (struct cred *) _cred; + + atomic_add(n, &cred->subscribers); +#endif +} /* * 
The RCU callback to actually dispose of a set of credentials */ static void put_cred_rcu(struct rcu_head *rcu) { + #ifdef CONFIG_CREDP + struct cred *cred = *(struct cred **)(rcu + 1); + #else struct cred *cred = container_of(rcu, struct cred, rcu); + #endif kdebug("put_cred_rcu(%p)", cred); @@ -86,6 +152,9 @@ static void put_cred_rcu(struct rcu_head *rcu) if (cred->ucounts) put_ucounts(cred->ucounts); put_user_ns(cred->user_ns); + #ifdef CONFIG_CREDP + kmem_cache_free(rcu_jar, (struct rcu_head *)(cred->rcu.func)); + #endif kmem_cache_free(cred_jar, cred); } @@ -104,10 +173,22 @@ void __put_cred(struct cred *cred) BUG_ON(cred == current->cred); BUG_ON(cred == current->real_cred); + #ifdef CONFIG_CREDP + if (*(int *)(&(((struct rcu_head *)(cred->rcu.func))->next))) + #else if (cred->non_rcu) + #endif + #ifdef CONFIG_CREDP + put_cred_rcu((struct rcu_head *)(cred->rcu.func)); + #else put_cred_rcu(&cred->rcu); + #endif else + #ifdef CONFIG_CREDP + call_rcu((struct rcu_head *)(cred->rcu.func), put_cred_rcu); + #else call_rcu(&cred->rcu, put_cred_rcu); + #endif } EXPORT_SYMBOL(__put_cred); @@ -178,7 +259,18 @@ struct cred *cred_alloc_blank(void) if (!new) return NULL; + #ifdef CONFIG_CREDP + iee_set_cred_rcu(new,kmem_cache_zalloc(rcu_jar, GFP_KERNEL)); + *(struct cred **)(((struct rcu_head *)(new->rcu.func)) + 1) = new; + iee_set_cred_atomic_set_usage(new,1); + #else atomic_long_set(&new->usage, 1); + #endif + + #ifdef CONFIG_DEBUG_CREDENTIALS + new->magic = CRED_MAGIC; + #endif + if (security_cred_alloc_blank(new, GFP_KERNEL_ACCOUNT) < 0) goto error; @@ -213,13 +305,25 @@ struct cred *prepare_creds(void) if (!new) return NULL; + #ifdef CONFIG_CREDP + iee_set_cred_rcu(new,kmem_cache_alloc(rcu_jar, GFP_KERNEL)); + *(struct cred **)(((struct rcu_head *)(new->rcu.func)) + 1) = new; + #endif + kdebug("prepare_creds() alloc %p", new); old = task->cred; + #ifdef CONFIG_CREDP + iee_copy_cred(old,new); + + iee_set_cred_non_rcu(new,0); + iee_set_cred_atomic_set_usage(new,1); + #else memcpy(new, old, sizeof(struct cred)); new->non_rcu = 0; atomic_long_set(&new->usage, 1); + #endif get_group_info(new->group_info); get_uid(new->user); get_user_ns(new->user_ns); @@ -232,10 +336,18 @@ struct cred *prepare_creds(void) #endif #ifdef CONFIG_SECURITY +#ifdef CONFIG_CREDP + iee_set_cred_security(new,NULL); +#else new->security = NULL; +#endif #endif + #ifdef CONFIG_CREDP + iee_set_cred_ucounts(new, get_ucounts(new->ucounts)); + #else new->ucounts = get_ucounts(new->ucounts); + #endif if (!new->ucounts) goto error; @@ -265,15 +377,30 @@ struct cred *prepare_exec_creds(void) #ifdef CONFIG_KEYS /* newly exec'd tasks don't get a thread keyring */ key_put(new->thread_keyring); + #ifdef CONFIG_CREDP + iee_set_cred_thread_keyring(new,NULL); + #else new->thread_keyring = NULL; + #endif /* inherit the session keyring; new process keyring */ key_put(new->process_keyring); + #ifdef CONFIG_CREDP + iee_set_cred_process_keyring(new,NULL); + #else new->process_keyring = NULL; + #endif #endif + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(new,new->euid); + iee_set_cred_suid(new,new->euid); + iee_set_cred_fsgid(new,new->egid); + iee_set_cred_sgid(new,new->egid); + #else new->suid = new->fsuid = new->euid; new->sgid = new->fsgid = new->egid; + #endif return new; } @@ -327,7 +454,11 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) * had one */ if (new->thread_keyring) { key_put(new->thread_keyring); + #ifdef CONFIG_CREDP + iee_set_cred_thread_keyring(new,NULL); + #else new->thread_keyring = NULL; + #endif if 
(clone_flags & CLONE_THREAD) install_thread_keyring_to_cred(new); } @@ -337,7 +468,11 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) */ if (!(clone_flags & CLONE_THREAD)) { key_put(new->process_keyring); + #ifdef CONFIG_CREDP + iee_set_cred_process_keyring(new,NULL); + #else new->process_keyring = NULL; + #endif } #endif @@ -594,7 +729,11 @@ int set_cred_ucounts(struct cred *new) if (!(new_ucounts = alloc_ucounts(new->user_ns, new->uid))) return -EAGAIN; + #ifdef CONFIG_CREDP + iee_set_cred_ucounts(new, new_ucounts); + #else new->ucounts = new_ucounts; + #endif put_ucounts(old_ucounts); return 0; @@ -606,8 +745,21 @@ int set_cred_ucounts(struct cred *new) void __init cred_init(void) { /* allocate a slab in which we can store credentials */ + #ifdef CONFIG_CREDP cred_jar = kmem_cache_create("cred_jar", sizeof(struct cred), 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT|SLAB_RED_ZONE, NULL); + rcu_jar = kmem_cache_create("rcu_jar", sizeof(struct rcu_head) + sizeof(struct cred *), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL); + // Map init_cred + *((struct rcu_head **)(&(init_cred.rcu.func))) = (struct rcu_head *)kmem_cache_zalloc(rcu_jar, GFP_KERNEL); + *(struct cred **)(((struct rcu_head *)(init_cred.rcu.func)) + 1) = &init_cred; + set_iee_page_valid((unsigned long)__phys_to_iee(__pa_symbol(&init_cred))); + iee_set_logical_mem_ro((unsigned long)&init_cred); + iee_set_logical_mem_ro((unsigned long)__va(__pa_symbol(&init_cred))); + #else + cred_jar = kmem_cache_create("cred_jar", sizeof(struct cred), 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL); + #endif } /** @@ -638,29 +790,56 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) if (!new) return NULL; + #ifdef CONFIG_CREDP + iee_set_cred_rcu(new,kmem_cache_alloc(rcu_jar, GFP_KERNEL)); + *(struct cred **)(((struct rcu_head *)(new->rcu.func)) + 1) = new; + #endif + kdebug("prepare_kernel_cred() alloc %p", new); old = get_task_cred(daemon); + #ifdef CONFIG_CREDP + iee_copy_cred(old,new); + iee_set_cred_non_rcu(new,0); + iee_set_cred_atomic_set_usage(new,1); + #else *new = *old; new->non_rcu = 0; atomic_long_set(&new->usage, 1); + #endif get_uid(new->user); get_user_ns(new->user_ns); get_group_info(new->group_info); #ifdef CONFIG_KEYS +#ifdef CONFIG_CREDP + iee_set_cred_session_keyring(new,NULL); + iee_set_cred_process_keyring(new,NULL); + iee_set_cred_thread_keyring(new,NULL); + iee_set_cred_request_key_auth(new,NULL); + iee_set_cred_jit_keyring(new,KEY_REQKEY_DEFL_THREAD_KEYRING); +#else new->session_keyring = NULL; new->process_keyring = NULL; new->thread_keyring = NULL; new->request_key_auth = NULL; new->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; #endif +#endif #ifdef CONFIG_SECURITY +#ifdef CONFIG_CREDP + iee_set_cred_security(new,NULL); +#else new->security = NULL; #endif +#endif + #ifdef CONFIG_CREDP + iee_set_cred_ucounts(new, get_ucounts(new->ucounts)); + #else new->ucounts = get_ucounts(new->ucounts); + #endif if (!new->ucounts) goto error; @@ -727,8 +906,13 @@ int set_create_files_as(struct cred *new, struct inode *inode) { if (!uid_valid(inode->i_uid) || !gid_valid(inode->i_gid)) return -EINVAL; + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(new,inode->i_uid); + iee_set_cred_fsgid(new,inode->i_gid); + #else new->fsuid = inode->i_uid; new->fsgid = inode->i_gid; + #endif return security_kernel_create_files_as(new, inode); } EXPORT_SYMBOL(set_create_files_as); diff --git a/kernel/exit.c b/kernel/exit.c index 3540b2c9b1b6..3bb305d7edf2 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ 
-74,6 +74,11 @@ #include #include +#ifdef CONFIG_IEE +#include +#include +#endif + /* * The default value should be high enough to not crash a system that randomly * crashes its kernel from time to time, but low enough to at least not permit @@ -560,6 +565,9 @@ static void exit_mm(void) smp_mb__after_spinlock(); local_irq_disable(); current->mm = NULL; + #ifdef CONFIG_IEE + iee_set_token_pgd(current, NULL); + #endif membarrier_update_current_mm(NULL); enter_lazy_tlb(mm, current); local_irq_enable(); diff --git a/kernel/fork.c b/kernel/fork.c index f30b24c68442..617cb03938ff 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -115,6 +115,13 @@ #define CREATE_TRACE_POINTS #include +#ifdef CONFIG_IEE +#include +#include +#include +#include +#endif + /* * Minimum number of threads to boot the kernel */ @@ -128,14 +135,14 @@ /* * Protected counters by write_lock_irq(&tasklist_lock) */ -unsigned long total_forks; /* Handle normal Linux uptimes. */ -int nr_threads; /* The idle threads do not count.. */ +unsigned long total_forks; /* Handle normal Linux uptimes. */ +int nr_threads; /* The idle threads do not count.. */ -static int max_threads; /* tunable limit on nr_threads */ +static int max_threads; /* tunable limit on nr_threads */ -#define NAMED_ARRAY_INDEX(x) [x] = __stringify(x) +#define NAMED_ARRAY_INDEX(x) [x] = __stringify(x) -static const char * const resident_page_types[] = { +static const char *const resident_page_types[] = { NAMED_ARRAY_INDEX(MM_FILEPAGES), NAMED_ARRAY_INDEX(MM_ANONPAGES), NAMED_ARRAY_INDEX(MM_SWAPENTS), @@ -144,7 +151,7 @@ static const char * const resident_page_types[] = { DEFINE_PER_CPU(unsigned long, process_counts) = 0; -__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ +__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ #ifdef CONFIG_PROVE_RCU int lockdep_tasklist_lock_is_held(void) @@ -159,7 +166,7 @@ int nr_processes(void) int cpu; int total = 0; - for_each_possible_cpu(cpu) + for_each_possible_cpu (cpu) total += per_cpu(process_counts, cpu); return total; @@ -170,7 +177,11 @@ void __weak arch_release_task_struct(struct task_struct *tsk) } #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR +#if defined(CONFIG_IEE) || defined(CONFIG_KOI) +struct kmem_cache *task_struct_cachep; +#else static struct kmem_cache *task_struct_cachep; +#endif static inline struct task_struct *alloc_task_struct_node(int node) { @@ -190,7 +201,7 @@ static inline void free_task_struct(struct task_struct *tsk) * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a * kmemcache based allocator. */ -# if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) +#if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) # ifdef CONFIG_VMAP_STACK /* @@ -311,8 +322,8 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node) * so memcg accounting is performed manually on assigning/releasing * stacks to tasks. Drop __GFP_ACCOUNT. 
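Recapping the CONFIG_CREDP layout used by the kernel/cred.c hunks above: struct cred becomes write-protected, so its RCU bookkeeping moves into a separate rcu_jar object of sizeof(struct rcu_head) + sizeof(struct cred *); cred->rcu.func is reused as a pointer to that side object, and the slot right after the rcu_head points back at the owning cred. A sketch with named accessors (the helper names are hypothetical; the patch open-codes these casts):

#include <linux/cred.h>
#include <linux/rcupdate.h>

static inline struct rcu_head *credp_rcu_head(const struct cred *cred)
{
	/* ->rcu.func holds the side object here, not an RCU callback. */
	return (struct rcu_head *)cred->rcu.func;
}

static inline struct cred *credp_owner(struct rcu_head *head)
{
	/* Back-pointer stored immediately after the rcu_head. */
	return *(struct cred **)(head + 1);
}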
*/ - stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN, - VMALLOC_START, VMALLOC_END, + stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN, VMALLOC_START, + VMALLOC_END, THREADINFO_GFP & ~__GFP_ACCOUNT, PAGE_KERNEL, 0, node, __builtin_return_address(0)); @@ -410,9 +421,10 @@ static void free_thread_stack(struct task_struct *tsk) void thread_stack_cache_init(void) { - thread_stack_cache = kmem_cache_create_usercopy("thread_stack", - THREAD_SIZE, THREAD_SIZE, 0, 0, - THREAD_SIZE, NULL); + thread_stack_cache = + kmem_cache_create_usercopy("thread_stack", THREAD_SIZE, + THREAD_SIZE, 0, 0, THREAD_SIZE, + NULL); BUG_ON(thread_stack_cache == NULL); } @@ -502,7 +514,8 @@ struct vm_area_struct *vm_area_alloc(struct mm_struct *mm) struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) { - struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); + struct vm_area_struct *new = + kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (!new) return NULL; @@ -602,6 +615,10 @@ void put_task_stack(struct task_struct *tsk) } #endif +#ifdef CONFIG_KOI +extern s64 koi_offset; +#endif + void free_task(struct task_struct *tsk) { #ifdef CONFIG_SECCOMP @@ -633,6 +650,21 @@ void free_task(struct task_struct *tsk) if (dynamic_affinity_enabled()) sched_prefer_cpus_free(tsk); #endif +#ifdef CONFIG_IEE + iee_invalidate_token(tsk); +#ifdef CONFIG_KOI + // Free koi stack. + unsigned long koi_stack = iee_rw_gate(IEE_READ_KOI_STACK_BASE, tsk); + if (koi_stack != 0) + free_pages(koi_stack, 2); +#endif +#else +#ifdef CONFIG_KOI + struct task_token *token = (struct task_token *)((unsigned long)tsk + KOI_OFFSET); + memset(token, 0, sizeof(struct task_token)); +#endif +#endif + #ifdef CONFIG_QOS_SCHED_SMART_GRID if (smart_grid_enabled()) sched_grid_qos_free(tsk); @@ -661,7 +693,7 @@ static void dup_mm_exe_file(struct mm_struct *mm, struct mm_struct *oldmm) #ifdef CONFIG_MMU static __latent_entropy int dup_mmap(struct mm_struct *mm, - struct mm_struct *oldmm) + struct mm_struct *oldmm) { struct vm_area_struct *mpnt, *tmp; int retval; @@ -772,7 +804,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, flush_dcache_mmap_lock(mapping); /* insert tmp into the share list, just after mpnt */ vma_interval_tree_insert_after(tmp, mpnt, - &mapping->i_mmap); + &mapping->i_mmap); flush_dcache_mmap_unlock(mapping); i_mmap_unlock_write(mapping); } @@ -846,7 +878,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) mmap_write_unlock(oldmm); return 0; } -#define mm_alloc_pgd(mm) (0) +#define mm_alloc_pgd(mm) (0) #define mm_free_pgd(mm) #endif /* CONFIG_MMU */ @@ -854,20 +886,22 @@ static void check_mm(struct mm_struct *mm) { int i; - BUILD_BUG_ON_MSG(ARRAY_SIZE(resident_page_types) != NR_MM_COUNTERS, - "Please make sure 'struct resident_page_types[]' is updated as well"); + BUILD_BUG_ON_MSG( + ARRAY_SIZE(resident_page_types) != NR_MM_COUNTERS, + "Please make sure 'struct resident_page_types[]' is updated as well"); for (i = 0; i < NR_MM_COUNTERS; i++) { long x = mm_counter_sum(mm, i); if (unlikely(x)) - pr_alert("BUG: Bad rss-counter state mm:%p type:%s val:%ld\n", - mm, resident_page_types[i], x); + pr_alert( + "BUG: Bad rss-counter state mm:%p type:%s val:%ld\n", + mm, resident_page_types[i], x); } if (mm_pgtables_bytes(mm)) pr_alert("BUG: non-zero pgtables_bytes on freeing mm: %ld\n", - mm_pgtables_bytes(mm)); + mm_pgtables_bytes(mm)); #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS VM_BUG_ON_MM(mm->pmd_huge_pte, mm); @@ -1018,14 +1052,6 @@ void 
__put_task_struct(struct task_struct *tsk) } EXPORT_SYMBOL_GPL(__put_task_struct); -void __put_task_struct_rcu_cb(struct rcu_head *rhp) -{ - struct task_struct *task = container_of(rhp, struct task_struct, rcu); - - __put_task_struct(task); -} -EXPORT_SYMBOL_GPL(__put_task_struct_rcu_cb); - void __init __weak arch_task_cache_init(void) { } /* @@ -1043,8 +1069,8 @@ static void set_max_threads(unsigned int max_threads_suggested) if (fls64(nr_pages) + fls64(PAGE_SIZE) > 64) threads = MAX_THREADS; else - threads = div64_u64((u64) nr_pages * (u64) PAGE_SIZE, - (u64) THREAD_SIZE * 8UL); + threads = div64_u64((u64)nr_pages * (u64)PAGE_SIZE, + (u64)THREAD_SIZE * 8UL); if (threads > max_threads_suggested) threads = max_threads_suggested; @@ -1079,17 +1105,24 @@ void __init fork_init(void) int i; #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR #ifndef ARCH_MIN_TASKALIGN -#define ARCH_MIN_TASKALIGN 0 +#define ARCH_MIN_TASKALIGN 0 #endif int align = max_t(int, L1_CACHE_BYTES, ARCH_MIN_TASKALIGN); unsigned long useroffset, usersize; /* create a slab on which task_structs can be allocated */ task_struct_whitelist(&useroffset, &usersize); + #ifdef CONFIG_IEE task_struct_cachep = kmem_cache_create_usercopy("task_struct", arch_task_struct_size, align, - SLAB_PANIC|SLAB_ACCOUNT, + SLAB_PANIC|SLAB_ACCOUNT|SLAB_RED_ZONE, useroffset, usersize, NULL); + #else + task_struct_cachep = + kmem_cache_create_usercopy("task_struct", arch_task_struct_size, + align, SLAB_PANIC | SLAB_ACCOUNT, + useroffset, usersize, NULL); + #endif #endif /* do the arch specific task caches init */ @@ -1097,8 +1130,8 @@ void __init fork_init(void) set_max_threads(MAX_THREADS); - init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2; - init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2; + init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads / 2; + init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads / 2; init_task.signal->rlim[RLIMIT_SIGPENDING] = init_task.signal->rlim[RLIMIT_NPROC]; @@ -1111,8 +1144,8 @@ void __init fork_init(void) set_userns_rlimit_max(&init_user_ns, UCOUNT_RLIMIT_MEMLOCK, RLIM_INFINITY); #ifdef CONFIG_VMAP_STACK - cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache", - NULL, free_vm_stack_cache); + cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache", NULL, + free_vm_stack_cache); #endif scs_init(); @@ -1122,7 +1155,7 @@ void __init fork_init(void) } int __weak arch_dup_task_struct(struct task_struct *dst, - struct task_struct *src) + struct task_struct *src) { *dst = *src; return 0; @@ -1133,14 +1166,14 @@ void set_task_stack_end_magic(struct task_struct *tsk) unsigned long *stackend; stackend = end_of_stack(tsk); - *stackend = STACK_END_MAGIC; /* for overflow detection */ + *stackend = STACK_END_MAGIC; /* for overflow detection */ } static bool dup_resvd_task_struct(struct task_struct *dst, struct task_struct *orig, int node) { - dst->_resvd = kzalloc_node(sizeof(struct task_struct_resvd), - GFP_KERNEL, node); + dst->_resvd = kzalloc_node(sizeof(struct task_struct_resvd), GFP_KERNEL, + node); if (!dst->_resvd) return false; @@ -1321,7 +1354,7 @@ static void mm_init_uprobes_state(struct mm_struct *mm) } static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, - struct user_namespace *user_ns) + struct user_namespace *user_ns) { mt_init_flags(&mm->mm_mt, MM_MT_FLAGS); mt_set_external_lock(&mm->mm_mt, &mm->mmap_lock); @@ -1438,8 +1471,8 @@ EXPORT_SYMBOL_GPL(mmput); #ifdef CONFIG_MMU static void mmput_async_fn(struct work_struct *work) { - struct 
mm_struct *mm = container_of(work, struct mm_struct, - async_put_work); + struct mm_struct *mm = + container_of(work, struct mm_struct, async_put_work); __mmput(mm); } @@ -1615,13 +1648,12 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) struct mm_struct *mm; int err; - err = down_read_killable(&task->signal->exec_update_lock); + err = down_read_killable(&task->signal->exec_update_lock); if (err) return ERR_PTR(err); mm = get_task_mm(task); - if (mm && mm != current->mm && - !ptrace_may_access(task, mode)) { + if (mm && mm != current->mm && !ptrace_may_access(task, mode)) { mmput(mm); mm = ERR_PTR(-EACCES); } @@ -1644,7 +1676,7 @@ static void complete_vfork_done(struct task_struct *tsk) } static int wait_for_vfork_done(struct task_struct *child, - struct completion *vfork) + struct completion *vfork) { unsigned int state = TASK_UNINTERRUPTIBLE|TASK_KILLABLE|TASK_FREEZABLE; int killed; @@ -1695,8 +1727,8 @@ static void mm_release(struct task_struct *tsk, struct mm_struct *mm) * not set up a proper pointer then tough luck. */ put_user(0, tsk->clear_child_tid); - do_futex(tsk->clear_child_tid, FUTEX_WAKE, - 1, NULL, NULL, 0, 0); + do_futex(tsk->clear_child_tid, FUTEX_WAKE, 1, NULL, + NULL, 0, 0); } tsk->clear_child_tid = NULL; } @@ -1780,6 +1812,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) #endif tsk->mm = NULL; +#ifdef CONFIG_IEE + iee_set_token_pgd(tsk, NULL); +#endif tsk->active_mm = NULL; /* @@ -1811,6 +1846,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) } tsk->mm = mm; +#ifdef CONFIG_IEE + iee_set_token_pgd(tsk, mm->pgd); +#endif tsk->active_mm = mm; sched_mm_cid_fork(tsk); return 0; @@ -2025,8 +2063,8 @@ static inline void init_task_pid_links(struct task_struct *task) INIT_HLIST_NODE(&task->pid_links[type]); } -static inline void -init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid) +static inline void init_task_pid(struct task_struct *task, enum pid_type type, + struct pid *pid) { if (type == PIDTYPE_PID) task->thread_pid = pid; @@ -2288,6 +2326,11 @@ static void copy_oom_score_adj(u64 clone_flags, struct task_struct *tsk) mutex_unlock(&oom_adj_mutex); } +#if defined(CONFIG_KOI) && !defined(CONFIG_IEE) +extern s64 koi_offset; +#endif + + #ifdef CONFIG_RV static void rv_task_fork(struct task_struct *p) { @@ -2325,10 +2368,12 @@ __latent_entropy struct task_struct *copy_process( * Don't allow sharing the root directory with processes in a different * namespace */ - if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) + if ((clone_flags & (CLONE_NEWNS | CLONE_FS)) == + (CLONE_NEWNS | CLONE_FS)) return ERR_PTR(-EINVAL); - if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) + if ((clone_flags & (CLONE_NEWUSER | CLONE_FS)) == + (CLONE_NEWUSER | CLONE_FS)) return ERR_PTR(-EINVAL); /* @@ -2353,7 +2398,7 @@ __latent_entropy struct task_struct *copy_process( * from creating siblings. */ if ((clone_flags & CLONE_PARENT) && - current->signal->flags & SIGNAL_UNKILLABLE) + current->signal->flags & SIGNAL_UNKILLABLE) return ERR_PTR(-EINVAL); /* @@ -2398,6 +2443,10 @@ __latent_entropy struct task_struct *copy_process( p = dup_task_struct(current, node); if (!p) goto fork_out; +#ifdef CONFIG_IEE + iee_validate_token(p); +#else +#endif p->flags &= ~PF_KTHREAD; if (args->kthread) p->flags |= PF_KTHREAD; @@ -2419,7 +2468,8 @@ __latent_entropy struct task_struct *copy_process( /* * Clear TID on mm_release()? 
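The CONFIG_IEE hunks in exit_mm() and copy_mm() above (and in kthread_use_mm()/kthread_unuse_mm() later in this patch) all follow the same rule: whenever tsk->mm changes, the pgd recorded in the task's protected token is updated to match. A minimal sketch of that invariant; the helper name is hypothetical and the iee-token.h location is assumed from this series' header list:

#ifdef CONFIG_IEE
#include <linux/sched.h>
#include <linux/mm_types.h>
#include <asm/iee-token.h>	/* assumed home of iee_set_token_pgd() */

static inline void task_set_mm_synced(struct task_struct *tsk,
				      struct mm_struct *mm)
{
	tsk->mm = mm;
	/* Mirror the new pgd (or NULL) into the IEE-protected token. */
	iee_set_token_pgd(tsk, mm ? mm->pgd : NULL);
}
#endif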
*/ - p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? args->child_tid : NULL; + p->clear_child_tid = + (clone_flags & CLONE_CHILD_CLEARTID) ? args->child_tid : NULL; ftrace_graph_init_task(p); @@ -2539,10 +2589,10 @@ __latent_entropy struct task_struct *copy_process( #endif #ifdef CONFIG_TRACE_IRQFLAGS memset(&p->irqtrace, 0, sizeof(p->irqtrace)); - p->irqtrace.hardirq_disable_ip = _THIS_IP_; - p->irqtrace.softirq_enable_ip = _THIS_IP_; - p->softirqs_enabled = 1; - p->softirq_context = 0; + p->irqtrace.hardirq_disable_ip = _THIS_IP_; + p->irqtrace.softirq_enable_ip = _THIS_IP_; + p->softirqs_enabled = 1; + p->softirq_context = 0; #endif p->pagefault_disabled = 0; @@ -2555,8 +2605,8 @@ __latent_entropy struct task_struct *copy_process( p->blocked_on = NULL; /* not blocked yet */ #endif #ifdef CONFIG_BCACHE - p->sequential_io = 0; - p->sequential_io_avg = 0; + p->sequential_io = 0; + p->sequential_io_avg = 0; #endif #ifdef CONFIG_BPF_SYSCALL RCU_INIT_POINTER(p->bpf_storage, NULL); @@ -2643,7 +2693,7 @@ __latent_entropy struct task_struct *copy_process( /* * sigaltstack should be cleared when sharing the same VM */ - if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM) + if ((clone_flags & (CLONE_VM | CLONE_VFORK)) == CLONE_VM) sas_ss_reset(p); /* @@ -2722,7 +2772,7 @@ __latent_entropy struct task_struct *copy_process( write_lock_irq(&tasklist_lock); /* CLONE_PARENT re-uses the old parent */ - if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { + if (clone_flags & (CLONE_PARENT | CLONE_THREAD)) { p->real_parent = current->real_parent; p->parent_exec_id = current->parent_exec_id; if (clone_flags & CLONE_THREAD) @@ -2786,8 +2836,9 @@ __latent_entropy struct task_struct *copy_process( * tasklist_lock with adding child to the process tree * for propagate_has_child_subreaper optimization. 
*/ - p->signal->has_child_subreaper = p->real_parent->signal->has_child_subreaper || - p->real_parent->signal->is_child_subreaper; + p->signal->has_child_subreaper = + p->real_parent->signal->has_child_subreaper || + p->real_parent->signal->is_child_subreaper; list_add_tail(&p->sibling, &p->real_parent->children); list_add_tail_rcu(&p->tasks, &init_task.tasks); attach_pid(p, PIDTYPE_TGID); @@ -2938,8 +2989,8 @@ struct task_struct * __init fork_idle(int cpu) */ struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node) { - unsigned long flags = CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD| - CLONE_IO; + unsigned long flags = CLONE_FS | CLONE_FILES | CLONE_SIGHAND | + CLONE_THREAD | CLONE_IO; struct kernel_clone_args args = { .flags = ((lower_32_bits(flags) | CLONE_VM | CLONE_UNTRACED) & ~CSIGNAL), @@ -3103,8 +3154,8 @@ SYSCALL_DEFINE0(fork) SYSCALL_DEFINE0(vfork) { struct kernel_clone_args args = { - .flags = CLONE_VFORK | CLONE_VM, - .exit_signal = SIGCHLD, + .flags = CLONE_VFORK | CLONE_VM, + .exit_signal = SIGCHLD, }; return kernel_clone(&args); @@ -3114,35 +3165,30 @@ SYSCALL_DEFINE0(vfork) #ifdef __ARCH_WANT_SYS_CLONE #ifdef CONFIG_CLONE_BACKWARDS SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, - int __user *, parent_tidptr, - unsigned long, tls, - int __user *, child_tidptr) + int __user *, parent_tidptr, unsigned long, tls, int __user *, + child_tidptr) #elif defined(CONFIG_CLONE_BACKWARDS2) SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags, - int __user *, parent_tidptr, - int __user *, child_tidptr, - unsigned long, tls) -#elif defined(CONFIG_CLONE_BACKWARDS3) -SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp, - int, stack_size, - int __user *, parent_tidptr, - int __user *, child_tidptr, + int __user *, parent_tidptr, int __user *, child_tidptr, unsigned long, tls) +#elif defined(CONFIG_CLONE_BACKWARDS3) +SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp, int, + stack_size, int __user *, parent_tidptr, int __user *, + child_tidptr, unsigned long, tls) #else SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, - int __user *, parent_tidptr, - int __user *, child_tidptr, - unsigned long, tls) + int __user *, parent_tidptr, int __user *, child_tidptr, + unsigned long, tls) #endif { struct kernel_clone_args args = { - .flags = (lower_32_bits(clone_flags) & ~CSIGNAL), - .pidfd = parent_tidptr, - .child_tid = child_tidptr, - .parent_tid = parent_tidptr, - .exit_signal = (lower_32_bits(clone_flags) & CSIGNAL), - .stack = newsp, - .tls = tls, + .flags = (lower_32_bits(clone_flags) & ~CSIGNAL), + .pidfd = parent_tidptr, + .child_tid = child_tidptr, + .parent_tid = parent_tidptr, + .exit_signal = (lower_32_bits(clone_flags) & CSIGNAL), + .stack = newsp, + .tls = tls, }; return kernel_clone(&args); @@ -3198,21 +3244,21 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs, return -EINVAL; *kargs = (struct kernel_clone_args){ - .flags = args.flags, - .pidfd = u64_to_user_ptr(args.pidfd), - .child_tid = u64_to_user_ptr(args.child_tid), - .parent_tid = u64_to_user_ptr(args.parent_tid), - .exit_signal = args.exit_signal, - .stack = args.stack, - .stack_size = args.stack_size, - .tls = args.tls, - .set_tid_size = args.set_tid_size, - .cgroup = args.cgroup, + .flags = args.flags, + .pidfd = u64_to_user_ptr(args.pidfd), + .child_tid = u64_to_user_ptr(args.child_tid), + .parent_tid = u64_to_user_ptr(args.parent_tid), + .exit_signal = 
args.exit_signal, + .stack = args.stack, + .stack_size = args.stack_size, + .tls = args.tls, + .set_tid_size = args.set_tid_size, + .cgroup = args.cgroup, }; if (args.set_tid && - copy_from_user(kset_tid, u64_to_user_ptr(args.set_tid), - (kargs->set_tid_size * sizeof(pid_t)))) + copy_from_user(kset_tid, u64_to_user_ptr(args.set_tid), + (kargs->set_tid_size * sizeof(pid_t)))) return -EFAULT; kargs->set_tid = kset_tid; @@ -3307,7 +3353,8 @@ SYSCALL_DEFINE2(clone3, struct clone_args __user *, uargs, size_t, size) } #endif -void walk_process_tree(struct task_struct *top, proc_visitor visitor, void *data) +void walk_process_tree(struct task_struct *top, proc_visitor visitor, + void *data) { struct task_struct *leader, *parent, *child; int res; @@ -3315,8 +3362,8 @@ void walk_process_tree(struct task_struct *top, proc_visitor visitor, void *data read_lock(&tasklist_lock); leader = top = top->group_leader; down: - for_each_thread(leader, parent) { - list_for_each_entry(child, &parent->children, sibling) { + for_each_thread (leader, parent) { + list_for_each_entry (child, &parent->children, sibling) { res = visitor(child, data); if (res) { if (res < 0) @@ -3324,8 +3371,7 @@ void walk_process_tree(struct task_struct *top, proc_visitor visitor, void *data leader = child; goto down; } -up: - ; + up:; } } @@ -3402,11 +3448,11 @@ void __init proc_caches_init(void) */ static int check_unshare_flags(unsigned long unshare_flags) { - if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| - CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| - CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET| - CLONE_NEWUSER|CLONE_NEWPID|CLONE_NEWCGROUP| - CLONE_NEWTIME)) + if (unshare_flags & + ~(CLONE_THREAD | CLONE_FS | CLONE_NEWNS | CLONE_SIGHAND | CLONE_VM | + CLONE_FILES | CLONE_SYSVSEM | CLONE_NEWUTS | CLONE_NEWIPC | + CLONE_NEWNET | CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWCGROUP | + CLONE_NEWTIME)) return -EINVAL; /* * Not implemented, but pretend it works if there is nothing @@ -3516,7 +3562,7 @@ int ksys_unshare(unsigned long unshare_flags) * to a new ipc namespace, the semaphore arrays from the old * namespace are unreachable. 
*/ - if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM)) + if (unshare_flags & (CLONE_NEWIPC | CLONE_SYSVSEM)) do_sysvsem = 1; err = unshare_fs(unshare_flags, &new_fs); if (err) @@ -3527,8 +3573,8 @@ int ksys_unshare(unsigned long unshare_flags) err = unshare_userns(unshare_flags, &new_cred); if (err) goto bad_unshare_cleanup_fd; - err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, - new_cred, new_fs); + err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_cred, + new_fs); if (err) goto bad_unshare_cleanup_cred; @@ -3625,8 +3671,8 @@ int unshare_files(void) return 0; } -int sysctl_max_threads(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) +int sysctl_max_threads(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos) { struct ctl_table t; int ret; diff --git a/kernel/groups.c b/kernel/groups.c index 9b43da22647d..8045812e8a3c 100644 --- a/kernel/groups.c +++ b/kernel/groups.c @@ -11,6 +11,9 @@ #include #include #include +#ifdef CONFIG_CREDP +#include +#endif struct group_info *groups_alloc(int gidsetsize) { @@ -119,7 +122,11 @@ void set_groups(struct cred *new, struct group_info *group_info) { put_group_info(new->group_info); get_group_info(group_info); + #ifdef CONFIG_CREDP + iee_set_cred_group_info(new,group_info); + #else new->group_info = group_info; + #endif } EXPORT_SYMBOL(set_groups); diff --git a/kernel/kthread.c b/kernel/kthread.c index 980e6b325b7d..d83342b67608 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -30,6 +30,11 @@ #include #include +#ifdef CONFIG_IEE +#include +#include +#endif + static DEFINE_SPINLOCK(kthread_create_lock); static LIST_HEAD(kthread_create_list); @@ -1457,6 +1462,9 @@ void kthread_use_mm(struct mm_struct *mm) tsk->active_mm = mm; tsk->mm = mm; membarrier_update_current_mm(mm); + #ifdef CONFIG_IEE + iee_set_token_pgd(tsk, mm->pgd); + #endif switch_mm_irqs_off(active_mm, mm, tsk); local_irq_enable(); task_unlock(tsk); @@ -1501,7 +1509,11 @@ void kthread_unuse_mm(struct mm_struct *mm) local_irq_disable(); tsk->mm = NULL; membarrier_update_current_mm(NULL); + #ifdef CONFIG_IEE + iee_set_token_pgd(tsk, NULL); + #endif mmgrab_lazy_tlb(mm); + /* active_mm is still 'mm' */ enter_lazy_tlb(mm, tsk); local_irq_enable(); diff --git a/kernel/module/main.c b/kernel/module/main.c index 14a51af2fbea..f810f005d90a 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -63,6 +63,10 @@ #define CREATE_TRACE_POINTS #include +#ifdef CONFIG_KOI +#include "asm/koi.h" +#endif + /* * Mutex protects: * 1) List of modules (also safely readable with preempt_disable), @@ -769,7 +773,11 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, /* Store the name and taints of the last unloaded module for diagnostic purposes */ strscpy(last_unloaded_module.name, mod->name, sizeof(last_unloaded_module.name)); strscpy(last_unloaded_module.taints, module_flags(mod, buf, false), sizeof(last_unloaded_module.taints)); - +#ifdef CONFIG_KOI + koi_destroy_pagetable(mod); + + printk(KERN_ERR "pagetable destroyed\n"); +#endif free_module(mod); /* someone could wait for the module in add_unformed_module() */ wake_up_all(&module_wq); @@ -2590,6 +2598,11 @@ static noinline int do_init_module(struct module *mod) module_enable_ro(mod, true); mod_tree_remove_init(mod); module_arch_freeing_init(mod); +#ifdef CONFIG_KOI + for_class_mod_mem_type(type, init) { + koi_unmap_mem(mod, (unsigned long)mod->mem[type].base, mod->mem[type].size); + } +#endif for_class_mod_mem_type(type, init) { 
mod->mem[type].base = NULL; mod->mem[type].size = 0; @@ -2636,6 +2649,11 @@ static noinline int do_init_module(struct module *mod) MODULE_STATE_GOING, mod); klp_module_going(mod); ftrace_release_mod(mod); +#ifdef CONFIG_KOI + koi_destroy_pagetable(mod); + + printk(KERN_ERR "[KOI] pagetable for module: %s destroyed\n", mod->name); +#endif free_module(mod); wake_up_all(&module_wq); @@ -2840,6 +2858,32 @@ static int early_mod_check(struct load_info *info, int flags) return err; } +#ifdef CONFIG_KOI +void shared_variable_dfs(struct mm_struct *ko_mm, pgd_t *ko_pg_dir, struct shared_variable_descriptor *desc, unsigned int *get_val_id, unsigned int *from_id_get_ad, unsigned int i, unsigned long addr) +{ + unsigned int j; + unsigned int new_desc_num; + unsigned long *ptr; + for(j=0; iasync_probe_requested = async_probe; +#ifdef CONFIG_KOI + /* + * find the shared_vars_section and get the shared var list + */ + ndx = find_sec(info, ".shared_vars_section"); + if (ndx){ + koi_create_pagetable(mod); + target = NULL; + rcu_read_lock(); + hash_for_each_possible_rcu (koi_mem_htbl, target, node, + (unsigned long)mod) { + if (target->mod == mod) { + break; + } + } + rcu_read_unlock(); + if (target == NULL) { + printk("[Error] shared_vars_section detected, But mem node for module: %s not found\n", mod->name); + koi_destroy_pagetable(mod); + goto bug_cleanup; + } + + ko_mm = target->ko_mm; + ko_pg_dir = target->pgdp; + + desc = (struct shared_variable_descriptor *)info->sechdrs[ndx].sh_addr; + + num_desc = info->sechdrs[ndx].sh_size / sizeof(struct shared_variable_descriptor); + get_val_id = kmalloc(DRIVER_ISOLATION_MAX_VAL, GFP_KERNEL); + from_id_get_ad = kmalloc(DRIVER_ISOLATION_MAX_VAL, GFP_KERNEL); + + for (j = 0; j < num_desc; j++) { + unsigned int desc_id = desc[j].id; + get_val_id[j] = desc_id; + from_id_get_ad[desc_id] = j; + } + + for (i = 0; i < num_desc; i++) { + if (desc[i].type == 0) { + addr = kallsyms_lookup_name(desc[i].name); + koi_copy_pagetable(ko_mm, ko_pg_dir, addr & PAGE_MASK, (addr + desc[i].size + PAGE_SIZE) & PAGE_MASK, 0); + shared_variable_dfs(ko_mm, ko_pg_dir, desc, get_val_id, from_id_get_ad, i, addr); + } + } + kfree(get_val_id); + kfree(from_id_get_ad); + } +#endif /* Module is ready to execute: parsing args may do that. 
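For each type-0 entry of .shared_vars_section, the load_module() hunk above resolves the symbol with kallsyms_lookup_name() and maps it into the module's isolated page table page by page; the descriptor fields it consumes are id, type, name and size. A sketch of how the mapped range is derived from the symbol address, matching the expression passed to koi_copy_pagetable() (note that the end is bumped by a full page, so an already page-aligned end still maps one extra page, exactly as the original expression does):

#include <linux/mm.h>

static void koi_var_page_range(unsigned long addr, unsigned long size,
			       unsigned long *start, unsigned long *end)
{
	*start = addr & PAGE_MASK;			/* round down to the page start */
	*end   = (addr + size + PAGE_SIZE) & PAGE_MASK;	/* exclusive end, past addr + size */
}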
*/ after_dashes = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, -32768, 32767, mod, diff --git a/kernel/sys.c b/kernel/sys.c index 44b575990333..fbc47f83af50 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -75,6 +75,10 @@ #include #include +#ifdef CONFIG_CREDP +#include +#endif + #include "uid16.h" #ifndef SET_UNALIGN_CTL @@ -395,7 +399,11 @@ long __sys_setregid(gid_t rgid, gid_t egid) if (gid_eq(old->gid, krgid) || gid_eq(old->egid, krgid) || ns_capable_setid(old->user_ns, CAP_SETGID)) + #ifdef CONFIG_CREDP + iee_set_cred_gid(new,krgid); + #else new->gid = krgid; + #endif else goto error; } @@ -404,15 +412,27 @@ long __sys_setregid(gid_t rgid, gid_t egid) gid_eq(old->egid, kegid) || gid_eq(old->sgid, kegid) || ns_capable_setid(old->user_ns, CAP_SETGID)) + #ifdef CONFIG_CREDP + iee_set_cred_egid(new,kegid); + #else new->egid = kegid; + #endif else goto error; } if (rgid != (gid_t) -1 || (egid != (gid_t) -1 && !gid_eq(kegid, old->gid))) + #ifdef CONFIG_CREDP + iee_set_cred_sgid(new,new->egid); + #else new->sgid = new->egid; + #endif + #ifdef CONFIG_CREDP + iee_set_cred_fsgid(new,new->egid); + #else new->fsgid = new->egid; + #endif retval = security_task_fix_setgid(new, old, LSM_SETID_RE); if (retval < 0) @@ -454,9 +474,25 @@ long __sys_setgid(gid_t gid) retval = -EPERM; if (ns_capable_setid(old->user_ns, CAP_SETGID)) + #ifdef CONFIG_CREDP + { + iee_set_cred_fsgid(new,kgid); + iee_set_cred_sgid(new,kgid); + iee_set_cred_egid(new,kgid); + iee_set_cred_gid(new,kgid); + } + #else new->gid = new->egid = new->sgid = new->fsgid = kgid; + #endif else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid)) + #ifdef CONFIG_CREDP + { + iee_set_cred_fsgid(new,kgid); + iee_set_cred_egid(new,kgid); + } + #else new->egid = new->fsgid = kgid; + #endif else goto error; @@ -488,7 +524,11 @@ static int set_user(struct cred *new) return -EAGAIN; free_uid(new->user); + #ifdef CONFIG_CREDP + iee_set_cred_user(new,new_user); + #else new->user = new_user; + #endif return 0; } @@ -549,7 +589,11 @@ long __sys_setreuid(uid_t ruid, uid_t euid) retval = -EPERM; if (ruid != (uid_t) -1) { + #ifdef CONFIG_CREDP + iee_set_cred_uid(new,kruid); + #else new->uid = kruid; + #endif if (!uid_eq(old->uid, kruid) && !uid_eq(old->euid, kruid) && !ns_capable_setid(old->user_ns, CAP_SETUID)) @@ -557,7 +601,11 @@ long __sys_setreuid(uid_t ruid, uid_t euid) } if (euid != (uid_t) -1) { + #ifdef CONFIG_CREDP + iee_set_cred_euid(new,keuid); + #else new->euid = keuid; + #endif if (!uid_eq(old->uid, keuid) && !uid_eq(old->euid, keuid) && !uid_eq(old->suid, keuid) && @@ -572,8 +620,16 @@ long __sys_setreuid(uid_t ruid, uid_t euid) } if (ruid != (uid_t) -1 || (euid != (uid_t) -1 && !uid_eq(keuid, old->uid))) + #ifdef CONFIG_CREDP + iee_set_cred_suid(new,new->euid); + #else new->suid = new->euid; + #endif + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(new,new->euid); + #else new->fsuid = new->euid; + #endif retval = security_task_fix_setuid(new, old, LSM_SETID_RE); if (retval < 0) @@ -626,7 +682,12 @@ long __sys_setuid(uid_t uid) retval = -EPERM; if (ns_capable_setid(old->user_ns, CAP_SETUID)) { + #ifdef CONFIG_CREDP + iee_set_cred_uid(new,kuid); + iee_set_cred_suid(new,kuid); + #else new->suid = new->uid = kuid; + #endif if (!uid_eq(kuid, old->uid)) { retval = set_user(new); if (retval < 0) @@ -636,7 +697,12 @@ long __sys_setuid(uid_t uid) goto error; } + #ifdef CONFIG_CREDP + iee_set_cred_euid(new,kuid); + iee_set_cred_fsuid(new,kuid); + #else new->fsuid = new->euid = kuid; + #endif retval = security_task_fix_setuid(new, old, 
LSM_SETID_ID); if (retval < 0) @@ -710,7 +776,11 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) return -ENOMEM; if (ruid != (uid_t) -1) { + #ifdef CONFIG_CREDP + iee_set_cred_uid(new,kruid); + #else new->uid = kruid; + #endif if (!uid_eq(kruid, old->uid)) { retval = set_user(new); if (retval < 0) @@ -718,10 +788,22 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) } } if (euid != (uid_t) -1) + #ifdef CONFIG_CREDP + iee_set_cred_euid(new,keuid); + #else new->euid = keuid; + #endif if (suid != (uid_t) -1) + #ifdef CONFIG_CREDP + iee_set_cred_suid(new,ksuid); + #else new->suid = ksuid; + #endif + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(new,new->euid); + #else new->fsuid = new->euid; + #endif retval = security_task_fix_setuid(new, old, LSM_SETID_RES); if (retval < 0) @@ -810,12 +892,29 @@ long __sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) return -ENOMEM; if (rgid != (gid_t) -1) + #ifdef CONFIG_CREDP + iee_set_cred_gid(new,krgid); + #else new->gid = krgid; + #endif if (egid != (gid_t) -1) + #ifdef CONFIG_CREDP + iee_set_cred_egid(new,kegid); + #else new->egid = kegid; + #endif if (sgid != (gid_t) -1) + #ifdef CONFIG_CREDP + iee_set_cred_sgid(new,ksgid); + #else new->sgid = ksgid; + #endif + + #ifdef CONFIG_CREDP + iee_set_cred_fsgid(new,new->egid); + #else new->fsgid = new->egid; + #endif retval = security_task_fix_setgid(new, old, LSM_SETID_RES); if (retval < 0) @@ -882,7 +981,11 @@ long __sys_setfsuid(uid_t uid) uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) || ns_capable_setid(old->user_ns, CAP_SETUID)) { if (!uid_eq(kuid, old->fsuid)) { + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(new,kuid); + #else new->fsuid = kuid; + #endif if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0) goto change_okay; } @@ -926,7 +1029,11 @@ long __sys_setfsgid(gid_t gid) gid_eq(kgid, old->sgid) || gid_eq(kgid, old->fsgid) || ns_capable_setid(old->user_ns, CAP_SETGID)) { if (!gid_eq(kgid, old->fsgid)) { + #ifdef CONFIG_CREDP + iee_set_cred_fsgid(new,kgid); + #else new->fsgid = kgid; + #endif if (security_task_fix_setgid(new,old,LSM_SETID_FS) == 0) goto change_okay; } diff --git a/kernel/umh.c b/kernel/umh.c index 1b13c5d34624..32f5c88e10bf 100644 --- a/kernel/umh.c +++ b/kernel/umh.c @@ -32,6 +32,10 @@ #include +#ifdef CONFIG_CREDP +#include +#endif + static kernel_cap_t usermodehelper_bset = CAP_FULL_SET; static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET; static DEFINE_SPINLOCK(umh_sysctl_lock); @@ -91,9 +95,15 @@ static int call_usermodehelper_exec_async(void *data) goto out; spin_lock(&umh_sysctl_lock); + #ifdef CONFIG_CREDP + iee_set_cred_cap_bset(new,cap_intersect(usermodehelper_bset, new->cap_bset)); + iee_set_cred_cap_inheritable(new,cap_intersect(usermodehelper_inheritable, + new->cap_inheritable)); + #else new->cap_bset = cap_intersect(usermodehelper_bset, new->cap_bset); new->cap_inheritable = cap_intersect(usermodehelper_inheritable, new->cap_inheritable); + #endif spin_unlock(&umh_sysctl_lock); if (sub_info->init) { diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 1d8e47bed3f1..9f1921025539 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -22,6 +22,10 @@ #include #include +#ifdef CONFIG_CREDP +#include +#endif + static struct kmem_cache *user_ns_cachep __read_mostly; static DEFINE_MUTEX(userns_state_mutex); @@ -45,6 +49,19 @@ static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) /* Start with the same capabilities as init but useless for doing * anything as the capabilities are bound 
to the new user namespace. */ + #ifdef CONFIG_CREDP + iee_set_cred_securebits(cred,SECUREBITS_DEFAULT); + iee_set_cred_cap_inheritable(cred,CAP_EMPTY_SET); + iee_set_cred_cap_permitted(cred,CAP_FULL_SET); + iee_set_cred_cap_effective(cred,CAP_FULL_SET); + iee_set_cred_cap_ambient(cred,CAP_EMPTY_SET); + iee_set_cred_cap_bset(cred,CAP_FULL_SET); +#ifdef CONFIG_KEYS + key_put(cred->request_key_auth); + iee_set_cred_request_key_auth(cred,NULL); +#endif + iee_set_cred_user_ns(cred,user_ns); + #else cred->securebits = SECUREBITS_DEFAULT; cred->cap_inheritable = CAP_EMPTY_SET; cred->cap_permitted = CAP_FULL_SET; @@ -57,6 +74,7 @@ static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) #endif /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */ cred->user_ns = user_ns; + #endif } static unsigned long enforced_nproc_rlimit(void) diff --git a/lib/digsig.c b/lib/digsig.c index 04b5e55ed95f..0a10a459bdaa 100644 --- a/lib/digsig.c +++ b/lib/digsig.c @@ -81,7 +81,11 @@ static int digsig_verify_rsa(struct key *key, const struct user_key_payload *ukp; struct pubkey_hdr *pkh; + #ifdef CONFIG_KEYP + down_read(&KEY_SEM(key)); + #else down_read(&key->sem); + #endif ukp = user_key_payload_locked(key); if (!ukp) { @@ -176,7 +180,11 @@ static int digsig_verify_rsa(struct key *key, while (--i >= 0) mpi_free(pkey[i]); err1: + #ifdef CONFIG_KEYP + up_read(&KEY_SEM(key)); + #else up_read(&key->sem); + #endif return err; } diff --git a/mm/Kconfig b/mm/Kconfig index 782c43f08e8f..7706340f189c 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -530,6 +530,11 @@ config NUMA_KEEP_MEMINFO config MEMORY_ISOLATION bool +# Configs for pgtable isolation +config PTP + depends on IEE + def_bool y + # IORESOURCE_SYSTEM_RAM regions in the kernel resource tree that are marked # IORESOURCE_EXCLUSIVE cannot be mapped to user space, for example, via # /dev/mem. diff --git a/mm/damon/ops-common.c b/mm/damon/ops-common.c index d25d99cb5f2b..2ea51f559d4e 100644 --- a/mm/damon/ops-common.c +++ b/mm/damon/ops-common.c @@ -44,6 +44,7 @@ void damon_ptep_mkold(pte_t *pte, struct vm_area_struct *vma, unsigned long addr if (!folio) return; + if (ptep_clear_young_notify(vma, addr, pte)) folio_set_young(folio); diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index 68af76ca8bc9..2789784777dd 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -437,7 +437,11 @@ static void __init pmd_huge_tests(struct pgtable_debug_args *args) * X86 defined pmd_set_huge() verifies that the given * PMD is not a populated non-leaf entry. */ + #ifdef CONFIG_PTP + set_pmd(args->pmdp, __pmd(0)); + #else WRITE_ONCE(*args->pmdp, __pmd(0)); + #endif WARN_ON(!pmd_set_huge(args->pmdp, __pfn_to_phys(args->fixed_pmd_pfn), args->page_prot)); WARN_ON(!pmd_clear_huge(args->pmdp)); pmd = READ_ONCE(*args->pmdp); @@ -457,7 +461,11 @@ static void __init pud_huge_tests(struct pgtable_debug_args *args) * X86 defined pud_set_huge() verifies that the given * PUD is not a populated non-leaf entry. 
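The CONFIG_PTP hunks here and in the surrounding debug_vm_pgtable tests all apply the same substitution: once page-table pages are write-protected, direct stores into live table entries are no longer allowed, so they go through the arch set_pXd() helpers, which this series routes through the protected write path. A minimal sketch of the pattern for one level:

#include <linux/pgtable.h>

static void write_pmd_entry(pmd_t *pmdp, pmd_t val)
{
#ifdef CONFIG_PTP
	set_pmd(pmdp, val);		/* goes through the protected write path */
#else
	WRITE_ONCE(*pmdp, val);		/* direct store, as before */
#endif
}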
*/ + #ifdef CONFIG_PTP + set_pud(args->pudp, __pud(0)); + #else WRITE_ONCE(*args->pudp, __pud(0)); + #endif WARN_ON(!pud_set_huge(args->pudp, __pfn_to_phys(args->fixed_pud_pfn), args->page_prot)); WARN_ON(!pud_clear_huge(args->pudp)); pud = READ_ONCE(*args->pudp); @@ -496,6 +504,12 @@ static void __init pud_clear_tests(struct pgtable_debug_args *args) pr_debug("Validating PUD clear\n"); WARN_ON(pud_none(pud)); + pud = __pud(pud_val(pud) | RANDOM_ORVALUE); + #ifdef CONFIG_PTP + set_pud(args->pudp, pud); + #else + WRITE_ONCE(*args->pudp, pud); + #endif pud_clear(args->pudp); pud = READ_ONCE(*args->pudp); WARN_ON(!pud_none(pud)); @@ -532,6 +546,12 @@ static void __init p4d_clear_tests(struct pgtable_debug_args *args) pr_debug("Validating P4D clear\n"); WARN_ON(p4d_none(p4d)); + p4d = __p4d(p4d_val(p4d) | RANDOM_ORVALUE); + #ifdef CONFIG_PTP + set_p4d(args->p4dp, p4d); + #else + WRITE_ONCE(*args->p4dp, p4d); + #endif p4d_clear(args->p4dp); p4d = READ_ONCE(*args->p4dp); WARN_ON(!p4d_none(p4d)); @@ -565,6 +585,12 @@ static void __init pgd_clear_tests(struct pgtable_debug_args *args) pr_debug("Validating PGD clear\n"); WARN_ON(pgd_none(pgd)); + pgd = __pgd(pgd_val(pgd) | RANDOM_ORVALUE); + #ifdef CONFIG_PTP + set_pgd(args->pgdp, pgd); + #else + WRITE_ONCE(*args->pgdp, pgd); + #endif pgd_clear(args->pgdp); pgd = READ_ONCE(*args->pgdp); WARN_ON(!pgd_none(pgd)); @@ -630,6 +656,12 @@ static void __init pmd_clear_tests(struct pgtable_debug_args *args) pr_debug("Validating PMD clear\n"); WARN_ON(pmd_none(pmd)); + pmd = __pmd(pmd_val(pmd) | RANDOM_ORVALUE); + #ifdef CONFIG_PTP + set_pmd(args->pmdp, pmd); + #else + WRITE_ONCE(*args->pmdp, pmd); + #endif pmd_clear(args->pmdp); pmd = READ_ONCE(*args->pmdp); WARN_ON(!pmd_none(pmd)); diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c index ce06b2884789..a039c7a50ec5 100644 --- a/mm/early_ioremap.c +++ b/mm/early_ioremap.c @@ -147,7 +147,11 @@ __early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot) if (after_paging_init) __late_set_fixmap(idx, phys_addr, prot); else + #ifdef CONFIG_PTP + __iee_set_fixmap_pre_init(idx, phys_addr, prot); + #else __early_set_fixmap(idx, phys_addr, prot); + #endif phys_addr += PAGE_SIZE; --idx; --nrpages; @@ -199,13 +203,66 @@ void __init early_iounmap(void __iomem *addr, unsigned long size) if (after_paging_init) __late_clear_fixmap(idx); else + #ifdef CONFIG_PTP + __iee_set_fixmap_pre_init(idx, 0, FIXMAP_PAGE_CLEAR); + #else __early_set_fixmap(idx, 0, FIXMAP_PAGE_CLEAR); + #endif --idx; --nrpages; } prev_map[slot] = NULL; } +#ifdef CONFIG_PTP +void __init early_iounmap_after_init(void __iomem *addr, unsigned long size) +{ + unsigned long virt_addr; + unsigned long offset; + unsigned int nrpages; + enum fixed_addresses idx; + int i, slot; + + slot = -1; + for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { + if (prev_map[i] == addr) { + slot = i; + break; + } + } + + if (WARN(slot < 0, "early_iounmap(%p, %08lx) not found slot\n", + addr, size)) + return; + + if (WARN(prev_size[slot] != size, + "early_iounmap(%p, %08lx) [%d] size not consistent %08lx\n", + addr, size, slot, prev_size[slot])) + return; + + WARN(early_ioremap_debug, "early_iounmap(%p, %08lx) [%d]\n", + addr, size, slot); + + virt_addr = (unsigned long)addr; + if (WARN_ON(virt_addr < fix_to_virt(FIX_BTMAP_BEGIN))) + return; + + offset = offset_in_page(virt_addr); + nrpages = PAGE_ALIGN(offset + size) >> PAGE_SHIFT; + + idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; + while (nrpages > 0) { + if (after_paging_init) + __late_clear_fixmap(idx); + else + 
__early_set_fixmap(idx, 0, FIXMAP_PAGE_CLEAR); + --idx; + --nrpages; + } + prev_map[slot] = NULL; +} +#endif + /* Remap an IO device */ void __init __iomem * early_ioremap(resource_size_t phys_addr, unsigned long size) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 3c58e95a33eb..404e5ee540d3 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -40,6 +40,10 @@ #include #include +#ifdef CONFIG_PTP +#include +#endif + #include #include #include "internal.h" @@ -2730,8 +2734,17 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma, old_pmd = pmdp_huge_clear_flush(vma, haddr, pmd); pgtable = pgtable_trans_huge_withdraw(mm, pmd); + #ifdef CONFIG_PTP + #ifdef CONFIG_X86_64 + unsigned long pfn = page_to_pfn(pgtable); + paravirt_alloc_pte(mm, pfn); + _pmd = __pmd(((pteval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE); + #else + _pmd = __pmd(__phys_to_pmd_val(page_to_phys(pgtable)) | PMD_TYPE_TABLE); + #endif + #else pmd_populate(mm, &_pmd, pgtable); - + #endif pte = pte_offset_map(&_pmd, haddr); VM_BUG_ON(!pte); for (i = 0, addr = haddr; i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE) { @@ -2900,8 +2913,17 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, * This's critical for some architectures (Power). */ pgtable = pgtable_trans_huge_withdraw(mm, pmd); + #ifdef CONFIG_PTP + #ifdef CONFIG_X86_64 + unsigned long pfn = page_to_pfn(pgtable); + paravirt_alloc_pte(mm, pfn); + _pmd = __pmd(((pteval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE); + #else + _pmd = __pmd(__phys_to_pmd_val(page_to_phys(pgtable)) | PMD_TYPE_TABLE); + #endif + #else pmd_populate(mm, &_pmd, pgtable); - + #endif pte = pte_offset_map(&_pmd, haddr); VM_BUG_ON(!pte); diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index 149ab629855c..93b6a7ebde82 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -83,7 +83,7 @@ static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start) if (!head) return 0; - + pgtable = pte_alloc_one_kernel(&init_mm); if (!pgtable) return -ENOMEM; diff --git a/mm/init-mm.c b/mm/init-mm.c index 24c809379274..4539eb8e7cce 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -55,3 +55,19 @@ void setup_initial_init_mm(void *start_code, void *end_code, init_mm.end_data = (unsigned long)end_data; init_mm.brk = (unsigned long)brk; } + +#ifdef CONFIG_KOI +/* + * This is used to init ko_mm when creating pgtable for a ko to be isolated + * the ko_mm belongs to a specific ko, pgdp is allocated by koi_pgd_alloc + */ +void init_ko_mm(struct mm_struct *ko_mm, pgd_t *pgdp) { + ko_mm->pgd = pgdp; + ko_mm->mm_users = (atomic_t)ATOMIC_INIT(2); + ko_mm->mm_count = (atomic_t)ATOMIC_INIT(1); + ko_mm->mmap_lock = (struct rw_semaphore)__RWSEM_INITIALIZER(ko_mm->mmap_lock); + ko_mm->page_table_lock = __SPIN_LOCK_UNLOCKED(ko_mm.page_table_lock); + ko_mm->arg_lock = __SPIN_LOCK_UNLOCKED(ko_mm->arg_lock); + ko_mm->mmlist = (struct list_head)LIST_HEAD_INIT(ko_mm->mmlist); +} +#endif diff --git a/mm/memory.c b/mm/memory.c index badaf096a344..a7553a457754 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -80,6 +80,11 @@ #include #include +#ifdef CONFIG_PTP +#include +#include +#endif + #include #include @@ -1111,6 +1116,7 @@ copy_pte_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, ret = -ENOMEM; goto out; } + src_pte = pte_offset_map_nolock(src_mm, src_pmd, addr, &src_ptl); if (!src_pte) { pte_unmap_unlock(dst_pte, dst_ptl); @@ -6626,12 +6632,20 @@ bool ptlock_alloc(struct ptdesc *ptdesc) ptl = kmem_cache_alloc(page_ptl_cachep, GFP_KERNEL); if (!ptl) return false; + #ifdef 
CONFIG_PTP + (iee_ptdesc(ptdesc))->ptl = ptl; + #else ptdesc->ptl = ptl; + #endif return true; } void ptlock_free(struct ptdesc *ptdesc) { + #ifdef CONFIG_PTP + kmem_cache_free(page_ptl_cachep, (iee_ptdesc(ptdesc))->ptl); + #else kmem_cache_free(page_ptl_cachep, ptdesc->ptl); + #endif } #endif diff --git a/mm/mmap.c b/mm/mmap.c index dfa3d2bfe289..9139c6838279 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2413,10 +2413,17 @@ static void unmap_region(struct mm_struct *mm, struct ma_state *mas, update_hiwater_rss(mm); unmap_vmas(&tlb, mas, vma, start, end, tree_end, mm_wr_locked); mas_set(mas, mt_start); + #ifdef CONFIG_PTP + tlb_flush_mmu(&tlb); + #endif free_pgtables(&tlb, mas, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS, next ? next->vm_start : USER_PGTABLES_CEILING, mm_wr_locked); + #ifdef CONFIG_PTP + iee_tlb_finish_mmu(&tlb); + #else tlb_finish_mmu(&tlb); + #endif } /* @@ -3360,9 +3367,16 @@ void exit_mmap(struct mm_struct *mm) mmap_write_lock(mm); mt_clear_in_rcu(&mm->mm_mt); mas_set(&mas, vma->vm_end); + #ifdef CONFIG_PTP + tlb_flush_mmu(&tlb); + #endif free_pgtables(&tlb, &mas, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING, true); + #ifdef CONFIG_PTP + iee_tlb_finish_mmu(&tlb); + #else tlb_finish_mmu(&tlb); + #endif /* * Walk the list again, actually closing and freeing it, with preemption diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index 99b3e9408aa0..ff58ad5365ce 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -141,6 +141,54 @@ static void __tlb_batch_free_encoded_pages(struct mmu_gather_batch *batch) } } +#ifdef CONFIG_PTP +static void __iee_tlb_batch_free_encoded_pages(struct mmu_gather_batch *batch) +{ + struct encoded_page **pages = batch->encoded_pages; + unsigned int nr, nr_pages; + + while (batch->nr) { + if (!page_poisoning_enabled_static() && !want_init_on_free()) { + nr = min(MAX_NR_FOLIOS_PER_FREE, batch->nr); + + /* + * Make sure we cover page + nr_pages, and don't leave + * nr_pages behind when capping the number of entries. + */ + if (unlikely(encoded_page_flags(pages[nr - 1]) & + ENCODED_PAGE_BIT_NR_PAGES_NEXT)) + nr++; + } else { + /* + * With page poisoning and init_on_free, the time it + * takes to free memory grows proportionally with the + * actual memory size. Therefore, limit based on the + * actual memory size and not the number of involved + * folios. 
+ */ + for (nr = 0, nr_pages = 0; + nr < batch->nr && nr_pages < MAX_NR_FOLIOS_PER_FREE; + nr++) { + if (unlikely(encoded_page_flags(pages[nr]) & + ENCODED_PAGE_BIT_NR_PAGES_NEXT)) + nr_pages += encoded_nr_pages(pages[++nr]); + else + nr_pages++; + } + } + + for (int i = 0; i < nr; i++) { + struct page *page = encoded_page_ptr(pages[i]); + __iee_tlb_remove_table((void *)page); + } + pages += nr; + batch->nr -= nr; + + cond_resched(); + } +} +#endif + static void tlb_batch_pages_flush(struct mmu_gather *tlb) { struct mmu_gather_batch *batch; @@ -150,6 +198,17 @@ static void tlb_batch_pages_flush(struct mmu_gather *tlb) tlb->active = &tlb->local; } +#ifdef CONFIG_PTP +static void iee_tlb_batch_pages_flush(struct mmu_gather *tlb) +{ + struct mmu_gather_batch *batch; + + for (batch = &tlb->local; batch && batch->nr; batch = batch->next) + __iee_tlb_batch_free_encoded_pages(batch); + tlb->active = &tlb->local; +} +#endif + static void tlb_batch_list_free(struct mmu_gather *tlb) { struct mmu_gather_batch *batch, *next; @@ -347,6 +406,71 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table) tlb_table_flush(tlb); } +#ifdef CONFIG_PTP +static void __iee_tlb_remove_table_free(struct mmu_table_batch *batch) +{ + int i; + + for (i = 0; i < batch->nr; i++) + __iee_tlb_remove_table(batch->tables[i]); + + free_page((unsigned long)batch); +} + +#ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE +static void iee_tlb_remove_table_rcu(struct rcu_head *head) +{ + __iee_tlb_remove_table_free(container_of(head, struct mmu_table_batch, rcu)); +} + +static void iee_tlb_remove_table_free(struct mmu_table_batch *batch) +{ + call_rcu(&batch->rcu, iee_tlb_remove_table_rcu); +} +#else +static void iee_tlb_remove_table_free(struct mmu_table_batch *batch) +{ + __iee_tlb_remove_table_free(batch); +} +#endif + +static void iee_tlb_remove_table_one(void *table) +{ + tlb_remove_table_sync_one(); + __iee_tlb_remove_table(table); +} + +static void iee_tlb_table_flush(struct mmu_gather *tlb) +{ + struct mmu_table_batch **batch = &tlb->batch; + + if (*batch) { + tlb_table_invalidate(tlb); + iee_tlb_remove_table_free(*batch); + *batch = NULL; + } +} + +void iee_tlb_remove_table(struct mmu_gather *tlb, void *table) +{ + struct mmu_table_batch **batch = &tlb->batch; + + if (*batch == NULL) { + *batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN); + if (*batch == NULL) { + tlb_table_invalidate(tlb); + iee_tlb_remove_table_one(table); + return; + } + (*batch)->nr = 0; + } + + (*batch)->tables[(*batch)->nr++] = table; + if ((*batch)->nr == MAX_TABLE_BATCH) + iee_tlb_table_flush(tlb); +} +#endif + static inline void tlb_table_init(struct mmu_gather *tlb) { tlb->batch = NULL; @@ -469,3 +593,55 @@ void tlb_finish_mmu(struct mmu_gather *tlb) #endif dec_tlb_flush_pending(tlb->mm); } + +#ifdef CONFIG_PTP +static void iee_tlb_flush_mmu_free(struct mmu_gather *tlb) +{ + iee_tlb_table_flush(tlb); +#ifndef CONFIG_MMU_GATHER_NO_GATHER + iee_tlb_batch_pages_flush(tlb); +#endif +} + +void iee_tlb_flush_mmu(struct mmu_gather *tlb) +{ + tlb_flush_mmu_tlbonly(tlb); + iee_tlb_flush_mmu_free(tlb); +} + +void iee_tlb_finish_mmu(struct mmu_gather *tlb) +{ + /* + * If there are parallel threads are doing PTE changes on same range + * under non-exclusive lock (e.g., mmap_lock read-side) but defer TLB + * flush by batching, one thread may end up seeing inconsistent PTEs + * and result in having stale TLB entries. So flush TLB forcefully + * if we detect parallel PTE batching threads. + * + * However, some syscalls, e.g. 
munmap(), may free page tables, this + * needs force flush everything in the given range. Otherwise this + * may result in having stale TLB entries for some architectures, + * e.g. aarch64, that could specify flush what level TLB. + */ + if (mm_tlb_flush_nested(tlb->mm)) { + /* + * The aarch64 yields better performance with fullmm by + * avoiding multiple CPUs spamming TLBI messages at the + * same time. + * + * On x86 non-fullmm doesn't yield significant difference + * against fullmm. + */ + tlb->fullmm = 1; + __tlb_reset_range(tlb); + tlb->freed_tables = 1; + } + + iee_tlb_flush_mmu(tlb); + +#ifndef CONFIG_MMU_GATHER_NO_GATHER + tlb_batch_list_free(tlb); +#endif + dec_tlb_flush_pending(tlb->mm); +} +#endif \ No newline at end of file diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index 4fcd959dcc4d..17f2d8b81285 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -169,9 +169,17 @@ void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, /* FIFO */ if (!pmd_huge_pte(mm, pmdp)) + #ifdef CONFIG_PTP + INIT_LIST_HEAD(&((page_to_iee_ptdesc(pgtable))->pt_list)); + #else INIT_LIST_HEAD(&pgtable->lru); + #endif else + #ifdef CONFIG_PTP + list_add(&((page_to_iee_ptdesc(pgtable))->pt_list), &((page_to_iee_ptdesc(pmd_huge_pte(mm, pmdp)))->pt_list)); + #else list_add(&pgtable->lru, &pmd_huge_pte(mm, pmdp)->lru); + #endif pmd_huge_pte(mm, pmdp) = pgtable; } #endif @@ -186,10 +194,23 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) /* FIFO */ pgtable = pmd_huge_pte(mm, pmdp); + #ifdef CONFIG_PTP + struct ptdesc_t *ptdesc = list_first_entry_or_null(&((page_to_iee_ptdesc(pgtable))->pt_list), + struct ptdesc_t, pt_list); + if(ptdesc) + pmd_huge_pte(mm, pmdp) = ptdesc->page; + else + pmd_huge_pte(mm, pmdp) = NULL; + #else pmd_huge_pte(mm, pmdp) = list_first_entry_or_null(&pgtable->lru, struct page, lru); + #endif if (pmd_huge_pte(mm, pmdp)) + #ifdef CONFIG_PTP + list_del(&((iee_ptdesc(pgtable))->pt_list)); + #else list_del(&pgtable->lru); + #endif return pgtable; } #endif @@ -238,7 +259,11 @@ static void pte_free_now(struct rcu_head *head) { struct page *page; + #ifdef CONFIG_PTP + page = container_of(head, struct ptdesc_t, pt_rcu_head)->page; + #else page = container_of(head, struct page, rcu_head); + #endif pte_free(NULL /* mm not passed and not used */, (pgtable_t)page); } @@ -247,7 +272,11 @@ void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable) struct page *page; page = pgtable; + #ifdef CONFIG_PTP + call_rcu(&((page_to_iee_ptdesc(page))->pt_rcu_head), pte_free_now); + #else call_rcu(&page->rcu_head, pte_free_now); + #endif } #endif /* pte_free_defer */ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/mm/slab.h b/mm/slab.h index 3d07fb428393..67ff0beb4ec3 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -26,6 +26,35 @@ typedef u64 freelist_full_t; #undef system_has_freelist_aba #endif +#ifdef CONFIG_IEE +extern struct kmem_cache *iee_stack_jar; +#endif + +#ifdef CONFIG_PTP +extern struct kmem_cache *pgtable_jar; +extern struct kmem_cache *ptdesc_jar; +extern struct kmem_cache *pgd_jar; +#endif + +#ifdef CONFIG_CREDP +extern struct kmem_cache *cred_jar; +#endif + +#ifdef CONFIG_KEYP +extern struct kmem_cache *key_jar; +extern struct kmem_cache *key_union_jar; +extern struct kmem_cache *key_struct_jar; +extern struct kmem_cache *key_payload_jar; +#endif + +#ifdef CONFIG_IEE_SELINUX_P +extern struct kmem_cache *policy_jar; +#endif + +#if defined(CONFIG_KOI) || defined(CONFIG_IEE) +extern struct kmem_cache *task_struct_cachep; +#endif + /* * 
Freelist pointer and counter to cmpxchg together, avoids the typical ABA * problems with cmpxchg of just a pointer. @@ -767,7 +796,6 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, s->flags, flags); kmsan_slab_alloc(s, p[i], flags); } - memcg_slab_post_alloc_hook(s, objcg, flags, size, p); } @@ -843,6 +871,15 @@ static inline void cache_random_seq_destroy(struct kmem_cache *cachep) { } static inline bool slab_want_init_on_alloc(gfp_t flags, struct kmem_cache *c) { + #ifdef CONFIG_IEE + if(c == iee_stack_jar) + return false; + #endif + #ifdef CONFIG_PTP + if(c == pgtable_jar || c == pgd_jar) + return false; + #endif + if (static_branch_maybe(CONFIG_INIT_ON_ALLOC_DEFAULT_ON, &init_on_alloc)) { if (c->ctor) diff --git a/mm/slab_common.c b/mm/slab_common.c index 735f59618556..a6252bc223be 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -145,6 +145,26 @@ int slab_unmergeable(struct kmem_cache *s) if (s->ctor) return 1; + #ifdef CONFIG_IEE + if(strcmp(s->name, "iee_stack_jar") == 0) + return 1; + #endif + #ifdef CONFIG_PTP + if(strcmp(s->name, "pgtable_jar") == 0) + return 1; + if(strcmp(s->name, "pgd_jar") == 0) + return 1; + #endif + #ifdef CONFIG_KEYP + if(strcmp(s->name, "key_jar") == 0) + return 1; + #endif + #ifdef CONFIG_IEE_SELINUX_P + if(strcmp(s->name, "policy_jar") == 0) + return 1; + #endif + + #ifdef CONFIG_HARDENED_USERCOPY if (s->usersize) return 1; @@ -170,6 +190,25 @@ struct kmem_cache *find_mergeable(unsigned int size, unsigned int align, if (ctor) return NULL; + #ifdef CONFIG_IEE + if(strcmp(name, "iee_stack_jar") == 0) + return NULL; + #endif + #ifdef CONFIG_PTP + if(strcmp(name, "pgtable_jar") == 0) + return NULL; + if(strcmp(name, "pgd_jar") == 0) + return NULL; + #endif + #ifdef CONFIG_KEYP + if(strcmp(name, "key_jar") == 0) + return NULL; + #endif + #ifdef CONFIG_IEE_SELINUX_P + if(strcmp(name, "policy_jar") == 0) + return NULL; + #endif + size = ALIGN(size, sizeof(void *)); align = calculate_alignment(flags, align, size); size = ALIGN(size, align); diff --git a/mm/slub.c b/mm/slub.c index a5ff41567a8f..49fee6d57f8f 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -42,11 +42,30 @@ #include #include +#ifdef CONFIG_IEE +#include +#include +#include +#include +#else +#ifdef CONFIG_KOI +#include +extern void koi_add_page_mapping(void *token, void *token_page, unsigned int order); +extern void koi_init_token(struct task_struct *tsk); +#endif +#endif +#ifdef CONFIG_PTP +#include +extern unsigned long pgtable_jar_offset; +extern unsigned long pgd_jar_offset; +#endif + #include #include #include "internal.h" + /* * Lock order: * 1. slab_mutex (Global Mutex) @@ -317,6 +336,7 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s) /* * Tracking user of a slab. */ +#ifndef CONFIG_IEE #define TRACK_ADDRS_COUNT 16 struct track { unsigned long addr; /* Called from address */ @@ -329,6 +349,7 @@ struct track { }; enum track_item { TRACK_ALLOC, TRACK_FREE }; +#endif #ifdef SLAB_SUPPORTS_SYSFS static int sysfs_slab_add(struct kmem_cache *); @@ -379,7 +400,9 @@ static struct workqueue_struct *flushwq; * freeptr_t represents a SLUB freelist pointer, which might be encoded * and not dereferenceable if CONFIG_SLAB_FREELIST_HARDENED is enabled. */ +#ifndef CONFIG_IEE typedef struct { unsigned long v; } freeptr_t; +#endif /* * Returns freelist pointer (ptr). 
With hardening, this is obfuscated @@ -419,7 +442,14 @@ static inline void *get_freepointer(struct kmem_cache *s, void *object) object = kasan_reset_tag(object); ptr_addr = (unsigned long)object + s->offset; + #ifdef CONFIG_IEE + if(s == iee_stack_jar) + p.v = (unsigned long)iee_read_freeptr(ptr_addr); + else + p = *(freeptr_t *)(ptr_addr); + #else p = *(freeptr_t *)(ptr_addr); + #endif return freelist_ptr_decode(s, p, ptr_addr); } @@ -464,6 +494,36 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) #endif freeptr_addr = (unsigned long)kasan_reset_tag((void *)freeptr_addr); + #ifdef CONFIG_IEE + if (s == iee_stack_jar) { + iee_set_freeptr((freeptr_t *)freeptr_addr, freelist_ptr_encode(s, fp, freeptr_addr)); + return; + } + #endif + #ifdef CONFIG_PTP + if (s == pgtable_jar || s == pgd_jar) { + iee_set_freeptr((freeptr_t *)freeptr_addr, freelist_ptr_encode(s, fp, freeptr_addr)); + return; + } + #endif + #ifdef CONFIG_CREDP + if (s == cred_jar) { + iee_set_freeptr((freeptr_t *)freeptr_addr, freelist_ptr_encode(s, fp, freeptr_addr)); + return; + } + #endif + #ifdef CONFIG_KEYP + if(s == key_jar) { + iee_set_freeptr((freeptr_t *)freeptr_addr, freelist_ptr_encode(s, fp, freeptr_addr)); + return; + } + #endif + #ifdef CONFIG_IEE_SELINUX_P + if (s == policy_jar) { + iee_set_freeptr((freeptr_t *)freeptr_addr, freelist_ptr_encode(s, fp, freeptr_addr)); + return; + } + #endif *(freeptr_t *)freeptr_addr = freelist_ptr_encode(s, fp, freeptr_addr); } @@ -853,7 +913,31 @@ static void set_track_update(struct kmem_cache *s, void *object, depot_stack_handle_t handle) { struct track *p = get_track(s, object, alloc); - +#ifdef CONFIG_CREDP + struct track tmp; + if(s == cred_jar) + { + tmp = *p; + #ifdef CONFIG_STACKDEPOT + tmp.handle = handle; + #endif + tmp.addr = addr; + tmp.cpu = smp_processor_id(); + tmp.pid = current->pid; + tmp.when = jiffies; + iee_set_track(p,&tmp); + } + else + { + #ifdef CONFIG_STACKDEPOT + p->handle = handle; + #endif + p->addr = addr; + p->cpu = smp_processor_id(); + p->pid = current->pid; + p->when = jiffies; + } +#else #ifdef CONFIG_STACKDEPOT p->handle = handle; #endif @@ -861,6 +945,7 @@ static void set_track_update(struct kmem_cache *s, void *object, p->cpu = smp_processor_id(); p->pid = current->pid; p->when = jiffies; +#endif } static __always_inline void set_track(struct kmem_cache *s, void *object, @@ -879,7 +964,14 @@ static void init_tracking(struct kmem_cache *s, void *object) return; p = get_track(s, object, TRACK_ALLOC); + #ifdef CONFIG_CREDP + if(s == cred_jar) + iee_memset(p, 0, 2*sizeof(struct track)); + else + memset(p, 0, 2*sizeof(struct track)); + #else memset(p, 0, 2*sizeof(struct track)); + #endif } static void print_track(const char *s, struct track *t, unsigned long pr_time) @@ -1045,7 +1137,14 @@ static void init_object(struct kmem_cache *s, void *object, u8 val) unsigned int poison_size = s->object_size; if (s->flags & SLAB_RED_ZONE) { + #ifdef CONFIG_CREDP + if(s == cred_jar) + iee_memset(p - s->red_left_pad, val, s->red_left_pad); + else + memset(p - s->red_left_pad, val, s->red_left_pad); + #else memset(p - s->red_left_pad, val, s->red_left_pad); + #endif if (slub_debug_orig_size(s) && val == SLUB_RED_ACTIVE) { /* @@ -1058,12 +1157,33 @@ static void init_object(struct kmem_cache *s, void *object, u8 val) } if (s->flags & __OBJECT_POISON) { + #ifdef CONFIG_CREDP + if(s == cred_jar) + { + iee_memset(p, POISON_FREE, poison_size - 1); + iee_memset(&p[poison_size - 1], POISON_END, 1); + } + else + { + memset(p, POISON_FREE, 
poison_size - 1); + p[poison_size - 1] = POISON_END; + } + #else memset(p, POISON_FREE, poison_size - 1); p[poison_size - 1] = POISON_END; + #endif } - if (s->flags & SLAB_RED_ZONE) + if (s->flags & SLAB_RED_ZONE) { + #ifdef CONFIG_CREDP + if(s == cred_jar) + iee_memset(p + poison_size, val, s->inuse - poison_size); + else + memset(p + poison_size, val, s->inuse - poison_size); + #else memset(p + poison_size, val, s->inuse - poison_size); + #endif + } } static void restore_bytes(struct kmem_cache *s, char *message, u8 data, @@ -1433,7 +1553,14 @@ void setup_slab_debug(struct kmem_cache *s, struct slab *slab, void *addr) return; metadata_access_enable(); + #ifdef CONFIG_CREDP + if(s == cred_jar) + iee_memset(kasan_reset_tag(addr), POISON_INUSE, slab_size(slab)); + else + memset(kasan_reset_tag(addr), POISON_INUSE, slab_size(slab)); + #else memset(kasan_reset_tag(addr), POISON_INUSE, slab_size(slab)); + #endif metadata_access_disable(); } @@ -1990,6 +2117,36 @@ static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab) cur = setup_object(s, cur); slab->freelist = cur; + #ifdef CONFIG_IEE + if(s == task_struct_cachep) + { + int i; + void *pstack; + void *obj; + for(i = 0; i < freelist_count; i++) + { + pstack = get_iee_stack(); + if (!pstack) { + pr_err("SLUB: Unable to get IEE stack for %s\n", s->name); + return false; + } + obj = start + s->random_seq[i]; + iee_init_token((struct task_struct *)obj, pstack + PAGE_SIZE * 4, (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 0)); + } + } + #endif + #if defined(CONFIG_KOI) && !defined(CONFIG_IEE) + if(s == task_struct_cachep) + { + int i; + void *obj; + for(i = 0; i < freelist_count; i++) + { + obj = start + s->random_seq[i]; + koi_init_token((struct task_struct *)obj); + } + } + #endif for (idx = 1; idx < slab->objects; idx++) { next = next_freelist_entry(s, &pos, start, page_limit, freelist_count); @@ -2021,6 +2178,9 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) void *start, *p, *next; int idx; bool shuffle; + #if defined(CONFIG_IEE) || defined(CONFIG_KOI) + unsigned int order; + #endif flags &= gfp_allowed_mask; @@ -2034,15 +2194,46 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min)) alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~__GFP_RECLAIM; + #ifdef CONFIG_PTP + if(s == pgtable_jar || s == pgd_jar) + alloc_gfp |= __GFP_ZERO; + #endif + #ifdef CONFIG_KEYP + if(s == key_jar) + alloc_gfp |= __GFP_ZERO; + #endif + + #ifdef CONFIG_IEE_SELINUX_P + if(s == policy_jar) + alloc_gfp |= __GFP_ZERO; + #endif slab = alloc_slab_page(alloc_gfp, node, oo); + #if defined(CONFIG_IEE) || defined(CONFIG_KOI) + order = oo_order(oo); + #endif if (unlikely(!slab)) { oo = s->min; alloc_gfp = flags; + #ifdef CONFIG_PTP + if(s == pgtable_jar || s == pgd_jar) + alloc_gfp |= __GFP_ZERO; + #endif + #ifdef CONFIG_KEYP + if(s == key_jar) + alloc_gfp |= __GFP_ZERO; + #endif + #ifdef CONFIG_IEE_SELINUX_P + if(s == policy_jar) + alloc_gfp |= __GFP_ZERO; + #endif /* * Allocation may have failed due to fragmentation. 
* Try a lower order alloc if possible */ slab = alloc_slab_page(alloc_gfp, node, oo); + #if defined(CONFIG_IEE) || defined(CONFIG_KOI) + order = oo_order(oo); + #endif if (unlikely(!slab)) return NULL; stat(s, ORDER_FALLBACK); @@ -2052,6 +2243,63 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) slab->inuse = 0; slab->frozen = 0; + #ifdef CONFIG_IEE + if (s == task_struct_cachep) + { + void *token_addr = (void *)__phys_to_iee(page_to_phys(folio_page(slab_folio(slab), 0))); + void *alloc_token = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order); + iee_set_token_page_valid(token_addr, alloc_token, order); + } + if (s == iee_stack_jar) { + set_iee_stack_page((unsigned long)page_address(folio_page(slab_folio(slab), 0)), order); + } + + #ifdef CONFIG_PTP + if (s == pgtable_jar || s == pgd_jar) + { + set_iee_page((unsigned long)page_address(folio_page(slab_folio(slab), 0)), order); + } + #endif + + #ifdef CONFIG_CREDP + if (s == cred_jar) + { + set_iee_page((unsigned long)page_address(folio_page(slab_folio(slab), 0)), order); + } + #endif + + #ifdef CONFIG_KEYP + if(s == key_jar) + { + set_iee_page((unsigned long)page_address(folio_page(slab_folio(slab), 0)), order); + } + #endif + + #ifdef CONFIG_IEE_SELINUX_P + if(s == policy_jar) + { + set_iee_page((unsigned long)page_address(folio_page(slab_folio(slab), 0)), order); + } + #endif + #else + #ifdef CONFIG_KOI + if (s == task_struct_cachep) { +#ifdef CONFIG_X86_64 + void *token_addr = (void *)__phys_to_koi(page_to_phys(folio_page(slab_folio(slab), 0))); + void *alloc_token = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order); + koi_add_page_mapping(token_addr, alloc_token, order); +#else + int i; + for (i = 0; i < (0x1 << order); i++) { + unsigned long token_addr = __phys_to_virt(page_to_phys(folio_page(slab_folio(slab), i))) + (unsigned long)KOI_OFFSET; + unsigned long alloc_token = __get_free_page(GFP_KERNEL | __GFP_ZERO); + koi_add_page_mapping(token_addr, alloc_token); + } +#endif + } + #endif + #endif + account_slab(slab, oo_order(oo), s, flags); slab->slab_cache = s; @@ -2091,6 +2339,114 @@ static struct slab *new_slab(struct kmem_cache *s, gfp_t flags, int node) flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node); } +#if defined(CONFIG_X86_64) && defined (CONFIG_IEE) +struct iee_free_slab_work { + struct work_struct work; + struct kmem_cache *s; + struct slab *slab; +}; +static void iee_free_task_struct_slab(struct work_struct *work) +{ + struct iee_free_slab_work *iee_free_slab_work = container_of(work, struct iee_free_slab_work, work); + struct kmem_cache *s = iee_free_slab_work->s; + struct slab *slab = iee_free_slab_work->slab; + struct folio *folio = slab_folio(slab); + int order = folio_order(folio); + // Free stack and tmp page. + int i; + void *start = fixup_red_left(s, page_address(folio_page(folio, 0))); + void *obj; + void *iee_stack; + void *tmp_page; + void *token; + for(i = 0; i < oo_objects(s->oo); i++) + { + obj = start + s->random_seq[i]; + iee_stack = (void *)iee_read_token_stack((struct task_struct *)obj); + if (iee_stack) { + free_iee_stack((void *)(iee_stack - PAGE_SIZE * 4)); + } + tmp_page = iee_read_tmp_page((struct task_struct *)obj); + free_pages((unsigned long)tmp_page, 0); + } + // Free token. 
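+ // The token is the shadow page mapped at the IEE alias of this slab's first
+ // page (__phys_to_iee of its physical address); allocate_slab() installed it
+ // with iee_set_token_page_valid(), so invalidate it here before the slab's
+ // backing pages are handed back to the page allocator.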
+ token = (void *)__phys_to_iee(page_to_phys(folio_page(folio, 0))); + iee_set_token_page_invalid(token, NULL, order); + __free_pages(&folio->page, order); + kfree(iee_free_slab_work); +} +#ifdef CONFIG_PTP +static void iee_free_pgtable_slab(struct work_struct *work) +{ + struct iee_free_slab_work *iee_free_slab_work = container_of(work, struct iee_free_slab_work, work); + struct slab *slab = iee_free_slab_work->slab; + struct folio *folio = slab_folio(slab); + int order = folio_order(folio); + unset_iee_page((unsigned long)page_address(folio_page(slab_folio(slab), 0)), order); + __free_pages(&folio->page, order); + kfree(iee_free_slab_work); +} +#endif + +#ifdef CONFIG_CREDP +static void iee_free_cred_slab(struct work_struct *work) +{ + struct iee_free_slab_work *iee_free_slab_work = container_of(work, struct iee_free_slab_work, work); + struct slab *slab = iee_free_slab_work->slab; + struct folio *folio = slab_folio(slab); + int order = folio_order(folio); + unset_iee_page((unsigned long)page_address(folio_page(slab_folio(slab), 0)), order); + __free_pages(&folio->page, order); + kfree(iee_free_slab_work); +} +#endif +static void iee_free_slab(struct kmem_cache *s, struct slab *slab, void (*do_free_slab)(struct work_struct *work)) +{ + struct iee_free_slab_work *iee_free_slab_work = kmalloc(sizeof(struct iee_free_slab_work), GFP_ATOMIC); + if (!iee_free_slab_work) + panic("No mem for struct iee_free_slab_work"); + iee_free_slab_work->s = s; + iee_free_slab_work->slab = slab; + INIT_WORK(&iee_free_slab_work->work, do_free_slab); + schedule_work(&iee_free_slab_work->work); +} +#else +#ifdef CONFIG_KOI +extern void koi_remove_page_mapping(unsigned long token, void *__unused, unsigned long order); +#ifdef CONFIG_X86_64 +struct koi_free_slab_work { + struct work_struct work; + struct kmem_cache *s; + struct slab *slab; +}; +static void koi_free_task_struct_slab(struct work_struct *work) +{ + struct koi_free_slab_work *koi_free_slab_work = container_of(work, struct koi_free_slab_work, work); + struct slab *slab = koi_free_slab_work->slab; + struct folio *folio = slab_folio(slab); + int order = folio_order(folio); + void *token; + // Free token. + token = (void *)__phys_to_koi(page_to_phys(folio_page(folio, 0))); + koi_remove_page_mapping((unsigned long)token, NULL, order); + __free_pages(&folio->page, order); + kfree(koi_free_slab_work); +} + +static void koi_free_slab(struct kmem_cache *s, struct slab *slab, void (*do_free_slab)(struct work_struct *work)) +{ + struct koi_free_slab_work *koi_free_slab_work = kmalloc(sizeof(struct koi_free_slab_work), GFP_ATOMIC); + if (!koi_free_slab_work) + panic("No mem for struct koi_free_slab_work"); + koi_free_slab_work->s = s; + koi_free_slab_work->slab = slab; + INIT_WORK(&koi_free_slab_work->work, do_free_slab); + schedule_work(&koi_free_slab_work->work); +} +#endif /* CONFIG_X86_64 */ +#endif /* CONFIG_KOI */ +#endif + static void __free_slab(struct kmem_cache *s, struct slab *slab) { struct folio *folio = slab_folio(slab); @@ -2104,6 +2460,118 @@ static void __free_slab(struct kmem_cache *s, struct slab *slab) __folio_clear_slab(folio); mm_account_reclaimed_pages(pages); unaccount_slab(slab, order, s); + + #ifdef CONFIG_IEE + // If the page containing this token is empty, free it and restore iee&lm va. + if(s == task_struct_cachep) + { + #ifdef CONFIG_X86_64 + iee_free_slab(s, slab, iee_free_task_struct_slab); + return; + #else + // Free stack. 
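+ // Non-x86 path runs inline here: for every task_struct object in the slab,
+ // read the IEE stack and tmp page back through the token accessors
+ // (iee_read_token_stack()/iee_read_tmp_page()) and free them, then walk the
+ // kernel page tables to locate and release the token page itself.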
+ int i; + void *start = fixup_red_left(s, page_address(folio_page(folio, 0))); + void *obj; + void *iee_stack; + void *tmp_page; + void *token_addr; + for(i = 0; i < oo_objects(s->oo); i++) + { + obj = start + s->random_seq[i]; + tmp_page = iee_read_tmp_page((struct task_struct *)obj); + free_pages((unsigned long)tmp_page, 0); + iee_stack = (void *)iee_read_token_stack((struct task_struct *)obj); + if (iee_stack) { + free_iee_stack((void *)(iee_stack - PAGE_SIZE * 4)); + } + } + // Free token. + token_addr = (void *)__phys_to_iee(page_to_phys(folio_page(folio, 0))); + { + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, (unsigned long)token_addr); + p4d_t *p4dp = p4d_offset(pgdp, (unsigned long)token_addr); + pud_t *pudp = pud_offset(p4dp, (unsigned long)token_addr); + pmd_t *pmdp = pmd_offset(pudp, (unsigned long)token_addr); + pte_t *ptep = pte_offset_kernel(pmdp, (unsigned long)token_addr); + void *token_page = page_address(pte_page(*ptep)); + iee_set_token_page_invalid(token_addr, token_page, order); + free_pages((unsigned long)token_page, order); + } + #endif + } + if (s == iee_stack_jar) + { + unset_iee_stack_page((unsigned long)page_address(folio_page(slab_folio(slab), 0)), order); + } + + #ifdef CONFIG_PTP + if(s == pgtable_jar || s == pgd_jar) + { + #ifdef CONFIG_X86_64 + iee_free_slab(s, slab, iee_free_pgtable_slab); + return; + #else + unset_iee_page((unsigned long)page_address(folio_page(slab_folio(slab), 0)), order); + #endif + } + #endif + + #ifdef CONFIG_CREDP + if (s == cred_jar) + { + #ifdef CONFIG_X86_64 + iee_free_slab(s, slab, iee_free_cred_slab); + return; + #else + unset_iee_page((unsigned long)page_address(folio_page(folio, 0)), order); + #endif + } + #endif + + #ifdef CONFIG_KEYP + if(s == key_jar) + { + unset_iee_page((unsigned long)page_address(folio_page(slab_folio(slab), 0)), order); + } + #endif + + #ifdef CONFIG_IEE_SELINUX_P + if(s == policy_jar) + { + unset_iee_page((unsigned long)page_address(folio_page(slab_folio(slab), 0)), order); + } + #endif + + #else + #ifdef CONFIG_KOI + if(s == task_struct_cachep) + { + #ifdef CONFIG_X86_64 + koi_free_slab(s, slab, koi_free_task_struct_slab); + return; + #else + int i; + for(i = 0; i < (0x1 << order); i++) + { + unsigned long token_addr = __phys_to_virt(page_to_phys(folio_page(folio, i))) + (unsigned long)KOI_OFFSET; + unsigned long flags; + local_irq_save(flags); + asm volatile("at s1e1r, %0"::"r"(token_addr)); + isb(); + unsigned long res = read_sysreg(par_el1); + local_irq_restore(flags); + if(!(res & 0x1)) { + koi_remove_page_mapping(token_addr, NULL, 0); + free_page((unsigned long)__va(res & PTE_ADDR_MASK)); + } + } + #endif + } + #endif + #endif + __free_pages(&folio->page, order); } @@ -3447,6 +3915,38 @@ static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s, 0, sizeof(void *)); } +#ifdef CONFIG_IEE +#ifdef CONFIG_KFENCE +static bool is_iee_kmem_cache(struct kmem_cache *s) +{ + if (s == iee_stack_jar) + return true; +#ifdef CONFIG_PTP + else if (s == pgtable_jar || s == pgd_jar) + return true; +#endif +#ifdef CONFIG_CREDP + else if (s == cred_jar) + return true; +#endif +#ifdef CONFIG_KEYP + else if (s == key_jar) + return true; +#endif +#ifdef CONFIG_IEE_SELINUX_P + else if (s == policy_jar) + return true; +#endif + return false; +} +#else +static bool is_iee_kmem_cache(struct kmem_cache *s) +{ + return false; +} +#endif // CONFIG_KFENCE +#endif // CONFIG_IEE + /* * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc) * have the fastpath folded into 
their functions. So no function call @@ -3468,10 +3968,18 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list if (!s) return NULL; +#ifdef CONFIG_IEE + /* Skip kfence_alloc for iee kmem caches. */ + if(is_iee_kmem_cache(s)) + goto slab_alloc; +#endif object = kfence_alloc(s, orig_size, gfpflags); if (unlikely(object)) goto out; +#ifdef CONFIG_IEE +slab_alloc: +#endif object = __slab_alloc_node(s, gfpflags, node, addr, orig_size); maybe_wipe_obj_freeptr(s, object); @@ -3493,6 +4001,8 @@ static __fastpath_inline void *slab_alloc(struct kmem_cache *s, struct list_lru return slab_alloc_node(s, lru, gfpflags, NUMA_NO_NODE, addr, orig_size); } +// u64 pgtable_alloc_count = 0; + static __fastpath_inline void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru, gfp_t gfpflags) @@ -3501,6 +4011,13 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru, trace_kmem_cache_alloc(_RET_IP_, ret, s, gfpflags, NUMA_NO_NODE); + // if (s == pgtable_jar && (pgtable_alloc_count % 100 == 0)) { + // printk("IEE TEST: object 0x%llx slab 0x%llx obj_to_page 0x%llx", + // (u64)ret, page_address((struct page*)virt_to_slab(ret)), + // page_address(virt_to_page(ret))); + // pgtable_alloc_count++; + // } + return ret; } @@ -3951,6 +4468,11 @@ static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, local_lock_irqsave(&s->cpu_slab->lock, irqflags); for (i = 0; i < size; i++) { + #ifdef CONFIG_IEE + /* Skip kfence_alloc for iee kmem caches. */ + if(is_iee_kmem_cache(s)) + goto slab_alloc; + #endif void *object = kfence_alloc(s, s->object_size, flags); if (unlikely(object)) { @@ -3958,6 +4480,9 @@ static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, continue; } + #ifdef CONFIG_IEE + slab_alloc: + #endif object = c->freelist; if (unlikely(!object)) { /* @@ -4291,6 +4816,36 @@ static void early_kmem_cache_node_alloc(int node) __add_partial(n, slab, DEACTIVATE_TO_HEAD); } +#ifdef CONFIG_PTP +void early_pgtable_jar_alloc(struct kmem_cache *pgtable_jar) +{ + struct slab *slab; + int node = 0; + int i = 0; + + for(i = 0; i < nr_cpu_ids; i++) + { + node = cpupid_to_nid(i); + slab = new_slab(pgtable_jar, GFP_NOWAIT | __GFP_ZERO, node); + + BUG_ON(!slab); + if (slab_nid(slab) != node) { + pr_err("SLUB: Unable to allocate memory from node %d\n", node); + pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n"); + } + + slab->inuse = 0; + inc_slabs_node(pgtable_jar, node, slab->objects); + + /* + * No locks need to be taken here as it has just been + * initialized and there is no concurrent access. 
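+	 * One zeroed slab is allocated per possible CPU id and parked on its
+	 * node's partial list, pre-seeding pgtable_jar for early page-table
+	 * allocations.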
+ */ + __add_partial(get_node(pgtable_jar, slab_nid(slab)), slab, DEACTIVATE_TO_HEAD); + } +} +#endif + static void free_kmem_cache_nodes(struct kmem_cache *s) { int node; @@ -4484,6 +5039,31 @@ static int calculate_sizes(struct kmem_cache *s) s->size = size; s->reciprocal_size = reciprocal_value(size); order = calculate_order(size); + + #if defined(CONFIG_IEE) || defined(CONFIG_KOI) + if(strcmp(s->name, "task_struct") == 0) + order = HUGE_PMD_ORDER; + if(strcmp(s->name, "iee_stack_jar") == 0) + order = HUGE_PMD_ORDER; + #endif + #ifdef CONFIG_PTP + if(strcmp(s->name, "pgtable_jar") == 0) + order = HUGE_PMD_ORDER; + if(strcmp(s->name, "pgd_jar") == 0) + order = HUGE_PMD_ORDER; + #endif + #ifdef CONFIG_CREDP + if(strcmp(s->name, "cred_jar") == 0) + order = HUGE_PMD_ORDER; + #endif + #ifdef CONFIG_KEYP + if(strcmp(s->name, "key_jar") == 0) + order = HUGE_PMD_ORDER; + #endif + #ifdef CONFIG_IEE_SELINUX_P + if(strcmp(s->name, "policy_jar") == 0) + order = HUGE_PMD_ORDER; + #endif if ((int)order < 0) return 0; @@ -4546,6 +5126,23 @@ static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags) s->min_partial = min_t(unsigned long, MAX_PARTIAL, ilog2(s->size) / 2); s->min_partial = max_t(unsigned long, MIN_PARTIAL, s->min_partial); + #ifdef CONFIG_IEE + if(strcmp(s->name, "task_struct") == 0) + s->min_partial *= (1 << TASK_ORDER); + if(strcmp(s->name, "iee_stack_jar") == 0) + s->min_partial *= (1 << TASK_ORDER); + #endif + #ifdef CONFIG_PTP + if(strcmp(s->name, "pgtable_jar") == 0) + s->min_partial = (1 << HUGE_PMD_ORDER); + if(strcmp(s->name, "pgd_jar") == 0) + s->min_partial = (1 << HUGE_PMD_ORDER); + #endif + #ifdef CONFIG_KEYP + if(strcmp(s->name, "key_jar") == 0) + s->min_partial = (1 << TASK_ORDER); + #endif + set_cpu_partial(s); #ifdef CONFIG_NUMA @@ -5119,6 +5716,13 @@ int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags) if (err) return err; + #ifdef CONFIG_PTP + if(strcmp(s->name, "pgtable_jar") == 0) + pgtable_jar_offset = s->offset; + if (strcmp(s->name, "pgd_jar") == 0) + pgd_jar_offset = s->offset; + #endif + /* Mutex is not taken during early boot */ if (slab_state <= UP) return 0; diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 2628fc02be08..8601b88ab96f 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -28,6 +28,10 @@ #include #include +#ifdef CONFIG_PTP +#include +#endif + #include #include @@ -146,6 +150,7 @@ pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node, struct page *reuse) { pte_t *pte = pte_offset_kernel(pmd, addr); + if (pte_none(ptep_get(pte))) { pte_t entry; void *p; @@ -167,6 +172,9 @@ pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node, get_page(reuse); p = page_to_virt(reuse); } + #ifdef CONFIG_PTP + set_iee_page_valid((unsigned long)__phys_to_iee(__pa(p))); + #endif entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); set_pte_at(&init_mm, addr, pte, entry); } @@ -181,6 +189,10 @@ static void * __meminit vmemmap_alloc_block_zero(unsigned long size, int node) return NULL; memset(p, 0, size); + #ifdef CONFIG_PTP + set_iee_page_valid((unsigned long)__phys_to_iee(__pa(p))); + #endif + return p; } diff --git a/mm/vmalloc.c b/mm/vmalloc.c index cb0951fea238..6cefa902facb 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -3458,7 +3458,7 @@ static int vmap_pfn_apply(pte_t *pte, unsigned long addr, void *private) if (WARN_ON_ONCE(pfn_valid(pfn))) return -EINVAL; - + ptent = pte_mkspecial(pfn_pte(pfn, data->prot)); set_pte_at(&init_mm, addr, pte, ptent); diff --git 
a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 4c6441536d55..2483f38d6eca 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -385,7 +385,11 @@ static int get_secret(struct ceph_crypto_key *dst, const char *name, goto out; } + #ifdef CONFIG_KEYP + ckey = ((union key_payload *)(ukey->name_link.next))->data[0]; + #else ckey = ukey->payload.data[0]; + #endif err = ceph_crypto_key_clone(dst, ckey); if (err) goto out_key; diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 051d22c0e4ad..cd5839ea3000 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -336,7 +336,11 @@ static void ceph_key_free_preparse(struct key_preparsed_payload *prep) static void ceph_key_destroy(struct key *key) { + #ifdef CONFIG_KEYP + struct ceph_crypto_key *ckey = ((union key_payload *)(key->name_link.next))->data[0]; + #else struct ceph_crypto_key *ckey = key->payload.data[0]; + #endif ceph_crypto_key_destroy(ckey); kfree(ckey); diff --git a/net/core/filter.c b/net/core/filter.c index 48dd2896ee1d..4ef815d885d4 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -569,7 +569,7 @@ static int bpf_convert_filter(struct sock_filter *prog, int len, u8 bpf_src; BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK); - BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG); + BUILD_BUG_ON(BPF_REG_FP + 2 != MAX_BPF_REG); if (len <= 0 || len > BPF_MAXINSNS) return -EINVAL; @@ -9699,25 +9699,41 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, break; case offsetof(struct __sk_buff, data): + #ifdef CONFIG_HIVE + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sfi_data), + si->dst_reg, si->src_reg, + offsetof(struct sk_buff, sfi_data)); + #else *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data), si->dst_reg, si->src_reg, offsetof(struct sk_buff, data)); + #endif break; case offsetof(struct __sk_buff, data_meta): off = si->off; off -= offsetof(struct __sk_buff, data_meta); + #ifdef CONFIG_HIVE + off += offsetof(struct sk_buff, sfi_data_meta); + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, + si->src_reg, off); + #else off += offsetof(struct sk_buff, cb); off += offsetof(struct bpf_skb_data_end, data_meta); *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg, off); + #endif break; case offsetof(struct __sk_buff, data_end): off = si->off; off -= offsetof(struct __sk_buff, data_end); + #ifdef CONFIG_HIVE + off += offsetof(struct sk_buff, sfi_data_end); + #else off += offsetof(struct sk_buff, cb); off += offsetof(struct bpf_skb_data_end, data_end); + #endif *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg, off); break; diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index c42ddd85ff1f..8450eb924b62 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -34,6 +34,13 @@ #include #include "internal.h" +#ifdef CONFIG_CREDP +#include +#endif +#ifdef CONFIG_KEYP +#include +#endif + MODULE_DESCRIPTION("DNS Resolver"); MODULE_AUTHOR("Wang Lei"); MODULE_LICENSE("GPL"); @@ -295,7 +302,11 @@ static void dns_resolver_describe(const struct key *key, struct seq_file *m) { seq_puts(m, key->description); if (key_is_positive(key)) { + #ifdef CONFIG_KEYP + int err = PTR_ERR(((union key_payload *)(key->name_link.next))->data[dns_key_error]); + #else int err = PTR_ERR(key->payload.data[dns_key_error]); + #endif if (err) seq_printf(m, ": %d", err); @@ -311,7 +322,11 @@ static void dns_resolver_describe(const struct key *key, struct seq_file *m) static long dns_resolver_read(const struct key *key, char *buffer, 
size_t buflen) { + #ifdef CONFIG_KEYP + int err = PTR_ERR(((union key_payload *)(key->name_link.next))->data[dns_key_error]); + #else int err = PTR_ERR(key->payload.data[dns_key_error]); + #endif if (err) return err; @@ -364,9 +379,18 @@ static int __init init_dns_resolver(void) /* instruct request_key() to use this special keyring as a cache for * the results it looks up */ + #ifdef CONFIG_KEYP + iee_set_key_flag_bit(keyring, KEY_FLAG_ROOT_CAN_CLEAR, SET_BIT_OP); + #else set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags); + #endif + #ifdef CONFIG_CREDP + iee_set_cred_thread_keyring(cred,keyring); + iee_set_cred_jit_keyring(cred,KEY_REQKEY_DEFL_THREAD_KEYRING); + #else cred->thread_keyring = keyring; cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; + #endif dns_resolver_cache = cred; kdebug("DNS resolver keyring: %d\n", key_serial(keyring)); diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c index 82b084cc1cc6..2bebc6eef501 100644 --- a/net/dns_resolver/dns_query.c +++ b/net/dns_resolver/dns_query.c @@ -47,6 +47,10 @@ #include "internal.h" +#ifdef CONFIG_KEYP +#include +#endif + /** * dns_query - Query the DNS * @net: The network namespace to operate in. @@ -133,16 +137,26 @@ int dns_query(struct net *net, goto out; } + #ifdef CONFIG_KEYP + down_read(&KEY_SEM(rkey)); + iee_set_key_flag_bit(rkey, KEY_FLAG_ROOT_CAN_INVAL, SET_BIT_OP); + iee_set_key_perm(rkey, rkey->perm | KEY_USR_VIEW); + #else down_read(&rkey->sem); set_bit(KEY_FLAG_ROOT_CAN_INVAL, &rkey->flags); rkey->perm |= KEY_USR_VIEW; + #endif ret = key_validate(rkey); if (ret < 0) goto put; /* If the DNS server gave an error, return that to the caller */ + #ifdef CONFIG_KEYP + ret = PTR_ERR(((union key_payload *)(rkey->name_link.next))->data[dns_key_error]); + #else ret = PTR_ERR(rkey->payload.data[dns_key_error]); + #endif if (ret) goto put; @@ -161,7 +175,11 @@ int dns_query(struct net *net, ret = len; put: + #ifdef CONFIG_KEYP + up_read(&KEY_SEM(rkey)); + #else up_read(&rkey->sem); + #endif if (invalidate) key_invalidate(rkey); key_put(rkey); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index fa8aec78f63d..ba9d2feace7a 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -307,7 +307,11 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, if (!key) key = rx->key; + #ifdef CONFIG_KEYP + if (key && !((union key_payload *)(key->name_link.next))->data[0]) + #else if (key && !key->payload.data[0]) + #endif key = NULL; /* a no-security key */ memset(&p, 0, sizeof(p)); diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 598b4ee389fc..1d9574406c16 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -247,8 +247,13 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, if (ret < 0) return ret; + #ifdef CONFIG_KEYP + ret = conn->security->init_connection_security( + conn, ((union key_payload *)(conn->key->name_link.next))->data[0]); + #else ret = conn->security->init_connection_security( conn, conn->key->payload.data[0]); + #endif if (ret < 0) return ret; diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c index 33e8302a79e3..c4b755733ed2 100644 --- a/net/rxrpc/key.c +++ b/net/rxrpc/key.c @@ -413,7 +413,11 @@ static void rxrpc_free_preparse(struct key_preparsed_payload *prep) */ static void rxrpc_destroy(struct key *key) { + #ifdef CONFIG_KEYP + rxrpc_free_token_list(((union key_payload *)(key->name_link.next))->data[0]); + #else rxrpc_free_token_list(key->payload.data[0]); + #endif } /* @@ -426,7 +430,11 @@ static void rxrpc_describe(const struct 
key *key, struct seq_file *m) seq_puts(m, key->description); + #ifdef CONFIG_KEYP + for (token = ((union key_payload *)(key->name_link.next))->data[0]; token; token = token->next) { + #else for (token = key->payload.data[0]; token; token = token->next) { + #endif seq_puts(m, sep); switch (token->security_index) { @@ -584,7 +592,11 @@ static long rxrpc_read(const struct key *key, size += 1 * 4; /* token count */ ntoks = 0; + #ifdef CONFIG_KEYP + for (token = ((union key_payload *)(key->name_link.next))->data[0]; token; token = token->next) { + #else for (token = key->payload.data[0]; token; token = token->next) { + #endif toksize = 4; /* sec index */ switch (token->security_index) { @@ -654,7 +666,11 @@ static long rxrpc_read(const struct key *key, ENCODE(ntoks); tok = 0; + #ifdef CONFIG_KEYP + for (token = ((union key_payload *)(key->name_link.next))->data[0]; token; token = token->next) { + #else for (token = key->payload.data[0]; token; token = token->next) { + #endif toksize = toksizes[tok++]; ENCODE(toksize); oldxdr = xdr; diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index ad6c57a9f27c..3161cbf4a958 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -21,6 +21,10 @@ #include #include "ar-internal.h" +#ifdef CONFIG_KEYP +#include +#endif + #define RXKAD_VERSION 2 #define MAXKRB5TICKETLEN 1024 #define RXKAD_TKT_TYPE_KERBEROS_V5 256 @@ -88,10 +92,18 @@ static void rxkad_free_preparse_server_key(struct key_preparsed_payload *prep) static void rxkad_destroy_server_key(struct key *key) { + #ifdef CONFIG_KEYP + if (((union key_payload *)(key->name_link.next))->data[0]) { + crypto_free_skcipher(((union key_payload *)(key->name_link.next))->data[0]); + union key_payload *key_payload = ((union key_payload *)(key->name_link.next)); + key_payload->data[0] = NULL; + } + #else if (key->payload.data[0]) { crypto_free_skcipher(key->payload.data[0]); key->payload.data[0] = NULL; } + #endif } /* @@ -205,7 +217,11 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn, return -ENOMEM; } + #ifdef CONFIG_KEYP + token = ((union key_payload *)(conn->key->name_link.next))->data[0]; + #else token = conn->key->payload.data[0]; + #endif memcpy(&iv, token->kad->session_key, sizeof(iv)); tmpbuf[0] = htonl(conn->proto.epoch); @@ -317,7 +333,11 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, } /* encrypt from the session key */ + #ifdef CONFIG_KEYP + token = ((union key_payload *)(call->conn->key->name_link.next))->data[0]; + #else token = call->conn->key->payload.data[0]; + #endif memcpy(&iv, token->kad->session_key, sizeof(iv)); sg_init_one(&sg, txb->data, txb->len); @@ -507,7 +527,11 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, } /* decrypt from the session key */ + #ifdef CONFIG_KEYP + token = ((union key_payload *)(call->conn->key->name_link.next))->data[0]; + #else token = call->conn->key->payload.data[0]; + #endif memcpy(&iv, token->kad->session_key, sizeof(iv)); skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher); @@ -824,7 +848,11 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, return rxrpc_abort_conn(conn, skb, RXKADLEVELFAIL, -EACCES, rxkad_abort_chall_level); + #ifdef CONFIG_KEYP + token = ((union key_payload *)(conn->key->name_link.next))->data[0]; + #else token = conn->key->payload.data[0]; + #endif /* build the response packet */ resp = kzalloc(sizeof(struct rxkad_response), GFP_NOFS); @@ -876,12 +904,24 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, *_expiry = 
0; + #ifdef CONFIG_KEYP + ASSERT(((union key_payload *)(server_key->name_link.next))->data[0] != NULL); + #else ASSERT(server_key->payload.data[0] != NULL); + #endif ASSERTCMP((unsigned long) ticket & 7UL, ==, 0); + #ifdef CONFIG_KEYP + memcpy(&iv, &((union key_payload *)(server_key->name_link.next))->data[2], sizeof(iv)); + #else memcpy(&iv, &server_key->payload.data[2], sizeof(iv)); + #endif + #ifdef CONFIG_KEYP + req = skcipher_request_alloc(((union key_payload *)(server_key->name_link.next))->data[0], GFP_NOFS); + #else req = skcipher_request_alloc(server_key->payload.data[0], GFP_NOFS); + #endif if (!req) return -ENOMEM; diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c index cb8dd1d3b1d4..6bffe9965040 100644 --- a/net/rxrpc/security.c +++ b/net/rxrpc/security.c @@ -79,7 +79,11 @@ int rxrpc_init_client_call_security(struct rxrpc_call *call) if (ret < 0) return ret; + #ifdef CONFIG_KEYP + for (token = ((union key_payload *)(key->name_link.next))->data[0]; token; token = token->next) { + #else for (token = key->payload.data[0]; token; token = token->next) { + #endif sec = rxrpc_security_lookup(token->security_index); if (sec) goto found; @@ -103,7 +107,11 @@ int rxrpc_init_client_conn_security(struct rxrpc_connection *conn) _enter("{%d},{%x}", conn->debug_id, key_serial(key)); + #ifdef CONFIG_KEYP + for (token = ((union key_payload *)(key->name_link.next))->data[0]; token; token = token->next) { + #else for (token = key->payload.data[0]; token; token = token->next) { + #endif if (token->security_index == conn->security->security_index) goto found; } diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 24f765d243db..4369ed7bf34b 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -585,7 +585,11 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, } key = rx->key; + #ifdef CONFIG_KEYP + if (key && !((union key_payload *)(rx->key->name_link.next))->data[0]) + #else if (key && !rx->key->payload.data[0]) + #endif key = NULL; memset(&cp, 0, sizeof(cp)); diff --git a/net/rxrpc/server_key.c b/net/rxrpc/server_key.c index e51940589ee5..1ea7e51b71f0 100644 --- a/net/rxrpc/server_key.c +++ b/net/rxrpc/server_key.c @@ -100,7 +100,11 @@ static void rxrpc_free_preparse_s(struct key_preparsed_payload *prep) static void rxrpc_destroy_s(struct key *key) { + #ifdef CONFIG_KEYP + const struct rxrpc_security *sec = ((union key_payload *)(key->name_link.next))->data[1]; + #else const struct rxrpc_security *sec = key->payload.data[1]; + #endif if (sec && sec->destroy_server_key) sec->destroy_server_key(key); @@ -108,7 +112,11 @@ static void rxrpc_destroy_s(struct key *key) static void rxrpc_describe_s(const struct key *key, struct seq_file *m) { + #ifdef CONFIG_KEYP + const struct rxrpc_security *sec = ((union key_payload *)(key->name_link.next))->data[1]; + #else const struct rxrpc_security *sec = key->payload.data[1]; + #endif seq_puts(m, key->description); if (sec && sec->describe_server_key) diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 382c7a71f81f..9416a3c0ac66 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -16,6 +16,10 @@ #include #include +#ifdef CONFIG_HIVE +#include +#endif + #include #include #include @@ -29,6 +33,10 @@ MODULE_DESCRIPTION("TC BPF based classifier"); #define CLS_BPF_SUPPORTED_GEN_FLAGS \ (TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW) +#ifdef CONFIG_HIVE +extern pte_t *bpf_sfi_get_ptep(u64 addr); +#endif + struct cls_bpf_head { struct list_head plist; struct idr handle_idr; @@ -78,6 +86,70 @@ static 
int cls_bpf_exec_opcode(int code) } } +#ifdef CONFIG_HIVE +static inline void bpf_sfi_map_skb(struct bpf_prog *prog, struct sk_buff *skb) +{ + u32 skb_data_offset, skb_meta_offset, skb_end_offset, reserved_skb_data_size; + u64 addr_to_map, map_length, map_page_cnt; + u64 start_time, end_time; + pte_t pte; + struct bpf_skb_data_end *cb; + + // skb_data_size = (u64)((struct bpf_skb_data_end *)skb->cb)->data_end - (u64)skb->head; + // skb_page_cnt = PAGE_ALIGN(skb_data_size) >> PAGE_SHIFT; + // pr_err("skb page cnt = %d\n", skb_page_cnt); + +#ifdef CONFIG_ARM64 + isb(); +#elif CONFIG_X86_64 + asm volatile("mfence":::"memory"); +#endif + start_time = sched_clock(); + + // 1. get skb data size + cb = (struct bpf_skb_data_end *)skb->cb; + skb_data_offset = (u64)skb->data & ~PAGE_MASK; + skb_meta_offset = cb->data_meta - (void *)skb->data; + skb_end_offset = cb->data_end - (void *)skb->sfi_data_end; + map_length = skb->len; + // 2. ensure bpf_sfi reserved size is enough + reserved_skb_data_size = prog->shadow_skb_page_cnt * PAGE_SIZE; + BUG_ON(unlikely(reserved_skb_data_size < map_length)); + // 3. double map + map_page_cnt = PAGE_ALIGN(map_length) >> PAGE_SHIFT; + addr_to_map = (u64)prog->shadow_skb_addr; + // printk("skb %llx, %d page, map to %llx\n", (u64)skb->data, skb_page_cnt, addr_to_map); + for (int i = 0; i < map_page_cnt; i++) { + pte_t *origin_ptep = bpf_sfi_get_ptep((u64)skb->data + i * PAGE_SIZE); + if (unlikely(IS_ERR(origin_ptep))) { + pr_err("map pkt %llx failed\n", (u64)skb->data + i * PAGE_SIZE); + return; + } + pte_t *sfi_ptep = bpf_sfi_get_ptep(addr_to_map + i * PAGE_SIZE); + if (unlikely(IS_ERR(sfi_ptep))) { + pr_err("map pkt %llx failed\n", addr_to_map + i * PAGE_SIZE); + return; + } + pte = __pte(pte_val(*origin_ptep)); + set_pte(sfi_ptep, pte); + } + flush_tlb_kernel_range(addr_to_map, addr_to_map + map_page_cnt * PAGE_SIZE); + // skb->sfi_bpf_mapped = true; + skb->sfi_data = (void *)addr_to_map + skb_data_offset; + skb->sfi_data_meta = skb->sfi_data + skb_meta_offset; + skb->sfi_data_end = skb->sfi_data + skb_end_offset; + +#ifdef CONFIG_ARM64 + isb(); +#elif CONFIG_X86_64 + asm volatile("mfence":::"memory"); +#endif + end_time = sched_clock(); + // pr_err("shadow packet region: %llx, %llx", addr_to_map, addr_to_map + map_page_cnt * PAGE_SIZE); + pr_err("shadow packet time = %lldns", end_time - start_time); +} +#endif + TC_INDIRECT_SCOPE int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) @@ -98,10 +170,16 @@ TC_INDIRECT_SCOPE int cls_bpf_classify(struct sk_buff *skb, /* It is safe to push/pull even if skb_shared() */ __skb_push(skb, skb->mac_len); bpf_compute_data_pointers(skb); + #ifdef CONFIG_HIVE + bpf_sfi_map_skb(prog->filter, skb); + #endif filter_res = bpf_prog_run(prog->filter, skb); __skb_pull(skb, skb->mac_len); } else { bpf_compute_data_pointers(skb); + #ifdef CONFIG_HIVE + bpf_sfi_map_skb(prog->filter, skb); + #endif filter_res = bpf_prog_run(prog->filter, skb); } if (unlikely(!skb->tstamp && skb->mono_delivery_time)) diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index ec41b26af76e..c429a92dc3ef 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -38,9 +38,13 @@ static const struct rpc_authops __rcu *auth_flavors[RPC_AUTH_MAXFLAVOR] = { static LIST_HEAD(cred_unused); static unsigned long number_cred_unused; +#ifdef CONFIG_CREDP +struct cred* machine_cred; +#else static struct cred machine_cred = { .usage = ATOMIC_INIT(1), }; +#endif /* * Return the machine_cred pointer to be used whenever @@ -48,7 +52,11 @@ 
static struct cred machine_cred = { */ const struct cred *rpc_machine_cred(void) { + #ifdef CONFIG_CREDP + return machine_cred; + #else return &machine_cred; + #endif } EXPORT_SYMBOL_GPL(rpc_machine_cred); @@ -871,6 +879,9 @@ static struct shrinker rpc_cred_shrinker = { int __init rpcauth_init_module(void) { + #ifdef CONFIG_CREDP + machine_cred = prepare_kernel_cred(&init_task); + #endif int err; err = rpc_init_authunix(); @@ -888,6 +899,9 @@ int __init rpcauth_init_module(void) void rpcauth_remove_module(void) { + #ifdef CONFIG_CREDP + abort_creds(machine_cred); + #endif rpc_destroy_authunix(); unregister_shrinker(&rpc_cred_shrinker); } diff --git a/security/commoncap.c b/security/commoncap.c index bc0521104197..d7d3b7cc13e8 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -26,6 +26,10 @@ #include #include +#ifdef CONFIG_CREDP +#include +#endif + /* * If a non-root user executes a setuid-root binary in * !secure(SECURE_NOROOT) mode, then we raise capabilities. @@ -266,6 +270,15 @@ int cap_capset(struct cred *new, if (!cap_issubset(*effective, *permitted)) return -EPERM; + #ifdef CONFIG_CREDP + iee_set_cred_cap_effective(new,*effective); + iee_set_cred_cap_inheritable(new,*inheritable); + iee_set_cred_cap_permitted(new,*permitted); + + iee_set_cred_cap_ambient(new,cap_intersect(new->cap_ambient, + cap_intersect(*permitted, + *inheritable))); + #else new->cap_effective = *effective; new->cap_inheritable = *inheritable; new->cap_permitted = *permitted; @@ -277,6 +290,7 @@ int cap_capset(struct cred *new, new->cap_ambient = cap_intersect(new->cap_ambient, cap_intersect(*permitted, *inheritable)); + #endif if (WARN_ON(!cap_ambient_invariant_ok(new))) return -EINVAL; return 0; @@ -601,9 +615,16 @@ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps, * pP' = (X & fP) | (pI & fI) * The addition of pA' is handled later. */ +#ifdef CONFIG_CREDP + kernel_cap_t temp = new->cap_permitted; + temp.val = (new->cap_bset.val & caps->permitted.val) | + (new->cap_inheritable.val & caps->inheritable.val); + iee_set_cred_cap_permitted(new,temp); +#else new->cap_permitted.val = (new->cap_bset.val & caps->permitted.val) | (new->cap_inheritable.val & caps->inheritable.val); +#endif if (caps->permitted.val & ~new->cap_permitted.val) /* insufficient to execute correctly */ @@ -726,7 +747,15 @@ static int get_file_caps(struct linux_binprm *bprm, struct file *file, int rc = 0; struct cpu_vfs_cap_data vcaps; + #ifdef CONFIG_CREDP + do { + kernel_cap_t tmp_cap = bprm->cred->cap_permitted; + tmp_cap.val = 0; + iee_set_cred_cap_permitted(bprm->cred, tmp_cap); + } while (0); + #else cap_clear(bprm->cred->cap_permitted); + #endif if (!file_caps_enabled) return 0; @@ -757,7 +786,15 @@ static int get_file_caps(struct linux_binprm *bprm, struct file *file, out: if (rc) + #ifdef CONFIG_CREDP + do { + kernel_cap_t tmp_cap = bprm->cred->cap_permitted; + tmp_cap.val = 0; + iee_set_cred_cap_permitted(bprm->cred, tmp_cap); + } while (0); + #else cap_clear(bprm->cred->cap_permitted); + #endif return rc; } @@ -809,8 +846,13 @@ static void handle_privileged_root(struct linux_binprm *bprm, bool has_fcap, */ if (__is_eff(root_uid, new) || __is_real(root_uid, new)) { /* pP' = (cap_bset & ~0) | (pI & ~0) */ + #ifdef CONFIG_CREDP + iee_set_cred_cap_permitted(new,cap_combine(old->cap_bset, + old->cap_inheritable)); + #else new->cap_permitted = cap_combine(old->cap_bset, old->cap_inheritable); + #endif } /* * If only the real uid is 0, we do not set the effective bit. 
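Throughout the commoncap.c hunks above, CONFIG_CREDP replaces every direct store to a capability set in struct cred with a read-modify-write through an iee_set_cred_* setter. The sketch below isolates that pattern for the "clear pP'" case; credp_clear_cap_permitted() is a hypothetical name and the <asm/iee-cred.h> include is assumed (the hunk's own #include target is not visible here), but the body mirrors the do/while block used in get_file_caps().

#include <linux/cred.h>
#include <linux/capability.h>
#ifdef CONFIG_CREDP
#include <asm/iee-cred.h>	/* assumed: declares the iee_set_cred_* setters added by this patch */
#endif

/* Hypothetical helper: clear the permitted set the way the CONFIG_CREDP hunks do. */
static inline void credp_clear_cap_permitted(struct cred *cred)
{
#ifdef CONFIG_CREDP
	kernel_cap_t tmp = cred->cap_permitted;	/* snapshot the current set */

	tmp.val = 0;				/* modify only the local copy */
	iee_set_cred_cap_permitted(cred, tmp);	/* publish through the protected setter */
#else
	cap_clear(cred->cap_permitted);		/* stock kernel: write the field in place */
#endif
}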
@@ -919,34 +961,71 @@ int cap_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file) /* downgrade; they get no more than they had, and maybe less */ if (!ns_capable(new->user_ns, CAP_SETUID) || (bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS)) { + #ifdef CONFIG_CREDP + iee_set_cred_euid(new,new->uid); + iee_set_cred_egid(new,new->gid); + #else new->euid = new->uid; new->egid = new->gid; + #endif } + #ifdef CONFIG_CREDP + iee_set_cred_cap_permitted(new,cap_intersect(new->cap_permitted, + old->cap_permitted)); + #else new->cap_permitted = cap_intersect(new->cap_permitted, old->cap_permitted); + #endif } + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(new,new->euid); + iee_set_cred_suid(new,new->euid); + iee_set_cred_fsgid(new,new->egid); + iee_set_cred_sgid(new,new->egid); + #else new->suid = new->fsuid = new->euid; new->sgid = new->fsgid = new->egid; + #endif /* File caps or setid cancels ambient. */ if (has_fcap || is_setid) + #ifdef CONFIG_CREDP + do { + kernel_cap_t tmp_cap = new->cap_ambient; + tmp_cap.val = 0; + iee_set_cred_cap_ambient(new, tmp_cap); + } while (0); + #else cap_clear(new->cap_ambient); + #endif /* * Now that we've computed pA', update pP' to give: * pP' = (X & fP) | (pI & fI) | pA' */ + #ifdef CONFIG_CREDP + iee_set_cred_cap_permitted(new,cap_combine(new->cap_permitted, new->cap_ambient)); + #else new->cap_permitted = cap_combine(new->cap_permitted, new->cap_ambient); + #endif /* * Set pE' = (fE ? pP' : pA'). Because pA' is zero if fE is set, * this is the same as pE' = (fE ? pP' : 0) | pA'. */ if (effective) + #ifdef CONFIG_CREDP + iee_set_cred_cap_effective(new,new->cap_permitted); + #else new->cap_effective = new->cap_permitted; + #endif else + #ifdef CONFIG_CREDP + iee_set_cred_cap_effective(new,new->cap_ambient); + #else new->cap_effective = new->cap_ambient; + #endif if (WARN_ON(!cap_ambient_invariant_ok(new))) return -EPERM; @@ -957,7 +1036,11 @@ int cap_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file) return ret; } + #ifdef CONFIG_CREDP + iee_set_cred_securebits(new,new->securebits & ~issecure_mask(SECURE_KEEP_CAPS)); + #else new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); + #endif if (WARN_ON(!cap_ambient_invariant_ok(new))) return -EPERM; @@ -1092,8 +1175,21 @@ static inline void cap_emulate_setxuid(struct cred *new, const struct cred *old) !uid_eq(new->euid, root_uid) && !uid_eq(new->suid, root_uid))) { if (!issecure(SECURE_KEEP_CAPS)) { + #ifdef CONFIG_CREDP + do { + kernel_cap_t tmp_cap = new->cap_permitted; + tmp_cap.val = 0; + iee_set_cred_cap_permitted(new, tmp_cap); + } while (0); + do { + kernel_cap_t tmp_cap = new->cap_effective; + tmp_cap.val = 0; + iee_set_cred_cap_effective(new, tmp_cap); + } while (0); + #else cap_clear(new->cap_permitted); cap_clear(new->cap_effective); + #endif } /* @@ -1101,12 +1197,32 @@ static inline void cap_emulate_setxuid(struct cred *new, const struct cred *old) * by exec to drop capabilities. We should make sure that * this remains the case. 
*/ + #ifdef CONFIG_CREDP + do { + kernel_cap_t tmp_cap = new->cap_ambient; + tmp_cap.val = 0; + iee_set_cred_cap_ambient(new, tmp_cap); + } while (0); + #else cap_clear(new->cap_ambient); + #endif } if (uid_eq(old->euid, root_uid) && !uid_eq(new->euid, root_uid)) + #ifdef CONFIG_CREDP + do { + kernel_cap_t tmp_cap = new->cap_effective; + tmp_cap.val = 0; + iee_set_cred_cap_effective(new, tmp_cap); + } while (0); + #else cap_clear(new->cap_effective); + #endif if (!uid_eq(old->euid, root_uid) && uid_eq(new->euid, root_uid)) + #ifdef CONFIG_CREDP + iee_set_cred_cap_effective(new,new->cap_permitted); + #else new->cap_effective = new->cap_permitted; + #endif } /** @@ -1142,13 +1258,22 @@ int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags) if (!issecure(SECURE_NO_SETUID_FIXUP)) { kuid_t root_uid = make_kuid(old->user_ns, 0); if (uid_eq(old->fsuid, root_uid) && !uid_eq(new->fsuid, root_uid)) + #ifdef CONFIG_CREDP + iee_set_cred_cap_effective(new,cap_drop_fs_set(new->cap_effective)); + #else new->cap_effective = cap_drop_fs_set(new->cap_effective); + #endif if (!uid_eq(old->fsuid, root_uid) && uid_eq(new->fsuid, root_uid)) + #ifdef CONFIG_CREDP + iee_set_cred_cap_effective(new,cap_raise_fs_set(new->cap_effective, + new->cap_permitted)); + #else new->cap_effective = cap_raise_fs_set(new->cap_effective, new->cap_permitted); + #endif } break; @@ -1243,7 +1368,15 @@ static int cap_prctl_drop(unsigned long cap) new = prepare_creds(); if (!new) return -ENOMEM; + #ifdef CONFIG_CREDP + { + kernel_cap_t tmp = new->cap_bset; + cap_lower(tmp, cap); + iee_set_cred_cap_bset(new, tmp); + } + #else cap_lower(new->cap_bset, cap); + #endif return commit_creds(new); } @@ -1319,7 +1452,11 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, new = prepare_creds(); if (!new) return -ENOMEM; + #ifdef CONFIG_CREDP + iee_set_cred_securebits(new,arg2); + #else new->securebits = arg2; + #endif return commit_creds(new); case PR_GET_SECUREBITS: @@ -1338,9 +1475,17 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, if (!new) return -ENOMEM; if (arg2) + #ifdef CONFIG_CREDP + iee_set_cred_securebits(new,new->securebits | issecure_mask(SECURE_KEEP_CAPS)); + #else new->securebits |= issecure_mask(SECURE_KEEP_CAPS); + #endif else + #ifdef CONFIG_CREDP + iee_set_cred_securebits(new,new->securebits & ~issecure_mask(SECURE_KEEP_CAPS)); + #else new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); + #endif return commit_creds(new); case PR_CAP_AMBIENT: @@ -1351,7 +1496,15 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, new = prepare_creds(); if (!new) return -ENOMEM; + #ifdef CONFIG_CREDP + do { + kernel_cap_t tmp_cap = new->cap_ambient; + tmp_cap.val = 0; + iee_set_cred_cap_ambient(new, tmp_cap); + } while (0); + #else cap_clear(new->cap_ambient); + #endif return commit_creds(new); } @@ -1375,9 +1528,25 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, if (!new) return -ENOMEM; if (arg2 == PR_CAP_AMBIENT_RAISE) + #ifdef CONFIG_CREDP + { + kernel_cap_t tmp = new->cap_ambient; + cap_raise(tmp, arg3); + iee_set_cred_cap_ambient(new, tmp); + } + #else cap_raise(new->cap_ambient, arg3); + #endif else + #ifdef CONFIG_CREDP + { + kernel_cap_t tmp = new->cap_ambient; + cap_lower(tmp, arg3); + iee_set_cred_cap_ambient(new, tmp); + } + #else cap_lower(new->cap_ambient, arg3); + #endif return commit_creds(new); } diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c index 
720e5913832f..41ecdd75961a 100644 --- a/security/integrity/evm/evm_crypto.c +++ b/security/integrity/evm/evm_crypto.c @@ -466,14 +466,26 @@ int evm_init_key(void) if (IS_ERR(evm_key)) return -ENOENT; + #ifdef CONFIG_KEYP + down_read(&KEY_SEM(evm_key)); + #else down_read(&evm_key->sem); + #endif + #ifdef CONFIG_KEYP + ekp = ((union key_payload *)(evm_key->name_link.next))->data[0]; + #else ekp = evm_key->payload.data[0]; + #endif rc = evm_set_key(ekp->decrypted_data, ekp->decrypted_datalen); /* burn the original key contents */ memset(ekp->decrypted_data, 0, ekp->decrypted_datalen); + #ifdef CONFIG_KEYP + up_read(&KEY_SEM(evm_key)); + #else up_read(&evm_key->sem); + #endif key_put(evm_key); return rc; } diff --git a/security/keys/big_key.c b/security/keys/big_key.c index c3367622c683..bc6a097f2f87 100644 --- a/security/keys/big_key.c +++ b/security/keys/big_key.c @@ -164,7 +164,11 @@ void big_key_free_preparse(struct key_preparsed_payload *prep) */ void big_key_revoke(struct key *key) { + #ifdef CONFIG_KEYP + struct big_key_payload *payload = to_big_key_payload(*((union key_payload *)(key->name_link.next))); + #else struct big_key_payload *payload = to_big_key_payload(key->payload); + #endif /* clear the quota */ key_payload_reserve(key, 0); @@ -177,7 +181,11 @@ void big_key_revoke(struct key *key) */ void big_key_destroy(struct key *key) { + #ifdef CONFIG_KEYP + struct big_key_payload *payload = to_big_key_payload(*((union key_payload *)(key->name_link.next))); + #else struct big_key_payload *payload = to_big_key_payload(key->payload); + #endif if (payload->length > BIG_KEY_FILE_THRESHOLD) { path_put(&payload->path); @@ -210,7 +218,11 @@ int big_key_update(struct key *key, struct key_preparsed_payload *prep) */ void big_key_describe(const struct key *key, struct seq_file *m) { + #ifdef CONFIG_KEYP + struct big_key_payload *payload = to_big_key_payload(*((union key_payload *)(key->name_link.next))); + #else struct big_key_payload *payload = to_big_key_payload(key->payload); + #endif seq_puts(m, key->description); @@ -226,7 +238,11 @@ void big_key_describe(const struct key *key, struct seq_file *m) */ long big_key_read(const struct key *key, char *buffer, size_t buflen) { + #ifdef CONFIG_KEYP + struct big_key_payload *payload = to_big_key_payload(*((union key_payload *)(key->name_link.next))); + #else struct big_key_payload *payload = to_big_key_payload(key->payload); + #endif size_t datalen = payload->length; long ret; diff --git a/security/keys/dh.c b/security/keys/dh.c index da64c358474b..f00a3e0c2c87 100644 --- a/security/keys/dh.c +++ b/security/keys/dh.c @@ -32,7 +32,11 @@ static ssize_t dh_data_from_key(key_serial_t keyid, const void **data) ret = -EOPNOTSUPP; if (key->type == &key_type_user) { + #ifdef CONFIG_KEYP + down_read(&KEY_SEM(key)); + #else down_read(&key->sem); + #endif status = key_validate(key); if (status == 0) { const struct user_key_payload *payload; @@ -49,7 +53,11 @@ static ssize_t dh_data_from_key(key_serial_t keyid, const void **data) ret = -ENOMEM; } } + #ifdef CONFIG_KEYP + up_read(&KEY_SEM(key)); + #else up_read(&key->sem); + #endif } key_put(key); diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index 1e313982af02..ef5e8f4f03f0 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -314,11 +314,19 @@ static struct key *request_user_key(const char *master_desc, const u8 **master_k if (IS_ERR(ukey)) goto error; + #ifdef CONFIG_KEYP + down_read(&KEY_SEM(ukey)); + #else 
down_read(&ukey->sem); + #endif upayload = user_key_payload_locked(ukey); if (!upayload) { /* key was revoked before we acquired its semaphore */ + #ifdef CONFIG_KEYP + up_read(&KEY_SEM(ukey)); + #else up_read(&ukey->sem); + #endif key_put(ukey); ukey = ERR_PTR(-EKEYREVOKED); goto error; @@ -729,7 +737,11 @@ static int encrypted_key_decrypt(struct encrypted_key_payload *epayload, if (ret < 0) pr_err("encrypted_key: failed to decrypt key (%d)\n", ret); out: + #ifdef CONFIG_KEYP + up_read(&KEY_SEM(mkey)); + #else up_read(&mkey->sem); + #endif key_put(mkey); memzero_explicit(derived_key, sizeof(derived_key)); return ret; @@ -874,7 +886,11 @@ static void encrypted_rcu_free(struct rcu_head *rcu) */ static int encrypted_update(struct key *key, struct key_preparsed_payload *prep) { + #ifdef CONFIG_KEYP + struct encrypted_key_payload *epayload = ((union key_payload *)(key->name_link.next))->data[0]; + #else struct encrypted_key_payload *epayload = key->payload.data[0]; + #endif struct encrypted_key_payload *new_epayload; char *buf; char *new_master_desc = NULL; @@ -974,7 +990,11 @@ static long encrypted_read(const struct key *key, char *buffer, goto out; } + #ifdef CONFIG_KEYP + up_read(&KEY_SEM(mkey)); + #else up_read(&mkey->sem); + #endif key_put(mkey); memzero_explicit(derived_key, sizeof(derived_key)); @@ -983,7 +1003,11 @@ static long encrypted_read(const struct key *key, char *buffer, return asciiblob_len; out: + #ifdef CONFIG_KEYP + up_read(&KEY_SEM(mkey)); + #else up_read(&mkey->sem); + #endif key_put(mkey); memzero_explicit(derived_key, sizeof(derived_key)); return ret; @@ -994,7 +1018,11 @@ static long encrypted_read(const struct key *key, char *buffer, */ static void encrypted_destroy(struct key *key) { + #ifdef CONFIG_KEYP + kfree_sensitive(((union key_payload *)(key->name_link.next))->data[0]); + #else kfree_sensitive(key->payload.data[0]); + #endif } struct key_type key_type_encrypted = { diff --git a/security/keys/encrypted-keys/masterkey_trusted.c b/security/keys/encrypted-keys/masterkey_trusted.c index e6d22ce77e98..13803e0f1a8f 100644 --- a/security/keys/encrypted-keys/masterkey_trusted.c +++ b/security/keys/encrypted-keys/masterkey_trusted.c @@ -34,8 +34,13 @@ struct key *request_trusted_key(const char *trusted_desc, if (IS_ERR(tkey)) goto error; + #ifdef CONFIG_KEYP + down_read(&KEY_SEM(tkey)); + tpayload = ((union key_payload *)(tkey->name_link.next))->data[0]; + #else down_read(&tkey->sem); tpayload = tkey->payload.data[0]; + #endif *master_key = tpayload->key; *master_keylen = tpayload->key_len; error: diff --git a/security/keys/gc.c b/security/keys/gc.c index eaddaceda14e..4038e7b95395 100644 --- a/security/keys/gc.c +++ b/security/keys/gc.c @@ -10,6 +10,11 @@ #include #include "internal.h" +#ifdef CONFIG_KEYP +#include +#include +#endif + /* * Delay between key revocation/expiry in seconds */ @@ -135,18 +140,30 @@ void key_gc_keytype(struct key_type *ktype) static noinline void key_gc_unused_keys(struct list_head *keys) { while (!list_empty(keys)) { + #ifdef CONFIG_KEYP + struct key *key = list_entry(keys->next, struct key_union, graveyard_link)->key; + #else struct key *key = list_entry(keys->next, struct key, graveyard_link); + #endif short state = key->state; + #ifdef CONFIG_KEYP + list_del(&(((struct key_union *)(key->graveyard_link.next))->graveyard_link)); + #else list_del(&key->graveyard_link); + #endif kdebug("- %u", key->serial); key_check(key); #ifdef CONFIG_KEY_NOTIFICATIONS remove_watch_list(key->watchers, key->serial); + #ifdef CONFIG_KEYP + 
iee_set_key_watchers(key, NULL); + #else key->watchers = NULL; + #endif #endif /* Throw away the key data if the key is instantiated */ @@ -171,7 +188,15 @@ static noinline void key_gc_unused_keys(struct list_head *keys) key_put_tag(key->domain_tag); kfree(key->description); + #ifdef CONFIG_KEYP + kmem_cache_free(key_union_jar,(struct key_union *)(key->graveyard_link.next)); + kmem_cache_free(key_struct_jar, (struct key_struct *)(key->name_link.prev)); + kmem_cache_free(key_payload_jar, (union key_payload *)(key->name_link.next)); + iee_memset(key, 0, sizeof(*key)); + barrier_data(key); + #else memzero_explicit(key, sizeof(*key)); + #endif kmem_cache_free(key_jar, key); } } @@ -223,7 +248,11 @@ static void key_garbage_collector(struct work_struct *work) continue_scanning: while (cursor) { + #ifdef CONFIG_KEYP + key = rb_entry(cursor, struct key_union, serial_node)->key; + #else key = rb_entry(cursor, struct key, serial_node); + #endif cursor = rb_next(cursor); if (refcount_read(&key->usage) == 0) @@ -232,8 +261,13 @@ static void key_garbage_collector(struct work_struct *work) if (unlikely(gc_state & KEY_GC_REAPING_DEAD_1)) { if (key->type == key_gc_dead_keytype) { gc_state |= KEY_GC_FOUND_DEAD_KEY; + #ifdef CONFIG_KEYP + iee_set_key_flag_bit(key, KEY_FLAG_DEAD, SET_BIT_OP); + iee_set_key_perm(key, 0); + #else set_bit(KEY_FLAG_DEAD, &key->flags); key->perm = 0; + #endif goto skip_dead_key; } else if (key->type == &key_type_keyring && key->restrict_link) { @@ -339,10 +373,18 @@ static void key_garbage_collector(struct work_struct *work) */ found_unreferenced_key: kdebug("unrefd key %d", key->serial); + #ifdef CONFIG_KEYP + rb_erase(&(((struct key_union *)(key->graveyard_link.next))->serial_node), &key_serial_tree); + #else rb_erase(&key->serial_node, &key_serial_tree); + #endif spin_unlock(&key_serial_lock); + #ifdef CONFIG_KEYP + list_add_tail(&(((struct key_union *)(key->graveyard_link.next))->graveyard_link), &graveyard); + #else list_add_tail(&key->graveyard_link, &graveyard); + #endif gc_state |= KEY_GC_REAP_AGAIN; goto maybe_resched; @@ -370,11 +412,21 @@ static void key_garbage_collector(struct work_struct *work) destroy_dead_key: spin_unlock(&key_serial_lock); kdebug("destroy key %d", key->serial); + #ifdef CONFIG_KEYP + down_write(&KEY_SEM(key)); + iee_set_key_type(key, &key_type_dead); + #else down_write(&key->sem); key->type = &key_type_dead; + #endif if (key_gc_dead_keytype->destroy) key_gc_dead_keytype->destroy(key); + #ifdef CONFIG_KEYP + iee_memset((key->name_link.next), KEY_DESTROY, sizeof(key->payload)); + up_write(&KEY_SEM(key)); + #else memset(&key->payload, KEY_DESTROY, sizeof(key->payload)); up_write(&key->sem); + #endif goto maybe_resched; } diff --git a/security/keys/internal.h b/security/keys/internal.h index ec2ec335b613..00d76f89179c 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -83,12 +83,20 @@ extern unsigned key_quota_maxbytes; extern struct kmem_cache *key_jar; +#ifdef CONFIG_KEYP +extern struct kmem_cache *key_union_jar; +extern struct kmem_cache *key_struct_jar; +extern struct kmem_cache *key_payload_jar; +#endif extern struct rb_root key_serial_tree; extern spinlock_t key_serial_lock; extern struct mutex key_construction_mutex; extern wait_queue_head_t request_key_conswq; extern void key_set_index_key(struct keyring_index_key *index_key); +#ifdef CONFIG_KEYP +extern void iee_key_set_index_key(struct keyring_index_key *index_key); +#endif extern struct key_type *key_type_lookup(const char *type); extern void key_type_put(struct 
key_type *ktype); diff --git a/security/keys/key.c b/security/keys/key.c index 35db23d05302..92b8840bed69 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -17,7 +17,18 @@ #include #include "internal.h" +#ifdef CONFIG_KEYP +#include +#include +#include +#endif + struct kmem_cache *key_jar; +#ifdef CONFIG_KEYP +struct kmem_cache *key_union_jar; +struct kmem_cache *key_struct_jar; +struct kmem_cache *key_payload_jar; +#endif struct rb_root key_serial_tree; /* tree of keys indexed by serial */ DEFINE_SPINLOCK(key_serial_lock); @@ -139,9 +150,16 @@ static inline void key_alloc_serial(struct key *key) /* propose a random serial number and look for a hole for it in the * serial number tree */ do { + #ifdef CONFIG_KEYP + key_serial_t tmp; + get_random_bytes(&tmp, sizeof(key->serial)); + + iee_set_key_serial(key, tmp >> 1); + #else get_random_bytes(&key->serial, sizeof(key->serial)); key->serial >>= 1; /* negative numbers are not permitted */ + #endif } while (key->serial < 3); spin_lock(&key_serial_lock); @@ -152,7 +170,11 @@ static inline void key_alloc_serial(struct key *key) while (*p) { parent = *p; + #ifdef CONFIG_KEYP + xkey = rb_entry(parent, struct key_union, serial_node)->key; + #else xkey = rb_entry(parent, struct key, serial_node); + #endif if (key->serial < xkey->serial) p = &(*p)->rb_left; @@ -163,8 +185,13 @@ static inline void key_alloc_serial(struct key *key) } /* we've found a suitable hole - arrange for this key to occupy it */ + #ifdef CONFIG_KEYP + rb_link_node(&(((struct key_union *)(key->graveyard_link.next))->serial_node), parent, p); + rb_insert_color(&(((struct key_union *)(key->graveyard_link.next))->serial_node), &key_serial_tree); + #else rb_link_node(&key->serial_node, parent, p); rb_insert_color(&key->serial_node, &key_serial_tree); + #endif spin_unlock(&key_serial_lock); return; @@ -173,9 +200,18 @@ static inline void key_alloc_serial(struct key *key) * that point looking for the next unused serial number */ serial_exists: for (;;) { + #ifdef CONFIG_KEYP + key_serial_t tmp = key->serial + 1; + iee_set_key_serial(key, tmp); + #else key->serial++; + #endif if (key->serial < 3) { + #ifdef CONFIG_KEYP + iee_set_key_serial(key, 3); + #else key->serial = 3; + #endif goto attempt_insertion; } @@ -183,7 +219,11 @@ static inline void key_alloc_serial(struct key *key) if (!parent) goto attempt_insertion; + #ifdef CONFIG_KEYP + xkey = rb_entry(parent, struct key_union, serial_node)->key; + #else xkey = rb_entry(parent, struct key, serial_node); + #endif if (key->serial < xkey->serial) goto attempt_insertion; } @@ -231,6 +271,9 @@ struct key *key_alloc(struct key_type *type, const char *desc, struct key *key; size_t desclen, quotalen; int ret; + #ifdef CONFIG_KEYP + unsigned long kflags; + #endif key = ERR_PTR(-EINVAL); if (!desc || !*desc) @@ -274,17 +317,72 @@ struct key *key_alloc(struct key_type *type, const char *desc, } /* allocate and initialise the key and its description */ + #ifdef CONFIG_KEYP + key = kmem_cache_alloc(key_jar, GFP_KERNEL); + #else key = kmem_cache_zalloc(key_jar, GFP_KERNEL); + #endif if (!key) goto no_memory_2; - + #ifdef CONFIG_KEYP + struct key_union *key_union = kmem_cache_zalloc(key_union_jar, GFP_KERNEL); + key_union->key = key; + struct key_struct *key_struct = kmem_cache_zalloc(key_struct_jar, GFP_KERNEL); + key_struct->key = key; + iee_set_key_union(key, key_union); + iee_set_key_struct(key, key_struct); + iee_set_key_payload(key, kmem_cache_alloc(key_payload_jar, GFP_KERNEL)); + #endif + + #ifdef CONFIG_KEYP + struct 
keyring_index_key tmp = key->index_key; + tmp.desc_len = desclen; + tmp.description = kmemdup(desc, desclen + 1, GFP_KERNEL); + iee_set_key_index_key(key, &tmp); + #else key->index_key.desc_len = desclen; key->index_key.description = kmemdup(desc, desclen + 1, GFP_KERNEL); + #endif if (!key->index_key.description) goto no_memory_3; + #ifdef CONFIG_KEYP + tmp = key->index_key; + tmp.type = type; + iee_set_key_index_key(key, &tmp); + iee_key_set_index_key(&key->index_key); + #else key->index_key.type = type; key_set_index_key(&key->index_key); + #endif + + #ifdef CONFIG_KEYP + iee_set_key_usage(key, 1, REFCOUNT_SET); + init_rwsem(&KEY_SEM(key)); + lockdep_set_class(&KEY_SEM(key), &type->lock_class); + iee_set_key_user(key, user); + iee_set_key_quotalen(key, quotalen); + iee_set_key_datalen(key, type->def_datalen); + iee_set_key_uid(key, uid); + iee_set_key_gid(key, gid); + iee_set_key_perm(key, perm); + iee_set_key_restrict_link(key, restrict_link); + iee_set_key_last_used_at(key, ktime_get_real_seconds()); + + kflags = key->flags; + if (!(flags & KEY_ALLOC_NOT_IN_QUOTA)) + kflags |= 1 << KEY_FLAG_IN_QUOTA; + if (flags & KEY_ALLOC_BUILT_IN) + kflags |= 1 << KEY_FLAG_BUILTIN; + if (flags & KEY_ALLOC_UID_KEYRING) + kflags |= 1 << KEY_FLAG_UID_KEYRING; + if (flags & KEY_ALLOC_SET_KEEP) + kflags |= 1 << KEY_FLAG_KEEP; + iee_set_key_flags(key, kflags); +#ifdef KEY_DEBUGGING + iee_set_key_magic(key, KEY_DEBUG_MAGIC); +#endif + #else refcount_set(&key->usage, 1); init_rwsem(&key->sem); lockdep_set_class(&key->sem, &type->lock_class); @@ -310,6 +408,7 @@ struct key *key_alloc(struct key_type *type, const char *desc, #ifdef KEY_DEBUGGING key->magic = KEY_DEBUG_MAGIC; #endif + #endif /* let the security module know about the key */ ret = security_key_alloc(key, cred, flags); @@ -326,6 +425,12 @@ struct key *key_alloc(struct key_type *type, const char *desc, security_error: kfree(key->description); + #ifdef CONFIG_KEYP + kmem_cache_free(key_union_jar,(struct key_union *)(key->graveyard_link.next)); + kmem_cache_free(key_struct_jar, (struct key_struct *)(key->name_link.prev)); + kmem_cache_free(key_payload_jar, (union key_payload *)(key->name_link.next)); + iee_memset(key, 0, sizeof(struct key)); + #endif kmem_cache_free(key_jar, key); if (!(flags & KEY_ALLOC_NOT_IN_QUOTA)) { spin_lock(&user->lock); @@ -338,6 +443,12 @@ struct key *key_alloc(struct key_type *type, const char *desc, goto error; no_memory_3: + #ifdef CONFIG_KEYP + kmem_cache_free(key_union_jar,(struct key_union *)(key->graveyard_link.next)); + kmem_cache_free(key_struct_jar, (struct key_struct *)(key->name_link.prev)); + kmem_cache_free(key_payload_jar, (union key_payload *)(key->name_link.next)); + iee_memset(key, 0, sizeof(struct key)); + #endif kmem_cache_free(key_jar, key); no_memory_2: if (!(flags & KEY_ALLOC_NOT_IN_QUOTA)) { @@ -391,14 +502,22 @@ int key_payload_reserve(struct key *key, size_t datalen) } else { key->user->qnbytes += delta; + #ifdef CONFIG_KEYP + iee_set_key_quotalen(key, key->quotalen + delta); + #else key->quotalen += delta; + #endif } spin_unlock(&key->user->lock); } /* change the recorded data length if that didn't generate an error */ if (ret == 0) + #ifdef CONFIG_KEYP + iee_set_key_datalen(key, datalen); + #else key->datalen = datalen; + #endif return ret; } @@ -412,8 +531,14 @@ static void mark_key_instantiated(struct key *key, int reject_error) /* Commit the payload before setting the state; barrier versus * key_read_state(). 
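The CONFIG_KEYP branch of key_alloc() above attaches three out-of-line companions to every key (a struct key_union, a struct key_struct and a detached union key_payload, each from its own slab cache), and the rest of the patch reaches them by re-casting two pointer slots inside struct key. The helpers below only give names to those casts; they are illustrative and not part of the patch, but the cast expressions are exactly the ones repeated at the call sites.

#include <linux/key.h>
/* struct key_union and struct key_struct are types introduced by this patch. */

#ifdef CONFIG_KEYP
static inline struct key_union *key_to_union(const struct key *key)
{
	/* key_alloc() leaves the key_union pointer in graveyard_link.next */
	return (struct key_union *)key->graveyard_link.next;
}

static inline struct key_struct *key_to_struct(const struct key *key)
{
	/* name_link.prev is repurposed to reach the key_struct (name list, keyring assoc_array) */
	return (struct key_struct *)key->name_link.prev;
}

static inline union key_payload *key_to_payload(const struct key *key)
{
	/* name_link.next is repurposed to reach the out-of-line payload */
	return (union key_payload *)key->name_link.next;
}
#endif /* CONFIG_KEYP */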
*/ + #ifdef CONFIG_KEYP + compiletime_assert_atomic_type(key->state); + barrier(); + iee_set_key_state(key, (reject_error < 0) ? reject_error : KEY_IS_POSITIVE); + #else smp_store_release(&key->state, (reject_error < 0) ? reject_error : KEY_IS_POSITIVE); + #endif } /* @@ -449,13 +574,22 @@ static int __key_instantiate_and_link(struct key *key, mark_key_instantiated(key, 0); notify_key(key, NOTIFY_KEY_INSTANTIATED, 0); + #ifdef CONFIG_KEYP + if (iee_set_key_flag_bit(key, KEY_FLAG_USER_CONSTRUCT, TEST_AND_CLEAR_BIT)) + awaken = 1; + #else if (test_and_clear_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags)) awaken = 1; + #endif /* and link it into the destination keyring */ if (keyring) { if (test_bit(KEY_FLAG_KEEP, &keyring->flags)) + #ifdef CONFIG_KEYP + iee_set_key_flag_bit(key, KEY_FLAG_KEEP, SET_BIT_OP); + #else set_bit(KEY_FLAG_KEEP, &key->flags); + #endif __key_link(keyring, key, _edit); } @@ -464,8 +598,14 @@ static int __key_instantiate_and_link(struct key *key, if (authkey) key_invalidate(authkey); - if (prep->expiry != TIME64_MAX) + if (prep->expiry != TIME64_MAX) { + #ifdef CONFIG_KEYP + iee_set_key_expiry(key, prep->expiry); + #else key_set_expiry(key, prep->expiry); + #endif + key_schedule_gc(prep->expiry + key_gc_delay); + } } } @@ -605,10 +745,20 @@ int key_reject_and_link(struct key *key, atomic_inc(&key->user->nikeys); mark_key_instantiated(key, -error); notify_key(key, NOTIFY_KEY_INSTANTIATED, -error); + #ifdef CONFIG_KEYP + iee_set_key_expiry(key, ktime_get_real_seconds() + timeout); + #else key_set_expiry(key, ktime_get_real_seconds() + timeout); + #endif + key_schedule_gc(key->expiry + key_gc_delay); + #ifdef CONFIG_KEYP + if (iee_set_key_flag_bit(key, KEY_FLAG_USER_CONSTRUCT, TEST_AND_CLEAR_BIT)) + awaken = 1; + #else if (test_and_clear_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags)) awaken = 1; + #endif ret = 0; @@ -647,8 +797,13 @@ void key_put(struct key *key) if (key) { key_check(key); + #ifdef CONFIG_KEYP + if (iee_set_key_usage(key, 0, REFCOUNT_DEC_AND_TEST)) + schedule_work(&key_gc_work); + #else if (refcount_dec_and_test(&key->usage)) schedule_work(&key_gc_work); + #endif } } EXPORT_SYMBOL(key_put); @@ -666,7 +821,11 @@ struct key *key_lookup(key_serial_t id) /* search the tree for the specified key */ n = key_serial_tree.rb_node; while (n) { + #ifdef CONFIG_KEYP + key = rb_entry(n, struct key_union, serial_node)->key; + #else key = rb_entry(n, struct key, serial_node); + #endif if (id < key->serial) n = n->rb_left; @@ -684,8 +843,13 @@ struct key *key_lookup(key_serial_t id) /* A key is allowed to be looked up only if someone still owns a * reference to it - otherwise it's awaiting the gc. 
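Every usage-count operation in the CONFIG_KEYP code goes through iee_set_key_usage(key, value, op), as in the key_put() hunk above and the key_lookup() hunk that follows. The op constants come from the patch's IEE headers; the function below is not the patch's implementation (which performs the write inside the IEE gate) but a plain-kernel reference showing the semantics each call site expects from the return value.

#include <linux/refcount.h>
#include <linux/key.h>
#include <asm/iee-def.h>	/* assumed: supplies REFCOUNT_SET, REFCOUNT_INC_NOT_ZERO, REFCOUNT_DEC_AND_TEST */

/* Reference behaviour only; illustrative, not the gated implementation. */
static bool plain_key_usage_op(struct key *key, int value, int op)
{
	switch (op) {
	case REFCOUNT_SET:		/* key_alloc(): usage starts at 1 */
		refcount_set(&key->usage, value);
		return true;
	case REFCOUNT_INC_NOT_ZERO:	/* key_lookup(), find_keyring_by_name() */
		return refcount_inc_not_zero(&key->usage);
	case REFCOUNT_DEC_AND_TEST:	/* key_put(): true once the key becomes unused */
		return refcount_dec_and_test(&key->usage);
	default:
		return false;
	}
}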
*/ + #ifdef CONFIG_KEYP + if (!iee_set_key_usage(key, 0, REFCOUNT_INC_NOT_ZERO)) + goto not_found; + #else if (!refcount_inc_not_zero(&key->usage)) goto not_found; + #endif error: spin_unlock(&key_serial_lock); @@ -723,13 +887,27 @@ void key_set_timeout(struct key *key, unsigned timeout) time64_t expiry = TIME64_MAX; /* make the changes with the locks held to prevent races */ + #ifdef CONFIG_KEYP + down_write(&KEY_SEM(key)); + #else down_write(&key->sem); + #endif if (timeout > 0) expiry = ktime_get_real_seconds() + timeout; + + #ifdef CONFIG_KEYP + iee_set_key_expiry(key, expiry); + #else key_set_expiry(key, expiry); + #endif + key_schedule_gc(key->expiry + key_gc_delay); + #ifdef CONFIG_KEYP + up_write(&KEY_SEM(key)); + #else up_write(&key->sem); + #endif } EXPORT_SYMBOL_GPL(key_set_timeout); @@ -762,7 +940,11 @@ static inline key_ref_t __key_update(key_ref_t key_ref, if (!key->type->update) goto error; + #ifdef CONFIG_KEYP + down_write(&KEY_SEM(key)); + #else down_write(&key->sem); + #endif ret = key->type->update(key, prep); if (ret == 0) { @@ -771,7 +953,11 @@ static inline key_ref_t __key_update(key_ref_t key_ref, notify_key(key, NOTIFY_KEY_UPDATED, 0); } + #ifdef CONFIG_KEYP + up_write(&KEY_SEM(key)); + #else up_write(&key->sem); + #endif if (ret < 0) goto error; @@ -1087,7 +1273,11 @@ int key_update(key_ref_t key_ref, const void *payload, size_t plen) goto error; } + #ifdef CONFIG_KEYP + down_write(&KEY_SEM(key)); + #else down_write(&key->sem); + #endif ret = key->type->update(key, &prep); if (ret == 0) { @@ -1096,7 +1286,11 @@ int key_update(key_ref_t key_ref, const void *payload, size_t plen) notify_key(key, NOTIFY_KEY_UPDATED, 0); } + #ifdef CONFIG_KEYP + up_write(&KEY_SEM(key)); + #else up_write(&key->sem); + #endif error: if (key->type->preparse) @@ -1125,6 +1319,23 @@ void key_revoke(struct key *key) * authorisation key whilst holding the sem on a key we've just * instantiated */ + #ifdef CONFIG_KEYP + down_write_nested(&KEY_SEM(key), 1); + if (!iee_set_key_flag_bit(key, KEY_FLAG_REVOKED, TEST_AND_SET_BIT)) { + notify_key(key, NOTIFY_KEY_REVOKED, 0); + if (key->type->revoke) + key->type->revoke(key); + + /* set the death time to no more than the expiry time */ + time = ktime_get_real_seconds(); + if (key->revoked_at == 0 || key->revoked_at > time) { + iee_set_key_revoked_at(key, time); + key_schedule_gc(key->revoked_at + key_gc_delay); + } + } + + up_write(&KEY_SEM(key)); + #else down_write_nested(&key->sem, 1); if (!test_and_set_bit(KEY_FLAG_REVOKED, &key->flags)) { notify_key(key, NOTIFY_KEY_REVOKED, 0); @@ -1140,6 +1351,7 @@ void key_revoke(struct key *key) } up_write(&key->sem); + #endif } EXPORT_SYMBOL(key_revoke); @@ -1157,12 +1369,21 @@ void key_invalidate(struct key *key) key_check(key); if (!test_bit(KEY_FLAG_INVALIDATED, &key->flags)) { + #ifdef CONFIG_KEYP + down_write_nested(&KEY_SEM(key), 1); + if (!iee_set_key_flag_bit(key, KEY_FLAG_INVALIDATED, TEST_AND_SET_BIT)) { + notify_key(key, NOTIFY_KEY_INVALIDATED, 0); + key_schedule_gc_links(); + } + up_write(&KEY_SEM(key)); + #else down_write_nested(&key->sem, 1); if (!test_and_set_bit(KEY_FLAG_INVALIDATED, &key->flags)) { notify_key(key, NOTIFY_KEY_INVALIDATED, 0); key_schedule_gc_links(); } up_write(&key->sem); + #endif } } EXPORT_SYMBOL(key_invalidate); @@ -1186,9 +1407,16 @@ int generic_key_instantiate(struct key *key, struct key_preparsed_payload *prep) ret = key_payload_reserve(key, prep->quotalen); if (ret == 0) { rcu_assign_keypointer(key, prep->payload.data[0]); + #ifdef CONFIG_KEYP + union key_payload 
*key_payload = ((union key_payload *)(key->name_link.next)); + key_payload->data[1] = prep->payload.data[1]; + key_payload->data[2] = prep->payload.data[2]; + key_payload->data[3] = prep->payload.data[3]; + #else key->payload.data[1] = prep->payload.data[1]; key->payload.data[2] = prep->payload.data[2]; key->payload.data[3] = prep->payload.data[3]; + #endif prep->payload.data[0] = NULL; prep->payload.data[1] = NULL; prep->payload.data[2] = NULL; @@ -1262,6 +1490,11 @@ void __init key_init(void) /* allocate a slab in which we can store keys */ key_jar = kmem_cache_create("key_jar", sizeof(struct key), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + #ifdef CONFIG_KEYP + key_union_jar = kmem_cache_create("key_union_jar", sizeof(struct key_union), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + key_struct_jar = kmem_cache_create("key_struct_jar", sizeof(struct key_struct), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + key_payload_jar = kmem_cache_create("key_payload_jar", sizeof(union key_payload)*2, 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + #endif /* add the special key types */ list_add_tail(&key_type_keyring.link, &key_types_list); diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index aa1dc43b16dd..e92df31642d5 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -23,6 +23,12 @@ #include #include #include "internal.h" +#ifdef CONFIG_CREDP +#include +#endif +#ifdef CONFIG_KEYP +#include +#endif #define KEY_MAX_DESC_SIZE 4096 @@ -804,11 +810,19 @@ static long __keyctl_read_key(struct key *key, char *buffer, size_t buflen) { long ret; + #ifdef CONFIG_KEYP + down_read(&KEY_SEM(key)); + #else down_read(&key->sem); + #endif ret = key_validate(key); if (ret == 0) ret = key->type->read(key, buffer, buflen); + #ifdef CONFIG_KEYP + up_read(&KEY_SEM(key)); + #else up_read(&key->sem); + #endif return ret; } @@ -978,7 +992,11 @@ long keyctl_chown_key(key_serial_t id, uid_t user, gid_t group) /* make the changes with the locks held to prevent chown/chown races */ ret = -EACCES; + #ifdef CONFIG_KEYP + down_write(&KEY_SEM(key)); + #else down_write(&key->sem); + #endif { bool is_privileged_op = false; @@ -1036,19 +1054,32 @@ long keyctl_chown_key(key_serial_t id, uid_t user, gid_t group) } zapowner = key->user; + #ifdef CONFIG_KEYP + iee_set_key_user(key, newowner); + iee_set_key_uid(key, uid); + #else key->user = newowner; key->uid = uid; + #endif } /* change the GID */ if (group != (gid_t) -1) + #ifdef CONFIG_KEYP + iee_set_key_gid(key, gid); + #else key->gid = gid; + #endif notify_key(key, NOTIFY_KEY_SETATTR, 0); ret = 0; error_put: + #ifdef CONFIG_KEYP + up_write(&KEY_SEM(key)); + #else up_write(&key->sem); + #endif key_put(key); if (zapowner) key_user_put(zapowner); @@ -1090,16 +1121,28 @@ long keyctl_setperm_key(key_serial_t id, key_perm_t perm) /* make the changes with the locks held to prevent chown/chmod races */ ret = -EACCES; + #ifdef CONFIG_KEYP + down_write(&KEY_SEM(key)); + #else down_write(&key->sem); + #endif /* if we're not the sysadmin, we can only change a key that we own */ if (uid_eq(key->uid, current_fsuid()) || capable(CAP_SYS_ADMIN)) { + #ifdef CONFIG_KEYP + iee_set_key_perm(key, perm); + #else key->perm = perm; + #endif notify_key(key, NOTIFY_KEY_SETATTR, 0); ret = 0; } + #ifdef CONFIG_KEYP + up_write(&KEY_SEM(key)); + #else up_write(&key->sem); + #endif key_put(key); error: return ret; @@ -1155,7 +1198,11 @@ static int keyctl_change_reqkey_auth(struct key *key) return -ENOMEM; key_put(new->request_key_auth); + #ifdef CONFIG_CREDP + 
iee_set_cred_request_key_auth(new,key_get(key)); + #else new->request_key_auth = key_get(key); + #endif return commit_creds(new); } @@ -1196,7 +1243,11 @@ static long keyctl_instantiate_key_common(key_serial_t id, if (!instkey) goto error; + #ifdef CONFIG_KEYP + rka = ((union key_payload *)(instkey->name_link.next))->data[0]; + #else rka = instkey->payload.data[0]; + #endif if (rka->target_key->serial != id) goto error; @@ -1358,7 +1409,11 @@ long keyctl_reject_key(key_serial_t id, unsigned timeout, unsigned error, if (!instkey) goto error; + #ifdef CONFIG_KEYP + rka = ((union key_payload *)(instkey->name_link.next))->data[0]; + #else rka = instkey->payload.data[0]; + #endif if (rka->target_key->serial != id) goto error; @@ -1432,7 +1487,11 @@ long keyctl_set_reqkey_keyring(int reqkey_defl) } set: + #ifdef CONFIG_CREDP + iee_set_cred_jit_keyring(new,reqkey_defl); + #else new->jit_keyring = reqkey_defl; + #endif commit_creds(new); return old_setting; error: @@ -1644,9 +1703,17 @@ long keyctl_session_to_parent(void) cred = cred_alloc_blank(); if (!cred) goto error_keyring; + #ifdef CONFIG_CREDP + newwork = (struct rcu_head *)(cred->rcu.func); + #else newwork = &cred->rcu; + #endif + #ifdef CONFIG_CREDP + iee_set_cred_session_keyring(cred,key_ref_to_ptr(keyring_r)); + #else cred->session_keyring = key_ref_to_ptr(keyring_r); + #endif keyring_r = NULL; init_task_work(newwork, key_change_session_keyring); @@ -1705,7 +1772,11 @@ long keyctl_session_to_parent(void) write_unlock_irq(&tasklist_lock); rcu_read_unlock(); if (oldwork) + #ifdef CONFIG_CREDP + put_cred(*(struct cred **)(oldwork + 1)); + #else put_cred(container_of(oldwork, struct cred, rcu)); + #endif if (newwork) put_cred(cred); return ret; @@ -1814,25 +1885,45 @@ long keyctl_watch_key(key_serial_t id, int watch_queue_fd, int watch_id) if (ret < 0) goto err_watch; + #ifdef CONFIG_KEYP + down_write(&KEY_SEM(key)); + #else down_write(&key->sem); + #endif if (!key->watchers) { + #ifdef CONFIG_KEYP + iee_set_key_watchers(key, wlist); + #else key->watchers = wlist; + #endif wlist = NULL; } ret = add_watch_to_object(watch, key->watchers); + #ifdef CONFIG_KEYP + up_write(&KEY_SEM(key)); + #else up_write(&key->sem); + #endif if (ret == 0) watch = NULL; } else { ret = -EBADSLT; if (key->watchers) { + #ifdef CONFIG_KEYP + down_write(&KEY_SEM(key)); + #else down_write(&key->sem); + #endif ret = remove_watch_from_object(key->watchers, wqueue, key_serial(key), false); + #ifdef CONFIG_KEYP + up_write(&KEY_SEM(key)); + #else up_write(&key->sem); + #endif } } diff --git a/security/keys/keyring.c b/security/keys/keyring.c index f331725d5a37..b8bb07a55130 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -21,6 +21,11 @@ #include #include "internal.h" +#ifdef CONFIG_KEYP +#include +#include +#endif + /* * When plumbing the depths of the key tree, this sets a hard limit * set on how deep we're willing to go. 
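In the keyctl_session_to_parent() hunk above, CONFIG_CREDP changes how the task-work item and its cred find each other: the callback head is no longer the cred's embedded rcu field (cred->rcu.func holds a pointer to it instead), and the owning cred is recovered from the word immediately after that head, replacing container_of(). The layout below is an assumption inferred from that pointer arithmetic; the structure name and shape are illustrative, not taken from the patch.

#include <linux/types.h>
#include <linux/cred.h>

#ifdef CONFIG_CREDP
/*
 * Assumed out-of-line work block:
 *   cred->rcu.func              -> &blob->head   (what keyctl_session_to_parent() loads)
 *   *(struct cred **)(head + 1) -> blob->owner   (what key_change_session_keyring() frees)
 */
struct credp_rcu_blob {			/* hypothetical name */
	struct callback_head	head;	/* handed to init_task_work() in place of &cred->rcu */
	struct cred		*owner;	/* back-pointer replacing container_of(head, struct cred, rcu) */
};

static inline struct cred *credp_cred_of_work(struct callback_head *work)
{
	return *(struct cred **)(work + 1);	/* same expression the patch uses */
}
#endif /* CONFIG_CREDP */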
@@ -112,7 +117,11 @@ static void keyring_publish_name(struct key *keyring) keyring->description[0] && keyring->description[0] != '.') { write_lock(&keyring_name_lock); + #ifdef CONFIG_KEYP + list_add_tail(&(((struct key_struct *)(keyring->name_link.prev))->name_link), &ns->keyring_name_list); + #else list_add_tail(&keyring->name_link, &ns->keyring_name_list); + #endif write_unlock(&keyring_name_lock); } } @@ -140,7 +149,11 @@ static void keyring_free_preparse(struct key_preparsed_payload *prep) static int keyring_instantiate(struct key *keyring, struct key_preparsed_payload *prep) { + #ifdef CONFIG_KEYP + assoc_array_init(&((struct key_struct *)(keyring->name_link.prev))->keys); + #else assoc_array_init(&keyring->keys); + #endif /* make the keyring available by name if it has one */ keyring_publish_name(keyring); return 0; @@ -207,13 +220,70 @@ static void hash_key_type_and_desc(struct keyring_index_key *index_key) index_key->hash = hash; } +#ifdef CONFIG_KEYP +static void iee_hash_key_type_and_desc(struct keyring_index_key *index_key) +{ + const unsigned level_shift = ASSOC_ARRAY_LEVEL_STEP; + const unsigned long fan_mask = ASSOC_ARRAY_FAN_MASK; + const char *description = index_key->description; + unsigned long hash, type; + u32 piece; + u64 acc; + int n, desc_len = index_key->desc_len; + + type = (unsigned long)index_key->type; + acc = mult_64x32_and_fold(type, desc_len + 13); + acc = mult_64x32_and_fold(acc, 9207); + piece = (unsigned long)index_key->domain_tag; + acc = mult_64x32_and_fold(acc, piece); + acc = mult_64x32_and_fold(acc, 9207); + + for (;;) { + n = desc_len; + if (n <= 0) + break; + if (n > 4) + n = 4; + piece = 0; + memcpy(&piece, description, n); + description += n; + desc_len -= n; + acc = mult_64x32_and_fold(acc, piece); + acc = mult_64x32_and_fold(acc, 9207); + } + + /* Fold the hash down to 32 bits if need be. */ + hash = acc; + if (ASSOC_ARRAY_KEY_CHUNK_SIZE == 32) + hash ^= acc >> 32; + + /* Squidge all the keyrings into a separate part of the tree to + * ordinary keys by making sure the lowest level segment in the hash is + * zero for keyrings and non-zero otherwise. + */ + if (index_key->type != &key_type_keyring && (hash & fan_mask) == 0) + hash |= (hash >> (ASSOC_ARRAY_KEY_CHUNK_SIZE - level_shift)) | 1; + else if (index_key->type == &key_type_keyring && (hash & fan_mask) != 0) + hash = (hash + (hash << level_shift)) & ~fan_mask; + struct keyring_index_key tmp = *index_key; + tmp.hash = hash; + iee_set_key_index_key(container_of(index_key, struct key, index_key), &tmp); +} +#endif + +#ifdef CONFIG_KEYP +static struct key_tag default_domain_tag = { .usage = REFCOUNT_INIT(1), }; +#endif + /* * Finalise an index key to include a part of the description actually in the * index key, to set the domain tag and to calculate the hash. 
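iee_hash_key_type_and_desc() above finishes with the write-back idiom used throughout the CONFIG_KEYP code: build the new value in a stack copy, then publish the whole keyring_index_key through the IEE setter instead of assigning index_key->hash in place. Isolated as a helper for readability (the helper name is made up; the three statements are the ones the patch uses):

#include <linux/key.h>
#include <linux/container_of.h>

#ifdef CONFIG_KEYP
static void keyp_publish_index_hash(struct keyring_index_key *index_key,
				    unsigned long hash)
{
	struct keyring_index_key tmp = *index_key;	/* work on a private copy */

	tmp.hash = hash;
	/* index_key is embedded in struct key, so recover the key and write the
	 * copy back through the gate rather than touching the live field. */
	iee_set_key_index_key(container_of(index_key, struct key, index_key), &tmp);
}
#endif /* CONFIG_KEYP */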
*/ void key_set_index_key(struct keyring_index_key *index_key) { + #ifndef CONFIG_KEYP static struct key_tag default_domain_tag = { .usage = REFCOUNT_INIT(1), }; + #endif size_t n = min_t(size_t, index_key->desc_len, sizeof(index_key->desc)); memcpy(index_key->desc, index_key->description, n); @@ -228,6 +298,33 @@ void key_set_index_key(struct keyring_index_key *index_key) hash_key_type_and_desc(index_key); } +#ifdef CONFIG_KEYP +void iee_key_set_index_key(struct keyring_index_key *index_key) +{ + size_t n = min_t(size_t, index_key->desc_len, sizeof(index_key->desc)); + struct keyring_index_key tmp; + + iee_memcpy(index_key->desc, index_key->description, n); + + if (!index_key->domain_tag) { + if (index_key->type->flags & KEY_TYPE_NET_DOMAIN) + { + tmp = *index_key; + tmp.domain_tag = current->nsproxy->net_ns->key_domain; + iee_set_key_index_key(container_of(index_key, struct key, index_key), &tmp); + } + else + { + tmp = *index_key; + tmp.domain_tag = &default_domain_tag; + iee_set_key_index_key(container_of(index_key, struct key, index_key), &tmp); + } + } + + iee_hash_key_type_and_desc(index_key); +} +#endif + /** * key_put_tag - Release a ref on a tag. * @tag: The tag to release. @@ -414,9 +511,15 @@ static void keyring_destroy(struct key *keyring) if (keyring->description) { write_lock(&keyring_name_lock); + #ifdef CONFIG_KEYP + if(((struct key_struct *)(keyring->name_link.prev))->name_link.next != NULL && + !list_empty(&(((struct key_struct *)(keyring->name_link.prev))->name_link))) + list_del(&(((struct key_struct *)(keyring->name_link.prev))->name_link)); + #else if (keyring->name_link.next != NULL && !list_empty(&keyring->name_link)) list_del(&keyring->name_link); + #endif write_unlock(&keyring_name_lock); } @@ -428,7 +531,11 @@ static void keyring_destroy(struct key *keyring) kfree(keyres); } + #ifdef CONFIG_KEYP + assoc_array_destroy(&((struct key_struct *)(keyring->name_link.prev))->keys, &keyring_assoc_array_ops); + #else assoc_array_destroy(&keyring->keys, &keyring_assoc_array_ops); + #endif } /* @@ -442,8 +549,13 @@ static void keyring_describe(const struct key *keyring, struct seq_file *m) seq_puts(m, "[anon]"); if (key_is_positive(keyring)) { + #ifdef CONFIG_KEYP + if (((struct key_struct *)(keyring->name_link.prev))->keys.nr_leaves_on_tree != 0) + seq_printf(m, ": %lu", ((struct key_struct *)(keyring->name_link.prev))->keys.nr_leaves_on_tree); + #else if (keyring->keys.nr_leaves_on_tree != 0) seq_printf(m, ": %lu", keyring->keys.nr_leaves_on_tree); + #endif else seq_puts(m, ": empty"); } @@ -494,8 +606,13 @@ static long keyring_read(const struct key *keyring, ctx.buffer = (key_serial_t *)buffer; ctx.buflen = buflen; ctx.count = 0; + #ifdef CONFIG_KEYP + ret = assoc_array_iterate(&((struct key_struct *)(keyring->name_link.prev))->keys, + keyring_read_iterator, &ctx); + #else ret = assoc_array_iterate(&keyring->keys, keyring_read_iterator, &ctx); + #endif if (ret < 0) { kleave(" = %ld [iterate]", ret); return ret; @@ -503,7 +620,11 @@ static long keyring_read(const struct key *keyring, } /* Return the size of the buffer needed */ + #ifdef CONFIG_KEYP + ret = ((struct key_struct *)(keyring->name_link.prev))->keys.nr_leaves_on_tree * sizeof(key_serial_t); + #else ret = keyring->keys.nr_leaves_on_tree * sizeof(key_serial_t); + #endif if (ret <= buflen) kleave("= %ld [ok]", ret); else @@ -648,12 +769,22 @@ static int search_keyring(struct key *keyring, struct keyring_search_context *ct if (ctx->match_data.lookup_type == KEYRING_SEARCH_LOOKUP_DIRECT) { const void *object; + 
#ifdef CONFIG_KEYP + object = assoc_array_find(&((struct key_struct *)(keyring->name_link.prev))->keys, + &keyring_assoc_array_ops, + &ctx->index_key); + #else object = assoc_array_find(&keyring->keys, &keyring_assoc_array_ops, &ctx->index_key); + #endif return object ? ctx->iterator(object, ctx) : 0; } + #ifdef CONFIG_KEYP + return assoc_array_iterate(&((struct key_struct *)(keyring->name_link.prev))->keys, ctx->iterator, ctx); + #else return assoc_array_iterate(&keyring->keys, ctx->iterator, ctx); + #endif } /* @@ -729,7 +860,11 @@ static bool search_nested_keyrings(struct key *keyring, if (!(ctx->flags & KEYRING_SEARCH_RECURSE)) goto not_this_keyring; + #ifdef CONFIG_KEYP + ptr = READ_ONCE(((struct key_struct *)(keyring->name_link.prev))->keys.root); + #else ptr = READ_ONCE(keyring->keys.root); + #endif if (!ptr) goto not_this_keyring; @@ -856,10 +991,17 @@ static bool search_nested_keyrings(struct key *keyring, key = key_ref_to_ptr(ctx->result); key_check(key); if (!(ctx->flags & KEYRING_SEARCH_NO_UPDATE_TIME)) { + #ifdef CONFIG_KEYP + iee_set_key_last_used_at(key, ctx->now); + iee_set_key_last_used_at(keyring, ctx->now); + while (sp > 0) + iee_set_key_last_used_at(stack[--sp].keyring, ctx->now); + #else key->last_used_at = ctx->now; keyring->last_used_at = ctx->now; while (sp > 0) stack[--sp].keyring->last_used_at = ctx->now; + #endif } kleave(" = true"); return true; @@ -1056,7 +1198,11 @@ int keyring_restrict(key_ref_t keyring_ref, const char *type, goto error; } + #ifdef CONFIG_KEYP + down_write(&KEY_SEM(keyring)); + #else down_write(&keyring->sem); + #endif down_write(&keyring_serialise_restrict_sem); if (keyring->restrict_link) { @@ -1064,12 +1210,20 @@ int keyring_restrict(key_ref_t keyring_ref, const char *type, } else if (keyring_detect_restriction_cycle(keyring, restrict_link)) { ret = -EDEADLK; } else { + #ifdef CONFIG_KEYP + iee_set_key_restrict_link(keyring, restrict_link); + #else keyring->restrict_link = restrict_link; + #endif notify_key(keyring, NOTIFY_KEY_SETATTR, 0); } up_write(&keyring_serialise_restrict_sem); + #ifdef CONFIG_KEYP + up_write(&KEY_SEM(keyring)); + #else up_write(&keyring->sem); + #endif if (ret < 0) { key_put(restrict_link->key); @@ -1109,8 +1263,13 @@ key_ref_t find_key_to_update(key_ref_t keyring_ref, kenter("{%d},{%s,%s}", keyring->serial, index_key->type->name, index_key->description); + #ifdef CONFIG_KEYP + object = assoc_array_find(&((struct key_struct *)(keyring->name_link.prev))->keys, &keyring_assoc_array_ops, + index_key); + #else object = assoc_array_find(&keyring->keys, &keyring_assoc_array_ops, index_key); + #endif if (object) goto found; @@ -1154,7 +1313,13 @@ struct key *find_keyring_by_name(const char *name, bool uid_keyring) /* Search this hash bucket for a keyring with a matching name that * grants Search permission and that hasn't been revoked */ + #ifdef CONFIG_KEYP + for(keyring = list_first_entry(&ns->keyring_name_list, struct key_struct, name_link)->key; + !(&(((struct key_struct *)(keyring->name_link.prev))->name_link) == (&ns->keyring_name_list)); + keyring = list_entry(((struct key_struct *)(keyring->name_link.prev))->name_link.next, struct key_struct, name_link)->key) { + #else list_for_each_entry(keyring, &ns->keyring_name_list, name_link) { + #endif if (!kuid_has_mapping(ns, keyring->user->uid)) continue; @@ -1177,9 +1342,15 @@ struct key *find_keyring_by_name(const char *name, bool uid_keyring) /* we've got a match but we might end up racing with * key_cleanup() if the keyring is currently 'dead' * (ie. 
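Because CONFIG_KEYP moves the name_link node from struct key into the companion struct key_struct, find_keyring_by_name() above has to open-code the walk that list_for_each_entry() used to do and convert each node back into a struct key by hand. The macro below is purely illustrative (the patch keeps the loop inline); it repeats the same three for() clauses so they can be compared with the stock iterator. Usage would be keyp_for_each_named_keyring(keyring, &ns->keyring_name_list) { ... }.

#include <linux/list.h>
#include <linux/key.h>
/* struct key_struct is a type introduced by this patch. */

#ifdef CONFIG_KEYP
#define keyp_for_each_named_keyring(keyring, head)				\
	for ((keyring) = list_first_entry((head), struct key_struct,		\
					  name_link)->key;			\
	     &((struct key_struct *)((keyring)->name_link.prev))->name_link != (head); \
	     (keyring) = list_entry(((struct key_struct *)((keyring)->name_link.prev))->name_link.next, \
				    struct key_struct, name_link)->key)
#endif /* CONFIG_KEYP */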
it has a zero usage count) */ + #ifdef CONFIG_KEYP + if (!iee_set_key_usage(keyring, 0, REFCOUNT_INC_NOT_ZERO)) + continue; + iee_set_key_last_used_at(keyring, ktime_get_real_seconds()); + #else if (!refcount_inc_not_zero(&keyring->usage)) continue; keyring->last_used_at = ktime_get_real_seconds(); + #endif goto out; } @@ -1238,13 +1409,21 @@ static int keyring_detect_cycle(struct key *A, struct key *B) */ int __key_link_lock(struct key *keyring, const struct keyring_index_key *index_key) + #ifdef CONFIG_KEYP + __acquires(&KEY_SEM(keyring)) + #else __acquires(&keyring->sem) + #endif __acquires(&keyring_serialise_link_lock) { if (keyring->type != &key_type_keyring) return -ENOTDIR; + #ifdef CONFIG_KEYP + down_write(&KEY_SEM(keyring)); + #else down_write(&keyring->sem); + #endif /* Serialise link/link calls to prevent parallel calls causing a cycle * when linking two keyring in opposite orders. @@ -1260,8 +1439,13 @@ int __key_link_lock(struct key *keyring, */ int __key_move_lock(struct key *l_keyring, struct key *u_keyring, const struct keyring_index_key *index_key) + #ifdef CONFIG_KEYP + __acquires(&KEY_SEM(l_keyring)) + __acquires(&KEY_SEM(u_keyring)) + #else __acquires(&l_keyring->sem) __acquires(&u_keyring->sem) + #endif __acquires(&keyring_serialise_link_lock) { if (l_keyring->type != &key_type_keyring || @@ -1273,11 +1457,21 @@ int __key_move_lock(struct key *l_keyring, struct key *u_keyring, * move operation. */ if (l_keyring < u_keyring) { + #ifdef CNOFIG_KEYP + down_write(&KEY_SEM(l_keyring)); + down_write_nested(&KEY_SEM(u_keyring), 1); + #else down_write(&l_keyring->sem); down_write_nested(&u_keyring->sem, 1); + #endif } else { + #ifdef CONFIG_KEYP + down_write(&KEY_SEM(u_keyring)); + down_write_nested(&KEY_SEM(l_keyring), 1); + #else down_write(&u_keyring->sem); down_write_nested(&l_keyring->sem, 1); + #endif } /* Serialise link/link calls to prevent parallel calls causing a cycle @@ -1314,10 +1508,17 @@ int __key_link_begin(struct key *keyring, /* Create an edit script that will insert/replace the key in the * keyring tree. 
*/ + #ifdef CONFIG_KEYP + edit = assoc_array_insert(&((struct key_struct *)(keyring->name_link.prev))->keys, + &keyring_assoc_array_ops, + index_key, + NULL); + #else edit = assoc_array_insert(&keyring->keys, &keyring_assoc_array_ops, index_key, NULL); + #endif if (IS_ERR(edit)) { ret = PTR_ERR(edit); goto error; @@ -1385,7 +1586,11 @@ void __key_link(struct key *keyring, struct key *key, void __key_link_end(struct key *keyring, const struct keyring_index_key *index_key, struct assoc_array_edit *edit) + #ifdef CONFIG_KEYP + __releases(&KEY_SEM(keyring)) + #else __releases(&keyring->sem) + #endif __releases(&keyring_serialise_link_lock) { BUG_ON(index_key->type == NULL); @@ -1398,7 +1603,11 @@ void __key_link_end(struct key *keyring, } assoc_array_cancel_edit(edit); } + #ifdef CONFIG_KEYP + up_write(&KEY_SEM(keyring)); + #else up_write(&keyring->sem); + #endif if (index_key->type == &key_type_keyring) mutex_unlock(&keyring_serialise_link_lock); @@ -1411,8 +1620,13 @@ static int __key_link_check_restriction(struct key *keyring, struct key *key) { if (!keyring->restrict_link || !keyring->restrict_link->check) return 0; + #ifdef CONFIG_KEYP + return keyring->restrict_link->check(keyring, key->type, ((union key_payload *)(key->name_link.next)), + keyring->restrict_link->key); + #else return keyring->restrict_link->check(keyring, key->type, &key->payload, keyring->restrict_link->key); + #endif } /** @@ -1472,12 +1686,20 @@ EXPORT_SYMBOL(key_link); * Lock a keyring for unlink. */ static int __key_unlink_lock(struct key *keyring) + #ifdef CONFIG_KEYP + __acquires(&KEY_SEM(keyring)) + #else __acquires(&keyring->sem) + #endif { if (keyring->type != &key_type_keyring) return -ENOTDIR; + #ifdef CONFIG_KEYP + down_write(&KEY_SEM(keyring)); + #else down_write(&keyring->sem); + #endif return 0; } @@ -1491,8 +1713,13 @@ static int __key_unlink_begin(struct key *keyring, struct key *key, BUG_ON(*_edit != NULL); + #ifdef CONFIG_KEYP + edit = assoc_array_delete(&((struct key_struct *)(keyring->name_link.prev))->keys, &keyring_assoc_array_ops, + &key->index_key); + #else edit = assoc_array_delete(&keyring->keys, &keyring_assoc_array_ops, &key->index_key); + #endif if (IS_ERR(edit)) return PTR_ERR(edit); @@ -1521,11 +1748,19 @@ static void __key_unlink(struct key *keyring, struct key *key, static void __key_unlink_end(struct key *keyring, struct key *key, struct assoc_array_edit *edit) + #ifdef CONFIG_KEYP + __releases(&KEY_SEM(keyring)) + #else __releases(&keyring->sem) + #endif { if (edit) assoc_array_cancel_edit(edit); + #ifdef CONFIG_KEYP + up_write(&KEY_SEM(keyring)); + #else up_write(&keyring->sem); + #endif } /** @@ -1655,9 +1890,15 @@ int keyring_clear(struct key *keyring) if (keyring->type != &key_type_keyring) return -ENOTDIR; + #ifdef CONFIG_KEYP + down_write(&KEY_SEM(keyring)); + + edit = assoc_array_clear(&((struct key_struct *)(keyring->name_link.prev))->keys, &keyring_assoc_array_ops); + #else down_write(&keyring->sem); edit = assoc_array_clear(&keyring->keys, &keyring_assoc_array_ops); + #endif if (IS_ERR(edit)) { ret = PTR_ERR(edit); } else { @@ -1668,7 +1909,11 @@ int keyring_clear(struct key *keyring) ret = 0; } + #ifdef CONFIG_KEYP + up_write(&KEY_SEM(keyring)); + #else up_write(&keyring->sem); + #endif return ret; } EXPORT_SYMBOL(keyring_clear); @@ -1682,7 +1927,11 @@ static void keyring_revoke(struct key *keyring) { struct assoc_array_edit *edit; + #ifdef CONFIG_KEYP + edit = assoc_array_clear(&((struct key_struct *)(keyring->name_link.prev))->keys, &keyring_assoc_array_ops); + #else edit 
= assoc_array_clear(&keyring->keys, &keyring_assoc_array_ops); + #endif if (!IS_ERR(edit)) { if (edit) assoc_array_apply_edit(edit); @@ -1728,8 +1977,13 @@ void keyring_gc(struct key *keyring, time64_t limit) /* scan the keyring looking for dead keys */ rcu_read_lock(); + #ifdef CONFIG_KEYP + result = assoc_array_iterate(&((struct key_struct *)(keyring->name_link.prev))->keys, + keyring_gc_check_iterator, &limit); + #else result = assoc_array_iterate(&keyring->keys, keyring_gc_check_iterator, &limit); + #endif rcu_read_unlock(); if (result == true) goto do_gc; @@ -1739,10 +1993,17 @@ void keyring_gc(struct key *keyring, time64_t limit) return; do_gc: + #ifdef CONFIG_KEYP + down_write(&KEY_SEM(keyring)); + assoc_array_gc(&((struct key_struct *)(keyring->name_link.prev))->keys, &keyring_assoc_array_ops, + keyring_gc_select_iterator, &limit); + up_write(&KEY_SEM(keyring)); + #else down_write(&keyring->sem); assoc_array_gc(&keyring->keys, &keyring_assoc_array_ops, keyring_gc_select_iterator, &limit); up_write(&keyring->sem); + #endif kleave(" [gc]"); } @@ -1781,7 +2042,11 @@ void keyring_restriction_gc(struct key *keyring, struct key_type *dead_type) } /* Lock the keyring to ensure that a link is not in progress */ + #ifdef CONFIG_KEYP + down_write(&KEY_SEM(keyring)); + #else down_write(&keyring->sem); + #endif keyres = keyring->restrict_link; @@ -1791,7 +2056,11 @@ void keyring_restriction_gc(struct key *keyring, struct key_type *dead_type) keyres->key = NULL; keyres->keytype = NULL; + #ifdef CONFIG_KEYP + up_write(&KEY_SEM(keyring)); + #else up_write(&keyring->sem); + #endif kleave(" [restriction gc]"); } diff --git a/security/keys/proc.c b/security/keys/proc.c index 4f4e2c1824f1..5f72240ddd96 100644 --- a/security/keys/proc.c +++ b/security/keys/proc.c @@ -67,7 +67,11 @@ static struct rb_node *key_serial_next(struct seq_file *p, struct rb_node *n) n = rb_next(n); while (n) { + #ifdef CONFIG_KEYP + struct key *key = rb_entry(n, struct key_union, serial_node)->key; + #else struct key *key = rb_entry(n, struct key, serial_node); + #endif if (kuid_has_mapping(user_ns, key->user->uid)) break; n = rb_next(n); @@ -82,7 +86,11 @@ static struct key *find_ge_key(struct seq_file *p, key_serial_t id) struct key *minkey = NULL; while (n) { + #ifdef CONFIG_KEYP + struct key *key = rb_entry(n, struct key_union, serial_node)->key; + #else struct key *key = rb_entry(n, struct key, serial_node); + #endif if (id < key->serial) { if (!minkey || minkey->serial > key->serial) minkey = key; @@ -102,10 +110,18 @@ static struct key *find_ge_key(struct seq_file *p, key_serial_t id) for (;;) { if (kuid_has_mapping(user_ns, minkey->user->uid)) return minkey; + #ifdef CONFIG_KEYP + n = rb_next(&(((struct key_union *)(minkey->graveyard_link.next))->serial_node)); + #else n = rb_next(&minkey->serial_node); + #endif if (!n) return NULL; + #ifdef CONFIG_KEYP + minkey = rb_entry(n, struct key_union, serial_node)->key; + #else minkey = rb_entry(n, struct key, serial_node); + #endif } } @@ -123,12 +139,20 @@ static void *proc_keys_start(struct seq_file *p, loff_t *_pos) if (!key) return NULL; *_pos = key->serial; + #ifdef CONFIG_KEYP + return &(((struct key_union *)(key->graveyard_link.next))->serial_node); + #else return &key->serial_node; + #endif } static inline key_serial_t key_node_serial(struct rb_node *n) { + #ifdef CONFIG_KEYP + struct key *key = rb_entry(n, struct key_union, serial_node)->key; + #else struct key *key = rb_entry(n, struct key, serial_node); + #endif return key->serial; } @@ -153,7 +177,11 @@ static 
void proc_keys_stop(struct seq_file *p, void *v) static int proc_keys_show(struct seq_file *m, void *v) { struct rb_node *_p = v; + #ifdef CONFIG_KEYP + struct key *key = rb_entry(_p, struct key_union, serial_node)->key; + #else struct key *key = rb_entry(_p, struct key, serial_node); + #endif unsigned long flags; key_ref_t key_ref, skey_ref; time64_t now, expiry; diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index b5d5333ab330..bff026c9abbf 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -19,6 +19,13 @@ #include #include "internal.h" +#ifdef CONFIG_CREDP +#include +#endif +#ifdef CONFIG_KEYP +#include +#endif + /* Session keyring create vs join semaphore */ static DEFINE_MUTEX(key_session_mutex); @@ -232,7 +239,11 @@ int install_thread_keyring_to_cred(struct cred *new) if (IS_ERR(keyring)) return PTR_ERR(keyring); + #ifdef CONFIG_CREDP + iee_set_cred_thread_keyring(new,keyring); + #else new->thread_keyring = keyring; + #endif return 0; } @@ -279,7 +290,11 @@ int install_process_keyring_to_cred(struct cred *new) if (IS_ERR(keyring)) return PTR_ERR(keyring); + #ifdef CONFIG_CREDP + iee_set_cred_process_keyring(new,keyring); + #else new->process_keyring = keyring; + #endif return 0; } @@ -338,7 +353,11 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring) /* install the keyring */ old = cred->session_keyring; + #ifdef CONFIG_CREDP + iee_set_cred_session_keyring(cred,keyring); + #else cred->session_keyring = keyring; + #endif if (old) key_put(old); @@ -378,9 +397,15 @@ void key_fsuid_changed(struct cred *new_cred) { /* update the ownership of the thread keyring */ if (new_cred->thread_keyring) { + #ifdef CONFIG_KEYP + down_write(&KEY_SEM(new_cred->thread_keyring)); + iee_set_key_uid(new_cred->thread_keyring, new_cred->fsuid); + up_write(&KEY_SEM(new_cred->thread_keyring)); + #else down_write(&new_cred->thread_keyring->sem); new_cred->thread_keyring->uid = new_cred->fsuid; up_write(&new_cred->thread_keyring->sem); + #endif } } @@ -391,9 +416,15 @@ void key_fsgid_changed(struct cred *new_cred) { /* update the ownership of the thread keyring */ if (new_cred->thread_keyring) { + #ifdef CONFIG_KEYP + down_write(&KEY_SEM(new_cred->thread_keyring)); + iee_set_key_gid(new_cred->thread_keyring, new_cred->fsgid); + up_write(&KEY_SEM(new_cred->thread_keyring)); + #else down_write(&new_cred->thread_keyring->sem); new_cred->thread_keyring->gid = new_cred->fsgid; up_write(&new_cred->thread_keyring->sem); + #endif } } @@ -557,7 +588,11 @@ key_ref_t search_process_keyrings_rcu(struct keyring_search_context *ctx) const struct cred *cred = ctx->cred; if (key_validate(cred->request_key_auth) == 0) { + #ifdef CONFIG_KEYP + rka = ((union key_payload *)(ctx->cred->request_key_auth->name_link.next))->data[0]; + #else rka = ctx->cred->request_key_auth->payload.data[0]; + #endif //// was search_process_keyrings() [ie. 
recursive] ctx->cred = rka->cred; @@ -725,17 +760,29 @@ key_ref_t lookup_user_key(key_serial_t id, unsigned long lflags, if (!ctx.cred->request_key_auth) goto error; + #ifdef CONFIG_KEYP + down_read(&KEY_SEM(ctx.cred->request_key_auth)); + #else down_read(&ctx.cred->request_key_auth->sem); + #endif if (test_bit(KEY_FLAG_REVOKED, &ctx.cred->request_key_auth->flags)) { key_ref = ERR_PTR(-EKEYREVOKED); key = NULL; } else { + #ifdef CONFIG_KEYP + rka = ((union key_payload *)(ctx.cred->request_key_auth->name_link.next))->data[0]; + #else rka = ctx.cred->request_key_auth->payload.data[0]; + #endif key = rka->dest_keyring; __key_get(key); } + #ifdef CONFIG_KEYP + up_read(&KEY_SEM(ctx.cred->request_key_auth)); + #else up_read(&ctx.cred->request_key_auth->sem); + #endif if (!key) goto error; key_ref = make_key_ref(key, 1); @@ -804,7 +851,11 @@ key_ref_t lookup_user_key(key_serial_t id, unsigned long lflags, if (ret < 0) goto invalid_key; + #ifdef CONFIG_KEYP + iee_set_key_last_used_at(key, ktime_get_real_seconds()); + #else key->last_used_at = ktime_get_real_seconds(); + #endif error: put_cred(ctx.cred); @@ -911,7 +962,11 @@ long join_session_keyring(const char *name) void key_change_session_keyring(struct callback_head *twork) { const struct cred *old = current_cred(); + #ifdef CONFIG_CREDP + struct cred *new = *(struct cred **)(twork + 1); + #else struct cred *new = container_of(twork, struct cred, rcu); + #endif if (unlikely(current->flags & PF_EXITING)) { put_cred(new); @@ -925,6 +980,38 @@ void key_change_session_keyring(struct callback_head *twork) return; } + /* If get_ucounts fails more bits are needed in the refcount */ + if (unlikely(!get_ucounts(old->ucounts))) { + WARN_ONCE(1, "In %s get_ucounts failed\n", __func__); + put_cred(new); + return; + } + + #ifdef CONFIG_CREDP + iee_set_cred_uid(new,old-> uid); + iee_set_cred_euid(new,old-> euid); + iee_set_cred_suid(new,old-> suid); + iee_set_cred_fsuid(new,old->fsuid); + iee_set_cred_gid(new,old-> gid); + iee_set_cred_egid(new,old-> egid); + iee_set_cred_sgid(new,old-> sgid); + iee_set_cred_fsgid(new,old->fsgid); + iee_set_cred_user(new,get_uid(old->user)); + iee_set_cred_ucounts(new, old->ucounts); + iee_set_cred_user_ns(new,get_user_ns(old->user_ns)); + iee_set_cred_group_info(new,get_group_info(old->group_info)); + + iee_set_cred_securebits(new,old->securebits); + iee_set_cred_cap_inheritable(new,old->cap_inheritable); + iee_set_cred_cap_permitted(new,old->cap_permitted); + iee_set_cred_cap_effective(new,old->cap_effective); + iee_set_cred_cap_ambient(new,old->cap_ambient); + iee_set_cred_cap_bset(new,old->cap_bset); + + iee_set_cred_jit_keyring(new,old->jit_keyring); + iee_set_cred_thread_keyring(new,key_get(old->thread_keyring)); + iee_set_cred_process_keyring(new,key_get(old->process_keyring)); + #else new-> uid = old-> uid; new-> euid = old-> euid; new-> suid = old-> suid; @@ -948,6 +1035,7 @@ void key_change_session_keyring(struct callback_head *twork) new->jit_keyring = old->jit_keyring; new->thread_keyring = key_get(old->thread_keyring); new->process_keyring = key_get(old->process_keyring); + #endif security_transfer_creds(new, old); diff --git a/security/keys/request_key.c b/security/keys/request_key.c index a7673ad86d18..d281459651af 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -17,6 +17,10 @@ #include "internal.h" #include +#ifdef CONFIG_KEYP +#include +#endif + #define key_negative_timeout 60 /* default timeout on a negative key's existence */ static struct key *check_cached_key(struct 
keyring_search_context *ctx) @@ -285,13 +289,21 @@ static int construct_get_dest_keyring(struct key **_dest_keyring) case KEY_REQKEY_DEFL_REQUESTOR_KEYRING: if (cred->request_key_auth) { authkey = cred->request_key_auth; + #ifdef CONFIG_KEYP + down_read(&KEY_SEM(authkey)); + #else down_read(&authkey->sem); + #endif rka = get_request_key_auth(authkey); if (!test_bit(KEY_FLAG_REVOKED, &authkey->flags)) dest_keyring = key_get(rka->dest_keyring); + #ifdef CONFIG_KEYP + up_read(&KEY_SEM(authkey)); + #else up_read(&authkey->sem); + #endif if (dest_keyring) { do_perm_check = false; break; @@ -398,7 +410,11 @@ static int construct_alloc_key(struct keyring_search_context *ctx, if (IS_ERR(key)) goto alloc_failed; + #ifdef CONFIG_KEYP + iee_set_key_flag_bit(key, KEY_FLAG_USER_CONSTRUCT, SET_BIT_OP); + #else set_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags); + #endif if (dest_keyring) { ret = __key_link_lock(dest_keyring, &key->index_key); diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 8f33cd170e42..ddf86b3a2ecb 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -145,7 +145,11 @@ static void request_key_auth_revoke(struct key *key) */ static void request_key_auth_destroy(struct key *key) { + #ifdef CONFIG_KEYP + struct request_key_auth *rka = rcu_access_pointer(((union key_payload *)(key->name_link.next))->rcu_data0); + #else struct request_key_auth *rka = rcu_access_pointer(key->payload.rcu_data0); + #endif kenter("{%d}", key->serial); if (rka) { @@ -184,22 +188,38 @@ struct key *request_key_auth_new(struct key *target, const char *op, * another process */ if (cred->request_key_auth) { /* it is - use that instantiation context here too */ + #ifdef CONFIG_KEYP + down_read(&KEY_SEM(cred->request_key_auth)); + #else down_read(&cred->request_key_auth->sem); + #endif /* if the auth key has been revoked, then the key we're * servicing is already instantiated */ if (test_bit(KEY_FLAG_REVOKED, &cred->request_key_auth->flags)) { + #ifdef CONFIG_KEYP + up_read(&KEY_SEM(cred->request_key_auth)); + #else up_read(&cred->request_key_auth->sem); + #endif ret = -EKEYREVOKED; goto error_free_rka; } + #ifdef CONFIG_KEYP + irka = ((union key_payload *)(cred->request_key_auth->name_link.next))->data[0]; + #else irka = cred->request_key_auth->payload.data[0]; + #endif rka->cred = get_cred(irka->cred); rka->pid = irka->pid; + #ifdef CONFIG_KEYP + up_read(&KEY_SEM(cred->request_key_auth)); + #else up_read(&cred->request_key_auth->sem); + #endif } else { /* it isn't - use this process as the context */ diff --git a/security/keys/trusted-keys/trusted_core.c b/security/keys/trusted-keys/trusted_core.c index fee1ab2c734d..5ef67deb1df0 100644 --- a/security/keys/trusted-keys/trusted_core.c +++ b/security/keys/trusted-keys/trusted_core.c @@ -233,7 +233,11 @@ static int trusted_update(struct key *key, struct key_preparsed_payload *prep) if (key_is_negative(key)) return -ENOKEY; + #ifdef CONFIG_KEYP + p = ((union key_payload *)(key->name_link.next))->data[0]; + #else p = key->payload.data[0]; + #endif if (!p->migratable) return -EPERM; if (datalen <= 0 || datalen > 32767 || !prep->data) @@ -307,7 +311,11 @@ static long trusted_read(const struct key *key, char *buffer, */ static void trusted_destroy(struct key *key) { + #ifdef CONFIG_KEYP + kfree_sensitive(((union key_payload *)(key->name_link.next))->data[0]); + #else kfree_sensitive(key->payload.data[0]); + #endif } struct key_type key_type_trusted = { diff --git a/security/keys/user_defined.c 
b/security/keys/user_defined.c index 749e2a4dcb13..1985cea69256 100644 --- a/security/keys/user_defined.c +++ b/security/keys/user_defined.c @@ -14,6 +14,10 @@ #include #include "internal.h" +#ifdef CONFIG_KEYP +#include +#endif + static int logon_vet_description(const char *desc); /* @@ -109,7 +113,11 @@ int user_update(struct key *key, struct key_preparsed_payload *prep) return ret; /* attach the new data, displacing the old */ + #ifdef CONFIG_KEYP + iee_set_key_expiry(key, prep->expiry); + #else key->expiry = prep->expiry; + #endif if (key_is_positive(key)) zap = dereference_key_locked(key); rcu_assign_keypointer(key, prep->payload.data[0]); @@ -145,7 +153,11 @@ EXPORT_SYMBOL(user_revoke); */ void user_destroy(struct key *key) { + #ifdef CONFIG_KEYP + struct user_key_payload *upayload = ((union key_payload *)(key->name_link.next))->data[0]; + #else struct user_key_payload *upayload = key->payload.data[0]; + #endif kfree_sensitive(upayload); } diff --git a/security/security.c b/security/security.c index 839e12addac7..12320f9d4989 100644 --- a/security/security.c +++ b/security/security.c @@ -30,6 +30,9 @@ #include #include #include +#ifdef CONFIG_CREDP +#include +#endif /* How many LSMs were built into the kernel? */ #define LSM_COUNT (__end_lsm_info - __start_lsm_info) @@ -570,11 +573,19 @@ EXPORT_SYMBOL(unregister_blocking_lsm_notifier); static int lsm_cred_alloc(struct cred *cred, gfp_t gfp) { if (blob_sizes.lbs_cred == 0) { + #ifdef CONFIG_CREDP + iee_set_cred_security(cred,NULL); + #else cred->security = NULL; + #endif return 0; } + #ifdef CONFIG_CREDP + iee_set_cred_security(cred,kzalloc(blob_sizes.lbs_cred, gfp)); + #else cred->security = kzalloc(blob_sizes.lbs_cred, gfp); + #endif if (cred->security == NULL) return -ENOMEM; return 0; @@ -2965,7 +2976,11 @@ void security_cred_free(struct cred *cred) call_void_hook(cred_free, cred); kfree(cred->security); + #ifdef CONFIG_CREDP + iee_set_cred_security(cred,NULL); + #else cred->security = NULL; + #endif } /** diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index d4a99d98ec77..a08b94e13fd8 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -104,9 +104,22 @@ #include "audit.h" #include "avc_ss.h" +#ifdef CONFIG_IEE_SELINUX_P +#include +#include +#endif + +#ifdef CONFIG_KEYP +#include +#endif + #define SELINUX_INODE_INIT_XATTRS 1 +#ifdef CONFIG_IEE_SELINUX_P +struct selinux_state selinux_state __section(".iee.selinux"); +#else struct selinux_state selinux_state; +#endif /* SECMARK reference count */ static atomic_t selinux_secmark_refcount = ATOMIC_INIT(0); @@ -6585,7 +6598,11 @@ static int selinux_key_alloc(struct key *k, const struct cred *cred, else ksec->sid = tsec->sid; + #ifdef CONFIG_KEYP + iee_set_key_security(k, ksec); + #else k->security = ksec; + #endif return 0; } @@ -6593,7 +6610,11 @@ static void selinux_key_free(struct key *k) { struct key_security_struct *ksec = k->security; + #ifdef CONFIG_KEYP + iee_set_key_security(k, NULL); + #else k->security = NULL; + #endif kfree(ksec); } @@ -7284,15 +7305,54 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = { #endif }; +#ifdef CONFIG_IEE_SELINUX_P +struct kmem_cache *policy_jar; + +static void policy_cache_init(void) +{ + struct selinux_policy* unused; + policy_jar = kmem_cache_create("policy_jar", sizeof(struct selinux_policy), 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + /* Test this cache */ + unused = kmem_cache_alloc(policy_jar, GFP_KERNEL); + kmem_cache_free(policy_jar, unused); + + printk("IEE SELINUXP: policy cache 
created."); +} +#endif + static __init int selinux_init(void) { pr_info("SELinux: Initializing.\n"); memset(&selinux_state, 0, sizeof(selinux_state)); +#ifdef CONFIG_IEE_SELINUX_P + WRITE_ONCE(selinux_state.enforcing, selinux_enforcing_boot); +#else enforcing_set(selinux_enforcing_boot); +#endif selinux_avc_init(); + +#ifdef CONFIG_IEE_SELINUX_P + /* Put selinux_status inside IEE. */ + /* Prepare mutex lock and write the ptr to mutex->owner. */ + struct mutex* status_lock = kzalloc(GFP_KERNEL, sizeof(struct mutex)); + struct mutex* policy_mutex = kzalloc(GFP_KERNEL, sizeof(struct mutex)); + mutex_init(status_lock); + mutex_init(policy_mutex); + selinux_state.status_lock.owner.counter = (s64)status_lock; + selinux_state.policy_mutex.owner.counter = (s64)policy_mutex; + + /* Setting lm addr to be RO, IEE addr valid. */ + set_iee_page_valid((unsigned long)__phys_to_iee(__pa_symbol(&selinux_state))); + iee_set_logical_mem_ro((unsigned long)&selinux_state); + iee_set_logical_mem_ro((unsigned long)__va(__pa_symbol(&selinux_state))); + printk("IEE SELINUXP: Succeeded on preparing selinux_state."); + policy_cache_init(); +#else mutex_init(&selinux_state.status_lock); mutex_init(&selinux_state.policy_mutex); +#endif /* Set the security state for the initial task. */ cred_init_security(); diff --git a/security/selinux/ima.c b/security/selinux/ima.c index aa34da9b0aeb..022b3ce8d740 100644 --- a/security/selinux/ima.c +++ b/security/selinux/ima.c @@ -12,6 +12,10 @@ #include "security.h" #include "ima.h" +#ifdef CONFIG_IEE_SELINUX_P +#include +#endif + /* * selinux_ima_collect_state - Read selinux configuration settings * @@ -74,7 +78,11 @@ void selinux_ima_measure_state_locked(void) size_t policy_len; int rc = 0; +#ifdef CONFIG_IEE_SELINUX_P + lockdep_assert_held(iee_get_selinux_policy_lock()); +#else lockdep_assert_held(&selinux_state.policy_mutex); +#endif state_str = selinux_ima_collect_state(); if (!state_str) { @@ -112,9 +120,21 @@ void selinux_ima_measure_state_locked(void) */ void selinux_ima_measure_state(void) { +#ifdef CONFIG_IEE_SELINUX_P + lockdep_assert_not_held(iee_get_selinux_policy_lock()); +#else lockdep_assert_not_held(&selinux_state.policy_mutex); +#endif +#ifdef CONFIG_IEE_SELINUX_P + mutex_lock(iee_get_selinux_policy_lock()); +#else mutex_lock(&selinux_state.policy_mutex); +#endif selinux_ima_measure_state_locked(); +#ifdef CONFIG_IEE_SELINUX_P + mutex_unlock(iee_get_selinux_policy_lock()); +#else mutex_unlock(&selinux_state.policy_mutex); +#endif } diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index a9de89af8fdc..2eee89071a56 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -113,11 +113,15 @@ static inline bool selinux_initialized(void) return smp_load_acquire(&selinux_state.initialized); } +#ifdef CONFIG_IEE_SELINUX_P +extern void selinux_mark_initialized(void); +#else static inline void selinux_mark_initialized(void) { /* do a synchronized write to avoid race conditions */ smp_store_release(&selinux_state.initialized, true); } +#endif #ifdef CONFIG_SECURITY_SELINUX_DEVELOP static inline bool enforcing_enabled(void) @@ -125,10 +129,14 @@ static inline bool enforcing_enabled(void) return READ_ONCE(selinux_state.enforcing); } +#ifdef CONFIG_IEE_SELINUX_P +extern void enforcing_set(bool value); +#else static inline void enforcing_set(bool value) { WRITE_ONCE(selinux_state.enforcing, value); } +#endif #else static inline bool enforcing_enabled(void) { diff --git a/security/selinux/selinuxfs.c 
b/security/selinux/selinuxfs.c index 54bc18e8164b..137cdc6ea27c 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -43,6 +43,10 @@ #include "conditional.h" #include "ima.h" +#ifdef CONFIG_IEE_SELINUX_P +#include +#endif + enum sel_inos { SEL_ROOT_INO = 2, SEL_LOAD, /* load policy */ @@ -371,7 +375,11 @@ static int sel_open_policy(struct inode *inode, struct file *filp) BUG_ON(filp->private_data); +#ifdef CONFIG_IEE_SELINUX_P + mutex_lock(iee_get_selinux_policy_lock()); +#else mutex_lock(&selinux_state.policy_mutex); +#endif rc = avc_has_perm(current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__READ_POLICY, NULL); @@ -401,11 +409,19 @@ static int sel_open_policy(struct inode *inode, struct file *filp) filp->private_data = plm; +#ifdef CONFIG_IEE_SELINUX_P + mutex_unlock(iee_get_selinux_policy_lock()); +#else mutex_unlock(&selinux_state.policy_mutex); +#endif return 0; err: +#ifdef CONFIG_IEE_SELINUX_P + mutex_unlock(iee_get_selinux_policy_lock()); +#else mutex_unlock(&selinux_state.policy_mutex); +#endif if (plm) vfree(plm->data); @@ -594,7 +610,11 @@ static ssize_t sel_write_load(struct file *file, const char __user *buf, if (!count) return -EINVAL; +#ifdef CONFIG_IEE_SELINUX_P + mutex_lock(iee_get_selinux_policy_lock()); +#else mutex_lock(&selinux_state.policy_mutex); +#endif length = avc_has_perm(current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__LOAD_POLICY, NULL); @@ -632,13 +652,31 @@ static ssize_t sel_write_load(struct file *file, const char __user *buf, audit_get_sessionid(current)); out: +#ifdef CONFIG_IEE_SELINUX_P + mutex_unlock(iee_get_selinux_policy_lock()); +#else mutex_unlock(&selinux_state.policy_mutex); +#endif vfree(data); return length; } +// #ifdef CONFIG_IEE_SELINUX_P +// /* Do sel_write_load twice to test free process of policy. 
*/ +// static ssize_t sel_write_load_test(struct file *file, const char __user *buf, +// size_t count, loff_t *ppos) +// { +// sel_write_load(file, buf, count, ppos); +// return sel_write_load(file, buf, count, ppos); +// } +// #endif + static const struct file_operations sel_load_ops = { +// #ifdef CONFIG_IEE_SELINUX_P +// .write = sel_write_load_test, +// #else .write = sel_write_load, +// #endif .llseek = generic_file_llseek, }; @@ -1216,7 +1254,11 @@ static ssize_t sel_read_bool(struct file *filep, char __user *buf, unsigned index = file_inode(filep)->i_ino & SEL_INO_MASK; const char *name = filep->f_path.dentry->d_name.name; +#ifdef CONFIG_IEE_SELINUX_P + mutex_lock(iee_get_selinux_policy_lock()); +#else mutex_lock(&selinux_state.policy_mutex); +#endif ret = -EINVAL; if (index >= fsi->bool_num || strcmp(name, @@ -1235,14 +1277,22 @@ static ssize_t sel_read_bool(struct file *filep, char __user *buf, } length = scnprintf(page, PAGE_SIZE, "%d %d", cur_enforcing, fsi->bool_pending_values[index]); +#ifdef CONFIG_IEE_SELINUX_P + mutex_unlock(iee_get_selinux_policy_lock()); +#else mutex_unlock(&selinux_state.policy_mutex); +#endif ret = simple_read_from_buffer(buf, count, ppos, page, length); out_free: free_page((unsigned long)page); return ret; out_unlock: +#ifdef CONFIG_IEE_SELINUX_P + mutex_unlock(iee_get_selinux_policy_lock()); +#else mutex_unlock(&selinux_state.policy_mutex); +#endif goto out_free; } @@ -1267,7 +1317,11 @@ static ssize_t sel_write_bool(struct file *filep, const char __user *buf, if (IS_ERR(page)) return PTR_ERR(page); +#ifdef CONFIG_IEE_SELINUX_P + mutex_lock(iee_get_selinux_policy_lock()); +#else mutex_lock(&selinux_state.policy_mutex); +#endif length = avc_has_perm(current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__SETBOOL, @@ -1291,7 +1345,11 @@ static ssize_t sel_write_bool(struct file *filep, const char __user *buf, length = count; out: +#ifdef CONFIG_IEE_SELINUX_P + mutex_unlock(iee_get_selinux_policy_lock()); +#else mutex_unlock(&selinux_state.policy_mutex); +#endif kfree(page); return length; } @@ -1322,7 +1380,11 @@ static ssize_t sel_commit_bools_write(struct file *filep, if (IS_ERR(page)) return PTR_ERR(page); +#ifdef CONFIG_IEE_SELINUX_P + mutex_lock(iee_get_selinux_policy_lock()); +#else mutex_lock(&selinux_state.policy_mutex); +#endif length = avc_has_perm(current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__SETBOOL, @@ -1343,7 +1405,11 @@ static ssize_t sel_commit_bools_write(struct file *filep, length = count; out: +#ifdef CONFIG_IEE_SELINUX_P + mutex_unlock(iee_get_selinux_policy_lock()); +#else mutex_unlock(&selinux_state.policy_mutex); +#endif kfree(page); return length; } diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 379ac7b5c709..6d5cb766116a 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -68,6 +68,10 @@ #include "policycap_names.h" #include "ima.h" +#ifdef CONFIG_IEE_SELINUX_P +#include +#endif + struct selinux_policy_convert_data { struct convert_context_args args; struct sidtab_convert_params sidtab_params; @@ -2108,9 +2112,14 @@ static void security_load_policycaps(struct selinux_policy *policy) p = &policy->policydb; +#ifdef CONFIG_IEE_SELINUX_P + for (i = 0; i < ARRAY_SIZE(selinux_state.policycap); i++) + iee_set_sel_policy_cap(i, ebitmap_get_bit(&p->policycaps, i)); +#else for (i = 0; i < ARRAY_SIZE(selinux_state.policycap); i++) WRITE_ONCE(selinux_state.policycap[i], ebitmap_get_bit(&p->policycaps, i)); +#endif for (i = 0; i < 
ARRAY_SIZE(selinux_policycap_names); i++) pr_info("SELinux: policy capability %s=%d\n", @@ -2173,6 +2182,9 @@ void selinux_policy_commit(struct selinux_load_state *load_state) { struct selinux_state *state = &selinux_state; struct selinux_policy *oldpolicy, *newpolicy = load_state->policy; +#ifdef CONFIG_IEE_SELINUX_P + struct selinux_policy *temppolicy; +#endif unsigned long flags; u32 seqno; @@ -2197,10 +2209,20 @@ void selinux_policy_commit(struct selinux_load_state *load_state) /* Install the new policy. */ if (oldpolicy) { sidtab_freeze_begin(oldpolicy->sidtab, &flags); +#ifdef CONFIG_IEE_SELINUX_P + iee_sel_rcu_assign_policy(newpolicy, kmem_cache_alloc(policy_jar, GFP_KERNEL)); + kfree(newpolicy); +#else rcu_assign_pointer(state->policy, newpolicy); +#endif sidtab_freeze_end(oldpolicy->sidtab, &flags); } else { +#ifdef CONFIG_IEE_SELINUX_P + iee_sel_rcu_assign_policy(newpolicy, kmem_cache_alloc(policy_jar, GFP_KERNEL)); + kfree(newpolicy); +#else rcu_assign_pointer(state->policy, newpolicy); +#endif } /* Load the policycaps from the new policy */ @@ -2218,7 +2240,20 @@ void selinux_policy_commit(struct selinux_load_state *load_state) /* Free the old policy */ synchronize_rcu(); +#ifdef CONFIG_IEE_SELINUX_P + /* + * Normal free process includes setting freed objects pointers to be NULL, however it + * would be hard as old policy is already inside IEE. So Make a kernel copy of the old + * policy to free objects it points to. + */ + if (oldpolicy){ + temppolicy = kmemdup(oldpolicy, sizeof(*temppolicy), GFP_KERNEL); + selinux_policy_free(temppolicy); + kfree(oldpolicy); + } +#else selinux_policy_free(oldpolicy); +#endif kfree(load_state->convert_data); /* Notify others of the policy change */ @@ -3016,6 +3051,9 @@ int security_set_bools(u32 len, int *values) { struct selinux_state *state = &selinux_state; struct selinux_policy *newpolicy, *oldpolicy; +#ifdef CONFIG_IEE_SELINUX_P + struct selinux_policy *temppolicy; +#endif int rc; u32 i, seqno = 0; @@ -3068,8 +3106,13 @@ int security_set_bools(u32 len, int *values) newpolicy->latest_granting = oldpolicy->latest_granting + 1; seqno = newpolicy->latest_granting; +#ifdef CONFIG_IEE_SELINUX_P + iee_sel_rcu_assign_policy(newpolicy, kmem_cache_alloc(policy_jar, GFP_KERNEL)); + kfree(newpolicy); +#else /* Install the new policy */ rcu_assign_pointer(state->policy, newpolicy); +#endif /* * Free the conditional portions of the old policydb @@ -3077,7 +3120,20 @@ int security_set_bools(u32 len, int *values) * structure itself but not what it references. */ synchronize_rcu(); +#ifdef CONFIG_IEE_SELINUX_P + /* + * Normal free process includes setting freed objects pointers to be NULL, however it + * would be hard as old policy is already inside IEE. So Make a kernel copy of the old + * policy to free objects it points to. + */ + temppolicy = kmemdup(oldpolicy, sizeof(*temppolicy), GFP_KERNEL); + if (!temppolicy) + return -ENOMEM; + selinux_policy_cond_free(temppolicy); + kfree(oldpolicy); +#else selinux_policy_cond_free(oldpolicy); +#endif /* Notify others of the policy change */ selinux_notify_policy_change(seqno); diff --git a/security/selinux/status.c b/security/selinux/status.c index dffca22ce6f7..642965684325 100644 --- a/security/selinux/status.c +++ b/security/selinux/status.c @@ -13,6 +13,10 @@ #include "avc.h" #include "security.h" +#ifdef CONFIG_IEE_SELINUX_P +#include +#endif + /* * The selinux_status_page shall be exposed to userspace applications * using mmap interface on /selinux/status. 
@@ -44,9 +48,17 @@ struct page *selinux_kernel_status_page(void) struct selinux_kernel_status *status; struct page *result = NULL; +#ifdef CONFIG_IEE_SELINUX_P + mutex_lock(iee_get_selinux_status_lock()); +#else mutex_lock(&selinux_state.status_lock); +#endif if (!selinux_state.status_page) { + #ifdef CONFIG_IEE_SELINUX_P + iee_set_selinux_status_pg(alloc_page(GFP_KERNEL|__GFP_ZERO)); + #else selinux_state.status_page = alloc_page(GFP_KERNEL|__GFP_ZERO); + #endif if (selinux_state.status_page) { status = page_address(selinux_state.status_page); @@ -66,7 +78,11 @@ struct page *selinux_kernel_status_page(void) } } result = selinux_state.status_page; +#ifdef CONFIG_IEE_SELINUX_P + mutex_unlock(iee_get_selinux_status_lock()); +#else mutex_unlock(&selinux_state.status_lock); +#endif return result; } @@ -80,7 +96,11 @@ void selinux_status_update_setenforce(bool enforcing) { struct selinux_kernel_status *status; +#ifdef CONFIG_IEE_SELINUX_P + mutex_lock(iee_get_selinux_status_lock()); +#else mutex_lock(&selinux_state.status_lock); +#endif if (selinux_state.status_page) { status = page_address(selinux_state.status_page); @@ -92,7 +112,11 @@ void selinux_status_update_setenforce(bool enforcing) smp_wmb(); status->sequence++; } +#ifdef CONFIG_IEE_SELINUX_P + mutex_unlock(iee_get_selinux_status_lock()); +#else mutex_unlock(&selinux_state.status_lock); +#endif } /* @@ -105,7 +129,11 @@ void selinux_status_update_policyload(u32 seqno) { struct selinux_kernel_status *status; +#ifdef CONFIG_IEE_SELINUX_P + mutex_lock(iee_get_selinux_status_lock()); +#else mutex_lock(&selinux_state.status_lock); +#endif if (selinux_state.status_page) { status = page_address(selinux_state.status_page); @@ -118,5 +146,9 @@ void selinux_status_update_policyload(u32 seqno) smp_wmb(); status->sequence++; } +#ifdef CONFIG_IEE_SELINUX_P + mutex_unlock(iee_get_selinux_status_lock()); +#else mutex_unlock(&selinux_state.status_lock); +#endif } diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 4625674f0e95..cbd843db9d41 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -45,6 +45,10 @@ #include #include "smack.h" +#ifdef CONFIG_KEYP +#include +#endif + #define TRANS_TRUE "TRUE" #define TRANS_TRUE_SIZE 4 @@ -4411,7 +4415,11 @@ static int smack_key_alloc(struct key *key, const struct cred *cred, { struct smack_known *skp = smk_of_task(smack_cred(cred)); + #ifdef CONFIG_KEYP + iee_set_key_security(key, skp); + #else key->security = skp; + #endif return 0; } @@ -4423,7 +4431,11 @@ static int smack_key_alloc(struct key *key, const struct cred *cred, */ static void smack_key_free(struct key *key) { + #ifdef CONFIG_KEYP + iee_set_key_security(key, NULL); + #else key->security = NULL; + #endif } /** -- 2.43.0