-rw-r--r--arch/arm64/include/asm/mmu_context.h6
-rw-r--r--arch/sparc/include/asm/mmu_context_64.h6
-rw-r--r--arch/sparc/include/asm/uaccess_64.h2
-rw-r--r--arch/x86/Kconfig11
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_64.c2
-rw-r--r--arch/x86/include/asm/cpufeatures.h1
-rw-r--r--arch/x86/include/asm/disabled-features.h8
-rw-r--r--arch/x86/include/asm/mmu.h18
-rw-r--r--arch/x86/include/asm/mmu_context.h49
-rw-r--r--arch/x86/include/asm/processor-flags.h2
-rw-r--r--arch/x86/include/asm/tlbflush.h48
-rw-r--r--arch/x86/include/asm/uaccess.h58
-rw-r--r--arch/x86/include/uapi/asm/prctl.h5
-rw-r--r--arch/x86/include/uapi/asm/processor-flags.h6
-rw-r--r--arch/x86/kernel/process.c6
-rw-r--r--arch/x86/kernel/process_64.c68
-rw-r--r--arch/x86/kernel/traps.c6
-rw-r--r--arch/x86/lib/getuser.S83
-rw-r--r--arch/x86/lib/putuser.S54
-rw-r--r--arch/x86/mm/init.c5
-rw-r--r--arch/x86/mm/tlb.c53
-rw-r--r--drivers/iommu/iommu-sva.c8
-rw-r--r--drivers/vfio/vfio_iommu_type1.c2
-rw-r--r--fs/proc/array.c7
-rw-r--r--fs/proc/task_mmu.c9
-rw-r--r--include/linux/ioasid.h9
-rw-r--r--include/linux/mm.h11
-rw-r--r--include/linux/mmu_context.h14
-rw-r--r--include/linux/sched/mm.h8
-rw-r--r--include/linux/uaccess.h22
-rw-r--r--mm/gup.c4
-rw-r--r--mm/madvise.c5
-rw-r--r--mm/migrate.c11
-rw-r--r--tools/testing/selftests/x86/Makefile2
-rw-r--r--tools/testing/selftests/x86/lam.c1241
35 files changed, 1701 insertions, 149 deletions
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 72dbd6400549..56911691bef0 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -288,6 +288,12 @@ void post_ttbr_update_workaround(void);
unsigned long arm64_mm_context_get(struct mm_struct *mm);
void arm64_mm_context_put(struct mm_struct *mm);
+#define mm_untag_mask mm_untag_mask
+static inline unsigned long mm_untag_mask(struct mm_struct *mm)
+{
+ return -1UL >> 8;
+}
+
#include <asm-generic/mmu_context.h>
#endif /* !__ASSEMBLY__ */
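For reference, the mask above reflects arm64's Top Byte Ignore (TBI): shifting -1UL right by 8 keeps bits 55:0 and clears the tag byte. A standalone sanity check of that arithmetic (illustrative sketch, not part of the patch):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t untag_mask = ~(uint64_t)0 >> 8;	/* bits 63:56 are the tag */
	uint64_t tagged = 0xab00007fdeadbeefULL;	/* tag 0xab in the top byte */

	assert(untag_mask == 0x00ffffffffffffffULL);
	assert((tagged & untag_mask) == 0x0000007fdeadbeefULL);
	return 0;
}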
diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h
index 7a8380c63aab..799e797c5cdd 100644
--- a/arch/sparc/include/asm/mmu_context_64.h
+++ b/arch/sparc/include/asm/mmu_context_64.h
@@ -185,6 +185,12 @@ static inline void finish_arch_post_lock_switch(void)
}
}
+#define mm_untag_mask mm_untag_mask
+static inline unsigned long mm_untag_mask(struct mm_struct *mm)
+{
+ return -1UL >> adi_nbits();
+}
+
#include <asm-generic/mmu_context.h>
#endif /* !(__ASSEMBLY__) */
diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h
index 94266a5c5b04..b825a5dd0210 100644
--- a/arch/sparc/include/asm/uaccess_64.h
+++ b/arch/sparc/include/asm/uaccess_64.h
@@ -8,8 +8,10 @@
#include <linux/compiler.h>
#include <linux/string.h>
+#include <linux/mm_types.h>
#include <asm/asi.h>
#include <asm/spitfire.h>
+#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm-generic/access_ok.h>
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f87590c3c382..53bab123a8ee 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2290,6 +2290,17 @@ config RANDOMIZE_MEMORY_PHYSICAL_PADDING
If unsure, leave at the default value.
+config ADDRESS_MASKING
+ bool "Linear Address Masking support"
+ depends on X86_64
+ help
+	  Linear Address Masking (LAM) modifies the checking that is applied
+	  to 64-bit linear addresses, allowing software to use the
+	  untranslated address bits for metadata.
+
+	  The capability can be used for an efficient address sanitizer (ASAN)
+	  implementation and for optimizations in JITs.
+
config HOTPLUG_CPU
def_bool y
depends on SMP
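To illustrate what the help text describes: with LAM enabled, a process can stash metadata in the masked upper bits of a pointer and still dereference it. A hedged userspace sketch (the tag value 0x2a is arbitrary; this mirrors set_metadata() in the selftest added at the end of this patch and assumes LAM_U57 has already been enabled):

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define LAM_U57_MASK (0x3fULL << 57)	/* metadata lives in bits 62:57 */

int main(void)
{
	char *p = malloc(32);
	char *tagged;

	if (!p)
		return 1;
	/* Stash an arbitrary 6-bit tag; dereferencing requires LAM_U57 */
	tagged = (char *)(((uint64_t)p & ~LAM_U57_MASK) | (0x2aULL << 57));

	strcpy(tagged, "hello");	/* hardware strips the tag on access */
	return strcmp(p, "hello");	/* 0: both pointers alias the same memory */
}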
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index d234ca797e4a..e0ca8120aea8 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -317,7 +317,7 @@ static struct vm_area_struct gate_vma __ro_after_init = {
struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
{
#ifdef CONFIG_COMPAT
- if (!mm || !(mm->context.flags & MM_CONTEXT_HAS_VSYSCALL))
+ if (!mm || !test_bit(MM_CONTEXT_HAS_VSYSCALL, &mm->context.flags))
return NULL;
#endif
if (vsyscall_mode == NONE)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 73c9672c123b..353b054812de 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -321,6 +321,7 @@
#define X86_FEATURE_LKGS (12*32+18) /* "" Load "kernel" (userspace) GS */
#define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */
#define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */
+#define X86_FEATURE_LAM (12*32+26) /* Linear Address Masking */
/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 5dfa4fb76f4b..fafe9be7a6f4 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -75,6 +75,12 @@
# define DISABLE_CALL_DEPTH_TRACKING (1 << (X86_FEATURE_CALL_DEPTH & 31))
#endif
+#ifdef CONFIG_ADDRESS_MASKING
+# define DISABLE_LAM 0
+#else
+# define DISABLE_LAM (1 << (X86_FEATURE_LAM & 31))
+#endif
+
#ifdef CONFIG_INTEL_IOMMU_SVM
# define DISABLE_ENQCMD 0
#else
@@ -115,7 +121,7 @@
#define DISABLED_MASK10 0
#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \
DISABLE_CALL_DEPTH_TRACKING)
-#define DISABLED_MASK12 0
+#define DISABLED_MASK12 (DISABLE_LAM)
#define DISABLED_MASK13 0
#define DISABLED_MASK14 0
#define DISABLED_MASK15 0
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 5d7494631ea9..0da5c227f490 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -9,9 +9,13 @@
#include <linux/bits.h>
/* Uprobes on this MM assume 32-bit code */
-#define MM_CONTEXT_UPROBE_IA32 BIT(0)
+#define MM_CONTEXT_UPROBE_IA32 0
/* vsyscall page is accessible on this MM */
-#define MM_CONTEXT_HAS_VSYSCALL BIT(1)
+#define MM_CONTEXT_HAS_VSYSCALL 1
+/* Do not allow changing LAM mode */
+#define MM_CONTEXT_LOCK_LAM 2
+/* Allow LAM and SVA coexisting */
+#define MM_CONTEXT_FORCE_TAGGED_SVA 3
/*
* x86 has arch-specific MMU state beyond what lives in mm_struct.
@@ -39,7 +43,15 @@ typedef struct {
#endif
#ifdef CONFIG_X86_64
- unsigned short flags;
+ unsigned long flags;
+#endif
+
+#ifdef CONFIG_ADDRESS_MASKING
+ /* Active LAM mode: X86_CR3_LAM_U48 or X86_CR3_LAM_U57 or 0 (disabled) */
+ unsigned long lam_cr3_mask;
+
+ /* Significant bits of the virtual address. Excludes tag bits. */
+ u64 untag_mask;
#endif
struct mutex lock;
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index c3ad8a526378..1d29dc791f5a 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -85,6 +85,51 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
}
#endif
+#ifdef CONFIG_ADDRESS_MASKING
+static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm)
+{
+ return mm->context.lam_cr3_mask;
+}
+
+static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm)
+{
+ mm->context.lam_cr3_mask = oldmm->context.lam_cr3_mask;
+ mm->context.untag_mask = oldmm->context.untag_mask;
+}
+
+#define mm_untag_mask mm_untag_mask
+static inline unsigned long mm_untag_mask(struct mm_struct *mm)
+{
+ return mm->context.untag_mask;
+}
+
+static inline void mm_reset_untag_mask(struct mm_struct *mm)
+{
+ mm->context.untag_mask = -1UL;
+}
+
+#define arch_pgtable_dma_compat arch_pgtable_dma_compat
+static inline bool arch_pgtable_dma_compat(struct mm_struct *mm)
+{
+ return !mm_lam_cr3_mask(mm) ||
+ test_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &mm->context.flags);
+}
+#else
+
+static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm)
+{
+ return 0;
+}
+
+static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm)
+{
+}
+
+static inline void mm_reset_untag_mask(struct mm_struct *mm)
+{
+}
+#endif
+
#define enter_lazy_tlb enter_lazy_tlb
extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
@@ -109,6 +154,7 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.execute_only_pkey = -1;
}
#endif
+ mm_reset_untag_mask(mm);
init_new_context_ldt(mm);
return 0;
}
@@ -162,6 +208,7 @@ static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
{
arch_dup_pkeys(oldmm, mm);
paravirt_enter_mmap(mm);
+ dup_lam(oldmm, mm);
return ldt_dup_context(oldmm, mm);
}
@@ -175,7 +222,7 @@ static inline void arch_exit_mmap(struct mm_struct *mm)
static inline bool is_64bit_mm(struct mm_struct *mm)
{
return !IS_ENABLED(CONFIG_IA32_EMULATION) ||
- !(mm->context.flags & MM_CONTEXT_UPROBE_IA32);
+ !test_bit(MM_CONTEXT_UPROBE_IA32, &mm->context.flags);
}
#else
static inline bool is_64bit_mm(struct mm_struct *mm)
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index a7f3d9100adb..d8cccadc83a6 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -28,6 +28,8 @@
* On systems with SME, one bit (in a variable position!) is stolen to indicate
* that the top-level paging structure is encrypted.
*
+ * On systems with LAM, bits 61 and 62 are used to indicate LAM mode.
+ *
* All of the remaining bits indicate the physical address of the top-level
* paging structure.
*
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index cda3118f3b27..75bfaa421030 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_TLBFLUSH_H
#define _ASM_X86_TLBFLUSH_H
-#include <linux/mm.h>
+#include <linux/mm_types.h>
#include <linux/sched.h>
#include <asm/processor.h>
@@ -12,6 +12,7 @@
#include <asm/invpcid.h>
#include <asm/pti.h>
#include <asm/processor-flags.h>
+#include <asm/pgtable.h>
void __flush_tlb_all(void);
@@ -53,6 +54,15 @@ static inline void cr4_clear_bits(unsigned long mask)
local_irq_restore(flags);
}
+#ifdef CONFIG_ADDRESS_MASKING
+DECLARE_PER_CPU(u64, tlbstate_untag_mask);
+
+static inline u64 current_untag_mask(void)
+{
+ return this_cpu_read(tlbstate_untag_mask);
+}
+#endif
+
#ifndef MODULE
/*
* 6 because 6 should be plenty and struct tlb_state will fit in two cache
@@ -101,6 +111,16 @@ struct tlb_state {
*/
bool invalidate_other;
+#ifdef CONFIG_ADDRESS_MASKING
+ /*
+ * Active LAM mode.
+ *
+	 * X86_CR3_LAM_U57/U48 shifted right by X86_CR3_LAM_U57_BIT, or 0 if
+	 * LAM is disabled.
+ */
+ u8 lam;
+#endif
+
/*
* Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate
* the corresponding user PCID needs a flush next time we
@@ -357,6 +377,32 @@ static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd)
}
#define huge_pmd_needs_flush huge_pmd_needs_flush
+#ifdef CONFIG_ADDRESS_MASKING
+static inline u64 tlbstate_lam_cr3_mask(void)
+{
+ u64 lam = this_cpu_read(cpu_tlbstate.lam);
+
+ return lam << X86_CR3_LAM_U57_BIT;
+}
+
+static inline void set_tlbstate_lam_mode(struct mm_struct *mm)
+{
+ this_cpu_write(cpu_tlbstate.lam,
+ mm->context.lam_cr3_mask >> X86_CR3_LAM_U57_BIT);
+ this_cpu_write(tlbstate_untag_mask, mm->context.untag_mask);
+}
+
+#else
+
+static inline u64 tlbstate_lam_cr3_mask(void)
+{
+ return 0;
+}
+
+static inline void set_tlbstate_lam_mode(struct mm_struct *mm)
+{
+}
+#endif
#endif /* !MODULE */
static inline void __native_tlb_flush_global(unsigned long cr4)
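The u8 'lam' field stores the CR3 LAM bits shifted down by X86_CR3_LAM_U57_BIT, so LAM_U57 encodes as 1 and LAM_U48 as 2, and tlbstate_lam_cr3_mask() shifts the value back up. A quick sketch of that round trip under the UAPI definitions added further below (illustrative only):

#include <assert.h>
#include <stdint.h>

#define X86_CR3_LAM_U57_BIT	61
#define X86_CR3_LAM_U57		(1ULL << 61)
#define X86_CR3_LAM_U48		(1ULL << 62)

int main(void)
{
	uint8_t lam_u57 = X86_CR3_LAM_U57 >> X86_CR3_LAM_U57_BIT;	/* 1 */
	uint8_t lam_u48 = X86_CR3_LAM_U48 >> X86_CR3_LAM_U57_BIT;	/* 2 */

	assert(((uint64_t)lam_u57 << X86_CR3_LAM_U57_BIT) == X86_CR3_LAM_U57);
	assert(((uint64_t)lam_u48 << X86_CR3_LAM_U57_BIT) == X86_CR3_LAM_U48);
	return 0;
}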
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 1cc756eafa44..457e814712af 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -7,11 +7,14 @@
#include <linux/compiler.h>
#include <linux/instrumented.h>
#include <linux/kasan-checks.h>
+#include <linux/mm_types.h>
#include <linux/string.h>
+#include <linux/mmap_lock.h>
#include <asm/asm.h>
#include <asm/page.h>
#include <asm/smap.h>
#include <asm/extable.h>
+#include <asm/tlbflush.h>
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
static inline bool pagefault_disabled(void);
@@ -21,6 +24,57 @@ static inline bool pagefault_disabled(void);
# define WARN_ON_IN_IRQ()
#endif
+#ifdef CONFIG_ADDRESS_MASKING
+/*
+ * Mask out tag bits from the address.
+ *
+ * Magic with the 'sign' allows untagging a userspace pointer without any
+ * branches while leaving kernel addresses intact.
+ */
+static inline unsigned long __untagged_addr(unsigned long addr)
+{
+ long sign;
+
+ /*
+	 * Refer to tlbstate_untag_mask directly to avoid RIP-relative relocation
+	 * in alternative instructions. The relocation goes wrong when it gets
+	 * copied to the target place.
+ */
+ asm (ALTERNATIVE("",
+ "sar $63, %[sign]\n\t" /* user_ptr ? 0 : -1UL */
+ "or %%gs:tlbstate_untag_mask, %[sign]\n\t"
+ "and %[sign], %[addr]\n\t", X86_FEATURE_LAM)
+ : [addr] "+r" (addr), [sign] "=r" (sign)
+ : "m" (tlbstate_untag_mask), "[sign]" (addr));
+
+ return addr;
+}
+
+#define untagged_addr(addr) ({ \
+ unsigned long __addr = (__force unsigned long)(addr); \
+ (__force __typeof__(addr))__untagged_addr(__addr); \
+})
+
+static inline unsigned long __untagged_addr_remote(struct mm_struct *mm,
+ unsigned long addr)
+{
+ long sign = addr >> 63;
+
+ mmap_assert_locked(mm);
+ addr &= (mm)->context.untag_mask | sign;
+
+ return addr;
+}
+
+#define untagged_addr_remote(mm, addr) ({ \
+ unsigned long __addr = (__force unsigned long)(addr); \
+ (__force __typeof__(addr))__untagged_addr_remote(mm, __addr); \
+})
+
+#else
+#define untagged_addr(addr) (addr)
+#endif
+
/**
* access_ok - Checks if a user space pointer is valid
* @addr: User space pointer to start of block to check
@@ -38,10 +92,10 @@ static inline bool pagefault_disabled(void);
* Return: true (nonzero) if the memory block may be valid, false (zero)
* if it is definitely invalid.
*/
-#define access_ok(addr, size) \
+#define access_ok(addr, size) \
({ \
WARN_ON_IN_IRQ(); \
- likely(__access_ok(addr, size)); \
+ likely(__access_ok(untagged_addr(addr), size)); \
})
#include <asm-generic/access_ok.h>
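The alternative-patched assembly above is a branchless form of the same computation __untagged_addr_remote() does in C: arithmetic-shifting bit 63 across the register yields 0 for user pointers, so the tag bits get masked, and -1 for kernel pointers, so the address passes through untouched. A plain C sketch of the trick (untag_mask would be ~GENMASK(62, 57) for LAM_U57):

#include <stdint.h>

static inline uint64_t untag(uint64_t addr, uint64_t untag_mask)
{
	int64_t sign = (int64_t)addr >> 63;	/* 0 for user, -1 for kernel */

	return addr & (untag_mask | (uint64_t)sign);
}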
diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
index f298c778f856..e8d7ebbca1a4 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -23,4 +23,9 @@
#define ARCH_MAP_VDSO_32 0x2002
#define ARCH_MAP_VDSO_64 0x2003
+#define ARCH_GET_UNTAG_MASK 0x4001
+#define ARCH_ENABLE_TAGGED_ADDR 0x4002
+#define ARCH_GET_MAX_TAG_BITS 0x4003
+#define ARCH_FORCE_TAGGED_SVA 0x4004
+
#endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
index c47cc7f2feeb..d898432947ff 100644
--- a/arch/x86/include/uapi/asm/processor-flags.h
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -82,6 +82,10 @@
#define X86_CR3_PCID_BITS 12
#define X86_CR3_PCID_MASK (_AC((1UL << X86_CR3_PCID_BITS) - 1, UL))
+#define X86_CR3_LAM_U57_BIT 61 /* Activate LAM for userspace, 62:57 bits masked */
+#define X86_CR3_LAM_U57 _BITULL(X86_CR3_LAM_U57_BIT)
+#define X86_CR3_LAM_U48_BIT 62 /* Activate LAM for userspace, 62:48 bits masked */
+#define X86_CR3_LAM_U48 _BITULL(X86_CR3_LAM_U48_BIT)
#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */
#define X86_CR3_PCID_NOFLUSH _BITULL(X86_CR3_PCID_NOFLUSH_BIT)
@@ -132,6 +136,8 @@
#define X86_CR4_PKE _BITUL(X86_CR4_PKE_BIT)
#define X86_CR4_CET_BIT 23 /* enable Control-flow Enforcement Technology */
#define X86_CR4_CET _BITUL(X86_CR4_CET_BIT)
+#define X86_CR4_LAM_SUP_BIT 28 /* LAM for supervisor pointers */
+#define X86_CR4_LAM_SUP _BITUL(X86_CR4_LAM_SUP_BIT)
/*
* x86-64 Task Priority Register, CR8
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index b650cde3f64d..50d950771371 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -48,6 +48,7 @@
#include <asm/frame.h>
#include <asm/unwind.h>
#include <asm/tdx.h>
+#include <asm/mmu_context.h>
#include "process.h"
@@ -162,6 +163,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
savesegment(es, p->thread.es);
savesegment(ds, p->thread.ds);
+
+ if (p->mm && (clone_flags & (CLONE_VM | CLONE_VFORK)) == CLONE_VM)
+ set_bit(MM_CONTEXT_LOCK_LAM, &p->mm->context.flags);
#else
p->thread.sp0 = (unsigned long) (childregs + 1);
savesegment(gs, p->thread.gs);
@@ -368,6 +372,8 @@ void arch_setup_new_exec(void)
task_clear_spec_ssb_noexec(current);
speculation_ctrl_update(read_thread_flags());
}
+
+ mm_reset_untag_mask(current->mm);
}
#ifdef CONFIG_X86_IOPL_IOPERM
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index bb65a68b4b49..223b223f713f 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -671,7 +671,7 @@ void set_personality_64bit(void)
task_pt_regs(current)->orig_ax = __NR_execve;
current_thread_info()->status &= ~TS_COMPAT;
if (current->mm)
- current->mm->context.flags = MM_CONTEXT_HAS_VSYSCALL;
+ __set_bit(MM_CONTEXT_HAS_VSYSCALL, &current->mm->context.flags);
/* TBD: overwrites user setup. Should have two bits.
But 64bit processes have always behaved this way,
@@ -708,7 +708,7 @@ static void __set_personality_ia32(void)
* uprobes applied to this MM need to know this and
* cannot use user_64bit_mode() at that time.
*/
- current->mm->context.flags = MM_CONTEXT_UPROBE_IA32;
+ __set_bit(MM_CONTEXT_UPROBE_IA32, &current->mm->context.flags);
}
current->personality |= force_personality32;
@@ -743,6 +743,52 @@ static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
}
#endif
+#ifdef CONFIG_ADDRESS_MASKING
+
+#define LAM_U57_BITS 6
+
+static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_LAM))
+ return -ENODEV;
+
+ /* PTRACE_ARCH_PRCTL */
+ if (current->mm != mm)
+ return -EINVAL;
+
+ if (mm_valid_pasid(mm) &&
+ !test_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &mm->context.flags))
+ return -EINVAL;
+
+ if (mmap_write_lock_killable(mm))
+ return -EINTR;
+
+ if (test_bit(MM_CONTEXT_LOCK_LAM, &mm->context.flags)) {
+ mmap_write_unlock(mm);
+ return -EBUSY;
+ }
+
+ if (!nr_bits) {
+ mmap_write_unlock(mm);
+ return -EINVAL;
+ } else if (nr_bits <= LAM_U57_BITS) {
+ mm->context.lam_cr3_mask = X86_CR3_LAM_U57;
+ mm->context.untag_mask = ~GENMASK(62, 57);
+ } else {
+ mmap_write_unlock(mm);
+ return -EINVAL;
+ }
+
+ write_cr3(__read_cr3() | mm->context.lam_cr3_mask);
+ set_tlbstate_lam_mode(mm);
+ set_bit(MM_CONTEXT_LOCK_LAM, &mm->context.flags);
+
+ mmap_write_unlock(mm);
+
+ return 0;
+}
+#endif
+
long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
{
int ret = 0;
@@ -830,7 +876,23 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
case ARCH_MAP_VDSO_64:
return prctl_map_vdso(&vdso_image_64, arg2);
#endif
-
+#ifdef CONFIG_ADDRESS_MASKING
+ case ARCH_GET_UNTAG_MASK:
+ return put_user(task->mm->context.untag_mask,
+ (unsigned long __user *)arg2);
+ case ARCH_ENABLE_TAGGED_ADDR:
+ return prctl_enable_tagged_addr(task->mm, arg2);
+ case ARCH_FORCE_TAGGED_SVA:
+ if (current != task)
+ return -EINVAL;
+ set_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &task->mm->context.flags);
+ return 0;
+ case ARCH_GET_MAX_TAG_BITS:
+ if (!cpu_feature_enabled(X86_FEATURE_LAM))
+ return put_user(0, (unsigned long __user *)arg2);
+ else
+ return put_user(LAM_U57_BITS, (unsigned long __user *)arg2);
+#endif
default:
ret = -EINVAL;
break;
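From userspace, the new arch_prctl() commands compose as follows. This is a sketch modeled on the selftest's set_lam() at the end of this patch; the constants match the UAPI additions above and error handling is minimal:

#include <stdint.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#define ARCH_GET_UNTAG_MASK	0x4001
#define ARCH_ENABLE_TAGGED_ADDR	0x4002
#define ARCH_GET_MAX_TAG_BITS	0x4003

int main(void)
{
	uint64_t bits = 0, mask = 0;

	if (syscall(SYS_arch_prctl, ARCH_GET_MAX_TAG_BITS, &bits) || !bits)
		return 1;	/* LAM not supported */

	/* Any nr_bits in 1..6 selects LAM_U57; the mode locks once enabled */
	if (syscall(SYS_arch_prctl, ARCH_ENABLE_TAGGED_ADDR, bits))
		return 1;

	syscall(SYS_arch_prctl, ARCH_GET_UNTAG_MASK, &mask);
	printf("untag_mask: %#llx\n", (unsigned long long)mask);
	return 0;
}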
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index d317dc3d06a3..8b83d8fbce71 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -671,15 +671,15 @@ static bool try_fixup_enqcmd_gp(void)
if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
return false;
- pasid = current->mm->pasid;
-
/*
* If the mm has not been allocated a
* PASID, the #GP can not be fixed up.
*/
- if (!pasid_valid(pasid))
+ if (!mm_valid_pasid(current->mm))
return false;
+ pasid = current->mm->pasid;
+
/*
* Did this thread already have its PASID activated?
* If so, the #GP must be from something else.
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index b70d98d79a9d..b64a2bd1a1ef 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -37,22 +37,22 @@
#define ASM_BARRIER_NOSPEC ALTERNATIVE "", "lfence", X86_FEATURE_LFENCE_RDTSC
-#ifdef CONFIG_X86_5LEVEL
-#define LOAD_TASK_SIZE_MINUS_N(n) \
- ALTERNATIVE __stringify(mov $((1 << 47) - 4096 - (n)),%rdx), \
- __stringify(mov $((1 << 56) - 4096 - (n)),%rdx), X86_FEATURE_LA57
-#else
-#define LOAD_TASK_SIZE_MINUS_N(n) \
- mov $(TASK_SIZE_MAX - (n)),%_ASM_DX
-#endif
+.macro check_range size:req
+.if IS_ENABLED(CONFIG_X86_64)
+ mov %rax, %rdx
+ sar $63, %rdx
+ or %rdx, %rax
+.else
+ cmp $TASK_SIZE_MAX-\size+1, %eax
+ jae .Lbad_get_user
+ sbb %edx, %edx /* array_index_mask_nospec() */
+ and %edx, %eax
+.endif
+.endm
.text
SYM_FUNC_START(__get_user_1)
- LOAD_TASK_SIZE_MINUS_N(0)
- cmp %_ASM_DX,%_ASM_AX
- jae bad_get_user
- sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
- and %_ASM_DX, %_ASM_AX
+ check_range size=1
ASM_STAC
1: movzbl (%_ASM_AX),%edx
xor %eax,%eax
@@ -62,11 +62,7 @@ SYM_FUNC_END(__get_user_1)
EXPORT_SYMBOL(__get_user_1)
SYM_FUNC_START(__get_user_2)
- LOAD_TASK_SIZE_MINUS_N(1)
- cmp %_ASM_DX,%_ASM_AX
- jae bad_get_user
- sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
- and %_ASM_DX, %_ASM_AX
+ check_range size=2
ASM_STAC
2: movzwl (%_ASM_AX),%edx
xor %eax,%eax
@@ -76,11 +72,7 @@ SYM_FUNC_END(__get_user_2)
EXPORT_SYMBOL(__get_user_2)
SYM_FUNC_START(__get_user_4)
- LOAD_TASK_SIZE_MINUS_N(3)
- cmp %_ASM_DX,%_ASM_AX
- jae bad_get_user
- sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
- and %_ASM_DX, %_ASM_AX
+ check_range size=4
ASM_STAC
3: movl (%_ASM_AX),%edx
xor %eax,%eax
@@ -90,30 +82,17 @@ SYM_FUNC_END(__get_user_4)
EXPORT_SYMBOL(__get_user_4)
SYM_FUNC_START(__get_user_8)
-#ifdef CONFIG_X86_64
- LOAD_TASK_SIZE_MINUS_N(7)
- cmp %_ASM_DX,%_ASM_AX
- jae bad_get_user
- sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
- and %_ASM_DX, %_ASM_AX
+ check_range size=8
ASM_STAC
+#ifdef CONFIG_X86_64
4: movq (%_ASM_AX),%rdx
- xor %eax,%eax
- ASM_CLAC
- RET
#else
- LOAD_TASK_SIZE_MINUS_N(7)
- cmp %_ASM_DX,%_ASM_AX
- jae bad_get_user_8
- sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
- and %_ASM_DX, %_ASM_AX
- ASM_STAC
4: movl (%_ASM_AX),%edx
5: movl 4(%_ASM_AX),%ecx
+#endif
xor %eax,%eax
ASM_CLAC
RET
-#endif
SYM_FUNC_END(__get_user_8)
EXPORT_SYMBOL(__get_user_8)
@@ -166,7 +145,7 @@ EXPORT_SYMBOL(__get_user_nocheck_8)
SYM_CODE_START_LOCAL(.Lbad_get_user_clac)
ASM_CLAC
-bad_get_user:
+.Lbad_get_user:
xor %edx,%edx
mov $(-EFAULT),%_ASM_AX
RET
@@ -184,23 +163,23 @@ SYM_CODE_END(.Lbad_get_user_8_clac)
#endif
/* get_user */
- _ASM_EXTABLE_UA(1b, .Lbad_get_user_clac)
- _ASM_EXTABLE_UA(2b, .Lbad_get_user_clac)
- _ASM_EXTABLE_UA(3b, .Lbad_get_user_clac)
+ _ASM_EXTABLE(1b, .Lbad_get_user_clac)
+ _ASM_EXTABLE(2b, .Lbad_get_user_clac)
+ _ASM_EXTABLE(3b, .Lbad_get_user_clac)
#ifdef CONFIG_X86_64
- _ASM_EXTABLE_UA(4b, .Lbad_get_user_clac)
+ _ASM_EXTABLE(4b, .Lbad_get_user_clac)
#else
- _ASM_EXTABLE_UA(4b, .Lbad_get_user_8_clac)
- _ASM_EXTABLE_UA(5b, .Lbad_get_user_8_clac)
+ _ASM_EXTABLE(4b, .Lbad_get_user_8_clac)
+ _ASM_EXTABLE(5b, .Lbad_get_user_8_clac)
#endif
/* __get_user */
- _ASM_EXTABLE_UA(6b, .Lbad_get_user_clac)
- _ASM_EXTABLE_UA(7b, .Lbad_get_user_clac)
- _ASM_EXTABLE_UA(8b, .Lbad_get_user_clac)
+ _ASM_EXTABLE(6b, .Lbad_get_user_clac)
+ _ASM_EXTABLE(7b, .Lbad_get_user_clac)
+ _ASM_EXTABLE(8b, .Lbad_get_user_clac)
#ifdef CONFIG_X86_64
- _ASM_EXTABLE_UA(9b, .Lbad_get_user_clac)
+ _ASM_EXTABLE(9b, .Lbad_get_user_clac)
#else
- _ASM_EXTABLE_UA(9b, .Lbad_get_user_8_clac)
- _ASM_EXTABLE_UA(10b, .Lbad_get_user_8_clac)
+ _ASM_EXTABLE(9b, .Lbad_get_user_8_clac)
+ _ASM_EXTABLE(10b, .Lbad_get_user_8_clac)
#endif
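On 64-bit, check_range drops the explicit TASK_SIZE_MAX comparison: OR-ing the sign-extended top bit into the address leaves user pointers unchanged and turns kernel pointers into all-ones, a non-canonical address that faults on access. The same computation in C, for illustration:

#include <stdint.h>

/*
 * 64-bit check_range equivalent: user pointers pass through, pointers
 * with bit 63 set become all-ones (non-canonical) and fault when used.
 */
static inline uint64_t check_range64(uint64_t addr)
{
	return addr | (uint64_t)((int64_t)addr >> 63);
}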
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index 32125224fcca..3062d09a776d 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -33,20 +33,20 @@
* as they get called from within inline assembly.
*/
-#ifdef CONFIG_X86_5LEVEL
-#define LOAD_TASK_SIZE_MINUS_N(n) \
- ALTERNATIVE __stringify(mov $((1 << 47) - 4096 - (n)),%rbx), \
- __stringify(mov $((1 << 56) - 4096 - (n)),%rbx), X86_FEATURE_LA57
-#else
-#define LOAD_TASK_SIZE_MINUS_N(n) \
- mov $(TASK_SIZE_MAX - (n)),%_ASM_BX
-#endif
+.macro check_range size:req
+.if IS_ENABLED(CONFIG_X86_64)
+ mov %rcx, %rbx
+ sar $63, %rbx
+ or %rbx, %rcx
+.else
+ cmp $TASK_SIZE_MAX-\size+1, %ecx
+ jae .Lbad_put_user
+.endif
+.endm
.text
SYM_FUNC_START(__put_user_1)
- LOAD_TASK_SIZE_MINUS_N(0)
- cmp %_ASM_BX,%_ASM_CX
- jae .Lbad_put_user
+ check_range size=1
ASM_STAC
1: movb %al,(%_ASM_CX)
xor %ecx,%ecx
@@ -66,9 +66,7 @@ SYM_FUNC_END(__put_user_nocheck_1)
EXPORT_SYMBOL(__put_user_nocheck_1)
SYM_FUNC_START(__put_user_2)
- LOAD_TASK_SIZE_MINUS_N(1)
- cmp %_ASM_BX,%_ASM_CX
- jae .Lbad_put_user
+ check_range size=2
ASM_STAC
3: movw %ax,(%_ASM_CX)
xor %ecx,%ecx
@@ -88,9 +86,7 @@ SYM_FUNC_END(__put_user_nocheck_2)
EXPORT_SYMBOL(__put_user_nocheck_2)
SYM_FUNC_START(__put_user_4)
- LOAD_TASK_SIZE_MINUS_N(3)
- cmp %_ASM_BX,%_ASM_CX
- jae .Lbad_put_user
+ check_range size=4
ASM_STAC
5: movl %eax,(%_ASM_CX)
xor %ecx,%ecx
@@ -110,9 +106,7 @@ SYM_FUNC_END(__put_user_nocheck_4)
EXPORT_SYMBOL(__put_user_nocheck_4)
SYM_FUNC_START(__put_user_8)
- LOAD_TASK_SIZE_MINUS_N(7)
- cmp %_ASM_BX,%_ASM_CX
- jae .Lbad_put_user
+ check_range size=8
ASM_STAC
7: mov %_ASM_AX,(%_ASM_CX)
#ifdef CONFIG_X86_32
@@ -144,15 +138,15 @@ SYM_CODE_START_LOCAL(.Lbad_put_user_clac)
RET
SYM_CODE_END(.Lbad_put_user_clac)
- _ASM_EXTABLE_UA(1b, .Lbad_put_user_clac)
- _ASM_EXTABLE_UA(2b, .Lbad_put_user_clac)
- _ASM_EXTABLE_UA(3b, .Lbad_put_user_clac)
- _ASM_EXTABLE_UA(4b, .Lbad_put_user_clac)
- _ASM_EXTABLE_UA(5b, .Lbad_put_user_clac)
- _ASM_EXTABLE_UA(6b, .Lbad_put_user_clac)
- _ASM_EXTABLE_UA(7b, .Lbad_put_user_clac)
- _ASM_EXTABLE_UA(9b, .Lbad_put_user_clac)
+ _ASM_EXTABLE(1b, .Lbad_put_user_clac)
+ _ASM_EXTABLE(2b, .Lbad_put_user_clac)
+ _ASM_EXTABLE(3b, .Lbad_put_user_clac)
+ _ASM_EXTABLE(4b, .Lbad_put_user_clac)
+ _ASM_EXTABLE(5b, .Lbad_put_user_clac)
+ _ASM_EXTABLE(6b, .Lbad_put_user_clac)
+ _ASM_EXTABLE(7b, .Lbad_put_user_clac)
+ _ASM_EXTABLE(9b, .Lbad_put_user_clac)
#ifdef CONFIG_X86_32
- _ASM_EXTABLE_UA(8b, .Lbad_put_user_clac)
- _ASM_EXTABLE_UA(10b, .Lbad_put_user_clac)
+ _ASM_EXTABLE(8b, .Lbad_put_user_clac)
+ _ASM_EXTABLE(10b, .Lbad_put_user_clac)
#endif
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index cbc53da4c1b4..3cdac0f0055d 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -1048,6 +1048,11 @@ __visible DEFINE_PER_CPU_ALIGNED(struct tlb_state, cpu_tlbstate) = {
.cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */
};
+#ifdef CONFIG_ADDRESS_MASKING
+DEFINE_PER_CPU(u64, tlbstate_untag_mask);
+EXPORT_PER_CPU_SYMBOL(tlbstate_untag_mask);
+#endif
+
void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
{
/* entry 0 MUST be WB (hardwired to speed up translations) */
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 16c5292d227d..267acf27480a 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -154,26 +154,30 @@ static inline u16 user_pcid(u16 asid)
return ret;
}
-static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
+static inline unsigned long build_cr3(pgd_t *pgd, u16 asid, unsigned long lam)
{
+ unsigned long cr3 = __sme_pa(pgd) | lam;
+
if (static_cpu_has(X86_FEATURE_PCID)) {
- return __sme_pa(pgd) | kern_pcid(asid);
+ VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+ cr3 |= kern_pcid(asid);
} else {
VM_WARN_ON_ONCE(asid != 0);
- return __sme_pa(pgd);
}
+
+ return cr3;
}
-static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
+static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid,
+ unsigned long lam)
{
- VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
/*
* Use boot_cpu_has() instead of this_cpu_has() as this function
* might be called during early boot. This should work even after
* boot because all CPU's the have same capabilities:
*/
VM_WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_PCID));
- return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
+ return build_cr3(pgd, asid, lam) | CR3_NOFLUSH;
}
/*
@@ -274,15 +278,16 @@ static inline void invalidate_user_asid(u16 asid)
(unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));
}
-static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
+static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, unsigned long lam,
+ bool need_flush)
{
unsigned long new_mm_cr3;
if (need_flush) {
invalidate_user_asid(new_asid);
- new_mm_cr3 = build_cr3(pgdir, new_asid);
+ new_mm_cr3 = build_cr3(pgdir, new_asid, lam);
} else {
- new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
+ new_mm_cr3 = build_cr3_noflush(pgdir, new_asid, lam);
}
/*
@@ -491,6 +496,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
{
struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+ unsigned long new_lam = mm_lam_cr3_mask(next);
bool was_lazy = this_cpu_read(cpu_tlbstate_shared.is_lazy);
unsigned cpu = smp_processor_id();
u64 next_tlb_gen;
@@ -520,7 +526,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* isn't free.
*/
#ifdef CONFIG_DEBUG_VM
- if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
+ if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid,
+ tlbstate_lam_cr3_mask()))) {
/*
* If we were to BUG here, we'd be very likely to kill
* the system so hard that we don't see the call trace.
@@ -552,10 +559,16 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* instruction.
*/
if (real_prev == next) {
+ /* Not actually switching mm's */
VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
next->context.ctx_id);
/*
+ * If this races with another thread that enables lam, 'new_lam'
+ * might not match tlbstate_lam_cr3_mask().
+ */
+
+ /*
* Even in lazy TLB mode, the CPU should stay set in the
* mm_cpumask. The TLB shootdown code can figure out from
* cpu_tlbstate_shared.is_lazy whether or not to send an IPI.
@@ -622,15 +635,16 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
barrier();
}
+ set_tlbstate_lam_mode(next);
if (need_flush) {
this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
- load_new_mm_cr3(next->pgd, new_asid, true);
+ load_new_mm_cr3(next->pgd, new_asid, new_lam, true);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
} else {
/* The new ASID is already up to date. */
- load_new_mm_cr3(next->pgd, new_asid, false);
+ load_new_mm_cr3(next->pgd, new_asid, new_lam, false);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
}
@@ -691,6 +705,10 @@ void initialize_tlbstate_and_flush(void)
/* Assert that CR3 already references the right mm. */
WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));
+ /* LAM expected to be disabled */
+ WARN_ON(cr3 & (X86_CR3_LAM_U48 | X86_CR3_LAM_U57));
+ WARN_ON(mm_lam_cr3_mask(mm));
+
/*
* Assert that CR4.PCIDE is set if needed. (CR4.PCIDE initialization
* doesn't work like other CR4 bits because it can only be set from
@@ -699,8 +717,8 @@ void initialize_tlbstate_and_flush(void)
WARN_ON(boot_cpu_has(X86_FEATURE_PCID) &&
!(cr4_read_shadow() & X86_CR4_PCIDE));
- /* Force ASID 0 and force a TLB flush. */
- write_cr3(build_cr3(mm->pgd, 0));
+ /* Disable LAM, force ASID 0 and force a TLB flush. */
+ write_cr3(build_cr3(mm->pgd, 0, 0));
/* Reinitialize tlbstate. */
this_cpu_write(cpu_tlbstate.last_user_mm_spec, LAST_USER_MM_INIT);
@@ -708,6 +726,7 @@ void initialize_tlbstate_and_flush(void)
this_cpu_write(cpu_tlbstate.next_asid, 1);
this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);
+ set_tlbstate_lam_mode(mm);
for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
@@ -1071,8 +1090,10 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
*/
unsigned long __get_current_cr3_fast(void)
{
- unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
- this_cpu_read(cpu_tlbstate.loaded_mm_asid));
+ unsigned long cr3 =
+ build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
+ this_cpu_read(cpu_tlbstate.loaded_mm_asid),
+ tlbstate_lam_cr3_mask());
/* For now, be very restrictive about when this can be called. */
VM_WARN_ON(in_nmi() || preemptible());
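With the new lam argument, a CR3 value composes as physical PGD address | LAM bits | PCID (plus CR3_NOFLUSH in the noflush variant). A worked sketch with illustrative numbers; the real build_cr3() additionally routes the ASID through kern_pcid() and may carry an SME encryption bit in the PGD address:

#include <assert.h>
#include <stdint.h>

#define X86_CR3_LAM_U57 (1ULL << 61)

static uint64_t build_cr3_sketch(uint64_t pgd_pa, uint16_t pcid, uint64_t lam)
{
	return pgd_pa | lam | pcid;
}

int main(void)
{
	/* Illustrative values: PGD at 0x1234000, hardware PCID 5, LAM_U57 on */
	uint64_t cr3 = build_cr3_sketch(0x1234000, 5, X86_CR3_LAM_U57);

	assert(cr3 == 0x2000000001234005ULL);
	return 0;
}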
diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c
index 24bf9b2b58aa..dd76a1a09cf7 100644
--- a/drivers/iommu/iommu-sva.c
+++ b/drivers/iommu/iommu-sva.c
@@ -2,6 +2,7 @@
/*
* Helpers for IOMMU drivers implementing SVA
*/
+#include <linux/mmu_context.h>
#include <linux/mutex.h>
#include <linux/sched/mm.h>
#include <linux/iommu.h>
@@ -32,16 +33,19 @@ int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max)
min == 0 || max < min)
return -EINVAL;
+ if (!arch_pgtable_dma_compat(mm))
+ return -EBUSY;
+
mutex_lock(&iommu_sva_lock);
/* Is a PASID already associated with this mm? */
- if (pasid_valid(mm->pasid)) {
+ if (mm_valid_pasid(mm)) {
if (mm->pasid < min || mm->pasid >= max)
ret = -EOVERFLOW;
goto out;
}
pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm);
- if (!pasid_valid(pasid))
+ if (pasid == INVALID_IOASID)
ret = -ENOMEM;
else
mm_pasid_set(mm, pasid);
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 493c31de0edb..3d4dd9420c30 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -580,7 +580,7 @@ static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
goto done;
}
- vaddr = untagged_addr(vaddr);
+ vaddr = untagged_addr_remote(mm, vaddr);
retry:
vma = vma_lookup(mm, vaddr);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 425824ad85e1..d35bbf35a874 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -91,6 +91,7 @@
#include <linux/user_namespace.h>
#include <linux/fs_struct.h>
#include <linux/kthread.h>
+#include <linux/mmu_context.h>
#include <asm/processor.h>
#include "internal.h"
@@ -425,6 +426,11 @@ static inline void task_thp_status(struct seq_file *m, struct mm_struct *mm)
seq_printf(m, "THP_enabled:\t%d\n", thp_enabled);
}
+static inline void task_untag_mask(struct seq_file *m, struct mm_struct *mm)
+{
+ seq_printf(m, "untag_mask:\t%#lx\n", mm_untag_mask(mm));
+}
+
int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
@@ -440,6 +446,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
task_mem(m, mm);
task_core_dumping(m, task);
task_thp_status(m, mm);
+ task_untag_mask(m, mm);
mmput(mm);
}
task_sig(m, task);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index cb49479acd2e..420510f6a545 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1688,8 +1688,13 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
/* watch out for wraparound */
start_vaddr = end_vaddr;
- if (svpfn <= (ULONG_MAX >> PAGE_SHIFT))
- start_vaddr = untagged_addr(svpfn << PAGE_SHIFT);
+ if (svpfn <= (ULONG_MAX >> PAGE_SHIFT)) {
+ ret = mmap_read_lock_killable(mm);
+ if (ret)
+ goto out_free;
+ start_vaddr = untagged_addr_remote(mm, svpfn << PAGE_SHIFT);
+ mmap_read_unlock(mm);
+ }
/* Ensure the address is inside the task */
if (start_vaddr > mm->task_size)
diff --git a/include/linux/ioasid.h b/include/linux/ioasid.h
index af1c9d62e642..836ae09e92c2 100644
--- a/include/linux/ioasid.h
+++ b/include/linux/ioasid.h
@@ -40,10 +40,6 @@ void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid,
int ioasid_register_allocator(struct ioasid_allocator_ops *allocator);
void ioasid_unregister_allocator(struct ioasid_allocator_ops *allocator);
int ioasid_set_data(ioasid_t ioasid, void *data);
-static inline bool pasid_valid(ioasid_t ioasid)
-{
- return ioasid != INVALID_IOASID;
-}
#else /* !CONFIG_IOASID */
static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min,
@@ -74,10 +70,5 @@ static inline int ioasid_set_data(ioasid_t ioasid, void *data)
return -ENOTSUPP;
}
-static inline bool pasid_valid(ioasid_t ioasid)
-{
- return false;
-}
-
#endif /* CONFIG_IOASID */
#endif /* __LINUX_IOASID_H */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3731999cd9f0..27ce77080c79 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -98,17 +98,6 @@ extern int mmap_rnd_compat_bits __read_mostly;
#include <asm/page.h>
#include <asm/processor.h>
-/*
- * Architectures that support memory tagging (assigning tags to memory regions,
- * embedding these tags into addresses that point to these memory regions, and
- * checking that the memory and the pointer tags match on memory accesses)
- * redefine this macro to strip tags from pointers.
- * It's defined as noop for architectures that don't support memory tagging.
- */
-#ifndef untagged_addr
-#define untagged_addr(addr) (addr)
-#endif
-
#ifndef __pa_symbol
#define __pa_symbol(x) __pa(RELOC_HIDE((unsigned long)(x), 0))
#endif
diff --git a/include/linux/mmu_context.h b/include/linux/mmu_context.h
index b9b970f7ab45..f2b7a3f04099 100644
--- a/include/linux/mmu_context.h
+++ b/include/linux/mmu_context.h
@@ -28,4 +28,18 @@ static inline void leave_mm(int cpu) { }
# define task_cpu_possible(cpu, p) cpumask_test_cpu((cpu), task_cpu_possible_mask(p))
#endif
+#ifndef mm_untag_mask
+static inline unsigned long mm_untag_mask(struct mm_struct *mm)
+{
+ return -1UL;
+}
+#endif
+
+#ifndef arch_pgtable_dma_compat
+static inline bool arch_pgtable_dma_compat(struct mm_struct *mm)
+{
+ return true;
+}
+#endif
+
#endif
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 689dbe812563..af12fcb11005 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -485,6 +485,11 @@ static inline void mm_pasid_init(struct mm_struct *mm)
mm->pasid = INVALID_IOASID;
}
+static inline bool mm_valid_pasid(struct mm_struct *mm)
+{
+ return mm->pasid != INVALID_IOASID;
+}
+
/* Associate a PASID with an mm_struct: */
static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid)
{
@@ -493,13 +498,14 @@ static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid)
static inline void mm_pasid_drop(struct mm_struct *mm)
{
- if (pasid_valid(mm->pasid)) {
+ if (mm_valid_pasid(mm)) {
ioasid_free(mm->pasid);
mm->pasid = INVALID_IOASID;
}
}
#else
static inline void mm_pasid_init(struct mm_struct *mm) {}
+static inline bool mm_valid_pasid(struct mm_struct *mm) { return false; }
static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) {}
static inline void mm_pasid_drop(struct mm_struct *mm) {}
#endif
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index ab9728138ad6..3064314f4832 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -11,6 +11,28 @@
#include <asm/uaccess.h>
/*
+ * Architectures that support memory tagging (assigning tags to memory regions,
+ * embedding these tags into addresses that point to these memory regions, and
+ * checking that the memory and the pointer tags match on memory accesses)
+ * redefine this macro to strip tags from pointers.
+ *
+ * Passing down the mm_struct allows defining untagging rules on a
+ * per-process basis.
+ *
+ * It's defined as noop for architectures that don't support memory tagging.
+ */
+#ifndef untagged_addr
+#define untagged_addr(addr) (addr)
+#endif
+
+#ifndef untagged_addr_remote
+#define untagged_addr_remote(mm, addr) ({ \
+ mmap_assert_locked(mm); \
+ untagged_addr(addr); \
+})
+#endif
+
+/*
* Architectures should provide two primitives (raw_copy_{to,from}_user())
* and get rid of their private instances of copy_{to,from}_user() and
* __copy_{to,from}_user{,_inatomic}().
diff --git a/mm/gup.c b/mm/gup.c
index 1f72a717232b..ff689c88a357 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1085,7 +1085,7 @@ static long __get_user_pages(struct mm_struct *mm,
if (!nr_pages)
return 0;
- start = untagged_addr(start);
+ start = untagged_addr_remote(mm, start);
VM_BUG_ON(!!pages != !!(gup_flags & (FOLL_GET | FOLL_PIN)));
@@ -1259,7 +1259,7 @@ int fixup_user_fault(struct mm_struct *mm,
struct vm_area_struct *vma;
vm_fault_t ret;
- address = untagged_addr(address);
+ address = untagged_addr_remote(mm, address);
if (unlocked)
fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
diff --git a/mm/madvise.c b/mm/madvise.c
index 24c5cffe3e6c..b5ffbaf616f5 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -1390,8 +1390,6 @@ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int beh
size_t len;
struct blk_plug plug;
- start = untagged_addr(start);
-
if (!madvise_behavior_valid(behavior))
return -EINVAL;
@@ -1423,6 +1421,9 @@ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int beh
mmap_read_lock(mm);
}
+ start = untagged_addr_remote(mm, start);
+ end = start + len;
+
blk_start_plug(&plug);
error = madvise_walk_vmas(mm, start, end, behavior,
madvise_vma_behavior);
diff --git a/mm/migrate.c b/mm/migrate.c
index 02cace7955d4..01cac26a3127 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2099,15 +2099,18 @@ static int do_move_pages_to_node(struct mm_struct *mm,
* target node
* 1 - when it has been queued
*/
-static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
+static int add_page_for_migration(struct mm_struct *mm, const void __user *p,
int node, struct list_head *pagelist, bool migrate_all)
{
struct vm_area_struct *vma;
+ unsigned long addr;
struct page *page;
int err;
bool isolated;
mmap_read_lock(mm);
+ addr = (unsigned long)untagged_addr_remote(mm, p);
+
err = -EFAULT;
vma = vma_lookup(mm, addr);
if (!vma || !vma_migratable(vma))
@@ -2213,7 +2216,6 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
for (i = start = 0; i < nr_pages; i++) {
const void __user *p;
- unsigned long addr;
int node;
err = -EFAULT;
@@ -2221,7 +2223,6 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
goto out_flush;
if (get_user(node, nodes + i))
goto out_flush;
- addr = (unsigned long)untagged_addr(p);
err = -ENODEV;
if (node < 0 || node >= MAX_NUMNODES)
@@ -2249,8 +2250,8 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
* Errors in the page lookup or isolation are not fatal and we simply
* report them via status
*/
- err = add_page_for_migration(mm, addr, current_node,
- &pagelist, flags & MPOL_MF_MOVE_ALL);
+ err = add_page_for_migration(mm, p, current_node, &pagelist,
+ flags & MPOL_MF_MOVE_ALL);
if (err > 0) {
/* The page is successfully queued for migration */
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index ca9374b56ead..598135d3162b 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -18,7 +18,7 @@ TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering \
- corrupt_xstate_header amx
+ corrupt_xstate_header amx lam
# Some selftests require 32bit support enabled also on 64bit systems
TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c
new file mode 100644
index 000000000000..eb0e46905bf9
--- /dev/null
+++ b/tools/testing/selftests/x86/lam.c
@@ -0,0 +1,1241 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <time.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <sys/mman.h>
+#include <sys/utsname.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <sched.h>
+#include <cpuid.h>	/* for __cpuid_count() */
+
+#include <sys/uio.h>
+#include <linux/io_uring.h>
+#include "../kselftest.h"
+
+#ifndef __x86_64__
+# error This test is 64-bit only
+#endif
+
+/* LAM modes, these definitions were copied from kernel code */
+#define LAM_NONE 0
+#define LAM_U57_BITS 6
+
+#define LAM_U57_MASK (0x3fULL << 57)
+/* arch prctl for LAM */
+#define ARCH_GET_UNTAG_MASK 0x4001
+#define ARCH_ENABLE_TAGGED_ADDR 0x4002
+#define ARCH_GET_MAX_TAG_BITS 0x4003
+#define ARCH_FORCE_TAGGED_SVA 0x4004
+
+/* Specified test function bits */
+#define FUNC_MALLOC 0x1
+#define FUNC_BITS 0x2
+#define FUNC_MMAP 0x4
+#define FUNC_SYSCALL 0x8
+#define FUNC_URING 0x10
+#define FUNC_INHERITE 0x20
+#define FUNC_PASID 0x40
+
+#define TEST_MASK 0x7f
+
+#define LOW_ADDR (0x1UL << 30)
+#define HIGH_ADDR (0x3UL << 48)
+
+#define MALLOC_LEN 32
+
+#define PAGE_SIZE (4 << 10)
+
+#define STACK_SIZE 65536
+
+#define barrier() ({ \
+ __asm__ __volatile__("" : : : "memory"); \
+})
+
+#define URING_QUEUE_SZ 1
+#define URING_BLOCK_SZ 2048
+
+/* PASID test defines */
+#define LAM_CMD_BIT 0x1
+#define PAS_CMD_BIT 0x2
+#define SVA_CMD_BIT 0x4
+
+#define PAS_CMD(cmd1, cmd2, cmd3) (((cmd3) << 8) | ((cmd2) << 4) | ((cmd1) << 0))
+
+struct testcases {
+ unsigned int later;
+ int expected; /* 2: SIGSEGV Error; 1: other errors */
+ unsigned long lam;
+ uint64_t addr;
+ uint64_t cmd;
+ int (*test_func)(struct testcases *test);
+ const char *msg;
+};
+
+/* Used by the uring CQ: source file descriptor and file size */
+struct file_io {
+ int file_fd;
+ off_t file_sz;
+ struct iovec iovecs[];
+};
+
+struct io_uring_queue {
+ unsigned int *head;
+ unsigned int *tail;
+ unsigned int *ring_mask;
+ unsigned int *ring_entries;
+ unsigned int *flags;
+ unsigned int *array;
+ union {
+ struct io_uring_cqe *cqes;
+ struct io_uring_sqe *sqes;
+ } queue;
+ size_t ring_sz;
+};
+
+struct io_ring {
+ int ring_fd;
+ struct io_uring_queue sq_ring;
+ struct io_uring_queue cq_ring;
+};
+
+int tests_cnt;
+jmp_buf segv_env;
+
+static void segv_handler(int sig)
+{
+	ksft_print_msg("Got segmentation fault (%d).", sig);
+
+ siglongjmp(segv_env, 1);
+}
+
+static inline int cpu_has_lam(void)
+{
+ unsigned int cpuinfo[4];
+
+ __cpuid_count(0x7, 1, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3]);
+
+ return (cpuinfo[0] & (1 << 26));
+}
+
+/* Check 5-level page table feature in CPUID.(EAX=07H, ECX=00H):ECX.[bit 16] */
+static inline int cpu_has_la57(void)
+{
+ unsigned int cpuinfo[4];
+
+ __cpuid_count(0x7, 0, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3]);
+
+ return (cpuinfo[2] & (1 << 16));
+}
+
+/*
+ * Set the LAM mode and read back the untag mask.
+ * Check if the untag mask matches the expected value.
+ *
+ * @return:
+ * 0: Set LAM mode successfully
+ * others: failed to set LAM
+ */
+static int set_lam(unsigned long lam)
+{
+ int ret = 0;
+ uint64_t ptr = 0;
+
+ if (lam != LAM_U57_BITS && lam != LAM_NONE)
+ return -1;
+
+	/* Return value deliberately ignored; verified via the mask below */
+ syscall(SYS_arch_prctl, ARCH_ENABLE_TAGGED_ADDR, lam);
+
+ /* Get untagged mask */
+ syscall(SYS_arch_prctl, ARCH_GET_UNTAG_MASK, &ptr);
+
+ /* Check mask returned is expected */
+ if (lam == LAM_U57_BITS)
+ ret = (ptr != ~(LAM_U57_MASK));
+ else if (lam == LAM_NONE)
+ ret = (ptr != -1ULL);
+
+ return ret;
+}
+
+static unsigned long get_default_tag_bits(void)
+{
+ pid_t pid;
+ int lam = LAM_NONE;
+ int ret = 0;
+
+ pid = fork();
+ if (pid < 0) {
+ perror("Fork failed.");
+ } else if (pid == 0) {
+ /* Set LAM mode in child process */
+ if (set_lam(LAM_U57_BITS) == 0)
+ lam = LAM_U57_BITS;
+ else
+ lam = LAM_NONE;
+ exit(lam);
+ } else {
+ wait(&ret);
+ lam = WEXITSTATUS(ret);
+ }
+
+ return lam;
+}
+
+/*
+ * Read back the untag mask and translate it to the current LAM mode.
+ */
+ */
+static int get_lam(void)
+{
+ uint64_t ptr = 0;
+ int ret = -1;
+ /* Get untagged mask */
+ if (syscall(SYS_arch_prctl, ARCH_GET_UNTAG_MASK, &ptr) == -1)
+ return -1;
+
+ /* Check mask returned is expected */
+ if (ptr == ~(LAM_U57_MASK))
+ ret = LAM_U57_BITS;
+ else if (ptr == -1ULL)
+ ret = LAM_NONE;
+
+ return ret;
+}
+
+/* Set metadata in the high pointer bits according to the LAM mode */
+static uint64_t set_metadata(uint64_t src, unsigned long lam)
+{
+ uint64_t metadata;
+
+ srand(time(NULL));
+
+ switch (lam) {
+ case LAM_U57_BITS: /* Set metadata in bits 62:57 */
+ /* Get a random non-zero value as metadata */
+ metadata = (rand() % ((1UL << LAM_U57_BITS) - 1) + 1) << 57;
+ metadata |= (src & ~(LAM_U57_MASK));
+ break;
+ default:
+ metadata = src;
+ break;
+ }
+
+ return metadata;
+}
+
+/*
+ * Set metadata in a user pointer and compare the new pointer with the
+ * original pointer. Both pointers should point to the same address.
+ *
+ * @return:
+ * 0: the value read through the pointer with metadata matches the original
+ * 1: the values do not match
+ */
+static int handle_lam_test(void *src, unsigned int lam)
+{
+ char *ptr;
+
+ strcpy((char *)src, "USER POINTER");
+
+ ptr = (char *)set_metadata((uint64_t)src, lam);
+ if (src == ptr)
+ return 0;
+
+ /* Copy a string into the pointer with metadata */
+ strcpy((char *)ptr, "METADATA POINTER");
+
+ return (!!strcmp((char *)src, (char *)ptr));
+}
+
+
+int handle_max_bits(struct testcases *test)
+{
+ unsigned long exp_bits = get_default_tag_bits();
+ unsigned long bits = 0;
+
+ if (exp_bits != LAM_NONE)
+ exp_bits = LAM_U57_BITS;
+
+ /* Get LAM max tag bits */
+ if (syscall(SYS_arch_prctl, ARCH_GET_MAX_TAG_BITS, &bits) == -1)
+ return 1;
+
+ return (exp_bits != bits);
+}
+
+/*
+ * Test the LAM feature by dereferencing a pointer obtained from malloc.
+ * @return 0: test passed; 1: failure during test; 2: got SIGSEGV
+ */
+static int handle_malloc(struct testcases *test)
+{
+ char *ptr = NULL;
+ int ret = 0;
+
+ if (test->later == 0 && test->lam != 0)
+ if (set_lam(test->lam) == -1)
+ return 1;
+
+ ptr = (char *)malloc(MALLOC_LEN);
+ if (ptr == NULL) {
+ perror("malloc() failure\n");
+ return 1;
+ }
+
+ /* Set signal handler */
+ if (sigsetjmp(segv_env, 1) == 0) {
+ signal(SIGSEGV, segv_handler);
+ ret = handle_lam_test(ptr, test->lam);
+ } else {
+ ret = 2;
+ }
+
+ if (test->later != 0 && test->lam != 0)
+ if (set_lam(test->lam) == -1 && ret == 0)
+ ret = 1;
+
+ free(ptr);
+
+ return ret;
+}
+
+static int handle_mmap(struct testcases *test)
+{
+ void *ptr;
+ unsigned int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED;
+ int ret = 0;
+
+ if (test->later == 0 && test->lam != 0)
+ if (set_lam(test->lam) != 0)
+ return 1;
+
+ ptr = mmap((void *)test->addr, PAGE_SIZE, PROT_READ | PROT_WRITE,
+ flags, -1, 0);
+ if (ptr == MAP_FAILED) {
+ if (test->addr == HIGH_ADDR)
+ if (!cpu_has_la57())
+				return 3; /* LA57 not supported */
+ return 1;
+ }
+
+ if (test->later != 0 && test->lam != 0)
+ if (set_lam(test->lam) != 0)
+ ret = 1;
+
+ if (ret == 0) {
+ if (sigsetjmp(segv_env, 1) == 0) {
+ signal(SIGSEGV, segv_handler);
+ ret = handle_lam_test(ptr, test->lam);
+ } else {
+ ret = 2;
+ }
+ }
+
+ munmap(ptr, PAGE_SIZE);
+ return ret;
+}
+
+static int handle_syscall(struct testcases *test)
+{
+ struct utsname unme, *pu;
+ int ret = 0;
+
+ if (test->later == 0 && test->lam != 0)
+ if (set_lam(test->lam) != 0)
+ return 1;
+
+ if (sigsetjmp(segv_env, 1) == 0) {
+ signal(SIGSEGV, segv_handler);
+ pu = (struct utsname *)set_metadata((uint64_t)&unme, test->lam);
+ ret = uname(pu);
+ if (ret < 0)
+ ret = 1;
+ } else {
+ ret = 2;
+ }
+
+ if (test->later != 0 && test->lam != 0)
+ if (set_lam(test->lam) != -1 && ret == 0)
+ ret = 1;
+
+ return ret;
+}
+
+int sys_uring_setup(unsigned int entries, struct io_uring_params *p)
+{
+ return (int)syscall(__NR_io_uring_setup, entries, p);
+}
+
+int sys_uring_enter(int fd, unsigned int to, unsigned int min, unsigned int flags)
+{
+ return (int)syscall(__NR_io_uring_enter, fd, to, min, flags, NULL, 0);
+}
+
+/* Init submission queue and completion queue */
+int mmap_io_uring(struct io_uring_params p, struct io_ring *s)
+{
+ struct io_uring_queue *sring = &s->sq_ring;
+ struct io_uring_queue *cring = &s->cq_ring;
+
+ sring->ring_sz = p.sq_off.array + p.sq_entries * sizeof(unsigned int);
+ cring->ring_sz = p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe);
+
+ if (p.features & IORING_FEAT_SINGLE_MMAP) {
+ if (cring->ring_sz > sring->ring_sz)
+ sring->ring_sz = cring->ring_sz;
+
+ cring->ring_sz = sring->ring_sz;
+ }
+
+ void *sq_ptr = mmap(0, sring->ring_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, s->ring_fd,
+ IORING_OFF_SQ_RING);
+
+ if (sq_ptr == MAP_FAILED) {
+ perror("sub-queue!");
+ return 1;
+ }
+
+ void *cq_ptr = sq_ptr;
+
+ if (!(p.features & IORING_FEAT_SINGLE_MMAP)) {
+ cq_ptr = mmap(0, cring->ring_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, s->ring_fd,
+ IORING_OFF_CQ_RING);
+ if (cq_ptr == MAP_FAILED) {
+ perror("cpl-queue!");
+ munmap(sq_ptr, sring->ring_sz);
+ return 1;
+ }
+ }
+
+ sring->head = sq_ptr + p.sq_off.head;
+ sring->tail = sq_ptr + p.sq_off.tail;
+ sring->ring_mask = sq_ptr + p.sq_off.ring_mask;
+ sring->ring_entries = sq_ptr + p.sq_off.ring_entries;
+ sring->flags = sq_ptr + p.sq_off.flags;
+ sring->array = sq_ptr + p.sq_off.array;
+
+	/* Map the submission queue entries array */
+ s->sq_ring.queue.sqes = mmap(0, p.sq_entries * sizeof(struct io_uring_sqe),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+ s->ring_fd, IORING_OFF_SQES);
+ if (s->sq_ring.queue.sqes == MAP_FAILED) {
+ munmap(sq_ptr, sring->ring_sz);
+ if (sq_ptr != cq_ptr) {
+ ksft_print_msg("failed to mmap uring queue!");
+ munmap(cq_ptr, cring->ring_sz);
+ return 1;
+ }
+ }
+
+ cring->head = cq_ptr + p.cq_off.head;
+ cring->tail = cq_ptr + p.cq_off.tail;
+ cring->ring_mask = cq_ptr + p.cq_off.ring_mask;
+ cring->ring_entries = cq_ptr + p.cq_off.ring_entries;
+ cring->queue.cqes = cq_ptr + p.cq_off.cqes;
+
+ return 0;
+}
+
+/* Init io_uring queues */
+int setup_io_uring(struct io_ring *s)
+{
+ struct io_uring_params para;
+
+ memset(&para, 0, sizeof(para));
+ s->ring_fd = sys_uring_setup(URING_QUEUE_SZ, &para);
+ if (s->ring_fd < 0)
+ return 1;
+
+ return mmap_io_uring(para, s);
+}
+
+/*
+ * Get data from the completion queue. The data buffer holds the file data.
+ * Return 0 on success; anything else is an error.
+ */
+int handle_uring_cq(struct io_ring *s)
+{
+ struct file_io *fi = NULL;
+ struct io_uring_queue *cring = &s->cq_ring;
+ struct io_uring_cqe *cqe;
+ unsigned int head;
+ off_t len = 0;
+
+ head = *cring->head;
+
+ do {
+ barrier();
+ if (head == *cring->tail)
+ break;
+ /* Get the entry */
+ cqe = &cring->queue.cqes[head & *s->cq_ring.ring_mask];
+ fi = (struct file_io *)cqe->user_data;
+ if (cqe->res < 0)
+ break;
+
+ int blocks = (int)(fi->file_sz + URING_BLOCK_SZ - 1) / URING_BLOCK_SZ;
+
+ for (int i = 0; i < blocks; i++)
+ len += fi->iovecs[i].iov_len;
+
+ head++;
+ } while (1);
+
+ *cring->head = head;
+ barrier();
+
+ return (len != fi->file_sz);
+}
+
+/*
+ * Submit to the submission queue via IORING_OP_READV.
+ * Metadata is set in the buffer pointers according to the LAM mode.
+ */
+int handle_uring_sq(struct io_ring *ring, struct file_io *fi, unsigned long lam)
+{
+ int file_fd = fi->file_fd;
+ struct io_uring_queue *sring = &ring->sq_ring;
+ unsigned int index = 0, cur_block = 0, tail = 0, next_tail = 0;
+ struct io_uring_sqe *sqe;
+
+ off_t remain = fi->file_sz;
+ int blocks = (int)(remain + URING_BLOCK_SZ - 1) / URING_BLOCK_SZ;
+
+ while (remain) {
+ off_t bytes = remain;
+ void *buf;
+
+ if (bytes > URING_BLOCK_SZ)
+ bytes = URING_BLOCK_SZ;
+
+ fi->iovecs[cur_block].iov_len = bytes;
+
+ if (posix_memalign(&buf, URING_BLOCK_SZ, URING_BLOCK_SZ))
+ return 1;
+
+ fi->iovecs[cur_block].iov_base = (void *)set_metadata((uint64_t)buf, lam);
+ remain -= bytes;
+ cur_block++;
+ }
+
+ next_tail = *sring->tail;
+ tail = next_tail;
+ next_tail++;
+
+ barrier();
+
+ index = tail & *ring->sq_ring.ring_mask;
+
+ sqe = &ring->sq_ring.queue.sqes[index];
+ sqe->fd = file_fd;
+ sqe->flags = 0;
+ sqe->opcode = IORING_OP_READV;
+ sqe->addr = (unsigned long)fi->iovecs;
+ sqe->len = blocks;
+ sqe->off = 0;
+ sqe->user_data = (uint64_t)fi;
+
+ sring->array[index] = index;
+ tail = next_tail;
+
+ if (*sring->tail != tail) {
+ *sring->tail = tail;
+ barrier();
+ }
+
+ if (sys_uring_enter(ring->ring_fd, 1, 1, IORING_ENTER_GETEVENTS) < 0)
+ return 1;
+
+ return 0;
+}
+
+/*
+ * Test LAM in async I/O with io_uring: read the current binary through
+ * io_uring, setting metadata in the pointers to the iovec buffers.
+ */
+int do_uring(unsigned long lam)
+{
+ struct io_ring *ring;
+ struct file_io *fi;
+ struct stat st;
+ int ret = 1;
+ char path[PATH_MAX] = {0};
+
+ /* get current process path */
+ if (readlink("/proc/self/exe", path, PATH_MAX) <= 0)
+ return 1;
+
+ int file_fd = open(path, O_RDONLY);
+
+ if (file_fd < 0)
+ return 1;
+
+ if (fstat(file_fd, &st) < 0)
+ return 1;
+
+ off_t file_sz = st.st_size;
+
+ int blocks = (int)(file_sz + URING_BLOCK_SZ - 1) / URING_BLOCK_SZ;
+
+ fi = malloc(sizeof(*fi) + sizeof(struct iovec) * blocks);
+ if (!fi)
+ return 1;
+
+ fi->file_sz = file_sz;
+ fi->file_fd = file_fd;
+
+ ring = malloc(sizeof(*ring));
+ if (!ring)
+ return 1;
+
+ memset(ring, 0, sizeof(struct io_ring));
+
+ if (setup_io_uring(ring))
+ goto out;
+
+ if (handle_uring_sq(ring, fi, lam))
+ goto out;
+
+ ret = handle_uring_cq(ring);
+
+out:
+ free(ring);
+
+ for (int i = 0; i < blocks; i++) {
+ if (fi->iovecs[i].iov_base) {
+ uint64_t addr = ((uint64_t)fi->iovecs[i].iov_base);
+
+ switch (lam) {
+ case LAM_U57_BITS: /* Clear bits 62:57 */
+ addr = (addr & ~(LAM_U57_MASK));
+ break;
+ }
+ free((void *)addr);
+ fi->iovecs[i].iov_base = NULL;
+ }
+ }
+
+	close(file_fd);
+	free(fi);
+
+ return ret;
+}
+
+int handle_uring(struct testcases *test)
+{
+ int ret = 0;
+
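+	/* Enable LAM up front unless the case defers it via test->later */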
+ if (test->later == 0 && test->lam != 0)
+ if (set_lam(test->lam) != 0)
+ return 1;
+
+ if (sigsetjmp(segv_env, 1) == 0) {
+ signal(SIGSEGV, segv_handler);
+ ret = do_uring(test->lam);
+ } else {
+ ret = 2;
+ }
+
+ return ret;
+}
+
+static int fork_test(struct testcases *test)
+{
+ int ret, child_ret;
+ pid_t pid;
+
+ pid = fork();
+ if (pid < 0) {
+ perror("Fork failed.");
+ ret = 1;
+ } else if (pid == 0) {
+ ret = test->test_func(test);
+ exit(ret);
+ } else {
+ wait(&child_ret);
+ ret = WEXITSTATUS(child_ret);
+ }
+
+ return ret;
+}
+
+static int handle_execve(struct testcases *test)
+{
+ int ret, child_ret;
+ int lam = test->lam;
+ pid_t pid;
+
+ pid = fork();
+ if (pid < 0) {
+ perror("Fork failed.");
+ ret = 1;
+ } else if (pid == 0) {
+		char path[PATH_MAX] = {0};
+
+		/* Set LAM mode in the child before execve() */
+		if (set_lam(lam) != 0)
+			exit(1);
+
+		/* Get the current binary's path; it is re-run via execve() below */
+		if (readlink("/proc/self/exe", path, PATH_MAX - 1) <= 0)
+			exit(-1);
+
+		/* Re-run the binary; it reports its LAM mode as the exit status */
+ if (execlp(path, path, "-t 0x0", NULL) < 0) {
+ perror("error on exec");
+ exit(-1);
+ }
+ } else {
+ wait(&child_ret);
+ ret = WEXITSTATUS(child_ret);
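+		/* execve() creates a fresh mm, so the child must report LAM_NONE */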
+ if (ret != LAM_NONE)
+ return 1;
+ }
+
+ return 0;
+}
+
+static int handle_inheritance(struct testcases *test)
+{
+ int ret, child_ret;
+ int lam = test->lam;
+ pid_t pid;
+
+ /* Set LAM mode in parent process */
+ if (set_lam(lam) != 0)
+ return 1;
+
+ pid = fork();
+ if (pid < 0) {
+ perror("Fork failed.");
+ return 1;
+ } else if (pid == 0) {
+		/* Read the LAM mode in the child process */
+ int child_lam = get_lam();
+
+ exit(child_lam);
+ } else {
+ wait(&child_ret);
+ ret = WEXITSTATUS(child_ret);
+
+ if (lam != ret)
+ return 1;
+ }
+
+ return 0;
+}
+
+static int thread_fn_get_lam(void *arg)
+{
+ return get_lam();
+}
+
+static int thread_fn_set_lam(void *arg)
+{
+ struct testcases *test = arg;
+
+ return set_lam(test->lam);
+}
+
+static int handle_thread(struct testcases *test)
+{
+ char stack[STACK_SIZE];
+ int ret, child_ret;
+ int lam = 0;
+ pid_t pid;
+
+ /* Set LAM mode in parent process */
+ if (!test->later) {
+ lam = test->lam;
+ if (set_lam(lam) != 0)
+ return 1;
+ }
+
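+	/* The child shares the mm (CLONE_VM), so it observes the parent's LAM mode */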
+ pid = clone(thread_fn_get_lam, stack + STACK_SIZE,
+ SIGCHLD | CLONE_FILES | CLONE_FS | CLONE_VM, NULL);
+ if (pid < 0) {
+ perror("Clone failed.");
+ return 1;
+ }
+
+ waitpid(pid, &child_ret, 0);
+ ret = WEXITSTATUS(child_ret);
+
+ if (lam != ret)
+ return 1;
+
+ if (test->later) {
+ if (set_lam(test->lam) != 0)
+ return 1;
+ }
+
+ return 0;
+}
+
+static int handle_thread_enable(struct testcases *test)
+{
+ char stack[STACK_SIZE];
+ int ret, child_ret;
+ int lam = test->lam;
+ pid_t pid;
+
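+	/*
+	 * Negative case: enabling LAM from a thread that shares the mm is
+	 * expected to fail, since LAM can only be enabled while the process
+	 * is single-threaded.
+	 */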
+ pid = clone(thread_fn_set_lam, stack + STACK_SIZE,
+ SIGCHLD | CLONE_FILES | CLONE_FS | CLONE_VM, test);
+ if (pid < 0) {
+ perror("Clone failed.");
+ return 1;
+ }
+
+ waitpid(pid, &child_ret, 0);
+ ret = WEXITSTATUS(child_ret);
+
+ if (lam != ret)
+ return 1;
+
+ return 0;
+}
+
+static void run_test(struct testcases *test, int count)
+{
+ int i, ret = 0;
+
+ for (i = 0; i < count; i++) {
+ struct testcases *t = test + i;
+
+ /* fork a process to run test case */
+ tests_cnt++;
+ ret = fork_test(t);
+
+		/* A return value of 3 means LA57 is not supported; skip the case */
+ if (ret == 3) {
+ ksft_test_result_skip(t->msg);
+ continue;
+ }
+
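+		/* Pass (ret = 1) when the actual result matches the expectation */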
+ if (ret != 0)
+ ret = (t->expected == ret);
+ else
+ ret = !(t->expected);
+
+ ksft_test_result(ret, t->msg);
+ }
+}
+
+static struct testcases uring_cases[] = {
+ {
+ .later = 0,
+ .lam = LAM_U57_BITS,
+ .test_func = handle_uring,
+ .msg = "URING: LAM_U57. Dereferencing pointer with metadata\n",
+ },
+ {
+ .later = 1,
+ .expected = 1,
+ .lam = LAM_U57_BITS,
+ .test_func = handle_uring,
+ .msg = "URING:[Negative] Disable LAM. Dereferencing pointer with metadata.\n",
+ },
+};
+
+static struct testcases malloc_cases[] = {
+ {
+ .later = 0,
+ .lam = LAM_U57_BITS,
+ .test_func = handle_malloc,
+ .msg = "MALLOC: LAM_U57. Dereferencing pointer with metadata\n",
+ },
+ {
+ .later = 1,
+ .expected = 2,
+ .lam = LAM_U57_BITS,
+ .test_func = handle_malloc,
+ .msg = "MALLOC:[Negative] Disable LAM. Dereferencing pointer with metadata.\n",
+ },
+};
+
+static struct testcases bits_cases[] = {
+ {
+ .test_func = handle_max_bits,
+ .msg = "BITS: Check default tag bits\n",
+ },
+};
+
+static struct testcases syscall_cases[] = {
+ {
+ .later = 0,
+ .lam = LAM_U57_BITS,
+ .test_func = handle_syscall,
+ .msg = "SYSCALL: LAM_U57. syscall with metadata\n",
+ },
+ {
+ .later = 1,
+ .expected = 1,
+ .lam = LAM_U57_BITS,
+ .test_func = handle_syscall,
+ .msg = "SYSCALL:[Negative] Disable LAM. Dereferencing pointer with metadata.\n",
+ },
+};
+
+static struct testcases mmap_cases[] = {
+ {
+ .later = 1,
+ .expected = 0,
+ .lam = LAM_U57_BITS,
+ .addr = HIGH_ADDR,
+ .test_func = handle_mmap,
+ .msg = "MMAP: First mmap high address, then set LAM_U57.\n",
+ },
+ {
+ .later = 0,
+ .expected = 0,
+ .lam = LAM_U57_BITS,
+ .addr = HIGH_ADDR,
+ .test_func = handle_mmap,
+ .msg = "MMAP: First LAM_U57, then High address.\n",
+ },
+ {
+ .later = 0,
+ .expected = 0,
+ .lam = LAM_U57_BITS,
+ .addr = LOW_ADDR,
+ .test_func = handle_mmap,
+ .msg = "MMAP: First LAM_U57, then Low address.\n",
+ },
+};
+
+static struct testcases inheritance_cases[] = {
+ {
+ .expected = 0,
+ .lam = LAM_U57_BITS,
+ .test_func = handle_inheritance,
+ .msg = "FORK: LAM_U57, child process should get LAM mode same as parent\n",
+ },
+ {
+ .expected = 0,
+ .lam = LAM_U57_BITS,
+ .test_func = handle_thread,
+ .msg = "THREAD: LAM_U57, child thread should get LAM mode same as parent\n",
+ },
+ {
+ .expected = 1,
+ .lam = LAM_U57_BITS,
+ .test_func = handle_thread_enable,
+ .msg = "THREAD: [NEGATIVE] Enable LAM in child.\n",
+ },
+ {
+ .expected = 1,
+ .later = 1,
+ .lam = LAM_U57_BITS,
+ .test_func = handle_thread,
+ .msg = "THREAD: [NEGATIVE] Enable LAM in parent after thread created.\n",
+ },
+ {
+ .expected = 0,
+ .lam = LAM_U57_BITS,
+ .test_func = handle_execve,
+ .msg = "EXECVE: LAM_U57, child process should get disabled LAM mode\n",
+ },
+};
+
+static void cmd_help(void)
+{
+ printf("usage: lam [-h] [-t test list]\n");
+ printf("\t-t test list: run tests specified in the test list, default:0x%x\n", TEST_MASK);
+ printf("\t\t0x1:malloc; 0x2:max_bits; 0x4:mmap; 0x8:syscall; 0x10:io_uring; 0x20:inherit;\n");
+ printf("\t-h: help\n");
+}
+
+/* Check for file existence */
+uint8_t file_Exists(const char *fileName)
+{
+ struct stat buffer;
+
+ uint8_t ret = (stat(fileName, &buffer) == 0);
+
+ return ret;
+}
+
+/* Sysfs idxd files */
+const char *dsa_configs[] = {
+ "echo 1 > /sys/bus/dsa/devices/dsa0/wq0.1/group_id",
+ "echo shared > /sys/bus/dsa/devices/dsa0/wq0.1/mode",
+ "echo 10 > /sys/bus/dsa/devices/dsa0/wq0.1/priority",
+ "echo 16 > /sys/bus/dsa/devices/dsa0/wq0.1/size",
+ "echo 15 > /sys/bus/dsa/devices/dsa0/wq0.1/threshold",
+ "echo user > /sys/bus/dsa/devices/dsa0/wq0.1/type",
+ "echo MyApp1 > /sys/bus/dsa/devices/dsa0/wq0.1/name",
+ "echo 1 > /sys/bus/dsa/devices/dsa0/engine0.1/group_id",
+ "echo dsa0 > /sys/bus/dsa/drivers/idxd/bind",
+	/* Bind the device and WQ; this generates a device file in /dev */
+ "echo wq0.1 > /sys/bus/dsa/drivers/user/bind",
+};
+
+/* DSA device file */
+const char *dsaDeviceFile = "/dev/dsa/wq0.1";
+/* Sysfs file that reports whether PASID is enabled */
+const char *dsaPasidEnable = "/sys/bus/dsa/devices/dsa0/pasid_enabled";
+
+/*
+ * DSA depends on the kernel cmdline "intel_iommu=on,sm_on".
+ * Return pasid_enabled (0: disabled, 1: enabled).
+ */
+int Check_DSA_Kernel_Setting(void)
+{
+ char command[256] = "";
+ char buf[256] = "";
+ char *ptr;
+ int rv = -1;
+
+ snprintf(command, sizeof(command) - 1, "cat %s", dsaPasidEnable);
+
+ FILE *cmd = popen(command, "r");
+
+ if (cmd) {
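+		/* Read through the output, keeping only the last line in buf */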
+ while (fgets(buf, sizeof(buf) - 1, cmd) != NULL);
+
+ pclose(cmd);
+ rv = strtol(buf, &ptr, 16);
+ }
+
+ return rv;
+}
+
+/*
+ * Configure DSA's sysfs files for a shared DSA WQ.
+ * Generates the device file /dev/dsa/wq0.1.
+ * Return: 0 OK; 1 Failed; 3 Skip (SVA disabled).
+ */
+int Dsa_Init_Sysfs(void)
+{
+ uint len = ARRAY_SIZE(dsa_configs);
+ const char **p = dsa_configs;
+
+ if (file_Exists(dsaDeviceFile) == 1)
+ return 0;
+
+ /* check the idxd driver */
+ if (file_Exists(dsaPasidEnable) != 1) {
+ printf("Please make sure idxd driver was loaded\n");
+ return 3;
+ }
+
+ /* Check SVA feature */
+ if (Check_DSA_Kernel_Setting() != 1) {
+ printf("Please enable SVA.(Add intel_iommu=on,sm_on in kernel cmdline)\n");
+ return 3;
+ }
+
+	/* Write the sysfs configuration for the shared WQ */
+	for (uint i = 0; i < len; i++) {
+ if (system(p[i]))
+ return 1;
+ }
+
+ /* After config, /dev/dsa/wq0.1 should be generated */
+ return (file_Exists(dsaDeviceFile) != 1);
+}
+
+/*
+ * Open the DSA device file, triggering the kernel API iommu_sva_alloc_pasid.
+ */
+void *allocate_dsa_pasid(void)
+{
+ int fd;
+ void *wq;
+
+ fd = open(dsaDeviceFile, O_RDWR);
+ if (fd < 0) {
+ perror("open");
+ return MAP_FAILED;
+ }
+
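+	/* Map the shared WQ portal used to submit descriptors */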
+ wq = mmap(NULL, 0x1000, PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, fd, 0);
+ if (wq == MAP_FAILED)
+ perror("mmap");
+
+ return wq;
+}
+
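+/* Opt this mm in to using tagged addresses together with SVA */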
+int set_force_svm(void)
+{
+ int ret = 0;
+
+ ret = syscall(SYS_arch_prctl, ARCH_FORCE_TAGGED_SVA);
+
+ return ret;
+}
+
+int handle_pasid(struct testcases *test)
+{
+ uint tmp = test->cmd;
+	uint executed = 0x0;
+ int ret = 0;
+ void *wq = NULL;
+
+ ret = Dsa_Init_Sysfs();
+ if (ret != 0)
+ return ret;
+
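+	/*
+	 * The command word packs one step per nibble (0x1: enable LAM,
+	 * 0x2: allocate a PASID, 0x4: force SVA); execute the three steps
+	 * in the encoded order.
+	 */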
+ for (int i = 0; i < 3; i++) {
+ int err = 0;
+
+		if (tmp & 0x1) {
+			/* run set lam mode */
+			if ((executed & 0x1) == 0) {
+				err = set_lam(LAM_U57_BITS);
+				executed = executed | 0x1;
+			} else
+				err = 1;
+		} else if (tmp & 0x4) {
+			/* run force svm */
+			if ((executed & 0x4) == 0) {
+				err = set_force_svm();
+				executed = executed | 0x4;
+			} else
+				err = 1;
+		} else if (tmp & 0x2) {
+			/* run allocate pasid */
+			if ((executed & 0x2) == 0) {
+				executed = executed | 0x2;
+				wq = allocate_dsa_pasid();
+				if (wq == MAP_FAILED)
+					err = 1;
+			} else
+				err = 1;
+		}
+
+ ret = ret + err;
+ if (ret > 0)
+ break;
+
+ tmp = tmp >> 4;
+ }
+
+ if (wq != MAP_FAILED && wq != NULL)
+ if (munmap(wq, 0x1000))
+ printf("munmap failed %d\n", errno);
+
+	/* All three steps must have executed exactly once */
+	if (executed != 0x7)
+ ret = 1;
+
+ return (ret != 0);
+}
+
+/*
+ * The PASID tests depend on idxd and SVA; the kernel must enable the IOMMU
+ * in scalable mode (cmdline: intel_iommu=on,sm_on).
+ */
+static struct testcases pasid_cases[] = {
+ {
+ .expected = 1,
+ .cmd = PAS_CMD(LAM_CMD_BIT, PAS_CMD_BIT, SVA_CMD_BIT),
+ .test_func = handle_pasid,
+ .msg = "PASID: [Negative] Execute LAM, PASID, SVA in sequence\n",
+ },
+ {
+ .expected = 0,
+ .cmd = PAS_CMD(LAM_CMD_BIT, SVA_CMD_BIT, PAS_CMD_BIT),
+ .test_func = handle_pasid,
+ .msg = "PASID: Execute LAM, SVA, PASID in sequence\n",
+ },
+ {
+ .expected = 1,
+ .cmd = PAS_CMD(PAS_CMD_BIT, LAM_CMD_BIT, SVA_CMD_BIT),
+ .test_func = handle_pasid,
+ .msg = "PASID: [Negative] Execute PASID, LAM, SVA in sequence\n",
+ },
+ {
+ .expected = 0,
+ .cmd = PAS_CMD(PAS_CMD_BIT, SVA_CMD_BIT, LAM_CMD_BIT),
+ .test_func = handle_pasid,
+ .msg = "PASID: Execute PASID, SVA, LAM in sequence\n",
+ },
+ {
+ .expected = 0,
+ .cmd = PAS_CMD(SVA_CMD_BIT, LAM_CMD_BIT, PAS_CMD_BIT),
+ .test_func = handle_pasid,
+ .msg = "PASID: Execute SVA, LAM, PASID in sequence\n",
+ },
+ {
+ .expected = 0,
+ .cmd = PAS_CMD(SVA_CMD_BIT, PAS_CMD_BIT, LAM_CMD_BIT),
+ .test_func = handle_pasid,
+ .msg = "PASID: Execute SVA, PASID, LAM in sequence\n",
+ },
+};
+
+int main(int argc, char **argv)
+{
+ int c = 0;
+ unsigned int tests = TEST_MASK;
+
+ tests_cnt = 0;
+
+ if (!cpu_has_lam()) {
+ ksft_print_msg("Unsupported LAM feature!\n");
+ return -1;
+ }
+
+ while ((c = getopt(argc, argv, "ht:")) != -1) {
+ switch (c) {
+ case 't':
+ tests = strtoul(optarg, NULL, 16);
+ if (tests && !(tests & TEST_MASK)) {
+ ksft_print_msg("Invalid argument!\n");
+ return -1;
+ }
+ break;
+ case 'h':
+ cmd_help();
+ return 0;
+ default:
+ ksft_print_msg("Invalid argument\n");
+ return -1;
+ }
+ }
+
+	/*
+	 * When tests is 0, this is not a real test run: the execve test case
+	 * uses it to check the LAM mode of a process spawned by execve().
+	 * The child reads back its LAM mode, and the parent compares it with
+	 * its own.
+	 */
+ if (!tests)
+ return (get_lam());
+
+ /* Run test cases */
+ if (tests & FUNC_MALLOC)
+ run_test(malloc_cases, ARRAY_SIZE(malloc_cases));
+
+ if (tests & FUNC_BITS)
+ run_test(bits_cases, ARRAY_SIZE(bits_cases));
+
+ if (tests & FUNC_MMAP)
+ run_test(mmap_cases, ARRAY_SIZE(mmap_cases));
+
+ if (tests & FUNC_SYSCALL)
+ run_test(syscall_cases, ARRAY_SIZE(syscall_cases));
+
+ if (tests & FUNC_URING)
+ run_test(uring_cases, ARRAY_SIZE(uring_cases));
+
+ if (tests & FUNC_INHERITE)
+ run_test(inheritance_cases, ARRAY_SIZE(inheritance_cases));
+
+ if (tests & FUNC_PASID)
+ run_test(pasid_cases, ARRAY_SIZE(pasid_cases));
+
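+	/* The number of cases is only known after the runs, so set the plan late */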
+ ksft_set_plan(tests_cnt);
+
+ return ksft_exit_pass();
+}