Diffstat (limited to 'arch/arm64')
66 files changed, 1161 insertions(+), 779 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 9ac16a482ff1..ffa3c549a4ba 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -49,7 +49,7 @@ config ARM64 select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_BITREVERSE select HAVE_ARCH_JUMP_LABEL - select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP + select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP && !(ARM64_16K_PAGES && ARM64_VA_BITS_48) select HAVE_ARCH_KGDB select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK @@ -70,6 +70,7 @@ config ARM64 select HAVE_FUNCTION_GRAPH_TRACER select HAVE_GENERIC_DMA_COHERENT select HAVE_HW_BREAKPOINT if PERF_EVENTS + select HAVE_IRQ_TIME_ACCOUNTING select HAVE_MEMBLOCK select HAVE_PATA_PLATFORM select HAVE_PERF_EVENTS @@ -316,6 +317,27 @@ config ARM64_ERRATUM_832075 If unsure, say Y. +config ARM64_ERRATUM_834220 + bool "Cortex-A57: 834220: Stage 2 translation fault might be incorrectly reported in presence of a Stage 1 fault" + depends on KVM + default y + help + This option adds an alternative code sequence to work around ARM + erratum 834220 on Cortex-A57 parts up to r1p2. + + Affected Cortex-A57 parts might report a Stage 2 translation + fault as the result of a Stage 1 fault for load crossing a + page boundary when there is a permission or device memory + alignment fault at Stage 1 and a translation fault at Stage 2. + + The workaround is to verify that the Stage 1 translation + doesn't generate a fault before handling the Stage 2 fault. + Please note that this does not necessarily enable the workaround, + as it depends on the alternative framework, which will only patch + the kernel if an affected CPU is detected. + + If unsure, say Y. + config ARM64_ERRATUM_845719 bool "Cortex-A53: 845719: a load might read incorrect data" depends on COMPAT @@ -508,9 +530,6 @@ config HW_PERF_EVENTS config SYS_SUPPORTS_HUGETLBFS def_bool y -config ARCH_WANT_GENERAL_HUGETLB - def_bool y - config ARCH_WANT_HUGE_PMD_SHARE def_bool y if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi index e81cd48d6245..925552e7b4f3 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi @@ -269,6 +269,7 @@ clock-frequency = <0>; /* Updated by bootloader */ voltage-ranges = <1800 1800 3300 3300>; sdhci,auto-cmd12; + little-endian; bus-width = <4>; }; @@ -277,6 +278,7 @@ reg = <0x0 0x2300000 0x0 0x10000>; interrupts = <0 36 0x4>; /* Level high type */ gpio-controller; + little-endian; #gpio-cells = <2>; interrupt-controller; #interrupt-cells = <2>; @@ -287,6 +289,7 @@ reg = <0x0 0x2310000 0x0 0x10000>; interrupts = <0 36 0x4>; /* Level high type */ gpio-controller; + little-endian; #gpio-cells = <2>; interrupt-controller; #interrupt-cells = <2>; @@ -297,6 +300,7 @@ reg = <0x0 0x2320000 0x0 0x10000>; interrupts = <0 37 0x4>; /* Level high type */ gpio-controller; + little-endian; #gpio-cells = <2>; interrupt-controller; #interrupt-cells = <2>; @@ -307,6 +311,7 @@ reg = <0x0 0x2330000 0x0 0x10000>; interrupts = <0 37 0x4>; /* Level high type */ gpio-controller; + little-endian; #gpio-cells = <2>; interrupt-controller; #interrupt-cells = <2>; diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-cipher.c index ce47792a983d..f7bd9bf0bbb3 100644 --- a/arch/arm64/crypto/aes-ce-cipher.c +++ b/arch/arm64/crypto/aes-ce-cipher.c @@ -237,7 +237,7 @@ EXPORT_SYMBOL(ce_aes_setkey); static struct crypto_alg aes_alg = { .cra_name = "aes", .cra_driver_name 
= "aes-ce", - .cra_priority = 300, + .cra_priority = 250, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crypto_aes_ctx), diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index d56ec0715157..e4962f04201e 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -19,7 +19,6 @@ struct alt_instr { void __init apply_alternatives_all(void); void apply_alternatives(void *start, size_t length); -void free_alternatives_memory(void); #define ALTINSTR_ENTRY(feature) \ " .word 661b - .\n" /* label */ \ diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 030cdcb46c6b..2731d3b25ed2 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -77,6 +77,7 @@ #ifndef __ASSEMBLY__ #include <linux/stringify.h> +#include <asm/barrier.h> /* * Low-level accessors diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 12eff928ef8b..bb7b72734c24 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -193,6 +193,17 @@ lr .req x30 // link register str \src, [\tmp, :lo12:\sym] .endm + /* + * @sym: The name of the per-cpu variable + * @reg: Result of per_cpu(sym, smp_processor_id()) + * @tmp: scratch register + */ + .macro this_cpu_ptr, sym, reg, tmp + adr_l \reg, \sym + mrs \tmp, tpidr_el1 + add \reg, \reg, \tmp + .endm + /* * Annotate a function as position independent, i.e., safe to be called before * the kernel virtual mapping is activated. diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 624f9679f4b0..9622eb48f894 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -64,27 +64,31 @@ do { \ #define smp_load_acquire(p) \ ({ \ - typeof(*p) ___p1; \ + union { typeof(*p) __val; char __c[1]; } __u; \ compiletime_assert_atomic_type(*p); \ switch (sizeof(*p)) { \ case 1: \ asm volatile ("ldarb %w0, %1" \ - : "=r" (___p1) : "Q" (*p) : "memory"); \ + : "=r" (*(__u8 *)__u.__c) \ + : "Q" (*p) : "memory"); \ break; \ case 2: \ asm volatile ("ldarh %w0, %1" \ - : "=r" (___p1) : "Q" (*p) : "memory"); \ + : "=r" (*(__u16 *)__u.__c) \ + : "Q" (*p) : "memory"); \ break; \ case 4: \ asm volatile ("ldar %w0, %1" \ - : "=r" (___p1) : "Q" (*p) : "memory"); \ + : "=r" (*(__u32 *)__u.__c) \ + : "Q" (*p) : "memory"); \ break; \ case 8: \ asm volatile ("ldar %0, %1" \ - : "=r" (___p1) : "Q" (*p) : "memory"); \ + : "=r" (*(__u64 *)__u.__c) \ + : "Q" (*p) : "memory"); \ break; \ } \ - ___p1; \ + __u.__val; \ }) #define read_barrier_depends() do { } while(0) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 54efedaf331f..7fc294c3bc5b 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -68,6 +68,7 @@ extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void flush_icache_range(unsigned long start, unsigned long end); extern void __flush_dcache_area(void *addr, size_t len); +extern void __clean_dcache_area_pou(void *addr, size_t len); extern long __flush_cache_user_range(unsigned long start, unsigned long end); static inline void flush_cache_mm(struct mm_struct *mm) diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 9ea611ea69df..510c7b404454 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ 
-19,7 +19,6 @@ #define __ASM_CMPXCHG_H #include <linux/bug.h> -#include <linux/mmdebug.h> #include <asm/atomic.h> #include <asm/barrier.h> diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index 7fbed6919b54..eb8432bb82b8 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -23,7 +23,6 @@ */ #include <linux/types.h> #include <linux/sched.h> -#include <linux/ptrace.h> #define COMPAT_USER_HZ 100 #ifdef __AARCH64EB__ @@ -234,7 +233,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) return (u32)(unsigned long)uptr; } -#define compat_user_stack_pointer() (user_stack_pointer(current_pt_regs())) +#define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current))) static inline void __user *arch_compat_alloc_user_space(long len) { diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 11d5bb0fdd54..8f271b83f910 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -29,8 +29,9 @@ #define ARM64_HAS_PAN 4 #define ARM64_HAS_LSE_ATOMICS 5 #define ARM64_WORKAROUND_CAVIUM_23154 6 +#define ARM64_WORKAROUND_834220 7 -#define ARM64_NCAPS 7 +#define ARM64_NCAPS 8 #ifndef __ASSEMBLY__ @@ -46,8 +47,12 @@ enum ftr_type { #define FTR_STRICT true /* SANITY check strict matching required */ #define FTR_NONSTRICT false /* SANITY check ignored */ +#define FTR_SIGNED true /* Value should be treated as signed */ +#define FTR_UNSIGNED false /* Value should be treated as unsigned */ + struct arm64_ftr_bits { - bool strict; /* CPU Sanity check: strict matching required ? */ + bool sign; /* Value is signed ? */ + bool strict; /* CPU Sanity check: strict matching required ? */ enum ftr_type type; u8 shift; u8 width; @@ -123,6 +128,18 @@ cpuid_feature_extract_field(u64 features, int field) return cpuid_feature_extract_field_width(features, field, 4); } +static inline unsigned int __attribute_const__ +cpuid_feature_extract_unsigned_field_width(u64 features, int field, int width) +{ + return (u64)(features << (64 - width - field)) >> (64 - width); +} + +static inline unsigned int __attribute_const__ +cpuid_feature_extract_unsigned_field(u64 features, int field) +{ + return cpuid_feature_extract_unsigned_field_width(features, field, 4); +} + static inline u64 arm64_ftr_mask(struct arm64_ftr_bits *ftrp) { return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift); @@ -130,7 +147,9 @@ static inline u64 arm64_ftr_mask(struct arm64_ftr_bits *ftrp) static inline s64 arm64_ftr_value(struct arm64_ftr_bits *ftrp, u64 val) { - return cpuid_feature_extract_field_width(val, ftrp->shift, ftrp->width); + return ftrp->sign ? 
+ cpuid_feature_extract_field_width(val, ftrp->shift, ftrp->width) : + cpuid_feature_extract_unsigned_field_width(val, ftrp->shift, ftrp->width); } static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0) diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index 54d0ead41afc..61e08f360e31 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -18,7 +18,6 @@ #ifdef __KERNEL__ -#include <linux/acpi.h> #include <linux/types.h> #include <linux/vmalloc.h> @@ -26,22 +25,16 @@ #include <asm/xen/hypervisor.h> #define DMA_ERROR_CODE (~(dma_addr_t)0) -extern struct dma_map_ops *dma_ops; extern struct dma_map_ops dummy_dma_ops; static inline struct dma_map_ops *__generic_dma_ops(struct device *dev) { - if (unlikely(!dev)) - return dma_ops; - else if (dev->archdata.dma_ops) + if (dev && dev->archdata.dma_ops) return dev->archdata.dma_ops; - else if (acpi_disabled) - return dma_ops; /* - * When ACPI is enabled, if arch_set_dma_ops is not called, - * we will disable device DMA capability by setting it - * to dummy_dma_ops. + * We expect no ISA devices, and all other DMA masters are expected to + * have someone call arch_setup_dma_ops at device creation time. */ return &dummy_dma_ops; } diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index ef572206f1c3..8e88a696c9cb 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -2,7 +2,9 @@ #define _ASM_EFI_H #include <asm/io.h> +#include <asm/mmu_context.h> #include <asm/neon.h> +#include <asm/tlbflush.h> #ifdef CONFIG_EFI extern void efi_init(void); @@ -10,6 +12,8 @@ extern void efi_init(void); #define efi_init() #endif +int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); + #define efi_call_virt(f, ...) \ ({ \ efi_##f##_t *__f; \ @@ -63,6 +67,11 @@ extern void efi_init(void); * Services are enabled and the EFI_RUNTIME_SERVICES bit set. 
*/ +static inline void efi_set_pgd(struct mm_struct *mm) +{ + switch_mm(NULL, mm, NULL); +} + void efi_virtmap_load(void); void efi_virtmap_unload(void); diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index c5534facf941..3c60f37e48ab 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -28,6 +28,8 @@ struct dyn_arch_ftrace { extern unsigned long ftrace_graph_call; +extern void return_to_handler(void); + static inline unsigned long ftrace_call_adjust(unsigned long addr) { /* diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index bb4052e85dba..bbc1e35aa601 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -26,36 +26,7 @@ static inline pte_t huge_ptep_get(pte_t *ptep) return *ptep; } -static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) -{ - set_pte_at(mm, addr, ptep, pte); -} - -static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep) -{ - ptep_clear_flush(vma, addr, ptep); -} - -static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - ptep_set_wrprotect(mm, addr, ptep); -} -static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - return ptep_get_and_clear(mm, addr, ptep); -} - -static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep, - pte_t pte, int dirty) -{ - return ptep_set_access_flags(vma, addr, ptep, pte, dirty); -} static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, @@ -97,4 +68,19 @@ static inline void arch_clear_hugepage_flags(struct page *page) clear_bit(PG_dcache_clean, &page->flags); } +extern pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, + struct page *page, int writable); +#define arch_make_huge_pte arch_make_huge_pte +extern void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte); +extern int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty); +extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep); +extern void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep); +extern void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep); + #endif /* __ASM_HUGETLB_H */ diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h index e54415ec6935..9732908bfc8a 100644 --- a/arch/arm64/include/asm/hw_breakpoint.h +++ b/arch/arm64/include/asm/hw_breakpoint.h @@ -138,16 +138,18 @@ extern struct pmu perf_ops_bp; /* Determine number of BRP registers available. */ static inline int get_num_brps(void) { + u64 dfr0 = read_system_reg(SYS_ID_AA64DFR0_EL1); return 1 + - cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1), + cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_BRPS_SHIFT); } /* Determine number of WRP registers available. 
*/ static inline int get_num_wrps(void) { + u64 dfr0 = read_system_reg(SYS_ID_AA64DFR0_EL1); return 1 + - cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1), + cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_WRPS_SHIFT); } diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index 23eb450b820b..b77197d941fc 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -1,10 +1,60 @@ #ifndef __ASM_IRQ_H #define __ASM_IRQ_H +#define IRQ_STACK_SIZE THREAD_SIZE +#define IRQ_STACK_START_SP THREAD_START_SP + +#ifndef __ASSEMBLER__ + +#include <linux/percpu.h> + #include <asm-generic/irq.h> +#include <asm/thread_info.h> struct pt_regs; +DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack); + +/* + * The highest address on the stack, and the first to be used. Used to + * find the dummy-stack frame put down by el?_irq() in entry.S, which + * is structured as follows: + * + * ------------ + * | | <- irq_stack_ptr + * top ------------ + * | x19 | <- irq_stack_ptr - 0x08 + * ------------ + * | x29 | <- irq_stack_ptr - 0x10 + * ------------ + * + * where x19 holds a copy of the task stack pointer where the struct pt_regs + * from kernel_entry can be found. + * + */ +#define IRQ_STACK_PTR(cpu) ((unsigned long)per_cpu(irq_stack, cpu) + IRQ_STACK_START_SP) + +/* + * The offset from irq_stack_ptr where entry.S will store the original + * stack pointer. Used by unwind_frame() and dump_backtrace(). + */ +#define IRQ_STACK_TO_TASK_STACK(ptr) (*((unsigned long *)((ptr) - 0x08))) + extern void set_handle_irq(void (*handle_irq)(struct pt_regs *)); +static inline int nr_legacy_irqs(void) +{ + return 0; +} + +static inline bool on_irq_stack(unsigned long sp, int cpu) +{ + /* variable names the same as kernel/stacktrace.c */ + unsigned long low = (unsigned long)per_cpu(irq_stack, cpu); + unsigned long high = low + IRQ_STACK_START_SP; + + return (low <= sp && sp <= high); +} + +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 17e92f05b1fe..25a40213bd9b 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -99,12 +99,22 @@ static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) *vcpu_cpsr(vcpu) |= COMPAT_PSR_T_BIT; } -static inline unsigned long *vcpu_reg(const struct kvm_vcpu *vcpu, u8 reg_num) +/* + * vcpu_get_reg and vcpu_set_reg should always be passed a register number + * coming from a read of ESR_EL2. Otherwise, it may give the wrong result on + * AArch32 with banked registers. + */ +static inline unsigned long vcpu_get_reg(const struct kvm_vcpu *vcpu, + u8 reg_num) { - if (vcpu_mode_is_32bit(vcpu)) - return vcpu_reg32(vcpu, reg_num); + return (reg_num == 31) ? 
0 : vcpu_gp_regs(vcpu)->regs.regs[reg_num]; +} - return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.regs[reg_num]; +static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num, + unsigned long val) +{ + if (reg_num != 31) + vcpu_gp_regs(vcpu)->regs.regs[reg_num] = val; } /* Get vcpu SPSR for current mode */ diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index c0e87898ba96..24165784b803 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -101,7 +101,7 @@ static inline void cpu_set_default_tcr_t0sz(void) #define destroy_context(mm) do { } while(0) void check_and_switch_context(struct mm_struct *mm, unsigned int cpu); -#define init_new_context(tsk,mm) ({ atomic64_set(&mm->context.id, 0); 0; }) +#define init_new_context(tsk,mm) ({ atomic64_set(&(mm)->context.id, 0); 0; }) /* * This is called when "tsk" is about to enter lazy TLB mode. diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index d6739e836f7b..5c25b831273d 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -90,7 +90,23 @@ /* * Contiguous page definitions. */ -#define CONT_PTES (_AC(1, UL) << CONT_SHIFT) +#ifdef CONFIG_ARM64_64K_PAGES +#define CONT_PTE_SHIFT 5 +#define CONT_PMD_SHIFT 5 +#elif defined(CONFIG_ARM64_16K_PAGES) +#define CONT_PTE_SHIFT 7 +#define CONT_PMD_SHIFT 5 +#else +#define CONT_PTE_SHIFT 4 +#define CONT_PMD_SHIFT 4 +#endif + +#define CONT_PTES (1 << CONT_PTE_SHIFT) +#define CONT_PTE_SIZE (CONT_PTES * PAGE_SIZE) +#define CONT_PTE_MASK (~(CONT_PTE_SIZE - 1)) +#define CONT_PMDS (1 << CONT_PMD_SHIFT) +#define CONT_PMD_SIZE (CONT_PMDS * PMD_SIZE) +#define CONT_PMD_MASK (~(CONT_PMD_SIZE - 1)) /* the the numerical offset of the PTE within a range of CONT_PTES */ #define CONT_RANGE_OFFSET(addr) (((addr)>>PAGE_SHIFT)&(CONT_PTES-1)) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 9819a9426b69..69d2e2f86bce 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -81,6 +81,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE) #define PAGE_KERNEL_RO __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY) +#define PAGE_KERNEL_ROX __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY) #define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE) #define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT) @@ -166,6 +167,16 @@ extern struct page *empty_zero_page; ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) #define pte_valid_not_user(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID) +#define pte_valid_young(pte) \ + ((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF)) + +/* + * Could the pte be present in the TLB? We must check mm_tlb_flush_pending + * so that we don't erroneously return false for pages that have been + * remapped as PROT_NONE but are yet to be flushed from the TLB. + */ +#define pte_accessible(mm, pte) \ + (mm_tlb_flush_pending(mm) ? 
pte_present(pte) : pte_valid_young(pte)) static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot) { @@ -216,7 +227,8 @@ static inline pte_t pte_mkspecial(pte_t pte) static inline pte_t pte_mkcont(pte_t pte) { - return set_pte_bit(pte, __pgprot(PTE_CONT)); + pte = set_pte_bit(pte, __pgprot(PTE_CONT)); + return set_pte_bit(pte, __pgprot(PTE_TYPE_PAGE)); } static inline pte_t pte_mknoncont(pte_t pte) @@ -224,6 +236,11 @@ static inline pte_t pte_mknoncont(pte_t pte) return clear_pte_bit(pte, __pgprot(PTE_CONT)); } +static inline pmd_t pmd_mkcont(pmd_t pmd) +{ + return __pmd(pmd_val(pmd) | PMD_SECT_CONT); +} + static inline void set_pte(pte_t *ptep, pte_t pte) { *ptep = pte; @@ -275,10 +292,14 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, * hardware updates of the pte (ptep_set_access_flags safely changes * valid ptes without going through an invalid entry). */ - if (IS_ENABLED(CONFIG_DEBUG_VM) && IS_ENABLED(CONFIG_ARM64_HW_AFDBM) && - pte_valid(*ptep)) { - BUG_ON(!pte_young(pte)); - BUG_ON(pte_write(*ptep) && !pte_dirty(pte)); + if (IS_ENABLED(CONFIG_ARM64_HW_AFDBM) && + pte_valid(*ptep) && pte_valid(pte)) { + VM_WARN_ONCE(!pte_young(pte), + "%s: racy access flag clearing: 0x%016llx -> 0x%016llx", + __func__, pte_val(*ptep), pte_val(pte)); + VM_WARN_ONCE(pte_write(*ptep) && !pte_dirty(pte), + "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx", + __func__, pte_val(*ptep), pte_val(pte)); } set_pte(ptep, pte); @@ -293,7 +314,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, /* * Hugetlb definitions. */ -#define HUGE_MAX_HSTATE 2 +#define HUGE_MAX_HSTATE 4 #define HPAGE_SHIFT PMD_SHIFT #define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT) #define HPAGE_MASK (~(HPAGE_SIZE - 1)) @@ -659,7 +680,8 @@ extern int kern_addr_valid(unsigned long addr); #include <asm-generic/pgtable.h> -#define pgtable_cache_init() do { } while (0) +void pgd_cache_init(void); +#define pgtable_cache_init pgd_cache_init /* * On AArch64, the cache coherency is handled via the set_pte_at() function. diff --git a/arch/arm64/include/asm/shmparam.h b/arch/arm64/include/asm/shmparam.h index 4df608a8459e..e368a55ebd22 100644 --- a/arch/arm64/include/asm/shmparam.h +++ b/arch/arm64/include/asm/shmparam.h @@ -21,7 +21,7 @@ * alignment value. Since we don't have aliasing D-caches, the rest of * the time we can safely use PAGE_SIZE. */ -#define COMPAT_SHMLBA 0x4000 +#define COMPAT_SHMLBA (4 * PAGE_SIZE) #include <asm-generic/shmparam.h> diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index c85e96d174a5..fc9682bfe002 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -26,9 +26,28 @@ * The memory barriers are implicit with the load-acquire and store-release * instructions. 
*/ +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + unsigned int tmp; + arch_spinlock_t lockval; -#define arch_spin_unlock_wait(lock) \ - do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) + asm volatile( +" sevl\n" +"1: wfe\n" +"2: ldaxr %w0, %2\n" +" eor %w1, %w0, %w0, ror #16\n" +" cbnz %w1, 1b\n" + ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ +" stxr %w1, %w0, %2\n" +" cbnz %w1, 2b\n", /* Serialise against any concurrent lockers */ + /* LSE atomics */ +" nop\n" +" nop\n") + : "=&r" (lockval), "=&r" (tmp), "+Q" (*lock) + : + : "memory"); +} #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 7318f6d54aa9..801a16dbbdf6 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -16,14 +16,19 @@ #ifndef __ASM_STACKTRACE_H #define __ASM_STACKTRACE_H +struct task_struct; + struct stackframe { unsigned long fp; unsigned long sp; unsigned long pc; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + unsigned int graph; +#endif }; -extern int unwind_frame(struct stackframe *frame); -extern void walk_stackframe(struct stackframe *frame, +extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame); +extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame, int (*fn)(struct stackframe *, void *), void *data); #endif /* __ASM_STACKTRACE_H */ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 90c7ff233735..abd64bd1f6d9 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -73,10 +73,16 @@ register unsigned long current_stack_pointer asm ("sp"); */ static inline struct thread_info *current_thread_info(void) __attribute_const__; +/* + * struct thread_info can be accessed directly via sp_el0. 
+ */ static inline struct thread_info *current_thread_info(void) { - return (struct thread_info *) - (current_stack_pointer & ~(THREAD_SIZE - 1)); + unsigned long sp_el0; + + asm ("mrs %0, sp_el0" : "=r" (sp_el0)); + + return (struct thread_info *)sp_el0; } #define thread_saved_pc(tsk) \ diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index ab9db0e9818c..d2ee1b21a10d 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -158,9 +158,3 @@ void apply_alternatives(void *start, size_t length) __apply_alternatives(®ion); } - -void free_alternatives_memory(void) -{ - free_reserved_area(__alt_instructions, __alt_instructions_end, - 0, "alternatives"); -} diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 937f5e58a4d3..3e01207917b1 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -62,7 +62,7 @@ struct insn_emulation { }; static LIST_HEAD(insn_emulation); -static int nr_insn_emulated; +static int nr_insn_emulated __initdata; static DEFINE_RAW_SPINLOCK(insn_emulation_lock); static void register_emulation_hooks(struct insn_emulation_ops *ops) @@ -173,7 +173,7 @@ static int update_insn_emulation_mode(struct insn_emulation *insn, return ret; } -static void register_insn_emulation(struct insn_emulation_ops *ops) +static void __init register_insn_emulation(struct insn_emulation_ops *ops) { unsigned long flags; struct insn_emulation *insn; @@ -237,7 +237,7 @@ static struct ctl_table ctl_abi[] = { { } }; -static void register_insn_emulation_sysctl(struct ctl_table *table) +static void __init register_insn_emulation_sysctl(struct ctl_table *table) { unsigned long flags; int i = 0; diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 24926f2504f7..feb6b4efa641 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -75,6 +75,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = { (1 << MIDR_VARIANT_SHIFT) | 2), }, #endif +#ifdef CONFIG_ARM64_ERRATUM_834220 + { + /* Cortex-A57 r0p0 - r1p2 */ + .desc = "ARM erratum 834220", + .capability = ARM64_WORKAROUND_834220, + MIDR_RANGE(MIDR_CORTEX_A57, 0x00, + (1 << MIDR_VARIANT_SHIFT) | 2), + }, +#endif #ifdef CONFIG_ARM64_ERRATUM_845719 { /* Cortex-A53 r0p[01234] */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index c8cf89223b5a..5c90aa490a2b 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -44,8 +44,9 @@ unsigned int compat_elf_hwcap2 __read_mostly; DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); -#define ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ +#define __ARM64_FTR_BITS(SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ { \ + .sign = SIGNED, \ .strict = STRICT, \ .type = TYPE, \ .shift = SHIFT, \ @@ -53,6 +54,14 @@ DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); .safe_val = SAFE_VAL, \ } +/* Define a feature with signed values */ +#define ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ + __ARM64_FTR_BITS(FTR_SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) + +/* Define a feature with unsigned value */ +#define U_ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ + __ARM64_FTR_BITS(FTR_UNSIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) + #define ARM64_FTR_END \ { \ .width = 0, \ @@ -99,7 +108,7 @@ static struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { * Differing PARange is fine as long as all peripherals and memory are mapped * within the minimum PARange of all CPUs */ - 
ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0), + U_ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -115,18 +124,18 @@ static struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { }; static struct arm64_ftr_bits ftr_ctr[] = { - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */ + U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 3, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */ - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), /* ERG */ - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */ + U_ARM64_FTR_BITS(FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */ + U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), /* ERG */ + U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */ /* * Linux can handle differing I-cache policies. Userspace JITs will * make use of *minLine */ - ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, 0), /* L1Ip */ + U_ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, 0), /* L1Ip */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 10, 0), /* RAZ */ - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */ + U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */ ARM64_FTR_END, }; @@ -144,12 +153,12 @@ static struct arm64_ftr_bits ftr_id_mmfr0[] = { static struct arm64_ftr_bits ftr_id_aa64dfr0[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6), + U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0), + U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0), + U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0), + U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0), + U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0), + U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6), ARM64_FTR_END, }; @@ -675,7 +684,7 @@ static const struct arm64_cpu_capabilities arm64_hwcaps[] = { {}, }; -static void cap_set_hwcap(const struct arm64_cpu_capabilities *cap) +static void __init cap_set_hwcap(const struct arm64_cpu_capabilities *cap) { switch (cap->hwcap_type) { case CAP_HWCAP: @@ -720,7 +729,7 @@ static bool __maybe_unused cpus_have_hwcap(const struct arm64_cpu_capabilities * return rc; } -static void setup_cpu_hwcaps(void) +static void __init setup_cpu_hwcaps(void) { int i; const struct arm64_cpu_capabilities *hwcaps = arm64_hwcaps; @@ -749,7 +758,8 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, * Run through the enabled capabilities and enable() it on all active * CPUs */ -static void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) +static void __init +enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) { int i; @@ -888,7 +898,7 @@ static inline void set_sys_caps_initialised(void) #endif /* CONFIG_HOTPLUG_CPU */ -static void setup_feature_capabilities(void) +static void __init setup_feature_capabilities(void) { 
update_cpu_capabilities(arm64_features, "detected feature:"); enable_cpu_capabilities(arm64_features); diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 706679d0a0b4..212ae6361d8b 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -30,6 +30,7 @@ #include <linux/seq_file.h> #include <linux/sched.h> #include <linux/smp.h> +#include <linux/delay.h> /* * In case the boot CPU is hotpluggable, we record its initial state and @@ -112,6 +113,10 @@ static int c_show(struct seq_file *m, void *v) */ seq_printf(m, "processor\t: %d\n", i); + seq_printf(m, "BogoMIPS\t: %lu.%02lu\n", + loops_per_jiffy / (500000UL/HZ), + loops_per_jiffy / (5000UL/HZ) % 100); + /* * Dump out the common processor features in a single line. * Userspace should read the hwcaps with getauxval(AT_HWCAP) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index de46b50f4cdf..b6abc852f2a1 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -11,307 +11,34 @@ * */ -#include <linux/atomic.h> #include <linux/dmi.h> #include <linux/efi.h> -#include <linux/export.h> -#include <linux/memblock.h> -#include <linux/mm_types.h> -#include <linux/bootmem.h> -#include <linux/of.h> -#include <linux/of_fdt.h> -#include <linux/preempt.h> -#include <linux/rbtree.h> -#include <linux/rwsem.h> -#include <linux/sched.h> -#include <linux/slab.h> -#include <linux/spinlock.h> +#include <linux/init.h> -#include <asm/cacheflush.h> #include <asm/efi.h> -#include <asm/tlbflush.h> -#include <asm/mmu_context.h> -#include <asm/mmu.h> -#include <asm/pgtable.h> -struct efi_memory_map memmap; - -static u64 efi_system_table; - -static pgd_t efi_pgd[PTRS_PER_PGD] __page_aligned_bss; - -static struct mm_struct efi_mm = { - .mm_rb = RB_ROOT, - .pgd = efi_pgd, - .mm_users = ATOMIC_INIT(2), - .mm_count = ATOMIC_INIT(1), - .mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem), - .page_table_lock = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock), - .mmlist = LIST_HEAD_INIT(efi_mm.mmlist), -}; - -static int __init is_normal_ram(efi_memory_desc_t *md) -{ - if (md->attribute & EFI_MEMORY_WB) - return 1; - return 0; -} - -/* - * Translate a EFI virtual address into a physical address: this is necessary, - * as some data members of the EFI system table are virtually remapped after - * SetVirtualAddressMap() has been called. - */ -static phys_addr_t efi_to_phys(unsigned long addr) +int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) { - efi_memory_desc_t *md; - - for_each_efi_memory_desc(&memmap, md) { - if (!(md->attribute & EFI_MEMORY_RUNTIME)) - continue; - if (md->virt_addr == 0) - /* no virtual mapping has been installed by the stub */ - break; - if (md->virt_addr <= addr && - (addr - md->virt_addr) < (md->num_pages << EFI_PAGE_SHIFT)) - return md->phys_addr + addr - md->virt_addr; - } - return addr; -} - -static int __init uefi_init(void) -{ - efi_char16_t *c16; - void *config_tables; - u64 table_size; - char vendor[100] = "unknown"; - int i, retval; - - efi.systab = early_memremap(efi_system_table, - sizeof(efi_system_table_t)); - if (efi.systab == NULL) { - pr_warn("Unable to map EFI system table.\n"); - return -ENOMEM; - } - - set_bit(EFI_BOOT, &efi.flags); - set_bit(EFI_64BIT, &efi.flags); + pteval_t prot_val; /* - * Verify the EFI Table + * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be + * executable, everything else can be mapped with the XN bits + * set. 
*/ - if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) { - pr_err("System table signature incorrect\n"); - retval = -EINVAL; - goto out; - } - if ((efi.systab->hdr.revision >> 16) < 2) - pr_warn("Warning: EFI system table version %d.%02d, expected 2.00 or greater\n", - efi.systab->hdr.revision >> 16, - efi.systab->hdr.revision & 0xffff); - - /* Show what we know for posterity */ - c16 = early_memremap(efi_to_phys(efi.systab->fw_vendor), - sizeof(vendor) * sizeof(efi_char16_t)); - if (c16) { - for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i) - vendor[i] = c16[i]; - vendor[i] = '\0'; - early_memunmap(c16, sizeof(vendor) * sizeof(efi_char16_t)); - } - - pr_info("EFI v%u.%.02u by %s\n", - efi.systab->hdr.revision >> 16, - efi.systab->hdr.revision & 0xffff, vendor); - - table_size = sizeof(efi_config_table_64_t) * efi.systab->nr_tables; - config_tables = early_memremap(efi_to_phys(efi.systab->tables), - table_size); - - retval = efi_config_parse_tables(config_tables, efi.systab->nr_tables, - sizeof(efi_config_table_64_t), NULL); - - early_memunmap(config_tables, table_size); -out: - early_memunmap(efi.systab, sizeof(efi_system_table_t)); - return retval; -} - -/* - * Return true for RAM regions we want to permanently reserve. - */ -static __init int is_reserve_region(efi_memory_desc_t *md) -{ - switch (md->type) { - case EFI_LOADER_CODE: - case EFI_LOADER_DATA: - case EFI_BOOT_SERVICES_CODE: - case EFI_BOOT_SERVICES_DATA: - case EFI_CONVENTIONAL_MEMORY: - case EFI_PERSISTENT_MEMORY: - return 0; - default: - break; - } - return is_normal_ram(md); -} - -static __init void reserve_regions(void) -{ - efi_memory_desc_t *md; - u64 paddr, npages, size; - - if (efi_enabled(EFI_DBG)) - pr_info("Processing EFI memory map:\n"); - - for_each_efi_memory_desc(&memmap, md) { - paddr = md->phys_addr; - npages = md->num_pages; - - if (efi_enabled(EFI_DBG)) { - char buf[64]; - - pr_info(" 0x%012llx-0x%012llx %s", - paddr, paddr + (npages << EFI_PAGE_SHIFT) - 1, - efi_md_typeattr_format(buf, sizeof(buf), md)); - } - - memrange_efi_to_native(&paddr, &npages); - size = npages << PAGE_SHIFT; - - if (is_normal_ram(md)) - early_init_dt_add_memory_arch(paddr, size); - - if (is_reserve_region(md)) { - memblock_reserve(paddr, size); - if (efi_enabled(EFI_DBG)) - pr_cont("*"); - } - - if (efi_enabled(EFI_DBG)) - pr_cont("\n"); - } - - set_bit(EFI_MEMMAP, &efi.flags); -} - -void __init efi_init(void) -{ - struct efi_fdt_params params; - - /* Grab UEFI information placed in FDT by stub */ - if (!efi_get_fdt_params(¶ms)) - return; - - efi_system_table = params.system_table; - - memblock_reserve(params.mmap & PAGE_MASK, - PAGE_ALIGN(params.mmap_size + (params.mmap & ~PAGE_MASK))); - memmap.phys_map = params.mmap; - memmap.map = early_memremap(params.mmap, params.mmap_size); - memmap.map_end = memmap.map + params.mmap_size; - memmap.desc_size = params.desc_size; - memmap.desc_version = params.desc_ver; - - if (uefi_init() < 0) - return; - - reserve_regions(); - early_memunmap(memmap.map, params.mmap_size); -} - -static bool __init efi_virtmap_init(void) -{ - efi_memory_desc_t *md; - - for_each_efi_memory_desc(&memmap, md) { - u64 paddr, npages, size; - pgprot_t prot; - - if (!(md->attribute & EFI_MEMORY_RUNTIME)) - continue; - if (md->virt_addr == 0) - return false; - - paddr = md->phys_addr; - npages = md->num_pages; - memrange_efi_to_native(&paddr, &npages); - size = npages << PAGE_SHIFT; - - pr_info(" EFI remap 0x%016llx => %p\n", - md->phys_addr, (void *)md->virt_addr); - - /* - * Only regions of type 
EFI_RUNTIME_SERVICES_CODE need to be - * executable, everything else can be mapped with the XN bits - * set. - */ - if (!is_normal_ram(md)) - prot = __pgprot(PROT_DEVICE_nGnRE); - else if (md->type == EFI_RUNTIME_SERVICES_CODE || - !PAGE_ALIGNED(md->phys_addr)) - prot = PAGE_KERNEL_EXEC; - else - prot = PAGE_KERNEL; - - create_pgd_mapping(&efi_mm, paddr, md->virt_addr, size, prot); - } - return true; -} - -/* - * Enable the UEFI Runtime Services if all prerequisites are in place, i.e., - * non-early mapping of the UEFI system table and virtual mappings for all - * EFI_MEMORY_RUNTIME regions. - */ -static int __init arm64_enable_runtime_services(void) -{ - u64 mapsize; - - if (!efi_enabled(EFI_BOOT)) { - pr_info("EFI services will not be available.\n"); - return -1; - } - - if (efi_runtime_disabled()) { - pr_info("EFI runtime services will be disabled.\n"); - return -1; - } - - pr_info("Remapping and enabling EFI services.\n"); - - mapsize = memmap.map_end - memmap.map; - memmap.map = (__force void *)ioremap_cache(memmap.phys_map, - mapsize); - if (!memmap.map) { - pr_err("Failed to remap EFI memory map\n"); - return -1; - } - memmap.map_end = memmap.map + mapsize; - efi.memmap = &memmap; - - efi.systab = (__force void *)ioremap_cache(efi_system_table, - sizeof(efi_system_table_t)); - if (!efi.systab) { - pr_err("Failed to remap EFI System Table\n"); - return -1; - } - set_bit(EFI_SYSTEM_TABLES, &efi.flags); - - if (!efi_virtmap_init()) { - pr_err("No UEFI virtual mapping was installed -- runtime services will not be available\n"); - return -1; - } - - /* Set up runtime services function pointers */ - efi_native_runtime_setup(); - set_bit(EFI_RUNTIME_SERVICES, &efi.flags); - - efi.runtime_version = efi.systab->hdr.revision; + if ((md->attribute & EFI_MEMORY_WB) == 0) + prot_val = PROT_DEVICE_nGnRE; + else if (md->type == EFI_RUNTIME_SERVICES_CODE || + !PAGE_ALIGNED(md->phys_addr)) + prot_val = pgprot_val(PAGE_KERNEL_EXEC); + else + prot_val = pgprot_val(PAGE_KERNEL); + create_pgd_mapping(mm, md->phys_addr, md->virt_addr, + md->num_pages << EFI_PAGE_SHIFT, + __pgprot(prot_val | PTE_NG)); return 0; } -early_initcall(arm64_enable_runtime_services); static int __init arm64_dmi_init(void) { @@ -327,30 +54,6 @@ static int __init arm64_dmi_init(void) } core_initcall(arm64_dmi_init); -static void efi_set_pgd(struct mm_struct *mm) -{ - if (mm == &init_mm) - cpu_set_reserved_ttbr0(); - else - cpu_switch_mm(mm->pgd, mm); - - local_flush_tlb_all(); - if (icache_is_aivivt()) - __local_flush_icache_all(); -} - -void efi_virtmap_load(void) -{ - preempt_disable(); - efi_set_pgd(&efi_mm); -} - -void efi_virtmap_unload(void) -{ - efi_set_pgd(current->active_mm); - preempt_enable(); -} - /* * UpdateCapsule() depends on the system being shutdown via * ResetSystem(). diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 7ed3d75f6304..1f7f5a2b61bf 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -27,6 +27,7 @@ #include <asm/cpufeature.h> #include <asm/errno.h> #include <asm/esr.h> +#include <asm/irq.h> #include <asm/thread_info.h> #include <asm/unistd.h> @@ -88,9 +89,12 @@ .if \el == 0 mrs x21, sp_el0 - get_thread_info tsk // Ensure MDSCR_EL1.SS is clear, + mov tsk, sp + and tsk, tsk, #~(THREAD_SIZE - 1) // Ensure MDSCR_EL1.SS is clear, ldr x19, [tsk, #TI_FLAGS] // since we can unmask debug disable_step_tsk x19, x20 // exceptions when scheduling. 
+ + mov x29, xzr // fp pointed to user-space .else add x21, sp, #S_FRAME_SIZE .endif @@ -108,6 +112,13 @@ .endif /* + * Set sp_el0 to current thread_info. + */ + .if \el == 0 + msr sp_el0, tsk + .endif + + /* * Registers that may be useful after this macro is invoked: * * x21 - aborted SP @@ -164,8 +175,44 @@ alternative_endif .endm .macro get_thread_info, rd - mov \rd, sp - and \rd, \rd, #~(THREAD_SIZE - 1) // top of stack + mrs \rd, sp_el0 + .endm + + .macro irq_stack_entry + mov x19, sp // preserve the original sp + + /* + * Compare sp with the current thread_info, if the top + * ~(THREAD_SIZE - 1) bits match, we are on a task stack, and + * should switch to the irq stack. + */ + and x25, x19, #~(THREAD_SIZE - 1) + cmp x25, tsk + b.ne 9998f + + this_cpu_ptr irq_stack, x25, x26 + mov x26, #IRQ_STACK_START_SP + add x26, x25, x26 + + /* switch to the irq stack */ + mov sp, x26 + + /* + * Add a dummy stack frame, this non-standard format is fixed up + * by unwind_frame() + */ + stp x29, x19, [sp, #-16]! + mov x29, sp + +9998: + .endm + + /* + * x19 should be preserved between irq_stack_entry and + * irq_stack_exit. + */ + .macro irq_stack_exit + mov sp, x19 .endm /* @@ -183,10 +230,11 @@ tsk .req x28 // current thread_info * Interrupt handling. */ .macro irq_handler - adrp x1, handle_arch_irq - ldr x1, [x1, #:lo12:handle_arch_irq] + ldr_l x1, handle_arch_irq mov x0, sp + irq_stack_entry blr x1 + irq_stack_exit .endm .text @@ -358,10 +406,10 @@ el1_irq: bl trace_hardirqs_off #endif + get_thread_info tsk irq_handler #ifdef CONFIG_PREEMPT - get_thread_info tsk ldr w24, [tsk, #TI_PREEMPT] // get preempt count cbnz w24, 1f // preempt count != 0 ldr x0, [tsk, #TI_FLAGS] // get flags @@ -599,6 +647,8 @@ ENTRY(cpu_switch_to) ldp x29, x9, [x8], #16 ldr lr, [x8] mov sp, x9 + and x9, x9, #~(THREAD_SIZE - 1) + msr sp_el0, x9 ret ENDPROC(cpu_switch_to) @@ -626,14 +676,14 @@ ret_fast_syscall_trace: work_pending: tbnz x1, #TIF_NEED_RESCHED, work_resched /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */ - ldr x2, [sp, #S_PSTATE] mov x0, sp // 'regs' - tst x2, #PSR_MODE_MASK // user mode regs? - b.ne no_work_pending // returning to kernel enable_irq // enable interrupts for do_notify_resume() bl do_notify_resume b ret_to_user work_resched: +#ifdef CONFIG_TRACE_IRQFLAGS + bl trace_hardirqs_off // the IRQs are off here, inform the tracing code +#endif bl schedule /* @@ -645,7 +695,6 @@ ret_to_user: and x2, x1, #_TIF_WORK_MASK cbnz x2, work_pending enable_step_tsk x1, x2 -no_work_pending: kernel_exit 0 ENDPROC(ret_to_user) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 4c46c54a3ad7..acc1afd5c749 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -289,7 +289,7 @@ static struct notifier_block fpsimd_cpu_pm_notifier_block = { .notifier_call = fpsimd_cpu_pm_notifier, }; -static void fpsimd_pm_init(void) +static void __init fpsimd_pm_init(void) { cpu_pm_register_notifier(&fpsimd_cpu_pm_notifier_block); } diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index c851be795080..ebecf9aa33d1 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -29,12 +29,11 @@ static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, /* * Note: - * Due to modules and __init, code can disappear and change, - * we need to protect against faulting as well as code changing. - * We do this by aarch64_insn_*() which use the probe_kernel_*(). 
- * - * No lock is held here because all the modifications are run - * through stop_machine(). + * We are paranoid about modifying text, as if a bug were to happen, it + * could cause us to read or write to someplace that could cause harm. + * Carefully read and modify the code with aarch64_insn_*() which uses + * probe_kernel_*(), and make sure what we read is what we expected it + * to be before modifying it. */ if (validate) { if (aarch64_insn_read((void *)pc, &replaced)) @@ -93,6 +92,11 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, return ftrace_modify_code(pc, old, new, true); } +void arch_ftrace_update_code(int command) +{ + ftrace_modify_all_code(command); +} + int __init ftrace_dyn_arch_init(void) { return 0; @@ -125,23 +129,20 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, * on other archs. It's unlikely on AArch64. */ old = *parent; - *parent = return_hooker; trace.func = self_addr; trace.depth = current->curr_ret_stack + 1; /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) { - *parent = old; + if (!ftrace_graph_entry(&trace)) return; - } err = ftrace_push_return_trace(old, self_addr, &trace.depth, frame_pointer); - if (err == -EBUSY) { - *parent = old; + if (err == -EBUSY) return; - } + else + *parent = return_hooker; } #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 23cfc08fc8ba..ffe9c2b6431b 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -415,15 +415,17 @@ ENDPROC(__create_page_tables) */ .set initial_sp, init_thread_union + THREAD_START_SP __mmap_switched: - adr_l x6, __bss_start - adr_l x7, __bss_stop - -1: cmp x6, x7 - b.hs 2f - str xzr, [x6], #8 // Clear BSS - b 1b -2: + // Clear BSS + adr_l x0, __bss_start + mov x1, xzr + adr_l x2, __bss_stop + sub x2, x2, x0 + bl __pi_memset + adr_l sp, initial_sp, x4 + mov x4, sp + and x4, x4, #~(THREAD_SIZE - 1) + msr sp_el0, x4 // Save thread_info str_l x21, __fdt_pointer, x5 // Save FDT pointer str_l x24, memstart_addr, x6 // Save PHYS_OFFSET mov x29, #0 @@ -606,6 +608,8 @@ ENDPROC(secondary_startup) ENTRY(__secondary_switched) ldr x0, [x21] // get secondary_data.stack mov sp, x0 + and x0, x0, #~(THREAD_SIZE - 1) + msr sp_el0, x0 // save thread_info mov x29, #0 b secondary_start_kernel ENDPROC(__secondary_switched) diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 9f17ec071ee0..2386b26c0712 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -30,6 +30,9 @@ unsigned long irq_err_count; +/* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned. 
*/ +DEFINE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack) __aligned(16); + int arch_show_interrupts(struct seq_file *p, int prec) { show_ipi_list(p, prec); diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index f4bc779e62e8..93e970231ca9 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -30,9 +30,6 @@ #include <asm/insn.h> #include <asm/sections.h> -#define AARCH64_INSN_IMM_MOVNZ AARCH64_INSN_IMM_MAX -#define AARCH64_INSN_IMM_MOVK AARCH64_INSN_IMM_16 - void *module_alloc(unsigned long size) { void *p; @@ -75,15 +72,18 @@ static u64 do_reloc(enum aarch64_reloc_op reloc_op, void *place, u64 val) static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len) { - u64 imm_mask = (1 << len) - 1; s64 sval = do_reloc(op, place, val); switch (len) { case 16: *(s16 *)place = sval; + if (sval < S16_MIN || sval > U16_MAX) + return -ERANGE; break; case 32: *(s32 *)place = sval; + if (sval < S32_MIN || sval > U32_MAX) + return -ERANGE; break; case 64: *(s64 *)place = sval; @@ -92,34 +92,23 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len) pr_err("Invalid length (%d) for data relocation\n", len); return 0; } - - /* - * Extract the upper value bits (including the sign bit) and - * shift them to bit 0. - */ - sval = (s64)(sval & ~(imm_mask >> 1)) >> (len - 1); - - /* - * Overflow has occurred if the value is not representable in - * len bits (i.e the bottom len bits are not sign-extended and - * the top bits are not all zero). - */ - if ((u64)(sval + 1) > 2) - return -ERANGE; - return 0; } +enum aarch64_insn_movw_imm_type { + AARCH64_INSN_IMM_MOVNZ, + AARCH64_INSN_IMM_MOVKZ, +}; + static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, - int lsb, enum aarch64_insn_imm_type imm_type) + int lsb, enum aarch64_insn_movw_imm_type imm_type) { - u64 imm, limit = 0; + u64 imm; s64 sval; u32 insn = le32_to_cpu(*(u32 *)place); sval = do_reloc(op, place, val); - sval >>= lsb; - imm = sval & 0xffff; + imm = sval >> lsb; if (imm_type == AARCH64_INSN_IMM_MOVNZ) { /* @@ -128,7 +117,7 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, * immediate is less than zero. */ insn &= ~(3 << 29); - if ((s64)imm >= 0) { + if (sval >= 0) { /* >=0: Set the instruction to MOVZ (opcode 10b). */ insn |= 2 << 29; } else { @@ -140,29 +129,13 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, */ imm = ~imm; } - imm_type = AARCH64_INSN_IMM_MOVK; } /* Update the instruction with the new encoding. */ - insn = aarch64_insn_encode_immediate(imm_type, insn, imm); + insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm); *(u32 *)place = cpu_to_le32(insn); - /* Shift out the immediate field. */ - sval >>= 16; - - /* - * For unsigned immediates, the overflow check is straightforward. - * For signed immediates, the sign bit is actually the bit past the - * most significant bit of the field. - * The AARCH64_INSN_IMM_16 immediate type is unsigned. - */ - if (imm_type != AARCH64_INSN_IMM_16) { - sval++; - limit++; - } - - /* Check the upper bits depending on the sign of the immediate. 
*/ - if ((u64)sval > limit) + if (imm > U16_MAX) return -ERANGE; return 0; @@ -267,25 +240,25 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, overflow_check = false; case R_AARCH64_MOVW_UABS_G0: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0, - AARCH64_INSN_IMM_16); + AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_UABS_G1_NC: overflow_check = false; case R_AARCH64_MOVW_UABS_G1: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16, - AARCH64_INSN_IMM_16); + AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_UABS_G2_NC: overflow_check = false; case R_AARCH64_MOVW_UABS_G2: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32, - AARCH64_INSN_IMM_16); + AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_UABS_G3: /* We're using the top bits so we can't overflow. */ overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 48, - AARCH64_INSN_IMM_16); + AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_SABS_G0: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0, @@ -302,7 +275,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, case R_AARCH64_MOVW_PREL_G0_NC: overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0, - AARCH64_INSN_IMM_MOVK); + AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_PREL_G0: ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0, @@ -311,7 +284,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, case R_AARCH64_MOVW_PREL_G1_NC: overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16, - AARCH64_INSN_IMM_MOVK); + AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_PREL_G1: ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16, @@ -320,7 +293,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, case R_AARCH64_MOVW_PREL_G2_NC: overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32, - AARCH64_INSN_IMM_MOVK); + AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_PREL_G2: ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32, diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c index 3aa74830cc69..ff4665462a02 100644 --- a/arch/arm64/kernel/perf_callchain.c +++ b/arch/arm64/kernel/perf_callchain.c @@ -164,8 +164,11 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry, frame.fp = regs->regs[29]; frame.sp = regs->sp; frame.pc = regs->pc; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + frame.graph = current->curr_ret_stack; +#endif - walk_stackframe(&frame, callchain_trace, entry); + walk_stackframe(current, &frame, callchain_trace, entry); } unsigned long perf_instruction_pointer(struct pt_regs *regs) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index f75b540bc3b4..88d742ba19d5 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -344,11 +344,14 @@ unsigned long get_wchan(struct task_struct *p) frame.fp = thread_saved_fp(p); frame.sp = thread_saved_sp(p); frame.pc = thread_saved_pc(p); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + frame.graph = p->curr_ret_stack; +#endif stack_page = (unsigned long)task_stack_page(p); do { if (frame.sp < stack_page || frame.sp >= stack_page + THREAD_SIZE || - unwind_frame(&frame)) + unwind_frame(p, &frame)) return 0; if (!in_sched_functions(frame.pc)) return frame.pc; diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 1971f491bb90..ff7f13239515 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -58,6 +58,12 @@ */ void ptrace_disable(struct task_struct *child) { + /* + * This would be better off in core code, but PTRACE_DETACH has + * grown its fair share of arch-specific worts and 
changing it + * is likely to cause regressions on obscure architectures. + */ + user_disable_single_step(child); } #ifdef CONFIG_HAVE_HW_BREAKPOINT diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c index 6c4fd2810ecb..1718706fde83 100644 --- a/arch/arm64/kernel/return_address.c +++ b/arch/arm64/kernel/return_address.c @@ -43,8 +43,11 @@ void *return_address(unsigned int level) frame.fp = (unsigned long)__builtin_frame_address(0); frame.sp = current_stack_pointer; frame.pc = (unsigned long)return_address; /* dummy */ +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + frame.graph = current->curr_ret_stack; +#endif - walk_stackframe(&frame, save_return_addr, &data); + walk_stackframe(current, &frame, save_return_addr, &data); if (!data.level) return data.addr; diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index f586f7c875e2..e33fe33876ab 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -173,6 +173,9 @@ ENTRY(cpu_resume) /* load physical address of identity map page table in x1 */ adrp x1, idmap_pg_dir mov sp, x2 + /* save thread_info */ + and x2, x2, #~(THREAD_SIZE - 1) + msr sp_el0, x2 /* * cpu_do_resume expects x0 to contain context physical address * pointer and x1 to contain physical address of 1:1 page tables diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index ccb6078ed9f2..4fad9787ab46 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -17,9 +17,11 @@ */ #include <linux/kernel.h> #include <linux/export.h> +#include <linux/ftrace.h> #include <linux/sched.h> #include <linux/stacktrace.h> +#include <asm/irq.h> #include <asm/stacktrace.h> /* @@ -35,25 +37,83 @@ * ldp x29, x30, [sp] * add sp, sp, #0x10 */ -int notrace unwind_frame(struct stackframe *frame) +int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) { unsigned long high, low; unsigned long fp = frame->fp; + unsigned long irq_stack_ptr; + + /* + * Use raw_smp_processor_id() to avoid false-positives from + * CONFIG_DEBUG_PREEMPT. get_wchan() calls unwind_frame() on sleeping + * task stacks, we can be pre-empted in this case, so + * {raw_,}smp_processor_id() may give us the wrong value. Sleeping + * tasks can't ever be on an interrupt stack, so regardless of cpu, + * the checks will always fail. + */ + irq_stack_ptr = IRQ_STACK_PTR(raw_smp_processor_id()); low = frame->sp; - high = ALIGN(low, THREAD_SIZE); + /* irq stacks are not THREAD_SIZE aligned */ + if (on_irq_stack(frame->sp, raw_smp_processor_id())) + high = irq_stack_ptr; + else + high = ALIGN(low, THREAD_SIZE) - 0x20; - if (fp < low || fp > high - 0x18 || fp & 0xf) + if (fp < low || fp > high || fp & 0xf) return -EINVAL; frame->sp = fp + 0x10; frame->fp = *(unsigned long *)(fp); frame->pc = *(unsigned long *)(fp + 8); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + if (tsk && tsk->ret_stack && + (frame->pc == (unsigned long)return_to_handler)) { + /* + * This is a case where function graph tracer has + * modified a return address (LR) in a stack frame + * to hook a function return. + * So replace it to an original value. + */ + frame->pc = tsk->ret_stack[frame->graph--].ret; + } +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + + /* + * Check whether we are going to walk through from interrupt stack + * to task stack. + * If we reach the end of the stack - and its an interrupt stack, + * unpack the dummy frame to find the original elr. 
+ * + * Check the frame->fp we read from the bottom of the irq_stack, + * and the original task stack pointer are both in current->stack. + */ + if (frame->sp == irq_stack_ptr) { + struct pt_regs *irq_args; + unsigned long orig_sp = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr); + + if (object_is_on_stack((void *)orig_sp) && + object_is_on_stack((void *)frame->fp)) { + frame->sp = orig_sp; + + /* orig_sp is the saved pt_regs, find the elr */ + irq_args = (struct pt_regs *)orig_sp; + frame->pc = irq_args->pc; + } else { + /* + * This frame has a non-standard format, and we + * didn't fix it, because the data looked wrong. + * Refuse to output this frame. + */ + return -EINVAL; + } + } + return 0; } -void notrace walk_stackframe(struct stackframe *frame, +void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame, int (*fn)(struct stackframe *, void *), void *data) { while (1) { @@ -61,7 +121,7 @@ void notrace walk_stackframe(struct stackframe *frame, if (fn(frame, data)) break; - ret = unwind_frame(frame); + ret = unwind_frame(tsk, frame); if (ret < 0) break; } @@ -112,8 +172,11 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) frame.sp = current_stack_pointer; frame.pc = (unsigned long)save_stack_trace_tsk; } +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + frame.graph = tsk->curr_ret_stack; +#endif - walk_stackframe(&frame, save_trace, &data); + walk_stackframe(tsk, &frame, save_trace, &data); if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index fce95e17cf7f..1095aa483a1c 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -1,3 +1,4 @@ +#include <linux/ftrace.h> #include <linux/percpu.h> #include <linux/slab.h> #include <asm/cacheflush.h> @@ -71,6 +72,13 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) local_dbg_save(flags); /* + * Function graph tracer state gets inconsistent when the kernel + * calls functions that never return (aka suspend finishers) hence + * disable graph tracing during their execution. + */ + pause_graph_tracing(); + + /* * mm context saved on the stack, it will be restored when * the cpu comes out of reset through the identity mapped * page tables, so that the thread address space is properly @@ -111,6 +119,8 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) hw_breakpoint_restore(NULL); } + unpause_graph_tracing(); + /* * Restore pstate flags. 
OS lock and mdscr have been already * restored, so from this point onwards, debugging is fully diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index 13339b6ffc1a..59779699a1a4 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -52,8 +52,11 @@ unsigned long profile_pc(struct pt_regs *regs) frame.fp = regs->regs[29]; frame.sp = regs->sp; frame.pc = regs->pc; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + frame.graph = -1; /* no task info */ +#endif do { - int ret = unwind_frame(&frame); + int ret = unwind_frame(NULL, &frame); if (ret < 0) return 0; } while (in_lock_functions(frame.pc)); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index e9b9b5364393..cbedd724f48e 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -146,17 +146,15 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) { struct stackframe frame; + unsigned long irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id()); + int skip; pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); if (!tsk) tsk = current; - if (regs) { - frame.fp = regs->regs[29]; - frame.sp = regs->sp; - frame.pc = regs->pc; - } else if (tsk == current) { + if (tsk == current) { frame.fp = (unsigned long)__builtin_frame_address(0); frame.sp = current_stack_pointer; frame.pc = (unsigned long)dump_backtrace; @@ -168,21 +166,49 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) frame.sp = thread_saved_sp(tsk); frame.pc = thread_saved_pc(tsk); } +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + frame.graph = tsk->curr_ret_stack; +#endif - pr_emerg("Call trace:\n"); + skip = !!regs; + printk("Call trace:\n"); while (1) { unsigned long where = frame.pc; unsigned long stack; int ret; - dump_backtrace_entry(where); - ret = unwind_frame(&frame); + /* skip until specified stack frame */ + if (!skip) { + dump_backtrace_entry(where); + } else if (frame.fp == regs->regs[29]) { + skip = 0; + /* + * Mostly, this is the case where this function is + * called in panic/abort. As exception handler's + * stack frame does not contain the corresponding pc + * at which an exception has taken place, use regs->pc + * instead. + */ + dump_backtrace_entry(regs->pc); + } + ret = unwind_frame(tsk, &frame); if (ret < 0) break; stack = frame.sp; - if (in_exception_text(where)) + if (in_exception_text(where)) { + /* + * If we switched to the irq_stack before calling this + * exception handler, then the pt_regs will be on the + * task stack. The easiest way to tell is if the large + * pt_regs would overlap with the end of the irq_stack. 
+ */ + if (stack < irq_stack_ptr && + (stack + sizeof(struct pt_regs)) > irq_stack_ptr) + stack = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr); + dump_mem("", "Exception stack", stack, stack + sizeof(struct pt_regs), false); + } } } @@ -456,22 +482,22 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) void __pte_error(const char *file, int line, unsigned long val) { - pr_crit("%s:%d: bad pte %016lx.\n", file, line, val); + pr_err("%s:%d: bad pte %016lx.\n", file, line, val); } void __pmd_error(const char *file, int line, unsigned long val) { - pr_crit("%s:%d: bad pmd %016lx.\n", file, line, val); + pr_err("%s:%d: bad pmd %016lx.\n", file, line, val); } void __pud_error(const char *file, int line, unsigned long val) { - pr_crit("%s:%d: bad pud %016lx.\n", file, line, val); + pr_err("%s:%d: bad pud %016lx.\n", file, line, val); } void __pgd_error(const char *file, int line, unsigned long val) { - pr_crit("%s:%d: bad pgd %016lx.\n", file, line, val); + pr_err("%s:%d: bad pgd %016lx.\n", file, line, val); } /* GENERIC_BUG traps */ diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 1ee2c3937d4e..e3928f578891 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -5,6 +5,7 @@ */ #include <asm-generic/vmlinux.lds.h> +#include <asm/cache.h> #include <asm/kernel-pgtable.h> #include <asm/thread_info.h> #include <asm/memory.h> @@ -112,7 +113,6 @@ SECTIONS *(.got) /* Global offset table */ } - ALIGN_DEBUG_RO RO_DATA(PAGE_SIZE) EXCEPTION_TABLE(8) NOTES @@ -127,7 +127,6 @@ SECTIONS ARM_EXIT_KEEP(EXIT_TEXT) } - ALIGN_DEBUG_RO_MIN(16) .init.data : { INIT_DATA INIT_SETUP(16) @@ -140,10 +139,7 @@ SECTIONS ARM_EXIT_KEEP(EXIT_DATA) } - PERCPU_SECTION(64) - - . = ALIGN(PAGE_SIZE); - __init_end = .; + PERCPU_SECTION(L1_CACHE_BYTES) . = ALIGN(4); .altinstructions : { @@ -156,9 +152,11 @@ SECTIONS } . = ALIGN(PAGE_SIZE); + __init_end = .; + _data = .; _sdata = .; - RW_DATA_SECTION(64, PAGE_SIZE, THREAD_SIZE) + RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) PECOFF_EDATA_PADDING _edata = .; diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 68a0759b1375..15f0477b0d2a 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -37,7 +37,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) { int ret; - trace_kvm_hvc_arm64(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0), + trace_kvm_hvc_arm64(*vcpu_pc(vcpu), vcpu_get_reg(vcpu, 0), kvm_vcpu_hvc_get_imm(vcpu)); ret = kvm_psci_call(vcpu); diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 1599701ef044..86c289832272 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -864,6 +864,10 @@ ENTRY(__kvm_flush_vm_context) ENDPROC(__kvm_flush_vm_context) __kvm_hyp_panic: + // Stash PAR_EL1 before corrupting it in __restore_sysregs + mrs x0, par_el1 + push x0, xzr + // Guess the context by looking at VTTBR: // If zero, then we're already a host. // Otherwise restore a minimal host context before panicing. 
@@ -898,7 +902,7 @@ __kvm_hyp_panic: mrs x3, esr_el2 mrs x4, far_el2 mrs x5, hpfar_el2 - mrs x6, par_el1 + pop x6, xzr // active context PAR_EL1 mrs x7, tpidr_el2 mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ @@ -914,7 +918,7 @@ __kvm_hyp_panic: ENDPROC(__kvm_hyp_panic) __hyp_panic_str: - .ascii "HYP panic:\nPS:%08x PC:%p ESR:%p\nFAR:%p HPFAR:%p PAR:%p\nVCPU:%p\n\0" + .ascii "HYP panic:\nPS:%08x PC:%016x ESR:%08x\nFAR:%016x HPFAR:%016x PAR:%016x\nVCPU:%p\n\0" .align 2 @@ -1015,9 +1019,15 @@ el1_trap: b.ne 1f // Not an abort we care about /* This is an abort. Check for permission fault */ +alternative_if_not ARM64_WORKAROUND_834220 and x2, x1, #ESR_ELx_FSC_TYPE cmp x2, #FSC_PERM b.ne 1f // Not a permission fault +alternative_else + nop // Use the permission fault path to + nop // check for a valid S1 translation, + nop // regardless of the ESR value. +alternative_endif /* * Check for Stage-1 page table walk, which is guaranteed diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 85c57158dcd9..648112e90ed5 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -48,7 +48,7 @@ static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) /* Note: These now point to the banked copies */ *vcpu_spsr(vcpu) = new_spsr_value; - *vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; + *vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; /* Branch to exception vector */ if (sctlr & (1 << 13)) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 87a64e8db04c..d2650e84faf2 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -78,7 +78,7 @@ static u32 get_ccsidr(u32 csselr) * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized). */ static bool access_dcsw(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, const struct sys_reg_desc *r) { if (!p->is_write) @@ -94,21 +94,19 @@ static bool access_dcsw(struct kvm_vcpu *vcpu, * sys_regs and leave it in complete control of the caches. */ static bool access_vm_reg(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, const struct sys_reg_desc *r) { - unsigned long val; bool was_enabled = vcpu_has_cache_enabled(vcpu); BUG_ON(!p->is_write); - val = *vcpu_reg(vcpu, p->Rt); if (!p->is_aarch32) { - vcpu_sys_reg(vcpu, r->reg) = val; + vcpu_sys_reg(vcpu, r->reg) = p->regval; } else { if (!p->is_32bit) - vcpu_cp15_64_high(vcpu, r->reg) = val >> 32; - vcpu_cp15_64_low(vcpu, r->reg) = val & 0xffffffffUL; + vcpu_cp15_64_high(vcpu, r->reg) = upper_32_bits(p->regval); + vcpu_cp15_64_low(vcpu, r->reg) = lower_32_bits(p->regval); } kvm_toggle_cache(vcpu, was_enabled); @@ -122,22 +120,19 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu, * for both AArch64 and AArch32 accesses. 
*/ static bool access_gic_sgi(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, const struct sys_reg_desc *r) { - u64 val; - if (!p->is_write) return read_from_write_only(vcpu, p); - val = *vcpu_reg(vcpu, p->Rt); - vgic_v3_dispatch_sgi(vcpu, val); + vgic_v3_dispatch_sgi(vcpu, p->regval); return true; } static bool trap_raz_wi(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, const struct sys_reg_desc *r) { if (p->is_write) @@ -147,19 +142,19 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu, } static bool trap_oslsr_el1(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, const struct sys_reg_desc *r) { if (p->is_write) { return ignore_write(vcpu, p); } else { - *vcpu_reg(vcpu, p->Rt) = (1 << 3); + p->regval = (1 << 3); return true; } } static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, const struct sys_reg_desc *r) { if (p->is_write) { @@ -167,7 +162,7 @@ static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu, } else { u32 val; asm volatile("mrs %0, dbgauthstatus_el1" : "=r" (val)); - *vcpu_reg(vcpu, p->Rt) = val; + p->regval = val; return true; } } @@ -200,17 +195,17 @@ static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu, * now use the debug registers. */ static bool trap_debug_regs(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, const struct sys_reg_desc *r) { if (p->is_write) { - vcpu_sys_reg(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt); + vcpu_sys_reg(vcpu, r->reg) = p->regval; vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; } else { - *vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg); + p->regval = vcpu_sys_reg(vcpu, r->reg); } - trace_trap_reg(__func__, r->reg, p->is_write, *vcpu_reg(vcpu, p->Rt)); + trace_trap_reg(__func__, r->reg, p->is_write, p->regval); return true; } @@ -225,10 +220,10 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu, * hyp.S code switches between host and guest values in future. 
*/ static inline void reg_to_dbg(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, u64 *dbg_reg) { - u64 val = *vcpu_reg(vcpu, p->Rt); + u64 val = p->regval; if (p->is_32bit) { val &= 0xffffffffUL; @@ -240,19 +235,16 @@ static inline void reg_to_dbg(struct kvm_vcpu *vcpu, } static inline void dbg_to_reg(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, u64 *dbg_reg) { - u64 val = *dbg_reg; - + p->regval = *dbg_reg; if (p->is_32bit) - val &= 0xffffffffUL; - - *vcpu_reg(vcpu, p->Rt) = val; + p->regval &= 0xffffffffUL; } static inline bool trap_bvr(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, const struct sys_reg_desc *rd) { u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; @@ -294,7 +286,7 @@ static inline void reset_bvr(struct kvm_vcpu *vcpu, } static inline bool trap_bcr(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, const struct sys_reg_desc *rd) { u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; @@ -337,7 +329,7 @@ static inline void reset_bcr(struct kvm_vcpu *vcpu, } static inline bool trap_wvr(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, const struct sys_reg_desc *rd) { u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; @@ -380,7 +372,7 @@ static inline void reset_wvr(struct kvm_vcpu *vcpu, } static inline bool trap_wcr(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, const struct sys_reg_desc *rd) { u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; @@ -687,7 +679,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { }; static bool trap_dbgidr(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, const struct sys_reg_desc *r) { if (p->is_write) { @@ -697,23 +689,23 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu, u64 pfr = read_system_reg(SYS_ID_AA64PFR0_EL1); u32 el3 = !!cpuid_feature_extract_field(pfr, ID_AA64PFR0_EL3_SHIFT); - *vcpu_reg(vcpu, p->Rt) = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) | - (((dfr >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) << 24) | - (((dfr >> ID_AA64DFR0_CTX_CMPS_SHIFT) & 0xf) << 20) | - (6 << 16) | (el3 << 14) | (el3 << 12)); + p->regval = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) | + (((dfr >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) << 24) | + (((dfr >> ID_AA64DFR0_CTX_CMPS_SHIFT) & 0xf) << 20) + | (6 << 16) | (el3 << 14) | (el3 << 12)); return true; } } static bool trap_debug32(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, const struct sys_reg_desc *r) { if (p->is_write) { - vcpu_cp14(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt); + vcpu_cp14(vcpu, r->reg) = p->regval; vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; } else { - *vcpu_reg(vcpu, p->Rt) = vcpu_cp14(vcpu, r->reg); + p->regval = vcpu_cp14(vcpu, r->reg); } return true; @@ -731,7 +723,7 @@ static bool trap_debug32(struct kvm_vcpu *vcpu, */ static inline bool trap_xvr(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, const struct sys_reg_desc *rd) { u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; @@ -740,12 +732,12 @@ static inline bool trap_xvr(struct kvm_vcpu *vcpu, u64 val = *dbg_reg; val &= 0xffffffffUL; - val |= *vcpu_reg(vcpu, p->Rt) << 32; + val |= p->regval << 32; *dbg_reg = val; vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; } else { - *vcpu_reg(vcpu, p->Rt) = *dbg_reg >> 32; + p->regval = *dbg_reg >> 32; } 
trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); @@ -991,7 +983,7 @@ int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run) * Return 0 if the access has been handled, and -1 if not. */ static int emulate_cp(struct kvm_vcpu *vcpu, - const struct sys_reg_params *params, + struct sys_reg_params *params, const struct sys_reg_desc *table, size_t num) { @@ -1062,12 +1054,12 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, { struct sys_reg_params params; u32 hsr = kvm_vcpu_get_hsr(vcpu); + int Rt = (hsr >> 5) & 0xf; int Rt2 = (hsr >> 10) & 0xf; params.is_aarch32 = true; params.is_32bit = false; params.CRm = (hsr >> 1) & 0xf; - params.Rt = (hsr >> 5) & 0xf; params.is_write = ((hsr & 1) == 0); params.Op0 = 0; @@ -1076,15 +1068,12 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, params.CRn = 0; /* - * Massive hack here. Store Rt2 in the top 32bits so we only - * have one register to deal with. As we use the same trap + * Make a 64-bit value out of Rt and Rt2. As we use the same trap * backends between AArch32 and AArch64, we get away with it. */ if (params.is_write) { - u64 val = *vcpu_reg(vcpu, params.Rt); - val &= 0xffffffff; - val |= *vcpu_reg(vcpu, Rt2) << 32; - *vcpu_reg(vcpu, params.Rt) = val; + params.regval = vcpu_get_reg(vcpu, Rt) & 0xffffffff; + params.regval |= vcpu_get_reg(vcpu, Rt2) << 32; } if (!emulate_cp(vcpu, ¶ms, target_specific, nr_specific)) @@ -1095,11 +1084,10 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, unhandled_cp_access(vcpu, ¶ms); out: - /* Do the opposite hack for the read side */ + /* Split up the value between registers for the read side */ if (!params.is_write) { - u64 val = *vcpu_reg(vcpu, params.Rt); - val >>= 32; - *vcpu_reg(vcpu, Rt2) = val; + vcpu_set_reg(vcpu, Rt, lower_32_bits(params.regval)); + vcpu_set_reg(vcpu, Rt2, upper_32_bits(params.regval)); } return 1; @@ -1118,21 +1106,24 @@ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu, { struct sys_reg_params params; u32 hsr = kvm_vcpu_get_hsr(vcpu); + int Rt = (hsr >> 5) & 0xf; params.is_aarch32 = true; params.is_32bit = true; params.CRm = (hsr >> 1) & 0xf; - params.Rt = (hsr >> 5) & 0xf; + params.regval = vcpu_get_reg(vcpu, Rt); params.is_write = ((hsr & 1) == 0); params.CRn = (hsr >> 10) & 0xf; params.Op0 = 0; params.Op1 = (hsr >> 14) & 0x7; params.Op2 = (hsr >> 17) & 0x7; - if (!emulate_cp(vcpu, ¶ms, target_specific, nr_specific)) - return 1; - if (!emulate_cp(vcpu, ¶ms, global, nr_global)) + if (!emulate_cp(vcpu, ¶ms, target_specific, nr_specific) || + !emulate_cp(vcpu, ¶ms, global, nr_global)) { + if (!params.is_write) + vcpu_set_reg(vcpu, Rt, params.regval); return 1; + } unhandled_cp_access(vcpu, ¶ms); return 1; @@ -1175,7 +1166,7 @@ int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run) } static int emulate_sys_reg(struct kvm_vcpu *vcpu, - const struct sys_reg_params *params) + struct sys_reg_params *params) { size_t num; const struct sys_reg_desc *table, *r; @@ -1230,6 +1221,8 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run) { struct sys_reg_params params; unsigned long esr = kvm_vcpu_get_hsr(vcpu); + int Rt = (esr >> 5) & 0x1f; + int ret; trace_kvm_handle_sys_reg(esr); @@ -1240,10 +1233,14 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run) params.CRn = (esr >> 10) & 0xf; params.CRm = (esr >> 1) & 0xf; params.Op2 = (esr >> 17) & 0x7; - params.Rt = (esr >> 5) & 0x1f; + params.regval = vcpu_get_reg(vcpu, Rt); params.is_write = !(esr & 1); - return emulate_sys_reg(vcpu, ¶ms); + ret = emulate_sys_reg(vcpu, 
¶ms); + + if (!params.is_write) + vcpu_set_reg(vcpu, Rt, params.regval); + return ret; } /****************************************************************************** diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index eaa324e4db4d..dbbb01cfbee9 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -28,7 +28,7 @@ struct sys_reg_params { u8 CRn; u8 CRm; u8 Op2; - u8 Rt; + u64 regval; bool is_write; bool is_aarch32; bool is_32bit; /* Only valid if is_aarch32 is true */ @@ -44,7 +44,7 @@ struct sys_reg_desc { /* Trapped access from guest, if non-NULL. */ bool (*access)(struct kvm_vcpu *, - const struct sys_reg_params *, + struct sys_reg_params *, const struct sys_reg_desc *); /* Initialization for vcpu. */ @@ -77,9 +77,9 @@ static inline bool ignore_write(struct kvm_vcpu *vcpu, } static inline bool read_zero(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p) + struct sys_reg_params *p) { - *vcpu_reg(vcpu, p->Rt) = 0; + p->regval = 0; return true; } diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c index 1e4576824165..ed90578fa120 100644 --- a/arch/arm64/kvm/sys_regs_generic_v8.c +++ b/arch/arm64/kvm/sys_regs_generic_v8.c @@ -31,13 +31,13 @@ #include "sys_regs.h" static bool access_actlr(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, const struct sys_reg_desc *r) { if (p->is_write) return ignore_write(vcpu, p); - *vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, ACTLR_EL1); + p->regval = vcpu_sys_reg(vcpu, ACTLR_EL1); return true; } diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index cfa44a6adc0a..6df07069a025 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -81,26 +81,32 @@ ENDPROC(__flush_cache_user_range) /* * __flush_dcache_area(kaddr, size) * - * Ensure that the data held in the page kaddr is written back to the - * page in question. + * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * are cleaned and invalidated to the PoC. * * - kaddr - kernel address * - size - size in question */ ENTRY(__flush_dcache_area) - dcache_line_size x2, x3 - add x1, x0, x1 - sub x3, x2, #1 - bic x0, x0, x3 -1: dc civac, x0 // clean & invalidate D line / unified line - add x0, x0, x2 - cmp x0, x1 - b.lo 1b - dsb sy + dcache_by_line_op civac, sy, x0, x1, x2, x3 ret ENDPIPROC(__flush_dcache_area) /* + * __clean_dcache_area_pou(kaddr, size) + * + * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * are cleaned to the PoU. + * + * - kaddr - kernel address + * - size - size in question + */ +ENTRY(__clean_dcache_area_pou) + dcache_by_line_op cvau, ish, x0, x1, x2, x3 + ret +ENDPROC(__clean_dcache_area_pou) + +/* * __inval_cache_range(start, end) * - start - start address of region * - end - end address of region diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index f636a2639f03..e87f53ff5f58 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -76,13 +76,28 @@ static void flush_context(unsigned int cpu) __flush_icache_all(); } -static int is_reserved_asid(u64 asid) +static bool check_update_reserved_asid(u64 asid, u64 newasid) { int cpu; - for_each_possible_cpu(cpu) - if (per_cpu(reserved_asids, cpu) == asid) - return 1; - return 0; + bool hit = false; + + /* + * Iterate over the set of reserved ASIDs looking for a match. + * If we find one, then we can update our mm to use newasid + * (i.e. 
the same ASID in the current generation) but we can't + * exit the loop early, since we need to ensure that all copies + * of the old ASID are updated to reflect the mm. Failure to do + * so could result in us missing the reserved ASID in a future + * generation. + */ + for_each_possible_cpu(cpu) { + if (per_cpu(reserved_asids, cpu) == asid) { + hit = true; + per_cpu(reserved_asids, cpu) = newasid; + } + } + + return hit; } static u64 new_context(struct mm_struct *mm, unsigned int cpu) @@ -92,12 +107,14 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) u64 generation = atomic64_read(&asid_generation); if (asid != 0) { + u64 newasid = generation | (asid & ~ASID_MASK); + /* * If our current ASID was active during a rollover, we * can continue to use it and this was just a false alarm. */ - if (is_reserved_asid(asid)) - return generation | (asid & ~ASID_MASK); + if (check_update_reserved_asid(asid, newasid)) + return newasid; /* * We had a valid ASID in a previous life, so try to re-use @@ -105,7 +122,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) */ asid &= ~ASID_MASK; if (!__test_and_set_bit(asid, asid_map)) - goto bump_gen; + return newasid; } /* @@ -129,10 +146,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) set_asid: __set_bit(asid, asid_map); cur_idx = asid; - -bump_gen: - asid |= generation; - return asid; + return asid | generation; } void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c index 13bbc3be6f5a..22e4cb4d6f53 100644 --- a/arch/arm64/mm/copypage.c +++ b/arch/arm64/mm/copypage.c @@ -24,8 +24,9 @@ void __cpu_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr) { + struct page *page = virt_to_page(kto); copy_page(kto, kfrom); - __flush_dcache_area(kto, PAGE_SIZE); + flush_dcache_page(page); } EXPORT_SYMBOL_GPL(__cpu_copy_user_page); diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 131a199114b4..331c4ca6205c 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -18,6 +18,7 @@ */ #include <linux/gfp.h> +#include <linux/acpi.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/genalloc.h> @@ -28,9 +29,6 @@ #include <asm/cacheflush.h> -struct dma_map_ops *dma_ops; -EXPORT_SYMBOL(dma_ops); - static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot, bool coherent) { @@ -42,7 +40,7 @@ static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot, static struct gen_pool *atomic_pool; #define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K -static size_t atomic_pool_size = DEFAULT_DMA_COHERENT_POOL_SIZE; +static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE; static int __init early_coherent_pool(char *p) { @@ -515,13 +513,7 @@ EXPORT_SYMBOL(dummy_dma_ops); static int __init arm64_dma_init(void) { - int ret; - - dma_ops = &swiotlb_dma_ops; - - ret = atomic_pool_init(); - - return ret; + return atomic_pool_init(); } arch_initcall(arm64_dma_init); @@ -552,10 +544,14 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, { bool coherent = is_device_dma_coherent(dev); int ioprot = dma_direction_to_prot(DMA_BIDIRECTIONAL, coherent); + size_t iosize = size; void *addr; if (WARN(!dev, "cannot create IOMMU mapping for unknown device\n")) return NULL; + + size = PAGE_ALIGN(size); + /* * Some drivers rely on this, and we probably don't want the * possibility of stale kernel data being read by devices anyway. 
@@ -566,7 +562,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, struct page **pages; pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent); - pages = iommu_dma_alloc(dev, size, gfp, ioprot, handle, + pages = iommu_dma_alloc(dev, iosize, gfp, ioprot, handle, flush_page); if (!pages) return NULL; @@ -574,7 +570,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot, __builtin_return_address(0)); if (!addr) - iommu_dma_free(dev, pages, size, handle); + iommu_dma_free(dev, pages, iosize, handle); } else { struct page *page; /* @@ -591,7 +587,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, if (!addr) return NULL; - *handle = iommu_dma_map_page(dev, page, 0, size, ioprot); + *handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot); if (iommu_dma_mapping_error(dev, *handle)) { if (coherent) __free_pages(page, get_order(size)); @@ -606,6 +602,9 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle, struct dma_attrs *attrs) { + size_t iosize = size; + + size = PAGE_ALIGN(size); /* * @cpu_addr will be one of 3 things depending on how it was allocated: * - A remapped array of pages from iommu_dma_alloc(), for all @@ -617,17 +616,17 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, * Hence how dodgy the below logic looks... */ if (__in_atomic_pool(cpu_addr, size)) { - iommu_dma_unmap_page(dev, handle, size, 0, NULL); + iommu_dma_unmap_page(dev, handle, iosize, 0, NULL); __free_from_pool(cpu_addr, size); } else if (is_vmalloc_addr(cpu_addr)){ struct vm_struct *area = find_vm_area(cpu_addr); if (WARN_ON(!area || !area->pages)) return; - iommu_dma_free(dev, area->pages, size, &handle); + iommu_dma_free(dev, area->pages, iosize, &handle); dma_common_free_remap(cpu_addr, size, VM_USERMAP); } else { - iommu_dma_unmap_page(dev, handle, size, 0, NULL); + iommu_dma_unmap_page(dev, handle, iosize, 0, NULL); __free_pages(virt_to_page(cpu_addr), get_order(size)); } } @@ -897,7 +896,7 @@ static int __iommu_attach_notifier(struct notifier_block *nb, return 0; } -static int register_iommu_dma_ops_notifier(struct bus_type *bus) +static int __init register_iommu_dma_ops_notifier(struct bus_type *bus) { struct notifier_block *nb = kzalloc(sizeof(*nb), GFP_KERNEL); int ret; @@ -984,8 +983,8 @@ static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, struct iommu_ops *iommu, bool coherent) { - if (!acpi_disabled && !dev->archdata.dma_ops) - dev->archdata.dma_ops = dma_ops; + if (!dev->archdata.dma_ops) + dev->archdata.dma_ops = &swiotlb_dma_ops; dev->archdata.dma_coherent = coherent; __iommu_setup_dma_ops(dev, dma_base, size, iommu); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 19211c4a8911..92ddac1e8ca2 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -393,16 +393,16 @@ static struct fault_info { { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 1 translation fault" }, { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" }, { do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" }, - { do_bad, SIGBUS, 0, "reserved access flag fault" }, + { do_bad, SIGBUS, 0, "unknown 8" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 
access flag fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" }, - { do_bad, SIGBUS, 0, "reserved permission fault" }, + { do_bad, SIGBUS, 0, "unknown 12" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" }, { do_bad, SIGBUS, 0, "synchronous external abort" }, - { do_bad, SIGBUS, 0, "asynchronous external abort" }, + { do_bad, SIGBUS, 0, "unknown 17" }, { do_bad, SIGBUS, 0, "unknown 18" }, { do_bad, SIGBUS, 0, "unknown 19" }, { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, @@ -410,16 +410,16 @@ static struct fault_info { { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, { do_bad, SIGBUS, 0, "synchronous parity error" }, - { do_bad, SIGBUS, 0, "asynchronous parity error" }, + { do_bad, SIGBUS, 0, "unknown 25" }, { do_bad, SIGBUS, 0, "unknown 26" }, { do_bad, SIGBUS, 0, "unknown 27" }, - { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk" }, - { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk" }, - { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk" }, - { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk" }, + { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" }, { do_bad, SIGBUS, 0, "unknown 32" }, { do_bad, SIGBUS, BUS_ADRALN, "alignment fault" }, - { do_bad, SIGBUS, 0, "debug event" }, + { do_bad, SIGBUS, 0, "unknown 34" }, { do_bad, SIGBUS, 0, "unknown 35" }, { do_bad, SIGBUS, 0, "unknown 36" }, { do_bad, SIGBUS, 0, "unknown 37" }, @@ -433,21 +433,21 @@ static struct fault_info { { do_bad, SIGBUS, 0, "unknown 45" }, { do_bad, SIGBUS, 0, "unknown 46" }, { do_bad, SIGBUS, 0, "unknown 47" }, - { do_bad, SIGBUS, 0, "unknown 48" }, + { do_bad, SIGBUS, 0, "TLB conflict abort" }, { do_bad, SIGBUS, 0, "unknown 49" }, { do_bad, SIGBUS, 0, "unknown 50" }, { do_bad, SIGBUS, 0, "unknown 51" }, { do_bad, SIGBUS, 0, "implementation fault (lockdown abort)" }, - { do_bad, SIGBUS, 0, "unknown 53" }, + { do_bad, SIGBUS, 0, "implementation fault (unsupported exclusive)" }, { do_bad, SIGBUS, 0, "unknown 54" }, { do_bad, SIGBUS, 0, "unknown 55" }, { do_bad, SIGBUS, 0, "unknown 56" }, { do_bad, SIGBUS, 0, "unknown 57" }, - { do_bad, SIGBUS, 0, "implementation fault (coprocessor abort)" }, + { do_bad, SIGBUS, 0, "unknown 58" }, { do_bad, SIGBUS, 0, "unknown 59" }, { do_bad, SIGBUS, 0, "unknown 60" }, - { do_bad, SIGBUS, 0, "unknown 61" }, - { do_bad, SIGBUS, 0, "unknown 62" }, + { do_bad, SIGBUS, 0, "section domain fault" }, + { do_bad, SIGBUS, 0, "page domain fault" }, { do_bad, SIGBUS, 0, "unknown 63" }, }; diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index c26b804015e8..46649d6e6c5a 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -34,19 +34,24 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start, __flush_icache_all(); } +static void sync_icache_aliases(void *kaddr, unsigned long len) +{ + unsigned long addr = (unsigned long)kaddr; + + if (icache_is_aliasing()) { + __clean_dcache_area_pou(kaddr, len); + __flush_icache_all(); + } else { + 
flush_icache_range(addr, addr + len); + } +} + static void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *kaddr, unsigned long len) { - if (vma->vm_flags & VM_EXEC) { - unsigned long addr = (unsigned long)kaddr; - if (icache_is_aliasing()) { - __flush_dcache_area(kaddr, len); - __flush_icache_all(); - } else { - flush_icache_range(addr, addr + len); - } - } + if (vma->vm_flags & VM_EXEC) + sync_icache_aliases(kaddr, len); } /* @@ -74,13 +79,11 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr) if (!page_mapping(page)) return; - if (!test_and_set_bit(PG_dcache_clean, &page->flags)) { - __flush_dcache_area(page_address(page), - PAGE_SIZE << compound_order(page)); + if (!test_and_set_bit(PG_dcache_clean, &page->flags)) + sync_icache_aliases(page_address(page), + PAGE_SIZE << compound_order(page)); + else if (icache_is_aivivt()) __flush_icache_all(); - } else if (icache_is_aivivt()) { - __flush_icache_all(); - } } /* diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 383b03ff38f8..82d607c3614e 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -41,17 +41,289 @@ int pud_huge(pud_t pud) #endif } +static int find_num_contig(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, size_t *pgsize) +{ + pgd_t *pgd = pgd_offset(mm, addr); + pud_t *pud; + pmd_t *pmd; + + *pgsize = PAGE_SIZE; + if (!pte_cont(pte)) + return 1; + if (!pgd_present(*pgd)) { + VM_BUG_ON(!pgd_present(*pgd)); + return 1; + } + pud = pud_offset(pgd, addr); + if (!pud_present(*pud)) { + VM_BUG_ON(!pud_present(*pud)); + return 1; + } + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd)) { + VM_BUG_ON(!pmd_present(*pmd)); + return 1; + } + if ((pte_t *)pmd == ptep) { + *pgsize = PMD_SIZE; + return CONT_PMDS; + } + return CONT_PTES; +} + +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + size_t pgsize; + int i; + int ncontig = find_num_contig(mm, addr, ptep, pte, &pgsize); + unsigned long pfn; + pgprot_t hugeprot; + + if (ncontig == 1) { + set_pte_at(mm, addr, ptep, pte); + return; + } + + pfn = pte_pfn(pte); + hugeprot = __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte)); + for (i = 0; i < ncontig; i++) { + pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep, + pte_val(pfn_pte(pfn, hugeprot))); + set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot)); + ptep++; + pfn += pgsize >> PAGE_SHIFT; + addr += pgsize; + } +} + +pte_t *huge_pte_alloc(struct mm_struct *mm, + unsigned long addr, unsigned long sz) +{ + pgd_t *pgd; + pud_t *pud; + pte_t *pte = NULL; + + pr_debug("%s: addr:0x%lx sz:0x%lx\n", __func__, addr, sz); + pgd = pgd_offset(mm, addr); + pud = pud_alloc(mm, pgd, addr); + if (!pud) + return NULL; + + if (sz == PUD_SIZE) { + pte = (pte_t *)pud; + } else if (sz == (PAGE_SIZE * CONT_PTES)) { + pmd_t *pmd = pmd_alloc(mm, pud, addr); + + WARN_ON(addr & (sz - 1)); + /* + * Note that if this code were ever ported to the + * 32-bit arm platform then it will cause trouble in + * the case where CONFIG_HIGHPTE is set, since there + * will be no pte_unmap() to correspond with this + * pte_alloc_map(). 
+ */ + pte = pte_alloc_map(mm, NULL, pmd, addr); + } else if (sz == PMD_SIZE) { + if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) && + pud_none(*pud)) + pte = huge_pmd_share(mm, addr, pud); + else + pte = (pte_t *)pmd_alloc(mm, pud, addr); + } else if (sz == (PMD_SIZE * CONT_PMDS)) { + pmd_t *pmd; + + pmd = pmd_alloc(mm, pud, addr); + WARN_ON(addr & (sz - 1)); + return (pte_t *)pmd; + } + + pr_debug("%s: addr:0x%lx sz:0x%lx ret pte=%p/0x%llx\n", __func__, addr, + sz, pte, pte_val(*pte)); + return pte; +} + +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd = NULL; + pte_t *pte = NULL; + + pgd = pgd_offset(mm, addr); + pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd); + if (!pgd_present(*pgd)) + return NULL; + pud = pud_offset(pgd, addr); + if (!pud_present(*pud)) + return NULL; + + if (pud_huge(*pud)) + return (pte_t *)pud; + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd)) + return NULL; + + if (pte_cont(pmd_pte(*pmd))) { + pmd = pmd_offset( + pud, (addr & CONT_PMD_MASK)); + return (pte_t *)pmd; + } + if (pmd_huge(*pmd)) + return (pte_t *)pmd; + pte = pte_offset_kernel(pmd, addr); + if (pte_present(*pte) && pte_cont(*pte)) { + pte = pte_offset_kernel( + pmd, (addr & CONT_PTE_MASK)); + return pte; + } + return NULL; +} + +pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, + struct page *page, int writable) +{ + size_t pagesize = huge_page_size(hstate_vma(vma)); + + if (pagesize == CONT_PTE_SIZE) { + entry = pte_mkcont(entry); + } else if (pagesize == CONT_PMD_SIZE) { + entry = pmd_pte(pmd_mkcont(pte_pmd(entry))); + } else if (pagesize != PUD_SIZE && pagesize != PMD_SIZE) { + pr_warn("%s: unrecognized huge page size 0x%lx\n", + __func__, pagesize); + } + return entry; +} + +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + pte_t pte; + + if (pte_cont(*ptep)) { + int ncontig, i; + size_t pgsize; + pte_t *cpte; + bool is_dirty = false; + + cpte = huge_pte_offset(mm, addr); + ncontig = find_num_contig(mm, addr, cpte, *cpte, &pgsize); + /* save the 1st pte to return */ + pte = ptep_get_and_clear(mm, addr, cpte); + for (i = 1; i < ncontig; ++i) { + /* + * If HW_AFDBM is enabled, then the HW could + * turn on the dirty bit for any of the page + * in the set, so check them all. 
+ */ + ++cpte; + if (pte_dirty(ptep_get_and_clear(mm, addr, cpte))) + is_dirty = true; + } + if (is_dirty) + return pte_mkdirty(pte); + else + return pte; + } else { + return ptep_get_and_clear(mm, addr, ptep); + } +} + +int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty) +{ + pte_t *cpte; + + if (pte_cont(pte)) { + int ncontig, i, changed = 0; + size_t pgsize = 0; + unsigned long pfn = pte_pfn(pte); + /* Select all bits except the pfn */ + pgprot_t hugeprot = + __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ + pte_val(pte)); + + cpte = huge_pte_offset(vma->vm_mm, addr); + pfn = pte_pfn(*cpte); + ncontig = find_num_contig(vma->vm_mm, addr, cpte, + *cpte, &pgsize); + for (i = 0; i < ncontig; ++i, ++cpte) { + changed = ptep_set_access_flags(vma, addr, cpte, + pfn_pte(pfn, + hugeprot), + dirty); + pfn += pgsize >> PAGE_SHIFT; + } + return changed; + } else { + return ptep_set_access_flags(vma, addr, ptep, pte, dirty); + } +} + +void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + if (pte_cont(*ptep)) { + int ncontig, i; + pte_t *cpte; + size_t pgsize = 0; + + cpte = huge_pte_offset(mm, addr); + ncontig = find_num_contig(mm, addr, cpte, *cpte, &pgsize); + for (i = 0; i < ncontig; ++i, ++cpte) + ptep_set_wrprotect(mm, addr, cpte); + } else { + ptep_set_wrprotect(mm, addr, ptep); + } +} + +void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + if (pte_cont(*ptep)) { + int ncontig, i; + pte_t *cpte; + size_t pgsize = 0; + + cpte = huge_pte_offset(vma->vm_mm, addr); + ncontig = find_num_contig(vma->vm_mm, addr, cpte, + *cpte, &pgsize); + for (i = 0; i < ncontig; ++i, ++cpte) + ptep_clear_flush(vma, addr, cpte); + } else { + ptep_clear_flush(vma, addr, ptep); + } +} + static __init int setup_hugepagesz(char *opt) { unsigned long ps = memparse(opt, &opt); + if (ps == PMD_SIZE) { hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); } else if (ps == PUD_SIZE) { hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); + } else if (ps == (PAGE_SIZE * CONT_PTES)) { + hugetlb_add_hstate(CONT_PTE_SHIFT); + } else if (ps == (PMD_SIZE * CONT_PMDS)) { + hugetlb_add_hstate((PMD_SHIFT + CONT_PMD_SHIFT) - PAGE_SHIFT); } else { - pr_err("hugepagesz: Unsupported page size %lu M\n", ps >> 20); + pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10); return 0; } return 1; } __setup("hugepagesz=", setup_hugepagesz); + +#ifdef CONFIG_ARM64_64K_PAGES +static __init int add_default_hugepagesz(void) +{ + if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL) + hugetlb_add_hstate(CONT_PMD_SHIFT); + return 0; +} +arch_initcall(add_default_hugepagesz); +#endif diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 17bf39ac83ba..f3b061e67bfe 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -71,7 +71,7 @@ early_param("initrd", early_initrd); * currently assumes that for memory starting above 4G, 32-bit devices will * use a DMA offset. 
*/ -static phys_addr_t max_zone_dma_phys(void) +static phys_addr_t __init max_zone_dma_phys(void) { phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, 32); return min(offset + (1ULL << 32), memblock_end_of_DRAM()); @@ -120,17 +120,17 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) #ifdef CONFIG_HAVE_ARCH_PFN_VALID int pfn_valid(unsigned long pfn) { - return memblock_is_memory(pfn << PAGE_SHIFT); + return memblock_is_map_memory(pfn << PAGE_SHIFT); } EXPORT_SYMBOL(pfn_valid); #endif #ifndef CONFIG_SPARSEMEM -static void arm64_memory_present(void) +static void __init arm64_memory_present(void) { } #else -static void arm64_memory_present(void) +static void __init arm64_memory_present(void) { struct memblock_region *reg; @@ -360,7 +360,6 @@ void free_initmem(void) { fixup_init(); free_initmem_default(0); - free_alternatives_memory(); } #ifdef CONFIG_BLK_DEV_INITRD diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index e3f563c81c48..58faeaa7fbdc 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -64,8 +64,12 @@ EXPORT_SYMBOL(phys_mem_access_prot); static void __init *early_alloc(unsigned long sz) { - void *ptr = __va(memblock_alloc(sz, sz)); - BUG_ON(!ptr); + phys_addr_t phys; + void *ptr; + + phys = memblock_alloc(sz, sz); + BUG_ON(!phys); + ptr = __va(phys); memset(ptr, 0, sz); return ptr; } @@ -81,55 +85,19 @@ static void split_pmd(pmd_t *pmd, pte_t *pte) do { /* * Need to have the least restrictive permissions available - * permissions will be fixed up later. Default the new page - * range as contiguous ptes. + * permissions will be fixed up later */ - set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC_CONT)); + set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); pfn++; } while (pte++, i++, i < PTRS_PER_PTE); } -/* - * Given a PTE with the CONT bit set, determine where the CONT range - * starts, and clear the entire range of PTE CONT bits. - */ -static void clear_cont_pte_range(pte_t *pte, unsigned long addr) -{ - int i; - - pte -= CONT_RANGE_OFFSET(addr); - for (i = 0; i < CONT_PTES; i++) { - set_pte(pte, pte_mknoncont(*pte)); - pte++; - } - flush_tlb_all(); -} - -/* - * Given a range of PTEs set the pfn and provided page protection flags - */ -static void __populate_init_pte(pte_t *pte, unsigned long addr, - unsigned long end, phys_addr_t phys, - pgprot_t prot) -{ - unsigned long pfn = __phys_to_pfn(phys); - - do { - /* clear all the bits except the pfn, then apply the prot */ - set_pte(pte, pfn_pte(pfn, prot)); - pte++; - pfn++; - addr += PAGE_SIZE; - } while (addr != end); -} - static void alloc_init_pte(pmd_t *pmd, unsigned long addr, - unsigned long end, phys_addr_t phys, + unsigned long end, unsigned long pfn, pgprot_t prot, void *(*alloc)(unsigned long size)) { pte_t *pte; - unsigned long next; if (pmd_none(*pmd) || pmd_sect(*pmd)) { pte = alloc(PTRS_PER_PTE * sizeof(pte_t)); @@ -142,27 +110,9 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr, pte = pte_offset_kernel(pmd, addr); do { - next = min(end, (addr + CONT_SIZE) & CONT_MASK); - if (((addr | next | phys) & ~CONT_MASK) == 0) { - /* a block of CONT_PTES */ - __populate_init_pte(pte, addr, next, phys, - __pgprot(pgprot_val(prot) | PTE_CONT)); - } else { - /* - * If the range being split is already inside of a - * contiguous range but this PTE isn't going to be - * contiguous, then we want to unmark the adjacent - * ranges, then update the portion of the range we - * are interrested in. 
- */ - clear_cont_pte_range(pte, addr); - __populate_init_pte(pte, addr, next, phys, prot); - } - - pte += (next - addr) >> PAGE_SHIFT; - phys += next - addr; - addr = next; - } while (addr != end); + set_pte(pte, pfn_pte(pfn, prot)); + pfn++; + } while (pte++, addr += PAGE_SIZE, addr != end); } static void split_pud(pud_t *old_pud, pmd_t *pmd) @@ -223,7 +173,8 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, } } } else { - alloc_init_pte(pmd, addr, next, phys, prot, alloc); + alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys), + prot, alloc); } phys += next - addr; } while (pmd++, addr = next, addr != end); @@ -300,6 +251,14 @@ static void __create_mapping(struct mm_struct *mm, pgd_t *pgd, { unsigned long addr, length, end, next; + /* + * If the virtual and physical address don't have the same offset + * within a page, we cannot map the region as the caller expects. + */ + if (WARN_ON((phys ^ virt) & ~PAGE_MASK)) + return; + + phys &= PAGE_MASK; addr = virt & PAGE_MASK; length = PAGE_ALIGN(size + (virt & ~PAGE_MASK)); @@ -329,7 +288,7 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt, &phys, virt); return; } - __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), phys, virt, + __create_mapping(&init_mm, pgd_offset_k(virt), phys, virt, size, prot, early_alloc); } @@ -350,7 +309,7 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt, return; } - return __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), + return __create_mapping(&init_mm, pgd_offset_k(virt), phys, virt, size, prot, late_alloc); } @@ -362,8 +321,8 @@ static void __init __map_memblock(phys_addr_t start, phys_addr_t end) * for now. This will get more fine grained later once all memory * is mapped */ - unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE); - unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE); + unsigned long kernel_x_start = round_down(__pa(_stext), SWAPPER_BLOCK_SIZE); + unsigned long kernel_x_end = round_up(__pa(__init_end), SWAPPER_BLOCK_SIZE); if (end < kernel_x_start) { create_mapping(start, __phys_to_virt(start), @@ -421,6 +380,8 @@ static void __init map_mem(void) if (start >= end) break; + if (memblock_is_nomap(reg)) + continue; if (ARM64_SWAPPER_USES_SECTION_MAPS) { /* @@ -451,18 +412,18 @@ static void __init fixup_executable(void) { #ifdef CONFIG_DEBUG_RODATA /* now that we are actually fully mapped, make the start/end more fine grained */ - if (!IS_ALIGNED((unsigned long)_stext, SECTION_SIZE)) { + if (!IS_ALIGNED((unsigned long)_stext, SWAPPER_BLOCK_SIZE)) { unsigned long aligned_start = round_down(__pa(_stext), - SECTION_SIZE); + SWAPPER_BLOCK_SIZE); create_mapping(aligned_start, __phys_to_virt(aligned_start), __pa(_stext) - aligned_start, PAGE_KERNEL); } - if (!IS_ALIGNED((unsigned long)__init_end, SECTION_SIZE)) { + if (!IS_ALIGNED((unsigned long)__init_end, SWAPPER_BLOCK_SIZE)) { unsigned long aligned_end = round_up(__pa(__init_end), - SECTION_SIZE); + SWAPPER_BLOCK_SIZE); create_mapping(__pa(__init_end), (unsigned long)__init_end, aligned_end - __pa(__init_end), PAGE_KERNEL); @@ -475,7 +436,7 @@ void mark_rodata_ro(void) { create_mapping_late(__pa(_stext), (unsigned long)_stext, (unsigned long)_etext - (unsigned long)_stext, - PAGE_KERNEL_EXEC | PTE_RDONLY); + PAGE_KERNEL_ROX); } #endif @@ -505,6 +466,9 @@ void __init paging_init(void) empty_zero_page = virt_to_page(zero_page); + /* Ensure the zero page is visible to the page table walker */ + dsb(ishst); + /* * TTBR0 is only used for the 
identity mapping at this stage. Make it * point to zero page to avoid speculatively fetching new entries. diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index cb3ba1b812e7..ae11d4e03d0e 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -46,14 +46,14 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd) kmem_cache_free(pgd_cache, pgd); } -static int __init pgd_cache_init(void) +void __init pgd_cache_init(void) { + if (PGD_SIZE == PAGE_SIZE) + return; + /* * Naturally aligned pgds required by the architecture. */ - if (PGD_SIZE != PAGE_SIZE) - pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_SIZE, - SLAB_PANIC, NULL); - return 0; + pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_SIZE, + SLAB_PANIC, NULL); } -core_initcall(pgd_cache_init); diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S index 4c4d93c4bf65..146bd99a7532 100644 --- a/arch/arm64/mm/proc-macros.S +++ b/arch/arm64/mm/proc-macros.S @@ -62,3 +62,25 @@ bfi \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH #endif .endm + +/* + * Macro to perform a data cache maintenance for the interval + * [kaddr, kaddr + size) + * + * op: operation passed to dc instruction + * domain: domain used in dsb instruction + * kaddr: starting virtual address of the region + * size: size of the region + * Corrupts: kaddr, size, tmp1, tmp2 + */ + .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2 + dcache_line_size \tmp1, \tmp2 + add \size, \kaddr, \size + sub \tmp2, \tmp1, #1 + bic \kaddr, \kaddr, \tmp2 +9998: dc \op, \kaddr + add \kaddr, \kaddr, \tmp1 + cmp \kaddr, \size + b.lo 9998b + dsb \domain + .endm diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 9c4dce312b2b..a3d867e723b4 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -140,8 +140,6 @@ ENTRY(cpu_do_switch_mm) ret ENDPROC(cpu_do_switch_mm) - .section ".text.init", #alloc, #execinstr - /* * __cpu_setup * diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index cf3c7d4a1b58..b162ad70effc 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -50,7 +50,7 @@ static const int bpf2a64[] = { [BPF_REG_8] = A64_R(21), [BPF_REG_9] = A64_R(22), /* read-only frame pointer to access stack */ - [BPF_REG_FP] = A64_FP, + [BPF_REG_FP] = A64_R(25), /* temporary register for internal BPF JIT */ [TMP_REG_1] = A64_R(23), [TMP_REG_2] = A64_R(24), @@ -139,6 +139,12 @@ static inline int epilogue_offset(const struct jit_ctx *ctx) /* Stack must be multiples of 16B */ #define STACK_ALIGN(sz) (((sz) + 15) & ~15) +#define _STACK_SIZE \ + (MAX_BPF_STACK \ + + 4 /* extra for skb_copy_bits buffer */) + +#define STACK_SIZE STACK_ALIGN(_STACK_SIZE) + static void build_prologue(struct jit_ctx *ctx) { const u8 r6 = bpf2a64[BPF_REG_6]; @@ -150,10 +156,35 @@ static void build_prologue(struct jit_ctx *ctx) const u8 rx = bpf2a64[BPF_REG_X]; const u8 tmp1 = bpf2a64[TMP_REG_1]; const u8 tmp2 = bpf2a64[TMP_REG_2]; - int stack_size = MAX_BPF_STACK; - stack_size += 4; /* extra for skb_copy_bits buffer */ - stack_size = STACK_ALIGN(stack_size); + /* + * BPF prog stack layout + * + * high + * original A64_SP => 0:+-----+ BPF prologue + * |FP/LR| + * current A64_FP => -16:+-----+ + * | ... | callee saved registers + * +-----+ + * | | x25/x26 + * BPF fp register => -80:+-----+ <= (BPF_FP) + * | | + * | ... | BPF prog stack + * | | + * +-----+ <= (BPF_FP - MAX_BPF_STACK) + * |RSVD | JIT scratchpad + * current A64_SP => +-----+ <= (BPF_FP - STACK_SIZE) + * | | + * | ... 
| Function call stack + | | + +-----+ + * low + * + */ + + /* Save FP and LR registers to stay aligned with ARM64 AAPCS */ + emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); + emit(A64_MOV(1, A64_FP, A64_SP), ctx); /* Save callee-saved register */ emit(A64_PUSH(r6, r7, A64_SP), ctx); @@ -161,12 +192,15 @@ if (ctx->tmp_used) emit(A64_PUSH(tmp1, tmp2, A64_SP), ctx); - /* Set up BPF stack */ - emit(A64_SUB_I(1, A64_SP, A64_SP, stack_size), ctx); + /* Save fp (x25) and x26. SP requires 16 bytes alignment */ + emit(A64_PUSH(fp, A64_R(26), A64_SP), ctx); - /* Set up frame pointer */ + /* Set up BPF prog stack base register (x25) */ emit(A64_MOV(1, fp, A64_SP), ctx); + /* Set up function call stack */ + emit(A64_SUB_I(1, A64_SP, A64_SP, STACK_SIZE), ctx); + /* Clear registers A and X */ emit_a64_mov_i64(ra, 0, ctx); emit_a64_mov_i64(rx, 0, ctx); @@ -182,13 +216,12 @@ static void build_epilogue(struct jit_ctx *ctx) const u8 fp = bpf2a64[BPF_REG_FP]; const u8 tmp1 = bpf2a64[TMP_REG_1]; const u8 tmp2 = bpf2a64[TMP_REG_2]; - int stack_size = MAX_BPF_STACK; - - stack_size += 4; /* extra for skb_copy_bits buffer */ - stack_size = STACK_ALIGN(stack_size); /* We're done with BPF stack */ - emit(A64_ADD_I(1, A64_SP, A64_SP, stack_size), ctx); + emit(A64_ADD_I(1, A64_SP, A64_SP, STACK_SIZE), ctx); + + /* Restore fp (x25) and x26 */ + emit(A64_POP(fp, A64_R(26), A64_SP), ctx); /* Restore callee-saved register */ if (ctx->tmp_used) @@ -196,8 +229,8 @@ emit(A64_POP(r8, r9, A64_SP), ctx); emit(A64_POP(r6, r7, A64_SP), ctx); - /* Restore frame pointer */ - emit(A64_MOV(1, fp, A64_SP), ctx); + /* Restore FP/LR registers */ + emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx); /* Set return value */ emit(A64_MOV(1, A64_R(0), r0), ctx); @@ -557,7 +590,25 @@ emit_cond_jmp: case BPF_ST | BPF_MEM | BPF_H: case BPF_ST | BPF_MEM | BPF_B: case BPF_ST | BPF_MEM | BPF_DW: - goto notyet; + /* Load imm to a register then store it */ + ctx->tmp_used = 1; + emit_a64_mov_i(1, tmp2, off, ctx); + emit_a64_mov_i(1, tmp, imm, ctx); + switch (BPF_SIZE(code)) { + case BPF_W: + emit(A64_STR32(tmp, dst, tmp2), ctx); + break; + case BPF_H: + emit(A64_STRH(tmp, dst, tmp2), ctx); + break; + case BPF_B: + emit(A64_STRB(tmp, dst, tmp2), ctx); + break; + case BPF_DW: + emit(A64_STR64(tmp, dst, tmp2), ctx); + break; + } + break; /* STX: *(size *)(dst + off) = src */ case BPF_STX | BPF_MEM | BPF_W: @@ -624,7 +675,7 @@ emit_cond_jmp: return -EINVAL; } emit_a64_mov_i64(r3, size, ctx); - emit(A64_ADD_I(1, r4, fp, MAX_BPF_STACK), ctx); + emit(A64_SUB_I(1, r4, fp, STACK_SIZE), ctx); emit_a64_mov_i64(r5, (unsigned long)bpf_load_pointer, ctx); emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); emit(A64_MOV(1, A64_FP, A64_SP), ctx); @@ -758,7 +809,7 @@ void bpf_int_jit_compile(struct bpf_prog *prog) if (bpf_jit_enable > 1) bpf_jit_dump(prog->len, image_size, 2, ctx.image); - bpf_flush_icache(ctx.image, ctx.image + ctx.idx); + bpf_flush_icache(header, ctx.image + ctx.idx); set_memory_ro((unsigned long)header, header->pages); prog->bpf_func = (void *)ctx.image; |
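
The unwinder rework in this series changes the core API from walk_stackframe(&frame, fn, data) to walk_stackframe(tsk, &frame, fn, data) and adds a frame.graph index for the function graph tracer; the same three-step pattern (seed fp/sp/pc, seed graph, walk) is what the hunks above apply in perf_callchain.c, process.c, return_address.c, stacktrace.c and time.c. A minimal illustrative caller following that pattern is sketched below; the function and callback names are hypothetical, and the sketch assumes the in-kernel headers touched by this series rather than being part of the diff itself.

/* Hypothetical example, mirroring the callers updated above. */
#include <linux/kernel.h>
#include <linux/sched.h>
#include <asm/stacktrace.h>

static int print_frame(struct stackframe *frame, void *data)
{
	pr_info("  pc: %pS\n", (void *)frame->pc);
	return 0;			/* non-zero would stop the walk */
}

static void dump_task_backtrace(struct task_struct *tsk)
{
	struct stackframe frame;

	if (tsk == current) {
		frame.fp = (unsigned long)__builtin_frame_address(0);
		frame.sp = current_stack_pointer;
		frame.pc = (unsigned long)dump_task_backtrace;
	} else {
		/* blocked task: start from its saved context */
		frame.fp = thread_saved_fp(tsk);
		frame.sp = thread_saved_sp(tsk);
		frame.pc = thread_saved_pc(tsk);
	}
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	/* lets unwind_frame() map return_to_handler back to the real LR */
	frame.graph = tsk->curr_ret_stack;
#endif
	walk_stackframe(tsk, &frame, print_frame, NULL);
}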
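
Similarly, the sys_regs.c rework replaces the Rt field in struct sys_reg_params with a regval that the dispatcher fills from vcpu_get_reg() before emulation and writes back with vcpu_set_reg() after a read, so individual trap handlers never touch guest registers directly. A hypothetical handler written against the new interface would look roughly like this (illustrative sketch only; the handler name is made up):

/* Hypothetical trap handler using the new regval-based interface. */
static bool access_example_reg(struct kvm_vcpu *vcpu,
			       struct sys_reg_params *p,
			       const struct sys_reg_desc *r)
{
	if (p->is_write)
		/* guest MSR: the dispatcher already loaded Xt into p->regval */
		vcpu_sys_reg(vcpu, r->reg) = p->regval;
	else
		/* guest MRS: the dispatcher copies p->regval back to Xt afterwards */
		p->regval = vcpu_sys_reg(vcpu, r->reg);

	return true;
}

kvm_handle_sys_reg() then performs the vcpu_set_reg(vcpu, Rt, params.regval) write-back shown in the hunk above, and the same regval also lets kvm_handle_cp_64() pack and split the Rt/Rt2 pair with lower_32_bits()/upper_32_bits() instead of the old "store Rt2 in the top 32 bits" hack.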