diff options
Diffstat (limited to 'arch/arm/mm')
-rw-r--r-- | arch/arm/mm/Kconfig | 36 | ||||
-rw-r--r-- | arch/arm/mm/cache-tauros2.c | 32 | ||||
-rw-r--r-- | arch/arm/mm/dma-mapping.c | 248 | ||||
-rw-r--r-- | arch/arm/mm/idmap.c | 2 | ||||
-rw-r--r-- | arch/arm/mm/init.c | 24 | ||||
-rw-r--r-- | arch/arm/mm/mmu.c | 8 | ||||
-rw-r--r-- | arch/arm/mm/proc-v7.S | 2 |
7 files changed, 256 insertions, 96 deletions
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 549f6d3aec5b..55347662e5ed 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -1037,24 +1037,26 @@ config ARCH_SUPPORTS_BIG_ENDIAN This option specifies the architecture can support big endian operation. -config ARM_KERNMEM_PERMS - bool "Restrict kernel memory permissions" - depends on MMU - help - If this is set, kernel memory other than kernel text (and rodata) - will be made non-executable. The tradeoff is that each region is - padded to section-size (1MiB) boundaries (because their permissions - are different and splitting the 1M pages into 4K ones causes TLB - performance problems), wasting memory. - config DEBUG_RODATA bool "Make kernel text and rodata read-only" - depends on ARM_KERNMEM_PERMS + depends on MMU && !XIP_KERNEL + default y if CPU_V7 + help + If this is set, kernel text and rodata memory will be made + read-only, and non-text kernel memory will be made non-executable. + The tradeoff is that each region is padded to section-size (1MiB) + boundaries (because their permissions are different and splitting + the 1M pages into 4K ones causes TLB performance problems), which + can waste memory. + +config DEBUG_ALIGN_RODATA + bool "Make rodata strictly non-executable" + depends on DEBUG_RODATA default y help - If this is set, kernel text and rodata will be made read-only. This - is to help catch accidental or malicious attempts to change the - kernel's executable code. Additionally splits rodata from kernel - text so it can be made explicitly non-executable. This creates - another section-size padded region, so it can waste more memory - space while gaining the read-only protections. + If this is set, rodata will be made explicitly non-executable. This + provides protection on the rare chance that attackers might find and + use ROP gadgets that exist in the rodata section. This adds an + additional section-aligned split of rodata from kernel text so it + can be made explicitly non-executable. This padding may waste memory + space to gain the additional protection. diff --git a/arch/arm/mm/cache-tauros2.c b/arch/arm/mm/cache-tauros2.c index 1e373d268c04..88255bea65e4 100644 --- a/arch/arm/mm/cache-tauros2.c +++ b/arch/arm/mm/cache-tauros2.c @@ -22,6 +22,11 @@ #include <asm/cputype.h> #include <asm/hardware/cache-tauros2.h> +/* CP15 PJ4 Control configuration register */ +#define CCR_L2C_PREFETCH_DISABLE BIT(24) +#define CCR_L2C_ECC_ENABLE BIT(23) +#define CCR_L2C_WAY7_4_DISABLE BIT(21) +#define CCR_L2C_BURST8_ENABLE BIT(20) /* * When Tauros2 is used on a CPU that supports the v7 hierarchical @@ -182,18 +187,18 @@ static void enable_extra_feature(unsigned int features) u = read_extra_features(); if (features & CACHE_TAUROS2_PREFETCH_ON) - u &= ~0x01000000; + u &= ~CCR_L2C_PREFETCH_DISABLE; else - u |= 0x01000000; + u |= CCR_L2C_PREFETCH_DISABLE; pr_info("Tauros2: %s L2 prefetch.\n", (features & CACHE_TAUROS2_PREFETCH_ON) ? "Enabling" : "Disabling"); if (features & CACHE_TAUROS2_LINEFILL_BURST8) - u |= 0x00100000; + u |= CCR_L2C_BURST8_ENABLE; else - u &= ~0x00100000; - pr_info("Tauros2: %s line fill burt8.\n", + u &= ~CCR_L2C_BURST8_ENABLE; + pr_info("Tauros2: %s burst8 line fill.\n", (features & CACHE_TAUROS2_LINEFILL_BURST8) ? "Enabling" : "Disabling"); @@ -287,16 +292,15 @@ void __init tauros2_init(unsigned int features) node = of_find_matching_node(NULL, tauros2_ids); if (!node) { pr_info("Not found marvell,tauros2-cache, disable it\n"); - return; + } else { + ret = of_property_read_u32(node, "marvell,tauros2-cache-features", &f); + if (ret) { + pr_info("Not found marvell,tauros-cache-features property, " + "disable extra features\n"); + features = 0; + } else + features = f; } - - ret = of_property_read_u32(node, "marvell,tauros2-cache-features", &f); - if (ret) { - pr_info("Not found marvell,tauros-cache-features property, " - "disable extra features\n"); - features = 0; - } else - features = f; #endif tauros2_internal_init(features); } diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 0eca3812527e..deac58d5f1f7 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -42,6 +42,55 @@ #include "dma.h" #include "mm.h" +struct arm_dma_alloc_args { + struct device *dev; + size_t size; + gfp_t gfp; + pgprot_t prot; + const void *caller; + bool want_vaddr; +}; + +struct arm_dma_free_args { + struct device *dev; + size_t size; + void *cpu_addr; + struct page *page; + bool want_vaddr; +}; + +struct arm_dma_allocator { + void *(*alloc)(struct arm_dma_alloc_args *args, + struct page **ret_page); + void (*free)(struct arm_dma_free_args *args); +}; + +struct arm_dma_buffer { + struct list_head list; + void *virt; + struct arm_dma_allocator *allocator; +}; + +static LIST_HEAD(arm_dma_bufs); +static DEFINE_SPINLOCK(arm_dma_bufs_lock); + +static struct arm_dma_buffer *arm_dma_buffer_find(void *virt) +{ + struct arm_dma_buffer *buf, *found = NULL; + unsigned long flags; + + spin_lock_irqsave(&arm_dma_bufs_lock, flags); + list_for_each_entry(buf, &arm_dma_bufs, list) { + if (buf->virt == virt) { + list_del(&buf->list); + found = buf; + break; + } + } + spin_unlock_irqrestore(&arm_dma_bufs_lock, flags); + return found; +} + /* * The DMA API is built upon the notion of "buffer ownership". A buffer * is either exclusively owned by the CPU (and therefore may be accessed @@ -592,7 +641,7 @@ static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot) #define __alloc_remap_buffer(dev, size, gfp, prot, ret, c, wv) NULL #define __alloc_from_pool(size, ret_page) NULL #define __alloc_from_contiguous(dev, size, prot, ret, c, wv) NULL -#define __free_from_pool(cpu_addr, size) 0 +#define __free_from_pool(cpu_addr, size) do { } while (0) #define __free_from_contiguous(dev, page, cpu_addr, size, wv) do { } while (0) #define __dma_free_remap(cpu_addr, size) do { } while (0) @@ -610,7 +659,78 @@ static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp, return page_address(page); } +static void *simple_allocator_alloc(struct arm_dma_alloc_args *args, + struct page **ret_page) +{ + return __alloc_simple_buffer(args->dev, args->size, args->gfp, + ret_page); +} + +static void simple_allocator_free(struct arm_dma_free_args *args) +{ + __dma_free_buffer(args->page, args->size); +} +static struct arm_dma_allocator simple_allocator = { + .alloc = simple_allocator_alloc, + .free = simple_allocator_free, +}; + +static void *cma_allocator_alloc(struct arm_dma_alloc_args *args, + struct page **ret_page) +{ + return __alloc_from_contiguous(args->dev, args->size, args->prot, + ret_page, args->caller, + args->want_vaddr); +} + +static void cma_allocator_free(struct arm_dma_free_args *args) +{ + __free_from_contiguous(args->dev, args->page, args->cpu_addr, + args->size, args->want_vaddr); +} + +static struct arm_dma_allocator cma_allocator = { + .alloc = cma_allocator_alloc, + .free = cma_allocator_free, +}; + +static void *pool_allocator_alloc(struct arm_dma_alloc_args *args, + struct page **ret_page) +{ + return __alloc_from_pool(args->size, ret_page); +} + +static void pool_allocator_free(struct arm_dma_free_args *args) +{ + __free_from_pool(args->cpu_addr, args->size); +} + +static struct arm_dma_allocator pool_allocator = { + .alloc = pool_allocator_alloc, + .free = pool_allocator_free, +}; + +static void *remap_allocator_alloc(struct arm_dma_alloc_args *args, + struct page **ret_page) +{ + return __alloc_remap_buffer(args->dev, args->size, args->gfp, + args->prot, ret_page, args->caller, + args->want_vaddr); +} + +static void remap_allocator_free(struct arm_dma_free_args *args) +{ + if (args->want_vaddr) + __dma_free_remap(args->cpu_addr, args->size); + + __dma_free_buffer(args->page, args->size); +} + +static struct arm_dma_allocator remap_allocator = { + .alloc = remap_allocator_alloc, + .free = remap_allocator_free, +}; static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, pgprot_t prot, bool is_coherent, @@ -619,7 +739,16 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, u64 mask = get_coherent_dma_mask(dev); struct page *page = NULL; void *addr; - bool want_vaddr; + bool allowblock, cma; + struct arm_dma_buffer *buf; + struct arm_dma_alloc_args args = { + .dev = dev, + .size = PAGE_ALIGN(size), + .gfp = gfp, + .prot = prot, + .caller = caller, + .want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs), + }; #ifdef CONFIG_DMA_API_DEBUG u64 limit = (mask + 1) & ~mask; @@ -633,6 +762,10 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, if (!mask) return NULL; + buf = kzalloc(sizeof(*buf), gfp); + if (!buf) + return NULL; + if (mask < 0xffffffffULL) gfp |= GFP_DMA; @@ -644,28 +777,37 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, * platform; see CONFIG_HUGETLBFS. */ gfp &= ~(__GFP_COMP); + args.gfp = gfp; *handle = DMA_ERROR_CODE; - size = PAGE_ALIGN(size); - want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs); - - if (nommu()) - addr = __alloc_simple_buffer(dev, size, gfp, &page); - else if (dev_get_cma_area(dev) && (gfp & __GFP_DIRECT_RECLAIM)) - addr = __alloc_from_contiguous(dev, size, prot, &page, - caller, want_vaddr); - else if (is_coherent) - addr = __alloc_simple_buffer(dev, size, gfp, &page); - else if (!gfpflags_allow_blocking(gfp)) - addr = __alloc_from_pool(size, &page); + allowblock = gfpflags_allow_blocking(gfp); + cma = allowblock ? dev_get_cma_area(dev) : false; + + if (cma) + buf->allocator = &cma_allocator; + else if (nommu() || is_coherent) + buf->allocator = &simple_allocator; + else if (allowblock) + buf->allocator = &remap_allocator; else - addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, - caller, want_vaddr); + buf->allocator = &pool_allocator; + + addr = buf->allocator->alloc(&args, &page); + + if (page) { + unsigned long flags; - if (page) *handle = pfn_to_dma(dev, page_to_pfn(page)); + buf->virt = args.want_vaddr ? addr : page; + + spin_lock_irqsave(&arm_dma_bufs_lock, flags); + list_add(&buf->list, &arm_dma_bufs); + spin_unlock_irqrestore(&arm_dma_bufs_lock, flags); + } else { + kfree(buf); + } - return want_vaddr ? addr : page; + return args.want_vaddr ? addr : page; } /* @@ -741,25 +883,21 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, bool is_coherent) { struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); - bool want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs); - - size = PAGE_ALIGN(size); - - if (nommu()) { - __dma_free_buffer(page, size); - } else if (!is_coherent && __free_from_pool(cpu_addr, size)) { + struct arm_dma_buffer *buf; + struct arm_dma_free_args args = { + .dev = dev, + .size = PAGE_ALIGN(size), + .cpu_addr = cpu_addr, + .page = page, + .want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs), + }; + + buf = arm_dma_buffer_find(cpu_addr); + if (WARN(!buf, "Freeing invalid buffer %p\n", cpu_addr)) return; - } else if (!dev_get_cma_area(dev)) { - if (want_vaddr && !is_coherent) - __dma_free_remap(cpu_addr, size); - __dma_free_buffer(page, size); - } else { - /* - * Non-atomic allocations cannot be freed with IRQs disabled - */ - WARN_ON(irqs_disabled()); - __free_from_contiguous(dev, page, cpu_addr, size, want_vaddr); - } + + buf->allocator->free(&args); + kfree(buf); } void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, @@ -1122,6 +1260,9 @@ static inline void __free_iova(struct dma_iommu_mapping *mapping, spin_unlock_irqrestore(&mapping->lock, flags); } +/* We'll try 2M, 1M, 64K, and finally 4K; array must end with 0! */ +static const int iommu_order_array[] = { 9, 8, 4, 0 }; + static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t gfp, struct dma_attrs *attrs) { @@ -1129,6 +1270,7 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, int count = size >> PAGE_SHIFT; int array_size = count * sizeof(struct page *); int i = 0; + int order_idx = 0; if (array_size <= PAGE_SIZE) pages = kzalloc(array_size, GFP_KERNEL); @@ -1154,6 +1296,10 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, return pages; } + /* Go straight to 4K chunks if caller says it's OK. */ + if (dma_get_attr(DMA_ATTR_ALLOC_SINGLE_PAGES, attrs)) + order_idx = ARRAY_SIZE(iommu_order_array) - 1; + /* * IOMMU can map any pages, so himem can also be used here */ @@ -1162,22 +1308,24 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, while (count) { int j, order; - for (order = __fls(count); order > 0; --order) { - /* - * We do not want OOM killer to be invoked as long - * as we can fall back to single pages, so we force - * __GFP_NORETRY for orders higher than zero. - */ - pages[i] = alloc_pages(gfp | __GFP_NORETRY, order); - if (pages[i]) - break; + order = iommu_order_array[order_idx]; + + /* Drop down when we get small */ + if (__fls(count) < order) { + order_idx++; + continue; } - if (!pages[i]) { - /* - * Fall back to single page allocation. - * Might invoke OOM killer as last resort. - */ + if (order) { + /* See if it's easy to allocate a high-order chunk */ + pages[i] = alloc_pages(gfp | __GFP_NORETRY, order); + + /* Go down a notch at first sign of pressure */ + if (!pages[i]) { + order_idx++; + continue; + } + } else { pages[i] = alloc_pages(gfp, 0); if (!pages[i]) goto error; diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c index d65909697165..bd274a05b8ff 100644 --- a/arch/arm/mm/idmap.c +++ b/arch/arm/mm/idmap.c @@ -15,7 +15,7 @@ * page tables. */ pgd_t *idmap_pgd; -phys_addr_t (*arch_virt_to_idmap) (unsigned long x); +unsigned long (*arch_virt_to_idmap)(unsigned long x); #ifdef CONFIG_ARM_LPAE static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end, diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 49bd08178008..370581aeb871 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -572,8 +572,9 @@ void __init mem_init(void) } } -#ifdef CONFIG_ARM_KERNMEM_PERMS +#ifdef CONFIG_DEBUG_RODATA struct section_perm { + const char *name; unsigned long start; unsigned long end; pmdval_t mask; @@ -581,9 +582,13 @@ struct section_perm { pmdval_t clear; }; +/* First section-aligned location at or after __start_rodata. */ +extern char __start_rodata_section_aligned[]; + static struct section_perm nx_perms[] = { /* Make pages tables, etc before _stext RW (set NX). */ { + .name = "pre-text NX", .start = PAGE_OFFSET, .end = (unsigned long)_stext, .mask = ~PMD_SECT_XN, @@ -591,26 +596,26 @@ static struct section_perm nx_perms[] = { }, /* Make init RW (set NX). */ { + .name = "init NX", .start = (unsigned long)__init_begin, .end = (unsigned long)_sdata, .mask = ~PMD_SECT_XN, .prot = PMD_SECT_XN, }, -#ifdef CONFIG_DEBUG_RODATA /* Make rodata NX (set RO in ro_perms below). */ { - .start = (unsigned long)__start_rodata, + .name = "rodata NX", + .start = (unsigned long)__start_rodata_section_aligned, .end = (unsigned long)__init_begin, .mask = ~PMD_SECT_XN, .prot = PMD_SECT_XN, }, -#endif }; -#ifdef CONFIG_DEBUG_RODATA static struct section_perm ro_perms[] = { /* Make kernel code and rodata RX (set RO). */ { + .name = "text/rodata RO", .start = (unsigned long)_stext, .end = (unsigned long)__init_begin, #ifdef CONFIG_ARM_LPAE @@ -623,7 +628,6 @@ static struct section_perm ro_perms[] = { #endif }, }; -#endif /* * Updates section permissions only for the current mm (sections are @@ -670,8 +674,8 @@ void set_section_perms(struct section_perm *perms, int n, bool set, for (i = 0; i < n; i++) { if (!IS_ALIGNED(perms[i].start, SECTION_SIZE) || !IS_ALIGNED(perms[i].end, SECTION_SIZE)) { - pr_err("BUG: section %lx-%lx not aligned to %lx\n", - perms[i].start, perms[i].end, + pr_err("BUG: %s section %lx-%lx not aligned to %lx\n", + perms[i].name, perms[i].start, perms[i].end, SECTION_SIZE); continue; } @@ -712,7 +716,6 @@ void fix_kernmem_perms(void) stop_machine(__fix_kernmem_perms, NULL, NULL); } -#ifdef CONFIG_DEBUG_RODATA int __mark_rodata_ro(void *unused) { update_sections_early(ro_perms, ARRAY_SIZE(ro_perms)); @@ -735,11 +738,10 @@ void set_kernel_text_ro(void) set_section_perms(ro_perms, ARRAY_SIZE(ro_perms), true, current->active_mm); } -#endif /* CONFIG_DEBUG_RODATA */ #else static inline void fix_kernmem_perms(void) { } -#endif /* CONFIG_ARM_KERNMEM_PERMS */ +#endif /* CONFIG_DEBUG_RODATA */ void free_tcmmem(void) { diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 88fbe0d23ca6..62f4d01941f7 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1253,7 +1253,7 @@ static inline void prepare_page_table(void) #ifdef CONFIG_XIP_KERNEL /* The XIP kernel is mapped in the module area -- skip over it */ - addr = ((unsigned long)_etext + PMD_SIZE - 1) & PMD_MASK; + addr = ((unsigned long)_exiprom + PMD_SIZE - 1) & PMD_MASK; #endif for ( ; addr < PAGE_OFFSET; addr += PMD_SIZE) pmd_clear(pmd_off_k(addr)); @@ -1335,7 +1335,7 @@ static void __init devicemaps_init(const struct machine_desc *mdesc) #ifdef CONFIG_XIP_KERNEL map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK); map.virtual = MODULES_VADDR; - map.length = ((unsigned long)_etext - map.virtual + ~SECTION_MASK) & SECTION_MASK; + map.length = ((unsigned long)_exiprom - map.virtual + ~SECTION_MASK) & SECTION_MASK; map.type = MT_ROM; create_mapping(&map); #endif @@ -1426,7 +1426,11 @@ static void __init kmap_init(void) static void __init map_lowmem(void) { struct memblock_region *reg; +#ifdef CONFIG_XIP_KERNEL + phys_addr_t kernel_x_start = round_down(__pa(_sdata), SECTION_SIZE); +#else phys_addr_t kernel_x_start = round_down(__pa(_stext), SECTION_SIZE); +#endif phys_addr_t kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE); /* Map all the lowmem memory banks. */ diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 0f92d575a304..0f8963a7e7d9 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -487,7 +487,7 @@ __errata_finish: .align 2 __v7_setup_stack_ptr: - .word __v7_setup_stack - . + .word PHYS_RELATIVE(__v7_setup_stack, .) ENDPROC(__v7_setup) .bss |