From 2b87a2553aa04105724d6c844e223415985246ed Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 16 May 2019 12:04:37 +1000 Subject: powerpc/64s: Make boot look nice(r) Radix boot looks like this: ----------------------------------------------------- phys_mem_size = 0x200000000 dcache_bsize = 0x80 icache_bsize = 0x80 cpu_features = 0x0000c06f8f5fb1a7 possible = 0x0000fbffcf5fb1a7 always = 0x00000003800081a1 cpu_user_features = 0xdc0065c2 0xaee00000 mmu_features = 0xbc006041 firmware_features = 0x0000000010000000 hash-mmu: ppc64_pft_size = 0x0 hash-mmu: kernel vmalloc start = 0xc008000000000000 hash-mmu: kernel IO start = 0xc00a000000000000 hash-mmu: kernel vmemmap start = 0xc00c000000000000 ----------------------------------------------------- After the fix: ----------------------------------------------------- phys_mem_size = 0x200000000 dcache_bsize = 0x80 icache_bsize = 0x80 cpu_features = 0x0000c06f8f5fb1a7 possible = 0x0000fbffcf5fb1a7 always = 0x00000003800081a1 cpu_user_features = 0xdc0065c2 0xaee00000 mmu_features = 0xbc006041 firmware_features = 0x0000000010000000 vmalloc start = 0xc008000000000000 IO start = 0xc00a000000000000 vmemmap start = 0xc00c000000000000 ----------------------------------------------------- Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190516020437.11783-1-npiggin@gmail.com --- arch/powerpc/mm/book3s64/hash_utils.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index b8ad14bb1170..e6d471058597 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1957,7 +1957,4 @@ void __init print_system_hash_info(void) if (htab_hash_mask) pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask); - pr_info("kernel vmalloc start = 0x%lx\n", KERN_VIRT_START); - pr_info("kernel IO start = 0x%lx\n", KERN_IO_START); - pr_info("kernel vmemmap start = 0x%lx\n", (unsigned long)vmemmap); } -- cgit v1.2.3 From c784be435d5dae28d3b03db31753dd7a18733f0c Mon Sep 17 00:00:00 2001 From: Gautham R. Shenoy Date: Wed, 15 May 2019 13:15:52 +0530 Subject: powerpc/pseries: Fix cpu_hotplug_lock acquisition in resize_hpt() The calls to arch_add_memory()/arch_remove_memory() are always made with the read-side cpu_hotplug_lock acquired via memory_hotplug_begin(). On pSeries, arch_add_memory()/arch_remove_memory() eventually call resize_hpt() which in turn calls stop_machine() which acquires the read-side cpu_hotplug_lock again, thereby resulting in the recursive acquisition of this lock. In the absence of CONFIG_PROVE_LOCKING, we hadn't observed a system lockup during a memory hotplug operation because cpus_read_lock() is a per-cpu rwsem read, which, in the fast-path (in the absence of the writer, which in our case is a CPU-hotplug operation) simply increments the read_count on the semaphore. Thus a recursive read in the fast-path doesn't cause any problems. However, we can hit this problem in practice if there is a concurrent CPU-Hotplug operation in progress which is waiting to acquire the write-side of the lock. This will cause the second recursive read to block until the writer finishes, while the writer itself remains blocked because the first read still holds the lock. Thus both the reader and the writer fail to make any progress, thereby blocking both CPU-Hotplug and Memory-Hotplug operations. Memory-Hotplug CPU-Hotplug CPU 0 CPU 1 ------ ------ 1. 
down_read(cpu_hotplug_lock.rw_sem) [memory_hotplug_begin] 2. down_write(cpu_hotplug_lock.rw_sem) [cpu_up/cpu_down] 3. down_read(cpu_hotplug_lock.rw_sem) [stop_machine()] Lockdep complains as follows in these code-paths. swapper/0/1 is trying to acquire lock: (____ptrval____) (cpu_hotplug_lock.rw_sem){++++}, at: stop_machine+0x2c/0x60 but task is already holding lock: (____ptrval____) (cpu_hotplug_lock.rw_sem){++++}, at: mem_hotplug_begin+0x20/0x50 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(cpu_hotplug_lock.rw_sem); lock(cpu_hotplug_lock.rw_sem); *** DEADLOCK *** May be due to missing lock nesting notation 3 locks held by swapper/0/1: #0: (____ptrval____) (&dev->mutex){....}, at: __driver_attach+0x12c/0x1b0 #1: (____ptrval____) (cpu_hotplug_lock.rw_sem){++++}, at: mem_hotplug_begin+0x20/0x50 #2: (____ptrval____) (mem_hotplug_lock.rw_sem){++++}, at: percpu_down_write+0x54/0x1a0 stack backtrace: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.0.0-rc5-58373-gbc99402235f3-dirty #166 Call Trace: dump_stack+0xe8/0x164 (unreliable) __lock_acquire+0x1110/0x1c70 lock_acquire+0x240/0x290 cpus_read_lock+0x64/0xf0 stop_machine+0x2c/0x60 pseries_lpar_resize_hpt+0x19c/0x2c0 resize_hpt_for_hotplug+0x70/0xd0 arch_add_memory+0x58/0xfc devm_memremap_pages+0x5e8/0x8f0 pmem_attach_disk+0x764/0x830 nvdimm_bus_probe+0x118/0x240 really_probe+0x230/0x4b0 driver_probe_device+0x16c/0x1e0 __driver_attach+0x148/0x1b0 bus_for_each_dev+0x90/0x130 driver_attach+0x34/0x50 bus_add_driver+0x1a8/0x360 driver_register+0x108/0x170 __nd_driver_register+0xd0/0xf0 nd_pmem_driver_init+0x34/0x48 do_one_initcall+0x1e0/0x45c kernel_init_freeable+0x540/0x64c kernel_init+0x2c/0x160 ret_from_kernel_thread+0x5c/0x68 Fix this issue by 1) Requiring all the calls to pseries_lpar_resize_hpt() be made with cpu_hotplug_lock held. 2) In pseries_lpar_resize_hpt() invoke stop_machine_cpuslocked() as a consequence of 1) 3) To satisfy 1), in hpt_order_set(), call mmu_hash_ops.resize_hpt() with cpu_hotplug_lock held. Fixes: dbcf929c0062 ("powerpc/pseries: Add support for hash table resizing") Cc: stable@vger.kernel.org # v4.11+ Reported-by: Aneesh Kumar K.V Signed-off-by: Gautham R. 
Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1557906352-29048-1-git-send-email-ego@linux.vnet.ibm.com --- arch/powerpc/mm/book3s64/hash_utils.c | 9 ++++++++- arch/powerpc/platforms/pseries/lpar.c | 8 ++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index e6d471058597..c363e850550e 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -1931,10 +1932,16 @@ static int hpt_order_get(void *data, u64 *val) static int hpt_order_set(void *data, u64 val) { + int ret; + if (!mmu_hash_ops.resize_hpt) return -ENODEV; - return mmu_hash_ops.resize_hpt(val); + cpus_read_lock(); + ret = mmu_hash_ops.resize_hpt(val); + cpus_read_unlock(); + + return ret; } DEFINE_DEBUGFS_ATTRIBUTE(fops_hpt_order, hpt_order_get, hpt_order_set, "%llu\n"); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 09bb878c21e0..4f76e5f30c97 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -1413,7 +1413,10 @@ static int pseries_lpar_resize_hpt_commit(void *data) return 0; } -/* Must be called in user context */ +/* + * Must be called in process context. The caller must hold the + * cpus_lock. + */ static int pseries_lpar_resize_hpt(unsigned long shift) { struct hpt_resize_state state = { @@ -1467,7 +1470,8 @@ static int pseries_lpar_resize_hpt(unsigned long shift) t1 = ktime_get(); - rc = stop_machine(pseries_lpar_resize_hpt_commit, &state, NULL); + rc = stop_machine_cpuslocked(pseries_lpar_resize_hpt_commit, + &state, NULL); t2 = ktime_get(); -- cgit v1.2.3 From 7c7a532ba3fc51bf9527d191fb410786c1fdc73c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 14 Aug 2019 12:36:09 +0000 Subject: powerpc/ptdump: Fix addresses display on PPC32 Commit 453d87f6a8ae ("powerpc/mm: Warn if W+X pages found on boot") wrongly changed KERN_VIRT_START from 0 to PAGE_OFFSET, leading to a shift in the displayed addresses. Let's revert that change to resync walk_pagetables()'s addr value and pgd_t pointer for PPC32. Fixes: 453d87f6a8ae ("powerpc/mm: Warn if W+X pages found on boot") Cc: stable@vger.kernel.org # v5.2+ Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/eb4d626514e22f85814830012642329018ef6af9.1565786091.git.christophe.leroy@c-s.fr --- arch/powerpc/mm/ptdump/ptdump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 6a88a9f585d4..3ad64fc11419 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -27,7 +27,7 @@ #include "ptdump.h" #ifdef CONFIG_PPC32 -#define KERN_VIRT_START PAGE_OFFSET +#define KERN_VIRT_START 0 #endif /* -- cgit v1.2.3 From e033829d2aaad85cb7cf46986c3be0bcc72f791e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 14 Aug 2019 12:36:10 +0000 Subject: powerpc/ptdump: fix walk_pagetables() address mismatch walk_pagetables() always walks the entire pgdir from address 0 but considers PAGE_OFFSET or KERN_VIRT_START as the starting address of the walk, resulting in a possible mismatch in the displayed addresses. 
Ex: on PPC32, when KERN_VIRT_START was locally defined as PAGE_OFFSET, ptdump displayed 0x80000000 instead of 0xc0000000 for the first kernel page, because 0xc0000000 + 0xc0000000 = 0x80000000 (the addition wraps modulo 2^32). Start the walk at st->start_address instead of starting at 0. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/5aa2ac513295f594cce8ddb1c649f61947bd063d.1565786091.git.christophe.leroy@c-s.fr --- arch/powerpc/mm/ptdump/ptdump.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 3ad64fc11419..74ff2bff4ea0 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -299,17 +299,15 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) static void walk_pagetables(struct pg_state *st) { - pgd_t *pgd = pgd_offset_k(0UL); unsigned int i; - unsigned long addr; - - addr = st->start_address; + unsigned long addr = st->start_address & PGDIR_MASK; + pgd_t *pgd = pgd_offset_k(addr); /* * Traverse the linux pagetable structure and dump pages that are in * the hash pagetable. */ - for (i = 0; i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) { + for (i = pgd_index(addr); i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) { if (!pgd_none(*pgd) && !pgd_is_leaf(*pgd)) /* pgd exists */ walk_pud(st, pgd, addr); -- cgit v1.2.3 From 82242352728125ecd86cea4631b89a5e6be0d0fd Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 14 Aug 2019 12:36:11 +0000 Subject: powerpc/ptdump: drop dummy KERN_VIRT_START on PPC32 PPC32 doesn't have KERN_VIRT_START. Make PAGE_OFFSET the default starting address for the dump, and drop the dummy definition of KERN_VIRT_START. Only use KERN_VIRT_START for non-radix PPC64. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/334632b1df4775b0ccf3bdc8d6b201d14e3daedd.1565786091.git.christophe.leroy@c-s.fr --- arch/powerpc/mm/ptdump/ptdump.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 74ff2bff4ea0..9a2186c133e6 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -26,10 +26,6 @@ #include "ptdump.h" -#ifdef CONFIG_PPC32 -#define KERN_VIRT_START 0 -#endif - /* * To visualise what is happening, * @@ -362,12 +358,13 @@ static int ptdump_show(struct seq_file *m, void *v) struct pg_state st = { .seq = m, .marker = address_markers, + .start_address = PAGE_OFFSET, }; - if (radix_enabled()) - st.start_address = PAGE_OFFSET; - else +#ifdef CONFIG_PPC64 + if (!radix_enabled()) st.start_address = KERN_VIRT_START; +#endif /* Traverse kernel page tables */ walk_pagetables(&st); @@ -405,12 +402,13 @@ void ptdump_check_wx(void) .seq = NULL, .marker = address_markers, .check_wx = true, + .start_address = PAGE_OFFSET, }; - if (radix_enabled()) - st.start_address = PAGE_OFFSET; - else +#ifdef CONFIG_PPC64 + if (!radix_enabled()) st.start_address = KERN_VIRT_START; +#endif walk_pagetables(&st); -- cgit v1.2.3 From f3a2ac05894b57e78cd6cc0adba1de642034d940 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 14 Aug 2019 12:36:12 +0000 Subject: powerpc/ptdump: get out of note_prot_wx() when CONFIG_PPC_DEBUG_WX is not selected. When CONFIG_PPC_DEBUG_WX is not selected, note_prot_wx() is useless. Get out of it early and unconditionally in that case, so that GCC can kick all the code out. 
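A side note on the IS_ENABLED() idiom this patch relies on: unlike an #ifdef, IS_ENABLED() expands to a compile-time constant 0 or 1, so the guarded code is still parsed and type-checked in every configuration and then discarded by the optimiser. A minimal sketch of the pattern (CONFIG_MY_FEATURE and the function name are made-up for illustration, not part of the patch):

#include <linux/kconfig.h>

static void my_expensive_check(void)
{
	/*
	 * Constant-folds to a bare 'return;' when CONFIG_MY_FEATURE=n,
	 * letting GCC drop everything below while it still compiles.
	 */
	if (!IS_ENABLED(CONFIG_MY_FEATURE))
		return;

	/* ... checking code, eliminated as dead when the option is off ... */
}
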
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ff6c8f631bd4ce3a10e0cc241eb569816187bc20.1565786091.git.christophe.leroy@c-s.fr --- arch/powerpc/mm/ptdump/ptdump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 9a2186c133e6..ab6a572202b4 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -177,7 +177,7 @@ static void dump_addr(struct pg_state *st, unsigned long addr) static void note_prot_wx(struct pg_state *st, unsigned long addr) { - if (!st->check_wx) + if (!IS_ENABLED(CONFIG_PPC_DEBUG_WX) || !st->check_wx) return; if (!((st->current_flags & pgprot_val(PAGE_KERNEL_X)) == pgprot_val(PAGE_KERNEL_X))) -- cgit v1.2.3 From 65e701b2d2a8593c44c8855aee2e087be7e11e75 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 14 Aug 2019 12:36:13 +0000 Subject: powerpc/ptdump: drop non vital #ifdefs hashpagetable.c is only compiled when CONFIG_PPC_BOOK3S_64 is defined, so drop the test and its 'else' branch. Use IS_ENABLED(CONFIG_PPC_PSERIES) instead of #ifdef; this allows the code to be checked at any build. It is still optimised out by GCC. Use IS_ENABLED(CONFIG_PPC_64K_PAGES) instead of #ifdef. Use IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP) instead of #ifdef. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c8998ed32e4e3954b56a8dacecfe43319a2a0483.1565786091.git.christophe.leroy@c-s.fr --- arch/powerpc/include/asm/plpar_wrappers.h | 6 ++++++ arch/powerpc/mm/ptdump/hashpagetable.c | 24 +++++++++--------------- 2 files changed, 15 insertions(+), 15 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index cff5a411e595..4497c8afb573 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -340,6 +340,12 @@ static inline long plpar_set_ciabr(unsigned long ciabr) { return 0; } + +static inline long plpar_pte_read_4(unsigned long flags, unsigned long ptex, + unsigned long *ptes) +{ + return 0; +} #endif /* CONFIG_PPC_PSERIES */ #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */ diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c index 72f0e4a3d839..a07278027c6f 100644 --- a/arch/powerpc/mm/ptdump/hashpagetable.c +++ b/arch/powerpc/mm/ptdump/hashpagetable.c @@ -237,7 +237,6 @@ static int native_find(unsigned long ea, int psize, bool primary, u64 *v, u64 return -1; } -#ifdef CONFIG_PPC_PSERIES static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 *r) { struct hash_pte ptes[4]; @@ -274,7 +273,6 @@ static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 * } return -1; } -#endif static void decode_r(int bps, unsigned long r, unsigned long *rpn, int *aps, unsigned long *lp_bits) @@ -316,10 +314,9 @@ static void decode_r(int bps, unsigned long r, unsigned long *rpn, int *aps, static int base_hpte_find(unsigned long ea, int psize, bool primary, u64 *v, u64 *r) { -#ifdef CONFIG_PPC_PSERIES - if (firmware_has_feature(FW_FEATURE_LPAR)) + if (IS_ENABLED(CONFIG_PPC_PSERIES) && firmware_has_feature(FW_FEATURE_LPAR)) return pseries_find(ea, psize, primary, v, r); -#endif + return native_find(ea, psize, primary, v, r); } @@ -386,12 +383,13 @@ static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) psize = mmu_vmalloc_psize; else psize = 
mmu_io_psize; -#ifdef CONFIG_PPC_64K_PAGES + /* check for secret 4K mappings */ - if (((pteval & H_PAGE_COMBO) == H_PAGE_COMBO) || - ((pteval & H_PAGE_4K_PFN) == H_PAGE_4K_PFN)) + if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && + ((pteval & H_PAGE_COMBO) == H_PAGE_COMBO || + (pteval & H_PAGE_4K_PFN) == H_PAGE_4K_PFN)) psize = mmu_io_psize; -#endif + /* check for hashpte */ status = hpte_find(st, addr, psize); @@ -469,9 +467,10 @@ static void walk_linearmapping(struct pg_state *st) static void walk_vmemmap(struct pg_state *st) { -#ifdef CONFIG_SPARSEMEM_VMEMMAP struct vmemmap_backing *ptr = vmemmap_list; + if (!IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP)) + return; /* * Traverse the vmemmaped memory and dump pages that are in the hash * pagetable. */ @@ -481,7 +480,6 @@ static void walk_vmemmap(struct pg_state *st) ptr = ptr->list; } seq_puts(st->seq, "---[ vmemmap end ]---\n"); -#endif } static void populate_markers(void) @@ -495,11 +493,7 @@ static void populate_markers(void) address_markers[6].start_address = PHB_IO_END; address_markers[7].start_address = IOREMAP_BASE; address_markers[8].start_address = IOREMAP_END; -#ifdef CONFIG_PPC_BOOK3S_64 address_markers[9].start_address = H_VMEMMAP_START; -#else - address_markers[9].start_address = VMEMMAP_BASE; -#endif } static int ptdump_show(struct seq_file *m, void *v) -- cgit v1.2.3 From 45ff3c55958542c3b76075d59741297b8cb31cbb Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 9 Aug 2019 14:58:09 +0000 Subject: powerpc/kasan: Fix parallel loading of modules. Parallel loading of modules may lead to bad setup of shadow page table entries. First, let's align modules so that two modules never share the same shadow page. Second, ensure that two modules cannot allocate two page tables for the same PMD entry at the same time. This is done by using init_mm.page_table_lock in the same way as __pte_alloc_kernel() does. Fixes: 2edb16efc899 ("powerpc/32: Add KASAN support") Cc: stable@vger.kernel.org # v5.2+ Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c97284f912128cbc3f2fe09d68e90e65fb3e6026.1565361876.git.christophe.leroy@c-s.fr --- arch/powerpc/mm/kasan/kasan_init_32.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index 74f4555a62ba..3282f8a2aecc 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -46,7 +47,19 @@ static int __ref kasan_init_shadow_page_tables(unsigned long k_start, unsigned l kasan_populate_pte(new, PAGE_READONLY); else kasan_populate_pte(new, PAGE_KERNEL_RO); - pmd_populate_kernel(&init_mm, pmd, new); + + smp_wmb(); /* See comment in __pte_alloc */ + + spin_lock(&init_mm.page_table_lock); + /* Has another populated it ? 
*/ + if (likely((void *)pmd_page_vaddr(*pmd) == kasan_early_shadow_pte)) { + pmd_populate_kernel(&init_mm, pmd, new); + new = NULL; + } + spin_unlock(&init_mm.page_table_lock); + + if (new && slab_is_available()) + pte_free_kernel(&init_mm, new); } return 0; } @@ -137,7 +150,11 @@ void __init kasan_init(void) #ifdef CONFIG_MODULES void *module_alloc(unsigned long size) { - void *base = vmalloc_exec(size); + void *base; + + base = __vmalloc_node_range(size, MODULE_ALIGN, VMALLOC_START, VMALLOC_END, + GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, + NUMA_NO_NODE, __builtin_return_address(0)); if (!base) return NULL; -- cgit v1.2.3 From 663c0c9496a69f80011205ba3194049bcafd681d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 9 Aug 2019 14:58:10 +0000 Subject: powerpc/kasan: Fix shadow area set up for modules. When loading modules, from time to time an Oops is encountered during the init of shadow area for globals. This is due to the last page not always being mapped depending on the exact distance between the start and the end of the shadow area and the alignment with the page addresses. Fix this by aligning the starting address with the page address. Fixes: 2edb16efc899 ("powerpc/32: Add KASAN support") Cc: stable@vger.kernel.org # v5.2+ Reported-by: Erhard F. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4f887e9b77d0d725cbb52035c7ece485c1c5fc14.1565361881.git.christophe.leroy@c-s.fr --- arch/powerpc/mm/kasan/kasan_init_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index 3282f8a2aecc..802387b231ad 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -87,7 +87,7 @@ static int __ref kasan_init_region(void *start, size_t size) if (!slab_is_available()) block = memblock_alloc(k_end - k_start, PAGE_SIZE); - for (k_cur = k_start; k_cur < k_end; k_cur += PAGE_SIZE) { + for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) { pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur); void *va = block ? block + k_cur - k_start : kasan_get_one_page(); pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL); -- cgit v1.2.3 From 9d6d712fbf7766f21c838940eebcd7b4d476c5e6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 14 Aug 2019 10:02:20 +0000 Subject: powerpc/32s: Fix boot failure with DEBUG_PAGEALLOC without KASAN. When KASAN is selected, the definitive hash table has to be set up later, but there is already an early temporary one. When KASAN is not selected, there is no early hash table, so the setup of the definitive hash table cannot be delayed. 
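Condensed, the boot-time flow that results from this fix is as follows (a sketch of the change shown in the diff below, not a verbatim excerpt):

void __init MMU_init_hw(void)
{
	/* ... size and allocate the hash table ... */

	/*
	 * With KASAN there is already an early temporary hash table,
	 * so the switch to the final one is patched in later, from
	 * head_32.S.
	 */
	if (IS_ENABLED(CONFIG_KASAN))
		return;

	MMU_init_hw_patch();	/* no early table: patch immediately */
}
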
Fixes: 72f208c6a8f7 ("powerpc/32s: move hash code patching out of MMU_init_hw()") Cc: stable@vger.kernel.org # v5.2+ Reported-by: Jonathan Neuschäfer Tested-by: Jonathan Neuschäfer Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b7860c5e1e784d6b96ba67edf47dd6cbc2e78ab6.1565776892.git.christophe.leroy@c-s.fr --- arch/powerpc/kernel/head_32.S | 2 ++ arch/powerpc/mm/book3s32/mmu.c | 9 +++++++++ 2 files changed, 11 insertions(+) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index f255e22184b4..c8b4f7ed318c 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -897,9 +897,11 @@ start_here: bl machine_init bl __save_cpu_setup bl MMU_init +#ifdef CONFIG_KASAN BEGIN_MMU_FTR_SECTION bl MMU_init_hw_patch END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) +#endif /* * Go back to running unmapped so we can load up new values diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index e249fbf6b9c3..8d68f03bf5a4 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -358,6 +358,15 @@ void __init MMU_init_hw(void) hash_mb2 = hash_mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg; if (lg_n_hpteg > 16) hash_mb2 = 16 - LG_HPTEG_SIZE; + + /* + * When KASAN is selected, there is already an early temporary hash + * table and the switch to the final hash table is done later. + */ + if (IS_ENABLED(CONFIG_KASAN)) + return; + + MMU_init_hw_patch(); } void __init MMU_init_hw_patch(void) -- cgit v1.2.3 From ad628a34ec4e3558bf838195f60bbaa4c2b68f2a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 14 Aug 2019 14:36:10 +0000 Subject: powerpc/mm: don't display empty early ioremap area On the 8xx, the layout displayed at boot is: [ 0.000000] Memory: 121856K/131072K available (5728K kernel code, 592K rwdata, 1248K rodata, 560K init, 448K bss, 9216K reserved, 0K cma-reserved) [ 0.000000] Kernel virtual memory layout: [ 0.000000] * 0xffefc000..0xffffc000 : fixmap [ 0.000000] * 0xffefc000..0xffefc000 : early ioremap [ 0.000000] * 0xc9000000..0xffefc000 : vmalloc & ioremap [ 0.000000] SLUB: HWalign=16, Order=0-3, MinObjects=0, CPUs=1, Nodes=1 Remove display of an empty early ioremap. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/f6267226038cb25a839b567319e240576e3f8565.1565793287.git.christophe.leroy@c-s.fr --- arch/powerpc/mm/mem.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 9191a66b3bc5..828c535e8fd2 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -306,8 +306,9 @@ void __init mem_init(void) pr_info(" * 0x%08lx..0x%08lx : consistent mem\n", IOREMAP_TOP, IOREMAP_TOP + CONFIG_CONSISTENT_SIZE); #endif /* CONFIG_NOT_COHERENT_CACHE */ - pr_info(" * 0x%08lx..0x%08lx : early ioremap\n", - ioremap_bot, IOREMAP_TOP); + if (ioremap_bot != IOREMAP_TOP) + pr_info(" * 0x%08lx..0x%08lx : early ioremap\n", + ioremap_bot, IOREMAP_TOP); pr_info(" * 0x%08lx..0x%08lx : vmalloc & ioremap\n", VMALLOC_START, VMALLOC_END); #endif /* CONFIG_PPC32 */ -- cgit v1.2.3 From d9642117914c9d3f800b3bacc19d7e388b04edb4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 16 Aug 2019 05:41:40 +0000 Subject: powerpc/mm: define empty update_mmu_cache() as static inline Only BOOK3S and FSL_BOOK3E have a useful update_mmu_cache(). For the others, just define it static inline. 
In the meantime, simplify the FSL_BOOK3E related ifdef as book3e_hugetlb_preload() only exists when CONFIG_PPC_FSL_BOOK3E is selected. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/668aba4db6b9af6d8a151174e11a4289f1a6bbcd.1565933217.git.christophe.leroy@c-s.fr --- arch/powerpc/include/asm/book3s/pgtable.h | 11 +++++++++++ arch/powerpc/include/asm/nohash/pgtable.h | 13 +++++++++++++ arch/powerpc/include/asm/pgtable.h | 12 ------------ arch/powerpc/mm/mem.c | 11 +++++++---- 4 files changed, 31 insertions(+), 16 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/book3s/pgtable.h b/arch/powerpc/include/asm/book3s/pgtable.h index 6436b65ac7bc..0e1263455d73 100644 --- a/arch/powerpc/include/asm/book3s/pgtable.h +++ b/arch/powerpc/include/asm/book3s/pgtable.h @@ -26,5 +26,16 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot); #define __HAVE_PHYS_MEM_ACCESS_PROT +/* + * This gets called at the end of handling a page fault, when + * the kernel has put a new PTE into the page table for the process. + * We use it to ensure coherency between the i-cache and d-cache + * for the page which has just been mapped in. + * On machines which use an MMU hash table, we use this to put a + * corresponding HPTE into the hash table ahead of time, instead of + * waiting for the inevitable extra hash-table miss exception. + */ +void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep); + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 1ca1c1864b32..7fed9dc0f147 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -293,5 +293,18 @@ static inline int pgd_huge(pgd_t pgd) #define is_hugepd(hpd) (hugepd_ok(hpd)) #endif +/* + * This gets called at the end of handling a page fault, when + * the kernel has put a new PTE into the page table for the process. + * We use it to ensure coherency between the i-cache and d-cache + * for the page which has just been mapped in. + */ +#if defined(CONFIG_PPC_FSL_BOOK3E) && defined(CONFIG_HUGETLB_PAGE) +void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep); +#else +static inline +void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) {} +#endif + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index c58ba7963688..c70916a7865a 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -77,18 +77,6 @@ extern void paging_init(void); #include - -/* - * This gets called at the end of handling a page fault, when - * the kernel has put a new PTE into the page table for the process. - * We use it to ensure coherency between the i-cache and d-cache - * for the page which has just been mapped in. - * On machines which use an MMU hash table, we use this to put a - * corresponding HPTE into the hash table ahead of time, instead of - * waiting for the inevitable extra hash-table miss exception. 
- */ -extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *); - #ifndef CONFIG_TRANSPARENT_HUGEPAGE #define pmd_large(pmd) 0 #endif diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 828c535e8fd2..ee8225cd2cd2 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -415,10 +415,10 @@ EXPORT_SYMBOL(flush_icache_user_range); * * This must always be called with the pte lock held. */ +#ifdef CONFIG_PPC_BOOK3S void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { -#ifdef CONFIG_PPC_BOOK3S /* * We don't need to worry about _PAGE_PRESENT here because we are * called with either mm->page_table_lock held or ptl lock held */ @@ -456,13 +456,16 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, } hash_preload(vma->vm_mm, address, is_exec, trap); +} #endif /* CONFIG_PPC_BOOK3S */ -#if (defined(CONFIG_PPC_BOOK3E_64) || defined(CONFIG_PPC_FSL_BOOK3E)) \ && defined(CONFIG_HUGETLB_PAGE) +#if defined(CONFIG_PPC_FSL_BOOK3E) && defined(CONFIG_HUGETLB_PAGE) +void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, + pte_t *ptep) +{ if (is_vm_hugetlb_page(vma)) book3e_hugetlb_preload(vma, address, *ptep); -#endif } +#endif /* * System memory should not be in /proc/iomem but various tools expect it -- cgit v1.2.3 From 4c1616ef036ffaaea95a29d7b6abf9d3e8eb9d92 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 16 Aug 2019 05:41:41 +0000 Subject: powerpc/mm: move FSL_BOOK3E version of update_mmu_cache() Move the FSL_BOOK3E version of update_mmu_cache() to the same place as book3e_hugetlb_preload(), since update_mmu_cache() is the only user of book3e_hugetlb_preload(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4d69fdc86df9c74adc71a60331a86f6afb8b5e9e.1565933217.git.christophe.leroy@c-s.fr --- arch/powerpc/include/asm/hugetlb.h | 3 --- arch/powerpc/mm/mem.c | 8 -------- arch/powerpc/mm/nohash/book3e_hugetlbpage.c | 16 ++++++++++++++-- 3 files changed, 14 insertions(+), 13 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 20a101046cff..bd6504c28c2f 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -31,9 +31,6 @@ static inline int is_hugepage_only_range(struct mm_struct *mm, return 0; } -void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, - pte_t pte); - #define __HAVE_ARCH_HUGETLB_FREE_PGD_RANGE void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index ee8225cd2cd2..58d11362eee4 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -458,14 +458,6 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, hash_preload(vma->vm_mm, address, is_exec, trap); } #endif /* CONFIG_PPC_BOOK3S */ -#if defined(CONFIG_PPC_FSL_BOOK3E) && defined(CONFIG_HUGETLB_PAGE) -void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, - pte_t *ptep) -{ - if (is_vm_hugetlb_page(vma)) - book3e_hugetlb_preload(vma, address, *ptep); -} -#endif /* * System memory should not be in /proc/iomem but various tools expect it diff --git a/arch/powerpc/mm/nohash/book3e_hugetlbpage.c b/arch/powerpc/mm/nohash/book3e_hugetlbpage.c index 61915f4d3c7f..8b88be91b622 100644 --- a/arch/powerpc/mm/nohash/book3e_hugetlbpage.c +++ b/arch/powerpc/mm/nohash/book3e_hugetlbpage.c @@ -122,8 
+122,8 @@ static inline int book3e_tlb_exists(unsigned long ea, unsigned long pid) return found; } -void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, - pte_t pte) +static void +book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, pte_t pte) { unsigned long mas1, mas2; u64 mas7_3; @@ -183,6 +183,18 @@ void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, local_irq_restore(flags); } +/* + * This is called at the end of handling a user page fault, when the + * fault has been handled by updating a PTE in the linux page tables. + * + * This must always be called with the pte lock held. + */ +void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) +{ + if (is_vm_hugetlb_page(vma)) + book3e_hugetlb_preload(vma, address, *ptep); +} + void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr) { struct hstate *hstate = hstate_file(vma->vm_file); -- cgit v1.2.3 From e5a1edb9fe4cfa07e37a59475f8f7d0a8939c73e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 16 Aug 2019 05:41:42 +0000 Subject: powerpc/mm: move update_mmu_cache() into book3s hash utils. update_mmu_cache() is only for BOOK3S, and can be simplified for BOOK3S32. Move it out of mem.c into respective BOOK3S32 and BOOK3S64 files containing hash utils. BOOK3S64 version of hash_preload() is only used locally, declare it static. Remove the radix_enabled() stuff in BOOK3S32 version. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/107aaf43583a5f5d09e0d4e84c4c4390ecfcd512.1565933217.git.christophe.leroy@c-s.fr --- arch/powerpc/mm/book3s32/mmu.c | 46 +++++++++++++++++++++++++++++ arch/powerpc/mm/book3s64/hash_utils.c | 55 +++++++++++++++++++++++++++++++++-- arch/powerpc/mm/mem.c | 52 --------------------------------- arch/powerpc/mm/mmu_decl.h | 7 ++--- 4 files changed, 102 insertions(+), 58 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 8d68f03bf5a4..1e77dca8b497 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -309,6 +309,52 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, add_hash_page(mm->context.id, ea, pmd_val(*pmd)); } +/* + * This is called at the end of handling a user page fault, when the + * fault has been handled by updating a PTE in the linux page tables. + * We use it to preload an HPTE into the hash table corresponding to + * the updated linux PTE. + * + * This must always be called with the pte lock held. + */ +void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, + pte_t *ptep) +{ + /* + * We don't need to worry about _PAGE_PRESENT here because we are + * called with either mm->page_table_lock held or ptl lock held + */ + unsigned long trap; + bool is_exec; + + /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ + if (!pte_young(*ptep) || address >= TASK_SIZE) + return; + + /* + * We try to figure out if we are coming from an instruction + * access fault and pass that down to __hash_page so we avoid + * double-faulting on execution of fresh text. We have to test + * for regs NULL since init will get here first thing at boot. + * + * We also avoid filling the hash if not coming from a fault. + */ + + trap = current->thread.regs ? 
TRAP(current->thread.regs) : 0UL; + switch (trap) { + case 0x300: + is_exec = false; + break; + case 0x400: + is_exec = true; + break; + default: + return; + } + + hash_preload(vma->vm_mm, address, is_exec, trap); +} + /* * Initialize the hash table and patch the instructions in hashtable.S. */ diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index c363e850550e..c3bfef08dcf8 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1519,8 +1519,8 @@ static bool should_hash_preload(struct mm_struct *mm, unsigned long ea) } #endif -void hash_preload(struct mm_struct *mm, unsigned long ea, - bool is_exec, unsigned long trap) +static void hash_preload(struct mm_struct *mm, unsigned long ea, + bool is_exec, unsigned long trap) { int hugepage_shift; unsigned long vsid; @@ -1600,6 +1600,57 @@ out_exit: local_irq_restore(flags); } +/* + * This is called at the end of handling a user page fault, when the + * fault has been handled by updating a PTE in the linux page tables. + * We use it to preload an HPTE into the hash table corresponding to + * the updated linux PTE. + * + * This must always be called with the pte lock held. + */ +void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, + pte_t *ptep) +{ + /* + * We don't need to worry about _PAGE_PRESENT here because we are + * called with either mm->page_table_lock held or ptl lock held + */ + unsigned long trap; + bool is_exec; + + if (radix_enabled()) { + prefetch((void *)address); + return; + } + + /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ + if (!pte_young(*ptep) || address >= TASK_SIZE) + return; + + /* + * We try to figure out if we are coming from an instruction + * access fault and pass that down to __hash_page so we avoid + * double-faulting on execution of fresh text. We have to test + * for regs NULL since init will get here first thing at boot. + * + * We also avoid filling the hash if not coming from a fault. + */ + + trap = current->thread.regs ? TRAP(current->thread.regs) : 0UL; + switch (trap) { + case 0x300: + is_exec = false; + break; + case 0x400: + is_exec = true; + break; + default: + return; + } + + hash_preload(vma->vm_mm, address, is_exec, trap); +} + #ifdef CONFIG_PPC_MEM_KEYS /* * Return the protection key associated with the given address and the diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 58d11362eee4..69f99128a8d6 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -407,58 +407,6 @@ void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, } EXPORT_SYMBOL(flush_icache_user_range); -/* - * This is called at the end of handling a user page fault, when the - * fault has been handled by updating a PTE in the linux page tables. - * We use it to preload an HPTE into the hash table corresponding to - * the updated linux PTE. - * - * This must always be called with the pte lock held. 
- */ -#ifdef CONFIG_PPC_BOOK3S -void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, - pte_t *ptep) -{ - /* - * We don't need to worry about _PAGE_PRESENT here because we are - * called with either mm->page_table_lock held or ptl lock held - */ - unsigned long trap; - bool is_exec; - - if (radix_enabled()) { - prefetch((void *)address); - return; - } - - /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ - if (!pte_young(*ptep) || address >= TASK_SIZE) - return; - - /* We try to figure out if we are coming from an instruction - * access fault and pass that down to __hash_page so we avoid - * double-faulting on execution of fresh text. We have to test - * for regs NULL since init will get here first thing at boot - * - * We also avoid filling the hash if not coming from a fault - */ - - trap = current->thread.regs ? TRAP(current->thread.regs) : 0UL; - switch (trap) { - case 0x300: - is_exec = false; - break; - case 0x400: - is_exec = true; - break; - default: - return; - } - - hash_preload(vma->vm_mm, address, is_exec, trap); -} -#endif /* CONFIG_PPC_BOOK3S */ - /* * System memory should not be in /proc/iomem but various tools expect it * (eg kdump). diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 32c1a191c28a..9f325a7a09cb 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -82,10 +82,6 @@ static inline void print_system_hash_info(void) {} #else /* CONFIG_PPC_MMU_NOHASH */ -extern void hash_preload(struct mm_struct *mm, unsigned long ea, - bool is_exec, unsigned long trap); - - extern void _tlbie(unsigned long address); extern void _tlbia(void); @@ -95,6 +91,9 @@ void print_system_hash_info(void); #ifdef CONFIG_PPC32 +void hash_preload(struct mm_struct *mm, unsigned long ea, + bool is_exec, unsigned long trap); + extern void mapin_ram(void); extern void setbat(int index, unsigned long virt, phys_addr_t phys, unsigned int size, pgprot_t prot); -- cgit v1.2.3 From f49f4e2b68b683491263e92c229ff344d44759a7 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 16 Aug 2019 05:41:43 +0000 Subject: powerpc/mm: Simplify update_mmu_cache() on BOOK3S32 On BOOK3S32, hash_preload() uses neither is_exec nor trap, so drop those parameters and simplify update_mmu_cache(). 
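For context (background on the magic numbers, not part of the patch text): on these CPUs, exception vector 0x300 is the data storage interrupt (a data access fault) and 0x400 is the instruction storage interrupt (an instruction fetch fault), so the simplified filter keeps only genuine page-fault traps. It reduces to the sketch below; the real change follows in the diff.

	/* Sketch of the simplified update_mmu_cache() filter: */
	if (!current->thread.regs)
		return;		/* init gets here before regs exist */

	if (TRAP(current->thread.regs) != 0x300 &&	/* data storage (DSI) */
	    TRAP(current->thread.regs) != 0x400)	/* instruction storage (ISI) */
		return;		/* not coming from a page fault */

	hash_preload(vma->vm_mm, address);
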
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/35f143c6fe29f9fd25c7f3cd4448ae401029ce3c.1565933217.git.christophe.leroy@c-s.fr --- arch/powerpc/mm/book3s32/mmu.c | 30 +++++++----------------------- arch/powerpc/mm/mmu_decl.h | 3 +-- arch/powerpc/mm/pgtable_32.c | 2 +- 3 files changed, 9 insertions(+), 26 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 1e77dca8b497..5d7d35eb96fb 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -297,8 +297,7 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys, /* * Preload a translation in the hash table */ -void hash_preload(struct mm_struct *mm, unsigned long ea, - bool is_exec, unsigned long trap) +void hash_preload(struct mm_struct *mm, unsigned long ea) { pmd_t *pmd; @@ -324,35 +323,20 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, * We don't need to worry about _PAGE_PRESENT here because we are * called with either mm->page_table_lock held or ptl lock held */ - unsigned long trap; - bool is_exec; /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ if (!pte_young(*ptep) || address >= TASK_SIZE) return; - /* - * We try to figure out if we are coming from an instruction - * access fault and pass that down to __hash_page so we avoid - * double-faulting on execution of fresh text. We have to test - * for regs NULL since init will get here first thing at boot. - * - * We also avoid filling the hash if not coming from a fault. - */ + /* We have to test for regs NULL since init will get here first thing at boot */ + if (!current->thread.regs) + return; - trap = current->thread.regs ? TRAP(current->thread.regs) : 0UL; - switch (trap) { - case 0x300: - is_exec = false; - break; - case 0x400: - is_exec = true; - break; - default: + /* We also avoid filling the hash if not coming from a fault */ + if (TRAP(current->thread.regs) != 0x300 && TRAP(current->thread.regs) != 0x400) return; - } - hash_preload(vma->vm_mm, address, is_exec, trap); + hash_preload(vma->vm_mm, address); } /* diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 9f325a7a09cb..adbaf2167214 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -91,8 +91,7 @@ void print_system_hash_info(void); #ifdef CONFIG_PPC32 -void hash_preload(struct mm_struct *mm, unsigned long ea, - bool is_exec, unsigned long trap); +void hash_preload(struct mm_struct *mm, unsigned long ea); extern void mapin_ram(void); extern void setbat(int index, unsigned long virt, phys_addr_t phys, diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 35cb96cfc258..97f401a06fcc 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -252,7 +252,7 @@ static void __init __mapin_ram_chunk(unsigned long offset, unsigned long top) map_kernel_page(v, p, ktext ? PAGE_KERNEL_TEXT : PAGE_KERNEL); #ifdef CONFIG_PPC_BOOK3S_32 if (ktext) - hash_preload(&init_mm, v, false, 0x300); + hash_preload(&init_mm, v); #endif v += PAGE_SIZE; p += PAGE_SIZE; -- cgit v1.2.3 From f204338f8e29777302042cba578b0da44f69695f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 16 Aug 2019 05:41:44 +0000 Subject: powerpc/mm: ppc 603 doesn't need update_mmu_cache() On powerpc 603, there is no hash table so get out of update_mmu_cache() early. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/6133e0f115d955fac4061536dab0fa7480a1c433.1565933217.git.christophe.leroy@c-s.fr --- arch/powerpc/mm/book3s32/mmu.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 5d7d35eb96fb..50a1991d257f 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -319,6 +319,8 @@ void hash_preload(struct mm_struct *mm, unsigned long ea) void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { + if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) + return; /* * We don't need to worry about _PAGE_PRESENT here because we are * called with either mm->page_table_lock held or ptl lock held -- cgit v1.2.3 From 8f51e3929470942e6a8744061254fdeef646cd36 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 24 Jul 2019 18:46:34 +1000 Subject: powerpc/64s/radix: Fix memory hotplug section page table creation create_physical_mapping expects physical addresses, but creating and splitting these mappings after boot is supplying virtual (effective) addresses. This can be triggered by booting with mem= to limit memory, then probing an unused physical memory range: echo > /sys/devices/system/memory/probe This mostly works by accident, firstly because __va(__va(x)) == __va(x) so the virtual address does not get corrupted. Secondly because pfn_pte masks out the upper bits of the pfn beyond the physical address limit, so a pfn constructed with a 0xc000000000000000 virtual linear address will be masked back to the correct physical address in the pte. Fixes: 6cc27341b21a8 ("powerpc/mm: add radix__create_section_mapping()") Signed-off-by: Nicholas Piggin Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190724084638.24982-1-npiggin@gmail.com --- arch/powerpc/mm/book3s64/radix_pgtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index b4ca9e95e678..c5cc16ab1954 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -902,7 +902,7 @@ int __meminit radix__create_section_mapping(unsigned long start, unsigned long e return -1; } - return create_physical_mapping(start, end, nid); + return create_physical_mapping(__pa(start), __pa(end), nid); } int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end) -- cgit v1.2.3 From 31f210cf42d4b308eacef89b6cb0b1459338b8de Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 24 Jul 2019 18:46:35 +1000 Subject: powerpc/64s/radix: Fix memory hot-unplug page table split create_physical_mapping expects physical addresses, but splitting these mappings on hot-unplug is supplying virtual (effective) addresses. 
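Why the stray __va() was survivable, in a simplified model (an illustration only; the real definitions in arch/powerpc/include/asm/page.h cover more configurations): on Book3S64 the linear-map conversion ORs in the offset bits and the reverse conversion masks them off, which makes the virtual-address conversion idempotent:

/* Simplified model of the Book3S64 linear map, for illustration. */
#define LINEAR_OFFSET	0xc000000000000000UL	/* models PAGE_OFFSET */

static inline unsigned long to_virt(unsigned long pa)
{
	return pa | LINEAR_OFFSET;	/* to_virt(to_virt(x)) == to_virt(x) */
}

static inline unsigned long to_phys(unsigned long va)
{
	return va & ~LINEAR_OFFSET;	/* strips the linear-map bits again */
}
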
Fixes: 4dd5f8a99e791 ("powerpc/mm/radix: Split linear mapping on hot-unplug") Signed-off-by: Nicholas Piggin Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190724084638.24982-2-npiggin@gmail.com --- arch/powerpc/mm/book3s64/radix_pgtable.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index c5cc16ab1954..2204d8eeb784 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -737,8 +737,8 @@ static int __meminit stop_machine_change_mapping(void *data) spin_unlock(&init_mm.page_table_lock); pte_clear(&init_mm, params->aligned_start, params->pte); - create_physical_mapping(params->aligned_start, params->start, -1); - create_physical_mapping(params->end, params->aligned_end, -1); + create_physical_mapping(__pa(params->aligned_start), __pa(params->start), -1); + create_physical_mapping(__pa(params->end), __pa(params->aligned_end), -1); spin_lock(&init_mm.page_table_lock); return 0; } -- cgit v1.2.3 From 8aee077292a34eaca717445290f12d23f8bd7732 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 20 Aug 2019 14:07:11 +0000 Subject: powerpc/mm: drop ppc_md.iounmap() and __iounmap() ppc_md.iounmap() is never set, drop it. Once ppc_md.iounmap() is gone, iounmap() remains the only user of __iounmap() and iounmap() does nothing but call __iounmap(). So drop iounmap() and make __iounmap() the new iounmap(). Reviewed-by: Christoph Hellwig Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d73ba92bb7a387cc58cc34666d7f5158a45851b0.1566309262.git.christophe.leroy@c-s.fr --- arch/powerpc/include/asm/io.h | 5 ----- arch/powerpc/include/asm/machdep.h | 2 -- arch/powerpc/mm/pgtable_64.c | 11 +---------- 3 files changed, 1 insertion(+), 17 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 23e5d5d16c7e..02d6256fe1ea 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -712,9 +712,6 @@ static inline void iosync(void) * * __ioremap_caller is the same as above but takes an explicit caller * reference rather than using __builtin_return_address(0) * - * * __iounmap, is the low level implementation used by iounmap and cannot - * be hooked (but can be used by a hook on iounmap) - * */ extern void __iomem *ioremap(phys_addr_t address, unsigned long size); extern void __iomem *ioremap_prot(phys_addr_t address, unsigned long size, @@ -734,8 +731,6 @@ extern void __iomem *__ioremap(phys_addr_t, unsigned long size, extern void __iomem *__ioremap_caller(phys_addr_t, unsigned long size, pgprot_t prot, void *caller); -extern void __iounmap(volatile void __iomem *addr); - extern void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot); extern void __iounmap_at(void *ea, unsigned long size); diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index c43d6eca9edd..3370df4bdaa0 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -33,8 +33,6 @@ struct machdep_calls { #ifdef CONFIG_PPC64 void __iomem * (*ioremap)(phys_addr_t addr, unsigned long size, pgprot_t prot, void *caller); - void (*iounmap)(volatile void __iomem *token); - #ifdef CONFIG_PM void (*iommu_save)(void); void (*iommu_restore)(void); diff --git 
a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 9ad59b733984..57cdd6182932 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -266,7 +266,7 @@ void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size, * Unmap an IO region and remove it from imalloc'd list. * Access to IO memory should be serialized by driver. */ -void __iounmap(volatile void __iomem *token) +void iounmap(volatile void __iomem *token) { void *addr; @@ -283,21 +283,12 @@ void __iounmap(volatile void __iomem *token) vunmap(addr); } -void iounmap(volatile void __iomem *token) -{ - if (ppc_md.iounmap) - ppc_md.iounmap(token); - else - __iounmap(token); -} - EXPORT_SYMBOL(ioremap); EXPORT_SYMBOL(ioremap_wc); EXPORT_SYMBOL(ioremap_prot); EXPORT_SYMBOL(__ioremap); EXPORT_SYMBOL(__ioremap_at); EXPORT_SYMBOL(iounmap); -EXPORT_SYMBOL(__iounmap); EXPORT_SYMBOL(__iounmap_at); #ifndef __PAGETABLE_PUD_FOLDED -- cgit v1.2.3 From 492643e81e58e7585cccb22c37814060e3f59268 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 20 Aug 2019 14:07:12 +0000 Subject: powerpc/mm: drop function __ioremap() __ioremap() is not used anymore, drop it. Suggested-by: Christoph Hellwig Signed-off-by: Christophe Leroy Reviewed-by: Christoph Hellwig Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ccc439f481a0884e00a6be1bab44bab2a4477fea.1566309262.git.christophe.leroy@c-s.fr --- arch/powerpc/include/asm/io.h | 6 ------ arch/powerpc/mm/pgtable_32.c | 11 ++--------- arch/powerpc/mm/pgtable_64.c | 7 ------- 3 files changed, 2 insertions(+), 22 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 02d6256fe1ea..8e65ba59f06a 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -705,10 +705,6 @@ static inline void iosync(void) * create hand-made mappings for use only by the PCI code and cannot * currently be hooked. Must be page aligned. 
* - * __ioremap is the low level implementation used by ioremap and - * ioremap_prot and cannot be hooked (but can be used by a hook on one - * of the previous ones) - * * * __ioremap_caller is the same as above but takes an explicit caller * reference rather than using __builtin_return_address(0) * @@ -726,8 +722,6 @@ void __iomem *ioremap_coherent(phys_addr_t address, unsigned long size); extern void iounmap(volatile void __iomem *addr); -extern void __iomem *__ioremap(phys_addr_t, unsigned long size, - unsigned long flags); extern void __iomem *__ioremap_caller(phys_addr_t, unsigned long size, pgprot_t prot, void *caller); diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 97f401a06fcc..8cc5e9e83fc3 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -91,12 +91,6 @@ ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags) } EXPORT_SYMBOL(ioremap_prot); -void __iomem * -__ioremap(phys_addr_t addr, unsigned long size, unsigned long flags) -{ - return __ioremap_caller(addr, size, __pgprot(flags), __builtin_return_address(0)); -} - void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *caller) { @@ -127,8 +121,8 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *call */ if (slab_is_available() && p <= virt_to_phys(high_memory - 1) && page_is_ram(__phys_to_pfn(p))) { - printk("__ioremap(): phys addr 0x%llx is RAM lr %ps\n", - (unsigned long long)p, __builtin_return_address(0)); + pr_warn("%s(): phys addr 0x%llx is RAM lr %ps\n", __func__, + (unsigned long long)p, __builtin_return_address(0)); return NULL; } #endif @@ -171,7 +165,6 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *call out: return (void __iomem *) (v + ((unsigned long)addr & ~PAGE_MASK)); } -EXPORT_SYMBOL(__ioremap); void iounmap(volatile void __iomem *addr) { diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 57cdd6182932..2882419737b9 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -203,12 +203,6 @@ void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size, return ret; } -void __iomem * __ioremap(phys_addr_t addr, unsigned long size, - unsigned long flags) -{ - return __ioremap_caller(addr, size, __pgprot(flags), __builtin_return_address(0)); -} - void __iomem * ioremap(phys_addr_t addr, unsigned long size) { pgprot_t prot = pgprot_noncached(PAGE_KERNEL); @@ -286,7 +280,6 @@ void iounmap(volatile void __iomem *token) EXPORT_SYMBOL(ioremap); EXPORT_SYMBOL(ioremap_wc); EXPORT_SYMBOL(ioremap_prot); -EXPORT_SYMBOL(__ioremap); EXPORT_SYMBOL(__ioremap_at); EXPORT_SYMBOL(iounmap); EXPORT_SYMBOL(__iounmap_at); -- cgit v1.2.3 From 14b4d97669b79d1ac83e64d6795098394e15ab1b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 20 Aug 2019 14:07:13 +0000 Subject: powerpc/mm: rework io-workaround invocation. ppc_md.ioremap() is only used for the I/O workaround on the CELL platform, so the indirect function call can be avoided. This patch reworks the io-workaround and ioremap() functions to use the global 'io_workaround_inited' flag for the activation of the io-workaround. When CONFIG_PPC_IO_WORKAROUNDS or CONFIG_PPC_INDIRECT_MMIO are not selected, the I/O workaround ioremap() path is compiled out and the global flag is not used. 
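The essence of the rework, reduced to a sketch (the complete change is in the diff below): a global flag set once at activation time and tested through a static inline replaces the per-call ppc_md function-pointer test, so the common path costs one well-predicted branch and no indirect call:

bool io_workaround_inited;	/* set once, when the workaround is enabled */

static inline bool iowa_is_active(void)
{
	return unlikely(io_workaround_inited);
}

/* Each ioremap variant then follows this shape: */
if (iowa_is_active())
	return iowa_ioremap(addr, size, prot, caller);
return __ioremap_caller(addr, size, prot, caller);
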
Signed-off-by: Christophe Leroy Reviewed-by: Christoph Hellwig Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/5fa3ef069fbd0f152512afaae19e7a60161454cf.1566309262.git.christophe.leroy@c-s.fr --- arch/powerpc/include/asm/io-workarounds.h | 20 ++++++++++++++++++++ arch/powerpc/include/asm/machdep.h | 2 -- arch/powerpc/kernel/io-workarounds.c | 13 +++++-------- arch/powerpc/mm/pgtable_64.c | 17 +++++++++-------- 4 files changed, 34 insertions(+), 18 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/io-workarounds.h b/arch/powerpc/include/asm/io-workarounds.h index 01567ea4ceaf..3cce499fbe27 100644 --- a/arch/powerpc/include/asm/io-workarounds.h +++ b/arch/powerpc/include/asm/io-workarounds.h @@ -8,6 +8,7 @@ #ifndef _IO_WORKAROUNDS_H #define _IO_WORKAROUNDS_H +#ifdef CONFIG_PPC_IO_WORKAROUNDS #include #include @@ -32,4 +33,23 @@ extern int spiderpci_iowa_init(struct iowa_bus *, void *); #define SPIDER_PCI_DUMMY_READ 0x0810 #define SPIDER_PCI_DUMMY_READ_BASE 0x0814 +#endif + +#if defined(CONFIG_PPC_IO_WORKAROUNDS) && defined(CONFIG_PPC_INDIRECT_MMIO) +extern bool io_workaround_inited; + +static inline bool iowa_is_active(void) +{ + return unlikely(io_workaround_inited); +} +#else +static inline bool iowa_is_active(void) +{ + return false; +} +#endif + +void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size, + pgprot_t prot, void *caller); + #endif /* _IO_WORKAROUNDS_H */ diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 3370df4bdaa0..657ec893bdcb 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -31,8 +31,6 @@ struct pci_host_bridge; struct machdep_calls { char *name; #ifdef CONFIG_PPC64 - void __iomem * (*ioremap)(phys_addr_t addr, unsigned long size, - pgprot_t prot, void *caller); #ifdef CONFIG_PM void (*iommu_save)(void); void (*iommu_restore)(void); diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c index fbd2d0007c52..0276bc8c8969 100644 --- a/arch/powerpc/kernel/io-workarounds.c +++ b/arch/powerpc/kernel/io-workarounds.c @@ -149,8 +149,8 @@ static const struct ppc_pci_io iowa_pci_io = { }; #ifdef CONFIG_PPC_INDIRECT_MMIO -static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size, - pgprot_t prot, void *caller) +void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size, + pgprot_t prot, void *caller) { struct iowa_bus *bus; void __iomem *res = __ioremap_caller(addr, size, prot, caller); @@ -163,20 +163,17 @@ static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size, } return res; } -#else /* CONFIG_PPC_INDIRECT_MMIO */ -#define iowa_ioremap NULL #endif /* !CONFIG_PPC_INDIRECT_MMIO */ +bool io_workaround_inited; + /* Enable IO workaround */ static void io_workaround_init(void) { - static int io_workaround_inited; - if (io_workaround_inited) return; ppc_pci_io = iowa_pci_io; - ppc_md.ioremap = iowa_ioremap; - io_workaround_inited = 1; + io_workaround_inited = true; } /* Register new bus to support workaround */ diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 2882419737b9..0a147daeb0f2 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -208,8 +209,8 @@ void __iomem * ioremap(phys_addr_t addr, unsigned long size) pgprot_t prot = pgprot_noncached(PAGE_KERNEL); void *caller = __builtin_return_address(0); - if (ppc_md.ioremap) - return 
ppc_md.ioremap(addr, size, prot, caller); + if (iowa_is_active()) + return iowa_ioremap(addr, size, prot, caller); return __ioremap_caller(addr, size, prot, caller); } @@ -218,8 +219,8 @@ void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size) pgprot_t prot = pgprot_noncached_wc(PAGE_KERNEL); void *caller = __builtin_return_address(0); - if (ppc_md.ioremap) - return ppc_md.ioremap(addr, size, prot, caller); + if (iowa_is_active()) + return iowa_ioremap(addr, size, prot, caller); return __ioremap_caller(addr, size, prot, caller); } @@ -228,8 +229,8 @@ void __iomem *ioremap_coherent(phys_addr_t addr, unsigned long size) pgprot_t prot = pgprot_cached(PAGE_KERNEL); void *caller = __builtin_return_address(0); - if (ppc_md.ioremap) - return ppc_md.ioremap(addr, size, prot, caller); + if (iowa_is_active()) + return iowa_ioremap(addr, size, prot, caller); return __ioremap_caller(addr, size, prot, caller); } @@ -250,8 +251,8 @@ void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size, */ pte = pte_mkprivileged(pte); - if (ppc_md.ioremap) - return ppc_md.ioremap(addr, size, pte_pgprot(pte), caller); + if (iowa_is_active()) + return iowa_ioremap(addr, size, pte_pgprot(pte), caller); return __ioremap_caller(addr, size, pte_pgprot(pte), caller); } -- cgit v1.2.3 From 4634c375db7a082b2522621519a5fb6eba977584 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 20 Aug 2019 14:07:14 +0000 Subject: powerpc/mm: move common 32/64 bits ioremap functions into ioremap.c ioremap(), ioremap_wc() and ioremap_coherent() are now identical on PPC32 and PPC64 as iowa_is_active() will always return false on PPC32. Move them into a new common location called ioremap.c Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/6223803ce024d6ab4dfaa919f44098aed5b4bc33.1566309262.git.christophe.leroy@c-s.fr --- arch/powerpc/mm/Makefile | 2 +- arch/powerpc/mm/ioremap.c | 36 ++++++++++++++++++++++++++++++++++++ arch/powerpc/mm/pgtable_32.c | 27 --------------------------- arch/powerpc/mm/pgtable_64.c | 33 --------------------------------- 4 files changed, 37 insertions(+), 61 deletions(-) create mode 100644 arch/powerpc/mm/ioremap.c (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 0f499db315d6..29c682fe9144 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -7,7 +7,7 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) obj-y := fault.o mem.o pgtable.o mmap.o \ init_$(BITS).o pgtable_$(BITS).o \ - pgtable-frag.o \ + pgtable-frag.o ioremap.o \ init-common.o mmu_context.o drmem.o obj-$(CONFIG_PPC_MMU_NOHASH) += nohash/ obj-$(CONFIG_PPC_BOOK3S_32) += book3s32/ diff --git a/arch/powerpc/mm/ioremap.c b/arch/powerpc/mm/ioremap.c new file mode 100644 index 000000000000..7f1d462e7745 --- /dev/null +++ b/arch/powerpc/mm/ioremap.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include + +void __iomem *ioremap(phys_addr_t addr, unsigned long size) +{ + pgprot_t prot = pgprot_noncached(PAGE_KERNEL); + void *caller = __builtin_return_address(0); + + if (iowa_is_active()) + return iowa_ioremap(addr, size, prot, caller); + return __ioremap_caller(addr, size, prot, caller); +} +EXPORT_SYMBOL(ioremap); + +void __iomem *ioremap_wc(phys_addr_t addr, unsigned long size) +{ + pgprot_t prot = pgprot_noncached_wc(PAGE_KERNEL); + void *caller = __builtin_return_address(0); + + if (iowa_is_active()) + return iowa_ioremap(addr, size, prot, caller); + return __ioremap_caller(addr, size, prot, 
caller); +} +EXPORT_SYMBOL(ioremap_wc); + +void __iomem *ioremap_coherent(phys_addr_t addr, unsigned long size) +{ + pgprot_t prot = pgprot_cached(PAGE_KERNEL); + void *caller = __builtin_return_address(0); + + if (iowa_is_active()) + return iowa_ioremap(addr, size, prot, caller); + return __ioremap_caller(addr, size, prot, caller); +}
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 8cc5e9e83fc3..a363d87d0a9d 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -38,24 +38,6 @@ EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */ extern char etext[], _stext[], _sinittext[], _einittext[]; -void __iomem * -ioremap(phys_addr_t addr, unsigned long size) -{ - pgprot_t prot = pgprot_noncached(PAGE_KERNEL); - - return __ioremap_caller(addr, size, prot, __builtin_return_address(0)); -} -EXPORT_SYMBOL(ioremap); - -void __iomem * -ioremap_wc(phys_addr_t addr, unsigned long size) -{ - pgprot_t prot = pgprot_noncached_wc(PAGE_KERNEL); - - return __ioremap_caller(addr, size, prot, __builtin_return_address(0)); -} -EXPORT_SYMBOL(ioremap_wc); - void __iomem * ioremap_wt(phys_addr_t addr, unsigned long size) { @@ -65,15 +47,6 @@ ioremap_wt(phys_addr_t addr, unsigned long size) } EXPORT_SYMBOL(ioremap_wt); -void __iomem * -ioremap_coherent(phys_addr_t addr, unsigned long size) -{ - pgprot_t prot = pgprot_cached(PAGE_KERNEL); - - return __ioremap_caller(addr, size, prot, __builtin_return_address(0)); -} -EXPORT_SYMBOL(ioremap_coherent); - void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags) {
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 0a147daeb0f2..358233ea8d85 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include @@ -204,36 +203,6 @@ void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size, return ret; } -void __iomem * ioremap(phys_addr_t addr, unsigned long size) -{ - pgprot_t prot = pgprot_noncached(PAGE_KERNEL); - void *caller = __builtin_return_address(0); - - if (iowa_is_active()) - return iowa_ioremap(addr, size, prot, caller); - return __ioremap_caller(addr, size, prot, caller); -} - -void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size) -{ - pgprot_t prot = pgprot_noncached_wc(PAGE_KERNEL); - void *caller = __builtin_return_address(0); - - if (iowa_is_active()) - return iowa_ioremap(addr, size, prot, caller); - return __ioremap_caller(addr, size, prot, caller); -} - -void __iomem *ioremap_coherent(phys_addr_t addr, unsigned long size) -{ - pgprot_t prot = pgprot_cached(PAGE_KERNEL); - void *caller = __builtin_return_address(0); - - if (iowa_is_active()) - return iowa_ioremap(addr, size, prot, caller); - return __ioremap_caller(addr, size, prot, caller); -} - void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags) { @@ -278,8 +247,6 @@ void iounmap(volatile void __iomem *token) vunmap(addr); } -EXPORT_SYMBOL(ioremap); -EXPORT_SYMBOL(ioremap_wc); EXPORT_SYMBOL(ioremap_prot); EXPORT_SYMBOL(__ioremap_at); EXPORT_SYMBOL(iounmap); EXPORT_SYMBOL(__iounmap_at); -- cgit v1.2.3

From edfe1a5679263827ff94eb478c6ee0d65d467adf Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 20 Aug 2019 14:07:15 +0000 Subject: powerpc/mm: move ioremap_prot() into ioremap.c

Both ioremap_prot() implementations are identical, so move them into ioremap.c

Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link:
https://lore.kernel.org/r/0b3eb0e0f1490a99fd6c983e166fb8946233f151.1566309262.git.christophe.leroy@c-s.fr --- arch/powerpc/mm/ioremap.c | 19 +++++++++++++++++++ arch/powerpc/mm/pgtable_32.c | 17 ----------------- arch/powerpc/mm/pgtable_64.c | 24 ------------------------ 3 files changed, 19 insertions(+), 41 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/ioremap.c b/arch/powerpc/mm/ioremap.c index 7f1d462e7745..3d388aaa3ee6 100644 --- a/arch/powerpc/mm/ioremap.c +++ b/arch/powerpc/mm/ioremap.c @@ -34,3 +34,22 @@ void __iomem *ioremap_coherent(phys_addr_t addr, unsigned long size) return iowa_ioremap(addr, size, prot, caller); return __ioremap_caller(addr, size, prot, caller); } + +void __iomem *ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags) +{ + pte_t pte = __pte(flags); + void *caller = __builtin_return_address(0); + + /* writeable implies dirty for kernel addresses */ + if (pte_write(pte)) + pte = pte_mkdirty(pte); + + /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */ + pte = pte_exprotect(pte); + pte = pte_mkprivileged(pte); + + if (iowa_is_active()) + return iowa_ioremap(addr, size, pte_pgprot(pte), caller); + return __ioremap_caller(addr, size, pte_pgprot(pte), caller); +} +EXPORT_SYMBOL(ioremap_prot); diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index a363d87d0a9d..ac5a267467fa 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -47,23 +47,6 @@ ioremap_wt(phys_addr_t addr, unsigned long size) } EXPORT_SYMBOL(ioremap_wt); -void __iomem * -ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags) -{ - pte_t pte = __pte(flags); - - /* writeable implies dirty for kernel addresses */ - if (pte_write(pte)) - pte = pte_mkdirty(pte); - - /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */ - pte = pte_exprotect(pte); - pte = pte_mkprivileged(pte); - - return __ioremap_caller(addr, size, pte_pgprot(pte), __builtin_return_address(0)); -} -EXPORT_SYMBOL(ioremap_prot); - void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *caller) { diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 358233ea8d85..2b9078e1bc43 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -203,29 +203,6 @@ void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size, return ret; } -void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size, - unsigned long flags) -{ - pte_t pte = __pte(flags); - void *caller = __builtin_return_address(0); - - /* writeable implies dirty for kernel addresses */ - if (pte_write(pte)) - pte = pte_mkdirty(pte); - - /* we don't want to let _PAGE_EXEC leak out */ - pte = pte_exprotect(pte); - /* - * Force kernel mapping. - */ - pte = pte_mkprivileged(pte); - - if (iowa_is_active()) - return iowa_ioremap(addr, size, pte_pgprot(pte), caller); - return __ioremap_caller(addr, size, pte_pgprot(pte), caller); -} - - /* * Unmap an IO region and remove it from imalloc'd list. * Access to IO memory should be serialized by driver. 
@@ -247,7 +224,6 @@ void iounmap(volatile void __iomem *token) vunmap(addr); } -EXPORT_SYMBOL(ioremap_prot); EXPORT_SYMBOL(__ioremap_at); EXPORT_SYMBOL(iounmap); EXPORT_SYMBOL(__iounmap_at); -- cgit v1.2.3 From 7cd9b317b630683b0c8eb2dfcfb046003ad6b97b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 20 Aug 2019 14:07:16 +0000 Subject: powerpc/mm: make ioremap_bot common to all Drop multiple definitions of ioremap_bot and make one common to all subarches. Only CONFIG_PPC_BOOK3E_64 had a global static init value for ioremap_bot. Now ioremap_bot is set in early_init_mmu_global(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/920eebfd9f36f14c79d1755847f5bf7c83703bdd.1566309262.git.christophe.leroy@c-s.fr --- arch/powerpc/include/asm/book3s/32/pgtable.h | 2 -- arch/powerpc/include/asm/book3s/64/pgtable.h | 1 - arch/powerpc/include/asm/nohash/32/pgtable.h | 2 -- arch/powerpc/include/asm/pgtable.h | 2 ++ arch/powerpc/mm/ioremap.c | 3 +++ arch/powerpc/mm/mmu_decl.h | 1 - arch/powerpc/mm/nohash/tlb.c | 2 ++ arch/powerpc/mm/pgtable_32.c | 3 --- arch/powerpc/mm/pgtable_64.c | 3 --- 9 files changed, 7 insertions(+), 12 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 838de59f6754..aa1bc5f8da90 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -201,8 +201,6 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); #include #include -extern unsigned long ioremap_bot; - /* Bits to mask out from a PGD to get to the PUD page */ #define PGD_MASKED_BITS 0 diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 8e47fb85dfa6..03c9a14dd902 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -289,7 +289,6 @@ extern unsigned long __kernel_io_end; #define KERN_IO_END __kernel_io_end extern struct page *vmemmap; -extern unsigned long ioremap_bot; extern unsigned long pci_io_base; #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 0284f8f5305f..7ce2a7c9fade 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -11,8 +11,6 @@ #include /* For sub-arch specific PPC_PIN_SIZE */ #include -extern unsigned long ioremap_bot; - #ifdef CONFIG_44x extern int icache_44x_need_flush; #endif diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index c70916a7865a..8b7865a2d576 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -68,6 +68,8 @@ extern pgd_t swapper_pg_dir[]; extern void paging_init(void); +extern unsigned long ioremap_bot; + /* * kern_addr_valid is intended to indicate whether an address is a valid * kernel address. 
Most 32-bit archs define it as always true (like this)
diff --git a/arch/powerpc/mm/ioremap.c b/arch/powerpc/mm/ioremap.c index 3d388aaa3ee6..eaf5f8a4a63f 100644 --- a/arch/powerpc/mm/ioremap.c +++ b/arch/powerpc/mm/ioremap.c @@ -3,6 +3,9 @@ #include #include +unsigned long ioremap_bot; +EXPORT_SYMBOL(ioremap_bot); + void __iomem *ioremap(phys_addr_t addr, unsigned long size) { pgprot_t prot = pgprot_noncached(PAGE_KERNEL);
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index adbaf2167214..c750ac9ec713 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -106,7 +106,6 @@ extern u8 early_hash[]; #endif /* CONFIG_PPC32 */ -extern unsigned long ioremap_bot; extern unsigned long __max_low_memory; extern phys_addr_t __initial_memory_limit_addr; extern phys_addr_t total_memory;
diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c index bf60983a58c7..696f568253a0 100644 --- a/arch/powerpc/mm/nohash/tlb.c +++ b/arch/powerpc/mm/nohash/tlb.c @@ -703,6 +703,8 @@ static void __init early_init_mmu_global(void) * for use by the TLB miss code */ linear_map_top = memblock_end_of_DRAM(); + + ioremap_bot = IOREMAP_BASE; } static void __init early_mmu_set_memory_limit(void)
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index ac5a267467fa..102901a19f3c 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -33,9 +33,6 @@ #include -unsigned long ioremap_bot; -EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */ - extern char etext[], _stext[], _sinittext[], _einittext[];
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 2b9078e1bc43..d865e053052d 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -98,9 +98,6 @@ unsigned long __pte_frag_nr; EXPORT_SYMBOL(__pte_frag_nr); unsigned long __pte_frag_size_shift; EXPORT_SYMBOL(__pte_frag_size_shift); -unsigned long ioremap_bot; -#else /* !CONFIG_PPC_BOOK3S_64 */ -unsigned long ioremap_bot = IOREMAP_BASE; #endif int __weak ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid) -- cgit v1.2.3

From f381d5711f091facd8847a54a2377cc0d1519df2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 20 Aug 2019 14:07:17 +0000 Subject: powerpc/mm: Move ioremap functions out of pgtable_32/64.c

Create ioremap_32.c and ioremap_64.c and move the respective ioremap functions out of pgtable_32.c and pgtable_64.c

In the meantime, fix a few comments and change a printk() to pr_warn(). Also rejoin a few needlessly split lines.
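The printk() change referred to above is the iounmap() warning in the 64-bit code; side by side (both forms appear verbatim in the diffs that follow):

	/* before: explicit level prefix, message split across two lines */
	printk(KERN_WARNING "Attempt to iounmap early bolted mapping"
	       " at 0x%p\n", addr);

	/* after: pr_warn() helper, message kept greppable on one line */
	pr_warn("Attempt to iounmap early bolted mapping at 0x%p\n", addr);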
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b5c8b02ccefd4ede64c61b53cf64fb5dacb35740.1566309263.git.christophe.leroy@c-s.fr --- arch/powerpc/mm/Makefile | 2 +- arch/powerpc/mm/ioremap_32.c | 104 +++++++++++++++++++++++++++++++++++ arch/powerpc/mm/ioremap_64.c | 123 +++++++++++++++++++++++++++++++++++++++++ arch/powerpc/mm/pgtable_32.c | 99 --------------------------------- arch/powerpc/mm/pgtable_64.c | 128 +------------------------------------------ 5 files changed, 229 insertions(+), 227 deletions(-) create mode 100644 arch/powerpc/mm/ioremap_32.c create mode 100644 arch/powerpc/mm/ioremap_64.c (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 29c682fe9144..5e147986400d 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -7,7 +7,7 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) obj-y := fault.o mem.o pgtable.o mmap.o \ init_$(BITS).o pgtable_$(BITS).o \ - pgtable-frag.o ioremap.o \ + pgtable-frag.o ioremap.o ioremap_$(BITS).o \ init-common.o mmu_context.o drmem.o obj-$(CONFIG_PPC_MMU_NOHASH) += nohash/ obj-$(CONFIG_PPC_BOOK3S_32) += book3s32/ diff --git a/arch/powerpc/mm/ioremap_32.c b/arch/powerpc/mm/ioremap_32.c new file mode 100644 index 000000000000..fb43ba71aa54 --- /dev/null +++ b/arch/powerpc/mm/ioremap_32.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include + +#include + +void __iomem *ioremap_wt(phys_addr_t addr, unsigned long size) +{ + pgprot_t prot = pgprot_cached_wthru(PAGE_KERNEL); + + return __ioremap_caller(addr, size, prot, __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_wt); + +void __iomem * +__ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *caller) +{ + unsigned long v, i; + phys_addr_t p; + int err; + + /* + * Choose an address to map it to. + * Once the vmalloc system is running, we use it. + * Before then, we use space going down from IOREMAP_TOP + * (ioremap_bot records where we're up to). + */ + p = addr & PAGE_MASK; + size = PAGE_ALIGN(addr + size) - p; + + /* + * If the address lies within the first 16 MB, assume it's in ISA + * memory space + */ + if (p < 16 * 1024 * 1024) + p += _ISA_MEM_BASE; + +#ifndef CONFIG_CRASH_DUMP + /* + * Don't allow anybody to remap normal RAM that we're using. + * mem_init() sets high_memory so only do the check after that. + */ + if (slab_is_available() && p <= virt_to_phys(high_memory - 1) && + page_is_ram(__phys_to_pfn(p))) { + pr_warn("%s(): phys addr 0x%llx is RAM lr %ps\n", __func__, + (unsigned long long)p, __builtin_return_address(0)); + return NULL; + } +#endif + + if (size == 0) + return NULL; + + /* + * Is it already mapped? Perhaps overlapped by a previous + * mapping. + */ + v = p_block_mapped(p); + if (v) + goto out; + + if (slab_is_available()) { + struct vm_struct *area; + area = get_vm_area_caller(size, VM_IOREMAP, caller); + if (area == 0) + return NULL; + area->phys_addr = p; + v = (unsigned long)area->addr; + } else { + v = (ioremap_bot -= size); + } + + /* + * Should check if it is a candidate for a BAT mapping + */ + + err = 0; + for (i = 0; i < size && err == 0; i += PAGE_SIZE) + err = map_kernel_page(v + i, p + i, prot); + if (err) { + if (slab_is_available()) + vunmap((void *)v); + return NULL; + } + +out: + return (void __iomem *)(v + ((unsigned long)addr & ~PAGE_MASK)); +} + +void iounmap(volatile void __iomem *addr) +{ + /* + * If mapped by BATs then there is nothing to do. 
+ * Calling vfree() generates a benign warning. + */ + if (v_block_mapped((unsigned long)addr)) + return; + + if (addr > high_memory && (unsigned long)addr < ioremap_bot) + vunmap((void *)(PAGE_MASK & (unsigned long)addr)); +} +EXPORT_SYMBOL(iounmap); diff --git a/arch/powerpc/mm/ioremap_64.c b/arch/powerpc/mm/ioremap_64.c new file mode 100644 index 000000000000..57f3b096143c --- /dev/null +++ b/arch/powerpc/mm/ioremap_64.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include + +int __weak ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, + pgprot_t prot, int nid) +{ + unsigned long i; + + for (i = 0; i < size; i += PAGE_SIZE) { + int err = map_kernel_page(ea + i, pa + i, prot); + if (err) { + if (slab_is_available()) + unmap_kernel_range(ea, size); + else + WARN_ON_ONCE(1); /* Should clean up */ + return err; + } + } + + return 0; +} + +/** + * Low level function to establish the page tables for an IO mapping + */ +void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot) +{ + /* We don't support the 4K PFN hack with ioremap */ + if (pgprot_val(prot) & H_PAGE_4K_PFN) + return NULL; + + if ((ea + size) >= (void *)IOREMAP_END) { + pr_warn("Outside the supported range\n"); + return NULL; + } + + WARN_ON(pa & ~PAGE_MASK); + WARN_ON(((unsigned long)ea) & ~PAGE_MASK); + WARN_ON(size & ~PAGE_MASK); + + if (ioremap_range((unsigned long)ea, pa, size, prot, NUMA_NO_NODE)) + return NULL; + + return (void __iomem *)ea; +} +EXPORT_SYMBOL(__ioremap_at); + +/** + * Low level function to tear down the page tables for an IO mapping. This is + * used for mappings that are manipulated manually, like partial unmapping of + * PCI IOs or ISA space. + */ +void __iounmap_at(void *ea, unsigned long size) +{ + WARN_ON(((unsigned long)ea) & ~PAGE_MASK); + WARN_ON(size & ~PAGE_MASK); + + unmap_kernel_range((unsigned long)ea, size); +} +EXPORT_SYMBOL(__iounmap_at); + +void __iomem *__ioremap_caller(phys_addr_t addr, unsigned long size, + pgprot_t prot, void *caller) +{ + phys_addr_t paligned; + void __iomem *ret; + + /* + * Choose an address to map it to. Once the vmalloc system is running, + * we use it. Before that, we map using addresses going up from + * ioremap_bot. vmalloc will use the addresses from IOREMAP_BASE + * through ioremap_bot. + */ + paligned = addr & PAGE_MASK; + size = PAGE_ALIGN(addr + size) - paligned; + + if (size == 0 || paligned == 0) + return NULL; + + if (slab_is_available()) { + struct vm_struct *area; + + area = __get_vm_area_caller(size, VM_IOREMAP, ioremap_bot, + IOREMAP_END, caller); + if (area == NULL) + return NULL; + + area->phys_addr = paligned; + ret = __ioremap_at(paligned, area->addr, size, prot); + } else { + ret = __ioremap_at(paligned, (void *)ioremap_bot, size, prot); + if (ret) + ioremap_bot += size; + } + + if (ret) + ret += addr & ~PAGE_MASK; + return ret; +} + +/* + * Unmap an IO region and remove it from vmalloc'd list. + * Access to IO memory should be serialized by driver. 
+ */ +void iounmap(volatile void __iomem *token) +{ + void *addr; + + if (!slab_is_available()) + return; + + addr = (void *)((unsigned long __force)PCI_FIX_ADDR(token) & PAGE_MASK); + + if ((unsigned long)addr < ioremap_bot) { + pr_warn("Attempt to iounmap early bolted mapping at 0x%p\n", addr); + return; + } + vunmap(addr); +} +EXPORT_SYMBOL(iounmap); diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 102901a19f3c..8ec5dfb65b2e 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include @@ -35,104 +34,6 @@ extern char etext[], _stext[], _sinittext[], _einittext[]; -void __iomem * -ioremap_wt(phys_addr_t addr, unsigned long size) -{ - pgprot_t prot = pgprot_cached_wthru(PAGE_KERNEL); - - return __ioremap_caller(addr, size, prot, __builtin_return_address(0)); -} -EXPORT_SYMBOL(ioremap_wt); - -void __iomem * -__ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *caller) -{ - unsigned long v, i; - phys_addr_t p; - int err; - - /* - * Choose an address to map it to. - * Once the vmalloc system is running, we use it. - * Before then, we use space going down from IOREMAP_TOP - * (ioremap_bot records where we're up to). - */ - p = addr & PAGE_MASK; - size = PAGE_ALIGN(addr + size) - p; - - /* - * If the address lies within the first 16 MB, assume it's in ISA - * memory space - */ - if (p < 16*1024*1024) - p += _ISA_MEM_BASE; - -#ifndef CONFIG_CRASH_DUMP - /* - * Don't allow anybody to remap normal RAM that we're using. - * mem_init() sets high_memory so only do the check after that. - */ - if (slab_is_available() && p <= virt_to_phys(high_memory - 1) && - page_is_ram(__phys_to_pfn(p))) { - pr_warn("%s(): phys addr 0x%llx is RAM lr %ps\n", __func__, - (unsigned long long)p, __builtin_return_address(0)); - return NULL; - } -#endif - - if (size == 0) - return NULL; - - /* - * Is it already mapped? Perhaps overlapped by a previous - * mapping. - */ - v = p_block_mapped(p); - if (v) - goto out; - - if (slab_is_available()) { - struct vm_struct *area; - area = get_vm_area_caller(size, VM_IOREMAP, caller); - if (area == 0) - return NULL; - area->phys_addr = p; - v = (unsigned long) area->addr; - } else { - v = (ioremap_bot -= size); - } - - /* - * Should check if it is a candidate for a BAT mapping - */ - - err = 0; - for (i = 0; i < size && err == 0; i += PAGE_SIZE) - err = map_kernel_page(v + i, p + i, prot); - if (err) { - if (slab_is_available()) - vunmap((void *)v); - return NULL; - } - -out: - return (void __iomem *) (v + ((unsigned long)addr & ~PAGE_MASK)); -} - -void iounmap(volatile void __iomem *addr) -{ - /* - * If mapped by BATs then there is nothing to do. - * Calling vfree() generates a benign warning. - */ - if (v_block_mapped((unsigned long)addr)) - return; - - if (addr > high_memory && (unsigned long) addr < ioremap_bot) - vunmap((void *) (PAGE_MASK & (unsigned long)addr)); -} -EXPORT_SYMBOL(iounmap); - static void __init *early_alloc_pgtable(unsigned long size) { void *ptr = memblock_alloc(size, size); diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index d865e053052d..e78832dce7bb 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * This file contains ioremap and related functions for 64-bit machines. + * This file contains pgtable related functions for 64-bit machines. 
* * Derived from arch/ppc64/mm/init.c * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -100,131 +99,6 @@ unsigned long __pte_frag_size_shift; EXPORT_SYMBOL(__pte_frag_size_shift); #endif -int __weak ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid) -{ - unsigned long i; - - for (i = 0; i < size; i += PAGE_SIZE) { - int err = map_kernel_page(ea + i, pa + i, prot); - if (err) { - if (slab_is_available()) - unmap_kernel_range(ea, size); - else - WARN_ON_ONCE(1); /* Should clean up */ - return err; - } - } - - return 0; -} - -/** - * __ioremap_at - Low level function to establish the page tables - * for an IO mapping - */ -void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot) -{ - /* We don't support the 4K PFN hack with ioremap */ - if (pgprot_val(prot) & H_PAGE_4K_PFN) - return NULL; - - if ((ea + size) >= (void *)IOREMAP_END) { - pr_warn("Outside the supported range\n"); - return NULL; - } - - WARN_ON(pa & ~PAGE_MASK); - WARN_ON(((unsigned long)ea) & ~PAGE_MASK); - WARN_ON(size & ~PAGE_MASK); - - if (ioremap_range((unsigned long)ea, pa, size, prot, NUMA_NO_NODE)) - return NULL; - - return (void __iomem *)ea; -} - -/** - * __iounmap_from - Low level function to tear down the page tables - * for an IO mapping. This is used for mappings that - * are manipulated manually, like partial unmapping of - * PCI IOs or ISA space. - */ -void __iounmap_at(void *ea, unsigned long size) -{ - WARN_ON(((unsigned long)ea) & ~PAGE_MASK); - WARN_ON(size & ~PAGE_MASK); - - unmap_kernel_range((unsigned long)ea, size); -} - -void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size, - pgprot_t prot, void *caller) -{ - phys_addr_t paligned; - void __iomem *ret; - - /* - * Choose an address to map it to. - * Once the imalloc system is running, we use it. - * Before that, we map using addresses going - * up from ioremap_bot. imalloc will use - * the addresses from ioremap_bot through - * IMALLOC_END - * - */ - paligned = addr & PAGE_MASK; - size = PAGE_ALIGN(addr + size) - paligned; - - if ((size == 0) || (paligned == 0)) - return NULL; - - if (slab_is_available()) { - struct vm_struct *area; - - area = __get_vm_area_caller(size, VM_IOREMAP, - ioremap_bot, IOREMAP_END, - caller); - if (area == NULL) - return NULL; - - area->phys_addr = paligned; - ret = __ioremap_at(paligned, area->addr, size, prot); - } else { - ret = __ioremap_at(paligned, (void *)ioremap_bot, size, prot); - if (ret) - ioremap_bot += size; - } - - if (ret) - ret += addr & ~PAGE_MASK; - return ret; -} - -/* - * Unmap an IO region and remove it from imalloc'd list. - * Access to IO memory should be serialized by driver. 
- */ -void iounmap(volatile void __iomem *token) -{ - void *addr; - - if (!slab_is_available()) - return; - - addr = (void *) ((unsigned long __force) - PCI_FIX_ADDR(token) & PAGE_MASK); - if ((unsigned long)addr < ioremap_bot) { - printk(KERN_WARNING "Attempt to iounmap early bolted mapping" - " at 0x%p\n", addr); - return; - } - vunmap(addr); -} - -EXPORT_SYMBOL(__ioremap_at); -EXPORT_SYMBOL(iounmap); -EXPORT_SYMBOL(__iounmap_at); - #ifndef __PAGETABLE_PUD_FOLDED /* 4 level page table */ struct page *pgd_page(pgd_t pgd) -- cgit v1.2.3

From 191e42063a7241e5c3a1d1f36896a20b147517e9 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 20 Aug 2019 14:07:18 +0000 Subject: powerpc/mm: refactor ioremap_range() and use ioremap_page_range()

book3s64's ioremap_range() is almost the same as the fallback ioremap_range(), except that it calls radix__ioremap_range() when radix is enabled.

radix__ioremap_range() is also very similar to the other ones, except that it calls ioremap_page_range() when slab is available.

The PPC32 __ioremap_caller() has a loop doing the same thing as ioremap_range(), so use it on PPC32 as well.

Let's keep only one version of ioremap_range(), which calls ioremap_page_range() on all platforms when slab is available.

At the same time, drop the nid parameter, which is not used.

Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4b1dca7096b01823b101be7338983578641547f1.1566309263.git.christophe.leroy@c-s.fr --- arch/powerpc/include/asm/book3s/64/radix.h | 3 --- arch/powerpc/include/asm/io.h | 2 ++ arch/powerpc/mm/book3s64/pgtable.c | 21 --------------------- arch/powerpc/mm/book3s64/radix_pgtable.c | 20 -------------------- arch/powerpc/mm/ioremap.c | 24 ++++++++++++++++++++++++ arch/powerpc/mm/ioremap_32.c | 6 ++---- arch/powerpc/mm/ioremap_64.c | 21 +-------------------- 7 files changed, 29 insertions(+), 68 deletions(-) (limited to 'arch/powerpc/mm')

diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index e04a839cb5b9..574eca33f893 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -266,9 +266,6 @@ extern void radix__vmemmap_remove_mapping(unsigned long start, extern int radix__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t flags, unsigned int psz); -extern int radix__ioremap_range(unsigned long ea, phys_addr_t pa, - unsigned long size, pgprot_t prot, int nid); - static inline unsigned long radix__get_tree_size(void) { unsigned long rts_field;
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 8e65ba59f06a..8e00d95f9600 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -722,6 +722,8 @@ void __iomem *ioremap_coherent(phys_addr_t address, unsigned long size); extern void iounmap(volatile void __iomem *addr); +int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot); + extern void __iomem *__ioremap_caller(phys_addr_t, unsigned long size, pgprot_t prot, void *caller);
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 7d0e0d0d22c4..4c8bed856533 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -446,24 +446,3 @@ int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, return true; } - -int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid) -{ - unsigned long i; - - if (radix_enabled()) - return
radix__ioremap_range(ea, pa, size, prot, nid); - - for (i = 0; i < size; i += PAGE_SIZE) { - int err = map_kernel_page(ea + i, pa + i, prot); - if (err) { - if (slab_is_available()) - unmap_kernel_range(ea, size); - else - WARN_ON_ONCE(1); /* Should clean up */ - return err; - } - } - - return 0; -} diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 2204d8eeb784..5bab67fe96e0 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -1218,26 +1218,6 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) return 1; } -int radix__ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, - pgprot_t prot, int nid) -{ - if (likely(slab_is_available())) { - int err = ioremap_page_range(ea, ea + size, pa, prot); - if (err) - unmap_kernel_range(ea, size); - return err; - } else { - unsigned long i; - - for (i = 0; i < size; i += PAGE_SIZE) { - int err = map_kernel_page(ea + i, pa + i, prot); - if (WARN_ON_ONCE(err)) /* Should clean up */ - return err; - } - return 0; - } -} - int __init arch_ioremap_p4d_supported(void) { return 0; diff --git a/arch/powerpc/mm/ioremap.c b/arch/powerpc/mm/ioremap.c index eaf5f8a4a63f..50ee6544d0b7 100644 --- a/arch/powerpc/mm/ioremap.c +++ b/arch/powerpc/mm/ioremap.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include +#include #include unsigned long ioremap_bot; @@ -56,3 +58,25 @@ void __iomem *ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long f return __ioremap_caller(addr, size, pte_pgprot(pte), caller); } EXPORT_SYMBOL(ioremap_prot); + +int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot) +{ + unsigned long i; + + if (slab_is_available()) { + int err = ioremap_page_range(ea, ea + size, pa, prot); + + if (err) + unmap_kernel_range(ea, size); + return err; + } + + for (i = 0; i < size; i += PAGE_SIZE) { + int err = map_kernel_page(ea + i, pa + i, prot); + + if (WARN_ON_ONCE(err)) /* Should clean up */ + return err; + } + + return 0; +} diff --git a/arch/powerpc/mm/ioremap_32.c b/arch/powerpc/mm/ioremap_32.c index fb43ba71aa54..85b90a62e084 100644 --- a/arch/powerpc/mm/ioremap_32.c +++ b/arch/powerpc/mm/ioremap_32.c @@ -17,7 +17,7 @@ EXPORT_SYMBOL(ioremap_wt); void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *caller) { - unsigned long v, i; + unsigned long v; phys_addr_t p; int err; @@ -76,9 +76,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *call * Should check if it is a candidate for a BAT mapping */ - err = 0; - for (i = 0; i < size && err == 0; i += PAGE_SIZE) - err = map_kernel_page(v + i, p + i, prot); + err = ioremap_range((unsigned long)v, p, size, prot); if (err) { if (slab_is_available()) vunmap((void *)v); diff --git a/arch/powerpc/mm/ioremap_64.c b/arch/powerpc/mm/ioremap_64.c index 57f3b096143c..d132ce1e538d 100644 --- a/arch/powerpc/mm/ioremap_64.c +++ b/arch/powerpc/mm/ioremap_64.c @@ -4,25 +4,6 @@ #include #include -int __weak ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, - pgprot_t prot, int nid) -{ - unsigned long i; - - for (i = 0; i < size; i += PAGE_SIZE) { - int err = map_kernel_page(ea + i, pa + i, prot); - if (err) { - if (slab_is_available()) - unmap_kernel_range(ea, size); - else - WARN_ON_ONCE(1); /* Should clean up */ - return err; - } - } - - return 0; -} - /** * Low level function to establish the page tables for an IO mapping */ @@ -41,7 +22,7 @@ void 
__iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_ WARN_ON(((unsigned long)ea) & ~PAGE_MASK); WARN_ON(size & ~PAGE_MASK); - if (ioremap_range((unsigned long)ea, pa, size, prot, NUMA_NO_NODE)) + if (ioremap_range((unsigned long)ea, pa, size, prot)) return NULL; return (void __iomem *)ea; -- cgit v1.2.3

From 4a45b7460cf458012a6930f675e141256b81dcf4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 20 Aug 2019 14:07:19 +0000 Subject: powerpc/mm: refactor ioremap vm area setup.

PPC32 and PPC64 do the same thing once SLAB is available. Create a do_ioremap() function that calls get_vm_area and does the mapping.

For PPC64, we add the 4K PFN hack sanity check to __ioremap_caller() in order to avoid using __ioremap_at(). Other checks in __ioremap_at() are irrelevant for __ioremap_caller().

On PPC64, the VM area is allocated in the range [ioremap_bot ; IOREMAP_END]. On PPC32, the VM area is allocated in the range [VMALLOC_START ; VMALLOC_END].

Let's define IOREMAP_START as ioremap_bot for PPC64, and alias IOREMAP_START/END to VMALLOC_START/END on PPC32.

Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/42e7e36ad32e0fdf76692426cc642799c9f689b8.1566309263.git.christophe.leroy@c-s.fr --- arch/powerpc/include/asm/book3s/32/pgtable.h | 4 ++++ arch/powerpc/include/asm/book3s/64/pgtable.h | 1 + arch/powerpc/include/asm/io.h | 2 ++ arch/powerpc/include/asm/nohash/32/pgtable.h | 4 ++++ arch/powerpc/include/asm/nohash/64/pgtable.h | 1 + arch/powerpc/mm/ioremap.c | 20 ++++++++++++++++++++ arch/powerpc/mm/ioremap_32.c | 15 ++++----------- arch/powerpc/mm/ioremap_64.c | 17 +++++++---------- 8 files changed, 43 insertions(+), 21 deletions(-) (limited to 'arch/powerpc/mm')

diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index aa1bc5f8da90..331a29a501a1 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -165,6 +165,10 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); #define IOREMAP_TOP KVIRT_TOP #endif +/* PPC32 shares vmalloc area with ioremap */ +#define IOREMAP_START VMALLOC_START +#define IOREMAP_END VMALLOC_END + /* * Just any arbitrary offset to the start of the vmalloc VM area: the * current 16MB value just means that there will be a 64MB "hole" after the
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 03c9a14dd902..b01624e5c467 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -316,6 +316,7 @@ extern unsigned long pci_io_base; #define PHB_IO_BASE (ISA_IO_END) #define PHB_IO_END (KERN_IO_START + FULL_IO_SIZE) #define IOREMAP_BASE (PHB_IO_END) +#define IOREMAP_START (ioremap_bot) #define IOREMAP_END (KERN_IO_END) /* Advertise special mapping type for AGP */
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 8e00d95f9600..dc529ea0fffa 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -723,6 +723,8 @@ void __iomem *ioremap_coherent(phys_addr_t address, unsigned long size); extern void iounmap(volatile void __iomem *addr); int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot); +void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size, + pgprot_t prot, void *caller); extern void __iomem *__ioremap_caller(phys_addr_t, unsigned long size, pgprot_t prot, void *caller);
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 7ce2a7c9fade..3e1a4c1e40f0 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -93,6 +93,10 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); #define IOREMAP_TOP KVIRT_TOP #endif +/* PPC32 shares vmalloc area with ioremap */ +#define IOREMAP_START VMALLOC_START +#define IOREMAP_END VMALLOC_END + /* * Just any arbitrary offset to the start of the vmalloc VM area: the * current 16MB value just means that there will be a 64MB "hole" after the diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index b9f66cf15c31..9a33b8bd842d 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -53,6 +53,7 @@ #define PHB_IO_BASE (ISA_IO_END) #define PHB_IO_END (KERN_IO_START + FULL_IO_SIZE) #define IOREMAP_BASE (PHB_IO_END) +#define IOREMAP_START (ioremap_bot) #define IOREMAP_END (KERN_VIRT_START + KERN_VIRT_SIZE) diff --git a/arch/powerpc/mm/ioremap.c b/arch/powerpc/mm/ioremap.c index 50ee6544d0b7..57630325846c 100644 --- a/arch/powerpc/mm/ioremap.c +++ b/arch/powerpc/mm/ioremap.c @@ -80,3 +80,23 @@ int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t return 0; } + +void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size, + pgprot_t prot, void *caller) +{ + struct vm_struct *area; + int ret; + + area = __get_vm_area_caller(size, VM_IOREMAP, IOREMAP_START, IOREMAP_END, caller); + if (area == NULL) + return NULL; + + area->phys_addr = pa; + ret = ioremap_range((unsigned long)area->addr, pa, size, prot); + if (!ret) + return (void __iomem *)area->addr + offset; + + free_vm_area(area); + + return NULL; +} diff --git a/arch/powerpc/mm/ioremap_32.c b/arch/powerpc/mm/ioremap_32.c index 85b90a62e084..fcf343dbf2bf 100644 --- a/arch/powerpc/mm/ioremap_32.c +++ b/arch/powerpc/mm/ioremap_32.c @@ -18,7 +18,7 @@ void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *caller) { unsigned long v; - phys_addr_t p; + phys_addr_t p, offset; int err; /* @@ -28,6 +28,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *call * (ioremap_bot records where we're up to). 
 */ p = addr & PAGE_MASK; + offset = addr & ~PAGE_MASK; size = PAGE_ALIGN(addr + size) - p; /* @@ -62,12 +63,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *call goto out; if (slab_is_available()) { - struct vm_struct *area; - area = get_vm_area_caller(size, VM_IOREMAP, caller); - if (area == 0) - return NULL; - area->phys_addr = p; - v = (unsigned long)area->addr; + return do_ioremap(p, offset, size, prot, caller); } else { v = (ioremap_bot -= size); } @@ -77,11 +73,8 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *call */ err = ioremap_range((unsigned long)v, p, size, prot); - if (err) { - if (slab_is_available()) - vunmap((void *)v); + if (err) return NULL; - } out: return (void __iomem *)(v + ((unsigned long)addr & ~PAGE_MASK));
diff --git a/arch/powerpc/mm/ioremap_64.c b/arch/powerpc/mm/ioremap_64.c index d132ce1e538d..e37b68b7f0e8 100644 --- a/arch/powerpc/mm/ioremap_64.c +++ b/arch/powerpc/mm/ioremap_64.c @@ -46,9 +46,13 @@ EXPORT_SYMBOL(__iounmap_at); void __iomem *__ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *caller) { - phys_addr_t paligned; + phys_addr_t paligned, offset; void __iomem *ret; + /* We don't support the 4K PFN hack with ioremap */ + if (pgprot_val(prot) & H_PAGE_4K_PFN) + return NULL; + /* * Choose an address to map it to. Once the vmalloc system is running, * we use it. Before that, we map using addresses going up from @@ -56,21 +60,14 @@ void __iomem *__ioremap_caller(phys_addr_t addr, unsigned long size, * through ioremap_bot. */ paligned = addr & PAGE_MASK; + offset = addr & ~PAGE_MASK; size = PAGE_ALIGN(addr + size) - paligned; if (size == 0 || paligned == 0) return NULL; if (slab_is_available()) { - struct vm_struct *area; - - area = __get_vm_area_caller(size, VM_IOREMAP, ioremap_bot, - IOREMAP_END, caller); - if (area == NULL) - return NULL; - - area->phys_addr = paligned; - ret = __ioremap_at(paligned, area->addr, size, prot); + return do_ioremap(paligned, offset, size, prot, caller); } else { ret = __ioremap_at(paligned, (void *)ioremap_bot, size, prot); if (ret) -- cgit v1.2.3

From 163918fc5741d755cf9d477ebfcb761f09b82982 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 20 Aug 2019 14:07:20 +0000 Subject: powerpc/mm: split out early ioremap path.

ioremap does things differently at several levels depending on whether SLAB is available or not. Separate out the early path right from the beginning.
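Abridged sketch of the 64-bit tail after this split (taken from the hunks below, error paths trimmed); the 32-bit version is the mirror image, growing ioremap_bot downward from IOREMAP_TOP instead of upward from IOREMAP_BASE:

	if (slab_is_available())
		return do_ioremap(paligned, offset, size, prot, caller);

	/* early boot: no vmalloc yet, map by hand and move the watermark */
	err = early_ioremap_range(ioremap_bot, paligned, size, prot);
	if (err)
		return NULL;

	ret = (void __iomem *)ioremap_bot + offset;
	ioremap_bot += size;
	return ret;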
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3acd2dbe04b04f111475e7a59f2b6f2ab9b95ab6.1566309263.git.christophe.leroy@c-s.fr --- arch/powerpc/include/asm/io.h | 3 ++- arch/powerpc/mm/ioremap.c | 17 +++++++---------- arch/powerpc/mm/ioremap_32.c | 13 +++++-------- arch/powerpc/mm/ioremap_64.c | 30 +++++++++++++++++++++--------- 4 files changed, 35 insertions(+), 28 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index dc529ea0fffa..a63ec938636d 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -722,7 +722,8 @@ void __iomem *ioremap_coherent(phys_addr_t address, unsigned long size); extern void iounmap(volatile void __iomem *addr); -int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot); +int early_ioremap_range(unsigned long ea, phys_addr_t pa, + unsigned long size, pgprot_t prot); void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size, pgprot_t prot, void *caller); diff --git a/arch/powerpc/mm/ioremap.c b/arch/powerpc/mm/ioremap.c index 57630325846c..fc669643ce6a 100644 --- a/arch/powerpc/mm/ioremap.c +++ b/arch/powerpc/mm/ioremap.c @@ -59,18 +59,11 @@ void __iomem *ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long f } EXPORT_SYMBOL(ioremap_prot); -int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot) +int early_ioremap_range(unsigned long ea, phys_addr_t pa, + unsigned long size, pgprot_t prot) { unsigned long i; - if (slab_is_available()) { - int err = ioremap_page_range(ea, ea + size, pa, prot); - - if (err) - unmap_kernel_range(ea, size); - return err; - } - for (i = 0; i < size; i += PAGE_SIZE) { int err = map_kernel_page(ea + i, pa + i, prot); @@ -86,16 +79,20 @@ void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size, { struct vm_struct *area; int ret; + unsigned long va; area = __get_vm_area_caller(size, VM_IOREMAP, IOREMAP_START, IOREMAP_END, caller); if (area == NULL) return NULL; area->phys_addr = pa; - ret = ioremap_range((unsigned long)area->addr, pa, size, prot); + va = (unsigned long)area->addr; + + ret = ioremap_page_range(va, va + size, pa, prot); if (!ret) return (void __iomem *)area->addr + offset; + unmap_kernel_range(va, size); free_vm_area(area); return NULL; diff --git a/arch/powerpc/mm/ioremap_32.c b/arch/powerpc/mm/ioremap_32.c index fcf343dbf2bf..f36121f25243 100644 --- a/arch/powerpc/mm/ioremap_32.c +++ b/arch/powerpc/mm/ioremap_32.c @@ -60,24 +60,21 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *call */ v = p_block_mapped(p); if (v) - goto out; + return (void __iomem *)v + offset; - if (slab_is_available()) { + if (slab_is_available()) return do_ioremap(p, offset, size, prot, caller); - } else { - v = (ioremap_bot -= size); - } /* * Should check if it is a candidate for a BAT mapping */ - err = ioremap_range((unsigned long)v, p, size, prot); + err = early_ioremap_range(ioremap_bot - size, p, size, prot); if (err) return NULL; + ioremap_bot -= size; -out: - return (void __iomem *)(v + ((unsigned long)addr & ~PAGE_MASK)); + return (void __iomem *)ioremap_bot + offset; } void iounmap(volatile void __iomem *addr) diff --git a/arch/powerpc/mm/ioremap_64.c b/arch/powerpc/mm/ioremap_64.c index e37b68b7f0e8..fd29e51700cd 100644 --- a/arch/powerpc/mm/ioremap_64.c +++ b/arch/powerpc/mm/ioremap_64.c @@ -9,6 +9,9 @@ */ void __iomem *__ioremap_at(phys_addr_t pa, 
void *ea, unsigned long size, pgprot_t prot) { + int ret; + unsigned long va = (unsigned long)ea; + /* We don't support the 4K PFN hack with ioremap */ if (pgprot_val(prot) & H_PAGE_4K_PFN) return NULL; @@ -22,7 +25,15 @@ void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_ WARN_ON(((unsigned long)ea) & ~PAGE_MASK); WARN_ON(size & ~PAGE_MASK); - if (ioremap_range((unsigned long)ea, pa, size, prot)) + if (slab_is_available()) { + ret = ioremap_page_range(va, va + size, pa, prot); + if (ret) + unmap_kernel_range(va, size); + } else { + ret = early_ioremap_range(va, pa, size, prot); + } + + if (ret) return NULL; return (void __iomem *)ea; @@ -48,6 +59,7 @@ void __iomem *__ioremap_caller(phys_addr_t addr, unsigned long size, { phys_addr_t paligned, offset; void __iomem *ret; + int err; /* We don't support the 4K PFN hack with ioremap */ if (pgprot_val(prot) & H_PAGE_4K_PFN) @@ -66,16 +78,16 @@ void __iomem *__ioremap_caller(phys_addr_t addr, unsigned long size, if (size == 0 || paligned == 0) return NULL; - if (slab_is_available()) { + if (slab_is_available()) return do_ioremap(paligned, offset, size, prot, caller); - } else { - ret = __ioremap_at(paligned, (void *)ioremap_bot, size, prot); - if (ret) - ioremap_bot += size; - } - if (ret) - ret += addr & ~PAGE_MASK; + err = early_ioremap_range(ioremap_bot, paligned, size, prot); + if (err) + return NULL; + + ret = (void __iomem *)ioremap_bot + offset; + ioremap_bot += size; + return ret; } -- cgit v1.2.3

From 12c3f1fd87bf4e55f06d079a45d6f15e2f6f9750 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 26 Aug 2019 15:52:14 +0000 Subject: powerpc/32s: get rid of CPU_FTR_601 feature

Now that the 601 is mutually exclusive with the other 6xx CPUs, CPU_FTR_601 and its associated fixups are useless. Drop the feature and use #ifdefs instead.
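The replacement pattern, shown on one representative hunk from book3s32/mmu.c: a runtime feature test backed by the code-patching machinery becomes a compile-time constant, so the compiler drops the dead branch entirely on non-601 builds:

	/* before: dynamic test via the CPU feature fixup machinery */
	if (cpu_has_feature(CPU_FTR_601))
		return;

	/* after: IS_ENABLED() folds to 0 or 1 when the kernel is built */
	if (IS_ENABLED(CONFIG_PPC_BOOK3S_601))
		return;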
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ecdb7194a17dbfa01865df6a82979533adc2c70b.1566834712.git.christophe.leroy@c-s.fr --- arch/powerpc/configs/pmac32_defconfig | 1 - arch/powerpc/include/asm/cputable.h | 14 ++++++++------ arch/powerpc/include/asm/ppc_asm.h | 28 +++++++--------------------- arch/powerpc/include/asm/ptrace.h | 6 +++++- arch/powerpc/include/asm/timex.h | 34 +++------------------------------- arch/powerpc/kernel/cputable.c | 6 ++++-- arch/powerpc/kernel/entry_32.S | 22 ++++++++++++++++------ arch/powerpc/mm/book3s32/mmu.c | 15 ++++++--------- arch/powerpc/mm/ptdump/bats.c | 2 +- arch/powerpc/platforms/Kconfig | 3 ++- 10 files changed, 52 insertions(+), 79 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index 7e6654848531..4e6e95f92646 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -20,7 +20,6 @@ CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_GOV_POWERSAVE=y CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_PMAC=y -CONFIG_PPC601_SYNC_FIX=y CONFIG_GEN_RTC=y CONFIG_HIGHMEM=y CONFIG_BINFMT_MISC=m diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index d05f0c28e515..0aad095896d6 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -145,7 +145,6 @@ static inline void cpu_feature_keys_init(void) { } /* Definitions for features that only exist on 32-bit chips */ #ifdef CONFIG_PPC32 -#define CPU_FTR_601 ASM_CONST(0x00001000) #define CPU_FTR_L2CR ASM_CONST(0x00002000) #define CPU_FTR_SPEC7450 ASM_CONST(0x00004000) #define CPU_FTR_TAU ASM_CONST(0x00008000) @@ -167,7 +166,6 @@ static inline void cpu_feature_keys_init(void) { } #else /* CONFIG_PPC32 */ /* Define these to 0 for the sake of tests in common code */ -#define CPU_FTR_601 (0) #define CPU_FTR_PPC_LE (0) #endif @@ -294,7 +292,7 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTR_MAYBE_CAN_NAP 0 #endif -#define CPU_FTRS_PPC601 (CPU_FTR_COMMON | CPU_FTR_601 | \ +#define CPU_FTRS_PPC601 (CPU_FTR_COMMON | \ CPU_FTR_COHERENT_ICACHE | CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_USE_RTC) #define CPU_FTRS_603 (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE | CPU_FTR_NOEXECUTE) @@ -498,7 +496,9 @@ static inline void cpu_feature_keys_init(void) { } #else enum { CPU_FTRS_POSSIBLE = -#ifdef CONFIG_PPC_BOOK3S_32 +#ifdef CONFIG_PPC_BOOK3S_601 + CPU_FTRS_PPC601 | +#elif defined(CONFIG_PPC_BOOK3S_32) CPU_FTRS_PPC601 | CPU_FTRS_603 | CPU_FTRS_604 | CPU_FTRS_740_NOTAU | CPU_FTRS_740 | CPU_FTRS_750 | CPU_FTRS_750FX1 | CPU_FTRS_750FX2 | CPU_FTRS_750FX | CPU_FTRS_750GX | @@ -574,8 +574,10 @@ enum { #else enum { CPU_FTRS_ALWAYS = -#ifdef CONFIG_PPC_BOOK3S_32 - CPU_FTRS_PPC601 & CPU_FTRS_603 & CPU_FTRS_604 & CPU_FTRS_740_NOTAU & +#ifdef CONFIG_PPC_BOOK3S_601 + CPU_FTRS_PPC601 & +#elif defined(CONFIG_PPC_BOOK3S_32) + CPU_FTRS_603 & CPU_FTRS_604 & CPU_FTRS_740_NOTAU & CPU_FTRS_740 & CPU_FTRS_750 & CPU_FTRS_750FX1 & CPU_FTRS_750FX2 & CPU_FTRS_750FX & CPU_FTRS_750GX & CPU_FTRS_7400_NOTAU & CPU_FTRS_7400 & CPU_FTRS_7450_20 & diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index dd3b191bdcea..6b03dff61a05 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -383,19 +383,9 @@ n: /* various errata or part fixups */ #ifdef CONFIG_PPC601_SYNC_FIX -#define SYNC \ -BEGIN_FTR_SECTION \ - sync; 
\ - isync; \ -END_FTR_SECTION_IFSET(CPU_FTR_601) -#define SYNC_601 \ -BEGIN_FTR_SECTION \ - sync; \ -END_FTR_SECTION_IFSET(CPU_FTR_601) -#define ISYNC_601 \ -BEGIN_FTR_SECTION \ - isync; \ -END_FTR_SECTION_IFSET(CPU_FTR_601) +#define SYNC sync; isync +#define SYNC_601 sync +#define ISYNC_601 isync #else #define SYNC #define SYNC_601 @@ -421,15 +411,11 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96) #define MFTBU(dest) mfspr dest, SPRN_TBRU #endif -#ifndef CONFIG_SMP -#define TLBSYNC -#else /* CONFIG_SMP */ /* tlbsync is not implemented on 601 */ -#define TLBSYNC \ -BEGIN_FTR_SECTION \ - tlbsync; \ - sync; \ -END_FTR_SECTION_IFCLR(CPU_FTR_601) +#if !defined(CONFIG_SMP) || defined(CONFIG_PPC_BOOK3S_601) +#define TLBSYNC +#else +#define TLBSYNC tlbsync; sync #endif #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index feee1b21bbd5..ee3ada66deb5 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -203,7 +203,11 @@ do { \ #endif /* __powerpc64__ */ #define arch_has_single_step() (1) -#define arch_has_block_step() (!cpu_has_feature(CPU_FTR_601)) +#ifndef CONFIG_BOOK3S_601 +#define arch_has_block_step() (true) +#else +#define arch_has_block_step() (false) +#endif #define ARCH_HAS_USER_SINGLE_STEP_REPORT /* diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h index 926b9f91a3ef..d2d2c4bd8435 100644 --- a/arch/powerpc/include/asm/timex.h +++ b/arch/powerpc/include/asm/timex.h @@ -17,38 +17,10 @@ typedef unsigned long cycles_t; static inline cycles_t get_cycles(void) { -#ifdef __powerpc64__ + if (IS_ENABLED(CONFIG_BOOK3S_601)) + return 0; + return mftb(); -#else - cycles_t ret; - - /* - * For the "cycle" counter we use the timebase lower half. - * Currently only used on SMP. 
- */ - - ret = 0; - - __asm__ __volatile__( -#ifdef CONFIG_PPC_8xx - "97: mftb %0\n" -#else - "97: mfspr %0, %2\n" -#endif - "99:\n" - ".section __ftr_fixup,\"a\"\n" - ".align 2\n" - "98:\n" - " .long %1\n" - " .long 0\n" - " .long 97b-98b\n" - " .long 99b-98b\n" - " .long 0\n" - " .long 0\n" - ".previous" - : "=r" (ret) : "i" (CPU_FTR_601), "i" (SPRN_TBRL)); - return ret; -#endif } #endif /* __KERNEL__ */ diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index bfe5f4a2886b..e745abc5457a 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -569,7 +569,7 @@ static struct cpu_spec __initdata cpu_specs[] = { #endif /* CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_PPC32 -#ifdef CONFIG_PPC_BOOK3S_32 +#ifdef CONFIG_PPC_BOOK3S_601 { /* 601 */ .pvr_mask = 0xffff0000, .pvr_value = 0x00010000, @@ -583,6 +583,8 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_generic, .platform = "ppc601", }, +#endif /* CONFIG_PPC_BOOK3S_601 */ +#ifdef CONFIG_PPC_BOOK3S_6xx { /* 603 */ .pvr_mask = 0xffff0000, .pvr_value = 0x00030000, @@ -1212,7 +1214,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_generic, .platform = "ppc603", }, -#endif /* CONFIG_PPC_BOOK3S_32 */ +#endif /* CONFIG_PPC_BOOK3S_6xx */ #ifdef CONFIG_PPC_8xx { /* 8xx */ .pvr_mask = 0xffff0000, diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 972b05504a0a..d60908ea37fb 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -777,11 +777,19 @@ fast_exception_return: 1: lis r3,exc_exit_restart_end@ha addi r3,r3,exc_exit_restart_end@l cmplw r12,r3 +#if CONFIG_PPC_BOOK3S_601 + bge 2b +#else bge 3f +#endif lis r4,exc_exit_restart@ha addi r4,r4,exc_exit_restart@l cmplw r12,r4 +#if CONFIG_PPC_BOOK3S_601 + blt 2b +#else blt 3f +#endif lis r3,fee_restarts@ha tophys(r3,r3) lwz r5,fee_restarts@l(r3) @@ -800,9 +808,6 @@ fee_restarts: /* aargh, we don't know which trap this is */ /* but the 601 doesn't implement the RI bit, so assume it's OK */ 3: -BEGIN_FTR_SECTION - b 2b -END_FTR_SECTION_IFSET(CPU_FTR_601) li r10,-1 stw r10,_TRAP(r11) addi r3,r1,STACK_FRAME_OVERHEAD @@ -1270,11 +1275,19 @@ nonrecoverable: lis r10,exc_exit_restart_end@ha addi r10,r10,exc_exit_restart_end@l cmplw r12,r10 +#ifdef CONFIG_PPC_BOOK3S_601 + bgelr +#else bge 3f +#endif lis r11,exc_exit_restart@ha addi r11,r11,exc_exit_restart@l cmplw r12,r11 +#ifdef CONFIG_PPC_BOOK3S_601 + bltlr +#else blt 3f +#endif lis r10,ee_restarts@ha lwz r12,ee_restarts@l(r10) addi r12,r12,1 @@ -1283,9 +1296,6 @@ nonrecoverable: blr 3: /* OK, we can't recover, kill this process */ /* but the 601 doesn't implement the RI bit, so assume it's OK */ -BEGIN_FTR_SECTION - blr -END_FTR_SECTION_IFSET(CPU_FTR_601) lwz r3,_TRAP(r1) andi. r0,r3,1 beq 5f diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 50a1991d257f..84d5fab94f8f 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -74,7 +74,7 @@ static int find_free_bat(void) { int b; - if (cpu_has_feature(CPU_FTR_601)) { + if (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) { for (b = 0; b < 4; b++) { struct ppc_bat *bat = BATS[b]; @@ -106,7 +106,7 @@ static int find_free_bat(void) */ static unsigned int block_size(unsigned long base, unsigned long top) { - unsigned int max_size = (cpu_has_feature(CPU_FTR_601) ? 8 : 256) << 20; + unsigned int max_size = IS_ENABLED(CONFIG_PPC_BOOK3S_601) ? 
SZ_8M : SZ_256M;
 	unsigned int base_shift = (ffs(base) - 1) & 31;
 	unsigned int block_shift = (fls(top - base) - 1) & 31;
@@ -189,7 +189,7 @@ void mmu_mark_initmem_nx(void)
 	unsigned long top = (unsigned long)_etext - PAGE_OFFSET;
 	unsigned long size;
 
-	if (cpu_has_feature(CPU_FTR_601))
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S_601))
 		return;
 
 	for (i = 0; i < nb - 1 && base < top && top - base > (128 << 10);) {
@@ -227,7 +227,7 @@ void mmu_mark_rodata_ro(void)
 	int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
 	int i;
 
-	if (cpu_has_feature(CPU_FTR_601))
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S_601))
 		return;
 
 	for (i = 0; i < nb; i++) {
@@ -259,7 +259,7 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys,
 		flags &= ~_PAGE_COHERENT;
 
 	bl = (size >> 17) - 1;
-	if (PVR_VER(mfspr(SPRN_PVR)) != 1) {
+	if (!IS_ENABLED(CONFIG_PPC_BOOK3S_601)) {
 		/* 603, 604, etc. */
 		/* Do DBAT first */
 		wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE
@@ -441,7 +441,7 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
 	BUG_ON(first_memblock_base != 0);
 
 	/* 601 can only access 16MB at the moment */
-	if (PVR_VER(mfspr(SPRN_PVR)) == 1)
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S_601))
 		memblock_set_current_limit(min_t(u64, first_memblock_size, 0x01000000));
 	else /* Anything else has 256M mapped */
 		memblock_set_current_limit(min_t(u64, first_memblock_size, 0x10000000));
@@ -459,9 +459,6 @@ void __init setup_kuep(bool disabled)
 {
 	pr_info("Activating Kernel Userspace Execution Prevention\n");
 
-	if (cpu_has_feature(CPU_FTR_601))
-		pr_warn("KUEP is not working on powerpc 601 (No NX bit in Seg Regs)\n");
-
 	if (disabled)
 		pr_warn("KUEP cannot be disabled yet on 6xx when compiled in\n");
 }
diff --git a/arch/powerpc/mm/ptdump/bats.c b/arch/powerpc/mm/ptdump/bats.c
index a0d23e96e841..4154feac1da3 100644
--- a/arch/powerpc/mm/ptdump/bats.c
+++ b/arch/powerpc/mm/ptdump/bats.c
@@ -149,7 +149,7 @@ static int bats_show_603(struct seq_file *m, void *v)
 
 static int bats_open(struct inode *inode, struct file *file)
 {
-	if (cpu_has_feature(CPU_FTR_601))
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S_601))
 		return single_open(file, bats_show_601, NULL);
 
 	return single_open(file, bats_show_603, NULL);
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index f3fb79fccc72..d82e3664ffdf 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -197,7 +197,8 @@ endmenu
 
 config PPC601_SYNC_FIX
 	bool "Workarounds for PPC601 bugs"
-	depends on PPC_BOOK3S_32 && PPC_PMAC
+	depends on PPC_BOOK3S_601 && PPC_PMAC
+	default y
 	help
 	  Some versions of the PPC601 (the first PowerPC chip) have bugs which
 	  mean that extra synchronization instructions are required near
-- cgit v1.2.3


From f2902a2fb40c589b886d21518ef8a1ee87f76b0c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Wed, 14 Aug 2019 15:22:30 +0200
Subject: powerpc: use the generic dma coherent remap allocator

This switches to using common code for the DMA allocations, including
potential use of the CMA allocator if configured.

Switching to the generic code enables DMA allocations from atomic
context, which is required by the DMA API documentation, and also adds
various other minor features drivers start relying upon.

It also makes sure we have one tested code base for all architectures
that require uncached pte bits for coherent DMA allocations.

Another advantage is that consistent memory allocations now share the
general vmalloc pool instead of needing an explicit carveout from it.
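A side note on what this buys driver code: with the remap allocator and
atomic pool in place, dma_alloc_coherent() may be called with GFP_ATOMIC
from interrupt or spinlocked context. A hypothetical fragment (the
example_dev structure and its fields are made up for illustration, and
are not part of this patch):

	/*
	 * Sketch only: non-blocking allocations are served from the
	 * atomic pool that atomic_pool_init() populates at boot.
	 */
	static int example_refill_ring(struct example_dev *edev)
	{
		dma_addr_t dma;
		void *buf;

		/* may run under a spinlock or in an IRQ handler */
		buf = dma_alloc_coherent(edev->dev, SZ_4K, &dma, GFP_ATOMIC);
		if (!buf)
			return -ENOMEM;

		edev->ring_cpu = buf;
		edev->ring_dma = dma;
		return 0;
	}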
Signed-off-by: Christoph Hellwig Tested-by: Christophe Leroy # tested on 8xx Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190814132230.31874-2-hch@lst.de --- arch/powerpc/Kconfig | 12 - arch/powerpc/include/asm/book3s/32/pgtable.h | 12 +- arch/powerpc/include/asm/nohash/32/pgtable.h | 12 +- arch/powerpc/mm/dma-noncoherent.c | 318 +-------------------------- arch/powerpc/mm/mem.c | 4 - arch/powerpc/mm/ptdump/ptdump.c | 9 - arch/powerpc/platforms/Kconfig.cputype | 2 + 7 files changed, 17 insertions(+), 352 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index cfb21856559b..edc19363a729 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -1141,18 +1141,6 @@ config TASK_SIZE default "0x80000000" if PPC_8xx default "0xc0000000" -config CONSISTENT_SIZE_BOOL - bool "Set custom consistent memory pool size" - depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE - help - This option allows you to set the size of the - consistent memory pool. This pool of virtual memory - is used to make consistent memory allocations. - -config CONSISTENT_SIZE - hex "Size of consistent memory pool" if CONSISTENT_SIZE_BOOL - default "0x00200000" if NOT_COHERENT_CACHE - config PIN_TLB bool "Pinned Kernel TLBs (860 ONLY)" depends on ADVANCED_OPTIONS && PPC_8xx && \ diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 331a29a501a1..0796533d37dd 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -148,21 +148,15 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); */ #include -#ifdef CONFIG_HIGHMEM -#define KVIRT_TOP PKMAP_BASE -#else -#define KVIRT_TOP FIXADDR_START -#endif - /* * ioremap_bot starts at that address. Early ioremaps move down from there, * until mem_init() at which point this becomes the top of the vmalloc * and ioremap space */ -#ifdef CONFIG_NOT_COHERENT_CACHE -#define IOREMAP_TOP ((KVIRT_TOP - CONFIG_CONSISTENT_SIZE) & PAGE_MASK) +#ifdef CONFIG_HIGHMEM +#define IOREMAP_TOP PKMAP_BASE #else -#define IOREMAP_TOP KVIRT_TOP +#define IOREMAP_TOP FIXADDR_START #endif /* PPC32 shares vmalloc area with ioremap */ diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 3e1a4c1e40f0..552b96eef0c8 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -76,21 +76,15 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); */ #include -#ifdef CONFIG_HIGHMEM -#define KVIRT_TOP PKMAP_BASE -#else -#define KVIRT_TOP FIXADDR_START -#endif - /* * ioremap_bot starts at that address. Early ioremaps move down from there, * until mem_init() at which point this becomes the top of the vmalloc * and ioremap space */ -#ifdef CONFIG_NOT_COHERENT_CACHE -#define IOREMAP_TOP ((KVIRT_TOP - CONFIG_CONSISTENT_SIZE) & PAGE_MASK) +#ifdef CONFIG_HIGHMEM +#define IOREMAP_TOP PKMAP_BASE #else -#define IOREMAP_TOP KVIRT_TOP +#define IOREMAP_TOP FIXADDR_START #endif /* PPC32 shares vmalloc area with ioremap */ diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c index c617282d5b2a..4272ca5e8159 100644 --- a/arch/powerpc/mm/dma-noncoherent.c +++ b/arch/powerpc/mm/dma-noncoherent.c @@ -4,310 +4,18 @@ * Copyright (C) 2001 Dan Malek (dmalek@jlc.net) * * Copyright (C) 2000 Russell King - * - * Consistent memory allocators. 
Used for DMA devices that want to - * share uncached memory with the processor core. The function return - * is the virtual address and 'dma_handle' is the physical address. - * Mostly stolen from the ARM port, with some changes for PowerPC. - * -- Dan - * - * Reorganized to get rid of the arch-specific consistent_* functions - * and provide non-coherent implementations for the DMA API. -Matt - * - * Added in_interrupt() safe dma_alloc_coherent()/dma_free_coherent() - * implementation. This is pulled straight from ARM and barely - * modified. -Matt */ -#include -#include #include #include -#include #include #include #include #include -#include #include #include -#include - -/* - * This address range defaults to a value that is safe for all - * platforms which currently set CONFIG_NOT_COHERENT_CACHE. It - * can be further configured for specific applications under - * the "Advanced Setup" menu. -Matt - */ -#define CONSISTENT_BASE (IOREMAP_TOP) -#define CONSISTENT_END (CONSISTENT_BASE + CONFIG_CONSISTENT_SIZE) -#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT) - -/* - * This is the page table (2MB) covering uncached, DMA consistent allocations - */ -static DEFINE_SPINLOCK(consistent_lock); - -/* - * VM region handling support. - * - * This should become something generic, handling VM region allocations for - * vmalloc and similar (ioremap, module space, etc). - * - * I envisage vmalloc()'s supporting vm_struct becoming: - * - * struct vm_struct { - * struct vm_region region; - * unsigned long flags; - * struct page **pages; - * unsigned int nr_pages; - * unsigned long phys_addr; - * }; - * - * get_vm_area() would then call vm_region_alloc with an appropriate - * struct vm_region head (eg): - * - * struct vm_region vmalloc_head = { - * .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list), - * .vm_start = VMALLOC_START, - * .vm_end = VMALLOC_END, - * }; - * - * However, vmalloc_head.vm_start is variable (typically, it is dependent on - * the amount of RAM found at boot time.) I would imagine that get_vm_area() - * would have to initialise this each time prior to calling vm_region_alloc(). - */ -struct ppc_vm_region { - struct list_head vm_list; - unsigned long vm_start; - unsigned long vm_end; -}; - -static struct ppc_vm_region consistent_head = { - .vm_list = LIST_HEAD_INIT(consistent_head.vm_list), - .vm_start = CONSISTENT_BASE, - .vm_end = CONSISTENT_END, -}; - -static struct ppc_vm_region * -ppc_vm_region_alloc(struct ppc_vm_region *head, size_t size, gfp_t gfp) -{ - unsigned long addr = head->vm_start, end = head->vm_end - size; - unsigned long flags; - struct ppc_vm_region *c, *new; - - new = kmalloc(sizeof(struct ppc_vm_region), gfp); - if (!new) - goto out; - - spin_lock_irqsave(&consistent_lock, flags); - - list_for_each_entry(c, &head->vm_list, vm_list) { - if ((addr + size) < addr) - goto nospc; - if ((addr + size) <= c->vm_start) - goto found; - addr = c->vm_end; - if (addr > end) - goto nospc; - } - - found: - /* - * Insert this entry _before_ the one we found. 
- */ - list_add_tail(&new->vm_list, &c->vm_list); - new->vm_start = addr; - new->vm_end = addr + size; - - spin_unlock_irqrestore(&consistent_lock, flags); - return new; - - nospc: - spin_unlock_irqrestore(&consistent_lock, flags); - kfree(new); - out: - return NULL; -} - -static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsigned long addr) -{ - struct ppc_vm_region *c; - - list_for_each_entry(c, &head->vm_list, vm_list) { - if (c->vm_start == addr) - goto out; - } - c = NULL; - out: - return c; -} - -/* - * Allocate DMA-coherent memory space and return both the kernel remapped - * virtual and bus address for that space. - */ -void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp, unsigned long attrs) -{ - struct page *page; - struct ppc_vm_region *c; - unsigned long order; - u64 mask = ISA_DMA_THRESHOLD, limit; - - if (dev) { - mask = dev->coherent_dma_mask; - - /* - * Sanity check the DMA mask - it must be non-zero, and - * must be able to be satisfied by a DMA allocation. - */ - if (mask == 0) { - dev_warn(dev, "coherent DMA mask is unset\n"); - goto no_page; - } - - if ((~mask) & ISA_DMA_THRESHOLD) { - dev_warn(dev, "coherent DMA mask %#llx is smaller " - "than system GFP_DMA mask %#llx\n", - mask, (unsigned long long)ISA_DMA_THRESHOLD); - goto no_page; - } - } - - - size = PAGE_ALIGN(size); - limit = (mask + 1) & ~mask; - if ((limit && size >= limit) || - size >= (CONSISTENT_END - CONSISTENT_BASE)) { - printk(KERN_WARNING "coherent allocation too big (requested %#x mask %#Lx)\n", - size, mask); - return NULL; - } - - order = get_order(size); - - /* Might be useful if we ever have a real legacy DMA zone... */ - if (mask != 0xffffffff) - gfp |= GFP_DMA; - - page = alloc_pages(gfp, order); - if (!page) - goto no_page; - - /* - * Invalidate any data that might be lurking in the - * kernel direct-mapped region for device DMA. - */ - { - unsigned long kaddr = (unsigned long)page_address(page); - memset(page_address(page), 0, size); - flush_dcache_range(kaddr, kaddr + size); - } - - /* - * Allocate a virtual address in the consistent mapping region. - */ - c = ppc_vm_region_alloc(&consistent_head, size, - gfp & ~(__GFP_DMA | __GFP_HIGHMEM)); - if (c) { - unsigned long vaddr = c->vm_start; - struct page *end = page + (1 << order); - - split_page(page, order); - - /* - * Set the "dma handle" - */ - *dma_handle = phys_to_dma(dev, page_to_phys(page)); - - do { - SetPageReserved(page); - map_kernel_page(vaddr, page_to_phys(page), - pgprot_noncached(PAGE_KERNEL)); - page++; - vaddr += PAGE_SIZE; - } while (size -= PAGE_SIZE); - - /* - * Free the otherwise unused pages. - */ - while (page < end) { - __free_page(page); - page++; - } - - return (void *)c->vm_start; - } - - if (page) - __free_pages(page, order); - no_page: - return NULL; -} - -/* - * free a page as defined by the above mapping. 
- */ -void arch_dma_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, unsigned long attrs) -{ - struct ppc_vm_region *c; - unsigned long flags, addr; - - size = PAGE_ALIGN(size); - - spin_lock_irqsave(&consistent_lock, flags); - - c = ppc_vm_region_find(&consistent_head, (unsigned long)vaddr); - if (!c) - goto no_area; - - if ((c->vm_end - c->vm_start) != size) { - printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", - __func__, c->vm_end - c->vm_start, size); - dump_stack(); - size = c->vm_end - c->vm_start; - } - - addr = c->vm_start; - do { - pte_t *ptep; - unsigned long pfn; - - ptep = pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(addr), - addr), - addr), - addr); - if (!pte_none(*ptep) && pte_present(*ptep)) { - pfn = pte_pfn(*ptep); - pte_clear(&init_mm, addr, ptep); - if (pfn_valid(pfn)) { - struct page *page = pfn_to_page(pfn); - __free_reserved_page(page); - } - } - addr += PAGE_SIZE; - } while (size -= PAGE_SIZE); - - flush_tlb_kernel_range(c->vm_start, c->vm_end); - - list_del(&c->vm_list); - - spin_unlock_irqrestore(&consistent_lock, flags); - - kfree(c); - return; - - no_area: - spin_unlock_irqrestore(&consistent_lock, flags); - printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n", - __func__, vaddr); - dump_stack(); -} - /* * make an area consistent. */ @@ -408,23 +116,15 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, __dma_sync_page(paddr, size, dir); } -/* - * Return the PFN for a given cpu virtual address returned by arch_dma_alloc. - */ -long arch_dma_coherent_to_pfn(struct device *dev, void *vaddr, - dma_addr_t dma_addr) +void arch_dma_prep_coherent(struct page *page, size_t size) { - /* This should always be populated, so we don't test every - * level. 
If that fails, we'll have a nice crash which - * will be as good as a BUG_ON() - */ - unsigned long cpu_addr = (unsigned long)vaddr; - pgd_t *pgd = pgd_offset_k(cpu_addr); - pud_t *pud = pud_offset(pgd, cpu_addr); - pmd_t *pmd = pmd_offset(pud, cpu_addr); - pte_t *ptep = pte_offset_kernel(pmd, cpu_addr); + unsigned long kaddr = (unsigned long)page_address(page); - if (pte_none(*ptep) || !pte_present(*ptep)) - return 0; - return pte_pfn(*ptep); + flush_dcache_range(kaddr, kaddr + size); +} + +static int __init atomic_pool_init(void) +{ + return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL)); } +postcore_initcall(atomic_pool_init); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 69f99128a8d6..be941d382c8d 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -302,10 +302,6 @@ void __init mem_init(void) pr_info(" * 0x%08lx..0x%08lx : highmem PTEs\n", PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP)); #endif /* CONFIG_HIGHMEM */ -#ifdef CONFIG_NOT_COHERENT_CACHE - pr_info(" * 0x%08lx..0x%08lx : consistent mem\n", - IOREMAP_TOP, IOREMAP_TOP + CONFIG_CONSISTENT_SIZE); -#endif /* CONFIG_NOT_COHERENT_CACHE */ if (ioremap_bot != IOREMAP_TOP) pr_info(" * 0x%08lx..0x%08lx : early ioremap\n", ioremap_bot, IOREMAP_TOP); diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index ab6a572202b4..2f9ddc29c535 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -84,10 +84,6 @@ static struct addr_marker address_markers[] = { #else { 0, "Early I/O remap start" }, { 0, "Early I/O remap end" }, -#ifdef CONFIG_NOT_COHERENT_CACHE - { 0, "Consistent mem start" }, - { 0, "Consistent mem end" }, -#endif #ifdef CONFIG_HIGHMEM { 0, "Highmem PTEs start" }, { 0, "Highmem PTEs end" }, @@ -335,11 +331,6 @@ static void populate_markers(void) #else /* !CONFIG_PPC64 */ address_markers[i++].start_address = ioremap_bot; address_markers[i++].start_address = IOREMAP_TOP; -#ifdef CONFIG_NOT_COHERENT_CACHE - address_markers[i++].start_address = IOREMAP_TOP; - address_markers[i++].start_address = IOREMAP_TOP + - CONFIG_CONSISTENT_SIZE; -#endif #ifdef CONFIG_HIGHMEM address_markers[i++].start_address = PKMAP_BASE; address_markers[i++].start_address = PKMAP_ADDR(LAST_PKMAP); diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 68c5cc075bfc..12543e53fa96 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -460,8 +460,10 @@ config NOT_COHERENT_CACHE depends on 4xx || PPC_8xx || E200 || PPC_MPC512x || \ GAMECUBE_COMMON || AMIGAONE select ARCH_HAS_DMA_COHERENT_TO_PFN + select ARCH_HAS_DMA_PREP_COHERENT select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_HAS_SYNC_DMA_FOR_CPU + select DMA_DIRECT_REMAP default n if PPC_47x default y -- cgit v1.2.3 From 139a1d2842ec181cf017502a46bb8d947682a960 Mon Sep 17 00:00:00 2001 From: Michael Anderson Date: Thu, 22 Aug 2019 00:48:35 -0300 Subject: powerpc/mm: Use UV_WRITE_PATE ucall to register a PATE When Ultravisor (UV) is enabled, the partition table is stored in secure memory and can only be accessed via the UV. The Hypervisor (HV) however maintains a copy of the partition table in normal memory to allow Nest MMU translations to occur (for normal VMs). The HV copy includes partition table entries (PATE)s for secure VMs which would currently be unused (Nest MMU translations cannot access secure memory) but they would be needed as we add functionality. 
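In rough outline the intended update path looks like the sketch below
(illustrative pseudo-code only; the real logic is in the
mmu_partition_table_set_entry() hunk further down):

	/* Keep the HV copy current for the Nest MMU... */
	partition_tb[lpid].patb0 = cpu_to_be64(dw0);
	partition_tb[lpid].patb1 = cpu_to_be64(dw1);

	/*
	 * ...then have the ultravisor update the authoritative table
	 * held in secure memory.
	 */
	if (firmware_has_feature(FW_FEATURE_ULTRAVISOR))
		uv_register_pate(lpid, dw0, dw1);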
This patch adds the UV_WRITE_PATE ucall which is used to update the PATE for a VM (both normal and secure) when Ultravisor is enabled. Signed-off-by: Michael Anderson Signed-off-by: Madhavan Srinivasan Signed-off-by: Ram Pai [ cclaudio: Write the PATE in HV's table before doing that in UV's ] Signed-off-by: Claudio Carvalho Reviewed-by: Ryan Grimm Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190822034838.27876-5-cclaudio@linux.ibm.com --- arch/powerpc/include/asm/ultravisor-api.h | 5 ++++ arch/powerpc/include/asm/ultravisor.h | 8 +++++ arch/powerpc/mm/book3s64/pgtable.c | 50 +++++++++++++++++++++++-------- 3 files changed, 50 insertions(+), 13 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/ultravisor-api.h b/arch/powerpc/include/asm/ultravisor-api.h index 88ffa78f9d61..8cd49abff4f3 100644 --- a/arch/powerpc/include/asm/ultravisor-api.h +++ b/arch/powerpc/include/asm/ultravisor-api.h @@ -11,6 +11,7 @@ #include /* Return codes */ +#define U_BUSY H_BUSY #define U_FUNCTION H_FUNCTION #define U_NOT_AVAILABLE H_NOT_AVAILABLE #define U_P2 H_P2 @@ -18,6 +19,10 @@ #define U_P4 H_P4 #define U_P5 H_P5 #define U_PARAMETER H_PARAMETER +#define U_PERMISSION H_PERMISSION #define U_SUCCESS H_SUCCESS +/* opcodes */ +#define UV_WRITE_PATE 0xF104 + #endif /* _ASM_POWERPC_ULTRAVISOR_API_H */ diff --git a/arch/powerpc/include/asm/ultravisor.h b/arch/powerpc/include/asm/ultravisor.h index dc6e1ea198f2..6fe1f365dec8 100644 --- a/arch/powerpc/include/asm/ultravisor.h +++ b/arch/powerpc/include/asm/ultravisor.h @@ -8,7 +8,15 @@ #ifndef _ASM_POWERPC_ULTRAVISOR_H #define _ASM_POWERPC_ULTRAVISOR_H +#include +#include + int early_init_dt_scan_ultravisor(unsigned long node, const char *uname, int depth, void *data); +static inline int uv_register_pate(u64 lpid, u64 dw0, u64 dw1) +{ + return ucall_norets(UV_WRITE_PATE, lpid, dw0, dw1); +} + #endif /* _ASM_POWERPC_ULTRAVISOR_H */ diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 7d0e0d0d22c4..4173f6931009 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include @@ -209,21 +211,10 @@ void __init mmu_partition_table_init(void) powernv_set_nmmu_ptcr(ptcr); } -void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, - unsigned long dw1) +static void flush_partition(unsigned int lpid, bool radix) { - unsigned long old = be64_to_cpu(partition_tb[lpid].patb0); - - partition_tb[lpid].patb0 = cpu_to_be64(dw0); - partition_tb[lpid].patb1 = cpu_to_be64(dw1); - - /* - * Global flush of TLBs and partition table caches for this lpid. - * The type of flush (hash or radix) depends on what the previous - * use of this partition ID was, not the new use. - */ asm volatile("ptesync" : : : "memory"); - if (old & PATB_HR) { + if (radix) { asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : : "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : : @@ -237,6 +228,39 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, /* do we need fixup here ?*/ asm volatile("eieio; tlbsync; ptesync" : : : "memory"); } + +void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, + unsigned long dw1) +{ + unsigned long old = be64_to_cpu(partition_tb[lpid].patb0); + + /* + * When ultravisor is enabled, the partition table is stored in secure + * memory and can only be accessed doing an ultravisor call. 
However, we
+	 * maintain a copy of the partition table in normal memory to allow Nest
+	 * MMU translations to occur (for normal VMs).
+	 *
+	 * Therefore, here we always update partition_tb, regardless of whether
+	 * we are running under an ultravisor or not.
+	 */
+	partition_tb[lpid].patb0 = cpu_to_be64(dw0);
+	partition_tb[lpid].patb1 = cpu_to_be64(dw1);
+
+	/*
+	 * If ultravisor is enabled, we do an ultravisor call to register the
+	 * partition table entry (PATE), which also do a global flush of TLBs
+	 * and partition table caches for the lpid. Otherwise, just do the
+	 * flush. The type of flush (hash or radix) depends on what the previous
+	 * use of the partition ID was, not the new use.
+	 */
+	if (firmware_has_feature(FW_FEATURE_ULTRAVISOR)) {
+		uv_register_pate(lpid, dw0, dw1);
+		pr_info("PATE registered by ultravisor: dw0 = 0x%lx, dw1 = 0x%lx\n",
+			dw0, dw1);
+	} else {
+		flush_partition(lpid, (old & PATB_HR));
+	}
+}
 EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
 
 static pmd_t *get_pmd_from_cache(struct mm_struct *mm)
-- cgit v1.2.3


From 5223134029a87db925ecc9449f9501bad391a52e Mon Sep 17 00:00:00 2001
From: Claudio Carvalho
Date: Thu, 22 Aug 2019 00:48:36 -0300
Subject: powerpc/mm: Write to PTCR only if ultravisor disabled

In ultravisor enabled systems, PTCR becomes ultravisor privileged only
for writing and an attempt to write to it will cause a Hypervisor
Emulation Assistance interrupt.

This patch uses the set_ptcr_when_no_uv() function to restrict PTCR
writing to only when ultravisor is disabled.

Signed-off-by: Claudio Carvalho
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20190822034838.27876-6-cclaudio@linux.ibm.com
---
 arch/powerpc/include/asm/ultravisor.h    | 12 ++++++++++++
 arch/powerpc/mm/book3s64/hash_utils.c    |  5 +++--
 arch/powerpc/mm/book3s64/pgtable.c       |  2 +-
 arch/powerpc/mm/book3s64/radix_pgtable.c |  8 +++++---
 4 files changed, 21 insertions(+), 6 deletions(-)

(limited to 'arch/powerpc/mm')

diff --git a/arch/powerpc/include/asm/ultravisor.h b/arch/powerpc/include/asm/ultravisor.h
index 6fe1f365dec8..d7aa97aa7834 100644
--- a/arch/powerpc/include/asm/ultravisor.h
+++ b/arch/powerpc/include/asm/ultravisor.h
@@ -10,10 +10,22 @@
 
 #include
 #include
+#include
 
 int early_init_dt_scan_ultravisor(unsigned long node, const char *uname,
				  int depth, void *data);
 
+/*
+ * In ultravisor enabled systems, PTCR becomes ultravisor privileged only for
+ * writing and an attempt to write to it will cause a Hypervisor Emulation
+ * Assistance interrupt.
+ */ +static inline void set_ptcr_when_no_uv(u64 val) +{ + if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR)) + mtspr(SPRN_PTCR, val); +} + static inline int uv_register_pate(u64 lpid, u64 dw0, u64 dw1) { return ucall_norets(UV_WRITE_PATE, lpid, dw0, dw1); diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index b8ad14bb1170..88aab920caca 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -61,6 +61,7 @@ #include #include #include +#include #include @@ -1075,8 +1076,8 @@ void hash__early_init_mmu_secondary(void) if (!cpu_has_feature(CPU_FTR_ARCH_300)) mtspr(SPRN_SDR1, _SDR1); else - mtspr(SPRN_PTCR, - __pa(partition_tb) | (PATB_SIZE_SHIFT - 12)); + set_ptcr_when_no_uv(__pa(partition_tb) | + (PATB_SIZE_SHIFT - 12)); } /* Initialize SLB */ slb_initialize(); diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 4173f6931009..01a7570c10e0 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -207,7 +207,7 @@ void __init mmu_partition_table_init(void) * 64 K size. */ ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12); - mtspr(SPRN_PTCR, ptcr); + set_ptcr_when_no_uv(ptcr); powernv_set_nmmu_ptcr(ptcr); } diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index b4ca9e95e678..5bc118b4a634 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -650,8 +651,9 @@ void radix__early_init_mmu_secondary(void) lpcr = mfspr(SPRN_LPCR); mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR); - mtspr(SPRN_PTCR, - __pa(partition_tb) | (PATB_SIZE_SHIFT - 12)); + set_ptcr_when_no_uv(__pa(partition_tb) | + (PATB_SIZE_SHIFT - 12)); + radix_init_amor(); } @@ -667,7 +669,7 @@ void radix__mmu_cleanup_all(void) if (!firmware_has_feature(FW_FEATURE_LPAR)) { lpcr = mfspr(SPRN_LPCR); mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT); - mtspr(SPRN_PTCR, 0); + set_ptcr_when_no_uv(0); powernv_set_nmmu_ptcr(0); radix__flush_tlb_all(); } -- cgit v1.2.3 From 9b123d1ea23701bc00ebf712f1e03b25b8195eeb Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 2 Aug 2019 20:57:01 +1000 Subject: powerpc/64s/exception: reduce page fault unnecessary loads This avoids 3 loads in the radix page fault case, 1 load in the hash fault case, and 2 loads in the hash miss page fault case. 
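The reshuffle works because the 64-bit ELF ABI passes the first integer
arguments in r3 through r6, so the exception code can leave the values
where it loaded them and branch straight into C. The reordered
prototype (taken from the hash_utils.c hunk below) is:

	/* r3 = trap number, r4 = faulting address (DAR), r5 = DSISR, r6 = MSR */
	int __hash_page(unsigned long trap, unsigned long ea,
			unsigned long dsisr, unsigned long msr);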
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190802105709.27696-37-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 38 ++++++++++++++++------------------- arch/powerpc/mm/book3s64/hash_utils.c | 4 ++-- 2 files changed, 19 insertions(+), 23 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f54a38417904..d0018dd17e0a 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1159,11 +1159,11 @@ EXC_COMMON_BEGIN(data_access_common) * EX_DAR and EX_DSISR have saved DAR/DSISR */ INT_COMMON 0x300, PACA_EXGEN, 1, 1, 1, 1, 1 - ld r12,_MSR(r1) - ld r3,_DAR(r1) - ld r4,_DSISR(r1) - li r5,0x300 + ld r4,_DAR(r1) + ld r5,_DSISR(r1) BEGIN_MMU_FTR_SECTION + ld r6,_MSR(r1) + li r3,0x300 b do_hash_page /* Try to handle as hpte fault */ MMU_FTR_SECTION_ELSE b handle_page_fault @@ -1211,11 +1211,11 @@ EXC_VIRT_END(instruction_access, 0x4400, 0x80) INT_KVM_HANDLER instruction_access, 0x400, EXC_STD, PACA_EXGEN, 0 EXC_COMMON_BEGIN(instruction_access_common) INT_COMMON 0x400, PACA_EXGEN, 1, 1, 1, 2, 2 - ld r12,_MSR(r1) - ld r3,_DAR(r1) - ld r4,_DSISR(r1) - li r5,0x400 + ld r4,_DAR(r1) + ld r5,_DSISR(r1) BEGIN_MMU_FTR_SECTION + ld r6,_MSR(r1) + li r3,0x400 b do_hash_page /* Try to handle as hpte fault */ MMU_FTR_SECTION_ELSE b handle_page_fault @@ -2260,7 +2260,7 @@ do_hash_page: #ifdef CONFIG_PPC_BOOK3S_64 lis r0,(DSISR_BAD_FAULT_64S | DSISR_DABRMATCH | DSISR_KEYFAULT)@h ori r0,r0,DSISR_BAD_FAULT_64S@l - and. r0,r4,r0 /* weird error? */ + and. r0,r5,r0 /* weird error? */ bne- handle_page_fault /* if not, try to insert a HPTE */ ld r11, PACA_THREAD_INFO(r13) lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */ @@ -2268,15 +2268,13 @@ do_hash_page: bne 77f /* then don't call hash_page now */ /* - * r3 contains the faulting address - * r4 msr - * r5 contains the trap number - * r6 contains dsisr + * r3 contains the trap number + * r4 contains the faulting address + * r5 contains dsisr + * r6 msr * * at return r3 = 0 for success, 1 for page fault, negative for error */ - mr r4,r12 - ld r6,_DSISR(r1) bl __hash_page /* build HPTE if possible */ cmpdi r3,0 /* see if __hash_page succeeded */ @@ -2286,16 +2284,15 @@ do_hash_page: /* Error */ blt- 13f - /* Reload DSISR into r4 for the DABR check below */ - ld r4,_DSISR(r1) + /* Reload DAR/DSISR into r4/r5 for the DABR check below */ + ld r4,_DAR(r1) + ld r5,_DSISR(r1) #endif /* CONFIG_PPC_BOOK3S_64 */ /* Here we have a page fault that hash_page can't handle. */ handle_page_fault: -11: andis. r0,r4,DSISR_DABRMATCH@h +11: andis. r0,r5,DSISR_DABRMATCH@h bne- handle_dabr_fault - ld r4,_DAR(r1) - ld r5,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD bl do_page_fault cmpdi r3,0 @@ -2342,7 +2339,6 @@ handle_dabr_fault: * the access, or panic if there isn't a handler. 
*/ 77: bl save_nvgprs - mr r4,r3 addi r3,r1,STACK_FRAME_OVERHEAD li r5,SIGSEGV bl bad_page_fault diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 4d0bb194ed70..fe99bba39b69 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1462,8 +1462,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap, } EXPORT_SYMBOL_GPL(hash_page); -int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap, - unsigned long dsisr) +int __hash_page(unsigned long trap, unsigned long ea, unsigned long dsisr, + unsigned long msr) { unsigned long access = _PAGE_PRESENT | _PAGE_READ; unsigned long flags = 0; -- cgit v1.2.3 From ed6546bdc61b7c4bd926cebd82ba52d056fcefa1 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 3 Sep 2019 01:29:26 +1000 Subject: powerpc/64s: remove register_process_table callback This callback is only required because the partition table init comes before process table allocation on powernv (aka bare metal aka native). Change the order to allocate the process table first, and remove the callback. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190902152931.17840-2-npiggin@gmail.com --- arch/powerpc/include/asm/book3s/64/mmu.h | 4 --- arch/powerpc/mm/book3s64/hash_utils.c | 6 ----- arch/powerpc/mm/book3s64/pgtable.c | 3 --- arch/powerpc/mm/book3s64/radix_pgtable.c | 45 +++++++++----------------------- arch/powerpc/platforms/pseries/lpar.c | 17 ++++++++++-- 5 files changed, 27 insertions(+), 48 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 23b83d3593e2..bb3deb76c951 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -206,7 +206,6 @@ extern int mmu_io_psize; void mmu_early_init_devtree(void); void hash__early_init_devtree(void); void radix__early_init_devtree(void); -extern void radix_init_native(void); extern void hash__early_init_mmu(void); extern void radix__early_init_mmu(void); static inline void early_init_mmu(void) @@ -238,9 +237,6 @@ static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base, first_memblock_size); } -extern int (*register_process_table)(unsigned long base, unsigned long page_size, - unsigned long tbl_size); - #ifdef CONFIG_PPC_PSERIES extern void radix_init_pseries(void); #else diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index fe99bba39b69..7aed27ea5361 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -859,12 +859,6 @@ static void __init htab_initialize(void) /* Using a hypervisor which owns the htab */ htab_address = NULL; _SDR1 = 0; - /* - * On POWER9, we need to do a H_REGISTER_PROC_TBL hcall - * to inform the hypervisor that we wish to use the HPT. 
- */ - if (cpu_has_feature(CPU_FTR_ARCH_300)) - register_process_table(0, 0, 0); #ifdef CONFIG_FA_DUMP /* * If firmware assisted dump is active firmware preserves diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 206b43ae4000..97f3be778c79 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -23,9 +23,6 @@ EXPORT_SYMBOL(__pmd_frag_nr); unsigned long __pmd_frag_size_shift; EXPORT_SYMBOL(__pmd_frag_size_shift); -int (*register_process_table)(unsigned long base, unsigned long page_size, - unsigned long tbl_size); - #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * This is called when relaxing access to a hugepage. It's also called in the page diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 71b649473045..83fa7864e8f4 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -34,19 +34,6 @@ unsigned int mmu_pid_bits; unsigned int mmu_base_pid; -static int native_register_process_table(unsigned long base, unsigned long pg_sz, - unsigned long table_size) -{ - unsigned long patb0, patb1; - - patb0 = be64_to_cpu(partition_tb[0].patb0); - patb1 = base | table_size | PATB_GR; - - mmu_partition_table_set_entry(0, patb0, patb1); - - return 0; -} - static __ref void *early_alloc_pgtable(unsigned long size, int nid, unsigned long region_start, unsigned long region_end) { @@ -381,18 +368,8 @@ static void __init radix_init_pgtable(void) */ rts_field = radix__get_tree_size(); process_tb->prtb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE); - /* - * Fill in the partition table. We are suppose to use effective address - * of process table here. But our linear mapping also enable us to use - * physical address here. 
- */ - register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12); + pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd); - asm volatile("ptesync" : : : "memory"); - asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : : - "r" (TLBIEL_INVAL_SET_LPID), "r" (0)); - asm volatile("eieio; tlbsync; ptesync" : : : "memory"); - trace_tlbie(0, 0, TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1); /* * The init_mm context is given the first available (non-zero) PID, @@ -413,22 +390,24 @@ static void __init radix_init_pgtable(void) static void __init radix_init_partition_table(void) { - unsigned long rts_field, dw0; + unsigned long rts_field, dw0, dw1; mmu_partition_table_init(); rts_field = radix__get_tree_size(); dw0 = rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE | PATB_HR; - mmu_partition_table_set_entry(0, dw0, 0); + dw1 = __pa(process_tb) | (PRTB_SIZE_SHIFT - 12) | PATB_GR; + mmu_partition_table_set_entry(0, dw0, dw1); + + asm volatile("ptesync" : : : "memory"); + asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : : + "r" (TLBIEL_INVAL_SET_LPID), "r" (0)); + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); + trace_tlbie(0, 0, TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1); pr_info("Initializing Radix MMU\n"); pr_info("Partition table %p\n", partition_tb); } -void __init radix_init_native(void) -{ - register_process_table = native_register_process_table; -} - static int __init get_idx_from_shift(unsigned int shift) { int idx = -1; @@ -622,8 +601,9 @@ void __init radix__early_init_mmu(void) __pmd_frag_nr = RADIX_PMD_FRAG_NR; __pmd_frag_size_shift = RADIX_PMD_FRAG_SIZE_SHIFT; + radix_init_pgtable(); + if (!firmware_has_feature(FW_FEATURE_LPAR)) { - radix_init_native(); lpcr = mfspr(SPRN_LPCR); mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR); radix_init_partition_table(); @@ -634,7 +614,6 @@ void __init radix__early_init_mmu(void) memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); - radix_init_pgtable(); /* Switch to the guard PID before turning on MMU */ radix__switch_mmu_context(NULL, &init_mm); if (cpu_has_feature(CPU_FTR_HVMODE)) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 4f76e5f30c97..b3205a6c950c 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -1531,16 +1531,29 @@ void __init hpte_init_pseries(void) mmu_hash_ops.flush_hash_range = pSeries_lpar_flush_hash_range; mmu_hash_ops.hpte_clear_all = pseries_hpte_clear_all; mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate; - register_process_table = pseries_lpar_register_process_table; if (firmware_has_feature(FW_FEATURE_HPT_RESIZE)) mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt; + + /* + * On POWER9, we need to do a H_REGISTER_PROC_TBL hcall + * to inform the hypervisor that we wish to use the HPT. 
+ */
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		pseries_lpar_register_process_table(0, 0, 0);
 }
 
 void radix_init_pseries(void)
 {
 	pr_info("Using radix MMU under hypervisor\n");
-	register_process_table = pseries_lpar_register_process_table;
+
+	pseries_lpar_register_process_table(__pa(process_tb),
+					0, PRTB_SIZE_SHIFT - 12);
+	asm volatile("ptesync" : : : "memory");
+	asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
+		     "r" (TLBIEL_INVAL_SET_LPID), "r" (0));
+	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+	trace_tlbie(0, 0, TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1);
 }
 
 #ifdef CONFIG_PPC_SMLPAR
-- cgit v1.2.3


From 99161de3a283af59f2813da6cbdccc1d2784c7de Mon Sep 17 00:00:00 2001
From: Nicholas Piggin
Date: Tue, 3 Sep 2019 01:29:27 +1000
Subject: powerpc/64s/radix: tidy up TLB flushing code

There should be no functional changes.

- Use calls to existing radix_tlb.c functions in flush_partition.

- Rename radix__flush_tlb_lpid to radix__flush_all_lpid and similar,
  because they flush everything, matching flush_all_mm rather than
  flush_tlb_mm for the lpid.

- Remove some unused radix_tlb.c flush primitives.

Signed-off-by: Nicholas Piggin
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20190902152931.17840-3-npiggin@gmail.com
---
 .../powerpc/include/asm/book3s/64/tlbflush-radix.h |  12 +--
 arch/powerpc/kvm/book3s_hv_nested.c                |   2 +-
 arch/powerpc/mm/book3s64/pgtable.c                 |  13 +--
 arch/powerpc/mm/book3s64/radix_tlb.c               | 117 +++++----------------
 4 files changed, 34 insertions(+), 110 deletions(-)

(limited to 'arch/powerpc/mm')

diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index 05147cecb8df..4ce795d30377 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -17,8 +17,8 @@ extern void radix__flush_tlb_lpid_page(unsigned int lpid,
					unsigned long addr,
					unsigned long page_size);
 extern void radix__flush_pwc_lpid(unsigned int lpid);
-extern void radix__flush_tlb_lpid(unsigned int lpid);
-extern void radix__local_flush_tlb_lpid_guest(unsigned int lpid);
+extern void radix__flush_all_lpid(unsigned int lpid);
+extern void radix__flush_all_lpid_guest(unsigned int lpid);
 #else
 static inline void radix__tlbiel_all(unsigned int action) { WARN_ON(1); };
 static inline void radix__flush_tlb_lpid_page(unsigned int lpid,
@@ -31,11 +31,7 @@ static inline void radix__flush_pwc_lpid(unsigned int lpid)
 {
 	WARN_ON(1);
 }
-static inline void radix__flush_tlb_lpid(unsigned int lpid)
-{
-	WARN_ON(1);
-}
-static inline void radix__local_flush_tlb_lpid_guest(unsigned int lpid)
+static inline void radix__flush_all_lpid(unsigned int lpid)
 {
 	WARN_ON(1);
 }
@@ -73,6 +69,4 @@ extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr);
 extern void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr);
 extern void radix__flush_tlb_all(void);
 
-extern void radix__local_flush_tlb_lpid(unsigned int lpid);
-
 #endif
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 735e0ac6f5b2..b3316da2f13e 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -398,7 +398,7 @@ static void kvmhv_flush_lpid(unsigned int lpid)
 	long rc;
 
 	if (!kvmhv_on_pseries()) {
-		radix__flush_tlb_lpid(lpid);
+		radix__flush_all_lpid(lpid);
 		return;
 	}
 
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index 97f3be778c79..c2b87c5ba50b 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ 
b/arch/powerpc/mm/book3s64/pgtable.c @@ -210,20 +210,17 @@ void __init mmu_partition_table_init(void) static void flush_partition(unsigned int lpid, bool radix) { - asm volatile("ptesync" : : : "memory"); if (radix) { - asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : : - "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); - asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : : - "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); - trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1); + radix__flush_all_lpid(lpid); + radix__flush_all_lpid_guest(lpid); } else { + asm volatile("ptesync" : : : "memory"); asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : : "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); + /* do we need fixup here ?*/ + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0); } - /* do we need fixup here ?*/ - asm volatile("eieio; tlbsync; ptesync" : : : "memory"); } void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 71f7fede2fa4..082f90d068ee 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -116,22 +116,6 @@ static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric) trace_tlbie(0, 0, rb, rs, ric, prs, r); } -static __always_inline void __tlbiel_lpid(unsigned long lpid, int set, - unsigned long ric) -{ - unsigned long rb,rs,prs,r; - - rb = PPC_BIT(52); /* IS = 2 */ - rb |= set << PPC_BITLSHIFT(51); - rs = 0; /* LPID comes from LPIDR */ - prs = 0; /* partition scoped */ - r = 1; /* radix format */ - - asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) - : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); - trace_tlbie(lpid, 1, rb, rs, ric, prs, r); -} - static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric) { unsigned long rb,rs,prs,r; @@ -146,23 +130,20 @@ static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric) trace_tlbie(lpid, 0, rb, rs, ric, prs, r); } -static __always_inline void __tlbiel_lpid_guest(unsigned long lpid, int set, - unsigned long ric) +static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric) { unsigned long rb,rs,prs,r; rb = PPC_BIT(52); /* IS = 2 */ - rb |= set << PPC_BITLSHIFT(51); - rs = 0; /* LPID comes from LPIDR */ + rs = lpid; prs = 1; /* process scoped */ r = 1; /* radix format */ - asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); - trace_tlbie(lpid, 1, rb, rs, ric, prs, r); + trace_tlbie(lpid, 0, rb, rs, ric, prs, r); } - static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid, unsigned long ap, unsigned long ric) { @@ -285,34 +266,6 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric) asm volatile("eieio; tlbsync; ptesync": : :"memory"); } -static inline void _tlbiel_lpid(unsigned long lpid, unsigned long ric) -{ - int set; - - VM_BUG_ON(mfspr(SPRN_LPID) != lpid); - - asm volatile("ptesync": : :"memory"); - - /* - * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL, - * also flush the entire Page Walk Cache. 
- */ - __tlbiel_lpid(lpid, 0, ric); - - /* For PWC, only one flush is needed */ - if (ric == RIC_FLUSH_PWC) { - asm volatile("ptesync": : :"memory"); - return; - } - - /* For the remaining sets, just flush the TLB */ - for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++) - __tlbiel_lpid(lpid, set, RIC_FLUSH_TLB); - - asm volatile("ptesync": : :"memory"); - asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST "; isync" : : :"memory"); -} - static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric) { asm volatile("ptesync": : :"memory"); @@ -337,35 +290,28 @@ static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric) asm volatile("eieio; tlbsync; ptesync": : :"memory"); } -static __always_inline void _tlbiel_lpid_guest(unsigned long lpid, unsigned long ric) +static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric) { - int set; - - VM_BUG_ON(mfspr(SPRN_LPID) != lpid); - - asm volatile("ptesync": : :"memory"); - /* - * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL, - * also flush the entire Page Walk Cache. + * Workaround the fact that the "ric" argument to __tlbie_pid + * must be a compile-time contraint to match the "i" constraint + * in the asm statement. */ - __tlbiel_lpid_guest(lpid, 0, ric); - - /* For PWC, only one flush is needed */ - if (ric == RIC_FLUSH_PWC) { - asm volatile("ptesync": : :"memory"); - return; + switch (ric) { + case RIC_FLUSH_TLB: + __tlbie_lpid_guest(lpid, RIC_FLUSH_TLB); + break; + case RIC_FLUSH_PWC: + __tlbie_lpid_guest(lpid, RIC_FLUSH_PWC); + break; + case RIC_FLUSH_ALL: + default: + __tlbie_lpid_guest(lpid, RIC_FLUSH_ALL); } - - /* For the remaining sets, just flush the TLB */ - for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++) - __tlbiel_lpid_guest(lpid, set, RIC_FLUSH_TLB); - - asm volatile("ptesync": : :"memory"); - asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory"); + fixup_tlbie_lpid(lpid); + asm volatile("eieio; tlbsync; ptesync": : :"memory"); } - static inline void __tlbiel_va_range(unsigned long start, unsigned long end, unsigned long pid, unsigned long page_size, unsigned long psize) @@ -835,32 +781,19 @@ EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid); /* * Flush partition scoped translations from LPID (=LPIDR) */ -void radix__flush_tlb_lpid(unsigned int lpid) +void radix__flush_all_lpid(unsigned int lpid) { _tlbie_lpid(lpid, RIC_FLUSH_ALL); } -EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid); +EXPORT_SYMBOL_GPL(radix__flush_all_lpid); /* - * Flush partition scoped translations from LPID (=LPIDR) + * Flush process scoped translations from LPID (=LPIDR) */ -void radix__local_flush_tlb_lpid(unsigned int lpid) +void radix__flush_all_lpid_guest(unsigned int lpid) { - _tlbiel_lpid(lpid, RIC_FLUSH_ALL); + _tlbie_lpid_guest(lpid, RIC_FLUSH_ALL); } -EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid); - -/* - * Flush process scoped translations from LPID (=LPIDR). - * Important difference, the guest normally manages its own translations, - * but some cases e.g., vCPU CPU migration require KVM to flush. 
- */ -void radix__local_flush_tlb_lpid_guest(unsigned int lpid) -{ - _tlbiel_lpid_guest(lpid, RIC_FLUSH_ALL); -} -EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid_guest); - static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start, unsigned long end, int psize); -- cgit v1.2.3 From fd13daea5f72605a0a7386ebedbb5ff2b2a48da4 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 3 Sep 2019 01:29:28 +1000 Subject: powerpc/64s: make mmu_partition_table_set_entry TLB flush optional No functional change. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190902152931.17840-4-npiggin@gmail.com --- arch/powerpc/include/asm/mmu.h | 2 +- arch/powerpc/kvm/book3s_hv_nested.c | 2 +- arch/powerpc/mm/book3s64/hash_utils.c | 2 +- arch/powerpc/mm/book3s64/pgtable.c | 4 ++-- arch/powerpc/mm/book3s64/radix_pgtable.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index ba94ce8c22d7..0699cfeeb8c9 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -257,7 +257,7 @@ extern void radix__mmu_cleanup_all(void); /* Functions for creating and updating partition table on POWER9 */ extern void mmu_partition_table_init(void); extern void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, - unsigned long dw1); + unsigned long dw1, bool flush); #endif /* CONFIG_PPC64 */ struct mm_struct; diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index b3316da2f13e..fff90f2c3de2 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -411,7 +411,7 @@ static void kvmhv_flush_lpid(unsigned int lpid) void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1) { if (!kvmhv_on_pseries()) { - mmu_partition_table_set_entry(lpid, dw0, dw1); + mmu_partition_table_set_entry(lpid, dw0, dw1, true); return; } diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 7aed27ea5361..b73d08b54d12 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -825,7 +825,7 @@ static void __init hash_init_partition_table(phys_addr_t hash_table, * For now, UPRT is 0 and we have no segment table. 
*/ htab_size = __ilog2(htab_size) - 18; - mmu_partition_table_set_entry(0, hash_table | htab_size, 0); + mmu_partition_table_set_entry(0, hash_table | htab_size, 0, true); pr_info("Partition table %p\n", partition_tb); } diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index c2b87c5ba50b..6fab9c0bbbaf 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -224,7 +224,7 @@ static void flush_partition(unsigned int lpid, bool radix) } void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, - unsigned long dw1) + unsigned long dw1, bool flush) { unsigned long old = be64_to_cpu(partition_tb[lpid].patb0); @@ -251,7 +251,7 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, uv_register_pate(lpid, dw0, dw1); pr_info("PATE registered by ultravisor: dw0 = 0x%lx, dw1 = 0x%lx\n", dw0, dw1); - } else { + } else if (flush) { flush_partition(lpid, (old & PATB_HR)); } } diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 83fa7864e8f4..078a7eeec1f5 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -396,7 +396,7 @@ static void __init radix_init_partition_table(void) rts_field = radix__get_tree_size(); dw0 = rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE | PATB_HR; dw1 = __pa(process_tb) | (PRTB_SIZE_SHIFT - 12) | PATB_GR; - mmu_partition_table_set_entry(0, dw0, dw1); + mmu_partition_table_set_entry(0, dw0, dw1, true); asm volatile("ptesync" : : : "memory"); asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : : -- cgit v1.2.3 From 7e71c428a60e2029585be7d7cc22775f442e5b2c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 3 Sep 2019 01:29:29 +1000 Subject: powerpc/64s/pseries: radix flush translations before MMU is enabled at boot Radix guests are responsible for managing their own translation caches, so make them match bare metal radix and hash, and make each CPU flush all its translations right before enabling its MMU. Radix guests may not flush partition scope translations, so in tlbiel_all, make these flushes conditional on CPU_FTR_HVMODE. Process scope translations are the only type visible to the guest. 
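For reference, a condensed sketch of the resulting tlbiel_all()
behaviour on ISA v3.00 (the full version is in the radix_tlb.c hunk
below):

	if (early_cpu_has_feature(CPU_FTR_HVMODE)) {
		/* MSR[HV] only: partition scoped sets first */
		tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
		for (set = 1; set < num_sets; set++)
			tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0);
	}

	/* process scoped sets, the only kind a radix guest may flush */
	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);
	for (set = 1; set < num_sets; set++)
		tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);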
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190902152931.17840-5-npiggin@gmail.com --- arch/powerpc/mm/book3s64/radix_pgtable.c | 6 ++---- arch/powerpc/mm/book3s64/radix_tlb.c | 12 ++++++++---- 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 078a7eeec1f5..e1e711c4704a 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -616,8 +616,7 @@ void __init radix__early_init_mmu(void) /* Switch to the guard PID before turning on MMU */ radix__switch_mmu_context(NULL, &init_mm); - if (cpu_has_feature(CPU_FTR_HVMODE)) - tlbiel_all(); + tlbiel_all(); } void radix__early_init_mmu_secondary(void) @@ -637,8 +636,7 @@ void radix__early_init_mmu_secondary(void) } radix__switch_mmu_context(NULL, &init_mm); - if (cpu_has_feature(CPU_FTR_HVMODE)) - tlbiel_all(); + tlbiel_all(); } void radix__mmu_cleanup_all(void) diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 082f90d068ee..f9cf8ae59831 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -51,11 +51,15 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) * and partition table entries. Then flush the remaining sets of the * TLB. */ - tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0); - for (set = 1; set < num_sets; set++) - tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0); - /* Do the same for process scoped entries. */ + if (early_cpu_has_feature(CPU_FTR_HVMODE)) { + /* MSR[HV] should flush partition scope translations first. */ + tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0); + for (set = 1; set < num_sets; set++) + tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0); + } + + /* Flush process scoped entries. */ tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1); for (set = 1; set < num_sets; set++) tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1); -- cgit v1.2.3 From 7d805accbec57a151bd0dd305a1109feebdfd4a4 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 3 Sep 2019 01:29:30 +1000 Subject: powerpc/64s: remove unnecessary translation cache flushes at boot The various translation structure invalidations performed in early boot when the MMU is off are not required, because everything is invalidated immediately before a CPU first enables its MMU (see early_init_mmu and early_init_mmu_secondary). Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190902152931.17840-6-npiggin@gmail.com --- arch/powerpc/mm/book3s64/hash_utils.c | 2 +- arch/powerpc/mm/book3s64/pgtable.c | 5 +++++ arch/powerpc/mm/book3s64/radix_pgtable.c | 8 +------- arch/powerpc/platforms/pseries/lpar.c | 5 ----- 4 files changed, 7 insertions(+), 13 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index b73d08b54d12..7684a596158b 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -825,7 +825,7 @@ static void __init hash_init_partition_table(phys_addr_t hash_table, * For now, UPRT is 0 and we have no segment table. 
*/ htab_size = __ilog2(htab_size) - 18; - mmu_partition_table_set_entry(0, hash_table | htab_size, 0, true); + mmu_partition_table_set_entry(0, hash_table | htab_size, 0, false); pr_info("Partition table %p\n", partition_tb); } diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 6fab9c0bbbaf..351eb78eed55 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -252,6 +252,11 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, pr_info("PATE registered by ultravisor: dw0 = 0x%lx, dw1 = 0x%lx\n", dw0, dw1); } else if (flush) { + /* + * Boot does not need to flush, because MMU is off and each + * CPU does a tlbiel_all() before switching them on, which + * flushes everything. + */ flush_partition(lpid, (old & PATB_HR)); } } diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index e1e711c4704a..0d1107fb34c1 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -396,13 +396,7 @@ static void __init radix_init_partition_table(void) rts_field = radix__get_tree_size(); dw0 = rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE | PATB_HR; dw1 = __pa(process_tb) | (PRTB_SIZE_SHIFT - 12) | PATB_GR; - mmu_partition_table_set_entry(0, dw0, dw1, true); - - asm volatile("ptesync" : : : "memory"); - asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : : - "r" (TLBIEL_INVAL_SET_LPID), "r" (0)); - asm volatile("eieio; tlbsync; ptesync" : : : "memory"); - trace_tlbie(0, 0, TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1); + mmu_partition_table_set_entry(0, dw0, dw1, false); pr_info("Initializing Radix MMU\n"); pr_info("Partition table %p\n", partition_tb); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index b3205a6c950c..36b846f6e74e 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -1549,11 +1549,6 @@ void radix_init_pseries(void) pseries_lpar_register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12); - asm volatile("ptesync" : : : "memory"); - asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : : - "r" (TLBIEL_INVAL_SET_LPID), "r" (0)); - asm volatile("eieio; tlbsync; ptesync" : : : "memory"); - trace_tlbie(0, 0, TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1); } #ifdef CONFIG_PPC_SMLPAR -- cgit v1.2.3 From 2275d7b5754a573ffb2ca9e40bd0546eeb986696 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 3 Sep 2019 01:29:31 +1000 Subject: powerpc/64s/radix: introduce options to disable use of the tlbie instruction Introduce two options to control the use of the tlbie instruction. A boot time option which completely disables the kernel using the instruction, this is currently incompatible with HASH MMU, KVM, and coherent accelerators. And a debugfs option can be switched at runtime and avoids using tlbie for invalidating CPU TLBs for normal process and kernel address mappings. Coherent accelerators are still managed with tlbie, as will KVM partition scope translations. Cross-CPU TLB flushing is implemented with IPIs and tlbiel. This is a basic implementation which does not attempt to make any optimisation beyond the tlbie implementation. This is useful for performance testing among other things. For example in certain situations on large systems, using IPIs may be faster than tlbie as they can be directed rather than broadcast. Later we may also take advantage of the IPIs to do more interesting things such as trim the mm cpumask more aggressively. 
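For reference, the two knobs can be exercised as follows (assuming
debugfs is mounted at /sys/kernel/debug; the file name comes from the
debugfs_create_bool() call in the patch below):

	# on the kernel command line: never use tlbie (radix only, not
	# compatible with hash MMU, KVM or coherent accelerators)
	disable_tlbie

	# at runtime: keep tlbie for nMMU and KVM partition scope, but
	# switch CPU TLB management between tlbie and IPIs + tlbiel
	echo 0 > /sys/kernel/debug/powerpc/tlbie_enabled
	echo 1 > /sys/kernel/debug/powerpc/tlbie_enabled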
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190902152931.17840-7-npiggin@gmail.com --- Documentation/admin-guide/kernel-parameters.txt | 4 + arch/powerpc/include/asm/book3s/64/tlbflush.h | 9 ++ arch/powerpc/kvm/book3s_hv.c | 6 + arch/powerpc/mm/book3s64/pgtable.c | 47 ++++++ arch/powerpc/mm/book3s64/radix_tlb.c | 190 +++++++++++++++++++++--- drivers/misc/cxl/main.c | 4 + drivers/misc/ocxl/main.c | 4 + 7 files changed, 246 insertions(+), 18 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 4923d8f726e8..3cd757f9feaa 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -860,6 +860,10 @@ disable_radix [PPC] Disable RADIX MMU mode on POWER9 + disable_tlbie [PPC] + Disable TLBIE instruction. Currently does not work + with KVM, with HASH MMU, or with coherent accelerators. + disable_cpu_apicid= [X86,APIC,SMP] Format: The number of initial APIC ID for the diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h index ebf572ea621e..7aa8195b6cff 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -162,4 +162,13 @@ static inline void flush_tlb_pgtable(struct mmu_gather *tlb, unsigned long addre radix__flush_tlb_pwc(tlb, address); } + +extern bool tlbie_capable; +extern bool tlbie_enabled; + +static inline bool cputlb_use_tlbie(void) +{ + return tlbie_enabled; +} + #endif /* _ASM_POWERPC_BOOK3S_64_TLBFLUSH_H */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index cde3f5a4b3e4..3cdaa2a09a19 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -5462,6 +5462,12 @@ static int kvmppc_radix_possible(void) static int kvmppc_book3s_init_hv(void) { int r; + + if (!tlbie_capable) { + pr_err("KVM-HV: Host does not support TLBIE\n"); + return -ENODEV; + } + /* * FIXME!! Do we need to check on all cpus ? */ diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 351eb78eed55..75483b40fcb1 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -469,3 +470,49 @@ int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, return true; } + +/* + * Does the CPU support tlbie? + */ +bool tlbie_capable __read_mostly = true; +EXPORT_SYMBOL(tlbie_capable); + +/* + * Should tlbie be used for management of CPU TLBs, for kernel and process + * address spaces? tlbie may still be used for nMMU accelerators, and for KVM + * guest address spaces. + */ +bool tlbie_enabled __read_mostly = true; + +static int __init setup_disable_tlbie(char *str) +{ + if (!radix_enabled()) { + pr_err("disable_tlbie: Unable to disable TLBIE with Hash MMU.\n"); + return 1; + } + + tlbie_capable = false; + tlbie_enabled = false; + + return 1; +} +__setup("disable_tlbie", setup_disable_tlbie); + +static int __init pgtable_debugfs_setup(void) +{ + if (!tlbie_capable) + return 0; + + /* + * There is no locking vs tlb flushing when changing this value. + * The tlb flushers will see one value or another, and use either + * tlbie or tlbiel with IPIs. In both cases the TLBs will be + * invalidated as expected. 
+ */ + debugfs_create_bool("tlbie_enabled", 0600, + powerpc_debugfs_root, + &tlbie_enabled); + + return 0; +} +arch_initcall(pgtable_debugfs_setup); diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index f9cf8ae59831..631be42abd33 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -270,6 +270,39 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric) asm volatile("eieio; tlbsync; ptesync": : :"memory"); } +struct tlbiel_pid { + unsigned long pid; + unsigned long ric; +}; + +static void do_tlbiel_pid(void *info) +{ + struct tlbiel_pid *t = info; + + if (t->ric == RIC_FLUSH_TLB) + _tlbiel_pid(t->pid, RIC_FLUSH_TLB); + else if (t->ric == RIC_FLUSH_PWC) + _tlbiel_pid(t->pid, RIC_FLUSH_PWC); + else + _tlbiel_pid(t->pid, RIC_FLUSH_ALL); +} + +static inline void _tlbiel_pid_multicast(struct mm_struct *mm, + unsigned long pid, unsigned long ric) +{ + struct cpumask *cpus = mm_cpumask(mm); + struct tlbiel_pid t = { .pid = pid, .ric = ric }; + + on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1); + /* + * Always want the CPU translations to be invalidated with tlbiel in + * these paths, so while coprocessors must use tlbie, we can not + * optimise away the tlbiel component. + */ + if (atomic_read(&mm->context.copros) > 0) + _tlbie_pid(pid, RIC_FLUSH_ALL); +} + static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric) { asm volatile("ptesync": : :"memory"); @@ -370,6 +403,53 @@ static __always_inline void _tlbie_va(unsigned long va, unsigned long pid, asm volatile("eieio; tlbsync; ptesync": : :"memory"); } +struct tlbiel_va { + unsigned long pid; + unsigned long va; + unsigned long psize; + unsigned long ric; +}; + +static void do_tlbiel_va(void *info) +{ + struct tlbiel_va *t = info; + + if (t->ric == RIC_FLUSH_TLB) + _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_TLB); + else if (t->ric == RIC_FLUSH_PWC) + _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_PWC); + else + _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_ALL); +} + +static inline void _tlbiel_va_multicast(struct mm_struct *mm, + unsigned long va, unsigned long pid, + unsigned long psize, unsigned long ric) +{ + struct cpumask *cpus = mm_cpumask(mm); + struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric }; + on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1); + if (atomic_read(&mm->context.copros) > 0) + _tlbie_va(va, pid, psize, RIC_FLUSH_TLB); +} + +struct tlbiel_va_range { + unsigned long pid; + unsigned long start; + unsigned long end; + unsigned long page_size; + unsigned long psize; + bool also_pwc; +}; + +static void do_tlbiel_va_range(void *info) +{ + struct tlbiel_va_range *t = info; + + _tlbiel_va_range(t->start, t->end, t->pid, t->page_size, + t->psize, t->also_pwc); +} + static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid, unsigned long psize, unsigned long ric) { @@ -393,6 +473,21 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end, asm volatile("eieio; tlbsync; ptesync": : :"memory"); } +static inline void _tlbiel_va_range_multicast(struct mm_struct *mm, + unsigned long start, unsigned long end, + unsigned long pid, unsigned long page_size, + unsigned long psize, bool also_pwc) +{ + struct cpumask *cpus = mm_cpumask(mm); + struct tlbiel_va_range t = { .start = start, .end = end, + .pid = pid, .page_size = page_size, + .psize = psize, .also_pwc = also_pwc }; + + on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1); + if (atomic_read(&mm->context.copros) > 0) + 
_tlbie_va_range(start, end, pid, page_size, psize, also_pwc); +} + /* * Base TLB flushing operations: * @@ -530,10 +625,14 @@ void radix__flush_tlb_mm(struct mm_struct *mm) goto local; } - if (mm_needs_flush_escalation(mm)) - _tlbie_pid(pid, RIC_FLUSH_ALL); - else - _tlbie_pid(pid, RIC_FLUSH_TLB); + if (cputlb_use_tlbie()) { + if (mm_needs_flush_escalation(mm)) + _tlbie_pid(pid, RIC_FLUSH_ALL); + else + _tlbie_pid(pid, RIC_FLUSH_TLB); + } else { + _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB); + } } else { local: _tlbiel_pid(pid, RIC_FLUSH_TLB); @@ -559,7 +658,10 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm) goto local; } } - _tlbie_pid(pid, RIC_FLUSH_ALL); + if (cputlb_use_tlbie()) + _tlbie_pid(pid, RIC_FLUSH_ALL); + else + _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL); } else { local: _tlbiel_pid(pid, RIC_FLUSH_ALL); @@ -594,7 +696,10 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, exit_flush_lazy_tlbs(mm); goto local; } - _tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB); + if (cputlb_use_tlbie()) + _tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB); + else + _tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB); } else { local: _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB); @@ -616,6 +721,24 @@ EXPORT_SYMBOL(radix__flush_tlb_page); #define radix__flush_all_mm radix__local_flush_all_mm #endif /* CONFIG_SMP */ +static void do_tlbiel_kernel(void *info) +{ + _tlbiel_pid(0, RIC_FLUSH_ALL); +} + +static inline void _tlbiel_kernel_broadcast(void) +{ + on_each_cpu(do_tlbiel_kernel, NULL, 1); + if (tlbie_capable) { + /* + * Coherent accelerators don't refcount kernel memory mappings, + * so have to always issue a tlbie for them. This is quite a + * slow path anyway. + */ + _tlbie_pid(0, RIC_FLUSH_ALL); + } +} + /* * If kernel TLBIs ever become local rather than global, then * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it @@ -623,7 +746,10 @@ EXPORT_SYMBOL(radix__flush_tlb_page); */ void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end) { - _tlbie_pid(0, RIC_FLUSH_ALL); + if (cputlb_use_tlbie()) + _tlbie_pid(0, RIC_FLUSH_ALL); + else + _tlbiel_kernel_broadcast(); } EXPORT_SYMBOL(radix__flush_tlb_kernel_range); @@ -679,10 +805,14 @@ is_local: if (local) { _tlbiel_pid(pid, RIC_FLUSH_TLB); } else { - if (mm_needs_flush_escalation(mm)) - _tlbie_pid(pid, RIC_FLUSH_ALL); - else - _tlbie_pid(pid, RIC_FLUSH_TLB); + if (cputlb_use_tlbie()) { + if (mm_needs_flush_escalation(mm)) + _tlbie_pid(pid, RIC_FLUSH_ALL); + else + _tlbie_pid(pid, RIC_FLUSH_TLB); + } else { + _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB); + } } } else { bool hflush = flush_all_sizes; @@ -707,8 +837,8 @@ is_local: gflush = false; } - asm volatile("ptesync": : :"memory"); if (local) { + asm volatile("ptesync": : :"memory"); __tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize); if (hflush) __tlbiel_va_range(hstart, hend, pid, @@ -717,7 +847,8 @@ is_local: __tlbiel_va_range(gstart, gend, pid, PUD_SIZE, MMU_PAGE_1G); asm volatile("ptesync": : :"memory"); - } else { + } else if (cputlb_use_tlbie()) { + asm volatile("ptesync": : :"memory"); __tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize); if (hflush) __tlbie_va_range(hstart, hend, pid, @@ -727,6 +858,15 @@ is_local: PUD_SIZE, MMU_PAGE_1G); fixup_tlbie(); asm volatile("eieio; tlbsync; ptesync": : :"memory"); + } else { + _tlbiel_va_range_multicast(mm, + start, end, pid, page_size, mmu_virtual_psize, false); + if (hflush) + _tlbiel_va_range_multicast(mm, + hstart, hend, pid, 
PMD_SIZE, MMU_PAGE_2M, false); + if (gflush) + _tlbiel_va_range_multicast(mm, + gstart, gend, pid, PUD_SIZE, MMU_PAGE_1G, false); } } preempt_enable(); @@ -903,16 +1043,26 @@ is_local: if (local) { _tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); } else { - if (mm_needs_flush_escalation(mm)) - also_pwc = true; + if (cputlb_use_tlbie()) { + if (mm_needs_flush_escalation(mm)) + also_pwc = true; + + _tlbie_pid(pid, + also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); + } else { + _tlbiel_pid_multicast(mm, pid, + also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); + } - _tlbie_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); } } else { if (local) _tlbiel_va_range(start, end, pid, page_size, psize, also_pwc); - else + else if (cputlb_use_tlbie()) _tlbie_va_range(start, end, pid, page_size, psize, also_pwc); + else + _tlbiel_va_range_multicast(mm, + start, end, pid, page_size, psize, also_pwc); } preempt_enable(); } @@ -954,7 +1104,11 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) exit_flush_lazy_tlbs(mm); goto local; } - _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); + if (cputlb_use_tlbie()) + _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); + else + _tlbiel_va_range_multicast(mm, + addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); } else { local: _tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index 482a2c1b340a..43b312d06e3e 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -18,6 +18,7 @@ #include #include +#include #include #include "cxl.h" @@ -315,6 +316,9 @@ static int __init init_cxl(void) { int rc = 0; + if (!tlbie_capable) + return -EINVAL; + if ((rc = cxl_file_init())) return rc; diff --git a/drivers/misc/ocxl/main.c b/drivers/misc/ocxl/main.c index 7210d9e059be..ef73cf35dda2 100644 --- a/drivers/misc/ocxl/main.c +++ b/drivers/misc/ocxl/main.c @@ -2,12 +2,16 @@ // Copyright 2017 IBM Corp. #include #include +#include #include "ocxl_internal.h" static int __init init_ocxl(void) { int rc = 0; + if (!tlbie_capable) + return -EINVAL; + rc = ocxl_file_init(); if (rc) return rc; -- cgit v1.2.3 From dac39f788546e2eb9838eca551ccd6fd413e75c7 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 11 Sep 2019 21:57:44 +1000 Subject: powerpc/64s: Remove overlaps_kvm_tmp() kvm_tmp is now in .text and so doesn't need a special overlap check. 
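In other words, any range inside kvm_tmp now falls within the kernel text bounds and is caught by the generic check that remains in sections.h (quoted here for reference from the file being patched):

	static inline int overlaps_kernel_text(unsigned long start, unsigned long end)
	{
		return start < (unsigned long)__init_end &&
			(unsigned long)_stext < end;
	}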
Signed-off-by: Michael Ellerman Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190911115746.12433-2-mpe@ellerman.id.au --- arch/powerpc/include/asm/sections.h | 11 ----------- arch/powerpc/mm/book3s64/hash_utils.c | 4 ---- 2 files changed, 15 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index 4a1664a8658d..5a9b6eb651b6 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -61,17 +61,6 @@ static inline int overlaps_kernel_text(unsigned long start, unsigned long end) (unsigned long)_stext < end; } -static inline int overlaps_kvm_tmp(unsigned long start, unsigned long end) -{ -#ifdef CONFIG_KVM_GUEST - extern char kvm_tmp[]; - return start < (unsigned long)kvm_tmp && - (unsigned long)&kvm_tmp[1024 * 1024] < end; -#else - return 0; -#endif -} - #ifdef PPC64_ELF_ABI_v1 #define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1 diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 7684a596158b..3410ea9f4de1 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -273,10 +273,6 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, if (overlaps_kernel_text(vaddr, vaddr + step)) tprot &= ~HPTE_R_N; - /* Make kvm guest trampolines executable */ - if (overlaps_kvm_tmp(vaddr, vaddr + step)) - tprot &= ~HPTE_R_N; - /* * If relocatable, check if it overlaps interrupt vectors that * are copied down to real 0. For relocatable kernel -- cgit v1.2.3 From ec5b705c48365549c483fab17d68d15d83bef265 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 23 Aug 2019 10:22:00 -0400 Subject: powerpc/mm/radix: remove useless kernel messages Booting a POWER9 PowerNV system generates the messages below containing "____ptrval____", because pointers printed without a specifier extension (i.e. an unadorned %p) are hashed to prevent leaking information about the kernel memory layout. radix-mmu: Initializing Radix MMU radix-mmu: Partition table (____ptrval____) radix-mmu: Mapped 0x0000000000000000-0x0000000040000000 with 1.00 GiB pages (exec) radix-mmu: Mapped 0x0000000040000000-0x0000002000000000 with 1.00 GiB pages radix-mmu: Mapped 0x0000200000000000-0x0000202000000000 with 1.00 GiB pages radix-mmu: Process table (____ptrval____) and radix root for kernel: (____ptrval____) Signed-off-by: Qian Cai Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1566570120-16529-1-git-send-email-cai@lca.pw --- arch/powerpc/mm/book3s64/radix_pgtable.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 0d1107fb34c1..3a1fbf9cb8f8 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -369,8 +369,6 @@ static void __init radix_init_pgtable(void) rts_field = radix__get_tree_size(); process_tb->prtb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE); - pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd); - /* * The init_mm context is given the first available (non-zero) PID, * which is the "guard PID" and contains no page table.
PIDR should @@ -399,7 +397,6 @@ static void __init radix_init_partition_table(void) mmu_partition_table_set_entry(0, dw0, dw1, false); pr_info("Initializing Radix MMU\n"); - pr_info("Partition table %p\n", partition_tb); } static int __init get_idx_from_shift(unsigned int shift) -- cgit v1.2.3
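For background on the commit above: a plain %p hashes the pointer value before printing, and early in boot, before the hashing key is initialized, it prints the "(____ptrval____)" placeholder seen in the log; the %px extension prints the raw address, but that would reintroduce exactly the layout leak being avoided, so the messages are removed instead. A minimal illustration:

	pr_info("Partition table %p\n", partition_tb);	/* hashed value, or "(____ptrval____)" early in boot */
	pr_info("Partition table %px\n", partition_tb);	/* raw address: leaks kernel memory layout */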