diff options
Diffstat (limited to 'mm/memory.c')
-rw-r--r-- | mm/memory.c | 244 |
1 files changed, 147 insertions, 97 deletions
diff --git a/mm/memory.c b/mm/memory.c index 7206a634270b..3ff4394a2e1b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -238,23 +238,13 @@ void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, __tlb_reset_range(tlb); } -static void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) +static void tlb_flush_mmu_free(struct mmu_gather *tlb) { - if (!tlb->end) - return; + struct mmu_gather_batch *batch; - tlb_flush(tlb); - mmu_notifier_invalidate_range(tlb->mm, tlb->start, tlb->end); #ifdef CONFIG_HAVE_RCU_TABLE_FREE tlb_table_flush(tlb); #endif - __tlb_reset_range(tlb); -} - -static void tlb_flush_mmu_free(struct mmu_gather *tlb) -{ - struct mmu_gather_batch *batch; - for (batch = &tlb->local; batch && batch->nr; batch = batch->next) { free_pages_and_swap_cache(batch->pages, batch->nr); batch->nr = 0; @@ -330,6 +320,21 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_ * See the comment near struct mmu_table_batch. */ +/* + * If we want tlb_remove_table() to imply TLB invalidates. + */ +static inline void tlb_table_invalidate(struct mmu_gather *tlb) +{ +#ifdef CONFIG_HAVE_RCU_TABLE_INVALIDATE + /* + * Invalidate page-table caches used by hardware walkers. Then we still + * need to RCU-sched wait while freeing the pages because software + * walkers can still be in-flight. + */ + tlb_flush_mmu_tlbonly(tlb); +#endif +} + static void tlb_remove_table_smp_sync(void *arg) { /* Simply deliver the interrupt */ @@ -366,6 +371,7 @@ void tlb_table_flush(struct mmu_gather *tlb) struct mmu_table_batch **batch = &tlb->batch; if (*batch) { + tlb_table_invalidate(tlb); call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); *batch = NULL; } @@ -375,23 +381,16 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table) { struct mmu_table_batch **batch = &tlb->batch; - /* - * When there's less then two users of this mm there cannot be a - * concurrent page-table walk. - */ - if (atomic_read(&tlb->mm->mm_users) < 2) { - __tlb_remove_table(table); - return; - } - if (*batch == NULL) { *batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN); if (*batch == NULL) { + tlb_table_invalidate(tlb); tlb_remove_table_one(table); return; } (*batch)->nr = 0; } + (*batch)->tables[(*batch)->nr++] = table; if ((*batch)->nr == MAX_TABLE_BATCH) tlb_table_flush(tlb); @@ -853,6 +852,10 @@ struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr, return NULL; } } + + if (pte_devmap(pte)) + return NULL; + print_bad_pte(vma, addr, pte, NULL); return NULL; } @@ -917,6 +920,8 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, } } + if (pmd_devmap(pmd)) + return NULL; if (is_zero_pfn(pfn)) return NULL; if (unlikely(pfn > highest_memmap_pfn)) @@ -1417,11 +1422,9 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, do { next = pmd_addr_end(addr, end); if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) { - if (next - addr != HPAGE_PMD_SIZE) { - VM_BUG_ON_VMA(vma_is_anonymous(vma) && - !rwsem_is_locked(&tlb->mm->mmap_sem), vma); + if (next - addr != HPAGE_PMD_SIZE) __split_huge_pmd(vma, pmd, addr, false, NULL); - } else if (zap_huge_pmd(tlb, vma, pmd, addr)) + else if (zap_huge_pmd(tlb, vma, pmd, addr)) goto next; /* fall through */ } @@ -1603,20 +1606,8 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start, tlb_gather_mmu(&tlb, mm, start, end); update_hiwater_rss(mm); mmu_notifier_invalidate_range_start(mm, start, end); - for ( ; vma && vma->vm_start < end; vma = vma->vm_next) { + for ( ; vma && vma->vm_start < end; vma = vma->vm_next) unmap_single_vma(&tlb, vma, start, end, NULL); - - /* - * zap_page_range does not specify whether mmap_sem should be - * held for read or write. That allows parallel zap_page_range - * operations to unmap a PTE and defer a flush meaning that - * this call observes pte_none and fails to flush the TLB. - * Rather than adding a complex API, ensure that no stale - * TLB entries exist when this call returns. - */ - flush_tlb_range(vma, start, end); - } - mmu_notifier_invalidate_range_end(mm, start, end); tlb_finish_mmu(&tlb, start, end); } @@ -1886,6 +1877,9 @@ int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, if (addr < vma->vm_start || addr >= vma->vm_end) return -EFAULT; + if (!pfn_modify_allowed(pfn, pgprot)) + return -EACCES; + track_pfn_insert(vma, &pgprot, __pfn_to_pfn_t(pfn, PFN_DEV)); ret = insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot, @@ -1921,6 +1915,9 @@ static int __vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, track_pfn_insert(vma, &pgprot, pfn); + if (!pfn_modify_allowed(pfn_t_to_pfn(pfn), pgprot)) + return -EACCES; + /* * If we don't have pte special, then we have to use the pfn_valid() * based VM_MIXEDMAP scheme (see vm_normal_page), and thus we *must* @@ -1982,6 +1979,7 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd, { pte_t *pte; spinlock_t *ptl; + int err = 0; pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); if (!pte) @@ -1989,12 +1987,16 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd, arch_enter_lazy_mmu_mode(); do { BUG_ON(!pte_none(*pte)); + if (!pfn_modify_allowed(pfn, prot)) { + err = -EACCES; + break; + } set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot))); pfn++; } while (pte++, addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); pte_unmap_unlock(pte - 1, ptl); - return 0; + return err; } static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud, @@ -2003,6 +2005,7 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud, { pmd_t *pmd; unsigned long next; + int err; pfn -= addr >> PAGE_SHIFT; pmd = pmd_alloc(mm, pud, addr); @@ -2011,9 +2014,10 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud, VM_BUG_ON(pmd_trans_huge(*pmd)); do { next = pmd_addr_end(addr, end); - if (remap_pte_range(mm, pmd, addr, next, - pfn + (addr >> PAGE_SHIFT), prot)) - return -ENOMEM; + err = remap_pte_range(mm, pmd, addr, next, + pfn + (addr >> PAGE_SHIFT), prot); + if (err) + return err; } while (pmd++, addr = next, addr != end); return 0; } @@ -2024,6 +2028,7 @@ static inline int remap_pud_range(struct mm_struct *mm, p4d_t *p4d, { pud_t *pud; unsigned long next; + int err; pfn -= addr >> PAGE_SHIFT; pud = pud_alloc(mm, p4d, addr); @@ -2031,9 +2036,10 @@ static inline int remap_pud_range(struct mm_struct *mm, p4d_t *p4d, return -ENOMEM; do { next = pud_addr_end(addr, end); - if (remap_pmd_range(mm, pud, addr, next, - pfn + (addr >> PAGE_SHIFT), prot)) - return -ENOMEM; + err = remap_pmd_range(mm, pud, addr, next, + pfn + (addr >> PAGE_SHIFT), prot); + if (err) + return err; } while (pud++, addr = next, addr != end); return 0; } @@ -2044,6 +2050,7 @@ static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd, { p4d_t *p4d; unsigned long next; + int err; pfn -= addr >> PAGE_SHIFT; p4d = p4d_alloc(mm, pgd, addr); @@ -2051,9 +2058,10 @@ static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd, return -ENOMEM; do { next = p4d_addr_end(addr, end); - if (remap_pud_range(mm, p4d, addr, next, - pfn + (addr >> PAGE_SHIFT), prot)) - return -ENOMEM; + err = remap_pud_range(mm, p4d, addr, next, + pfn + (addr >> PAGE_SHIFT), prot); + if (err) + return err; } while (p4d++, addr = next, addr != end); return 0; } @@ -2503,7 +2511,7 @@ static int wp_page_copy(struct vm_fault *vmf) cow_user_page(new_page, old_page, vmf->address, vma); } - if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false)) + if (mem_cgroup_try_charge_delay(new_page, mm, GFP_KERNEL, &memcg, false)) goto oom_free_new; __SetPageUptodate(new_page); @@ -3003,8 +3011,8 @@ int do_swap_page(struct vm_fault *vmf) goto out_page; } - if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, - &memcg, false)) { + if (mem_cgroup_try_charge_delay(page, vma->vm_mm, GFP_KERNEL, + &memcg, false)) { ret = VM_FAULT_OOM; goto out_page; } @@ -3165,7 +3173,8 @@ static int do_anonymous_page(struct vm_fault *vmf) if (!page) goto oom; - if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, &memcg, false)) + if (mem_cgroup_try_charge_delay(page, vma->vm_mm, GFP_KERNEL, &memcg, + false)) goto oom_free_page; /* @@ -3372,7 +3381,7 @@ static int do_set_pmd(struct vm_fault *vmf, struct page *page) if (write) entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); - add_mm_counter(vma->vm_mm, MM_FILEPAGES, HPAGE_PMD_NR); + add_mm_counter(vma->vm_mm, mm_counter_file(page), HPAGE_PMD_NR); page_add_file_rmap(page, true); /* * deposit and withdraw with pmd lock held @@ -3661,7 +3670,7 @@ static int do_cow_fault(struct vm_fault *vmf) if (!vmf->cow_page) return VM_FAULT_OOM; - if (mem_cgroup_try_charge(vmf->cow_page, vma->vm_mm, GFP_KERNEL, + if (mem_cgroup_try_charge_delay(vmf->cow_page, vma->vm_mm, GFP_KERNEL, &vmf->memcg, false)) { put_page(vmf->cow_page); return VM_FAULT_OOM; @@ -4125,7 +4134,7 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address, * space. Kernel faults are handled more gracefully. */ if (flags & FAULT_FLAG_USER) - mem_cgroup_oom_enable(); + mem_cgroup_enter_user_fault(); if (unlikely(is_vm_hugetlb_page(vma))) ret = hugetlb_fault(vma->vm_mm, vma, address, flags); @@ -4133,7 +4142,7 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address, ret = __handle_mm_fault(vma, address, flags); if (flags & FAULT_FLAG_USER) { - mem_cgroup_oom_disable(); + mem_cgroup_exit_user_fault(); /* * The task may have entered a memcg OOM situation but * if the allocation error was handled gracefully (no @@ -4397,6 +4406,9 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, return -EINVAL; maddr = ioremap_prot(phys_addr, PAGE_ALIGN(len + offset), prot); + if (!maddr) + return -ENOMEM; + if (write) memcpy_toio(maddr + offset, buf, len); else @@ -4568,69 +4580,91 @@ EXPORT_SYMBOL(__might_fault); #endif #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS) -static void clear_gigantic_page(struct page *page, - unsigned long addr, - unsigned int pages_per_huge_page) -{ - int i; - struct page *p = page; - - might_sleep(); - for (i = 0; i < pages_per_huge_page; - i++, p = mem_map_next(p, page, i)) { - cond_resched(); - clear_user_highpage(p, addr + i * PAGE_SIZE); - } -} -void clear_huge_page(struct page *page, - unsigned long addr_hint, unsigned int pages_per_huge_page) +/* + * Process all subpages of the specified huge page with the specified + * operation. The target subpage will be processed last to keep its + * cache lines hot. + */ +static inline void process_huge_page( + unsigned long addr_hint, unsigned int pages_per_huge_page, + void (*process_subpage)(unsigned long addr, int idx, void *arg), + void *arg) { int i, n, base, l; unsigned long addr = addr_hint & ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1); - if (unlikely(pages_per_huge_page > MAX_ORDER_NR_PAGES)) { - clear_gigantic_page(page, addr, pages_per_huge_page); - return; - } - - /* Clear sub-page to access last to keep its cache lines hot */ + /* Process target subpage last to keep its cache lines hot */ might_sleep(); n = (addr_hint - addr) / PAGE_SIZE; if (2 * n <= pages_per_huge_page) { - /* If sub-page to access in first half of huge page */ + /* If target subpage in first half of huge page */ base = 0; l = n; - /* Clear sub-pages at the end of huge page */ + /* Process subpages at the end of huge page */ for (i = pages_per_huge_page - 1; i >= 2 * n; i--) { cond_resched(); - clear_user_highpage(page + i, addr + i * PAGE_SIZE); + process_subpage(addr + i * PAGE_SIZE, i, arg); } } else { - /* If sub-page to access in second half of huge page */ + /* If target subpage in second half of huge page */ base = pages_per_huge_page - 2 * (pages_per_huge_page - n); l = pages_per_huge_page - n; - /* Clear sub-pages at the begin of huge page */ + /* Process subpages at the begin of huge page */ for (i = 0; i < base; i++) { cond_resched(); - clear_user_highpage(page + i, addr + i * PAGE_SIZE); + process_subpage(addr + i * PAGE_SIZE, i, arg); } } /* - * Clear remaining sub-pages in left-right-left-right pattern - * towards the sub-page to access + * Process remaining subpages in left-right-left-right pattern + * towards the target subpage */ for (i = 0; i < l; i++) { int left_idx = base + i; int right_idx = base + 2 * l - 1 - i; cond_resched(); - clear_user_highpage(page + left_idx, - addr + left_idx * PAGE_SIZE); + process_subpage(addr + left_idx * PAGE_SIZE, left_idx, arg); cond_resched(); - clear_user_highpage(page + right_idx, - addr + right_idx * PAGE_SIZE); + process_subpage(addr + right_idx * PAGE_SIZE, right_idx, arg); + } +} + +static void clear_gigantic_page(struct page *page, + unsigned long addr, + unsigned int pages_per_huge_page) +{ + int i; + struct page *p = page; + + might_sleep(); + for (i = 0; i < pages_per_huge_page; + i++, p = mem_map_next(p, page, i)) { + cond_resched(); + clear_user_highpage(p, addr + i * PAGE_SIZE); + } +} + +static void clear_subpage(unsigned long addr, int idx, void *arg) +{ + struct page *page = arg; + + clear_user_highpage(page + idx, addr); +} + +void clear_huge_page(struct page *page, + unsigned long addr_hint, unsigned int pages_per_huge_page) +{ + unsigned long addr = addr_hint & + ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1); + + if (unlikely(pages_per_huge_page > MAX_ORDER_NR_PAGES)) { + clear_gigantic_page(page, addr, pages_per_huge_page); + return; } + + process_huge_page(addr_hint, pages_per_huge_page, clear_subpage, page); } static void copy_user_gigantic_page(struct page *dst, struct page *src, @@ -4652,11 +4686,31 @@ static void copy_user_gigantic_page(struct page *dst, struct page *src, } } +struct copy_subpage_arg { + struct page *dst; + struct page *src; + struct vm_area_struct *vma; +}; + +static void copy_subpage(unsigned long addr, int idx, void *arg) +{ + struct copy_subpage_arg *copy_arg = arg; + + copy_user_highpage(copy_arg->dst + idx, copy_arg->src + idx, + addr, copy_arg->vma); +} + void copy_user_huge_page(struct page *dst, struct page *src, - unsigned long addr, struct vm_area_struct *vma, + unsigned long addr_hint, struct vm_area_struct *vma, unsigned int pages_per_huge_page) { - int i; + unsigned long addr = addr_hint & + ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1); + struct copy_subpage_arg arg = { + .dst = dst, + .src = src, + .vma = vma, + }; if (unlikely(pages_per_huge_page > MAX_ORDER_NR_PAGES)) { copy_user_gigantic_page(dst, src, addr, vma, @@ -4664,11 +4718,7 @@ void copy_user_huge_page(struct page *dst, struct page *src, return; } - might_sleep(); - for (i = 0; i < pages_per_huge_page; i++) { - cond_resched(); - copy_user_highpage(dst + i, src + i, addr + i*PAGE_SIZE, vma); - } + process_huge_page(addr_hint, pages_per_huge_page, copy_subpage, &arg); } long copy_huge_page_from_user(struct page *dst_page, |