diff options
author | Linus Torvalds | 2017-11-03 09:03:50 -0700 |
---|---|---|
committer | Linus Torvalds | 2017-11-03 09:03:50 -0700 |
commit | f0395d5b4d691164a6e4d107590636db80b29bf6 (patch) | |
tree | 6a362f035c703d3d1719deb2ae6c9cd8ce8ca671 | |
parent | fb615d61b5583db92e3793709b97e35dc9499c2a (diff) | |
parent | 2628bd6fc052bd85e9864dae4de494d8a6313391 (diff) |
Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton:
"7 fixes"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
mm, swap: fix race between swap count continuation operations
mm/huge_memory.c: deposit page table when copying a PMD migration entry
initramfs: fix initramfs rebuilds w/ compression after disabling
fs/hugetlbfs/inode.c: fix hwpoison reserve accounting
ocfs2: fstrim: Fix start offset of first cluster group during fstrim
mm, /proc/pid/pagemap: fix soft dirty marking for PMD migration entry
userfaultfd: hugetlbfs: prevent UFFDIO_COPY to fill beyond the end of i_size
-rw-r--r-- | fs/hugetlbfs/inode.c | 5 | ||||
-rw-r--r-- | fs/ocfs2/alloc.c | 24 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 6 | ||||
-rw-r--r-- | include/linux/swap.h | 4 | ||||
-rw-r--r-- | mm/huge_memory.c | 3 | ||||
-rw-r--r-- | mm/hugetlb.c | 32 | ||||
-rw-r--r-- | mm/swapfile.c | 23 | ||||
-rw-r--r-- | usr/Makefile | 9 |
8 files changed, 86 insertions, 20 deletions
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 59073e9f01a4..ed113ea17aff 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -842,9 +842,12 @@ static int hugetlbfs_error_remove_page(struct address_space *mapping, struct page *page) { struct inode *inode = mapping->host; + pgoff_t index = page->index; remove_huge_page(page); - hugetlb_fix_reserve_counts(inode); + if (unlikely(hugetlb_unreserve_pages(inode, index, index + 1, 1))) + hugetlb_fix_reserve_counts(inode); + return 0; } diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index a177eae3aa1a..addd7c5f2d3e 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -7304,13 +7304,24 @@ out: static int ocfs2_trim_extent(struct super_block *sb, struct ocfs2_group_desc *gd, - u32 start, u32 count) + u64 group, u32 start, u32 count) { u64 discard, bcount; + struct ocfs2_super *osb = OCFS2_SB(sb); bcount = ocfs2_clusters_to_blocks(sb, count); - discard = le64_to_cpu(gd->bg_blkno) + - ocfs2_clusters_to_blocks(sb, start); + discard = ocfs2_clusters_to_blocks(sb, start); + + /* + * For the first cluster group, the gd->bg_blkno is not at the start + * of the group, but at an offset from the start. If we add it while + * calculating discard for first group, we will wrongly start fstrim a + * few blocks after the desired start block and the range can cross + * over into the next cluster group. So, add it only if this is not + * the first cluster group. 
+ */ + if (group != osb->first_cluster_group_blkno) + discard += le64_to_cpu(gd->bg_blkno); trace_ocfs2_trim_extent(sb, (unsigned long long)discard, bcount); @@ -7318,7 +7329,7 @@ static int ocfs2_trim_extent(struct super_block *sb, } static int ocfs2_trim_group(struct super_block *sb, - struct ocfs2_group_desc *gd, + struct ocfs2_group_desc *gd, u64 group, u32 start, u32 max, u32 minbits) { int ret = 0, count = 0, next; @@ -7337,7 +7348,7 @@ static int ocfs2_trim_group(struct super_block *sb, next = ocfs2_find_next_bit(bitmap, max, start); if ((next - start) >= minbits) { - ret = ocfs2_trim_extent(sb, gd, + ret = ocfs2_trim_extent(sb, gd, group, start, next - start); if (ret < 0) { mlog_errno(ret); @@ -7435,7 +7446,8 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range) } gd = (struct ocfs2_group_desc *)gd_bh->b_data; - cnt = ocfs2_trim_group(sb, gd, first_bit, last_bit, minlen); + cnt = ocfs2_trim_group(sb, gd, group, + first_bit, last_bit, minlen); brelse(gd_bh); gd_bh = NULL; if (cnt < 0) { diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 280282b05bc7..6744bd706ecf 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1311,13 +1311,15 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, pmd_t pmd = *pmdp; struct page *page = NULL; - if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(pmd)) + if (vma->vm_flags & VM_SOFTDIRTY) flags |= PM_SOFT_DIRTY; if (pmd_present(pmd)) { page = pmd_page(pmd); flags |= PM_PRESENT; + if (pmd_soft_dirty(pmd)) + flags |= PM_SOFT_DIRTY; if (pm->show_pfn) frame = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); @@ -1329,6 +1331,8 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, frame = swp_type(entry) | (swp_offset(entry) << MAX_SWAPFILES_SHIFT); flags |= PM_SWAP; + if (pmd_swp_soft_dirty(pmd)) + flags |= PM_SOFT_DIRTY; VM_BUG_ON(!is_pmd_migration_entry(pmd)); page = migration_entry_to_page(entry); } diff --git 
a/include/linux/swap.h b/include/linux/swap.h index b489bd77bbdc..f02fb5db8914 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -266,6 +266,10 @@ struct swap_info_struct { * both locks need hold, hold swap_lock * first. */ + spinlock_t cont_lock; /* + * protect swap count continuation page + * list. + */ struct work_struct discard_work; /* discard worker */ struct swap_cluster_list discard_clusters; /* discard clusters list */ }; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 269b5df58543..1981ed697dab 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -941,6 +941,9 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmd = pmd_swp_mksoft_dirty(pmd); set_pmd_at(src_mm, addr, src_pmd, pmd); } + add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR); + atomic_long_inc(&dst_mm->nr_ptes); + pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable); set_pmd_at(dst_mm, addr, dst_pmd, pmd); ret = 0; goto out_unlock; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 424b0ef08a60..2d2ff5e8bf2b 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3984,6 +3984,9 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, unsigned long src_addr, struct page **pagep) { + struct address_space *mapping; + pgoff_t idx; + unsigned long size; int vm_shared = dst_vma->vm_flags & VM_SHARED; struct hstate *h = hstate_vma(dst_vma); pte_t _dst_pte; @@ -4021,13 +4024,24 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, __SetPageUptodate(page); set_page_huge_active(page); + mapping = dst_vma->vm_file->f_mapping; + idx = vma_hugecache_offset(h, dst_vma, dst_addr); + /* * If shared, add to page cache */ if (vm_shared) { - struct address_space *mapping = dst_vma->vm_file->f_mapping; - pgoff_t idx = vma_hugecache_offset(h, dst_vma, dst_addr); + size = i_size_read(mapping->host) >> huge_page_shift(h); + ret = -EFAULT; + if (idx >= size) + goto out_release_nounlock; + /* + * Serialization between remove_inode_hugepages() and + * huge_add_to_page_cache() 
below happens through the + hugetlb_fault_mutex_table that here must be held by + the caller. + */ ret = huge_add_to_page_cache(page, mapping, idx); if (ret) goto out_release_nounlock; @@ -4036,6 +4050,20 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, ptl = huge_pte_lockptr(h, dst_mm, dst_pte); spin_lock(ptl); + /* + * Recheck the i_size after holding PT lock to make sure not + * to leave any page mapped (as page_mapped()) beyond the end + * of the i_size (remove_inode_hugepages() is strict about + * enforcing that). If we bail out here, we'll also leave a + * page in the radix tree in the vm_shared case beyond the end + * of the i_size, but remove_inode_hugepages() will take care + * of it as soon as we drop the hugetlb_fault_mutex_table. + */ + size = i_size_read(mapping->host) >> huge_page_shift(h); + ret = -EFAULT; + if (idx >= size) + goto out_release_unlock; + ret = -EEXIST; if (!huge_pte_none(huge_ptep_get(dst_pte))) goto out_release_unlock; diff --git a/mm/swapfile.c b/mm/swapfile.c index bf91dc9e7a79..e47a21e64764 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2869,6 +2869,7 @@ static struct swap_info_struct *alloc_swap_info(void) p->flags = SWP_USED; spin_unlock(&swap_lock); spin_lock_init(&p->lock); + spin_lock_init(&p->cont_lock); return p; } @@ -3545,6 +3546,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask) head = vmalloc_to_page(si->swap_map + offset); offset &= ~PAGE_MASK; + spin_lock(&si->cont_lock); /* * Page allocation does not initialize the page's lru field, * but it does always reset its private field. @@ -3564,7 +3566,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask) * a continuation page, free our allocation and use this one. */ if (!(count & COUNT_CONTINUED)) - goto out; + goto out_unlock_cont; map = kmap_atomic(list_page) + offset; count = *map; @@ -3575,11 +3577,13 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask) * free our allocation and use this one. 
*/ if ((count & ~COUNT_CONTINUED) != SWAP_CONT_MAX) - goto out; + goto out_unlock_cont; } list_add_tail(&page->lru, &head->lru); page = NULL; /* now it's attached, don't free it */ +out_unlock_cont: + spin_unlock(&si->cont_lock); out: unlock_cluster(ci); spin_unlock(&si->lock); @@ -3604,6 +3608,7 @@ static bool swap_count_continued(struct swap_info_struct *si, struct page *head; struct page *page; unsigned char *map; + bool ret; head = vmalloc_to_page(si->swap_map + offset); if (page_private(head) != SWP_CONTINUED) { @@ -3611,6 +3616,7 @@ static bool swap_count_continued(struct swap_info_struct *si, return false; /* need to add count continuation */ } + spin_lock(&si->cont_lock); offset &= ~PAGE_MASK; page = list_entry(head->lru.next, struct page, lru); map = kmap_atomic(page) + offset; @@ -3631,8 +3637,10 @@ static bool swap_count_continued(struct swap_info_struct *si, if (*map == SWAP_CONT_MAX) { kunmap_atomic(map); page = list_entry(page->lru.next, struct page, lru); - if (page == head) - return false; /* add count continuation */ + if (page == head) { + ret = false; /* add count continuation */ + goto out; + } map = kmap_atomic(page) + offset; init_map: *map = 0; /* we didn't zero the page */ } @@ -3645,7 +3653,7 @@ init_map: *map = 0; /* we didn't zero the page */ kunmap_atomic(map); page = list_entry(page->lru.prev, struct page, lru); } - return true; /* incremented */ + ret = true; /* incremented */ } else { /* decrementing */ /* @@ -3671,8 +3679,11 @@ init_map: *map = 0; /* we didn't zero the page */ kunmap_atomic(map); page = list_entry(page->lru.prev, struct page, lru); } - return count == COUNT_CONTINUED; + ret = count == COUNT_CONTINUED; } +out: + spin_unlock(&si->cont_lock); + return ret; } /* diff --git a/usr/Makefile b/usr/Makefile index 34a9fcd0f537..237a028693ce 100644 --- a/usr/Makefile +++ b/usr/Makefile @@ -8,6 +8,7 @@ PHONY += klibcdirs suffix_y = $(subst $\",,$(CONFIG_INITRAMFS_COMPRESSION)) datafile_y = initramfs_data.cpio$(suffix_y) 
+datafile_d_y = .$(datafile_y).d AFLAGS_initramfs_data.o += -DINITRAMFS_IMAGE="usr/$(datafile_y)" @@ -30,12 +31,12 @@ ramfs-args := \ $(if $(CONFIG_INITRAMFS_ROOT_UID), -u $(CONFIG_INITRAMFS_ROOT_UID)) \ $(if $(CONFIG_INITRAMFS_ROOT_GID), -g $(CONFIG_INITRAMFS_ROOT_GID)) -# .initramfs_data.cpio.d is used to identify all files included +# $(datafile_d_y) is used to identify all files included # in initramfs and to detect if any files are added/removed. # Removed files are identified by directory timestamp being updated # The dependency list is generated by gen_initramfs.sh -l -ifneq ($(wildcard $(obj)/.initramfs_data.cpio.d),) - include $(obj)/.initramfs_data.cpio.d +ifneq ($(wildcard $(obj)/$(datafile_d_y)),) + include $(obj)/$(datafile_d_y) endif quiet_cmd_initfs = GEN $@ @@ -53,5 +54,5 @@ $(deps_initramfs): klibcdirs # 3) If gen_init_cpio are newer than initramfs_data.cpio # 4) arguments to gen_initramfs.sh changes $(obj)/$(datafile_y): $(obj)/gen_init_cpio $(deps_initramfs) klibcdirs - $(Q)$(initramfs) -l $(ramfs-input) > $(obj)/.initramfs_data.cpio.d + $(Q)$(initramfs) -l $(ramfs-input) > $(obj)/$(datafile_d_y) $(call if_changed,initfs) |