aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/linux/mm.h5
-rw-r--r--mm/mmap.c228
-rw-r--r--mm/mremap.c17
3 files changed, 158 insertions, 92 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 49a58807719b..579449d6c23b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2710,8 +2710,9 @@ extern unsigned long mmap_region(struct file *file, unsigned long addr,
extern unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot, unsigned long flags,
unsigned long pgoff, unsigned long *populate, struct list_head *uf);
-extern int __do_munmap(struct mm_struct *, unsigned long, size_t,
- struct list_head *uf, bool downgrade);
+extern int do_mas_munmap(struct ma_state *mas, struct mm_struct *mm,
+ unsigned long start, size_t len, struct list_head *uf,
+ bool downgrade);
extern int do_munmap(struct mm_struct *, unsigned long, size_t,
struct list_head *uf);
extern int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior);
diff --git a/mm/mmap.c b/mm/mmap.c
index 8c9e526994be..6e587f4e3a7d 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2363,47 +2363,6 @@ static void unmap_region(struct mm_struct *mm,
}
/*
- * Create a list of vma's touched by the unmap, removing them from the mm's
- * vma list as we go..
- */
-static bool
-detach_vmas_to_be_unmapped(struct mm_struct *mm, struct ma_state *mas,
- struct vm_area_struct *vma, struct vm_area_struct *prev,
- unsigned long end)
-{
- struct vm_area_struct **insertion_point;
- struct vm_area_struct *tail_vma = NULL;
-
- insertion_point = (prev ? &prev->vm_next : &mm->mmap);
- vma->vm_prev = NULL;
- vma_mas_szero(mas, vma->vm_start, end);
- do {
- if (vma->vm_flags & VM_LOCKED)
- mm->locked_vm -= vma_pages(vma);
- mm->map_count--;
- tail_vma = vma;
- vma = vma->vm_next;
- } while (vma && vma->vm_start < end);
- *insertion_point = vma;
- if (vma)
- vma->vm_prev = prev;
- else
- mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
- tail_vma->vm_next = NULL;
-
- /*
- * Do not downgrade mmap_lock if we are next to VM_GROWSDOWN or
- * VM_GROWSUP VMA. Such VMAs can change their size under
- * down_read(mmap_lock) and collide with the VMA we are about to unmap.
- */
- if (vma && (vma->vm_flags & VM_GROWSDOWN))
- return false;
- if (prev && (prev->vm_flags & VM_GROWSUP))
- return false;
- return true;
-}
-
-/*
* __split_vma() bypasses sysctl_max_map_count checking. We use this where it
* has already been checked or doesn't make sense to fail.
*/
@@ -2485,40 +2444,51 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
return __split_vma(mm, vma, addr, new_below);
}
-/* Munmap is split into 2 main parts -- this part which finds
- * what needs doing, and the areas themselves, which do the
- * work. This now handles partial unmappings.
- * Jeremy Fitzhardinge <jeremy@goop.org>
- */
-int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
- struct list_head *uf, bool downgrade)
+static inline int
+unlock_range(struct vm_area_struct *start, struct vm_area_struct **tail,
+ unsigned long limit)
{
- unsigned long end;
- struct vm_area_struct *vma, *prev, *last;
- int error = -ENOMEM;
- MA_STATE(mas, &mm->mm_mt, 0, 0);
+ struct mm_struct *mm = start->vm_mm;
+ struct vm_area_struct *tmp = start;
+ int count = 0;
- if ((offset_in_page(start)) || start > TASK_SIZE || len > TASK_SIZE-start)
- return -EINVAL;
+ while (tmp && tmp->vm_start < limit) {
+ *tail = tmp;
+ count++;
+ if (tmp->vm_flags & VM_LOCKED)
+ mm->locked_vm -= vma_pages(tmp);
- len = PAGE_ALIGN(len);
- end = start + len;
- if (len == 0)
- return -EINVAL;
+ tmp = tmp->vm_next;
+ }
- /* arch_unmap() might do unmaps itself. */
- arch_unmap(mm, start, end);
+ return count;
+}
- /* Find the first overlapping VMA where start < vma->vm_end */
- vma = find_vma_intersection(mm, start, end);
- if (!vma)
- return 0;
+/*
+ * do_mas_align_munmap() - munmap the aligned region from @start to @end.
+ * @mas: The maple_state, ideally set up to alter the correct tree location.
+ * @vma: The starting vm_area_struct
+ * @mm: The mm_struct
+ * @start: The aligned start address to munmap.
+ * @end: The aligned end address to munmap.
+ * @uf: The userfaultfd list_head
+ * @downgrade: Set to true to attempt a write downgrade of the mmap_lock
+ *
+ * If @downgrade is true, check return code for potential release of the lock.
+ */
+static int
+do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
+ struct mm_struct *mm, unsigned long start,
+ unsigned long end, struct list_head *uf, bool downgrade)
+{
+ struct vm_area_struct *prev, *last;
+ int error = -ENOMEM;
+ /* we have start < vma->vm_end */
- if (mas_preallocate(&mas, vma, GFP_KERNEL))
+ if (mas_preallocate(mas, vma, GFP_KERNEL))
return -ENOMEM;
- prev = vma->vm_prev;
- /* we have start < vma->vm_end */
+ mas->last = end - 1;
/*
* If we need to split any vma, do it now to save pain later.
*
@@ -2539,17 +2509,31 @@ int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
error = __split_vma(mm, vma, start, 0);
if (error)
goto split_failed;
+
prev = vma;
+ vma = __vma_next(mm, prev);
+ mas->index = start;
+ mas_reset(mas);
+ } else {
+ prev = vma->vm_prev;
}
+ if (vma->vm_end >= end)
+ last = vma;
+ else
+ last = find_vma_intersection(mm, end - 1, end);
+
/* Does it split the last one? */
- last = find_vma(mm, end);
- if (last && end > last->vm_start) {
+ if (last && end < last->vm_end) {
error = __split_vma(mm, last, end, 1);
+
if (error)
goto split_failed;
+
+ if (vma == last)
+ vma = __vma_next(mm, prev);
+ mas_reset(mas);
}
- vma = __vma_next(mm, prev);
if (unlikely(uf)) {
/*
@@ -2562,16 +2546,46 @@ int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
* failure that it's not worth optimizing it for.
*/
error = userfaultfd_unmap_prep(vma, start, end, uf);
+
if (error)
goto userfaultfd_error;
}
- /* Detach vmas from rbtree */
- if (!detach_vmas_to_be_unmapped(mm, &mas, vma, prev, end))
- downgrade = false;
+ /*
+ * unlock any mlock()ed ranges before detaching vmas, count the number
+ * of VMAs to be dropped, and return the tail entry of the affected
+ * area.
+ */
+ mm->map_count -= unlock_range(vma, &last, end);
+ /* Drop removed area from the tree */
+ mas_store_prealloc(mas, NULL);
- if (downgrade)
- mmap_write_downgrade(mm);
+ /* Detach vmas from the MM linked list */
+ vma->vm_prev = NULL;
+ if (prev)
+ prev->vm_next = last->vm_next;
+ else
+ mm->mmap = last->vm_next;
+
+ if (last->vm_next) {
+ last->vm_next->vm_prev = prev;
+ last->vm_next = NULL;
+ } else
+ mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
+
+ /*
+ * Do not downgrade mmap_lock if we are next to VM_GROWSDOWN or
+ * VM_GROWSUP VMA. Such VMAs can change their size under
+ * down_read(mmap_lock) and collide with the VMA we are about to unmap.
+ */
+ if (downgrade) {
+ if (last && (last->vm_flags & VM_GROWSDOWN))
+ downgrade = false;
+ else if (prev && (prev->vm_flags & VM_GROWSUP))
+ downgrade = false;
+ else
+ mmap_write_downgrade(mm);
+ }
unmap_region(mm, vma, prev, start, end);
@@ -2585,14 +2599,63 @@ int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
map_count_exceeded:
split_failed:
userfaultfd_error:
- mas_destroy(&mas);
+ mas_destroy(mas);
return error;
}
+/*
+ * do_mas_munmap() - munmap a given range.
+ * @mas: The maple state
+ * @mm: The mm_struct
+ * @start: The start address to munmap
+ * @len: The length of the range to munmap
+ * @uf: The userfaultfd list_head
+ * @downgrade: set to true if the user wants to attempt to write_downgrade the
+ * mmap_lock
+ *
+ * This function takes a @mas that is either pointing to the previous VMA or set
+ * to MA_START and sets it up to remove the mapping(s). The @len will be
+ * aligned and any arch_unmap work will be performed.
+ *
+ * Returns: 1 if mmap_lock was downgraded, 0 on other success, -errno on failure.
+ */
+int do_mas_munmap(struct ma_state *mas, struct mm_struct *mm,
+ unsigned long start, size_t len, struct list_head *uf,
+ bool downgrade)
+{
+ unsigned long end;
+ struct vm_area_struct *vma;
+
+ if ((offset_in_page(start)) || start > TASK_SIZE || len > TASK_SIZE-start)
+ return -EINVAL;
+
+ end = start + PAGE_ALIGN(len);
+ if (end == start)
+ return -EINVAL;
+
+ /* arch_unmap() might do unmaps itself. */
+ arch_unmap(mm, start, end);
+
+ /* Find the first overlapping VMA */
+ vma = mas_find(mas, end - 1);
+ if (!vma)
+ return 0;
+
+ return do_mas_align_munmap(mas, vma, mm, start, end, uf, downgrade);
+}
+
+/* do_munmap() - Wrapper function for non-maple tree aware do_munmap() calls.
+ * @mm: The mm_struct
+ * @start: The start address to munmap
+ * @len: The length to be munmapped.
+ * @uf: The userfaultfd list_head
+ */
int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
struct list_head *uf)
{
- return __do_munmap(mm, start, len, uf, false);
+ MA_STATE(mas, &mm->mm_mt, start, start);
+
+ return do_mas_munmap(&mas, mm, start, len, uf, false);
}
unsigned long mmap_region(struct file *file, unsigned long addr,
@@ -2626,7 +2689,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
}
/* Unmap any existing mapping in the area */
- if (do_munmap(mm, addr, len, uf))
+ if (do_mas_munmap(&mas, mm, addr, len, uf, false))
return -ENOMEM;
/*
@@ -2845,11 +2908,12 @@ static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
int ret;
struct mm_struct *mm = current->mm;
LIST_HEAD(uf);
+ MA_STATE(mas, &mm->mm_mt, start, start);
if (mmap_write_lock_killable(mm))
return -EINTR;
- ret = __do_munmap(mm, start, len, &uf, downgrade);
+ ret = do_mas_munmap(&mas, mm, start, len, &uf, downgrade);
/*
* Returning 1 indicates mmap_lock is downgraded.
* But 1 is not legal return value of vm_munmap() and munmap(), reset
@@ -2978,7 +3042,7 @@ static int do_brk_munmap(struct ma_state *mas, struct vm_area_struct *vma,
int ret;
arch_unmap(mm, newbrk, oldbrk);
- ret = __do_munmap(mm, newbrk, oldbrk - newbrk, uf, true);
+ ret = do_mas_munmap(mas, mm, newbrk, oldbrk-newbrk, uf, true);
validate_mm_mt(mm);
return ret;
}
@@ -3116,9 +3180,7 @@ int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
if (ret)
goto limits_failed;
- if (find_vma_intersection(mm, addr, addr + len))
- ret = do_munmap(mm, addr, len, &uf);
-
+ ret = do_mas_munmap(&mas, mm, addr, len, &uf, 0);
if (ret)
goto munmap_failed;
diff --git a/mm/mremap.c b/mm/mremap.c
index b522cd0259a0..e0fba9004246 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -975,20 +975,23 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
/*
* Always allow a shrinking remap: that just unmaps
* the unnecessary pages..
- * __do_munmap does all the needed commit accounting, and
+ * do_mas_munmap does all the needed commit accounting, and
* downgrades mmap_lock to read if so directed.
*/
if (old_len >= new_len) {
int retval;
+ MA_STATE(mas, &mm->mm_mt, addr + new_len, addr + new_len);
- retval = __do_munmap(mm, addr+new_len, old_len - new_len,
- &uf_unmap, true);
- if (retval < 0 && old_len != new_len) {
- ret = retval;
- goto out;
+ retval = do_mas_munmap(&mas, mm, addr + new_len,
+ old_len - new_len, &uf_unmap, true);
/* Returning 1 indicates mmap_lock is downgraded to read. */
- } else if (retval == 1)
+ if (retval == 1) {
downgraded = true;
+ } else if (retval < 0 && old_len != new_len) {
+ ret = retval;
+ goto out;
+ }
+
ret = addr;
goto out;
}