diff options
author | Linus Torvalds | 2014-01-23 19:11:50 -0800 |
---|---|---|
committer | Linus Torvalds | 2014-01-23 19:11:50 -0800 |
commit | 3aacd625f20129f5a41ea3ff3b5353b0e4dabd01 (patch) | |
tree | 7cf4ea65397f80098b30494df31cfc8f5fa26d63 /mm/mlock.c | |
parent | 7e21774db5cc9cf8fe93a64a2f0c6cf47db8ab24 (diff) | |
parent | 2a1d689c9ba42a6066540fb221b6ecbd6298b728 (diff) |
Merge branch 'akpm' (incoming from Andrew)
Merge second patch-bomb from Andrew Morton:
- various misc bits
- the rest of MM
- add generic fixmap.h, use it
- backlight updates
- dynamic_debug updates
- printk() updates
- checkpatch updates
- binfmt_elf
- ramfs
- init/
- autofs4
- drivers/rtc
- nilfs
- hfsplus
- Documentation/
- coredump
- procfs
- fork
- exec
- kexec
- kdump
- partitions
- rapidio
- rbtree
- userns
- memstick
- w1
- decompressors
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (197 commits)
lib/decompress_unlz4.c: always set an error return code on failures
romfs: fix returm err while getting inode in fill_super
drivers/w1/masters/w1-gpio.c: add strong pullup emulation
drivers/memstick/host/rtsx_pci_ms.c: fix ms card data transfer bug
userns: relax the posix_acl_valid() checks
arch/sh/kernel/dwarf.c: use rbtree postorder iteration helper instead of solution using repeated rb_erase()
fs-ext3-use-rbtree-postorder-iteration-helper-instead-of-opencoding-fix
fs/ext3: use rbtree postorder iteration helper instead of opencoding
fs/jffs2: use rbtree postorder iteration helper instead of opencoding
fs/ext4: use rbtree postorder iteration helper instead of opencoding
fs/ubifs: use rbtree postorder iteration helper instead of opencoding
net/netfilter/ipset/ip_set_hash_netiface.c: use rbtree postorder iteration instead of opencoding
rbtree/test: test rbtree_postorder_for_each_entry_safe()
rbtree/test: move rb_node to the middle of the test struct
rapidio: add modular rapidio core build into powerpc and mips branches
partitions/efi: complete documentation of gpt kernel param purpose
kdump: add /sys/kernel/vmcoreinfo ABI documentation
kdump: fix exported size of vmcoreinfo note
kexec: add sysctl to disable kexec_load
fs/exec.c: call arch_pick_mmap_layout() only once
...
Diffstat (limited to 'mm/mlock.c')
-rw-r--r-- | mm/mlock.c | 108 |
1 file changed, 62 insertions, 46 deletions
```diff
diff --git a/mm/mlock.c b/mm/mlock.c
index 10819ed4df3e..4e1a68162285 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -91,6 +91,26 @@ void mlock_vma_page(struct page *page)
 }
 
 /*
+ * Isolate a page from LRU with optional get_page() pin.
+ * Assumes lru_lock already held and page already pinned.
+ */
+static bool __munlock_isolate_lru_page(struct page *page, bool getpage)
+{
+	if (PageLRU(page)) {
+		struct lruvec *lruvec;
+
+		lruvec = mem_cgroup_page_lruvec(page, page_zone(page));
+		if (getpage)
+			get_page(page);
+		ClearPageLRU(page);
+		del_page_from_lru_list(page, lruvec, page_lru(page));
+		return true;
+	}
+
+	return false;
+}
+
+/*
  * Finish munlock after successful page isolation
  *
  * Page must be locked. This is a wrapper for try_to_munlock()
@@ -126,9 +146,9 @@ static void __munlock_isolated_page(struct page *page)
 static void __munlock_isolation_failed(struct page *page)
 {
 	if (PageUnevictable(page))
-		count_vm_event(UNEVICTABLE_PGSTRANDED);
+		__count_vm_event(UNEVICTABLE_PGSTRANDED);
 	else
-		count_vm_event(UNEVICTABLE_PGMUNLOCKED);
+		__count_vm_event(UNEVICTABLE_PGMUNLOCKED);
 }
 
 /**
@@ -152,28 +172,34 @@ static void __munlock_isolation_failed(struct page *page)
 unsigned int munlock_vma_page(struct page *page)
 {
 	unsigned int nr_pages;
+	struct zone *zone = page_zone(page);
 
 	BUG_ON(!PageLocked(page));
 
-	if (TestClearPageMlocked(page)) {
-		nr_pages = hpage_nr_pages(page);
-		mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
-		if (!isolate_lru_page(page))
-			__munlock_isolated_page(page);
-		else
-			__munlock_isolation_failed(page);
-	} else {
-		nr_pages = hpage_nr_pages(page);
-	}
-
 	/*
-	 * Regardless of the original PageMlocked flag, we determine nr_pages
-	 * after touching the flag. This leaves a possible race with a THP page
-	 * split, such that a whole THP page was munlocked, but nr_pages == 1.
-	 * Returning a smaller mask due to that is OK, the worst that can
-	 * happen is subsequent useless scanning of the former tail pages.
-	 * The NR_MLOCK accounting can however become broken.
+	 * Serialize with any parallel __split_huge_page_refcount() which
+	 * might otherwise copy PageMlocked to part of the tail pages before
+	 * we clear it in the head page. It also stabilizes hpage_nr_pages().
 	 */
+	spin_lock_irq(&zone->lru_lock);
+
+	nr_pages = hpage_nr_pages(page);
+	if (!TestClearPageMlocked(page))
+		goto unlock_out;
+
+	__mod_zone_page_state(zone, NR_MLOCK, -nr_pages);
+
+	if (__munlock_isolate_lru_page(page, true)) {
+		spin_unlock_irq(&zone->lru_lock);
+		__munlock_isolated_page(page);
+		goto out;
+	}
+	__munlock_isolation_failed(page);
+
+unlock_out:
+	spin_unlock_irq(&zone->lru_lock);
+
+out:
 	return nr_pages - 1;
 }
 
@@ -253,8 +279,8 @@ static int __mlock_posix_error_return(long retval)
 static bool __putback_lru_fast_prepare(struct page *page, struct pagevec *pvec,
 		int *pgrescued)
 {
-	VM_BUG_ON(PageLRU(page));
-	VM_BUG_ON(!PageLocked(page));
+	VM_BUG_ON_PAGE(PageLRU(page), page);
+	VM_BUG_ON_PAGE(!PageLocked(page), page);
 
 	if (page_mapcount(page) <= 1 && page_evictable(page)) {
 		pagevec_add(pvec, page);
@@ -310,34 +336,24 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
 		struct page *page = pvec->pages[i];
 
 		if (TestClearPageMlocked(page)) {
-			struct lruvec *lruvec;
-			int lru;
-
-			if (PageLRU(page)) {
-				lruvec = mem_cgroup_page_lruvec(page, zone);
-				lru = page_lru(page);
-				/*
-				 * We already have pin from follow_page_mask()
-				 * so we can spare the get_page() here.
-				 */
-				ClearPageLRU(page);
-				del_page_from_lru_list(page, lruvec, lru);
-			} else {
-				__munlock_isolation_failed(page);
-				goto skip_munlock;
-			}
-
-		} else {
-skip_munlock:
 			/*
-			 * We won't be munlocking this page in the next phase
-			 * but we still need to release the follow_page_mask()
-			 * pin. We cannot do it under lru_lock however. If it's
-			 * the last pin, __page_cache_release would deadlock.
+			 * We already have pin from follow_page_mask()
+			 * so we can spare the get_page() here.
 			 */
-			pagevec_add(&pvec_putback, pvec->pages[i]);
-			pvec->pages[i] = NULL;
+			if (__munlock_isolate_lru_page(page, false))
+				continue;
+			else
+				__munlock_isolation_failed(page);
 		}
+
+		/*
+		 * We won't be munlocking this page in the next phase
+		 * but we still need to release the follow_page_mask()
+		 * pin. We cannot do it under lru_lock however. If it's
+		 * the last pin, __page_cache_release() would deadlock.
+		 */
+		pagevec_add(&pvec_putback, pvec->pages[i]);
+		pvec->pages[i] = NULL;
 	}
 	delta_munlocked = -nr + pagevec_count(&pvec_putback);
 	__mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
```