author     Linus Torvalds  2022-08-05 16:32:45 -0700
committer  Linus Torvalds  2022-08-05 16:32:45 -0700
commit6614a3c3164a5df2b54abb0b3559f51041cf705b (patch)
tree1c25c23d9efed988705287fc2ccb78e0e76e311d /tools
parent74cae210a335d159f2eb822e261adee905b6951a (diff)
parent360614c01f81f48a89d8b13f8fa69c3ae0a1f5c7 (diff)
Merge tag 'mm-stable-2022-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton:
 "Most of the MM queue. A few things are still pending.

  Liam's maple tree rework didn't make it. This has resulted in a few
  other minor patch series being held over for next time.

  Multi-gen LRU still isn't merged as we were waiting for mapletree to
  stabilize. The current plan is to merge MGLRU into -mm soon and to
  later reintroduce mapletree, with a view to hopefully getting both
  into 6.1-rc1.

  Summary:

   - The usual batches of cleanups from Baoquan He, Muchun Song, Miaohe
     Lin, Yang Shi, Anshuman Khandual and Mike Rapoport

   - Some kmemleak fixes from Patrick Wang and Waiman Long

   - DAMON updates from SeongJae Park

   - memcg debug/visibility work from Roman Gushchin

   - vmalloc speedup from Uladzislau Rezki

   - more folio conversion work from Matthew Wilcox

   - enhancements for coherent device memory mapping from Alex Sierra

   - addition of shared pages tracking and CoW support for fsdax, from
     Shiyang Ruan

   - hugetlb optimizations from Mike Kravetz

   - Mel Gorman has contributed some pagealloc changes to improve
     latency and realtime behaviour.

   - mprotect soft-dirty checking has been improved by Peter Xu

   - Many other singleton patches all over the place"

[ XFS merge from hell as per Darrick Wong in
  https://lore.kernel.org/all/YshKnxb4VwXycPO8@magnolia/ ]

* tag 'mm-stable-2022-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (282 commits)
  tools/testing/selftests/vm/hmm-tests.c: fix build
  mm: Kconfig: fix typo
  mm: memory-failure: convert to pr_fmt()
  mm: use is_zone_movable_page() helper
  hugetlbfs: fix inaccurate comment in hugetlbfs_statfs()
  hugetlbfs: cleanup some comments in inode.c
  hugetlbfs: remove unneeded header file
  hugetlbfs: remove unneeded hugetlbfs_ops forward declaration
  hugetlbfs: use helper macro SZ_1{K,M}
  mm: cleanup is_highmem()
  mm/hmm: add a test for cross device private faults
  selftests: add soft-dirty into run_vmtests.sh
  selftests: soft-dirty: add test for mprotect
  mm/mprotect: fix soft-dirty check in can_change_pte_writable()
  mm: memcontrol: fix potential oom_lock recursion deadlock
  mm/gup.c: fix formatting in check_and_migrate_movable_page()
  xfs: fail dax mount if reflink is enabled on a partition
  mm/memcontrol.c: remove the redundant updating of stats_flush_threshold
  userfaultfd: don't fail on unrecognized features
  hugetlb_cgroup: fix wrong hugetlb cgroup numa stat
  ...
Diffstat (limited to 'tools')
-rw-r--r--  tools/cgroup/memcg_shrinker.py                 71
-rw-r--r--  tools/testing/memblock/linux/kmemleak.h         2
-rw-r--r--  tools/testing/selftests/vm/Makefile             1
-rw-r--r--  tools/testing/selftests/vm/hmm-tests.c        325
-rw-r--r--  tools/testing/selftests/vm/hugepage-mremap.c    2
-rw-r--r--  tools/testing/selftests/vm/hugetlb-madvise.c    5
-rw-r--r--  tools/testing/selftests/vm/mrelease_test.c     16
-rwxr-xr-x  tools/testing/selftests/vm/run_vmtests.sh      15
-rw-r--r--  tools/testing/selftests/vm/soft-dirty.c        67
-rwxr-xr-x  tools/testing/selftests/vm/test_hmm.sh         24
-rw-r--r--  tools/testing/selftests/vm/userfaultfd.c        4
-rw-r--r--  tools/testing/selftests/vm/va_128TBswitch.c     8
-rwxr-xr-x  tools/testing/selftests/vm/va_128TBswitch.sh   54
-rw-r--r--  tools/vm/page_owner_sort.c                     30
-rw-r--r--  tools/vm/slabinfo.c                            32
15 files changed, 588 insertions, 68 deletions
diff --git a/tools/cgroup/memcg_shrinker.py b/tools/cgroup/memcg_shrinker.py
new file mode 100644
index 000000000000..706ab27666a4
--- /dev/null
+++ b/tools/cgroup/memcg_shrinker.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2022 Roman Gushchin <roman.gushchin@linux.dev>
+# Copyright (C) 2022 Meta
+
+import os
+import argparse
+import sys
+
+
+def scan_cgroups(cgroup_root):
+ cgroups = {}
+
+ for root, subdirs, _ in os.walk(cgroup_root):
+ for cgroup in subdirs:
+ path = os.path.join(root, cgroup)
+ ino = os.stat(path).st_ino
+ cgroups[ino] = path
+
+ # (memcg ino, path)
+ return cgroups
+
+
+def scan_shrinkers(shrinker_debugfs):
+ shrinkers = []
+
+ for root, subdirs, _ in os.walk(shrinker_debugfs):
+ for shrinker in subdirs:
+ count_path = os.path.join(root, shrinker, "count")
+ with open(count_path) as f:
+ for line in f.readlines():
+ items = line.split(' ')
+ ino = int(items[0])
+ # (count, shrinker, memcg ino)
+ shrinkers.append((int(items[1]), shrinker, ino))
+ return shrinkers
+
+
+def main():
+ parser = argparse.ArgumentParser(description='Display biggest shrinkers')
+ parser.add_argument('-n', '--lines', type=int, help='Number of lines to print')
+
+ args = parser.parse_args()
+
+ cgroups = scan_cgroups("/sys/fs/cgroup/")
+ shrinkers = scan_shrinkers("/sys/kernel/debug/shrinker/")
+ shrinkers = sorted(shrinkers, reverse = True, key = lambda x: x[0])
+
+ n = 0
+ for s in shrinkers:
+ count, name, ino = (s[0], s[1], s[2])
+ if count == 0:
+ break
+
+ if ino == 0 or ino == 1:
+ cg = "/"
+ else:
+ try:
+ cg = cgroups[ino]
+ except KeyError:
+ cg = "unknown (%d)" % ino
+
+ print("%-8s %-20s %s" % (count, name, cg))
+
+ n += 1
+ if args.lines and n >= args.lines:
+ break
+
+
+if __name__ == '__main__':
+ main()
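
For reference, here is a minimal standalone sketch (not part of the patch) of the parsing and ranking that memcg_shrinker.py performs. The sample lines and the shrinker name "sb-ext4:dm-0" are made up; real input comes from /sys/kernel/debug/shrinker/<name>/count, where scan_shrinkers() reads each line as "<memcg inode> <object count>".

#!/usr/bin/env python3
# Hypothetical contents of one shrinker's "count" file: "<memcg ino> <count>" per line.
sample_count_file = """1 0
12345 4096
67890 128"""

shrinkers = []
for line in sample_count_file.splitlines():
    ino, count = line.split(' ')
    # Same tuple layout as scan_shrinkers(): (count, shrinker name, memcg ino)
    shrinkers.append((int(count), "sb-ext4:dm-0", int(ino)))

# Rank by object count, highest first, as memcg_shrinker.py does before printing.
for count, name, ino in sorted(shrinkers, reverse=True, key=lambda x: x[0]):
    print("%-8s %-20s memcg ino %d" % (count, name, ino))

In the real tool the memcg inode is then resolved to a cgroup path by matching it against the st_ino values collected from /sys/fs/cgroup/.
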
diff --git a/tools/testing/memblock/linux/kmemleak.h b/tools/testing/memblock/linux/kmemleak.h
index 462f8c5e8aa0..5fed13bb9ec4 100644
--- a/tools/testing/memblock/linux/kmemleak.h
+++ b/tools/testing/memblock/linux/kmemleak.h
@@ -7,7 +7,7 @@ static inline void kmemleak_free_part_phys(phys_addr_t phys, size_t size)
}
static inline void kmemleak_alloc_phys(phys_addr_t phys, size_t size,
- int min_count, gfp_t gfp)
+ gfp_t gfp)
{
}
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 108587cb327a..d9fa6a9ea584 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -93,6 +93,7 @@ TEST_PROGS := run_vmtests.sh
TEST_FILES := test_vmalloc.sh
TEST_FILES += test_hmm.sh
+TEST_FILES += va_128TBswitch.sh
include ../lib.mk
diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
index 203323967b50..529f53b40296 100644
--- a/tools/testing/selftests/vm/hmm-tests.c
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -36,6 +36,7 @@
* in the usual include/uapi/... directory.
*/
#include "../../../../lib/test_hmm_uapi.h"
+#include "../../../../mm/gup_test.h"
struct hmm_buffer {
void *ptr;
@@ -46,12 +47,22 @@ struct hmm_buffer {
uint64_t faults;
};
+enum {
+ HMM_PRIVATE_DEVICE_ONE,
+ HMM_PRIVATE_DEVICE_TWO,
+ HMM_COHERENCE_DEVICE_ONE,
+ HMM_COHERENCE_DEVICE_TWO,
+};
+
#define TWOMEG (1 << 21)
#define HMM_BUFFER_SIZE (1024 << 12)
#define HMM_PATH_MAX 64
#define NTIMES 10
#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1)))
+/* Just the flags we need, copied from mm.h: */
+#define FOLL_WRITE 0x01 /* check pte is writable */
+#define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite */
FIXTURE(hmm)
{
@@ -60,6 +71,21 @@ FIXTURE(hmm)
unsigned int page_shift;
};
+FIXTURE_VARIANT(hmm)
+{
+ int device_number;
+};
+
+FIXTURE_VARIANT_ADD(hmm, hmm_device_private)
+{
+ .device_number = HMM_PRIVATE_DEVICE_ONE,
+};
+
+FIXTURE_VARIANT_ADD(hmm, hmm_device_coherent)
+{
+ .device_number = HMM_COHERENCE_DEVICE_ONE,
+};
+
FIXTURE(hmm2)
{
int fd0;
@@ -68,6 +94,24 @@ FIXTURE(hmm2)
unsigned int page_shift;
};
+FIXTURE_VARIANT(hmm2)
+{
+ int device_number0;
+ int device_number1;
+};
+
+FIXTURE_VARIANT_ADD(hmm2, hmm2_device_private)
+{
+ .device_number0 = HMM_PRIVATE_DEVICE_ONE,
+ .device_number1 = HMM_PRIVATE_DEVICE_TWO,
+};
+
+FIXTURE_VARIANT_ADD(hmm2, hmm2_device_coherent)
+{
+ .device_number0 = HMM_COHERENCE_DEVICE_ONE,
+ .device_number1 = HMM_COHERENCE_DEVICE_TWO,
+};
+
static int hmm_open(int unit)
{
char pathname[HMM_PATH_MAX];
@@ -81,12 +125,19 @@ static int hmm_open(int unit)
return fd;
}
+static bool hmm_is_coherent_type(int dev_num)
+{
+ return (dev_num >= HMM_COHERENCE_DEVICE_ONE);
+}
+
FIXTURE_SETUP(hmm)
{
self->page_size = sysconf(_SC_PAGE_SIZE);
self->page_shift = ffs(self->page_size) - 1;
- self->fd = hmm_open(0);
+ self->fd = hmm_open(variant->device_number);
+ if (self->fd < 0 && hmm_is_coherent_type(variant->device_number))
+ SKIP(exit(0), "DEVICE_COHERENT not available");
ASSERT_GE(self->fd, 0);
}
@@ -95,9 +146,11 @@ FIXTURE_SETUP(hmm2)
self->page_size = sysconf(_SC_PAGE_SIZE);
self->page_shift = ffs(self->page_size) - 1;
- self->fd0 = hmm_open(0);
+ self->fd0 = hmm_open(variant->device_number0);
+ if (self->fd0 < 0 && hmm_is_coherent_type(variant->device_number0))
+ SKIP(exit(0), "DEVICE_COHERENT not available");
ASSERT_GE(self->fd0, 0);
- self->fd1 = hmm_open(1);
+ self->fd1 = hmm_open(variant->device_number1);
ASSERT_GE(self->fd1, 0);
}
@@ -211,6 +264,20 @@ static void hmm_nanosleep(unsigned int n)
nanosleep(&t, NULL);
}
+static int hmm_migrate_sys_to_dev(int fd,
+ struct hmm_buffer *buffer,
+ unsigned long npages)
+{
+ return hmm_dmirror_cmd(fd, HMM_DMIRROR_MIGRATE_TO_DEV, buffer, npages);
+}
+
+static int hmm_migrate_dev_to_sys(int fd,
+ struct hmm_buffer *buffer,
+ unsigned long npages)
+{
+ return hmm_dmirror_cmd(fd, HMM_DMIRROR_MIGRATE_TO_SYS, buffer, npages);
+}
+
/*
* Simple NULL test of device open/close.
*/
@@ -875,7 +942,7 @@ TEST_F(hmm, migrate)
ptr[i] = i;
/* Migrate memory to device. */
- ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, npages);
@@ -923,7 +990,7 @@ TEST_F(hmm, migrate_fault)
ptr[i] = i;
/* Migrate memory to device. */
- ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, npages);
@@ -936,7 +1003,7 @@ TEST_F(hmm, migrate_fault)
ASSERT_EQ(ptr[i], i);
/* Migrate memory to the device again. */
- ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, npages);
@@ -976,7 +1043,7 @@ TEST_F(hmm, migrate_shared)
ASSERT_NE(buffer->ptr, MAP_FAILED);
/* Migrate memory to device. */
- ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
ASSERT_EQ(ret, -ENOENT);
hmm_buffer_free(buffer);
@@ -1015,7 +1082,7 @@ TEST_F(hmm2, migrate_mixed)
p = buffer->ptr;
/* Migrating a protected area should be an error. */
- ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, npages);
+ ret = hmm_migrate_sys_to_dev(self->fd1, buffer, npages);
ASSERT_EQ(ret, -EINVAL);
/* Punch a hole after the first page address. */
@@ -1023,7 +1090,7 @@ TEST_F(hmm2, migrate_mixed)
ASSERT_EQ(ret, 0);
/* We expect an error if the vma doesn't cover the range. */
- ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 3);
+ ret = hmm_migrate_sys_to_dev(self->fd1, buffer, 3);
ASSERT_EQ(ret, -EINVAL);
/* Page 2 will be a read-only zero page. */
@@ -1055,13 +1122,13 @@ TEST_F(hmm2, migrate_mixed)
/* Now try to migrate pages 2-5 to device 1. */
buffer->ptr = p + 2 * self->page_size;
- ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 4);
+ ret = hmm_migrate_sys_to_dev(self->fd1, buffer, 4);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, 4);
/* Page 5 won't be migrated to device 0 because it's on device 1. */
buffer->ptr = p + 5 * self->page_size;
- ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_MIGRATE, buffer, 1);
+ ret = hmm_migrate_sys_to_dev(self->fd0, buffer, 1);
ASSERT_EQ(ret, -ENOENT);
buffer->ptr = p;
@@ -1070,8 +1137,12 @@ TEST_F(hmm2, migrate_mixed)
}
/*
- * Migrate anonymous memory to device private memory and fault it back to system
- * memory multiple times.
+ * Migrate anonymous memory to device memory and back to system memory
+ * multiple times. In case of private zone configuration, this is done
+ * through fault pages accessed by CPU. In case of coherent zone configuration,
+ * the pages from the device should be explicitly migrated back to system memory.
+ * The reason is Coherent device zone has coherent access by CPU, therefore
+ * it will not generate any page fault.
*/
TEST_F(hmm, migrate_multiple)
{
@@ -1107,8 +1178,7 @@ TEST_F(hmm, migrate_multiple)
ptr[i] = i;
/* Migrate memory to device. */
- ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer,
- npages);
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, npages);
@@ -1116,7 +1186,13 @@ TEST_F(hmm, migrate_multiple)
for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
ASSERT_EQ(ptr[i], i);
- /* Fault pages back to system memory and check them. */
+ /* Migrate back to system memory and check them. */
+ if (hmm_is_coherent_type(variant->device_number)) {
+ ret = hmm_migrate_dev_to_sys(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ }
+
for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
ASSERT_EQ(ptr[i], i);
@@ -1354,13 +1430,13 @@ TEST_F(hmm2, snapshot)
/* Page 5 will be migrated to device 0. */
buffer->ptr = p + 5 * self->page_size;
- ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_MIGRATE, buffer, 1);
+ ret = hmm_migrate_sys_to_dev(self->fd0, buffer, 1);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, 1);
/* Page 6 will be migrated to device 1. */
buffer->ptr = p + 6 * self->page_size;
- ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 1);
+ ret = hmm_migrate_sys_to_dev(self->fd1, buffer, 1);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, 1);
@@ -1377,9 +1453,16 @@ TEST_F(hmm2, snapshot)
ASSERT_EQ(m[2], HMM_DMIRROR_PROT_ZERO | HMM_DMIRROR_PROT_READ);
ASSERT_EQ(m[3], HMM_DMIRROR_PROT_READ);
ASSERT_EQ(m[4], HMM_DMIRROR_PROT_WRITE);
- ASSERT_EQ(m[5], HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL |
- HMM_DMIRROR_PROT_WRITE);
- ASSERT_EQ(m[6], HMM_DMIRROR_PROT_NONE);
+ if (!hmm_is_coherent_type(variant->device_number0)) {
+ ASSERT_EQ(m[5], HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL |
+ HMM_DMIRROR_PROT_WRITE);
+ ASSERT_EQ(m[6], HMM_DMIRROR_PROT_NONE);
+ } else {
+ ASSERT_EQ(m[5], HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL |
+ HMM_DMIRROR_PROT_WRITE);
+ ASSERT_EQ(m[6], HMM_DMIRROR_PROT_DEV_COHERENT_REMOTE |
+ HMM_DMIRROR_PROT_WRITE);
+ }
hmm_buffer_free(buffer);
}
@@ -1520,9 +1603,19 @@ TEST_F(hmm2, double_map)
for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
ASSERT_EQ(ptr[i], i);
- /* Punch a hole after the first page address. */
- ret = munmap(buffer->ptr + self->page_size, self->page_size);
+ /* Migrate pages to device 1 and try to read from device 0. */
+ ret = hmm_migrate_sys_to_dev(self->fd1, buffer, npages);
ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_READ, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what device 0 read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
hmm_buffer_free(buffer);
}
@@ -1685,4 +1778,190 @@ TEST_F(hmm, exclusive_cow)
hmm_buffer_free(buffer);
}
+static int gup_test_exec(int gup_fd, unsigned long addr, int cmd,
+ int npages, int size, int flags)
+{
+ struct gup_test gup = {
+ .nr_pages_per_call = npages,
+ .addr = addr,
+ .gup_flags = FOLL_WRITE | flags,
+ .size = size,
+ };
+
+ if (ioctl(gup_fd, cmd, &gup)) {
+ perror("ioctl on error\n");
+ return errno;
+ }
+
+ return 0;
+}
+
+/*
+ * Test get user device pages through gup_test. Setting PIN_LONGTERM flag.
+ * This should trigger a migration back to system memory for both, private
+ * and coherent type pages.
+ * This test makes use of gup_test module. Make sure GUP_TEST_CONFIG is added
+ * to your configuration before you run it.
+ */
+TEST_F(hmm, hmm_gup_test)
+{
+ struct hmm_buffer *buffer;
+ int gup_fd;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+ unsigned char *m;
+
+ gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
+ if (gup_fd == -1)
+ SKIP(return, "Skipping test, could not find gup_test driver");
+
+ npages = 4;
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ ASSERT_EQ(gup_test_exec(gup_fd,
+ (unsigned long)buffer->ptr,
+ GUP_BASIC_TEST, 1, self->page_size, 0), 0);
+ ASSERT_EQ(gup_test_exec(gup_fd,
+ (unsigned long)buffer->ptr + 1 * self->page_size,
+ GUP_FAST_BENCHMARK, 1, self->page_size, 0), 0);
+ ASSERT_EQ(gup_test_exec(gup_fd,
+ (unsigned long)buffer->ptr + 2 * self->page_size,
+ PIN_FAST_BENCHMARK, 1, self->page_size, FOLL_LONGTERM), 0);
+ ASSERT_EQ(gup_test_exec(gup_fd,
+ (unsigned long)buffer->ptr + 3 * self->page_size,
+ PIN_LONGTERM_BENCHMARK, 1, self->page_size, 0), 0);
+
+ /* Take snapshot to CPU pagetables */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ m = buffer->mirror;
+ if (hmm_is_coherent_type(variant->device_number)) {
+ ASSERT_EQ(HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL | HMM_DMIRROR_PROT_WRITE, m[0]);
+ ASSERT_EQ(HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL | HMM_DMIRROR_PROT_WRITE, m[1]);
+ } else {
+ ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[0]);
+ ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[1]);
+ }
+ ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[2]);
+ ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[3]);
+ /*
+ * Check again the content on the pages. Make sure there's no
+ * corrupted data.
+ */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ close(gup_fd);
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Test copy-on-write in device pages.
+ * In case of writing to COW private page(s), a page fault will migrate pages
+ * back to system memory first. Then, these pages will be duplicated. In case
+ * of COW device coherent type, pages are duplicated directly from device
+ * memory.
+ */
+TEST_F(hmm, hmm_cow_in_device)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+ unsigned char *m;
+ pid_t pid;
+ int status;
+
+ npages = 4;
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ pid = fork();
+ if (pid == -1)
+ ASSERT_EQ(pid, 0);
+ if (!pid) {
+ /* Child process waits for SIGTERM from the parent. */
+ while (1) {
+ }
+ perror("Should not reach this\n");
+ exit(0);
+ }
+ /* Parent process writes to COW pages(s) and gets a
+ * new copy in system. In case of device private pages,
+ * this write causes a migration to system mem first.
+ */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Terminate child and wait */
+ EXPECT_EQ(0, kill(pid, SIGTERM));
+ EXPECT_EQ(pid, waitpid(pid, &status, 0));
+ EXPECT_NE(0, WIFSIGNALED(status));
+ EXPECT_EQ(SIGTERM, WTERMSIG(status));
+
+ /* Take snapshot to CPU pagetables */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ m = buffer->mirror;
+ for (i = 0; i < npages; i++)
+ ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[i]);
+
+ hmm_buffer_free(buffer);
+}
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/vm/hugepage-mremap.c b/tools/testing/selftests/vm/hugepage-mremap.c
index 585978f181ed..e63a0214f639 100644
--- a/tools/testing/selftests/vm/hugepage-mremap.c
+++ b/tools/testing/selftests/vm/hugepage-mremap.c
@@ -107,7 +107,7 @@ static void register_region_with_uffd(char *addr, size_t len)
int main(int argc, char *argv[])
{
- size_t length;
+ size_t length = 0;
if (argc != 2 && argc != 3) {
printf("Usage: %s [length_in_MB] <hugetlb_file>\n", argv[0]);
diff --git a/tools/testing/selftests/vm/hugetlb-madvise.c b/tools/testing/selftests/vm/hugetlb-madvise.c
index 6c6af40f5747..3c9943131881 100644
--- a/tools/testing/selftests/vm/hugetlb-madvise.c
+++ b/tools/testing/selftests/vm/hugetlb-madvise.c
@@ -89,10 +89,11 @@ void write_fault_pages(void *addr, unsigned long nr_pages)
void read_fault_pages(void *addr, unsigned long nr_pages)
{
- unsigned long i, tmp;
+ unsigned long dummy = 0;
+ unsigned long i;
for (i = 0; i < nr_pages; i++)
- tmp += *((unsigned long *)(addr + (i * huge_page_size)));
+ dummy += *((unsigned long *)(addr + (i * huge_page_size)));
}
int main(int argc, char **argv)
diff --git a/tools/testing/selftests/vm/mrelease_test.c b/tools/testing/selftests/vm/mrelease_test.c
index 96671c2f7d48..6c62966ab5db 100644
--- a/tools/testing/selftests/vm/mrelease_test.c
+++ b/tools/testing/selftests/vm/mrelease_test.c
@@ -62,19 +62,22 @@ static int alloc_noexit(unsigned long nr_pages, int pipefd)
/* The process_mrelease calls in this test are expected to fail */
static void run_negative_tests(int pidfd)
{
+ int res;
/* Test invalid flags. Expect to fail with EINVAL error code. */
if (!syscall(__NR_process_mrelease, pidfd, (unsigned int)-1) ||
errno != EINVAL) {
+ res = (errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL);
perror("process_mrelease with wrong flags");
- exit(errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL);
+ exit(res);
}
/*
* Test reaping while process is alive with no pending SIGKILL.
* Expect to fail with EINVAL error code.
*/
if (!syscall(__NR_process_mrelease, pidfd, 0) || errno != EINVAL) {
+ res = (errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL);
perror("process_mrelease on a live process");
- exit(errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL);
+ exit(res);
}
}
@@ -100,8 +103,9 @@ int main(void)
/* Test a wrong pidfd */
if (!syscall(__NR_process_mrelease, -1, 0) || errno != EBADF) {
+ res = (errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL);
perror("process_mrelease with wrong pidfd");
- exit(errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL);
+ exit(res);
}
/* Start the test with 1MB child memory allocation */
@@ -156,8 +160,9 @@ retry:
run_negative_tests(pidfd);
if (kill(pid, SIGKILL)) {
+ res = (errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL);
perror("kill");
- exit(errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL);
+ exit(res);
}
success = (syscall(__NR_process_mrelease, pidfd, 0) == 0);
@@ -172,9 +177,10 @@ retry:
if (errno == ESRCH) {
retry = (size <= MAX_SIZE_MB);
} else {
+ res = (errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL);
perror("process_mrelease");
waitpid(pid, NULL, 0);
- exit(errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL);
+ exit(res);
}
}
diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh
index 41fce8bea929..de86983b8a0f 100755
--- a/tools/testing/selftests/vm/run_vmtests.sh
+++ b/tools/testing/selftests/vm/run_vmtests.sh
@@ -151,7 +151,7 @@ if [ $VADDR64 -ne 0 ]; then
run_test ./virtual_address_range
# virtual address 128TB switch test
- run_test ./va_128TBswitch
+ run_test ./va_128TBswitch.sh
fi # VADDR64
# vmalloc stability smoke test
@@ -179,4 +179,17 @@ run_test ./ksm_tests -N -m 1
# KSM test with 2 NUMA nodes and merge_across_nodes = 0
run_test ./ksm_tests -N -m 0
+# protection_keys tests
+if [ -x ./protection_keys_32 ]
+then
+ run_test ./protection_keys_32
+fi
+
+if [ -x ./protection_keys_64 ]
+then
+ run_test ./protection_keys_64
+fi
+
+run_test ./soft-dirty
+
exit $exitcode
diff --git a/tools/testing/selftests/vm/soft-dirty.c b/tools/testing/selftests/vm/soft-dirty.c
index 08ab62a4a9d0..e3a43f5d4fa2 100644
--- a/tools/testing/selftests/vm/soft-dirty.c
+++ b/tools/testing/selftests/vm/soft-dirty.c
@@ -121,13 +121,76 @@ static void test_hugepage(int pagemap_fd, int pagesize)
free(map);
}
+static void test_mprotect(int pagemap_fd, int pagesize, bool anon)
+{
+ const char *type[] = {"file", "anon"};
+ const char *fname = "./soft-dirty-test-file";
+ int test_fd;
+ char *map;
+
+ if (anon) {
+ map = mmap(NULL, pagesize, PROT_READ|PROT_WRITE,
+ MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ if (!map)
+ ksft_exit_fail_msg("anon mmap failed\n");
+ } else {
+ test_fd = open(fname, O_RDWR | O_CREAT);
+ if (test_fd < 0) {
+ ksft_test_result_skip("Test %s open() file failed\n", __func__);
+ return;
+ }
+ unlink(fname);
+ ftruncate(test_fd, pagesize);
+ map = mmap(NULL, pagesize, PROT_READ|PROT_WRITE,
+ MAP_SHARED, test_fd, 0);
+ if (!map)
+ ksft_exit_fail_msg("file mmap failed\n");
+ }
+
+ *map = 1;
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 1,
+ "Test %s-%s dirty bit of new written page\n",
+ __func__, type[anon]);
+ clear_softdirty();
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 0,
+ "Test %s-%s soft-dirty clear after clear_refs\n",
+ __func__, type[anon]);
+ mprotect(map, pagesize, PROT_READ);
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 0,
+ "Test %s-%s soft-dirty clear after marking RO\n",
+ __func__, type[anon]);
+ mprotect(map, pagesize, PROT_READ|PROT_WRITE);
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 0,
+ "Test %s-%s soft-dirty clear after marking RW\n",
+ __func__, type[anon]);
+ *map = 2;
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 1,
+ "Test %s-%s soft-dirty after rewritten\n",
+ __func__, type[anon]);
+
+ munmap(map, pagesize);
+
+ if (!anon)
+ close(test_fd);
+}
+
+static void test_mprotect_anon(int pagemap_fd, int pagesize)
+{
+ test_mprotect(pagemap_fd, pagesize, true);
+}
+
+static void test_mprotect_file(int pagemap_fd, int pagesize)
+{
+ test_mprotect(pagemap_fd, pagesize, false);
+}
+
int main(int argc, char **argv)
{
int pagemap_fd;
int pagesize;
ksft_print_header();
- ksft_set_plan(5);
+ ksft_set_plan(15);
pagemap_fd = open(PAGEMAP_FILE_PATH, O_RDONLY);
if (pagemap_fd < 0)
@@ -138,6 +201,8 @@ int main(int argc, char **argv)
test_simple(pagemap_fd, pagesize);
test_vma_reuse(pagemap_fd, pagesize);
test_hugepage(pagemap_fd, pagesize);
+ test_mprotect_anon(pagemap_fd, pagesize);
+ test_mprotect_file(pagemap_fd, pagesize);
close(pagemap_fd);
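
The soft-dirty tests added above work against two procfs interfaces: per-page flags in /proc/<pid>/pagemap (bit 55 is the soft-dirty flag) and /proc/<pid>/clear_refs (writing "4" clears soft-dirty state). As a rough illustration only, not part of the patch, the same check can be sketched in Python; it assumes a kernel built with CONFIG_MEM_SOFT_DIRTY and, depending on configuration, may need elevated privileges:

#!/usr/bin/env python3
import ctypes, mmap, os, struct

PAGE_SIZE = os.sysconf("SC_PAGE_SIZE")
SOFT_DIRTY_BIT = 55  # pagemap bit 55: pte is soft-dirty (see Documentation/admin-guide/mm/pagemap.rst)

def is_soft_dirty(vaddr):
    # Each pagemap entry is 8 bytes per virtual page of this process.
    with open("/proc/self/pagemap", "rb") as f:
        f.seek((vaddr // PAGE_SIZE) * 8)
        entry, = struct.unpack("<Q", f.read(8))
    return bool(entry & (1 << SOFT_DIRTY_BIT))

buf = mmap.mmap(-1, PAGE_SIZE, flags=mmap.MAP_PRIVATE | mmap.MAP_ANONYMOUS)
addr = ctypes.addressof(ctypes.c_char.from_buffer(buf))

buf[0:1] = b"\x01"              # write fault: kernel marks the page soft-dirty
print("after write:", is_soft_dirty(addr))

with open("/proc/self/clear_refs", "w") as f:
    f.write("4")                # "4" clears soft-dirty bits for this process's mappings
print("after clear_refs:", is_soft_dirty(addr))

buf[0:1] = b"\x02"              # the next write fault should set the bit again
print("after rewrite:", is_soft_dirty(addr))

The new C tests assert the same sequence around mprotect() transitions: the bit must stay clear across PROT_READ and PROT_READ|PROT_WRITE changes and only come back on an actual write.
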
diff --git a/tools/testing/selftests/vm/test_hmm.sh b/tools/testing/selftests/vm/test_hmm.sh
index 0647b525a625..539c9371e592 100755
--- a/tools/testing/selftests/vm/test_hmm.sh
+++ b/tools/testing/selftests/vm/test_hmm.sh
@@ -40,11 +40,26 @@ check_test_requirements()
load_driver()
{
- modprobe $DRIVER > /dev/null 2>&1
+ if [ $# -eq 0 ]; then
+ modprobe $DRIVER > /dev/null 2>&1
+ else
+ if [ $# -eq 2 ]; then
+ modprobe $DRIVER spm_addr_dev0=$1 spm_addr_dev1=$2
+ > /dev/null 2>&1
+ else
+ echo "Missing module parameters. Make sure pass"\
+ "spm_addr_dev0 and spm_addr_dev1"
+ usage
+ fi
+ fi
if [ $? == 0 ]; then
major=$(awk "\$2==\"HMM_DMIRROR\" {print \$1}" /proc/devices)
mknod /dev/hmm_dmirror0 c $major 0
mknod /dev/hmm_dmirror1 c $major 1
+ if [ $# -eq 2 ]; then
+ mknod /dev/hmm_dmirror2 c $major 2
+ mknod /dev/hmm_dmirror3 c $major 3
+ fi
fi
}
@@ -58,7 +73,7 @@ run_smoke()
{
echo "Running smoke test. Note, this test provides basic coverage."
- load_driver
+ load_driver $1 $2
$(dirname "${BASH_SOURCE[0]}")/hmm-tests
unload_driver
}
@@ -75,6 +90,9 @@ usage()
echo "# Smoke testing"
echo "./${TEST_NAME}.sh smoke"
echo
+ echo "# Smoke testing with SPM enabled"
+ echo "./${TEST_NAME}.sh smoke <spm_addr_dev0> <spm_addr_dev1>"
+ echo
exit 0
}
@@ -84,7 +102,7 @@ function run_test()
usage
else
if [ "$1" = "smoke" ]; then
- run_smoke
+ run_smoke $2 $3
else
usage
fi
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 4bc24581760d..7c3f1b0ab468 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -931,7 +931,7 @@ static int faulting_process(int signal_test)
unsigned long split_nr_pages;
unsigned long lastnr;
struct sigaction act;
- unsigned long signalled = 0;
+ volatile unsigned long signalled = 0;
split_nr_pages = (nr_pages + 1) / 2;
@@ -946,7 +946,7 @@ static int faulting_process(int signal_test)
}
for (nr = 0; nr < split_nr_pages; nr++) {
- int steps = 1;
+ volatile int steps = 1;
unsigned long offset = nr * page_size;
if (signal_test) {
diff --git a/tools/testing/selftests/vm/va_128TBswitch.c b/tools/testing/selftests/vm/va_128TBswitch.c
index da6ec3b53ea8..1d2068989883 100644
--- a/tools/testing/selftests/vm/va_128TBswitch.c
+++ b/tools/testing/selftests/vm/va_128TBswitch.c
@@ -231,7 +231,7 @@ static struct testcase hugetlb_testcases[] = {
static int run_test(struct testcase *test, int count)
{
void *p;
- int i, ret = 0;
+ int i, ret = KSFT_PASS;
for (i = 0; i < count; i++) {
struct testcase *t = test + i;
@@ -242,13 +242,13 @@ static int run_test(struct testcase *test, int count)
if (p == MAP_FAILED) {
printf("FAILED\n");
- ret = 1;
+ ret = KSFT_FAIL;
continue;
}
if (t->low_addr_required && p >= (void *)(ADDR_SWITCH_HINT)) {
printf("FAILED\n");
- ret = 1;
+ ret = KSFT_FAIL;
} else {
/*
* Do a dereference of the address returned so that we catch
@@ -280,7 +280,7 @@ int main(int argc, char **argv)
int ret;
if (!supported_arch())
- return 0;
+ return KSFT_SKIP;
ret = run_test(testcases, ARRAY_SIZE(testcases));
if (argc == 2 && !strcmp(argv[1], "--run-hugetlb"))
diff --git a/tools/testing/selftests/vm/va_128TBswitch.sh b/tools/testing/selftests/vm/va_128TBswitch.sh
new file mode 100755
index 000000000000..41580751dc51
--- /dev/null
+++ b/tools/testing/selftests/vm/va_128TBswitch.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2022 Adam Sindelar (Meta) <adam@wowsignal.io>
+#
+# This is a test for mmap behavior with 5-level paging. This script wraps the
+# real test to check that the kernel is configured to support at least 5
+# pagetable levels.
+
+# 1 means the test failed
+exitcode=1
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+fail()
+{
+ echo "$1"
+ exit $exitcode
+}
+
+check_supported_x86_64()
+{
+ local config="/proc/config.gz"
+ [[ -f "${config}" ]] || config="/boot/config-$(uname -r)"
+ [[ -f "${config}" ]] || fail "Cannot find kernel config in /proc or /boot"
+
+ # gzip -dcfq automatically handles both compressed and plaintext input.
+ # See man 1 gzip under '-f'.
+ local pg_table_levels=$(gzip -dcfq "${config}" | grep PGTABLE_LEVELS | cut -d'=' -f 2)
+
+ if [[ "${pg_table_levels}" -lt 5 ]]; then
+ echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test"
+ exit $ksft_skip
+ fi
+}
+
+check_test_requirements()
+{
+ # The test supports x86_64 and powerpc64. We currently have no useful
+ # eligibility check for powerpc64, and the test itself will reject other
+ # architectures.
+ case `uname -m` in
+ "x86_64")
+ check_supported_x86_64
+ ;;
+ *)
+ return 0
+ ;;
+ esac
+}
+
+check_test_requirements
+./va_128TBswitch
diff --git a/tools/vm/page_owner_sort.c b/tools/vm/page_owner_sort.c
index c149427eb1c9..ec2e67c85b84 100644
--- a/tools/vm/page_owner_sort.c
+++ b/tools/vm/page_owner_sort.c
@@ -8,7 +8,7 @@
* Or sort by total memory:
* ./page_owner_sort -m page_owner_full.txt sorted_page_owner.txt
*
- * See Documentation/vm/page_owner.rst
+ * See Documentation/mm/page_owner.rst
*/
#include <stdio.h>
@@ -470,23 +470,23 @@ static bool match_str_list(const char *str, char **list, int list_size)
static bool is_need(char *buf)
{
- if ((filter & FILTER_UNRELEASE) && get_free_ts_nsec(buf) != 0)
- return false;
- if ((filter & FILTER_PID) && !match_num_list(get_pid(buf), fc.pids, fc.pids_size))
- return false;
- if ((filter & FILTER_TGID) &&
- !match_num_list(get_tgid(buf), fc.tgids, fc.tgids_size))
- return false;
+ if ((filter & FILTER_UNRELEASE) && get_free_ts_nsec(buf) != 0)
+ return false;
+ if ((filter & FILTER_PID) && !match_num_list(get_pid(buf), fc.pids, fc.pids_size))
+ return false;
+ if ((filter & FILTER_TGID) &&
+ !match_num_list(get_tgid(buf), fc.tgids, fc.tgids_size))
+ return false;
- char *comm = get_comm(buf);
+ char *comm = get_comm(buf);
- if ((filter & FILTER_COMM) &&
- !match_str_list(comm, fc.comms, fc.comms_size)) {
- free(comm);
- return false;
- }
+ if ((filter & FILTER_COMM) &&
+ !match_str_list(comm, fc.comms, fc.comms_size)) {
free(comm);
- return true;
+ return false;
+ }
+ free(comm);
+ return true;
}
static void add_list(char *buf, int len, char *ext_buf)
diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c
index 5b98f3ee58a5..0fffaeedee76 100644
--- a/tools/vm/slabinfo.c
+++ b/tools/vm/slabinfo.c
@@ -125,7 +125,7 @@ static void usage(void)
"-n|--numa Show NUMA information\n"
"-N|--lines=K Show the first K slabs\n"
"-o|--ops Show kmem_cache_ops\n"
- "-P|--partial Sort by number of partial slabs\n"
+ "-P|--partial Sort by number of partial slabs\n"
"-r|--report Detailed report on single slabs\n"
"-s|--shrink Shrink slabs\n"
"-S|--Size Sort by size\n"
@@ -1067,15 +1067,27 @@ static void sort_slabs(void)
for (s2 = s1 + 1; s2 < slabinfo + slabs; s2++) {
int result;
- if (sort_size)
- result = slab_size(s1) < slab_size(s2);
- else if (sort_active)
- result = slab_activity(s1) < slab_activity(s2);
- else if (sort_loss)
- result = slab_waste(s1) < slab_waste(s2);
- else if (sort_partial)
- result = s1->partial < s2->partial;
- else
+ if (sort_size) {
+ if (slab_size(s1) == slab_size(s2))
+ result = strcasecmp(s1->name, s2->name);
+ else
+ result = slab_size(s1) < slab_size(s2);
+ } else if (sort_active) {
+ if (slab_activity(s1) == slab_activity(s2))
+ result = strcasecmp(s1->name, s2->name);
+ else
+ result = slab_activity(s1) < slab_activity(s2);
+ } else if (sort_loss) {
+ if (slab_waste(s1) == slab_waste(s2))
+ result = strcasecmp(s1->name, s2->name);
+ else
+ result = slab_waste(s1) < slab_waste(s2);
+ } else if (sort_partial) {
+ if (s1->partial == s2->partial)
+ result = strcasecmp(s1->name, s2->name);
+ else
+ result = s1->partial < s2->partial;
+ } else
result = strcasecmp(s1->name, s2->name);
if (show_inverted)