From 18ec1eaf58fbf2d9009a752a102a3d8e0d905a0f Mon Sep 17 00:00:00 2001 From: Kirill A. Shutemov Date: Fri, 13 Sep 2019 12:54:52 +0300 Subject: x86/mm: Enable 5-level paging support by default Support for boot-time switching between 4- and 5-level paging modes has been upstream since 4.17. We have been running internal testing with 5-level paging support enabled for a while, and it doesn't cause any functional or performance regressions on 4-level paging hardware. The only 5-level paging related regressions I saw were in early boot code that runs independently of CONFIG_X86_5LEVEL. The next major releases of the distributions are expected to have CONFIG_X86_5LEVEL=y. Enable the option by default. It may help to catch obscure bugs early. Signed-off-by: Kirill A. Shutemov Acked-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: https://lkml.kernel.org/r/20190913095452.40592-1-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 58eae28c3dd6..d4bbebe1d72f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1483,6 +1483,7 @@ config X86_PAE config X86_5LEVEL bool "Enable 5-level page tables support" + default y select DYNAMIC_MEMORY_LAYOUT select SPARSEMEM_VMEMMAP depends on X86_64 -- cgit v1.2.3 From 77df779de742d6616d4ddd177cba152a75259104 Mon Sep 17 00:00:00 2001 From: Sylvain 'ythier' Hitier Date: Sun, 15 Sep 2019 11:09:25 +0200 Subject: x86/cpu: Clean up intel_tlb_table[] - Remove the unneeded backslash at EOL: that's not a macro. And let's please checkpatch by aligning to the open parenthesis. - For the 0x4f descriptor, remove " */" from the info field. - For the 0xc2 descriptor, sync the beginning of the info field to match the tlb_type. (The values of the info fields could be made more regular, but they're unused by the code and will only be read by developers, so don't bother.) Signed-off-by: Sylvain 'ythier' Hitier Cc: Alex Shi Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Hans de Goede Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rafael J.
Wysocki Cc: Ricardo Neri Cc: Rik van Riel Cc: Thomas Gleixner Cc: trivial@kernel.org Link: https://lkml.kernel.org/r/20190915090917.GA5086@lilas Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 8d6d92ebeb54..24e619d1bf79 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -813,7 +813,7 @@ static const struct _tlb_table intel_tlb_table[] = { { 0x04, TLB_DATA_4M, 8, " TLB_DATA 4 MByte pages, 4-way set associative" }, { 0x05, TLB_DATA_4M, 32, " TLB_DATA 4 MByte pages, 4-way set associative" }, { 0x0b, TLB_INST_4M, 4, " TLB_INST 4 MByte pages, 4-way set associative" }, - { 0x4f, TLB_INST_4K, 32, " TLB_INST 4 KByte pages */" }, + { 0x4f, TLB_INST_4K, 32, " TLB_INST 4 KByte pages" }, { 0x50, TLB_INST_ALL, 64, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" }, { 0x51, TLB_INST_ALL, 128, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" }, { 0x52, TLB_INST_ALL, 256, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" }, @@ -841,7 +841,7 @@ static const struct _tlb_table intel_tlb_table[] = { { 0xba, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way associative" }, { 0xc0, TLB_DATA_4K_4M, 8, " TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" }, { 0xc1, STLB_4K_2M, 1024, " STLB 4 KByte and 2 MByte pages, 8-way associative" }, - { 0xc2, TLB_DATA_2M_4M, 16, " DTLB 2 MByte/4MByte pages, 4-way associative" }, + { 0xc2, TLB_DATA_2M_4M, 16, " TLB_DATA 2 MByte/4MByte pages, 4-way associative" }, { 0xca, STLB_4K, 512, " STLB 4 KByte pages, 4-way associative" }, { 0x00, 0, 0 } }; @@ -853,8 +853,8 @@ static void intel_tlb_lookup(const unsigned char desc) return; /* look up this descriptor in the table */ - for (k = 0; intel_tlb_table[k].descriptor != desc && \ - intel_tlb_table[k].descriptor != 0; k++) + for (k = 0; intel_tlb_table[k].descriptor != desc && + intel_tlb_table[k].descriptor != 0; k++) ; if (intel_tlb_table[k].tlb_type == 0) -- cgit v1.2.3 From a2f7a0bfcaaa3928e4876d15edd4dfdc09e139b6 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Wed, 25 Sep 2019 09:44:53 +0800 Subject: x86/mm: Fix function name typo in pmd_read_atomic() comment The function involved should be pte_offset_map_lock() and we never have function pmd_offset_map_lock defined. Signed-off-by: Wei Yang Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190925014453.20236-1-richardw.yang@linux.intel.com [ Minor edits. ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable-3level.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index e3633795fb22..1796462ff143 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -44,10 +44,10 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte) * pmd_populate rightfully does a set_64bit, but if we're reading the * pmd_t with a "*pmdp" on the mincore side, a SMP race can happen * because gcc will not read the 64bit of the pmd atomically. 
To fix - * this all places running pmd_offset_map_lock() while holding the + * this all places running pte_offset_map_lock() while holding the * mmap_sem in read mode, shall read the pmdp pointer using this * function to know if the pmd is null nor not, and in turn to know if - * they can run pmd_offset_map_lock or pmd_trans_huge or other pmd + * they can run pte_offset_map_lock() or pmd_trans_huge() or other pmd * operations. * * Without THP if the mmap_sem is hold for reading, the pmd can only -- cgit v1.2.3 From 44e09568cf2d874cb2a8e2ac35acf71a9ae3402b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 25 Sep 2019 08:38:57 +0200 Subject: x86/mm: Clean up the pmd_read_atomic() comments Fix spelling, consistent parenthesis and grammar - and also clarify the language where needed. Reviewed-by: Wei Yang Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable-3level.h | 44 ++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 1796462ff143..5afb5e0fe903 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -36,39 +36,41 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte) #define pmd_read_atomic pmd_read_atomic /* - * pte_offset_map_lock on 32bit PAE kernels was reading the pmd_t with - * a "*pmdp" dereference done by gcc. Problem is, in certain places - * where pte_offset_map_lock is called, concurrent page faults are + * pte_offset_map_lock() on 32-bit PAE kernels was reading the pmd_t with + * a "*pmdp" dereference done by GCC. Problem is, in certain places + * where pte_offset_map_lock() is called, concurrent page faults are * allowed, if the mmap_sem is hold for reading. An example is mincore * vs page faults vs MADV_DONTNEED. On the page fault side - * pmd_populate rightfully does a set_64bit, but if we're reading the + * pmd_populate() rightfully does a set_64bit(), but if we're reading the * pmd_t with a "*pmdp" on the mincore side, a SMP race can happen - * because gcc will not read the 64bit of the pmd atomically. To fix - * this all places running pte_offset_map_lock() while holding the + * because GCC will not read the 64-bit value of the pmd atomically. + * + * To fix this all places running pte_offset_map_lock() while holding the * mmap_sem in read mode, shall read the pmdp pointer using this - * function to know if the pmd is null nor not, and in turn to know if + * function to know if the pmd is null or not, and in turn to know if * they can run pte_offset_map_lock() or pmd_trans_huge() or other pmd * operations. * - * Without THP if the mmap_sem is hold for reading, the pmd can only - * transition from null to not null while pmd_read_atomic runs. So + * Without THP if the mmap_sem is held for reading, the pmd can only + * transition from null to not null while pmd_read_atomic() runs. So * we can always return atomic pmd values with this function. * - * With THP if the mmap_sem is hold for reading, the pmd can become + * With THP if the mmap_sem is held for reading, the pmd can become * trans_huge or none or point to a pte (and in turn become "stable") - * at any time under pmd_read_atomic. We could read it really - * atomically here with a atomic64_read for the THP enabled case (and + * at any time under pmd_read_atomic(). 
We could read it truly + * atomically here with an atomic64_read() for the THP enabled case (and * it would be a whole lot simpler), but to avoid using cmpxchg8b we * only return an atomic pmdval if the low part of the pmdval is later - * found stable (i.e. pointing to a pte). And we're returning a none - * pmdval if the low part of the pmd is none. In some cases the high - * and low part of the pmdval returned may not be consistent if THP is - * enabled (the low part may point to previously mapped hugepage, - * while the high part may point to a more recently mapped hugepage), - * but pmd_none_or_trans_huge_or_clear_bad() only needs the low part - * of the pmd to be read atomically to decide if the pmd is unstable - * or not, with the only exception of when the low part of the pmd is - * zero in which case we return a none pmd. + * found to be stable (i.e. pointing to a pte). We are also returning a + * 'none' (zero) pmdval if the low part of the pmd is zero. + * + * In some cases the high and low part of the pmdval returned may not be + * consistent if THP is enabled (the low part may point to previously + * mapped hugepage, while the high part may point to a more recently + * mapped hugepage), but pmd_none_or_trans_huge_or_clear_bad() only + * needs the low part of the pmd to be read atomically to decide if the + * pmd is unstable or not, with the only exception when the low part + * of the pmd is zero, in which case we return a 'none' pmd. */ static inline pmd_t pmd_read_atomic(pmd_t *pmdp) { -- cgit v1.2.3 From 8d04a5f97a5fa9d7afdf46eda3a5ceaa973a1bcc Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 20 Nov 2019 17:15:58 -0800 Subject: x86/mm/pat: Convert the PAT tree to a generic interval tree With some considerations, the custom pat_rbtree implementation can be simplified to use most of the generic interval_tree machinery: - The tree inorder traversal can slightly differ when there are key ('start') collisions in the tree due to one going left and another right. This, however, only affects the output of debugfs' pat_memtype_list file. - Generic interval trees are now fully closed [a, b], for which we need to adjust the last endpoint (ie: end - 1). - Erasing logic must remain untouched as well. - In order for the types to remain u64, the 'memtype_interval' calls are introduced, as opposed to simply using struct interval_tree. In addition, the PAT tree might potentially also benefit by the fast overlap detection for the insertion case when looking up the first overlapping node in the tree. No change in behavior is intended. Finally, I've tested this on various servers, via sanity warnings, running side by side with the current version and so far see no differences in the returned pointer node when doing memtype_rb_lowest_match() lookups. Signed-off-by: Davidlohr Bueso Reviewed-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Linus Torvalds Link: https://lkml.kernel.org/r/20191121011601.20611-2-dave@stgolabs.net Signed-off-by: Ingo Molnar --- arch/x86/mm/pat_rbtree.c | 162 ++++++++++++----------------------------------- 1 file changed, 42 insertions(+), 120 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c index 65ebe4b88f7c..c3d119cd155d 100644 --- a/arch/x86/mm/pat_rbtree.c +++ b/arch/x86/mm/pat_rbtree.c @@ -5,14 +5,13 @@ * Authors: Venkatesh Pallipadi * Suresh B Siddha * - * Interval tree (augmented rbtree) used to store the PAT memory type - * reservations. 
+ * Interval tree used to store the PAT memory type reservations. */ #include #include #include -#include +#include #include #include @@ -33,72 +32,33 @@ * * memtype_lock protects the rbtree. */ - -static struct rb_root memtype_rbroot = RB_ROOT; - -static int is_node_overlap(struct memtype *node, u64 start, u64 end) +static inline u64 memtype_interval_start(struct memtype *memtype) { - if (node->start >= end || node->end <= start) - return 0; - - return 1; + return memtype->start; } -static u64 get_subtree_max_end(struct rb_node *node) +static inline u64 memtype_interval_end(struct memtype *memtype) { - u64 ret = 0; - if (node) { - struct memtype *data = rb_entry(node, struct memtype, rb); - ret = data->subtree_max_end; - } - return ret; + return memtype->end - 1; } +INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end, + memtype_interval_start, memtype_interval_end, + static, memtype_interval) -#define NODE_END(node) ((node)->end) - -RB_DECLARE_CALLBACKS_MAX(static, memtype_rb_augment_cb, - struct memtype, rb, u64, subtree_max_end, NODE_END) - -/* Find the first (lowest start addr) overlapping range from rb tree */ -static struct memtype *memtype_rb_lowest_match(struct rb_root *root, - u64 start, u64 end) -{ - struct rb_node *node = root->rb_node; - struct memtype *last_lower = NULL; - - while (node) { - struct memtype *data = rb_entry(node, struct memtype, rb); - - if (get_subtree_max_end(node->rb_left) > start) { - /* Lowest overlap if any must be on left side */ - node = node->rb_left; - } else if (is_node_overlap(data, start, end)) { - last_lower = data; - break; - } else if (start >= data->start) { - /* Lowest overlap if any must be on right side */ - node = node->rb_right; - } else { - break; - } - } - return last_lower; /* Returns NULL if there is no overlap */ -} +static struct rb_root_cached memtype_rbroot = RB_ROOT_CACHED; enum { MEMTYPE_EXACT_MATCH = 0, MEMTYPE_END_MATCH = 1 }; -static struct memtype *memtype_rb_match(struct rb_root *root, - u64 start, u64 end, int match_type) +static struct memtype *memtype_match(struct rb_root_cached *root, + u64 start, u64 end, int match_type) { struct memtype *match; - match = memtype_rb_lowest_match(root, start, end); + match = memtype_interval_iter_first(root, start, end); while (match != NULL && match->start < end) { - struct rb_node *node; - if ((match_type == MEMTYPE_EXACT_MATCH) && (match->start == start) && (match->end == end)) return match; @@ -107,26 +67,21 @@ static struct memtype *memtype_rb_match(struct rb_root *root, (match->start < start) && (match->end == end)) return match; - node = rb_next(&match->rb); - if (node) - match = rb_entry(node, struct memtype, rb); - else - match = NULL; + match = memtype_interval_iter_next(match, start, end); } return NULL; /* Returns NULL if there is no match */ } -static int memtype_rb_check_conflict(struct rb_root *root, +static int memtype_rb_check_conflict(struct rb_root_cached *root, u64 start, u64 end, enum page_cache_mode reqtype, enum page_cache_mode *newtype) { - struct rb_node *node; struct memtype *match; enum page_cache_mode found_type = reqtype; - match = memtype_rb_lowest_match(&memtype_rbroot, start, end); + match = memtype_interval_iter_first(&memtype_rbroot, start, end); if (match == NULL) goto success; @@ -136,19 +91,12 @@ static int memtype_rb_check_conflict(struct rb_root *root, dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end); found_type = match->type; - node = rb_next(&match->rb); - while (node) { - match = rb_entry(node, struct memtype, rb); - - if 
(match->start >= end) /* Checked all possible matches */ - goto success; - - if (is_node_overlap(match, start, end) && - match->type != found_type) { + match = memtype_interval_iter_next(match, start, end); + while (match) { + if (match->type != found_type) goto failure; - } - node = rb_next(&match->rb); + match = memtype_interval_iter_next(match, start, end); } success: if (newtype) @@ -163,44 +111,21 @@ failure: return -EBUSY; } -static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata) -{ - struct rb_node **node = &(root->rb_node); - struct rb_node *parent = NULL; - - while (*node) { - struct memtype *data = rb_entry(*node, struct memtype, rb); - - parent = *node; - if (data->subtree_max_end < newdata->end) - data->subtree_max_end = newdata->end; - if (newdata->start <= data->start) - node = &((*node)->rb_left); - else if (newdata->start > data->start) - node = &((*node)->rb_right); - } - - newdata->subtree_max_end = newdata->end; - rb_link_node(&newdata->rb, parent, node); - rb_insert_augmented(&newdata->rb, root, &memtype_rb_augment_cb); -} - int rbt_memtype_check_insert(struct memtype *new, enum page_cache_mode *ret_type) { int err = 0; err = memtype_rb_check_conflict(&memtype_rbroot, new->start, new->end, - new->type, ret_type); + new->type, ret_type); + if (err) + return err; - if (!err) { - if (ret_type) - new->type = *ret_type; + if (ret_type) + new->type = *ret_type; - new->subtree_max_end = new->end; - memtype_rb_insert(&memtype_rbroot, new); - } - return err; + memtype_interval_insert(new, &memtype_rbroot); + return 0; } struct memtype *rbt_memtype_erase(u64 start, u64 end) @@ -214,26 +139,23 @@ struct memtype *rbt_memtype_erase(u64 start, u64 end) * it then checks with END_MATCH, i.e. shrink the size of a node * from the end for the mremap case. 
*/ - data = memtype_rb_match(&memtype_rbroot, start, end, - MEMTYPE_EXACT_MATCH); + data = memtype_match(&memtype_rbroot, start, end, + MEMTYPE_EXACT_MATCH); if (!data) { - data = memtype_rb_match(&memtype_rbroot, start, end, - MEMTYPE_END_MATCH); + data = memtype_match(&memtype_rbroot, start, end, + MEMTYPE_END_MATCH); if (!data) return ERR_PTR(-EINVAL); } if (data->start == start) { /* munmap: erase this node */ - rb_erase_augmented(&data->rb, &memtype_rbroot, - &memtype_rb_augment_cb); + memtype_interval_remove(data, &memtype_rbroot); } else { /* mremap: update the end value of this node */ - rb_erase_augmented(&data->rb, &memtype_rbroot, - &memtype_rb_augment_cb); + memtype_interval_remove(data, &memtype_rbroot); data->end = start; - data->subtree_max_end = data->end; - memtype_rb_insert(&memtype_rbroot, data); + memtype_interval_insert(data, &memtype_rbroot); return NULL; } @@ -242,24 +164,24 @@ struct memtype *rbt_memtype_erase(u64 start, u64 end) struct memtype *rbt_memtype_lookup(u64 addr) { - return memtype_rb_lowest_match(&memtype_rbroot, addr, addr + PAGE_SIZE); + return memtype_interval_iter_first(&memtype_rbroot, addr, + addr + PAGE_SIZE); } #if defined(CONFIG_DEBUG_FS) int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos) { - struct rb_node *node; + struct memtype *match; int i = 1; - node = rb_first(&memtype_rbroot); - while (node && pos != i) { - node = rb_next(node); + match = memtype_interval_iter_first(&memtype_rbroot, 0, ULONG_MAX); + while (match && pos != i) { + match = memtype_interval_iter_next(match, 0, ULONG_MAX); i++; } - if (node) { /* pos == i */ - struct memtype *this = rb_entry(node, struct memtype, rb); - *out = *this; + if (match) { /* pos == i */ + *out = *match; return 0; } else { return 1; -- cgit v1.2.3 From 6a9930b1c50d83facfa0f78e4f2f9ba0364f43f3 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 20 Nov 2019 17:15:59 -0800 Subject: x86/mm/pat: Do not pass 'rb_root' down the memtype tree helper functions Get rid of passing the rb_root down the helper calls; there is only one: &memtype_rbroot. No change in functionality. [ mingo: Fixed the changelog which described a different version of the patch.
] Signed-off-by: Davidlohr Bueso Reviewed-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Linus Torvalds Link: https://lkml.kernel.org/r/20191121011601.20611-3-dave@stgolabs.net Signed-off-by: Ingo Molnar --- arch/x86/mm/pat_rbtree.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c index c3d119cd155d..d31ca773d4bb 100644 --- a/arch/x86/mm/pat_rbtree.c +++ b/arch/x86/mm/pat_rbtree.c @@ -52,12 +52,11 @@ enum { MEMTYPE_END_MATCH = 1 }; -static struct memtype *memtype_match(struct rb_root_cached *root, - u64 start, u64 end, int match_type) +static struct memtype *memtype_match(u64 start, u64 end, int match_type) { struct memtype *match; - match = memtype_interval_iter_first(root, start, end); + match = memtype_interval_iter_first(&memtype_rbroot, start, end); while (match != NULL && match->start < end) { if ((match_type == MEMTYPE_EXACT_MATCH) && (match->start == start) && (match->end == end)) @@ -73,10 +72,9 @@ static struct memtype *memtype_match(struct rb_root_cached *root, return NULL; /* Returns NULL if there is no match */ } -static int memtype_rb_check_conflict(struct rb_root_cached *root, - u64 start, u64 end, - enum page_cache_mode reqtype, - enum page_cache_mode *newtype) +static int memtype_check_conflict(u64 start, u64 end, + enum page_cache_mode reqtype, + enum page_cache_mode *newtype) { struct memtype *match; enum page_cache_mode found_type = reqtype; @@ -116,8 +114,7 @@ int rbt_memtype_check_insert(struct memtype *new, { int err = 0; - err = memtype_rb_check_conflict(&memtype_rbroot, new->start, new->end, - new->type, ret_type); + err = memtype_check_conflict(new->start, new->end, new->type, ret_type); if (err) return err; @@ -139,11 +136,9 @@ struct memtype *rbt_memtype_erase(u64 start, u64 end) * it then checks with END_MATCH, i.e. shrink the size of a node * from the end for the mremap case. */ - data = memtype_match(&memtype_rbroot, start, end, - MEMTYPE_EXACT_MATCH); + data = memtype_match(start, end, MEMTYPE_EXACT_MATCH); if (!data) { - data = memtype_match(&memtype_rbroot, start, end, - MEMTYPE_END_MATCH); + data = memtype_match(start, end, MEMTYPE_END_MATCH); if (!data) return ERR_PTR(-EINVAL); } -- cgit v1.2.3 From 511aaca834fe2dc0b652406bda6283842fdc70ce Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 20 Nov 2019 17:16:00 -0800 Subject: x86/mm/pat: Drop the rbt_ prefix from external memtype calls Drop the rbt_memtype_*() call rbt_ prefix, as we no longer use an rbtree directly. 
Signed-off-by: Davidlohr Bueso Reviewed-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Linus Torvalds Link: https://lkml.kernel.org/r/20191121011601.20611-4-dave@stgolabs.net Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 8 ++++---- arch/x86/mm/pat_internal.h | 20 ++++++++++---------- arch/x86/mm/pat_rbtree.c | 12 ++++++------ 3 files changed, 20 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index d9fbd4f69920..2d758e19ef22 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -603,7 +603,7 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type, spin_lock(&memtype_lock); - err = rbt_memtype_check_insert(new, new_type); + err = memtype_check_insert(new, new_type); if (err) { pr_info("x86/PAT: reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n", start, end - 1, @@ -650,7 +650,7 @@ int free_memtype(u64 start, u64 end) } spin_lock(&memtype_lock); - entry = rbt_memtype_erase(start, end); + entry = memtype_erase(start, end); spin_unlock(&memtype_lock); if (IS_ERR(entry)) { @@ -693,7 +693,7 @@ static enum page_cache_mode lookup_memtype(u64 paddr) spin_lock(&memtype_lock); - entry = rbt_memtype_lookup(paddr); + entry = memtype_lookup(paddr); if (entry != NULL) rettype = entry->type; else @@ -1109,7 +1109,7 @@ static struct memtype *memtype_get_idx(loff_t pos) return NULL; spin_lock(&memtype_lock); - ret = rbt_memtype_copy_nth_element(print_entry, pos); + ret = memtype_copy_nth_element(print_entry, pos); spin_unlock(&memtype_lock); if (!ret) { diff --git a/arch/x86/mm/pat_internal.h b/arch/x86/mm/pat_internal.h index eeb5caeb089b..79a06684349e 100644 --- a/arch/x86/mm/pat_internal.h +++ b/arch/x86/mm/pat_internal.h @@ -29,20 +29,20 @@ static inline char *cattr_name(enum page_cache_mode pcm) } #ifdef CONFIG_X86_PAT -extern int rbt_memtype_check_insert(struct memtype *new, - enum page_cache_mode *new_type); -extern struct memtype *rbt_memtype_erase(u64 start, u64 end); -extern struct memtype *rbt_memtype_lookup(u64 addr); -extern int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos); +extern int memtype_check_insert(struct memtype *new, + enum page_cache_mode *new_type); +extern struct memtype *memtype_erase(u64 start, u64 end); +extern struct memtype *memtype_lookup(u64 addr); +extern int memtype_copy_nth_element(struct memtype *out, loff_t pos); #else -static inline int rbt_memtype_check_insert(struct memtype *new, - enum page_cache_mode *new_type) +static inline int memtype_check_insert(struct memtype *new, + enum page_cache_mode *new_type) { return 0; } -static inline struct memtype *rbt_memtype_erase(u64 start, u64 end) +static inline struct memtype *memtype_erase(u64 start, u64 end) { return NULL; } -static inline struct memtype *rbt_memtype_lookup(u64 addr) +static inline struct memtype *memtype_lookup(u64 addr) { return NULL; } -static inline int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos) +static inline int memtype_copy_nth_element(struct memtype *out, loff_t pos) { return 0; } #endif diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c index d31ca773d4bb..47a1bf30748f 100644 --- a/arch/x86/mm/pat_rbtree.c +++ b/arch/x86/mm/pat_rbtree.c @@ -109,8 +109,8 @@ failure: return -EBUSY; } -int rbt_memtype_check_insert(struct memtype *new, - enum page_cache_mode *ret_type) +int memtype_check_insert(struct memtype *new, + enum page_cache_mode *ret_type) { int err = 0; @@ -125,13 +125,13 @@ int rbt_memtype_check_insert(struct memtype *new, return 0; 
} -struct memtype *rbt_memtype_erase(u64 start, u64 end) +struct memtype *memtype_erase(u64 start, u64 end) { struct memtype *data; /* * Since the memtype_rbroot tree allows overlapping ranges, - * rbt_memtype_erase() checks with EXACT_MATCH first, i.e. free + * memtype_erase() checks with EXACT_MATCH first, i.e. free * a whole node for the munmap case. If no such entry is found, * it then checks with END_MATCH, i.e. shrink the size of a node * from the end for the mremap case. @@ -157,14 +157,14 @@ struct memtype *rbt_memtype_erase(u64 start, u64 end) return data; } -struct memtype *rbt_memtype_lookup(u64 addr) +struct memtype *memtype_lookup(u64 addr) { return memtype_interval_iter_first(&memtype_rbroot, addr, addr + PAGE_SIZE); } #if defined(CONFIG_DEBUG_FS) -int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos) +int memtype_copy_nth_element(struct memtype *out, loff_t pos) { struct memtype *match; int i = 1; -- cgit v1.2.3 From 7f264dab5b60343358e788d4c939c166c22ea4a2 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 20 Nov 2019 17:16:01 -0800 Subject: x86/mm/pat: Rename pat_rbtree.c to pat_interval.c Considering the previous changes, this is a more proper name. Signed-off-by: Davidlohr Bueso Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Linus Torvalds Link: https://lkml.kernel.org/r/20191121011601.20611-5-dave@stgolabs.net Signed-off-by: Ingo Molnar --- arch/x86/mm/Makefile | 2 +- arch/x86/mm/pat_interval.c | 185 +++++++++++++++++++++++++++++++++++++++++++++ arch/x86/mm/pat_rbtree.c | 185 --------------------------------------------- 3 files changed, 186 insertions(+), 186 deletions(-) create mode 100644 arch/x86/mm/pat_interval.c delete mode 100644 arch/x86/mm/pat_rbtree.c (limited to 'arch') diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 84373dc9b341..de403df8eadc 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -23,7 +23,7 @@ CFLAGS_mem_encrypt_identity.o := $(nostackp) CFLAGS_fault.o := -I $(srctree)/$(src)/../include/asm/trace -obj-$(CONFIG_X86_PAT) += pat_rbtree.o +obj-$(CONFIG_X86_PAT) += pat_interval.o obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o diff --git a/arch/x86/mm/pat_interval.c b/arch/x86/mm/pat_interval.c new file mode 100644 index 000000000000..47a1bf30748f --- /dev/null +++ b/arch/x86/mm/pat_interval.c @@ -0,0 +1,185 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Handle caching attributes in page tables (PAT) + * + * Authors: Venkatesh Pallipadi + * Suresh B Siddha + * + * Interval tree used to store the PAT memory type reservations. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "pat_internal.h" + +/* + * The memtype tree keeps track of memory type for specific + * physical memory areas. Without proper tracking, conflicting memory + * types in different mappings can cause CPU cache corruption. + * + * The tree is an interval tree (augmented rbtree) with tree ordered + * on starting address. Tree can contain multiple entries for + * different regions which overlap. All the aliases have the same + * cache attributes of course. + * + * memtype_lock protects the rbtree. 
+ */ +static inline u64 memtype_interval_start(struct memtype *memtype) +{ + return memtype->start; +} + +static inline u64 memtype_interval_end(struct memtype *memtype) +{ + return memtype->end - 1; +} +INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end, + memtype_interval_start, memtype_interval_end, + static, memtype_interval) + +static struct rb_root_cached memtype_rbroot = RB_ROOT_CACHED; + +enum { + MEMTYPE_EXACT_MATCH = 0, + MEMTYPE_END_MATCH = 1 +}; + +static struct memtype *memtype_match(u64 start, u64 end, int match_type) +{ + struct memtype *match; + + match = memtype_interval_iter_first(&memtype_rbroot, start, end); + while (match != NULL && match->start < end) { + if ((match_type == MEMTYPE_EXACT_MATCH) && + (match->start == start) && (match->end == end)) + return match; + + if ((match_type == MEMTYPE_END_MATCH) && + (match->start < start) && (match->end == end)) + return match; + + match = memtype_interval_iter_next(match, start, end); + } + + return NULL; /* Returns NULL if there is no match */ +} + +static int memtype_check_conflict(u64 start, u64 end, + enum page_cache_mode reqtype, + enum page_cache_mode *newtype) +{ + struct memtype *match; + enum page_cache_mode found_type = reqtype; + + match = memtype_interval_iter_first(&memtype_rbroot, start, end); + if (match == NULL) + goto success; + + if (match->type != found_type && newtype == NULL) + goto failure; + + dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end); + found_type = match->type; + + match = memtype_interval_iter_next(match, start, end); + while (match) { + if (match->type != found_type) + goto failure; + + match = memtype_interval_iter_next(match, start, end); + } +success: + if (newtype) + *newtype = found_type; + + return 0; + +failure: + pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n", + current->comm, current->pid, start, end, + cattr_name(found_type), cattr_name(match->type)); + return -EBUSY; +} + +int memtype_check_insert(struct memtype *new, + enum page_cache_mode *ret_type) +{ + int err = 0; + + err = memtype_check_conflict(new->start, new->end, new->type, ret_type); + if (err) + return err; + + if (ret_type) + new->type = *ret_type; + + memtype_interval_insert(new, &memtype_rbroot); + return 0; +} + +struct memtype *memtype_erase(u64 start, u64 end) +{ + struct memtype *data; + + /* + * Since the memtype_rbroot tree allows overlapping ranges, + * memtype_erase() checks with EXACT_MATCH first, i.e. free + * a whole node for the munmap case. If no such entry is found, + * it then checks with END_MATCH, i.e. shrink the size of a node + * from the end for the mremap case. 
+ */ + data = memtype_match(start, end, MEMTYPE_EXACT_MATCH); + if (!data) { + data = memtype_match(start, end, MEMTYPE_END_MATCH); + if (!data) + return ERR_PTR(-EINVAL); + } + + if (data->start == start) { + /* munmap: erase this node */ + memtype_interval_remove(data, &memtype_rbroot); + } else { + /* mremap: update the end value of this node */ + memtype_interval_remove(data, &memtype_rbroot); + data->end = start; + memtype_interval_insert(data, &memtype_rbroot); + return NULL; + } + + return data; +} + +struct memtype *memtype_lookup(u64 addr) +{ + return memtype_interval_iter_first(&memtype_rbroot, addr, + addr + PAGE_SIZE); +} + +#if defined(CONFIG_DEBUG_FS) +int memtype_copy_nth_element(struct memtype *out, loff_t pos) +{ + struct memtype *match; + int i = 1; + + match = memtype_interval_iter_first(&memtype_rbroot, 0, ULONG_MAX); + while (match && pos != i) { + match = memtype_interval_iter_next(match, 0, ULONG_MAX); + i++; + } + + if (match) { /* pos == i */ + *out = *match; + return 0; + } else { + return 1; + } +} +#endif diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c deleted file mode 100644 index 47a1bf30748f..000000000000 --- a/arch/x86/mm/pat_rbtree.c +++ /dev/null @@ -1,185 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Handle caching attributes in page tables (PAT) - * - * Authors: Venkatesh Pallipadi - * Suresh B Siddha - * - * Interval tree used to store the PAT memory type reservations. - */ - -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "pat_internal.h" - -/* - * The memtype tree keeps track of memory type for specific - * physical memory areas. Without proper tracking, conflicting memory - * types in different mappings can cause CPU cache corruption. - * - * The tree is an interval tree (augmented rbtree) with tree ordered - * on starting address. Tree can contain multiple entries for - * different regions which overlap. All the aliases have the same - * cache attributes of course. - * - * memtype_lock protects the rbtree. 
- */ -static inline u64 memtype_interval_start(struct memtype *memtype) -{ - return memtype->start; -} - -static inline u64 memtype_interval_end(struct memtype *memtype) -{ - return memtype->end - 1; -} -INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end, - memtype_interval_start, memtype_interval_end, - static, memtype_interval) - -static struct rb_root_cached memtype_rbroot = RB_ROOT_CACHED; - -enum { - MEMTYPE_EXACT_MATCH = 0, - MEMTYPE_END_MATCH = 1 -}; - -static struct memtype *memtype_match(u64 start, u64 end, int match_type) -{ - struct memtype *match; - - match = memtype_interval_iter_first(&memtype_rbroot, start, end); - while (match != NULL && match->start < end) { - if ((match_type == MEMTYPE_EXACT_MATCH) && - (match->start == start) && (match->end == end)) - return match; - - if ((match_type == MEMTYPE_END_MATCH) && - (match->start < start) && (match->end == end)) - return match; - - match = memtype_interval_iter_next(match, start, end); - } - - return NULL; /* Returns NULL if there is no match */ -} - -static int memtype_check_conflict(u64 start, u64 end, - enum page_cache_mode reqtype, - enum page_cache_mode *newtype) -{ - struct memtype *match; - enum page_cache_mode found_type = reqtype; - - match = memtype_interval_iter_first(&memtype_rbroot, start, end); - if (match == NULL) - goto success; - - if (match->type != found_type && newtype == NULL) - goto failure; - - dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end); - found_type = match->type; - - match = memtype_interval_iter_next(match, start, end); - while (match) { - if (match->type != found_type) - goto failure; - - match = memtype_interval_iter_next(match, start, end); - } -success: - if (newtype) - *newtype = found_type; - - return 0; - -failure: - pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n", - current->comm, current->pid, start, end, - cattr_name(found_type), cattr_name(match->type)); - return -EBUSY; -} - -int memtype_check_insert(struct memtype *new, - enum page_cache_mode *ret_type) -{ - int err = 0; - - err = memtype_check_conflict(new->start, new->end, new->type, ret_type); - if (err) - return err; - - if (ret_type) - new->type = *ret_type; - - memtype_interval_insert(new, &memtype_rbroot); - return 0; -} - -struct memtype *memtype_erase(u64 start, u64 end) -{ - struct memtype *data; - - /* - * Since the memtype_rbroot tree allows overlapping ranges, - * memtype_erase() checks with EXACT_MATCH first, i.e. free - * a whole node for the munmap case. If no such entry is found, - * it then checks with END_MATCH, i.e. shrink the size of a node - * from the end for the mremap case. 
- */ - data = memtype_match(start, end, MEMTYPE_EXACT_MATCH); - if (!data) { - data = memtype_match(start, end, MEMTYPE_END_MATCH); - if (!data) - return ERR_PTR(-EINVAL); - } - - if (data->start == start) { - /* munmap: erase this node */ - memtype_interval_remove(data, &memtype_rbroot); - } else { - /* mremap: update the end value of this node */ - memtype_interval_remove(data, &memtype_rbroot); - data->end = start; - memtype_interval_insert(data, &memtype_rbroot); - return NULL; - } - - return data; -} - -struct memtype *memtype_lookup(u64 addr) -{ - return memtype_interval_iter_first(&memtype_rbroot, addr, - addr + PAGE_SIZE); -} - -#if defined(CONFIG_DEBUG_FS) -int memtype_copy_nth_element(struct memtype *out, loff_t pos) -{ - struct memtype *match; - int i = 1; - - match = memtype_interval_iter_first(&memtype_rbroot, 0, ULONG_MAX); - while (match && pos != i) { - match = memtype_interval_iter_next(match, 0, ULONG_MAX); - i++; - } - - if (match) { /* pos == i */ - *out = *match; - return 0; - } else { - return 1; - } -} -#endif -- cgit v1.2.3
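Note: the PAT conversion in this series is built entirely on the INTERVAL_TREE_DEFINE() template from <linux/interval_tree_generic.h>. The following is a minimal, hedged sketch of that pattern for readers unfamiliar with the generated API; the struct range_node type and the range_it_* prefix are made-up illustration names (only the memtype_* identifiers appear in the patches), and the endpoint handling mirrors the patches: nodes store an exclusive 'end', while the generic tree works on closed intervals [start, last], hence the 'end - 1' adjustments.

#include <linux/interval_tree_generic.h>
#include <linux/rbtree.h>
#include <linux/types.h>

/* Hypothetical node type -- struct memtype plays this role in the patches. */
struct range_node {
	u64		start;			/* inclusive */
	u64		end;			/* exclusive, as in struct memtype */
	struct rb_node	rb;
	u64		subtree_max_end;	/* maintained by the generated code */
};

static inline u64 range_node_start(struct range_node *n) { return n->start; }
static inline u64 range_node_last(struct range_node *n)  { return n->end - 1; }

/*
 * Expands into static range_it_insert(), range_it_remove(),
 * range_it_iter_first() and range_it_iter_next() helpers, exactly as
 * INTERVAL_TREE_DEFINE() generates the memtype_interval_*() helpers above.
 */
INTERVAL_TREE_DEFINE(struct range_node, rb, u64, subtree_max_end,
		     range_node_start, range_node_last,
		     static, range_it)

static struct rb_root_cached range_root = RB_ROOT_CACHED;

/* Count the nodes overlapping [start, end), visited in in-order sequence. */
static int count_overlaps(u64 start, u64 end)
{
	struct range_node *n;
	int count = 0;

	/* iter_first()/iter_next() take a closed query interval [start, last] */
	for (n = range_it_iter_first(&range_root, start, end - 1); n;
	     n = range_it_iter_next(n, start, end - 1))
		count++;

	return count;
}

A new node is published with range_it_insert(node, &range_root) and withdrawn with range_it_remove(node, &range_root); as in the patches, any locking (memtype_lock there) remains the caller's responsibility.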