aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/linux/memremap.h19
-rw-r--r--include/linux/mm.h5
-rw-r--r--mm/memcontrol.c7
-rw-r--r--mm/memory-failure.c8
-rw-r--r--mm/memremap.c10
-rw-r--r--mm/migrate_device.c16
-rw-r--r--mm/rmap.c5
7 files changed, 53 insertions, 17 deletions
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 732dde5988fb..09320b7f706c 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -41,6 +41,13 @@ struct vmem_altmap {
* A more complete discussion of unaddressable memory may be found in
* include/linux/hmm.h and Documentation/mm/hmm.rst.
*
+ * MEMORY_DEVICE_COHERENT:
+ * Device memory that is cache coherent from both the device and CPU point of
+ * view. This is used on platforms that have an advanced system bus (like CAPI
+ * or CXL). A driver can hotplug the device memory using ZONE_DEVICE with this
+ * memory type. Any page of a process can be migrated to such memory. However,
+ * no one should be allowed to pin such memory so that it can always be evicted.
+ *
* MEMORY_DEVICE_FS_DAX:
* Host memory that has similar access semantics as System RAM i.e. DMA
* coherent and supports page pinning. In support of coordinating page
@@ -61,6 +68,7 @@ struct vmem_altmap {
enum memory_type {
/* 0 is reserved to catch uninitialized type fields */
MEMORY_DEVICE_PRIVATE = 1,
+ MEMORY_DEVICE_COHERENT,
MEMORY_DEVICE_FS_DAX,
MEMORY_DEVICE_GENERIC,
MEMORY_DEVICE_PCI_P2PDMA,
@@ -150,6 +158,17 @@ static inline bool is_pci_p2pdma_page(const struct page *page)
page->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA;
}
+static inline bool is_device_coherent_page(const struct page *page)
+{
+ return is_zone_device_page(page) &&
+ page->pgmap->type == MEMORY_DEVICE_COHERENT;
+}
+
+static inline bool folio_is_device_coherent(const struct folio *folio)
+{
+ return is_device_coherent_page(&folio->page);
+}
+
#ifdef CONFIG_ZONE_DEVICE
void *memremap_pages(struct dev_pagemap *pgmap, int nid);
void memunmap_pages(struct dev_pagemap *pgmap);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a2d01e49253b..64393ed3330a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -28,6 +28,7 @@
#include <linux/sched.h>
#include <linux/pgtable.h>
#include <linux/kasan.h>
+#include <linux/memremap.h>
struct mempolicy;
struct anon_vma;
@@ -1537,7 +1538,9 @@ static inline bool is_longterm_pinnable_page(struct page *page)
if (mt == MIGRATE_CMA || mt == MIGRATE_ISOLATE)
return false;
#endif
- return !is_zone_movable_page(page) || is_zero_pfn(page_to_pfn(page));
+ return !(is_device_coherent_page(page) ||
+ is_zone_movable_page(page) ||
+ is_zero_pfn(page_to_pfn(page)));
}
#else
static inline bool is_longterm_pinnable_page(struct page *page)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1497affe08c4..b1868784f895 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5716,8 +5716,8 @@ out:
* 2(MC_TARGET_SWAP): if the swap entry corresponding to this pte is a
* target for charge migration. if @target is not NULL, the entry is stored
* in target->ent.
- * 3(MC_TARGET_DEVICE): like MC_TARGET_PAGE but page is MEMORY_DEVICE_PRIVATE
- * (so ZONE_DEVICE page and thus not on the lru).
+ * 3(MC_TARGET_DEVICE): like MC_TARGET_PAGE but page is device memory and
+ * thus not on the lru.
* For now we such page is charge like a regular page would be as for all
* intent and purposes it is just special memory taking the place of a
* regular page.
@@ -5755,7 +5755,8 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
*/
if (page_memcg(page) == mc.from) {
ret = MC_TARGET_PAGE;
- if (is_device_private_page(page))
+ if (is_device_private_page(page) ||
+ is_device_coherent_page(page))
ret = MC_TARGET_DEVICE;
if (target)
target->page = page;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index f7612ccdb299..b7ca5db7e60e 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1686,12 +1686,16 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
goto unlock;
}
- if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
+ switch (pgmap->type) {
+ case MEMORY_DEVICE_PRIVATE:
+ case MEMORY_DEVICE_COHERENT:
/*
- * TODO: Handle HMM pages which may need coordination
+ * TODO: Handle device pages which may need coordination
* with device-side memory.
*/
goto unlock;
+ default:
+ break;
}
/*
diff --git a/mm/memremap.c b/mm/memremap.c
index 8b5c8fd4ea8e..f0955785150f 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -315,6 +315,16 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
return ERR_PTR(-EINVAL);
}
break;
+ case MEMORY_DEVICE_COHERENT:
+ if (!pgmap->ops->page_free) {
+ WARN(1, "Missing page_free method\n");
+ return ERR_PTR(-EINVAL);
+ }
+ if (!pgmap->owner) {
+ WARN(1, "Missing owner\n");
+ return ERR_PTR(-EINVAL);
+ }
+ break;
case MEMORY_DEVICE_FS_DAX:
if (IS_ENABLED(CONFIG_FS_DAX_LIMITED)) {
WARN(1, "File system DAX not supported\n");
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index 5052093d0262..a4847ad65da3 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -518,7 +518,7 @@ EXPORT_SYMBOL(migrate_vma_setup);
* handle_pte_fault()
* do_anonymous_page()
* to map in an anonymous zero page but the struct page will be a ZONE_DEVICE
- * private page.
+ * private or coherent page.
*/
static void migrate_vma_insert_page(struct migrate_vma *migrate,
unsigned long addr,
@@ -594,11 +594,8 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
page_to_pfn(page));
entry = swp_entry_to_pte(swp_entry);
} else {
- /*
- * For now we only support migrating to un-addressable device
- * memory.
- */
- if (is_zone_device_page(page)) {
+ if (is_zone_device_page(page) &&
+ !is_device_coherent_page(page)) {
pr_warn_once("Unsupported ZONE_DEVICE page type.\n");
goto abort;
}
@@ -701,10 +698,11 @@ void migrate_vma_pages(struct migrate_vma *migrate)
mapping = page_mapping(page);
- if (is_device_private_page(newpage)) {
+ if (is_device_private_page(newpage) ||
+ is_device_coherent_page(newpage)) {
/*
- * For now only support private anonymous when migrating
- * to un-addressable device memory.
+ * For now only support anonymous memory migrating to
+ * device private or coherent memory.
*/
if (mapping) {
migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
diff --git a/mm/rmap.c b/mm/rmap.c
index 83172ee0ea35..0532fd92ecb3 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1953,7 +1953,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
/* Update high watermark before we lower rss */
update_hiwater_rss(mm);
- if (folio_is_zone_device(folio)) {
+ if (folio_is_device_private(folio)) {
unsigned long pfn = folio_pfn(folio);
swp_entry_t entry;
pte_t swp_pte;
@@ -2124,7 +2124,8 @@ void try_to_migrate(struct folio *folio, enum ttu_flags flags)
TTU_SYNC)))
return;
- if (folio_is_zone_device(folio) && !folio_is_device_private(folio))
+ if (folio_is_zone_device(folio) &&
+ (!folio_is_device_private(folio) && !folio_is_device_coherent(folio)))
return;
/*