diff options
author | Linus Torvalds | 2019-09-21 10:07:42 -0700 |
---|---|---|
committer | Linus Torvalds | 2019-09-21 10:07:42 -0700 |
commit | 84da111de0b4be15bd500deff773f5116f39f7be (patch) | |
tree | 76b5796f8258397bf7a3926b742a89166a8501ef /drivers | |
parent | 227c3e9eb5cf3552c2cc83225df6d14adb05f8e8 (diff) | |
parent | 62974fc389b364d8af70e044836362222bd3ae53 (diff) |
Merge tag 'for-linus-hmm' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull hmm updates from Jason Gunthorpe:
"This is more cleanup and consolidation of the hmm APIs and the very
strongly related mmu_notifier interfaces. Many places across the tree
using these interfaces are touched in the process. Beyond that a
cleanup to the page walker API and a few memremap related changes
round out the series:
- General improvement of hmm_range_fault() and related APIs, more
documentation, bug fixes from testing, API simplification &
consolidation, and unused API removal
- Simplify the hmm related kconfigs to HMM_MIRROR and DEVICE_PRIVATE,
and make them internal kconfig selects
- Hoist a lot of code related to mmu notifier attachment out of
drivers by using a refcount get/put attachment idiom and remove the
convoluted mmu_notifier_unregister_no_release() and related APIs.
- General API improvement for the migrate_vma API and revision of its
only user in nouveau
- Annotate mmu_notifiers with lockdep and sleeping region debugging
Two series unrelated to HMM or mmu_notifiers came along due to
dependencies:
- Allow pagemap's memremap_pages family of APIs to work without
providing a struct device
- Make walk_page_range() and related use a constant structure for
function pointers"
* tag 'for-linus-hmm' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (75 commits)
libnvdimm: Enable unit test infrastructure compile checks
mm, notifier: Catch sleeping/blocking for !blockable
kernel.h: Add non_block_start/end()
drm/radeon: guard against calling an unpaired radeon_mn_unregister()
csky: add missing brackets in a macro for tlb.h
pagewalk: use lockdep_assert_held for locking validation
pagewalk: separate function pointers from iterator data
mm: split out a new pagewalk.h header from mm.h
mm/mmu_notifiers: annotate with might_sleep()
mm/mmu_notifiers: prime lockdep
mm/mmu_notifiers: add a lockdep map for invalidate_range_start/end
mm/mmu_notifiers: remove the __mmu_notifier_invalidate_range_start/end exports
mm/hmm: hmm_range_fault() infinite loop
mm/hmm: hmm_range_fault() NULL pointer bug
mm/hmm: fix hmm_range_fault()'s handling of swapped out pages
mm/mmu_notifiers: remove unregister_no_release
RDMA/odp: remove ib_ucontext from ib_umem
RDMA/odp: use mmu_notifier_get/put for 'struct ib_ucontext_per_mm'
RDMA/mlx5: Use odp instead of mr->umem in pagefault_mr
RDMA/mlx5: Use ib_umem_start instead of umem.address
...
Diffstat (limited to 'drivers')
30 files changed, 649 insertions, 992 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index f6e5c0282fc1..2e98c016cb47 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -27,7 +27,9 @@ config DRM_AMDGPU_CIK config DRM_AMDGPU_USERPTR bool "Always enable userptr write support" depends on DRM_AMDGPU - depends on HMM_MIRROR + depends on MMU + select HMM_MIRROR + select MMU_NOTIFIER help This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it isn't already selected to enabled full userptr support. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 48a2070e72f2..bdf849da32e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -35,6 +35,7 @@ #include <linux/pm_runtime.h> #include <linux/vga_switcheroo.h> #include <drm/drm_probe_helper.h> +#include <linux/mmu_notifier.h> #include "amdgpu.h" #include "amdgpu_irq.h" @@ -1469,6 +1470,7 @@ static void __exit amdgpu_exit(void) amdgpu_unregister_atpx_handler(); amdgpu_sync_fini(); amdgpu_fence_slab_fini(); + mmu_notifier_synchronize(); } module_init(amdgpu_init); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index f1f8cdd695d3..31d4deb5d294 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -195,13 +195,14 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, * Block for operations on BOs to finish and mark pages as accessed and * potentially dirty. */ -static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror, - const struct hmm_update *update) +static int +amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror, + const struct mmu_notifier_range *update) { struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); unsigned long start = update->start; unsigned long end = update->end; - bool blockable = update->blockable; + bool blockable = mmu_notifier_range_blockable(update); struct interval_tree_node *it; /* notification is exclusive, but interval is inclusive */ @@ -243,13 +244,14 @@ static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror, * necessitates evicting all user-mode queues of the process. The BOs * are restorted in amdgpu_mn_invalidate_range_end_hsa. */ -static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror, - const struct hmm_update *update) +static int +amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror, + const struct mmu_notifier_range *update) { struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); unsigned long start = update->start; unsigned long end = update->end; - bool blockable = update->blockable; + bool blockable = mmu_notifier_range_blockable(update); struct interval_tree_node *it; /* notification is exclusive, but interval is inclusive */ @@ -482,6 +484,5 @@ void amdgpu_hmm_init_range(struct hmm_range *range) range->flags = hmm_range_flags; range->values = hmm_range_values; range->pfn_shift = PAGE_SHIFT; - INIT_LIST_HEAD(&range->list); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 13b144c8f67d..dff41d0a85fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -794,7 +794,6 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) struct hmm_range *range; unsigned long i; uint64_t *pfns; - int retry = 0; int r = 0; if (!mm) /* Happens during process shutdown */ @@ -835,10 +834,11 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) 0 : range->flags[HMM_PFN_WRITE]; range->pfn_flags_mask = 0; range->pfns = pfns; - hmm_range_register(range, mirror, start, - start + ttm->num_pages * PAGE_SIZE, PAGE_SHIFT); + range->start = start; + range->end = start + ttm->num_pages * PAGE_SIZE; + + hmm_range_register(range, mirror); -retry: /* * Just wait for range to be valid, safe to ignore return value as we * will use the return value of hmm_range_fault() below under the @@ -847,24 +847,12 @@ retry: hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT); down_read(&mm->mmap_sem); - - r = hmm_range_fault(range, true); - if (unlikely(r < 0)) { - if (likely(r == -EAGAIN)) { - /* - * return -EAGAIN, mmap_sem is dropped - */ - if (retry++ < MAX_RETRY_HMM_RANGE_FAULT) - goto retry; - else - pr_err("Retry hmm fault too many times\n"); - } - - goto out_up_read; - } - + r = hmm_range_fault(range, 0); up_read(&mm->mmap_sem); + if (unlikely(r < 0)) + goto out_free_pfns; + for (i = 0; i < ttm->num_pages; i++) { pages[i] = hmm_device_entry_to_page(range, pfns[i]); if (unlikely(!pages[i])) { @@ -880,9 +868,6 @@ retry: return 0; -out_up_read: - if (likely(r != -EAGAIN)) - up_read(&mm->mmap_sem); out_free_pfns: hmm_range_unregister(range); kvfree(pfns); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 3bb75d11a662..c89326125d71 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -687,9 +687,6 @@ struct kfd_process { /* We want to receive a notification when the mm_struct is destroyed */ struct mmu_notifier mmu_notifier; - /* Use for delayed freeing of kfd_process structure */ - struct rcu_head rcu; - unsigned int pasid; unsigned int doorbell_index; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 0c6ac043ae3c..40e3fc0c6942 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -62,8 +62,8 @@ static struct workqueue_struct *kfd_restore_wq; static struct kfd_process *find_process(const struct task_struct *thread); static void kfd_process_ref_release(struct kref *ref); -static struct kfd_process *create_process(const struct task_struct *thread, - struct file *filep); +static struct kfd_process *create_process(const struct task_struct *thread); +static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep); static void evict_process_worker(struct work_struct *work); static void restore_process_worker(struct work_struct *work); @@ -289,7 +289,15 @@ struct kfd_process *kfd_create_process(struct file *filep) if (process) { pr_debug("Process already found\n"); } else { - process = create_process(thread, filep); + process = create_process(thread); + if (IS_ERR(process)) + goto out; + + ret = kfd_process_init_cwsr_apu(process, filep); + if (ret) { + process = ERR_PTR(ret); + goto out; + } if (!procfs.kobj) goto out; @@ -478,11 +486,9 @@ static void kfd_process_ref_release(struct kref *ref) queue_work(kfd_process_wq, &p->release_work); } -static void kfd_process_destroy_delayed(struct rcu_head *rcu) +static void kfd_process_free_notifier(struct mmu_notifier *mn) { - struct kfd_process *p = container_of(rcu, struct kfd_process, rcu); - - kfd_unref_process(p); + kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier)); } static void kfd_process_notifier_release(struct mmu_notifier *mn, @@ -534,12 +540,12 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, mutex_unlock(&p->mutex); - mmu_notifier_unregister_no_release(&p->mmu_notifier, mm); - mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed); + mmu_notifier_put(&p->mmu_notifier); } static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = { .release = kfd_process_notifier_release, + .free_notifier = kfd_process_free_notifier, }; static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep) @@ -609,81 +615,69 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd) return 0; } -static struct kfd_process *create_process(const struct task_struct *thread, - struct file *filep) +/* + * On return the kfd_process is fully operational and will be freed when the + * mm is released + */ +static struct kfd_process *create_process(const struct task_struct *thread) { struct kfd_process *process; int err = -ENOMEM; process = kzalloc(sizeof(*process), GFP_KERNEL); - if (!process) goto err_alloc_process; - process->pasid = kfd_pasid_alloc(); - if (process->pasid == 0) - goto err_alloc_pasid; - - if (kfd_alloc_process_doorbells(process) < 0) - goto err_alloc_doorbells; - kref_init(&process->ref); - mutex_init(&process->mutex); - process->mm = thread->mm; - - /* register notifier */ - process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops; - err = mmu_notifier_register(&process->mmu_notifier, process->mm); - if (err) - goto err_mmu_notifier; - - hash_add_rcu(kfd_processes_table, &process->kfd_processes, - (uintptr_t)process->mm); - process->lead_thread = thread->group_leader; - get_task_struct(process->lead_thread); - INIT_LIST_HEAD(&process->per_device_data); - + INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker); + INIT_DELAYED_WORK(&process->restore_work, restore_process_worker); + process->last_restore_timestamp = get_jiffies_64(); kfd_event_init_process(process); + process->is_32bit_user_mode = in_compat_syscall(); + + process->pasid = kfd_pasid_alloc(); + if (process->pasid == 0) + goto err_alloc_pasid; + + if (kfd_alloc_process_doorbells(process) < 0) + goto err_alloc_doorbells; err = pqm_init(&process->pqm, process); if (err != 0) goto err_process_pqm_init; /* init process apertures*/ - process->is_32bit_user_mode = in_compat_syscall(); err = kfd_init_apertures(process); if (err != 0) goto err_init_apertures; - INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker); - INIT_DELAYED_WORK(&process->restore_work, restore_process_worker); - process->last_restore_timestamp = get_jiffies_64(); - - err = kfd_process_init_cwsr_apu(process, filep); + /* Must be last, have to use release destruction after this */ + process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops; + err = mmu_notifier_register(&process->mmu_notifier, process->mm); if (err) - goto err_init_cwsr; + goto err_register_notifier; + + get_task_struct(process->lead_thread); + hash_add_rcu(kfd_processes_table, &process->kfd_processes, + (uintptr_t)process->mm); return process; -err_init_cwsr: +err_register_notifier: kfd_process_free_outstanding_kfd_bos(process); kfd_process_destroy_pdds(process); err_init_apertures: pqm_uninit(&process->pqm); err_process_pqm_init: - hash_del_rcu(&process->kfd_processes); - synchronize_rcu(); - mmu_notifier_unregister_no_release(&process->mmu_notifier, process->mm); -err_mmu_notifier: - mutex_destroy(&process->mutex); kfd_free_process_doorbells(process); err_alloc_doorbells: kfd_pasid_free(process->pasid); err_alloc_pasid: + mutex_destroy(&process->mutex); kfree(process); err_alloc_process: return ERR_PTR(err); diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index 96b9814e6d06..3558df043592 100644 --- a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -86,9 +86,10 @@ config DRM_NOUVEAU_SVM bool "(EXPERIMENTAL) Enable SVM (Shared Virtual Memory) support" depends on DEVICE_PRIVATE depends on DRM_NOUVEAU - depends on HMM_MIRROR + depends on MMU depends on STAGING - select MIGRATE_VMA_HELPER + select HMM_MIRROR + select MMU_NOTIFIER default n help Say Y here if you want to enable experimental support for diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 1333220787a1..fa1439941596 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -44,8 +44,6 @@ #define DMEM_CHUNK_SIZE (2UL << 20) #define DMEM_CHUNK_NPAGES (DMEM_CHUNK_SIZE >> PAGE_SHIFT) -struct nouveau_migrate; - enum nouveau_aper { NOUVEAU_APER_VIRT, NOUVEAU_APER_VRAM, @@ -86,21 +84,13 @@ static inline struct nouveau_dmem *page_to_dmem(struct page *page) return container_of(page->pgmap, struct nouveau_dmem, pagemap); } -struct nouveau_dmem_fault { - struct nouveau_drm *drm; - struct nouveau_fence *fence; - dma_addr_t *dma; - unsigned long npages; -}; +static unsigned long nouveau_dmem_page_addr(struct page *page) +{ + struct nouveau_dmem_chunk *chunk = page->zone_device_data; + unsigned long idx = page_to_pfn(page) - chunk->pfn_first; -struct nouveau_migrate { - struct vm_area_struct *vma; - struct nouveau_drm *drm; - struct nouveau_fence *fence; - unsigned long npages; - dma_addr_t *dma; - unsigned long dma_nr; -}; + return (idx << PAGE_SHIFT) + chunk->bo->bo.offset; +} static void nouveau_dmem_page_free(struct page *page) { @@ -125,165 +115,90 @@ static void nouveau_dmem_page_free(struct page *page) spin_unlock(&chunk->lock); } -static void -nouveau_dmem_fault_alloc_and_copy(struct vm_area_struct *vma, - const unsigned long *src_pfns, - unsigned long *dst_pfns, - unsigned long start, - unsigned long end, - void *private) +static void nouveau_dmem_fence_done(struct nouveau_fence **fence) { - struct nouveau_dmem_fault *fault = private; - struct nouveau_drm *drm = fault->drm; - struct device *dev = drm->dev->dev; - unsigned long addr, i, npages = 0; - nouveau_migrate_copy_t copy; - int ret; - - - /* First allocate new memory */ - for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) { - struct page *dpage, *spage; - - dst_pfns[i] = 0; - spage = migrate_pfn_to_page(src_pfns[i]); - if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) - continue; - - dpage = alloc_page_vma(GFP_HIGHUSER, vma, addr); - if (!dpage) { - dst_pfns[i] = MIGRATE_PFN_ERROR; - continue; - } - lock_page(dpage); - - dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)) | - MIGRATE_PFN_LOCKED; - npages++; - } - - /* Allocate storage for DMA addresses, so we can unmap later. */ - fault->dma = kmalloc(sizeof(*fault->dma) * npages, GFP_KERNEL); - if (!fault->dma) - goto error; - - /* Copy things over */ - copy = drm->dmem->migrate.copy_func; - for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) { - struct nouveau_dmem_chunk *chunk; - struct page *spage, *dpage; - u64 src_addr, dst_addr; - - dpage = migrate_pfn_to_page(dst_pfns[i]); - if (!dpage || dst_pfns[i] == MIGRATE_PFN_ERROR) - continue; - - spage = migrate_pfn_to_page(src_pfns[i]); - if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) { - dst_pfns[i] = MIGRATE_PFN_ERROR; - __free_page(dpage); - continue; - } - - fault->dma[fault->npages] = - dma_map_page_attrs(dev, dpage, 0, PAGE_SIZE, - PCI_DMA_BIDIRECTIONAL, - DMA_ATTR_SKIP_CPU_SYNC); - if (dma_mapping_error(dev, fault->dma[fault->npages])) { - dst_pfns[i] = MIGRATE_PFN_ERROR; - __free_page(dpage); - continue; - } - - dst_addr = fault->dma[fault->npages++]; - - chunk = spage->zone_device_data; - src_addr = page_to_pfn(spage) - chunk->pfn_first; - src_addr = (src_addr << PAGE_SHIFT) + chunk->bo->bo.offset; - - ret = copy(drm, 1, NOUVEAU_APER_HOST, dst_addr, - NOUVEAU_APER_VRAM, src_addr); - if (ret) { - dst_pfns[i] = MIGRATE_PFN_ERROR; - __free_page(dpage); - continue; - } + if (fence) { + nouveau_fence_wait(*fence, true, false); + nouveau_fence_unref(fence); + } else { + /* + * FIXME wait for channel to be IDLE before calling finalizing + * the hmem object. + */ } +} - nouveau_fence_new(drm->dmem->migrate.chan, false, &fault->fence); - - return; - -error: - for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, ++i) { - struct page *page; +static vm_fault_t nouveau_dmem_fault_copy_one(struct nouveau_drm *drm, + struct vm_fault *vmf, struct migrate_vma *args, + dma_addr_t *dma_addr) +{ + struct device *dev = drm->dev->dev; + struct page *dpage, *spage; - if (!dst_pfns[i] || dst_pfns[i] == MIGRATE_PFN_ERROR) - continue; + spage = migrate_pfn_to_page(args->src[0]); + if (!spage || !(args->src[0] & MIGRATE_PFN_MIGRATE)) + return 0; - page = migrate_pfn_to_page(dst_pfns[i]); - dst_pfns[i] = MIGRATE_PFN_ERROR; - if (page == NULL) - continue; + dpage = alloc_page_vma(GFP_HIGHUSER, vmf->vma, vmf->address); + if (!dpage) + return VM_FAULT_SIGBUS; + lock_page(dpage); - __free_page(page); - } -} + *dma_addr = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, *dma_addr)) + goto error_free_page; -void nouveau_dmem_fault_finalize_and_map(struct vm_area_struct *vma, - const unsigned long *src_pfns, - const unsigned long *dst_pfns, - unsigned long start, - unsigned long end, - void *private) -{ - struct nouveau_dmem_fault *fault = private; - struct nouveau_drm *drm = fault->drm; + if (drm->dmem->migrate.copy_func(drm, 1, NOUVEAU_APER_HOST, *dma_addr, + NOUVEAU_APER_VRAM, nouveau_dmem_page_addr(spage))) + goto error_dma_unmap; - if (fault->fence) { - nouveau_fence_wait(fault->fence, true, false); - nouveau_fence_unref(&fault->fence); - } else { - /* - * FIXME wait for channel to be IDLE before calling finalizing - * the hmem object below (nouveau_migrate_hmem_fini()). - */ - } + args->dst[0] = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED; + return 0; - while (fault->npages--) { - dma_unmap_page(drm->dev->dev, fault->dma[fault->npages], - PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - } - kfree(fault->dma); +error_dma_unmap: + dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); +error_free_page: + __free_page(dpage); + return VM_FAULT_SIGBUS; } -static const struct migrate_vma_ops nouveau_dmem_fault_migrate_ops = { - .alloc_and_copy = nouveau_dmem_fault_alloc_and_copy, - .finalize_and_map = nouveau_dmem_fault_finalize_and_map, -}; - static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf) { struct nouveau_dmem *dmem = page_to_dmem(vmf->page); - unsigned long src[1] = {0}, dst[1] = {0}; - struct nouveau_dmem_fault fault = { .drm = dmem->drm }; - int ret; + struct nouveau_drm *drm = dmem->drm; + struct nouveau_fence *fence; + unsigned long src = 0, dst = 0; + dma_addr_t dma_addr = 0; + vm_fault_t ret; + struct migrate_vma args = { + .vma = vmf->vma, + .start = vmf->address, + .end = vmf->address + PAGE_SIZE, + .src = &src, + .dst = &dst, + }; /* * FIXME what we really want is to find some heuristic to migrate more * than just one page on CPU fault. When such fault happens it is very * likely that more surrounding page will CPU fault too. */ - ret = migrate_vma(&nouveau_dmem_fault_migrate_ops, vmf->vma, - vmf->address, vmf->address + PAGE_SIZE, - src, dst, &fault); - if (ret) + if (migrate_vma_setup(&args) < 0) return VM_FAULT_SIGBUS; + if (!args.cpages) + return 0; - if (dst[0] == MIGRATE_PFN_ERROR) - return VM_FAULT_SIGBUS; + ret = nouveau_dmem_fault_copy_one(drm, vmf, &args, &dma_addr); + if (ret || dst == 0) + goto done; - return 0; + nouveau_fence_new(dmem->migrate.chan, false, &fence); + migrate_vma_pages(&args); + nouveau_dmem_fence_done(&fence); + dma_unmap_page(drm->dev->dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); +done: + migrate_vma_finalize(&args); + return ret; } static const struct dev_pagemap_ops nouveau_dmem_pagemap_ops = { @@ -642,188 +557,115 @@ out_free: drm->dmem = NULL; } -static void -nouveau_dmem_migrate_alloc_and_copy(struct vm_area_struct *vma, - const unsigned long *src_pfns, - unsigned long *dst_pfns, - unsigned long start, - unsigned long end, - void *private) +static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm, + unsigned long src, dma_addr_t *dma_addr) { - struct nouveau_migrate *migrate = private; - struct nouveau_drm *drm = migrate->drm; struct device *dev = drm->dev->dev; - unsigned long addr, i, npages = 0; - nouveau_migrate_copy_t copy; - int ret; - - /* First allocate new memory */ - for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) { - struct page *dpage, *spage; - - dst_pfns[i] = 0; - spage = migrate_pfn_to_page(src_pfns[i]); - if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) - continue; - - dpage = nouveau_dmem_page_alloc_locked(drm); - if (!dpage) - continue; - - dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)) | - MIGRATE_PFN_LOCKED | - MIGRATE_PFN_DEVICE; - npages++; - } - - if (!npages) - return; - - /* Allocate storage for DMA addresses, so we can unmap later. */ - migrate->dma = kmalloc(sizeof(*migrate->dma) * npages, GFP_KERNEL); - if (!migrate->dma) - goto error; - - /* Copy things over */ - copy = drm->dmem->migrate.copy_func; - for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) { - struct nouveau_dmem_chunk *chunk; - struct page *spage, *dpage; - u64 src_addr, dst_addr; - - dpage = migrate_pfn_to_page(dst_pfns[i]); - if (!dpage || dst_pfns[i] == MIGRATE_PFN_ERROR) - continue; - - chunk = dpage->zone_device_data; - dst_addr = page_to_pfn(dpage) - chunk->pfn_first; - dst_addr = (dst_addr << PAGE_SHIFT) + chunk->bo->bo.offset; - - spage = migrate_pfn_to_page(src_pfns[i]); - if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) { - nouveau_dmem_page_free_locked(drm, dpage); - dst_pfns[i] = 0; - continue; - } - - migrate->dma[migrate->dma_nr] = - dma_map_page_attrs(dev, spage, 0, PAGE_SIZE, - PCI_DMA_BIDIRECTIONAL, - DMA_ATTR_SKIP_CPU_SYNC); - if (dma_mapping_error(dev, migrate->dma[migrate->dma_nr])) { - nouveau_dmem_page_free_locked(drm, dpage); - dst_pfns[i] = 0; - continue; - } - - src_addr = migrate->dma[migrate->dma_nr++]; + struct page *dpage, *spage; - ret = copy(drm, 1, NOUVEAU_APER_VRAM, dst_addr, - NOUVEAU_APER_HOST, src_addr); - if (ret) { - nouveau_dmem_page_free_locked(drm, dpage); - dst_pfns[i] = 0; - continue; - } - } - - nouveau_fence_new(drm->dmem->migrate.chan, false, &migrate->fence); + spage = migrate_pfn_to_page(src); + if (!spage || !(src & MIGRATE_PFN_MIGRATE)) + goto out; - return; + dpage = nouveau_dmem_page_alloc_locked(drm); + if (!dpage) + return 0; -error: - for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, ++i) { - struct page *page; + *dma_addr = dma_map_page(dev, spage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, *dma_addr)) + goto out_free_page; - if (!dst_pfns[i] || dst_pfns[i] == MIGRATE_PFN_ERROR) - continue; + if (drm->dmem->migrate.copy_func(drm, 1, NOUVEAU_APER_VRAM, + nouveau_dmem_page_addr(dpage), NOUVEAU_APER_HOST, + *dma_addr)) + goto out_dma_unmap; - page = migrate_pfn_to_page(dst_pfns[i]); - dst_pfns[i] = MIGRATE_PFN_ERROR; - if (page == NULL) - continue; + return migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED; - __free_page(page); - } +out_dma_unmap: + dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); +out_free_page: + nouveau_dmem_page_free_locked(drm, dpage); +out: + return 0; } -void nouveau_dmem_migrate_finalize_and_map(struct vm_area_struct *vma, - const unsigned long *src_pfns, - const unsigned long *dst_pfns, - unsigned long start, - unsigned long end, - void *private) +static void nouveau_dmem_migrate_chunk(struct nouveau_drm *drm, + struct migrate_vma *args, dma_addr_t *dma_addrs) { - struct nouveau_migrate *migrate = private; - struct nouveau_drm *drm = migrate->drm; - - if (migrate->fence) { - nouveau_fence_wait(migrate->fence, true, false); - nouveau_fence_unref(&migrate->fence); - } else { - /* - * FIXME wait for channel to be IDLE before finalizing - * the hmem object below (nouveau_migrate_hmem_fini()) ? - */ + struct nouveau_fence *fence; + unsigned long addr = args->start, nr_dma = 0, i; + + for (i = 0; addr < args->end; i++) { + args->dst[i] = nouveau_dmem_migrate_copy_one(drm, args->src[i], + dma_addrs + nr_dma); + if (args->dst[i]) + nr_dma++; + addr += PAGE_SIZE; } - while (migrate->dma_nr--) { - dma_unmap_page(drm->dev->dev, migrate->dma[migrate->dma_nr], - PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - } - kfree(migrate->dma); + nouveau_fence_new(drm->dmem->migrate.chan, false, &fence); + migrate_vma_pages(args); + nouveau_dmem_fence_done(&fence); + while (nr_dma--) { + dma_unmap_page(drm->dev->dev, dma_addrs[nr_dma], PAGE_SIZE, + DMA_BIDIRECTIONAL); + } /* - * FIXME optimization: update GPU page table to point to newly - * migrated memory. + * FIXME optimization: update GPU page table to point to newly migrated + * memory. */ + migrate_vma_finalize(args); } -static const struct migrate_vma_ops nouveau_dmem_migrate_ops = { - .alloc_and_copy = nouveau_dmem_migrate_alloc_and_copy, - .finalize_and_map = nouveau_dmem_migrate_finalize_and_map, -}; - int nouveau_dmem_migrate_vma(struct nouveau_drm *drm, struct vm_area_struct *vma, unsigned long start, unsigned long end) { - unsigned long *src_pfns, *dst_pfns, npages; - struct nouveau_migrate migrate = {0}; - unsigned long i, c, max; - int ret = 0; - - npages = (end - start) >> PAGE_SHIFT; - max = min(SG_MAX_SINGLE_ALLOC, npages); - src_pfns = kzalloc(sizeof(long) * max, GFP_KERNEL); - if (src_pfns == NULL) - return -ENOMEM; - dst_pfns = kzalloc(sizeof(long) * max, GFP_KERNEL); - if (dst_pfns == NULL) { - kfree(src_pfns); - return -ENOMEM; - } + unsigned long npages = (end - start) >> PAGE_SHIFT; + unsigned long max = min(SG_MAX_SINGLE_ALLOC, npages); + dma_addr_t *dma_addrs; + struct migrate_vma args = { + .vma = vma, + .start = start, + }; + unsigned long c, i; + int ret = -ENOMEM; + + args.src = kcalloc(max, sizeof(args.src), GFP_KERNEL); + if (!args.src) + goto out; + args.dst = kcalloc(max, sizeof(args.dst), GFP_KERNEL); + if (!args.dst) + goto out_free_src; - migrate.drm = drm; - migrate.vma = vma; - migrate.npages = npages; - for (i = 0; i < npages; i += c) { - unsigned long next; + dma_addrs = kmalloc_array(max, sizeof(*dma_addrs), GFP_KERNEL); + if (!dma_addrs) + goto out_free_dst; + for (i = 0; i < npages; i += c) { c = min(SG_MAX_SINGLE_ALLOC, npages); - next = start + (c << PAGE_SHIFT); - ret = migrate_vma(&nouveau_dmem_migrate_ops, vma, start, - next, src_pfns, dst_pfns, &migrate); + args.end = start + (c << PAGE_SHIFT); + ret = migrate_vma_setup(&args); if (ret) - goto out; - start = next; + goto out_free_dma; + + if (args.cpages) + nouveau_dmem_migrate_chunk(drm, &args, dma_addrs); + args.start = args.end; } + ret = 0; +out_free_dma: + kfree(dma_addrs); +out_free_dst: + kfree(args.dst); +out_free_src: + kfree(args.src); out: - kfree(dst_pfns); - kfree(src_pfns); return ret; } @@ -841,11 +683,10 @@ nouveau_dmem_convert_pfn(struct nouveau_drm *drm, npages = (range->end - range->start) >> PAGE_SHIFT; for (i = 0; i < npages; ++i) { - struct nouveau_dmem_chunk *chunk; struct page *page; uint64_t addr; - page = hmm_pfn_to_page(range, range->pfns[i]); + page = hmm_device_entry_to_page(range, range->pfns[i]); if (page == NULL) continue; @@ -859,10 +700,7 @@ nouveau_dmem_convert_pfn(struct nouveau_drm *drm, continue; } - chunk = page->zone_device_data; - addr = page_to_pfn(page) - chunk->pfn_first; - addr = (addr + chunk->bo->bo.mem.start) << PAGE_SHIFT; - + addr = nouveau_dmem_page_addr(page); range->pfns[i] &= ((1UL << range->pfn_shift) - 1); range->pfns[i] |= (addr >> PAGE_SHIFT) << range->pfn_shift; } diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.h b/drivers/gpu/drm/nouveau/nouveau_dmem.h index 9d97d756fb7d..92394be5d649 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.h +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.h @@ -45,16 +45,5 @@ static inline void nouveau_dmem_init(struct nouveau_drm *drm) {} static inline void nouveau_dmem_fini(struct nouveau_drm *drm) {} static inline void nouveau_dmem_suspend(struct nouveau_drm *drm) {} static inline void nouveau_dmem_resume(struct nouveau_drm *drm) {} - -static inline int nouveau_dmem_migrate_vma(struct nouveau_drm *drm, - struct vm_area_struct *vma, - unsigned long start, - unsigned long end) -{ - return 0; -} - -static inline void nouveau_dmem_convert_pfn(struct nouveau_drm *drm, - struct hmm_range *range) {} #endif /* IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) */ #endif diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index bdc948352467..2cd83849600f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -28,6 +28,7 @@ #include <linux/pci.h> #include <linux/pm_runtime.h> #include <linux/vga_switcheroo.h> +#include <linux/mmu_notifier.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_ioctl.h> @@ -1290,6 +1291,8 @@ nouveau_drm_exit(void) #ifdef CONFIG_NOUVEAU_PLATFORM_DRIVER platform_driver_unregister(&nouveau_platform_driver); #endif + if (IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM)) + mmu_notifier_synchronize(); } module_init(nouveau_drm_init); diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index a835cebb6d90..668d4bd0c118 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -252,13 +252,13 @@ nouveau_svmm_invalidate(struct nouveau_svmm *svmm, u64 start, u64 limit) static int nouveau_svmm_sync_cpu_device_pagetables(struct hmm_mirror *mirror, - const struct hmm_update *update) + const struct mmu_notifier_range *update) { struct nouveau_svmm *svmm = container_of(mirror, typeof(*svmm), mirror); unsigned long start = update->start; unsigned long limit = update->end; - if (!update->blockable) + if (!mmu_notifier_range_blockable(update)) return -EAGAIN; SVMM_DBG(svmm, "invalidate %016lx-%016lx", start, limit); @@ -485,31 +485,29 @@ nouveau_range_done(struct hmm_range *range) } static int -nouveau_range_fault(struct hmm_mirror *mirror, struct hmm_range *range) +nouveau_range_fault(struct nouveau_svmm *svmm, struct hmm_range *range) { long ret; range->default_flags = 0; range->pfn_flags_mask = -1UL; - ret = hmm_range_register(range, mirror, - range->start, range->end, - PAGE_SHIFT); + ret = hmm_range_register(range, &svmm->mirror); if (ret) { - up_read(&range->vma->vm_mm->mmap_sem); + up_read(&svmm->mm->mmap_sem); return (int)ret; } if (!hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT)) { - up_read(&range->vma->vm_mm->mmap_sem); - return -EAGAIN; + up_read(&svmm->mm->mmap_sem); + return -EBUSY; } - ret = hmm_range_fault(range, true); + ret = hmm_range_fault(range, 0); if (ret <= 0) { if (ret == 0) ret = -EBUSY; - up_read(&range->vma->vm_mm->mmap_sem); + up_read(&svmm->mm->mmap_sem); hmm_range_unregister(range); return ret; } @@ -682,7 +680,6 @@ nouveau_svm_fault(struct nvif_notify *notify) args.i.p.addr + args.i.p.size, fn - fi); /* Have HMM fault pages within the fault window to the GPU. */ - range.vma = vma; range.start = args.i.p.addr; range.end = args.i.p.addr + args.i.p.size; range.pfns = args.phys; @@ -690,7 +687,7 @@ nouveau_svm_fault(struct nvif_notify *notify) range.values = nouveau_svm_pfn_values; range.pfn_shift = NVIF_VMM_PFNMAP_V0_ADDR_SHIFT; again: - ret = nouveau_range_fault(&svmm->mirror, &range); + ret = nouveau_range_fault(svmm, &range); if (ret == 0) { mutex_lock(&svmm->mutex); if (!nouveau_range_done(&range)) { diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 05b88491ccb9..d59b004f6695 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2449,9 +2449,6 @@ struct radeon_device { /* tracking pinned memory */ u64 vram_pin_size; u64 gart_pin_size; - - struct mutex mn_lock; - DECLARE_HASHTABLE(mn_hash, 7); }; bool radeon_is_px(struct drm_device *dev); diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 88eb7cb522bb..5d017f0aec66 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1325,8 +1325,6 @@ int radeon_device_init(struct radeon_device *rdev, init_rwsem(&rdev->pm.mclk_lock); init_rwsem(&rdev->exclusive_lock); init_waitqueue_head(&rdev->irq.vblank_queue); - mutex_init(&rdev->mn_lock); - hash_init(rdev->mn_hash); r = radeon_gem_init(rdev); if (r) return r; diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 5838162f687f..431e6b64b77d 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -35,6 +35,7 @@ #include <linux/module.h> #include <linux/pm_runtime.h> #include <linux/vga_switcheroo.h> +#include <linux/mmu_notifier.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_drv.h> @@ -623,6 +624,7 @@ static void __exit radeon_exit(void) { pci_unregister_driver(pdriver); radeon_unregister_atpx_handler(); + mmu_notifier_synchronize(); } module_init(radeon_init); diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c index 6902f998ede9..dbab9a3a969b 100644 --- a/drivers/gpu/drm/radeon/radeon_mn.c +++ b/drivers/gpu/drm/radeon/radeon_mn.c @@ -37,17 +37,8 @@ #include "radeon.h" struct radeon_mn { - /* constant after initialisation */ - struct radeon_device *rdev; - struct mm_struct *mm; struct mmu_notifier mn; - /* only used on destruction */ - struct work_struct work; - - /* protected by rdev->mn_lock */ - struct hlist_node node; - /* objects protected by lock */ struct mutex lock; struct rb_root_cached objects; @@ -59,55 +50,6 @@ struct radeon_mn_node { }; /** - * radeon_mn_destroy - destroy the rmn - * - * @work: previously sheduled work item - * - * Lazy destroys the notifier from a work item - */ -static void radeon_mn_destroy(struct work_struct *work) -{ - struct radeon_mn *rmn = container_of(work, struct radeon_mn, work); - struct radeon_device *rdev = rmn->rdev; - struct radeon_mn_node *node, *next_node; - struct radeon_bo *bo, *next_bo; - - mutex_lock(&rdev->mn_lock); - mutex_lock(&rmn->lock); - hash_del(&rmn->node); - rbtree_postorder_for_each_entry_safe(node, next_node, - &rmn->objects.rb_root, it.rb) { - - interval_tree_remove(&node->it, &rmn->objects); - list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) { - bo->mn = NULL; - list_del_init(&bo->mn_list); - } - kfree(node); - } - mutex_unlock(&rmn->lock); - mutex_unlock(&rdev->mn_lock); - mmu_notifier_unregister(&rmn->mn, rmn->mm); - kfree(rmn); -} - -/** - * radeon_mn_release - callback to notify about mm destruction - * - * @mn: our notifier - * @mn: the mm this callback is about - * - * Shedule a work item to lazy destroy our notifier. - */ -static void radeon_mn_release(struct mmu_notifier *mn, - struct mm_struct *mm) -{ - struct radeon_mn *rmn = container_of(mn, struct radeon_mn, mn); - INIT_WORK(&rmn->work, radeon_mn_destroy); - schedule_work(&rmn->work); -} - -/** * radeon_mn_invalidate_range_start - callback to notify about mm change * * @mn: our notifier @@ -183,65 +125,44 @@ out_unlock: return ret; } -static const struct mmu_notifier_ops radeon_mn_ops = { - .release = radeon_mn_release, - .invalidate_range_start = radeon_mn_invalidate_range_start, -}; +static void radeon_mn_release(struct mmu_notifier *mn, struct mm_struct *mm) +{ + struct mmu_notifier_range range = { + .mm = mm, + .start = 0, + .end = ULONG_MAX, + .flags = 0, + .event = MMU_NOTIFY_UNMAP, + }; + + radeon_mn_invalidate_range_start(mn, &range); +} -/** - * radeon_mn_get - create notifier context - * - * @rdev: radeon device pointer - * - * Creates a notifier context for current->mm. - */ -static struct radeon_mn *radeon_mn_get(struct radeon_device *rdev) +static struct mmu_notifier *radeon_mn_alloc_notifier(struct mm_struct *mm) { - struct mm_struct *mm = current->mm; struct radeon_mn *rmn; - int r; - - if (down_write_killable(&mm->mmap_sem)) - return ERR_PTR(-EINTR); - - mutex_lock(&rdev->mn_lock); - - hash_for_each_possible(rdev->mn_hash, rmn, node, (unsigned long)mm) - if (rmn->mm == mm) - goto release_locks; rmn = kzalloc(sizeof(*rmn), GFP_KERNEL); - if (!rmn) { - rmn = ERR_PTR(-ENOMEM); - goto release_locks; - } + if (!rmn) + return ERR_PTR(-ENOMEM); - rmn->rdev = rdev; - rmn->mm = mm; - rmn->mn.ops = &radeon_mn_ops; mutex_init(&rmn->lock); rmn->objects = RB_ROOT_CACHED; - - r = __mmu_notifier_register(&rmn->mn, mm); - if (r) - goto free_rmn; - - hash_add(rdev->mn_hash, &rmn->node, (unsigned long)mm); - -release_locks: - mutex_unlock(&rdev->mn_lock); - up_write(&mm->mmap_sem); - - return rmn; - -free_rmn: - mutex_unlock(&rdev->mn_lock); - up_write(&mm->mmap_sem); - kfree(rmn); + return &rmn->mn; +} - return ERR_PTR(r); +static void radeon_mn_free_notifier(struct mmu_notifier *mn) +{ + kfree(container_of(mn, struct radeon_mn, mn)); } +static const struct mmu_notifier_ops radeon_mn_ops = { + .release = radeon_mn_release, + .invalidate_range_start = radeon_mn_invalidate_range_start, + .alloc_notifier = radeon_mn_alloc_notifier, + .free_notifier = radeon_mn_free_notifier, +}; + /** * radeon_mn_register - register a BO for notifier updates * @@ -254,15 +175,16 @@ free_rmn: int radeon_mn_register(struct radeon_bo *bo, unsigned long addr) { unsigned long end = addr + radeon_bo_size(bo) - 1; - struct radeon_device *rdev = bo->rdev; + struct mmu_notifier *mn; struct radeon_mn *rmn; struct radeon_mn_node *node = NULL; struct list_head bos; struct interval_tree_node *it; - rmn = radeon_mn_get(rdev); - if (IS_ERR(rmn)) - return PTR_ERR(rmn); + mn = mmu_notifier_get(&radeon_mn_ops, current->mm); + if (IS_ERR(mn)) + return PTR_ERR(mn); + rmn = container_of(mn, struct radeon_mn, mn); INIT_LIST_HEAD(&bos); @@ -309,22 +231,16 @@ int radeon_mn_register(struct radeon_bo *bo, unsigned long addr) */ void radeon_mn_unregister(struct radeon_bo *bo) { - struct radeon_device *rdev = bo->rdev; - struct radeon_mn *rmn; + struct radeon_mn *rmn = bo->mn; struct list_head *head; - mutex_lock(&rdev->mn_lock); - rmn = bo->mn; - if (rmn == NULL) { - mutex_unlock(&rdev->mn_lock); + if (!rmn) return; - } mutex_lock(&rmn->lock); /* save the next list entry for later */ head = bo->mn_list.next; - bo->mn = NULL; list_del(&bo->mn_list); if (list_empty(head)) { @@ -335,5 +251,7 @@ void radeon_mn_unregister(struct radeon_bo *bo) } mutex_unlock(&rmn->lock); - mutex_unlock(&rdev->mn_lock); + + mmu_notifier_put(&rmn->mn); + bo->mn = NULL; } diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 85e103b147cc..b44b1c322ec8 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -55,6 +55,7 @@ config INFINIBAND_ON_DEMAND_PAGING bool "InfiniBand on-demand paging support" depends on INFINIBAND_USER_MEM select MMU_NOTIFIER + select INTERVAL_TREE default y ---help--- On demand paging support for the InfiniBand subsystem. diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index ea8661a00651..b5631b8a0397 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2562,6 +2562,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, get_vf_config); SET_DEVICE_OP(dev_ops, get_vf_stats); SET_DEVICE_OP(dev_ops, init_port); + SET_DEVICE_OP(dev_ops, invalidate_range); SET_DEVICE_OP(dev_ops, iw_accept); SET_DEVICE_OP(dev_ops, iw_add_ref); SET_DEVICE_OP(dev_ops, iw_connect); diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 56553668256f..41f9e268e3fb 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -184,9 +184,6 @@ EXPORT_SYMBOL(ib_umem_find_best_pgsz); /** * ib_umem_get - Pin and DMA map userspace memory. * - * If access flags indicate ODP memory, avoid pinning. Instead, stores - * the mm for future page fault handling in conjunction with MMU notifiers. - * * @udata: userspace context to pin memory for * @addr: userspace virtual address to start at * @size: length of region to pin @@ -231,36 +228,19 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, if (!can_do_mlock()) return ERR_PTR(-EPERM); - if (access & IB_ACCESS_ON_DEMAND) { - umem = kzalloc(sizeof(struct ib_umem_odp), GFP_KERNEL); - if (!umem) - return ERR_PTR(-ENOMEM); - umem->is_odp = 1; - } else { - umem = kzalloc(sizeof(*umem), GFP_KERNEL); - if (!umem) - return ERR_PTR(-ENOMEM); - } + if (access & IB_ACCESS_ON_DEMAND) + return ERR_PTR(-EOPNOTSUPP); - umem->context = context; + umem = kzalloc(sizeof(*umem), GFP_KERNEL); + if (!umem) + return ERR_PTR(-ENOMEM); + umem->ibdev = context->device; umem->length = size; umem->address = addr; umem->writable = ib_access_writable(access); umem->owning_mm = mm = current->mm; mmgrab(mm); - if (access & IB_ACCESS_ON_DEMAND) { - if (WARN_ON_ONCE(!context->invalidate_range)) { - ret = -EINVAL; - goto umem_kfree; - } - - ret = ib_umem_odp_get(to_ib_umem_odp(umem), access); - if (ret) - goto umem_kfree; - return umem; - } - page_list = (struct page **) __get_free_page(GFP_KERNEL); if (!page_list) { ret = -ENOMEM; @@ -346,15 +326,6 @@ umem_kfree: } EXPORT_SYMBOL(ib_umem_get); -static void __ib_umem_release_tail(struct ib_umem *umem) -{ - mmdrop(umem->owning_mm); - if (umem->is_odp) - kfree(to_ib_umem_odp(umem)); - else - kfree(umem); -} - /** * ib_umem_release - release memory pinned with ib_umem_get * @umem: umem struct to release @@ -363,17 +334,14 @@ void ib_umem_release(struct ib_umem *umem) { if (!umem) return; + if (umem->is_odp) + return ib_umem_odp_release(to_ib_umem_odp(umem)); - if (umem->is_odp) { - ib_umem_odp_release(to_ib_umem_odp(umem)); - __ib_umem_release_tail(umem); - return; - } - - __ib_umem_release(umem->context->device, umem, 1); + __ib_umem_release(umem->ibdev, umem, 1); atomic64_sub(ib_umem_num_pages(umem), &umem->owning_mm->pinned_vm); - __ib_umem_release_tail(umem); + mmdrop(umem->owning_mm); + kfree(umem); } EXPORT_SYMBOL(ib_umem_release); diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index c0e15db34680..9aebe9ce8b07 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -39,44 +39,14 @@ #include <linux/export.h> #include <linux/vmalloc.h> #include <linux/hugetlb.h> -#include <linux/interval_tree_generic.h> +#include <linux/interval_tree.h> #include <linux/pagemap.h> #include <rdma/ib_verbs.h> #include <rdma/ib_umem.h> #include <rdma/ib_umem_odp.h> -/* - * The ib_umem list keeps track of memory regions for which the HW - * device request to receive notification when the related memory - * mapping is changed. - * - * ib_umem_lock protects the list. - */ - -static u64 node_start(struct umem_odp_node *n) -{ - struct ib_umem_odp *umem_odp = - container_of(n, struct ib_umem_odp, interval_tree); - - return ib_umem_start(umem_odp); -} - -/* Note that the representation of the intervals in the interval tree - * considers the ending point as contained in the interval, while the - * function ib_umem_end returns the first address which is not contained - * in the umem. - */ -static u64 node_last(struct umem_odp_node *n) -{ - struct ib_umem_odp *umem_odp = - container_of(n, struct ib_umem_odp, interval_tree); - - return ib_umem_end(umem_odp) - 1; -} - -INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last, - node_start, node_last, static, rbt_ib_umem) +#include "uverbs.h" static void ib_umem_notifier_start_account(struct ib_umem_odp *umem_odp) { @@ -104,31 +74,34 @@ static void ib_umem_notifier_end_account(struct ib_umem_odp *umem_odp) mutex_unlock(&umem_odp->umem_mutex); } -static int ib_umem_notifier_release_trampoline(struct ib_umem_odp *umem_odp, - u64 start, u64 end, void *cookie) -{ - /* - * Increase the number of notifiers running, to - * prevent any further fault handling on this MR. - */ - ib_umem_notifier_start_account(umem_odp); - complete_all(&umem_odp->notifier_completion); - umem_odp->umem.context->invalidate_range( - umem_odp, ib_umem_start(umem_odp), ib_umem_end(umem_odp)); - return 0; -} - static void ib_umem_notifier_release(struct mmu_notifier *mn, struct mm_struct *mm) { struct ib_ucontext_per_mm *per_mm = container_of(mn, struct ib_ucontext_per_mm, mn); + struct rb_node *node; down_read(&per_mm->umem_rwsem); - if (per_mm->active) - rbt_ib_umem_for_each_in_range( - &per_mm->umem_tree, 0, ULLONG_MAX, - ib_umem_notifier_release_trampoline, true, NULL); + if (!per_mm->mn.users) + goto out; + + for (node = rb_first_cached(&per_mm->umem_tree); node; + node = rb_next(node)) { + struct ib_umem_odp *umem_odp = + rb_entry(node, struct ib_umem_odp, interval_tree.rb); + + /* + * Increase the number of notifiers running, to prevent any + * further fault handling on this MR. + */ + ib_umem_notifier_start_account(umem_odp); + complete_all(&umem_odp->notifier_completion); + umem_odp->umem.ibdev->ops.invalidate_range( + umem_odp, ib_umem_start(umem_odp), + ib_umem_end(umem_odp)); + } + +out: up_read(&per_mm->umem_rwsem); } @@ -136,7 +109,7 @@ static int invalidate_range_start_trampoline(struct ib_umem_odp *item, u64 start, u64 end, void *cookie) { ib_umem_notifier_start_account(item); - item->umem.context->invalidate_range(item, start, end); + item->umem.ibdev->ops.invalidate_range(item, start, end); return 0; } @@ -152,10 +125,10 @@ static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn, else if (!down_read_trylock(&per_mm->umem_rwsem)) return -EAGAIN; - if (!per_mm->active) { + if (!per_mm->mn.users) { up_read(&per_mm->umem_rwsem); /* - * At this point active is permanently set and visible to this + * At this point users is permanently zero and visible to this * CPU without a lock, that fact is relied on to skip the unlock * in range_end. */ @@ -185,7 +158,7 @@ static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn, struct ib_ucontext_per_mm *per_mm = container_of(mn, struct ib_ucontext_per_mm, mn); - if (unlikely(!per_mm->active)) + if (unlikely(!per_mm->mn.users)) return; rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, range->start, @@ -194,212 +167,250 @@ static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn, up_read(&per_mm->umem_rwsem); } -static const struct mmu_notifier_ops ib_umem_notifiers = { - .release = ib_umem_notifier_release, - .invalidate_range_start = ib_umem_notifier_invalidate_range_start, - .invalidate_range_end = ib_umem_notifier_invalidate_range_end, -}; - -static void add_umem_to_per_mm(struct ib_umem_odp *umem_odp) -{ - struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm; - - down_write(&per_mm->umem_rwsem); - if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp))) - rbt_ib_umem_insert(&umem_odp->interval_tree, - &per_mm->umem_tree); - up_write(&per_mm->umem_rwsem); -} - -static void remove_umem_from_per_mm(struct ib_umem_odp *umem_odp) -{ - struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm; - - down_write(&per_mm->umem_rwsem); - if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp))) - rbt_ib_umem_remove(&umem_odp->interval_tree, - &per_mm->umem_tree); - complete_all(&umem_odp->notifier_completion); - - up_write(&per_mm->umem_rwsem); -} - -static struct ib_ucontext_per_mm *alloc_per_mm(struct ib_ucontext *ctx, - struct mm_struct *mm) +static struct mmu_notifier *ib_umem_alloc_notifier(struct mm_struct *mm) { struct ib_ucontext_per_mm *per_mm; - int ret; per_mm = kzalloc(sizeof(*per_mm), GFP_KERNEL); if (!per_mm) return ERR_PTR(-ENOMEM); - per_mm->context = ctx; - per_mm->mm = mm; per_mm->umem_tree = RB_ROOT_CACHED; init_rwsem(&per_mm->umem_rwsem); - per_mm->active = true; + WARN_ON(mm != current->mm); rcu_read_lock(); per_mm->tgid = get_task_pid(current->group_leader, PIDTYPE_PID); rcu_read_unlock(); + return &per_mm->mn; +} - WARN_ON(mm != current->mm); - - per_mm->mn.ops = &ib_umem_notifiers; - ret = mmu_notifier_register(&per_mm->mn, per_mm->mm); - if (ret) { - dev_err(&ctx->device->dev, - "Failed to register mmu_notifier %d\n", ret); - goto out_pid; - } +static void ib_umem_free_notifier(struct mmu_notifier *mn) +{ + struct ib_ucontext_per_mm *per_mm = + container_of(mn, struct ib_ucontext_per_mm, mn); - list_add(&per_mm->ucontext_list, &ctx->per_mm_list); - return per_mm; + WARN_ON(!RB_EMPTY_ROOT(&per_mm->umem_tree.rb_root)); -out_pid: put_pid(per_mm->tgid); kfree(per_mm); - return ERR_PTR(ret); } -static int get_per_mm(struct ib_umem_odp *umem_odp) +static const struct mmu_notifier_ops ib_umem_notifiers = { + .release = ib_umem_notifier_release, + .invalidate_range_start = ib_umem_notifier_invalidate_range_start, + .invalidate_range_end = ib_umem_notifier_invalidate_range_end, + .alloc_notifier = ib_umem_alloc_notifier, + .free_notifier = ib_umem_free_notifier, +}; + +static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp) { - struct ib_ucontext *ctx = umem_odp->umem.context; struct ib_ucontext_per_mm *per_mm; + struct mmu_notifier *mn; + int ret; - /* - * Generally speaking we expect only one or two per_mm in this list, - * so no reason to optimize this search today. - */ - mutex_lock(&ctx->per_mm_list_lock); - list_for_each_entry(per_mm, &ctx->per_mm_list, ucontext_list) { - if (per_mm->mm == umem_odp->umem.owning_mm) - goto found; + umem_odp->umem.is_odp = 1; + if (!umem_odp->is_implicit_odp) { + size_t page_size = 1UL << umem_odp->page_shift; + size_t pages; + + umem_odp->interval_tree.start = + ALIGN_DOWN(umem_odp->umem.address, page_size); + if (check_add_overflow(umem_odp->umem.address, + umem_odp->umem.length, + &umem_odp->interval_tree.last)) + return -EOVERFLOW; + umem_odp->interval_tree.last = + ALIGN(umem_odp->interval_tree.last, page_size); + if (unlikely(umem_odp->interval_tree.last < page_size)) + return -EOVERFLOW; + + pages = (umem_odp->interval_tree.last - + umem_odp->interval_tree.start) >> + umem_odp->page_shift; + if (!pages) + return -EINVAL; + + /* + * Note that the representation of the intervals in the + * interval tree considers the ending point as contained in + * the interval. + */ + umem_odp->interval_tree.last--; + + umem_odp->page_list = kvcalloc( + pages, sizeof(*umem_odp->page_list), GFP_KERNEL); + if (!umem_odp->page_list) + return -ENOMEM; + + umem_odp->dma_list = kvcalloc( + pages, sizeof(*umem_odp->dma_list), GFP_KERNEL); + if (!umem_odp->dma_list) { + ret = -ENOMEM; + goto out_page_list; + } } - per_mm = alloc_per_mm(ctx, umem_odp->umem.owning_mm); - if (IS_ERR(per_mm)) { - mutex_unlock(&ctx->per_mm_list_lock); - return PTR_ERR(per_mm); + mn = mmu_notifier_get(&ib_umem_notifiers, umem_odp->umem.owning_mm); + if (IS_ERR(mn)) { + ret = PTR_ERR(mn); + goto out_dma_list; } + umem_odp->per_mm = per_mm = + container_of(mn, struct ib_ucontext_per_mm, mn); -found: - umem_odp->per_mm = per_mm; - per_mm->odp_mrs_count++; - mutex_unlock(&ctx->per_mm_list_lock); + mutex_init(&umem_odp->umem_mutex); + init_completion(&umem_odp->notifier_completion); + + if (!umem_odp->is_implicit_odp) { + down_write(&per_mm->umem_rwsem); + interval_tree_insert(&umem_odp->interval_tree, + &per_mm->umem_tree); + up_write(&per_mm->umem_rwsem); + } + mmgrab(umem_odp->umem.owning_mm); return 0; -} -static void free_per_mm(struct rcu_head *rcu) -{ - kfree(container_of(rcu, struct ib_ucontext_per_mm, rcu)); +out_dma_list: + kvfree(umem_odp->dma_list); +out_page_list: + kvfree(umem_odp->page_list); + return ret; } -static void put_per_mm(struct ib_umem_odp *umem_odp) +/** + * ib_umem_odp_alloc_implicit - Allocate a parent implicit ODP umem + * + * Implicit ODP umems do not have a VA range and do not have any page lists. + * They exist only to hold the per_mm reference to help the driver create + * children umems. + * + * @udata: udata from the syscall being used to create the umem + * @access: ib_reg_mr access flags + */ +struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_udata *udata, + int access) { - struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm; - struct ib_ucontext *ctx = umem_odp->umem.context; - bool need_free; - - mutex_lock(&ctx->per_mm_list_lock); - umem_odp->per_mm = NULL; - per_mm->odp_mrs_count--; - need_free = per_mm->odp_mrs_count == 0; - if (need_free) - list_del(&per_mm->ucontext_list); - mutex_unlock(&ctx->per_mm_list_lock); - - if (!need_free) - return; + struct ib_ucontext *context = + container_of(udata, struct uverbs_attr_bundle, driver_udata) + ->context; + struct ib_umem *umem; + struct ib_umem_odp *umem_odp; + int ret; - /* - * NOTE! mmu_notifier_unregister() can happen between a start/end - * callback, resulting in an start/end, and thus an unbalanced - * lock. This doesn't really matter to us since we are about to kfree - * the memory that holds the lock, however LOCKDEP doesn't like this. - */ - down_write(&per_mm->umem_rwsem); - per_mm->active = false; - up_write(&per_mm->umem_rwsem); + if (access & IB_ACCESS_HUGETLB) + return ERR_PTR(-EINVAL); - WARN_ON(!RB_EMPTY_ROOT(&per_mm->umem_tree.rb_root)); - mmu_notifier_unregister_no_release(&per_mm->mn, per_mm->mm); - put_pid(per_mm->tgid); - mmu_notifier_call_srcu(&per_mm->rcu, free_per_mm); + if (!context) + return ERR_PTR(-EIO); + if (WARN_ON_ONCE(!context->device->ops.invalidate_range)) + return ERR_PTR(-EINVAL); + + umem_odp = kzalloc(sizeof(*umem_odp), GFP_KERNEL); + if (!umem_odp) + return ERR_PTR(-ENOMEM); + umem = &umem_odp->umem; + umem->ibdev = context->device; + umem->writable = ib_access_writable(access); + umem->owning_mm = current->mm; + umem_odp->is_implicit_odp = 1; + umem_odp->page_shift = PAGE_SHIFT; + + ret = ib_init_umem_odp(umem_odp); + if (ret) { + kfree(umem_odp); + return ERR_PTR(ret); + } + return umem_odp; } +EXPORT_SYMBOL(ib_umem_odp_alloc_implicit); -struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root, - unsigned long addr, size_t size) +/** + * ib_umem_odp_alloc_child - Allocate a child ODP umem under an implicit + * parent ODP umem + * + * @root: The parent umem enclosing the child. This must be allocated using + * ib_alloc_implicit_odp_umem() + * @addr: The starting userspace VA + * @size: The length of the userspace VA + */ +struct ib_umem_odp *ib_umem_odp_alloc_child(struct ib_umem_odp *root, + unsigned long addr, size_t size) { - struct ib_ucontext_per_mm *per_mm = root->per_mm; - struct ib_ucontext *ctx = per_mm->context; + /* + * Caller must ensure that root cannot be freed during the call to + * ib_alloc_odp_umem. + */ struct ib_umem_odp *odp_data; struct ib_umem *umem; - int pages = size >> PAGE_SHIFT; int ret; + if (WARN_ON(!root->is_implicit_odp)) + return ERR_PTR(-EINVAL); + odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL); if (!odp_data) return ERR_PTR(-ENOMEM); umem = &odp_data->umem; - umem->context = ctx; + umem->ibdev = root->umem.ibdev; umem->length = size; umem->address = addr; - odp_data->page_shift = PAGE_SHIFT; umem->writable = root->umem.writable; - umem->is_odp = 1; - odp_data->per_mm = per_mm; - umem->owning_mm = per_mm->mm; - mmgrab(umem->owning_mm); - - mutex_init(&odp_data->umem_mutex); - init_completion(&odp_data->notifier_completion); - - odp_data->page_list = - vzalloc(array_size(pages, sizeof(*odp_data->page_list))); - if (!odp_data->page_list) { - ret = -ENOMEM; - goto out_odp_data; - } + umem->owning_mm = root->umem.owning_mm; + odp_data->page_shift = PAGE_SHIFT; - odp_data->dma_list = - vzalloc(array_size(pages, sizeof(*odp_data->dma_list))); - if (!odp_data->dma_list) { - ret = -ENOMEM; - goto out_page_list; + ret = ib_init_umem_odp(odp_data); + if (ret) { + kfree(odp_data); + return ERR_PTR(ret); } - - /* - * Caller must ensure that the umem_odp that the per_mm came from - * cannot be freed during the call to ib_alloc_odp_umem. - */ - mutex_lock(&ctx->per_mm_list_lock); - per_mm->odp_mrs_count++; - mutex_unlock(&ctx->per_mm_list_lock); - add_umem_to_per_mm(odp_data); - return odp_data; - -out_page_list: - vfree(odp_data->page_list); -out_odp_data: - mmdrop(umem->owning_mm); - kfree(odp_data); - return ERR_PTR(ret); } -EXPORT_SYMBOL(ib_alloc_odp_umem); +EXPORT_SYMBOL(ib_umem_odp_alloc_child); -int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access) +/** + * ib_umem_odp_get - Create a umem_odp for a userspace va + * + * @udata: userspace context to pin memory for + * @addr: userspace virtual address to start at + * @size: length of region to pin + * @access: IB_ACCESS_xxx flags for memory being pinned + * + * The driver should use when the access flags indicate ODP memory. It avoids + * pinning, instead, stores the mm for future page fault handling in + * conjunction with MMU notifiers. + */ +struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, unsigned long addr, + size_t size, int access) { - struct ib_umem *umem = &umem_odp->umem; - /* - * NOTE: This must called in a process context where umem->owning_mm - * == current->mm - */ - struct mm_struct *mm = umem->owning_mm; - int ret_val; + struct ib_umem_odp *umem_odp; + struct ib_ucontext *context; + struct mm_struct *mm; + int ret; + + if (!udata) + return ERR_PTR(-EIO); + + context = container_of(udata, struct uverbs_attr_bundle, driver_udata) + ->context; + if (!context) + return ERR_PTR(-EIO); + + if (WARN_ON_ONCE(!(access & IB_ACCESS_ON_DEMAND)) || + WARN_ON_ONCE(!context->device->ops.invalidate_range)) + return ERR_PTR(-EINVAL); + + umem_odp = kzalloc(sizeof(struct ib_umem_odp), GFP_KERNEL); + if (!umem_odp) + return ERR_PTR(-ENOMEM); + + umem_odp->umem.ibdev = context->device; + umem_odp->umem.length = size; + umem_odp->umem.address = addr; + umem_odp->umem.writable = ib_access_writable(access); + umem_odp->umem.owning_mm = mm = current->mm; umem_odp->page_shift = PAGE_SHIFT; if (access & IB_ACCESS_HUGETLB) { @@ -410,63 +421,63 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access) vma = find_vma(mm, ib_umem_start(umem_odp)); if (!vma || !is_vm_hugetlb_page(vma)) { up_read(&mm->mmap_sem); - return -EINVAL; + ret = -EINVAL; + goto err_free; } h = hstate_vma(vma); umem_odp->page_shift = huge_page_shift(h); up_read(&mm->mmap_sem); } - mutex_init(&umem_odp->umem_mutex); - - init_completion(&umem_odp->notifier_completion); - - if (ib_umem_odp_num_pages(umem_odp)) { - umem_odp->page_list = - vzalloc(array_size(sizeof(*umem_odp->page_list), - ib_umem_odp_num_pages(umem_odp))); - if (!umem_odp->page_list) - return -ENOMEM; - - umem_odp->dma_list = - vzalloc(array_size(sizeof(*umem_odp->dma_list), - ib_umem_odp_num_pages(umem_odp))); - if (!umem_odp->dma_list) { - ret_val = -ENOMEM; - goto out_page_list; - } - } - - ret_val = get_per_mm(umem_odp); - if (ret_val) - goto out_dma_list; - add_umem_to_per_mm(umem_odp); - - return 0; + ret = ib_init_umem_odp(umem_odp); + if (ret) + goto err_free; + return umem_odp; -out_dma_list: - vfree(umem_odp->dma_list); -out_page_list: - vfree(umem_odp->page_list); - return ret_val; +err_free: + kfree(umem_odp); + return ERR_PTR(ret); } +EXPORT_SYMBOL(ib_umem_odp_get); void ib_umem_odp_release(struct ib_umem_odp *umem_odp) { + struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm; + /* * Ensure that no more pages are mapped in the umem. * * It is the driver's responsibility to ensure, before calling us, * that the hardware will not attempt to access the MR any more. */ - ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp), - ib_umem_end(umem_odp)); + if (!umem_odp->is_implicit_odp) { + ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp), + ib_umem_end(umem_odp)); + kvfree(umem_odp->dma_list); + kvfree(umem_odp->page_list); + } - remove_umem_from_per_mm(umem_odp); - put_per_mm(umem_odp); - vfree(umem_odp->dma_list); - vfree(umem_odp->page_list); + down_write(&per_mm->umem_rwsem); + if (!umem_odp->is_implicit_odp) { + interval_tree_remove(&umem_odp->interval_tree, + &per_mm->umem_tree); + complete_all(&umem_odp->notifier_completion); + } + /* + * NOTE! mmu_notifier_unregister() can happen between a start/end + * callback, resulting in a missing end, and thus an unbalanced + * lock. This doesn't really matter to us since we are about to kfree + * the memory that holds the lock, however LOCKDEP doesn't like this. + * Thus we call the mmu_notifier_put under the rwsem and test the + * internal users count to reliably see if we are past this point. + */ + mmu_notifier_put(&per_mm->mn); + up_write(&per_mm->umem_rwsem); + + mmdrop(umem_odp->umem.owning_mm); + kfree(umem_odp); } +EXPORT_SYMBOL(ib_umem_odp_release); /* * Map for DMA and insert a single page into the on-demand paging page tables. @@ -493,8 +504,7 @@ static int ib_umem_odp_map_dma_single_page( u64 access_mask, unsigned long current_seq) { - struct ib_ucontext *context = umem_odp->umem.context; - struct ib_device *dev = context->device; + struct ib_device *dev = umem_odp->umem.ibdev; dma_addr_t dma_addr; int remove_existing_mapping = 0; int ret = 0; @@ -534,7 +544,7 @@ out: if (remove_existing_mapping) { ib_umem_notifier_start_account(umem_odp); - context->invalidate_range( + dev->ops.invalidate_range( umem_odp, ib_umem_start(umem_odp) + (page_index << umem_odp->page_shift), @@ -707,7 +717,7 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, { int idx; u64 addr; - struct ib_device *dev = umem_odp->umem.context->device; + struct ib_device *dev = umem_odp->umem.ibdev; virt = max_t(u64, virt, ib_umem_start(umem_odp)); bound = min_t(u64, bound, ib_umem_end(umem_odp)); @@ -761,35 +771,21 @@ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root, void *cookie) { int ret_val = 0; - struct umem_odp_node *node, *next; + struct interval_tree_node *node, *next; struct ib_umem_odp *umem; if (unlikely(start == last)) return ret_val; - for (node = rbt_ib_umem_iter_first(root, start, last - 1); + for (node = interval_tree_iter_first(root, start, last - 1); node; node = next) { /* TODO move the blockable decision up to the callback */ if (!blockable) return -EAGAIN; - next = rbt_ib_umem_iter_next(node, start, last - 1); + next = interval_tree_iter_next(node, start, last - 1); umem = container_of(node, struct ib_umem_odp, interval_tree); ret_val = cb(umem, start, last, cookie) || ret_val; } return ret_val; } -EXPORT_SYMBOL(rbt_ib_umem_for_each_in_range); - -struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root, - u64 addr, u64 length) -{ - struct umem_odp_node *node; - - node = rbt_ib_umem_iter_first(root, addr, addr + length - 1); - if (node) - return container_of(node, struct ib_umem_odp, interval_tree); - return NULL; - -} -EXPORT_SYMBOL(rbt_ib_umem_lookup); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 7ddd0e5bc6b3..7c10dfe417a4 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -252,9 +252,6 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs) ucontext->closing = false; ucontext->cleanup_retryable = false; - mutex_init(&ucontext->per_mm_list_lock); - INIT_LIST_HEAD(&ucontext->per_mm_list); - ret = get_unused_fd_flags(O_CLOEXEC); if (ret < 0) goto err_free; @@ -275,8 +272,6 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs) ret = ib_dev->ops.alloc_ucontext(ucontext, &attrs->driver_udata); if (ret) goto err_file; - if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)) - ucontext->invalidate_range = NULL; rdma_restrack_uadd(&ucontext->res); diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 11c13c1381cf..e369ac0d6f51 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -1487,6 +1487,7 @@ static void __exit ib_uverbs_cleanup(void) IB_UVERBS_NUM_FIXED_MINOR); unregister_chrdev_region(dynamic_uverbs_dev, IB_UVERBS_NUM_DYNAMIC_MINOR); + mmu_notifier_synchronize(); } module_init(ib_uverbs_init); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 4e9f1507ffd9..bface798ee59 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1867,10 +1867,6 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, if (err) goto out_sys_pages; - if (ibdev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING) - context->ibucontext.invalidate_range = - &mlx5_ib_invalidate_range; - if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) { err = mlx5_ib_devx_create(dev, true); if (err < 0) @@ -1999,11 +1995,6 @@ static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); struct mlx5_bfreg_info *bfregi; - /* All umem's must be destroyed before destroying the ucontext. */ - mutex_lock(&ibcontext->per_mm_list_lock); - WARN_ON(!list_empty(&ibcontext->per_mm_list)); - mutex_unlock(&ibcontext->per_mm_list_lock); - bfregi = &context->bfregi; mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid); diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index a40e0abf2338..b5aece786b36 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -56,19 +56,6 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, struct scatterlist *sg; int entry; - if (umem->is_odp) { - struct ib_umem_odp *odp = to_ib_umem_odp(umem); - unsigned int page_shift = odp->page_shift; - - *ncont = ib_umem_odp_num_pages(odp); - *count = *ncont << (page_shift - PAGE_SHIFT); - *shift = page_shift; - if (order) - *order = ilog2(roundup_pow_of_two(*ncont)); - - return; - } - addr = addr >> PAGE_SHIFT; tmp = (unsigned long)addr; m = find_first_bit(&tmp, BITS_PER_LONG); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 3401f5f6792e..1eff031ef048 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -784,19 +784,37 @@ static int mr_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, int *ncont, int *order) { struct ib_umem *u; - int err; *umem = NULL; - u = ib_umem_get(udata, start, length, access_flags, 0); - err = PTR_ERR_OR_ZERO(u); - if (err) { - mlx5_ib_dbg(dev, "umem get failed (%d)\n", err); - return err; + if (access_flags & IB_ACCESS_ON_DEMAND) { + struct ib_umem_odp *odp; + + odp = ib_umem_odp_get(udata, start, length, access_flags); + if (IS_ERR(odp)) { + mlx5_ib_dbg(dev, "umem get failed (%ld)\n", + PTR_ERR(odp)); + return PTR_ERR(odp); + } + + u = &odp->umem; + + *page_shift = odp->page_shift; + *ncont = ib_umem_odp_num_pages(odp); + *npages = *ncont << (*page_shift - PAGE_SHIFT); + if (order) + *order = ilog2(roundup_pow_of_two(*ncont)); + } else { + u = ib_umem_get(udata, start, length, access_flags, 0); + if (IS_ERR(u)) { + mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u)); + return PTR_ERR(u); + } + + mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages, + page_shift, ncont, order); } - mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages, - page_shift, ncont, order); if (!*npages) { mlx5_ib_warn(dev, "avoid zero region\n"); ib_umem_release(u); @@ -1599,7 +1617,7 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) /* Wait for all running page-fault handlers to finish. */ synchronize_srcu(&dev->mr_srcu); /* Destroy all page mappings */ - if (umem_odp->page_list) + if (!umem_odp->is_implicit_odp) mlx5_ib_invalidate_range(umem_odp, ib_umem_start(umem_odp), ib_umem_end(umem_odp)); @@ -1610,7 +1628,7 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) * so that there will not be any invalidations in * flight, looking at the *mr struct. */ - ib_umem_release(umem); + ib_umem_odp_release(umem_odp); atomic_sub(npages, &dev->mdev->priv.reg_pages); /* Avoid double-freeing the umem. */ diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 0a59912a4cef..dd26e7acb37e 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -184,7 +184,7 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, for (i = 0; i < nentries; i++, pklm++) { pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE); va = (offset + i) * MLX5_IMR_MTT_SIZE; - if (odp && odp->umem.address == va) { + if (odp && ib_umem_start(odp) == va) { struct mlx5_ib_mr *mtt = odp->private; pklm->key = cpu_to_be32(mtt->ibmr.lkey); @@ -206,7 +206,7 @@ static void mr_leaf_free_action(struct work_struct *work) mr->parent = NULL; synchronize_srcu(&mr->dev->mr_srcu); - ib_umem_release(&odp->umem); + ib_umem_odp_release(odp); if (imr->live) mlx5_ib_update_xlt(imr, idx, 1, 0, MLX5_IB_UPD_XLT_INDIRECT | @@ -386,7 +386,7 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev, } static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd, - struct ib_umem *umem, + struct ib_umem_odp *umem_odp, bool ksm, int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); @@ -404,7 +404,7 @@ static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd, mr->dev = dev; mr->access_flags = access_flags; mr->mmkey.iova = 0; - mr->umem = umem; + mr->umem = &umem_odp->umem; if (ksm) { err = mlx5_ib_update_xlt(mr, 0, @@ -464,18 +464,17 @@ next_mr: if (nentries) nentries++; } else { - odp = ib_alloc_odp_umem(odp_mr, addr, - MLX5_IMR_MTT_SIZE); + odp = ib_umem_odp_alloc_child(odp_mr, addr, MLX5_IMR_MTT_SIZE); if (IS_ERR(odp)) { mutex_unlock(&odp_mr->umem_mutex); return ERR_CAST(odp); } - mtt = implicit_mr_alloc(mr->ibmr.pd, &odp->umem, 0, + mtt = implicit_mr_alloc(mr->ibmr.pd, odp, 0, mr->access_flags); if (IS_ERR(mtt)) { mutex_unlock(&odp_mr->umem_mutex); - ib_umem_release(&odp->umem); + ib_umem_odp_release(odp); return ERR_CAST(mtt); } @@ -497,7 +496,7 @@ next_mr: addr += MLX5_IMR_MTT_SIZE; if (unlikely(addr < io_virt + bcnt)) { odp = odp_next(odp); - if (odp && odp->umem.address != addr) + if (odp && ib_umem_start(odp) != addr) odp = NULL; goto next_mr; } @@ -521,19 +520,19 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, int access_flags) { struct mlx5_ib_mr *imr; - struct ib_umem *umem; + struct ib_umem_odp *umem_odp; - umem = ib_umem_get(udata, 0, 0, access_flags, 0); - if (IS_ERR(umem)) - return ERR_CAST(umem); + umem_odp = ib_umem_odp_alloc_implicit(udata, access_flags); + if (IS_ERR(umem_odp)) + return ERR_CAST(umem_odp); - imr = implicit_mr_alloc(&pd->ibpd, umem, 1, access_flags); + imr = implicit_mr_alloc(&pd->ibpd, umem_odp, 1, access_flags); if (IS_ERR(imr)) { - ib_umem_release(umem); + ib_umem_odp_release(umem_odp); return ERR_CAST(imr); } - imr->umem = umem; + imr->umem = &umem_odp->umem; init_waitqueue_head(&imr->q_leaf_free); atomic_set(&imr->num_leaf_free, 0); atomic_set(&imr->num_pending_prefetch, 0); @@ -541,34 +540,31 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, return imr; } -static int mr_leaf_free(struct ib_umem_odp *umem_odp, u64 start, u64 end, - void *cookie) +void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) { - struct mlx5_ib_mr *mr = umem_odp->private, *imr = cookie; - - if (mr->parent != imr) - return 0; - - ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp), - ib_umem_end(umem_odp)); + struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(imr); + struct rb_node *node; - if (umem_odp->dying) - return 0; + down_read(&per_mm->umem_rwsem); + for (node = rb_first_cached(&per_mm->umem_tree); node; + node = rb_next(node)) { + struct ib_umem_odp *umem_odp = + rb_entry(node, struct ib_umem_odp, interval_tree.rb); + struct mlx5_ib_mr *mr = umem_odp->private; - WRITE_ONCE(umem_odp->dying, 1); - atomic_inc(&imr->num_leaf_free); - schedule_work(&umem_odp->work); + if (mr->parent != imr) + continue; - return 0; -} + ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp), + ib_umem_end(umem_odp)); -void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) -{ - struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(imr); + if (umem_odp->dying) + continue; - down_read(&per_mm->umem_rwsem); - rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, 0, ULLONG_MAX, - mr_leaf_free, true, imr); + WRITE_ONCE(umem_odp->dying, 1); + atomic_inc(&imr->num_leaf_free); + schedule_work(&umem_odp->work); + } up_read(&per_mm->umem_rwsem); wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free)); @@ -589,7 +585,7 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, struct ib_umem_odp *odp; size_t size; - if (!odp_mr->page_list) { + if (odp_mr->is_implicit_odp) { odp = implicit_mr_get_data(mr, io_virt, bcnt); if (IS_ERR(odp)) @@ -607,7 +603,7 @@ next_mr: start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift; access_mask = ODP_READ_ALLOWED_BIT; - if (prefetch && !downgrade && !mr->umem->writable) { + if (prefetch && !downgrade && !odp->umem.writable) { /* prefetch with write-access must * be supported by the MR */ @@ -615,7 +611,7 @@ next_mr: goto out; } - if (mr->umem->writable && !downgrade) + if (odp->umem.writable && !downgrade) access_mask |= ODP_WRITE_ALLOWED_BIT; current_seq = READ_ONCE(odp->notifiers_seq); @@ -625,8 +621,8 @@ next_mr: */ smp_rmb(); - ret = ib_umem_odp_map_dma_pages(to_ib_umem_odp(mr->umem), io_virt, size, - access_mask, current_seq); + ret = ib_umem_odp_map_dma_pages(odp, io_virt, size, access_mask, + current_seq); if (ret < 0) goto out; @@ -634,8 +630,7 @@ next_mr: np = ret; mutex_lock(&odp->umem_mutex); - if (!ib_umem_mmu_notifier_retry(to_ib_umem_odp(mr->umem), - current_seq)) { + if (!ib_umem_mmu_notifier_retry(odp, current_seq)) { /* * No need to check whether the MTTs really belong to * this MR, since ib_umem_odp_map_dma_pages already @@ -668,7 +663,7 @@ next_mr: io_virt += size; next = odp_next(odp); - if (unlikely(!next || next->umem.address != io_virt)) { + if (unlikely(!next || ib_umem_start(next) != io_virt)) { mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n", io_virt, next); return -EAGAIN; @@ -1618,6 +1613,7 @@ void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) static const struct ib_device_ops mlx5_ib_dev_odp_ops = { .advise_mr = mlx5_ib_advise_mr, + .invalidate_range = mlx5_ib_invalidate_range, }; int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev) diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c index a2a142ae087b..9d042310214f 100644 --- a/drivers/misc/sgi-gru/grufile.c +++ b/drivers/misc/sgi-gru/grufile.c @@ -573,6 +573,7 @@ static void __exit gru_exit(void) gru_free_tables(); misc_deregister(&gru_miscdev); gru_proc_exit(); + mmu_notifier_synchronize(); } static const struct file_operations gru_fops = { diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h index 438191c22057..a7e44b2eb413 100644 --- a/drivers/misc/sgi-gru/grutables.h +++ b/drivers/misc/sgi-gru/grutables.h @@ -307,10 +307,8 @@ struct gru_mm_tracker { /* pack to reduce size */ struct gru_mm_struct { struct mmu_notifier ms_notifier; - atomic_t ms_refcnt; spinlock_t ms_asid_lock; /* protects ASID assignment */ atomic_t ms_range_active;/* num range_invals active */ - char ms_released; wait_queue_head_t ms_wait_queue; DECLARE_BITMAP(ms_asidmap, GRU_MAX_GRUS); struct gru_mm_tracker ms_asids[GRU_MAX_GRUS]; diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c index 59ba0adf23ce..10921cd2608d 100644 --- a/drivers/misc/sgi-gru/grutlbpurge.c +++ b/drivers/misc/sgi-gru/grutlbpurge.c @@ -235,83 +235,47 @@ static void gru_invalidate_range_end(struct mmu_notifier *mn, gms, range->start, range->end); } -static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm) +static struct mmu_notifier *gru_alloc_notifier(struct mm_struct *mm) { - struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct, - ms_notifier); + struct gru_mm_struct *gms; + + gms = kzalloc(sizeof(*gms), GFP_KERNEL); + if (!gms) + return ERR_PTR(-ENOMEM); + STAT(gms_alloc); + spin_lock_init(&gms->ms_asid_lock); + init_waitqueue_head(&gms->ms_wait_queue); - gms->ms_released = 1; - gru_dbg(grudev, "gms %p\n", gms); + return &gms->ms_notifier; } +static void gru_free_notifier(struct mmu_notifier *mn) +{ + kfree(container_of(mn, struct gru_mm_struct, ms_notifier)); + STAT(gms_free); +} static const struct mmu_notifier_ops gru_mmuops = { .invalidate_range_start = gru_invalidate_range_start, .invalidate_range_end = gru_invalidate_range_end, - .release = gru_release, + .alloc_notifier = gru_alloc_notifier, + .free_notifier = gru_free_notifier, }; -/* Move this to the basic mmu_notifier file. But for now... */ -static struct mmu_notifier *mmu_find_ops(struct mm_struct *mm, - const struct mmu_notifier_ops *ops) -{ - struct mmu_notifier *mn, *gru_mn = NULL; - - if (mm->mmu_notifier_mm) { - rcu_read_lock(); - hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, - hlist) - if (mn->ops == ops) { - gru_mn = mn; - break; - } - rcu_read_unlock(); - } - return gru_mn; -} - struct gru_mm_struct *gru_register_mmu_notifier(void) { - struct gru_mm_struct *gms; struct mmu_notifier *mn; - int err; - - mn = mmu_find_ops(current->mm, &gru_mmuops); - if (mn) { - gms = container_of(mn, struct gru_mm_struct, ms_notifier); - atomic_inc(&gms->ms_refcnt); - } else { - gms = kzalloc(sizeof(*gms), GFP_KERNEL); - if (!gms) - return ERR_PTR(-ENOMEM); - STAT(gms_alloc); - spin_lock_init(&gms->ms_asid_lock); - gms->ms_notifier.ops = &gru_mmuops; - atomic_set(&gms->ms_refcnt, 1); - init_waitqueue_head(&gms->ms_wait_queue); - err = __mmu_notifier_register(&gms->ms_notifier, current->mm); - if (err) - goto error; - } - if (gms) - gru_dbg(grudev, "gms %p, refcnt %d\n", gms, - atomic_read(&gms->ms_refcnt)); - return gms; -error: - kfree(gms); - return ERR_PTR(err); + + mn = mmu_notifier_get_locked(&gru_mmuops, current->mm); + if (IS_ERR(mn)) + return ERR_CAST(mn); + + return container_of(mn, struct gru_mm_struct, ms_notifier); } void gru_drop_mmu_notifier(struct gru_mm_struct *gms) { - gru_dbg(grudev, "gms %p, refcnt %d, released %d\n", gms, - atomic_read(&gms->ms_refcnt), gms->ms_released); - if (atomic_dec_return(&gms->ms_refcnt) == 0) { - if (!gms->ms_released) - mmu_notifier_unregister(&gms->ms_notifier, current->mm); - kfree(gms); - STAT(gms_free); - } + mmu_notifier_put(&gms->ms_notifier); } /* diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig index a5fde15e91d3..36af7af6b7cf 100644 --- a/drivers/nvdimm/Kconfig +++ b/drivers/nvdimm/Kconfig @@ -118,4 +118,16 @@ config NVDIMM_KEYS depends on ENCRYPTED_KEYS depends on (LIBNVDIMM=ENCRYPTED_KEYS) || LIBNVDIMM=m +config NVDIMM_TEST_BUILD + tristate "Build the unit test core" + depends on m + depends on COMPILE_TEST && X86_64 + default m if COMPILE_TEST + help + Build the core of the unit test infrastructure. The result of + this build is non-functional for unit test execution, but it + otherwise helps catch build errors induced by changes to the + core devm_memremap_pages() implementation and other + infrastructure. + endif diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile index cefe233e0b52..29203f3d3069 100644 --- a/drivers/nvdimm/Makefile +++ b/drivers/nvdimm/Makefile @@ -29,3 +29,7 @@ libnvdimm-$(CONFIG_BTT) += btt_devs.o libnvdimm-$(CONFIG_NVDIMM_PFN) += pfn_devs.o libnvdimm-$(CONFIG_NVDIMM_DAX) += dax_devs.o libnvdimm-$(CONFIG_NVDIMM_KEYS) += security.o + +TOOLS := ../../tools +TEST_SRC := $(TOOLS)/testing/nvdimm/test +obj-$(CONFIG_NVDIMM_TEST_BUILD) += $(TEST_SRC)/iomap.o |