diff options
author | Linus Torvalds | 2024-07-19 10:20:26 -0700 |
---|---|---|
committer | Linus Torvalds | 2024-07-19 10:20:26 -0700 |
commit | afd81d914f6fb3e74a46bf5d0dd0b028591ea22e (patch) | |
tree | d442b5de397de72b365d263e6a9c577fcb7aa0a6 /kernel | |
parent | ebcfbf02abfbecc144440ff797419cc95cb047fe (diff) | |
parent | b69bdba5a37eb6224039e9572e0e98fc3a931fee (diff) |
Merge tag 'dma-mapping-6.11-2024-07-19' of git://git.infradead.org/users/hch/dma-mapping
Pull dma-mapping updates from Christoph Hellwig:
- reduce duplicate swiotlb pool lookups (Michael Kelley)
- minor small fixes (Yicong Yang, Yang Li)
* tag 'dma-mapping-6.11-2024-07-19' of git://git.infradead.org/users/hch/dma-mapping:
swiotlb: fix kernel-doc description for swiotlb_del_transient
swiotlb: reduce swiotlb pool lookups
dma-mapping: benchmark: Don't starve others when doing the test
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/dma/direct.c | 10 | ||||
-rw-r--r-- | kernel/dma/direct.h | 9 | ||||
-rw-r--r-- | kernel/dma/map_benchmark.c | 16 | ||||
-rw-r--r-- | kernel/dma/swiotlb.c | 68 |
4 files changed, 59 insertions, 44 deletions
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 4d543b1e9d57..4480a3cd92e0 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -404,9 +404,7 @@ void dma_direct_sync_sg_for_device(struct device *dev, for_each_sg(sgl, sg, nents, i) { phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg)); - if (unlikely(is_swiotlb_buffer(dev, paddr))) - swiotlb_sync_single_for_device(dev, paddr, sg->length, - dir); + swiotlb_sync_single_for_device(dev, paddr, sg->length, dir); if (!dev_is_dma_coherent(dev)) arch_sync_dma_for_device(paddr, sg->length, @@ -430,9 +428,7 @@ void dma_direct_sync_sg_for_cpu(struct device *dev, if (!dev_is_dma_coherent(dev)) arch_sync_dma_for_cpu(paddr, sg->length, dir); - if (unlikely(is_swiotlb_buffer(dev, paddr))) - swiotlb_sync_single_for_cpu(dev, paddr, sg->length, - dir); + swiotlb_sync_single_for_cpu(dev, paddr, sg->length, dir); if (dir == DMA_FROM_DEVICE) arch_dma_mark_clean(paddr, sg->length); @@ -640,7 +636,7 @@ size_t dma_direct_max_mapping_size(struct device *dev) bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr) { return !dev_is_dma_coherent(dev) || - is_swiotlb_buffer(dev, dma_to_phys(dev, dma_addr)); + swiotlb_find_pool(dev, dma_to_phys(dev, dma_addr)); } /** diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h index 18d346118fe8..d2c0b7e632fc 100644 --- a/kernel/dma/direct.h +++ b/kernel/dma/direct.h @@ -58,8 +58,7 @@ static inline void dma_direct_sync_single_for_device(struct device *dev, { phys_addr_t paddr = dma_to_phys(dev, addr); - if (unlikely(is_swiotlb_buffer(dev, paddr))) - swiotlb_sync_single_for_device(dev, paddr, size, dir); + swiotlb_sync_single_for_device(dev, paddr, size, dir); if (!dev_is_dma_coherent(dev)) arch_sync_dma_for_device(paddr, size, dir); @@ -75,8 +74,7 @@ static inline void dma_direct_sync_single_for_cpu(struct device *dev, arch_sync_dma_for_cpu_all(); } - if (unlikely(is_swiotlb_buffer(dev, paddr))) - swiotlb_sync_single_for_cpu(dev, paddr, size, dir); + swiotlb_sync_single_for_cpu(dev, paddr, size, dir); if (dir == DMA_FROM_DEVICE) arch_dma_mark_clean(paddr, size); @@ -121,8 +119,7 @@ static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) dma_direct_sync_single_for_cpu(dev, addr, size, dir); - if (unlikely(is_swiotlb_buffer(dev, phys))) - swiotlb_tbl_unmap_single(dev, phys, size, dir, + swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); } #endif /* _KERNEL_DMA_DIRECT_H */ diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c index 4950e0b622b1..cc19a3efea89 100644 --- a/kernel/dma/map_benchmark.c +++ b/kernel/dma/map_benchmark.c @@ -89,6 +89,22 @@ static int map_benchmark_thread(void *data) atomic64_add(map_sq, &map->sum_sq_map); atomic64_add(unmap_sq, &map->sum_sq_unmap); atomic64_inc(&map->loops); + + /* + * We may test for a long time so periodically check whether + * we need to schedule to avoid starving the others. Otherwise + * we may hangup the kernel in a non-preemptible kernel when + * the test kthreads number >= CPU number, the test kthreads + * will run endless on every CPU since the thread resposible + * for notifying the kthread stop (in do_map_benchmark()) + * could not be scheduled. + * + * Note this may degrade the test concurrency since the test + * threads may need to share the CPU time with other load + * in the system. So it's recommended to run this benchmark + * on an idle system. + */ + cond_resched(); } out: diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index fe1ccb53596f..df68d29740a0 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -763,16 +763,18 @@ static void swiotlb_dyn_free(struct rcu_head *rcu) } /** - * swiotlb_find_pool() - find the IO TLB pool for a physical address + * __swiotlb_find_pool() - find the IO TLB pool for a physical address * @dev: Device which has mapped the DMA buffer. * @paddr: Physical address within the DMA buffer. * * Find the IO TLB memory pool descriptor which contains the given physical - * address, if any. + * address, if any. This function is for use only when the dev is known to + * be using swiotlb. Use swiotlb_find_pool() for the more general case + * when this condition is not met. * * Return: Memory pool which contains @paddr, or %NULL if none. */ -struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr) +struct io_tlb_pool *__swiotlb_find_pool(struct device *dev, phys_addr_t paddr) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; struct io_tlb_pool *pool; @@ -855,9 +857,8 @@ static unsigned int swiotlb_align_offset(struct device *dev, * Bounce: copy the swiotlb buffer from or back to the original dma location */ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size, - enum dma_data_direction dir) + enum dma_data_direction dir, struct io_tlb_pool *mem) { - struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr); int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT; phys_addr_t orig_addr = mem->slots[index].orig_addr; size_t alloc_size = mem->slots[index].alloc_size; @@ -1243,7 +1244,7 @@ found: * that was made by swiotlb_dyn_alloc() on a third CPU (cf. multicopy * atomicity). * - * See also the comment in is_swiotlb_buffer(). + * See also the comment in swiotlb_find_pool(). */ smp_mb(); @@ -1435,13 +1436,13 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, * hardware behavior. Use of swiotlb is supposed to be transparent, * i.e. swiotlb must not corrupt memory by clobbering unwritten bytes. */ - swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE); + swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE, pool); return tlb_addr; } -static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr) +static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr, + struct io_tlb_pool *mem) { - struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr); unsigned long flags; unsigned int offset = swiotlb_align_offset(dev, 0, tlb_addr); int index, nslots, aindex; @@ -1499,17 +1500,16 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr) * swiotlb_del_transient() - delete a transient memory pool * @dev: Device which mapped the buffer. * @tlb_addr: Physical address within a bounce buffer. + * @pool: Pointer to the transient memory pool to be checked and deleted. * * Check whether the address belongs to a transient SWIOTLB memory pool. * If yes, then delete the pool. * * Return: %true if @tlb_addr belonged to a transient pool that was released. */ -static bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr) +static bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr, + struct io_tlb_pool *pool) { - struct io_tlb_pool *pool; - - pool = swiotlb_find_pool(dev, tlb_addr); if (!pool->transient) return false; @@ -1522,7 +1522,7 @@ static bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr) #else /* !CONFIG_SWIOTLB_DYNAMIC */ static inline bool swiotlb_del_transient(struct device *dev, - phys_addr_t tlb_addr) + phys_addr_t tlb_addr, struct io_tlb_pool *pool) { return false; } @@ -1532,36 +1532,39 @@ static inline bool swiotlb_del_transient(struct device *dev, /* * tlb_addr is the physical address of the bounce buffer to unmap. */ -void swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr, - size_t mapping_size, enum dma_data_direction dir, - unsigned long attrs) +void __swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr, + size_t mapping_size, enum dma_data_direction dir, + unsigned long attrs, struct io_tlb_pool *pool) { /* * First, sync the memory before unmapping the entry */ if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_FROM_DEVICE); + swiotlb_bounce(dev, tlb_addr, mapping_size, + DMA_FROM_DEVICE, pool); - if (swiotlb_del_transient(dev, tlb_addr)) + if (swiotlb_del_transient(dev, tlb_addr, pool)) return; - swiotlb_release_slots(dev, tlb_addr); + swiotlb_release_slots(dev, tlb_addr, pool); } -void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir) +void __swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr, + size_t size, enum dma_data_direction dir, + struct io_tlb_pool *pool) { if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) - swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE); + swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE, pool); else BUG_ON(dir != DMA_FROM_DEVICE); } -void swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir) +void __swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr, + size_t size, enum dma_data_direction dir, + struct io_tlb_pool *pool) { if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) - swiotlb_bounce(dev, tlb_addr, size, DMA_FROM_DEVICE); + swiotlb_bounce(dev, tlb_addr, size, DMA_FROM_DEVICE, pool); else BUG_ON(dir != DMA_TO_DEVICE); } @@ -1585,8 +1588,9 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size, /* Ensure that the address returned is DMA'ble */ dma_addr = phys_to_dma_unencrypted(dev, swiotlb_addr); if (unlikely(!dma_capable(dev, dma_addr, size, true))) { - swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, dir, - attrs | DMA_ATTR_SKIP_CPU_SYNC); + __swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, dir, + attrs | DMA_ATTR_SKIP_CPU_SYNC, + swiotlb_find_pool(dev, swiotlb_addr)); dev_WARN_ONCE(dev, 1, "swiotlb addr %pad+%zu overflow (mask %llx, bus limit %llx).\n", &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit); @@ -1764,7 +1768,7 @@ struct page *swiotlb_alloc(struct device *dev, size_t size) if (unlikely(!PAGE_ALIGNED(tlb_addr))) { dev_WARN_ONCE(dev, 1, "Cannot allocate pages from non page-aligned swiotlb addr 0x%pa.\n", &tlb_addr); - swiotlb_release_slots(dev, tlb_addr); + swiotlb_release_slots(dev, tlb_addr, pool); return NULL; } @@ -1774,11 +1778,13 @@ struct page *swiotlb_alloc(struct device *dev, size_t size) bool swiotlb_free(struct device *dev, struct page *page, size_t size) { phys_addr_t tlb_addr = page_to_phys(page); + struct io_tlb_pool *pool; - if (!is_swiotlb_buffer(dev, tlb_addr)) + pool = swiotlb_find_pool(dev, tlb_addr); + if (!pool) return false; - swiotlb_release_slots(dev, tlb_addr); + swiotlb_release_slots(dev, tlb_addr, pool); return true; } |