author     Linus Torvalds    2024-07-19 10:20:26 -0700
committer  Linus Torvalds    2024-07-19 10:20:26 -0700
commit     afd81d914f6fb3e74a46bf5d0dd0b028591ea22e (patch)
tree       d442b5de397de72b365d263e6a9c577fcb7aa0a6 /kernel
parent     ebcfbf02abfbecc144440ff797419cc95cb047fe (diff)
parent     b69bdba5a37eb6224039e9572e0e98fc3a931fee (diff)
Merge tag 'dma-mapping-6.11-2024-07-19' of git://git.infradead.org/users/hch/dma-mapping
Pull dma-mapping updates from Christoph Hellwig:

 - reduce duplicate swiotlb pool lookups (Michael Kelley)

 - minor small fixes (Yicong Yang, Yang Li)

* tag 'dma-mapping-6.11-2024-07-19' of git://git.infradead.org/users/hch/dma-mapping:
  swiotlb: fix kernel-doc description for swiotlb_del_transient
  swiotlb: reduce swiotlb pool lookups
  dma-mapping: benchmark: Don't starve others when doing the test
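The core of the series is removing a duplicated pool lookup: callers used to test is_swiotlb_buffer() and then call a swiotlb helper that looked the same pool up again. A minimal before/after sketch of that pattern, using an illustrative caller name (sync_for_device_example) rather than code taken from this diff:

/* Before: two lookups, one in is_swiotlb_buffer(), one inside the helper. */
static void sync_for_device_example_old(struct device *dev, phys_addr_t paddr,
		size_t size, enum dma_data_direction dir)
{
	if (unlikely(is_swiotlb_buffer(dev, paddr)))
		swiotlb_sync_single_for_device(dev, paddr, size, dir);
}

/*
 * After: swiotlb_find_pool() doubles as the membership test, and the pool it
 * returns is handed to the __-prefixed helper, so the pool is located once.
 */
static void sync_for_device_example_new(struct device *dev, phys_addr_t paddr,
		size_t size, enum dma_data_direction dir)
{
	struct io_tlb_pool *pool = swiotlb_find_pool(dev, paddr);

	if (unlikely(pool))
		__swiotlb_sync_single_for_device(dev, paddr, size, dir, pool);
}

In the hunks below, most callers simply call the un-prefixed helpers unconditionally and let them perform the lookup-and-dispatch internally.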
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/dma/direct.c         10
-rw-r--r--  kernel/dma/direct.h          9
-rw-r--r--  kernel/dma/map_benchmark.c  16
-rw-r--r--  kernel/dma/swiotlb.c        68
4 files changed, 59 insertions(+), 44 deletions(-)
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 4d543b1e9d57..4480a3cd92e0 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -404,9 +404,7 @@ void dma_direct_sync_sg_for_device(struct device *dev,
for_each_sg(sgl, sg, nents, i) {
phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));
- if (unlikely(is_swiotlb_buffer(dev, paddr)))
- swiotlb_sync_single_for_device(dev, paddr, sg->length,
- dir);
+ swiotlb_sync_single_for_device(dev, paddr, sg->length, dir);
if (!dev_is_dma_coherent(dev))
arch_sync_dma_for_device(paddr, sg->length,
@@ -430,9 +428,7 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
if (!dev_is_dma_coherent(dev))
arch_sync_dma_for_cpu(paddr, sg->length, dir);
- if (unlikely(is_swiotlb_buffer(dev, paddr)))
- swiotlb_sync_single_for_cpu(dev, paddr, sg->length,
- dir);
+ swiotlb_sync_single_for_cpu(dev, paddr, sg->length, dir);
if (dir == DMA_FROM_DEVICE)
arch_dma_mark_clean(paddr, sg->length);
@@ -640,7 +636,7 @@ size_t dma_direct_max_mapping_size(struct device *dev)
bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr)
{
return !dev_is_dma_coherent(dev) ||
- is_swiotlb_buffer(dev, dma_to_phys(dev, dma_addr));
+ swiotlb_find_pool(dev, dma_to_phys(dev, dma_addr));
}
/**
diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h
index 18d346118fe8..d2c0b7e632fc 100644
--- a/kernel/dma/direct.h
+++ b/kernel/dma/direct.h
@@ -58,8 +58,7 @@ static inline void dma_direct_sync_single_for_device(struct device *dev,
{
phys_addr_t paddr = dma_to_phys(dev, addr);
- if (unlikely(is_swiotlb_buffer(dev, paddr)))
- swiotlb_sync_single_for_device(dev, paddr, size, dir);
+ swiotlb_sync_single_for_device(dev, paddr, size, dir);
if (!dev_is_dma_coherent(dev))
arch_sync_dma_for_device(paddr, size, dir);
@@ -75,8 +74,7 @@ static inline void dma_direct_sync_single_for_cpu(struct device *dev,
arch_sync_dma_for_cpu_all();
}
- if (unlikely(is_swiotlb_buffer(dev, paddr)))
- swiotlb_sync_single_for_cpu(dev, paddr, size, dir);
+ swiotlb_sync_single_for_cpu(dev, paddr, size, dir);
if (dir == DMA_FROM_DEVICE)
arch_dma_mark_clean(paddr, size);
@@ -121,8 +119,7 @@ static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
dma_direct_sync_single_for_cpu(dev, addr, size, dir);
- if (unlikely(is_swiotlb_buffer(dev, phys)))
- swiotlb_tbl_unmap_single(dev, phys, size, dir,
+ swiotlb_tbl_unmap_single(dev, phys, size, dir,
attrs | DMA_ATTR_SKIP_CPU_SYNC);
}
#endif /* _KERNEL_DMA_DIRECT_H */
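The hunks above drop their is_swiotlb_buffer() guards entirely because the swiotlb_* entry points they call now perform the pool lookup themselves and return early when the address is not in a bounce buffer. Those wrappers fall outside this diffstat (it is limited to kernel/), so the following is only an assumed sketch of their shape, presumably living in include/linux/swiotlb.h:

static inline void swiotlb_sync_single_for_device(struct device *dev,
		phys_addr_t addr, size_t size, enum dma_data_direction dir)
{
	/* Assumed wrapper: look the pool up once, no-op for non-swiotlb addresses. */
	struct io_tlb_pool *pool = swiotlb_find_pool(dev, addr);

	if (unlikely(pool))
		__swiotlb_sync_single_for_device(dev, addr, size, dir, pool);
}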
diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c
index 4950e0b622b1..cc19a3efea89 100644
--- a/kernel/dma/map_benchmark.c
+++ b/kernel/dma/map_benchmark.c
@@ -89,6 +89,22 @@ static int map_benchmark_thread(void *data)
atomic64_add(map_sq, &map->sum_sq_map);
atomic64_add(unmap_sq, &map->sum_sq_unmap);
atomic64_inc(&map->loops);
+
+ /*
+ * We may test for a long time so periodically check whether
+ * we need to schedule to avoid starving the others. Otherwise
+ * we may hang up the kernel in a non-preemptible kernel when
+ * the number of test kthreads is >= the number of CPUs: the
+ * test kthreads will run endlessly on every CPU since the
+ * thread responsible for notifying the kthreads to stop (in
+ * do_map_benchmark()) could not be scheduled.
+ *
+ * Note this may degrade the test concurrency since the test
+ * threads may need to share the CPU time with other load
+ * in the system. So it's recommended to run this benchmark
+ * on an idle system.
+ */
+ cond_resched();
}
out:
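For context, the added cond_resched() sits at the bottom of the benchmark thread's main loop; a condensed sketch of the resulting flow (the map/unmap timing body is elided, not quoted verbatim from the file):

static int map_benchmark_thread(void *data)
{
	struct map_benchmark_data *map = data;

	while (!kthread_should_stop()) {
		/* map one buffer, time it, unmap it, accumulate the stats ... */
		atomic64_inc(&map->loops);

		/*
		 * Yield periodically so the thread that calls kthread_stop()
		 * (in do_map_benchmark()) can run even on a non-preemptible
		 * kernel with as many test kthreads as there are CPUs.
		 */
		cond_resched();
	}
	return 0;
}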
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index fe1ccb53596f..df68d29740a0 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -763,16 +763,18 @@ static void swiotlb_dyn_free(struct rcu_head *rcu)
}
/**
- * swiotlb_find_pool() - find the IO TLB pool for a physical address
+ * __swiotlb_find_pool() - find the IO TLB pool for a physical address
* @dev: Device which has mapped the DMA buffer.
* @paddr: Physical address within the DMA buffer.
*
* Find the IO TLB memory pool descriptor which contains the given physical
- * address, if any.
+ * address, if any. This function is for use only when the dev is known to
+ * be using swiotlb. Use swiotlb_find_pool() for the more general case
+ * when this condition is not met.
*
* Return: Memory pool which contains @paddr, or %NULL if none.
*/
-struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr)
+struct io_tlb_pool *__swiotlb_find_pool(struct device *dev, phys_addr_t paddr)
{
struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
struct io_tlb_pool *pool;
@@ -855,9 +857,8 @@ static unsigned int swiotlb_align_offset(struct device *dev,
* Bounce: copy the swiotlb buffer from or back to the original dma location
*/
static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size,
- enum dma_data_direction dir)
+ enum dma_data_direction dir, struct io_tlb_pool *mem)
{
- struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr);
int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT;
phys_addr_t orig_addr = mem->slots[index].orig_addr;
size_t alloc_size = mem->slots[index].alloc_size;
@@ -1243,7 +1244,7 @@ found:
* that was made by swiotlb_dyn_alloc() on a third CPU (cf. multicopy
* atomicity).
*
- * See also the comment in is_swiotlb_buffer().
+ * See also the comment in swiotlb_find_pool().
*/
smp_mb();
@@ -1435,13 +1436,13 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
* hardware behavior. Use of swiotlb is supposed to be transparent,
* i.e. swiotlb must not corrupt memory by clobbering unwritten bytes.
*/
- swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE);
+ swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE, pool);
return tlb_addr;
}
-static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
+static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr,
+ struct io_tlb_pool *mem)
{
- struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr);
unsigned long flags;
unsigned int offset = swiotlb_align_offset(dev, 0, tlb_addr);
int index, nslots, aindex;
@@ -1499,17 +1500,16 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
* swiotlb_del_transient() - delete a transient memory pool
* @dev: Device which mapped the buffer.
* @tlb_addr: Physical address within a bounce buffer.
+ * @pool: Pointer to the transient memory pool to be checked and deleted.
*
* Check whether the address belongs to a transient SWIOTLB memory pool.
* If yes, then delete the pool.
*
* Return: %true if @tlb_addr belonged to a transient pool that was released.
*/
-static bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr)
+static bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr,
+ struct io_tlb_pool *pool)
{
- struct io_tlb_pool *pool;
-
- pool = swiotlb_find_pool(dev, tlb_addr);
if (!pool->transient)
return false;
@@ -1522,7 +1522,7 @@ static bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr)
#else /* !CONFIG_SWIOTLB_DYNAMIC */
static inline bool swiotlb_del_transient(struct device *dev,
- phys_addr_t tlb_addr)
+ phys_addr_t tlb_addr, struct io_tlb_pool *pool)
{
return false;
}
@@ -1532,36 +1532,39 @@ static inline bool swiotlb_del_transient(struct device *dev,
/*
* tlb_addr is the physical address of the bounce buffer to unmap.
*/
-void swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr,
- size_t mapping_size, enum dma_data_direction dir,
- unsigned long attrs)
+void __swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr,
+ size_t mapping_size, enum dma_data_direction dir,
+ unsigned long attrs, struct io_tlb_pool *pool)
{
/*
* First, sync the memory before unmapping the entry
*/
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
- swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_FROM_DEVICE);
+ swiotlb_bounce(dev, tlb_addr, mapping_size,
+ DMA_FROM_DEVICE, pool);
- if (swiotlb_del_transient(dev, tlb_addr))
+ if (swiotlb_del_transient(dev, tlb_addr, pool))
return;
- swiotlb_release_slots(dev, tlb_addr);
+ swiotlb_release_slots(dev, tlb_addr, pool);
}
-void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr,
- size_t size, enum dma_data_direction dir)
+void __swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr,
+ size_t size, enum dma_data_direction dir,
+ struct io_tlb_pool *pool)
{
if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
- swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE);
+ swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE, pool);
else
BUG_ON(dir != DMA_FROM_DEVICE);
}
-void swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr,
- size_t size, enum dma_data_direction dir)
+void __swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr,
+ size_t size, enum dma_data_direction dir,
+ struct io_tlb_pool *pool)
{
if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
- swiotlb_bounce(dev, tlb_addr, size, DMA_FROM_DEVICE);
+ swiotlb_bounce(dev, tlb_addr, size, DMA_FROM_DEVICE, pool);
else
BUG_ON(dir != DMA_TO_DEVICE);
}
@@ -1585,8 +1588,9 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
/* Ensure that the address returned is DMA'ble */
dma_addr = phys_to_dma_unencrypted(dev, swiotlb_addr);
if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
- swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, dir,
- attrs | DMA_ATTR_SKIP_CPU_SYNC);
+ __swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, dir,
+ attrs | DMA_ATTR_SKIP_CPU_SYNC,
+ swiotlb_find_pool(dev, swiotlb_addr));
dev_WARN_ONCE(dev, 1,
"swiotlb addr %pad+%zu overflow (mask %llx, bus limit %llx).\n",
&dma_addr, size, *dev->dma_mask, dev->bus_dma_limit);
@@ -1764,7 +1768,7 @@ struct page *swiotlb_alloc(struct device *dev, size_t size)
if (unlikely(!PAGE_ALIGNED(tlb_addr))) {
dev_WARN_ONCE(dev, 1, "Cannot allocate pages from non page-aligned swiotlb addr 0x%pa.\n",
&tlb_addr);
- swiotlb_release_slots(dev, tlb_addr);
+ swiotlb_release_slots(dev, tlb_addr, pool);
return NULL;
}
@@ -1774,11 +1778,13 @@ struct page *swiotlb_alloc(struct device *dev, size_t size)
bool swiotlb_free(struct device *dev, struct page *page, size_t size)
{
phys_addr_t tlb_addr = page_to_phys(page);
+ struct io_tlb_pool *pool;
- if (!is_swiotlb_buffer(dev, tlb_addr))
+ pool = swiotlb_find_pool(dev, tlb_addr);
+ if (!pool)
return false;
- swiotlb_release_slots(dev, tlb_addr);
+ swiotlb_release_slots(dev, tlb_addr, pool);
return true;
}
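Taken together, the series splits the swiotlb API into lookup-once helpers: swiotlb_find_pool() is the general membership test that may return NULL, __swiotlb_find_pool() assumes the device is already known to use swiotlb, and the __-prefixed bounce/sync/unmap helpers take the pool that was already found. A hedged usage sketch with an illustrative caller name (not a hunk from this pull):

static void example_unmap_bounce(struct device *dev, phys_addr_t tlb_addr,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	struct io_tlb_pool *pool = swiotlb_find_pool(dev, tlb_addr);

	if (!pool)
		return;		/* not a bounce buffer, nothing to release */

	__swiotlb_tbl_unmap_single(dev, tlb_addr, size, dir, attrs, pool);
}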