page_pool: disable sync for cpu for dmabuf memory provider

dmabuf dma-addresses should not be dma_sync'd for CPU/device. Typically
it's the driver's responsibility to dma_sync for CPU, but the driver
should not dma_sync for CPU if the netmem is actually coming from a
dmabuf memory provider.

The page_pool already exposes a helper for dma_sync_for_cpu:
page_pool_dma_sync_for_cpu. Upgrade this existing helper to handle
netmem, and have it skip dma_sync if the memory is from a dmabuf memory
provider. Drivers should migrate to using this helper when adding
support for netmem.
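
A minimal sketch of the intended migration (the driver function name and
surrounding receive logic are made up for illustration; only the page_pool
helpers are from this series):

	/* Hypothetical netmem-aware RX buffer sync in a driver. */
	static void mydrv_sync_rx_buf(struct page_pool *pool, netmem_ref netmem,
				      u32 offset, u32 len)
	{
		/* No-op when the pool is backed by a dmabuf memory provider,
		 * because such pools have dma_sync_for_cpu forced off.
		 */
		page_pool_dma_sync_netmem_for_cpu(pool, netmem, offset, len);
	}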

Also minimize the impact on dma-sync performance for pages: special-case
the dma-sync path for pages so it does not go through the conversion to
netmem and the extra dma-sync checks.
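
Condensed illustration of the two resulting paths (the wrapper function is
made up for the example; the helpers and the dma_sync_for_cpu field are
from this series):

	static void example_sync_for_cpu(struct page_pool *pool, struct page *page,
					 netmem_ref netmem, u32 offset, u32 len)
	{
		/* Pages are never provider-backed: sync unconditionally and
		 * read page->dma_addr directly, no netmem round-trip.
		 */
		page_pool_dma_sync_for_cpu(pool, page, offset, len);

		/* netmem may come from a dmabuf provider: this returns early
		 * when pool->dma_sync_for_cpu is false.
		 */
		page_pool_dma_sync_netmem_for_cpu(pool, netmem, offset, len);
	}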

Cc: Alexander Lobakin <aleksander.lobakin@intel.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Signed-off-by: Mina Almasry <almasrymina@google.com>
Link: https://patch.msgid.link/20241211212033.1684197-5-almasrymina@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
commit 7dba339faa (parent b400f4b874)
Author:    Mina Almasry <almasrymina@google.com>
Date:      2024-12-11 21:20:31 +00:00
Committer: Jakub Kicinski
4 changed files, 34 insertions(+), 6 deletions(-)

include/net/page_pool/helpers.h

@@ -422,7 +422,21 @@ static inline dma_addr_t page_pool_get_dma_addr_netmem(netmem_ref netmem)
  */
 static inline dma_addr_t page_pool_get_dma_addr(const struct page *page)
 {
-	return page_pool_get_dma_addr_netmem(page_to_netmem((struct page *)page));
+	dma_addr_t ret = page->dma_addr;
+
+	if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA)
+		ret <<= PAGE_SHIFT;
+
+	return ret;
+}
+
+static inline void __page_pool_dma_sync_for_cpu(const struct page_pool *pool,
+						const dma_addr_t dma_addr,
+						u32 offset, u32 dma_sync_size)
+{
+	dma_sync_single_range_for_cpu(pool->p.dev, dma_addr,
+				      offset + pool->p.offset, dma_sync_size,
+				      page_pool_get_dma_dir(pool));
 }
 
 /**
@@ -441,10 +455,21 @@ static inline void page_pool_dma_sync_for_cpu(const struct page_pool *pool,
 					      const struct page *page,
 					      u32 offset, u32 dma_sync_size)
 {
-	dma_sync_single_range_for_cpu(pool->p.dev,
-				      page_pool_get_dma_addr(page),
-				      offset + pool->p.offset, dma_sync_size,
-				      page_pool_get_dma_dir(pool));
+	__page_pool_dma_sync_for_cpu(pool, page_pool_get_dma_addr(page), offset,
+				     dma_sync_size);
+}
+
+static inline void
+page_pool_dma_sync_netmem_for_cpu(const struct page_pool *pool,
+				  const netmem_ref netmem, u32 offset,
+				  u32 dma_sync_size)
+{
+	if (!pool->dma_sync_for_cpu)
+		return;
+
+	__page_pool_dma_sync_for_cpu(pool,
+				     page_pool_get_dma_addr_netmem(netmem),
+				     offset, dma_sync_size);
 }
 
 static inline bool page_pool_put(struct page_pool *pool)

include/net/page_pool/types.h

@@ -164,7 +164,8 @@ struct page_pool {
 	bool has_init_callback:1;	/* slow::init_callback is set */
 	bool dma_map:1;			/* Perform DMA mapping */
-	bool dma_sync:1;		/* Perform DMA sync */
+	bool dma_sync:1;		/* Perform DMA sync for device */
+	bool dma_sync_for_cpu:1;	/* Perform DMA sync for cpu */
 #ifdef CONFIG_PAGE_POOL_STATS
 	bool system:1;			/* This is a global percpu pool */
 #endif

net/core/devmem.c

@@ -335,6 +335,7 @@ int mp_dmabuf_devmem_init(struct page_pool *pool)
 	 * dma_sync_for_cpu/device. Force disable dma_sync.
 	 */
 	pool->dma_sync = false;
+	pool->dma_sync_for_cpu = false;
 
 	if (pool->p.order != 0)
 		return -E2BIG;

net/core/page_pool.c

@@ -201,6 +201,7 @@ static int page_pool_init(struct page_pool *pool,
 	memcpy(&pool->slow, &params->slow, sizeof(pool->slow));
 
 	pool->cpuid = cpuid;
+	pool->dma_sync_for_cpu = true;
 
 	/* Validate only known flags were used */
 	if (pool->slow.flags & ~PP_FLAG_ALL)