mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-03 19:55:31 +00:00
dma-mapping updates for Linux 6.10
- optimize DMA sync calls when they are no-ops (Alexander Lobakin) - fix swiotlb padding for untrusted devices (Michael Kelley) - add documentation for swiotb (Michael Kelley) -----BEGIN PGP SIGNATURE----- iQI/BAABCgApFiEEgdbnc3r/njty3Iq9D55TZVIEUYMFAmZLV+gLHGhjaEBsc3Qu ZGUACgkQD55TZVIEUYPO7hAAlKuXigzwcrVEUnfRGRdaZ28xbmffyC1dPfw8HRZe xJqvD51aJ/VOoOCcUyt3hNLEQHwtjEk4eM0xGcAASMdwceU58doJCcDJBpbbgbDK CPKJgBLQBC1JfAJUpRiJkV4RsudRhAyndIzUPVgkz0WObpEgDpfO0ClHRF/0Pavy 1sBFVFMbB1ewb/D8ffpp+DWfwrwu0oMC3A2LkYu2F5SQFWuVOpbNemrnZ6K2ckPt 2mcLpJ308+sti8Ka/LrI2akU8JCLYMYDQnue/44v3X3Gm63cMcEx/fj5M5x6m71n P+cxAkjsGDHybnfjbUvR842to8msRsH4CI4Zbb69+5HDlWSadM8JhQd74oeii6o6 RiGPrrFEk7vCxFOkUsqGFYMykEX+71wXfQ1Mpp/b4QgdqBLkxW4ozQ3Ya7ASUs2z TLLmQvIXtYKGnyU+RdOkvS6piHjd4wVHOhuGVdXqVT7WrbaPeovY4TNSTV2ZA1gE 9Y5RCdrX9xeGGNjsYXKwsWGvXVsm6UTQmQVUsatQb3ic+K3S6tQR9pwzk0HmhMuM BscWHSAEL7T8ZZ5Ydph45Cw/6xdH7LggD+nRtLcdAuzCika12eabZHsO0DrF533n qXYOjZOgsMEZWICynxq6+EGQKGWY+F+GyKDMU2w2Es5OgMa9Bqb40aSF+Q887s96 xwI= =Pa8W -----END PGP SIGNATURE----- Merge tag 'dma-mapping-6.10-2024-05-20' of git://git.infradead.org/users/hch/dma-mapping Pull dma-mapping updates from Christoph Hellwig: - optimize DMA sync calls when they are no-ops (Alexander Lobakin) - fix swiotlb padding for untrusted devices (Michael Kelley) - add documentation for swiotb (Michael Kelley) * tag 'dma-mapping-6.10-2024-05-20' of git://git.infradead.org/users/hch/dma-mapping: dma: fix DMA sync for drivers not calling dma_set_mask*() xsk: use generic DMA sync shortcut instead of a custom one page_pool: check for DMA sync shortcut earlier page_pool: don't use driver-set flags field directly page_pool: make sure frag API fields don't span between cachelines iommu/dma: avoid expensive indirect calls for sync operations dma: avoid redundant calls for sync operations dma: compile-out DMA sync op calls when not used iommu/dma: fix zeroing of bounce buffer padding used by untrusted devices swiotlb: remove alloc_size argument to 
swiotlb_tbl_map_single() Documentation/core-api: add swiotlb documentation
This commit is contained in:
commit
daa121128a
@ -102,6 +102,7 @@ more memory-management documentation in Documentation/mm/index.rst.
|
||||
dma-api-howto
|
||||
dma-attributes
|
||||
dma-isa-lpc
|
||||
swiotlb
|
||||
mm-api
|
||||
genalloc
|
||||
pin_user_pages
|
||||
|
321
Documentation/core-api/swiotlb.rst
Normal file
321
Documentation/core-api/swiotlb.rst
Normal file
@ -0,0 +1,321 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
===============
|
||||
DMA and swiotlb
|
||||
===============
|
||||
|
||||
swiotlb is a memory buffer allocator used by the Linux kernel DMA layer. It is
|
||||
typically used when a device doing DMA can't directly access the target memory
|
||||
buffer because of hardware limitations or other requirements. In such a case,
|
||||
the DMA layer calls swiotlb to allocate a temporary memory buffer that conforms
|
||||
to the limitations. The DMA is done to/from this temporary memory buffer, and
|
||||
the CPU copies the data between the temporary buffer and the original target
|
||||
memory buffer. This approach is generically called "bounce buffering", and the
|
||||
temporary memory buffer is called a "bounce buffer".
|
||||
|
||||
Device drivers don't interact directly with swiotlb. Instead, drivers inform
|
||||
the DMA layer of the DMA attributes of the devices they are managing, and use
|
||||
the normal DMA map, unmap, and sync APIs when programming a device to do DMA.
|
||||
These APIs use the device DMA attributes and kernel-wide settings to determine
|
||||
if bounce buffering is necessary. If so, the DMA layer manages the allocation,
|
||||
freeing, and sync'ing of bounce buffers. Since the DMA attributes are per
|
||||
device, some devices in a system may use bounce buffering while others do not.
|
||||
|
||||
Because the CPU copies data between the bounce buffer and the original target
|
||||
memory buffer, doing bounce buffering is slower than doing DMA directly to the
|
||||
original memory buffer, and it consumes more CPU resources. So it is used only
|
||||
when necessary for providing DMA functionality.
|
||||
|
||||
Usage Scenarios
|
||||
---------------
|
||||
swiotlb was originally created to handle DMA for devices with addressing
|
||||
limitations. As physical memory sizes grew beyond 4 GiB, some devices could
|
||||
only provide 32-bit DMA addresses. By allocating bounce buffer memory below
|
||||
the 4 GiB line, these devices with addressing limitations could still work and
|
||||
do DMA.
|
||||
|
||||
More recently, Confidential Computing (CoCo) VMs have the guest VM's memory
|
||||
encrypted by default, and the memory is not accessible by the host hypervisor
|
||||
and VMM. For the host to do I/O on behalf of the guest, the I/O must be
|
||||
directed to guest memory that is unencrypted. CoCo VMs set a kernel-wide option
|
||||
to force all DMA I/O to use bounce buffers, and the bounce buffer memory is set
|
||||
up as unencrypted. The host does DMA I/O to/from the bounce buffer memory, and
|
||||
the Linux kernel DMA layer does "sync" operations to cause the CPU to copy the
|
||||
data to/from the original target memory buffer. The CPU copying bridges between
|
||||
the unencrypted and the encrypted memory. This use of bounce buffers allows
|
||||
device drivers to "just work" in a CoCo VM, with no modifications
|
||||
needed to handle the memory encryption complexity.
|
||||
|
||||
Other edge case scenarios arise for bounce buffers. For example, when IOMMU
|
||||
mappings are set up for a DMA operation to/from a device that is considered
|
||||
"untrusted", the device should be given access only to the memory containing
|
||||
the data being transferred. But if that memory occupies only part of an IOMMU
|
||||
granule, other parts of the granule may contain unrelated kernel data. Since
|
||||
IOMMU access control is per-granule, the untrusted device can gain access to
|
||||
the unrelated kernel data. This problem is solved by bounce buffering the DMA
|
||||
operation and ensuring that unused portions of the bounce buffers do not
|
||||
contain any unrelated kernel data.
|
||||
|
||||
Core Functionality
|
||||
------------------
|
||||
The primary swiotlb APIs are swiotlb_tbl_map_single() and
|
||||
swiotlb_tbl_unmap_single(). The "map" API allocates a bounce buffer of a
|
||||
specified size in bytes and returns the physical address of the buffer. The
|
||||
buffer memory is physically contiguous. The expectation is that the DMA layer
|
||||
maps the physical memory address to a DMA address, and returns the DMA address
|
||||
to the driver for programming into the device. If a DMA operation specifies
|
||||
multiple memory buffer segments, a separate bounce buffer must be allocated for
|
||||
each segment. swiotlb_tbl_map_single() always does a "sync" operation (i.e., a
|
||||
CPU copy) to initialize the bounce buffer to match the contents of the original
|
||||
buffer.
|
||||
|
||||
swiotlb_tbl_unmap_single() does the reverse. If the DMA operation might have
|
||||
updated the bounce buffer memory and DMA_ATTR_SKIP_CPU_SYNC is not set, the
|
||||
unmap does a "sync" operation to cause a CPU copy of the data from the bounce
|
||||
buffer back to the original buffer. Then the bounce buffer memory is freed.
|
||||
|
||||
swiotlb also provides "sync" APIs that correspond to the dma_sync_*() APIs that
|
||||
a driver may use when control of a buffer transitions between the CPU and the
|
||||
device. The swiotlb "sync" APIs cause a CPU copy of the data between the
|
||||
original buffer and the bounce buffer. Like the dma_sync_*() APIs, the swiotlb
|
||||
"sync" APIs support doing a partial sync, where only a subset of the bounce
|
||||
buffer is copied to/from the original buffer.
|
||||
|
||||
Core Functionality Constraints
|
||||
------------------------------
|
||||
The swiotlb map/unmap/sync APIs must operate without blocking, as they are
|
||||
called by the corresponding DMA APIs which may run in contexts that cannot
|
||||
block. Hence the default memory pool for swiotlb allocations must be
|
||||
pre-allocated at boot time (but see Dynamic swiotlb below). Because swiotlb
|
||||
allocations must be physically contiguous, the entire default memory pool is
|
||||
allocated as a single contiguous block.
|
||||
|
||||
The need to pre-allocate the default swiotlb pool creates a boot-time tradeoff.
|
||||
The pool should be large enough to ensure that bounce buffer requests can
|
||||
always be satisfied, as the non-blocking requirement means requests can't wait
|
||||
for space to become available. But a large pool potentially wastes memory, as
|
||||
this pre-allocated memory is not available for other uses in the system. The
|
||||
tradeoff is particularly acute in CoCo VMs that use bounce buffers for all DMA
|
||||
I/O. These VMs use a heuristic to set the default pool size to ~6% of memory,
|
||||
with a max of 1 GiB, which has the potential to be very wasteful of memory.
|
||||
Conversely, the heuristic might produce a size that is insufficient, depending
|
||||
on the I/O patterns of the workload in the VM. The dynamic swiotlb feature
|
||||
described below can help, but has limitations. Better management of the swiotlb
|
||||
default memory pool size remains an open issue.
|
||||
|
||||
A single allocation from swiotlb is limited to IO_TLB_SIZE * IO_TLB_SEGSIZE
|
||||
bytes, which is 256 KiB with current definitions. When a device's DMA settings
|
||||
are such that the device might use swiotlb, the maximum size of a DMA segment
|
||||
must be limited to that 256 KiB. This value is communicated to higher-level
|
||||
kernel code via dma_max_mapping_size() and swiotlb_max_mapping_size(). If the
|
||||
higher-level code fails to account for this limit, it may make requests that
|
||||
are too large for swiotlb, and get a "swiotlb full" error.
|
||||
|
||||
A key device DMA setting is "min_align_mask", which is a power of 2 minus 1
|
||||
so that some number of low order bits are set, or it may be zero. swiotlb
|
||||
allocations ensure these min_align_mask bits of the physical address of the
|
||||
bounce buffer match the same bits in the address of the original buffer. When
|
||||
min_align_mask is non-zero, it may produce an "alignment offset" in the address
|
||||
of the bounce buffer that slightly reduces the maximum size of an allocation.
|
||||
This potential alignment offset is reflected in the value returned by
|
||||
swiotlb_max_mapping_size(), which can show up in places like
|
||||
/sys/block/<device>/queue/max_sectors_kb. For example, if a device does not use
|
||||
swiotlb, max_sectors_kb might be 512 KiB or larger. If a device might use
|
||||
swiotlb, max_sectors_kb will be 256 KiB. When min_align_mask is non-zero,
|
||||
max_sectors_kb might be even smaller, such as 252 KiB.
|
||||
|
||||
swiotlb_tbl_map_single() also takes an "alloc_align_mask" parameter. This
|
||||
parameter specifies that the allocation of bounce buffer space must start at a
|
||||
physical address with the alloc_align_mask bits set to zero. But the actual
|
||||
bounce buffer might start at a larger address if min_align_mask is non-zero.
|
||||
Hence there may be pre-padding space that is allocated prior to the start of
|
||||
the bounce buffer. Similarly, the end of the bounce buffer is rounded up to an
|
||||
alloc_align_mask boundary, potentially resulting in post-padding space. Any
|
||||
pre-padding or post-padding space is not initialized by swiotlb code. The
|
||||
"alloc_align_mask" parameter is used by IOMMU code when mapping for untrusted
|
||||
devices. It is set to the granule size - 1 so that the bounce buffer is
|
||||
allocated entirely from granules that are not used for any other purpose.
|
||||
|
||||
Data Structure Concepts
|
||||
------------------------
|
||||
Memory used for swiotlb bounce buffers is allocated from overall system memory
|
||||
as one or more "pools". The default pool is allocated during system boot with a
|
||||
default size of 64 MiB. The default pool size may be modified with the
|
||||
"swiotlb=" kernel boot line parameter. The default size may also be adjusted
|
||||
due to other conditions, such as running in a CoCo VM, as described above. If
|
||||
CONFIG_SWIOTLB_DYNAMIC is enabled, additional pools may be allocated later in
|
||||
the life of the system. Each pool must be a contiguous range of physical
|
||||
memory. The default pool is allocated below the 4 GiB physical address line so
|
||||
it works for devices that can only address 32 bits of physical memory (unless
|
||||
architecture-specific code provides the SWIOTLB_ANY flag). In a CoCo VM, the
|
||||
pool memory must be decrypted before swiotlb is used.
|
||||
|
||||
Each pool is divided into "slots" of size IO_TLB_SIZE, which is 2 KiB with
|
||||
current definitions. IO_TLB_SEGSIZE contiguous slots (128 slots) constitute
|
||||
what might be called a "slot set". When a bounce buffer is allocated, it
|
||||
occupies one or more contiguous slots. A slot is never shared by multiple
|
||||
bounce buffers. Furthermore, a bounce buffer must be allocated from a single
|
||||
slot set, which leads to the maximum bounce buffer size being IO_TLB_SIZE *
|
||||
IO_TLB_SEGSIZE. Multiple smaller bounce buffers may co-exist in a single slot
|
||||
set if the alignment and size constraints can be met.
|
||||
|
||||
Slots are also grouped into "areas", with the constraint that a slot set exists
|
||||
entirely in a single area. Each area has its own spin lock that must be held to
|
||||
manipulate the slots in that area. The division into areas avoids contending
|
||||
for a single global spin lock when swiotlb is heavily used, such as in a CoCo
|
||||
VM. The number of areas defaults to the number of CPUs in the system for
|
||||
maximum parallelism, but since an area can't be smaller than IO_TLB_SEGSIZE
|
||||
slots, it might be necessary to assign multiple CPUs to the same area. The
|
||||
number of areas can also be set via the "swiotlb=" kernel boot parameter.
|
||||
|
||||
When allocating a bounce buffer, if the area associated with the calling CPU
|
||||
does not have enough free space, areas associated with other CPUs are tried
|
||||
sequentially. For each area tried, the area's spin lock must be obtained before
|
||||
trying an allocation, so contention may occur if swiotlb is relatively busy
|
||||
overall. But an allocation request does not fail unless all areas do not have
|
||||
enough free space.
|
||||
|
||||
IO_TLB_SIZE, IO_TLB_SEGSIZE, and the number of areas must all be powers of 2 as
|
||||
the code uses shifting and bit masking to do many of the calculations. The
|
||||
number of areas is rounded up to a power of 2 if necessary to meet this
|
||||
requirement.
|
||||
|
||||
The default pool is allocated with PAGE_SIZE alignment. If an alloc_align_mask
|
||||
argument to swiotlb_tbl_map_single() specifies a larger alignment, one or more
|
||||
initial slots in each slot set might not meet the alloc_align_mask criterion.
|
||||
Because a bounce buffer allocation can't cross a slot set boundary, eliminating
|
||||
those initial slots effectively reduces the max size of a bounce buffer.
|
||||
Currently, there's no problem because alloc_align_mask is set based on IOMMU
|
||||
granule size, and granules cannot be larger than PAGE_SIZE. But if that were to
|
||||
change in the future, the initial pool allocation might need to be done with
|
||||
alignment larger than PAGE_SIZE.
|
||||
|
||||
Dynamic swiotlb
|
||||
---------------
|
||||
When CONFIG_SWIOTLB_DYNAMIC is enabled, swiotlb can do on-demand expansion of
|
||||
the amount of memory available for allocation as bounce buffers. If a bounce
|
||||
buffer request fails due to lack of available space, an asynchronous background
|
||||
task is kicked off to allocate memory from general system memory and turn it
|
||||
into an swiotlb pool. Creating an additional pool must be done asynchronously
|
||||
because the memory allocation may block, and as noted above, swiotlb requests
|
||||
are not allowed to block. Once the background task is kicked off, the bounce
|
||||
buffer request creates a "transient pool" to avoid returning an "swiotlb full"
|
||||
error. A transient pool has the size of the bounce buffer request, and is
|
||||
deleted when the bounce buffer is freed. Memory for this transient pool comes
|
||||
from the general system memory atomic pool so that creation does not block.
|
||||
Creating a transient pool has relatively high cost, particularly in a CoCo VM
|
||||
where the memory must be decrypted, so it is done only as a stopgap until the
|
||||
background task can add another non-transient pool.
|
||||
|
||||
Adding a dynamic pool has limitations. Like with the default pool, the memory
|
||||
must be physically contiguous, so the size is limited to MAX_PAGE_ORDER pages
|
||||
(e.g., 4 MiB on a typical x86 system). Due to memory fragmentation, a max size
|
||||
allocation may not be available. The dynamic pool allocator tries smaller sizes
|
||||
until it succeeds, but with a minimum size of 1 MiB. Given sufficient system
|
||||
memory fragmentation, dynamically adding a pool might not succeed at all.
|
||||
|
||||
The number of areas in a dynamic pool may be different from the number of areas
|
||||
in the default pool. Because the new pool size is typically a few MiB at most,
|
||||
the number of areas will likely be smaller. For example, with a new pool size
|
||||
of 4 MiB and the 256 KiB minimum area size, only 16 areas can be created. If
|
||||
the system has more than 16 CPUs, multiple CPUs must share an area, creating
|
||||
more lock contention.
|
||||
|
||||
New pools added via dynamic swiotlb are linked together in a linear list.
|
||||
swiotlb code frequently must search for the pool containing a particular
|
||||
swiotlb physical address, so that search is linear and not performant with a
|
||||
large number of dynamic pools. The data structures could be improved for
|
||||
faster searches.
|
||||
|
||||
Overall, dynamic swiotlb works best for small configurations with relatively
|
||||
few CPUs. It allows the default swiotlb pool to be smaller so that memory is
|
||||
not wasted, with dynamic pools making more space available if needed (as long
|
||||
as fragmentation isn't an obstacle). It is less useful for large CoCo VMs.
|
||||
|
||||
Data Structure Details
|
||||
----------------------
|
||||
swiotlb is managed with four primary data structures: io_tlb_mem, io_tlb_pool,
|
||||
io_tlb_area, and io_tlb_slot. io_tlb_mem describes a swiotlb memory allocator,
|
||||
which includes the default memory pool and any dynamic or transient pools
|
||||
linked to it. Limited statistics on swiotlb usage are kept per memory allocator
|
||||
and are stored in this data structure. These statistics are available under
|
||||
/sys/kernel/debug/swiotlb when CONFIG_DEBUG_FS is set.
|
||||
|
||||
io_tlb_pool describes a memory pool, either the default pool, a dynamic pool,
|
||||
or a transient pool. The description includes the start and end addresses of
|
||||
the memory in the pool, a pointer to an array of io_tlb_area structures, and a
|
||||
pointer to an array of io_tlb_slot structures that are associated with the pool.
|
||||
|
||||
io_tlb_area describes an area. The primary field is the spin lock used to
|
||||
serialize access to slots in the area. The io_tlb_area array for a pool has an
|
||||
entry for each area, and is accessed using a 0-based area index derived from the
|
||||
calling processor ID. Areas exist solely to allow parallel access to swiotlb
|
||||
from multiple CPUs.
|
||||
|
||||
io_tlb_slot describes an individual memory slot in the pool, with size
|
||||
IO_TLB_SIZE (2 KiB currently). The io_tlb_slot array is indexed by the slot
|
||||
index computed from the bounce buffer address relative to the starting memory
|
||||
address of the pool. The size of struct io_tlb_slot is 24 bytes, so the
|
||||
overhead is about 1% of the slot size.
|
||||
|
||||
The io_tlb_slot array is designed to meet several requirements. First, the DMA
|
||||
APIs and the corresponding swiotlb APIs use the bounce buffer address as the
|
||||
identifier for a bounce buffer. This address is returned by
|
||||
swiotlb_tbl_map_single(), and then passed as an argument to
|
||||
swiotlb_tbl_unmap_single() and the swiotlb_sync_*() functions. The original
|
||||
memory buffer address obviously must be passed as an argument to
|
||||
swiotlb_tbl_map_single(), but it is not passed to the other APIs. Consequently,
|
||||
swiotlb data structures must save the original memory buffer address so that it
|
||||
can be used when doing sync operations. This original address is saved in the
|
||||
io_tlb_slot array.
|
||||
|
||||
Second, the io_tlb_slot array must handle partial sync requests. In such cases,
|
||||
the argument to swiotlb_sync_*() is not the address of the start of the bounce
|
||||
buffer but an address somewhere in the middle of the bounce buffer, and the
|
||||
address of the start of the bounce buffer isn't known to swiotlb code. But
|
||||
swiotlb code must be able to calculate the corresponding original memory buffer
|
||||
address to do the CPU copy dictated by the "sync". So an adjusted original
|
||||
memory buffer address is populated into the struct io_tlb_slot for each slot
|
||||
occupied by the bounce buffer. An adjusted "alloc_size" of the bounce buffer is
|
||||
also recorded in each struct io_tlb_slot so a sanity check can be performed on
|
||||
the size of the "sync" operation. The "alloc_size" field is not used except for
|
||||
the sanity check.
|
||||
|
||||
Third, the io_tlb_slot array is used to track available slots. The "list" field
|
||||
in struct io_tlb_slot records how many contiguous available slots exist starting
|
||||
at that slot. A "0" indicates that the slot is occupied. A value of "1"
|
||||
indicates only the current slot is available. A value of "2" indicates the
|
||||
current slot and the next slot are available, etc. The maximum value is
|
||||
IO_TLB_SEGSIZE, which can appear in the first slot in a slot set, and indicates
|
||||
that the entire slot set is available. These values are used when searching for
|
||||
available slots to use for a new bounce buffer. They are updated when allocating
|
||||
a new bounce buffer and when freeing a bounce buffer. At pool creation time, the
|
||||
"list" field is initialized to IO_TLB_SEGSIZE down to 1 for the slots in every
|
||||
slot set.
|
||||
|
||||
Fourth, the io_tlb_slot array keeps track of any "padding slots" allocated to
|
||||
meet alloc_align_mask requirements described above. When
|
||||
swiotlb_tbl_map_single() allocates bounce buffer space to meet alloc_align_mask
|
||||
requirements, it may allocate pre-padding space across zero or more slots. But
|
||||
when swiotlb_tbl_unmap_single() is called with the bounce buffer address, the
|
||||
alloc_align_mask value that governed the allocation, and therefore the
|
||||
allocation of any padding slots, is not known. The "pad_slots" field records
|
||||
the number of padding slots so that swiotlb_tbl_unmap_single() can free them.
|
||||
The "pad_slots" value is recorded only in the first non-padding slot allocated
|
||||
to the bounce buffer.
|
||||
|
||||
Restricted pools
|
||||
----------------
|
||||
The swiotlb machinery is also used for "restricted pools", which are pools of
|
||||
memory separate from the default swiotlb pool, and that are dedicated for DMA
|
||||
use by a particular device. Restricted pools provide a level of DMA memory
|
||||
protection on systems with limited hardware protection capabilities, such as
|
||||
those lacking an IOMMU. Such usage is specified by DeviceTree entries and
|
||||
requires that CONFIG_DMA_RESTRICTED_POOL is set. Each restricted pool is based
|
||||
on its own io_tlb_mem data structure that is independent of the main swiotlb
|
||||
io_tlb_mem.
|
||||
|
||||
Restricted pools add swiotlb_alloc() and swiotlb_free() APIs, which are called
|
||||
from the dma_alloc_*() and dma_free_*() APIs. The swiotlb_alloc/free() APIs
|
||||
allocate/free slots from/to the restricted pool directly and do not go through
|
||||
swiotlb_tbl_map/unmap_single().
|
@ -1152,9 +1152,6 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
|
||||
*/
|
||||
if (dev_use_swiotlb(dev, size, dir) &&
|
||||
iova_offset(iovad, phys | size)) {
|
||||
void *padding_start;
|
||||
size_t padding_size, aligned_size;
|
||||
|
||||
if (!is_swiotlb_active(dev)) {
|
||||
dev_warn_once(dev, "DMA bounce buffers are inactive, unable to map unaligned transaction.\n");
|
||||
return DMA_MAPPING_ERROR;
|
||||
@ -1162,24 +1159,30 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
|
||||
|
||||
trace_swiotlb_bounced(dev, phys, size);
|
||||
|
||||
aligned_size = iova_align(iovad, size);
|
||||
phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size,
|
||||
phys = swiotlb_tbl_map_single(dev, phys, size,
|
||||
iova_mask(iovad), dir, attrs);
|
||||
|
||||
if (phys == DMA_MAPPING_ERROR)
|
||||
return DMA_MAPPING_ERROR;
|
||||
|
||||
/* Cleanup the padding area. */
|
||||
padding_start = phys_to_virt(phys);
|
||||
padding_size = aligned_size;
|
||||
/*
|
||||
* Untrusted devices should not see padding areas with random
|
||||
* leftover kernel data, so zero the pre- and post-padding.
|
||||
* swiotlb_tbl_map_single() has initialized the bounce buffer
|
||||
* proper to the contents of the original memory buffer.
|
||||
*/
|
||||
if (dev_is_untrusted(dev)) {
|
||||
size_t start, virt = (size_t)phys_to_virt(phys);
|
||||
|
||||
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
|
||||
(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) {
|
||||
padding_start += size;
|
||||
padding_size -= size;
|
||||
/* Pre-padding */
|
||||
start = iova_align_down(iovad, virt);
|
||||
memset((void *)start, 0, virt - start);
|
||||
|
||||
/* Post-padding */
|
||||
start = virt + size;
|
||||
memset((void *)start, 0,
|
||||
iova_align(iovad, start) - start);
|
||||
}
|
||||
|
||||
memset(padding_start, 0, padding_size);
|
||||
}
|
||||
|
||||
if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
|
||||
@ -1718,7 +1721,8 @@ static size_t iommu_dma_max_mapping_size(struct device *dev)
|
||||
}
|
||||
|
||||
static const struct dma_map_ops iommu_dma_ops = {
|
||||
.flags = DMA_F_PCI_P2PDMA_SUPPORTED,
|
||||
.flags = DMA_F_PCI_P2PDMA_SUPPORTED |
|
||||
DMA_F_CAN_SKIP_SYNC,
|
||||
.alloc = iommu_dma_alloc,
|
||||
.free = iommu_dma_free,
|
||||
.alloc_pages_op = dma_common_alloc_pages,
|
||||
|
@ -1587,7 +1587,7 @@ static int tsnep_rx_poll_zc(struct tsnep_rx *rx, struct napi_struct *napi,
|
||||
length = __le32_to_cpu(entry->desc_wb->properties) &
|
||||
TSNEP_DESC_LENGTH_MASK;
|
||||
xsk_buff_set_size(entry->xdp, length - ETH_FCS_LEN);
|
||||
xsk_buff_dma_sync_for_cpu(entry->xdp, rx->xsk_pool);
|
||||
xsk_buff_dma_sync_for_cpu(entry->xdp);
|
||||
|
||||
/* RX metadata with timestamps is in front of actual data,
|
||||
* subtract metadata size to get length of actual data and
|
||||
|
@ -55,7 +55,7 @@ static u32 dpaa2_xsk_run_xdp(struct dpaa2_eth_priv *priv,
|
||||
xdp_set_data_meta_invalid(xdp_buff);
|
||||
xdp_buff->rxq = &ch->xdp_rxq;
|
||||
|
||||
xsk_buff_dma_sync_for_cpu(xdp_buff, ch->xsk_pool);
|
||||
xsk_buff_dma_sync_for_cpu(xdp_buff);
|
||||
xdp_act = bpf_prog_run_xdp(xdp_prog, xdp_buff);
|
||||
|
||||
/* xdp.data pointer may have changed */
|
||||
|
@ -482,7 +482,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
|
||||
|
||||
bi = *i40e_rx_bi(rx_ring, next_to_process);
|
||||
xsk_buff_set_size(bi, size);
|
||||
xsk_buff_dma_sync_for_cpu(bi, rx_ring->xsk_pool);
|
||||
xsk_buff_dma_sync_for_cpu(bi);
|
||||
|
||||
if (!first)
|
||||
first = bi;
|
||||
|
@ -878,7 +878,7 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
|
||||
ICE_RX_FLX_DESC_PKT_LEN_M;
|
||||
|
||||
xsk_buff_set_size(xdp, size);
|
||||
xsk_buff_dma_sync_for_cpu(xdp, xsk_pool);
|
||||
xsk_buff_dma_sync_for_cpu(xdp);
|
||||
|
||||
if (!first) {
|
||||
first = xdp;
|
||||
|
@ -2812,7 +2812,7 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget)
|
||||
}
|
||||
|
||||
bi->xdp->data_end = bi->xdp->data + size;
|
||||
xsk_buff_dma_sync_for_cpu(bi->xdp, ring->xsk_pool);
|
||||
xsk_buff_dma_sync_for_cpu(bi->xdp);
|
||||
|
||||
res = __igc_xdp_run_prog(adapter, prog, bi->xdp);
|
||||
switch (res) {
|
||||
|
@ -303,7 +303,7 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
|
||||
}
|
||||
|
||||
bi->xdp->data_end = bi->xdp->data + size;
|
||||
xsk_buff_dma_sync_for_cpu(bi->xdp, rx_ring->xsk_pool);
|
||||
xsk_buff_dma_sync_for_cpu(bi->xdp);
|
||||
xdp_res = ixgbe_run_xdp_zc(adapter, rx_ring, bi->xdp);
|
||||
|
||||
if (likely(xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR))) {
|
||||
|
@ -270,7 +270,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
|
||||
/* mxbuf->rq is set on allocation, but cqe is per-packet so set it here */
|
||||
mxbuf->cqe = cqe;
|
||||
xsk_buff_set_size(&mxbuf->xdp, cqe_bcnt);
|
||||
xsk_buff_dma_sync_for_cpu(&mxbuf->xdp, rq->xsk_pool);
|
||||
xsk_buff_dma_sync_for_cpu(&mxbuf->xdp);
|
||||
net_prefetch(mxbuf->xdp.data);
|
||||
|
||||
/* Possible flows:
|
||||
@ -319,7 +319,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
|
||||
/* mxbuf->rq is set on allocation, but cqe is per-packet so set it here */
|
||||
mxbuf->cqe = cqe;
|
||||
xsk_buff_set_size(&mxbuf->xdp, cqe_bcnt);
|
||||
xsk_buff_dma_sync_for_cpu(&mxbuf->xdp, rq->xsk_pool);
|
||||
xsk_buff_dma_sync_for_cpu(&mxbuf->xdp);
|
||||
net_prefetch(mxbuf->xdp.data);
|
||||
|
||||
prog = rcu_dereference(rq->xdp_prog);
|
||||
|
@ -917,7 +917,7 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
|
||||
|
||||
if (!rq->xsk_pool) {
|
||||
count = mlx5e_refill_rx_wqes(rq, head, wqe_bulk);
|
||||
} else if (likely(!rq->xsk_pool->dma_need_sync)) {
|
||||
} else if (likely(!dma_dev_need_sync(rq->pdev))) {
|
||||
mlx5e_xsk_free_rx_wqes(rq, head, wqe_bulk);
|
||||
count = mlx5e_xsk_alloc_rx_wqes_batched(rq, head, wqe_bulk);
|
||||
} else {
|
||||
|
@ -184,7 +184,7 @@ nfp_nfd3_xsk_rx(struct nfp_net_rx_ring *rx_ring, int budget,
|
||||
xrxbuf->xdp->data += meta_len;
|
||||
xrxbuf->xdp->data_end = xrxbuf->xdp->data + pkt_len;
|
||||
xdp_set_data_meta_invalid(xrxbuf->xdp);
|
||||
xsk_buff_dma_sync_for_cpu(xrxbuf->xdp, r_vec->xsk_pool);
|
||||
xsk_buff_dma_sync_for_cpu(xrxbuf->xdp);
|
||||
net_prefetch(xrxbuf->xdp->data);
|
||||
|
||||
if (meta_len) {
|
||||
|
@ -5361,7 +5361,7 @@ static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue)
|
||||
|
||||
/* RX buffer is good and fit into a XSK pool buffer */
|
||||
buf->xdp->data_end = buf->xdp->data + buf1_len;
|
||||
xsk_buff_dma_sync_for_cpu(buf->xdp, rx_q->xsk_pool);
|
||||
xsk_buff_dma_sync_for_cpu(buf->xdp);
|
||||
|
||||
prog = READ_ONCE(priv->xdp_prog);
|
||||
res = __stmmac_xdp_run_prog(priv, prog, buf->xdp);
|
||||
|
@ -216,7 +216,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
|
||||
*/
|
||||
trace_swiotlb_bounced(dev, dev_addr, size);
|
||||
|
||||
map = swiotlb_tbl_map_single(dev, phys, size, size, 0, dir, attrs);
|
||||
map = swiotlb_tbl_map_single(dev, phys, size, 0, dir, attrs);
|
||||
if (map == (phys_addr_t)DMA_MAPPING_ERROR)
|
||||
return DMA_MAPPING_ERROR;
|
||||
|
||||
|
@ -691,6 +691,7 @@ struct device_physical_location {
|
||||
* and optionally (if the coherent mask is large enough) also
|
||||
* for dma allocations. This flag is managed by the dma ops
|
||||
* instance from ->dma_supported.
|
||||
* @dma_skip_sync: DMA sync operations can be skipped for coherent buffers.
|
||||
*
|
||||
* At the lowest level, every device in a Linux system is represented by an
|
||||
* instance of struct device. The device structure contains the information
|
||||
@ -803,6 +804,9 @@ struct device {
|
||||
#ifdef CONFIG_DMA_OPS_BYPASS
|
||||
bool dma_ops_bypass : 1;
|
||||
#endif
|
||||
#ifdef CONFIG_DMA_NEED_SYNC
|
||||
bool dma_skip_sync:1;
|
||||
#endif
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -18,8 +18,11 @@ struct iommu_ops;
|
||||
*
|
||||
* DMA_F_PCI_P2PDMA_SUPPORTED: Indicates the dma_map_ops implementation can
|
||||
* handle PCI P2PDMA pages in the map_sg/unmap_sg operation.
|
||||
* DMA_F_CAN_SKIP_SYNC: DMA sync operations can be skipped if the device is
|
||||
* coherent and it's not an SWIOTLB buffer.
|
||||
*/
|
||||
#define DMA_F_PCI_P2PDMA_SUPPORTED (1 << 0)
|
||||
#define DMA_F_CAN_SKIP_SYNC (1 << 1)
|
||||
|
||||
struct dma_map_ops {
|
||||
unsigned int flags;
|
||||
@ -273,6 +276,15 @@ static inline bool dev_is_dma_coherent(struct device *dev)
|
||||
}
|
||||
#endif /* CONFIG_ARCH_HAS_DMA_COHERENCE_H */
|
||||
|
||||
/*
 * Clear @dev's dma_skip_sync flag. Without CONFIG_DMA_NEED_SYNC the body is
 * compiled out and the function does nothing.
 */
static inline void dma_reset_need_sync(struct device *dev)
{
#ifdef CONFIG_DMA_NEED_SYNC
	/*
	 * The flag is written only while it is still set, so the function
	 * can be called on the hotpath: once cleared, later calls just test
	 * the flag and return.
	 */
	if (likely(!dev->dma_skip_sync))
		return;

	dev->dma_skip_sync = false;
#endif
}
|
||||
|
||||
/*
|
||||
* Check whether potential kmalloc() buffers are safe for non-coherent DMA.
|
||||
*/
|
||||
|
@ -117,14 +117,6 @@ dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr,
|
||||
size_t size, enum dma_data_direction dir, unsigned long attrs);
|
||||
void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size,
|
||||
enum dma_data_direction dir, unsigned long attrs);
|
||||
void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
|
||||
enum dma_data_direction dir);
|
||||
void dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
|
||||
size_t size, enum dma_data_direction dir);
|
||||
void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
|
||||
int nelems, enum dma_data_direction dir);
|
||||
void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
|
||||
int nelems, enum dma_data_direction dir);
|
||||
void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
|
||||
gfp_t flag, unsigned long attrs);
|
||||
void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
|
||||
@ -147,7 +139,6 @@ u64 dma_get_required_mask(struct device *dev);
|
||||
bool dma_addressing_limited(struct device *dev);
|
||||
size_t dma_max_mapping_size(struct device *dev);
|
||||
size_t dma_opt_mapping_size(struct device *dev);
|
||||
bool dma_need_sync(struct device *dev, dma_addr_t dma_addr);
|
||||
unsigned long dma_get_merge_boundary(struct device *dev);
|
||||
struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size,
|
||||
enum dma_data_direction dir, gfp_t gfp, unsigned long attrs);
|
||||
@ -195,22 +186,6 @@ static inline void dma_unmap_resource(struct device *dev, dma_addr_t addr,
|
||||
size_t size, enum dma_data_direction dir, unsigned long attrs)
|
||||
{
|
||||
}
|
||||
static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
|
||||
size_t size, enum dma_data_direction dir)
|
||||
{
|
||||
}
|
||||
static inline void dma_sync_single_for_device(struct device *dev,
|
||||
dma_addr_t addr, size_t size, enum dma_data_direction dir)
|
||||
{
|
||||
}
|
||||
static inline void dma_sync_sg_for_cpu(struct device *dev,
|
||||
struct scatterlist *sg, int nelems, enum dma_data_direction dir)
|
||||
{
|
||||
}
|
||||
static inline void dma_sync_sg_for_device(struct device *dev,
|
||||
struct scatterlist *sg, int nelems, enum dma_data_direction dir)
|
||||
{
|
||||
}
|
||||
static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
|
||||
{
|
||||
return -ENOMEM;
|
||||
@ -277,10 +252,6 @@ static inline size_t dma_opt_mapping_size(struct device *dev)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static inline unsigned long dma_get_merge_boundary(struct device *dev)
|
||||
{
|
||||
return 0;
|
||||
@ -310,6 +281,82 @@ static inline int dma_mmap_noncontiguous(struct device *dev,
|
||||
}
|
||||
#endif /* CONFIG_HAS_DMA */
|
||||
|
||||
#if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC)
|
||||
void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
|
||||
enum dma_data_direction dir);
|
||||
void __dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
|
||||
size_t size, enum dma_data_direction dir);
|
||||
void __dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
|
||||
int nelems, enum dma_data_direction dir);
|
||||
void __dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
|
||||
int nelems, enum dma_data_direction dir);
|
||||
bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr);
|
||||
|
||||
static inline bool dma_dev_need_sync(const struct device *dev)
|
||||
{
|
||||
/* Always call DMA sync operations when debugging is enabled */
|
||||
return !dev->dma_skip_sync || IS_ENABLED(CONFIG_DMA_API_DEBUG);
|
||||
}
|
||||
|
||||
static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
|
||||
size_t size, enum dma_data_direction dir)
|
||||
{
|
||||
if (dma_dev_need_sync(dev))
|
||||
__dma_sync_single_for_cpu(dev, addr, size, dir);
|
||||
}
|
||||
|
||||
static inline void dma_sync_single_for_device(struct device *dev,
|
||||
dma_addr_t addr, size_t size, enum dma_data_direction dir)
|
||||
{
|
||||
if (dma_dev_need_sync(dev))
|
||||
__dma_sync_single_for_device(dev, addr, size, dir);
|
||||
}
|
||||
|
||||
static inline void dma_sync_sg_for_cpu(struct device *dev,
|
||||
struct scatterlist *sg, int nelems, enum dma_data_direction dir)
|
||||
{
|
||||
if (dma_dev_need_sync(dev))
|
||||
__dma_sync_sg_for_cpu(dev, sg, nelems, dir);
|
||||
}
|
||||
|
||||
static inline void dma_sync_sg_for_device(struct device *dev,
|
||||
struct scatterlist *sg, int nelems, enum dma_data_direction dir)
|
||||
{
|
||||
if (dma_dev_need_sync(dev))
|
||||
__dma_sync_sg_for_device(dev, sg, nelems, dir);
|
||||
}
|
||||
|
||||
/*
 * Report whether @dma_addr needs DMA sync operations. Returns false without
 * calling __dma_need_sync() when dma_dev_need_sync() says @dev needs none.
 */
static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
{
	if (!dma_dev_need_sync(dev))
		return false;

	return __dma_need_sync(dev, dma_addr);
}
|
||||
#else /* !CONFIG_HAS_DMA || !CONFIG_DMA_NEED_SYNC */
|
||||
/*
 * Stubs for the !CONFIG_HAS_DMA || !CONFIG_DMA_NEED_SYNC case: the sync
 * helpers below have empty bodies and the two predicates return false.
 */
static inline bool dma_dev_need_sync(const struct device *dev)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
/* No-op: nothing is synced in this configuration. */
static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
|
||||
size_t size, enum dma_data_direction dir)
|
||||
{
|
||||
}
|
||||
/* No-op: nothing is synced in this configuration. */
static inline void dma_sync_single_for_device(struct device *dev,
|
||||
dma_addr_t addr, size_t size, enum dma_data_direction dir)
|
||||
{
|
||||
}
|
||||
/* No-op: nothing is synced in this configuration. */
static inline void dma_sync_sg_for_cpu(struct device *dev,
|
||||
struct scatterlist *sg, int nelems, enum dma_data_direction dir)
|
||||
{
|
||||
}
|
||||
/* No-op: nothing is synced in this configuration. */
static inline void dma_sync_sg_for_device(struct device *dev,
|
||||
struct scatterlist *sg, int nelems, enum dma_data_direction dir)
|
||||
{
|
||||
}
|
||||
/* Always false here, regardless of @dev and @dma_addr. */
static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif /* !CONFIG_HAS_DMA || !CONFIG_DMA_NEED_SYNC */
|
||||
|
||||
struct page *dma_alloc_pages(struct device *dev, size_t size,
|
||||
dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp);
|
||||
void dma_free_pages(struct device *dev, size_t size, struct page *page,
|
||||
|
@ -65,6 +65,11 @@ static inline size_t iova_align(struct iova_domain *iovad, size_t size)
|
||||
return ALIGN(size, iovad->granule);
|
||||
}
|
||||
|
||||
/*
 * Round @size down to a multiple of the domain's granule (iovad->granule),
 * the downward counterpart of iova_align() which uses ALIGN().
 */
static inline size_t iova_align_down(struct iova_domain *iovad, size_t size)
|
||||
{
|
||||
return ALIGN_DOWN(size, iovad->granule);
|
||||
}
|
||||
|
||||
static inline dma_addr_t iova_dma_addr(struct iova_domain *iovad, struct iova *iova)
|
||||
{
|
||||
return (dma_addr_t)iova->pfn_lo << iova_shift(iovad);
|
||||
|
@ -43,7 +43,7 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
|
||||
extern void __init swiotlb_update_mem_attributes(void);
|
||||
|
||||
phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys,
|
||||
size_t mapping_size, size_t alloc_size,
|
||||
size_t mapping_size,
|
||||
unsigned int alloc_aligned_mask, enum dma_data_direction dir,
|
||||
unsigned long attrs);
|
||||
|
||||
|
@ -45,7 +45,6 @@ struct pp_alloc_cache {
|
||||
|
||||
/**
|
||||
* struct page_pool_params - page pool parameters
|
||||
* @flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV
|
||||
* @order: 2^order pages on allocation
|
||||
* @pool_size: size of the ptr_ring
|
||||
* @nid: NUMA node id to allocate pages from
|
||||
@ -55,10 +54,11 @@ struct pp_alloc_cache {
|
||||
* @dma_dir: DMA mapping direction
|
||||
* @max_len: max DMA sync memory size for PP_FLAG_DMA_SYNC_DEV
|
||||
* @offset: DMA sync address offset for PP_FLAG_DMA_SYNC_DEV
|
||||
* @netdev: corresponding &net_device for Netlink introspection
|
||||
* @flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV, PP_FLAG_SYSTEM_POOL
|
||||
*/
|
||||
struct page_pool_params {
|
||||
struct_group_tagged(page_pool_params_fast, fast,
|
||||
unsigned int flags;
|
||||
unsigned int order;
|
||||
unsigned int pool_size;
|
||||
int nid;
|
||||
@ -70,6 +70,7 @@ struct page_pool_params {
|
||||
);
|
||||
struct_group_tagged(page_pool_params_slow, slow,
|
||||
struct net_device *netdev;
|
||||
unsigned int flags;
|
||||
/* private: used by test code only */
|
||||
void (*init_callback)(struct page *page, void *arg);
|
||||
void *init_arg;
|
||||
@ -130,12 +131,28 @@ struct page_pool {
|
||||
struct page_pool_params_fast p;
|
||||
|
||||
int cpuid;
|
||||
bool has_init_callback;
|
||||
u32 pages_state_hold_cnt;
|
||||
|
||||
bool has_init_callback:1; /* slow::init_callback is set */
|
||||
bool dma_map:1; /* Perform DMA mapping */
|
||||
bool dma_sync:1; /* Perform DMA sync */
|
||||
#ifdef CONFIG_PAGE_POOL_STATS
|
||||
bool system:1; /* This is a global percpu pool */
|
||||
#endif
|
||||
|
||||
/* The following block must stay within one cacheline. On 32-bit
|
||||
* systems, sizeof(long) == sizeof(int), so that the block size is
|
||||
* ``3 * sizeof(long)``. On 64-bit systems, the actual size is
|
||||
* ``2 * sizeof(long) + sizeof(int)``. The closest pow-2 to both of
|
||||
* them is ``4 * sizeof(long)``, so just use that one for simplicity.
|
||||
* Having it aligned to a cacheline boundary may be excessive and
|
||||
* doesn't bring any good.
|
||||
*/
|
||||
__cacheline_group_begin(frag) __aligned(4 * sizeof(long));
|
||||
long frag_users;
|
||||
struct page *frag_page;
|
||||
unsigned int frag_offset;
|
||||
u32 pages_state_hold_cnt;
|
||||
__cacheline_group_end(frag);
|
||||
|
||||
struct delayed_work release_dw;
|
||||
void (*disconnect)(void *pool);
|
||||
|
@ -219,13 +219,10 @@ static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool
|
||||
return meta;
|
||||
}
|
||||
|
||||
static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool)
|
||||
static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp)
|
||||
{
|
||||
struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
|
||||
|
||||
if (!pool->dma_need_sync)
|
||||
return;
|
||||
|
||||
xp_dma_sync_for_cpu(xskb);
|
||||
}
|
||||
|
||||
@ -402,7 +399,7 @@ static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool)
|
||||
static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -43,7 +43,6 @@ struct xsk_dma_map {
|
||||
refcount_t users;
|
||||
struct list_head list; /* Protected by the RTNL_LOCK */
|
||||
u32 dma_pages_cnt;
|
||||
bool dma_need_sync;
|
||||
};
|
||||
|
||||
struct xsk_buff_pool {
|
||||
@ -82,7 +81,6 @@ struct xsk_buff_pool {
|
||||
u8 tx_metadata_len; /* inherited from umem */
|
||||
u8 cached_need_wakeup;
|
||||
bool uses_need_wakeup;
|
||||
bool dma_need_sync;
|
||||
bool unaligned;
|
||||
bool tx_sw_csum;
|
||||
void *addrs;
|
||||
@ -155,21 +153,17 @@ static inline dma_addr_t xp_get_frame_dma(struct xdp_buff_xsk *xskb)
|
||||
return xskb->frame_dma;
|
||||
}
|
||||
|
||||
void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb);
|
||||
static inline void xp_dma_sync_for_cpu(struct xdp_buff_xsk *xskb)
|
||||
{
|
||||
xp_dma_sync_for_cpu_slow(xskb);
|
||||
dma_sync_single_for_cpu(xskb->pool->dev, xskb->dma,
|
||||
xskb->pool->frame_len,
|
||||
DMA_BIDIRECTIONAL);
|
||||
}
|
||||
|
||||
void xp_dma_sync_for_device_slow(struct xsk_buff_pool *pool, dma_addr_t dma,
|
||||
size_t size);
|
||||
static inline void xp_dma_sync_for_device(struct xsk_buff_pool *pool,
|
||||
dma_addr_t dma, size_t size)
|
||||
{
|
||||
if (!pool->dma_need_sync)
|
||||
return;
|
||||
|
||||
xp_dma_sync_for_device_slow(pool, dma, size);
|
||||
dma_sync_single_for_device(pool->dev, dma, size, DMA_BIDIRECTIONAL);
|
||||
}
|
||||
|
||||
/* Masks for xdp_umem_page flags.
|
||||
|
@ -107,6 +107,11 @@ config DMA_BOUNCE_UNALIGNED_KMALLOC
|
||||
bool
|
||||
depends on SWIOTLB
|
||||
|
||||
config DMA_NEED_SYNC
|
||||
def_bool ARCH_HAS_SYNC_DMA_FOR_DEVICE || ARCH_HAS_SYNC_DMA_FOR_CPU || \
|
||||
ARCH_HAS_SYNC_DMA_FOR_CPU_ALL || DMA_API_DEBUG || DMA_OPS || \
|
||||
SWIOTLB
|
||||
|
||||
config DMA_RESTRICTED_POOL
|
||||
bool "DMA Restricted Pool"
|
||||
depends on OF && OF_RESERVED_MEM && SWIOTLB
|
||||
|
@ -329,7 +329,8 @@ void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size,
|
||||
}
|
||||
EXPORT_SYMBOL(dma_unmap_resource);
|
||||
|
||||
void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
|
||||
#ifdef CONFIG_DMA_NEED_SYNC
|
||||
void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
|
||||
enum dma_data_direction dir)
|
||||
{
|
||||
const struct dma_map_ops *ops = get_dma_ops(dev);
|
||||
@ -341,9 +342,9 @@ void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
|
||||
ops->sync_single_for_cpu(dev, addr, size, dir);
|
||||
debug_dma_sync_single_for_cpu(dev, addr, size, dir);
|
||||
}
|
||||
EXPORT_SYMBOL(dma_sync_single_for_cpu);
|
||||
EXPORT_SYMBOL(__dma_sync_single_for_cpu);
|
||||
|
||||
void dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
|
||||
void __dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
|
||||
size_t size, enum dma_data_direction dir)
|
||||
{
|
||||
const struct dma_map_ops *ops = get_dma_ops(dev);
|
||||
@ -355,9 +356,9 @@ void dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
|
||||
ops->sync_single_for_device(dev, addr, size, dir);
|
||||
debug_dma_sync_single_for_device(dev, addr, size, dir);
|
||||
}
|
||||
EXPORT_SYMBOL(dma_sync_single_for_device);
|
||||
EXPORT_SYMBOL(__dma_sync_single_for_device);
|
||||
|
||||
void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
|
||||
void __dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
|
||||
int nelems, enum dma_data_direction dir)
|
||||
{
|
||||
const struct dma_map_ops *ops = get_dma_ops(dev);
|
||||
@ -369,9 +370,9 @@ void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
|
||||
ops->sync_sg_for_cpu(dev, sg, nelems, dir);
|
||||
debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir);
|
||||
}
|
||||
EXPORT_SYMBOL(dma_sync_sg_for_cpu);
|
||||
EXPORT_SYMBOL(__dma_sync_sg_for_cpu);
|
||||
|
||||
void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
|
||||
void __dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
|
||||
int nelems, enum dma_data_direction dir)
|
||||
{
|
||||
const struct dma_map_ops *ops = get_dma_ops(dev);
|
||||
@ -383,7 +384,47 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
|
||||
ops->sync_sg_for_device(dev, sg, nelems, dir);
|
||||
debug_dma_sync_sg_for_device(dev, sg, nelems, dir);
|
||||
}
|
||||
EXPORT_SYMBOL(dma_sync_sg_for_device);
|
||||
EXPORT_SYMBOL(__dma_sync_sg_for_device);
|
||||
|
||||
/*
 * Out-of-line part of dma_need_sync(): decide whether @dma_addr needs DMA
 * sync operations on @dev.
 */
bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr)
{
	const struct dma_map_ops *map_ops = get_dma_ops(dev);

	/* Anything that is not a direct mapping is reported as needing sync. */
	if (!dma_map_direct(dev, map_ops))
		return true;

	/*
	 * dma_skip_sync could have been reset on the first SWIOTLB buffer
	 * mapping, but @dma_addr is not necessarily an SWIOTLB buffer.
	 * Fall back to the more granular per-address check.
	 */
	return dma_direct_need_sync(dev, dma_addr);
}
|
||||
EXPORT_SYMBOL_GPL(__dma_need_sync);
|
||||
|
||||
static void dma_setup_need_sync(struct device *dev)
|
||||
{
|
||||
const struct dma_map_ops *ops = get_dma_ops(dev);
|
||||
|
||||
if (dma_map_direct(dev, ops) || (ops->flags & DMA_F_CAN_SKIP_SYNC))
|
||||
/*
|
||||
* dma_skip_sync will be reset to %false on first SWIOTLB buffer
|
||||
* mapping, if any. During the device initialization, it's
|
||||
* enough to check only for the DMA coherence.
|
||||
*/
|
||||
dev->dma_skip_sync = dev_is_dma_coherent(dev);
|
||||
else if (!ops->sync_single_for_device && !ops->sync_single_for_cpu &&
|
||||
!ops->sync_sg_for_device && !ops->sync_sg_for_cpu)
|
||||
/*
|
||||
* Synchronization is not possible when none of DMA sync ops
|
||||
* is set.
|
||||
*/
|
||||
dev->dma_skip_sync = true;
|
||||
else
|
||||
dev->dma_skip_sync = false;
|
||||
}
|
||||
#else /* !CONFIG_DMA_NEED_SYNC */
|
||||
static inline void dma_setup_need_sync(struct device *dev) { }
|
||||
#endif /* !CONFIG_DMA_NEED_SYNC */
|
||||
|
||||
/*
|
||||
* The whole dma_get_sgtable() idea is fundamentally unsafe - it seems
|
||||
@ -773,6 +814,8 @@ int dma_set_mask(struct device *dev, u64 mask)
|
||||
|
||||
arch_dma_set_mask(dev, mask);
|
||||
*dev->dma_mask = mask;
|
||||
dma_setup_need_sync(dev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(dma_set_mask);
|
||||
@ -841,16 +884,6 @@ size_t dma_opt_mapping_size(struct device *dev)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dma_opt_mapping_size);
|
||||
|
||||
bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
|
||||
{
|
||||
const struct dma_map_ops *ops = get_dma_ops(dev);
|
||||
|
||||
if (dma_map_direct(dev, ops))
|
||||
return dma_direct_need_sync(dev, dma_addr);
|
||||
return ops->sync_single_for_cpu || ops->sync_single_for_device;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dma_need_sync);
|
||||
|
||||
unsigned long dma_get_merge_boundary(struct device *dev)
|
||||
{
|
||||
const struct dma_map_ops *ops = get_dma_ops(dev);
|
||||
|
@ -1340,15 +1340,40 @@ static unsigned long mem_used(struct io_tlb_mem *mem)
|
||||
|
||||
#endif /* CONFIG_DEBUG_FS */
|
||||
|
||||
/**
|
||||
* swiotlb_tbl_map_single() - bounce buffer map a single contiguous physical area
|
||||
* @dev: Device which maps the buffer.
|
||||
* @orig_addr: Original (non-bounced) physical IO buffer address
|
||||
* @mapping_size: Requested size of the actual bounce buffer, excluding
|
||||
* any pre- or post-padding for alignment
|
||||
* @alloc_align_mask: Required start and end alignment of the allocated buffer
|
||||
* @dir: DMA direction
|
||||
* @attrs: Optional DMA attributes for the map operation
|
||||
*
|
||||
* Find and allocate a suitable sequence of IO TLB slots for the request.
|
||||
* The allocated space starts at an alignment specified by alloc_align_mask,
|
||||
* and the size of the allocated space is rounded up so that the total amount
|
||||
* of allocated space is a multiple of (alloc_align_mask + 1). If
|
||||
* alloc_align_mask is zero, the allocated space may be at any alignment and
|
||||
* the size is not rounded up.
|
||||
*
|
||||
* The returned address is within the allocated space and matches the bits
|
||||
* of orig_addr that are specified in the DMA min_align_mask for the device. As
|
||||
* such, this returned address may be offset from the beginning of the allocated
|
||||
* space. The bounce buffer space starting at the returned address for
|
||||
* mapping_size bytes is initialized to the contents of the original IO buffer
|
||||
* area. Any pre-padding (due to an offset) and any post-padding (due to
|
||||
* rounding-up the size) is not initialized.
|
||||
*/
|
||||
phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
|
||||
size_t mapping_size, size_t alloc_size,
|
||||
unsigned int alloc_align_mask, enum dma_data_direction dir,
|
||||
unsigned long attrs)
|
||||
size_t mapping_size, unsigned int alloc_align_mask,
|
||||
enum dma_data_direction dir, unsigned long attrs)
|
||||
{
|
||||
struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
|
||||
unsigned int offset;
|
||||
struct io_tlb_pool *pool;
|
||||
unsigned int i;
|
||||
size_t size;
|
||||
int index;
|
||||
phys_addr_t tlb_addr;
|
||||
unsigned short pad_slots;
|
||||
@ -1362,23 +1387,33 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
|
||||
if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
|
||||
pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
|
||||
|
||||
if (mapping_size > alloc_size) {
|
||||
dev_warn_once(dev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
|
||||
mapping_size, alloc_size);
|
||||
return (phys_addr_t)DMA_MAPPING_ERROR;
|
||||
}
|
||||
/*
|
||||
* The default swiotlb memory pool is allocated with PAGE_SIZE
|
||||
* alignment. If a mapping is requested with larger alignment,
|
||||
* the mapping may be unable to use the initial slot(s) in all
|
||||
* sets of IO_TLB_SEGSIZE slots. In such case, a mapping request
|
||||
* of or near the maximum mapping size would always fail.
|
||||
*/
|
||||
dev_WARN_ONCE(dev, alloc_align_mask > ~PAGE_MASK,
|
||||
"Alloc alignment may prevent fulfilling requests with max mapping_size\n");
|
||||
|
||||
offset = swiotlb_align_offset(dev, alloc_align_mask, orig_addr);
|
||||
index = swiotlb_find_slots(dev, orig_addr,
|
||||
alloc_size + offset, alloc_align_mask, &pool);
|
||||
size = ALIGN(mapping_size + offset, alloc_align_mask + 1);
|
||||
index = swiotlb_find_slots(dev, orig_addr, size, alloc_align_mask, &pool);
|
||||
if (index == -1) {
|
||||
if (!(attrs & DMA_ATTR_NO_WARN))
|
||||
dev_warn_ratelimited(dev,
|
||||
"swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
|
||||
alloc_size, mem->nslabs, mem_used(mem));
|
||||
size, mem->nslabs, mem_used(mem));
|
||||
return (phys_addr_t)DMA_MAPPING_ERROR;
|
||||
}
|
||||
|
||||
/*
|
||||
* If dma_skip_sync was set, reset it on first SWIOTLB buffer
|
||||
* mapping to always sync SWIOTLB buffers.
|
||||
*/
|
||||
dma_reset_need_sync(dev);
|
||||
|
||||
/*
|
||||
* Save away the mapping from the original address to the DMA address.
|
||||
* This is needed when we sync the memory. Then we sync the buffer if
|
||||
@ -1388,7 +1423,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
|
||||
offset &= (IO_TLB_SIZE - 1);
|
||||
index += pad_slots;
|
||||
pool->slots[index].pad_slots = pad_slots;
|
||||
for (i = 0; i < nr_slots(alloc_size + offset); i++)
|
||||
for (i = 0; i < (nr_slots(size) - pad_slots); i++)
|
||||
pool->slots[index + i].orig_addr = slot_addr(orig_addr, i);
|
||||
tlb_addr = slot_addr(pool->start, index) + offset;
|
||||
/*
|
||||
@ -1543,8 +1578,7 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
|
||||
|
||||
trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size);
|
||||
|
||||
swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, 0, dir,
|
||||
attrs);
|
||||
swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, 0, dir, attrs);
|
||||
if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR)
|
||||
return DMA_MAPPING_ERROR;
|
||||
|
||||
|
@ -173,19 +173,29 @@ static void page_pool_producer_unlock(struct page_pool *pool,
|
||||
spin_unlock_bh(&pool->ring.producer_lock);
|
||||
}
|
||||
|
||||
static void page_pool_struct_check(void)
|
||||
{
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_users);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_page);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_offset);
|
||||
CACHELINE_ASSERT_GROUP_SIZE(struct page_pool, frag, 4 * sizeof(long));
|
||||
}
|
||||
|
||||
static int page_pool_init(struct page_pool *pool,
|
||||
const struct page_pool_params *params,
|
||||
int cpuid)
|
||||
{
|
||||
unsigned int ring_qsize = 1024; /* Default */
|
||||
|
||||
page_pool_struct_check();
|
||||
|
||||
memcpy(&pool->p, ¶ms->fast, sizeof(pool->p));
|
||||
memcpy(&pool->slow, ¶ms->slow, sizeof(pool->slow));
|
||||
|
||||
pool->cpuid = cpuid;
|
||||
|
||||
/* Validate only known flags were used */
|
||||
if (pool->p.flags & ~(PP_FLAG_ALL))
|
||||
if (pool->slow.flags & ~PP_FLAG_ALL)
|
||||
return -EINVAL;
|
||||
|
||||
if (pool->p.pool_size)
|
||||
@ -199,22 +209,26 @@ static int page_pool_init(struct page_pool *pool,
|
||||
* DMA_BIDIRECTIONAL is for allowing page used for DMA sending,
|
||||
* which is the XDP_TX use-case.
|
||||
*/
|
||||
if (pool->p.flags & PP_FLAG_DMA_MAP) {
|
||||
if (pool->slow.flags & PP_FLAG_DMA_MAP) {
|
||||
if ((pool->p.dma_dir != DMA_FROM_DEVICE) &&
|
||||
(pool->p.dma_dir != DMA_BIDIRECTIONAL))
|
||||
return -EINVAL;
|
||||
|
||||
pool->dma_map = true;
|
||||
}
|
||||
|
||||
if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) {
|
||||
if (pool->slow.flags & PP_FLAG_DMA_SYNC_DEV) {
|
||||
/* In order to request DMA-sync-for-device the page
|
||||
* needs to be mapped
|
||||
*/
|
||||
if (!(pool->p.flags & PP_FLAG_DMA_MAP))
|
||||
if (!(pool->slow.flags & PP_FLAG_DMA_MAP))
|
||||
return -EINVAL;
|
||||
|
||||
if (!pool->p.max_len)
|
||||
return -EINVAL;
|
||||
|
||||
pool->dma_sync = true;
|
||||
|
||||
/* pool->p.offset has to be set according to the address
|
||||
* offset used by the DMA engine to start copying rx data
|
||||
*/
|
||||
@ -223,7 +237,7 @@ static int page_pool_init(struct page_pool *pool,
|
||||
pool->has_init_callback = !!pool->slow.init_callback;
|
||||
|
||||
#ifdef CONFIG_PAGE_POOL_STATS
|
||||
if (!(pool->p.flags & PP_FLAG_SYSTEM_POOL)) {
|
||||
if (!(pool->slow.flags & PP_FLAG_SYSTEM_POOL)) {
|
||||
pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats);
|
||||
if (!pool->recycle_stats)
|
||||
return -ENOMEM;
|
||||
@ -233,12 +247,13 @@ static int page_pool_init(struct page_pool *pool,
|
||||
* (also percpu) page pool instance.
|
||||
*/
|
||||
pool->recycle_stats = &pp_system_recycle_stats;
|
||||
pool->system = true;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) {
|
||||
#ifdef CONFIG_PAGE_POOL_STATS
|
||||
if (!(pool->p.flags & PP_FLAG_SYSTEM_POOL))
|
||||
if (!pool->system)
|
||||
free_percpu(pool->recycle_stats);
|
||||
#endif
|
||||
return -ENOMEM;
|
||||
@ -249,7 +264,7 @@ static int page_pool_init(struct page_pool *pool,
|
||||
/* Driver calling page_pool_create() also call page_pool_destroy() */
|
||||
refcount_set(&pool->user_cnt, 1);
|
||||
|
||||
if (pool->p.flags & PP_FLAG_DMA_MAP)
|
||||
if (pool->dma_map)
|
||||
get_device(pool->p.dev);
|
||||
|
||||
return 0;
|
||||
@ -259,11 +274,11 @@ static void page_pool_uninit(struct page_pool *pool)
|
||||
{
|
||||
ptr_ring_cleanup(&pool->ring, NULL);
|
||||
|
||||
if (pool->p.flags & PP_FLAG_DMA_MAP)
|
||||
if (pool->dma_map)
|
||||
put_device(pool->p.dev);
|
||||
|
||||
#ifdef CONFIG_PAGE_POOL_STATS
|
||||
if (!(pool->p.flags & PP_FLAG_SYSTEM_POOL))
|
||||
if (!pool->system)
|
||||
free_percpu(pool->recycle_stats);
|
||||
#endif
|
||||
}
|
||||
@ -384,16 +399,26 @@ static struct page *__page_pool_get_cached(struct page_pool *pool)
|
||||
return page;
|
||||
}
|
||||
|
||||
static void page_pool_dma_sync_for_device(const struct page_pool *pool,
|
||||
const struct page *page,
|
||||
unsigned int dma_sync_size)
|
||||
static void __page_pool_dma_sync_for_device(const struct page_pool *pool,
|
||||
const struct page *page,
|
||||
u32 dma_sync_size)
|
||||
{
|
||||
#if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC)
|
||||
dma_addr_t dma_addr = page_pool_get_dma_addr(page);
|
||||
|
||||
dma_sync_size = min(dma_sync_size, pool->p.max_len);
|
||||
dma_sync_single_range_for_device(pool->p.dev, dma_addr,
|
||||
pool->p.offset, dma_sync_size,
|
||||
pool->p.dma_dir);
|
||||
__dma_sync_single_for_device(pool->p.dev, dma_addr + pool->p.offset,
|
||||
dma_sync_size, pool->p.dma_dir);
|
||||
#endif
|
||||
}
|
||||
|
||||
static __always_inline void
|
||||
page_pool_dma_sync_for_device(const struct page_pool *pool,
|
||||
const struct page *page,
|
||||
u32 dma_sync_size)
|
||||
{
|
||||
if (pool->dma_sync && dma_dev_need_sync(pool->p.dev))
|
||||
__page_pool_dma_sync_for_device(pool, page, dma_sync_size);
|
||||
}
|
||||
|
||||
static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
|
||||
@ -415,8 +440,7 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
|
||||
if (page_pool_set_dma_addr(page, dma))
|
||||
goto unmap_failed;
|
||||
|
||||
if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
|
||||
page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
|
||||
page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
|
||||
|
||||
return true;
|
||||
|
||||
@ -461,8 +485,7 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
|
||||
if (unlikely(!page))
|
||||
return NULL;
|
||||
|
||||
if ((pool->p.flags & PP_FLAG_DMA_MAP) &&
|
||||
unlikely(!page_pool_dma_map(pool, page))) {
|
||||
if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page))) {
|
||||
put_page(page);
|
||||
return NULL;
|
||||
}
|
||||
@ -482,8 +505,8 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
|
||||
gfp_t gfp)
|
||||
{
|
||||
const int bulk = PP_ALLOC_CACHE_REFILL;
|
||||
unsigned int pp_flags = pool->p.flags;
|
||||
unsigned int pp_order = pool->p.order;
|
||||
bool dma_map = pool->dma_map;
|
||||
struct page *page;
|
||||
int i, nr_pages;
|
||||
|
||||
@ -508,8 +531,7 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
|
||||
*/
|
||||
for (i = 0; i < nr_pages; i++) {
|
||||
page = pool->alloc.cache[i];
|
||||
if ((pp_flags & PP_FLAG_DMA_MAP) &&
|
||||
unlikely(!page_pool_dma_map(pool, page))) {
|
||||
if (dma_map && unlikely(!page_pool_dma_map(pool, page))) {
|
||||
put_page(page);
|
||||
continue;
|
||||
}
|
||||
@ -582,7 +604,7 @@ void __page_pool_release_page_dma(struct page_pool *pool, struct page *page)
|
||||
{
|
||||
dma_addr_t dma;
|
||||
|
||||
if (!(pool->p.flags & PP_FLAG_DMA_MAP))
|
||||
if (!pool->dma_map)
|
||||
/* Always account for inflight pages, even if we didn't
|
||||
* map them
|
||||
*/
|
||||
@ -665,7 +687,7 @@ static bool __page_pool_page_can_be_recycled(const struct page *page)
|
||||
}
|
||||
|
||||
/* If the page refcnt == 1, this will try to recycle the page.
|
||||
* if PP_FLAG_DMA_SYNC_DEV is set, we'll try to sync the DMA area for
|
||||
* If pool->dma_sync is set, we'll try to sync the DMA area for
|
||||
* the configured size min(dma_sync_size, pool->max_len).
|
||||
* If the page refcnt != 1, then the page will be returned to memory
|
||||
* subsystem.
|
||||
@ -688,9 +710,7 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
|
||||
if (likely(__page_pool_page_can_be_recycled(page))) {
|
||||
/* Read barrier done in page_ref_count / READ_ONCE */
|
||||
|
||||
if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
|
||||
page_pool_dma_sync_for_device(pool, page,
|
||||
dma_sync_size);
|
||||
page_pool_dma_sync_for_device(pool, page, dma_sync_size);
|
||||
|
||||
if (allow_direct && page_pool_recycle_in_cache(page, pool))
|
||||
return NULL;
|
||||
@ -829,9 +849,7 @@ static struct page *page_pool_drain_frag(struct page_pool *pool,
|
||||
return NULL;
|
||||
|
||||
if (__page_pool_page_can_be_recycled(page)) {
|
||||
if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
|
||||
page_pool_dma_sync_for_device(pool, page, -1);
|
||||
|
||||
page_pool_dma_sync_for_device(pool, page, -1);
|
||||
return page;
|
||||
}
|
||||
|
||||
|
@ -338,7 +338,6 @@ static struct xsk_dma_map *xp_create_dma_map(struct device *dev, struct net_devi
|
||||
|
||||
dma_map->netdev = netdev;
|
||||
dma_map->dev = dev;
|
||||
dma_map->dma_need_sync = false;
|
||||
dma_map->dma_pages_cnt = nr_pages;
|
||||
refcount_set(&dma_map->users, 1);
|
||||
list_add(&dma_map->list, &umem->xsk_dma_list);
|
||||
@ -424,7 +423,6 @@ static int xp_init_dma_info(struct xsk_buff_pool *pool, struct xsk_dma_map *dma_
|
||||
|
||||
pool->dev = dma_map->dev;
|
||||
pool->dma_pages_cnt = dma_map->dma_pages_cnt;
|
||||
pool->dma_need_sync = dma_map->dma_need_sync;
|
||||
memcpy(pool->dma_pages, dma_map->dma_pages,
|
||||
pool->dma_pages_cnt * sizeof(*pool->dma_pages));
|
||||
|
||||
@ -460,8 +458,6 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
|
||||
__xp_dma_unmap(dma_map, attrs);
|
||||
return -ENOMEM;
|
||||
}
|
||||
if (dma_need_sync(dev, dma))
|
||||
dma_map->dma_need_sync = true;
|
||||
dma_map->dma_pages[i] = dma;
|
||||
}
|
||||
|
||||
@ -557,11 +553,9 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool)
|
||||
xskb->xdp.data_meta = xskb->xdp.data;
|
||||
xskb->xdp.flags = 0;
|
||||
|
||||
if (pool->dma_need_sync) {
|
||||
dma_sync_single_range_for_device(pool->dev, xskb->dma, 0,
|
||||
pool->frame_len,
|
||||
DMA_BIDIRECTIONAL);
|
||||
}
|
||||
if (pool->dev)
|
||||
xp_dma_sync_for_device(pool, xskb->dma, pool->frame_len);
|
||||
|
||||
return &xskb->xdp;
|
||||
}
|
||||
EXPORT_SYMBOL(xp_alloc);
|
||||
@ -633,7 +627,7 @@ u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
|
||||
{
|
||||
u32 nb_entries1 = 0, nb_entries2;
|
||||
|
||||
if (unlikely(pool->dma_need_sync)) {
|
||||
if (unlikely(pool->dev && dma_dev_need_sync(pool->dev))) {
|
||||
struct xdp_buff *buff;
|
||||
|
||||
/* Slow path */
|
||||
@ -693,18 +687,3 @@ dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr)
|
||||
(addr & ~PAGE_MASK);
|
||||
}
|
||||
EXPORT_SYMBOL(xp_raw_get_dma);
|
||||
|
||||
void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb)
|
||||
{
|
||||
dma_sync_single_range_for_cpu(xskb->pool->dev, xskb->dma, 0,
|
||||
xskb->pool->frame_len, DMA_BIDIRECTIONAL);
|
||||
}
|
||||
EXPORT_SYMBOL(xp_dma_sync_for_cpu_slow);
|
||||
|
||||
void xp_dma_sync_for_device_slow(struct xsk_buff_pool *pool, dma_addr_t dma,
|
||||
size_t size)
|
||||
{
|
||||
dma_sync_single_range_for_device(pool->dev, dma, 0,
|
||||
size, DMA_BIDIRECTIONAL);
|
||||
}
|
||||
EXPORT_SYMBOL(xp_dma_sync_for_device_slow);
|
||||
|
Loading…
Reference in New Issue
Block a user