dma-mapping updates for Linux 6.2

  - reduce the swiotlb buffer size on allocation failure
    (Alexey Kardashevskiy)
  - clean up passing of bogus GFP flags to the dma-coherent allocator
    (Christoph Hellwig)
 -----BEGIN PGP SIGNATURE-----
 
 iQI/BAABCgApFiEEgdbnc3r/njty3Iq9D55TZVIEUYMFAmOYqxsLHGhjaEBsc3Qu
 ZGUACgkQD55TZVIEUYPh8g/+JB8mwv/qVkTecf+rJ4eewgNdkdRPFZfl2oPixZ3n
 tfTuy5+ly7zwzpNK3Kjy4UTj9rcyc9Pn5vKp8O9l8/d4w4HvCH9z3XaPARVR8cz0
 TBDrfX5PDWsR8bN246GxJJRZPH5ogmZ9Pl1EG7/ZLM5CF8PFVct9usr/zQTNkcQH
 K4Hf6suTXDmlxMbC22EyCtwKTA6ThppJOJTId+iLSu77g5xi51LrNGbYV8Ylfynb
 9p3lU67nTpXwrn019moPrEYs+QHvjVnfrIK2b2cafpu/DA1Vrkk9dF8DDvK0kXu5
 OBqU2NlPqHsdAp3jgNXxemPmw8eMbUW+gV3IQknTQEPsGStPKWM3b0qCpCGXAT8r
 79sEFc3NoUyJsz26BqqI2lWIBu7KOs+j1ZJlilG7pBZu1zwdYPrzglj8qUnTbT1F
 n9TP/A6Yu10ea2zLWrrbMVmA45lMMczHteqX6Hxr7gfHRQqsB3401YYGkQbUa15w
 xiQVx29dicXsD+QDS1FziGVbBqexZ8PsbBAlX3NJJcxMvAkCuG0CUclGhlbsEIx1
 zRmUNDbL4b1ImzmFUh9avpfDJntBfyWpjh4lWu3G+Y4JRfwfG/HNncDf+DFJHns+
 oT0Ox4fPdbfvWKXJx48LPoe0XzgjCguMS0Ql6CDgQj+uy7PdqfTHFJOlsCUDsBBH
 c5c=
 =t9QU
 -----END PGP SIGNATURE-----

Merge tag 'dma-mapping-6.2-2022-12-13' of git://git.infradead.org/users/hch/dma-mapping

Pull dma-mapping updates from Christoph Hellwig:

 - reduce the swiotlb buffer size on allocation failure (Alexey
   Kardashevskiy)

 - clean up passing of bogus GFP flags to the dma-coherent allocator
   (Christoph Hellwig)

* tag 'dma-mapping-6.2-2022-12-13' of git://git.infradead.org/users/hch/dma-mapping:
  dma-mapping: reject __GFP_COMP in dma_alloc_attrs
  ALSA: memalloc: don't pass bogus GFP_ flags to dma_alloc_*
  s390/ism: don't pass bogus GFP_ flags to dma_alloc_coherent
  cnic: don't pass bogus GFP_ flags to dma_alloc_coherent
  RDMA/qib: don't pass bogus GFP_ flags to dma_alloc_coherent
  RDMA/hfi1: don't pass bogus GFP_ flags to dma_alloc_coherent
  media: videobuf-dma-contig: use dma_mmap_coherent
  swiotlb: reduce the swiotlb buffer size on allocation failure
This commit is contained in:
Linus Torvalds 2022-12-13 09:05:19 -08:00
commit e529d3507a
11 changed files with 69 additions and 102 deletions

View File

@ -564,14 +564,6 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
if (mask < 0xffffffffULL)
gfp |= GFP_DMA;
/*
* Following is a work-around (a.k.a. hack) to prevent pages
* with __GFP_COMP being passed to split_page() which cannot
* handle them. The real problem is that this flag probably
* should be 0 on ARM as it is not supported on this
* platform; see CONFIG_HUGETLBFS.
*/
gfp &= ~(__GFP_COMP);
args.gfp = gfp;
*handle = DMA_MAPPING_ERROR;
@ -1093,15 +1085,6 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
return __iommu_alloc_simple(dev, size, gfp, handle,
coherent_flag, attrs);
/*
* Following is a work-around (a.k.a. hack) to prevent pages
* with __GFP_COMP being passed to split_page() which cannot
* handle them. The real problem is that this flag probably
* should be 0 on ARM as it is not supported on this
* platform; see CONFIG_HUGETLBFS.
*/
gfp &= ~(__GFP_COMP);
pages = __iommu_alloc_buffer(dev, size, gfp, attrs, coherent_flag);
if (!pages)
return NULL;

View File

@ -1761,17 +1761,11 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
unsigned amt;
if (!rcd->rcvhdrq) {
gfp_t gfp_flags;
amt = rcvhdrq_size(rcd);
if (rcd->ctxt < dd->first_dyn_alloc_ctxt || rcd->is_vnic)
gfp_flags = GFP_KERNEL;
else
gfp_flags = GFP_USER;
rcd->rcvhdrq = dma_alloc_coherent(&dd->pcidev->dev, amt,
&rcd->rcvhdrq_dma,
gfp_flags | __GFP_COMP);
GFP_KERNEL);
if (!rcd->rcvhdrq) {
dd_dev_err(dd,
@ -1785,7 +1779,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
rcd->rcvhdrtail_kvaddr = dma_alloc_coherent(&dd->pcidev->dev,
PAGE_SIZE,
&rcd->rcvhdrqtailaddr_dma,
gfp_flags);
GFP_KERNEL);
if (!rcd->rcvhdrtail_kvaddr)
goto bail_free;
}
@ -1821,19 +1815,10 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
{
struct hfi1_devdata *dd = rcd->dd;
u32 max_entries, egrtop, alloced_bytes = 0;
gfp_t gfp_flags;
u16 order, idx = 0;
int ret = 0;
u16 round_mtu = roundup_pow_of_two(hfi1_max_mtu);
/*
* GFP_USER, but without GFP_FS, so buffer cache can be
* coalesced (we hope); otherwise, even at order 4,
* heavy filesystem activity makes these fail, and we can
* use compound pages.
*/
gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP;
/*
* The minimum size of the eager buffers is a group of MTU-sized
* buffers.
@ -1864,7 +1849,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
dma_alloc_coherent(&dd->pcidev->dev,
rcd->egrbufs.rcvtid_size,
&rcd->egrbufs.buffers[idx].dma,
gfp_flags);
GFP_KERNEL);
if (rcd->egrbufs.buffers[idx].addr) {
rcd->egrbufs.buffers[idx].len =
rcd->egrbufs.rcvtid_size;

View File

@ -2075,7 +2075,7 @@ static void alloc_dummy_hdrq(struct qib_devdata *dd)
dd->cspec->dummy_hdrq = dma_alloc_coherent(&dd->pcidev->dev,
dd->rcd[0]->rcvhdrq_size,
&dd->cspec->dummy_hdrq_phys,
GFP_ATOMIC | __GFP_COMP);
GFP_ATOMIC);
if (!dd->cspec->dummy_hdrq) {
qib_devinfo(dd->pcidev, "Couldn't allocate dummy hdrq\n");
/* fallback to just 0'ing */

View File

@ -1546,18 +1546,14 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
if (!rcd->rcvhdrq) {
dma_addr_t phys_hdrqtail;
gfp_t gfp_flags;
amt = ALIGN(dd->rcvhdrcnt * dd->rcvhdrentsize *
sizeof(u32), PAGE_SIZE);
gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ?
GFP_USER : GFP_KERNEL;
old_node_id = dev_to_node(&dd->pcidev->dev);
set_dev_node(&dd->pcidev->dev, rcd->node_id);
rcd->rcvhdrq = dma_alloc_coherent(
&dd->pcidev->dev, amt, &rcd->rcvhdrq_phys,
gfp_flags | __GFP_COMP);
rcd->rcvhdrq = dma_alloc_coherent(&dd->pcidev->dev, amt,
&rcd->rcvhdrq_phys, GFP_KERNEL);
set_dev_node(&dd->pcidev->dev, old_node_id);
if (!rcd->rcvhdrq) {
@ -1577,7 +1573,7 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
set_dev_node(&dd->pcidev->dev, rcd->node_id);
rcd->rcvhdrtail_kvaddr = dma_alloc_coherent(
&dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
gfp_flags);
GFP_KERNEL);
set_dev_node(&dd->pcidev->dev, old_node_id);
if (!rcd->rcvhdrtail_kvaddr)
goto bail_free;
@ -1621,17 +1617,8 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
struct qib_devdata *dd = rcd->dd;
unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
size_t size;
gfp_t gfp_flags;
int old_node_id;
/*
* GFP_USER, but without GFP_FS, so buffer cache can be
* coalesced (we hope); otherwise, even at order 4,
* heavy filesystem activity makes these fail, and we can
* use compound pages.
*/
gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP;
egrcnt = rcd->rcvegrcnt;
egroff = rcd->rcvegr_tid_base;
egrsize = dd->rcvegrbufsize;
@ -1663,7 +1650,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
rcd->rcvegrbuf[e] =
dma_alloc_coherent(&dd->pcidev->dev, size,
&rcd->rcvegrbuf_phys[e],
gfp_flags);
GFP_KERNEL);
set_dev_node(&dd->pcidev->dev, old_node_id);
if (!rcd->rcvegrbuf[e])
goto bail_rcvegrbuf_phys;

View File

@ -744,9 +744,6 @@ static struct page **__iommu_dma_alloc_pages(struct device *dev,
/* IOMMU can map any pages, so himem can also be used here */
gfp |= __GFP_NOWARN | __GFP_HIGHMEM;
/* It makes no sense to muck about with huge pages */
gfp &= ~__GFP_COMP;
while (count) {
struct page *page = NULL;
unsigned int order_size;

View File

@ -36,12 +36,11 @@ struct videobuf_dma_contig_memory {
static int __videobuf_dc_alloc(struct device *dev,
struct videobuf_dma_contig_memory *mem,
unsigned long size, gfp_t flags)
unsigned long size)
{
mem->size = size;
mem->vaddr = dma_alloc_coherent(dev, mem->size,
&mem->dma_handle, flags);
mem->vaddr = dma_alloc_coherent(dev, mem->size, &mem->dma_handle,
GFP_KERNEL);
if (!mem->vaddr) {
dev_err(dev, "memory alloc size %ld failed\n", mem->size);
return -ENOMEM;
@ -258,8 +257,7 @@ static int __videobuf_iolock(struct videobuf_queue *q,
return videobuf_dma_contig_user_get(mem, vb);
/* allocate memory for the read() method */
if (__videobuf_dc_alloc(q->dev, mem, PAGE_ALIGN(vb->size),
GFP_KERNEL))
if (__videobuf_dc_alloc(q->dev, mem, PAGE_ALIGN(vb->size)))
return -ENOMEM;
break;
case V4L2_MEMORY_OVERLAY:
@ -295,22 +293,18 @@ static int __videobuf_mmap_mapper(struct videobuf_queue *q,
BUG_ON(!mem);
MAGIC_CHECK(mem->magic, MAGIC_DC_MEM);
if (__videobuf_dc_alloc(q->dev, mem, PAGE_ALIGN(buf->bsize),
GFP_KERNEL | __GFP_COMP))
if (__videobuf_dc_alloc(q->dev, mem, PAGE_ALIGN(buf->bsize)))
goto error;
/* Try to remap memory */
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
/* the "vm_pgoff" is just used in v4l2 to find the
* corresponding buffer data structure which is allocated
* earlier and it does not mean the offset from the physical
* buffer start address as usual. So set it to 0 to pass
* the sanity check in vm_iomap_memory().
* the sanity check in dma_mmap_coherent().
*/
vma->vm_pgoff = 0;
retval = vm_iomap_memory(vma, mem->dma_handle, mem->size);
retval = dma_mmap_coherent(q->dev, vma, mem->vaddr, mem->dma_handle,
mem->size);
if (retval) {
dev_err(q->dev, "mmap: remap failed with error %d. ",
retval);

View File

@ -1027,16 +1027,14 @@ static int __cnic_alloc_uio_rings(struct cnic_uio_dev *udev, int pages)
udev->l2_ring_size = pages * CNIC_PAGE_SIZE;
udev->l2_ring = dma_alloc_coherent(&udev->pdev->dev, udev->l2_ring_size,
&udev->l2_ring_map,
GFP_KERNEL | __GFP_COMP);
&udev->l2_ring_map, GFP_KERNEL);
if (!udev->l2_ring)
return -ENOMEM;
udev->l2_buf_size = (cp->l2_rx_ring_size + 1) * cp->l2_single_buf_size;
udev->l2_buf_size = CNIC_PAGE_ALIGN(udev->l2_buf_size);
udev->l2_buf = dma_alloc_coherent(&udev->pdev->dev, udev->l2_buf_size,
&udev->l2_buf_map,
GFP_KERNEL | __GFP_COMP);
&udev->l2_buf_map, GFP_KERNEL);
if (!udev->l2_buf) {
__cnic_free_uio_rings(udev);
return -ENOMEM;

View File

@ -243,7 +243,8 @@ static int ism_alloc_dmb(struct ism_dev *ism, struct smcd_dmb *dmb)
dmb->cpu_addr = dma_alloc_coherent(&ism->pdev->dev, dmb->dmb_len,
&dmb->dma_addr,
GFP_KERNEL | __GFP_NOWARN | __GFP_NOMEMALLOC | __GFP_COMP | __GFP_NORETRY);
GFP_KERNEL | __GFP_NOWARN |
__GFP_NOMEMALLOC | __GFP_NORETRY);
if (!dmb->cpu_addr)
clear_bit(dmb->sba_idx, ism->sba_bitmap);

View File

@ -498,6 +498,14 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
WARN_ON_ONCE(!dev->coherent_dma_mask);
/*
* DMA allocations can never be turned back into a page pointer, so
* requesting compound pages doesn't make sense (and can't even be
* supported at all by various backends).
*/
if (WARN_ON_ONCE(flag & __GFP_COMP))
return NULL;
if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr))
return cpu_addr;

View File

@ -300,6 +300,37 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
return;
}
static void *swiotlb_memblock_alloc(unsigned long nslabs, unsigned int flags,
int (*remap)(void *tlb, unsigned long nslabs))
{
size_t bytes = PAGE_ALIGN(nslabs << IO_TLB_SHIFT);
void *tlb;
/*
* By default allocate the bounce buffer memory from low memory, but
* allow to pick a location everywhere for hypervisors with guest
* memory encryption.
*/
if (flags & SWIOTLB_ANY)
tlb = memblock_alloc(bytes, PAGE_SIZE);
else
tlb = memblock_alloc_low(bytes, PAGE_SIZE);
if (!tlb) {
pr_warn("%s: Failed to allocate %zu bytes tlb structure\n",
__func__, bytes);
return NULL;
}
if (remap && remap(tlb, nslabs) < 0) {
memblock_free(tlb, PAGE_ALIGN(bytes));
pr_warn("%s: Failed to remap %zu bytes\n", __func__, bytes);
return NULL;
}
return tlb;
}
/*
* Statically reserve bounce buffer space and initialize bounce buffer data
* structures for the software IO TLB used to implement the DMA API.
@ -310,7 +341,6 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
struct io_tlb_mem *mem = &io_tlb_default_mem;
unsigned long nslabs;
size_t alloc_size;
size_t bytes;
void *tlb;
if (!addressing_limit && !swiotlb_force_bounce)
@ -326,31 +356,16 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
swiotlb_adjust_nareas(num_possible_cpus());
nslabs = default_nslabs;
/*
* By default allocate the bounce buffer memory from low memory, but
* allow to pick a location everywhere for hypervisors with guest
* memory encryption.
*/
retry:
bytes = PAGE_ALIGN(nslabs << IO_TLB_SHIFT);
if (flags & SWIOTLB_ANY)
tlb = memblock_alloc(bytes, PAGE_SIZE);
else
tlb = memblock_alloc_low(bytes, PAGE_SIZE);
if (!tlb) {
pr_warn("%s: failed to allocate tlb structure\n", __func__);
return;
while ((tlb = swiotlb_memblock_alloc(nslabs, flags, remap)) == NULL) {
if (nslabs <= IO_TLB_MIN_SLABS)
return;
nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE);
}
if (remap && remap(tlb, nslabs) < 0) {
memblock_free(tlb, PAGE_ALIGN(bytes));
nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE);
if (nslabs >= IO_TLB_MIN_SLABS)
goto retry;
pr_warn("%s: Failed to remap %zu bytes\n", __func__, bytes);
return;
if (default_nslabs != nslabs) {
pr_info("SWIOTLB bounce buffer size adjusted %lu -> %lu slabs",
default_nslabs, nslabs);
default_nslabs = nslabs;
}
alloc_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), nslabs));

View File

@ -21,7 +21,6 @@
#define DEFAULT_GFP \
(GFP_KERNEL | \
__GFP_COMP | /* compound page lets parts be mapped */ \
__GFP_RETRY_MAYFAIL | /* don't trigger OOM-killer */ \
__GFP_NOWARN) /* no stack trace print - this call is non-critical */
@ -543,7 +542,7 @@ static void *snd_dma_noncontig_alloc(struct snd_dma_buffer *dmab, size_t size)
void *p;
sgt = dma_alloc_noncontiguous(dmab->dev.dev, size, dmab->dev.dir,
DEFAULT_GFP, 0);
DEFAULT_GFP | __GFP_COMP, 0);
#ifdef CONFIG_SND_DMA_SGBUF
if (!sgt && !get_dma_ops(dmab->dev.dev)) {
if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG)
@ -811,7 +810,7 @@ static void *snd_dma_noncoherent_alloc(struct snd_dma_buffer *dmab, size_t size)
void *p;
p = dma_alloc_noncoherent(dmab->dev.dev, size, &dmab->addr,
dmab->dev.dir, DEFAULT_GFP);
dmab->dev.dir, DEFAULT_GFP | __GFP_COMP);
if (p)
dmab->dev.need_sync = dma_need_sync(dmab->dev.dev, dmab->addr);
return p;