// SPDX-License-Identifier: GPL-2.0
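/*
 * udmabuf: turn memfd-backed (shmem or hugetlb) pages that userspace has
 * allocated into a dma-buf that can be shared with devices.
 */
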
#include <linux/cred.h>
#include <linux/device.h>
#include <linux/dma-buf.h>
#include <linux/dma-resv.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/memfd.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/shmem_fs.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <linux/udmabuf.h>
#include <linux/vmalloc.h>
#include <linux/iosys-map.h>

static int list_limit = 1024;
module_param(list_limit, int, 0644);
MODULE_PARM_DESC(list_limit, "udmabuf_create_list->count limit. Default is 1024.");

static int size_limit_mb = 64;
module_param(size_limit_mb, int, 0644);
MODULE_PARM_DESC(size_limit_mb, "Max size of a dmabuf, in megabytes. Default is 64.");
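
/*
 * A udmabuf describes one exported buffer: an array of pinned memfd folios
 * plus the per-page offsets into those folios.
 */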
struct udmabuf {
	pgoff_t pagecount;
	struct folio **folios;

	/*
	 * Unlike folios, pinned_folios is only used for unpinning.
	 * Therefore, nr_pinned is not necessarily the same as pagecount:
	 * pinned_folios only records the folios that were pinned during
	 * udmabuf_create.
	 * Note that, since a folio may be pinned multiple times, it can be
	 * added to pinned_folios multiple times, depending on how many
	 * times it was pinned at creation.
	 */
	pgoff_t nr_pinned;
	struct folio **pinned_folios;

	struct sg_table *sg;
	struct miscdevice *device;
	pgoff_t *offsets;
};
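
/*
 * Page-fault handler for the mmap path: insert the pfn backing the faulting
 * page, then opportunistically pre-fault the remaining pages of the VMA so
 * that later accesses do not have to trap into the kernel again.
 */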
static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct udmabuf *ubuf = vma->vm_private_data;
	pgoff_t pgoff = vmf->pgoff;
	unsigned long addr, pfn;
	vm_fault_t ret;

	if (pgoff >= ubuf->pagecount)
		return VM_FAULT_SIGBUS;

	pfn = folio_pfn(ubuf->folios[pgoff]);
	pfn += ubuf->offsets[pgoff] >> PAGE_SHIFT;

	ret = vmf_insert_pfn(vma, vmf->address, pfn);
	if (ret & VM_FAULT_ERROR)
		return ret;

	/* pre fault */
	pgoff = vma->vm_pgoff;
	addr = vma->vm_start;

	for (; addr < vma->vm_end; pgoff++, addr += PAGE_SIZE) {
		if (addr == vmf->address)
			continue;

		if (WARN_ON(pgoff >= ubuf->pagecount))
			break;

		pfn = folio_pfn(ubuf->folios[pgoff]);
		pfn += ubuf->offsets[pgoff] >> PAGE_SHIFT;

		/*
		 * If the below vmf_insert_pfn() fails, we do not return an
		 * error here during this pre-fault step. However, an error
		 * will be returned if the failure occurs when the addr is
		 * truly accessed.
		 */
		if (vmf_insert_pfn(vma, addr, pfn) & VM_FAULT_ERROR)
			break;
	}

	return ret;
}

static const struct vm_operations_struct udmabuf_vm_ops = {
	.fault = udmabuf_vm_fault,
};
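
/*
 * mmap support: the mapping must be shared, and pages are provided lazily by
 * udmabuf_vm_fault() via VM_PFNMAP inserts.
 */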
static int mmap_udmabuf(struct dma_buf *buf, struct vm_area_struct *vma)
{
	struct udmabuf *ubuf = buf->priv;

	if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
		return -EINVAL;

	vma->vm_ops = &udmabuf_vm_ops;
	vma->vm_private_data = ubuf;
	vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
	return 0;
}
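
/*
 * vmap support: map every page of the buffer into a contiguous kernel
 * virtual range so CPU code can access it through the iosys_map.
 */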
static int vmap_udmabuf(struct dma_buf *buf, struct iosys_map *map)
{
	struct udmabuf *ubuf = buf->priv;
	unsigned long *pfns;
	void *vaddr;
	pgoff_t pg;

	dma_resv_assert_held(buf->resv);

	/*
	 * HVO (HugeTLB Vmemmap Optimization) may free tail pages, so just
	 * use the pfn to map each folio into the vmalloc area.
	 */
	pfns = kvmalloc_array(ubuf->pagecount, sizeof(*pfns), GFP_KERNEL);
	if (!pfns)
		return -ENOMEM;

	for (pg = 0; pg < ubuf->pagecount; pg++) {
		unsigned long pfn = folio_pfn(ubuf->folios[pg]);

		pfn += ubuf->offsets[pg] >> PAGE_SHIFT;
		pfns[pg] = pfn;
	}

	vaddr = vmap_pfn(pfns, ubuf->pagecount, PAGE_KERNEL);
	kvfree(pfns);
	if (!vaddr)
		return -EINVAL;

	iosys_map_set_vaddr(map, vaddr);
	return 0;
}

static void vunmap_udmabuf(struct dma_buf *buf, struct iosys_map *map)
{
	struct udmabuf *ubuf = buf->priv;

	dma_resv_assert_held(buf->resv);

	vm_unmap_ram(map->vaddr, ubuf->pagecount);
}
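
/*
 * Build a scatter-gather table with one PAGE_SIZE entry per page of the
 * buffer and map it for DMA on the given device.
 */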
static struct sg_table *get_sg_table(struct device *dev, struct dma_buf *buf,
				     enum dma_data_direction direction)
{
	struct udmabuf *ubuf = buf->priv;
	struct sg_table *sg;
	struct scatterlist *sgl;
	unsigned int i = 0;
	int ret;

	sg = kzalloc(sizeof(*sg), GFP_KERNEL);
	if (!sg)
		return ERR_PTR(-ENOMEM);

	ret = sg_alloc_table(sg, ubuf->pagecount, GFP_KERNEL);
	if (ret < 0)
		goto err_alloc;

	for_each_sg(sg->sgl, sgl, ubuf->pagecount, i)
		sg_set_folio(sgl, ubuf->folios[i], PAGE_SIZE,
			     ubuf->offsets[i]);

	ret = dma_map_sgtable(dev, sg, direction, 0);
	if (ret < 0)
		goto err_map;
	return sg;

err_map:
	sg_free_table(sg);
err_alloc:
	kfree(sg);
	return ERR_PTR(ret);
}

static void put_sg_table(struct device *dev, struct sg_table *sg,
			 enum dma_data_direction direction)
{
	dma_unmap_sgtable(dev, sg, direction, 0);
	sg_free_table(sg);
	kfree(sg);
}

static struct sg_table *map_udmabuf(struct dma_buf_attachment *at,
				    enum dma_data_direction direction)
{
	return get_sg_table(at->dev, at->dmabuf, direction);
}

static void unmap_udmabuf(struct dma_buf_attachment *at,
			  struct sg_table *sg,
			  enum dma_data_direction direction)
{
	return put_sg_table(at->dev, sg, direction);
}
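
/*
 * Drop the pin reference taken on every folio recorded in pinned_folios at
 * creation time, then free the array itself.
 */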
static void unpin_all_folios(struct udmabuf *ubuf)
{
	pgoff_t i;

	for (i = 0; i < ubuf->nr_pinned; ++i)
		unpin_folio(ubuf->pinned_folios[i]);

	kvfree(ubuf->pinned_folios);
}
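
/*
 * Allocate the per-page bookkeeping arrays (folios, offsets, pinned_folios),
 * each sized for the total page count of the new buffer.
 */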
static __always_inline int init_udmabuf(struct udmabuf *ubuf, pgoff_t pgcnt)
{
	ubuf->folios = kvmalloc_array(pgcnt, sizeof(*ubuf->folios), GFP_KERNEL);
	if (!ubuf->folios)
		return -ENOMEM;

	ubuf->offsets = kvcalloc(pgcnt, sizeof(*ubuf->offsets), GFP_KERNEL);
	if (!ubuf->offsets)
		return -ENOMEM;

	ubuf->pinned_folios = kvmalloc_array(pgcnt,
					     sizeof(*ubuf->pinned_folios),
					     GFP_KERNEL);
	if (!ubuf->pinned_folios)
		return -ENOMEM;

	return 0;
}

static __always_inline void deinit_udmabuf(struct udmabuf *ubuf)
{
	unpin_all_folios(ubuf);
	kvfree(ubuf->offsets);
	kvfree(ubuf->folios);
}

static void release_udmabuf(struct dma_buf *buf)
{
	struct udmabuf *ubuf = buf->priv;
	struct device *dev = ubuf->device->this_device;

	if (ubuf->sg)
		put_sg_table(dev, ubuf->sg, DMA_BIDIRECTIONAL);

	deinit_udmabuf(ubuf);
	kfree(ubuf);
}
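
/*
 * CPU access hooks: a dma-buf lives in the DMA domain, so importers (and
 * userspace still accessing the buffer through the memfd mapping) must
 * bracket CPU access with begin/end_cpu_access. "begin" syncs the pages for
 * the CPU and "end" hands them back to the device, keeping caches coherent.
 */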
static int begin_cpu_udmabuf(struct dma_buf *buf,
			     enum dma_data_direction direction)
{
	struct udmabuf *ubuf = buf->priv;
	struct device *dev = ubuf->device->this_device;
	int ret = 0;

	if (!ubuf->sg) {
		ubuf->sg = get_sg_table(dev, buf, direction);
		if (IS_ERR(ubuf->sg)) {
			ret = PTR_ERR(ubuf->sg);
			ubuf->sg = NULL;
		}
	} else {
		dma_sync_sg_for_cpu(dev, ubuf->sg->sgl, ubuf->sg->nents,
				    direction);
	}

	return ret;
}

static int end_cpu_udmabuf(struct dma_buf *buf,
			   enum dma_data_direction direction)
{
	struct udmabuf *ubuf = buf->priv;
	struct device *dev = ubuf->device->this_device;

	if (!ubuf->sg)
		return -EINVAL;

	dma_sync_sg_for_device(dev, ubuf->sg->sgl, ubuf->sg->nents, direction);
	return 0;
}

static const struct dma_buf_ops udmabuf_ops = {
	.cache_sgt_mapping = true,
	.map_dma_buf = map_udmabuf,
	.unmap_dma_buf = unmap_udmabuf,
	.release = release_udmabuf,
	.mmap = mmap_udmabuf,
	.vmap = vmap_udmabuf,
	.vunmap = vunmap_udmabuf,
	.begin_cpu_access = begin_cpu_udmabuf,
	.end_cpu_access = end_cpu_udmabuf,
};

#define SEALS_WANTED (F_SEAL_SHRINK)
#define SEALS_DENIED (F_SEAL_WRITE|F_SEAL_FUTURE_WRITE)
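
/*
 * A backing memfd must be shmem- or hugetlbfs-backed, must be sealed against
 * shrinking, and must not be sealed against (future) writes.
 */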
static int check_memfd_seals(struct file *memfd)
{
	int seals;

	if (!shmem_file(memfd) && !is_file_hugepages(memfd))
		return -EBADFD;

	seals = memfd_fcntl(memfd, F_GET_SEALS, 0);
	if (seals == -EINVAL)
		return -EBADFD;

	if ((seals & SEALS_WANTED) != SEALS_WANTED ||
	    (seals & SEALS_DENIED) != 0)
		return -EINVAL;

	return 0;
}
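
/* Wrap the filled-in udmabuf in a new dma-buf exported with udmabuf_ops. */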
static struct dma_buf *export_udmabuf(struct udmabuf *ubuf,
				      struct miscdevice *device)
{
	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);

	ubuf->device = device;
	exp_info.ops = &udmabuf_ops;
	exp_info.size = ubuf->pagecount << PAGE_SHIFT;
	exp_info.priv = ubuf;
	exp_info.flags = O_RDWR;

	return dma_buf_export(&exp_info);
}
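
/*
 * Pin the folios backing [start, start + size) of the memfd via
 * memfd_pin_folios() and record one folios[]/offsets[] entry per PAGE_SIZE
 * chunk; every pinned folio is also remembered in pinned_folios so it can be
 * unpinned on release.
 */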
static long udmabuf_pin_folios(struct udmabuf *ubuf, struct file *memfd,
|
udmabuf: reuse folio array when pin folios
When invoke memfd_pin_folios, we need offer an array to save each folio
which we pinned.
The current way is dynamic alloc an array(use kvmalloc), get folios,
save into udmabuf and then free.
Depend on the size, kvmalloc can do something different:
Below PAGE_SIZE, slab allocator will be used, which have good alloc
performance, due to it cached page.
PAGE_SIZE - PCP Order, PCP(per-cpu-pageset) also given buddy page a
cache in each CPU, so different CPU no need to hold some lock(zone or
some) to get the locally page. If PCP cached page, the access also fast.
PAGE_SIZE - BUDDY_MAX, try to get page from buddy, due to kvmalloc adjusted
the gfp flags, if zone freelist can't alloc page(fast path), we will not
enter slowpath to reclaim memory. Due to need hold lock and check, may
slow, but still fast than vmalloc.
Anything wrong will fallback into vmalloc to alloc memory, it obtains
contiguous virtual addresses by loop alloc order 0 page(PAGE_SIZE), and
then map it into vmalloc area. If necessary, page alloc may enter
slowpath to reclaim memory. Hence, if fallback into vmalloc, it's slow.
When create, we need to iter each udmabuf item, then pin it's range
folios, if each item's range folio's count is large, we may fallback each
into vmalloc.
This patch find the largest range folio in items, then alloc this size's
folio array. When pin range folios, reuse this array.
Signed-off-by: Huan Yang <link@vivo.com>
Acked-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240918025238.2957823-8-link@vivo.com
2024-09-18 02:52:30 +00:00
|
|
|
loff_t start, loff_t size, struct folio **folios)
|
2024-09-18 02:52:27 +00:00
|
|
|
{
|
udmabuf: remove udmabuf_folio
Currently, udmabuf handles folio by create an unpin list to record
each folio obtained from the list and unpinning them when released. To
maintain this, many struct have been established.
However, maintain this requires a significant amount of memory and
iter the list is a substantial overhead, which is not friendly to the
CPU cache.
When create, we arranged the folio array in the order of pin and set
the offset according to pgcnt. So, if record each pinned folio when
create, then can easy unpin it. Compare to use list to record it,
an array also can do this.
Hence, this patch setup a pinned_folios array(size is the pgcnt) to
instead of udmabuf_folio struct, it record each folio which pinned when
invoke memfd_pin_folios, then unpin folio by iter pinned_folios.
Note that, since a folio may be pinned multiple times, each folio can be
added to pinned_folios multiple times, depend on how many times the
folio has been pinned when create.
Compare to udmabuf_folio(24 byte size), a folio pointer is 8 byte, if no
large folio - each folio is PAGE_SIZE - and need to unpin when release.
So need to record each folio, by this patch, each folio can save 16 byte.
But if large folio used, depend on the large folio's number, the
pinned_folios array may take more memory, but it still can makes unpin
access more cache-friendly.
Signed-off-by: Huan Yang <link@vivo.com>
Acked-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240918025238.2957823-7-link@vivo.com
2024-09-18 02:52:29 +00:00
|
|
|
pgoff_t nr_pinned = ubuf->nr_pinned;
|
|
|
|
pgoff_t upgcnt = ubuf->pagecount;
|
2024-09-18 02:52:27 +00:00
|
|
|
u32 cur_folio, cur_pgcnt;
|
udmabuf: remove udmabuf_folio
Currently, udmabuf handles folio by create an unpin list to record
each folio obtained from the list and unpinning them when released. To
maintain this, many struct have been established.
However, maintain this requires a significant amount of memory and
iter the list is a substantial overhead, which is not friendly to the
CPU cache.
When create, we arranged the folio array in the order of pin and set
the offset according to pgcnt. So, if record each pinned folio when
create, then can easy unpin it. Compare to use list to record it,
an array also can do this.
Hence, this patch setup a pinned_folios array(size is the pgcnt) to
instead of udmabuf_folio struct, it record each folio which pinned when
invoke memfd_pin_folios, then unpin folio by iter pinned_folios.
Note that, since a folio may be pinned multiple times, each folio can be
added to pinned_folios multiple times, depend on how many times the
folio has been pinned when create.
Compare to udmabuf_folio(24 byte size), a folio pointer is 8 byte, if no
large folio - each folio is PAGE_SIZE - and need to unpin when release.
So need to record each folio, by this patch, each folio can save 16 byte.
But if large folio used, depend on the large folio's number, the
pinned_folios array may take more memory, but it still can makes unpin
access more cache-friendly.
Signed-off-by: Huan Yang <link@vivo.com>
Acked-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240918025238.2957823-7-link@vivo.com
2024-09-18 02:52:29 +00:00
|
|
|
pgoff_t pgoff, pgcnt;
|
2024-09-18 02:52:27 +00:00
|
|
|
long nr_folios;
|
|
|
|
loff_t end;
|
|
|
|
|
|
|
|
pgcnt = size >> PAGE_SHIFT;
|
|
|
|
end = start + (pgcnt << PAGE_SHIFT) - 1;
|
|
|
|
nr_folios = memfd_pin_folios(memfd, start, end, folios, pgcnt, &pgoff);
|
udmabuf: reuse folio array when pin folios
When invoke memfd_pin_folios, we need offer an array to save each folio
which we pinned.
The current way is dynamic alloc an array(use kvmalloc), get folios,
save into udmabuf and then free.
Depend on the size, kvmalloc can do something different:
Below PAGE_SIZE, slab allocator will be used, which have good alloc
performance, due to it cached page.
PAGE_SIZE - PCP Order, PCP(per-cpu-pageset) also given buddy page a
cache in each CPU, so different CPU no need to hold some lock(zone or
some) to get the locally page. If PCP cached page, the access also fast.
PAGE_SIZE - BUDDY_MAX, try to get page from buddy, due to kvmalloc adjusted
the gfp flags, if zone freelist can't alloc page(fast path), we will not
enter slowpath to reclaim memory. Due to need hold lock and check, may
slow, but still fast than vmalloc.
Anything wrong will fallback into vmalloc to alloc memory, it obtains
contiguous virtual addresses by loop alloc order 0 page(PAGE_SIZE), and
then map it into vmalloc area. If necessary, page alloc may enter
slowpath to reclaim memory. Hence, if fallback into vmalloc, it's slow.
When create, we need to iter each udmabuf item, then pin it's range
folios, if each item's range folio's count is large, we may fallback each
into vmalloc.
This patch find the largest range folio in items, then alloc this size's
folio array. When pin range folios, reuse this array.
Signed-off-by: Huan Yang <link@vivo.com>
Acked-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240918025238.2957823-8-link@vivo.com
2024-09-18 02:52:30 +00:00
|
|
|
if (nr_folios <= 0)
|
|
|
|
return nr_folios ? nr_folios : -EINVAL;
|
2024-09-18 02:52:27 +00:00
|
|
|
|
|
|
|
cur_pgcnt = 0;
|
|
|
|
for (cur_folio = 0; cur_folio < nr_folios; ++cur_folio) {
|
|
|
|
pgoff_t subpgoff = pgoff;
|
|
|
|
size_t fsize = folio_size(folios[cur_folio]);
|
|
|
|
|
udmabuf: remove udmabuf_folio
Currently, udmabuf tracks folios by keeping an unpin list that records
each folio obtained at creation time and unpins them on release.
Maintaining that list requires several extra structures.
However, it also costs a significant amount of memory, and iterating
the list is substantial overhead that is not friendly to the CPU cache.
At creation time the folio array is already filled in pin order, with
offsets derived from pgcnt. So if every pinned folio is recorded at
creation, unpinning it later is easy, and an array can record this just
as well as a list can.
Hence, this patch replaces the udmabuf_folio struct with a
pinned_folios array (sized to pgcnt) that records each folio pinned by
memfd_pin_folios(); on release the folios are unpinned by iterating
over pinned_folios.
Note that, since a folio may be pinned multiple times, it can appear in
pinned_folios multiple times, depending on how many times it was pinned
at creation.
Compared with udmabuf_folio (24 bytes), a folio pointer is 8 bytes, so
with no large folios - every folio being PAGE_SIZE, needing an unpin on
release and therefore a record of its own - this saves 16 bytes per
folio.
If large folios are used, the pinned_folios array may take more memory,
depending on how many there are, but it still makes the unpin path more
cache-friendly.
Signed-off-by: Huan Yang <link@vivo.com>
Acked-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240918025238.2957823-7-link@vivo.com
2024-09-18 02:52:29 +00:00
|
|
|
ubuf->pinned_folios[nr_pinned++] = folios[cur_folio];
|
2024-09-18 02:52:27 +00:00
|
|
|
|
|
|
|
for (; subpgoff < fsize; subpgoff += PAGE_SIZE) {
|
|
|
|
ubuf->folios[upgcnt] = folios[cur_folio];
|
|
|
|
ubuf->offsets[upgcnt] = subpgoff;
|
|
|
|
++upgcnt;
|
|
|
|
|
|
|
|
if (++cur_pgcnt >= pgcnt)
|
|
|
|
goto end;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* In a given range, only the first subpage of the first folio
|
|
|
|
* has an offset, which is returned by memfd_pin_folios().
|
|
|
|
* The first subpages of other folios (in the range) have an
|
|
|
|
* offset of 0.
|
|
|
|
*/
|
|
|
|
pgoff = 0;
|
|
|
|
}
|
|
|
|
end:
|
|
|
|
ubuf->pagecount = upgcnt;
|
2024-09-18 02:52:29 +00:00
|
|
|
ubuf->nr_pinned = nr_pinned;
|
2024-09-18 02:52:30 +00:00
|
|
|
return 0;
|
2024-09-18 02:52:27 +00:00
|
|
|
}
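
To make the offset bookkeeping above concrete: only the first folio of a pinned range starts at the offset returned by memfd_pin_folios(); every later folio in the range starts at 0, which is why pgoff is reset after the first iteration. The stand-alone sketch below is an illustration only, not driver code; the 2 MiB folio size and the 8 KiB starting offset are assumptions chosen just to show the per-PAGE_SIZE walk.

#include <stdio.h>
#include <stddef.h>

#define PAGE_SZ		4096UL
#define FOLIO_SZ	(2UL * 1024 * 1024)	/* one 2 MiB hugetlb folio (assumed) */

int main(void)
{
	size_t start_off = 2 * PAGE_SZ;	/* pin request begins 8 KiB into the folio */
	size_t entries = 0;
	size_t subpgoff;

	/* First folio of the range: walk from the returned offset. */
	for (subpgoff = start_off; subpgoff < FOLIO_SZ && entries < 4; subpgoff += PAGE_SZ) {
		/* Each entry records (folio, offset within folio), as the driver does. */
		printf("entry %zu -> folio 0, offset %zu\n", entries, subpgoff);
		entries++;
	}
	/* A second folio in the same range would start again at offset 0. */
	return 0;
}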
|
|
|
|
|
2019-12-03 01:36:25 +00:00
|
|
|
static long udmabuf_create(struct miscdevice *device,
|
|
|
|
struct udmabuf_create_list *head,
|
|
|
|
struct udmabuf_create_item *list)
|
2018-08-27 09:34:44 +00:00
|
|
|
{
|
2024-09-18 02:52:30 +00:00
|
|
|
unsigned long max_nr_folios = 0;
|
|
|
|
struct folio **folios = NULL;
|
2024-09-18 02:52:27 +00:00
|
|
|
pgoff_t pgcnt = 0, pglimit;
|
2018-08-27 09:34:44 +00:00
|
|
|
struct udmabuf *ubuf;
|
2024-12-04 16:26:21 +00:00
|
|
|
struct dma_buf *dmabuf;
|
2024-09-18 02:52:27 +00:00
|
|
|
long ret = -EINVAL;
|
|
|
|
u32 i, flags;
|
2018-08-27 09:34:44 +00:00
|
|
|
|
2018-09-11 13:42:15 +00:00
|
|
|
ubuf = kzalloc(sizeof(*ubuf), GFP_KERNEL);
|
2018-08-27 09:34:44 +00:00
|
|
|
if (!ubuf)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2018-09-11 13:42:10 +00:00
|
|
|
pglimit = (size_limit_mb * 1024 * 1024) >> PAGE_SHIFT;
|
2018-08-27 09:34:44 +00:00
|
|
|
for (i = 0; i < head->count; i++) {
|
2024-09-18 02:52:30 +00:00
|
|
|
pgoff_t subpgcnt;
|
|
|
|
|
2024-09-18 02:52:27 +00:00
|
|
|
if (!PAGE_ALIGNED(list[i].offset))
|
2024-09-18 02:52:28 +00:00
|
|
|
goto err_noinit;
|
2024-09-18 02:52:27 +00:00
|
|
|
if (!PAGE_ALIGNED(list[i].size))
|
2024-09-18 02:52:28 +00:00
|
|
|
goto err_noinit;
|
2024-09-18 02:52:27 +00:00
|
|
|
|
2024-09-18 02:52:30 +00:00
|
|
|
subpgcnt = list[i].size >> PAGE_SHIFT;
|
|
|
|
pgcnt += subpgcnt;
|
2024-09-18 02:52:27 +00:00
|
|
|
if (pgcnt > pglimit)
|
2024-09-18 02:52:28 +00:00
|
|
|
goto err_noinit;
|
2024-09-18 02:52:30 +00:00
|
|
|
|
|
|
|
max_nr_folios = max_t(unsigned long, subpgcnt, max_nr_folios);
|
2018-08-27 09:34:44 +00:00
|
|
|
}
|
2021-12-30 14:26:49 +00:00
|
|
|
|
2024-09-18 02:52:27 +00:00
|
|
|
if (!pgcnt)
|
2024-09-18 02:52:28 +00:00
|
|
|
goto err_noinit;
|
2021-12-30 14:26:49 +00:00
|
|
|
|
2024-09-18 02:52:28 +00:00
|
|
|
ret = init_udmabuf(ubuf, pgcnt);
|
|
|
|
if (ret)
|
2018-09-11 13:42:11 +00:00
|
|
|
goto err;
|
2018-08-27 09:34:44 +00:00
|
|
|
|
2024-09-18 02:52:30 +00:00
|
|
|
folios = kvmalloc_array(max_nr_folios, sizeof(*folios), GFP_KERNEL);
|
|
|
|
if (!folios) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
2018-08-27 09:34:44 +00:00
|
|
|
for (i = 0; i < head->count; i++) {
|
2024-09-18 02:52:27 +00:00
|
|
|
struct file *memfd = fget(list[i].memfd);
|
2024-06-24 06:36:14 +00:00
|
|
|
|
2024-09-18 02:52:27 +00:00
|
|
|
if (!memfd) {
|
|
|
|
ret = -EBADFD;
|
2024-06-24 06:36:16 +00:00
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
2024-12-04 16:26:19 +00:00
|
|
|
/*
|
|
|
|
* Take the inode lock to protect against concurrent
|
|
|
|
* memfd_add_seals(), which takes this lock in write mode.
|
|
|
|
*/
|
|
|
|
inode_lock_shared(file_inode(memfd));
|
2024-09-18 02:52:27 +00:00
|
|
|
ret = check_memfd_seals(memfd);
|
2024-12-04 16:26:19 +00:00
|
|
|
if (ret)
|
|
|
|
goto out_unlock;
|
2024-06-24 06:36:16 +00:00
|
|
|
|
2024-09-18 02:52:27 +00:00
|
|
|
ret = udmabuf_pin_folios(ubuf, memfd, list[i].offset,
|
2024-09-18 02:52:30 +00:00
|
|
|
list[i].size, folios);
|
2024-12-04 16:26:19 +00:00
|
|
|
out_unlock:
|
|
|
|
inode_unlock_shared(file_inode(memfd));
|
2018-08-27 09:34:44 +00:00
|
|
|
fput(memfd);
|
2024-09-18 02:52:27 +00:00
|
|
|
if (ret)
|
|
|
|
goto err;
|
2018-08-27 09:34:44 +00:00
|
|
|
}
|
|
|
|
|
2024-06-24 06:36:15 +00:00
|
|
|
flags = head->flags & UDMABUF_FLAGS_CLOEXEC ? O_CLOEXEC : 0;
|
2024-12-04 16:26:21 +00:00
|
|
|
dmabuf = export_udmabuf(ubuf, device);
|
|
|
|
if (IS_ERR(dmabuf)) {
|
|
|
|
ret = PTR_ERR(dmabuf);
|
2018-09-11 13:42:11 +00:00
|
|
|
goto err;
|
2024-12-04 16:26:21 +00:00
|
|
|
}
|
|
|
|
/*
|
|
|
|
* Ownership of ubuf is held by the dmabuf from here.
|
|
|
|
* If the following dma_buf_fd() fails, dma_buf_put() cleans up both the
|
|
|
|
* dmabuf and the ubuf (through udmabuf_ops.release).
|
|
|
|
*/
|
|
|
|
|
|
|
|
ret = dma_buf_fd(dmabuf, flags);
|
|
|
|
if (ret < 0)
|
|
|
|
dma_buf_put(dmabuf);
|
2018-08-27 09:34:44 +00:00
|
|
|
|
2024-09-18 02:52:30 +00:00
|
|
|
kvfree(folios);
|
2024-06-24 06:36:15 +00:00
|
|
|
return ret;
|
2018-08-27 09:34:44 +00:00
|
|
|
|
2018-09-11 13:42:11 +00:00
|
|
|
err:
|
2024-09-18 02:52:28 +00:00
|
|
|
deinit_udmabuf(ubuf);
|
|
|
|
err_noinit:
|
2018-08-27 09:34:44 +00:00
|
|
|
kfree(ubuf);
|
2024-09-18 02:52:30 +00:00
|
|
|
kvfree(folios);
|
2018-08-27 09:34:44 +00:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static long udmabuf_ioctl_create(struct file *filp, unsigned long arg)
|
|
|
|
{
|
|
|
|
struct udmabuf_create create;
|
|
|
|
struct udmabuf_create_list head;
|
|
|
|
struct udmabuf_create_item list;
|
|
|
|
|
|
|
|
if (copy_from_user(&create, (void __user *)arg,
|
2018-09-11 13:42:15 +00:00
|
|
|
sizeof(create)))
|
2018-08-27 09:34:44 +00:00
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
head.flags = create.flags;
|
|
|
|
head.count = 1;
|
|
|
|
list.memfd = create.memfd;
|
|
|
|
list.offset = create.offset;
|
|
|
|
list.size = create.size;
|
|
|
|
|
2019-12-03 01:36:25 +00:00
|
|
|
return udmabuf_create(filp->private_data, &head, &list);
|
2018-08-27 09:34:44 +00:00
|
|
|
}
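
From userspace, this single-create path is driven much as the kernel selftest does: create a sealable memfd, size it, add F_SEAL_SHRINK (check_memfd_seals() rejects memfds without it), and hand it to UDMABUF_CREATE on /dev/udmabuf; the ioctl returns the new dma-buf fd. A minimal sketch with error handling omitted:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/udmabuf.h>

int main(void)
{
	struct udmabuf_create create;
	size_t size = 64 * getpagesize();	/* offset and size must stay page-aligned */
	int devfd, memfd, buffd;

	devfd = open("/dev/udmabuf", O_RDWR);
	memfd = memfd_create("udmabuf-demo", MFD_ALLOW_SEALING);
	ftruncate(memfd, size);
	fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK);	/* required by check_memfd_seals() */

	memset(&create, 0, sizeof(create));
	create.memfd  = memfd;
	create.flags  = UDMABUF_FLAGS_CLOEXEC;
	create.offset = 0;
	create.size   = size;

	buffd = ioctl(devfd, UDMABUF_CREATE, &create);	/* returns the dma-buf fd */
	if (buffd < 0)
		perror("UDMABUF_CREATE");
	else
		printf("dma-buf fd: %d\n", buffd);
	return 0;
}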
|
|
|
|
|
|
|
|
static long udmabuf_ioctl_create_list(struct file *filp, unsigned long arg)
|
|
|
|
{
|
|
|
|
struct udmabuf_create_list head;
|
|
|
|
struct udmabuf_create_item *list;
|
|
|
|
int ret = -EINVAL;
|
|
|
|
u32 lsize;
|
|
|
|
|
|
|
|
if (copy_from_user(&head, (void __user *)arg, sizeof(head)))
|
|
|
|
return -EFAULT;
|
2018-09-11 13:42:10 +00:00
|
|
|
if (head.count > list_limit)
|
2018-08-27 09:34:44 +00:00
|
|
|
return -EINVAL;
|
|
|
|
lsize = sizeof(struct udmabuf_create_item) * head.count;
|
|
|
|
list = memdup_user((void __user *)(arg + sizeof(head)), lsize);
|
|
|
|
if (IS_ERR(list))
|
|
|
|
return PTR_ERR(list);
|
|
|
|
|
2019-12-03 01:36:25 +00:00
|
|
|
ret = udmabuf_create(filp->private_data, &head, list);
|
2018-08-27 09:34:44 +00:00
|
|
|
kfree(list);
|
|
|
|
return ret;
|
|
|
|
}
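
The list variant expects a single user buffer holding a udmabuf_create_list header immediately followed by head.count udmabuf_create_item entries, which is what the copy_from_user() plus memdup_user() pair above reads. A minimal sketch (error handling omitted; combining two page-aligned ranges of one memfd is purely an illustration):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/udmabuf.h>

int main(void)
{
	size_t half = 32 * getpagesize();	/* each range must stay page-aligned */
	struct udmabuf_create_list *list;
	int devfd, memfd, buffd;

	devfd = open("/dev/udmabuf", O_RDWR);
	memfd = memfd_create("udmabuf-demo", MFD_ALLOW_SEALING);
	ftruncate(memfd, 2 * half);
	fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK);

	/* Header and its flexible item array travel in one allocation,
	 * matching the layout the ioctl handler reads from userspace. */
	list = calloc(1, sizeof(*list) + 2 * sizeof(list->list[0]));
	list->flags = UDMABUF_FLAGS_CLOEXEC;
	list->count = 2;
	list->list[0].memfd  = memfd;
	list->list[0].offset = 0;
	list->list[0].size   = half;
	list->list[1].memfd  = memfd;
	list->list[1].offset = half;
	list->list[1].size   = half;

	buffd = ioctl(devfd, UDMABUF_CREATE_LIST, list);
	if (buffd < 0)
		perror("UDMABUF_CREATE_LIST");
	else
		printf("combined dma-buf fd: %d\n", buffd);
	free(list);
	return 0;
}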
|
|
|
|
|
|
|
|
static long udmabuf_ioctl(struct file *filp, unsigned int ioctl,
|
|
|
|
unsigned long arg)
|
|
|
|
{
|
|
|
|
long ret;
|
|
|
|
|
|
|
|
switch (ioctl) {
|
|
|
|
case UDMABUF_CREATE:
|
|
|
|
ret = udmabuf_ioctl_create(filp, arg);
|
|
|
|
break;
|
|
|
|
case UDMABUF_CREATE_LIST:
|
|
|
|
ret = udmabuf_ioctl_create_list(filp, arg);
|
|
|
|
break;
|
|
|
|
default:
|
2018-09-11 13:42:13 +00:00
|
|
|
ret = -ENOTTY;
|
2018-08-27 09:34:44 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const struct file_operations udmabuf_fops = {
|
|
|
|
.owner = THIS_MODULE,
|
|
|
|
.unlocked_ioctl = udmabuf_ioctl,
|
2020-09-03 18:16:52 +00:00
|
|
|
#ifdef CONFIG_COMPAT
|
|
|
|
.compat_ioctl = udmabuf_ioctl,
|
|
|
|
#endif
|
2018-08-27 09:34:44 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
static struct miscdevice udmabuf_misc = {
|
|
|
|
.minor = MISC_DYNAMIC_MINOR,
|
|
|
|
.name = "udmabuf",
|
|
|
|
.fops = &udmabuf_fops,
|
|
|
|
};
|
|
|
|
|
|
|
|
static int __init udmabuf_dev_init(void)
|
|
|
|
{
|
2022-05-20 20:52:35 +00:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
ret = misc_register(&udmabuf_misc);
|
|
|
|
if (ret < 0) {
|
|
|
|
pr_err("Could not initialize udmabuf device\n");
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = dma_coerce_mask_and_coherent(udmabuf_misc.this_device,
|
|
|
|
DMA_BIT_MASK(64));
|
|
|
|
if (ret < 0) {
|
|
|
|
pr_err("Could not setup DMA mask for udmabuf device\n");
|
|
|
|
misc_deregister(&udmabuf_misc);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
2018-08-27 09:34:44 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void __exit udmabuf_dev_exit(void)
|
|
|
|
{
|
|
|
|
misc_deregister(&udmabuf_misc);
|
|
|
|
}
|
|
|
|
|
|
|
|
module_init(udmabuf_dev_init)
|
|
|
|
module_exit(udmabuf_dev_exit)
|
|
|
|
|
|
|
|
MODULE_AUTHOR("Gerd Hoffmann <kraxel@redhat.com>");
|