mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-08 14:23:19 +00:00
dma-mapping updates for 5.11:
- support for a partial IOMMU bypass (Alexey Kardashevskiy) - add a DMA API benchmark (Barry Song) - misc fixes (Tiezhu Yang, tangjianqiang) -----BEGIN PGP SIGNATURE----- iQI/BAABCgApFiEEgdbnc3r/njty3Iq9D55TZVIEUYMFAl/iF+wLHGhjaEBsc3Qu ZGUACgkQD55TZVIEUYP/HQ//beE+HGi0+5yiWdLY/Q3nqT/VExgdY2CAE2en0jcs kpUEZPfhE2dlKPf9nBl+ZsLIgqIwVP+oSawxZ65r0z/w95vgWANAmBg2m/FcnEUx Vl6jUUPmViY0AGzMR1+55voNNor4aX9rLUdZEN+vBy62Z6fvQVmKOtVPUf0ekkUh n3factrKIt7mplsIgJPO3v6G2XHugBaNmcrp9LkkmhniYkH8S31l4uKCSKI+6atT CsTTVqpQ+qVAyrgp30Xs+N9QoOefI4tFdXmvXzIIFe3JyDvIpniaiT+HHVXuEQJR 5Yukj9sKJXS1ipjc+yWsjvn4Wax/tubJ7eqIo9alsIkcJXI3AlKljwBtFrKPMq+L NK54N3N31A+/hrxOLQyfqz5vH6cUicFr7STIedb8pD3y3/f/tiP+/qown6Wj3rB/ E0IlCK5w87OVrJT1f5fHxh9Xe4R2W9rpeU/v/AZ/DrQllwp3PbG0kWqu3m4mujih CF3D+rOoB5tNtBxrNU+TpOVm+OOdX1IDtBleKePHTTIQeBY3p3UZuwINO0axFr9p oQndj1fiYvRwI6GS31GPpduFG7PrrwCb5zD0Rm07+aNCIoSmu+ADEngfgxnj/rGz q+c9Te1dDn58OktkiEONPNu+iuGBxhBR7AeP6UDdcC1vkO/hvW2HnppSY0qiWthU IFY= =dOug -----END PGP SIGNATURE----- Merge tag 'dma-mapping-5.11' of git://git.infradead.org/users/hch/dma-mapping Pull dma-mapping updates from Christoph Hellwig: - support for a partial IOMMU bypass (Alexey Kardashevskiy) - add a DMA API benchmark (Barry Song) - misc fixes (Tiezhu Yang, tangjianqiang) * tag 'dma-mapping-5.11' of git://git.infradead.org/users/hch/dma-mapping: selftests/dma: add test application for DMA_MAP_BENCHMARK dma-mapping: add benchmark support for streaming DMA APIs dma-contiguous: fix a typo error in a comment dma-pool: no need to check return value of debugfs_create functions powerpc/dma: Fallback to dma_ops when persistent memory present dma-mapping: Allow mixing bypass and mapped DMA operation
This commit is contained in:
commit
347d81b68b
@ -5297,6 +5297,12 @@ F: include/linux/dma-mapping.h
|
|||||||
F: include/linux/dma-map-ops.h
|
F: include/linux/dma-map-ops.h
|
||||||
F: kernel/dma/
|
F: kernel/dma/
|
||||||
|
|
||||||
|
DMA MAPPING BENCHMARK
|
||||||
|
M: Barry Song <song.bao.hua@hisilicon.com>
|
||||||
|
L: iommu@lists.linux-foundation.org
|
||||||
|
F: kernel/dma/map_benchmark.c
|
||||||
|
F: tools/testing/selftests/dma/
|
||||||
|
|
||||||
DMA-BUF HEAPS FRAMEWORK
|
DMA-BUF HEAPS FRAMEWORK
|
||||||
M: Sumit Semwal <sumit.semwal@linaro.org>
|
M: Sumit Semwal <sumit.semwal@linaro.org>
|
||||||
R: Benjamin Gaignard <benjamin.gaignard@linaro.org>
|
R: Benjamin Gaignard <benjamin.gaignard@linaro.org>
|
||||||
|
@ -161,6 +161,7 @@ config PPC
|
|||||||
select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN
|
select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN
|
||||||
select DMA_OPS if PPC64
|
select DMA_OPS if PPC64
|
||||||
select DMA_OPS_BYPASS if PPC64
|
select DMA_OPS_BYPASS if PPC64
|
||||||
|
select ARCH_HAS_DMA_MAP_DIRECT if PPC64 && PPC_PSERIES
|
||||||
select DYNAMIC_FTRACE if FUNCTION_TRACER
|
select DYNAMIC_FTRACE if FUNCTION_TRACER
|
||||||
select EDAC_ATOMIC_SCRUB
|
select EDAC_ATOMIC_SCRUB
|
||||||
select EDAC_SUPPORT
|
select EDAC_SUPPORT
|
||||||
|
@ -10,6 +10,63 @@
|
|||||||
#include <linux/pci.h>
|
#include <linux/pci.h>
|
||||||
#include <asm/iommu.h>
|
#include <asm/iommu.h>
|
||||||
|
|
||||||
|
#ifdef CONFIG_ARCH_HAS_DMA_MAP_DIRECT
|
||||||
|
#define can_map_direct(dev, addr) \
|
||||||
|
((dev)->bus_dma_limit >= phys_to_dma((dev), (addr)))
|
||||||
|
|
||||||
|
bool arch_dma_map_page_direct(struct device *dev, phys_addr_t addr)
|
||||||
|
{
|
||||||
|
if (likely(!dev->bus_dma_limit))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return can_map_direct(dev, addr);
|
||||||
|
}
|
||||||
|
|
||||||
|
#define is_direct_handle(dev, h) ((h) >= (dev)->archdata.dma_offset)
|
||||||
|
|
||||||
|
bool arch_dma_unmap_page_direct(struct device *dev, dma_addr_t dma_handle)
|
||||||
|
{
|
||||||
|
if (likely(!dev->bus_dma_limit))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return is_direct_handle(dev, dma_handle);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool arch_dma_map_sg_direct(struct device *dev, struct scatterlist *sg,
|
||||||
|
int nents)
|
||||||
|
{
|
||||||
|
struct scatterlist *s;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
if (likely(!dev->bus_dma_limit))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
for_each_sg(sg, s, nents, i) {
|
||||||
|
if (!can_map_direct(dev, sg_phys(s) + s->offset + s->length))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool arch_dma_unmap_sg_direct(struct device *dev, struct scatterlist *sg,
|
||||||
|
int nents)
|
||||||
|
{
|
||||||
|
struct scatterlist *s;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
if (likely(!dev->bus_dma_limit))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
for_each_sg(sg, s, nents, i) {
|
||||||
|
if (!is_direct_handle(dev, s->dma_address + s->length))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
#endif /* CONFIG_ARCH_HAS_DMA_MAP_DIRECT */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Generic iommu implementation
|
* Generic iommu implementation
|
||||||
*/
|
*/
|
||||||
@ -90,8 +147,18 @@ int dma_iommu_dma_supported(struct device *dev, u64 mask)
|
|||||||
struct iommu_table *tbl = get_iommu_table_base(dev);
|
struct iommu_table *tbl = get_iommu_table_base(dev);
|
||||||
|
|
||||||
if (dev_is_pci(dev) && dma_iommu_bypass_supported(dev, mask)) {
|
if (dev_is_pci(dev) && dma_iommu_bypass_supported(dev, mask)) {
|
||||||
dev->dma_ops_bypass = true;
|
/*
|
||||||
dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
|
* dma_iommu_bypass_supported() sets dma_max when there is
|
||||||
|
* 1:1 mapping but it is somehow limited.
|
||||||
|
* ibm,pmemory is one example.
|
||||||
|
*/
|
||||||
|
dev->dma_ops_bypass = dev->bus_dma_limit == 0;
|
||||||
|
if (!dev->dma_ops_bypass)
|
||||||
|
dev_warn(dev,
|
||||||
|
"iommu: 64-bit OK but direct DMA is limited by %llx\n",
|
||||||
|
dev->bus_dma_limit);
|
||||||
|
else
|
||||||
|
dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -839,7 +839,7 @@ static void remove_ddw(struct device_node *np, bool remove_prop)
|
|||||||
np, ret);
|
np, ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
static u64 find_existing_ddw(struct device_node *pdn)
|
static u64 find_existing_ddw(struct device_node *pdn, int *window_shift)
|
||||||
{
|
{
|
||||||
struct direct_window *window;
|
struct direct_window *window;
|
||||||
const struct dynamic_dma_window_prop *direct64;
|
const struct dynamic_dma_window_prop *direct64;
|
||||||
@ -851,6 +851,7 @@ static u64 find_existing_ddw(struct device_node *pdn)
|
|||||||
if (window->device == pdn) {
|
if (window->device == pdn) {
|
||||||
direct64 = window->prop;
|
direct64 = window->prop;
|
||||||
dma_addr = be64_to_cpu(direct64->dma_base);
|
dma_addr = be64_to_cpu(direct64->dma_base);
|
||||||
|
*window_shift = be32_to_cpu(direct64->window_shift);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1111,11 +1112,12 @@ static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn)
|
|||||||
*/
|
*/
|
||||||
static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
|
static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
|
||||||
{
|
{
|
||||||
int len, ret;
|
int len = 0, ret;
|
||||||
|
int max_ram_len = order_base_2(ddw_memory_hotplug_max());
|
||||||
struct ddw_query_response query;
|
struct ddw_query_response query;
|
||||||
struct ddw_create_response create;
|
struct ddw_create_response create;
|
||||||
int page_shift;
|
int page_shift;
|
||||||
u64 dma_addr, max_addr;
|
u64 dma_addr;
|
||||||
struct device_node *dn;
|
struct device_node *dn;
|
||||||
u32 ddw_avail[DDW_APPLICABLE_SIZE];
|
u32 ddw_avail[DDW_APPLICABLE_SIZE];
|
||||||
struct direct_window *window;
|
struct direct_window *window;
|
||||||
@ -1123,10 +1125,15 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
|
|||||||
struct dynamic_dma_window_prop *ddwprop;
|
struct dynamic_dma_window_prop *ddwprop;
|
||||||
struct failed_ddw_pdn *fpdn;
|
struct failed_ddw_pdn *fpdn;
|
||||||
bool default_win_removed = false;
|
bool default_win_removed = false;
|
||||||
|
bool pmem_present;
|
||||||
|
|
||||||
|
dn = of_find_node_by_type(NULL, "ibm,pmemory");
|
||||||
|
pmem_present = dn != NULL;
|
||||||
|
of_node_put(dn);
|
||||||
|
|
||||||
mutex_lock(&direct_window_init_mutex);
|
mutex_lock(&direct_window_init_mutex);
|
||||||
|
|
||||||
dma_addr = find_existing_ddw(pdn);
|
dma_addr = find_existing_ddw(pdn, &len);
|
||||||
if (dma_addr != 0)
|
if (dma_addr != 0)
|
||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
|
|
||||||
@ -1212,14 +1219,29 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
|
|||||||
}
|
}
|
||||||
/* verify the window * number of ptes will map the partition */
|
/* verify the window * number of ptes will map the partition */
|
||||||
/* check largest block * page size > max memory hotplug addr */
|
/* check largest block * page size > max memory hotplug addr */
|
||||||
max_addr = ddw_memory_hotplug_max();
|
/*
|
||||||
if (query.largest_available_block < (max_addr >> page_shift)) {
|
* The "ibm,pmemory" can appear anywhere in the address space.
|
||||||
dev_dbg(&dev->dev, "can't map partition max 0x%llx with %llu "
|
* Assuming it is still backed by page structs, try MAX_PHYSMEM_BITS
|
||||||
"%llu-sized pages\n", max_addr, query.largest_available_block,
|
* for the upper limit and fallback to max RAM otherwise but this
|
||||||
1ULL << page_shift);
|
* disables device::dma_ops_bypass.
|
||||||
|
*/
|
||||||
|
len = max_ram_len;
|
||||||
|
if (pmem_present) {
|
||||||
|
if (query.largest_available_block >=
|
||||||
|
(1ULL << (MAX_PHYSMEM_BITS - page_shift)))
|
||||||
|
len = MAX_PHYSMEM_BITS - page_shift;
|
||||||
|
else
|
||||||
|
dev_info(&dev->dev, "Skipping ibm,pmemory");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (query.largest_available_block < (1ULL << (len - page_shift))) {
|
||||||
|
dev_dbg(&dev->dev,
|
||||||
|
"can't map partition max 0x%llx with %llu %llu-sized pages\n",
|
||||||
|
1ULL << len,
|
||||||
|
query.largest_available_block,
|
||||||
|
1ULL << page_shift);
|
||||||
goto out_failed;
|
goto out_failed;
|
||||||
}
|
}
|
||||||
len = order_base_2(max_addr);
|
|
||||||
win64 = kzalloc(sizeof(struct property), GFP_KERNEL);
|
win64 = kzalloc(sizeof(struct property), GFP_KERNEL);
|
||||||
if (!win64) {
|
if (!win64) {
|
||||||
dev_info(&dev->dev,
|
dev_info(&dev->dev,
|
||||||
@ -1299,6 +1321,15 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
|
|||||||
|
|
||||||
out_unlock:
|
out_unlock:
|
||||||
mutex_unlock(&direct_window_init_mutex);
|
mutex_unlock(&direct_window_init_mutex);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we have persistent memory and the window size is only as big
|
||||||
|
* as RAM, then we failed to create a window to cover persistent
|
||||||
|
* memory and need to set the DMA limit.
|
||||||
|
*/
|
||||||
|
if (pmem_present && dma_addr && (len == max_ram_len))
|
||||||
|
dev->dev.bus_dma_limit = dma_addr + (1ULL << len);
|
||||||
|
|
||||||
return dma_addr;
|
return dma_addr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -317,6 +317,20 @@ static inline void arch_dma_mark_clean(phys_addr_t paddr, size_t size)
|
|||||||
void *arch_dma_set_uncached(void *addr, size_t size);
|
void *arch_dma_set_uncached(void *addr, size_t size);
|
||||||
void arch_dma_clear_uncached(void *addr, size_t size);
|
void arch_dma_clear_uncached(void *addr, size_t size);
|
||||||
|
|
||||||
|
/*
 * Per-architecture hooks that report whether a given mapping or unmapping
 * should take the direct path. Without CONFIG_ARCH_HAS_DMA_MAP_DIRECT they
 * collapse to a constant false.
 */
#ifdef CONFIG_ARCH_HAS_DMA_MAP_DIRECT
bool arch_dma_map_page_direct(struct device *dev, phys_addr_t addr);
bool arch_dma_unmap_page_direct(struct device *dev, dma_addr_t dma_handle);
bool arch_dma_map_sg_direct(struct device *dev, struct scatterlist *sg,
		int nents);
bool arch_dma_unmap_sg_direct(struct device *dev, struct scatterlist *sg,
		int nents);
#else
#define arch_dma_map_page_direct(d, a)		(false)
#define arch_dma_unmap_page_direct(d, a)	(false)
#define arch_dma_map_sg_direct(d, s, n)		(false)
#define arch_dma_unmap_sg_direct(d, s, n)	(false)
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS
|
#ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS
|
||||||
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
|
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
|
||||||
const struct iommu_ops *iommu, bool coherent);
|
const struct iommu_ops *iommu, bool coherent);
|
||||||
|
@ -20,6 +20,10 @@ config DMA_OPS
|
|||||||
config DMA_OPS_BYPASS
|
config DMA_OPS_BYPASS
|
||||||
bool
|
bool
|
||||||
|
|
||||||
|
# Lets platform IOMMU driver choose between bypass and IOMMU
|
||||||
|
config ARCH_HAS_DMA_MAP_DIRECT
|
||||||
|
bool
|
||||||
|
|
||||||
config NEED_SG_DMA_LENGTH
|
config NEED_SG_DMA_LENGTH
|
||||||
bool
|
bool
|
||||||
|
|
||||||
@ -220,3 +224,12 @@ config DMA_API_DEBUG_SG
|
|||||||
is technically out-of-spec.
|
is technically out-of-spec.
|
||||||
|
|
||||||
If unsure, say N.
|
If unsure, say N.
|
||||||
|
|
||||||
|
config DMA_MAP_BENCHMARK
|
||||||
|
bool "Enable benchmarking of streaming DMA mapping"
|
||||||
|
depends on DEBUG_FS
|
||||||
|
help
|
||||||
|
Provides /sys/kernel/debug/dma_map_benchmark that helps with testing
|
||||||
|
performance of dma_(un)map_page.
|
||||||
|
|
||||||
|
See tools/testing/selftests/dma/dma_map_benchmark.c
|
||||||
|
@ -9,3 +9,4 @@ obj-$(CONFIG_DMA_API_DEBUG) += debug.o
|
|||||||
obj-$(CONFIG_SWIOTLB) += swiotlb.o
|
obj-$(CONFIG_SWIOTLB) += swiotlb.o
|
||||||
obj-$(CONFIG_DMA_COHERENT_POOL) += pool.o
|
obj-$(CONFIG_DMA_COHERENT_POOL) += pool.o
|
||||||
obj-$(CONFIG_DMA_REMAP) += remap.o
|
obj-$(CONFIG_DMA_REMAP) += remap.o
|
||||||
|
obj-$(CONFIG_DMA_MAP_BENCHMARK) += map_benchmark.o
|
||||||
|
@ -20,7 +20,7 @@
|
|||||||
* coders, etc.
|
* coders, etc.
|
||||||
*
|
*
|
||||||
* Such devices often require big memory buffers (a full HD frame
|
* Such devices often require big memory buffers (a full HD frame
|
||||||
* is, for instance, more then 2 mega pixels large, i.e. more than 6
|
* is, for instance, more than 2 mega pixels large, i.e. more than 6
|
||||||
* MB of memory), which makes mechanisms such as kmalloc() or
|
* MB of memory), which makes mechanisms such as kmalloc() or
|
||||||
* alloc_page() ineffective.
|
* alloc_page() ineffective.
|
||||||
*
|
*
|
||||||
|
361
kernel/dma/map_benchmark.c
Normal file
361
kernel/dma/map_benchmark.c
Normal file
@ -0,0 +1,361 @@
|
|||||||
|
// SPDX-License-Identifier: GPL-2.0-only
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2020 Hisilicon Limited.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||||
|
|
||||||
|
#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/math64.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/timekeeping.h>
|
||||||
|
|
||||||
|
/* ioctl request understood by the dma_map_benchmark debugfs file */
#define DMA_MAP_BENCHMARK	_IOWR('d', 1, struct map_benchmark)

/* upper bounds accepted for map_benchmark.threads and .seconds */
#define DMA_MAP_MAX_THREADS	1024
#define DMA_MAP_MAX_SECONDS	300

/* values accepted in map_benchmark.dma_dir */
#define DMA_MAP_BIDIRECTIONAL	0
#define DMA_MAP_TO_DEVICE	1
#define DMA_MAP_FROM_DEVICE	2
|
||||||
|
|
||||||
|
struct map_benchmark {
|
||||||
|
__u64 avg_map_100ns; /* average map latency in 100ns */
|
||||||
|
__u64 map_stddev; /* standard deviation of map latency */
|
||||||
|
__u64 avg_unmap_100ns; /* as above */
|
||||||
|
__u64 unmap_stddev;
|
||||||
|
__u32 threads; /* how many threads will do map/unmap in parallel */
|
||||||
|
__u32 seconds; /* how long the test will last */
|
||||||
|
__s32 node; /* which numa node this benchmark will run on */
|
||||||
|
__u32 dma_bits; /* DMA addressing capability */
|
||||||
|
__u32 dma_dir; /* DMA data direction */
|
||||||
|
__u64 expansion[10]; /* For future use */
|
||||||
|
};
|
||||||
|
|
||||||
|
struct map_benchmark_data {
|
||||||
|
struct map_benchmark bparam;
|
||||||
|
struct device *dev;
|
||||||
|
struct dentry *debugfs;
|
||||||
|
enum dma_data_direction dir;
|
||||||
|
atomic64_t sum_map_100ns;
|
||||||
|
atomic64_t sum_unmap_100ns;
|
||||||
|
atomic64_t sum_sq_map;
|
||||||
|
atomic64_t sum_sq_unmap;
|
||||||
|
atomic64_t loops;
|
||||||
|
};
|
||||||
|
|
||||||
|
static int map_benchmark_thread(void *data)
|
||||||
|
{
|
||||||
|
void *buf;
|
||||||
|
dma_addr_t dma_addr;
|
||||||
|
struct map_benchmark_data *map = data;
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
buf = (void *)__get_free_page(GFP_KERNEL);
|
||||||
|
if (!buf)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
while (!kthread_should_stop()) {
|
||||||
|
u64 map_100ns, unmap_100ns, map_sq, unmap_sq;
|
||||||
|
ktime_t map_stime, map_etime, unmap_stime, unmap_etime;
|
||||||
|
ktime_t map_delta, unmap_delta;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* for a non-coherent device, if we don't stain them in the
|
||||||
|
* cache, this will give an underestimate of the real-world
|
||||||
|
* overhead of BIDIRECTIONAL or TO_DEVICE mappings;
|
||||||
|
* 66 means evertything goes well! 66 is lucky.
|
||||||
|
*/
|
||||||
|
if (map->dir != DMA_FROM_DEVICE)
|
||||||
|
memset(buf, 0x66, PAGE_SIZE);
|
||||||
|
|
||||||
|
map_stime = ktime_get();
|
||||||
|
dma_addr = dma_map_single(map->dev, buf, PAGE_SIZE, map->dir);
|
||||||
|
if (unlikely(dma_mapping_error(map->dev, dma_addr))) {
|
||||||
|
pr_err("dma_map_single failed on %s\n",
|
||||||
|
dev_name(map->dev));
|
||||||
|
ret = -ENOMEM;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
map_etime = ktime_get();
|
||||||
|
map_delta = ktime_sub(map_etime, map_stime);
|
||||||
|
|
||||||
|
unmap_stime = ktime_get();
|
||||||
|
dma_unmap_single(map->dev, dma_addr, PAGE_SIZE, map->dir);
|
||||||
|
unmap_etime = ktime_get();
|
||||||
|
unmap_delta = ktime_sub(unmap_etime, unmap_stime);
|
||||||
|
|
||||||
|
/* calculate sum and sum of squares */
|
||||||
|
|
||||||
|
map_100ns = div64_ul(map_delta, 100);
|
||||||
|
unmap_100ns = div64_ul(unmap_delta, 100);
|
||||||
|
map_sq = map_100ns * map_100ns;
|
||||||
|
unmap_sq = unmap_100ns * unmap_100ns;
|
||||||
|
|
||||||
|
atomic64_add(map_100ns, &map->sum_map_100ns);
|
||||||
|
atomic64_add(unmap_100ns, &map->sum_unmap_100ns);
|
||||||
|
atomic64_add(map_sq, &map->sum_sq_map);
|
||||||
|
atomic64_add(unmap_sq, &map->sum_sq_unmap);
|
||||||
|
atomic64_inc(&map->loops);
|
||||||
|
}
|
||||||
|
|
||||||
|
out:
|
||||||
|
free_page((unsigned long)buf);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int do_map_benchmark(struct map_benchmark_data *map)
|
||||||
|
{
|
||||||
|
struct task_struct **tsk;
|
||||||
|
int threads = map->bparam.threads;
|
||||||
|
int node = map->bparam.node;
|
||||||
|
const cpumask_t *cpu_mask = cpumask_of_node(node);
|
||||||
|
u64 loops;
|
||||||
|
int ret = 0;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
tsk = kmalloc_array(threads, sizeof(*tsk), GFP_KERNEL);
|
||||||
|
if (!tsk)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
get_device(map->dev);
|
||||||
|
|
||||||
|
for (i = 0; i < threads; i++) {
|
||||||
|
tsk[i] = kthread_create_on_node(map_benchmark_thread, map,
|
||||||
|
map->bparam.node, "dma-map-benchmark/%d", i);
|
||||||
|
if (IS_ERR(tsk[i])) {
|
||||||
|
pr_err("create dma_map thread failed\n");
|
||||||
|
ret = PTR_ERR(tsk[i]);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (node != NUMA_NO_NODE)
|
||||||
|
kthread_bind_mask(tsk[i], cpu_mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* clear the old value in the previous benchmark */
|
||||||
|
atomic64_set(&map->sum_map_100ns, 0);
|
||||||
|
atomic64_set(&map->sum_unmap_100ns, 0);
|
||||||
|
atomic64_set(&map->sum_sq_map, 0);
|
||||||
|
atomic64_set(&map->sum_sq_unmap, 0);
|
||||||
|
atomic64_set(&map->loops, 0);
|
||||||
|
|
||||||
|
for (i = 0; i < threads; i++)
|
||||||
|
wake_up_process(tsk[i]);
|
||||||
|
|
||||||
|
msleep_interruptible(map->bparam.seconds * 1000);
|
||||||
|
|
||||||
|
/* wait for the completion of benchmark threads */
|
||||||
|
for (i = 0; i < threads; i++) {
|
||||||
|
ret = kthread_stop(tsk[i]);
|
||||||
|
if (ret)
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
loops = atomic64_read(&map->loops);
|
||||||
|
if (likely(loops > 0)) {
|
||||||
|
u64 map_variance, unmap_variance;
|
||||||
|
u64 sum_map = atomic64_read(&map->sum_map_100ns);
|
||||||
|
u64 sum_unmap = atomic64_read(&map->sum_unmap_100ns);
|
||||||
|
u64 sum_sq_map = atomic64_read(&map->sum_sq_map);
|
||||||
|
u64 sum_sq_unmap = atomic64_read(&map->sum_sq_unmap);
|
||||||
|
|
||||||
|
/* average latency */
|
||||||
|
map->bparam.avg_map_100ns = div64_u64(sum_map, loops);
|
||||||
|
map->bparam.avg_unmap_100ns = div64_u64(sum_unmap, loops);
|
||||||
|
|
||||||
|
/* standard deviation of latency */
|
||||||
|
map_variance = div64_u64(sum_sq_map, loops) -
|
||||||
|
map->bparam.avg_map_100ns *
|
||||||
|
map->bparam.avg_map_100ns;
|
||||||
|
unmap_variance = div64_u64(sum_sq_unmap, loops) -
|
||||||
|
map->bparam.avg_unmap_100ns *
|
||||||
|
map->bparam.avg_unmap_100ns;
|
||||||
|
map->bparam.map_stddev = int_sqrt64(map_variance);
|
||||||
|
map->bparam.unmap_stddev = int_sqrt64(unmap_variance);
|
||||||
|
}
|
||||||
|
|
||||||
|
out:
|
||||||
|
put_device(map->dev);
|
||||||
|
kfree(tsk);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static long map_benchmark_ioctl(struct file *file, unsigned int cmd,
|
||||||
|
unsigned long arg)
|
||||||
|
{
|
||||||
|
struct map_benchmark_data *map = file->private_data;
|
||||||
|
void __user *argp = (void __user *)arg;
|
||||||
|
u64 old_dma_mask;
|
||||||
|
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (copy_from_user(&map->bparam, argp, sizeof(map->bparam)))
|
||||||
|
return -EFAULT;
|
||||||
|
|
||||||
|
switch (cmd) {
|
||||||
|
case DMA_MAP_BENCHMARK:
|
||||||
|
if (map->bparam.threads == 0 ||
|
||||||
|
map->bparam.threads > DMA_MAP_MAX_THREADS) {
|
||||||
|
pr_err("invalid thread number\n");
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (map->bparam.seconds == 0 ||
|
||||||
|
map->bparam.seconds > DMA_MAP_MAX_SECONDS) {
|
||||||
|
pr_err("invalid duration seconds\n");
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (map->bparam.node != NUMA_NO_NODE &&
|
||||||
|
!node_possible(map->bparam.node)) {
|
||||||
|
pr_err("invalid numa node\n");
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (map->bparam.dma_dir) {
|
||||||
|
case DMA_MAP_BIDIRECTIONAL:
|
||||||
|
map->dir = DMA_BIDIRECTIONAL;
|
||||||
|
break;
|
||||||
|
case DMA_MAP_FROM_DEVICE:
|
||||||
|
map->dir = DMA_FROM_DEVICE;
|
||||||
|
break;
|
||||||
|
case DMA_MAP_TO_DEVICE:
|
||||||
|
map->dir = DMA_TO_DEVICE;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
pr_err("invalid DMA direction\n");
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
old_dma_mask = dma_get_mask(map->dev);
|
||||||
|
|
||||||
|
ret = dma_set_mask(map->dev,
|
||||||
|
DMA_BIT_MASK(map->bparam.dma_bits));
|
||||||
|
if (ret) {
|
||||||
|
pr_err("failed to set dma_mask on device %s\n",
|
||||||
|
dev_name(map->dev));
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = do_map_benchmark(map);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* restore the original dma_mask as many devices' dma_mask are
|
||||||
|
* set by architectures, acpi, busses. When we bind them back
|
||||||
|
* to their original drivers, those drivers shouldn't see
|
||||||
|
* dma_mask changed by benchmark
|
||||||
|
*/
|
||||||
|
dma_set_mask(map->dev, old_dma_mask);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (copy_to_user(argp, &map->bparam, sizeof(map->bparam)))
|
||||||
|
return -EFAULT;
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct file_operations map_benchmark_fops = {
|
||||||
|
.open = simple_open,
|
||||||
|
.unlocked_ioctl = map_benchmark_ioctl,
|
||||||
|
};
|
||||||
|
|
||||||
|
static void map_benchmark_remove_debugfs(void *data)
|
||||||
|
{
|
||||||
|
struct map_benchmark_data *map = (struct map_benchmark_data *)data;
|
||||||
|
|
||||||
|
debugfs_remove(map->debugfs);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int __map_benchmark_probe(struct device *dev)
|
||||||
|
{
|
||||||
|
struct dentry *entry;
|
||||||
|
struct map_benchmark_data *map;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
map = devm_kzalloc(dev, sizeof(*map), GFP_KERNEL);
|
||||||
|
if (!map)
|
||||||
|
return -ENOMEM;
|
||||||
|
map->dev = dev;
|
||||||
|
|
||||||
|
ret = devm_add_action(dev, map_benchmark_remove_debugfs, map);
|
||||||
|
if (ret) {
|
||||||
|
pr_err("Can't add debugfs remove action\n");
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* we only permit a device bound with this driver, 2nd probe
|
||||||
|
* will fail
|
||||||
|
*/
|
||||||
|
entry = debugfs_create_file("dma_map_benchmark", 0600, NULL, map,
|
||||||
|
&map_benchmark_fops);
|
||||||
|
if (IS_ERR(entry))
|
||||||
|
return PTR_ERR(entry);
|
||||||
|
map->debugfs = entry;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int map_benchmark_platform_probe(struct platform_device *pdev)
|
||||||
|
{
|
||||||
|
return __map_benchmark_probe(&pdev->dev);
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct platform_driver map_benchmark_platform_driver = {
|
||||||
|
.driver = {
|
||||||
|
.name = "dma_map_benchmark",
|
||||||
|
},
|
||||||
|
.probe = map_benchmark_platform_probe,
|
||||||
|
};
|
||||||
|
|
||||||
|
static int
|
||||||
|
map_benchmark_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
||||||
|
{
|
||||||
|
return __map_benchmark_probe(&pdev->dev);
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct pci_driver map_benchmark_pci_driver = {
|
||||||
|
.name = "dma_map_benchmark",
|
||||||
|
.probe = map_benchmark_pci_probe,
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Module init: register the PCI driver, then the platform driver. If the
 * second registration fails the first one is undone.
 *
 * Returns 0 on success or the error of the failing registration.
 */
static int __init map_benchmark_init(void)
{
	int err;

	err = pci_register_driver(&map_benchmark_pci_driver);
	if (err)
		return err;

	err = platform_driver_register(&map_benchmark_platform_driver);
	if (err)
		pci_unregister_driver(&map_benchmark_pci_driver);

	return err;
}
|
||||||
|
|
||||||
|
static void __exit map_benchmark_cleanup(void)
|
||||||
|
{
|
||||||
|
platform_driver_unregister(&map_benchmark_platform_driver);
|
||||||
|
pci_unregister_driver(&map_benchmark_pci_driver);
|
||||||
|
}
|
||||||
|
|
||||||
|
module_init(map_benchmark_init);
|
||||||
|
module_exit(map_benchmark_cleanup);
|
||||||
|
|
||||||
|
MODULE_AUTHOR("Barry Song <song.bao.hua@hisilicon.com>");
|
||||||
|
MODULE_DESCRIPTION("dma_map benchmark driver");
|
||||||
|
MODULE_LICENSE("GPL");
|
@ -149,7 +149,8 @@ dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page,
|
|||||||
if (WARN_ON_ONCE(!dev->dma_mask))
|
if (WARN_ON_ONCE(!dev->dma_mask))
|
||||||
return DMA_MAPPING_ERROR;
|
return DMA_MAPPING_ERROR;
|
||||||
|
|
||||||
if (dma_map_direct(dev, ops))
|
if (dma_map_direct(dev, ops) ||
|
||||||
|
arch_dma_map_page_direct(dev, page_to_phys(page) + offset + size))
|
||||||
addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);
|
addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);
|
||||||
else
|
else
|
||||||
addr = ops->map_page(dev, page, offset, size, dir, attrs);
|
addr = ops->map_page(dev, page, offset, size, dir, attrs);
|
||||||
@ -165,7 +166,8 @@ void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size,
|
|||||||
const struct dma_map_ops *ops = get_dma_ops(dev);
|
const struct dma_map_ops *ops = get_dma_ops(dev);
|
||||||
|
|
||||||
BUG_ON(!valid_dma_direction(dir));
|
BUG_ON(!valid_dma_direction(dir));
|
||||||
if (dma_map_direct(dev, ops))
|
if (dma_map_direct(dev, ops) ||
|
||||||
|
arch_dma_unmap_page_direct(dev, addr + size))
|
||||||
dma_direct_unmap_page(dev, addr, size, dir, attrs);
|
dma_direct_unmap_page(dev, addr, size, dir, attrs);
|
||||||
else if (ops->unmap_page)
|
else if (ops->unmap_page)
|
||||||
ops->unmap_page(dev, addr, size, dir, attrs);
|
ops->unmap_page(dev, addr, size, dir, attrs);
|
||||||
@ -188,7 +190,8 @@ int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents,
|
|||||||
if (WARN_ON_ONCE(!dev->dma_mask))
|
if (WARN_ON_ONCE(!dev->dma_mask))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (dma_map_direct(dev, ops))
|
if (dma_map_direct(dev, ops) ||
|
||||||
|
arch_dma_map_sg_direct(dev, sg, nents))
|
||||||
ents = dma_direct_map_sg(dev, sg, nents, dir, attrs);
|
ents = dma_direct_map_sg(dev, sg, nents, dir, attrs);
|
||||||
else
|
else
|
||||||
ents = ops->map_sg(dev, sg, nents, dir, attrs);
|
ents = ops->map_sg(dev, sg, nents, dir, attrs);
|
||||||
@ -207,7 +210,8 @@ void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg,
|
|||||||
|
|
||||||
BUG_ON(!valid_dma_direction(dir));
|
BUG_ON(!valid_dma_direction(dir));
|
||||||
debug_dma_unmap_sg(dev, sg, nents, dir);
|
debug_dma_unmap_sg(dev, sg, nents, dir);
|
||||||
if (dma_map_direct(dev, ops))
|
if (dma_map_direct(dev, ops) ||
|
||||||
|
arch_dma_unmap_sg_direct(dev, sg, nents))
|
||||||
dma_direct_unmap_sg(dev, sg, nents, dir, attrs);
|
dma_direct_unmap_sg(dev, sg, nents, dir, attrs);
|
||||||
else if (ops->unmap_sg)
|
else if (ops->unmap_sg)
|
||||||
ops->unmap_sg(dev, sg, nents, dir, attrs);
|
ops->unmap_sg(dev, sg, nents, dir, attrs);
|
||||||
|
@ -38,9 +38,6 @@ static void __init dma_atomic_pool_debugfs_init(void)
|
|||||||
struct dentry *root;
|
struct dentry *root;
|
||||||
|
|
||||||
root = debugfs_create_dir("dma_pools", NULL);
|
root = debugfs_create_dir("dma_pools", NULL);
|
||||||
if (IS_ERR_OR_NULL(root))
|
|
||||||
return;
|
|
||||||
|
|
||||||
debugfs_create_ulong("pool_size_dma", 0400, root, &pool_size_dma);
|
debugfs_create_ulong("pool_size_dma", 0400, root, &pool_size_dma);
|
||||||
debugfs_create_ulong("pool_size_dma32", 0400, root, &pool_size_dma32);
|
debugfs_create_ulong("pool_size_dma32", 0400, root, &pool_size_dma32);
|
||||||
debugfs_create_ulong("pool_size_kernel", 0400, root, &pool_size_kernel);
|
debugfs_create_ulong("pool_size_kernel", 0400, root, &pool_size_kernel);
|
||||||
|
6
tools/testing/selftests/dma/Makefile
Normal file
6
tools/testing/selftests/dma/Makefile
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
# SPDX-License-Identifier: GPL-2.0
|
||||||
|
CFLAGS += -I../../../../usr/include/
|
||||||
|
|
||||||
|
TEST_GEN_PROGS := dma_map_benchmark
|
||||||
|
|
||||||
|
include ../lib.mk
|
1
tools/testing/selftests/dma/config
Normal file
1
tools/testing/selftests/dma/config
Normal file
@ -0,0 +1 @@
|
|||||||
|
CONFIG_DMA_MAP_BENCHMARK=y
|
123
tools/testing/selftests/dma/dma_map_benchmark.c
Normal file
123
tools/testing/selftests/dma/dma_map_benchmark.c
Normal file
@ -0,0 +1,123 @@
|
|||||||
|
// SPDX-License-Identifier: GPL-2.0-only
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2020 Hisilicon Limited.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <sys/ioctl.h>
|
||||||
|
#include <sys/mman.h>
|
||||||
|
#include <linux/types.h>
|
||||||
|
|
||||||
|
/* ioctl request issued on /sys/kernel/debug/dma_map_benchmark */
#define DMA_MAP_BENCHMARK	_IOWR('d', 1, struct map_benchmark)

/* largest thread count and duration this tool accepts */
#define DMA_MAP_MAX_THREADS	1024
#define DMA_MAP_MAX_SECONDS	300

/* values for map_benchmark.dma_dir; they also index directions[] */
#define DMA_MAP_BIDIRECTIONAL	0
#define DMA_MAP_TO_DEVICE	1
#define DMA_MAP_FROM_DEVICE	2
|
||||||
|
|
||||||
|
/*
 * Printable names of the DMA directions, indexed by the DMA_MAP_* value
 * (0 = bidirectional, 1 = to device, 2 = from device). The table and the
 * strings it points to are read-only.
 */
static const char *const directions[] = {
	"BIDIRECTIONAL",
	"TO_DEVICE",
	"FROM_DEVICE",
};
|
||||||
|
|
||||||
|
struct map_benchmark {
|
||||||
|
__u64 avg_map_100ns; /* average map latency in 100ns */
|
||||||
|
__u64 map_stddev; /* standard deviation of map latency */
|
||||||
|
__u64 avg_unmap_100ns; /* as above */
|
||||||
|
__u64 unmap_stddev;
|
||||||
|
__u32 threads; /* how many threads will do map/unmap in parallel */
|
||||||
|
__u32 seconds; /* how long the test will last */
|
||||||
|
__s32 node; /* which numa node this benchmark will run on */
|
||||||
|
__u32 dma_bits; /* DMA addressing capability */
|
||||||
|
__u32 dma_dir; /* DMA data direction */
|
||||||
|
__u64 expansion[10]; /* For future use */
|
||||||
|
};
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
struct map_benchmark map;
|
||||||
|
int fd, opt;
|
||||||
|
/* default single thread, run 20 seconds on NUMA_NO_NODE */
|
||||||
|
int threads = 1, seconds = 20, node = -1;
|
||||||
|
/* default dma mask 32bit, bidirectional DMA */
|
||||||
|
int bits = 32, dir = DMA_MAP_BIDIRECTIONAL;
|
||||||
|
|
||||||
|
int cmd = DMA_MAP_BENCHMARK;
|
||||||
|
char *p;
|
||||||
|
|
||||||
|
while ((opt = getopt(argc, argv, "t:s:n:b:d:")) != -1) {
|
||||||
|
switch (opt) {
|
||||||
|
case 't':
|
||||||
|
threads = atoi(optarg);
|
||||||
|
break;
|
||||||
|
case 's':
|
||||||
|
seconds = atoi(optarg);
|
||||||
|
break;
|
||||||
|
case 'n':
|
||||||
|
node = atoi(optarg);
|
||||||
|
break;
|
||||||
|
case 'b':
|
||||||
|
bits = atoi(optarg);
|
||||||
|
break;
|
||||||
|
case 'd':
|
||||||
|
dir = atoi(optarg);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (threads <= 0 || threads > DMA_MAP_MAX_THREADS) {
|
||||||
|
fprintf(stderr, "invalid number of threads, must be in 1-%d\n",
|
||||||
|
DMA_MAP_MAX_THREADS);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (seconds <= 0 || seconds > DMA_MAP_MAX_SECONDS) {
|
||||||
|
fprintf(stderr, "invalid number of seconds, must be in 1-%d\n",
|
||||||
|
DMA_MAP_MAX_SECONDS);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* suppose the mininum DMA zone is 1MB in the world */
|
||||||
|
if (bits < 20 || bits > 64) {
|
||||||
|
fprintf(stderr, "invalid dma mask bit, must be in 20-64\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dir != DMA_MAP_BIDIRECTIONAL && dir != DMA_MAP_TO_DEVICE &&
|
||||||
|
dir != DMA_MAP_FROM_DEVICE) {
|
||||||
|
fprintf(stderr, "invalid dma direction\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
fd = open("/sys/kernel/debug/dma_map_benchmark", O_RDWR);
|
||||||
|
if (fd == -1) {
|
||||||
|
perror("open");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
map.seconds = seconds;
|
||||||
|
map.threads = threads;
|
||||||
|
map.node = node;
|
||||||
|
map.dma_bits = bits;
|
||||||
|
map.dma_dir = dir;
|
||||||
|
if (ioctl(fd, cmd, &map)) {
|
||||||
|
perror("ioctl");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s\n",
|
||||||
|
threads, seconds, node, dir[directions]);
|
||||||
|
printf("average map latency(us):%.1f standard deviation:%.1f\n",
|
||||||
|
map.avg_map_100ns/10.0, map.map_stddev/10.0);
|
||||||
|
printf("average unmap latency(us):%.1f standard deviation:%.1f\n",
|
||||||
|
map.avg_unmap_100ns/10.0, map.unmap_stddev/10.0);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user