mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-08 14:13:53 +00:00
4635873c56
sg_alloc_table_chained() currently allows the caller to provide one preallocated SGL and returns if the requested number of entries isn't bigger than the size of that SGL. This is used to inline an SGL for an I/O request. However, the scatter-gather code only allows the size of the first preallocated SGL to be SG_CHUNK_SIZE (128). This means a substantial amount of memory (4KB) is claimed for the SGL for each I/O request. If the I/O is small, it would be prudent to allocate a smaller SGL. Introduce an extra parameter to sg_alloc_table_chained() and sg_free_table_chained() for specifying the size of the preallocated SGL. Both __sg_free_table() and __sg_alloc_table() assume that each SGL has the same size except for the last one. Change the code to allow both functions to accept a variable size for the first preallocated SGL. [mkp: attempted to clarify commit desc] Cc: Christoph Hellwig <hch@lst.de> Cc: Bart Van Assche <bvanassche@acm.org> Cc: Ewan D. Milne <emilne@redhat.com> Cc: Hannes Reinecke <hare@suse.com> Cc: Sagi Grimberg <sagi@grimberg.me> Cc: Chuck Lever <chuck.lever@oracle.com> Cc: netdev@vger.kernel.org Cc: linux-nvme@lists.infradead.org Suggested-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Ming Lei <ming.lei@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
468 lines
15 KiB
C
468 lines
15 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _LINUX_SCATTERLIST_H
|
|
#define _LINUX_SCATTERLIST_H
|
|
|
|
#include <linux/string.h>
|
|
#include <linux/types.h>
|
|
#include <linux/bug.h>
|
|
#include <linux/mm.h>
|
|
#include <asm/io.h>
|
|
|
|
struct scatterlist {
|
|
unsigned long page_link;
|
|
unsigned int offset;
|
|
unsigned int length;
|
|
dma_addr_t dma_address;
|
|
#ifdef CONFIG_NEED_SG_DMA_LENGTH
|
|
unsigned int dma_length;
|
|
#endif
|
|
};
|
|
|
|
/*
 * Since the length field of struct scatterlist is an unsigned int, below we
 * define the maximum length in bytes that can be stored in one scatterlist
 * entry: UINT_MAX masked with PAGE_MASK.
 */
#define SCATTERLIST_MAX_SEGMENT	(UINT_MAX & PAGE_MASK)
|
|
|
|
/*
 * These macros should be used after a dma_map_sg call has been done
 * to get bus addresses of each of the SG entries and their lengths.
 * You should only work with the number of sg entries dma_map_sg
 * returns, or alternatively stop on the first sg_dma_len(sg) which
 * is 0.
 */
#define sg_dma_address(sg)	((sg)->dma_address)

/*
 * sg_dma_len() reads the dedicated dma_length field when the architecture
 * provides one (CONFIG_NEED_SG_DMA_LENGTH) and the plain length field
 * otherwise.
 */
#ifdef CONFIG_NEED_SG_DMA_LENGTH
#define sg_dma_len(sg)		((sg)->dma_length)
#else
#define sg_dma_len(sg)		((sg)->length)
#endif
|
|
|
|
/* Describes a scatterlist together with its entry counts. */
struct sg_table {
	struct scatterlist *sgl;	/* the list */
	unsigned int nents;		/* number of mapped entries */
	unsigned int orig_nents;	/* original size of list */
};
|
|
|
|
/*
|
|
* Notes on SG table design.
|
|
*
|
|
* We use the unsigned long page_link field in the scatterlist struct to place
|
|
* the page pointer AND encode information about the sg table as well. The two
|
|
* lower bits are reserved for this information.
|
|
*
|
|
* If bit 0 is set, then the page_link contains a pointer to the next sg
|
|
* table list. Otherwise the next entry is at sg + 1.
|
|
*
|
|
* If bit 1 is set, then this sg entry is the last element in a list.
|
|
*
|
|
* See sg_next().
|
|
*
|
|
*/
|
|
|
|
/* Flag bits kept in the two low bits of scatterlist::page_link. */
#define SG_CHAIN	0x01UL	/* page_link points to the next sg table list */
#define SG_END		0x02UL	/* this entry is the last element in a list */

/*
 * We overload the LSB of the page pointer to indicate whether it's
 * a valid sg entry, or whether it points to the start of a new scatterlist.
 * Those low bits are there for everyone! (thanks mason :-)
 *
 * sg_chain_ptr() strips both flag bits and yields the stored pointer as a
 * struct scatterlist pointer.
 */
#define sg_is_chain(sg)		((sg)->page_link & SG_CHAIN)
#define sg_is_last(sg)		((sg)->page_link & SG_END)
#define sg_chain_ptr(sg)	\
	((struct scatterlist *) ((sg)->page_link & ~(SG_CHAIN | SG_END)))
|
|
|
|
/**
|
|
* sg_assign_page - Assign a given page to an SG entry
|
|
* @sg: SG entry
|
|
* @page: The page
|
|
*
|
|
* Description:
|
|
* Assign page to sg entry. Also see sg_set_page(), the most commonly used
|
|
* variant.
|
|
*
|
|
**/
|
|
static inline void sg_assign_page(struct scatterlist *sg, struct page *page)
|
|
{
|
|
unsigned long page_link = sg->page_link & (SG_CHAIN | SG_END);
|
|
|
|
/*
|
|
* In order for the low bit stealing approach to work, pages
|
|
* must be aligned at a 32-bit boundary as a minimum.
|
|
*/
|
|
BUG_ON((unsigned long) page & (SG_CHAIN | SG_END));
|
|
#ifdef CONFIG_DEBUG_SG
|
|
BUG_ON(sg_is_chain(sg));
|
|
#endif
|
|
sg->page_link = page_link | (unsigned long) page;
|
|
}
|
|
|
|
/**
|
|
* sg_set_page - Set sg entry to point at given page
|
|
* @sg: SG entry
|
|
* @page: The page
|
|
* @len: Length of data
|
|
* @offset: Offset into page
|
|
*
|
|
* Description:
|
|
* Use this function to set an sg entry pointing at a page, never assign
|
|
* the page directly. We encode sg table information in the lower bits
|
|
* of the page pointer. See sg_page() for looking up the page belonging
|
|
* to an sg entry.
|
|
*
|
|
**/
|
|
static inline void sg_set_page(struct scatterlist *sg, struct page *page,
|
|
unsigned int len, unsigned int offset)
|
|
{
|
|
sg_assign_page(sg, page);
|
|
sg->offset = offset;
|
|
sg->length = len;
|
|
}
|
|
|
|
static inline struct page *sg_page(struct scatterlist *sg)
|
|
{
|
|
#ifdef CONFIG_DEBUG_SG
|
|
BUG_ON(sg_is_chain(sg));
|
|
#endif
|
|
return (struct page *)((sg)->page_link & ~(SG_CHAIN | SG_END));
|
|
}
|
|
|
|
/**
 * sg_set_buf - Set sg entry to point at given data
 * @sg:		 SG entry
 * @buf:	 Data
 * @buflen:	 Data length
 *
 * Description:
 *   The page is derived with virt_to_page() and the offset with
 *   offset_in_page(). With CONFIG_DEBUG_SG, a @buf that fails
 *   virt_addr_valid() triggers BUG_ON().
 *
 **/
static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
			      unsigned int buflen)
{
#ifdef CONFIG_DEBUG_SG
	BUG_ON(!virt_addr_valid(buf));
#endif
	sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf));
}
|
|
|
|
/*
 * Loop over each sg element, following the pointer to a new list if necessary.
 * @__i counts from 0 up to @nr while @sg is advanced with sg_next().
 */
#define for_each_sg(sglist, sg, nr, __i)	\
	for (__i = 0, sg = (sglist); __i < (nr); __i++, sg = sg_next(sg))
|
|
|
|
/**
|
|
* sg_chain - Chain two sglists together
|
|
* @prv: First scatterlist
|
|
* @prv_nents: Number of entries in prv
|
|
* @sgl: Second scatterlist
|
|
*
|
|
* Description:
|
|
* Links @prv@ and @sgl@ together, to form a longer scatterlist.
|
|
*
|
|
**/
|
|
static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
|
|
struct scatterlist *sgl)
|
|
{
|
|
/*
|
|
* offset and length are unused for chain entry. Clear them.
|
|
*/
|
|
prv[prv_nents - 1].offset = 0;
|
|
prv[prv_nents - 1].length = 0;
|
|
|
|
/*
|
|
* Set lowest bit to indicate a link pointer, and make sure to clear
|
|
* the termination bit if it happens to be set.
|
|
*/
|
|
prv[prv_nents - 1].page_link = ((unsigned long) sgl | SG_CHAIN)
|
|
& ~SG_END;
|
|
}
|
|
|
|
/**
|
|
* sg_mark_end - Mark the end of the scatterlist
|
|
* @sg: SG entryScatterlist
|
|
*
|
|
* Description:
|
|
* Marks the passed in sg entry as the termination point for the sg
|
|
* table. A call to sg_next() on this entry will return NULL.
|
|
*
|
|
**/
|
|
static inline void sg_mark_end(struct scatterlist *sg)
|
|
{
|
|
/*
|
|
* Set termination bit, clear potential chain bit
|
|
*/
|
|
sg->page_link |= SG_END;
|
|
sg->page_link &= ~SG_CHAIN;
|
|
}
|
|
|
|
/**
|
|
* sg_unmark_end - Undo setting the end of the scatterlist
|
|
* @sg: SG entryScatterlist
|
|
*
|
|
* Description:
|
|
* Removes the termination marker from the given entry of the scatterlist.
|
|
*
|
|
**/
|
|
static inline void sg_unmark_end(struct scatterlist *sg)
|
|
{
|
|
sg->page_link &= ~SG_END;
|
|
}
|
|
|
|
/**
|
|
* sg_phys - Return physical address of an sg entry
|
|
* @sg: SG entry
|
|
*
|
|
* Description:
|
|
* This calls page_to_phys() on the page in this sg entry, and adds the
|
|
* sg offset. The caller must know that it is legal to call page_to_phys()
|
|
* on the sg page.
|
|
*
|
|
**/
|
|
static inline dma_addr_t sg_phys(struct scatterlist *sg)
|
|
{
|
|
return page_to_phys(sg_page(sg)) + sg->offset;
|
|
}
|
|
|
|
/**
|
|
* sg_virt - Return virtual address of an sg entry
|
|
* @sg: SG entry
|
|
*
|
|
* Description:
|
|
* This calls page_address() on the page in this sg entry, and adds the
|
|
* sg offset. The caller must know that the sg page has a valid virtual
|
|
* mapping.
|
|
*
|
|
**/
|
|
static inline void *sg_virt(struct scatterlist *sg)
|
|
{
|
|
return page_address(sg_page(sg)) + sg->offset;
|
|
}
|
|
|
|
/**
|
|
* sg_init_marker - Initialize markers in sg table
|
|
* @sgl: The SG table
|
|
* @nents: Number of entries in table
|
|
*
|
|
**/
|
|
static inline void sg_init_marker(struct scatterlist *sgl,
|
|
unsigned int nents)
|
|
{
|
|
sg_mark_end(&sgl[nents - 1]);
|
|
}
|
|
|
|
int sg_nents(struct scatterlist *sg);
|
|
int sg_nents_for_len(struct scatterlist *sg, u64 len);
|
|
struct scatterlist *sg_next(struct scatterlist *);
|
|
struct scatterlist *sg_last(struct scatterlist *s, unsigned int);
|
|
void sg_init_table(struct scatterlist *, unsigned int);
|
|
void sg_init_one(struct scatterlist *, const void *, unsigned int);
|
|
int sg_split(struct scatterlist *in, const int in_mapped_nents,
|
|
const off_t skip, const int nb_splits,
|
|
const size_t *split_sizes,
|
|
struct scatterlist **out, int *out_mapped_nents,
|
|
gfp_t gfp_mask);
|
|
|
|
/*
 * Function types for the allocator / deallocator callbacks accepted by
 * __sg_alloc_table() and __sg_free_table().
 */
typedef struct scatterlist *(sg_alloc_fn)(unsigned int, gfp_t);
typedef void (sg_free_fn)(struct scatterlist *, unsigned int);
|
|
|
|
void __sg_free_table(struct sg_table *, unsigned int, unsigned int,
|
|
sg_free_fn *);
|
|
void sg_free_table(struct sg_table *);
|
|
int __sg_alloc_table(struct sg_table *, unsigned int, unsigned int,
|
|
struct scatterlist *, unsigned int, gfp_t, sg_alloc_fn *);
|
|
int sg_alloc_table(struct sg_table *, unsigned int, gfp_t);
|
|
int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
|
|
unsigned int n_pages, unsigned int offset,
|
|
unsigned long size, unsigned int max_segment,
|
|
gfp_t gfp_mask);
|
|
int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
|
|
unsigned int n_pages, unsigned int offset,
|
|
unsigned long size, gfp_t gfp_mask);
|
|
|
|
/* sgl_* helpers; declared only when CONFIG_SGL_ALLOC is set. */
#ifdef CONFIG_SGL_ALLOC
struct scatterlist *sgl_alloc_order(unsigned long long length,
				    unsigned int order, bool chainable,
				    gfp_t gfp, unsigned int *nent_p);
struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp,
			      unsigned int *nent_p);
void sgl_free_n_order(struct scatterlist *sgl, int nents, int order);
void sgl_free_order(struct scatterlist *sgl, int order);
void sgl_free(struct scatterlist *sgl);
#endif /* CONFIG_SGL_ALLOC */
|
|
|
|
/* Copy / zero helpers operating on an sg list; only declared in this header. */
size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf,
		      size_t buflen, off_t skip, bool to_buffer);

size_t sg_copy_from_buffer(struct scatterlist *sgl, unsigned int nents,
			   const void *buf, size_t buflen);
size_t sg_copy_to_buffer(struct scatterlist *sgl, unsigned int nents,
			 void *buf, size_t buflen);

size_t sg_pcopy_from_buffer(struct scatterlist *sgl, unsigned int nents,
			    const void *buf, size_t buflen, off_t skip);
size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents,
			  void *buf, size_t buflen, off_t skip);
size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents,
		       size_t buflen, off_t skip);
|
|
|
|
/*
 * Maximum number of entries that will be allocated in one piece, if
 * a list larger than this is required then chaining will be utilized.
 * Evaluates to the number of struct scatterlist entries that fit in
 * PAGE_SIZE bytes.
 */
#define SG_MAX_SINGLE_ALLOC		(PAGE_SIZE / sizeof(struct scatterlist))
|
|
|
|
/*
 * The maximum number of SG segments that we will put inside a
 * scatterlist (unless chaining is used). Should ideally fit inside a
 * single page, to avoid a higher order allocation.  We could define this
 * to SG_MAX_SINGLE_ALLOC to pack correctly at the highest order.  The
 * minimum value is 32
 */
#define SG_CHUNK_SIZE	128
|
|
|
|
/*
 * Like SG_CHUNK_SIZE, but for archs that have sg chaining. This limit
 * is totally arbitrary, a setting of 2048 will get you at least 8mb ios.
 * When CONFIG_ARCH_NO_SG_CHAIN is set the limit falls back to
 * SG_CHUNK_SIZE.
 */
#ifdef CONFIG_ARCH_NO_SG_CHAIN
#define SG_MAX_SEGMENTS	SG_CHUNK_SIZE
#else
#define SG_MAX_SEGMENTS	2048
#endif
|
|
|
|
/*
 * Chained sg_table helpers; declared only when CONFIG_SG_POOL is set.
 * @first_chunk is a caller-provided preallocated SGL and
 * @nents_first_chunk specifies its size.
 */
#ifdef CONFIG_SG_POOL
void sg_free_table_chained(struct sg_table *table,
			   unsigned int nents_first_chunk);
int sg_alloc_table_chained(struct sg_table *table, int nents,
			   struct scatterlist *first_chunk,
			   unsigned int nents_first_chunk);
#endif
|
|
|
|
/*
 * sg page iterator
 *
 * Iterates over sg entries page-by-page.  On each successful iteration, you
 * can call sg_page_iter_page(@piter) to get the current page.
 * @piter->sg will point to the sg holding this page and @piter->sg_pgoffset to
 * the page's page offset within the sg. The iteration will stop either when a
 * maximum number of sg entries was reached or a terminating sg
 * (sg_is_last(sg) is set) was reached.
 */
struct sg_page_iter {
	struct scatterlist	*sg;		/* sg holding the page */
	unsigned int		sg_pgoffset;	/* page offset within the sg */

	/* these are internal states, keep away */
	unsigned int		__nents;	/* remaining sg entries */
	int			__pg_advance;	/* nr pages to advance at the
						 * next step */
};
|
|
|
|
/*
|
|
* sg page iterator for DMA addresses
|
|
*
|
|
* This is the same as sg_page_iter however you can call
|
|
* sg_page_iter_dma_address(@dma_iter) to get the page's DMA
|
|
* address. sg_page_iter_page() cannot be called on this iterator.
|
|
*/
|
|
struct sg_dma_page_iter {
|
|
struct sg_page_iter base;
|
|
};
|
|
|
|
/*
 * Page iterator primitives used by for_each_sg_page() and
 * for_each_sg_dma_page(); only declared in this header.
 */
bool __sg_page_iter_next(struct sg_page_iter *piter);
bool __sg_page_iter_dma_next(struct sg_dma_page_iter *dma_iter);
void __sg_page_iter_start(struct sg_page_iter *piter,
			  struct scatterlist *sglist, unsigned int nents,
			  unsigned long pgoffset);
|
|
/**
|
|
* sg_page_iter_page - get the current page held by the page iterator
|
|
* @piter: page iterator holding the page
|
|
*/
|
|
static inline struct page *sg_page_iter_page(struct sg_page_iter *piter)
|
|
{
|
|
return nth_page(sg_page(piter->sg), piter->sg_pgoffset);
|
|
}
|
|
|
|
/**
|
|
* sg_page_iter_dma_address - get the dma address of the current page held by
|
|
* the page iterator.
|
|
* @dma_iter: page iterator holding the page
|
|
*/
|
|
static inline dma_addr_t
|
|
sg_page_iter_dma_address(struct sg_dma_page_iter *dma_iter)
|
|
{
|
|
return sg_dma_address(dma_iter->base.sg) +
|
|
(dma_iter->base.sg_pgoffset << PAGE_SHIFT);
|
|
}
|
|
|
|
/**
 * for_each_sg_page - iterate over the pages of the given sg list
 * @sglist:	sglist to iterate over
 * @piter:	page iterator to hold current page, sg, sg_pgoffset
 * @nents:	maximum number of sg entries to iterate over
 * @pgoffset:	starting page offset
 *
 * Callers may use sg_page_iter_page() to get each page pointer.
 */
#define for_each_sg_page(sglist, piter, nents, pgoffset)		   \
	for (__sg_page_iter_start((piter), (sglist), (nents), (pgoffset)); \
	     __sg_page_iter_next(piter);)
|
|
|
|
/**
 * for_each_sg_dma_page - iterate over the pages of the given sg list
 * @sglist:	sglist to iterate over
 * @dma_iter:	page iterator to hold current page
 * @dma_nents:	maximum number of sg entries to iterate over, this is the value
 *              returned from dma_map_sg
 * @pgoffset:	starting page offset
 *
 * Callers may use sg_page_iter_dma_address() to get each page's DMA address.
 */
#define for_each_sg_dma_page(sglist, dma_iter, dma_nents, pgoffset)	    \
	for (__sg_page_iter_start(&(dma_iter)->base, (sglist), (dma_nents), \
				  (pgoffset));				    \
	     __sg_page_iter_dma_next(dma_iter);)
|
|
|
|
/*
 * Mapping sg iterator
 *
 * Iterates over sg entries mapping page-by-page.  On each successful
 * iteration, @miter->page points to the mapped page and
 * @miter->length bytes of data can be accessed at @miter->addr.  As
 * long as an iteration is enclosed between start and stop, the user
 * is free to choose control structure and when to stop.
 *
 * @miter->consumed is set to @miter->length on each iteration.  It
 * can be adjusted if the user can't consume all the bytes in one go.
 * Also, a stopped iteration can be resumed by calling next on it.
 * This is useful when iteration needs to release all resources and
 * continue later (e.g. at the next interrupt).
 */

#define SG_MITER_ATOMIC		(1 << 0)	 /* use kmap_atomic */
#define SG_MITER_TO_SG		(1 << 1)	/* flush back to phys on unmap */
#define SG_MITER_FROM_SG	(1 << 2)	/* nop */
|
|
|
|
struct sg_mapping_iter {
|
|
/* the following three fields can be accessed directly */
|
|
struct page *page; /* currently mapped page */
|
|
void *addr; /* pointer to the mapped area */
|
|
size_t length; /* length of the mapped area */
|
|
size_t consumed; /* number of consumed bytes */
|
|
struct sg_page_iter piter; /* page iterator */
|
|
|
|
/* these are internal states, keep away */
|
|
unsigned int __offset; /* offset within page */
|
|
unsigned int __remaining; /* remaining bytes on page */
|
|
unsigned int __flags;
|
|
};
|
|
|
|
void sg_miter_start(struct sg_mapping_iter *miter, struct scatterlist *sgl,
|
|
unsigned int nents, unsigned int flags);
|
|
bool sg_miter_skip(struct sg_mapping_iter *miter, off_t offset);
|
|
bool sg_miter_next(struct sg_mapping_iter *miter);
|
|
void sg_miter_stop(struct sg_mapping_iter *miter);
|
|
|
|
#endif /* _LINUX_SCATTERLIST_H */
|