/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *	Berkeley style UIO structures	-	Alan Cox 1994.
 */
#ifndef __LINUX_UIO_H
#define __LINUX_UIO_H

#include <linux/kernel.h>
#include <linux/thread_info.h>
#include <linux/mm_types.h>
#include <uapi/linux/uio.h>

struct page;

typedef unsigned int __bitwise iov_iter_extraction_t;

struct kvec {
	void *iov_base; /* and that should *never* hold a userland pointer */
	size_t iov_len;
};

enum iter_type {
	/* iter types */
	ITER_UBUF,
	ITER_IOVEC,
	ITER_BVEC,
	ITER_KVEC,
	ITER_XARRAY,
	ITER_DISCARD,
};

#define ITER_SOURCE	1	// == WRITE
#define ITER_DEST	0	// == READ

struct iov_iter_state {
	size_t iov_offset;
	size_t count;
	unsigned long nr_segs;
};

struct iov_iter {
	u8 iter_type;
	bool copy_mc;
	bool nofault;
	bool data_source;
	bool user_backed;
	size_t iov_offset;
	/*
	 * Hack alert: overlay ubuf_iovec with iovec + count, so
	 * that the members resolve correctly regardless of the type
	 * of iterator used. This means that you can use:
	 *
	 * &iter->__ubuf_iovec or iter->__iov
	 *
	 * interchangeably for the user_backed cases, hence simplifying
	 * some of the cases that need to deal with both.
	 */
	union {
		/*
		 * This really should be a const, but we cannot do that without
		 * also modifying any of the zero-filling iter init functions.
		 * Leave it non-const for now, but it should be treated as such.
		 */
		struct iovec __ubuf_iovec;
		struct {
			union {
				/* use iter_iov() to get the current vec */
				const struct iovec *__iov;
				const struct kvec *kvec;
				const struct bio_vec *bvec;
				struct xarray *xarray;
				void __user *ubuf;
			};
			size_t count;
		};
	};
	union {
		unsigned long nr_segs;
		loff_t xarray_start;
	};
};

static inline const struct iovec *iter_iov(const struct iov_iter *iter)
{
	if (iter->iter_type == ITER_UBUF)
		return (const struct iovec *) &iter->__ubuf_iovec;
	return iter->__iov;
}

#define iter_iov_addr(iter)	(iter_iov(iter)->iov_base + (iter)->iov_offset)
#define iter_iov_len(iter)	(iter_iov(iter)->iov_len - (iter)->iov_offset)
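
/*
 * Illustrative sketch (not part of the API surface declared here): for an
 * iovec- or ubuf-backed iterator, the helpers above expose the unconsumed
 * part of the current segment, e.g. a hypothetical loop that drains the
 * iterator segment by segment:
 *
 *	while (iov_iter_count(iter)) {
 *		void __user *p = iter_iov_addr(iter);
 *		size_t len = iter_iov_len(iter);
 *
 *		// ... consume up to len bytes starting at p ...
 *		iov_iter_advance(iter, len);
 *	}
 */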

static inline enum iter_type iov_iter_type(const struct iov_iter *i)
{
	return i->iter_type;
}

static inline void iov_iter_save_state(struct iov_iter *iter,
				       struct iov_iter_state *state)
{
	state->iov_offset = iter->iov_offset;
	state->count = iter->count;
	state->nr_segs = iter->nr_segs;
}
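
/*
 * Illustrative sketch (do_transfer() is a hypothetical caller-defined
 * helper): snapshot the iterator before an operation that may consume part
 * of it, and roll back with iov_iter_restore() if it has to be retried.
 *
 *	struct iov_iter_state state;
 *	ssize_t ret;
 *
 *	iov_iter_save_state(iter, &state);
 *	ret = do_transfer(iter);
 *	if (ret == -EAGAIN)
 *		iov_iter_restore(iter, &state);
 */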

static inline bool iter_is_ubuf(const struct iov_iter *i)
{
	return iov_iter_type(i) == ITER_UBUF;
}

static inline bool iter_is_iovec(const struct iov_iter *i)
{
	return iov_iter_type(i) == ITER_IOVEC;
}

static inline bool iov_iter_is_kvec(const struct iov_iter *i)
{
	return iov_iter_type(i) == ITER_KVEC;
}

static inline bool iov_iter_is_bvec(const struct iov_iter *i)
{
	return iov_iter_type(i) == ITER_BVEC;
}

static inline bool iov_iter_is_discard(const struct iov_iter *i)
{
	return iov_iter_type(i) == ITER_DISCARD;
}

static inline bool iov_iter_is_xarray(const struct iov_iter *i)
{
	return iov_iter_type(i) == ITER_XARRAY;
}

static inline unsigned char iov_iter_rw(const struct iov_iter *i)
{
	return i->data_source ? WRITE : READ;
}

static inline bool user_backed_iter(const struct iov_iter *i)
{
	return i->user_backed;
}

/*
 * Total number of bytes covered by an iovec.
 *
 * NOTE that it is not safe to use this function until all the iovec's
 * segment lengths have been validated, because the individual lengths can
 * overflow a size_t when added together.
 */
static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs)
{
	unsigned long seg;
	size_t ret = 0;

	for (seg = 0; seg < nr_segs; seg++)
		ret += iov[seg].iov_len;
	return ret;
}
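
/*
 * Illustrative sketch (buffer sizing only, error handling elided): once an
 * iovec array has been validated, e.g. via import_iovec() declared below,
 * iov_length() can safely size a bounce buffer for the whole vector.
 *
 *	size_t total = iov_length(iov, nr_segs);
 *	void *buf = kmalloc(total, GFP_KERNEL);
 */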

size_t copy_page_from_iter_atomic(struct page *page, size_t offset,
				  size_t bytes, struct iov_iter *i);
void iov_iter_advance(struct iov_iter *i, size_t bytes);
void iov_iter_revert(struct iov_iter *i, size_t bytes);
size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t bytes);
size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t bytes);
size_t iov_iter_single_seg_count(const struct iov_iter *i);
size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i);
size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
			   struct iov_iter *i);

size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i);
size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i);

static inline size_t copy_folio_to_iter(struct folio *folio, size_t offset,
		size_t bytes, struct iov_iter *i)
{
	return copy_page_to_iter(&folio->page, offset, bytes, i);
}

static inline size_t copy_folio_from_iter_atomic(struct folio *folio,
		size_t offset, size_t bytes, struct iov_iter *i)
{
	return copy_page_from_iter_atomic(&folio->page, offset, bytes, i);
}

size_t copy_page_to_iter_nofault(struct page *page, unsigned offset,
				 size_t bytes, struct iov_iter *i);

static __always_inline __must_check
size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
	if (check_copy_size(addr, bytes, true))
		return _copy_to_iter(addr, bytes, i);
	return 0;
}

static __always_inline __must_check
size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
	if (check_copy_size(addr, bytes, false))
		return _copy_from_iter(addr, bytes, i);
	return 0;
}

static __always_inline __must_check
bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
{
	size_t copied = copy_from_iter(addr, bytes, i);

	if (likely(copied == bytes))
		return true;
	iov_iter_revert(i, copied);
	return false;
}
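
/*
 * Illustrative sketch: copy_from_iter() may return a short count, while the
 * _full variant gives all-or-nothing semantics, reverting the iterator on a
 * short copy. A hypothetical caller pulling a fixed-size header (struct
 * my_hdr is an assumption) could do:
 *
 *	struct my_hdr hdr;
 *
 *	if (!copy_from_iter_full(&hdr, sizeof(hdr), iter))
 *		return -EFAULT;
 */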

static __always_inline __must_check
size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	if (check_copy_size(addr, bytes, false))
		return _copy_from_iter_nocache(addr, bytes, i);
	return 0;
}

static __always_inline __must_check
bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	size_t copied = copy_from_iter_nocache(addr, bytes, i);

	if (likely(copied == bytes))
		return true;
	iov_iter_revert(i, copied);
	return false;
}

#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/*
 * Note, users like pmem that depend on the stricter semantics of
 * _copy_from_iter_flushcache() than _copy_from_iter_nocache() must check for
 * IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) before assuming that the
 * destination is flushed from the cache on return.
 */
size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i);
#else
#define _copy_from_iter_flushcache _copy_from_iter_nocache
#endif
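
/*
 * Illustrative sketch of the check described above (my_flush_dest() is a
 * hypothetical fallback, not a real API): only rely on the flush-on-return
 * semantics when the architecture actually provides them.
 *
 *	size_t copied = _copy_from_iter_flushcache(dst, bytes, iter);
 *
 *	if (!IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE))
 *		my_flush_dest(dst, copied);
 */
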
#ifdef CONFIG_ARCH_HAS_COPY_MC
size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
static inline void iov_iter_set_copy_mc(struct iov_iter *i)
{
	i->copy_mc = true;
}

static inline bool iov_iter_is_copy_mc(const struct iov_iter *i)
{
	return i->copy_mc;
}
#else
#define _copy_mc_to_iter _copy_to_iter
static inline void iov_iter_set_copy_mc(struct iov_iter *i) { }
static inline bool iov_iter_is_copy_mc(const struct iov_iter *i)
{
	return false;
}
#endif
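
/*
 * Illustrative sketch (the page/PAGE_SIZE setup is an assumption): setting
 * copy_mc asks the copy primitives to use a machine-check-safe copy, so a
 * hardware memory error in the source is reported as a short copy instead
 * of a kernel panic. The coredump path (dump_user_range()) is currently the
 * only user, marking the iterator over the user pages it writes out roughly
 * like this:
 *
 *	struct bio_vec bvec;
 *	struct iov_iter iter;
 *
 *	bvec_set_page(&bvec, page, PAGE_SIZE, 0);
 *	iov_iter_bvec(&iter, ITER_SOURCE, &bvec, 1, PAGE_SIZE);
 *	iov_iter_set_copy_mc(&iter);
 *	// copies that read from this iterator now tolerate poisoned memory
 */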

size_t iov_iter_zero(size_t bytes, struct iov_iter *);
bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask,
			unsigned len_mask);
unsigned long iov_iter_alignment(const struct iov_iter *i);
unsigned long iov_iter_gap_alignment(const struct iov_iter *i);
void iov_iter_init(struct iov_iter *i, unsigned int direction, const struct iovec *iov,
			unsigned long nr_segs, size_t count);
void iov_iter_kvec(struct iov_iter *i, unsigned int direction, const struct kvec *kvec,
			unsigned long nr_segs, size_t count);
void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_vec *bvec,
			unsigned long nr_segs, size_t count);
void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count);
void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray *xarray,
		     loff_t start, size_t count);
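
/*
 * Illustrative sketch (buf/len are assumptions): wrapping a kernel buffer in
 * a kvec-backed iterator. ITER_DEST/ITER_SOURCE give the direction of data
 * flow relative to the iterator, i.e. its read/write sense.
 *
 *	struct kvec kv = { .iov_base = buf, .iov_len = len };
 *	struct iov_iter iter;
 *
 *	iov_iter_kvec(&iter, ITER_DEST, &kv, 1, len);
 *	// iter now describes buf and can be filled with copy_to_iter(src, n, &iter)
 *	// or handed to a read-style consumer.
 */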

ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages,
			size_t maxsize, unsigned maxpages, size_t *start);
ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, struct page ***pages,
			size_t maxsize, size_t *start);
int iov_iter_npages(const struct iov_iter *i, int maxpages);
void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state);

const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags);

static inline size_t iov_iter_count(const struct iov_iter *i)
{
	return i->count;
}

/*
 * Cap the iov_iter by given limit; note that the second argument is
 * *not* the new size - it's upper limit for such. Passing it a value
 * greater than the amount of data in iov_iter is fine - it'll just do
 * nothing in that case.
 */
static inline void iov_iter_truncate(struct iov_iter *i, u64 count)
{
	/*
	 * count doesn't have to fit in size_t - comparison extends both
	 * operands to u64 here and any value that would be truncated by
	 * conversion in assignment is by definition greater than all
	 * values of size_t, including old i->count.
	 */
	if (i->count > count)
		i->count = count;
}

/*
 * reexpand a previously truncated iterator; count must be no more than how much
 * we had shrunk it.
 */
static inline void iov_iter_reexpand(struct iov_iter *i, size_t count)
{
	i->count = count;
}

static inline int
iov_iter_npages_cap(struct iov_iter *i, int maxpages, size_t max_bytes)
{
	size_t shorted = 0;
	int npages;

	if (iov_iter_count(i) > max_bytes) {
		shorted = iov_iter_count(i) - max_bytes;
		iov_iter_truncate(i, max_bytes);
	}
	npages = iov_iter_npages(i, maxpages);
	if (shorted)
		iov_iter_reexpand(i, iov_iter_count(i) + shorted);

	return npages;
}

struct csum_state {
	__wsum csum;
	size_t off;
};

size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csstate, struct iov_iter *i);
size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);

static __always_inline __must_check
bool csum_and_copy_from_iter_full(void *addr, size_t bytes,
				  __wsum *csum, struct iov_iter *i)
{
	size_t copied = csum_and_copy_from_iter(addr, bytes, csum, i);

	if (likely(copied == bytes))
		return true;
	iov_iter_revert(i, copied);
	return false;
}
size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
		struct iov_iter *i);

struct iovec *iovec_from_user(const struct iovec __user *uvector,
		unsigned long nr_segs, unsigned long fast_segs,
		struct iovec *fast_iov, bool compat);
ssize_t import_iovec(int type, const struct iovec __user *uvec,
		 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
		 struct iov_iter *i);
ssize_t __import_iovec(int type, const struct iovec __user *uvec,
		 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
		 struct iov_iter *i, bool compat);
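
/*
 * Illustrative sketch of the import_iovec() calling convention (names are
 * assumptions): import a user iovec array, validate it, and set up an
 * iov_iter over it in one go. The kernel copy lands in the caller-supplied
 * fast array when it fits, otherwise in an allocation the caller frees when
 * done; a negative errno is returned on failure.
 *
 *	struct iovec fast[UIO_FASTIOV], *iov = fast;
 *	struct iov_iter iter;
 *	ssize_t ret;
 *
 *	ret = import_iovec(ITER_DEST, uvec, nr_segs, ARRAY_SIZE(fast),
 *			   &iov, &iter);
 *	if (ret < 0)
 *		return ret;
 *	// ... use &iter ...
 *	kfree(iov);
 */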

int import_single_range(int type, void __user *buf, size_t len,
		 struct iovec *iov, struct iov_iter *i);
int import_ubuf(int type, void __user *buf, size_t len, struct iov_iter *i);

static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction,
			void __user *buf, size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	*i = (struct iov_iter) {
		.iter_type = ITER_UBUF,
		.copy_mc = false,
		.user_backed = true,
		.data_source = direction,
		.ubuf = buf,
		.count = count,
		.nr_segs = 1
	};
}
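
/*
 * Illustrative sketch (ubuf/len are assumptions): describing a single
 * contiguous user buffer without building an iovec array. ITER_DEST marks
 * the buffer as the destination of the transfer (a read-style operation).
 *
 *	struct iov_iter iter;
 *
 *	iov_iter_ubuf(&iter, ITER_DEST, ubuf, len);
 *	// &iter can now be passed to the same consumers as an iovec-backed iter.
 */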

/* Flags for iov_iter_get/extract_pages*() */
/* Allow P2PDMA on the extracted pages */
#define ITER_ALLOW_P2PDMA	((__force iov_iter_extraction_t)0x01)

ssize_t iov_iter_extract_pages(struct iov_iter *i, struct page ***pages,
			       size_t maxsize, unsigned int maxpages,
			       iov_iter_extraction_t extraction_flags,
			       size_t *offset0);

/**
 * iov_iter_extract_will_pin - Indicate how pages from the iterator will be retained
 * @iter: The iterator
 *
 * Examine the iterator and indicate by returning true or false as to how, if
 * at all, pages extracted from the iterator will be retained by the extraction
 * function.
 *
 * %true indicates that the pages will have a pin placed in them that the
 * caller must unpin. This must be done for DMA/async DIO to force fork()
 * to forcibly copy a page for the child (the parent must retain the original
 * page).
 *
 * %false indicates that no measures are taken and that it's up to the caller
 * to retain the pages.
 */
static inline bool iov_iter_extract_will_pin(const struct iov_iter *iter)
{
	return user_backed_iter(iter);
}
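
/*
 * Illustrative sketch of the contract described above (sizes, error handling
 * and the DMA step are assumptions): extract page pointers from an iterator
 * and release them afterwards only if they were pinned.
 *
 *	struct page **pages = NULL;
 *	size_t offset;
 *	ssize_t len;
 *
 *	len = iov_iter_extract_pages(iter, &pages, maxsize, maxpages, 0, &offset);
 *	if (len > 0) {
 *		// ... map pages[] for DMA, starting at offset within pages[0] ...
 *		if (iov_iter_extract_will_pin(iter))
 *			unpin_user_page(pages[0]);	// and likewise for the rest
 *	}
 */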

struct sg_table;
ssize_t extract_iter_to_sg(struct iov_iter *iter, size_t len,
			   struct sg_table *sgtable, unsigned int sg_max,
			   iov_iter_extraction_t extraction_flags);

#endif