mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-01 02:33:57 +00:00
btrfs: add BTRFS_IOC_ENCODED_READ ioctl
There are 4 main cases:

1. Inline extents: we copy the data straight out of the extent buffer.
2. Hole/preallocated extents: we fill in zeroes.
3. Regular, uncompressed extents: we read the sectors we need directly
   from disk.
4. Regular, compressed extents: we read the entire compressed extent
   from disk and indicate what subset of the decompressed extent is in
   the file.

This initial implementation simplifies a few things that can be improved
in the future:

- Cases 1, 3, and 4 allocate temporary memory to read into before
  copying out to userspace.
- We don't do read repair, because it turns out that read repair is
  currently broken for compressed data.
- We hold the inode lock during the operation.

Note that we don't need to hold the mmap lock. We may race with
btrfs_page_mkwrite() and read the old data from before the page was
dirtied:

btrfs_page_mkwrite         btrfs_encoded_read
---------------------------------------------------
(enter)                    (enter)
                           btrfs_wait_ordered_range
lock_extent_bits
btrfs_page_set_dirty
unlock_extent_cached
(exit)
                           lock_extent_bits
                           read extent (dirty page hasn't been flushed,
                                        so this is the old data)
                           unlock_extent_cached
                           (exit)

In this interleaving we read the old data from before the page was
dirtied. But that's true even if we were to hold the mmap lock:

btrfs_page_mkwrite               btrfs_encoded_read
-------------------------------------------------------------------
(enter)                          (enter)
                                 btrfs_inode_lock(BTRFS_ILOCK_MMAP)
down_read(i_mmap_lock) (blocked)
                                 btrfs_wait_ordered_range
                                 lock_extent_bits
                                 read extent (page hasn't been dirtied,
                                              so this is the old data)
                                 unlock_extent_cached
                                 btrfs_inode_unlock(BTRFS_ILOCK_MMAP)
down_read(i_mmap_lock) returns
lock_extent_bits
btrfs_page_set_dirty
unlock_extent_cached

In other words, this is inherently racy, so it's fine that we return the
old data in this tiny window.

Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
parent
dcb77a9ae8
commit
1881fba89b
@ -49,6 +49,7 @@ extern struct kmem_cache *btrfs_free_space_bitmap_cachep;
|
||||
struct btrfs_ordered_sum;
|
||||
struct btrfs_ref;
|
||||
struct btrfs_bio;
|
||||
struct btrfs_ioctl_encoded_io_args;
|
||||
|
||||
#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
|
||||
|
||||
@ -3305,6 +3306,9 @@ int btrfs_writepage_cow_fixup(struct page *page);
|
||||
void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
|
||||
struct page *page, u64 start,
|
||||
u64 end, bool uptodate);
|
||||
ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
|
||||
struct btrfs_ioctl_encoded_io_args *encoded);
|
||||
|
||||
extern const struct dentry_operations btrfs_dentry_operations;
|
||||
extern const struct iomap_ops btrfs_dio_iomap_ops;
|
||||
extern const struct iomap_dio_ops btrfs_dio_ops;
|
||||
|
498
fs/btrfs/inode.c
498
fs/btrfs/inode.c
@ -10156,6 +10156,504 @@ void btrfs_set_range_writeback(struct btrfs_inode *inode, u64 start, u64 end)
|
||||
}
|
||||
}
|
||||
|
||||
static int btrfs_encoded_io_compression_from_extent(
|
||||
struct btrfs_fs_info *fs_info,
|
||||
int compress_type)
|
||||
{
|
||||
switch (compress_type) {
|
||||
case BTRFS_COMPRESS_NONE:
|
||||
return BTRFS_ENCODED_IO_COMPRESSION_NONE;
|
||||
case BTRFS_COMPRESS_ZLIB:
|
||||
return BTRFS_ENCODED_IO_COMPRESSION_ZLIB;
|
||||
case BTRFS_COMPRESS_LZO:
|
||||
/*
|
||||
* The LZO format depends on the sector size. 64K is the maximum
|
||||
* sector size that we support.
|
||||
*/
|
||||
if (fs_info->sectorsize < SZ_4K || fs_info->sectorsize > SZ_64K)
|
||||
return -EINVAL;
|
||||
return BTRFS_ENCODED_IO_COMPRESSION_LZO_4K +
|
||||
(fs_info->sectorsize_bits - 12);
|
||||
case BTRFS_COMPRESS_ZSTD:
|
||||
return BTRFS_ENCODED_IO_COMPRESSION_ZSTD;
|
||||
default:
|
||||
return -EUCLEAN;
|
||||
}
|
||||
}
|
||||
|
||||
static ssize_t btrfs_encoded_read_inline(
|
||||
struct kiocb *iocb,
|
||||
struct iov_iter *iter, u64 start,
|
||||
u64 lockend,
|
||||
struct extent_state **cached_state,
|
||||
u64 extent_start, size_t count,
|
||||
struct btrfs_ioctl_encoded_io_args *encoded,
|
||||
bool *unlocked)
|
||||
{
|
||||
struct btrfs_inode *inode = BTRFS_I(file_inode(iocb->ki_filp));
|
||||
struct btrfs_root *root = inode->root;
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct extent_io_tree *io_tree = &inode->io_tree;
|
||||
struct btrfs_path *path;
|
||||
struct extent_buffer *leaf;
|
||||
struct btrfs_file_extent_item *item;
|
||||
u64 ram_bytes;
|
||||
unsigned long ptr;
|
||||
void *tmp;
|
||||
ssize_t ret;
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode),
|
||||
extent_start, 0);
|
||||
if (ret) {
|
||||
if (ret > 0) {
|
||||
/* The extent item disappeared? */
|
||||
ret = -EIO;
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
leaf = path->nodes[0];
|
||||
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
|
||||
|
||||
ram_bytes = btrfs_file_extent_ram_bytes(leaf, item);
|
||||
ptr = btrfs_file_extent_inline_start(item);
|
||||
|
||||
encoded->len = min_t(u64, extent_start + ram_bytes,
|
||||
inode->vfs_inode.i_size) - iocb->ki_pos;
|
||||
ret = btrfs_encoded_io_compression_from_extent(fs_info,
|
||||
btrfs_file_extent_compression(leaf, item));
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
encoded->compression = ret;
|
||||
if (encoded->compression) {
|
||||
size_t inline_size;
|
||||
|
||||
inline_size = btrfs_file_extent_inline_item_len(leaf,
|
||||
path->slots[0]);
|
||||
if (inline_size > count) {
|
||||
ret = -ENOBUFS;
|
||||
goto out;
|
||||
}
|
||||
count = inline_size;
|
||||
encoded->unencoded_len = ram_bytes;
|
||||
encoded->unencoded_offset = iocb->ki_pos - extent_start;
|
||||
} else {
|
||||
count = min_t(u64, count, encoded->len);
|
||||
encoded->len = count;
|
||||
encoded->unencoded_len = count;
|
||||
ptr += iocb->ki_pos - extent_start;
|
||||
}
|
||||
|
||||
tmp = kmalloc(count, GFP_NOFS);
|
||||
if (!tmp) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
read_extent_buffer(leaf, tmp, ptr, count);
|
||||
btrfs_release_path(path);
|
||||
unlock_extent_cached(io_tree, start, lockend, cached_state);
|
||||
btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
|
||||
*unlocked = true;
|
||||
|
||||
ret = copy_to_iter(tmp, count, iter);
|
||||
if (ret != count)
|
||||
ret = -EFAULT;
|
||||
kfree(tmp);
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct btrfs_encoded_read_private {
|
||||
struct btrfs_inode *inode;
|
||||
u64 file_offset;
|
||||
wait_queue_head_t wait;
|
||||
atomic_t pending;
|
||||
blk_status_t status;
|
||||
bool skip_csum;
|
||||
};
|
||||
|
||||
static blk_status_t submit_encoded_read_bio(struct btrfs_inode *inode,
|
||||
struct bio *bio, int mirror_num)
|
||||
{
|
||||
struct btrfs_encoded_read_private *priv = bio->bi_private;
|
||||
struct btrfs_bio *bbio = btrfs_bio(bio);
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
blk_status_t ret;
|
||||
|
||||
if (!priv->skip_csum) {
|
||||
ret = btrfs_lookup_bio_sums(&inode->vfs_inode, bio, NULL);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
|
||||
if (ret) {
|
||||
btrfs_bio_free_csum(bbio);
|
||||
return ret;
|
||||
}
|
||||
|
||||
atomic_inc(&priv->pending);
|
||||
ret = btrfs_map_bio(fs_info, bio, mirror_num);
|
||||
if (ret) {
|
||||
atomic_dec(&priv->pending);
|
||||
btrfs_bio_free_csum(bbio);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static blk_status_t btrfs_encoded_read_verify_csum(struct btrfs_bio *bbio)
|
||||
{
|
||||
const bool uptodate = (bbio->bio.bi_status == BLK_STS_OK);
|
||||
struct btrfs_encoded_read_private *priv = bbio->bio.bi_private;
|
||||
struct btrfs_inode *inode = priv->inode;
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
u32 sectorsize = fs_info->sectorsize;
|
||||
struct bio_vec *bvec;
|
||||
struct bvec_iter_all iter_all;
|
||||
u64 start = priv->file_offset;
|
||||
u32 bio_offset = 0;
|
||||
|
||||
if (priv->skip_csum || !uptodate)
|
||||
return bbio->bio.bi_status;
|
||||
|
||||
bio_for_each_segment_all(bvec, &bbio->bio, iter_all) {
|
||||
unsigned int i, nr_sectors, pgoff;
|
||||
|
||||
nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len);
|
||||
pgoff = bvec->bv_offset;
|
||||
for (i = 0; i < nr_sectors; i++) {
|
||||
ASSERT(pgoff < PAGE_SIZE);
|
||||
if (check_data_csum(&inode->vfs_inode, bbio, bio_offset,
|
||||
bvec->bv_page, pgoff, start))
|
||||
return BLK_STS_IOERR;
|
||||
start += sectorsize;
|
||||
bio_offset += sectorsize;
|
||||
pgoff += sectorsize;
|
||||
}
|
||||
}
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static void btrfs_encoded_read_endio(struct bio *bio)
|
||||
{
|
||||
struct btrfs_encoded_read_private *priv = bio->bi_private;
|
||||
struct btrfs_bio *bbio = btrfs_bio(bio);
|
||||
blk_status_t status;
|
||||
|
||||
status = btrfs_encoded_read_verify_csum(bbio);
|
||||
if (status) {
|
||||
/*
|
||||
* The memory barrier implied by the atomic_dec_return() here
|
||||
* pairs with the memory barrier implied by the
|
||||
* atomic_dec_return() or io_wait_event() in
|
||||
* btrfs_encoded_read_regular_fill_pages() to ensure that this
|
||||
* write is observed before the load of status in
|
||||
* btrfs_encoded_read_regular_fill_pages().
|
||||
*/
|
||||
WRITE_ONCE(priv->status, status);
|
||||
}
|
||||
if (!atomic_dec_return(&priv->pending))
|
||||
wake_up(&priv->wait);
|
||||
btrfs_bio_free_csum(bbio);
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
static int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
|
||||
u64 file_offset,
|
||||
u64 disk_bytenr,
|
||||
u64 disk_io_size,
|
||||
struct page **pages)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
struct btrfs_encoded_read_private priv = {
|
||||
.inode = inode,
|
||||
.file_offset = file_offset,
|
||||
.pending = ATOMIC_INIT(1),
|
||||
.skip_csum = (inode->flags & BTRFS_INODE_NODATASUM),
|
||||
};
|
||||
unsigned long i = 0;
|
||||
u64 cur = 0;
|
||||
int ret;
|
||||
|
||||
init_waitqueue_head(&priv.wait);
|
||||
/*
|
||||
* Submit bios for the extent, splitting due to bio or stripe limits as
|
||||
* necessary.
|
||||
*/
|
||||
while (cur < disk_io_size) {
|
||||
struct extent_map *em;
|
||||
struct btrfs_io_geometry geom;
|
||||
struct bio *bio = NULL;
|
||||
u64 remaining;
|
||||
|
||||
em = btrfs_get_chunk_map(fs_info, disk_bytenr + cur,
|
||||
disk_io_size - cur);
|
||||
if (IS_ERR(em)) {
|
||||
ret = PTR_ERR(em);
|
||||
} else {
|
||||
ret = btrfs_get_io_geometry(fs_info, em, BTRFS_MAP_READ,
|
||||
disk_bytenr + cur, &geom);
|
||||
free_extent_map(em);
|
||||
}
|
||||
if (ret) {
|
||||
WRITE_ONCE(priv.status, errno_to_blk_status(ret));
|
||||
break;
|
||||
}
|
||||
remaining = min(geom.len, disk_io_size - cur);
|
||||
while (bio || remaining) {
|
||||
size_t bytes = min_t(u64, remaining, PAGE_SIZE);
|
||||
|
||||
if (!bio) {
|
||||
bio = btrfs_bio_alloc(BIO_MAX_VECS);
|
||||
bio->bi_iter.bi_sector =
|
||||
(disk_bytenr + cur) >> SECTOR_SHIFT;
|
||||
bio->bi_end_io = btrfs_encoded_read_endio;
|
||||
bio->bi_private = &priv;
|
||||
bio->bi_opf = REQ_OP_READ;
|
||||
}
|
||||
|
||||
if (!bytes ||
|
||||
bio_add_page(bio, pages[i], bytes, 0) < bytes) {
|
||||
blk_status_t status;
|
||||
|
||||
status = submit_encoded_read_bio(inode, bio, 0);
|
||||
if (status) {
|
||||
WRITE_ONCE(priv.status, status);
|
||||
bio_put(bio);
|
||||
goto out;
|
||||
}
|
||||
bio = NULL;
|
||||
continue;
|
||||
}
|
||||
|
||||
i++;
|
||||
cur += bytes;
|
||||
remaining -= bytes;
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
if (atomic_dec_return(&priv.pending))
|
||||
io_wait_event(priv.wait, !atomic_read(&priv.pending));
|
||||
/* See btrfs_encoded_read_endio() for ordering. */
|
||||
return blk_status_to_errno(READ_ONCE(priv.status));
|
||||
}
|
||||
|
||||
static ssize_t btrfs_encoded_read_regular(struct kiocb *iocb,
|
||||
struct iov_iter *iter,
|
||||
u64 start, u64 lockend,
|
||||
struct extent_state **cached_state,
|
||||
u64 disk_bytenr, u64 disk_io_size,
|
||||
size_t count, bool compressed,
|
||||
bool *unlocked)
|
||||
{
|
||||
struct btrfs_inode *inode = BTRFS_I(file_inode(iocb->ki_filp));
|
||||
struct extent_io_tree *io_tree = &inode->io_tree;
|
||||
struct page **pages;
|
||||
unsigned long nr_pages, i;
|
||||
u64 cur;
|
||||
size_t page_offset;
|
||||
ssize_t ret;
|
||||
|
||||
nr_pages = DIV_ROUND_UP(disk_io_size, PAGE_SIZE);
|
||||
pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
|
||||
if (!pages)
|
||||
return -ENOMEM;
|
||||
for (i = 0; i < nr_pages; i++) {
|
||||
pages[i] = alloc_page(GFP_NOFS);
|
||||
if (!pages[i]) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
ret = btrfs_encoded_read_regular_fill_pages(inode, start, disk_bytenr,
|
||||
disk_io_size, pages);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
unlock_extent_cached(io_tree, start, lockend, cached_state);
|
||||
btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
|
||||
*unlocked = true;
|
||||
|
||||
if (compressed) {
|
||||
i = 0;
|
||||
page_offset = 0;
|
||||
} else {
|
||||
i = (iocb->ki_pos - start) >> PAGE_SHIFT;
|
||||
page_offset = (iocb->ki_pos - start) & (PAGE_SIZE - 1);
|
||||
}
|
||||
cur = 0;
|
||||
while (cur < count) {
|
||||
size_t bytes = min_t(size_t, count - cur,
|
||||
PAGE_SIZE - page_offset);
|
||||
|
||||
if (copy_page_to_iter(pages[i], page_offset, bytes,
|
||||
iter) != bytes) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
i++;
|
||||
cur += bytes;
|
||||
page_offset = 0;
|
||||
}
|
||||
ret = count;
|
||||
out:
|
||||
for (i = 0; i < nr_pages; i++) {
|
||||
if (pages[i])
|
||||
__free_page(pages[i]);
|
||||
}
|
||||
kfree(pages);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
|
||||
struct btrfs_ioctl_encoded_io_args *encoded)
|
||||
{
|
||||
struct btrfs_inode *inode = BTRFS_I(file_inode(iocb->ki_filp));
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
struct extent_io_tree *io_tree = &inode->io_tree;
|
||||
ssize_t ret;
|
||||
size_t count = iov_iter_count(iter);
|
||||
u64 start, lockend, disk_bytenr, disk_io_size;
|
||||
struct extent_state *cached_state = NULL;
|
||||
struct extent_map *em;
|
||||
bool unlocked = false;
|
||||
|
||||
file_accessed(iocb->ki_filp);
|
||||
|
||||
btrfs_inode_lock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
|
||||
|
||||
if (iocb->ki_pos >= inode->vfs_inode.i_size) {
|
||||
btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
|
||||
return 0;
|
||||
}
|
||||
start = ALIGN_DOWN(iocb->ki_pos, fs_info->sectorsize);
|
||||
/*
|
||||
* We don't know how long the extent containing iocb->ki_pos is, but if
|
||||
* it's compressed we know that it won't be longer than this.
|
||||
*/
|
||||
lockend = start + BTRFS_MAX_UNCOMPRESSED - 1;
|
||||
|
||||
for (;;) {
|
||||
struct btrfs_ordered_extent *ordered;
|
||||
|
||||
ret = btrfs_wait_ordered_range(&inode->vfs_inode, start,
|
||||
lockend - start + 1);
|
||||
if (ret)
|
||||
goto out_unlock_inode;
|
||||
lock_extent_bits(io_tree, start, lockend, &cached_state);
|
||||
ordered = btrfs_lookup_ordered_range(inode, start,
|
||||
lockend - start + 1);
|
||||
if (!ordered)
|
||||
break;
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
unlock_extent_cached(io_tree, start, lockend, &cached_state);
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
em = btrfs_get_extent(inode, NULL, 0, start, lockend - start + 1);
|
||||
if (IS_ERR(em)) {
|
||||
ret = PTR_ERR(em);
|
||||
goto out_unlock_extent;
|
||||
}
|
||||
|
||||
if (em->block_start == EXTENT_MAP_INLINE) {
|
||||
u64 extent_start = em->start;
|
||||
|
||||
/*
|
||||
* For inline extents we get everything we need out of the
|
||||
* extent item.
|
||||
*/
|
||||
free_extent_map(em);
|
||||
em = NULL;
|
||||
ret = btrfs_encoded_read_inline(iocb, iter, start, lockend,
|
||||
&cached_state, extent_start,
|
||||
count, encoded, &unlocked);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* We only want to return up to EOF even if the extent extends beyond
|
||||
* that.
|
||||
*/
|
||||
encoded->len = min_t(u64, extent_map_end(em),
|
||||
inode->vfs_inode.i_size) - iocb->ki_pos;
|
||||
if (em->block_start == EXTENT_MAP_HOLE ||
|
||||
test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
|
||||
disk_bytenr = EXTENT_MAP_HOLE;
|
||||
count = min_t(u64, count, encoded->len);
|
||||
encoded->len = count;
|
||||
encoded->unencoded_len = count;
|
||||
} else if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
|
||||
disk_bytenr = em->block_start;
|
||||
/*
|
||||
* Bail if the buffer isn't large enough to return the whole
|
||||
* compressed extent.
|
||||
*/
|
||||
if (em->block_len > count) {
|
||||
ret = -ENOBUFS;
|
||||
goto out_em;
|
||||
}
|
||||
disk_io_size = count = em->block_len;
|
||||
encoded->unencoded_len = em->ram_bytes;
|
||||
encoded->unencoded_offset = iocb->ki_pos - em->orig_start;
|
||||
ret = btrfs_encoded_io_compression_from_extent(fs_info,
|
||||
em->compress_type);
|
||||
if (ret < 0)
|
||||
goto out_em;
|
||||
encoded->compression = ret;
|
||||
} else {
|
||||
disk_bytenr = em->block_start + (start - em->start);
|
||||
if (encoded->len > count)
|
||||
encoded->len = count;
|
||||
/*
|
||||
* Don't read beyond what we locked. This also limits the page
|
||||
* allocations that we'll do.
|
||||
*/
|
||||
disk_io_size = min(lockend + 1, iocb->ki_pos + encoded->len) - start;
|
||||
count = start + disk_io_size - iocb->ki_pos;
|
||||
encoded->len = count;
|
||||
encoded->unencoded_len = count;
|
||||
disk_io_size = ALIGN(disk_io_size, fs_info->sectorsize);
|
||||
}
|
||||
free_extent_map(em);
|
||||
em = NULL;
|
||||
|
||||
if (disk_bytenr == EXTENT_MAP_HOLE) {
|
||||
unlock_extent_cached(io_tree, start, lockend, &cached_state);
|
||||
btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
|
||||
unlocked = true;
|
||||
ret = iov_iter_zero(count, iter);
|
||||
if (ret != count)
|
||||
ret = -EFAULT;
|
||||
} else {
|
||||
ret = btrfs_encoded_read_regular(iocb, iter, start, lockend,
|
||||
&cached_state, disk_bytenr,
|
||||
disk_io_size, count,
|
||||
encoded->compression,
|
||||
&unlocked);
|
||||
}
|
||||
|
||||
out:
|
||||
if (ret >= 0)
|
||||
iocb->ki_pos += encoded->len;
|
||||
out_em:
|
||||
free_extent_map(em);
|
||||
out_unlock_extent:
|
||||
if (!unlocked)
|
||||
unlock_extent_cached(io_tree, start, lockend, &cached_state);
|
||||
out_unlock_inode:
|
||||
if (!unlocked)
|
||||
btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SWAP
|
||||
/*
|
||||
* Add an entry indicating a block group or device which is pinned by a
|
||||
|
106
fs/btrfs/ioctl.c
106
fs/btrfs/ioctl.c
@ -28,6 +28,7 @@
|
||||
#include <linux/iversion.h>
|
||||
#include <linux/fileattr.h>
|
||||
#include <linux/fsverity.h>
|
||||
#include <linux/sched/xacct.h>
|
||||
#include "ctree.h"
|
||||
#include "disk-io.h"
|
||||
#include "export.h"
|
||||
@ -88,6 +89,22 @@ struct btrfs_ioctl_send_args_32 {
|
||||
|
||||
#define BTRFS_IOC_SEND_32 _IOW(BTRFS_IOCTL_MAGIC, 38, \
|
||||
struct btrfs_ioctl_send_args_32)
|
||||
|
||||
struct btrfs_ioctl_encoded_io_args_32 {
|
||||
compat_uptr_t iov;
|
||||
compat_ulong_t iovcnt;
|
||||
__s64 offset;
|
||||
__u64 flags;
|
||||
__u64 len;
|
||||
__u64 unencoded_len;
|
||||
__u64 unencoded_offset;
|
||||
__u32 compression;
|
||||
__u32 encryption;
|
||||
__u8 reserved[64];
|
||||
};
|
||||
|
||||
#define BTRFS_IOC_ENCODED_READ_32 _IOR(BTRFS_IOCTL_MAGIC, 64, \
|
||||
struct btrfs_ioctl_encoded_io_args_32)
|
||||
#endif
|
||||
|
||||
/* Mask out flags that are inappropriate for the given type of inode. */
|
||||
@ -5195,6 +5212,89 @@ static int _btrfs_ioctl_send(struct inode *inode, void __user *argp, bool compat
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Handler for BTRFS_IOC_ENCODED_READ and its compat variant.
 *
 * @file:   file to read from
 * @argp:   user pointer to struct btrfs_ioctl_encoded_io_args, or to
 *          struct btrfs_ioctl_encoded_io_args_32 when @compat is true
 * @compat: true when called through BTRFS_IOC_ENCODED_READ_32
 *
 * Only the members up to and including "flags" are read from userspace; the
 * remaining members are written back after a successful read.  The caller
 * needs CAP_SYS_ADMIN and "flags" has to be zero.
 *
 * Returns what btrfs_encoded_read() returned, 0 for an empty iovec, or a
 * negative errno.  Read accounting (add_rchar()/inc_syscr()) is done on every
 * exit except the -ENOTTY one for compat calls on kernels without compat
 * support.
 */
static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp,
				    bool compat)
{
	struct btrfs_ioctl_encoded_io_args args = { 0 };
	/* End of the input part in the kernel's (native) layout. */
	size_t kernel_in_end = offsetofend(struct btrfs_ioctl_encoded_io_args,
					   flags);
	/* End of the input part in the layout userspace actually passed. */
	size_t user_in_end;
	struct iovec iovstack[UIO_FASTIOV];
	struct iovec *iov = iovstack;
	struct iov_iter iter;
	loff_t pos;
	struct kiocb kiocb;
	ssize_t ret;

	if (!capable(CAP_SYS_ADMIN)) {
		ret = -EPERM;
		goto out_acct;
	}

	if (!compat) {
		user_in_end = kernel_in_end;
		if (copy_from_user(&args, argp, user_in_end)) {
			ret = -EFAULT;
			goto out_acct;
		}
	} else {
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
		struct btrfs_ioctl_encoded_io_args_32 args32;

		user_in_end = offsetofend(struct btrfs_ioctl_encoded_io_args_32,
					  flags);
		if (copy_from_user(&args32, argp, user_in_end)) {
			ret = -EFAULT;
			goto out_acct;
		}
		args.iov = compat_ptr(args32.iov);
		args.iovcnt = args32.iovcnt;
		args.offset = args32.offset;
		args.flags = args32.flags;
#else
		return -ENOTTY;
#endif
	}
	if (args.flags != 0) {
		ret = -EINVAL;
		goto out_acct;
	}

	ret = import_iovec(READ, args.iov, args.iovcnt, ARRAY_SIZE(iovstack),
			   &iov, &iter);
	if (ret < 0)
		goto out_acct;

	if (iov_iter_count(&iter) == 0) {
		ret = 0;
		goto out_iov;
	}
	pos = args.offset;
	/*
	 * NOTE(review): args.len is still zero here, since only the members up
	 * to "flags" were copied in, so this validates a zero-length range at
	 * @pos.  Confirm whether iov_iter_count(&iter) was the intended length.
	 */
	ret = rw_verify_area(READ, file, &pos, args.len);
	if (ret < 0)
		goto out_iov;

	init_sync_kiocb(&kiocb, file);
	kiocb.ki_pos = pos;

	ret = btrfs_encoded_read(&kiocb, &iter, &args);
	if (ret < 0)
		goto out_iov;

	fsnotify_access(file);
	/*
	 * Hand the output members back.  Everything after "flags" is laid out
	 * the same in both structures, so only the starting offset differs
	 * between the native and the compat case.
	 */
	if (copy_to_user(argp + user_in_end,
			 (char *)&args + kernel_in_end,
			 sizeof(args) - kernel_in_end))
		ret = -EFAULT;

out_iov:
	kfree(iov);
out_acct:
	if (ret > 0)
		add_rchar(current, ret);
	inc_syscr(current);
	return ret;
}
|
||||
|
||||
long btrfs_ioctl(struct file *file, unsigned int
|
||||
cmd, unsigned long arg)
|
||||
{
|
||||
@ -5339,6 +5439,12 @@ long btrfs_ioctl(struct file *file, unsigned int
|
||||
return fsverity_ioctl_enable(file, (const void __user *)argp);
|
||||
case FS_IOC_MEASURE_VERITY:
|
||||
return fsverity_ioctl_measure(file, argp);
|
||||
case BTRFS_IOC_ENCODED_READ:
|
||||
return btrfs_ioctl_encoded_read(file, argp, false);
|
||||
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
|
||||
case BTRFS_IOC_ENCODED_READ_32:
|
||||
return btrfs_ioctl_encoded_read(file, argp, true);
|
||||
#endif
|
||||
}
|
||||
|
||||
return -ENOTTY;
|
||||
|
Loading…
Reference in New Issue
Block a user