commit cd07c43f9b: Merge branch 'vfs.all' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
(linux-next mirror: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git)
@@ -12,21 +12,10 @@ returns a list of extents.
Request Basics
--------------

A fiemap request is encoded within struct fiemap::

  struct fiemap {
	__u64 fm_start;          /* logical offset (inclusive) at
	                          * which to start mapping (in) */
	__u64 fm_length;         /* logical length of mapping which
	                          * userspace cares about (in) */
	__u32 fm_flags;          /* FIEMAP_FLAG_* flags for request (in/out) */
	__u32 fm_mapped_extents; /* number of extents that were
	                          * mapped (out) */
	__u32 fm_extent_count;   /* size of fm_extents array (in) */
	__u32 fm_reserved;
	struct fiemap_extent fm_extents[0]; /* array of mapped extents (out) */
  };

A fiemap request is encoded within struct fiemap:

.. kernel-doc:: include/uapi/linux/fiemap.h
   :identifiers: fiemap

fm_start and fm_length specify the logical range within the file
which the process would like mappings for. Extents returned mirror
@@ -60,6 +49,8 @@ FIEMAP_FLAG_XATTR
	If this flag is set, the extents returned will describe the inode's
	extended attribute lookup tree, instead of its data tree.

FIEMAP_FLAG_CACHE
	This flag requests caching of the extents.

Extent Mapping
--------------
@@ -77,18 +68,10 @@ complete the requested range and will not have the FIEMAP_EXTENT_LAST
flag set (see the next section on extent flags).

Each extent is described by a single fiemap_extent structure as
returned in fm_extents::
returned in fm_extents:

  struct fiemap_extent {
	__u64 fe_logical;  /* logical offset in bytes for the start of
	                    * the extent */
	__u64 fe_physical; /* physical offset in bytes for the start
	                    * of the extent */
	__u64 fe_length;   /* length in bytes for the extent */
	__u64 fe_reserved64[2];
	__u32 fe_flags;    /* FIEMAP_EXTENT_* flags for this extent */
	__u32 fe_reserved[3];
  };
.. kernel-doc:: include/uapi/linux/fiemap.h
   :identifiers: fiemap_extent

All offsets and lengths are in bytes and mirror those on disk. It is valid
for an extent's logical offset to start before the request or its logical
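To make the request and extent structures above concrete, here is a small userspace sketch (not part of this diff) that issues a FIEMAP request over a whole file and walks the returned extents. The fixed extent count of 32 and the minimal error handling are illustrative assumptions, not anything the interface requires.

/*
 * Hedged sketch: query the extent map of a file with the FIEMAP ioctl.
 * The extent count of 32 is an arbitrary choice for illustration; a real
 * caller may need to enlarge the array or loop to retrieve every extent.
 */
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(int argc, char **argv)
{
	unsigned int nr = 32, i;
	struct fiemap *fm;
	int fd;

	if (argc != 2)
		return 1;
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	fm = calloc(1, sizeof(*fm) + nr * sizeof(struct fiemap_extent));
	if (!fm)
		return 1;
	fm->fm_start = 0;                  /* map from the start of the file */
	fm->fm_length = FIEMAP_MAX_OFFSET; /* ...to the end */
	fm->fm_flags = FIEMAP_FLAG_SYNC;   /* flush dirty data before mapping */
	fm->fm_extent_count = nr;          /* size of the fm_extents[] array */

	if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0) {
		perror("FS_IOC_FIEMAP");
		return 1;
	}

	for (i = 0; i < fm->fm_mapped_extents; i++) {
		struct fiemap_extent *fe = &fm->fm_extents[i];

		printf("logical %llu physical %llu length %llu flags %#x\n",
		       (unsigned long long)fe->fe_logical,
		       (unsigned long long)fe->fe_physical,
		       (unsigned long long)fe->fe_length,
		       fe->fe_flags);
		if (fe->fe_flags & FIEMAP_EXTENT_LAST)
			break;
	}
	free(fm);
	close(fd);
	return 0;
}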
@@ -175,6 +158,8 @@ FIEMAP_EXTENT_MERGED
	userspace would be highly inefficient, the kernel will try to merge most
	adjacent blocks into 'extents'.

FIEMAP_EXTENT_SHARED
	This flag is set if the space occupied by the extent is shared with other files.

VFS -> File System Implementation
---------------------------------
@@ -191,14 +176,10 @@ each discovered extent::
			    u64 len);

->fiemap is passed struct fiemap_extent_info which describes the
fiemap request::
fiemap request:

  struct fiemap_extent_info {
	unsigned int fi_flags;          /* Flags as passed from user */
	unsigned int fi_extents_mapped; /* Number of mapped extents */
	unsigned int fi_extents_max;    /* Size of fiemap_extent array */
	struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent array */
  };
.. kernel-doc:: include/linux/fiemap.h
   :identifiers: fiemap_extent_info

It is intended that the file system should not need to access any of this
structure directly. Filesystem handlers should be tolerant to signals and return
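For orientation (not part of this diff), the shape of a ->fiemap handler built on these helpers is roughly as follows. fiemap_prep() and fiemap_fill_next_extent() are the real kernel helpers; my_fs_lookup_extent(), its extent descriptor, and the FIEMAP_FLAG_SYNC support are assumptions made up for the sketch.

/*
 * Hedged sketch of a minimal ->fiemap implementation.  fiemap_prep() and
 * fiemap_fill_next_extent() are the real helpers; my_fs_lookup_extent(),
 * its extent descriptor and the FIEMAP_FLAG_SYNC support are hypothetical.
 */
#include <linux/fs.h>
#include <linux/fiemap.h>

struct my_extent {
	u64 logical;	/* byte offset within the file */
	u64 physical;	/* byte offset on the backing device */
	u64 len;	/* extent length in bytes */
	bool last;	/* no further extents follow */
};

/* Hypothetical helper: find the extent covering @start. */
static int my_fs_lookup_extent(struct inode *inode, u64 start,
			       struct my_extent *ext);

static int my_fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
			u64 start, u64 len)
{
	u64 end;
	int ret;

	/* Validate the request flags and optionally sync dirty data. */
	ret = fiemap_prep(inode, fieinfo, start, &len, FIEMAP_FLAG_SYNC);
	if (ret)
		return ret;
	end = start + len;

	while (start < end) {
		struct my_extent ext;
		u32 flags = 0;

		ret = my_fs_lookup_extent(inode, start, &ext);
		if (ret)
			return ret;
		if (ext.last)
			flags |= FIEMAP_EXTENT_LAST;

		/* Returns 1 once the caller's extent array is full. */
		ret = fiemap_fill_next_extent(fieinfo, ext.logical,
					      ext.physical, ext.len, flags);
		if (ret)
			return ret < 0 ? ret : 0;
		if (ext.last)
			break;
		start = ext.logical + ext.len;
	}
	return 0;
}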
@@ -527,11 +527,6 @@ There are some functions to help manage credentials:
   This gets a reference on a live set of credentials, returning a pointer to
   that set of credentials.

- ``struct cred *get_new_cred(struct cred *cred);``

   This gets a reference on a set of credentials that is under construction
   and is thus still mutable, returning a pointer to that set of credentials.


Open File Credentials
=====================
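As a quick illustration (not part of this diff) of the reference-counting pattern described above, a sketch of pinning the caller's credentials across a deferred operation might look like this; struct deferred_work and the two helpers are hypothetical names used only for the example.

/*
 * Hedged sketch: pin the calling task's credentials across a deferred
 * operation using get_cred()/put_cred().  struct deferred_work and the
 * two helpers are hypothetical names used only for this example.
 */
#include <linux/cred.h>

struct deferred_work {
	const struct cred *cred;
	/* ... other state ... */
};

static void deferred_work_prepare(struct deferred_work *dw)
{
	/* Take a reference so the creds stay live until the work runs. */
	dw->cred = get_cred(current_cred());
}

static void deferred_work_complete(struct deferred_work *dw)
{
	/* Drop our reference once the work is done. */
	put_cred(dw->cred);
	dw->cred = NULL;
}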
@@ -12387,6 +12387,13 @@ F: Documentation/kbuild/kconfig*
F:	scripts/Kconfig.include
F:	scripts/kconfig/

KCORE
M:	Omar Sandoval <osandov@osandov.com>
L:	linux-debuggers@vger.kernel.org
S:	Maintained
F:	fs/proc/kcore.c
F:	include/linux/kcore.h

KCOV
R:	Dmitry Vyukov <dvyukov@google.com>
R:	Andrey Konovalov <andreyknvl@gmail.com>
@@ -249,7 +249,7 @@ static struct file *open_file_as_root(const char *filename, int flags, umode_t m
	fp = file_open_root(&root, filename, flags, mode);
	path_put(&root);

	revert_creds(old_cred);
	put_cred(revert_creds(old_cred));

	return fp;
}
@ -79,11 +79,13 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq)
|
||||
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
|
||||
if (pos + total >= i_size_read(rreq->inode))
|
||||
__set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
|
||||
|
||||
if (!err)
|
||||
if (!err && total) {
|
||||
__set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
|
||||
subreq->transferred += total;
|
||||
}
|
||||
|
||||
netfs_read_subreq_terminated(subreq, err, false);
|
||||
subreq->error = err;
|
||||
netfs_read_subreq_terminated(subreq);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -11,6 +11,7 @@ kafs-y := \
|
||||
cmservice.o \
|
||||
dir.o \
|
||||
dir_edit.o \
|
||||
dir_search.o \
|
||||
dir_silly.o \
|
||||
dynroot.o \
|
||||
file.o \
|
||||
|
@ -41,7 +41,7 @@ static void afs_volume_init_callback(struct afs_volume *volume)
|
||||
|
||||
list_for_each_entry(vnode, &volume->open_mmaps, cb_mmap_link) {
|
||||
if (vnode->cb_v_check != atomic_read(&volume->cb_v_break)) {
|
||||
atomic64_set(&vnode->cb_expires_at, AFS_NO_CB_PROMISE);
|
||||
afs_clear_cb_promise(vnode, afs_cb_promise_clear_vol_init_cb);
|
||||
queue_work(system_unbound_wq, &vnode->cb_work);
|
||||
}
|
||||
}
|
||||
@ -79,7 +79,7 @@ void __afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reas
|
||||
_enter("");
|
||||
|
||||
clear_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
|
||||
if (atomic64_xchg(&vnode->cb_expires_at, AFS_NO_CB_PROMISE) != AFS_NO_CB_PROMISE) {
|
||||
if (afs_clear_cb_promise(vnode, afs_cb_promise_clear_cb_break)) {
|
||||
vnode->cb_break++;
|
||||
vnode->cb_v_check = atomic_read(&vnode->volume->cb_v_break);
|
||||
afs_clear_permits(vnode);
|
||||
|
fs/afs/dir.c: 836 lines changed (diff suppressed because it is too large)
@ -10,6 +10,7 @@
|
||||
#include <linux/namei.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/iversion.h>
|
||||
#include <linux/folio_queue.h>
|
||||
#include "internal.h"
|
||||
#include "xdr_fs.h"
|
||||
|
||||
@ -105,23 +106,57 @@ static void afs_clear_contig_bits(union afs_xdr_dir_block *block,
|
||||
}
|
||||
|
||||
/*
|
||||
* Get a new directory folio.
|
||||
* Get a specific block, extending the directory storage to cover it as needed.
|
||||
*/
|
||||
static struct folio *afs_dir_get_folio(struct afs_vnode *vnode, pgoff_t index)
|
||||
static union afs_xdr_dir_block *afs_dir_get_block(struct afs_dir_iter *iter, size_t block)
|
||||
{
|
||||
struct address_space *mapping = vnode->netfs.inode.i_mapping;
|
||||
struct folio_queue *fq;
|
||||
struct afs_vnode *dvnode = iter->dvnode;
|
||||
struct folio *folio;
|
||||
size_t blpos = block * AFS_DIR_BLOCK_SIZE;
|
||||
size_t blend = (block + 1) * AFS_DIR_BLOCK_SIZE, fpos = iter->fpos;
|
||||
int ret;
|
||||
|
||||
folio = __filemap_get_folio(mapping, index,
|
||||
FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
|
||||
mapping->gfp_mask);
|
||||
if (IS_ERR(folio)) {
|
||||
clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
|
||||
return NULL;
|
||||
if (dvnode->directory_size < blend) {
|
||||
size_t cur_size = dvnode->directory_size;
|
||||
|
||||
ret = netfs_alloc_folioq_buffer(
|
||||
NULL, &dvnode->directory, &cur_size, blend,
|
||||
mapping_gfp_mask(dvnode->netfs.inode.i_mapping));
|
||||
dvnode->directory_size = cur_size;
|
||||
if (ret < 0)
|
||||
goto fail;
|
||||
}
|
||||
if (!folio_test_private(folio))
|
||||
folio_attach_private(folio, (void *)1);
|
||||
return folio;
|
||||
|
||||
fq = iter->fq;
|
||||
if (!fq)
|
||||
fq = dvnode->directory;
|
||||
|
||||
/* Search the folio queue for the folio containing the block... */
|
||||
for (; fq; fq = fq->next) {
|
||||
for (int s = iter->fq_slot; s < folioq_count(fq); s++) {
|
||||
size_t fsize = folioq_folio_size(fq, s);
|
||||
|
||||
if (blend <= fpos + fsize) {
|
||||
/* ... and then return the mapped block. */
|
||||
folio = folioq_folio(fq, s);
|
||||
if (WARN_ON_ONCE(folio_pos(folio) != fpos))
|
||||
goto fail;
|
||||
iter->fq = fq;
|
||||
iter->fq_slot = s;
|
||||
iter->fpos = fpos;
|
||||
return kmap_local_folio(folio, blpos - fpos);
|
||||
}
|
||||
fpos += fsize;
|
||||
}
|
||||
iter->fq_slot = 0;
|
||||
}
|
||||
|
||||
fail:
|
||||
iter->fq = NULL;
|
||||
iter->fq_slot = 0;
|
||||
afs_invalidate_dir(dvnode, afs_dir_invalid_edit_get_block);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -209,9 +244,8 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
|
||||
{
|
||||
union afs_xdr_dir_block *meta, *block;
|
||||
union afs_xdr_dirent *de;
|
||||
struct folio *folio0, *folio;
|
||||
unsigned int need_slots, nr_blocks, b;
|
||||
pgoff_t index;
|
||||
struct afs_dir_iter iter = { .dvnode = vnode };
|
||||
unsigned int nr_blocks, b, entry;
|
||||
loff_t i_size;
|
||||
int slot;
|
||||
|
||||
@ -220,20 +254,17 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
|
||||
i_size = i_size_read(&vnode->netfs.inode);
|
||||
if (i_size > AFS_DIR_BLOCK_SIZE * AFS_DIR_MAX_BLOCKS ||
|
||||
(i_size & (AFS_DIR_BLOCK_SIZE - 1))) {
|
||||
clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
|
||||
afs_invalidate_dir(vnode, afs_dir_invalid_edit_add_bad_size);
|
||||
return;
|
||||
}
|
||||
|
||||
folio0 = afs_dir_get_folio(vnode, 0);
|
||||
if (!folio0) {
|
||||
_leave(" [fgp]");
|
||||
meta = afs_dir_get_block(&iter, 0);
|
||||
if (!meta)
|
||||
return;
|
||||
}
|
||||
|
||||
/* Work out how many slots we're going to need. */
|
||||
need_slots = afs_dir_calc_slots(name->len);
|
||||
iter.nr_slots = afs_dir_calc_slots(name->len);
|
||||
|
||||
meta = kmap_local_folio(folio0, 0);
|
||||
if (i_size == 0)
|
||||
goto new_directory;
|
||||
nr_blocks = i_size / AFS_DIR_BLOCK_SIZE;
|
||||
@ -245,22 +276,21 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
|
||||
/* If the directory extended into a new folio, then we need to
|
||||
* tack a new folio on the end.
|
||||
*/
|
||||
index = b / AFS_DIR_BLOCKS_PER_PAGE;
|
||||
if (nr_blocks >= AFS_DIR_MAX_BLOCKS)
|
||||
goto error;
|
||||
if (index >= folio_nr_pages(folio0)) {
|
||||
folio = afs_dir_get_folio(vnode, index);
|
||||
if (!folio)
|
||||
goto error;
|
||||
} else {
|
||||
folio = folio0;
|
||||
}
|
||||
goto error_too_many_blocks;
|
||||
|
||||
block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_pos(folio));
|
||||
/* Lower dir blocks have a counter in the header we can check. */
|
||||
if (b < AFS_DIR_BLOCKS_WITH_CTR &&
|
||||
meta->meta.alloc_ctrs[b] < iter.nr_slots)
|
||||
continue;
|
||||
|
||||
block = afs_dir_get_block(&iter, b);
|
||||
if (!block)
|
||||
goto error;
|
||||
|
||||
/* Abandon the edit if we got a callback break. */
|
||||
if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
|
||||
goto invalidated;
|
||||
goto already_invalidated;
|
||||
|
||||
_debug("block %u: %2u %3u %u",
|
||||
b,
|
||||
@ -275,31 +305,23 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
|
||||
afs_set_i_size(vnode, (b + 1) * AFS_DIR_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
/* Only lower dir blocks have a counter in the header. */
|
||||
if (b >= AFS_DIR_BLOCKS_WITH_CTR ||
|
||||
meta->meta.alloc_ctrs[b] >= need_slots) {
|
||||
/* We need to try and find one or more consecutive
|
||||
* slots to hold the entry.
|
||||
*/
|
||||
slot = afs_find_contig_bits(block, need_slots);
|
||||
if (slot >= 0) {
|
||||
_debug("slot %u", slot);
|
||||
goto found_space;
|
||||
}
|
||||
/* We need to try and find one or more consecutive slots to
|
||||
* hold the entry.
|
||||
*/
|
||||
slot = afs_find_contig_bits(block, iter.nr_slots);
|
||||
if (slot >= 0) {
|
||||
_debug("slot %u", slot);
|
||||
goto found_space;
|
||||
}
|
||||
|
||||
kunmap_local(block);
|
||||
if (folio != folio0) {
|
||||
folio_unlock(folio);
|
||||
folio_put(folio);
|
||||
}
|
||||
}
|
||||
|
||||
/* There are no spare slots of sufficient size, yet the operation
|
||||
* succeeded. Download the directory again.
|
||||
*/
|
||||
trace_afs_edit_dir(vnode, why, afs_edit_dir_create_nospc, 0, 0, 0, 0, name->name);
|
||||
clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
|
||||
afs_invalidate_dir(vnode, afs_dir_invalid_edit_add_no_slots);
|
||||
goto out_unmap;
|
||||
|
||||
new_directory:
|
||||
@ -307,8 +329,7 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
|
||||
i_size = AFS_DIR_BLOCK_SIZE;
|
||||
afs_set_i_size(vnode, i_size);
|
||||
slot = AFS_DIR_RESV_BLOCKS0;
|
||||
folio = folio0;
|
||||
block = kmap_local_folio(folio, 0);
|
||||
block = afs_dir_get_block(&iter, 0);
|
||||
nr_blocks = 1;
|
||||
b = 0;
|
||||
|
||||
@ -326,41 +347,39 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
|
||||
de->u.name[name->len] = 0;
|
||||
|
||||
/* Adjust the bitmap. */
|
||||
afs_set_contig_bits(block, slot, need_slots);
|
||||
kunmap_local(block);
|
||||
if (folio != folio0) {
|
||||
folio_unlock(folio);
|
||||
folio_put(folio);
|
||||
}
|
||||
afs_set_contig_bits(block, slot, iter.nr_slots);
|
||||
|
||||
/* Adjust the allocation counter. */
|
||||
if (b < AFS_DIR_BLOCKS_WITH_CTR)
|
||||
meta->meta.alloc_ctrs[b] -= need_slots;
|
||||
meta->meta.alloc_ctrs[b] -= iter.nr_slots;
|
||||
|
||||
/* Adjust the hash chain. */
|
||||
entry = b * AFS_DIR_SLOTS_PER_BLOCK + slot;
|
||||
iter.bucket = afs_dir_hash_name(name);
|
||||
de->u.hash_next = meta->meta.hashtable[iter.bucket];
|
||||
meta->meta.hashtable[iter.bucket] = htons(entry);
|
||||
kunmap_local(block);
|
||||
|
||||
inode_inc_iversion_raw(&vnode->netfs.inode);
|
||||
afs_stat_v(vnode, n_dir_cr);
|
||||
_debug("Insert %s in %u[%u]", name->name, b, slot);
|
||||
|
||||
netfs_single_mark_inode_dirty(&vnode->netfs.inode);
|
||||
|
||||
out_unmap:
|
||||
kunmap_local(meta);
|
||||
folio_unlock(folio0);
|
||||
folio_put(folio0);
|
||||
_leave("");
|
||||
return;
|
||||
|
||||
invalidated:
|
||||
already_invalidated:
|
||||
trace_afs_edit_dir(vnode, why, afs_edit_dir_create_inval, 0, 0, 0, 0, name->name);
|
||||
clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
|
||||
kunmap_local(block);
|
||||
if (folio != folio0) {
|
||||
folio_unlock(folio);
|
||||
folio_put(folio);
|
||||
}
|
||||
goto out_unmap;
|
||||
|
||||
error_too_many_blocks:
|
||||
afs_invalidate_dir(vnode, afs_dir_invalid_edit_add_too_many_blocks);
|
||||
error:
|
||||
trace_afs_edit_dir(vnode, why, afs_edit_dir_create_error, 0, 0, 0, 0, name->name);
|
||||
clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
|
||||
goto out_unmap;
|
||||
}
|
||||
|
||||
@ -374,13 +393,14 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
|
||||
void afs_edit_dir_remove(struct afs_vnode *vnode,
|
||||
struct qstr *name, enum afs_edit_dir_reason why)
|
||||
{
|
||||
union afs_xdr_dir_block *meta, *block;
|
||||
union afs_xdr_dirent *de;
|
||||
struct folio *folio0, *folio;
|
||||
unsigned int need_slots, nr_blocks, b;
|
||||
pgoff_t index;
|
||||
union afs_xdr_dir_block *meta, *block, *pblock;
|
||||
union afs_xdr_dirent *de, *pde;
|
||||
struct afs_dir_iter iter = { .dvnode = vnode };
|
||||
struct afs_fid fid;
|
||||
unsigned int b, slot, entry;
|
||||
loff_t i_size;
|
||||
int slot;
|
||||
__be16 next;
|
||||
int found;
|
||||
|
||||
_enter(",,{%d,%s},", name->len, name->name);
|
||||
|
||||
@ -388,81 +408,95 @@ void afs_edit_dir_remove(struct afs_vnode *vnode,
|
||||
if (i_size < AFS_DIR_BLOCK_SIZE ||
|
||||
i_size > AFS_DIR_BLOCK_SIZE * AFS_DIR_MAX_BLOCKS ||
|
||||
(i_size & (AFS_DIR_BLOCK_SIZE - 1))) {
|
||||
clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
|
||||
return;
|
||||
}
|
||||
nr_blocks = i_size / AFS_DIR_BLOCK_SIZE;
|
||||
|
||||
folio0 = afs_dir_get_folio(vnode, 0);
|
||||
if (!folio0) {
|
||||
_leave(" [fgp]");
|
||||
afs_invalidate_dir(vnode, afs_dir_invalid_edit_rem_bad_size);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Work out how many slots we're going to discard. */
|
||||
need_slots = afs_dir_calc_slots(name->len);
|
||||
if (!afs_dir_init_iter(&iter, name))
|
||||
return;
|
||||
|
||||
meta = kmap_local_folio(folio0, 0);
|
||||
meta = afs_dir_find_block(&iter, 0);
|
||||
if (!meta)
|
||||
return;
|
||||
|
||||
/* Find a block that has sufficient slots available. Each folio
|
||||
* contains two or more directory blocks.
|
||||
*/
|
||||
for (b = 0; b < nr_blocks; b++) {
|
||||
index = b / AFS_DIR_BLOCKS_PER_PAGE;
|
||||
if (index >= folio_nr_pages(folio0)) {
|
||||
folio = afs_dir_get_folio(vnode, index);
|
||||
if (!folio)
|
||||
goto error;
|
||||
} else {
|
||||
folio = folio0;
|
||||
}
|
||||
|
||||
block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_pos(folio));
|
||||
|
||||
/* Abandon the edit if we got a callback break. */
|
||||
if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
|
||||
goto invalidated;
|
||||
|
||||
if (b > AFS_DIR_BLOCKS_WITH_CTR ||
|
||||
meta->meta.alloc_ctrs[b] <= AFS_DIR_SLOTS_PER_BLOCK - 1 - need_slots) {
|
||||
slot = afs_dir_scan_block(block, name, b);
|
||||
if (slot >= 0)
|
||||
goto found_dirent;
|
||||
}
|
||||
|
||||
kunmap_local(block);
|
||||
if (folio != folio0) {
|
||||
folio_unlock(folio);
|
||||
folio_put(folio);
|
||||
}
|
||||
/* Find the entry in the blob. */
|
||||
found = afs_dir_search_bucket(&iter, name, &fid);
|
||||
if (found < 0) {
|
||||
/* Didn't find the dirent to clobber. Re-download. */
|
||||
trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_noent,
|
||||
0, 0, 0, 0, name->name);
|
||||
afs_invalidate_dir(vnode, afs_dir_invalid_edit_rem_wrong_name);
|
||||
goto out_unmap;
|
||||
}
|
||||
|
||||
/* Didn't find the dirent to clobber. Download the directory again. */
|
||||
trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_noent,
|
||||
0, 0, 0, 0, name->name);
|
||||
clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
|
||||
goto out_unmap;
|
||||
entry = found;
|
||||
b = entry / AFS_DIR_SLOTS_PER_BLOCK;
|
||||
slot = entry % AFS_DIR_SLOTS_PER_BLOCK;
|
||||
|
||||
found_dirent:
|
||||
block = afs_dir_find_block(&iter, b);
|
||||
if (!block)
|
||||
goto error;
|
||||
if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
|
||||
goto already_invalidated;
|
||||
|
||||
/* Check and clear the entry. */
|
||||
de = &block->dirents[slot];
|
||||
if (de->u.valid != 1)
|
||||
goto error_unmap;
|
||||
|
||||
trace_afs_edit_dir(vnode, why, afs_edit_dir_delete, b, slot,
|
||||
ntohl(de->u.vnode), ntohl(de->u.unique),
|
||||
name->name);
|
||||
|
||||
memset(de, 0, sizeof(*de) * need_slots);
|
||||
|
||||
/* Adjust the bitmap. */
|
||||
afs_clear_contig_bits(block, slot, need_slots);
|
||||
kunmap_local(block);
|
||||
if (folio != folio0) {
|
||||
folio_unlock(folio);
|
||||
folio_put(folio);
|
||||
}
|
||||
afs_clear_contig_bits(block, slot, iter.nr_slots);
|
||||
|
||||
/* Adjust the allocation counter. */
|
||||
if (b < AFS_DIR_BLOCKS_WITH_CTR)
|
||||
meta->meta.alloc_ctrs[b] += need_slots;
|
||||
meta->meta.alloc_ctrs[b] += iter.nr_slots;
|
||||
|
||||
/* Clear the constituent entries. */
|
||||
next = de->u.hash_next;
|
||||
memset(de, 0, sizeof(*de) * iter.nr_slots);
|
||||
kunmap_local(block);
|
||||
|
||||
/* Adjust the hash chain: if iter->prev_entry is 0, the hashtable head
|
||||
* index is previous; otherwise it's slot number of the previous entry.
|
||||
*/
|
||||
if (!iter.prev_entry) {
|
||||
__be16 prev_next = meta->meta.hashtable[iter.bucket];
|
||||
|
||||
if (unlikely(prev_next != htons(entry))) {
|
||||
pr_warn("%llx:%llx:%x: not head of chain b=%x p=%x,%x e=%x %*s",
|
||||
vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique,
|
||||
iter.bucket, iter.prev_entry, prev_next, entry,
|
||||
name->len, name->name);
|
||||
goto error;
|
||||
}
|
||||
meta->meta.hashtable[iter.bucket] = next;
|
||||
} else {
|
||||
unsigned int pb = iter.prev_entry / AFS_DIR_SLOTS_PER_BLOCK;
|
||||
unsigned int ps = iter.prev_entry % AFS_DIR_SLOTS_PER_BLOCK;
|
||||
__be16 prev_next;
|
||||
|
||||
pblock = afs_dir_find_block(&iter, pb);
|
||||
if (!pblock)
|
||||
goto error;
|
||||
pde = &pblock->dirents[ps];
|
||||
prev_next = pde->u.hash_next;
|
||||
if (prev_next != htons(entry)) {
|
||||
kunmap_local(pblock);
|
||||
pr_warn("%llx:%llx:%x: not prev in chain b=%x p=%x,%x e=%x %*s",
|
||||
vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique,
|
||||
iter.bucket, iter.prev_entry, prev_next, entry,
|
||||
name->len, name->name);
|
||||
goto error;
|
||||
}
|
||||
pde->u.hash_next = next;
|
||||
kunmap_local(pblock);
|
||||
}
|
||||
|
||||
netfs_single_mark_inode_dirty(&vnode->netfs.inode);
|
||||
|
||||
inode_set_iversion_raw(&vnode->netfs.inode, vnode->status.data_version);
|
||||
afs_stat_v(vnode, n_dir_rm);
|
||||
@ -470,26 +504,20 @@ void afs_edit_dir_remove(struct afs_vnode *vnode,
|
||||
|
||||
out_unmap:
|
||||
kunmap_local(meta);
|
||||
folio_unlock(folio0);
|
||||
folio_put(folio0);
|
||||
_leave("");
|
||||
return;
|
||||
|
||||
invalidated:
|
||||
already_invalidated:
|
||||
kunmap_local(block);
|
||||
trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_inval,
|
||||
0, 0, 0, 0, name->name);
|
||||
clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
|
||||
kunmap_local(block);
|
||||
if (folio != folio0) {
|
||||
folio_unlock(folio);
|
||||
folio_put(folio);
|
||||
}
|
||||
goto out_unmap;
|
||||
|
||||
error_unmap:
|
||||
kunmap_local(block);
|
||||
error:
|
||||
trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_error,
|
||||
0, 0, 0, 0, name->name);
|
||||
clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
|
||||
goto out_unmap;
|
||||
}
|
||||
|
||||
@ -502,9 +530,8 @@ void afs_edit_dir_update_dotdot(struct afs_vnode *vnode, struct afs_vnode *new_d
|
||||
{
|
||||
union afs_xdr_dir_block *block;
|
||||
union afs_xdr_dirent *de;
|
||||
struct folio *folio;
|
||||
struct afs_dir_iter iter = { .dvnode = vnode };
|
||||
unsigned int nr_blocks, b;
|
||||
pgoff_t index;
|
||||
loff_t i_size;
|
||||
int slot;
|
||||
|
||||
@ -512,39 +539,35 @@ void afs_edit_dir_update_dotdot(struct afs_vnode *vnode, struct afs_vnode *new_d
|
||||
|
||||
i_size = i_size_read(&vnode->netfs.inode);
|
||||
if (i_size < AFS_DIR_BLOCK_SIZE) {
|
||||
clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
|
||||
afs_invalidate_dir(vnode, afs_dir_invalid_edit_upd_bad_size);
|
||||
return;
|
||||
}
|
||||
|
||||
nr_blocks = i_size / AFS_DIR_BLOCK_SIZE;
|
||||
|
||||
/* Find a block that has sufficient slots available. Each folio
|
||||
* contains two or more directory blocks.
|
||||
*/
|
||||
for (b = 0; b < nr_blocks; b++) {
|
||||
index = b / AFS_DIR_BLOCKS_PER_PAGE;
|
||||
folio = afs_dir_get_folio(vnode, index);
|
||||
if (!folio)
|
||||
block = afs_dir_get_block(&iter, b);
|
||||
if (!block)
|
||||
goto error;
|
||||
|
||||
block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_pos(folio));
|
||||
|
||||
/* Abandon the edit if we got a callback break. */
|
||||
if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
|
||||
goto invalidated;
|
||||
goto already_invalidated;
|
||||
|
||||
slot = afs_dir_scan_block(block, &dotdot_name, b);
|
||||
if (slot >= 0)
|
||||
goto found_dirent;
|
||||
|
||||
kunmap_local(block);
|
||||
folio_unlock(folio);
|
||||
folio_put(folio);
|
||||
}
|
||||
|
||||
/* Didn't find the dirent to clobber. Download the directory again. */
|
||||
trace_afs_edit_dir(vnode, why, afs_edit_dir_update_nodd,
|
||||
0, 0, 0, 0, "..");
|
||||
clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
|
||||
afs_invalidate_dir(vnode, afs_dir_invalid_edit_upd_no_dd);
|
||||
goto out;
|
||||
|
||||
found_dirent:
|
||||
@ -556,26 +579,70 @@ void afs_edit_dir_update_dotdot(struct afs_vnode *vnode, struct afs_vnode *new_d
|
||||
ntohl(de->u.vnode), ntohl(de->u.unique), "..");
|
||||
|
||||
kunmap_local(block);
|
||||
folio_unlock(folio);
|
||||
folio_put(folio);
|
||||
netfs_single_mark_inode_dirty(&vnode->netfs.inode);
|
||||
inode_set_iversion_raw(&vnode->netfs.inode, vnode->status.data_version);
|
||||
|
||||
out:
|
||||
_leave("");
|
||||
return;
|
||||
|
||||
invalidated:
|
||||
already_invalidated:
|
||||
kunmap_local(block);
|
||||
folio_unlock(folio);
|
||||
folio_put(folio);
|
||||
trace_afs_edit_dir(vnode, why, afs_edit_dir_update_inval,
|
||||
0, 0, 0, 0, "..");
|
||||
clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
|
||||
goto out;
|
||||
|
||||
error:
|
||||
trace_afs_edit_dir(vnode, why, afs_edit_dir_update_error,
|
||||
0, 0, 0, 0, "..");
|
||||
clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialise a new directory. We need to fill in the "." and ".." entries.
|
||||
*/
|
||||
void afs_mkdir_init_dir(struct afs_vnode *dvnode, struct afs_vnode *parent_dvnode)
|
||||
{
|
||||
union afs_xdr_dir_block *meta;
|
||||
struct afs_dir_iter iter = { .dvnode = dvnode };
|
||||
union afs_xdr_dirent *de;
|
||||
unsigned int slot = AFS_DIR_RESV_BLOCKS0;
|
||||
loff_t i_size;
|
||||
|
||||
i_size = i_size_read(&dvnode->netfs.inode);
|
||||
if (i_size != AFS_DIR_BLOCK_SIZE) {
|
||||
afs_invalidate_dir(dvnode, afs_dir_invalid_edit_add_bad_size);
|
||||
return;
|
||||
}
|
||||
|
||||
meta = afs_dir_get_block(&iter, 0);
|
||||
if (!meta)
|
||||
return;
|
||||
|
||||
afs_edit_init_block(meta, meta, 0);
|
||||
|
||||
de = &meta->dirents[slot];
|
||||
de->u.valid = 1;
|
||||
de->u.vnode = htonl(dvnode->fid.vnode);
|
||||
de->u.unique = htonl(dvnode->fid.unique);
|
||||
memcpy(de->u.name, ".", 2);
|
||||
trace_afs_edit_dir(dvnode, afs_edit_dir_for_mkdir, afs_edit_dir_mkdir, 0, slot,
|
||||
dvnode->fid.vnode, dvnode->fid.unique, ".");
|
||||
slot++;
|
||||
|
||||
de = &meta->dirents[slot];
|
||||
de->u.valid = 1;
|
||||
de->u.vnode = htonl(parent_dvnode->fid.vnode);
|
||||
de->u.unique = htonl(parent_dvnode->fid.unique);
|
||||
memcpy(de->u.name, "..", 3);
|
||||
trace_afs_edit_dir(dvnode, afs_edit_dir_for_mkdir, afs_edit_dir_mkdir, 0, slot,
|
||||
parent_dvnode->fid.vnode, parent_dvnode->fid.unique, "..");
|
||||
|
||||
afs_set_contig_bits(meta, AFS_DIR_RESV_BLOCKS0, 2);
|
||||
meta->meta.alloc_ctrs[0] -= 2;
|
||||
kunmap_local(meta);
|
||||
|
||||
netfs_single_mark_inode_dirty(&dvnode->netfs.inode);
|
||||
set_bit(AFS_VNODE_DIR_VALID, &dvnode->flags);
|
||||
set_bit(AFS_VNODE_DIR_READ, &dvnode->flags);
|
||||
}
|
||||
|
fs/afs/dir_search.c: new file, 227 lines
@ -0,0 +1,227 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/* Search a directory's hash table.
|
||||
*
|
||||
* Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
|
||||
* Written by David Howells (dhowells@redhat.com)
|
||||
*
|
||||
* https://tools.ietf.org/html/draft-keiser-afs3-directory-object-00
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/namei.h>
|
||||
#include <linux/iversion.h>
|
||||
#include "internal.h"
|
||||
#include "afs_fs.h"
|
||||
#include "xdr_fs.h"
|
||||
|
||||
/*
|
||||
* Calculate the name hash.
|
||||
*/
|
||||
unsigned int afs_dir_hash_name(const struct qstr *name)
|
||||
{
|
||||
const unsigned char *p = name->name;
|
||||
unsigned int hash = 0, i;
|
||||
int bucket;
|
||||
|
||||
for (i = 0; i < name->len; i++)
|
||||
hash = (hash * 173) + p[i];
|
||||
bucket = hash & (AFS_DIR_HASHTBL_SIZE - 1);
|
||||
if (hash > INT_MAX) {
|
||||
bucket = AFS_DIR_HASHTBL_SIZE - bucket;
|
||||
bucket &= (AFS_DIR_HASHTBL_SIZE - 1);
|
||||
}
|
||||
return bucket;
|
||||
}
|
||||
|
||||
/*
|
||||
* Reset a directory iterator.
|
||||
*/
|
||||
static bool afs_dir_reset_iter(struct afs_dir_iter *iter)
|
||||
{
|
||||
unsigned long long i_size = i_size_read(&iter->dvnode->netfs.inode);
|
||||
unsigned int nblocks;
|
||||
|
||||
/* Work out the maximum number of steps we can take. */
|
||||
nblocks = umin(i_size / AFS_DIR_BLOCK_SIZE, AFS_DIR_MAX_BLOCKS);
|
||||
if (!nblocks)
|
||||
return false;
|
||||
iter->loop_check = nblocks * (AFS_DIR_SLOTS_PER_BLOCK - AFS_DIR_RESV_BLOCKS);
|
||||
iter->prev_entry = 0; /* Hash head is previous */
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialise a directory iterator for looking up a name.
|
||||
*/
|
||||
bool afs_dir_init_iter(struct afs_dir_iter *iter, const struct qstr *name)
|
||||
{
|
||||
iter->nr_slots = afs_dir_calc_slots(name->len);
|
||||
iter->bucket = afs_dir_hash_name(name);
|
||||
return afs_dir_reset_iter(iter);
|
||||
}
|
||||
|
||||
/*
|
||||
* Get a specific block.
|
||||
*/
|
||||
union afs_xdr_dir_block *afs_dir_find_block(struct afs_dir_iter *iter, size_t block)
|
||||
{
|
||||
struct folio_queue *fq = iter->fq;
|
||||
struct afs_vnode *dvnode = iter->dvnode;
|
||||
struct folio *folio;
|
||||
size_t blpos = block * AFS_DIR_BLOCK_SIZE;
|
||||
size_t blend = (block + 1) * AFS_DIR_BLOCK_SIZE, fpos = iter->fpos;
|
||||
int slot = iter->fq_slot;
|
||||
|
||||
_enter("%zx,%d", block, slot);
|
||||
|
||||
if (iter->block) {
|
||||
kunmap_local(iter->block);
|
||||
iter->block = NULL;
|
||||
}
|
||||
|
||||
if (dvnode->directory_size < blend)
|
||||
goto fail;
|
||||
|
||||
if (!fq || blpos < fpos) {
|
||||
fq = dvnode->directory;
|
||||
slot = 0;
|
||||
fpos = 0;
|
||||
}
|
||||
|
||||
/* Search the folio queue for the folio containing the block... */
|
||||
for (; fq; fq = fq->next) {
|
||||
for (; slot < folioq_count(fq); slot++) {
|
||||
size_t fsize = folioq_folio_size(fq, slot);
|
||||
|
||||
if (blend <= fpos + fsize) {
|
||||
/* ... and then return the mapped block. */
|
||||
folio = folioq_folio(fq, slot);
|
||||
if (WARN_ON_ONCE(folio_pos(folio) != fpos))
|
||||
goto fail;
|
||||
iter->fq = fq;
|
||||
iter->fq_slot = slot;
|
||||
iter->fpos = fpos;
|
||||
iter->block = kmap_local_folio(folio, blpos - fpos);
|
||||
return iter->block;
|
||||
}
|
||||
fpos += fsize;
|
||||
}
|
||||
slot = 0;
|
||||
}
|
||||
|
||||
fail:
|
||||
iter->fq = NULL;
|
||||
iter->fq_slot = 0;
|
||||
afs_invalidate_dir(dvnode, afs_dir_invalid_edit_get_block);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Search through a directory bucket.
|
||||
*/
|
||||
int afs_dir_search_bucket(struct afs_dir_iter *iter, const struct qstr *name,
|
||||
struct afs_fid *_fid)
|
||||
{
|
||||
const union afs_xdr_dir_block *meta;
|
||||
unsigned int entry;
|
||||
int ret = -ESTALE;
|
||||
|
||||
meta = afs_dir_find_block(iter, 0);
|
||||
if (!meta)
|
||||
return -ESTALE;
|
||||
|
||||
entry = ntohs(meta->meta.hashtable[iter->bucket & (AFS_DIR_HASHTBL_SIZE - 1)]);
|
||||
_enter("%x,%x", iter->bucket, entry);
|
||||
|
||||
while (entry) {
|
||||
const union afs_xdr_dir_block *block;
|
||||
const union afs_xdr_dirent *dire;
|
||||
unsigned int blnum = entry / AFS_DIR_SLOTS_PER_BLOCK;
|
||||
unsigned int slot = entry % AFS_DIR_SLOTS_PER_BLOCK;
|
||||
unsigned int resv = (blnum == 0 ? AFS_DIR_RESV_BLOCKS0 : AFS_DIR_RESV_BLOCKS);
|
||||
|
||||
_debug("search %x", entry);
|
||||
|
||||
if (slot < resv) {
|
||||
kdebug("slot out of range h=%x rs=%2x sl=%2x-%2x",
|
||||
iter->bucket, resv, slot, slot + iter->nr_slots - 1);
|
||||
goto bad;
|
||||
}
|
||||
|
||||
block = afs_dir_find_block(iter, blnum);
|
||||
if (!block)
|
||||
goto bad;
|
||||
dire = &block->dirents[slot];
|
||||
|
||||
if (slot + iter->nr_slots <= AFS_DIR_SLOTS_PER_BLOCK &&
|
||||
memcmp(dire->u.name, name->name, name->len) == 0 &&
|
||||
dire->u.name[name->len] == '\0') {
|
||||
_fid->vnode = ntohl(dire->u.vnode);
|
||||
_fid->unique = ntohl(dire->u.unique);
|
||||
ret = entry;
|
||||
goto found;
|
||||
}
|
||||
|
||||
iter->prev_entry = entry;
|
||||
entry = ntohs(dire->u.hash_next);
|
||||
if (!--iter->loop_check) {
|
||||
kdebug("dir chain loop h=%x", iter->bucket);
|
||||
goto bad;
|
||||
}
|
||||
}
|
||||
|
||||
ret = -ENOENT;
|
||||
found:
|
||||
if (iter->block) {
|
||||
kunmap_local(iter->block);
|
||||
iter->block = NULL;
|
||||
}
|
||||
|
||||
bad:
|
||||
if (ret == -ESTALE)
|
||||
afs_invalidate_dir(iter->dvnode, afs_dir_invalid_iter_stale);
|
||||
_leave(" = %d", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Search the appropriate hash chain in the contents of an AFS directory.
|
||||
*/
|
||||
int afs_dir_search(struct afs_vnode *dvnode, struct qstr *name,
|
||||
struct afs_fid *_fid, afs_dataversion_t *_dir_version)
|
||||
{
|
||||
struct afs_dir_iter iter = { .dvnode = dvnode, };
|
||||
int ret, retry_limit = 3;
|
||||
|
||||
_enter("{%lu},,,", dvnode->netfs.inode.i_ino);
|
||||
|
||||
if (!afs_dir_init_iter(&iter, name))
|
||||
return -ENOENT;
|
||||
do {
|
||||
if (--retry_limit < 0) {
|
||||
pr_warn("afs_read_dir(): Too many retries\n");
|
||||
ret = -ESTALE;
|
||||
break;
|
||||
}
|
||||
ret = afs_read_dir(dvnode, NULL);
|
||||
if (ret < 0) {
|
||||
if (ret != -ESTALE)
|
||||
break;
|
||||
if (test_bit(AFS_VNODE_DELETED, &dvnode->flags)) {
|
||||
ret = -ESTALE;
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
*_dir_version = inode_peek_iversion_raw(&dvnode->netfs.inode);
|
||||
|
||||
ret = afs_dir_search_bucket(&iter, name, _fid);
|
||||
up_read(&dvnode->validate_lock);
|
||||
if (ret == -ESTALE)
|
||||
afs_dir_reset_iter(&iter);
|
||||
} while (ret == -ESTALE);
|
||||
|
||||
_leave(" = %d", ret);
|
||||
return ret;
|
||||
}
|
fs/afs/file.c: 258 lines changed
@ -20,7 +20,6 @@
|
||||
#include "internal.h"
|
||||
|
||||
static int afs_file_mmap(struct file *file, struct vm_area_struct *vma);
|
||||
static int afs_symlink_read_folio(struct file *file, struct folio *folio);
|
||||
|
||||
static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter);
|
||||
static ssize_t afs_file_splice_read(struct file *in, loff_t *ppos,
|
||||
@ -61,13 +60,6 @@ const struct address_space_operations afs_file_aops = {
|
||||
.writepages = afs_writepages,
|
||||
};
|
||||
|
||||
const struct address_space_operations afs_symlink_aops = {
|
||||
.read_folio = afs_symlink_read_folio,
|
||||
.release_folio = netfs_release_folio,
|
||||
.invalidate_folio = netfs_invalidate_folio,
|
||||
.migrate_folio = filemap_migrate_folio,
|
||||
};
|
||||
|
||||
static const struct vm_operations_struct afs_vm_ops = {
|
||||
.open = afs_vm_open,
|
||||
.close = afs_vm_close,
|
||||
@ -208,49 +200,12 @@ int afs_release(struct inode *inode, struct file *file)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate a new read record.
|
||||
*/
|
||||
struct afs_read *afs_alloc_read(gfp_t gfp)
|
||||
{
|
||||
struct afs_read *req;
|
||||
|
||||
req = kzalloc(sizeof(struct afs_read), gfp);
|
||||
if (req)
|
||||
refcount_set(&req->usage, 1);
|
||||
|
||||
return req;
|
||||
}
|
||||
|
||||
/*
|
||||
* Dispose of a ref to a read record.
|
||||
*/
|
||||
void afs_put_read(struct afs_read *req)
|
||||
{
|
||||
if (refcount_dec_and_test(&req->usage)) {
|
||||
if (req->cleanup)
|
||||
req->cleanup(req);
|
||||
key_put(req->key);
|
||||
kfree(req);
|
||||
}
|
||||
}
|
||||
|
||||
static void afs_fetch_data_notify(struct afs_operation *op)
|
||||
{
|
||||
struct afs_read *req = op->fetch.req;
|
||||
struct netfs_io_subrequest *subreq = req->subreq;
|
||||
int error = afs_op_error(op);
|
||||
struct netfs_io_subrequest *subreq = op->fetch.subreq;
|
||||
|
||||
req->error = error;
|
||||
if (subreq) {
|
||||
subreq->rreq->i_size = req->file_size;
|
||||
if (req->pos + req->actual_len >= req->file_size)
|
||||
__set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
|
||||
netfs_read_subreq_terminated(subreq, error, false);
|
||||
req->subreq = NULL;
|
||||
} else if (req->done) {
|
||||
req->done(req);
|
||||
}
|
||||
subreq->error = afs_op_error(op);
|
||||
netfs_read_subreq_terminated(subreq);
|
||||
}
|
||||
|
||||
static void afs_fetch_data_success(struct afs_operation *op)
|
||||
@ -260,7 +215,7 @@ static void afs_fetch_data_success(struct afs_operation *op)
|
||||
_enter("op=%08x", op->debug_id);
|
||||
afs_vnode_commit_status(op, &op->file[0]);
|
||||
afs_stat_v(vnode, n_fetches);
|
||||
atomic_long_add(op->fetch.req->actual_len, &op->net->n_fetch_bytes);
|
||||
atomic_long_add(op->fetch.subreq->transferred, &op->net->n_fetch_bytes);
|
||||
afs_fetch_data_notify(op);
|
||||
}
|
||||
|
||||
@ -270,107 +225,188 @@ static void afs_fetch_data_aborted(struct afs_operation *op)
|
||||
afs_fetch_data_notify(op);
|
||||
}
|
||||
|
||||
static void afs_fetch_data_put(struct afs_operation *op)
|
||||
{
|
||||
op->fetch.req->error = afs_op_error(op);
|
||||
afs_put_read(op->fetch.req);
|
||||
}
|
||||
|
||||
static const struct afs_operation_ops afs_fetch_data_operation = {
|
||||
const struct afs_operation_ops afs_fetch_data_operation = {
|
||||
.issue_afs_rpc = afs_fs_fetch_data,
|
||||
.issue_yfs_rpc = yfs_fs_fetch_data,
|
||||
.success = afs_fetch_data_success,
|
||||
.aborted = afs_fetch_data_aborted,
|
||||
.failed = afs_fetch_data_notify,
|
||||
.put = afs_fetch_data_put,
|
||||
};
|
||||
|
||||
static void afs_issue_read_call(struct afs_operation *op)
|
||||
{
|
||||
op->call_responded = false;
|
||||
op->call_error = 0;
|
||||
op->call_abort_code = 0;
|
||||
if (test_bit(AFS_SERVER_FL_IS_YFS, &op->server->flags))
|
||||
yfs_fs_fetch_data(op);
|
||||
else
|
||||
afs_fs_fetch_data(op);
|
||||
}
|
||||
|
||||
static void afs_end_read(struct afs_operation *op)
|
||||
{
|
||||
if (op->call_responded && op->server)
|
||||
set_bit(AFS_SERVER_FL_RESPONDING, &op->server->flags);
|
||||
|
||||
if (!afs_op_error(op))
|
||||
afs_fetch_data_success(op);
|
||||
else if (op->cumul_error.aborted)
|
||||
afs_fetch_data_aborted(op);
|
||||
else
|
||||
afs_fetch_data_notify(op);
|
||||
|
||||
afs_end_vnode_operation(op);
|
||||
afs_put_operation(op);
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform I/O processing on an asynchronous call. The work item carries a ref
|
||||
* to the call struct that we either need to release or to pass on.
|
||||
*/
|
||||
static void afs_read_receive(struct afs_call *call)
|
||||
{
|
||||
struct afs_operation *op = call->op;
|
||||
enum afs_call_state state;
|
||||
|
||||
_enter("");
|
||||
|
||||
state = READ_ONCE(call->state);
|
||||
if (state == AFS_CALL_COMPLETE)
|
||||
return;
|
||||
trace_afs_read_recv(op, call);
|
||||
|
||||
while (state < AFS_CALL_COMPLETE && READ_ONCE(call->need_attention)) {
|
||||
WRITE_ONCE(call->need_attention, false);
|
||||
afs_deliver_to_call(call);
|
||||
state = READ_ONCE(call->state);
|
||||
}
|
||||
|
||||
if (state < AFS_CALL_COMPLETE) {
|
||||
netfs_read_subreq_progress(op->fetch.subreq);
|
||||
if (rxrpc_kernel_check_life(call->net->socket, call->rxcall))
|
||||
return;
|
||||
/* rxrpc terminated the call. */
|
||||
afs_set_call_complete(call, call->error, call->abort_code);
|
||||
}
|
||||
|
||||
op->call_abort_code = call->abort_code;
|
||||
op->call_error = call->error;
|
||||
op->call_responded = call->responded;
|
||||
op->call = NULL;
|
||||
call->op = NULL;
|
||||
afs_put_call(call);
|
||||
|
||||
/* If the call failed, then we need to crank the server rotation
|
||||
* handle and try the next.
|
||||
*/
|
||||
if (afs_select_fileserver(op)) {
|
||||
afs_issue_read_call(op);
|
||||
return;
|
||||
}
|
||||
|
||||
afs_end_read(op);
|
||||
}
|
||||
|
||||
void afs_fetch_data_async_rx(struct work_struct *work)
|
||||
{
|
||||
struct afs_call *call = container_of(work, struct afs_call, async_work);
|
||||
|
||||
afs_read_receive(call);
|
||||
afs_put_call(call);
|
||||
}
|
||||
|
||||
void afs_fetch_data_immediate_cancel(struct afs_call *call)
|
||||
{
|
||||
if (call->async) {
|
||||
afs_get_call(call, afs_call_trace_wake);
|
||||
if (!queue_work(afs_async_calls, &call->async_work))
|
||||
afs_deferred_put_call(call);
|
||||
flush_work(&call->async_work);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Fetch file data from the volume.
|
||||
*/
|
||||
int afs_fetch_data(struct afs_vnode *vnode, struct afs_read *req)
|
||||
static void afs_issue_read(struct netfs_io_subrequest *subreq)
|
||||
{
|
||||
struct afs_operation *op;
|
||||
struct afs_vnode *vnode = AFS_FS_I(subreq->rreq->inode);
|
||||
struct key *key = subreq->rreq->netfs_priv;
|
||||
|
||||
_enter("%s{%llx:%llu.%u},%x,,,",
|
||||
vnode->volume->name,
|
||||
vnode->fid.vid,
|
||||
vnode->fid.vnode,
|
||||
vnode->fid.unique,
|
||||
key_serial(req->key));
|
||||
key_serial(key));
|
||||
|
||||
op = afs_alloc_operation(req->key, vnode->volume);
|
||||
op = afs_alloc_operation(key, vnode->volume);
|
||||
if (IS_ERR(op)) {
|
||||
if (req->subreq)
|
||||
netfs_read_subreq_terminated(req->subreq, PTR_ERR(op), false);
|
||||
return PTR_ERR(op);
|
||||
subreq->error = PTR_ERR(op);
|
||||
netfs_read_subreq_terminated(subreq);
|
||||
return;
|
||||
}
|
||||
|
||||
afs_op_set_vnode(op, 0, vnode);
|
||||
|
||||
op->fetch.req = afs_get_read(req);
|
||||
op->fetch.subreq = subreq;
|
||||
op->ops = &afs_fetch_data_operation;
|
||||
return afs_do_sync_operation(op);
|
||||
}
|
||||
|
||||
static void afs_read_worker(struct work_struct *work)
|
||||
{
|
||||
struct netfs_io_subrequest *subreq = container_of(work, struct netfs_io_subrequest, work);
|
||||
struct afs_vnode *vnode = AFS_FS_I(subreq->rreq->inode);
|
||||
struct afs_read *fsreq;
|
||||
|
||||
fsreq = afs_alloc_read(GFP_NOFS);
|
||||
if (!fsreq)
|
||||
return netfs_read_subreq_terminated(subreq, -ENOMEM, false);
|
||||
|
||||
fsreq->subreq = subreq;
|
||||
fsreq->pos = subreq->start + subreq->transferred;
|
||||
fsreq->len = subreq->len - subreq->transferred;
|
||||
fsreq->key = key_get(subreq->rreq->netfs_priv);
|
||||
fsreq->vnode = vnode;
|
||||
fsreq->iter = &subreq->io_iter;
|
||||
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
|
||||
afs_fetch_data(fsreq->vnode, fsreq);
|
||||
afs_put_read(fsreq);
|
||||
}
|
||||
|
||||
static void afs_issue_read(struct netfs_io_subrequest *subreq)
|
||||
{
|
||||
INIT_WORK(&subreq->work, afs_read_worker);
|
||||
queue_work(system_long_wq, &subreq->work);
|
||||
}
|
||||
if (subreq->rreq->origin == NETFS_READAHEAD ||
|
||||
subreq->rreq->iocb) {
|
||||
op->flags |= AFS_OPERATION_ASYNC;
|
||||
|
||||
static int afs_symlink_read_folio(struct file *file, struct folio *folio)
|
||||
{
|
||||
struct afs_vnode *vnode = AFS_FS_I(folio->mapping->host);
|
||||
struct afs_read *fsreq;
|
||||
int ret;
|
||||
if (!afs_begin_vnode_operation(op)) {
|
||||
subreq->error = afs_put_operation(op);
|
||||
netfs_read_subreq_terminated(subreq);
|
||||
return;
|
||||
}
|
||||
|
||||
fsreq = afs_alloc_read(GFP_NOFS);
|
||||
if (!fsreq)
|
||||
return -ENOMEM;
|
||||
if (!afs_select_fileserver(op)) {
|
||||
afs_end_read(op);
|
||||
return;
|
||||
}
|
||||
|
||||
fsreq->pos = folio_pos(folio);
|
||||
fsreq->len = folio_size(folio);
|
||||
fsreq->vnode = vnode;
|
||||
fsreq->iter = &fsreq->def_iter;
|
||||
iov_iter_xarray(&fsreq->def_iter, ITER_DEST, &folio->mapping->i_pages,
|
||||
fsreq->pos, fsreq->len);
|
||||
|
||||
ret = afs_fetch_data(fsreq->vnode, fsreq);
|
||||
if (ret == 0)
|
||||
folio_mark_uptodate(folio);
|
||||
folio_unlock(folio);
|
||||
return ret;
|
||||
afs_issue_read_call(op);
|
||||
} else {
|
||||
afs_do_sync_operation(op);
|
||||
}
|
||||
}
|
||||
|
||||
static int afs_init_request(struct netfs_io_request *rreq, struct file *file)
|
||||
{
|
||||
struct afs_vnode *vnode = AFS_FS_I(rreq->inode);
|
||||
|
||||
if (file)
|
||||
rreq->netfs_priv = key_get(afs_file_key(file));
|
||||
rreq->rsize = 256 * 1024;
|
||||
rreq->wsize = 256 * 1024 * 1024;
|
||||
|
||||
switch (rreq->origin) {
|
||||
case NETFS_READ_SINGLE:
|
||||
if (!file) {
|
||||
struct key *key = afs_request_key(vnode->volume->cell);
|
||||
|
||||
if (IS_ERR(key))
|
||||
return PTR_ERR(key);
|
||||
rreq->netfs_priv = key;
|
||||
}
|
||||
break;
|
||||
case NETFS_WRITEBACK:
|
||||
case NETFS_WRITETHROUGH:
|
||||
case NETFS_UNBUFFERED_WRITE:
|
||||
case NETFS_DIO_WRITE:
|
||||
if (S_ISREG(rreq->inode->i_mode))
|
||||
rreq->io_streams[0].avail = true;
|
||||
break;
|
||||
case NETFS_WRITEBACK_SINGLE:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -49,6 +49,105 @@ struct afs_operation *afs_alloc_operation(struct key *key, struct afs_volume *vo
|
||||
return op;
|
||||
}
|
||||
|
||||
struct afs_io_locker {
|
||||
struct list_head link;
|
||||
struct task_struct *task;
|
||||
unsigned long have_lock;
|
||||
};
|
||||
|
||||
/*
|
||||
* Unlock the I/O lock on a vnode.
|
||||
*/
|
||||
static void afs_unlock_for_io(struct afs_vnode *vnode)
|
||||
{
|
||||
struct afs_io_locker *locker;
|
||||
|
||||
spin_lock(&vnode->lock);
|
||||
locker = list_first_entry_or_null(&vnode->io_lock_waiters,
|
||||
struct afs_io_locker, link);
|
||||
if (locker) {
|
||||
list_del(&locker->link);
|
||||
smp_store_release(&locker->have_lock, 1);
|
||||
smp_mb__after_atomic(); /* Store have_lock before task state */
|
||||
wake_up_process(locker->task);
|
||||
} else {
|
||||
clear_bit(AFS_VNODE_IO_LOCK, &vnode->flags);
|
||||
}
|
||||
spin_unlock(&vnode->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Lock the I/O lock on a vnode uninterruptibly. We can't use an ordinary
|
||||
* mutex as lockdep will complain if we unlock it in the wrong thread.
|
||||
*/
|
||||
static void afs_lock_for_io(struct afs_vnode *vnode)
|
||||
{
|
||||
struct afs_io_locker myself = { .task = current, };
|
||||
|
||||
spin_lock(&vnode->lock);
|
||||
|
||||
if (!test_and_set_bit(AFS_VNODE_IO_LOCK, &vnode->flags)) {
|
||||
spin_unlock(&vnode->lock);
|
||||
return;
|
||||
}
|
||||
|
||||
list_add_tail(&myself.link, &vnode->io_lock_waiters);
|
||||
spin_unlock(&vnode->lock);
|
||||
|
||||
for (;;) {
|
||||
set_current_state(TASK_UNINTERRUPTIBLE);
|
||||
if (smp_load_acquire(&myself.have_lock))
|
||||
break;
|
||||
schedule();
|
||||
}
|
||||
__set_current_state(TASK_RUNNING);
|
||||
}
|
||||
|
||||
/*
|
||||
* Lock the I/O lock on a vnode interruptibly. We can't use an ordinary mutex
|
||||
* as lockdep will complain if we unlock it in the wrong thread.
|
||||
*/
|
||||
static int afs_lock_for_io_interruptible(struct afs_vnode *vnode)
|
||||
{
|
||||
struct afs_io_locker myself = { .task = current, };
|
||||
int ret = 0;
|
||||
|
||||
spin_lock(&vnode->lock);
|
||||
|
||||
if (!test_and_set_bit(AFS_VNODE_IO_LOCK, &vnode->flags)) {
|
||||
spin_unlock(&vnode->lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
list_add_tail(&myself.link, &vnode->io_lock_waiters);
|
||||
spin_unlock(&vnode->lock);
|
||||
|
||||
for (;;) {
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
if (smp_load_acquire(&myself.have_lock) ||
|
||||
signal_pending(current))
|
||||
break;
|
||||
schedule();
|
||||
}
|
||||
__set_current_state(TASK_RUNNING);
|
||||
|
||||
/* If we got a signal, try to transfer the lock onto the next
|
||||
* waiter.
|
||||
*/
|
||||
if (unlikely(signal_pending(current))) {
|
||||
spin_lock(&vnode->lock);
|
||||
if (myself.have_lock) {
|
||||
spin_unlock(&vnode->lock);
|
||||
afs_unlock_for_io(vnode);
|
||||
} else {
|
||||
list_del(&myself.link);
|
||||
spin_unlock(&vnode->lock);
|
||||
}
|
||||
ret = -ERESTARTSYS;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Lock the vnode(s) being operated upon.
|
||||
*/
|
||||
@ -60,7 +159,7 @@ static bool afs_get_io_locks(struct afs_operation *op)
|
||||
_enter("");
|
||||
|
||||
if (op->flags & AFS_OPERATION_UNINTR) {
|
||||
mutex_lock(&vnode->io_lock);
|
||||
afs_lock_for_io(vnode);
|
||||
op->flags |= AFS_OPERATION_LOCK_0;
|
||||
_leave(" = t [1]");
|
||||
return true;
|
||||
@ -72,7 +171,7 @@ static bool afs_get_io_locks(struct afs_operation *op)
|
||||
if (vnode2 > vnode)
|
||||
swap(vnode, vnode2);
|
||||
|
||||
if (mutex_lock_interruptible(&vnode->io_lock) < 0) {
|
||||
if (afs_lock_for_io_interruptible(vnode) < 0) {
|
||||
afs_op_set_error(op, -ERESTARTSYS);
|
||||
op->flags |= AFS_OPERATION_STOP;
|
||||
_leave(" = f [I 0]");
|
||||
@ -81,10 +180,10 @@ static bool afs_get_io_locks(struct afs_operation *op)
|
||||
op->flags |= AFS_OPERATION_LOCK_0;
|
||||
|
||||
if (vnode2) {
|
||||
if (mutex_lock_interruptible_nested(&vnode2->io_lock, 1) < 0) {
|
||||
if (afs_lock_for_io_interruptible(vnode2) < 0) {
|
||||
afs_op_set_error(op, -ERESTARTSYS);
|
||||
op->flags |= AFS_OPERATION_STOP;
|
||||
mutex_unlock(&vnode->io_lock);
|
||||
afs_unlock_for_io(vnode);
|
||||
op->flags &= ~AFS_OPERATION_LOCK_0;
|
||||
_leave(" = f [I 1]");
|
||||
return false;
|
||||
@ -104,9 +203,9 @@ static void afs_drop_io_locks(struct afs_operation *op)
|
||||
_enter("");
|
||||
|
||||
if (op->flags & AFS_OPERATION_LOCK_1)
|
||||
mutex_unlock(&vnode2->io_lock);
|
||||
afs_unlock_for_io(vnode2);
|
||||
if (op->flags & AFS_OPERATION_LOCK_0)
|
||||
mutex_unlock(&vnode->io_lock);
|
||||
afs_unlock_for_io(vnode);
|
||||
}
|
||||
|
||||
static void afs_prepare_vnode(struct afs_operation *op, struct afs_vnode_param *vp,
|
||||
@ -157,7 +256,7 @@ bool afs_begin_vnode_operation(struct afs_operation *op)
|
||||
/*
|
||||
* Tidy up a filesystem cursor and unlock the vnode.
|
||||
*/
|
||||
static void afs_end_vnode_operation(struct afs_operation *op)
|
||||
void afs_end_vnode_operation(struct afs_operation *op)
|
||||
{
|
||||
_enter("");
|
||||
|
||||
|
@ -301,19 +301,19 @@ void afs_fs_fetch_status(struct afs_operation *op)
|
||||
static int afs_deliver_fs_fetch_data(struct afs_call *call)
|
||||
{
|
||||
struct afs_operation *op = call->op;
|
||||
struct netfs_io_subrequest *subreq = op->fetch.subreq;
|
||||
struct afs_vnode_param *vp = &op->file[0];
|
||||
struct afs_read *req = op->fetch.req;
|
||||
const __be32 *bp;
|
||||
size_t count_before;
|
||||
int ret;
|
||||
|
||||
_enter("{%u,%zu,%zu/%llu}",
|
||||
call->unmarshall, call->iov_len, iov_iter_count(call->iter),
|
||||
req->actual_len);
|
||||
call->remaining);
|
||||
|
||||
switch (call->unmarshall) {
|
||||
case 0:
|
||||
req->actual_len = 0;
|
||||
call->remaining = 0;
|
||||
call->unmarshall++;
|
||||
if (call->operation_ID == FSFETCHDATA64) {
|
||||
afs_extract_to_tmp64(call);
|
||||
@ -323,8 +323,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
|
||||
}
|
||||
fallthrough;
|
||||
|
||||
/* Extract the returned data length into
|
||||
* ->actual_len. This may indicate more or less data than was
|
||||
/* Extract the returned data length into ->remaining.
|
||||
* This may indicate more or less data than was
|
||||
* requested will be returned.
|
||||
*/
|
||||
case 1:
|
||||
@ -333,42 +333,40 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
req->actual_len = be64_to_cpu(call->tmp64);
|
||||
_debug("DATA length: %llu", req->actual_len);
|
||||
call->remaining = be64_to_cpu(call->tmp64);
|
||||
_debug("DATA length: %llu", call->remaining);
|
||||
|
||||
if (req->actual_len == 0)
|
||||
if (call->remaining == 0)
|
||||
goto no_more_data;
|
||||
|
||||
call->iter = req->iter;
|
||||
call->iov_len = min(req->actual_len, req->len);
|
||||
call->iter = &subreq->io_iter;
|
||||
call->iov_len = umin(call->remaining, subreq->len - subreq->transferred);
|
||||
call->unmarshall++;
|
||||
fallthrough;
|
||||
|
||||
/* extract the returned data */
|
||||
case 2:
|
||||
count_before = call->iov_len;
|
||||
_debug("extract data %zu/%llu", count_before, req->actual_len);
|
||||
_debug("extract data %zu/%llu", count_before, call->remaining);
|
||||
|
||||
ret = afs_extract_data(call, true);
|
||||
if (req->subreq) {
|
||||
req->subreq->transferred += count_before - call->iov_len;
|
||||
netfs_read_subreq_progress(req->subreq, false);
|
||||
}
|
||||
subreq->transferred += count_before - call->iov_len;
|
||||
call->remaining -= count_before - call->iov_len;
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
call->iter = &call->def_iter;
|
||||
if (req->actual_len <= req->len)
|
||||
if (call->remaining)
|
||||
goto no_more_data;
|
||||
|
||||
/* Discard any excess data the server gave us */
|
||||
afs_extract_discard(call, req->actual_len - req->len);
|
||||
afs_extract_discard(call, call->remaining);
|
||||
call->unmarshall = 3;
|
||||
fallthrough;
|
||||
|
||||
case 3:
|
||||
_debug("extract discard %zu/%llu",
|
||||
iov_iter_count(call->iter), req->actual_len - req->len);
|
||||
iov_iter_count(call->iter), call->remaining);
|
||||
|
||||
ret = afs_extract_data(call, true);
|
||||
if (ret < 0)
|
||||
@ -390,8 +388,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
|
||||
xdr_decode_AFSCallBack(&bp, call, &vp->scb);
|
||||
xdr_decode_AFSVolSync(&bp, &op->volsync);
|
||||
|
||||
req->data_version = vp->scb.status.data_version;
|
||||
req->file_size = vp->scb.status.size;
|
||||
if (subreq->start + subreq->transferred >= vp->scb.status.size)
|
||||
__set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
|
||||
|
||||
call->unmarshall++;
|
||||
fallthrough;
|
||||
@ -410,14 +408,18 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
|
||||
static const struct afs_call_type afs_RXFSFetchData = {
|
||||
.name = "FS.FetchData",
|
||||
.op = afs_FS_FetchData,
|
||||
.async_rx = afs_fetch_data_async_rx,
|
||||
.deliver = afs_deliver_fs_fetch_data,
|
||||
.immediate_cancel = afs_fetch_data_immediate_cancel,
|
||||
.destructor = afs_flat_call_destructor,
|
||||
};
|
||||
|
||||
static const struct afs_call_type afs_RXFSFetchData64 = {
|
||||
.name = "FS.FetchData64",
|
||||
.op = afs_FS_FetchData64,
|
||||
.async_rx = afs_fetch_data_async_rx,
|
||||
.deliver = afs_deliver_fs_fetch_data,
|
||||
.immediate_cancel = afs_fetch_data_immediate_cancel,
|
||||
.destructor = afs_flat_call_destructor,
|
||||
};
|
||||
|
||||
@ -426,8 +428,8 @@ static const struct afs_call_type afs_RXFSFetchData64 = {
|
||||
*/
|
||||
static void afs_fs_fetch_data64(struct afs_operation *op)
|
||||
{
|
||||
struct netfs_io_subrequest *subreq = op->fetch.subreq;
|
||||
struct afs_vnode_param *vp = &op->file[0];
|
||||
struct afs_read *req = op->fetch.req;
|
||||
struct afs_call *call;
|
||||
__be32 *bp;
|
||||
|
||||
@ -437,16 +439,19 @@ static void afs_fs_fetch_data64(struct afs_operation *op)
|
||||
if (!call)
|
||||
return afs_op_nomem(op);
|
||||
|
||||
if (op->flags & AFS_OPERATION_ASYNC)
|
||||
call->async = true;
|
||||
|
||||
/* marshall the parameters */
|
||||
bp = call->request;
|
||||
bp[0] = htonl(FSFETCHDATA64);
|
||||
bp[1] = htonl(vp->fid.vid);
|
||||
bp[2] = htonl(vp->fid.vnode);
|
||||
bp[3] = htonl(vp->fid.unique);
|
||||
bp[4] = htonl(upper_32_bits(req->pos));
|
||||
bp[5] = htonl(lower_32_bits(req->pos));
|
||||
bp[4] = htonl(upper_32_bits(subreq->start + subreq->transferred));
|
||||
bp[5] = htonl(lower_32_bits(subreq->start + subreq->transferred));
|
||||
bp[6] = 0;
|
||||
bp[7] = htonl(lower_32_bits(req->len));
|
||||
bp[7] = htonl(lower_32_bits(subreq->len - subreq->transferred));
|
||||
|
||||
call->fid = vp->fid;
|
||||
trace_afs_make_fs_call(call, &vp->fid);
|
||||
@ -458,9 +463,9 @@ static void afs_fs_fetch_data64(struct afs_operation *op)
|
||||
*/
|
||||
void afs_fs_fetch_data(struct afs_operation *op)
|
||||
{
|
||||
struct netfs_io_subrequest *subreq = op->fetch.subreq;
|
||||
struct afs_vnode_param *vp = &op->file[0];
|
||||
struct afs_call *call;
|
||||
struct afs_read *req = op->fetch.req;
|
||||
__be32 *bp;
|
||||
|
||||
if (test_bit(AFS_SERVER_FL_HAS_FS64, &op->server->flags))
|
||||
@ -472,16 +477,14 @@ void afs_fs_fetch_data(struct afs_operation *op)
|
||||
if (!call)
|
||||
return afs_op_nomem(op);
|
||||
|
||||
req->call_debug_id = call->debug_id;
|
||||
|
||||
/* marshall the parameters */
|
||||
bp = call->request;
|
||||
bp[0] = htonl(FSFETCHDATA);
|
||||
bp[1] = htonl(vp->fid.vid);
|
||||
bp[2] = htonl(vp->fid.vnode);
|
||||
bp[3] = htonl(vp->fid.unique);
|
||||
bp[4] = htonl(lower_32_bits(req->pos));
|
||||
bp[5] = htonl(lower_32_bits(req->len));
|
||||
bp[4] = htonl(lower_32_bits(subreq->start + subreq->transferred));
|
||||
bp[5] = htonl(lower_32_bits(subreq->len + subreq->transferred));
|
||||
|
||||
call->fid = vp->fid;
|
||||
trace_afs_make_fs_call(call, &vp->fid);
|
||||
@ -1733,6 +1736,7 @@ static const struct afs_call_type afs_RXFSGetCapabilities = {
|
||||
.op = afs_FS_GetCapabilities,
|
||||
.deliver = afs_deliver_fs_get_capabilities,
|
||||
.done = afs_fileserver_probe_result,
|
||||
.immediate_cancel = afs_fileserver_probe_result,
|
||||
.destructor = afs_fs_get_capabilities_destructor,
|
||||
};
|
||||
|
||||
|
fs/afs/inode.c: 140 lines changed
@ -25,8 +25,94 @@
|
||||
#include "internal.h"
|
||||
#include "afs_fs.h"
|
||||
|
||||
void afs_init_new_symlink(struct afs_vnode *vnode, struct afs_operation *op)
|
||||
{
|
||||
size_t size = strlen(op->create.symlink) + 1;
|
||||
size_t dsize = 0;
|
||||
char *p;
|
||||
|
||||
if (netfs_alloc_folioq_buffer(NULL, &vnode->directory, &dsize, size,
|
||||
mapping_gfp_mask(vnode->netfs.inode.i_mapping)) < 0)
|
||||
return;
|
||||
|
||||
vnode->directory_size = dsize;
|
||||
p = kmap_local_folio(folioq_folio(vnode->directory, 0), 0);
|
||||
memcpy(p, op->create.symlink, size);
|
||||
kunmap_local(p);
|
||||
set_bit(AFS_VNODE_DIR_READ, &vnode->flags);
|
||||
netfs_single_mark_inode_dirty(&vnode->netfs.inode);
|
||||
}
|
||||
|
||||
static void afs_put_link(void *arg)
|
||||
{
|
||||
struct folio *folio = virt_to_folio(arg);
|
||||
|
||||
kunmap_local(arg);
|
||||
folio_put(folio);
|
||||
}
|
||||
|
||||
const char *afs_get_link(struct dentry *dentry, struct inode *inode,
|
||||
struct delayed_call *callback)
|
||||
{
|
||||
struct afs_vnode *vnode = AFS_FS_I(inode);
|
||||
struct folio *folio;
|
||||
char *content;
|
||||
ssize_t ret;
|
||||
|
||||
if (!dentry) {
|
||||
/* RCU pathwalk. */
|
||||
if (!test_bit(AFS_VNODE_DIR_READ, &vnode->flags) || !afs_check_validity(vnode))
|
||||
return ERR_PTR(-ECHILD);
|
||||
goto good;
|
||||
}
|
||||
|
||||
if (test_bit(AFS_VNODE_DIR_READ, &vnode->flags))
|
||||
goto fetch;
|
||||
|
||||
ret = afs_validate(vnode, NULL);
|
||||
if (ret < 0)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
if (!test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) &&
|
||||
test_bit(AFS_VNODE_DIR_READ, &vnode->flags))
|
||||
goto good;
|
||||
|
||||
fetch:
|
||||
ret = afs_read_single(vnode, NULL);
|
||||
if (ret < 0)
|
||||
return ERR_PTR(ret);
|
||||
set_bit(AFS_VNODE_DIR_READ, &vnode->flags);
|
||||
|
||||
good:
|
||||
folio = folioq_folio(vnode->directory, 0);
|
||||
folio_get(folio);
|
||||
content = kmap_local_folio(folio, 0);
|
||||
set_delayed_call(callback, afs_put_link, content);
|
||||
return content;
|
||||
}
|
||||
|
||||
int afs_readlink(struct dentry *dentry, char __user *buffer, int buflen)
|
||||
{
|
||||
DEFINE_DELAYED_CALL(done);
|
||||
const char *content;
|
||||
int len;
|
||||
|
||||
content = afs_get_link(dentry, d_inode(dentry), &done);
|
||||
if (IS_ERR(content)) {
|
||||
do_delayed_call(&done);
|
||||
return PTR_ERR(content);
|
||||
}
|
||||
|
||||
len = umin(strlen(content), buflen);
|
||||
if (copy_to_user(buffer, content, len))
|
||||
len = -EFAULT;
|
||||
do_delayed_call(&done);
|
||||
return len;
|
||||
}
|
||||
|
||||
static const struct inode_operations afs_symlink_inode_operations = {
|
||||
.get_link = page_get_link,
|
||||
.get_link = afs_get_link,
|
||||
.readlink = afs_readlink,
|
||||
};
|
||||
|
||||
static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *parent_vnode)
|
||||
@ -110,7 +196,9 @@ static int afs_inode_init_from_status(struct afs_operation *op,
|
||||
inode->i_op = &afs_dir_inode_operations;
|
||||
inode->i_fop = &afs_dir_file_operations;
|
||||
inode->i_mapping->a_ops = &afs_dir_aops;
|
||||
mapping_set_large_folios(inode->i_mapping);
|
||||
__set_bit(NETFS_ICTX_SINGLE_NO_UPLOAD, &vnode->netfs.flags);
|
||||
/* Assume locally cached directory data will be valid. */
|
||||
__set_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
|
||||
break;
|
||||
case AFS_FTYPE_SYMLINK:
|
||||
/* Symlinks with a mode of 0644 are actually mountpoints. */
|
||||
@ -122,13 +210,13 @@ static int afs_inode_init_from_status(struct afs_operation *op,
|
||||
inode->i_mode = S_IFDIR | 0555;
|
||||
inode->i_op = &afs_mntpt_inode_operations;
|
||||
inode->i_fop = &afs_mntpt_file_operations;
|
||||
inode->i_mapping->a_ops = &afs_symlink_aops;
|
||||
} else {
|
||||
inode->i_mode = S_IFLNK | status->mode;
|
||||
inode->i_op = &afs_symlink_inode_operations;
|
||||
inode->i_mapping->a_ops = &afs_symlink_aops;
|
||||
}
|
||||
inode->i_mapping->a_ops = &afs_dir_aops;
|
||||
inode_nohighmem(inode);
|
||||
mapping_set_release_always(inode->i_mapping);
|
||||
break;
|
||||
default:
|
||||
dump_vnode(vnode, op->file[0].vnode != vnode ? op->file[0].vnode : NULL);
|
||||
@ -140,15 +228,17 @@ static int afs_inode_init_from_status(struct afs_operation *op,
|
||||
afs_set_netfs_context(vnode);
|
||||
|
||||
vnode->invalid_before = status->data_version;
|
||||
trace_afs_set_dv(vnode, status->data_version);
|
||||
inode_set_iversion_raw(&vnode->netfs.inode, status->data_version);
|
||||
|
||||
if (!vp->scb.have_cb) {
|
||||
/* it's a symlink we just created (the fileserver
|
||||
* didn't give us a callback) */
|
||||
atomic64_set(&vnode->cb_expires_at, AFS_NO_CB_PROMISE);
|
||||
afs_clear_cb_promise(vnode, afs_cb_promise_set_new_symlink);
|
||||
} else {
|
||||
vnode->cb_server = op->server;
|
||||
atomic64_set(&vnode->cb_expires_at, vp->scb.callback.expires_at);
|
||||
afs_set_cb_promise(vnode, vp->scb.callback.expires_at,
|
||||
afs_cb_promise_set_new_inode);
|
||||
}
|
||||
|
||||
write_sequnlock(&vnode->cb_lock);
|
||||
@ -207,12 +297,17 @@ static void afs_apply_status(struct afs_operation *op,
|
||||
if (vp->update_ctime)
|
||||
inode_set_ctime_to_ts(inode, op->ctime);
|
||||
|
||||
if (vnode->status.data_version != status->data_version)
|
||||
if (vnode->status.data_version != status->data_version) {
|
||||
trace_afs_set_dv(vnode, status->data_version);
|
||||
data_changed = true;
|
||||
}
|
||||
|
||||
vnode->status = *status;
|
||||
|
||||
if (vp->dv_before + vp->dv_delta != status->data_version) {
|
||||
trace_afs_dv_mismatch(vnode, vp->dv_before, vp->dv_delta,
|
||||
status->data_version);
|
||||
|
||||
if (vnode->cb_ro_snapshot == atomic_read(&vnode->volume->cb_ro_snapshot) &&
|
||||
atomic64_read(&vnode->cb_expires_at) != AFS_NO_CB_PROMISE)
|
||||
pr_warn("kAFS: vnode modified {%llx:%llu} %llx->%llx %s (op=%x)\n",
|
||||
@ -223,12 +318,10 @@ static void afs_apply_status(struct afs_operation *op,
|
||||
op->debug_id);
|
||||
|
||||
vnode->invalid_before = status->data_version;
|
||||
if (vnode->status.type == AFS_FTYPE_DIR) {
|
||||
if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
|
||||
afs_stat_v(vnode, n_inval);
|
||||
} else {
|
||||
if (vnode->status.type == AFS_FTYPE_DIR)
|
||||
afs_invalidate_dir(vnode, afs_dir_invalid_dv_mismatch);
|
||||
else
|
||||
set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
|
||||
}
|
||||
change_size = true;
|
||||
data_changed = true;
|
||||
unexpected_jump = true;
|
||||
@ -258,6 +351,8 @@ static void afs_apply_status(struct afs_operation *op,
|
||||
inode_set_ctime_to_ts(inode, t);
|
||||
inode_set_atime_to_ts(inode, t);
|
||||
}
|
||||
if (op->ops == &afs_fetch_data_operation)
|
||||
op->fetch.subreq->rreq->i_size = status->size;
|
||||
}
|
||||
}
|
||||
|
||||
@ -273,7 +368,7 @@ static void afs_apply_callback(struct afs_operation *op,
|
||||
if (!afs_cb_is_broken(vp->cb_break_before, vnode)) {
|
||||
if (op->volume->type == AFSVL_RWVOL)
|
||||
vnode->cb_server = op->server;
|
||||
atomic64_set(&vnode->cb_expires_at, cb->expires_at);
|
||||
afs_set_cb_promise(vnode, cb->expires_at, afs_cb_promise_set_apply_cb);
|
||||
}
|
||||
}
|
||||
|
||||
@ -435,7 +530,9 @@ static void afs_get_inode_cache(struct afs_vnode *vnode)
|
||||
} __packed key;
|
||||
struct afs_vnode_cache_aux aux;
|
||||
|
||||
if (vnode->status.type != AFS_FTYPE_FILE) {
|
||||
if (vnode->status.type != AFS_FTYPE_FILE &&
|
||||
vnode->status.type != AFS_FTYPE_DIR &&
|
||||
vnode->status.type != AFS_FTYPE_SYMLINK) {
|
||||
vnode->netfs.cache = NULL;
|
||||
return;
|
||||
}
|
||||
@ -637,6 +734,7 @@ int afs_drop_inode(struct inode *inode)
|
||||
void afs_evict_inode(struct inode *inode)
|
||||
{
|
||||
struct afs_vnode_cache_aux aux;
|
||||
struct afs_super_info *sbi = AFS_FS_S(inode->i_sb);
|
||||
struct afs_vnode *vnode = AFS_FS_I(inode);
|
||||
|
||||
_enter("{%llx:%llu.%d}",
|
||||
@ -648,8 +746,22 @@ void afs_evict_inode(struct inode *inode)
|
||||
|
||||
ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode);
|
||||
|
||||
if ((S_ISDIR(inode->i_mode) ||
|
||||
S_ISLNK(inode->i_mode)) &&
|
||||
(inode->i_state & I_DIRTY) &&
|
||||
!sbi->dyn_root) {
|
||||
struct writeback_control wbc = {
|
||||
.sync_mode = WB_SYNC_ALL,
|
||||
.for_sync = true,
|
||||
.range_end = LLONG_MAX,
|
||||
};
|
||||
|
||||
afs_single_writepages(inode->i_mapping, &wbc);
|
||||
}
|
||||
|
||||
netfs_wait_for_outstanding_io(inode);
|
||||
truncate_inode_pages_final(&inode->i_data);
|
||||
netfs_free_folioq_buffer(vnode->directory);
|
||||
|
||||
afs_set_cache_aux(vnode, &aux);
|
||||
netfs_clear_inode_writeback(inode, &aux);
|
||||
|
@ -163,6 +163,7 @@ struct afs_call {
|
||||
spinlock_t state_lock;
|
||||
int error; /* error code */
|
||||
u32 abort_code; /* Remote abort ID or 0 */
|
||||
unsigned long long remaining; /* How much is left to receive */
|
||||
unsigned int max_lifespan; /* Maximum lifespan in secs to set if not 0 */
|
||||
unsigned request_size; /* size of request data */
|
||||
unsigned reply_max; /* maximum size of reply */
|
||||
@ -201,11 +202,17 @@ struct afs_call_type {
|
||||
/* clean up a call */
|
||||
void (*destructor)(struct afs_call *call);
|
||||
|
||||
/* Async receive processing function */
|
||||
void (*async_rx)(struct work_struct *work);
|
||||
|
||||
/* Work function */
|
||||
void (*work)(struct work_struct *work);
|
||||
|
||||
/* Call done function (gets called immediately on success or failure) */
|
||||
void (*done)(struct afs_call *call);
|
||||
|
||||
/* Handle a call being immediately cancelled. */
|
||||
void (*immediate_cancel)(struct afs_call *call);
|
||||
};
|
||||
|
||||
/*
|
||||
@ -232,28 +239,6 @@ static inline struct key *afs_file_key(struct file *file)
|
||||
return af->key;
|
||||
}
|
||||
|
||||
/*
|
||||
* Record of an outstanding read operation on a vnode.
|
||||
*/
|
||||
struct afs_read {
|
||||
loff_t pos; /* Where to start reading */
|
||||
loff_t len; /* How much we're asking for */
|
||||
loff_t actual_len; /* How much we're actually getting */
|
||||
loff_t file_size; /* File size returned by server */
|
||||
struct key *key; /* The key to use to reissue the read */
|
||||
struct afs_vnode *vnode; /* The file being read into. */
|
||||
struct netfs_io_subrequest *subreq; /* Fscache helper read request this belongs to */
|
||||
afs_dataversion_t data_version; /* Version number returned by server */
|
||||
refcount_t usage;
|
||||
unsigned int call_debug_id;
|
||||
unsigned int nr_pages;
|
||||
int error;
|
||||
void (*done)(struct afs_read *);
|
||||
void (*cleanup)(struct afs_read *);
|
||||
struct iov_iter *iter; /* Iterator representing the buffer */
|
||||
struct iov_iter def_iter; /* Default iterator */
|
||||
};
|
||||
|
||||
/*
|
||||
* AFS superblock private data
|
||||
* - there's one superblock per volume
|
||||
@ -702,13 +687,14 @@ struct afs_vnode {
|
||||
struct afs_file_status status; /* AFS status info for this file */
|
||||
afs_dataversion_t invalid_before; /* Child dentries are invalid before this */
|
||||
struct afs_permits __rcu *permit_cache; /* cache of permits so far obtained */
|
||||
struct mutex io_lock; /* Lock for serialising I/O on this mutex */
|
||||
struct list_head io_lock_waiters; /* Threads waiting for the I/O lock */
|
||||
struct rw_semaphore validate_lock; /* lock for validating this vnode */
|
||||
struct rw_semaphore rmdir_lock; /* Lock for rmdir vs sillyrename */
|
||||
struct key *silly_key; /* Silly rename key */
|
||||
spinlock_t wb_lock; /* lock for wb_keys */
|
||||
spinlock_t lock; /* waitqueue/flags lock */
|
||||
unsigned long flags;
|
||||
#define AFS_VNODE_IO_LOCK 0 /* Set if the I/O serialisation lock is held */
|
||||
#define AFS_VNODE_UNSET 1 /* set if vnode attributes not yet set */
|
||||
#define AFS_VNODE_DIR_VALID 2 /* Set if dir contents are valid */
|
||||
#define AFS_VNODE_ZAP_DATA 3 /* set if vnode's data should be invalidated */
|
||||
@ -719,7 +705,9 @@ struct afs_vnode {
|
||||
#define AFS_VNODE_NEW_CONTENT 8 /* Set if file has new content (create/trunc-0) */
|
||||
#define AFS_VNODE_SILLY_DELETED 9 /* Set if file has been silly-deleted */
|
||||
#define AFS_VNODE_MODIFYING 10 /* Set if we're performing a modification op */
|
||||
#define AFS_VNODE_DIR_READ 11 /* Set if we've read a dir's contents */
|
||||
|
||||
struct folio_queue *directory; /* Directory contents */
|
||||
struct list_head wb_keys; /* List of keys available for writeback */
|
||||
struct list_head pending_locks; /* locks waiting to be granted */
|
||||
struct list_head granted_locks; /* locks granted on this file */
|
||||
@ -728,6 +716,7 @@ struct afs_vnode {
|
||||
ktime_t locked_at; /* Time at which lock obtained */
|
||||
enum afs_lock_state lock_state : 8;
|
||||
afs_lock_type_t lock_type : 8;
|
||||
unsigned int directory_size; /* Amount of space in ->directory */
|
||||
|
||||
/* outstanding callback notification on this file */
|
||||
struct work_struct cb_work; /* Work for mmap'd files */
|
||||
@ -907,7 +896,7 @@ struct afs_operation {
|
||||
bool new_negative;
|
||||
} rename;
|
||||
struct {
|
||||
struct afs_read *req;
|
||||
struct netfs_io_subrequest *subreq;
|
||||
} fetch;
|
||||
struct {
|
||||
afs_lock_type_t type;
|
||||
@ -959,6 +948,7 @@ struct afs_operation {
|
||||
#define AFS_OPERATION_TRIED_ALL 0x0400 /* Set if we've tried all the fileservers */
|
||||
#define AFS_OPERATION_RETRY_SERVER 0x0800 /* Set if we should retry the current server */
|
||||
#define AFS_OPERATION_DIR_CONFLICT 0x1000 /* Set if we detected a 3rd-party dir change */
|
||||
#define AFS_OPERATION_ASYNC 0x2000 /* Set if should run asynchronously */
|
||||
};
|
||||
|
||||
/*
|
||||
@ -983,6 +973,21 @@ static inline void afs_invalidate_cache(struct afs_vnode *vnode, unsigned int fl
|
||||
i_size_read(&vnode->netfs.inode), flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Directory iteration management.
|
||||
*/
|
||||
struct afs_dir_iter {
|
||||
struct afs_vnode *dvnode;
|
||||
union afs_xdr_dir_block *block;
|
||||
struct folio_queue *fq;
|
||||
unsigned int fpos;
|
||||
int fq_slot;
|
||||
unsigned int loop_check;
|
||||
u8 nr_slots;
|
||||
u8 bucket;
|
||||
unsigned int prev_entry;
|
||||
};
|
||||
|
||||
#include <trace/events/afs.h>
|
||||
|
||||
/*****************************************************************************/
|
||||
@ -1064,8 +1069,13 @@ extern const struct inode_operations afs_dir_inode_operations;
|
||||
extern const struct address_space_operations afs_dir_aops;
|
||||
extern const struct dentry_operations afs_fs_dentry_operations;
|
||||
|
||||
ssize_t afs_read_single(struct afs_vnode *dvnode, struct file *file);
|
||||
ssize_t afs_read_dir(struct afs_vnode *dvnode, struct file *file)
|
||||
__acquires(&dvnode->validate_lock);
|
||||
extern void afs_d_release(struct dentry *);
|
||||
extern void afs_check_for_remote_deletion(struct afs_operation *);
|
||||
int afs_single_writepages(struct address_space *mapping,
|
||||
struct writeback_control *wbc);
|
||||
|
||||
/*
|
||||
* dir_edit.c
|
||||
@ -1075,6 +1085,18 @@ extern void afs_edit_dir_add(struct afs_vnode *, struct qstr *, struct afs_fid *
|
||||
extern void afs_edit_dir_remove(struct afs_vnode *, struct qstr *, enum afs_edit_dir_reason);
|
||||
void afs_edit_dir_update_dotdot(struct afs_vnode *vnode, struct afs_vnode *new_dvnode,
|
||||
enum afs_edit_dir_reason why);
|
||||
void afs_mkdir_init_dir(struct afs_vnode *dvnode, struct afs_vnode *parent_vnode);
|
||||
|
||||
/*
|
||||
* dir_search.c
|
||||
*/
|
||||
unsigned int afs_dir_hash_name(const struct qstr *name);
|
||||
bool afs_dir_init_iter(struct afs_dir_iter *iter, const struct qstr *name);
|
||||
union afs_xdr_dir_block *afs_dir_find_block(struct afs_dir_iter *iter, size_t block);
|
||||
int afs_dir_search_bucket(struct afs_dir_iter *iter, const struct qstr *name,
|
||||
struct afs_fid *_fid);
|
||||
int afs_dir_search(struct afs_vnode *dvnode, struct qstr *name,
|
||||
struct afs_fid *_fid, afs_dataversion_t *_dir_version);
|
||||
|
||||
/*
|
||||
* dir_silly.c
|
||||
@ -1099,24 +1121,17 @@ extern void afs_dynroot_depopulate(struct super_block *);
|
||||
* file.c
|
||||
*/
|
||||
extern const struct address_space_operations afs_file_aops;
|
||||
extern const struct address_space_operations afs_symlink_aops;
|
||||
extern const struct inode_operations afs_file_inode_operations;
|
||||
extern const struct file_operations afs_file_operations;
|
||||
extern const struct afs_operation_ops afs_fetch_data_operation;
|
||||
extern const struct netfs_request_ops afs_req_ops;
|
||||
|
||||
extern int afs_cache_wb_key(struct afs_vnode *, struct afs_file *);
|
||||
extern void afs_put_wb_key(struct afs_wb_key *);
|
||||
extern int afs_open(struct inode *, struct file *);
|
||||
extern int afs_release(struct inode *, struct file *);
|
||||
extern int afs_fetch_data(struct afs_vnode *, struct afs_read *);
|
||||
extern struct afs_read *afs_alloc_read(gfp_t);
|
||||
extern void afs_put_read(struct afs_read *);
|
||||
|
||||
static inline struct afs_read *afs_get_read(struct afs_read *req)
|
||||
{
|
||||
refcount_inc(&req->usage);
|
||||
return req;
|
||||
}
|
||||
void afs_fetch_data_async_rx(struct work_struct *work);
|
||||
void afs_fetch_data_immediate_cancel(struct afs_call *call);
|
||||
|
||||
/*
|
||||
* flock.c
|
||||
@ -1168,6 +1183,7 @@ extern void afs_fs_store_acl(struct afs_operation *);
|
||||
extern struct afs_operation *afs_alloc_operation(struct key *, struct afs_volume *);
|
||||
extern int afs_put_operation(struct afs_operation *);
|
||||
extern bool afs_begin_vnode_operation(struct afs_operation *);
|
||||
extern void afs_end_vnode_operation(struct afs_operation *op);
|
||||
extern void afs_wait_for_operation(struct afs_operation *);
|
||||
extern int afs_do_sync_operation(struct afs_operation *);
|
||||
|
||||
@ -1205,6 +1221,10 @@ extern void afs_fs_probe_cleanup(struct afs_net *);
|
||||
*/
|
||||
extern const struct afs_operation_ops afs_fetch_status_operation;
|
||||
|
||||
void afs_init_new_symlink(struct afs_vnode *vnode, struct afs_operation *op);
|
||||
const char *afs_get_link(struct dentry *dentry, struct inode *inode,
|
||||
struct delayed_call *callback);
|
||||
int afs_readlink(struct dentry *dentry, char __user *buffer, int buflen);
|
||||
extern void afs_vnode_commit_status(struct afs_operation *, struct afs_vnode_param *);
|
||||
extern int afs_fetch_status(struct afs_vnode *, struct key *, bool, afs_access_t *);
|
||||
extern int afs_ilookup5_test_by_fid(struct inode *, void *);
|
||||
@ -1336,6 +1356,7 @@ extern void afs_charge_preallocation(struct work_struct *);
|
||||
extern void afs_put_call(struct afs_call *);
|
||||
void afs_deferred_put_call(struct afs_call *call);
|
||||
void afs_make_call(struct afs_call *call, gfp_t gfp);
|
||||
void afs_deliver_to_call(struct afs_call *call);
|
||||
void afs_wait_for_call_to_complete(struct afs_call *call);
|
||||
extern struct afs_call *afs_alloc_flat_call(struct afs_net *,
|
||||
const struct afs_call_type *,
|
||||
@ -1346,6 +1367,28 @@ extern void afs_send_simple_reply(struct afs_call *, const void *, size_t);
|
||||
extern int afs_extract_data(struct afs_call *, bool);
|
||||
extern int afs_protocol_error(struct afs_call *, enum afs_eproto_cause);
|
||||
|
||||
static inline struct afs_call *afs_get_call(struct afs_call *call,
|
||||
enum afs_call_trace why)
|
||||
{
|
||||
int r;
|
||||
|
||||
__refcount_inc(&call->ref, &r);
|
||||
|
||||
trace_afs_call(call->debug_id, why, r + 1,
|
||||
atomic_read(&call->net->nr_outstanding_calls),
|
||||
__builtin_return_address(0));
|
||||
return call;
|
||||
}
|
||||
|
||||
static inline void afs_see_call(struct afs_call *call, enum afs_call_trace why)
|
||||
{
|
||||
int r = refcount_read(&call->ref);
|
||||
|
||||
trace_afs_call(call->debug_id, why, r,
|
||||
atomic_read(&call->net->nr_outstanding_calls),
|
||||
__builtin_return_address(0));
|
||||
}
|
||||
|
||||
static inline void afs_make_op_call(struct afs_operation *op, struct afs_call *call,
|
||||
gfp_t gfp)
|
||||
{
|
||||
@ -1712,6 +1755,38 @@ static inline int afs_bad(struct afs_vnode *vnode, enum afs_file_error where)
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
/*
|
||||
* Set the callback promise on a vnode.
|
||||
*/
|
||||
static inline void afs_set_cb_promise(struct afs_vnode *vnode, time64_t expires_at,
|
||||
enum afs_cb_promise_trace trace)
|
||||
{
|
||||
atomic64_set(&vnode->cb_expires_at, expires_at);
|
||||
trace_afs_cb_promise(vnode, trace);
|
||||
}
|
||||
|
||||
/*
|
||||
* Clear the callback promise on a vnode, returning true if it was promised.
|
||||
*/
|
||||
static inline bool afs_clear_cb_promise(struct afs_vnode *vnode,
|
||||
enum afs_cb_promise_trace trace)
|
||||
{
|
||||
trace_afs_cb_promise(vnode, trace);
|
||||
return atomic64_xchg(&vnode->cb_expires_at, AFS_NO_CB_PROMISE) != AFS_NO_CB_PROMISE;
|
||||
}
|
||||
|
||||
/*
|
||||
* Mark a directory as being invalid.
|
||||
*/
|
||||
static inline void afs_invalidate_dir(struct afs_vnode *dvnode,
|
||||
enum afs_dir_invalid_trace trace)
|
||||
{
|
||||
if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
|
||||
trace_afs_dir_invalid(dvnode, trace);
|
||||
afs_stat_v(dvnode, n_inval);
|
||||
}
|
||||
}
|
||||
|
||||
/*****************************************************************************/
|
||||
/*
|
||||
* debug tracing
|
||||
|
@ -177,7 +177,7 @@ static int __init afs_init(void)
|
||||
afs_wq = alloc_workqueue("afs", 0, 0);
|
||||
if (!afs_wq)
|
||||
goto error_afs_wq;
|
||||
afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM, 0);
|
||||
afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
|
||||
if (!afs_async_calls)
|
||||
goto error_async;
|
||||
afs_lock_manager = alloc_workqueue("kafs_lockd", WQ_MEM_RECLAIM, 0);
|
||||
|
@ -30,7 +30,7 @@ const struct file_operations afs_mntpt_file_operations = {
|
||||
|
||||
const struct inode_operations afs_mntpt_inode_operations = {
|
||||
.lookup = afs_mntpt_lookup,
|
||||
.readlink = page_readlink,
|
||||
.readlink = afs_readlink,
|
||||
.getattr = afs_getattr,
|
||||
};
|
||||
|
||||
@ -118,9 +118,9 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt)
|
||||
ctx->volnamesz = sizeof(afs_root_volume) - 1;
|
||||
} else {
|
||||
/* read the contents of the AFS special symlink */
|
||||
struct page *page;
|
||||
DEFINE_DELAYED_CALL(cleanup);
|
||||
const char *content;
|
||||
loff_t size = i_size_read(d_inode(mntpt));
|
||||
char *buf;
|
||||
|
||||
if (src_as->cell)
|
||||
ctx->cell = afs_use_cell(src_as->cell, afs_cell_trace_use_mntpt);
|
||||
@ -128,16 +128,16 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt)
|
||||
if (size < 2 || size > PAGE_SIZE - 1)
|
||||
return -EINVAL;
|
||||
|
||||
page = read_mapping_page(d_inode(mntpt)->i_mapping, 0, NULL);
|
||||
if (IS_ERR(page))
|
||||
return PTR_ERR(page);
|
||||
content = afs_get_link(mntpt, d_inode(mntpt), &cleanup);
|
||||
if (IS_ERR(content)) {
|
||||
do_delayed_call(&cleanup);
|
||||
return PTR_ERR(content);
|
||||
}
|
||||
|
||||
buf = kmap(page);
|
||||
ret = -EINVAL;
|
||||
if (buf[size - 1] == '.')
|
||||
ret = vfs_parse_fs_string(fc, "source", buf, size - 1);
|
||||
kunmap(page);
|
||||
put_page(page);
|
||||
if (content[size - 1] == '.')
|
||||
ret = vfs_parse_fs_string(fc, "source", content, size - 1);
|
||||
do_delayed_call(&cleanup);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
|
@ -99,7 +99,7 @@ static bool afs_start_fs_iteration(struct afs_operation *op,
|
||||
write_seqlock(&vnode->cb_lock);
|
||||
ASSERTCMP(cb_server, ==, vnode->cb_server);
|
||||
vnode->cb_server = NULL;
|
||||
if (atomic64_xchg(&vnode->cb_expires_at, AFS_NO_CB_PROMISE) != AFS_NO_CB_PROMISE)
|
||||
if (afs_clear_cb_promise(vnode, afs_cb_promise_clear_rotate_server))
|
||||
vnode->cb_break++;
|
||||
write_sequnlock(&vnode->cb_lock);
|
||||
}
|
||||
@ -583,7 +583,7 @@ bool afs_select_fileserver(struct afs_operation *op)
|
||||
if (vnode->cb_server != server) {
|
||||
vnode->cb_server = server;
|
||||
vnode->cb_v_check = atomic_read(&vnode->volume->cb_v_break);
|
||||
atomic64_set(&vnode->cb_expires_at, AFS_NO_CB_PROMISE);
|
||||
afs_clear_cb_promise(vnode, afs_cb_promise_clear_server_change);
|
||||
}
|
||||
|
||||
retry_server:
|
||||
|
@ -149,7 +149,8 @@ static struct afs_call *afs_alloc_call(struct afs_net *net,
|
||||
call->net = net;
|
||||
call->debug_id = atomic_inc_return(&rxrpc_debug_id);
|
||||
refcount_set(&call->ref, 1);
|
||||
INIT_WORK(&call->async_work, afs_process_async_call);
|
||||
INIT_WORK(&call->async_work, type->async_rx ?: afs_process_async_call);
|
||||
INIT_WORK(&call->work, call->type->work);
|
||||
INIT_WORK(&call->free_work, afs_deferred_free_worker);
|
||||
init_waitqueue_head(&call->waitq);
|
||||
spin_lock_init(&call->state_lock);
|
||||
@ -235,27 +236,12 @@ void afs_deferred_put_call(struct afs_call *call)
|
||||
schedule_work(&call->free_work);
|
||||
}
|
||||
|
||||
static struct afs_call *afs_get_call(struct afs_call *call,
|
||||
enum afs_call_trace why)
|
||||
{
|
||||
int r;
|
||||
|
||||
__refcount_inc(&call->ref, &r);
|
||||
|
||||
trace_afs_call(call->debug_id, why, r + 1,
|
||||
atomic_read(&call->net->nr_outstanding_calls),
|
||||
__builtin_return_address(0));
|
||||
return call;
|
||||
}
|
||||
|
||||
/*
|
||||
* Queue the call for actual work.
|
||||
*/
|
||||
static void afs_queue_call_work(struct afs_call *call)
|
||||
{
|
||||
if (call->type->work) {
|
||||
INIT_WORK(&call->work, call->type->work);
|
||||
|
||||
afs_get_call(call, afs_call_trace_work);
|
||||
if (!queue_work(afs_wq, &call->work))
|
||||
afs_put_call(call);
|
||||
@ -430,11 +416,16 @@ void afs_make_call(struct afs_call *call, gfp_t gfp)
|
||||
return;
|
||||
|
||||
error_do_abort:
|
||||
if (ret != -ECONNABORTED) {
|
||||
if (ret != -ECONNABORTED)
|
||||
rxrpc_kernel_abort_call(call->net->socket, rxcall,
|
||||
RX_USER_ABORT, ret,
|
||||
afs_abort_send_data_error);
|
||||
} else {
|
||||
if (call->async) {
|
||||
afs_see_call(call, afs_call_trace_async_abort);
|
||||
return;
|
||||
}
|
||||
|
||||
if (ret == -ECONNABORTED) {
|
||||
len = 0;
|
||||
iov_iter_kvec(&msg.msg_iter, ITER_DEST, NULL, 0, 0);
|
||||
rxrpc_kernel_recv_data(call->net->socket, rxcall,
|
||||
@ -445,8 +436,10 @@ void afs_make_call(struct afs_call *call, gfp_t gfp)
|
||||
call->error = ret;
|
||||
trace_afs_call_done(call);
|
||||
error_kill_call:
|
||||
if (call->type->done)
|
||||
call->type->done(call);
|
||||
if (call->async)
|
||||
afs_see_call(call, afs_call_trace_async_kill);
|
||||
if (call->type->immediate_cancel)
|
||||
call->type->immediate_cancel(call);
|
||||
|
||||
/* We need to dispose of the extra ref we grabbed for an async call.
|
||||
* The call, however, might be queued on afs_async_calls and we need to
|
||||
@ -501,7 +494,7 @@ static void afs_log_error(struct afs_call *call, s32 remote_abort)
|
||||
/*
|
||||
* deliver messages to a call
|
||||
*/
|
||||
static void afs_deliver_to_call(struct afs_call *call)
|
||||
void afs_deliver_to_call(struct afs_call *call)
|
||||
{
|
||||
enum afs_call_state state;
|
||||
size_t len;
|
||||
@ -602,7 +595,6 @@ static void afs_deliver_to_call(struct afs_call *call)
|
||||
abort_code = 0;
|
||||
call_complete:
|
||||
afs_set_call_complete(call, ret, remote_abort);
|
||||
state = AFS_CALL_COMPLETE;
|
||||
goto done;
|
||||
}
|
||||
|
||||
@ -803,6 +795,7 @@ static int afs_deliver_cm_op_id(struct afs_call *call)
|
||||
return -ENOTSUPP;
|
||||
|
||||
trace_afs_cb_call(call);
|
||||
call->work.func = call->type->work;
|
||||
|
||||
/* pass responsibility for the remainer of this message off to the
|
||||
* cache manager op */
|
||||
|
@ -663,7 +663,7 @@ static void afs_i_init_once(void *_vnode)
|
||||
|
||||
memset(vnode, 0, sizeof(*vnode));
|
||||
inode_init_once(&vnode->netfs.inode);
|
||||
mutex_init(&vnode->io_lock);
|
||||
INIT_LIST_HEAD(&vnode->io_lock_waiters);
|
||||
init_rwsem(&vnode->validate_lock);
|
||||
spin_lock_init(&vnode->wb_lock);
|
||||
spin_lock_init(&vnode->lock);
|
||||
@ -696,6 +696,8 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
|
||||
vnode->volume = NULL;
|
||||
vnode->lock_key = NULL;
|
||||
vnode->permit_cache = NULL;
|
||||
vnode->directory = NULL;
|
||||
vnode->directory_size = 0;
|
||||
|
||||
vnode->flags = 1 << AFS_VNODE_UNSET;
|
||||
vnode->lock_state = AFS_VNODE_LOCK_NONE;
|
||||
|
@ -120,22 +120,31 @@
|
||||
bool afs_check_validity(const struct afs_vnode *vnode)
|
||||
{
|
||||
const struct afs_volume *volume = vnode->volume;
|
||||
enum afs_vnode_invalid_trace trace = afs_vnode_valid_trace;
|
||||
time64_t cb_expires_at = atomic64_read(&vnode->cb_expires_at);
|
||||
time64_t deadline = ktime_get_real_seconds() + 10;
|
||||
|
||||
if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
|
||||
return true;
|
||||
|
||||
if (atomic_read(&volume->cb_v_check) != atomic_read(&volume->cb_v_break) ||
|
||||
atomic64_read(&vnode->cb_expires_at) <= deadline ||
|
||||
volume->cb_expires_at <= deadline ||
|
||||
vnode->cb_ro_snapshot != atomic_read(&volume->cb_ro_snapshot) ||
|
||||
vnode->cb_scrub != atomic_read(&volume->cb_scrub) ||
|
||||
test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
|
||||
_debug("inval");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
if (atomic_read(&volume->cb_v_check) != atomic_read(&volume->cb_v_break))
|
||||
trace = afs_vnode_invalid_trace_cb_v_break;
|
||||
else if (cb_expires_at == AFS_NO_CB_PROMISE)
|
||||
trace = afs_vnode_invalid_trace_no_cb_promise;
|
||||
else if (cb_expires_at <= deadline)
|
||||
trace = afs_vnode_invalid_trace_expired;
|
||||
else if (volume->cb_expires_at <= deadline)
|
||||
trace = afs_vnode_invalid_trace_vol_expired;
|
||||
else if (vnode->cb_ro_snapshot != atomic_read(&volume->cb_ro_snapshot))
|
||||
trace = afs_vnode_invalid_trace_cb_ro_snapshot;
|
||||
else if (vnode->cb_scrub != atomic_read(&volume->cb_scrub))
|
||||
trace = afs_vnode_invalid_trace_cb_scrub;
|
||||
else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
|
||||
trace = afs_vnode_invalid_trace_zap_data;
|
||||
else
|
||||
return true;
|
||||
trace_afs_vnode_invalid(vnode, trace);
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -370,6 +370,7 @@ static const struct afs_call_type afs_RXVLGetCapabilities = {
|
||||
.name = "VL.GetCapabilities",
|
||||
.op = afs_VL_GetCapabilities,
|
||||
.deliver = afs_deliver_vl_get_capabilities,
|
||||
.immediate_cancel = afs_vlserver_probe_result,
|
||||
.done = afs_vlserver_probe_result,
|
||||
.destructor = afs_destroy_vl_get_capabilities,
|
||||
};
|
||||
|
@ -179,8 +179,8 @@ void afs_issue_write(struct netfs_io_subrequest *subreq)
|
||||
*/
|
||||
void afs_begin_writeback(struct netfs_io_request *wreq)
|
||||
{
|
||||
afs_get_writeback_key(wreq);
|
||||
wreq->io_streams[0].avail = true;
|
||||
if (S_ISREG(wreq->inode->i_mode))
|
||||
afs_get_writeback_key(wreq);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -193,6 +193,18 @@ void afs_retry_request(struct netfs_io_request *wreq, struct netfs_io_stream *st
|
||||
list_first_entry(&stream->subrequests,
|
||||
struct netfs_io_subrequest, rreq_link);
|
||||
|
||||
switch (wreq->origin) {
|
||||
case NETFS_READAHEAD:
|
||||
case NETFS_READPAGE:
|
||||
case NETFS_READ_GAPS:
|
||||
case NETFS_READ_SINGLE:
|
||||
case NETFS_READ_FOR_WRITE:
|
||||
case NETFS_DIO_READ:
|
||||
return;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
switch (subreq->error) {
|
||||
case -EACCES:
|
||||
case -EPERM:
|
||||
|
@ -88,7 +88,7 @@ union afs_xdr_dir_block {
|
||||
|
||||
struct {
|
||||
struct afs_xdr_dir_hdr hdr;
|
||||
u8 alloc_ctrs[AFS_DIR_MAX_BLOCKS];
|
||||
u8 alloc_ctrs[AFS_DIR_BLOCKS_WITH_CTR];
|
||||
__be16 hashtable[AFS_DIR_HASHTBL_SIZE];
|
||||
} meta;
|
||||
|
||||
|
@ -352,19 +352,19 @@ static int yfs_deliver_status_and_volsync(struct afs_call *call)
|
||||
static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
|
||||
{
|
||||
struct afs_operation *op = call->op;
|
||||
struct netfs_io_subrequest *subreq = op->fetch.subreq;
|
||||
struct afs_vnode_param *vp = &op->file[0];
|
||||
struct afs_read *req = op->fetch.req;
|
||||
const __be32 *bp;
|
||||
size_t count_before;
|
||||
int ret;
|
||||
|
||||
_enter("{%u,%zu, %zu/%llu}",
|
||||
call->unmarshall, call->iov_len, iov_iter_count(call->iter),
|
||||
req->actual_len);
|
||||
call->remaining);
|
||||
|
||||
switch (call->unmarshall) {
|
||||
case 0:
|
||||
req->actual_len = 0;
|
||||
call->remaining = 0;
|
||||
afs_extract_to_tmp64(call);
|
||||
call->unmarshall++;
|
||||
fallthrough;
|
||||
@ -379,42 +379,39 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
req->actual_len = be64_to_cpu(call->tmp64);
|
||||
_debug("DATA length: %llu", req->actual_len);
|
||||
call->remaining = be64_to_cpu(call->tmp64);
|
||||
_debug("DATA length: %llu", call->remaining);
|
||||
|
||||
if (req->actual_len == 0)
|
||||
if (call->remaining == 0)
|
||||
goto no_more_data;
|
||||
|
||||
call->iter = req->iter;
|
||||
call->iov_len = min(req->actual_len, req->len);
|
||||
call->iter = &subreq->io_iter;
|
||||
call->iov_len = min(call->remaining, subreq->len - subreq->transferred);
|
||||
call->unmarshall++;
|
||||
fallthrough;
|
||||
|
||||
/* extract the returned data */
|
||||
case 2:
|
||||
count_before = call->iov_len;
|
||||
_debug("extract data %zu/%llu", count_before, req->actual_len);
|
||||
_debug("extract data %zu/%llu", count_before, call->remaining);
|
||||
|
||||
ret = afs_extract_data(call, true);
|
||||
if (req->subreq) {
|
||||
req->subreq->transferred += count_before - call->iov_len;
|
||||
netfs_read_subreq_progress(req->subreq, false);
|
||||
}
|
||||
subreq->transferred += count_before - call->iov_len;
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
call->iter = &call->def_iter;
|
||||
if (req->actual_len <= req->len)
|
||||
if (call->remaining)
|
||||
goto no_more_data;
|
||||
|
||||
/* Discard any excess data the server gave us */
|
||||
afs_extract_discard(call, req->actual_len - req->len);
|
||||
afs_extract_discard(call, call->remaining);
|
||||
call->unmarshall = 3;
|
||||
fallthrough;
|
||||
|
||||
case 3:
|
||||
_debug("extract discard %zu/%llu",
|
||||
iov_iter_count(call->iter), req->actual_len - req->len);
|
||||
iov_iter_count(call->iter), call->remaining);
|
||||
|
||||
ret = afs_extract_data(call, true);
|
||||
if (ret < 0)
|
||||
@ -439,8 +436,8 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
|
||||
xdr_decode_YFSCallBack(&bp, call, &vp->scb);
|
||||
xdr_decode_YFSVolSync(&bp, &op->volsync);
|
||||
|
||||
req->data_version = vp->scb.status.data_version;
|
||||
req->file_size = vp->scb.status.size;
|
||||
if (subreq->start + subreq->transferred >= vp->scb.status.size)
|
||||
__set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
|
||||
|
||||
call->unmarshall++;
|
||||
fallthrough;
|
||||
@ -459,7 +456,9 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
|
||||
static const struct afs_call_type yfs_RXYFSFetchData64 = {
|
||||
.name = "YFS.FetchData64",
|
||||
.op = yfs_FS_FetchData64,
|
||||
.async_rx = afs_fetch_data_async_rx,
|
||||
.deliver = yfs_deliver_fs_fetch_data64,
|
||||
.immediate_cancel = afs_fetch_data_immediate_cancel,
|
||||
.destructor = afs_flat_call_destructor,
|
||||
};
|
||||
|
||||
@ -468,14 +467,15 @@ static const struct afs_call_type yfs_RXYFSFetchData64 = {
|
||||
*/
|
||||
void yfs_fs_fetch_data(struct afs_operation *op)
|
||||
{
|
||||
struct netfs_io_subrequest *subreq = op->fetch.subreq;
|
||||
struct afs_vnode_param *vp = &op->file[0];
|
||||
struct afs_read *req = op->fetch.req;
|
||||
struct afs_call *call;
|
||||
__be32 *bp;
|
||||
|
||||
_enter(",%x,{%llx:%llu},%llx,%llx",
|
||||
_enter(",%x,{%llx:%llu},%llx,%zx",
|
||||
key_serial(op->key), vp->fid.vid, vp->fid.vnode,
|
||||
req->pos, req->len);
|
||||
subreq->start + subreq->transferred,
|
||||
subreq->len - subreq->transferred);
|
||||
|
||||
call = afs_alloc_flat_call(op->net, &yfs_RXYFSFetchData64,
|
||||
sizeof(__be32) * 2 +
|
||||
@ -487,15 +487,16 @@ void yfs_fs_fetch_data(struct afs_operation *op)
|
||||
if (!call)
|
||||
return afs_op_nomem(op);
|
||||
|
||||
req->call_debug_id = call->debug_id;
|
||||
if (op->flags & AFS_OPERATION_ASYNC)
|
||||
call->async = true;
|
||||
|
||||
/* marshall the parameters */
|
||||
bp = call->request;
|
||||
bp = xdr_encode_u32(bp, YFSFETCHDATA64);
|
||||
bp = xdr_encode_u32(bp, 0); /* RPC flags */
|
||||
bp = xdr_encode_YFSFid(bp, &vp->fid);
|
||||
bp = xdr_encode_u64(bp, req->pos);
|
||||
bp = xdr_encode_u64(bp, req->len);
|
||||
bp = xdr_encode_u64(bp, subreq->start + subreq->transferred);
|
||||
bp = xdr_encode_u64(bp, subreq->len - subreq->transferred);
|
||||
yfs_check_req(call, bp);
|
||||
|
||||
call->fid = vp->fid;
|
||||
|
@@ -176,7 +176,7 @@ ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
	    !(file->f_mode & FMODE_CAN_ODIRECT))
		return -EINVAL;

	old_cred = override_creds_light(ctx->cred);
	old_cred = override_creds(ctx->cred);
	if (is_sync_kiocb(iocb)) {
		rwf_t rwf = iocb_to_rw_flags(flags);

@@ -197,7 +197,7 @@ ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
		backing_aio_cleanup(aio, ret);
	}
out:
	revert_creds_light(old_cred);
	revert_creds(old_cred);

if (ctx->accessed)
|
||||
ctx->accessed(iocb->ki_filp);
|
||||
@ -233,7 +233,7 @@ ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter,
|
||||
*/
|
||||
flags &= ~IOCB_DIO_CALLER_COMP;
|
||||
|
||||
old_cred = override_creds_light(ctx->cred);
|
||||
old_cred = override_creds(ctx->cred);
|
||||
if (is_sync_kiocb(iocb)) {
|
||||
rwf_t rwf = iocb_to_rw_flags(flags);
|
||||
|
||||
@ -264,7 +264,7 @@ ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter,
|
||||
backing_aio_cleanup(aio, ret);
|
||||
}
|
||||
out:
|
||||
revert_creds_light(old_cred);
|
||||
revert_creds(old_cred);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -281,9 +281,9 @@ ssize_t backing_file_splice_read(struct file *in, struct kiocb *iocb,
|
||||
if (WARN_ON_ONCE(!(in->f_mode & FMODE_BACKING)))
|
||||
return -EIO;
|
||||
|
||||
old_cred = override_creds_light(ctx->cred);
|
||||
old_cred = override_creds(ctx->cred);
|
||||
ret = vfs_splice_read(in, &iocb->ki_pos, pipe, len, flags);
|
||||
revert_creds_light(old_cred);
|
||||
revert_creds(old_cred);
|
||||
|
||||
if (ctx->accessed)
|
||||
ctx->accessed(iocb->ki_filp);
|
||||
@ -310,11 +310,11 @@ ssize_t backing_file_splice_write(struct pipe_inode_info *pipe,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
old_cred = override_creds_light(ctx->cred);
|
||||
old_cred = override_creds(ctx->cred);
|
||||
file_start_write(out);
|
||||
ret = out->f_op->splice_write(pipe, out, &iocb->ki_pos, len, flags);
|
||||
file_end_write(out);
|
||||
revert_creds_light(old_cred);
|
||||
revert_creds(old_cred);
|
||||
|
||||
if (ctx->end_write)
|
||||
ctx->end_write(iocb, ret);
|
||||
@ -338,9 +338,9 @@ int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
|
||||
|
||||
vma_set_file(vma, file);
|
||||
|
||||
old_cred = override_creds_light(ctx->cred);
|
||||
old_cred = override_creds(ctx->cred);
|
||||
ret = call_mmap(vma->vm_file, vma);
|
||||
revert_creds_light(old_cred);
|
||||
revert_creds(old_cred);
|
||||
|
||||
if (ctx->accessed)
|
||||
ctx->accessed(user_file);
|
||||
|
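
The fs/backing-file.c hunks above swap the override_creds_light()/revert_creds_light() pair for the plain override_creds()/revert_creds() helpers. As a rough illustration only (not part of this commit; backing_file_example() and its use of vfs_iter_read() are assumptions made for the sketch), the pattern each helper in that file follows is:

	/* Sketch: temporarily act with the backing file's credentials.
	 * override_creds() installs ctx->cred and returns the caller's
	 * credentials, which revert_creds() must always restore.
	 */
	static ssize_t backing_file_example(struct kiocb *iocb, struct iov_iter *iter,
					    const struct backing_file_ctx *ctx)
	{
		const struct cred *old_cred;
		ssize_t ret;

		old_cred = override_creds(ctx->cred);
		ret = vfs_iter_read(iocb->ki_filp, iter, &iocb->ki_pos, 0);
		revert_creds(old_cred);
		return ret;
	}
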
@ -13,6 +13,7 @@
|
||||
#include <linux/falloc.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <trace/events/fscache.h>
|
||||
#include <trace/events/netfs.h>
|
||||
#include "internal.h"
|
||||
|
||||
struct cachefiles_kiocb {
|
||||
@ -366,6 +367,7 @@ static int cachefiles_write(struct netfs_cache_resources *cres,
|
||||
if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE)) {
|
||||
if (term_func)
|
||||
term_func(term_func_priv, -ENOBUFS, false);
|
||||
trace_netfs_sreq(term_func_priv, netfs_sreq_trace_cache_nowrite);
|
||||
return -ENOBUFS;
|
||||
}
|
||||
|
||||
@ -695,6 +697,7 @@ static void cachefiles_issue_write(struct netfs_io_subrequest *subreq)
|
||||
iov_iter_truncate(&subreq->io_iter, len);
|
||||
}
|
||||
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_cache_prepare);
|
||||
cachefiles_begin_secure(cache, &saved_cred);
|
||||
ret = __cachefiles_prepare_write(object, cachefiles_cres_file(cres),
|
||||
&start, &len, len, true);
|
||||
@ -704,6 +707,7 @@ static void cachefiles_issue_write(struct netfs_io_subrequest *subreq)
|
||||
return;
|
||||
}
|
||||
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_cache_write);
|
||||
cachefiles_write(&subreq->rreq->cache_resources,
|
||||
subreq->start, &subreq->io_iter,
|
||||
netfs_write_subrequest_terminated, subreq);
|
||||
|
@ -77,6 +77,7 @@ int cachefiles_set_object_xattr(struct cachefiles_object *object)
|
||||
trace_cachefiles_vfs_error(object, file_inode(file), ret,
|
||||
cachefiles_trace_setxattr_error);
|
||||
trace_cachefiles_coherency(object, file_inode(file)->i_ino,
|
||||
be64_to_cpup((__be64 *)buf->data),
|
||||
buf->content,
|
||||
cachefiles_coherency_set_fail);
|
||||
if (ret != -ENOMEM)
|
||||
@ -85,6 +86,7 @@ int cachefiles_set_object_xattr(struct cachefiles_object *object)
|
||||
"Failed to set xattr with error %d", ret);
|
||||
} else {
|
||||
trace_cachefiles_coherency(object, file_inode(file)->i_ino,
|
||||
be64_to_cpup((__be64 *)buf->data),
|
||||
buf->content,
|
||||
cachefiles_coherency_set_ok);
|
||||
}
|
||||
@ -126,7 +128,10 @@ int cachefiles_check_auxdata(struct cachefiles_object *object, struct file *file
|
||||
object,
|
||||
"Failed to read aux with error %zd", xlen);
|
||||
why = cachefiles_coherency_check_xattr;
|
||||
} else if (buf->type != CACHEFILES_COOKIE_TYPE_DATA) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (buf->type != CACHEFILES_COOKIE_TYPE_DATA) {
|
||||
why = cachefiles_coherency_check_type;
|
||||
} else if (memcmp(buf->data, p, len) != 0) {
|
||||
why = cachefiles_coherency_check_aux;
|
||||
@ -141,7 +146,9 @@ int cachefiles_check_auxdata(struct cachefiles_object *object, struct file *file
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
out:
|
||||
trace_cachefiles_coherency(object, file_inode(file)->i_ino,
|
||||
be64_to_cpup((__be64 *)buf->data),
|
||||
buf->content, why);
|
||||
kfree(buf);
|
||||
return ret;
|
||||
|
@ -253,8 +253,9 @@ static void finish_netfs_read(struct ceph_osd_request *req)
|
||||
subreq->transferred = err;
|
||||
err = 0;
|
||||
}
|
||||
subreq->error = err;
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_io_progress);
|
||||
netfs_read_subreq_terminated(subreq, err, false);
|
||||
netfs_read_subreq_terminated(subreq);
|
||||
iput(req->r_inode);
|
||||
ceph_dec_osd_stopping_blocker(fsc->mdsc);
|
||||
}
|
||||
@ -314,7 +315,9 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
|
||||
|
||||
ceph_mdsc_put_request(req);
|
||||
out:
|
||||
netfs_read_subreq_terminated(subreq, err, false);
|
||||
subreq->error = err;
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_io_progress);
|
||||
netfs_read_subreq_terminated(subreq);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -426,8 +429,10 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
|
||||
ceph_osdc_start_request(req->r_osdc, req);
|
||||
out:
|
||||
ceph_osdc_put_request(req);
|
||||
if (err)
|
||||
netfs_read_subreq_terminated(subreq, err, false);
|
||||
if (err) {
|
||||
subreq->error = err;
|
||||
netfs_read_subreq_terminated(subreq);
|
||||
}
|
||||
doutc(cl, "%llx.%llx result %d\n", ceph_vinop(inode), err);
|
||||
}
|
||||
|
||||
|
@ -5006,10 +5006,11 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
|
||||
if (IS_ENCRYPTED(inode)) {
|
||||
inode->i_op = &ext4_encrypted_symlink_inode_operations;
|
||||
} else if (ext4_inode_is_fast_symlink(inode)) {
|
||||
inode->i_link = (char *)ei->i_data;
|
||||
inode->i_op = &ext4_fast_symlink_inode_operations;
|
||||
nd_terminate_link(ei->i_data, inode->i_size,
|
||||
sizeof(ei->i_data) - 1);
|
||||
inode_set_cached_link(inode, (char *)ei->i_data,
|
||||
inode->i_size);
|
||||
} else {
|
||||
inode->i_op = &ext4_symlink_inode_operations;
|
||||
}
|
||||
|
@ -3418,7 +3418,6 @@ static int ext4_symlink(struct mnt_idmap *idmap, struct inode *dir,
|
||||
inode->i_op = &ext4_symlink_inode_operations;
|
||||
} else {
|
||||
inode->i_op = &ext4_fast_symlink_inode_operations;
|
||||
inode->i_link = (char *)&EXT4_I(inode)->i_data;
|
||||
}
|
||||
}
|
||||
|
||||
@ -3434,6 +3433,9 @@ static int ext4_symlink(struct mnt_idmap *idmap, struct inode *dir,
|
||||
disk_link.len);
|
||||
inode->i_size = disk_link.len - 1;
|
||||
EXT4_I(inode)->i_disksize = inode->i_size;
|
||||
if (!IS_ENCRYPTED(inode))
|
||||
inode_set_cached_link(inode, (char *)&EXT4_I(inode)->i_data,
|
||||
inode->i_size);
|
||||
}
|
||||
err = ext4_add_nondir(handle, dentry, &inode);
|
||||
if (handle)
|
||||
|
fs/fhandle.c

@@ -187,17 +187,6 @@ static int get_path_from_fd(int fd, struct path *root)
	return 0;
}

enum handle_to_path_flags {
	HANDLE_CHECK_PERMS = (1 << 0),
	HANDLE_CHECK_SUBTREE = (1 << 1),
};

struct handle_to_path_ctx {
	struct path root;
	enum handle_to_path_flags flags;
	unsigned int fh_flags;
};

static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
{
	struct handle_to_path_ctx *ctx = context;
@ -261,50 +250,55 @@ static int do_handle_to_path(struct file_handle *handle, struct path *path,
|
||||
{
|
||||
int handle_dwords;
|
||||
struct vfsmount *mnt = ctx->root.mnt;
|
||||
struct dentry *dentry;
|
||||
|
||||
/* change the handle size to multiple of sizeof(u32) */
|
||||
handle_dwords = handle->handle_bytes >> 2;
|
||||
path->dentry = exportfs_decode_fh_raw(mnt,
|
||||
(struct fid *)handle->f_handle,
|
||||
handle_dwords, handle->handle_type,
|
||||
ctx->fh_flags,
|
||||
vfs_dentry_acceptable, ctx);
|
||||
if (IS_ERR_OR_NULL(path->dentry)) {
|
||||
if (path->dentry == ERR_PTR(-ENOMEM))
|
||||
dentry = exportfs_decode_fh_raw(mnt, (struct fid *)handle->f_handle,
|
||||
handle_dwords, handle->handle_type,
|
||||
ctx->fh_flags, vfs_dentry_acceptable,
|
||||
ctx);
|
||||
if (IS_ERR_OR_NULL(dentry)) {
|
||||
if (dentry == ERR_PTR(-ENOMEM))
|
||||
return -ENOMEM;
|
||||
return -ESTALE;
|
||||
}
|
||||
path->dentry = dentry;
|
||||
path->mnt = mntget(mnt);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allow relaxed permissions of file handles if the caller has the
|
||||
* ability to mount the filesystem or create a bind-mount of the
|
||||
* provided @mountdirfd.
|
||||
*
|
||||
* In both cases the caller may be able to get an unobstructed way to
|
||||
* the encoded file handle. If the caller is only able to create a
|
||||
* bind-mount we need to verify that there are no locked mounts on top
|
||||
* of it that could prevent us from getting to the encoded file.
|
||||
*
|
||||
* In principle, locked mounts can prevent the caller from mounting the
|
||||
* filesystem but that only applies to procfs and sysfs neither of which
|
||||
* support decoding file handles.
|
||||
*/
|
||||
static inline bool may_decode_fh(struct handle_to_path_ctx *ctx,
|
||||
unsigned int o_flags)
|
||||
static inline int may_decode_fh(struct handle_to_path_ctx *ctx,
|
||||
unsigned int o_flags)
|
||||
{
|
||||
struct path *root = &ctx->root;
|
||||
|
||||
if (capable(CAP_DAC_READ_SEARCH))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Restrict to O_DIRECTORY to provide a deterministic API that avoids a
|
||||
* confusing api in the face of disconnected non-dir dentries.
|
||||
* Allow relaxed permissions of file handles if the caller has
|
||||
* the ability to mount the filesystem or create a bind-mount of
|
||||
* the provided @mountdirfd.
|
||||
*
|
||||
* In both cases the caller may be able to get an unobstructed
|
||||
* way to the encoded file handle. If the caller is only able to
|
||||
* create a bind-mount we need to verify that there are no
|
||||
* locked mounts on top of it that could prevent us from getting
|
||||
* to the encoded file.
|
||||
*
|
||||
* In principle, locked mounts can prevent the caller from
|
||||
* mounting the filesystem but that only applies to procfs and
|
||||
* sysfs neither of which support decoding file handles.
|
||||
*
|
||||
* Restrict to O_DIRECTORY to provide a deterministic API that
|
||||
* avoids a confusing api in the face of disconnected non-dir
|
||||
* dentries.
|
||||
*
|
||||
* There's only one dentry for each directory inode (VFS rule)...
|
||||
*/
|
||||
if (!(o_flags & O_DIRECTORY))
|
||||
return false;
|
||||
return -EPERM;
|
||||
|
||||
if (ns_capable(root->mnt->mnt_sb->s_user_ns, CAP_SYS_ADMIN))
|
||||
ctx->flags = HANDLE_CHECK_PERMS;
|
||||
@ -314,14 +308,14 @@ static inline bool may_decode_fh(struct handle_to_path_ctx *ctx,
|
||||
!has_locked_children(real_mount(root->mnt), root->dentry))
|
||||
ctx->flags = HANDLE_CHECK_PERMS | HANDLE_CHECK_SUBTREE;
|
||||
else
|
||||
return false;
|
||||
return -EPERM;
|
||||
|
||||
/* Are we able to override DAC permissions? */
|
||||
if (!ns_capable(current_user_ns(), CAP_DAC_READ_SEARCH))
|
||||
return false;
|
||||
return -EPERM;
|
||||
|
||||
ctx->fh_flags = EXPORT_FH_DIR_ONLY;
|
||||
return true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
|
||||
@ -331,15 +325,19 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
|
||||
struct file_handle f_handle;
|
||||
struct file_handle *handle = NULL;
|
||||
struct handle_to_path_ctx ctx = {};
|
||||
const struct export_operations *eops;
|
||||
|
||||
retval = get_path_from_fd(mountdirfd, &ctx.root);
|
||||
if (retval)
|
||||
goto out_err;
|
||||
|
||||
if (!capable(CAP_DAC_READ_SEARCH) && !may_decode_fh(&ctx, o_flags)) {
|
||||
retval = -EPERM;
|
||||
eops = ctx.root.mnt->mnt_sb->s_export_op;
|
||||
if (eops && eops->permission)
|
||||
retval = eops->permission(&ctx, o_flags);
|
||||
else
|
||||
retval = may_decode_fh(&ctx, o_flags);
|
||||
if (retval)
|
||||
goto out_path;
|
||||
}
|
||||
|
||||
if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) {
|
||||
retval = -EFAULT;
|
||||
@@ -398,29 +396,28 @@ static long do_handle_open(int mountdirfd, struct file_handle __user *ufh,
			   int open_flag)
{
	long retval = 0;
	struct path path;
	struct path path __free(path_put) = {};
	struct file *file;
	int fd;
	const struct export_operations *eops;

	retval = handle_to_path(mountdirfd, ufh, &path, open_flag);
	if (retval)
		return retval;

	fd = get_unused_fd_flags(open_flag);
	if (fd < 0) {
		path_put(&path);
	CLASS(get_unused_fd, fd)(O_CLOEXEC);
	if (fd < 0)
		return fd;
	}
	file = file_open_root(&path, "", open_flag, 0);
	if (IS_ERR(file)) {
		put_unused_fd(fd);
		retval = PTR_ERR(file);
	} else {
		retval = fd;
		fd_install(fd, file);
	}
	path_put(&path);
	return retval;

	eops = path.mnt->mnt_sb->s_export_op;
	if (eops->open)
		file = eops->open(&path, open_flag);
	else
		file = file_open_root(&path, "", open_flag, 0);
	if (IS_ERR(file))
		return PTR_ERR(file);

	fd_install(fd, file);
	return take_fd(fd);
}

/**
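
For context on the do_handle_open() rework above: this is the kernel path behind the open_by_handle_at() syscall, and may_decode_fh() in the earlier hunks is what gates its use by callers lacking CAP_DAC_READ_SEARCH. A minimal userspace sketch of the round trip through the handle syscalls, for illustration only and not part of the commit:

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h>

	int main(int argc, char **argv)
	{
		struct file_handle *fh;
		int mount_id, mount_fd, fd;

		if (argc != 3)
			return 1;

		/* Encode a handle for the file named by argv[1]. */
		fh = malloc(sizeof(*fh) + MAX_HANDLE_SZ);
		fh->handle_bytes = MAX_HANDLE_SZ;
		if (name_to_handle_at(AT_FDCWD, argv[1], fh, &mount_id, 0) < 0) {
			perror("name_to_handle_at");
			return 1;
		}

		/* Reopen it relative to a mount-point fd (argv[2]); this is
		 * serviced by do_handle_open() and normally requires
		 * CAP_DAC_READ_SEARCH. */
		mount_fd = open(argv[2], O_RDONLY | O_DIRECTORY);
		fd = open_by_handle_at(mount_fd, fh, O_RDONLY);
		if (fd < 0) {
			perror("open_by_handle_at");
			return 1;
		}

		close(fd);
		return 0;
	}
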
fs/file.c

@@ -279,10 +279,6 @@ static int expand_files(struct files_struct *files, unsigned int nr)
	if (nr < fdt->max_fds)
		return 0;

	/* Can we expand? */
	if (nr >= sysctl_nr_open)
		return -EMFILE;

	if (unlikely(files->resize_in_progress)) {
		spin_unlock(&files->file_lock);
		wait_event(files->resize_wait, !files->resize_in_progress);
@@ -290,6 +286,10 @@ static int expand_files(struct files_struct *files, unsigned int nr)
		goto repeat;
	}

	/* Can we expand? */
	if (unlikely(nr >= sysctl_nr_open))
		return -EMFILE;

	/* All good, so we try */
	files->resize_in_progress = true;
	error = expand_fdtable(files, nr);
@ -1231,17 +1231,9 @@ __releases(&files->file_lock)
|
||||
|
||||
/*
|
||||
* We need to detect attempts to do dup2() over allocated but still
|
||||
* not finished descriptor. NB: OpenBSD avoids that at the price of
|
||||
* extra work in their equivalent of fget() - they insert struct
|
||||
* file immediately after grabbing descriptor, mark it larval if
|
||||
* more work (e.g. actual opening) is needed and make sure that
|
||||
* fget() treats larval files as absent. Potentially interesting,
|
||||
* but while extra work in fget() is trivial, locking implications
|
||||
* and amount of surgery on open()-related paths in VFS are not.
|
||||
* FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
|
||||
* deadlocks in rather amusing ways, AFAICS. All of that is out of
|
||||
* scope of POSIX or SUS, since neither considers shared descriptor
|
||||
* tables and this condition does not arise without those.
|
||||
* not finished descriptor.
|
||||
*
|
||||
* POSIX is silent on the issue, we return -EBUSY.
|
||||
*/
|
||||
fdt = files_fdtable(files);
|
||||
fd = array_index_nospec(fd, fdt->max_fds);
|
||||
|
@ -128,7 +128,7 @@ static struct ctl_table fs_stat_sysctls[] = {
|
||||
.data = &sysctl_nr_open,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.proc_handler = proc_douintvec_minmax,
|
||||
.extra1 = &sysctl_nr_open_min,
|
||||
.extra2 = &sysctl_nr_open_max,
|
||||
},
|
||||
@ -478,6 +478,8 @@ static void ____fput(struct callback_head *work)
|
||||
__fput(container_of(work, struct file, f_task_work));
|
||||
}
|
||||
|
||||
static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);
|
||||
|
||||
/*
|
||||
* If kernel thread really needs to have the final fput() it has done
|
||||
* to complete, call this. The only user right now is the boot - we
|
||||
@ -491,11 +493,10 @@ static void ____fput(struct callback_head *work)
|
||||
void flush_delayed_fput(void)
|
||||
{
|
||||
delayed_fput(NULL);
|
||||
flush_delayed_work(&delayed_fput_work);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(flush_delayed_fput);
|
||||
|
||||
static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);
|
||||
|
||||
void fput(struct file *file)
|
||||
{
|
||||
if (file_ref_put(&file->f_ref)) {
|
||||
|
@ -493,7 +493,7 @@ static void put_fc_log(struct fs_context *fc)
|
||||
if (log) {
|
||||
if (refcount_dec_and_test(&log->usage)) {
|
||||
fc->log.log = NULL;
|
||||
for (i = 0; i <= 7; i++)
|
||||
for (i = 0; i < ARRAY_SIZE(log->buffer) ; i++)
|
||||
if (log->need_free & (1 << i))
|
||||
kfree(log->buffer[i]);
|
||||
kfree(log);
|
||||
|
fs/libfs.c

@@ -245,9 +245,17 @@ const struct inode_operations simple_dir_inode_operations = {
};
EXPORT_SYMBOL(simple_dir_inode_operations);

/* 0 is '.', 1 is '..', so always start with offset 2 or more */
/* simple_offset_add() allocation range */
enum {
	DIR_OFFSET_MIN = 2,
	DIR_OFFSET_MIN = 3,
	DIR_OFFSET_MAX = LONG_MAX - 1,
};

/* simple_offset_add() never assigns these to a dentry */
enum {
	DIR_OFFSET_FIRST = 2, /* Find first real entry */
	DIR_OFFSET_EOD = LONG_MAX, /* Marks EOD */

};

static void offset_set(struct dentry *dentry, long offset)
|
||||
@ -291,8 +299,11 @@ int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry)
|
||||
return -EBUSY;
|
||||
|
||||
ret = mtree_alloc_cyclic(&octx->mt, &offset, dentry, DIR_OFFSET_MIN,
|
||||
LONG_MAX, &octx->next_offset, GFP_KERNEL);
|
||||
if (ret < 0)
|
||||
DIR_OFFSET_MAX, &octx->next_offset,
|
||||
GFP_KERNEL);
|
||||
if (unlikely(ret == -EBUSY))
|
||||
return -ENOSPC;
|
||||
if (unlikely(ret < 0))
|
||||
return ret;
|
||||
|
||||
offset_set(dentry, offset);
|
||||
@ -329,38 +340,6 @@ void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry)
|
||||
offset_set(dentry, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* simple_offset_empty - Check if a dentry can be unlinked
|
||||
* @dentry: dentry to be tested
|
||||
*
|
||||
* Returns 0 if @dentry is a non-empty directory; otherwise returns 1.
|
||||
*/
|
||||
int simple_offset_empty(struct dentry *dentry)
|
||||
{
|
||||
struct inode *inode = d_inode(dentry);
|
||||
struct offset_ctx *octx;
|
||||
struct dentry *child;
|
||||
unsigned long index;
|
||||
int ret = 1;
|
||||
|
||||
if (!inode || !S_ISDIR(inode->i_mode))
|
||||
return ret;
|
||||
|
||||
index = DIR_OFFSET_MIN;
|
||||
octx = inode->i_op->get_offset_ctx(inode);
|
||||
mt_for_each(&octx->mt, child, index, LONG_MAX) {
|
||||
spin_lock(&child->d_lock);
|
||||
if (simple_positive(child)) {
|
||||
spin_unlock(&child->d_lock);
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
spin_unlock(&child->d_lock);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* simple_offset_rename - handle directory offsets for rename
|
||||
* @old_dir: parent directory of source entry
|
||||
@ -454,14 +433,6 @@ void simple_offset_destroy(struct offset_ctx *octx)
mtree_destroy(&octx->mt);
}

static int offset_dir_open(struct inode *inode, struct file *file)
{
struct offset_ctx *ctx = inode->i_op->get_offset_ctx(inode);

file->private_data = (void *)ctx->next_offset;
return 0;
}

/**
* offset_dir_llseek - Advance the read position of a directory descriptor
* @file: an open directory whose position is to be updated
@ -475,9 +446,6 @@ static int offset_dir_open(struct inode *inode, struct file *file)
*/
static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
{
struct inode *inode = file->f_inode;
struct offset_ctx *ctx = inode->i_op->get_offset_ctx(inode);

switch (whence) {
case SEEK_CUR:
offset += file->f_pos;
@ -490,25 +458,46 @@ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
return -EINVAL;
}

/* In this case, ->private_data is protected by f_pos_lock */
if (!offset)
file->private_data = (void *)ctx->next_offset;
return vfs_setpos(file, offset, LONG_MAX);
}

static struct dentry *offset_find_next(struct offset_ctx *octx, loff_t offset)
/* Cf. find_next_child() */
static struct dentry *find_next_sibling_locked(struct dentry *parent,
struct dentry *dentry)
{
MA_STATE(mas, &octx->mt, offset, offset);
struct dentry *found = NULL;

hlist_for_each_entry_from(dentry, d_sib) {
if (!simple_positive(dentry))
continue;
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
if (simple_positive(dentry))
found = dget_dlock(dentry);
spin_unlock(&dentry->d_lock);
if (likely(found))
break;
}
return found;
}

static noinline_for_stack struct dentry *
offset_dir_lookup(struct file *file, loff_t offset)
{
struct dentry *parent = file->f_path.dentry;
struct dentry *child, *found = NULL;
struct inode *inode = d_inode(parent);
struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode);

MA_STATE(mas, &octx->mt, offset, offset);

rcu_read_lock();
child = mas_find(&mas, LONG_MAX);
child = mas_find(&mas, DIR_OFFSET_MAX);
if (!child)
goto out;
spin_lock(&child->d_lock);
if (simple_positive(child))
found = dget_dlock(child);
spin_unlock(&child->d_lock);

spin_lock(&parent->d_lock);
found = find_next_sibling_locked(parent, child);
spin_unlock(&parent->d_lock);
out:
rcu_read_unlock();
return found;
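
As an aside, a hedged sketch of the RCU-side maple-tree walk that offset_dir_lookup() performs above; the "demo" names are illustrative only. A real caller must pin whatever it finds (the hunk above uses dget_dlock()) before leaving the RCU read section.

    #include <linux/maple_tree.h>
    #include <linux/rcupdate.h>

    static bool demo_index_in_use(struct maple_tree *mt, unsigned long index,
                                  unsigned long max)
    {
            MA_STATE(mas, mt, index, index);
            bool found;

            rcu_read_lock();
            /* First entry at or after @index, up to @max; NULL if none. */
            found = mas_find(&mas, max) != NULL;
            rcu_read_unlock();
            return found;
    }
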
@ -517,35 +506,46 @@ static struct dentry *offset_find_next(struct offset_ctx *octx, loff_t offset)
static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
long offset = dentry2offset(dentry);

return ctx->actor(ctx, dentry->d_name.name, dentry->d_name.len, offset,
inode->i_ino, fs_umode_to_dtype(inode->i_mode));
return dir_emit(ctx, dentry->d_name.name, dentry->d_name.len,
inode->i_ino, fs_umode_to_dtype(inode->i_mode));
}

static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx, long last_index)
static void offset_iterate_dir(struct file *file, struct dir_context *ctx)
{
struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode);
struct dentry *dir = file->f_path.dentry;
struct dentry *dentry;

if (ctx->pos == DIR_OFFSET_FIRST) {
spin_lock(&dir->d_lock);
dentry = find_next_sibling_locked(dir, d_first_child(dir));
spin_unlock(&dir->d_lock);
} else
dentry = offset_dir_lookup(file, ctx->pos);
if (!dentry)
goto out_eod;

while (true) {
dentry = offset_find_next(octx, ctx->pos);
if (!dentry)
return;
struct dentry *next;

if (dentry2offset(dentry) >= last_index) {
dput(dentry);
return;
}
ctx->pos = dentry2offset(dentry);
if (!offset_dir_emit(ctx, dentry))
break;

if (!offset_dir_emit(ctx, dentry)) {
dput(dentry);
return;
}

ctx->pos = dentry2offset(dentry) + 1;
spin_lock(&dir->d_lock);
next = find_next_sibling_locked(dir, d_next_sibling(dentry));
spin_unlock(&dir->d_lock);
dput(dentry);

if (!next)
goto out_eod;
dentry = next;
}
dput(dentry);
return;

out_eod:
ctx->pos = DIR_OFFSET_EOD;
}

/**
@ -565,6 +565,8 @@ static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx, lon
*
* On return, @ctx->pos contains an offset that will read the next entry
* in this directory when offset_readdir() is called again with @ctx.
* Caller places this value in the d_off field of the last entry in the
* user's buffer.
*
* Return values:
* %0 - Complete
@ -572,19 +574,17 @@ static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx, lon
static int offset_readdir(struct file *file, struct dir_context *ctx)
{
struct dentry *dir = file->f_path.dentry;
long last_index = (long)file->private_data;

lockdep_assert_held(&d_inode(dir)->i_rwsem);

if (!dir_emit_dots(file, ctx))
return 0;

offset_iterate_dir(d_inode(dir), ctx, last_index);
if (ctx->pos != DIR_OFFSET_EOD)
offset_iterate_dir(file, ctx);
return 0;
}

const struct file_operations simple_offset_dir_operations = {
.open = offset_dir_open,
.llseek = offset_dir_llseek,
.iterate_shared = offset_readdir,
.read = generic_read_dir,
@ -673,6 +673,7 @@ static int pseudo_fs_fill_super(struct super_block *s, struct fs_context *fc)
s->s_blocksize_bits = PAGE_SHIFT;
s->s_magic = ctx->magic;
s->s_op = ctx->ops ?: &simple_super_operations;
s->s_export_op = ctx->eops;
s->s_xattr = ctx->xattr;
s->s_time_gran = 1;
root = new_inode(s);
fs/mount.h
@ -8,15 +8,23 @@
struct mnt_namespace {
struct ns_common ns;
struct mount * root;
struct rb_root mounts; /* Protected by namespace_sem */
struct {
struct rb_root mounts; /* Protected by namespace_sem */
struct rb_node *mnt_last_node; /* last (rightmost) mount in the rbtree */
struct rb_node *mnt_first_node; /* first (leftmost) mount in the rbtree */
};
struct user_namespace *user_ns;
struct ucounts *ucounts;
u64 seq; /* Sequence number to prevent loops */
wait_queue_head_t poll;
union {
wait_queue_head_t poll;
struct rcu_head mnt_ns_rcu;
};
u64 event;
unsigned int nr_mounts; /* # of mounts in the namespace */
unsigned int pending_mounts;
struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */
struct list_head mnt_ns_list; /* entry in the sequential list of mounts namespace */
refcount_t passive; /* number references not pinning @mounts */
} __randomize_layout;

@ -38,6 +46,7 @@ struct mount {
struct dentry *mnt_mountpoint;
struct vfsmount mnt;
union {
struct rb_node mnt_node; /* node in the ns->mounts rbtree */
struct rcu_head mnt_rcu;
struct llist_node mnt_llist;
};
@ -51,10 +60,7 @@ struct mount {
struct list_head mnt_child; /* and going through their mnt_child */
struct list_head mnt_instance; /* mount instance on sb->s_mounts */
const char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */
union {
struct rb_node mnt_node; /* Under ns->mounts */
struct list_head mnt_list;
};
struct list_head mnt_list;
struct list_head mnt_expire; /* link in fs-specific expiry list */
struct list_head mnt_share; /* circular list of shared mounts */
struct list_head mnt_slave_list;/* list of slave mounts */
@ -145,24 +151,28 @@ static inline bool is_anon_ns(struct mnt_namespace *ns)
return ns->seq == 0;
}

static inline bool mnt_ns_attached(const struct mount *mnt)
{
return !RB_EMPTY_NODE(&mnt->mnt_node);
}

static inline void move_from_ns(struct mount *mnt, struct list_head *dt_list)
{
WARN_ON(!(mnt->mnt.mnt_flags & MNT_ONRB));
mnt->mnt.mnt_flags &= ~MNT_ONRB;
rb_erase(&mnt->mnt_node, &mnt->mnt_ns->mounts);
struct mnt_namespace *ns = mnt->mnt_ns;
WARN_ON(!mnt_ns_attached(mnt));
if (ns->mnt_last_node == &mnt->mnt_node)
ns->mnt_last_node = rb_prev(&mnt->mnt_node);
if (ns->mnt_first_node == &mnt->mnt_node)
ns->mnt_first_node = rb_next(&mnt->mnt_node);
rb_erase(&mnt->mnt_node, &ns->mounts);
RB_CLEAR_NODE(&mnt->mnt_node);
list_add_tail(&mnt->mnt_list, dt_list);
}

bool has_locked_children(struct mount *mnt, struct dentry *dentry);
struct mnt_namespace *__lookup_next_mnt_ns(struct mnt_namespace *mnt_ns, bool previous);
static inline struct mnt_namespace *lookup_next_mnt_ns(struct mnt_namespace *mntns)
{
return __lookup_next_mnt_ns(mntns, false);
}
static inline struct mnt_namespace *lookup_prev_mnt_ns(struct mnt_namespace *mntns)
{
return __lookup_next_mnt_ns(mntns, true);
}
struct mnt_namespace *get_sequential_mnt_ns(struct mnt_namespace *mnt_ns,
bool previous);

static inline struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
{
return container_of(ns, struct mnt_namespace, ns);
fs/namei.c
@ -5272,19 +5272,16 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna
getname(newname), 0);
}

int readlink_copy(char __user *buffer, int buflen, const char *link)
int readlink_copy(char __user *buffer, int buflen, const char *link, int linklen)
{
int len = PTR_ERR(link);
if (IS_ERR(link))
goto out;
int copylen;

len = strlen(link);
if (len > (unsigned) buflen)
len = buflen;
if (copy_to_user(buffer, link, len))
len = -EFAULT;
out:
return len;
copylen = linklen;
if (unlikely(copylen > (unsigned) buflen))
copylen = buflen;
if (copy_to_user(buffer, link, copylen))
copylen = -EFAULT;
return copylen;
}

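For context, a small userspace sketch (not part of the commit) of the semantics readlink_copy() implements above: readlink(2) copies at most buflen bytes, truncates silently, and does not NUL-terminate.

    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            char buf[64];
            ssize_t n = readlink("/proc/self/exe", buf, sizeof(buf) - 1);

            if (n < 0) {
                    perror("readlink");
                    return 1;
            }
            buf[n] = '\0';  /* the kernel never NUL-terminates for us */
            printf("%s%s\n", buf,
                   (size_t)n == sizeof(buf) - 1 ? " (possibly truncated)" : "");
            return 0;
    }
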
/**
@ -5304,6 +5301,9 @@ int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen)
const char *link;
int res;

if (inode->i_opflags & IOP_CACHED_LINK)
return readlink_copy(buffer, buflen, inode->i_link, inode->i_linklen);

if (unlikely(!(inode->i_opflags & IOP_DEFAULT_READLINK))) {
if (unlikely(inode->i_op->readlink))
return inode->i_op->readlink(dentry, buffer, buflen);
@ -5322,7 +5322,7 @@ int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen)
if (IS_ERR(link))
return PTR_ERR(link);
}
res = readlink_copy(buffer, buflen, link);
res = readlink_copy(buffer, buflen, link, strlen(link));
do_delayed_call(&done);
return res;
}
@ -5391,10 +5391,14 @@ EXPORT_SYMBOL(page_put_link);

int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
{
const char *link;
int res;

DEFINE_DELAYED_CALL(done);
int res = readlink_copy(buffer, buflen,
page_get_link(dentry, d_inode(dentry),
&done));
link = page_get_link(dentry, d_inode(dentry), &done);
res = PTR_ERR(link);
if (!IS_ERR(link))
res = readlink_copy(buffer, buflen, link, strlen(link));
do_delayed_call(&done);
return res;
}
fs/namespace.c
@ -32,7 +32,6 @@
|
||||
#include <linux/fs_context.h>
|
||||
#include <linux/shmem_fs.h>
|
||||
#include <linux/mnt_idmapping.h>
|
||||
#include <linux/nospec.h>
|
||||
|
||||
#include "pnode.h"
|
||||
#include "internal.h"
|
||||
@ -66,12 +65,12 @@ static int __init set_mphash_entries(char *str)
|
||||
__setup("mphash_entries=", set_mphash_entries);
|
||||
|
||||
static u64 event;
|
||||
static DEFINE_IDA(mnt_id_ida);
|
||||
static DEFINE_XARRAY_FLAGS(mnt_id_xa, XA_FLAGS_ALLOC);
|
||||
static DEFINE_IDA(mnt_group_ida);
|
||||
|
||||
/* Don't allow confusion with old 32bit mount ID */
|
||||
#define MNT_UNIQUE_ID_OFFSET (1ULL << 31)
|
||||
static atomic64_t mnt_id_ctr = ATOMIC64_INIT(MNT_UNIQUE_ID_OFFSET);
|
||||
static u64 mnt_id_ctr = MNT_UNIQUE_ID_OFFSET;
|
||||
|
||||
static struct hlist_head *mount_hashtable __ro_after_init;
|
||||
static struct hlist_head *mountpoint_hashtable __ro_after_init;
|
||||
@ -79,8 +78,10 @@ static struct kmem_cache *mnt_cache __ro_after_init;
|
||||
static DECLARE_RWSEM(namespace_sem);
|
||||
static HLIST_HEAD(unmounted); /* protected by namespace_sem */
|
||||
static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
|
||||
static DEFINE_RWLOCK(mnt_ns_tree_lock);
|
||||
static DEFINE_SEQLOCK(mnt_ns_tree_lock);
|
||||
|
||||
static struct rb_root mnt_ns_tree = RB_ROOT; /* protected by mnt_ns_tree_lock */
|
||||
static LIST_HEAD(mnt_ns_list); /* protected by mnt_ns_tree_lock */
|
||||
|
||||
struct mount_kattr {
|
||||
unsigned int attr_set;
|
||||
@ -106,17 +107,6 @@ EXPORT_SYMBOL_GPL(fs_kobj);
|
||||
*/
|
||||
__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
|
||||
|
||||
static int mnt_ns_cmp(u64 seq, const struct mnt_namespace *ns)
|
||||
{
|
||||
u64 seq_b = ns->seq;
|
||||
|
||||
if (seq < seq_b)
|
||||
return -1;
|
||||
if (seq > seq_b)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline struct mnt_namespace *node_to_mnt_ns(const struct rb_node *node)
|
||||
{
|
||||
if (!node)
|
||||
@ -124,24 +114,53 @@ static inline struct mnt_namespace *node_to_mnt_ns(const struct rb_node *node)
|
||||
return rb_entry(node, struct mnt_namespace, mnt_ns_tree_node);
|
||||
}
|
||||
|
||||
static bool mnt_ns_less(struct rb_node *a, const struct rb_node *b)
|
||||
static int mnt_ns_cmp(struct rb_node *a, const struct rb_node *b)
|
||||
{
|
||||
struct mnt_namespace *ns_a = node_to_mnt_ns(a);
|
||||
struct mnt_namespace *ns_b = node_to_mnt_ns(b);
|
||||
u64 seq_a = ns_a->seq;
|
||||
u64 seq_b = ns_b->seq;
|
||||
|
||||
return mnt_ns_cmp(seq_a, ns_b) < 0;
|
||||
if (seq_a < seq_b)
|
||||
return -1;
|
||||
if (seq_a > seq_b)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void mnt_ns_tree_write_lock(void)
|
||||
{
|
||||
write_seqlock(&mnt_ns_tree_lock);
|
||||
}
|
||||
|
||||
static inline void mnt_ns_tree_write_unlock(void)
|
||||
{
|
||||
write_sequnlock(&mnt_ns_tree_lock);
|
||||
}
|
||||
|
||||
static void mnt_ns_tree_add(struct mnt_namespace *ns)
|
||||
{
|
||||
guard(write_lock)(&mnt_ns_tree_lock);
|
||||
rb_add(&ns->mnt_ns_tree_node, &mnt_ns_tree, mnt_ns_less);
|
||||
struct rb_node *node, *prev;
|
||||
|
||||
mnt_ns_tree_write_lock();
|
||||
node = rb_find_add_rcu(&ns->mnt_ns_tree_node, &mnt_ns_tree, mnt_ns_cmp);
|
||||
/*
|
||||
* If there's no previous entry simply add it after the
|
||||
* head and if there is add it after the previous entry.
|
||||
*/
|
||||
prev = rb_prev(&ns->mnt_ns_tree_node);
|
||||
if (!prev)
|
||||
list_add_rcu(&ns->mnt_ns_list, &mnt_ns_list);
|
||||
else
|
||||
list_add_rcu(&ns->mnt_ns_list, &node_to_mnt_ns(prev)->mnt_ns_list);
|
||||
mnt_ns_tree_write_unlock();
|
||||
|
||||
WARN_ON_ONCE(node);
|
||||
}
|
||||
|
||||
static void mnt_ns_release(struct mnt_namespace *ns)
|
||||
{
|
||||
lockdep_assert_not_held(&mnt_ns_tree_lock);
|
||||
lockdep_assert_not_held(&mnt_ns_tree_lock.lock);
|
||||
|
||||
/* keep alive for {list,stat}mount() */
|
||||
if (refcount_dec_and_test(&ns->passive)) {
|
||||
@ -151,41 +170,34 @@ static void mnt_ns_release(struct mnt_namespace *ns)
|
||||
}
|
||||
DEFINE_FREE(mnt_ns_release, struct mnt_namespace *, if (_T) mnt_ns_release(_T))
|
||||
|
||||
static void mnt_ns_release_rcu(struct rcu_head *rcu)
|
||||
{
|
||||
mnt_ns_release(container_of(rcu, struct mnt_namespace, mnt_ns_rcu));
|
||||
}
|
||||
|
||||
static void mnt_ns_tree_remove(struct mnt_namespace *ns)
|
||||
{
|
||||
/* remove from global mount namespace list */
|
||||
if (!is_anon_ns(ns)) {
|
||||
guard(write_lock)(&mnt_ns_tree_lock);
|
||||
mnt_ns_tree_write_lock();
|
||||
rb_erase(&ns->mnt_ns_tree_node, &mnt_ns_tree);
|
||||
list_bidir_del_rcu(&ns->mnt_ns_list);
|
||||
mnt_ns_tree_write_unlock();
|
||||
}
|
||||
|
||||
mnt_ns_release(ns);
|
||||
call_rcu(&ns->mnt_ns_rcu, mnt_ns_release_rcu);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the mount namespace which either has the specified id, or has the
|
||||
* next smallest id afer the specified one.
|
||||
*/
|
||||
static struct mnt_namespace *mnt_ns_find_id_at(u64 mnt_ns_id)
|
||||
static int mnt_ns_find(const void *key, const struct rb_node *node)
|
||||
{
|
||||
struct rb_node *node = mnt_ns_tree.rb_node;
|
||||
struct mnt_namespace *ret = NULL;
|
||||
const u64 mnt_ns_id = *(u64 *)key;
|
||||
const struct mnt_namespace *ns = node_to_mnt_ns(node);
|
||||
|
||||
lockdep_assert_held(&mnt_ns_tree_lock);
|
||||
|
||||
while (node) {
|
||||
struct mnt_namespace *n = node_to_mnt_ns(node);
|
||||
|
||||
if (mnt_ns_id <= n->seq) {
|
||||
ret = node_to_mnt_ns(node);
|
||||
if (mnt_ns_id == n->seq)
|
||||
break;
|
||||
node = node->rb_left;
|
||||
} else {
|
||||
node = node->rb_right;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
if (mnt_ns_id < ns->seq)
|
||||
return -1;
|
||||
if (mnt_ns_id > ns->seq)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -195,18 +207,37 @@ static struct mnt_namespace *mnt_ns_find_id_at(u64 mnt_ns_id)
|
||||
* namespace the @namespace_sem must first be acquired. If the namespace has
|
||||
* already shut down before acquiring @namespace_sem, {list,stat}mount() will
|
||||
* see that the mount rbtree of the namespace is empty.
|
||||
*
|
||||
* Note the lookup is lockless protected by a sequence counter. We only
|
||||
* need to guard against false negatives as false positives aren't
|
||||
* possible. So if we didn't find a mount namespace and the sequence
|
||||
* counter has changed we need to retry. If the sequence counter is
|
||||
* still the same we know the search actually failed.
|
||||
*/
|
||||
static struct mnt_namespace *lookup_mnt_ns(u64 mnt_ns_id)
|
||||
{
|
||||
struct mnt_namespace *ns;
|
||||
struct mnt_namespace *ns;
|
||||
struct rb_node *node;
|
||||
unsigned int seq;
|
||||
|
||||
guard(read_lock)(&mnt_ns_tree_lock);
|
||||
ns = mnt_ns_find_id_at(mnt_ns_id);
|
||||
if (!ns || ns->seq != mnt_ns_id)
|
||||
return NULL;
|
||||
guard(rcu)();
|
||||
do {
|
||||
seq = read_seqbegin(&mnt_ns_tree_lock);
|
||||
node = rb_find_rcu(&mnt_ns_id, &mnt_ns_tree, mnt_ns_find);
|
||||
if (node)
|
||||
break;
|
||||
} while (read_seqretry(&mnt_ns_tree_lock, seq));
|
||||
|
||||
refcount_inc(&ns->passive);
|
||||
return ns;
|
||||
if (!node)
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* The last reference count is put with RCU delay so we can
|
||||
* unconditonally acquire a reference here.
|
||||
*/
|
||||
ns = node_to_mnt_ns(node);
|
||||
refcount_inc(&ns->passive);
|
||||
return ns;
|
||||
}
|
||||
|
||||
static inline void lock_mount_hash(void)
|
||||
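
A hedged sketch of the seqlock retry pattern the lockless lookup above relies on: readers only need to guard against false negatives, so they redo the RCU search whenever the write-side sequence count changed. The "demo_*" helpers and object type below are hypothetical, not from this commit.

    #include <linux/seqlock.h>
    #include <linux/rcupdate.h>
    #include <linux/types.h>

    struct demo_obj;
    struct demo_obj *demo_find_rcu(u64 id);         /* hypothetical lockless search */
    bool demo_get(struct demo_obj *obj);            /* hypothetical refcount_inc_not_zero() wrapper */
    void demo_insert_locked(struct demo_obj *obj);  /* hypothetical RCU-safe insert */

    static DEFINE_SEQLOCK(demo_lock);

    static struct demo_obj *demo_lookup(u64 id)
    {
            struct demo_obj *obj;
            unsigned int seq;

            rcu_read_lock();
            do {
                    seq = read_seqbegin(&demo_lock);
                    obj = demo_find_rcu(id);
                    if (obj) {
                            if (!demo_get(obj)) /* pin it before leaving RCU */
                                    obj = NULL;
                            break;              /* false positives cannot happen */
                    }
            } while (read_seqretry(&demo_lock, seq));
            rcu_read_unlock();
            return obj;
    }

    static void demo_add(struct demo_obj *obj)
    {
            write_seqlock(&demo_lock);      /* writers bump the sequence count */
            demo_insert_locked(obj);
            write_sequnlock(&demo_lock);
    }
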
@ -236,18 +267,19 @@ static inline struct hlist_head *mp_hash(struct dentry *dentry)
|
||||
|
||||
static int mnt_alloc_id(struct mount *mnt)
|
||||
{
|
||||
int res = ida_alloc(&mnt_id_ida, GFP_KERNEL);
|
||||
int res;
|
||||
|
||||
if (res < 0)
|
||||
return res;
|
||||
mnt->mnt_id = res;
|
||||
mnt->mnt_id_unique = atomic64_inc_return(&mnt_id_ctr);
|
||||
return 0;
|
||||
xa_lock(&mnt_id_xa);
|
||||
res = __xa_alloc(&mnt_id_xa, &mnt->mnt_id, mnt, XA_LIMIT(1, INT_MAX), GFP_KERNEL);
|
||||
if (!res)
|
||||
mnt->mnt_id_unique = ++mnt_id_ctr;
|
||||
xa_unlock(&mnt_id_xa);
|
||||
return res;
|
||||
}
|
||||
|
||||
static void mnt_free_id(struct mount *mnt)
|
||||
{
|
||||
ida_free(&mnt_id_ida, mnt->mnt_id);
|
||||
xa_erase(&mnt_id_xa, mnt->mnt_id);
|
||||
}
|
||||
|
||||
/*
|
||||
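
A minimal sketch (names hypothetical) of the XArray ID allocation pattern mnt_alloc_id() adopts above: allocate under the array's own lock so the ID and a companion counter can be updated together, and free with xa_erase().

    #include <linux/xarray.h>
    #include <linux/gfp.h>
    #include <linux/limits.h>

    static DEFINE_XARRAY_FLAGS(demo_ids, XA_FLAGS_ALLOC);

    static int demo_alloc_id(void *object, u32 *out_id)
    {
            int err;

            xa_lock(&demo_ids);
            /* Store @object at a free index in [1, INT_MAX]. */
            err = __xa_alloc(&demo_ids, out_id, object, XA_LIMIT(1, INT_MAX),
                             GFP_KERNEL);
            xa_unlock(&demo_ids);
            return err;
    }

    static void demo_free_id(u32 id)
    {
            xa_erase(&demo_ids, id);
    }
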
@ -344,6 +376,7 @@ static struct mount *alloc_vfsmnt(const char *name)
|
||||
INIT_HLIST_NODE(&mnt->mnt_mp_list);
|
||||
INIT_LIST_HEAD(&mnt->mnt_umounting);
|
||||
INIT_HLIST_HEAD(&mnt->mnt_stuck_children);
|
||||
RB_CLEAR_NODE(&mnt->mnt_node);
|
||||
mnt->mnt.mnt_idmap = &nop_mnt_idmap;
|
||||
}
|
||||
return mnt;
|
||||
@ -1123,19 +1156,27 @@ static void mnt_add_to_ns(struct mnt_namespace *ns, struct mount *mnt)
|
||||
{
|
||||
struct rb_node **link = &ns->mounts.rb_node;
|
||||
struct rb_node *parent = NULL;
|
||||
bool mnt_first_node = true, mnt_last_node = true;
|
||||
|
||||
WARN_ON(mnt->mnt.mnt_flags & MNT_ONRB);
|
||||
WARN_ON(mnt_ns_attached(mnt));
|
||||
mnt->mnt_ns = ns;
|
||||
while (*link) {
|
||||
parent = *link;
|
||||
if (mnt->mnt_id_unique < node_to_mount(parent)->mnt_id_unique)
|
||||
if (mnt->mnt_id_unique < node_to_mount(parent)->mnt_id_unique) {
|
||||
link = &parent->rb_left;
|
||||
else
|
||||
mnt_last_node = false;
|
||||
} else {
|
||||
link = &parent->rb_right;
|
||||
mnt_first_node = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (mnt_last_node)
|
||||
ns->mnt_last_node = &mnt->mnt_node;
|
||||
if (mnt_first_node)
|
||||
ns->mnt_first_node = &mnt->mnt_node;
|
||||
rb_link_node(&mnt->mnt_node, parent, link);
|
||||
rb_insert_color(&mnt->mnt_node, &ns->mounts);
|
||||
mnt->mnt.mnt_flags |= MNT_ONRB;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1305,7 +1346,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
|
||||
}
|
||||
|
||||
mnt->mnt.mnt_flags = old->mnt.mnt_flags;
|
||||
mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL|MNT_ONRB);
|
||||
mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL);
|
||||
|
||||
atomic_inc(&sb->s_active);
|
||||
mnt->mnt.mnt_idmap = mnt_idmap_get(mnt_idmap(&old->mnt));
|
||||
@ -1763,7 +1804,7 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
|
||||
/* Gather the mounts to umount */
|
||||
for (p = mnt; p; p = next_mnt(p, mnt)) {
|
||||
p->mnt.mnt_flags |= MNT_UMOUNT;
|
||||
if (p->mnt.mnt_flags & MNT_ONRB)
|
||||
if (mnt_ns_attached(p))
|
||||
move_from_ns(p, &tmp_list);
|
||||
else
|
||||
list_move(&p->mnt_list, &tmp_list);
|
||||
@ -1912,16 +1953,14 @@ static int do_umount(struct mount *mnt, int flags)
|
||||
|
||||
event++;
|
||||
if (flags & MNT_DETACH) {
|
||||
if (mnt->mnt.mnt_flags & MNT_ONRB ||
|
||||
!list_empty(&mnt->mnt_list))
|
||||
if (mnt_ns_attached(mnt) || !list_empty(&mnt->mnt_list))
|
||||
umount_tree(mnt, UMOUNT_PROPAGATE);
|
||||
retval = 0;
|
||||
} else {
|
||||
shrink_submounts(mnt);
|
||||
retval = -EBUSY;
|
||||
if (!propagate_mount_busy(mnt, 2)) {
|
||||
if (mnt->mnt.mnt_flags & MNT_ONRB ||
|
||||
!list_empty(&mnt->mnt_list))
|
||||
if (mnt_ns_attached(mnt) || !list_empty(&mnt->mnt_list))
|
||||
umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
|
||||
retval = 0;
|
||||
}
|
||||
@ -2071,30 +2110,34 @@ struct ns_common *from_mnt_ns(struct mnt_namespace *mnt)
|
||||
return &mnt->ns;
|
||||
}
|
||||
|
||||
struct mnt_namespace *__lookup_next_mnt_ns(struct mnt_namespace *mntns, bool previous)
|
||||
struct mnt_namespace *get_sequential_mnt_ns(struct mnt_namespace *mntns, bool previous)
|
||||
{
|
||||
guard(read_lock)(&mnt_ns_tree_lock);
|
||||
guard(rcu)();
|
||||
|
||||
for (;;) {
|
||||
struct rb_node *node;
|
||||
struct list_head *list;
|
||||
|
||||
if (previous)
|
||||
node = rb_prev(&mntns->mnt_ns_tree_node);
|
||||
list = rcu_dereference(list_bidir_prev_rcu(&mntns->mnt_ns_list));
|
||||
else
|
||||
node = rb_next(&mntns->mnt_ns_tree_node);
|
||||
if (!node)
|
||||
list = rcu_dereference(list_next_rcu(&mntns->mnt_ns_list));
|
||||
if (list_is_head(list, &mnt_ns_list))
|
||||
return ERR_PTR(-ENOENT);
|
||||
|
||||
mntns = node_to_mnt_ns(node);
|
||||
node = &mntns->mnt_ns_tree_node;
|
||||
mntns = list_entry_rcu(list, struct mnt_namespace, mnt_ns_list);
|
||||
|
||||
/*
|
||||
* The last passive reference count is put with RCU
|
||||
* delay so accessing the mount namespace is not just
|
||||
* safe but all relevant members are still valid.
|
||||
*/
|
||||
if (!ns_capable_noaudit(mntns->user_ns, CAP_SYS_ADMIN))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Holding mnt_ns_tree_lock prevents the mount namespace from
|
||||
* being freed but it may well be on it's deathbed. We want an
|
||||
* active reference, not just a passive one here as we're
|
||||
* persisting the mount namespace.
|
||||
* We need an active reference count as we're persisting
|
||||
* the mount namespace and it might already be on its
|
||||
* deathbed.
|
||||
*/
|
||||
if (!refcount_inc_not_zero(&mntns->ns.count))
|
||||
continue;
|
||||
@ -3911,6 +3954,7 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
|
||||
refcount_set(&new_ns->ns.count, 1);
|
||||
refcount_set(&new_ns->passive, 1);
|
||||
new_ns->mounts = RB_ROOT;
|
||||
INIT_LIST_HEAD(&new_ns->mnt_ns_list);
|
||||
RB_CLEAR_NODE(&new_ns->mnt_ns_tree_node);
|
||||
init_waitqueue_head(&new_ns->poll);
|
||||
new_ns->user_ns = get_user_ns(user_ns);
|
||||
@ -3990,7 +4034,6 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
|
||||
while (p->mnt.mnt_root != q->mnt.mnt_root)
|
||||
p = next_mnt(skip_mnt_tree(p), old);
|
||||
}
|
||||
mnt_ns_tree_add(new_ns);
|
||||
namespace_unlock();
|
||||
|
||||
if (rootmnt)
|
||||
@ -3998,6 +4041,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
|
||||
if (pwdmnt)
|
||||
mntput(pwdmnt);
|
||||
|
||||
mnt_ns_tree_add(new_ns);
|
||||
return new_ns;
|
||||
}
|
||||
|
||||
@ -5044,6 +5088,10 @@ static int statmount_mnt_opts(struct kstatmount *s, struct seq_file *seq)
|
||||
if (sb->s_op->show_options) {
|
||||
size_t start = seq->count;
|
||||
|
||||
err = security_sb_show_options(seq, sb);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = sb->s_op->show_options(seq, mnt->mnt_root);
|
||||
if (err)
|
||||
return err;
|
||||
@ -5531,9 +5579,9 @@ static ssize_t do_listmount(struct mnt_namespace *ns, u64 mnt_parent_id,
|
||||
|
||||
if (!last_mnt_id) {
|
||||
if (reverse)
|
||||
first = node_to_mount(rb_last(&ns->mounts));
|
||||
first = node_to_mount(ns->mnt_last_node);
|
||||
else
|
||||
first = node_to_mount(rb_first(&ns->mounts));
|
||||
first = node_to_mount(ns->mnt_first_node);
|
||||
} else {
|
||||
if (reverse)
|
||||
first = mnt_find_id_at_reverse(ns, last_mnt_id - 1);
|
||||
|
@ -13,8 +13,11 @@ netfs-y := \
|
||||
read_collect.o \
|
||||
read_pgpriv2.o \
|
||||
read_retry.o \
|
||||
read_single.o \
|
||||
rolling_buffer.o \
|
||||
write_collect.o \
|
||||
write_issue.o
|
||||
write_issue.o \
|
||||
write_retry.o
|
||||
|
||||
netfs-$(CONFIG_NETFS_STATS) += stats.o
|
||||
|
||||
|
@ -63,37 +63,6 @@ static int netfs_begin_cache_read(struct netfs_io_request *rreq, struct netfs_in
|
||||
return fscache_begin_read_operation(&rreq->cache_resources, netfs_i_cookie(ctx));
|
||||
}
|
||||
|
||||
/*
|
||||
* Decant the list of folios to read into a rolling buffer.
|
||||
*/
|
||||
static size_t netfs_load_buffer_from_ra(struct netfs_io_request *rreq,
|
||||
struct folio_queue *folioq,
|
||||
struct folio_batch *put_batch)
|
||||
{
|
||||
unsigned int order, nr;
|
||||
size_t size = 0;
|
||||
|
||||
nr = __readahead_batch(rreq->ractl, (struct page **)folioq->vec.folios,
|
||||
ARRAY_SIZE(folioq->vec.folios));
|
||||
folioq->vec.nr = nr;
|
||||
for (int i = 0; i < nr; i++) {
|
||||
struct folio *folio = folioq_folio(folioq, i);
|
||||
|
||||
trace_netfs_folio(folio, netfs_folio_trace_read);
|
||||
order = folio_order(folio);
|
||||
folioq->orders[i] = order;
|
||||
size += PAGE_SIZE << order;
|
||||
|
||||
if (!folio_batch_add(put_batch, folio))
|
||||
folio_batch_release(put_batch);
|
||||
}
|
||||
|
||||
for (int i = nr; i < folioq_nr_slots(folioq); i++)
|
||||
folioq_clear(folioq, i);
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
/*
|
||||
* netfs_prepare_read_iterator - Prepare the subreq iterator for I/O
|
||||
* @subreq: The subrequest to be set up
|
||||
@ -128,19 +97,12 @@ static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq)
|
||||
|
||||
folio_batch_init(&put_batch);
|
||||
while (rreq->submitted < subreq->start + rsize) {
|
||||
struct folio_queue *tail = rreq->buffer_tail, *new;
|
||||
size_t added;
|
||||
ssize_t added;
|
||||
|
||||
new = kmalloc(sizeof(*new), GFP_NOFS);
|
||||
if (!new)
|
||||
return -ENOMEM;
|
||||
netfs_stat(&netfs_n_folioq);
|
||||
folioq_init(new);
|
||||
new->prev = tail;
|
||||
tail->next = new;
|
||||
rreq->buffer_tail = new;
|
||||
added = netfs_load_buffer_from_ra(rreq, new, &put_batch);
|
||||
rreq->iter.count += added;
|
||||
added = rolling_buffer_load_from_ra(&rreq->buffer, rreq->ractl,
|
||||
&put_batch);
|
||||
if (added < 0)
|
||||
return added;
|
||||
rreq->submitted += added;
|
||||
}
|
||||
folio_batch_release(&put_batch);
|
||||
@ -148,7 +110,7 @@ static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq)
|
||||
|
||||
subreq->len = rsize;
|
||||
if (unlikely(rreq->io_streams[0].sreq_max_segs)) {
|
||||
size_t limit = netfs_limit_iter(&rreq->iter, 0, rsize,
|
||||
size_t limit = netfs_limit_iter(&rreq->buffer.iter, 0, rsize,
|
||||
rreq->io_streams[0].sreq_max_segs);
|
||||
|
||||
if (limit < rsize) {
|
||||
@ -157,20 +119,10 @@ static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq)
|
||||
}
|
||||
}
|
||||
|
||||
subreq->io_iter = rreq->iter;
|
||||
|
||||
if (iov_iter_is_folioq(&subreq->io_iter)) {
|
||||
if (subreq->io_iter.folioq_slot >= folioq_nr_slots(subreq->io_iter.folioq)) {
|
||||
subreq->io_iter.folioq = subreq->io_iter.folioq->next;
|
||||
subreq->io_iter.folioq_slot = 0;
|
||||
}
|
||||
subreq->curr_folioq = (struct folio_queue *)subreq->io_iter.folioq;
|
||||
subreq->curr_folioq_slot = subreq->io_iter.folioq_slot;
|
||||
subreq->curr_folio_order = subreq->curr_folioq->orders[subreq->curr_folioq_slot];
|
||||
}
|
||||
subreq->io_iter = rreq->buffer.iter;
|
||||
|
||||
iov_iter_truncate(&subreq->io_iter, subreq->len);
|
||||
iov_iter_advance(&rreq->iter, subreq->len);
|
||||
rolling_buffer_advance(&rreq->buffer, subreq->len);
|
||||
return subreq->len;
|
||||
}
|
||||
|
||||
@ -179,25 +131,14 @@ static enum netfs_io_source netfs_cache_prepare_read(struct netfs_io_request *rr
|
||||
loff_t i_size)
|
||||
{
|
||||
struct netfs_cache_resources *cres = &rreq->cache_resources;
|
||||
enum netfs_io_source source;
|
||||
|
||||
if (!cres->ops)
|
||||
return NETFS_DOWNLOAD_FROM_SERVER;
|
||||
return cres->ops->prepare_read(subreq, i_size);
|
||||
}
|
||||
source = cres->ops->prepare_read(subreq, i_size);
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
|
||||
return source;
|
||||
|
||||
static void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error,
|
||||
bool was_async)
|
||||
{
|
||||
struct netfs_io_subrequest *subreq = priv;
|
||||
|
||||
if (transferred_or_error < 0) {
|
||||
netfs_read_subreq_terminated(subreq, transferred_or_error, was_async);
|
||||
return;
|
||||
}
|
||||
|
||||
if (transferred_or_error > 0)
|
||||
subreq->transferred += transferred_or_error;
|
||||
netfs_read_subreq_terminated(subreq, 0, was_async);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -214,6 +155,47 @@ static void netfs_read_cache_to_pagecache(struct netfs_io_request *rreq,
|
||||
netfs_cache_read_terminated, subreq);
|
||||
}
|
||||
|
||||
static void netfs_issue_read(struct netfs_io_request *rreq,
|
||||
struct netfs_io_subrequest *subreq)
|
||||
{
|
||||
struct netfs_io_stream *stream = &rreq->io_streams[0];
|
||||
|
||||
__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
|
||||
|
||||
/* We add to the end of the list whilst the collector may be walking
|
||||
* the list. The collector only goes nextwards and uses the lock to
|
||||
* remove entries off of the front.
|
||||
*/
|
||||
spin_lock(&rreq->lock);
|
||||
list_add_tail(&subreq->rreq_link, &stream->subrequests);
|
||||
if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
|
||||
stream->front = subreq;
|
||||
if (!stream->active) {
|
||||
stream->collected_to = stream->front->start;
|
||||
/* Store list pointers before active flag */
|
||||
smp_store_release(&stream->active, true);
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock(&rreq->lock);
|
||||
|
||||
switch (subreq->source) {
|
||||
case NETFS_DOWNLOAD_FROM_SERVER:
|
||||
rreq->netfs_ops->issue_read(subreq);
|
||||
break;
|
||||
case NETFS_READ_FROM_CACHE:
|
||||
netfs_read_cache_to_pagecache(rreq, subreq);
|
||||
break;
|
||||
default:
|
||||
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
|
||||
subreq->error = 0;
|
||||
iov_iter_zero(subreq->len, &subreq->io_iter);
|
||||
subreq->transferred = subreq->len;
|
||||
netfs_read_subreq_terminated(subreq);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform a read to the pagecache from a series of sources of different types,
|
||||
* slicing up the region to be read according to available cache blocks and
|
||||
@ -226,11 +208,9 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
|
||||
ssize_t size = rreq->len;
|
||||
int ret = 0;
|
||||
|
||||
atomic_inc(&rreq->nr_outstanding);
|
||||
|
||||
do {
|
||||
struct netfs_io_subrequest *subreq;
|
||||
enum netfs_io_source source = NETFS_DOWNLOAD_FROM_SERVER;
|
||||
enum netfs_io_source source = NETFS_SOURCE_UNKNOWN;
|
||||
ssize_t slice;
|
||||
|
||||
subreq = netfs_alloc_subrequest(rreq);
|
||||
@ -242,20 +222,14 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
|
||||
subreq->start = start;
|
||||
subreq->len = size;
|
||||
|
||||
atomic_inc(&rreq->nr_outstanding);
|
||||
spin_lock_bh(&rreq->lock);
|
||||
list_add_tail(&subreq->rreq_link, &rreq->subrequests);
|
||||
subreq->prev_donated = rreq->prev_donated;
|
||||
rreq->prev_donated = 0;
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_added);
|
||||
spin_unlock_bh(&rreq->lock);
|
||||
|
||||
source = netfs_cache_prepare_read(rreq, subreq, rreq->i_size);
|
||||
subreq->source = source;
|
||||
if (source == NETFS_DOWNLOAD_FROM_SERVER) {
|
||||
unsigned long long zp = umin(ictx->zero_point, rreq->i_size);
|
||||
size_t len = subreq->len;
|
||||
|
||||
if (unlikely(rreq->origin == NETFS_READ_SINGLE))
|
||||
zp = rreq->i_size;
|
||||
if (subreq->start >= zp) {
|
||||
subreq->source = source = NETFS_FILL_WITH_ZEROES;
|
||||
goto fill_with_zeroes;
|
||||
@ -276,24 +250,13 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
|
||||
if (rreq->netfs_ops->prepare_read) {
|
||||
ret = rreq->netfs_ops->prepare_read(subreq);
|
||||
if (ret < 0) {
|
||||
atomic_dec(&rreq->nr_outstanding);
|
||||
netfs_put_subrequest(subreq, false,
|
||||
netfs_sreq_trace_put_cancel);
|
||||
break;
|
||||
}
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
|
||||
}
|
||||
|
||||
slice = netfs_prepare_read_iterator(subreq);
|
||||
if (slice < 0) {
|
||||
atomic_dec(&rreq->nr_outstanding);
|
||||
netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
|
||||
ret = slice;
|
||||
break;
|
||||
}
|
||||
|
||||
rreq->netfs_ops->issue_read(subreq);
|
||||
goto done;
|
||||
goto issue;
|
||||
}
|
||||
|
||||
fill_with_zeroes:
|
||||
@ -301,82 +264,46 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
|
||||
subreq->source = NETFS_FILL_WITH_ZEROES;
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
|
||||
netfs_stat(&netfs_n_rh_zero);
|
||||
slice = netfs_prepare_read_iterator(subreq);
|
||||
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
|
||||
netfs_read_subreq_terminated(subreq, 0, false);
|
||||
goto done;
|
||||
goto issue;
|
||||
}
|
||||
|
||||
if (source == NETFS_READ_FROM_CACHE) {
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
|
||||
slice = netfs_prepare_read_iterator(subreq);
|
||||
netfs_read_cache_to_pagecache(rreq, subreq);
|
||||
goto done;
|
||||
goto issue;
|
||||
}
|
||||
|
||||
pr_err("Unexpected read source %u\n", source);
|
||||
WARN_ON_ONCE(1);
|
||||
break;
|
||||
|
||||
done:
|
||||
issue:
|
||||
slice = netfs_prepare_read_iterator(subreq);
|
||||
if (slice < 0) {
|
||||
netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
|
||||
ret = slice;
|
||||
break;
|
||||
}
|
||||
size -= slice;
|
||||
start += slice;
|
||||
if (size <= 0) {
|
||||
smp_wmb(); /* Write lists before ALL_QUEUED. */
|
||||
set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
|
||||
}
|
||||
|
||||
netfs_issue_read(rreq, subreq);
|
||||
cond_resched();
|
||||
} while (size > 0);
|
||||
|
||||
if (atomic_dec_and_test(&rreq->nr_outstanding))
|
||||
netfs_rreq_terminated(rreq, false);
|
||||
if (unlikely(size > 0)) {
|
||||
smp_wmb(); /* Write lists before ALL_QUEUED. */
|
||||
set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
|
||||
netfs_wake_read_collector(rreq);
|
||||
}
|
||||
|
||||
/* Defer error return as we may need to wait for outstanding I/O. */
|
||||
cmpxchg(&rreq->error, 0, ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* Wait for the read operation to complete, successfully or otherwise.
|
||||
*/
|
||||
static int netfs_wait_for_read(struct netfs_io_request *rreq)
|
||||
{
|
||||
int ret;
|
||||
|
||||
trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip);
|
||||
wait_on_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS, TASK_UNINTERRUPTIBLE);
|
||||
ret = rreq->error;
|
||||
if (ret == 0 && rreq->submitted < rreq->len) {
|
||||
trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
|
||||
ret = -EIO;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Set up the initial folioq of buffer folios in the rolling buffer and set the
|
||||
* iterator to refer to it.
|
||||
*/
|
||||
static int netfs_prime_buffer(struct netfs_io_request *rreq)
|
||||
{
|
||||
struct folio_queue *folioq;
|
||||
struct folio_batch put_batch;
|
||||
size_t added;
|
||||
|
||||
folioq = kmalloc(sizeof(*folioq), GFP_KERNEL);
|
||||
if (!folioq)
|
||||
return -ENOMEM;
|
||||
netfs_stat(&netfs_n_folioq);
|
||||
folioq_init(folioq);
|
||||
rreq->buffer = folioq;
|
||||
rreq->buffer_tail = folioq;
|
||||
rreq->submitted = rreq->start;
|
||||
iov_iter_folio_queue(&rreq->iter, ITER_DEST, folioq, 0, 0, 0);
|
||||
|
||||
folio_batch_init(&put_batch);
|
||||
added = netfs_load_buffer_from_ra(rreq, folioq, &put_batch);
|
||||
folio_batch_release(&put_batch);
|
||||
rreq->iter.count += added;
|
||||
rreq->submitted += added;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* netfs_readahead - Helper to manage a read request
|
||||
* @ractl: The description of the readahead request
|
||||
@ -405,6 +332,8 @@ void netfs_readahead(struct readahead_control *ractl)
|
||||
if (IS_ERR(rreq))
|
||||
return;
|
||||
|
||||
__set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags);
|
||||
|
||||
ret = netfs_begin_cache_read(rreq, ictx);
|
||||
if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
|
||||
goto cleanup_free;
|
||||
@ -416,7 +345,8 @@ void netfs_readahead(struct readahead_control *ractl)
|
||||
netfs_rreq_expand(rreq, ractl);
|
||||
|
||||
rreq->ractl = ractl;
|
||||
if (netfs_prime_buffer(rreq) < 0)
|
||||
rreq->submitted = rreq->start;
|
||||
if (rolling_buffer_init(&rreq->buffer, rreq->debug_id, ITER_DEST) < 0)
|
||||
goto cleanup_free;
|
||||
netfs_read_to_pagecache(rreq);
|
||||
|
||||
@ -432,23 +362,18 @@ EXPORT_SYMBOL(netfs_readahead);
|
||||
/*
|
||||
* Create a rolling buffer with a single occupying folio.
|
||||
*/
|
||||
static int netfs_create_singular_buffer(struct netfs_io_request *rreq, struct folio *folio)
|
||||
static int netfs_create_singular_buffer(struct netfs_io_request *rreq, struct folio *folio,
|
||||
unsigned int rollbuf_flags)
|
||||
{
|
||||
struct folio_queue *folioq;
|
||||
ssize_t added;
|
||||
|
||||
folioq = kmalloc(sizeof(*folioq), GFP_KERNEL);
|
||||
if (!folioq)
|
||||
if (rolling_buffer_init(&rreq->buffer, rreq->debug_id, ITER_DEST) < 0)
|
||||
return -ENOMEM;
|
||||
|
||||
netfs_stat(&netfs_n_folioq);
|
||||
folioq_init(folioq);
|
||||
folioq_append(folioq, folio);
|
||||
BUG_ON(folioq_folio(folioq, 0) != folio);
|
||||
BUG_ON(folioq_folio_order(folioq, 0) != folio_order(folio));
|
||||
rreq->buffer = folioq;
|
||||
rreq->buffer_tail = folioq;
|
||||
rreq->submitted = rreq->start + rreq->len;
|
||||
iov_iter_folio_queue(&rreq->iter, ITER_DEST, folioq, 0, 0, rreq->len);
|
||||
added = rolling_buffer_append(&rreq->buffer, folio, rollbuf_flags);
|
||||
if (added < 0)
|
||||
return added;
|
||||
rreq->submitted = rreq->start + added;
|
||||
rreq->ractl = (struct readahead_control *)1UL;
|
||||
return 0;
|
||||
}
|
||||
@ -516,7 +441,7 @@ static int netfs_read_gaps(struct file *file, struct folio *folio)
|
||||
}
|
||||
if (to < flen)
|
||||
bvec_set_folio(&bvec[i++], folio, flen - to, to);
|
||||
iov_iter_bvec(&rreq->iter, ITER_DEST, bvec, i, rreq->len);
|
||||
iov_iter_bvec(&rreq->buffer.iter, ITER_DEST, bvec, i, rreq->len);
|
||||
rreq->submitted = rreq->start + flen;
|
||||
|
||||
netfs_read_to_pagecache(rreq);
|
||||
@ -525,7 +450,7 @@ static int netfs_read_gaps(struct file *file, struct folio *folio)
|
||||
folio_put(sink);
|
||||
|
||||
ret = netfs_wait_for_read(rreq);
|
||||
if (ret == 0) {
|
||||
if (ret >= 0) {
|
||||
flush_dcache_folio(folio);
|
||||
folio_mark_uptodate(folio);
|
||||
}
|
||||
@ -584,7 +509,7 @@ int netfs_read_folio(struct file *file, struct folio *folio)
|
||||
trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);
|
||||
|
||||
/* Set up the output buffer */
|
||||
ret = netfs_create_singular_buffer(rreq, folio);
|
||||
ret = netfs_create_singular_buffer(rreq, folio, 0);
|
||||
if (ret < 0)
|
||||
goto discard;
|
||||
|
||||
@ -741,7 +666,7 @@ int netfs_write_begin(struct netfs_inode *ctx,
|
||||
trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);
|
||||
|
||||
/* Set up the output buffer */
|
||||
ret = netfs_create_singular_buffer(rreq, folio);
|
||||
ret = netfs_create_singular_buffer(rreq, folio, 0);
|
||||
if (ret < 0)
|
||||
goto error_put;
|
||||
|
||||
@ -806,15 +731,14 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio,
|
||||
trace_netfs_read(rreq, start, flen, netfs_read_trace_prefetch_for_write);
|
||||
|
||||
/* Set up the output buffer */
|
||||
ret = netfs_create_singular_buffer(rreq, folio);
|
||||
ret = netfs_create_singular_buffer(rreq, folio, NETFS_ROLLBUF_PAGECACHE_MARK);
|
||||
if (ret < 0)
|
||||
goto error_put;
|
||||
|
||||
folioq_mark2(rreq->buffer, 0);
|
||||
netfs_read_to_pagecache(rreq);
|
||||
ret = netfs_wait_for_read(rreq);
|
||||
netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
|
||||
return ret;
|
||||
return ret < 0 ? ret : 0;
|
||||
|
||||
error_put:
|
||||
netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
|
||||
|
@ -25,7 +25,7 @@ static void netfs_prepare_dio_read_iterator(struct netfs_io_subrequest *subreq)
|
||||
subreq->len = rsize;
|
||||
|
||||
if (unlikely(rreq->io_streams[0].sreq_max_segs)) {
|
||||
size_t limit = netfs_limit_iter(&rreq->iter, 0, rsize,
|
||||
size_t limit = netfs_limit_iter(&rreq->buffer.iter, 0, rsize,
|
||||
rreq->io_streams[0].sreq_max_segs);
|
||||
|
||||
if (limit < rsize) {
|
||||
@ -36,9 +36,9 @@ static void netfs_prepare_dio_read_iterator(struct netfs_io_subrequest *subreq)
|
||||
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
|
||||
|
||||
subreq->io_iter = rreq->iter;
|
||||
subreq->io_iter = rreq->buffer.iter;
|
||||
iov_iter_truncate(&subreq->io_iter, subreq->len);
|
||||
iov_iter_advance(&rreq->iter, subreq->len);
|
||||
iov_iter_advance(&rreq->buffer.iter, subreq->len);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -47,12 +47,11 @@ static void netfs_prepare_dio_read_iterator(struct netfs_io_subrequest *subreq)
|
||||
*/
|
||||
static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
|
||||
{
|
||||
struct netfs_io_stream *stream = &rreq->io_streams[0];
|
||||
unsigned long long start = rreq->start;
|
||||
ssize_t size = rreq->len;
|
||||
int ret = 0;
|
||||
|
||||
atomic_set(&rreq->nr_outstanding, 1);
|
||||
|
||||
do {
|
||||
struct netfs_io_subrequest *subreq;
|
||||
ssize_t slice;
|
||||
@ -67,19 +66,25 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
|
||||
subreq->start = start;
|
||||
subreq->len = size;
|
||||
|
||||
atomic_inc(&rreq->nr_outstanding);
|
||||
spin_lock_bh(&rreq->lock);
|
||||
list_add_tail(&subreq->rreq_link, &rreq->subrequests);
|
||||
subreq->prev_donated = rreq->prev_donated;
|
||||
rreq->prev_donated = 0;
|
||||
__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
|
||||
|
||||
spin_lock(&rreq->lock);
|
||||
list_add_tail(&subreq->rreq_link, &stream->subrequests);
|
||||
if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
|
||||
stream->front = subreq;
|
||||
if (!stream->active) {
|
||||
stream->collected_to = stream->front->start;
|
||||
/* Store list pointers before active flag */
|
||||
smp_store_release(&stream->active, true);
|
||||
}
|
||||
}
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_added);
|
||||
spin_unlock_bh(&rreq->lock);
|
||||
spin_unlock(&rreq->lock);
|
||||
|
||||
netfs_stat(&netfs_n_rh_download);
|
||||
if (rreq->netfs_ops->prepare_read) {
|
||||
ret = rreq->netfs_ops->prepare_read(subreq);
|
||||
if (ret < 0) {
|
||||
atomic_dec(&rreq->nr_outstanding);
|
||||
netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
|
||||
break;
|
||||
}
|
||||
@ -87,20 +92,34 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
|
||||
|
||||
netfs_prepare_dio_read_iterator(subreq);
|
||||
slice = subreq->len;
|
||||
rreq->netfs_ops->issue_read(subreq);
|
||||
|
||||
size -= slice;
|
||||
start += slice;
|
||||
rreq->submitted += slice;
|
||||
if (size <= 0) {
|
||||
smp_wmb(); /* Write lists before ALL_QUEUED. */
|
||||
set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
|
||||
}
|
||||
|
||||
rreq->netfs_ops->issue_read(subreq);
|
||||
|
||||
if (test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) {
|
||||
trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause);
|
||||
wait_on_bit(&rreq->flags, NETFS_RREQ_PAUSE, TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
if (test_bit(NETFS_RREQ_FAILED, &rreq->flags))
|
||||
break;
|
||||
if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) &&
|
||||
test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags))
|
||||
break;
|
||||
cond_resched();
|
||||
} while (size > 0);
|
||||
|
||||
if (atomic_dec_and_test(&rreq->nr_outstanding))
|
||||
netfs_rreq_terminated(rreq, false);
|
||||
if (unlikely(size > 0)) {
|
||||
smp_wmb(); /* Write lists before ALL_QUEUED. */
|
||||
set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
|
||||
netfs_wake_read_collector(rreq);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -133,21 +152,10 @@ static int netfs_unbuffered_read(struct netfs_io_request *rreq, bool sync)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (sync) {
|
||||
trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip);
|
||||
wait_on_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
|
||||
ret = rreq->error;
|
||||
if (ret == 0 && rreq->submitted < rreq->len &&
|
||||
rreq->origin != NETFS_DIO_READ) {
|
||||
trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
|
||||
ret = -EIO;
|
||||
}
|
||||
} else {
|
||||
if (sync)
|
||||
ret = netfs_wait_for_read(rreq);
|
||||
else
|
||||
ret = -EIOCBQUEUED;
|
||||
}
|
||||
|
||||
out:
|
||||
_leave(" = %d", ret);
|
||||
return ret;
|
||||
@ -199,15 +207,15 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
|
||||
* the request.
|
||||
*/
|
||||
if (user_backed_iter(iter)) {
|
||||
ret = netfs_extract_user_iter(iter, rreq->len, &rreq->iter, 0);
|
||||
ret = netfs_extract_user_iter(iter, rreq->len, &rreq->buffer.iter, 0);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
rreq->direct_bv = (struct bio_vec *)rreq->iter.bvec;
|
||||
rreq->direct_bv = (struct bio_vec *)rreq->buffer.iter.bvec;
|
||||
rreq->direct_bv_count = ret;
|
||||
rreq->direct_bv_unpin = iov_iter_extract_will_pin(iter);
|
||||
rreq->len = iov_iter_count(&rreq->iter);
|
||||
rreq->len = iov_iter_count(&rreq->buffer.iter);
|
||||
} else {
|
||||
rreq->iter = *iter;
|
||||
rreq->buffer.iter = *iter;
|
||||
rreq->len = orig_count;
|
||||
rreq->direct_bv_unpin = false;
|
||||
iov_iter_advance(iter, orig_count);
|
||||
@ -215,8 +223,10 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
|
||||
|
||||
// TODO: Set up bounce buffer if needed
|
||||
|
||||
if (!sync)
|
||||
if (!sync) {
|
||||
rreq->iocb = iocb;
|
||||
__set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags);
|
||||
}
|
||||
|
||||
ret = netfs_unbuffered_read(rreq, sync);
|
||||
if (ret < 0)
|
||||
|
@ -68,19 +68,19 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
|
||||
* request.
|
||||
*/
|
||||
if (async || user_backed_iter(iter)) {
|
||||
n = netfs_extract_user_iter(iter, len, &wreq->iter, 0);
|
||||
n = netfs_extract_user_iter(iter, len, &wreq->buffer.iter, 0);
|
||||
if (n < 0) {
|
||||
ret = n;
|
||||
goto out;
|
||||
}
|
||||
wreq->direct_bv = (struct bio_vec *)wreq->iter.bvec;
|
||||
wreq->direct_bv = (struct bio_vec *)wreq->buffer.iter.bvec;
|
||||
wreq->direct_bv_count = n;
|
||||
wreq->direct_bv_unpin = iov_iter_extract_will_pin(iter);
|
||||
} else {
|
||||
wreq->iter = *iter;
|
||||
wreq->buffer.iter = *iter;
|
||||
}
|
||||
|
||||
wreq->io_iter = wreq->iter;
|
||||
wreq->buffer.iter = wreq->buffer.iter;
|
||||
}
|
||||
|
||||
__set_bit(NETFS_RREQ_USE_IO_ITER, &wreq->flags);
|
||||
@ -92,7 +92,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
|
||||
__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
|
||||
if (async)
|
||||
wreq->iocb = iocb;
|
||||
wreq->len = iov_iter_count(&wreq->io_iter);
|
||||
wreq->len = iov_iter_count(&wreq->buffer.iter);
|
||||
wreq->cleanup = netfs_cleanup_dio_write;
|
||||
ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), wreq->len);
|
||||
if (ret < 0) {
|
||||
|
@ -23,6 +23,7 @@
|
||||
/*
|
||||
* buffered_read.c
|
||||
*/
|
||||
void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool was_async);
|
||||
int netfs_prefetch_for_write(struct file *file, struct folio *folio,
|
||||
size_t offset, size_t len);
|
||||
|
||||
@ -58,11 +59,8 @@ static inline void netfs_proc_del_rreq(struct netfs_io_request *rreq) {}
|
||||
/*
|
||||
* misc.c
|
||||
*/
|
||||
struct folio_queue *netfs_buffer_make_space(struct netfs_io_request *rreq);
|
||||
int netfs_buffer_append_folio(struct netfs_io_request *rreq, struct folio *folio,
|
||||
bool needs_put);
|
||||
struct folio_queue *netfs_delete_buffer_head(struct netfs_io_request *wreq);
|
||||
void netfs_clear_buffer(struct netfs_io_request *rreq);
|
||||
struct folio_queue *netfs_buffer_make_space(struct netfs_io_request *rreq,
|
||||
enum netfs_folioq_trace trace);
|
||||
void netfs_reset_iter(struct netfs_io_subrequest *subreq);
|
||||
|
||||
/*
|
||||
@ -84,17 +82,25 @@ static inline void netfs_see_request(struct netfs_io_request *rreq,
|
||||
trace_netfs_rreq_ref(rreq->debug_id, refcount_read(&rreq->ref), what);
|
||||
}
|
||||
|
||||
static inline void netfs_see_subrequest(struct netfs_io_subrequest *subreq,
|
||||
enum netfs_sreq_ref_trace what)
|
||||
{
|
||||
trace_netfs_sreq_ref(subreq->rreq->debug_id, subreq->debug_index,
|
||||
refcount_read(&subreq->ref), what);
|
||||
}
|
||||
|
||||
/*
|
||||
* read_collect.c
|
||||
*/
|
||||
void netfs_read_termination_worker(struct work_struct *work);
|
||||
void netfs_rreq_terminated(struct netfs_io_request *rreq, bool was_async);
|
||||
void netfs_read_collection_worker(struct work_struct *work);
|
||||
void netfs_wake_read_collector(struct netfs_io_request *rreq);
|
||||
void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool was_async);
|
||||
ssize_t netfs_wait_for_read(struct netfs_io_request *rreq);
|
||||
|
||||
/*
|
||||
* read_pgpriv2.c
|
||||
*/
|
||||
void netfs_pgpriv2_mark_copy_to_cache(struct netfs_io_subrequest *subreq,
|
||||
struct netfs_io_request *rreq,
|
||||
void netfs_pgpriv2_mark_copy_to_cache(struct netfs_io_request *rreq,
|
||||
struct folio_queue *folioq,
|
||||
int slot);
|
||||
void netfs_pgpriv2_write_to_the_cache(struct netfs_io_request *rreq);
|
||||
@ -113,6 +119,7 @@ void netfs_unlock_abandoned_read_pages(struct netfs_io_request *rreq);
|
||||
extern atomic_t netfs_n_rh_dio_read;
|
||||
extern atomic_t netfs_n_rh_readahead;
|
||||
extern atomic_t netfs_n_rh_read_folio;
|
||||
extern atomic_t netfs_n_rh_read_single;
|
||||
extern atomic_t netfs_n_rh_rreq;
|
||||
extern atomic_t netfs_n_rh_sreq;
|
||||
extern atomic_t netfs_n_rh_download;
|
||||
@ -181,9 +188,9 @@ void netfs_reissue_write(struct netfs_io_stream *stream,
|
||||
struct iov_iter *source);
|
||||
void netfs_issue_write(struct netfs_io_request *wreq,
|
||||
struct netfs_io_stream *stream);
|
||||
int netfs_advance_write(struct netfs_io_request *wreq,
|
||||
struct netfs_io_stream *stream,
|
||||
loff_t start, size_t len, bool to_eof);
|
||||
size_t netfs_advance_write(struct netfs_io_request *wreq,
|
||||
struct netfs_io_stream *stream,
|
||||
loff_t start, size_t len, bool to_eof);
|
||||
struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len);
|
||||
int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
|
||||
struct folio *folio, size_t copied, bool to_page_end,
|
||||
@ -192,6 +199,11 @@ int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_contr
|
||||
struct folio *writethrough_cache);
|
||||
int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len);
|
||||
|
||||
/*
|
||||
* write_retry.c
|
||||
*/
|
||||
void netfs_retry_writes(struct netfs_io_request *wreq);
|
||||
|
||||
/*
|
||||
* Miscellaneous functions.
|
||||
*/
|
||||
|
@ -37,9 +37,11 @@ static const char *netfs_origins[nr__netfs_io_origin] = {
|
||||
[NETFS_READAHEAD] = "RA",
|
||||
[NETFS_READPAGE] = "RP",
|
||||
[NETFS_READ_GAPS] = "RG",
|
||||
[NETFS_READ_SINGLE] = "R1",
|
||||
[NETFS_READ_FOR_WRITE] = "RW",
|
||||
[NETFS_DIO_READ] = "DR",
|
||||
[NETFS_WRITEBACK] = "WB",
|
||||
[NETFS_WRITEBACK_SINGLE] = "W1",
|
||||
[NETFS_WRITETHROUGH] = "WT",
|
||||
[NETFS_UNBUFFERED_WRITE] = "UW",
|
||||
[NETFS_DIO_WRITE] = "DW",
|
||||
@ -69,7 +71,7 @@ static int netfs_requests_seq_show(struct seq_file *m, void *v)
|
||||
refcount_read(&rreq->ref),
|
||||
rreq->flags,
|
||||
rreq->error,
|
||||
atomic_read(&rreq->nr_outstanding),
|
||||
0,
|
||||
rreq->start, rreq->submitted, rreq->len);
|
||||
seq_putc(m, '\n');
|
||||
return 0;
|
||||
@ -116,7 +118,7 @@ static int __init netfs_init(void)
|
||||
goto error_reqpool;
|
||||
|
||||
netfs_subrequest_slab = kmem_cache_create("netfs_subrequest",
|
||||
sizeof(struct netfs_io_subrequest), 0,
|
||||
sizeof(struct netfs_io_subrequest) + 16, 0,
|
||||
SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT,
|
||||
NULL);
|
||||
if (!netfs_subrequest_slab)
|
||||
|
fs/netfs/misc.c
@ -8,113 +8,100 @@
|
||||
#include <linux/swap.h>
|
||||
#include "internal.h"
|
||||
|
||||
/*
|
||||
* Make sure there's space in the rolling queue.
|
||||
/**
|
||||
* netfs_alloc_folioq_buffer - Allocate buffer space into a folio queue
|
||||
* @mapping: Address space to set on the folio (or NULL).
|
||||
* @_buffer: Pointer to the folio queue to add to (may point to a NULL; updated).
|
||||
* @_cur_size: Current size of the buffer (updated).
|
||||
* @size: Target size of the buffer.
|
||||
* @gfp: The allocation constraints.
|
||||
*/
|
||||
struct folio_queue *netfs_buffer_make_space(struct netfs_io_request *rreq)
|
||||
int netfs_alloc_folioq_buffer(struct address_space *mapping,
|
||||
struct folio_queue **_buffer,
|
||||
size_t *_cur_size, ssize_t size, gfp_t gfp)
|
||||
{
|
||||
struct folio_queue *tail = rreq->buffer_tail, *prev;
|
||||
unsigned int prev_nr_slots = 0;
|
||||
struct folio_queue *tail = *_buffer, *p;
|
||||
|
||||
if (WARN_ON_ONCE(!rreq->buffer && tail) ||
|
||||
WARN_ON_ONCE(rreq->buffer && !tail))
|
||||
return ERR_PTR(-EIO);
|
||||
size = round_up(size, PAGE_SIZE);
|
||||
if (*_cur_size >= size)
|
||||
return 0;
|
||||
|
||||
prev = tail;
|
||||
if (prev) {
|
||||
if (!folioq_full(tail))
|
||||
return tail;
|
||||
prev_nr_slots = folioq_nr_slots(tail);
|
||||
}
|
||||
if (tail)
|
||||
while (tail->next)
|
||||
tail = tail->next;
|
||||
|
||||
tail = kmalloc(sizeof(*tail), GFP_NOFS);
|
||||
if (!tail)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
netfs_stat(&netfs_n_folioq);
|
||||
folioq_init(tail);
|
||||
tail->prev = prev;
|
||||
if (prev)
|
||||
/* [!] NOTE: After we set prev->next, the consumer is entirely
|
||||
* at liberty to delete prev.
|
||||
*/
|
||||
WRITE_ONCE(prev->next, tail);
|
||||
do {
|
||||
struct folio *folio;
|
||||
int order = 0, slot;
|
||||
|
||||
rreq->buffer_tail = tail;
|
||||
if (!rreq->buffer) {
|
||||
rreq->buffer = tail;
|
||||
iov_iter_folio_queue(&rreq->io_iter, ITER_SOURCE, tail, 0, 0, 0);
|
||||
} else {
|
||||
/* Make sure we don't leave the master iterator pointing to a
|
||||
* block that might get immediately consumed.
|
||||
*/
|
||||
if (rreq->io_iter.folioq == prev &&
|
||||
rreq->io_iter.folioq_slot == prev_nr_slots) {
|
||||
rreq->io_iter.folioq = tail;
|
||||
rreq->io_iter.folioq_slot = 0;
|
||||
if (!tail || folioq_full(tail)) {
|
||||
p = netfs_folioq_alloc(0, GFP_NOFS, netfs_trace_folioq_alloc_buffer);
|
||||
if (!p)
|
||||
return -ENOMEM;
|
||||
if (tail) {
|
||||
tail->next = p;
|
||||
p->prev = tail;
|
||||
} else {
|
||||
*_buffer = p;
|
||||
}
|
||||
tail = p;
|
||||
}
|
||||
}
|
||||
rreq->buffer_tail_slot = 0;
|
||||
return tail;
|
||||
}
|
||||
|
||||
/*
|
||||
* Append a folio to the rolling queue.
|
||||
*/
|
||||
int netfs_buffer_append_folio(struct netfs_io_request *rreq, struct folio *folio,
|
||||
bool needs_put)
|
||||
{
|
||||
struct folio_queue *tail;
|
||||
unsigned int slot, order = folio_order(folio);
|
||||
if (size - *_cur_size > PAGE_SIZE)
|
||||
order = umin(ilog2(size - *_cur_size) - PAGE_SHIFT,
|
||||
MAX_PAGECACHE_ORDER);
|
||||
|
||||
tail = netfs_buffer_make_space(rreq);
|
||||
if (IS_ERR(tail))
|
||||
return PTR_ERR(tail);
|
||||
folio = folio_alloc(gfp, order);
|
||||
if (!folio && order > 0)
|
||||
folio = folio_alloc(gfp, 0);
|
||||
if (!folio)
|
||||
return -ENOMEM;
|
||||
|
||||
rreq->io_iter.count += PAGE_SIZE << order;
|
||||
folio->mapping = mapping;
|
||||
folio->index = *_cur_size / PAGE_SIZE;
|
||||
trace_netfs_folio(folio, netfs_folio_trace_alloc_buffer);
|
||||
slot = folioq_append_mark(tail, folio);
|
||||
*_cur_size += folioq_folio_size(tail, slot);
|
||||
} while (*_cur_size < size);
|
||||
|
||||
slot = folioq_append(tail, folio);
|
||||
/* Store the counter after setting the slot. */
|
||||
smp_store_release(&rreq->buffer_tail_slot, slot);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(netfs_alloc_folioq_buffer);
|
||||
|
||||
/*
|
||||
* Delete the head of a rolling queue.
|
||||
/**
|
||||
* netfs_free_folioq_buffer - Free a folio queue.
|
||||
* @fq: The start of the folio queue to free
|
||||
*
|
||||
* Free up a chain of folio_queues and, if marked, the marked folios they point
|
||||
* to.
|
||||
*/
|
||||
struct folio_queue *netfs_delete_buffer_head(struct netfs_io_request *wreq)
|
||||
void netfs_free_folioq_buffer(struct folio_queue *fq)
|
||||
{
|
||||
struct folio_queue *head = wreq->buffer, *next = head->next;
|
||||
struct folio_queue *next;
|
||||
struct folio_batch fbatch;
|
||||
|
||||
if (next)
|
||||
next->prev = NULL;
|
||||
netfs_stat_d(&netfs_n_folioq);
|
||||
kfree(head);
|
||||
wreq->buffer = next;
|
||||
return next;
|
||||
}
|
||||
folio_batch_init(&fbatch);
|
||||
|
||||
/*
|
||||
* Clear out a rolling queue.
|
||||
*/
|
||||
void netfs_clear_buffer(struct netfs_io_request *rreq)
|
||||
{
|
||||
struct folio_queue *p;
|
||||
|
||||
while ((p = rreq->buffer)) {
|
||||
rreq->buffer = p->next;
|
||||
for (int slot = 0; slot < folioq_count(p); slot++) {
|
||||
struct folio *folio = folioq_folio(p, slot);
|
||||
if (!folio)
|
||||
for (; fq; fq = next) {
|
||||
for (int slot = 0; slot < folioq_count(fq); slot++) {
|
||||
struct folio *folio = folioq_folio(fq, slot);
|
||||
if (!folio ||
|
||||
!folioq_is_marked(fq, slot))
|
||||
continue;
|
||||
if (folioq_is_marked(p, slot)) {
|
||||
trace_netfs_folio(folio, netfs_folio_trace_put);
|
||||
folio_put(folio);
|
||||
}
|
||||
|
||||
trace_netfs_folio(folio, netfs_folio_trace_put);
|
||||
if (folio_batch_add(&fbatch, folio))
|
||||
folio_batch_release(&fbatch);
|
||||
}
|
||||
|
||||
netfs_stat_d(&netfs_n_folioq);
|
||||
kfree(p);
|
||||
next = fq->next;
|
||||
kfree(fq);
|
||||
}
|
||||
|
||||
folio_batch_release(&fbatch);
|
||||
}
|
||||
EXPORT_SYMBOL(netfs_free_folioq_buffer);
|
||||
|
||||
/*
|
||||
* Reset the subrequest iterator to refer just to the region remaining to be
|
||||
|
@ -48,17 +48,20 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
|
||||
spin_lock_init(&rreq->lock);
|
||||
INIT_LIST_HEAD(&rreq->io_streams[0].subrequests);
|
||||
INIT_LIST_HEAD(&rreq->io_streams[1].subrequests);
|
||||
INIT_LIST_HEAD(&rreq->subrequests);
|
||||
init_waitqueue_head(&rreq->waitq);
|
||||
refcount_set(&rreq->ref, 1);
|
||||
|
||||
if (origin == NETFS_READAHEAD ||
|
||||
origin == NETFS_READPAGE ||
|
||||
origin == NETFS_READ_GAPS ||
|
||||
origin == NETFS_READ_SINGLE ||
|
||||
origin == NETFS_READ_FOR_WRITE ||
|
||||
origin == NETFS_DIO_READ)
|
||||
INIT_WORK(&rreq->work, netfs_read_termination_worker);
|
||||
else
|
||||
origin == NETFS_DIO_READ) {
|
||||
INIT_WORK(&rreq->work, netfs_read_collection_worker);
|
||||
rreq->io_streams[0].avail = true;
|
||||
} else {
|
||||
INIT_WORK(&rreq->work, netfs_write_collection_worker);
|
||||
}
|
||||
|
||||
__set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
|
||||
if (file && file->f_flags & O_NONBLOCK)
|
||||
@ -92,14 +95,6 @@ void netfs_clear_subrequests(struct netfs_io_request *rreq, bool was_async)
|
||||
struct netfs_io_stream *stream;
|
||||
int s;
|
||||
|
||||
while (!list_empty(&rreq->subrequests)) {
|
||||
subreq = list_first_entry(&rreq->subrequests,
|
||||
struct netfs_io_subrequest, rreq_link);
|
||||
list_del(&subreq->rreq_link);
|
||||
netfs_put_subrequest(subreq, was_async,
|
||||
netfs_sreq_trace_put_clear);
|
||||
}
|
||||
|
||||
for (s = 0; s < ARRAY_SIZE(rreq->io_streams); s++) {
|
||||
stream = &rreq->io_streams[s];
|
||||
while (!list_empty(&stream->subrequests)) {
|
||||
@ -143,7 +138,7 @@ static void netfs_free_request(struct work_struct *work)
|
||||
}
|
||||
kvfree(rreq->direct_bv);
|
||||
}
|
||||
netfs_clear_buffer(rreq);
|
||||
rolling_buffer_clear(&rreq->buffer);
|
||||
|
||||
if (atomic_dec_and_test(&ictx->io_count))
|
||||
wake_up_var(&ictx->io_count);
|
||||
|
@ -14,6 +14,14 @@
|
||||
#include <linux/task_io_accounting_ops.h>
|
||||
#include "internal.h"
|
||||
|
||||
/* Notes made in the collector */
|
||||
#define HIT_PENDING 0x01 /* A front op was still pending */
|
||||
#define MADE_PROGRESS 0x04 /* Made progress cleaning up a stream or the folio set */
|
||||
#define BUFFERED 0x08 /* The pagecache needs cleaning up */
|
||||
#define NEED_RETRY 0x10 /* A front op requests retrying */
|
||||
#define COPY_TO_CACHE 0x40 /* Need to copy subrequest to cache */
|
||||
#define ABANDON_SREQ 0x80 /* Need to abandon untransferred part of subrequest */
|
||||
|
||||
/*
|
||||
* Clear the unread part of an I/O request.
|
||||
*/
|
||||
@ -31,14 +39,18 @@ static void netfs_clear_unread(struct netfs_io_subrequest *subreq)
|
||||
* cache the folio, we set the group to NETFS_FOLIO_COPY_TO_CACHE, mark it
|
||||
* dirty and let writeback handle it.
|
||||
*/
|
||||
static void netfs_unlock_read_folio(struct netfs_io_subrequest *subreq,
|
||||
struct netfs_io_request *rreq,
|
||||
static void netfs_unlock_read_folio(struct netfs_io_request *rreq,
|
||||
struct folio_queue *folioq,
|
||||
int slot)
|
||||
{
|
||||
struct netfs_folio *finfo;
|
||||
struct folio *folio = folioq_folio(folioq, slot);
|
||||
|
||||
if (unlikely(folio_pos(folio) < rreq->abandon_to)) {
|
||||
trace_netfs_folio(folio, netfs_folio_trace_abandon);
|
||||
goto just_unlock;
|
||||
}
|
||||
|
||||
flush_dcache_folio(folio);
|
||||
folio_mark_uptodate(folio);
|
||||
|
||||
@ -53,7 +65,7 @@ static void netfs_unlock_read_folio(struct netfs_io_subrequest *subreq,
|
||||
kfree(finfo);
|
||||
}
|
||||
|
||||
if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) {
|
||||
if (test_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags)) {
|
||||
if (!WARN_ON_ONCE(folio_get_private(folio) != NULL)) {
|
||||
trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
|
||||
folio_attach_private(folio, NETFS_FOLIO_COPY_TO_CACHE);
|
||||
@ -64,10 +76,11 @@ static void netfs_unlock_read_folio(struct netfs_io_subrequest *subreq,
|
||||
}
|
||||
} else {
|
||||
// TODO: Use of PG_private_2 is deprecated.
|
||||
if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags))
|
||||
netfs_pgpriv2_mark_copy_to_cache(subreq, rreq, folioq, slot);
|
||||
if (test_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags))
|
||||
netfs_pgpriv2_mark_copy_to_cache(rreq, folioq, slot);
|
||||
}
|
||||
|
||||
just_unlock:
|
||||
if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
|
||||
if (folio->index == rreq->no_unlock_folio &&
|
||||
test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) {
|
||||
@ -82,234 +95,243 @@ static void netfs_unlock_read_folio(struct netfs_io_subrequest *subreq,
|
||||
}
|
||||
|
||||
/*
|
||||
* Unlock any folios that are now completely read. Returns true if the
|
||||
* subrequest is removed from the list.
|
||||
* Unlock any folios we've finished with.
|
||||
*/
|
||||
static bool netfs_consume_read_data(struct netfs_io_subrequest *subreq, bool was_async)
|
||||
static void netfs_read_unlock_folios(struct netfs_io_request *rreq,
|
||||
unsigned int *notes)
|
||||
{
|
||||
struct netfs_io_subrequest *prev, *next;
|
||||
struct netfs_io_request *rreq = subreq->rreq;
|
||||
struct folio_queue *folioq = subreq->curr_folioq;
|
||||
size_t avail, prev_donated, next_donated, fsize, part, excess;
|
||||
loff_t fpos, start;
|
||||
loff_t fend;
|
||||
int slot = subreq->curr_folioq_slot;
|
||||
struct folio_queue *folioq = rreq->buffer.tail;
|
||||
unsigned long long collected_to = rreq->collected_to;
|
||||
unsigned int slot = rreq->buffer.first_tail_slot;
|
||||
|
||||
if (WARN(subreq->transferred > subreq->len,
|
||||
"Subreq overread: R%x[%x] %zu > %zu",
|
||||
rreq->debug_id, subreq->debug_index,
|
||||
subreq->transferred, subreq->len))
|
||||
subreq->transferred = subreq->len;
|
||||
if (rreq->cleaned_to >= rreq->collected_to)
|
||||
return;
|
||||
|
||||
next_folio:
|
||||
fsize = PAGE_SIZE << subreq->curr_folio_order;
|
||||
fpos = round_down(subreq->start + subreq->consumed, fsize);
|
||||
fend = fpos + fsize;
|
||||
// TODO: Begin decryption
|
||||
|
||||
if (WARN_ON_ONCE(!folioq) ||
|
||||
WARN_ON_ONCE(!folioq_folio(folioq, slot)) ||
|
||||
WARN_ON_ONCE(folioq_folio(folioq, slot)->index != fpos / PAGE_SIZE)) {
|
||||
pr_err("R=%08x[%x] s=%llx-%llx ctl=%zx/%zx/%zx sl=%u\n",
|
||||
rreq->debug_id, subreq->debug_index,
|
||||
subreq->start, subreq->start + subreq->transferred - 1,
|
||||
subreq->consumed, subreq->transferred, subreq->len,
|
||||
slot);
|
||||
if (folioq) {
|
||||
struct folio *folio = folioq_folio(folioq, slot);
|
||||
|
||||
pr_err("folioq: orders=%02x%02x%02x%02x\n",
|
||||
folioq->orders[0], folioq->orders[1],
|
||||
folioq->orders[2], folioq->orders[3]);
|
||||
if (folio)
|
||||
pr_err("folio: %llx-%llx ix=%llx o=%u qo=%u\n",
|
||||
fpos, fend - 1, folio_pos(folio), folio_order(folio),
|
||||
folioq_folio_order(folioq, slot));
|
||||
if (slot >= folioq_nr_slots(folioq)) {
|
||||
folioq = rolling_buffer_delete_spent(&rreq->buffer);
|
||||
if (!folioq) {
|
||||
rreq->front_folio_order = 0;
|
||||
return;
|
||||
}
|
||||
slot = 0;
|
||||
}
|
||||
|
||||
donation_changed:
|
||||
/* Try to consume the current folio if we've hit or passed the end of
|
||||
* it. There's a possibility that this subreq doesn't start at the
|
||||
* beginning of the folio, in which case we need to donate to/from the
|
||||
* preceding subreq.
|
||||
*
|
||||
* We also need to include any potential donation back from the
|
||||
* following subreq.
|
||||
*/
|
||||
prev_donated = READ_ONCE(subreq->prev_donated);
|
||||
next_donated = READ_ONCE(subreq->next_donated);
|
||||
if (prev_donated || next_donated) {
|
||||
spin_lock_bh(&rreq->lock);
|
||||
prev_donated = subreq->prev_donated;
|
||||
next_donated = subreq->next_donated;
|
||||
subreq->start -= prev_donated;
|
||||
subreq->len += prev_donated;
|
||||
subreq->transferred += prev_donated;
|
||||
prev_donated = subreq->prev_donated = 0;
|
||||
if (subreq->transferred == subreq->len) {
|
||||
subreq->len += next_donated;
|
||||
subreq->transferred += next_donated;
|
||||
next_donated = subreq->next_donated = 0;
|
||||
}
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_add_donations);
|
||||
spin_unlock_bh(&rreq->lock);
|
||||
}
|
||||
for (;;) {
|
||||
struct folio *folio;
|
||||
unsigned long long fpos, fend;
|
||||
unsigned int order;
|
||||
size_t fsize;
|
||||
|
||||
avail = subreq->transferred;
|
||||
if (avail == subreq->len)
|
||||
avail += next_donated;
|
||||
start = subreq->start;
|
||||
if (subreq->consumed == 0) {
|
||||
start -= prev_donated;
|
||||
avail += prev_donated;
|
||||
} else {
|
||||
start += subreq->consumed;
|
||||
avail -= subreq->consumed;
|
||||
}
|
||||
part = umin(avail, fsize);
|
||||
if (*notes & COPY_TO_CACHE)
|
||||
set_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags);
|
||||
|
||||
trace_netfs_progress(subreq, start, avail, part);
|
||||
folio = folioq_folio(folioq, slot);
|
||||
if (WARN_ONCE(!folio_test_locked(folio),
|
||||
"R=%08x: folio %lx is not locked\n",
|
||||
rreq->debug_id, folio->index))
|
||||
trace_netfs_folio(folio, netfs_folio_trace_not_locked);
|
||||
|
||||
if (start + avail >= fend) {
|
||||
if (fpos == start) {
|
||||
/* Flush, unlock and mark for caching any folio we've just read. */
|
||||
subreq->consumed = fend - subreq->start;
|
||||
netfs_unlock_read_folio(subreq, rreq, folioq, slot);
|
||||
folioq_mark2(folioq, slot);
|
||||
if (subreq->consumed >= subreq->len)
|
||||
goto remove_subreq;
|
||||
} else if (fpos < start) {
|
||||
excess = fend - subreq->start;
|
||||
order = folioq_folio_order(folioq, slot);
|
||||
rreq->front_folio_order = order;
|
||||
fsize = PAGE_SIZE << order;
|
||||
fpos = folio_pos(folio);
|
||||
fend = umin(fpos + fsize, rreq->i_size);
|
||||
|
||||
spin_lock_bh(&rreq->lock);
|
||||
/* If we complete first on a folio split with the
|
||||
* preceding subreq, donate to that subreq - otherwise
|
||||
* we get the responsibility.
|
||||
*/
|
||||
if (subreq->prev_donated != prev_donated) {
|
||||
spin_unlock_bh(&rreq->lock);
|
||||
goto donation_changed;
|
||||
}
|
||||
trace_netfs_collect_folio(rreq, folio, fend, collected_to);
|
||||
|
||||
if (list_is_first(&subreq->rreq_link, &rreq->subrequests)) {
|
||||
spin_unlock_bh(&rreq->lock);
|
||||
pr_err("Can't donate prior to front\n");
|
||||
goto bad;
|
||||
}
|
||||
/* Unlock any folio we've transferred all of. */
|
||||
if (collected_to < fend)
|
||||
break;
|
||||
|
||||
prev = list_prev_entry(subreq, rreq_link);
|
||||
WRITE_ONCE(prev->next_donated, prev->next_donated + excess);
|
||||
subreq->start += excess;
|
||||
subreq->len -= excess;
|
||||
subreq->transferred -= excess;
|
||||
trace_netfs_donate(rreq, subreq, prev, excess,
|
||||
netfs_trace_donate_tail_to_prev);
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_donate_to_prev);
|
||||
netfs_unlock_read_folio(rreq, folioq, slot);
|
||||
WRITE_ONCE(rreq->cleaned_to, fpos + fsize);
|
||||
*notes |= MADE_PROGRESS;
|
||||
|
||||
if (subreq->consumed >= subreq->len)
|
||||
goto remove_subreq_locked;
|
||||
spin_unlock_bh(&rreq->lock);
|
||||
} else {
|
||||
pr_err("fpos > start\n");
|
||||
goto bad;
|
||||
}
|
||||
clear_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags);
|
||||
|
||||
/* Advance the rolling buffer to the next folio. */
|
||||
/* Clean up the head folioq. If we clear an entire folioq, then
|
||||
* we can get rid of it provided it's not also the tail folioq
|
||||
* being filled by the issuer.
|
||||
*/
|
||||
folioq_clear(folioq, slot);
|
||||
slot++;
|
||||
if (slot >= folioq_nr_slots(folioq)) {
|
||||
folioq = rolling_buffer_delete_spent(&rreq->buffer);
|
||||
if (!folioq)
|
||||
goto done;
|
||||
slot = 0;
|
||||
folioq = folioq->next;
|
||||
subreq->curr_folioq = folioq;
|
||||
trace_netfs_folioq(folioq, netfs_trace_folioq_read_progress);
|
||||
}
|
||||
subreq->curr_folioq_slot = slot;
|
||||
if (folioq && folioq_folio(folioq, slot))
|
||||
subreq->curr_folio_order = folioq->orders[slot];
|
||||
if (!was_async)
|
||||
cond_resched();
|
||||
goto next_folio;
|
||||
|
||||
if (fpos + fsize >= collected_to)
|
||||
break;
|
||||
}
|
||||
|
||||
/* Deal with partial progress. */
|
||||
if (subreq->transferred < subreq->len)
|
||||
return false;
|
||||
rreq->buffer.tail = folioq;
|
||||
done:
|
||||
rreq->buffer.first_tail_slot = slot;
|
||||
}
|
||||
|
||||
/* Donate the remaining downloaded data to one of the neighbouring
|
||||
* subrequests. Note that we may race with them doing the same thing.
|
||||
/*
|
||||
* Collect and assess the results of various read subrequests. We may need to
|
||||
* retry some of the results.
|
||||
*
|
||||
* Note that we have a sequence of subrequests, which may be drawing on
|
||||
* different sources and may or may not be the same size or starting position
|
||||
* and may not even correspond in boundary alignment.
|
||||
*/
|
||||
static void netfs_collect_read_results(struct netfs_io_request *rreq)
|
||||
{
|
||||
struct netfs_io_subrequest *front, *remove;
|
||||
struct netfs_io_stream *stream = &rreq->io_streams[0];
|
||||
unsigned int notes;
|
||||
|
||||
_enter("%llx-%llx", rreq->start, rreq->start + rreq->len);
|
||||
trace_netfs_rreq(rreq, netfs_rreq_trace_collect);
|
||||
trace_netfs_collect(rreq);
|
||||
|
||||
reassess:
|
||||
if (rreq->origin == NETFS_READAHEAD ||
|
||||
rreq->origin == NETFS_READPAGE ||
|
||||
rreq->origin == NETFS_READ_FOR_WRITE)
|
||||
notes = BUFFERED;
|
||||
else
|
||||
notes = 0;
|
||||
|
||||
/* Remove completed subrequests from the front of the stream and
|
||||
* advance the completion point. We stop when we hit something that's
|
||||
* in progress. The issuer thread may be adding stuff to the tail
|
||||
* whilst we're doing this.
|
||||
*/
|
||||
spin_lock_bh(&rreq->lock);
|
||||
front = READ_ONCE(stream->front);
|
||||
while (front) {
|
||||
size_t transferred;
|
||||
|
||||
if (subreq->prev_donated != prev_donated ||
|
||||
subreq->next_donated != next_donated) {
|
||||
spin_unlock_bh(&rreq->lock);
|
||||
cond_resched();
|
||||
goto donation_changed;
|
||||
trace_netfs_collect_sreq(rreq, front);
|
||||
_debug("sreq [%x] %llx %zx/%zx",
|
||||
front->debug_index, front->start, front->transferred, front->len);
|
||||
|
||||
if (stream->collected_to < front->start) {
|
||||
trace_netfs_collect_gap(rreq, stream, front->start, 'F');
|
||||
stream->collected_to = front->start;
|
||||
}
|
||||
|
||||
if (test_bit(NETFS_SREQ_IN_PROGRESS, &front->flags))
|
||||
notes |= HIT_PENDING;
|
||||
smp_rmb(); /* Read counters after IN_PROGRESS flag. */
|
||||
transferred = READ_ONCE(front->transferred);
|
||||
|
||||
/* If we can now collect the next folio, do so. We don't want
|
||||
* to defer this as we have to decide whether we need to copy
|
||||
* to the cache or not, and that may differ between adjacent
|
||||
* subreqs.
|
||||
*/
|
||||
if (notes & BUFFERED) {
|
||||
size_t fsize = PAGE_SIZE << rreq->front_folio_order;
|
||||
|
||||
/* Clear the tail of a short read. */
|
||||
if (!(notes & HIT_PENDING) &&
|
||||
front->error == 0 &&
|
||||
transferred < front->len &&
|
||||
(test_bit(NETFS_SREQ_HIT_EOF, &front->flags) ||
|
||||
test_bit(NETFS_SREQ_CLEAR_TAIL, &front->flags))) {
|
||||
netfs_clear_unread(front);
|
||||
transferred = front->transferred = front->len;
|
||||
trace_netfs_sreq(front, netfs_sreq_trace_clear);
|
||||
}
|
||||
|
||||
stream->collected_to = front->start + transferred;
|
||||
rreq->collected_to = stream->collected_to;
|
||||
|
||||
if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &front->flags))
|
||||
notes |= COPY_TO_CACHE;
|
||||
|
||||
if (test_bit(NETFS_SREQ_FAILED, &front->flags)) {
|
||||
rreq->abandon_to = front->start + front->len;
|
||||
front->transferred = front->len;
|
||||
transferred = front->len;
|
||||
trace_netfs_rreq(rreq, netfs_rreq_trace_set_abandon);
|
||||
}
|
||||
if (front->start + transferred >= rreq->cleaned_to + fsize ||
|
||||
test_bit(NETFS_SREQ_HIT_EOF, &front->flags))
|
||||
netfs_read_unlock_folios(rreq, ¬es);
|
||||
} else {
|
||||
stream->collected_to = front->start + transferred;
|
||||
rreq->collected_to = stream->collected_to;
|
||||
}
|
||||
|
||||
/* Stall if the front is still undergoing I/O. */
|
||||
if (notes & HIT_PENDING)
|
||||
break;
|
||||
|
||||
if (test_bit(NETFS_SREQ_FAILED, &front->flags)) {
|
||||
if (!stream->failed) {
|
||||
stream->error = front->error;
|
||||
rreq->error = front->error;
|
||||
set_bit(NETFS_RREQ_FAILED, &rreq->flags);
|
||||
stream->failed = true;
|
||||
}
|
||||
notes |= MADE_PROGRESS | ABANDON_SREQ;
|
||||
} else if (test_bit(NETFS_SREQ_NEED_RETRY, &front->flags)) {
|
||||
stream->need_retry = true;
|
||||
notes |= NEED_RETRY | MADE_PROGRESS;
|
||||
break;
|
||||
} else {
|
||||
if (!stream->failed)
|
||||
stream->transferred = stream->collected_to - rreq->start;
|
||||
notes |= MADE_PROGRESS;
|
||||
}
|
||||
|
||||
/* Remove if completely consumed. */
|
||||
stream->source = front->source;
|
||||
spin_lock(&rreq->lock);
|
||||
|
||||
remove = front;
|
||||
trace_netfs_sreq(front, netfs_sreq_trace_discard);
|
||||
list_del_init(&front->rreq_link);
|
||||
front = list_first_entry_or_null(&stream->subrequests,
|
||||
struct netfs_io_subrequest, rreq_link);
|
||||
stream->front = front;
|
||||
spin_unlock(&rreq->lock);
|
||||
netfs_put_subrequest(remove, false,
|
||||
notes & ABANDON_SREQ ?
|
||||
netfs_sreq_trace_put_cancel :
|
||||
netfs_sreq_trace_put_done);
|
||||
}
|
||||
|
||||
/* Deal with the trickiest case: that this subreq is in the middle of a
|
||||
* folio, not touching either edge, but finishes first. In such a
|
||||
* case, we donate to the previous subreq, if there is one, so that the
|
||||
* donation is only handled when that completes - and remove this
|
||||
* subreq from the list.
|
||||
*
|
||||
* If the previous subreq finished first, we will have acquired their
|
||||
* donation and should be able to unlock folios and/or donate nextwards.
|
||||
trace_netfs_collect_stream(rreq, stream);
|
||||
trace_netfs_collect_state(rreq, rreq->collected_to, notes);
|
||||
|
||||
if (!(notes & BUFFERED))
|
||||
rreq->cleaned_to = rreq->collected_to;
|
||||
|
||||
if (notes & NEED_RETRY)
|
||||
goto need_retry;
|
||||
if ((notes & MADE_PROGRESS) && test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) {
|
||||
trace_netfs_rreq(rreq, netfs_rreq_trace_unpause);
|
||||
clear_bit_unlock(NETFS_RREQ_PAUSE, &rreq->flags);
|
||||
wake_up_bit(&rreq->flags, NETFS_RREQ_PAUSE);
|
||||
}
|
||||
|
||||
if (notes & MADE_PROGRESS) {
|
||||
//cond_resched();
|
||||
goto reassess;
|
||||
}
|
||||
|
||||
out:
|
||||
_leave(" = %x", notes);
|
||||
return;
|
||||
|
||||
need_retry:
|
||||
/* Okay... We're going to have to retry parts of the stream. Note
|
||||
* that any partially completed op will have had any wholly transferred
|
||||
* folios removed from it.
|
||||
*/
|
||||
if (!subreq->consumed &&
|
||||
!prev_donated &&
|
||||
!list_is_first(&subreq->rreq_link, &rreq->subrequests)) {
|
||||
prev = list_prev_entry(subreq, rreq_link);
|
||||
WRITE_ONCE(prev->next_donated, prev->next_donated + subreq->len);
|
||||
subreq->start += subreq->len;
|
||||
subreq->len = 0;
|
||||
subreq->transferred = 0;
|
||||
trace_netfs_donate(rreq, subreq, prev, subreq->len,
|
||||
netfs_trace_donate_to_prev);
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_donate_to_prev);
|
||||
goto remove_subreq_locked;
|
||||
}
|
||||
|
||||
/* If we can't donate down the chain, donate up the chain instead. */
|
||||
excess = subreq->len - subreq->consumed + next_donated;
|
||||
|
||||
if (!subreq->consumed)
|
||||
excess += prev_donated;
|
||||
|
||||
if (list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
|
||||
rreq->prev_donated = excess;
|
||||
trace_netfs_donate(rreq, subreq, NULL, excess,
|
||||
netfs_trace_donate_to_deferred_next);
|
||||
} else {
|
||||
next = list_next_entry(subreq, rreq_link);
|
||||
WRITE_ONCE(next->prev_donated, excess);
|
||||
trace_netfs_donate(rreq, subreq, next, excess,
|
||||
netfs_trace_donate_to_next);
|
||||
}
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_donate_to_next);
|
||||
subreq->len = subreq->consumed;
|
||||
subreq->transferred = subreq->consumed;
|
||||
goto remove_subreq_locked;
|
||||
|
||||
remove_subreq:
|
||||
spin_lock_bh(&rreq->lock);
|
||||
remove_subreq_locked:
|
||||
subreq->consumed = subreq->len;
|
||||
list_del(&subreq->rreq_link);
|
||||
spin_unlock_bh(&rreq->lock);
|
||||
netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_consumed);
|
||||
return true;
|
||||
|
||||
bad:
|
||||
/* Errr... prev and next both donated to us, but insufficient to finish
|
||||
* the folio.
|
||||
*/
|
||||
printk("R=%08x[%x] s=%llx-%llx %zx/%zx/%zx\n",
|
||||
rreq->debug_id, subreq->debug_index,
|
||||
subreq->start, subreq->start + subreq->transferred - 1,
|
||||
subreq->consumed, subreq->transferred, subreq->len);
|
||||
printk("folio: %llx-%llx\n", fpos, fend - 1);
|
||||
printk("donated: prev=%zx next=%zx\n", prev_donated, next_donated);
|
||||
printk("s=%llx av=%zx part=%zx\n", start, avail, part);
|
||||
BUG();
|
||||
_debug("retry");
|
||||
netfs_retry_reads(rreq);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -318,12 +340,13 @@ static bool netfs_consume_read_data(struct netfs_io_subrequest *subreq, bool was
|
||||
static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
|
||||
{
|
||||
struct netfs_io_subrequest *subreq;
|
||||
struct netfs_io_stream *stream = &rreq->io_streams[0];
|
||||
unsigned int i;
|
||||
|
||||
/* Collect unbuffered reads and direct reads, adding up the transfer
|
||||
* sizes until we find the first short or failed subrequest.
|
||||
*/
|
||||
list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
|
||||
list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
|
||||
rreq->transferred += subreq->transferred;
|
||||
|
||||
if (subreq->transferred < subreq->len ||
|
||||
@ -356,25 +379,67 @@ static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
|
||||
}
|
||||
|
||||
/*
|
||||
* Assess the state of a read request and decide what to do next.
|
||||
* Do processing after reading a monolithic single object.
|
||||
*/
|
||||
static void netfs_rreq_assess_single(struct netfs_io_request *rreq)
|
||||
{
|
||||
struct netfs_io_stream *stream = &rreq->io_streams[0];
|
||||
|
||||
if (!rreq->error && stream->source == NETFS_DOWNLOAD_FROM_SERVER &&
|
||||
fscache_resources_valid(&rreq->cache_resources)) {
|
||||
trace_netfs_rreq(rreq, netfs_rreq_trace_dirty);
|
||||
netfs_single_mark_inode_dirty(rreq->inode);
|
||||
}
|
||||
|
||||
if (rreq->iocb) {
|
||||
rreq->iocb->ki_pos += rreq->transferred;
|
||||
if (rreq->iocb->ki_complete)
|
||||
rreq->iocb->ki_complete(
|
||||
rreq->iocb, rreq->error ? rreq->error : rreq->transferred);
|
||||
}
|
||||
if (rreq->netfs_ops->done)
|
||||
rreq->netfs_ops->done(rreq);
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform the collection of subrequests and folios.
|
||||
*
|
||||
* Note that we're in normal kernel thread context at this point, possibly
|
||||
* running on a workqueue.
|
||||
*/
|
||||
static void netfs_rreq_assess(struct netfs_io_request *rreq)
|
||||
static void netfs_read_collection(struct netfs_io_request *rreq)
|
||||
{
|
||||
trace_netfs_rreq(rreq, netfs_rreq_trace_assess);
|
||||
struct netfs_io_stream *stream = &rreq->io_streams[0];
|
||||
|
||||
netfs_collect_read_results(rreq);
|
||||
|
||||
/* We're done when the app thread has finished posting subreqs and the
|
||||
* queue is empty.
|
||||
*/
|
||||
if (!test_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags))
|
||||
return;
|
||||
smp_rmb(); /* Read ALL_QUEUED before subreq lists. */
|
||||
|
||||
if (!list_empty(&stream->subrequests))
|
||||
return;
|
||||
|
||||
/* Okay, declare that all I/O is complete. */
|
||||
rreq->transferred = stream->transferred;
|
||||
trace_netfs_rreq(rreq, netfs_rreq_trace_complete);
|
||||
|
||||
//netfs_rreq_is_still_valid(rreq);
|
||||
|
||||
if (test_and_clear_bit(NETFS_RREQ_NEED_RETRY, &rreq->flags)) {
|
||||
netfs_retry_reads(rreq);
|
||||
return;
|
||||
}
|
||||
|
||||
if (rreq->origin == NETFS_DIO_READ ||
|
||||
rreq->origin == NETFS_READ_GAPS)
|
||||
switch (rreq->origin) {
|
||||
case NETFS_DIO_READ:
|
||||
case NETFS_READ_GAPS:
|
||||
netfs_rreq_assess_dio(rreq);
|
||||
break;
|
||||
case NETFS_READ_SINGLE:
|
||||
netfs_rreq_assess_single(rreq);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
task_io_account_read(rreq->transferred);
|
||||
|
||||
trace_netfs_rreq(rreq, netfs_rreq_trace_wake_ip);
|
||||
@ -388,57 +453,62 @@ static void netfs_rreq_assess(struct netfs_io_request *rreq)
|
||||
netfs_pgpriv2_write_to_the_cache(rreq);
|
||||
}
|
||||
|
||||
void netfs_read_termination_worker(struct work_struct *work)
|
||||
void netfs_read_collection_worker(struct work_struct *work)
|
||||
{
|
||||
struct netfs_io_request *rreq =
|
||||
container_of(work, struct netfs_io_request, work);
|
||||
struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work);
|
||||
|
||||
netfs_see_request(rreq, netfs_rreq_trace_see_work);
|
||||
netfs_rreq_assess(rreq);
|
||||
netfs_put_request(rreq, false, netfs_rreq_trace_put_work_complete);
|
||||
if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags))
|
||||
netfs_read_collection(rreq);
|
||||
netfs_put_request(rreq, false, netfs_rreq_trace_put_work);
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle the completion of all outstanding I/O operations on a read request.
|
||||
* We inherit a ref from the caller.
|
||||
* Wake the collection work item.
|
||||
*/
|
||||
void netfs_rreq_terminated(struct netfs_io_request *rreq, bool was_async)
|
||||
void netfs_wake_read_collector(struct netfs_io_request *rreq)
|
||||
{
|
||||
if (!was_async)
|
||||
return netfs_rreq_assess(rreq);
|
||||
if (!work_pending(&rreq->work)) {
|
||||
netfs_get_request(rreq, netfs_rreq_trace_get_work);
|
||||
if (!queue_work(system_unbound_wq, &rreq->work))
|
||||
netfs_put_request(rreq, was_async, netfs_rreq_trace_put_work_nq);
|
||||
if (test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) {
|
||||
if (!work_pending(&rreq->work)) {
|
||||
netfs_get_request(rreq, netfs_rreq_trace_get_work);
|
||||
if (!queue_work(system_unbound_wq, &rreq->work))
|
||||
netfs_put_request(rreq, true, netfs_rreq_trace_put_work_nq);
|
||||
}
|
||||
} else {
|
||||
trace_netfs_rreq(rreq, netfs_rreq_trace_wake_queue);
|
||||
wake_up(&rreq->waitq);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* netfs_read_subreq_progress - Note progress of a read operation.
|
||||
* @subreq: The read request that has terminated.
|
||||
* @was_async: True if we're in an asynchronous context.
|
||||
*
|
||||
* This tells the read side of netfs lib that a contributory I/O operation has
|
||||
* made some progress and that it may be possible to unlock some folios.
|
||||
*
|
||||
* Before calling, the filesystem should update subreq->transferred to track
|
||||
* the amount of data copied into the output buffer.
|
||||
*
|
||||
* If @was_async is true, the caller might be running in softirq or interrupt
|
||||
* context and we can't sleep.
|
||||
*/
|
||||
void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq,
|
||||
bool was_async)
|
||||
void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq)
|
||||
{
|
||||
struct netfs_io_request *rreq = subreq->rreq;
|
||||
struct netfs_io_stream *stream = &rreq->io_streams[0];
|
||||
size_t fsize = PAGE_SIZE << rreq->front_folio_order;
|
||||
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_progress);
|
||||
|
||||
if (subreq->transferred > subreq->consumed &&
|
||||
/* If we are at the head of the queue, wake up the collector,
|
||||
* getting a ref to it if we were the ones to do so.
|
||||
*/
|
||||
if (subreq->start + subreq->transferred > rreq->cleaned_to + fsize &&
|
||||
(rreq->origin == NETFS_READAHEAD ||
|
||||
rreq->origin == NETFS_READPAGE ||
|
||||
rreq->origin == NETFS_READ_FOR_WRITE)) {
|
||||
netfs_consume_read_data(subreq, was_async);
|
||||
__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
|
||||
rreq->origin == NETFS_READ_FOR_WRITE) &&
|
||||
list_is_first(&subreq->rreq_link, &stream->subrequests)
|
||||
) {
|
||||
__set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
|
||||
netfs_wake_read_collector(rreq);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(netfs_read_subreq_progress);
|
||||
@ -446,27 +516,23 @@ EXPORT_SYMBOL(netfs_read_subreq_progress);
|
||||
/**
|
||||
* netfs_read_subreq_terminated - Note the termination of an I/O operation.
|
||||
* @subreq: The I/O request that has terminated.
|
||||
* @error: Error code indicating type of completion.
|
||||
* @was_async: The termination was asynchronous
|
||||
*
|
||||
* This tells the read helper that a contributory I/O operation has terminated,
|
||||
* one way or another, and that it should integrate the results.
|
||||
*
|
||||
* The caller indicates the outcome of the operation through @error, supplying
|
||||
* 0 to indicate a successful or retryable transfer (if NETFS_SREQ_NEED_RETRY
|
||||
* is set) or a negative error code. The helper will look after reissuing I/O
|
||||
* operations as appropriate and writing downloaded data to the cache.
|
||||
* The caller indicates the outcome of the operation through @subreq->error,
|
||||
* supplying 0 to indicate a successful or retryable transfer (if
|
||||
* NETFS_SREQ_NEED_RETRY is set) or a negative error code. The helper will
|
||||
* look after reissuing I/O operations as appropriate and writing downloaded
|
||||
* data to the cache.
|
||||
*
|
||||
* Before calling, the filesystem should update subreq->transferred to track
|
||||
* the amount of data copied into the output buffer.
|
||||
*
|
||||
* If @was_async is true, the caller might be running in softirq or interrupt
|
||||
* context and we can't sleep.
|
||||
*/
|
||||
void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq,
|
||||
int error, bool was_async)
|
||||
void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq)
|
||||
{
|
||||
struct netfs_io_request *rreq = subreq->rreq;
|
||||
struct netfs_io_stream *stream = &rreq->io_streams[0];
|
||||
|
||||
switch (subreq->source) {
|
||||
case NETFS_READ_FROM_CACHE:
|
||||
@ -479,68 +545,114 @@ void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq,
|
||||
break;
|
||||
}
|
||||
|
||||
if (rreq->origin != NETFS_DIO_READ) {
|
||||
/* Collect buffered reads.
|
||||
*
|
||||
* If the read completed validly short, then we can clear the
|
||||
* tail before going on to unlock the folios.
|
||||
*/
|
||||
if (error == 0 && subreq->transferred < subreq->len &&
|
||||
(test_bit(NETFS_SREQ_HIT_EOF, &subreq->flags) ||
|
||||
test_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags))) {
|
||||
netfs_clear_unread(subreq);
|
||||
subreq->transferred = subreq->len;
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_clear);
|
||||
}
|
||||
if (subreq->transferred > subreq->consumed &&
|
||||
(rreq->origin == NETFS_READAHEAD ||
|
||||
rreq->origin == NETFS_READPAGE ||
|
||||
rreq->origin == NETFS_READ_FOR_WRITE)) {
|
||||
netfs_consume_read_data(subreq, was_async);
|
||||
__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
|
||||
}
|
||||
rreq->transferred += subreq->transferred;
|
||||
}
|
||||
|
||||
/* Deal with retry requests, short reads and errors. If we retry
|
||||
* but don't make progress, we abandon the attempt.
|
||||
*/
|
||||
if (!error && subreq->transferred < subreq->len) {
|
||||
if (!subreq->error && subreq->transferred < subreq->len) {
|
||||
if (test_bit(NETFS_SREQ_HIT_EOF, &subreq->flags)) {
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_hit_eof);
|
||||
} else if (test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_need_retry);
|
||||
} else if (test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags)) {
|
||||
__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_partial_read);
|
||||
} else {
|
||||
__set_bit(NETFS_SREQ_FAILED, &subreq->flags);
|
||||
subreq->error = -ENODATA;
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_short);
|
||||
if (subreq->transferred > subreq->consumed) {
|
||||
__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
|
||||
__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
|
||||
set_bit(NETFS_RREQ_NEED_RETRY, &rreq->flags);
|
||||
} else if (!__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) {
|
||||
__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
|
||||
set_bit(NETFS_RREQ_NEED_RETRY, &rreq->flags);
|
||||
} else {
|
||||
__set_bit(NETFS_SREQ_FAILED, &subreq->flags);
|
||||
error = -ENODATA;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
subreq->error = error;
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);
|
||||
|
||||
if (unlikely(error < 0)) {
|
||||
trace_netfs_failure(rreq, subreq, error, netfs_fail_read);
|
||||
if (unlikely(subreq->error < 0)) {
|
||||
trace_netfs_failure(rreq, subreq, subreq->error, netfs_fail_read);
|
||||
if (subreq->source == NETFS_READ_FROM_CACHE) {
|
||||
netfs_stat(&netfs_n_rh_read_failed);
|
||||
__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
|
||||
} else {
|
||||
netfs_stat(&netfs_n_rh_download_failed);
|
||||
set_bit(NETFS_RREQ_FAILED, &rreq->flags);
|
||||
rreq->error = subreq->error;
|
||||
__set_bit(NETFS_SREQ_FAILED, &subreq->flags);
|
||||
}
|
||||
trace_netfs_rreq(rreq, netfs_rreq_trace_set_pause);
|
||||
set_bit(NETFS_RREQ_PAUSE, &rreq->flags);
|
||||
}
|
||||
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);
|
||||
|
||||
clear_bit_unlock(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
|
||||
|
||||
/* If we are at the head of the queue, wake up the collector. */
|
||||
if (list_is_first(&subreq->rreq_link, &stream->subrequests))
|
||||
netfs_wake_read_collector(rreq);
|
||||
|
||||
netfs_put_subrequest(subreq, true, netfs_sreq_trace_put_terminated);
|
||||
}
|
||||
EXPORT_SYMBOL(netfs_read_subreq_terminated);
|
||||
|
||||
/*
|
||||
* Handle termination of a read from the cache.
|
||||
*/
|
||||
void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool was_async)
|
||||
{
|
||||
struct netfs_io_subrequest *subreq = priv;
|
||||
|
||||
if (transferred_or_error > 0) {
|
||||
subreq->error = 0;
|
||||
if (transferred_or_error > 0) {
|
||||
subreq->transferred += transferred_or_error;
|
||||
__set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
|
||||
}
|
||||
} else {
|
||||
subreq->error = transferred_or_error;
|
||||
}
|
||||
netfs_read_subreq_terminated(subreq);
|
||||
}
|
||||
|
||||
/*
|
||||
* Wait for the read operation to complete, successfully or otherwise.
|
||||
*/
|
||||
ssize_t netfs_wait_for_read(struct netfs_io_request *rreq)
|
||||
{
|
||||
struct netfs_io_subrequest *subreq;
|
||||
struct netfs_io_stream *stream = &rreq->io_streams[0];
|
||||
DEFINE_WAIT(myself);
|
||||
ssize_t ret;
|
||||
|
||||
for (;;) {
|
||||
trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
|
||||
prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
|
||||
|
||||
subreq = list_first_entry_or_null(&stream->subrequests,
|
||||
struct netfs_io_subrequest, rreq_link);
|
||||
if (subreq &&
|
||||
(!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) ||
|
||||
test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags)))
|
||||
netfs_read_collection(rreq);
|
||||
|
||||
if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags))
|
||||
break;
|
||||
|
||||
schedule();
|
||||
trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
|
||||
}
|
||||
|
||||
finish_wait(&rreq->waitq, &myself);
|
||||
|
||||
ret = rreq->error;
|
||||
if (ret == 0) {
|
||||
ret = rreq->transferred;
|
||||
switch (rreq->origin) {
|
||||
case NETFS_DIO_READ:
|
||||
case NETFS_READ_SINGLE:
|
||||
ret = rreq->transferred;
|
||||
break;
|
||||
default:
|
||||
if (rreq->submitted < rreq->len) {
|
||||
trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
|
||||
ret = -EIO;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (atomic_dec_and_test(&rreq->nr_outstanding))
|
||||
netfs_rreq_terminated(rreq, was_async);
|
||||
|
||||
netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(netfs_read_subreq_terminated);
|
||||
|
@ -18,8 +18,7 @@
|
||||
* third mark in the folio queue is used to indicate that this folio needs
|
||||
* writing.
|
||||
*/
|
||||
void netfs_pgpriv2_mark_copy_to_cache(struct netfs_io_subrequest *subreq,
|
||||
struct netfs_io_request *rreq,
|
||||
void netfs_pgpriv2_mark_copy_to_cache(struct netfs_io_request *rreq,
|
||||
struct folio_queue *folioq,
|
||||
int slot)
|
||||
{
|
||||
@ -34,8 +33,9 @@ void netfs_pgpriv2_mark_copy_to_cache(struct netfs_io_subrequest *subreq,
|
||||
* [DEPRECATED] Cancel PG_private_2 on all marked folios in the event of an
|
||||
* unrecoverable error.
|
||||
*/
|
||||
static void netfs_pgpriv2_cancel(struct folio_queue *folioq)
|
||||
static void netfs_pgpriv2_cancel(struct rolling_buffer *buffer)
|
||||
{
|
||||
struct folio_queue *folioq = buffer->tail;
|
||||
struct folio *folio;
|
||||
int slot;
|
||||
|
||||
@ -94,7 +94,7 @@ static int netfs_pgpriv2_copy_folio(struct netfs_io_request *wreq, struct folio
|
||||
trace_netfs_folio(folio, netfs_folio_trace_store_copy);
|
||||
|
||||
/* Attach the folio to the rolling buffer. */
|
||||
if (netfs_buffer_append_folio(wreq, folio, false) < 0)
|
||||
if (rolling_buffer_append(&wreq->buffer, folio, 0) < 0)
|
||||
return -ENOMEM;
|
||||
|
||||
cache->submit_extendable_to = fsize;
|
||||
@ -109,7 +109,7 @@ static int netfs_pgpriv2_copy_folio(struct netfs_io_request *wreq, struct folio
|
||||
do {
|
||||
ssize_t part;
|
||||
|
||||
wreq->io_iter.iov_offset = cache->submit_off;
|
||||
wreq->buffer.iter.iov_offset = cache->submit_off;
|
||||
|
||||
atomic64_set(&wreq->issued_to, fpos + cache->submit_off);
|
||||
cache->submit_extendable_to = fsize - cache->submit_off;
|
||||
@ -122,8 +122,8 @@ static int netfs_pgpriv2_copy_folio(struct netfs_io_request *wreq, struct folio
|
||||
cache->submit_len -= part;
|
||||
} while (cache->submit_len > 0);
|
||||
|
||||
wreq->io_iter.iov_offset = 0;
|
||||
iov_iter_advance(&wreq->io_iter, fsize);
|
||||
wreq->buffer.iter.iov_offset = 0;
|
||||
rolling_buffer_advance(&wreq->buffer, fsize);
|
||||
atomic64_set(&wreq->issued_to, fpos + fsize);
|
||||
|
||||
if (flen < fsize)
|
||||
@ -151,7 +151,7 @@ void netfs_pgpriv2_write_to_the_cache(struct netfs_io_request *rreq)
|
||||
goto couldnt_start;
|
||||
|
||||
/* Need the first folio to be able to set up the op. */
|
||||
for (folioq = rreq->buffer; folioq; folioq = folioq->next) {
|
||||
for (folioq = rreq->buffer.tail; folioq; folioq = folioq->next) {
|
||||
if (folioq->marks3) {
|
||||
slot = __ffs(folioq->marks3);
|
||||
break;
|
||||
@ -194,7 +194,7 @@ void netfs_pgpriv2_write_to_the_cache(struct netfs_io_request *rreq)
|
||||
netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
|
||||
_leave(" = %d", error);
|
||||
couldnt_start:
|
||||
netfs_pgpriv2_cancel(rreq->buffer);
|
||||
netfs_pgpriv2_cancel(&rreq->buffer);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -203,13 +203,13 @@ void netfs_pgpriv2_write_to_the_cache(struct netfs_io_request *rreq)
|
||||
*/
|
||||
bool netfs_pgpriv2_unlock_copied_folios(struct netfs_io_request *wreq)
|
||||
{
|
||||
struct folio_queue *folioq = wreq->buffer;
|
||||
struct folio_queue *folioq = wreq->buffer.tail;
|
||||
unsigned long long collected_to = wreq->collected_to;
|
||||
unsigned int slot = wreq->buffer_head_slot;
|
||||
unsigned int slot = wreq->buffer.first_tail_slot;
|
||||
bool made_progress = false;
|
||||
|
||||
if (slot >= folioq_nr_slots(folioq)) {
|
||||
folioq = netfs_delete_buffer_head(wreq);
|
||||
folioq = rolling_buffer_delete_spent(&wreq->buffer);
|
||||
slot = 0;
|
||||
}
|
||||
|
||||
@ -248,9 +248,9 @@ bool netfs_pgpriv2_unlock_copied_folios(struct netfs_io_request *wreq)
|
||||
folioq_clear(folioq, slot);
|
||||
slot++;
|
||||
if (slot >= folioq_nr_slots(folioq)) {
|
||||
if (READ_ONCE(wreq->buffer_tail) == folioq)
|
||||
break;
|
||||
folioq = netfs_delete_buffer_head(wreq);
|
||||
folioq = rolling_buffer_delete_spent(&wreq->buffer);
|
||||
if (!folioq)
|
||||
goto done;
|
||||
slot = 0;
|
||||
}
|
||||
|
||||
@ -258,7 +258,8 @@ bool netfs_pgpriv2_unlock_copied_folios(struct netfs_io_request *wreq)
|
||||
break;
|
||||
}
|
||||
|
||||
wreq->buffer = folioq;
|
||||
wreq->buffer_head_slot = slot;
|
||||
wreq->buffer.tail = folioq;
|
||||
done:
|
||||
wreq->buffer.first_tail_slot = slot;
|
||||
return made_progress;
|
||||
}
|
||||
|
@ -12,15 +12,8 @@
|
||||
static void netfs_reissue_read(struct netfs_io_request *rreq,
|
||||
struct netfs_io_subrequest *subreq)
|
||||
{
|
||||
struct iov_iter *io_iter = &subreq->io_iter;
|
||||
|
||||
if (iov_iter_is_folioq(io_iter)) {
|
||||
subreq->curr_folioq = (struct folio_queue *)io_iter->folioq;
|
||||
subreq->curr_folioq_slot = io_iter->folioq_slot;
|
||||
subreq->curr_folio_order = subreq->curr_folioq->orders[subreq->curr_folioq_slot];
|
||||
}
|
||||
|
||||
atomic_inc(&rreq->nr_outstanding);
|
||||
__clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
|
||||
__set_bit(NETFS_SREQ_RETRYING, &subreq->flags);
|
||||
__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
|
||||
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
|
||||
subreq->rreq->netfs_ops->issue_read(subreq);
|
||||
@ -33,13 +26,12 @@ static void netfs_reissue_read(struct netfs_io_request *rreq,
|
||||
static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
|
||||
{
|
||||
struct netfs_io_subrequest *subreq;
|
||||
struct netfs_io_stream *stream0 = &rreq->io_streams[0];
|
||||
LIST_HEAD(sublist);
|
||||
LIST_HEAD(queue);
|
||||
struct netfs_io_stream *stream = &rreq->io_streams[0];
|
||||
struct list_head *next;
|
||||
|
||||
_enter("R=%x", rreq->debug_id);
|
||||
|
||||
if (list_empty(&rreq->subrequests))
|
||||
if (list_empty(&stream->subrequests))
|
||||
return;
|
||||
|
||||
if (rreq->netfs_ops->retry_request)
|
||||
@ -52,7 +44,7 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
|
||||
!test_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags)) {
|
||||
struct netfs_io_subrequest *subreq;
|
||||
|
||||
list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
|
||||
list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
|
||||
if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
|
||||
break;
|
||||
if (__test_and_clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
|
||||
@ -73,48 +65,44 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
|
||||
* populating with smaller subrequests. In the event that the subreq
|
||||
* we just launched finishes before we insert the next subreq, it'll
|
||||
* fill in rreq->prev_donated instead.
|
||||
|
||||
*
|
||||
* Note: Alternatively, we could split the tail subrequest right before
|
||||
* we reissue it and fix up the donations under lock.
|
||||
*/
|
||||
list_splice_init(&rreq->subrequests, &queue);
|
||||
next = stream->subrequests.next;
|
||||
|
||||
do {
|
||||
struct netfs_io_subrequest *from;
|
||||
struct netfs_io_subrequest *subreq = NULL, *from, *to, *tmp;
|
||||
struct iov_iter source;
|
||||
unsigned long long start, len;
|
||||
size_t part, deferred_next_donated = 0;
|
||||
size_t part;
|
||||
bool boundary = false;
|
||||
|
||||
/* Go through the subreqs and find the next span of contiguous
|
||||
* buffer that we then rejig (cifs, for example, needs the
|
||||
* rsize renegotiating) and reissue.
|
||||
*/
|
||||
from = list_first_entry(&queue, struct netfs_io_subrequest, rreq_link);
|
||||
list_move_tail(&from->rreq_link, &sublist);
|
||||
from = list_entry(next, struct netfs_io_subrequest, rreq_link);
|
||||
to = from;
|
||||
start = from->start + from->transferred;
|
||||
len = from->len - from->transferred;
|
||||
|
||||
_debug("from R=%08x[%x] s=%llx ctl=%zx/%zx/%zx",
|
||||
_debug("from R=%08x[%x] s=%llx ctl=%zx/%zx",
|
||||
rreq->debug_id, from->debug_index,
|
||||
from->start, from->consumed, from->transferred, from->len);
|
||||
from->start, from->transferred, from->len);
|
||||
|
||||
if (test_bit(NETFS_SREQ_FAILED, &from->flags) ||
|
||||
!test_bit(NETFS_SREQ_NEED_RETRY, &from->flags))
|
||||
goto abandon;
|
||||
|
||||
deferred_next_donated = from->next_donated;
|
||||
while ((subreq = list_first_entry_or_null(
|
||||
&queue, struct netfs_io_subrequest, rreq_link))) {
|
||||
if (subreq->start != start + len ||
|
||||
subreq->transferred > 0 ||
|
||||
list_for_each_continue(next, &stream->subrequests) {
|
||||
subreq = list_entry(next, struct netfs_io_subrequest, rreq_link);
|
||||
if (subreq->start + subreq->transferred != start + len ||
|
||||
test_bit(NETFS_SREQ_BOUNDARY, &subreq->flags) ||
|
||||
!test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags))
|
||||
break;
|
||||
list_move_tail(&subreq->rreq_link, &sublist);
|
||||
len += subreq->len;
|
||||
deferred_next_donated = subreq->next_donated;
|
||||
if (test_bit(NETFS_SREQ_BOUNDARY, &subreq->flags))
|
||||
break;
|
||||
to = subreq;
|
||||
len += to->len;
|
||||
}
|
||||
|
||||
_debug(" - range: %llx-%llx %llx", start, start + len - 1, len);
|
||||
@ -127,36 +115,28 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
|
||||
source.count = len;
|
||||
|
||||
/* Work through the sublist. */
|
||||
while ((subreq = list_first_entry_or_null(
|
||||
&sublist, struct netfs_io_subrequest, rreq_link))) {
|
||||
list_del(&subreq->rreq_link);
|
||||
|
||||
subreq = from;
|
||||
list_for_each_entry_from(subreq, &stream->subrequests, rreq_link) {
|
||||
if (!len)
|
||||
break;
|
||||
subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
|
||||
subreq->start = start - subreq->transferred;
|
||||
subreq->len = len + subreq->transferred;
|
||||
stream0->sreq_max_len = subreq->len;
|
||||
|
||||
__clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
|
||||
__set_bit(NETFS_SREQ_RETRYING, &subreq->flags);
|
||||
|
||||
spin_lock_bh(&rreq->lock);
|
||||
list_add_tail(&subreq->rreq_link, &rreq->subrequests);
|
||||
subreq->prev_donated += rreq->prev_donated;
|
||||
rreq->prev_donated = 0;
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
|
||||
spin_unlock_bh(&rreq->lock);
|
||||
|
||||
BUG_ON(!len);
|
||||
|
||||
/* Renegotiate max_len (rsize) */
|
||||
stream->sreq_max_len = subreq->len;
|
||||
if (rreq->netfs_ops->prepare_read(subreq) < 0) {
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_reprep_failed);
|
||||
__set_bit(NETFS_SREQ_FAILED, &subreq->flags);
|
||||
goto abandon;
|
||||
}
|
||||
|
||||
part = umin(len, stream0->sreq_max_len);
|
||||
if (unlikely(rreq->io_streams[0].sreq_max_segs))
|
||||
part = netfs_limit_iter(&source, 0, part, stream0->sreq_max_segs);
|
||||
part = umin(len, stream->sreq_max_len);
|
||||
if (unlikely(stream->sreq_max_segs))
|
||||
part = netfs_limit_iter(&source, 0, part, stream->sreq_max_segs);
|
||||
subreq->len = subreq->transferred + part;
|
||||
subreq->io_iter = source;
|
||||
iov_iter_truncate(&subreq->io_iter, part);
|
||||
@ -166,58 +146,106 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
|
||||
if (!len) {
|
||||
if (boundary)
|
||||
__set_bit(NETFS_SREQ_BOUNDARY, &subreq->flags);
|
||||
subreq->next_donated = deferred_next_donated;
|
||||
} else {
|
||||
__clear_bit(NETFS_SREQ_BOUNDARY, &subreq->flags);
|
||||
subreq->next_donated = 0;
|
||||
}
|
||||
|
||||
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
|
||||
netfs_reissue_read(rreq, subreq);
|
||||
if (!len)
|
||||
if (subreq == to)
|
||||
break;
|
||||
|
||||
/* If we ran out of subrequests, allocate another. */
|
||||
if (list_empty(&sublist)) {
|
||||
subreq = netfs_alloc_subrequest(rreq);
|
||||
if (!subreq)
|
||||
goto abandon;
|
||||
subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
|
||||
subreq->start = start;
|
||||
|
||||
/* We get two refs, but need just one. */
|
||||
netfs_put_subrequest(subreq, false, netfs_sreq_trace_new);
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_split);
|
||||
list_add_tail(&subreq->rreq_link, &sublist);
|
||||
}
|
||||
}
|
||||
|
||||
/* If we managed to use fewer subreqs, we can discard the
|
||||
* excess.
|
||||
* excess; if we used the same number, then we're done.
|
||||
*/
|
||||
while ((subreq = list_first_entry_or_null(
|
||||
&sublist, struct netfs_io_subrequest, rreq_link))) {
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_discard);
|
||||
list_del(&subreq->rreq_link);
|
||||
netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done);
|
||||
if (!len) {
|
||||
if (subreq == to)
|
||||
continue;
|
||||
list_for_each_entry_safe_from(subreq, tmp,
|
||||
&stream->subrequests, rreq_link) {
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_discard);
|
||||
list_del(&subreq->rreq_link);
|
||||
netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done);
|
||||
if (subreq == to)
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
} while (!list_empty(&queue));
|
||||
/* We ran out of subrequests, so we need to allocate some more
|
||||
* and insert them after.
|
||||
*/
|
||||
do {
|
||||
subreq = netfs_alloc_subrequest(rreq);
|
||||
if (!subreq) {
|
||||
subreq = to;
|
||||
goto abandon_after;
|
||||
}
|
||||
subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
|
||||
subreq->start = start;
|
||||
subreq->len = len;
|
||||
subreq->debug_index = atomic_inc_return(&rreq->subreq_counter);
|
||||
subreq->stream_nr = stream->stream_nr;
|
||||
__set_bit(NETFS_SREQ_RETRYING, &subreq->flags);
|
||||
|
||||
trace_netfs_sreq_ref(rreq->debug_id, subreq->debug_index,
|
||||
refcount_read(&subreq->ref),
|
||||
netfs_sreq_trace_new);
|
||||
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
|
||||
|
||||
list_add(&subreq->rreq_link, &to->rreq_link);
|
||||
to = list_next_entry(to, rreq_link);
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
|
||||
|
||||
stream->sreq_max_len = umin(len, rreq->rsize);
|
||||
stream->sreq_max_segs = 0;
|
||||
if (unlikely(stream->sreq_max_segs))
|
||||
part = netfs_limit_iter(&source, 0, part, stream->sreq_max_segs);
|
||||
|
||||
netfs_stat(&netfs_n_rh_download);
|
||||
if (rreq->netfs_ops->prepare_read(subreq) < 0) {
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_reprep_failed);
|
||||
__set_bit(NETFS_SREQ_FAILED, &subreq->flags);
|
||||
goto abandon;
|
||||
}
|
||||
|
||||
part = umin(len, stream->sreq_max_len);
|
||||
subreq->len = subreq->transferred + part;
|
||||
subreq->io_iter = source;
|
||||
iov_iter_truncate(&subreq->io_iter, part);
|
||||
iov_iter_advance(&source, part);
|
||||
|
||||
len -= part;
|
||||
start += part;
|
||||
if (!len && boundary) {
|
||||
__set_bit(NETFS_SREQ_BOUNDARY, &to->flags);
|
||||
boundary = false;
|
||||
}
|
||||
|
||||
netfs_reissue_read(rreq, subreq);
|
||||
} while (len);
|
||||
|
||||
} while (!list_is_head(next, &stream->subrequests));
|
||||
|
||||
return;
|
||||
|
||||
/* If we hit ENOMEM, fail all remaining subrequests */
|
||||
/* If we hit an error, fail all remaining incomplete subrequests */
|
||||
abandon_after:
|
||||
if (list_is_last(&subreq->rreq_link, &stream->subrequests))
|
||||
return;
|
||||
subreq = list_next_entry(subreq, rreq_link);
|
||||
abandon:
|
||||
list_splice_init(&sublist, &queue);
|
||||
list_for_each_entry(subreq, &queue, rreq_link) {
|
||||
if (!subreq->error)
|
||||
subreq->error = -ENOMEM;
|
||||
__clear_bit(NETFS_SREQ_FAILED, &subreq->flags);
|
||||
list_for_each_entry_from(subreq, &stream->subrequests, rreq_link) {
|
||||
if (!subreq->error &&
|
||||
!test_bit(NETFS_SREQ_FAILED, &subreq->flags) &&
|
||||
!test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags))
|
||||
continue;
|
||||
subreq->error = -ENOMEM;
|
||||
__set_bit(NETFS_SREQ_FAILED, &subreq->flags);
|
||||
__clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
|
||||
__clear_bit(NETFS_SREQ_RETRYING, &subreq->flags);
|
||||
}
|
||||
spin_lock_bh(&rreq->lock);
|
||||
list_splice_tail_init(&queue, &rreq->subrequests);
|
||||
spin_unlock_bh(&rreq->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -225,14 +253,19 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
|
||||
*/
|
||||
void netfs_retry_reads(struct netfs_io_request *rreq)
|
||||
{
|
||||
struct netfs_io_subrequest *subreq;
|
||||
struct netfs_io_stream *stream = &rreq->io_streams[0];
|
||||
|
||||
/* Wait for all outstanding I/O to quiesce before performing retries as
|
||||
* we may need to renegotiate the I/O sizes.
|
||||
*/
|
||||
list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
|
||||
wait_on_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
|
||||
trace_netfs_rreq(rreq, netfs_rreq_trace_resubmit);
|
||||
|
||||
atomic_inc(&rreq->nr_outstanding);
|
||||
|
||||
netfs_retry_read_subrequests(rreq);
|
||||
|
||||
if (atomic_dec_and_test(&rreq->nr_outstanding))
|
||||
netfs_rreq_terminated(rreq, false);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -243,7 +276,7 @@ void netfs_unlock_abandoned_read_pages(struct netfs_io_request *rreq)
|
||||
{
|
||||
struct folio_queue *p;
|
||||
|
||||
for (p = rreq->buffer; p; p = p->next) {
|
||||
for (p = rreq->buffer.tail; p; p = p->next) {
|
||||
for (int slot = 0; slot < folioq_count(p); slot++) {
|
||||
struct folio *folio = folioq_folio(p, slot);
|
||||
|
||||
|
fs/netfs/read_single.c (new file)
@ -0,0 +1,195 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/* Single, monolithic object support (e.g. AFS directory).
|
||||
*
|
||||
* Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
|
||||
* Written by David Howells (dhowells@redhat.com)
|
||||
*/
|
||||
|
||||
#include <linux/export.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/uio.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/task_io_accounting_ops.h>
|
||||
#include <linux/netfs.h>
|
||||
#include "internal.h"
|
||||
|
||||
/**
|
||||
* netfs_single_mark_inode_dirty - Mark a single, monolithic object inode dirty
|
||||
* @inode: The inode to mark
|
||||
*
|
||||
* Mark an inode that contains a single, monolithic object as dirty so that its
|
||||
* writepages op will get called. If set, the SINGLE_NO_UPLOAD flag indicates
|
||||
* that the object will only be written to the cache and not uploaded (e.g. AFS
|
||||
* directory contents).
|
||||
*/
|
||||
void netfs_single_mark_inode_dirty(struct inode *inode)
|
||||
{
|
||||
struct netfs_inode *ictx = netfs_inode(inode);
|
||||
bool cache_only = test_bit(NETFS_ICTX_SINGLE_NO_UPLOAD, &ictx->flags);
|
||||
bool caching = fscache_cookie_enabled(netfs_i_cookie(netfs_inode(inode)));
|
||||
|
||||
if (cache_only && !caching)
|
||||
return;
|
||||
|
||||
mark_inode_dirty(inode);
|
||||
|
||||
if (caching && !(inode->i_state & I_PINNING_NETFS_WB)) {
|
||||
bool need_use = false;
|
||||
|
||||
spin_lock(&inode->i_lock);
|
||||
if (!(inode->i_state & I_PINNING_NETFS_WB)) {
|
||||
inode->i_state |= I_PINNING_NETFS_WB;
|
||||
need_use = true;
|
||||
}
|
||||
spin_unlock(&inode->i_lock);
|
||||
|
||||
if (need_use)
|
||||
fscache_use_cookie(netfs_i_cookie(ictx), true);
|
||||
}
|
||||
|
||||
}
|
||||
EXPORT_SYMBOL(netfs_single_mark_inode_dirty);
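
A minimal sketch of how a filesystem might use this helper after editing a monolithic object it keeps in memory. Everything named example_* is hypothetical; only netfs_single_mark_inode_dirty(), netfs_inode() and the NETFS_ICTX_SINGLE_NO_UPLOAD flag come from the code above, and whether the flag is set here or at inode-init time is the filesystem's choice.

#include <linux/netfs.h>

/* Hypothetical caller: edit a cache-only monolithic object and schedule
 * writeback of it.  A real filesystem would more likely set
 * NETFS_ICTX_SINGLE_NO_UPLOAD when it initialises the inode's netfs context.
 */
static void example_edit_single_object(struct inode *inode)
{
	struct netfs_inode *ictx = netfs_inode(inode);

	set_bit(NETFS_ICTX_SINGLE_NO_UPLOAD, &ictx->flags);

	/* ... modify the in-memory copy of the object here ... */

	/* Ask for ->writepages() to be called for this inode later. */
	netfs_single_mark_inode_dirty(inode);
}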
|
||||
|
||||
static int netfs_single_begin_cache_read(struct netfs_io_request *rreq, struct netfs_inode *ctx)
|
||||
{
|
||||
return fscache_begin_read_operation(&rreq->cache_resources, netfs_i_cookie(ctx));
|
||||
}
|
||||
|
||||
static void netfs_single_cache_prepare_read(struct netfs_io_request *rreq,
|
||||
struct netfs_io_subrequest *subreq)
|
||||
{
|
||||
struct netfs_cache_resources *cres = &rreq->cache_resources;
|
||||
|
||||
if (!cres->ops) {
|
||||
subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
|
||||
return;
|
||||
}
|
||||
subreq->source = cres->ops->prepare_read(subreq, rreq->i_size);
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
|
||||
|
||||
}
|
||||
|
||||
static void netfs_single_read_cache(struct netfs_io_request *rreq,
|
||||
struct netfs_io_subrequest *subreq)
|
||||
{
|
||||
struct netfs_cache_resources *cres = &rreq->cache_resources;
|
||||
|
||||
_enter("R=%08x[%x]", rreq->debug_id, subreq->debug_index);
|
||||
netfs_stat(&netfs_n_rh_read);
|
||||
cres->ops->read(cres, subreq->start, &subreq->io_iter, NETFS_READ_HOLE_FAIL,
|
||||
netfs_cache_read_terminated, subreq);
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform a read to a buffer from the cache or the server. Only a single
|
||||
* subreq is permitted as the object must be fetched in a single transaction.
|
||||
*/
|
||||
static int netfs_single_dispatch_read(struct netfs_io_request *rreq)
|
||||
{
|
||||
struct netfs_io_stream *stream = &rreq->io_streams[0];
|
||||
struct netfs_io_subrequest *subreq;
|
||||
int ret = 0;
|
||||
|
||||
subreq = netfs_alloc_subrequest(rreq);
|
||||
if (!subreq)
|
||||
return -ENOMEM;
|
||||
|
||||
subreq->source = NETFS_SOURCE_UNKNOWN;
|
||||
subreq->start = 0;
|
||||
subreq->len = rreq->len;
|
||||
subreq->io_iter = rreq->buffer.iter;
|
||||
|
||||
__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
|
||||
|
||||
spin_lock(&rreq->lock);
|
||||
list_add_tail(&subreq->rreq_link, &stream->subrequests);
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_added);
|
||||
stream->front = subreq;
|
||||
/* Store list pointers before active flag */
|
||||
smp_store_release(&stream->active, true);
|
||||
spin_unlock(&rreq->lock);
|
||||
|
||||
netfs_single_cache_prepare_read(rreq, subreq);
|
||||
switch (subreq->source) {
|
||||
case NETFS_DOWNLOAD_FROM_SERVER:
|
||||
netfs_stat(&netfs_n_rh_download);
|
||||
if (rreq->netfs_ops->prepare_read) {
|
||||
ret = rreq->netfs_ops->prepare_read(subreq);
|
||||
if (ret < 0)
|
||||
goto cancel;
|
||||
}
|
||||
|
||||
rreq->netfs_ops->issue_read(subreq);
|
||||
rreq->submitted += subreq->len;
|
||||
break;
|
||||
case NETFS_READ_FROM_CACHE:
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
|
||||
netfs_single_read_cache(rreq, subreq);
|
||||
rreq->submitted += subreq->len;
|
||||
ret = 0;
|
||||
break;
|
||||
default:
|
||||
pr_warn("Unexpected single-read source %u\n", subreq->source);
|
||||
WARN_ON_ONCE(true);
|
||||
ret = -EIO;
|
||||
break;
|
||||
}
|
||||
|
||||
smp_wmb(); /* Write lists before ALL_QUEUED. */
|
||||
set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
|
||||
return ret;
|
||||
cancel:
|
||||
netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* netfs_read_single - Synchronously read a single blob of pages.
|
||||
* @inode: The inode to read from.
|
||||
* @file: The file we're using to read or NULL.
|
||||
* @iter: The buffer we're reading into.
|
||||
*
|
||||
* Fulfil a read request for a single monolithic object by drawing data from
|
||||
* the cache if possible, or the netfs if not. The buffer may be larger than
|
||||
 * the file content; unused space beyond the EOF will be zero-filled. The content
|
||||
* will be read with a single I/O request (though this may be retried).
|
||||
*
|
||||
* The calling netfs must initialise a netfs context contiguous to the vfs
|
||||
* inode before calling this.
|
||||
*
|
||||
* This is usable whether or not caching is enabled. If caching is enabled,
|
||||
* the data will be stored as a single object into the cache.
|
||||
*/
|
||||
ssize_t netfs_read_single(struct inode *inode, struct file *file, struct iov_iter *iter)
|
||||
{
|
||||
struct netfs_io_request *rreq;
|
||||
struct netfs_inode *ictx = netfs_inode(inode);
|
||||
ssize_t ret;
|
||||
|
||||
rreq = netfs_alloc_request(inode->i_mapping, file, 0, iov_iter_count(iter),
|
||||
NETFS_READ_SINGLE);
|
||||
if (IS_ERR(rreq))
|
||||
return PTR_ERR(rreq);
|
||||
|
||||
ret = netfs_single_begin_cache_read(rreq, ictx);
|
||||
if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
|
||||
goto cleanup_free;
|
||||
|
||||
netfs_stat(&netfs_n_rh_read_single);
|
||||
trace_netfs_read(rreq, 0, rreq->len, netfs_read_trace_read_single);
|
||||
|
||||
rreq->buffer.iter = *iter;
|
||||
netfs_single_dispatch_read(rreq);
|
||||
|
||||
ret = netfs_wait_for_read(rreq);
|
||||
netfs_put_request(rreq, true, netfs_rreq_trace_put_return);
|
||||
return ret;
|
||||
|
||||
cleanup_free:
|
||||
netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(netfs_read_single);
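
As a rough usage illustration, here is a hedged sketch of loading a whole object with netfs_read_single(). The kvec-backed buffer is an assumption made purely to keep the example self-contained; a real caller (e.g. the AFS directory code this file mentions) would more likely pass an ITER_FOLIOQ iterator over folios it already owns.

#include <linux/netfs.h>
#include <linux/slab.h>
#include <linux/uio.h>

static ssize_t example_load_single_object(struct inode *inode, size_t size)
{
	struct iov_iter iter;
	struct kvec kv;
	void *buf;
	ssize_t ret;

	buf = kvmalloc(size, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	kv.iov_base = buf;
	kv.iov_len  = size;
	iov_iter_kvec(&iter, ITER_DEST, &kv, 1, size);

	/* Pulls from the cache if possible, otherwise from the server;
	 * anything in the buffer beyond EOF comes back zero-filled.
	 */
	ret = netfs_read_single(inode, NULL, &iter);
	if (ret >= 0) {
		/* ... parse the object in buf ... */
	}
	kvfree(buf);
	return ret;
}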
|
fs/netfs/rolling_buffer.c (new file)
@ -0,0 +1,225 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/* Rolling buffer helpers
|
||||
*
|
||||
* Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
|
||||
* Written by David Howells (dhowells@redhat.com)
|
||||
*/
|
||||
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/rolling_buffer.h>
|
||||
#include <linux/slab.h>
|
||||
#include "internal.h"
|
||||
|
||||
static atomic_t debug_ids;
|
||||
|
||||
/**
|
||||
* netfs_folioq_alloc - Allocate a folio_queue struct
|
||||
* @rreq_id: Associated debugging ID for tracing purposes
|
||||
* @gfp: Allocation constraints
|
||||
* @trace: Trace tag to indicate the purpose of the allocation
|
||||
*
|
||||
* Allocate, initialise and account the folio_queue struct and log a trace line
|
||||
* to mark the allocation.
|
||||
*/
|
||||
struct folio_queue *netfs_folioq_alloc(unsigned int rreq_id, gfp_t gfp,
|
||||
unsigned int /*enum netfs_folioq_trace*/ trace)
|
||||
{
|
||||
struct folio_queue *fq;
|
||||
|
||||
fq = kmalloc(sizeof(*fq), gfp);
|
||||
if (fq) {
|
||||
netfs_stat(&netfs_n_folioq);
|
||||
folioq_init(fq, rreq_id);
|
||||
fq->debug_id = atomic_inc_return(&debug_ids);
|
||||
trace_netfs_folioq(fq, trace);
|
||||
}
|
||||
return fq;
|
||||
}
|
||||
EXPORT_SYMBOL(netfs_folioq_alloc);
|
||||
|
||||
/**
|
||||
* netfs_folioq_free - Free a folio_queue struct
|
||||
* @folioq: The object to free
|
||||
* @trace: Trace tag to indicate which free
|
||||
*
|
||||
* Free and unaccount the folio_queue struct.
|
||||
*/
|
||||
void netfs_folioq_free(struct folio_queue *folioq,
|
||||
unsigned int /*enum netfs_trace_folioq*/ trace)
|
||||
{
|
||||
trace_netfs_folioq(folioq, trace);
|
||||
netfs_stat_d(&netfs_n_folioq);
|
||||
kfree(folioq);
|
||||
}
|
||||
EXPORT_SYMBOL(netfs_folioq_free);
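
A small, hedged sketch of a caller pairing the two exported helpers to grow and later release its own folio_queue chain. The trace tags are borrowed from the rolling-buffer code below purely for illustration, and the plain pointer updates assume the chain is not yet visible to a concurrent consumer (otherwise the smp_store_release() ordering used later in this file would be needed).

#include <linux/folio_queue.h>
#include <linux/netfs.h>

static struct folio_queue *example_extend_chain(struct folio_queue *tail,
						unsigned int rreq_id)
{
	struct folio_queue *fq;

	fq = netfs_folioq_alloc(rreq_id, GFP_KERNEL,
				netfs_trace_folioq_make_space);
	if (!fq)
		return NULL;
	fq->prev = tail;
	if (tail)
		tail->next = fq;	/* single-threaded setup assumed */
	return fq;
}

static void example_drop_segment(struct folio_queue *fq)
{
	netfs_folioq_free(fq, netfs_trace_folioq_delete);
}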
|
||||
|
||||
/*
|
||||
 * Initialise a rolling buffer. We allocate an empty folio queue struct so
|
||||
* that the pointers can be independently driven by the producer and the
|
||||
* consumer.
|
||||
*/
|
||||
int rolling_buffer_init(struct rolling_buffer *roll, unsigned int rreq_id,
|
||||
unsigned int direction)
|
||||
{
|
||||
struct folio_queue *fq;
|
||||
|
||||
fq = netfs_folioq_alloc(rreq_id, GFP_NOFS, netfs_trace_folioq_rollbuf_init);
|
||||
if (!fq)
|
||||
return -ENOMEM;
|
||||
|
||||
roll->head = fq;
|
||||
roll->tail = fq;
|
||||
iov_iter_folio_queue(&roll->iter, direction, fq, 0, 0, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Add another folio_queue to a rolling buffer if there's no space left.
|
||||
*/
|
||||
int rolling_buffer_make_space(struct rolling_buffer *roll)
|
||||
{
|
||||
struct folio_queue *fq, *head = roll->head;
|
||||
|
||||
if (!folioq_full(head))
|
||||
return 0;
|
||||
|
||||
fq = netfs_folioq_alloc(head->rreq_id, GFP_NOFS, netfs_trace_folioq_make_space);
|
||||
if (!fq)
|
||||
return -ENOMEM;
|
||||
fq->prev = head;
|
||||
|
||||
roll->head = fq;
|
||||
if (folioq_full(head)) {
|
||||
/* Make sure we don't leave the master iterator pointing to a
|
||||
* block that might get immediately consumed.
|
||||
*/
|
||||
if (roll->iter.folioq == head &&
|
||||
roll->iter.folioq_slot == folioq_nr_slots(head)) {
|
||||
roll->iter.folioq = fq;
|
||||
roll->iter.folioq_slot = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Make sure the initialisation is stored before the next pointer.
|
||||
*
|
||||
* [!] NOTE: After we set head->next, the consumer is at liberty to
|
||||
* immediately delete the old head.
|
||||
*/
|
||||
smp_store_release(&head->next, fq);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Decant the list of folios to read into a rolling buffer.
|
||||
*/
|
||||
ssize_t rolling_buffer_load_from_ra(struct rolling_buffer *roll,
|
||||
struct readahead_control *ractl,
|
||||
struct folio_batch *put_batch)
|
||||
{
|
||||
struct folio_queue *fq;
|
||||
struct page **vec;
|
||||
int nr, ix, to;
|
||||
ssize_t size = 0;
|
||||
|
||||
if (rolling_buffer_make_space(roll) < 0)
|
||||
return -ENOMEM;
|
||||
|
||||
fq = roll->head;
|
||||
vec = (struct page **)fq->vec.folios;
|
||||
nr = __readahead_batch(ractl, vec + folio_batch_count(&fq->vec),
|
||||
folio_batch_space(&fq->vec));
|
||||
ix = fq->vec.nr;
|
||||
to = ix + nr;
|
||||
fq->vec.nr = to;
|
||||
for (; ix < to; ix++) {
|
||||
struct folio *folio = folioq_folio(fq, ix);
|
||||
unsigned int order = folio_order(folio);
|
||||
|
||||
fq->orders[ix] = order;
|
||||
size += PAGE_SIZE << order;
|
||||
trace_netfs_folio(folio, netfs_folio_trace_read);
|
||||
if (!folio_batch_add(put_batch, folio))
|
||||
folio_batch_release(put_batch);
|
||||
}
|
||||
WRITE_ONCE(roll->iter.count, roll->iter.count + size);
|
||||
|
||||
/* Store the counter after setting the slot. */
|
||||
smp_store_release(&roll->next_head_slot, to);
|
||||
|
||||
for (; ix < folioq_nr_slots(fq); ix++)
|
||||
folioq_clear(fq, ix);
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
/*
|
||||
* Append a folio to the rolling buffer.
|
||||
*/
|
||||
ssize_t rolling_buffer_append(struct rolling_buffer *roll, struct folio *folio,
|
||||
unsigned int flags)
|
||||
{
|
||||
ssize_t size = folio_size(folio);
|
||||
int slot;
|
||||
|
||||
if (rolling_buffer_make_space(roll) < 0)
|
||||
return -ENOMEM;
|
||||
|
||||
slot = folioq_append(roll->head, folio);
|
||||
if (flags & ROLLBUF_MARK_1)
|
||||
folioq_mark(roll->head, slot);
|
||||
if (flags & ROLLBUF_MARK_2)
|
||||
folioq_mark2(roll->head, slot);
|
||||
|
||||
WRITE_ONCE(roll->iter.count, roll->iter.count + size);
|
||||
|
||||
/* Store the counter after setting the slot. */
|
||||
smp_store_release(&roll->next_head_slot, slot);
|
||||
return size;
|
||||
}
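
To show the producer side of these helpers end to end, here is a hedged sketch that initialises a rolling buffer and appends freshly allocated folios to it. example_fill() is hypothetical; the rolling_buffer_*() calls and ROLLBUF_MARK_1 mirror the code above, and marking with ROLLBUF_MARK_1 relies on rolling_buffer_clear() putting marked folios at teardown.

#include <linux/gfp.h>
#include <linux/rolling_buffer.h>

static int example_fill(struct rolling_buffer *roll, unsigned int rreq_id,
			unsigned int nr_folios)
{
	int ret;

	ret = rolling_buffer_init(roll, rreq_id, ITER_SOURCE);
	if (ret < 0)
		return ret;

	for (unsigned int i = 0; i < nr_folios; i++) {
		struct folio *folio = folio_alloc(GFP_KERNEL, 0);

		if (!folio)
			return -ENOMEM;
		/* Mark 1 asks rolling_buffer_clear() to put the folio for
		 * us when the buffer is torn down.
		 */
		if (rolling_buffer_append(roll, folio, ROLLBUF_MARK_1) < 0) {
			folio_put(folio);
			return -ENOMEM;
		}
	}
	return 0;
}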
|
||||
|
||||
/*
|
||||
* Delete a spent buffer from a rolling queue and return the next in line. We
|
||||
* don't return the last buffer to keep the pointers independent, but return
|
||||
* NULL instead.
|
||||
*/
|
||||
struct folio_queue *rolling_buffer_delete_spent(struct rolling_buffer *roll)
|
||||
{
|
||||
struct folio_queue *spent = roll->tail, *next = READ_ONCE(spent->next);
|
||||
|
||||
if (!next)
|
||||
return NULL;
|
||||
next->prev = NULL;
|
||||
netfs_folioq_free(spent, netfs_trace_folioq_delete);
|
||||
roll->tail = next;
|
||||
return next;
|
||||
}
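
And the matching consumer side, as a sketch: work through the folios in the tail segment, then drop the spent segment and move on. The pr_debug() stands in for whatever a real consumer would do with each folio; the final segment is deliberately left in place, as the comment above explains.

static void example_drain(struct rolling_buffer *roll)
{
	struct folio_queue *fq = roll->tail;

	while (fq) {
		for (int slot = 0; slot < folioq_count(fq); slot++) {
			struct folio *folio = folioq_folio(fq, slot);

			if (folio)
				pr_debug("consumed folio %lx\n", folio->index);
		}
		/* Returns NULL instead of freeing the last segment so the
		 * producer and consumer pointers stay independent.
		 */
		fq = rolling_buffer_delete_spent(roll);
	}
}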
|
||||
|
||||
/*
|
||||
* Clear out a rolling queue. Folios that have mark 1 set are put.
|
||||
*/
|
||||
void rolling_buffer_clear(struct rolling_buffer *roll)
|
||||
{
|
||||
struct folio_batch fbatch;
|
||||
struct folio_queue *p;
|
||||
|
||||
folio_batch_init(&fbatch);
|
||||
|
||||
while ((p = roll->tail)) {
|
||||
roll->tail = p->next;
|
||||
for (int slot = 0; slot < folioq_count(p); slot++) {
|
||||
struct folio *folio = folioq_folio(p, slot);
|
||||
if (!folio)
|
||||
continue;
|
||||
if (folioq_is_marked(p, slot)) {
|
||||
trace_netfs_folio(folio, netfs_folio_trace_put);
|
||||
if (!folio_batch_add(&fbatch, folio))
|
||||
folio_batch_release(&fbatch);
|
||||
}
|
||||
}
|
||||
|
||||
netfs_folioq_free(p, netfs_trace_folioq_clear);
|
||||
}
|
||||
|
||||
folio_batch_release(&fbatch);
|
||||
}
|
@ -12,6 +12,7 @@
|
||||
atomic_t netfs_n_rh_dio_read;
|
||||
atomic_t netfs_n_rh_readahead;
|
||||
atomic_t netfs_n_rh_read_folio;
|
||||
atomic_t netfs_n_rh_read_single;
|
||||
atomic_t netfs_n_rh_rreq;
|
||||
atomic_t netfs_n_rh_sreq;
|
||||
atomic_t netfs_n_rh_download;
|
||||
@ -46,10 +47,11 @@ atomic_t netfs_n_folioq;
|
||||
|
||||
int netfs_stats_show(struct seq_file *m, void *v)
|
||||
{
|
||||
seq_printf(m, "Reads : DR=%u RA=%u RF=%u WB=%u WBZ=%u\n",
|
||||
seq_printf(m, "Reads : DR=%u RA=%u RF=%u RS=%u WB=%u WBZ=%u\n",
|
||||
atomic_read(&netfs_n_rh_dio_read),
|
||||
atomic_read(&netfs_n_rh_readahead),
|
||||
atomic_read(&netfs_n_rh_read_folio),
|
||||
atomic_read(&netfs_n_rh_read_single),
|
||||
atomic_read(&netfs_n_rh_write_begin),
|
||||
atomic_read(&netfs_n_rh_write_zskip));
|
||||
seq_printf(m, "Writes : BW=%u WT=%u DW=%u WP=%u 2C=%u\n",
|
||||
|
@ -17,10 +17,38 @@
|
||||
#define HIT_PENDING 0x01 /* A front op was still pending */
|
||||
#define NEED_REASSESS 0x02 /* Need to loop round and reassess */
|
||||
#define MADE_PROGRESS 0x04 /* Made progress cleaning up a stream or the folio set */
|
||||
#define BUFFERED 0x08 /* The pagecache needs cleaning up */
|
||||
#define NEED_UNLOCK 0x08 /* The pagecache needs unlocking */
|
||||
#define NEED_RETRY 0x10 /* A front op requests retrying */
|
||||
#define SAW_FAILURE		0x20	/* One or more streams hit a permanent failure */
|
||||
|
||||
static void netfs_dump_request(const struct netfs_io_request *rreq)
|
||||
{
|
||||
pr_err("Request R=%08x r=%d fl=%lx or=%x e=%ld\n",
|
||||
rreq->debug_id, refcount_read(&rreq->ref), rreq->flags,
|
||||
rreq->origin, rreq->error);
|
||||
pr_err(" st=%llx tsl=%zx/%llx/%llx\n",
|
||||
rreq->start, rreq->transferred, rreq->submitted, rreq->len);
|
||||
pr_err(" cci=%llx/%llx/%llx\n",
|
||||
rreq->cleaned_to, rreq->collected_to, atomic64_read(&rreq->issued_to));
|
||||
pr_err(" iw=%pSR\n", rreq->netfs_ops->issue_write);
|
||||
for (int i = 0; i < NR_IO_STREAMS; i++) {
|
||||
const struct netfs_io_subrequest *sreq;
|
||||
const struct netfs_io_stream *s = &rreq->io_streams[i];
|
||||
|
||||
pr_err(" str[%x] s=%x e=%d acnf=%u,%u,%u,%u\n",
|
||||
s->stream_nr, s->source, s->error,
|
||||
s->avail, s->active, s->need_retry, s->failed);
|
||||
pr_err(" str[%x] ct=%llx t=%zx\n",
|
||||
s->stream_nr, s->collected_to, s->transferred);
|
||||
list_for_each_entry(sreq, &s->subrequests, rreq_link) {
|
||||
pr_err(" sreq[%x:%x] sc=%u s=%llx t=%zx/%zx r=%d f=%lx\n",
|
||||
sreq->stream_nr, sreq->debug_index, sreq->source,
|
||||
sreq->start, sreq->transferred, sreq->len,
|
||||
refcount_read(&sreq->ref), sreq->flags);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Successful completion of write of a folio to the server and/or cache. Note
|
||||
* that we are not allowed to lock the folio here on pain of deadlocking with
|
||||
@ -83,9 +111,15 @@ int netfs_folio_written_back(struct folio *folio)
|
||||
static void netfs_writeback_unlock_folios(struct netfs_io_request *wreq,
|
||||
unsigned int *notes)
|
||||
{
|
||||
struct folio_queue *folioq = wreq->buffer;
|
||||
struct folio_queue *folioq = wreq->buffer.tail;
|
||||
unsigned long long collected_to = wreq->collected_to;
|
||||
unsigned int slot = wreq->buffer_head_slot;
|
||||
unsigned int slot = wreq->buffer.first_tail_slot;
|
||||
|
||||
if (WARN_ON_ONCE(!folioq)) {
|
||||
pr_err("[!] Writeback unlock found empty rolling buffer!\n");
|
||||
netfs_dump_request(wreq);
|
||||
return;
|
||||
}
|
||||
|
||||
if (wreq->origin == NETFS_PGPRIV2_COPY_TO_CACHE) {
|
||||
if (netfs_pgpriv2_unlock_copied_folios(wreq))
|
||||
@ -94,7 +128,9 @@ static void netfs_writeback_unlock_folios(struct netfs_io_request *wreq,
|
||||
}
|
||||
|
||||
if (slot >= folioq_nr_slots(folioq)) {
|
||||
folioq = netfs_delete_buffer_head(wreq);
|
||||
folioq = rolling_buffer_delete_spent(&wreq->buffer);
|
||||
if (!folioq)
|
||||
return;
|
||||
slot = 0;
|
||||
}
|
||||
|
||||
@ -134,9 +170,9 @@ static void netfs_writeback_unlock_folios(struct netfs_io_request *wreq,
|
||||
folioq_clear(folioq, slot);
|
||||
slot++;
|
||||
if (slot >= folioq_nr_slots(folioq)) {
|
||||
if (READ_ONCE(wreq->buffer_tail) == folioq)
|
||||
break;
|
||||
folioq = netfs_delete_buffer_head(wreq);
|
||||
folioq = rolling_buffer_delete_spent(&wreq->buffer);
|
||||
if (!folioq)
|
||||
goto done;
|
||||
slot = 0;
|
||||
}
|
||||
|
||||
@ -144,223 +180,9 @@ static void netfs_writeback_unlock_folios(struct netfs_io_request *wreq,
|
||||
break;
|
||||
}
|
||||
|
||||
wreq->buffer = folioq;
|
||||
wreq->buffer_head_slot = slot;
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform retries on the streams that need it.
|
||||
*/
|
||||
static void netfs_retry_write_stream(struct netfs_io_request *wreq,
|
||||
struct netfs_io_stream *stream)
|
||||
{
|
||||
struct list_head *next;
|
||||
|
||||
_enter("R=%x[%x:]", wreq->debug_id, stream->stream_nr);
|
||||
|
||||
if (list_empty(&stream->subrequests))
|
||||
return;
|
||||
|
||||
if (stream->source == NETFS_UPLOAD_TO_SERVER &&
|
||||
wreq->netfs_ops->retry_request)
|
||||
wreq->netfs_ops->retry_request(wreq, stream);
|
||||
|
||||
if (unlikely(stream->failed))
|
||||
return;
|
||||
|
||||
/* If there's no renegotiation to do, just resend each failed subreq. */
|
||||
if (!stream->prepare_write) {
|
||||
struct netfs_io_subrequest *subreq;
|
||||
|
||||
list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
|
||||
if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
|
||||
break;
|
||||
if (__test_and_clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
|
||||
struct iov_iter source = subreq->io_iter;
|
||||
|
||||
iov_iter_revert(&source, subreq->len - source.count);
|
||||
__set_bit(NETFS_SREQ_RETRYING, &subreq->flags);
|
||||
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
|
||||
netfs_reissue_write(stream, subreq, &source);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
next = stream->subrequests.next;
|
||||
|
||||
do {
|
||||
struct netfs_io_subrequest *subreq = NULL, *from, *to, *tmp;
|
||||
struct iov_iter source;
|
||||
unsigned long long start, len;
|
||||
size_t part;
|
||||
bool boundary = false;
|
||||
|
||||
/* Go through the stream and find the next span of contiguous
|
||||
* data that we then rejig (cifs, for example, needs the wsize
|
||||
* renegotiating) and reissue.
|
||||
*/
|
||||
from = list_entry(next, struct netfs_io_subrequest, rreq_link);
|
||||
to = from;
|
||||
start = from->start + from->transferred;
|
||||
len = from->len - from->transferred;
|
||||
|
||||
if (test_bit(NETFS_SREQ_FAILED, &from->flags) ||
|
||||
!test_bit(NETFS_SREQ_NEED_RETRY, &from->flags))
|
||||
return;
|
||||
|
||||
list_for_each_continue(next, &stream->subrequests) {
|
||||
subreq = list_entry(next, struct netfs_io_subrequest, rreq_link);
|
||||
if (subreq->start + subreq->transferred != start + len ||
|
||||
test_bit(NETFS_SREQ_BOUNDARY, &subreq->flags) ||
|
||||
!test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags))
|
||||
break;
|
||||
to = subreq;
|
||||
len += to->len;
|
||||
}
|
||||
|
||||
/* Determine the set of buffers we're going to use. Each
|
||||
* subreq gets a subset of a single overall contiguous buffer.
|
||||
*/
|
||||
netfs_reset_iter(from);
|
||||
source = from->io_iter;
|
||||
source.count = len;
|
||||
|
||||
/* Work through the sublist. */
|
||||
subreq = from;
|
||||
list_for_each_entry_from(subreq, &stream->subrequests, rreq_link) {
|
||||
if (!len)
|
||||
break;
|
||||
/* Renegotiate max_len (wsize) */
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
|
||||
__clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
|
||||
__set_bit(NETFS_SREQ_RETRYING, &subreq->flags);
|
||||
stream->prepare_write(subreq);
|
||||
|
||||
part = min(len, stream->sreq_max_len);
|
||||
subreq->len = part;
|
||||
subreq->start = start;
|
||||
subreq->transferred = 0;
|
||||
len -= part;
|
||||
start += part;
|
||||
if (len && subreq == to &&
|
||||
__test_and_clear_bit(NETFS_SREQ_BOUNDARY, &to->flags))
|
||||
boundary = true;
|
||||
|
||||
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
|
||||
netfs_reissue_write(stream, subreq, &source);
|
||||
if (subreq == to)
|
||||
break;
|
||||
}
|
||||
|
||||
/* If we managed to use fewer subreqs, we can discard the
|
||||
* excess; if we used the same number, then we're done.
|
||||
*/
|
||||
if (!len) {
|
||||
if (subreq == to)
|
||||
continue;
|
||||
list_for_each_entry_safe_from(subreq, tmp,
|
||||
&stream->subrequests, rreq_link) {
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_discard);
|
||||
list_del(&subreq->rreq_link);
|
||||
netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done);
|
||||
if (subreq == to)
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
/* We ran out of subrequests, so we need to allocate some more
|
||||
* and insert them after.
|
||||
*/
|
||||
do {
|
||||
subreq = netfs_alloc_subrequest(wreq);
|
||||
subreq->source = to->source;
|
||||
subreq->start = start;
|
||||
subreq->debug_index = atomic_inc_return(&wreq->subreq_counter);
|
||||
subreq->stream_nr = to->stream_nr;
|
||||
__set_bit(NETFS_SREQ_RETRYING, &subreq->flags);
|
||||
|
||||
trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
|
||||
refcount_read(&subreq->ref),
|
||||
netfs_sreq_trace_new);
|
||||
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
|
||||
|
||||
list_add(&subreq->rreq_link, &to->rreq_link);
|
||||
to = list_next_entry(to, rreq_link);
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
|
||||
|
||||
stream->sreq_max_len = len;
|
||||
stream->sreq_max_segs = INT_MAX;
|
||||
switch (stream->source) {
|
||||
case NETFS_UPLOAD_TO_SERVER:
|
||||
netfs_stat(&netfs_n_wh_upload);
|
||||
stream->sreq_max_len = umin(len, wreq->wsize);
|
||||
break;
|
||||
case NETFS_WRITE_TO_CACHE:
|
||||
netfs_stat(&netfs_n_wh_write);
|
||||
break;
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
}
|
||||
|
||||
stream->prepare_write(subreq);
|
||||
|
||||
part = umin(len, stream->sreq_max_len);
|
||||
subreq->len = subreq->transferred + part;
|
||||
len -= part;
|
||||
start += part;
|
||||
if (!len && boundary) {
|
||||
__set_bit(NETFS_SREQ_BOUNDARY, &to->flags);
|
||||
boundary = false;
|
||||
}
|
||||
|
||||
netfs_reissue_write(stream, subreq, &source);
|
||||
if (!len)
|
||||
break;
|
||||
|
||||
} while (len);
|
||||
|
||||
} while (!list_is_head(next, &stream->subrequests));
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform retries on the streams that need it. If we're doing content
|
||||
* encryption and the server copy changed due to a third-party write, we may
|
||||
* need to do an RMW cycle and also rewrite the data to the cache.
|
||||
*/
|
||||
static void netfs_retry_writes(struct netfs_io_request *wreq)
|
||||
{
|
||||
struct netfs_io_subrequest *subreq;
|
||||
struct netfs_io_stream *stream;
|
||||
int s;
|
||||
|
||||
/* Wait for all outstanding I/O to quiesce before performing retries as
|
||||
* we may need to renegotiate the I/O sizes.
|
||||
*/
|
||||
for (s = 0; s < NR_IO_STREAMS; s++) {
|
||||
stream = &wreq->io_streams[s];
|
||||
if (!stream->active)
|
||||
continue;
|
||||
|
||||
list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
|
||||
wait_on_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Enc: Fetch changed partial pages
|
||||
// TODO: Enc: Reencrypt content if needed.
|
||||
// TODO: Enc: Wind back transferred point.
|
||||
// TODO: Enc: Mark cache pages for retry.
|
||||
|
||||
for (s = 0; s < NR_IO_STREAMS; s++) {
|
||||
stream = &wreq->io_streams[s];
|
||||
if (stream->need_retry) {
|
||||
stream->need_retry = false;
|
||||
netfs_retry_write_stream(wreq, stream);
|
||||
}
|
||||
}
|
||||
wreq->buffer.tail = folioq;
|
||||
done:
|
||||
wreq->buffer.first_tail_slot = slot;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -391,7 +213,7 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq)
|
||||
if (wreq->origin == NETFS_WRITEBACK ||
|
||||
wreq->origin == NETFS_WRITETHROUGH ||
|
||||
wreq->origin == NETFS_PGPRIV2_COPY_TO_CACHE)
|
||||
notes = BUFFERED;
|
||||
notes = NEED_UNLOCK;
|
||||
else
|
||||
notes = 0;
|
||||
|
||||
@ -450,14 +272,14 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq)
|
||||
|
||||
cancel:
|
||||
/* Remove if completely consumed. */
|
||||
spin_lock_bh(&wreq->lock);
|
||||
spin_lock(&wreq->lock);
|
||||
|
||||
remove = front;
|
||||
list_del_init(&front->rreq_link);
|
||||
front = list_first_entry_or_null(&stream->subrequests,
|
||||
struct netfs_io_subrequest, rreq_link);
|
||||
stream->front = front;
|
||||
spin_unlock_bh(&wreq->lock);
|
||||
spin_unlock(&wreq->lock);
|
||||
netfs_put_subrequest(remove, false,
|
||||
notes & SAW_FAILURE ?
|
||||
netfs_sreq_trace_put_cancel :
|
||||
@ -488,7 +310,7 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq)
|
||||
trace_netfs_collect_state(wreq, wreq->collected_to, notes);
|
||||
|
||||
/* Unlock any folios that we have now finished with. */
|
||||
if (notes & BUFFERED) {
|
||||
if (notes & NEED_UNLOCK) {
|
||||
if (wreq->cleaned_to < wreq->collected_to)
|
||||
netfs_writeback_unlock_folios(wreq, ¬es);
|
||||
} else {
|
||||
|
@ -94,9 +94,10 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
|
||||
{
|
||||
struct netfs_io_request *wreq;
|
||||
struct netfs_inode *ictx;
|
||||
bool is_buffered = (origin == NETFS_WRITEBACK ||
|
||||
origin == NETFS_WRITETHROUGH ||
|
||||
origin == NETFS_PGPRIV2_COPY_TO_CACHE);
|
||||
bool is_cacheable = (origin == NETFS_WRITEBACK ||
|
||||
origin == NETFS_WRITEBACK_SINGLE ||
|
||||
origin == NETFS_WRITETHROUGH ||
|
||||
origin == NETFS_PGPRIV2_COPY_TO_CACHE);
|
||||
|
||||
wreq = netfs_alloc_request(mapping, file, start, 0, origin);
|
||||
if (IS_ERR(wreq))
|
||||
@ -105,8 +106,10 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
|
||||
_enter("R=%x", wreq->debug_id);
|
||||
|
||||
ictx = netfs_inode(wreq->inode);
|
||||
if (is_buffered && netfs_is_cache_enabled(ictx))
|
||||
if (is_cacheable && netfs_is_cache_enabled(ictx))
|
||||
fscache_begin_write_operation(&wreq->cache_resources, netfs_i_cookie(ictx));
|
||||
if (rolling_buffer_init(&wreq->buffer, wreq->debug_id, ITER_SOURCE) < 0)
|
||||
goto nomem;
|
||||
|
||||
wreq->cleaned_to = wreq->start;
|
||||
|
||||
@ -129,6 +132,10 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
|
||||
}
|
||||
|
||||
return wreq;
|
||||
nomem:
|
||||
wreq->error = -ENOMEM;
|
||||
netfs_put_request(wreq, false, netfs_rreq_trace_put_failed);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -153,16 +160,15 @@ static void netfs_prepare_write(struct netfs_io_request *wreq,
|
||||
loff_t start)
|
||||
{
|
||||
struct netfs_io_subrequest *subreq;
|
||||
struct iov_iter *wreq_iter = &wreq->io_iter;
|
||||
struct iov_iter *wreq_iter = &wreq->buffer.iter;
|
||||
|
||||
/* Make sure we don't point the iterator at a used-up folio_queue
|
||||
* struct being used as a placeholder to prevent the queue from
|
||||
* collapsing. In such a case, extend the queue.
|
||||
*/
|
||||
if (iov_iter_is_folioq(wreq_iter) &&
|
||||
wreq_iter->folioq_slot >= folioq_nr_slots(wreq_iter->folioq)) {
|
||||
netfs_buffer_make_space(wreq);
|
||||
}
|
||||
wreq_iter->folioq_slot >= folioq_nr_slots(wreq_iter->folioq))
|
||||
rolling_buffer_make_space(&wreq->buffer);
|
||||
|
||||
subreq = netfs_alloc_subrequest(wreq);
|
||||
subreq->source = stream->source;
|
||||
@ -198,7 +204,7 @@ static void netfs_prepare_write(struct netfs_io_request *wreq,
|
||||
* the list. The collector only goes nextwards and uses the lock to
|
||||
* remove entries off of the front.
|
||||
*/
|
||||
spin_lock_bh(&wreq->lock);
|
||||
spin_lock(&wreq->lock);
|
||||
list_add_tail(&subreq->rreq_link, &stream->subrequests);
|
||||
if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
|
||||
stream->front = subreq;
|
||||
@ -209,7 +215,7 @@ static void netfs_prepare_write(struct netfs_io_request *wreq,
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock_bh(&wreq->lock);
|
||||
spin_unlock(&wreq->lock);
|
||||
|
||||
stream->construct = subreq;
|
||||
}
|
||||
@ -266,9 +272,9 @@ void netfs_issue_write(struct netfs_io_request *wreq,
|
||||
* we can avoid overrunning the credits obtained (cifs) and try to parallelise
|
||||
* content-crypto preparation with network writes.
|
||||
*/
|
||||
int netfs_advance_write(struct netfs_io_request *wreq,
|
||||
struct netfs_io_stream *stream,
|
||||
loff_t start, size_t len, bool to_eof)
|
||||
size_t netfs_advance_write(struct netfs_io_request *wreq,
|
||||
struct netfs_io_stream *stream,
|
||||
loff_t start, size_t len, bool to_eof)
|
||||
{
|
||||
struct netfs_io_subrequest *subreq = stream->construct;
|
||||
size_t part;
|
||||
@ -325,6 +331,9 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
|
||||
|
||||
_enter("");
|
||||
|
||||
if (rolling_buffer_make_space(&wreq->buffer) < 0)
|
||||
return -ENOMEM;
|
||||
|
||||
/* netfs_perform_write() may shift i_size around the page or from out
|
||||
* of the page to beyond it, but cannot move i_size into or through the
|
||||
* page since we have it locked.
|
||||
@ -429,7 +438,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
|
||||
}
|
||||
|
||||
/* Attach the folio to the rolling buffer. */
|
||||
netfs_buffer_append_folio(wreq, folio, false);
|
||||
rolling_buffer_append(&wreq->buffer, folio, 0);
|
||||
|
||||
/* Move the submission point forward to allow for write-streaming data
|
||||
* not starting at the front of the page. We don't do write-streaming
|
||||
@ -442,7 +451,8 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
|
||||
stream = &wreq->io_streams[s];
|
||||
stream->submit_off = foff;
|
||||
stream->submit_len = flen;
|
||||
if ((stream->source == NETFS_WRITE_TO_CACHE && streamw) ||
|
||||
if (!stream->avail ||
|
||||
(stream->source == NETFS_WRITE_TO_CACHE && streamw) ||
|
||||
(stream->source == NETFS_UPLOAD_TO_SERVER &&
|
||||
fgroup == NETFS_FOLIO_COPY_TO_CACHE)) {
|
||||
stream->submit_off = UINT_MAX;
|
||||
@ -476,7 +486,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
|
||||
|
||||
/* Advance the iterator(s). */
|
||||
if (stream->submit_off > iter_off) {
|
||||
iov_iter_advance(&wreq->io_iter, stream->submit_off - iter_off);
|
||||
rolling_buffer_advance(&wreq->buffer, stream->submit_off - iter_off);
|
||||
iter_off = stream->submit_off;
|
||||
}
|
||||
|
||||
@ -494,7 +504,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
|
||||
}
|
||||
|
||||
if (fsize > iter_off)
|
||||
iov_iter_advance(&wreq->io_iter, fsize - iter_off);
|
||||
rolling_buffer_advance(&wreq->buffer, fsize - iter_off);
|
||||
atomic64_set(&wreq->issued_to, fpos + fsize);
|
||||
|
||||
if (!debug)
|
||||
@ -633,7 +643,7 @@ int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_c
|
||||
struct folio **writethrough_cache)
|
||||
{
|
||||
_enter("R=%x ic=%zu ws=%u cp=%zu tp=%u",
|
||||
wreq->debug_id, wreq->iter.count, wreq->wsize, copied, to_page_end);
|
||||
wreq->debug_id, wreq->buffer.iter.count, wreq->wsize, copied, to_page_end);
|
||||
|
||||
if (!*writethrough_cache) {
|
||||
if (folio_test_dirty(folio))
|
||||
@ -708,7 +718,7 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t
|
||||
part = netfs_advance_write(wreq, upload, start, len, false);
|
||||
start += part;
|
||||
len -= part;
|
||||
iov_iter_advance(&wreq->io_iter, part);
|
||||
rolling_buffer_advance(&wreq->buffer, part);
|
||||
if (test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) {
|
||||
trace_netfs_rreq(wreq, netfs_rreq_trace_wait_pause);
|
||||
wait_on_bit(&wreq->flags, NETFS_RREQ_PAUSE, TASK_UNINTERRUPTIBLE);
|
||||
@ -721,3 +731,194 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t
|
||||
_leave(" = %d", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
 * Write some of a pending folio's data back to the server and/or the cache.
|
||||
*/
|
||||
static int netfs_write_folio_single(struct netfs_io_request *wreq,
|
||||
struct folio *folio)
|
||||
{
|
||||
struct netfs_io_stream *upload = &wreq->io_streams[0];
|
||||
struct netfs_io_stream *cache = &wreq->io_streams[1];
|
||||
struct netfs_io_stream *stream;
|
||||
size_t iter_off = 0;
|
||||
size_t fsize = folio_size(folio), flen;
|
||||
loff_t fpos = folio_pos(folio);
|
||||
bool to_eof = false;
|
||||
bool no_debug = false;
|
||||
|
||||
_enter("");
|
||||
|
||||
flen = folio_size(folio);
|
||||
if (flen > wreq->i_size - fpos) {
|
||||
flen = wreq->i_size - fpos;
|
||||
folio_zero_segment(folio, flen, fsize);
|
||||
to_eof = true;
|
||||
} else if (flen == wreq->i_size - fpos) {
|
||||
to_eof = true;
|
||||
}
|
||||
|
||||
_debug("folio %zx/%zx", flen, fsize);
|
||||
|
||||
if (!upload->avail && !cache->avail) {
|
||||
trace_netfs_folio(folio, netfs_folio_trace_cancel_store);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!upload->construct)
|
||||
trace_netfs_folio(folio, netfs_folio_trace_store);
|
||||
else
|
||||
trace_netfs_folio(folio, netfs_folio_trace_store_plus);
|
||||
|
||||
/* Attach the folio to the rolling buffer. */
|
||||
folio_get(folio);
|
||||
rolling_buffer_append(&wreq->buffer, folio, NETFS_ROLLBUF_PUT_MARK);
|
||||
|
||||
/* Move the submission point forward to allow for write-streaming data
|
||||
* not starting at the front of the page. We don't do write-streaming
|
||||
* with the cache as the cache requires DIO alignment.
|
||||
*
|
||||
* Also skip uploading for data that's been read and just needs copying
|
||||
* to the cache.
|
||||
*/
|
||||
for (int s = 0; s < NR_IO_STREAMS; s++) {
|
||||
stream = &wreq->io_streams[s];
|
||||
stream->submit_off = 0;
|
||||
stream->submit_len = flen;
|
||||
if (!stream->avail) {
|
||||
stream->submit_off = UINT_MAX;
|
||||
stream->submit_len = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Attach the folio to one or more subrequests. For a big folio, we
|
||||
* could end up with thousands of subrequests if the wsize is small -
|
||||
* but we might need to wait during the creation of subrequests for
|
||||
* network resources (eg. SMB credits).
|
||||
*/
|
||||
for (;;) {
|
||||
ssize_t part;
|
||||
size_t lowest_off = ULONG_MAX;
|
||||
int choose_s = -1;
|
||||
|
||||
/* Always add to the lowest-submitted stream first. */
|
||||
for (int s = 0; s < NR_IO_STREAMS; s++) {
|
||||
stream = &wreq->io_streams[s];
|
||||
if (stream->submit_len > 0 &&
|
||||
stream->submit_off < lowest_off) {
|
||||
lowest_off = stream->submit_off;
|
||||
choose_s = s;
|
||||
}
|
||||
}
|
||||
|
||||
if (choose_s < 0)
|
||||
break;
|
||||
stream = &wreq->io_streams[choose_s];
|
||||
|
||||
/* Advance the iterator(s). */
|
||||
if (stream->submit_off > iter_off) {
|
||||
rolling_buffer_advance(&wreq->buffer, stream->submit_off - iter_off);
|
||||
iter_off = stream->submit_off;
|
||||
}
|
||||
|
||||
atomic64_set(&wreq->issued_to, fpos + stream->submit_off);
|
||||
stream->submit_extendable_to = fsize - stream->submit_off;
|
||||
part = netfs_advance_write(wreq, stream, fpos + stream->submit_off,
|
||||
stream->submit_len, to_eof);
|
||||
stream->submit_off += part;
|
||||
if (part > stream->submit_len)
|
||||
stream->submit_len = 0;
|
||||
else
|
||||
stream->submit_len -= part;
|
||||
if (part > 0)
|
||||
no_debug = true;
|
||||
}
|
||||
|
||||
wreq->buffer.iter.iov_offset = 0;
|
||||
if (fsize > iter_off)
|
||||
rolling_buffer_advance(&wreq->buffer, fsize - iter_off);
|
||||
atomic64_set(&wreq->issued_to, fpos + fsize);
|
||||
|
||||
if (!no_debug)
|
||||
kdebug("R=%x: No submit", wreq->debug_id);
|
||||
_leave(" = 0");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* netfs_writeback_single - Write back a monolithic payload
|
||||
* @mapping: The mapping to write from
|
||||
* @wbc: Hints from the VM
|
||||
* @iter: Data to write, must be ITER_FOLIOQ.
|
||||
*
|
||||
* Write a monolithic, non-pagecache object back to the server and/or
|
||||
* the cache.
|
||||
*/
|
||||
int netfs_writeback_single(struct address_space *mapping,
|
||||
struct writeback_control *wbc,
|
||||
struct iov_iter *iter)
|
||||
{
|
||||
struct netfs_io_request *wreq;
|
||||
struct netfs_inode *ictx = netfs_inode(mapping->host);
|
||||
struct folio_queue *fq;
|
||||
size_t size = iov_iter_count(iter);
|
||||
int ret = 0;
|
||||
|
||||
if (WARN_ON_ONCE(!iov_iter_is_folioq(iter)))
|
||||
return -EIO;
|
||||
|
||||
if (!mutex_trylock(&ictx->wb_lock)) {
|
||||
if (wbc->sync_mode == WB_SYNC_NONE) {
|
||||
netfs_stat(&netfs_n_wb_lock_skip);
|
||||
return 0;
|
||||
}
|
||||
netfs_stat(&netfs_n_wb_lock_wait);
|
||||
mutex_lock(&ictx->wb_lock);
|
||||
}
|
||||
|
||||
wreq = netfs_create_write_req(mapping, NULL, 0, NETFS_WRITEBACK_SINGLE);
|
||||
if (IS_ERR(wreq)) {
|
||||
ret = PTR_ERR(wreq);
|
||||
goto couldnt_start;
|
||||
}
|
||||
|
||||
trace_netfs_write(wreq, netfs_write_trace_writeback);
|
||||
netfs_stat(&netfs_n_wh_writepages);
|
||||
|
||||
if (__test_and_set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
|
||||
wreq->netfs_ops->begin_writeback(wreq);
|
||||
|
||||
for (fq = (struct folio_queue *)iter->folioq; fq; fq = fq->next) {
|
||||
for (int slot = 0; slot < folioq_count(fq); slot++) {
|
||||
struct folio *folio = folioq_folio(fq, slot);
|
||||
size_t part = umin(folioq_folio_size(fq, slot), size);
|
||||
|
||||
_debug("wbiter %lx %llx", folio->index, atomic64_read(&wreq->issued_to));
|
||||
|
||||
ret = netfs_write_folio_single(wreq, folio);
|
||||
if (ret < 0)
|
||||
goto stop;
|
||||
size -= part;
|
||||
if (size <= 0)
|
||||
goto stop;
|
||||
}
|
||||
}
|
||||
|
||||
stop:
|
||||
for (int s = 0; s < NR_IO_STREAMS; s++)
|
||||
netfs_issue_write(wreq, &wreq->io_streams[s]);
|
||||
smp_wmb(); /* Write lists before ALL_QUEUED. */
|
||||
set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags);
|
||||
|
||||
mutex_unlock(&ictx->wb_lock);
|
||||
|
||||
netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
|
||||
_leave(" = %d", ret);
|
||||
return ret;
|
||||
|
||||
couldnt_start:
|
||||
mutex_unlock(&ictx->wb_lock);
|
||||
_leave(" = %d", ret);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(netfs_writeback_single);
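
For context, a hedged sketch of the sort of ->writepages handler that would drive this: the filesystem wraps the folio_queue chain holding its monolithic object in an ITER_FOLIOQ iterator and hands it to netfs_writeback_single(). struct example_inode and its content/content_size fields are hypothetical wrappers invented for the sketch.

#include <linux/netfs.h>

struct example_inode {			/* hypothetical per-inode state */
	struct netfs_inode netfs;
	struct folio_queue *content;
	size_t content_size;
};

static struct example_inode *EXAMPLE_I(struct inode *inode)
{
	return container_of(inode, struct example_inode, netfs.inode);
}

static int example_write_pages(struct address_space *mapping,
			       struct writeback_control *wbc)
{
	struct example_inode *ei = EXAMPLE_I(mapping->host);
	struct iov_iter iter;

	iov_iter_folio_queue(&iter, ITER_SOURCE, ei->content, 0, 0,
			     ei->content_size);
	return netfs_writeback_single(mapping, wbc, &iter);
}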
|
||||
|
fs/netfs/write_retry.c (new file)
@ -0,0 +1,233 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/* Network filesystem write retrying.
|
||||
*
|
||||
* Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
|
||||
* Written by David Howells (dhowells@redhat.com)
|
||||
*/
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/slab.h>
|
||||
#include "internal.h"
|
||||
|
||||
/*
|
||||
* Perform retries on the streams that need it.
|
||||
*/
|
||||
static void netfs_retry_write_stream(struct netfs_io_request *wreq,
|
||||
struct netfs_io_stream *stream)
|
||||
{
|
||||
struct list_head *next;
|
||||
|
||||
_enter("R=%x[%x:]", wreq->debug_id, stream->stream_nr);
|
||||
|
||||
if (list_empty(&stream->subrequests))
|
||||
return;
|
||||
|
||||
if (stream->source == NETFS_UPLOAD_TO_SERVER &&
|
||||
wreq->netfs_ops->retry_request)
|
||||
wreq->netfs_ops->retry_request(wreq, stream);
|
||||
|
||||
if (unlikely(stream->failed))
|
||||
return;
|
||||
|
||||
/* If there's no renegotiation to do, just resend each failed subreq. */
|
||||
if (!stream->prepare_write) {
|
||||
struct netfs_io_subrequest *subreq;
|
||||
|
||||
list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
|
||||
if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
|
||||
break;
|
||||
if (__test_and_clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
|
||||
struct iov_iter source = subreq->io_iter;
|
||||
|
||||
iov_iter_revert(&source, subreq->len - source.count);
|
||||
__set_bit(NETFS_SREQ_RETRYING, &subreq->flags);
|
||||
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
|
||||
netfs_reissue_write(stream, subreq, &source);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
next = stream->subrequests.next;
|
||||
|
||||
do {
|
||||
struct netfs_io_subrequest *subreq = NULL, *from, *to, *tmp;
|
||||
struct iov_iter source;
|
||||
unsigned long long start, len;
|
||||
size_t part;
|
||||
bool boundary = false;
|
||||
|
||||
/* Go through the stream and find the next span of contiguous
|
||||
* data that we then rejig (cifs, for example, needs the wsize
|
||||
* renegotiating) and reissue.
|
||||
*/
|
||||
from = list_entry(next, struct netfs_io_subrequest, rreq_link);
|
||||
to = from;
|
||||
start = from->start + from->transferred;
|
||||
len = from->len - from->transferred;
|
||||
|
||||
if (test_bit(NETFS_SREQ_FAILED, &from->flags) ||
|
||||
!test_bit(NETFS_SREQ_NEED_RETRY, &from->flags))
|
||||
return;
|
||||
|
||||
list_for_each_continue(next, &stream->subrequests) {
|
||||
subreq = list_entry(next, struct netfs_io_subrequest, rreq_link);
|
||||
if (subreq->start + subreq->transferred != start + len ||
|
||||
test_bit(NETFS_SREQ_BOUNDARY, &subreq->flags) ||
|
||||
!test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags))
|
||||
break;
|
||||
to = subreq;
|
||||
len += to->len;
|
||||
}
|
||||
|
||||
/* Determine the set of buffers we're going to use. Each
|
||||
* subreq gets a subset of a single overall contiguous buffer.
|
||||
*/
|
||||
netfs_reset_iter(from);
|
||||
source = from->io_iter;
|
||||
source.count = len;
|
||||
|
||||
/* Work through the sublist. */
|
||||
subreq = from;
|
||||
list_for_each_entry_from(subreq, &stream->subrequests, rreq_link) {
|
||||
if (!len)
|
||||
break;
|
||||
|
||||
subreq->start = start;
|
||||
subreq->len = len;
|
||||
__clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
|
||||
__set_bit(NETFS_SREQ_RETRYING, &subreq->flags);
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
|
||||
|
||||
/* Renegotiate max_len (wsize) */
|
||||
stream->sreq_max_len = len;
|
||||
stream->prepare_write(subreq);
|
||||
|
||||
part = umin(len, stream->sreq_max_len);
|
||||
if (unlikely(stream->sreq_max_segs))
|
||||
part = netfs_limit_iter(&source, 0, part, stream->sreq_max_segs);
|
||||
subreq->len = part;
|
||||
subreq->transferred = 0;
|
||||
len -= part;
|
||||
start += part;
|
||||
if (len && subreq == to &&
|
||||
__test_and_clear_bit(NETFS_SREQ_BOUNDARY, &to->flags))
|
||||
boundary = true;
|
||||
|
||||
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
|
||||
netfs_reissue_write(stream, subreq, &source);
|
||||
if (subreq == to)
|
||||
break;
|
||||
}
|
||||
|
||||
/* If we managed to use fewer subreqs, we can discard the
|
||||
* excess; if we used the same number, then we're done.
|
||||
*/
|
||||
if (!len) {
|
||||
if (subreq == to)
|
||||
continue;
|
||||
list_for_each_entry_safe_from(subreq, tmp,
|
||||
&stream->subrequests, rreq_link) {
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_discard);
|
||||
list_del(&subreq->rreq_link);
|
||||
netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done);
|
||||
if (subreq == to)
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
/* We ran out of subrequests, so we need to allocate some more
|
||||
* and insert them after.
|
||||
*/
|
||||
do {
|
||||
subreq = netfs_alloc_subrequest(wreq);
|
||||
subreq->source = to->source;
|
||||
subreq->start = start;
|
||||
subreq->debug_index = atomic_inc_return(&wreq->subreq_counter);
|
||||
subreq->stream_nr = to->stream_nr;
|
||||
__set_bit(NETFS_SREQ_RETRYING, &subreq->flags);
|
||||
|
||||
trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
|
||||
refcount_read(&subreq->ref),
|
||||
netfs_sreq_trace_new);
|
||||
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
|
||||
|
||||
list_add(&subreq->rreq_link, &to->rreq_link);
|
||||
to = list_next_entry(to, rreq_link);
|
||||
trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
|
||||
|
||||
stream->sreq_max_len = len;
|
||||
stream->sreq_max_segs = INT_MAX;
|
||||
switch (stream->source) {
|
||||
case NETFS_UPLOAD_TO_SERVER:
|
||||
netfs_stat(&netfs_n_wh_upload);
|
||||
stream->sreq_max_len = umin(len, wreq->wsize);
|
||||
break;
|
||||
case NETFS_WRITE_TO_CACHE:
|
||||
netfs_stat(&netfs_n_wh_write);
|
||||
break;
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
}
|
||||
|
||||
stream->prepare_write(subreq);
|
||||
|
||||
part = umin(len, stream->sreq_max_len);
|
||||
subreq->len = subreq->transferred + part;
|
||||
len -= part;
|
||||
start += part;
|
||||
if (!len && boundary) {
|
||||
__set_bit(NETFS_SREQ_BOUNDARY, &to->flags);
|
||||
boundary = false;
|
||||
}
|
||||
|
||||
netfs_reissue_write(stream, subreq, &source);
|
||||
if (!len)
|
||||
break;
|
||||
|
||||
} while (len);
|
||||
|
||||
} while (!list_is_head(next, &stream->subrequests));
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform retries on the streams that need it. If we're doing content
|
||||
* encryption and the server copy changed due to a third-party write, we may
|
||||
* need to do an RMW cycle and also rewrite the data to the cache.
|
||||
*/
|
||||
void netfs_retry_writes(struct netfs_io_request *wreq)
|
||||
{
|
||||
struct netfs_io_subrequest *subreq;
|
||||
struct netfs_io_stream *stream;
|
||||
int s;
|
||||
|
||||
/* Wait for all outstanding I/O to quiesce before performing retries as
|
||||
* we may need to renegotiate the I/O sizes.
|
||||
*/
|
||||
for (s = 0; s < NR_IO_STREAMS; s++) {
|
||||
stream = &wreq->io_streams[s];
|
||||
if (!stream->active)
|
||||
continue;
|
||||
|
||||
list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
|
||||
wait_on_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Enc: Fetch changed partial pages
|
||||
// TODO: Enc: Reencrypt content if needed.
|
||||
// TODO: Enc: Wind back transferred point.
|
||||
// TODO: Enc: Mark cache pages for retry.
|
||||
|
||||
for (s = 0; s < NR_IO_STREAMS; s++) {
|
||||
stream = &wreq->io_streams[s];
|
||||
if (stream->need_retry) {
|
||||
stream->need_retry = false;
|
||||
netfs_retry_write_stream(wreq, stream);
|
||||
}
|
||||
}
|
||||
}
|
@ -307,8 +307,10 @@ static void nfs_netfs_issue_read(struct netfs_io_subrequest *sreq)
|
||||
&nfs_async_read_completion_ops);
|
||||
|
||||
netfs = nfs_netfs_alloc(sreq);
|
||||
if (!netfs)
|
||||
return netfs_read_subreq_terminated(sreq, -ENOMEM, false);
|
||||
if (!netfs) {
|
||||
sreq->error = -ENOMEM;
|
||||
return netfs_read_subreq_terminated(sreq);
|
||||
}
|
||||
|
||||
pgio.pg_netfs = netfs; /* used in completion */
|
||||
|
||||
|
@ -74,7 +74,8 @@ static inline void nfs_netfs_put(struct nfs_netfs_io_data *netfs)
|
||||
*/
|
||||
netfs->sreq->transferred = min_t(s64, netfs->sreq->len,
|
||||
atomic64_read(&netfs->transferred));
|
||||
netfs_read_subreq_terminated(netfs->sreq, netfs->error, false);
|
||||
netfs->sreq->error = netfs->error;
|
||||
netfs_read_subreq_terminated(netfs->sreq);
|
||||
kfree(netfs);
|
||||
}
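
The two hunks above reflect netfs_read_subreq_terminated() losing its error and was_async arguments; a completion path now stores the outcome in the subrequest first. A hedged sketch of the resulting idiom, with example_read_done() and its arguments being hypothetical:

static void example_read_done(struct netfs_io_subrequest *subreq,
			      ssize_t bytes, int err)
{
	if (err < 0)
		subreq->error = err;
	else
		subreq->transferred = umin(subreq->len, (size_t)bytes);
	netfs_read_subreq_terminated(subreq);
}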
|
||||
static inline void nfs_netfs_inode_init(struct nfs_inode *nfsi)
|
||||
|
@ -27,7 +27,7 @@ int nfsd_setuser(struct svc_cred *cred, struct svc_export *exp)
|
||||
int flags = nfsexp_flags(cred, exp);
|
||||
|
||||
/* discard any old override before preparing the new set */
|
||||
revert_creds(get_cred(current_real_cred()));
|
||||
put_cred(revert_creds(get_cred(current_real_cred())));
|
||||
new = prepare_creds();
|
||||
if (!new)
|
||||
return -ENOMEM;
|
||||
@ -80,7 +80,6 @@ int nfsd_setuser(struct svc_cred *cred, struct svc_export *exp)
|
||||
new->cap_effective = cap_raise_nfsd_set(new->cap_effective,
|
||||
new->cap_permitted);
|
||||
put_cred(override_creds(new));
|
||||
put_cred(new);
|
||||
return 0;
|
||||
|
||||
oom:
|
||||
|
@ -1248,7 +1248,7 @@ nfsd_file_acquire_local(struct net *net, struct svc_cred *cred,
|
||||
|
||||
beres = nfsd_file_do_acquire(NULL, net, cred, client,
|
||||
fhp, may_flags, NULL, pnf, true);
|
||||
revert_creds(save_cred);
|
||||
put_cred(revert_creds(save_cred));
|
||||
return beres;
|
||||
}
|
||||
|
||||
|
@ -82,14 +82,13 @@ nfs4_save_creds(const struct cred **original_creds)
|
||||
new->fsuid = GLOBAL_ROOT_UID;
|
||||
new->fsgid = GLOBAL_ROOT_GID;
|
||||
*original_creds = override_creds(new);
|
||||
put_cred(new);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
nfs4_reset_creds(const struct cred *original)
|
||||
{
|
||||
revert_creds(original);
|
||||
put_cred(revert_creds(original));
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -222,7 +222,6 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct net *net,
|
||||
cap_raise_nfsd_set(new->cap_effective,
|
||||
new->cap_permitted);
|
||||
put_cred(override_creds(new));
|
||||
put_cred(new);
|
||||
} else {
|
||||
error = nfsd_setuser_and_check_port(rqstp, cred, exp);
|
||||
if (error)
|
||||
|
@ -274,10 +274,7 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl,
|
||||
if (usize < MNT_NS_INFO_SIZE_VER0)
|
||||
return -EINVAL;
|
||||
|
||||
if (previous)
|
||||
mnt_ns = lookup_prev_mnt_ns(to_mnt_ns(ns));
|
||||
else
|
||||
mnt_ns = lookup_next_mnt_ns(to_mnt_ns(ns));
|
||||
mnt_ns = get_sequential_mnt_ns(to_mnt_ns(ns), previous);
|
||||
if (IS_ERR(mnt_ns))
|
||||
return PTR_ERR(mnt_ns);
|
||||
|
||||
|
fs/open.c
@ -413,7 +413,6 @@ static bool access_need_override_creds(int flags)
|
||||
|
||||
static const struct cred *access_override_creds(void)
|
||||
{
|
||||
const struct cred *old_cred;
|
||||
struct cred *override_cred;
|
||||
|
||||
override_cred = prepare_creds();
|
||||
@ -458,13 +457,7 @@ static const struct cred *access_override_creds(void)
|
||||
* freeing.
|
||||
*/
|
||||
override_cred->non_rcu = 1;
|
||||
|
||||
old_cred = override_creds(override_cred);
|
||||
|
||||
/* override_cred() gets its own ref */
|
||||
put_cred(override_cred);
|
||||
|
||||
return old_cred;
|
||||
return override_creds(override_cred);
|
||||
}
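
Taken together, these credential hunks appear to converge on one pattern: override_creds() no longer takes its own reference (the caller's prepare_creds() reference is handed to the task), and revert_creds() returns the displaced override creds for the caller to drop. A hedged sketch of that pattern as read from the hunks; the privileged work itself is elided.

static int example_with_temporary_creds(void)
{
	const struct cred *old_cred;
	struct cred *new;
	int ret = 0;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	new->fsuid = GLOBAL_ROOT_UID;	/* example tweak */

	/* The task takes over the prepare_creds() reference. */
	old_cred = override_creds(new);

	/* ... do the privileged work here ... */

	/* Restore old_cred and drop the override creds handed back. */
	put_cred(revert_creds(old_cred));
	return ret;
}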
|
||||
|
||||
static long do_faccessat(int dfd, const char __user *filename, int mode, int flags)
|
||||
@ -534,7 +527,7 @@ static long do_faccessat(int dfd, const char __user *filename, int mode, int fla
|
||||
}
|
||||
out:
|
||||
if (old_cred)
|
||||
revert_creds(old_cred);
|
||||
put_cred(revert_creds(old_cred));
|
||||
|
||||
return res;
|
||||
}
|
||||
|
@ -575,12 +575,12 @@ static const struct cred *ovl_setup_cred_for_create(struct dentry *dentry,
|
||||
}
|
||||
|
||||
/*
|
||||
* Caller is going to match this with revert_creds_light() and drop
|
||||
* Caller is going to match this with revert_creds() and drop
|
||||
 * reference on the returned creds.
|
||||
* We must be called with creator creds already, otherwise we risk
|
||||
* leaking creds.
|
||||
*/
|
||||
old_cred = override_creds_light(override_cred);
|
||||
old_cred = override_creds(override_cred);
|
||||
WARN_ON_ONCE(old_cred != ovl_creds(dentry->d_sb));
|
||||
|
||||
return override_cred;
|
||||
|
@ -65,12 +65,12 @@ const struct cred *ovl_override_creds(struct super_block *sb)
|
||||
{
|
||||
struct ovl_fs *ofs = OVL_FS(sb);
|
||||
|
||||
return override_creds_light(ofs->creator_cred);
|
||||
return override_creds(ofs->creator_cred);
|
||||
}
|
||||
|
||||
void ovl_revert_creds(const struct cred *old_cred)
|
||||
{
|
||||
revert_creds_light(old_cred);
|
||||
revert_creds(old_cred);
|
||||
}
|
||||
|
||||
/*
|
||||
|
fs/pidfs.c
@ -1,5 +1,6 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <linux/anon_inodes.h>
|
||||
#include <linux/exportfs.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/cgroup.h>
|
||||
@ -23,6 +24,97 @@
|
||||
#include "internal.h"
|
||||
#include "mount.h"
|
||||
|
||||
static struct rb_root pidfs_ino_tree = RB_ROOT;
|
||||
|
||||
#if BITS_PER_LONG == 32
|
||||
static inline unsigned long pidfs_ino(u64 ino)
|
||||
{
|
||||
return lower_32_bits(ino);
|
||||
}
|
||||
|
||||
/* On 32 bit the generation number is the upper 32 bits. */
|
||||
static inline u32 pidfs_gen(u64 ino)
|
||||
{
|
||||
return upper_32_bits(ino);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* On 64 bit simply return ino. */
|
||||
static inline unsigned long pidfs_ino(u64 ino)
|
||||
{
|
||||
return ino;
|
||||
}
|
||||
|
||||
/* On 64 bit the generation number is 0. */
|
||||
static inline u32 pidfs_gen(u64 ino)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int pidfs_ino_cmp(struct rb_node *a, const struct rb_node *b)
|
||||
{
|
||||
struct pid *pid_a = rb_entry(a, struct pid, pidfs_node);
|
||||
struct pid *pid_b = rb_entry(b, struct pid, pidfs_node);
|
||||
u64 pid_ino_a = pid_a->ino;
|
||||
u64 pid_ino_b = pid_b->ino;
|
||||
|
||||
if (pid_ino_a < pid_ino_b)
|
||||
return -1;
|
||||
if (pid_ino_a > pid_ino_b)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void pidfs_add_pid(struct pid *pid)
|
||||
{
|
||||
static u64 pidfs_ino_nr = 2;
|
||||
|
||||
/*
|
||||
* On 64 bit nothing special happens. The 64bit number assigned
|
||||
* to struct pid is the inode number.
|
||||
*
|
||||
* On 32 bit the 64 bit number assigned to struct pid is split
|
||||
* into two 32 bit numbers. The lower 32 bits are used as the
|
||||
* inode number and the upper 32 bits are used as the inode
|
||||
* generation number.
|
||||
*
|
||||
* On 32 bit pidfs_ino() will return the lower 32 bit. When
|
||||
* pidfs_ino() returns zero a wrap around happened. When a
|
||||
* wraparound happens the 64 bit number will be incremented by 2
|
||||
* so inode numbering starts at 2 again.
|
||||
*
|
||||
* On 64 bit comparing two pidfds is as simple as comparing
|
||||
* inode numbers.
|
||||
*
|
||||
* When a wraparound happens on 32 bit multiple pidfds with the
|
||||
* same inode number are likely to exist (This isn't a problem
|
||||
* since before pidfs pidfds used the anonymous inode meaning
|
||||
* all pidfds had the same inode number.). Userspace can
|
||||
* reconstruct the 64 bit identifier by retrieving both the
|
||||
* inode number and the inode generation number to compare or
|
||||
* use file handles.
|
||||
*/
|
||||
if (pidfs_ino(pidfs_ino_nr) == 0)
|
||||
pidfs_ino_nr += 2;
|
||||
|
||||
pid->ino = pidfs_ino_nr;
|
||||
pid->stashed = NULL;
|
||||
pidfs_ino_nr++;
|
||||
|
||||
write_seqcount_begin(&pidmap_lock_seq);
|
||||
rb_find_add_rcu(&pid->pidfs_node, &pidfs_ino_tree, pidfs_ino_cmp);
|
||||
write_seqcount_end(&pidmap_lock_seq);
|
||||
}
|
||||
|
||||
void pidfs_remove_pid(struct pid *pid)
|
||||
{
|
||||
write_seqcount_begin(&pidmap_lock_seq);
|
||||
rb_erase(&pid->pidfs_node, &pidfs_ino_tree);
|
||||
write_seqcount_end(&pidmap_lock_seq);
|
||||
}
|
||||
|
||||
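The comment in pidfs_add_pid() describes how the 64-bit identifier is split on 32-bit: the low word becomes the inode number, the high word the generation, and the counter skips ahead by 2 whenever the low word wraps to zero. A small userspace sketch of that arithmetic (plain C, with the kernel's lower_32_bits()/upper_32_bits() helpers open-coded), showing that inode number plus generation reassemble the original 64-bit value:

#include <stdint.h>
#include <stdio.h>

/* Open-coded equivalents of the kernel helpers used by pidfs on 32-bit. */
static uint32_t pidfs_ino32(uint64_t ino) { return (uint32_t)ino; }
static uint32_t pidfs_gen32(uint64_t ino) { return (uint32_t)(ino >> 32); }

int main(void)
{
	/* Pretend the allocator is just about to wrap the low 32 bits. */
	uint64_t pidfs_ino_nr = 0x100000000ULL;		/* low word == 0 */

	if (pidfs_ino32(pidfs_ino_nr) == 0)
		pidfs_ino_nr += 2;			/* restart numbering at 2 */

	uint32_t ino = pidfs_ino32(pidfs_ino_nr);
	uint32_t gen = pidfs_gen32(pidfs_ino_nr);
	uint64_t rebuilt = ((uint64_t)gen << 32) | ino;

	printf("ino=%u gen=%u rebuilt=%#llx\n", ino, gen,
	       (unsigned long long)rebuilt);
	return rebuilt == pidfs_ino_nr ? 0 : 1;
}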
#ifdef CONFIG_PROC_FS
/**
 * pidfd_show_fdinfo - print information about a pidfd
@@ -190,6 +282,27 @@ static long pidfd_info(struct task_struct *task, unsigned int cmd, unsigned long
	return 0;
}

static bool pidfs_ioctl_valid(unsigned int cmd)
{
	switch (cmd) {
	case FS_IOC_GETVERSION:
	case PIDFD_GET_CGROUP_NAMESPACE:
	case PIDFD_GET_INFO:
	case PIDFD_GET_IPC_NAMESPACE:
	case PIDFD_GET_MNT_NAMESPACE:
	case PIDFD_GET_NET_NAMESPACE:
	case PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE:
	case PIDFD_GET_TIME_NAMESPACE:
	case PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE:
	case PIDFD_GET_UTS_NAMESPACE:
	case PIDFD_GET_USER_NAMESPACE:
	case PIDFD_GET_PID_NAMESPACE:
		return true;
	}

	return false;
}

static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct task_struct *task __free(put_task) = NULL;
@@ -198,6 +311,17 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
	struct ns_common *ns_common = NULL;
	struct pid_namespace *pid_ns;

	if (!pidfs_ioctl_valid(cmd))
		return -ENOIOCTLCMD;

	if (cmd == FS_IOC_GETVERSION) {
		if (!arg)
			return -EINVAL;

		__u32 __user *argp = (__u32 __user *)arg;
		return put_user(file_inode(file)->i_generation, argp);
	}

	task = get_pid_task(pid, PIDTYPE_PID);
	if (!task)
		return -ESRCH;
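Building on the FS_IOC_GETVERSION support added above, 32-bit userspace can combine st_ino with the inode generation to get a stable 64-bit identity for a pidfd. A hedged userspace sketch (assumes a kernel with this pidfs support and a glibc that provides the pidfd_open() wrapper in <sys/pidfd.h>; error handling trimmed):

#define _GNU_SOURCE
#include <linux/fs.h>		/* FS_IOC_GETVERSION */
#include <sys/ioctl.h>
#include <sys/pidfd.h>		/* pidfd_open(), glibc >= 2.36 assumed */
#include <sys/stat.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int pidfd = pidfd_open(getpid(), 0);
	struct stat st;
	__u32 gen = 0;

	if (pidfd < 0 || fstat(pidfd, &st) < 0)
		return 1;
	/* Generation number: 0 on 64-bit, upper half of the 64-bit id on 32-bit. */
	if (ioctl(pidfd, FS_IOC_GETVERSION, &gen) < 0)
		return 1;

	printf("pidfd inode=%llu generation=%u\n",
	       (unsigned long long)st.st_ino, gen);
	close(pidfd);
	return 0;
}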
@ -318,40 +442,6 @@ struct pid *pidfd_pid(const struct file *file)
|
||||
|
||||
static struct vfsmount *pidfs_mnt __ro_after_init;
|
||||
|
||||
#if BITS_PER_LONG == 32
|
||||
/*
|
||||
* Provide a fallback mechanism for 32-bit systems so processes remain
|
||||
* reliably comparable by inode number even on those systems.
|
||||
*/
|
||||
static DEFINE_IDA(pidfd_inum_ida);
|
||||
|
||||
static int pidfs_inum(struct pid *pid, unsigned long *ino)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = ida_alloc_range(&pidfd_inum_ida, RESERVED_PIDS + 1,
|
||||
UINT_MAX, GFP_ATOMIC);
|
||||
if (ret < 0)
|
||||
return -ENOSPC;
|
||||
|
||||
*ino = ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void pidfs_free_inum(unsigned long ino)
|
||||
{
|
||||
if (ino > 0)
|
||||
ida_free(&pidfd_inum_ida, ino);
|
||||
}
|
||||
#else
|
||||
static inline int pidfs_inum(struct pid *pid, unsigned long *ino)
|
||||
{
|
||||
*ino = pid->ino;
|
||||
return 0;
|
||||
}
|
||||
#define pidfs_free_inum(ino) ((void)(ino))
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The vfs falls back to simple_setattr() if i_op->setattr() isn't
|
||||
* implemented. Let's reject it completely until we have a clean
|
||||
@ -403,7 +493,6 @@ static void pidfs_evict_inode(struct inode *inode)
|
||||
|
||||
clear_inode(inode);
|
||||
put_pid(pid);
|
||||
pidfs_free_inum(inode->i_ino);
|
||||
}
|
||||
|
||||
static const struct super_operations pidfs_sops = {
|
||||
@ -427,19 +516,143 @@ static const struct dentry_operations pidfs_dentry_operations = {
|
||||
.d_prune = stashed_dentry_prune,
|
||||
};
|
||||
|
||||
static int pidfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
|
||||
struct inode *parent)
|
||||
{
|
||||
const struct pid *pid = inode->i_private;
|
||||
|
||||
if (*max_len < 2) {
|
||||
*max_len = 2;
|
||||
return FILEID_INVALID;
|
||||
}
|
||||
|
||||
*max_len = 2;
|
||||
*(u64 *)fh = pid->ino;
|
||||
return FILEID_KERNFS;
|
||||
}
|
||||
|
||||
static int pidfs_ino_find(const void *key, const struct rb_node *node)
|
||||
{
|
||||
const u64 pid_ino = *(u64 *)key;
|
||||
const struct pid *pid = rb_entry(node, struct pid, pidfs_node);
|
||||
|
||||
if (pid_ino < pid->ino)
|
||||
return -1;
|
||||
if (pid_ino > pid->ino)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Find a struct pid based on the inode number. */
|
||||
static struct pid *pidfs_ino_get_pid(u64 ino)
|
||||
{
|
||||
struct pid *pid;
|
||||
struct rb_node *node;
|
||||
unsigned int seq;
|
||||
|
||||
guard(rcu)();
|
||||
do {
|
||||
seq = read_seqcount_begin(&pidmap_lock_seq);
|
||||
node = rb_find_rcu(&ino, &pidfs_ino_tree, pidfs_ino_find);
|
||||
if (node)
|
||||
break;
|
||||
} while (read_seqcount_retry(&pidmap_lock_seq, seq));
|
||||
|
||||
if (!node)
|
||||
return NULL;
|
||||
|
||||
pid = rb_entry(node, struct pid, pidfs_node);
|
||||
|
||||
/* Within our pid namespace hierarchy? */
|
||||
if (pid_vnr(pid) == 0)
|
||||
return NULL;
|
||||
|
||||
return get_pid(pid);
|
||||
}
|
||||
|
||||
static struct dentry *pidfs_fh_to_dentry(struct super_block *sb,
|
||||
struct fid *fid, int fh_len,
|
||||
int fh_type)
|
||||
{
|
||||
int ret;
|
||||
u64 pid_ino;
|
||||
struct path path;
|
||||
struct pid *pid;
|
||||
|
||||
if (fh_len < 2)
|
||||
return NULL;
|
||||
|
||||
switch (fh_type) {
|
||||
case FILEID_KERNFS:
|
||||
pid_ino = *(u64 *)fid;
|
||||
break;
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
pid = pidfs_ino_get_pid(pid_ino);
|
||||
if (!pid)
|
||||
return NULL;
|
||||
|
||||
ret = path_from_stashed(&pid->stashed, pidfs_mnt, pid, &path);
|
||||
if (ret < 0)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
mntput(path.mnt);
|
||||
return path.dentry;
|
||||
}
|
||||
|
||||
/*
|
||||
* Make sure that we reject any nonsensical flags that users pass via
|
||||
* open_by_handle_at(). Note that PIDFD_THREAD is defined as O_EXCL, and
|
||||
* PIDFD_NONBLOCK as O_NONBLOCK.
|
||||
*/
|
||||
#define VALID_FILE_HANDLE_OPEN_FLAGS \
|
||||
(O_RDONLY | O_WRONLY | O_RDWR | O_NONBLOCK | O_CLOEXEC | O_EXCL)
|
||||
|
||||
static int pidfs_export_permission(struct handle_to_path_ctx *ctx,
|
||||
unsigned int oflags)
|
||||
{
|
||||
if (oflags & ~(VALID_FILE_HANDLE_OPEN_FLAGS | O_LARGEFILE))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* pidfd_ino_get_pid() will verify that the struct pid is part
|
||||
* of the caller's pid namespace hierarchy. No further
|
||||
* permission checks are needed.
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct file *pidfs_export_open(struct path *path, unsigned int oflags)
|
||||
{
|
||||
/*
|
||||
* Clear O_LARGEFILE as open_by_handle_at() forces it and raise
|
||||
* O_RDWR as pidfds always are.
|
||||
*/
|
||||
oflags &= ~O_LARGEFILE;
|
||||
return dentry_open(path, oflags | O_RDWR, current_cred());
|
||||
}
|
||||
|
||||
static const struct export_operations pidfs_export_operations = {
|
||||
.encode_fh = pidfs_encode_fh,
|
||||
.fh_to_dentry = pidfs_fh_to_dentry,
|
||||
.open = pidfs_export_open,
|
||||
.permission = pidfs_export_permission,
|
||||
};
|
||||
|
||||
static int pidfs_init_inode(struct inode *inode, void *data)
|
||||
{
|
||||
const struct pid *pid = data;
|
||||
|
||||
inode->i_private = data;
|
||||
inode->i_flags |= S_PRIVATE;
|
||||
inode->i_mode |= S_IRWXU;
|
||||
inode->i_op = &pidfs_inode_operations;
|
||||
inode->i_fop = &pidfs_file_operations;
|
||||
/*
|
||||
* Inode numbering for pidfs start at RESERVED_PIDS + 1. This
|
||||
* avoids collisions with the root inode which is 1 for pseudo
|
||||
* filesystems.
|
||||
*/
|
||||
return pidfs_inum(data, &inode->i_ino);
|
||||
inode->i_ino = pidfs_ino(pid->ino);
|
||||
inode->i_generation = pidfs_gen(pid->ino);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void pidfs_put_data(void *data)
|
||||
@ -462,6 +675,7 @@ static int pidfs_init_fs_context(struct fs_context *fc)
|
||||
return -ENOMEM;
|
||||
|
||||
ctx->ops = &pidfs_sops;
|
||||
ctx->eops = &pidfs_export_operations;
|
||||
ctx->dops = &pidfs_dentry_operations;
|
||||
fc->s_fs_info = (void *)&pidfs_stashed_ops;
|
||||
return 0;
|
||||
|
@@ -611,10 +611,10 @@ int propagate_umount(struct list_head *list)
			continue;
		} else if (child->mnt.mnt_flags & MNT_UMOUNT) {
			/*
			 * We have come accross an partially unmounted
			 * mount in list that has not been visited yet.
			 * Remember it has been visited and continue
			 * about our merry way.
			 * We have come across a partially unmounted
			 * mount in a list that has not been visited
			 * yet. Remember it has been visited and
			 * continue about our merry way.
			 */
			list_add_tail(&child->mnt_umounting, &visited);
			continue;
@ -65,7 +65,11 @@ static inline void kc_unxlate_dev_mem_ptr(phys_addr_t phys, void *virt)
|
||||
#endif
|
||||
|
||||
static LIST_HEAD(kclist_head);
|
||||
static DECLARE_RWSEM(kclist_lock);
|
||||
static int kcore_nphdr;
|
||||
static size_t kcore_phdrs_len;
|
||||
static size_t kcore_notes_len;
|
||||
static size_t kcore_data_offset;
|
||||
DEFINE_STATIC_PERCPU_RWSEM(kclist_lock);
|
||||
static int kcore_need_update = 1;
|
||||
|
||||
/*
|
||||
@ -101,33 +105,32 @@ void __init kclist_add(struct kcore_list *new, void *addr, size_t size,
|
||||
list_add_tail(&new->list, &kclist_head);
|
||||
}
|
||||
|
||||
static size_t get_kcore_size(int *nphdr, size_t *phdrs_len, size_t *notes_len,
|
||||
size_t *data_offset)
|
||||
static void update_kcore_size(void)
|
||||
{
|
||||
size_t try, size;
|
||||
struct kcore_list *m;
|
||||
|
||||
*nphdr = 1; /* PT_NOTE */
|
||||
kcore_nphdr = 1; /* PT_NOTE */
|
||||
size = 0;
|
||||
|
||||
list_for_each_entry(m, &kclist_head, list) {
|
||||
try = kc_vaddr_to_offset((size_t)m->addr + m->size);
|
||||
if (try > size)
|
||||
size = try;
|
||||
*nphdr = *nphdr + 1;
|
||||
kcore_nphdr++;
|
||||
}
|
||||
|
||||
*phdrs_len = *nphdr * sizeof(struct elf_phdr);
|
||||
*notes_len = (4 * sizeof(struct elf_note) +
|
||||
3 * ALIGN(sizeof(CORE_STR), 4) +
|
||||
VMCOREINFO_NOTE_NAME_BYTES +
|
||||
ALIGN(sizeof(struct elf_prstatus), 4) +
|
||||
ALIGN(sizeof(struct elf_prpsinfo), 4) +
|
||||
ALIGN(arch_task_struct_size, 4) +
|
||||
ALIGN(vmcoreinfo_size, 4));
|
||||
*data_offset = PAGE_ALIGN(sizeof(struct elfhdr) + *phdrs_len +
|
||||
*notes_len);
|
||||
return *data_offset + size;
|
||||
kcore_phdrs_len = kcore_nphdr * sizeof(struct elf_phdr);
|
||||
kcore_notes_len = (4 * sizeof(struct elf_note) +
|
||||
3 * ALIGN(sizeof(CORE_STR), 4) +
|
||||
VMCOREINFO_NOTE_NAME_BYTES +
|
||||
ALIGN(sizeof(struct elf_prstatus), 4) +
|
||||
ALIGN(sizeof(struct elf_prpsinfo), 4) +
|
||||
ALIGN(arch_task_struct_size, 4) +
|
||||
ALIGN(vmcoreinfo_size, 4));
|
||||
kcore_data_offset = PAGE_ALIGN(sizeof(struct elfhdr) + kcore_phdrs_len +
|
||||
kcore_notes_len);
|
||||
proc_root_kcore->size = kcore_data_offset + size;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HIGHMEM
|
||||
@ -270,12 +273,10 @@ static int kcore_update_ram(void)
|
||||
{
|
||||
LIST_HEAD(list);
|
||||
LIST_HEAD(garbage);
|
||||
int nphdr;
|
||||
size_t phdrs_len, notes_len, data_offset;
|
||||
struct kcore_list *tmp, *pos;
|
||||
int ret = 0;
|
||||
|
||||
down_write(&kclist_lock);
|
||||
percpu_down_write(&kclist_lock);
|
||||
if (!xchg(&kcore_need_update, 0))
|
||||
goto out;
|
||||
|
||||
@ -293,11 +294,10 @@ static int kcore_update_ram(void)
|
||||
}
|
||||
list_splice_tail(&list, &kclist_head);
|
||||
|
||||
proc_root_kcore->size = get_kcore_size(&nphdr, &phdrs_len, ¬es_len,
|
||||
&data_offset);
|
||||
update_kcore_size();
|
||||
|
||||
out:
|
||||
up_write(&kclist_lock);
|
||||
percpu_up_write(&kclist_lock);
|
||||
list_for_each_entry_safe(pos, tmp, &garbage, list) {
|
||||
list_del(&pos->list);
|
||||
kfree(pos);
|
||||
@ -326,27 +326,24 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
|
||||
struct file *file = iocb->ki_filp;
|
||||
char *buf = file->private_data;
|
||||
loff_t *fpos = &iocb->ki_pos;
|
||||
size_t phdrs_offset, notes_offset, data_offset;
|
||||
size_t phdrs_offset, notes_offset;
|
||||
size_t page_offline_frozen = 1;
|
||||
size_t phdrs_len, notes_len;
|
||||
struct kcore_list *m;
|
||||
size_t tsz;
|
||||
int nphdr;
|
||||
unsigned long start;
|
||||
size_t buflen = iov_iter_count(iter);
|
||||
size_t orig_buflen = buflen;
|
||||
int ret = 0;
|
||||
|
||||
down_read(&kclist_lock);
|
||||
percpu_down_read(&kclist_lock);
|
||||
/*
|
||||
* Don't race against drivers that set PageOffline() and expect no
|
||||
* further page access.
|
||||
*/
|
||||
page_offline_freeze();
|
||||
|
||||
get_kcore_size(&nphdr, &phdrs_len, ¬es_len, &data_offset);
|
||||
phdrs_offset = sizeof(struct elfhdr);
|
||||
notes_offset = phdrs_offset + phdrs_len;
|
||||
notes_offset = phdrs_offset + kcore_phdrs_len;
|
||||
|
||||
/* ELF file header. */
|
||||
if (buflen && *fpos < sizeof(struct elfhdr)) {
|
||||
@ -368,7 +365,7 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
|
||||
.e_flags = ELF_CORE_EFLAGS,
|
||||
.e_ehsize = sizeof(struct elfhdr),
|
||||
.e_phentsize = sizeof(struct elf_phdr),
|
||||
.e_phnum = nphdr,
|
||||
.e_phnum = kcore_nphdr,
|
||||
};
|
||||
|
||||
tsz = min_t(size_t, buflen, sizeof(struct elfhdr) - *fpos);
|
||||
@ -382,10 +379,10 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
|
||||
}
|
||||
|
||||
/* ELF program headers. */
|
||||
if (buflen && *fpos < phdrs_offset + phdrs_len) {
|
||||
if (buflen && *fpos < phdrs_offset + kcore_phdrs_len) {
|
||||
struct elf_phdr *phdrs, *phdr;
|
||||
|
||||
phdrs = kzalloc(phdrs_len, GFP_KERNEL);
|
||||
phdrs = kzalloc(kcore_phdrs_len, GFP_KERNEL);
|
||||
if (!phdrs) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
@ -393,13 +390,14 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
|
||||
|
||||
phdrs[0].p_type = PT_NOTE;
|
||||
phdrs[0].p_offset = notes_offset;
|
||||
phdrs[0].p_filesz = notes_len;
|
||||
phdrs[0].p_filesz = kcore_notes_len;
|
||||
|
||||
phdr = &phdrs[1];
|
||||
list_for_each_entry(m, &kclist_head, list) {
|
||||
phdr->p_type = PT_LOAD;
|
||||
phdr->p_flags = PF_R | PF_W | PF_X;
|
||||
phdr->p_offset = kc_vaddr_to_offset(m->addr) + data_offset;
|
||||
phdr->p_offset = kc_vaddr_to_offset(m->addr)
|
||||
+ kcore_data_offset;
|
||||
phdr->p_vaddr = (size_t)m->addr;
|
||||
if (m->type == KCORE_RAM)
|
||||
phdr->p_paddr = __pa(m->addr);
|
||||
@ -412,7 +410,8 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
|
||||
phdr++;
|
||||
}
|
||||
|
||||
tsz = min_t(size_t, buflen, phdrs_offset + phdrs_len - *fpos);
|
||||
tsz = min_t(size_t, buflen,
|
||||
phdrs_offset + kcore_phdrs_len - *fpos);
|
||||
if (copy_to_iter((char *)phdrs + *fpos - phdrs_offset, tsz,
|
||||
iter) != tsz) {
|
||||
kfree(phdrs);
|
||||
@ -426,7 +425,7 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
|
||||
}
|
||||
|
||||
/* ELF note segment. */
|
||||
if (buflen && *fpos < notes_offset + notes_len) {
|
||||
if (buflen && *fpos < notes_offset + kcore_notes_len) {
|
||||
struct elf_prstatus prstatus = {};
|
||||
struct elf_prpsinfo prpsinfo = {
|
||||
.pr_sname = 'R',
|
||||
@ -438,7 +437,7 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
|
||||
strscpy(prpsinfo.pr_psargs, saved_command_line,
|
||||
sizeof(prpsinfo.pr_psargs));
|
||||
|
||||
notes = kzalloc(notes_len, GFP_KERNEL);
|
||||
notes = kzalloc(kcore_notes_len, GFP_KERNEL);
|
||||
if (!notes) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
@ -459,9 +458,10 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
|
||||
*/
|
||||
append_kcore_note(notes, &i, VMCOREINFO_NOTE_NAME, 0,
|
||||
vmcoreinfo_data,
|
||||
min(vmcoreinfo_size, notes_len - i));
|
||||
min(vmcoreinfo_size, kcore_notes_len - i));
|
||||
|
||||
tsz = min_t(size_t, buflen, notes_offset + notes_len - *fpos);
|
||||
tsz = min_t(size_t, buflen,
|
||||
notes_offset + kcore_notes_len - *fpos);
|
||||
if (copy_to_iter(notes + *fpos - notes_offset, tsz, iter) != tsz) {
|
||||
kfree(notes);
|
||||
ret = -EFAULT;
|
||||
@ -477,7 +477,7 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
|
||||
* Check to see if our file offset matches with any of
|
||||
* the addresses in the elf_phdr on our list.
|
||||
*/
|
||||
start = kc_offset_to_vaddr(*fpos - data_offset);
|
||||
start = kc_offset_to_vaddr(*fpos - kcore_data_offset);
|
||||
if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
|
||||
tsz = buflen;
|
||||
|
||||
@ -626,7 +626,7 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
|
||||
|
||||
out:
|
||||
page_offline_thaw();
|
||||
up_read(&kclist_lock);
|
||||
percpu_up_read(&kclist_lock);
|
||||
if (ret)
|
||||
return ret;
|
||||
return orig_buflen - buflen;
|
||||
@ -663,6 +663,7 @@ static int release_kcore(struct inode *inode, struct file *file)
|
||||
}
|
||||
|
||||
static const struct proc_ops kcore_proc_ops = {
|
||||
.proc_flags = PROC_ENTRY_PERMANENT,
|
||||
.proc_read_iter = read_kcore_iter,
|
||||
.proc_open = open_kcore,
|
||||
.proc_release = release_kcore,
|
||||
|
@@ -83,7 +83,7 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl
	if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
		res = ns_get_name(name, sizeof(name), task, ns_ops);
		if (res >= 0)
			res = readlink_copy(buffer, buflen, name);
			res = readlink_copy(buffer, buflen, name, strlen(name));
	}
	put_task_struct(task);
	return res;
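readlink_copy() now takes the link length explicitly (see the updated prototype in the fs.h hunk further down), so callers that already know it, for instance from a cached link, can pass it through rather than have it re-measured. A small, hedged sketch of a caller using the new signature; example_readlink() is illustrative, only readlink_copy() and strlen() are real:

#include <linux/fs.h>
#include <linux/string.h>

static int example_readlink(char __user *buffer, int buflen, const char *link)
{
	/* The length is supplied by the caller instead of being recomputed. */
	return readlink_copy(buffer, buflen, link, strlen(link));
}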
@ -1258,14 +1258,6 @@ CIFS_open(const unsigned int xid, struct cifs_open_parms *oparms, int *oplock,
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void cifs_readv_worker(struct work_struct *work)
|
||||
{
|
||||
struct cifs_io_subrequest *rdata =
|
||||
container_of(work, struct cifs_io_subrequest, subreq.work);
|
||||
|
||||
netfs_read_subreq_terminated(&rdata->subreq, rdata->result, false);
|
||||
}
|
||||
|
||||
static void
|
||||
cifs_readv_callback(struct mid_q_entry *mid)
|
||||
{
|
||||
@ -1328,11 +1320,13 @@ cifs_readv_callback(struct mid_q_entry *mid)
|
||||
__set_bit(NETFS_SREQ_HIT_EOF, &rdata->subreq.flags);
|
||||
rdata->result = 0;
|
||||
}
|
||||
if (rdata->got_bytes)
|
||||
__set_bit(NETFS_SREQ_MADE_PROGRESS, &rdata->subreq.flags);
|
||||
}
|
||||
|
||||
rdata->credits.value = 0;
|
||||
rdata->subreq.error = rdata->result;
|
||||
rdata->subreq.transferred += rdata->got_bytes;
|
||||
INIT_WORK(&rdata->subreq.work, cifs_readv_worker);
|
||||
queue_work(cifsiod_wq, &rdata->subreq.work);
|
||||
release_mid(mid);
|
||||
add_credits(server, &credits, 0);
|
||||
|
@ -227,7 +227,8 @@ static void cifs_issue_read(struct netfs_io_subrequest *subreq)
|
||||
return;
|
||||
|
||||
failed:
|
||||
netfs_read_subreq_terminated(subreq, rc, false);
|
||||
subreq->error = rc;
|
||||
netfs_read_subreq_terminated(subreq);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -4388,7 +4388,7 @@ static struct folio_queue *cifs_alloc_folioq_buffer(ssize_t size)
|
||||
p = kmalloc(sizeof(*p), GFP_NOFS);
|
||||
if (!p)
|
||||
goto nomem;
|
||||
folioq_init(p);
|
||||
folioq_init(p, 0);
|
||||
if (tail) {
|
||||
tail->next = p;
|
||||
p->prev = tail;
|
||||
|
@ -4500,14 +4500,6 @@ smb2_new_read_req(void **buf, unsigned int *total_len,
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void smb2_readv_worker(struct work_struct *work)
|
||||
{
|
||||
struct cifs_io_subrequest *rdata =
|
||||
container_of(work, struct cifs_io_subrequest, subreq.work);
|
||||
|
||||
netfs_read_subreq_terminated(&rdata->subreq, rdata->result, false);
|
||||
}
|
||||
|
||||
static void
|
||||
smb2_readv_callback(struct mid_q_entry *mid)
|
||||
{
|
||||
@ -4615,15 +4607,17 @@ smb2_readv_callback(struct mid_q_entry *mid)
|
||||
__set_bit(NETFS_SREQ_HIT_EOF, &rdata->subreq.flags);
|
||||
rdata->result = 0;
|
||||
}
|
||||
if (rdata->got_bytes)
|
||||
__set_bit(NETFS_SREQ_MADE_PROGRESS, &rdata->subreq.flags);
|
||||
}
|
||||
trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, rdata->credits.value,
|
||||
server->credits, server->in_flight,
|
||||
0, cifs_trace_rw_credits_read_response_clear);
|
||||
rdata->credits.value = 0;
|
||||
rdata->subreq.error = rdata->result;
|
||||
rdata->subreq.transferred += rdata->got_bytes;
|
||||
trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_progress);
|
||||
INIT_WORK(&rdata->subreq.work, smb2_readv_worker);
|
||||
queue_work(cifsiod_wq, &rdata->subreq.work);
|
||||
netfs_read_subreq_terminated(&rdata->subreq);
|
||||
release_mid(mid);
|
||||
trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, 0,
|
||||
server->credits, server->in_flight,
|
||||
|
@@ -781,10 +781,6 @@ int __ksmbd_override_fsids(struct ksmbd_work *work,

	WARN_ON(work->saved_cred);
	work->saved_cred = override_creds(cred);
	if (!work->saved_cred) {
		abort_creds(cred);
		return -EINVAL;
	}
	return 0;
}

@@ -796,13 +792,11 @@ int ksmbd_override_fsids(struct ksmbd_work *work)
void ksmbd_revert_fsids(struct ksmbd_work *work)
{
	const struct cred *cred;

	WARN_ON(!work->saved_cred);

	cred = current_cred();
	revert_creds(work->saved_cred);
	put_cred(cred);
	cred = revert_creds(work->saved_cred);
	work->saved_cred = NULL;
	put_cred(cred);
}

__le32 smb_map_generic_desired_access(__le32 daccess)
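The ksmbd change above depends on revert_creds() handing back the credentials that were in force, so the caller can drop the reference it obtained from prepare_creds() afterwards. A hedged sketch of that prepare/override/revert/put lifecycle (example_run_as() is illustrative):

#include <linux/cred.h>

static int example_run_as(struct cred *new_cred)
{
	const struct cred *saved, *used;

	/* new_cred came from prepare_creds(); we own exactly one reference. */
	saved = override_creds(new_cred);	/* no extra reference is taken */

	/* ... act with new_cred as current->cred ... */

	/* Returns the creds that were current, i.e. new_cred. */
	used = revert_creds(saved);
	put_cred(used);				/* drop the prepare_creds() reference */
	return 0;
}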
@ -155,8 +155,6 @@ extern struct cred *prepare_creds(void);
|
||||
extern struct cred *prepare_exec_creds(void);
|
||||
extern int commit_creds(struct cred *);
|
||||
extern void abort_creds(struct cred *);
|
||||
extern const struct cred *override_creds(const struct cred *);
|
||||
extern void revert_creds(const struct cred *);
|
||||
extern struct cred *prepare_kernel_cred(struct task_struct *);
|
||||
extern int set_security_override(struct cred *, u32);
|
||||
extern int set_security_override_from_ctx(struct cred *, const char *);
|
||||
@ -172,12 +170,7 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred)
|
||||
cred->cap_inheritable));
|
||||
}
|
||||
|
||||
/*
|
||||
* Override creds without bumping reference count. Caller must ensure
|
||||
* reference remains valid or has taken reference. Almost always not the
|
||||
* interface you want. Use override_creds()/revert_creds() instead.
|
||||
*/
|
||||
static inline const struct cred *override_creds_light(const struct cred *override_cred)
|
||||
static inline const struct cred *override_creds(const struct cred *override_cred)
|
||||
{
|
||||
const struct cred *old = current->cred;
|
||||
|
||||
@ -185,35 +178,12 @@ static inline const struct cred *override_creds_light(const struct cred *overrid
|
||||
return old;
|
||||
}
|
||||
|
||||
static inline void revert_creds_light(const struct cred *revert_cred)
|
||||
static inline const struct cred *revert_creds(const struct cred *revert_cred)
|
||||
{
|
||||
const struct cred *override_cred = current->cred;
|
||||
|
||||
rcu_assign_pointer(current->cred, revert_cred);
|
||||
}
|
||||
|
||||
/**
|
||||
* get_new_cred_many - Get references on a new set of credentials
|
||||
* @cred: The new credentials to reference
|
||||
* @nr: Number of references to acquire
|
||||
*
|
||||
* Get references on the specified set of new credentials. The caller must
|
||||
* release all acquired references.
|
||||
*/
|
||||
static inline struct cred *get_new_cred_many(struct cred *cred, int nr)
|
||||
{
|
||||
atomic_long_add(nr, &cred->usage);
|
||||
return cred;
|
||||
}
|
||||
|
||||
/**
|
||||
* get_new_cred - Get a reference on a new set of credentials
|
||||
* @cred: The new credentials to reference
|
||||
*
|
||||
* Get a reference on the specified set of new credentials. The caller must
|
||||
* release the reference.
|
||||
*/
|
||||
static inline struct cred *get_new_cred(struct cred *cred)
|
||||
{
|
||||
return get_new_cred_many(cred, 1);
|
||||
return override_cred;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -236,7 +206,8 @@ static inline const struct cred *get_cred_many(const struct cred *cred, int nr)
|
||||
if (!cred)
|
||||
return cred;
|
||||
nonconst_cred->non_rcu = 0;
|
||||
return get_new_cred_many(nonconst_cred, nr);
|
||||
atomic_long_add(nr, &nonconst_cred->usage);
|
||||
return cred;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -3,6 +3,7 @@
|
||||
#define LINUX_EXPORTFS_H 1
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/path.h>
|
||||
|
||||
struct dentry;
|
||||
struct iattr;
|
||||
@ -156,6 +157,17 @@ struct fid {
|
||||
};
|
||||
};
|
||||
|
||||
enum handle_to_path_flags {
|
||||
HANDLE_CHECK_PERMS = (1 << 0),
|
||||
HANDLE_CHECK_SUBTREE = (1 << 1),
|
||||
};
|
||||
|
||||
struct handle_to_path_ctx {
|
||||
struct path root;
|
||||
enum handle_to_path_flags flags;
|
||||
unsigned int fh_flags;
|
||||
};
|
||||
|
||||
#define EXPORT_FH_CONNECTABLE 0x1 /* Encode file handle with parent */
|
||||
#define EXPORT_FH_FID 0x2 /* File handle may be non-decodeable */
|
||||
#define EXPORT_FH_DIR_ONLY 0x4 /* Only decode file handle for a directory */
|
||||
@ -225,6 +237,12 @@ struct fid {
|
||||
* is also a directory. In the event that it cannot be found, or storage
|
||||
* space cannot be allocated, a %ERR_PTR should be returned.
|
||||
*
|
||||
* permission:
|
||||
* Allow filesystems to specify a custom permission function.
|
||||
*
|
||||
* open:
|
||||
* Allow filesystems to specify a custom open function.
|
||||
*
|
||||
* commit_metadata:
|
||||
* @commit_metadata should commit metadata changes to stable storage.
|
||||
*
|
||||
@ -251,6 +269,8 @@ struct export_operations {
|
||||
bool write, u32 *device_generation);
|
||||
int (*commit_blocks)(struct inode *inode, struct iomap *iomaps,
|
||||
int nr_iomaps, struct iattr *iattr);
|
||||
int (*permission)(struct handle_to_path_ctx *ctx, unsigned int oflags);
|
||||
struct file * (*open)(struct path *path, unsigned int oflags);
|
||||
#define EXPORT_OP_NOWCC (0x1) /* don't collect v3 wcc data */
|
||||
#define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */
|
||||
#define EXPORT_OP_CLOSE_BEFORE_UNLINK (0x4) /* close files before unlink */
|
||||
|
@@ -5,12 +5,18 @@
#include <uapi/linux/fiemap.h>
#include <linux/fs.h>

/**
 * struct fiemap_extent_info - fiemap request to a filesystem
 * @fi_flags: Flags as passed from user
 * @fi_extents_mapped: Number of mapped extents
 * @fi_extents_max: Size of fiemap_extent array
 * @fi_extents_start: Start of fiemap_extent array
 */
struct fiemap_extent_info {
	unsigned int fi_flags;		/* Flags as passed from user */
	unsigned int fi_extents_mapped;	/* Number of mapped extents */
	unsigned int fi_extents_max;	/* Size of fiemap_extent array */
	struct fiemap_extent __user *fi_extents_start; /* Start of
							  fiemap_extent array */
	unsigned int fi_flags;
	unsigned int fi_extents_mapped;
	unsigned int fi_extents_max;
	struct fiemap_extent __user *fi_extents_start;
};

int fiemap_prep(struct inode *inode, struct fiemap_extent_info *fieinfo,
@ -37,16 +37,20 @@ struct folio_queue {
|
||||
#if PAGEVEC_SIZE > BITS_PER_LONG
|
||||
#error marks is not big enough
|
||||
#endif
|
||||
unsigned int rreq_id;
|
||||
unsigned int debug_id;
|
||||
};
|
||||
|
||||
/**
|
||||
* folioq_init - Initialise a folio queue segment
|
||||
* @folioq: The segment to initialise
|
||||
* @rreq_id: The request identifier to use in tracelines.
|
||||
*
|
||||
* Initialise a folio queue segment. Note that the folio pointers are
|
||||
* left uninitialised.
|
||||
* Initialise a folio queue segment and set an identifier to be used in traces.
|
||||
*
|
||||
* Note that the folio pointers are left uninitialised.
|
||||
*/
|
||||
static inline void folioq_init(struct folio_queue *folioq)
|
||||
static inline void folioq_init(struct folio_queue *folioq, unsigned int rreq_id)
|
||||
{
|
||||
folio_batch_init(&folioq->vec);
|
||||
folioq->next = NULL;
|
||||
@ -54,6 +58,8 @@ static inline void folioq_init(struct folio_queue *folioq)
|
||||
folioq->marks = 0;
|
||||
folioq->marks2 = 0;
|
||||
folioq->marks3 = 0;
|
||||
folioq->rreq_id = rreq_id;
|
||||
folioq->debug_id = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -659,6 +659,7 @@ is_uncached_acl(struct posix_acl *acl)
|
||||
#define IOP_XATTR 0x0008
|
||||
#define IOP_DEFAULT_READLINK 0x0010
|
||||
#define IOP_MGTIME 0x0020
|
||||
#define IOP_CACHED_LINK 0x0040
|
||||
|
||||
/*
|
||||
* Keep mostly read-only and often accessed (especially for
|
||||
@ -756,7 +757,10 @@ struct inode {
|
||||
};
|
||||
struct file_lock_context *i_flctx;
|
||||
struct address_space i_data;
|
||||
struct list_head i_devices;
|
||||
union {
|
||||
struct list_head i_devices;
|
||||
int i_linklen;
|
||||
};
|
||||
union {
|
||||
struct pipe_inode_info *i_pipe;
|
||||
struct cdev *i_cdev;
|
||||
@ -782,6 +786,13 @@ struct inode {
|
||||
void *i_private; /* fs or device private pointer */
|
||||
} __randomize_layout;
|
||||
|
||||
static inline void inode_set_cached_link(struct inode *inode, char *link, int linklen)
|
||||
{
|
||||
inode->i_link = link;
|
||||
inode->i_linklen = linklen;
|
||||
inode->i_opflags |= IOP_CACHED_LINK;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get bit address from inode->i_state to use with wait_var_event()
|
||||
* infrastructre.
|
||||
@ -3409,7 +3420,7 @@ extern const struct file_operations generic_ro_fops;
|
||||
|
||||
#define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
|
||||
|
||||
extern int readlink_copy(char __user *, int, const char *);
|
||||
extern int readlink_copy(char __user *, int, const char *, int);
|
||||
extern int page_readlink(struct dentry *, char __user *, int);
|
||||
extern const char *page_get_link(struct dentry *, struct inode *,
|
||||
struct delayed_call *);
|
||||
@ -3526,7 +3537,6 @@ struct offset_ctx {
|
||||
void simple_offset_init(struct offset_ctx *octx);
|
||||
int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry);
|
||||
void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry);
|
||||
int simple_offset_empty(struct dentry *dentry);
|
||||
int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry,
|
||||
struct inode *new_dir, struct dentry *new_dentry);
|
||||
int simple_offset_rename_exchange(struct inode *old_dir,
|
||||
|
@@ -50,7 +50,7 @@ struct path;
#define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME )

#define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \
			    MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED | MNT_ONRB)
			    MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED)

#define MNT_INTERNAL	0x4000

@@ -64,7 +64,6 @@ struct path;
#define MNT_SYNC_UMOUNT		0x2000000
#define MNT_MARKED		0x4000000
#define MNT_UMOUNT		0x8000000
#define MNT_ONRB		0x10000000

struct vfsmount {
	struct dentry *mnt_root;	/* root of the mounted tree */
@@ -76,7 +75,7 @@ struct vfsmount {
static inline struct mnt_idmap *mnt_idmap(const struct vfsmount *mnt)
{
	/* Pairs with smp_store_release() in do_idmap_mount(). */
	return smp_load_acquire(&mnt->mnt_idmap);
	return READ_ONCE(mnt->mnt_idmap);
}

extern int mnt_want_write(struct vfsmount *mnt);
@ -18,9 +18,11 @@
|
||||
#include <linux/fs.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/uio.h>
|
||||
#include <linux/rolling_buffer.h>
|
||||
|
||||
enum netfs_sreq_ref_trace;
|
||||
typedef struct mempool_s mempool_t;
|
||||
struct folio_queue;
|
||||
|
||||
/**
|
||||
* folio_start_private_2 - Start an fscache write on a folio. [DEPRECATED]
|
||||
@ -71,6 +73,7 @@ struct netfs_inode {
|
||||
#define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */
|
||||
#define NETFS_ICTX_WRITETHROUGH 2 /* Write-through caching */
|
||||
#define NETFS_ICTX_MODIFIED_ATTR 3 /* Indicate change in mtime/ctime */
|
||||
#define NETFS_ICTX_SINGLE_NO_UPLOAD 4 /* Monolithic payload, cache but no upload */
|
||||
};
|
||||
|
||||
/*
|
||||
@ -178,23 +181,17 @@ struct netfs_io_subrequest {
|
||||
unsigned long long start; /* Where to start the I/O */
|
||||
size_t len; /* Size of the I/O */
|
||||
size_t transferred; /* Amount of data transferred */
|
||||
size_t consumed; /* Amount of read data consumed */
|
||||
size_t prev_donated; /* Amount of data donated from previous subreq */
|
||||
size_t next_donated; /* Amount of data donated from next subreq */
|
||||
refcount_t ref;
|
||||
short error; /* 0 or error that occurred */
|
||||
unsigned short debug_index; /* Index in list (for debugging output) */
|
||||
unsigned int nr_segs; /* Number of segs in io_iter */
|
||||
enum netfs_io_source source; /* Where to read from/write to */
|
||||
unsigned char stream_nr; /* I/O stream this belongs to */
|
||||
unsigned char curr_folioq_slot; /* Folio currently being read */
|
||||
unsigned char curr_folio_order; /* Order of folio */
|
||||
struct folio_queue *curr_folioq; /* Queue segment in which current folio resides */
|
||||
unsigned long flags;
|
||||
#define NETFS_SREQ_COPY_TO_CACHE 0 /* Set if should copy the data to the cache */
|
||||
#define NETFS_SREQ_CLEAR_TAIL 1 /* Set if the rest of the read should be cleared */
|
||||
#define NETFS_SREQ_SEEK_DATA_READ 3 /* Set if ->read() should SEEK_DATA first */
|
||||
#define NETFS_SREQ_NO_PROGRESS 4 /* Set if we didn't manage to read any data */
|
||||
#define NETFS_SREQ_MADE_PROGRESS 4 /* Set if we managed to read more data */
|
||||
#define NETFS_SREQ_ONDEMAND 5 /* Set if it's from on-demand read mode */
|
||||
#define NETFS_SREQ_BOUNDARY 6 /* Set if ends on hard boundary (eg. ceph object) */
|
||||
#define NETFS_SREQ_HIT_EOF 7 /* Set if short due to EOF */
|
||||
@ -208,9 +205,11 @@ enum netfs_io_origin {
|
||||
NETFS_READAHEAD, /* This read was triggered by readahead */
|
||||
NETFS_READPAGE, /* This read is a synchronous read */
|
||||
NETFS_READ_GAPS, /* This read is a synchronous read to fill gaps */
|
||||
NETFS_READ_SINGLE, /* This read should be treated as a single object */
|
||||
NETFS_READ_FOR_WRITE, /* This read is to prepare a write */
|
||||
NETFS_DIO_READ, /* This is a direct I/O read */
|
||||
NETFS_WRITEBACK, /* This write was triggered by writepages */
|
||||
NETFS_WRITEBACK_SINGLE, /* This monolithic write was triggered by writepages */
|
||||
NETFS_WRITETHROUGH, /* This write was made by netfs_perform_write() */
|
||||
NETFS_UNBUFFERED_WRITE, /* This is an unbuffered write */
|
||||
NETFS_DIO_WRITE, /* This is a direct I/O write */
|
||||
@ -233,14 +232,13 @@ struct netfs_io_request {
|
||||
struct netfs_cache_resources cache_resources;
|
||||
struct readahead_control *ractl; /* Readahead descriptor */
|
||||
struct list_head proc_link; /* Link in netfs_iorequests */
|
||||
struct list_head subrequests; /* Contributory I/O operations */
|
||||
struct netfs_io_stream io_streams[2]; /* Streams of parallel I/O operations */
|
||||
#define NR_IO_STREAMS 2 //wreq->nr_io_streams
|
||||
struct netfs_group *group; /* Writeback group being written back */
|
||||
struct folio_queue *buffer; /* Head of I/O buffer */
|
||||
struct folio_queue *buffer_tail; /* Tail of I/O buffer */
|
||||
struct iov_iter iter; /* Unencrypted-side iterator */
|
||||
struct iov_iter io_iter; /* I/O (Encrypted-side) iterator */
|
||||
struct rolling_buffer buffer; /* Unencrypted buffer */
|
||||
#define NETFS_ROLLBUF_PUT_MARK ROLLBUF_MARK_1
|
||||
#define NETFS_ROLLBUF_PAGECACHE_MARK ROLLBUF_MARK_2
|
||||
wait_queue_head_t waitq; /* Processor waiter */
|
||||
void *netfs_priv; /* Private data for the netfs */
|
||||
void *netfs_priv2; /* Private data for the netfs */
|
||||
struct bio_vec *direct_bv; /* DIO buffer list (when handling iovec-iter) */
|
||||
@ -251,29 +249,29 @@ struct netfs_io_request {
|
||||
atomic_t subreq_counter; /* Next subreq->debug_index */
|
||||
unsigned int nr_group_rel; /* Number of refs to release on ->group */
|
||||
spinlock_t lock; /* Lock for queuing subreqs */
|
||||
atomic_t nr_outstanding; /* Number of ops in progress */
|
||||
unsigned long long submitted; /* Amount submitted for I/O so far */
|
||||
unsigned long long len; /* Length of the request */
|
||||
size_t transferred; /* Amount to be indicated as transferred */
|
||||
long error; /* 0 or error that occurred */
|
||||
enum netfs_io_origin origin; /* Origin of the request */
|
||||
bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */
|
||||
u8 buffer_head_slot; /* First slot in ->buffer */
|
||||
u8 buffer_tail_slot; /* Next slot in ->buffer_tail */
|
||||
unsigned long long i_size; /* Size of the file */
|
||||
unsigned long long start; /* Start position */
|
||||
atomic64_t issued_to; /* Write issuer folio cursor */
|
||||
unsigned long long collected_to; /* Point we've collected to */
|
||||
unsigned long long cleaned_to; /* Position we've cleaned folios to */
|
||||
unsigned long long abandon_to; /* Position to abandon folios to */
|
||||
pgoff_t no_unlock_folio; /* Don't unlock this folio after read */
|
||||
size_t prev_donated; /* Fallback for subreq->prev_donated */
|
||||
unsigned char front_folio_order; /* Order (size) of front folio */
|
||||
refcount_t ref;
|
||||
unsigned long flags;
|
||||
#define NETFS_RREQ_OFFLOAD_COLLECTION 0 /* Offload collection to workqueue */
|
||||
#define NETFS_RREQ_COPY_TO_CACHE 1 /* Need to write to the cache */
|
||||
#define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */
|
||||
#define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */
|
||||
#define NETFS_RREQ_FAILED 4 /* The request failed */
|
||||
#define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */
|
||||
#define NETFS_RREQ_FOLIO_COPY_TO_CACHE 6 /* Copy current folio to cache from read */
|
||||
#define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */
|
||||
#define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */
|
||||
#define NETFS_RREQ_BLOCKED 10 /* We blocked */
|
||||
@ -410,6 +408,13 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
|
||||
struct netfs_group *netfs_group);
|
||||
ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from);
|
||||
|
||||
/* Single, monolithic object read/write API. */
|
||||
void netfs_single_mark_inode_dirty(struct inode *inode);
|
||||
ssize_t netfs_read_single(struct inode *inode, struct file *file, struct iov_iter *iter);
|
||||
int netfs_writeback_single(struct address_space *mapping,
|
||||
struct writeback_control *wbc,
|
||||
struct iov_iter *iter);
|
||||
|
||||
/* Address operations API */
|
||||
struct readahead_control;
|
||||
void netfs_readahead(struct readahead_control *);
|
||||
@ -429,10 +434,8 @@ bool netfs_release_folio(struct folio *folio, gfp_t gfp);
|
||||
vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group);
|
||||
|
||||
/* (Sub)request management API. */
|
||||
void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq,
|
||||
bool was_async);
|
||||
void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq,
|
||||
int error, bool was_async);
|
||||
void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq);
|
||||
void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq);
|
||||
void netfs_get_subrequest(struct netfs_io_subrequest *subreq,
|
||||
enum netfs_sreq_ref_trace what);
|
||||
void netfs_put_subrequest(struct netfs_io_subrequest *subreq,
|
||||
@ -454,6 +457,18 @@ void netfs_end_io_write(struct inode *inode);
|
||||
int netfs_start_io_direct(struct inode *inode);
|
||||
void netfs_end_io_direct(struct inode *inode);
|
||||
|
||||
/* Miscellaneous APIs. */
|
||||
struct folio_queue *netfs_folioq_alloc(unsigned int rreq_id, gfp_t gfp,
|
||||
unsigned int /*enum netfs_folioq_trace*/ trace);
|
||||
void netfs_folioq_free(struct folio_queue *folioq,
|
||||
unsigned int /*enum netfs_trace_folioq*/ trace);
|
||||
|
||||
/* Buffer wrangling helpers API. */
|
||||
int netfs_alloc_folioq_buffer(struct address_space *mapping,
|
||||
struct folio_queue **_buffer,
|
||||
size_t *_cur_size, ssize_t size, gfp_t gfp);
|
||||
void netfs_free_folioq_buffer(struct folio_queue *fq);
|
||||
|
||||
/**
|
||||
* netfs_inode - Get the netfs inode context from the inode
|
||||
* @inode: The inode to query
|
||||
|
@@ -59,6 +59,7 @@ struct pid
	spinlock_t lock;
	struct dentry *stashed;
	u64 ino;
	struct rb_node pidfs_node;
	/* lists of tasks that use this pid */
	struct hlist_head tasks[PIDTYPE_MAX];
	struct hlist_head inodes;
@@ -68,6 +69,7 @@ struct pid
	struct upid numbers[];
};

extern seqcount_spinlock_t pidmap_lock_seq;
extern struct pid init_struct_pid;

struct file;
@@ -106,9 +108,6 @@ extern void exchange_tids(struct task_struct *task, struct task_struct *old);
extern void transfer_pid(struct task_struct *old, struct task_struct *new,
			 enum pid_type);

extern int pid_max;
extern int pid_max_min, pid_max_max;

/*
 * look up a PID in the hash table. Must be called with the tasklist_lock
 * or rcu_read_lock() held.
@ -30,6 +30,7 @@ struct pid_namespace {
|
||||
struct task_struct *child_reaper;
|
||||
struct kmem_cache *pid_cachep;
|
||||
unsigned int level;
|
||||
int pid_max;
|
||||
struct pid_namespace *parent;
|
||||
#ifdef CONFIG_BSD_PROCESS_ACCT
|
||||
struct fs_pin *bacct;
|
||||
@ -38,9 +39,14 @@ struct pid_namespace {
|
||||
struct ucounts *ucounts;
|
||||
int reboot; /* group exit code if this pidns was rebooted */
|
||||
struct ns_common ns;
|
||||
#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
|
||||
struct work_struct work;
|
||||
#ifdef CONFIG_SYSCTL
|
||||
struct ctl_table_set set;
|
||||
struct ctl_table_header *sysctls;
|
||||
#if defined(CONFIG_MEMFD_CREATE)
|
||||
int memfd_noexec_scope;
|
||||
#endif
|
||||
#endif
|
||||
} __randomize_layout;
|
||||
|
||||
extern struct pid_namespace init_pid_ns;
|
||||
@ -117,6 +123,8 @@ static inline int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
|
||||
extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk);
|
||||
void pidhash_init(void);
|
||||
void pid_idr_init(void);
|
||||
int register_pidns_sysctls(struct pid_namespace *pidns);
|
||||
void unregister_pidns_sysctls(struct pid_namespace *pidns);
|
||||
|
||||
static inline bool task_is_in_init_pid_ns(struct task_struct *tsk)
|
||||
{
|
||||
|
@@ -4,5 +4,7 @@

struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags);
void __init pidfs_init(void);
void pidfs_add_pid(struct pid *pid);
void pidfs_remove_pid(struct pid *pid);

#endif /* _LINUX_PID_FS_H */
@@ -5,6 +5,7 @@

struct pseudo_fs_context {
	const struct super_operations *ops;
	const struct export_operations *eops;
	const struct xattr_handler * const *xattr;
	const struct dentry_operations *dops;
	unsigned long magic;
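With the new eops member, a pseudo filesystem can wire export_operations into its fs_context initialiser the same way the pidfs hunk earlier in this commit does. A minimal sketch under that assumption; the examplefs names and magic number are made up:

#include <linux/pseudo_fs.h>
#include <linux/fs_context.h>
#include <linux/exportfs.h>
#include <linux/errno.h>

#define EXAMPLEFS_MAGIC 0x4558414d	/* made-up magic number */

static const struct export_operations examplefs_export_operations = {
	/* .encode_fh / .fh_to_dentry / .open / .permission as needed */
};

static int examplefs_init_fs_context(struct fs_context *fc)
{
	struct pseudo_fs_context *ctx;

	ctx = init_pseudo(fc, EXAMPLEFS_MAGIC);
	if (!ctx)
		return -ENOMEM;

	ctx->eops = &examplefs_export_operations;	/* new member in this series */
	return 0;
}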
@ -30,6 +30,17 @@ static inline void INIT_LIST_HEAD_RCU(struct list_head *list)
|
||||
* way, we must not access it directly
|
||||
*/
|
||||
#define list_next_rcu(list) (*((struct list_head __rcu **)(&(list)->next)))
|
||||
/*
|
||||
* Return the ->prev pointer of a list_head in an rcu safe way. Don't
|
||||
* access it directly.
|
||||
*
|
||||
* Any list traversed with list_bidir_prev_rcu() must never use
|
||||
* list_del_rcu(). Doing so will poison the ->prev pointer that
|
||||
* list_bidir_prev_rcu() relies on, which will result in segfaults.
|
||||
* To prevent these segfaults, use list_bidir_del_rcu() instead
|
||||
* of list_del_rcu().
|
||||
*/
|
||||
#define list_bidir_prev_rcu(list) (*((struct list_head __rcu **)(&(list)->prev)))
|
||||
|
||||
/**
|
||||
* list_tail_rcu - returns the prev pointer of the head of the list
|
||||
@ -158,6 +169,39 @@ static inline void list_del_rcu(struct list_head *entry)
|
||||
entry->prev = LIST_POISON2;
|
||||
}
|
||||
|
||||
/**
|
||||
* list_bidir_del_rcu - deletes entry from list without re-initialization
|
||||
* @entry: the element to delete from the list.
|
||||
*
|
||||
* In contrast to list_del_rcu() doesn't poison the prev pointer thus
|
||||
* allowing backwards traversal via list_bidir_prev_rcu().
|
||||
*
|
||||
* Note: list_empty() on entry does not return true after this because
|
||||
* the entry is in a special undefined state that permits RCU-based
|
||||
* lockfree reverse traversal. In particular this means that we can not
|
||||
* poison the forward and backwards pointers that may still be used for
|
||||
* walking the list.
|
||||
*
|
||||
* The caller must take whatever precautions are necessary (such as
|
||||
* holding appropriate locks) to avoid racing with another list-mutation
|
||||
* primitive, such as list_bidir_del_rcu() or list_add_rcu(), running on
|
||||
* this same list. However, it is perfectly legal to run concurrently
|
||||
* with the _rcu list-traversal primitives, such as
|
||||
* list_for_each_entry_rcu().
|
||||
*
|
||||
* Note that list_del_rcu() and list_bidir_del_rcu() must not be used on
|
||||
* the same list.
|
||||
*
|
||||
* Note that the caller is not permitted to immediately free
|
||||
* the newly deleted entry. Instead, either synchronize_rcu()
|
||||
* or call_rcu() must be used to defer freeing until an RCU
|
||||
* grace period has elapsed.
|
||||
*/
|
||||
static inline void list_bidir_del_rcu(struct list_head *entry)
|
||||
{
|
||||
__list_del_entry(entry);
|
||||
}
|
||||
|
||||
/**
|
||||
* hlist_del_init_rcu - deletes entry from hash list with re-initialization
|
||||
* @n: the element to delete from the hash list.
|
||||
|
include/linux/rolling_buffer.h (new file, 61 lines)
@@ -0,0 +1,61 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/* Rolling buffer of folios
 *
 * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#ifndef _ROLLING_BUFFER_H
#define _ROLLING_BUFFER_H

#include <linux/folio_queue.h>
#include <linux/uio.h>

/*
 * Rolling buffer.  Whilst the buffer is live and in use, folios and folio
 * queue segments can be added to one end by one thread and removed from the
 * other end by another thread.  The buffer isn't allowed to be empty; it must
 * always have at least one folio_queue in it so that neither side has to
 * modify both queue pointers.
 *
 * The iterator in the buffer is extended as buffers are inserted.  It can be
 * snapshotted to use a segment of the buffer.
 */
struct rolling_buffer {
	struct folio_queue	*head;		/* Producer's insertion point */
	struct folio_queue	*tail;		/* Consumer's removal point */
	struct iov_iter		iter;		/* Iterator tracking what's left in the buffer */
	u8			next_head_slot;	/* Next slot in ->head */
	u8			first_tail_slot; /* First slot in ->tail */
};

/*
 * Snapshot of a rolling buffer.
 */
struct rolling_buffer_snapshot {
	struct folio_queue	*curr_folioq;	/* Queue segment in which current folio resides */
	unsigned char		curr_slot;	/* Folio currently being read */
	unsigned char		curr_order;	/* Order of folio */
};

/* Marks to store per-folio in the internal folio_queue structs. */
#define ROLLBUF_MARK_1	BIT(0)
#define ROLLBUF_MARK_2	BIT(1)

int rolling_buffer_init(struct rolling_buffer *roll, unsigned int rreq_id,
			unsigned int direction);
int rolling_buffer_make_space(struct rolling_buffer *roll);
ssize_t rolling_buffer_load_from_ra(struct rolling_buffer *roll,
				    struct readahead_control *ractl,
				    struct folio_batch *put_batch);
ssize_t rolling_buffer_append(struct rolling_buffer *roll, struct folio *folio,
			      unsigned int flags);
struct folio_queue *rolling_buffer_delete_spent(struct rolling_buffer *roll);
void rolling_buffer_clear(struct rolling_buffer *roll);

static inline void rolling_buffer_advance(struct rolling_buffer *roll, size_t amount)
{
	iov_iter_advance(&roll->iter, amount);
}

#endif /* _ROLLING_BUFFER_H */
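A hedged sketch of how a producer and a consumer might drive the API declared above. The precise calling conventions live in the corresponding rolling_buffer.c, which is not part of this header, so treat the flags argument and the disposal of spent segments as assumptions:

#include <linux/rolling_buffer.h>

/* Producer side: append a freshly prepared folio to the head of the buffer. */
static int example_fill(struct rolling_buffer *roll, struct folio *folio)
{
	ssize_t added;

	added = rolling_buffer_append(roll, folio, 0);	/* flags value assumed */
	return added < 0 ? (int)added : 0;
}

/* Consumer side: account for consumed data and prune a fully spent segment. */
static void example_consume(struct rolling_buffer *roll, size_t bytes)
{
	struct folio_queue *spent;

	rolling_buffer_advance(roll, bytes);

	/* Assumption: the removed segment is handed back for disposal. */
	spent = rolling_buffer_delete_spent(roll);
	if (spent)
		;	/* dispose of 'spent' as the owning subsystem requires */
}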
@@ -272,7 +272,7 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex)
({									\
	unsigned __seq;							\
									\
	while ((__seq = seqprop_sequence(s)) & 1)			\
	while (unlikely((__seq = seqprop_sequence(s)) & 1))		\
		cpu_relax();						\
									\
	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);			\
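The unlikely() hint above annotates the retry spin on the read side. The same begin/retry shape appears on lockless readers elsewhere in this commit (for example the pidfs inode lookup against pidmap_lock_seq); a generic sketch, assuming a seqcount protecting a pair of values and writers that serialise themselves externally:

#include <linux/seqlock.h>

struct example_state {
	seqcount_t	seq;	/* writers serialise externally */
	u64		a;
	u64		b;
};

static u64 example_read(struct example_state *st)
{
	unsigned int seq;
	u64 a, b;

	do {
		seq = read_seqcount_begin(&st->seq);	/* spins while a write is in flight */
		a = st->a;
		b = st->b;
	} while (read_seqcount_retry(&st->seq, seq));	/* retry if a writer interleaved */

	return a + b;
}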
@ -118,6 +118,8 @@ enum yfs_cm_operation {
|
||||
*/
|
||||
#define afs_call_traces \
|
||||
EM(afs_call_trace_alloc, "ALLOC") \
|
||||
EM(afs_call_trace_async_abort, "ASYAB") \
|
||||
EM(afs_call_trace_async_kill, "ASYKL") \
|
||||
EM(afs_call_trace_free, "FREE ") \
|
||||
EM(afs_call_trace_get, "GET ") \
|
||||
EM(afs_call_trace_put, "PUT ") \
|
||||
@ -323,6 +325,44 @@ enum yfs_cm_operation {
|
||||
EM(yfs_CB_TellMeAboutYourself, "YFSCB.TellMeAboutYourself") \
|
||||
E_(yfs_CB_CallBack, "YFSCB.CallBack")
|
||||
|
||||
#define afs_cb_promise_traces \
|
||||
EM(afs_cb_promise_clear_cb_break, "CLEAR cb-break") \
|
||||
EM(afs_cb_promise_clear_rmdir, "CLEAR rmdir") \
|
||||
EM(afs_cb_promise_clear_rotate_server, "CLEAR rot-srv") \
|
||||
EM(afs_cb_promise_clear_server_change, "CLEAR srv-chg") \
|
||||
EM(afs_cb_promise_clear_vol_init_cb, "CLEAR vol-init-cb") \
|
||||
EM(afs_cb_promise_set_apply_cb, "SET apply-cb") \
|
||||
EM(afs_cb_promise_set_new_inode, "SET new-inode") \
|
||||
E_(afs_cb_promise_set_new_symlink, "SET new-symlink")
|
||||
|
||||
#define afs_vnode_invalid_traces \
|
||||
EM(afs_vnode_invalid_trace_cb_ro_snapshot, "cb-ro-snapshot") \
|
||||
EM(afs_vnode_invalid_trace_cb_scrub, "cb-scrub") \
|
||||
EM(afs_vnode_invalid_trace_cb_v_break, "cb-v-break") \
|
||||
EM(afs_vnode_invalid_trace_expired, "expired") \
|
||||
EM(afs_vnode_invalid_trace_no_cb_promise, "no-cb-promise") \
|
||||
EM(afs_vnode_invalid_trace_vol_expired, "vol-expired") \
|
||||
EM(afs_vnode_invalid_trace_zap_data, "zap-data") \
|
||||
E_(afs_vnode_valid_trace, "valid")
|
||||
|
||||
#define afs_dir_invalid_traces \
|
||||
EM(afs_dir_invalid_edit_add_bad_size, "edit-add-bad-size") \
|
||||
EM(afs_dir_invalid_edit_add_no_slots, "edit-add-no-slots") \
|
||||
EM(afs_dir_invalid_edit_add_too_many_blocks, "edit-add-too-many-blocks") \
|
||||
EM(afs_dir_invalid_edit_get_block, "edit-get-block") \
|
||||
EM(afs_dir_invalid_edit_mkdir, "edit-mkdir") \
|
||||
EM(afs_dir_invalid_edit_rem_bad_size, "edit-rem-bad-size") \
|
||||
EM(afs_dir_invalid_edit_rem_wrong_name, "edit-rem-wrong_name") \
|
||||
EM(afs_dir_invalid_edit_upd_bad_size, "edit-upd-bad-size") \
|
||||
EM(afs_dir_invalid_edit_upd_no_dd, "edit-upd-no-dotdot") \
|
||||
EM(afs_dir_invalid_dv_mismatch, "dv-mismatch") \
|
||||
EM(afs_dir_invalid_inval_folio, "inv-folio") \
|
||||
EM(afs_dir_invalid_iter_stale, "iter-stale") \
|
||||
EM(afs_dir_invalid_reclaimed_folio, "reclaimed-folio") \
|
||||
EM(afs_dir_invalid_release_folio, "rel-folio") \
|
||||
EM(afs_dir_invalid_remote, "remote") \
|
||||
E_(afs_dir_invalid_subdir_removed, "subdir-removed")
|
||||
|
||||
#define afs_edit_dir_ops \
|
||||
EM(afs_edit_dir_create, "create") \
|
||||
EM(afs_edit_dir_create_error, "c_fail") \
|
||||
@ -332,6 +372,7 @@ enum yfs_cm_operation {
|
||||
EM(afs_edit_dir_delete_error, "d_err ") \
|
||||
EM(afs_edit_dir_delete_inval, "d_invl") \
|
||||
EM(afs_edit_dir_delete_noent, "d_nent") \
|
||||
EM(afs_edit_dir_mkdir, "mk_ent") \
|
||||
EM(afs_edit_dir_update_dd, "u_ddot") \
|
||||
EM(afs_edit_dir_update_error, "u_fail") \
|
||||
EM(afs_edit_dir_update_inval, "u_invl") \
|
||||
@ -385,6 +426,7 @@ enum yfs_cm_operation {
|
||||
EM(afs_file_error_dir_over_end, "DIR_ENT_OVER_END") \
|
||||
EM(afs_file_error_dir_small, "DIR_SMALL") \
|
||||
EM(afs_file_error_dir_unmarked_ext, "DIR_UNMARKED_EXT") \
|
||||
EM(afs_file_error_symlink_big, "SYM_BIG") \
|
||||
EM(afs_file_error_mntpt, "MNTPT_READ_FAILED") \
|
||||
E_(afs_file_error_writeback_fail, "WRITEBACK_FAILED")
|
||||
|
||||
@ -487,7 +529,9 @@ enum yfs_cm_operation {
|
||||
enum afs_alist_trace { afs_alist_traces } __mode(byte);
|
||||
enum afs_call_trace { afs_call_traces } __mode(byte);
|
||||
enum afs_cb_break_reason { afs_cb_break_reasons } __mode(byte);
|
||||
enum afs_cb_promise_trace { afs_cb_promise_traces } __mode(byte);
|
||||
enum afs_cell_trace { afs_cell_traces } __mode(byte);
|
||||
enum afs_dir_invalid_trace { afs_dir_invalid_traces} __mode(byte);
|
||||
enum afs_edit_dir_op { afs_edit_dir_ops } __mode(byte);
|
||||
enum afs_edit_dir_reason { afs_edit_dir_reasons } __mode(byte);
|
||||
enum afs_eproto_cause { afs_eproto_causes } __mode(byte);
|
||||
@@ -498,6 +542,7 @@ enum afs_flock_operation { afs_flock_operations } __mode(byte);
enum afs_io_error { afs_io_errors } __mode(byte);
enum afs_rotate_trace { afs_rotate_traces } __mode(byte);
enum afs_server_trace { afs_server_traces } __mode(byte);
enum afs_vnode_invalid_trace { afs_vnode_invalid_traces} __mode(byte);
enum afs_volume_trace { afs_volume_traces } __mode(byte);

#endif /* end __AFS_GENERATE_TRACE_ENUMS_ONCE_ONLY */
@@ -513,8 +558,10 @@ enum afs_volume_trace { afs_volume_traces } __mode(byte);
afs_alist_traces;
afs_call_traces;
afs_cb_break_reasons;
afs_cb_promise_traces;
afs_cell_traces;
afs_cm_operations;
afs_dir_invalid_traces;
afs_edit_dir_ops;
afs_edit_dir_reasons;
afs_eproto_causes;
@@ -526,6 +573,7 @@ afs_fs_operations;
afs_io_errors;
afs_rotate_traces;
afs_server_traces;
afs_vnode_invalid_traces;
afs_vl_operations;
yfs_cm_operations;

@@ -670,7 +718,7 @@ TRACE_EVENT(afs_make_fs_call,
}
),

TP_printk("c=%08x %06llx:%06llx:%06x %s",
TP_printk("c=%08x V=%llx i=%llx:%x %s",
__entry->call,
__entry->fid.vid,
__entry->fid.vnode,
@@ -704,7 +752,7 @@ TRACE_EVENT(afs_make_fs_calli,
}
),

TP_printk("c=%08x %06llx:%06llx:%06x %s i=%u",
TP_printk("c=%08x V=%llx i=%llx:%x %s i=%u",
__entry->call,
__entry->fid.vid,
__entry->fid.vnode,
@@ -741,7 +789,7 @@ TRACE_EVENT(afs_make_fs_call1,
__entry->name[__len] = 0;
),

TP_printk("c=%08x %06llx:%06llx:%06x %s \"%s\"",
TP_printk("c=%08x V=%llx i=%llx:%x %s \"%s\"",
__entry->call,
__entry->fid.vid,
__entry->fid.vnode,
@@ -782,7 +830,7 @@ TRACE_EVENT(afs_make_fs_call2,
__entry->name2[__len2] = 0;
),

TP_printk("c=%08x %06llx:%06llx:%06x %s \"%s\" \"%s\"",
TP_printk("c=%08x V=%llx i=%llx:%x %s \"%s\" \"%s\"",
__entry->call,
__entry->fid.vid,
__entry->fid.vnode,
@@ -887,9 +935,9 @@ TRACE_EVENT(afs_sent_data,
);

TRACE_EVENT(afs_dir_check_failed,
TP_PROTO(struct afs_vnode *vnode, loff_t off, loff_t i_size),
TP_PROTO(struct afs_vnode *vnode, loff_t off),

TP_ARGS(vnode, off, i_size),
TP_ARGS(vnode, off),

TP_STRUCT__entry(
__field(struct afs_vnode *, vnode)
@@ -900,7 +948,7 @@ TRACE_EVENT(afs_dir_check_failed,
TP_fast_assign(
__entry->vnode = vnode;
__entry->off = off;
__entry->i_size = i_size;
__entry->i_size = i_size_read(&vnode->netfs.inode);
),

TP_printk("vn=%p %llx/%llx",
@@ -1002,7 +1050,7 @@ TRACE_EVENT(afs_edit_dir,
__entry->name[__len] = 0;
),

TP_printk("d=%x:%x %s %s %u[%u] f=%x:%x \"%s\"",
TP_printk("di=%x:%x %s %s %u[%u] fi=%x:%x \"%s\"",
__entry->vnode, __entry->unique,
__print_symbolic(__entry->why, afs_edit_dir_reasons),
__print_symbolic(__entry->op, afs_edit_dir_ops),
@@ -1011,6 +1059,122 @@ TRACE_EVENT(afs_edit_dir,
__entry->name)
);

TRACE_EVENT(afs_dir_invalid,
TP_PROTO(const struct afs_vnode *dvnode, enum afs_dir_invalid_trace trace),

TP_ARGS(dvnode, trace),

TP_STRUCT__entry(
__field(unsigned int, vnode)
__field(unsigned int, unique)
__field(enum afs_dir_invalid_trace, trace)
),

TP_fast_assign(
__entry->vnode = dvnode->fid.vnode;
__entry->unique = dvnode->fid.unique;
__entry->trace = trace;
),

TP_printk("di=%x:%x %s",
__entry->vnode, __entry->unique,
__print_symbolic(__entry->trace, afs_dir_invalid_traces))
);

TRACE_EVENT(afs_cb_promise,
TP_PROTO(const struct afs_vnode *vnode, enum afs_cb_promise_trace trace),

TP_ARGS(vnode, trace),

TP_STRUCT__entry(
__field(unsigned int, vnode)
__field(unsigned int, unique)
__field(enum afs_cb_promise_trace, trace)
),

TP_fast_assign(
__entry->vnode = vnode->fid.vnode;
__entry->unique = vnode->fid.unique;
__entry->trace = trace;
),

TP_printk("di=%x:%x %s",
__entry->vnode, __entry->unique,
__print_symbolic(__entry->trace, afs_cb_promise_traces))
);

TRACE_EVENT(afs_vnode_invalid,
TP_PROTO(const struct afs_vnode *vnode, enum afs_vnode_invalid_trace trace),

TP_ARGS(vnode, trace),

TP_STRUCT__entry(
__field(unsigned int, vnode)
__field(unsigned int, unique)
__field(enum afs_vnode_invalid_trace, trace)
),

TP_fast_assign(
__entry->vnode = vnode->fid.vnode;
__entry->unique = vnode->fid.unique;
__entry->trace = trace;
),

TP_printk("di=%x:%x %s",
__entry->vnode, __entry->unique,
__print_symbolic(__entry->trace, afs_vnode_invalid_traces))
);

TRACE_EVENT(afs_set_dv,
TP_PROTO(const struct afs_vnode *dvnode, u64 new_dv),

TP_ARGS(dvnode, new_dv),

TP_STRUCT__entry(
__field(unsigned int, vnode)
__field(unsigned int, unique)
__field(u64, old_dv)
__field(u64, new_dv)
),

TP_fast_assign(
__entry->vnode = dvnode->fid.vnode;
__entry->unique = dvnode->fid.unique;
__entry->old_dv = dvnode->status.data_version;
__entry->new_dv = new_dv;
),

TP_printk("di=%x:%x dv=%llx -> dv=%llx",
__entry->vnode, __entry->unique,
__entry->old_dv, __entry->new_dv)
);

TRACE_EVENT(afs_dv_mismatch,
TP_PROTO(const struct afs_vnode *dvnode, u64 before_dv, int delta, u64 new_dv),

TP_ARGS(dvnode, before_dv, delta, new_dv),

TP_STRUCT__entry(
__field(unsigned int, vnode)
__field(unsigned int, unique)
__field(int, delta)
__field(u64, before_dv)
__field(u64, new_dv)
),

TP_fast_assign(
__entry->vnode = dvnode->fid.vnode;
__entry->unique = dvnode->fid.unique;
__entry->delta = delta;
__entry->before_dv = before_dv;
__entry->new_dv = new_dv;
),

TP_printk("di=%x:%x xdv=%llx+%d dv=%llx",
__entry->vnode, __entry->unique,
__entry->before_dv, __entry->delta, __entry->new_dv)
);

TRACE_EVENT(afs_protocol_error,
TP_PROTO(struct afs_call *call, enum afs_eproto_cause cause),

@@ -1611,6 +1775,36 @@ TRACE_EVENT(afs_make_call,
__entry->fid.unique)
);

TRACE_EVENT(afs_read_recv,
TP_PROTO(const struct afs_operation *op, const struct afs_call *call),

TP_ARGS(op, call),

TP_STRUCT__entry(
__field(unsigned int, rreq)
__field(unsigned int, sreq)
__field(unsigned int, op)
__field(unsigned int, op_flags)
__field(unsigned int, call)
__field(enum afs_call_state, call_state)
),

TP_fast_assign(
__entry->op = op->debug_id;
__entry->sreq = op->fetch.subreq->debug_index;
__entry->rreq = op->fetch.subreq->rreq->debug_id;
__entry->op_flags = op->flags;
__entry->call = call->debug_id;
__entry->call_state = call->state;
),

TP_printk("R=%08x[%x] OP=%08x c=%08x cs=%x of=%x",
__entry->rreq, __entry->sreq,
__entry->op,
__entry->call, __entry->call_state,
__entry->op_flags)
);

#endif /* _TRACE_AFS_H */

/* This part must be outside protection */
@@ -380,10 +380,11 @@ TRACE_EVENT(cachefiles_rename,
TRACE_EVENT(cachefiles_coherency,
TP_PROTO(struct cachefiles_object *obj,
ino_t ino,
u64 disk_aux,
enum cachefiles_content content,
enum cachefiles_coherency_trace why),

TP_ARGS(obj, ino, content, why),
TP_ARGS(obj, ino, disk_aux, content, why),

/* Note that obj may be NULL */
TP_STRUCT__entry(
@@ -391,6 +392,8 @@ TRACE_EVENT(cachefiles_coherency,
__field(enum cachefiles_coherency_trace, why )
__field(enum cachefiles_content, content )
__field(u64, ino )
__field(u64, aux )
__field(u64, disk_aux)
),

TP_fast_assign(
@@ -398,13 +401,17 @@ TRACE_EVENT(cachefiles_coherency,
__entry->why = why;
__entry->content = content;
__entry->ino = ino;
__entry->aux = be64_to_cpup((__be64 *)obj->cookie->inline_aux);
__entry->disk_aux = disk_aux;
),

TP_printk("o=%08x %s B=%llx c=%u",
TP_printk("o=%08x %s B=%llx c=%u aux=%llx dsk=%llx",
__entry->obj,
__print_symbolic(__entry->why, cachefiles_coherency_traces),
__entry->ino,
__entry->content)
__entry->content,
__entry->aux,
__entry->disk_aux)
);

TRACE_EVENT(cachefiles_vol_coherency,
@@ -21,6 +21,7 @@
EM(netfs_read_trace_readahead, "READAHEAD") \
EM(netfs_read_trace_readpage, "READPAGE ") \
EM(netfs_read_trace_read_gaps, "READ-GAPS") \
EM(netfs_read_trace_read_single, "READ-SNGL") \
EM(netfs_read_trace_prefetch_for_write, "PREFETCHW") \
E_(netfs_read_trace_write_begin, "WRITEBEGN")

@@ -35,9 +36,11 @@
EM(NETFS_READAHEAD, "RA") \
EM(NETFS_READPAGE, "RP") \
EM(NETFS_READ_GAPS, "RG") \
EM(NETFS_READ_SINGLE, "R1") \
EM(NETFS_READ_FOR_WRITE, "RW") \
EM(NETFS_DIO_READ, "DR") \
EM(NETFS_WRITEBACK, "WB") \
EM(NETFS_WRITEBACK_SINGLE, "W1") \
EM(NETFS_WRITETHROUGH, "WT") \
EM(NETFS_UNBUFFERED_WRITE, "UW") \
EM(NETFS_DIO_WRITE, "DW") \
@@ -47,17 +50,23 @@
EM(netfs_rreq_trace_assess, "ASSESS ") \
EM(netfs_rreq_trace_copy, "COPY ") \
EM(netfs_rreq_trace_collect, "COLLECT") \
EM(netfs_rreq_trace_complete, "COMPLET") \
EM(netfs_rreq_trace_dirty, "DIRTY ") \
EM(netfs_rreq_trace_done, "DONE ") \
EM(netfs_rreq_trace_free, "FREE ") \
EM(netfs_rreq_trace_redirty, "REDIRTY") \
EM(netfs_rreq_trace_resubmit, "RESUBMT") \
EM(netfs_rreq_trace_set_abandon, "S-ABNDN") \
EM(netfs_rreq_trace_set_pause, "PAUSE ") \
EM(netfs_rreq_trace_unlock, "UNLOCK ") \
EM(netfs_rreq_trace_unlock_pgpriv2, "UNLCK-2") \
EM(netfs_rreq_trace_unmark, "UNMARK ") \
EM(netfs_rreq_trace_wait_ip, "WAIT-IP") \
EM(netfs_rreq_trace_wait_pause, "WT-PAUS") \
EM(netfs_rreq_trace_wait_queue, "WAIT-Q ") \
EM(netfs_rreq_trace_wake_ip, "WAKE-IP") \
EM(netfs_rreq_trace_wake_queue, "WAKE-Q ") \
EM(netfs_rreq_trace_woke_queue, "WOKE-Q ") \
EM(netfs_rreq_trace_unpause, "UNPAUSE") \
E_(netfs_rreq_trace_write_done, "WR-DONE")

@@ -74,6 +83,9 @@
#define netfs_sreq_traces \
EM(netfs_sreq_trace_add_donations, "+DON ") \
EM(netfs_sreq_trace_added, "ADD ") \
EM(netfs_sreq_trace_cache_nowrite, "CA-NW") \
EM(netfs_sreq_trace_cache_prepare, "CA-PR") \
EM(netfs_sreq_trace_cache_write, "CA-WR") \
EM(netfs_sreq_trace_clear, "CLEAR") \
EM(netfs_sreq_trace_discard, "DSCRD") \
EM(netfs_sreq_trace_donate_to_prev, "DON-P") \
@@ -84,6 +96,8 @@
EM(netfs_sreq_trace_hit_eof, "EOF ") \
EM(netfs_sreq_trace_io_progress, "IO ") \
EM(netfs_sreq_trace_limited, "LIMIT") \
EM(netfs_sreq_trace_partial_read, "PARTR") \
EM(netfs_sreq_trace_need_retry, "NRTRY") \
EM(netfs_sreq_trace_prepare, "PREP ") \
EM(netfs_sreq_trace_prep_failed, "PRPFL") \
EM(netfs_sreq_trace_progress, "PRGRS") \
@@ -152,6 +166,7 @@
EM(netfs_streaming_filled_page, "mod-streamw-f") \
EM(netfs_streaming_cont_filled_page, "mod-streamw-f+") \
EM(netfs_folio_trace_abandon, "abandon") \
EM(netfs_folio_trace_alloc_buffer, "alloc-buf") \
EM(netfs_folio_trace_cancel_copy, "cancel-copy") \
EM(netfs_folio_trace_cancel_store, "cancel-store") \
EM(netfs_folio_trace_clear, "clear") \
@@ -168,6 +183,7 @@
EM(netfs_folio_trace_mkwrite, "mkwrite") \
EM(netfs_folio_trace_mkwrite_plus, "mkwrite+") \
EM(netfs_folio_trace_not_under_wback, "!wback") \
EM(netfs_folio_trace_not_locked, "!locked") \
EM(netfs_folio_trace_put, "put") \
EM(netfs_folio_trace_read, "read") \
EM(netfs_folio_trace_read_done, "read-done") \
@@ -191,6 +207,14 @@
EM(netfs_trace_donate_to_next, "to-next") \
E_(netfs_trace_donate_to_deferred_next, "defer-next")

#define netfs_folioq_traces \
EM(netfs_trace_folioq_alloc_buffer, "alloc-buf") \
EM(netfs_trace_folioq_clear, "clear") \
EM(netfs_trace_folioq_delete, "delete") \
EM(netfs_trace_folioq_make_space, "make-space") \
EM(netfs_trace_folioq_rollbuf_init, "roll-init") \
E_(netfs_trace_folioq_read_progress, "r-progress")

#ifndef __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY
#define __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY

@@ -209,6 +233,7 @@ enum netfs_sreq_ref_trace { netfs_sreq_ref_traces } __mode(byte);
enum netfs_folio_trace { netfs_folio_traces } __mode(byte);
enum netfs_collect_contig_trace { netfs_collect_contig_traces } __mode(byte);
enum netfs_donate_trace { netfs_donate_traces } __mode(byte);
enum netfs_folioq_trace { netfs_folioq_traces } __mode(byte);

#endif

@@ -232,6 +257,7 @@ netfs_sreq_ref_traces;
netfs_folio_traces;
netfs_collect_contig_traces;
netfs_donate_traces;
netfs_folioq_traces;

/*
* Now redefine the EM() and E_() macros to map the enums to the strings that
@@ -317,6 +343,7 @@ TRACE_EVENT(netfs_sreq,
__field(unsigned short, flags )
__field(enum netfs_io_source, source )
__field(enum netfs_sreq_trace, what )
__field(u8, slot )
__field(size_t, len )
__field(size_t, transferred )
__field(loff_t, start )
@@ -332,15 +359,16 @@ TRACE_EVENT(netfs_sreq,
__entry->len = sreq->len;
__entry->transferred = sreq->transferred;
__entry->start = sreq->start;
__entry->slot = sreq->io_iter.folioq_slot;
),

TP_printk("R=%08x[%x] %s %s f=%02x s=%llx %zx/%zx e=%d",
TP_printk("R=%08x[%x] %s %s f=%02x s=%llx %zx/%zx s=%u e=%d",
__entry->rreq, __entry->index,
__print_symbolic(__entry->source, netfs_sreq_sources),
__print_symbolic(__entry->what, netfs_sreq_traces),
__entry->flags,
__entry->start, __entry->transferred, __entry->len,
__entry->error)
__entry->slot, __entry->error)
);

TRACE_EVENT(netfs_failure,
@@ -680,69 +708,27 @@ TRACE_EVENT(netfs_collect_stream,
__entry->collected_to, __entry->front)
);

TRACE_EVENT(netfs_progress,
TP_PROTO(const struct netfs_io_subrequest *subreq,
unsigned long long start, size_t avail, size_t part),
TRACE_EVENT(netfs_folioq,
TP_PROTO(const struct folio_queue *fq,
enum netfs_folioq_trace trace),

TP_ARGS(subreq, start, avail, part),
TP_ARGS(fq, trace),

TP_STRUCT__entry(
__field(unsigned int, rreq)
__field(unsigned int, subreq)
__field(unsigned int, consumed)
__field(unsigned int, transferred)
__field(unsigned long long, f_start)
__field(unsigned int, f_avail)
__field(unsigned int, f_part)
__field(unsigned char, slot)
__field(unsigned int, id)
__field(enum netfs_folioq_trace, trace)
),

TP_fast_assign(
__entry->rreq = subreq->rreq->debug_id;
__entry->subreq = subreq->debug_index;
__entry->consumed = subreq->consumed;
__entry->transferred = subreq->transferred;
__entry->f_start = start;
__entry->f_avail = avail;
__entry->f_part = part;
__entry->slot = subreq->curr_folioq_slot;
),

TP_printk("R=%08x[%02x] s=%llx ct=%x/%x pa=%x/%x sl=%x",
__entry->rreq, __entry->subreq, __entry->f_start,
__entry->consumed, __entry->transferred,
__entry->f_part, __entry->f_avail, __entry->slot)
);

TRACE_EVENT(netfs_donate,
TP_PROTO(const struct netfs_io_request *rreq,
const struct netfs_io_subrequest *from,
const struct netfs_io_subrequest *to,
size_t amount,
enum netfs_donate_trace trace),

TP_ARGS(rreq, from, to, amount, trace),

TP_STRUCT__entry(
__field(unsigned int, rreq)
__field(unsigned int, from)
__field(unsigned int, to)
__field(unsigned int, amount)
__field(enum netfs_donate_trace, trace)
),

TP_fast_assign(
__entry->rreq = rreq->debug_id;
__entry->from = from->debug_index;
__entry->to = to ? to->debug_index : -1;
__entry->amount = amount;
__entry->rreq = fq ? fq->rreq_id : 0;
__entry->id = fq ? fq->debug_id : 0;
__entry->trace = trace;
),

TP_printk("R=%08x[%02x] -> [%02x] %s am=%x",
__entry->rreq, __entry->from, __entry->to,
__print_symbolic(__entry->trace, netfs_donate_traces),
__entry->amount)
TP_printk("R=%08x fq=%x %s",
__entry->rreq, __entry->id,
__print_symbolic(__entry->trace, netfs_folioq_traces))
);

#undef EM
@@ -14,37 +14,56 @@

#include <linux/types.h>

/**
* struct fiemap_extent - description of one fiemap extent
* @fe_logical: byte offset of the extent in the file
* @fe_physical: byte offset of extent on disk
* @fe_length: length in bytes for this extent
* @fe_flags: FIEMAP_EXTENT_* flags for this extent
*/
struct fiemap_extent {
__u64 fe_logical; /* logical offset in bytes for the start of
* the extent from the beginning of the file */
__u64 fe_physical; /* physical offset in bytes for the start
* of the extent from the beginning of the disk */
__u64 fe_length; /* length in bytes for this extent */
__u64 fe_logical;
__u64 fe_physical;
__u64 fe_length;
/* private: */
__u64 fe_reserved64[2];
__u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */
/* public: */
__u32 fe_flags;
/* private: */
__u32 fe_reserved[3];
};

/**
* struct fiemap - file extent mappings
* @fm_start: byte offset (inclusive) at which to start mapping (in)
* @fm_length: logical length of mapping which userspace wants (in)
* @fm_flags: FIEMAP_FLAG_* flags for request (in/out)
* @fm_mapped_extents: number of extents that were mapped (out)
* @fm_extent_count: size of fm_extents array (in)
* @fm_extents: array of mapped extents (out)
*/
struct fiemap {
__u64 fm_start; /* logical offset (inclusive) at
* which to start mapping (in) */
__u64 fm_length; /* logical length of mapping which
* userspace wants (in) */
__u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */
__u32 fm_mapped_extents;/* number of extents that were mapped (out) */
__u32 fm_extent_count; /* size of fm_extents array (in) */
__u64 fm_start;
__u64 fm_length;
__u32 fm_flags;
__u32 fm_mapped_extents;
__u32 fm_extent_count;
/* private: */
__u32 fm_reserved;
struct fiemap_extent fm_extents[]; /* array of mapped extents (out) */
/* public: */
struct fiemap_extent fm_extents[];
};

#define FIEMAP_MAX_OFFSET (~0ULL)

/* flags used in fm_flags: */
#define FIEMAP_FLAG_SYNC 0x00000001 /* sync file data before map */
#define FIEMAP_FLAG_XATTR 0x00000002 /* map extended attribute tree */
#define FIEMAP_FLAG_CACHE 0x00000004 /* request caching of the extents */

#define FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR)

/* flags used in fe_flags: */
#define FIEMAP_EXTENT_LAST 0x00000001 /* Last extent in file. */
#define FIEMAP_EXTENT_UNKNOWN 0x00000002 /* Data location unknown. */
#define FIEMAP_EXTENT_DELALLOC 0x00000004 /* Location still pending.
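The fiemap.h hunk above only moves the field descriptions into kernel-doc comments; the binary layout and the ioctl ABI are unchanged. For orientation, here is a minimal userspace sketch (not part of this diff) that exercises the structures through FS_IOC_FIEMAP; the extent-array size of 32 is an arbitrary choice for illustration.

    /* Sketch: dump a file's extent map with FS_IOC_FIEMAP. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>
    #include <linux/fiemap.h>

    int main(int argc, char **argv)
    {
            struct fiemap *fm;
            unsigned int i, nr = 32;        /* arbitrary array size */
            int fd;

            if (argc != 2)
                    return 1;
            fd = open(argv[1], O_RDONLY);
            if (fd < 0)
                    return 1;

            fm = calloc(1, sizeof(*fm) + nr * sizeof(struct fiemap_extent));
            if (!fm)
                    return 1;
            fm->fm_start = 0;
            fm->fm_length = FIEMAP_MAX_OFFSET;      /* map the whole file */
            fm->fm_flags = FIEMAP_FLAG_SYNC;        /* flush dirty data first */
            fm->fm_extent_count = nr;

            if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0) {
                    perror("FS_IOC_FIEMAP");
                    return 1;
            }
            for (i = 0; i < fm->fm_mapped_extents; i++)
                    printf("logical=%llu physical=%llu len=%llu flags=%#x\n",
                           (unsigned long long)fm->fm_extents[i].fe_logical,
                           (unsigned long long)fm->fm_extents[i].fe_physical,
                           (unsigned long long)fm->fm_extents[i].fe_length,
                           fm->fm_extents[i].fe_flags);
            free(fm);
            close(fd);
            return 0;
    }

Setting fm_extent_count to zero asks the kernel only to report, in fm_mapped_extents, how many extents would be returned, which is a cheap way to size the array before a second call.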
@@ -476,56 +476,6 @@ void abort_creds(struct cred *new)
}
EXPORT_SYMBOL(abort_creds);

/**
* override_creds - Override the current process's subjective credentials
* @new: The credentials to be assigned
*
* Install a set of temporary override subjective credentials on the current
* process, returning the old set for later reversion.
*/
const struct cred *override_creds(const struct cred *new)
{
const struct cred *old;

kdebug("override_creds(%p{%ld})", new,
atomic_long_read(&new->usage));

/*
* NOTE! This uses 'get_new_cred()' rather than 'get_cred()'.
*
* That means that we do not clear the 'non_rcu' flag, since
* we are only installing the cred into the thread-synchronous
* '->cred' pointer, not the '->real_cred' pointer that is
* visible to other threads under RCU.
*/
get_new_cred((struct cred *)new);
old = override_creds_light(new);

kdebug("override_creds() = %p{%ld}", old,
atomic_long_read(&old->usage));
return old;
}
EXPORT_SYMBOL(override_creds);

/**
* revert_creds - Revert a temporary subjective credentials override
* @old: The credentials to be restored
*
* Revert a temporary set of override subjective credentials to an old set,
* discarding the override set.
*/
void revert_creds(const struct cred *old)
{
const struct cred *override = current->cred;

kdebug("revert_creds(%p{%ld})", old,
atomic_long_read(&old->usage));

revert_creds_light(old);
put_cred(override);
}
EXPORT_SYMBOL(revert_creds);

/**
* cred_fscmp - Compare two credentials with respect to filesystem access.
* @a: The first credential
@@ -89,6 +89,7 @@ find $cpio_dir -type f -print0 |

# Create archive and try to normalize metadata for reproducibility.
tar "${KBUILD_BUILD_TIMESTAMP:+--mtime=$KBUILD_BUILD_TIMESTAMP}" \
--exclude=".__afs*" --exclude=".nfs*" \
--owner=0 --group=0 --sort=name --numeric-owner --mode=u=rw,go=r,a+X \
-I $XZ -cf $tarfile -C $cpio_dir/ . > /dev/null

kernel/pid.c
@@ -43,6 +43,7 @@
#include <linux/sched/task.h>
#include <linux/idr.h>
#include <linux/pidfs.h>
#include <linux/seqlock.h>
#include <net/sock.h>
#include <uapi/linux/pidfd.h>

@@ -60,15 +61,8 @@ struct pid init_struct_pid = {
}, }
};

int pid_max = PID_MAX_DEFAULT;

int pid_max_min = RESERVED_PIDS + 1;
int pid_max_max = PID_MAX_LIMIT;
/*
* Pseudo filesystems start inode numbering after one. We use Reserved
* PIDs as a natural offset.
*/
static u64 pidfs_ino = RESERVED_PIDS;
static int pid_max_min = RESERVED_PIDS + 1;
static int pid_max_max = PID_MAX_LIMIT;

/*
* PID-map pages start out as NULL, they get allocated upon
@@ -87,6 +81,7 @@ struct pid_namespace init_pid_ns = {
#ifdef CONFIG_PID_NS
.ns.ops = &pidns_operations,
#endif
.pid_max = PID_MAX_DEFAULT,
#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
.memfd_noexec_scope = MEMFD_NOEXEC_SCOPE_EXEC,
#endif
@@ -108,6 +103,7 @@ EXPORT_SYMBOL_GPL(init_pid_ns);
*/

static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
seqcount_spinlock_t pidmap_lock_seq = SEQCNT_SPINLOCK_ZERO(pidmap_lock_seq, &pidmap_lock);

void put_pid(struct pid *pid)
{
@@ -158,6 +154,7 @@ void free_pid(struct pid *pid)

idr_remove(&ns->idr, upid->nr);
}
pidfs_remove_pid(pid);
spin_unlock_irqrestore(&pidmap_lock, flags);

call_rcu(&pid->rcu, delayed_put_pid);
@@ -193,6 +190,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,

for (i = ns->level; i >= 0; i--) {
int tid = 0;
int pid_max = READ_ONCE(tmp->pid_max);

if (set_tid_size) {
tid = set_tid[ns->level - i];
@@ -273,22 +271,24 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
INIT_HLIST_HEAD(&pid->inodes);

upid = pid->numbers + ns->level;
idr_preload(GFP_KERNEL);
spin_lock_irq(&pidmap_lock);
if (!(ns->pid_allocated & PIDNS_ADDING))
goto out_unlock;
pid->stashed = NULL;
pid->ino = ++pidfs_ino;
pidfs_add_pid(pid);
for ( ; upid >= pid->numbers; --upid) {
/* Make the PID visible to find_pid_ns. */
idr_replace(&upid->ns->idr, pid, upid->nr);
upid->ns->pid_allocated++;
}
spin_unlock_irq(&pidmap_lock);
idr_preload_end();

return pid;

out_unlock:
spin_unlock_irq(&pidmap_lock);
idr_preload_end();
put_pid_ns(ns);

out_free:
@@ -644,17 +644,118 @@ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags)
return fd;
}

#ifdef CONFIG_SYSCTL
static struct ctl_table_set *pid_table_root_lookup(struct ctl_table_root *root)
{
return &task_active_pid_ns(current)->set;
}

static int set_is_seen(struct ctl_table_set *set)
{
return &task_active_pid_ns(current)->set == set;
}

static int pid_table_root_permissions(struct ctl_table_header *head,
const struct ctl_table *table)
{
struct pid_namespace *pidns =
container_of(head->set, struct pid_namespace, set);
int mode = table->mode;

if (ns_capable(pidns->user_ns, CAP_SYS_ADMIN) ||
uid_eq(current_euid(), make_kuid(pidns->user_ns, 0)))
mode = (mode & S_IRWXU) >> 6;
else if (in_egroup_p(make_kgid(pidns->user_ns, 0)))
mode = (mode & S_IRWXG) >> 3;
else
mode = mode & S_IROTH;
return (mode << 6) | (mode << 3) | mode;
}

static void pid_table_root_set_ownership(struct ctl_table_header *head,
kuid_t *uid, kgid_t *gid)
{
struct pid_namespace *pidns =
container_of(head->set, struct pid_namespace, set);
kuid_t ns_root_uid;
kgid_t ns_root_gid;

ns_root_uid = make_kuid(pidns->user_ns, 0);
if (uid_valid(ns_root_uid))
*uid = ns_root_uid;

ns_root_gid = make_kgid(pidns->user_ns, 0);
if (gid_valid(ns_root_gid))
*gid = ns_root_gid;
}

static struct ctl_table_root pid_table_root = {
.lookup = pid_table_root_lookup,
.permissions = pid_table_root_permissions,
.set_ownership = pid_table_root_set_ownership,
};

static struct ctl_table pid_table[] = {
{
.procname = "pid_max",
.data = &init_pid_ns.pid_max,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &pid_max_min,
.extra2 = &pid_max_max,
},
};
#endif

int register_pidns_sysctls(struct pid_namespace *pidns)
{
#ifdef CONFIG_SYSCTL
struct ctl_table *tbl;

setup_sysctl_set(&pidns->set, &pid_table_root, set_is_seen);

tbl = kmemdup(pid_table, sizeof(pid_table), GFP_KERNEL);
if (!tbl)
return -ENOMEM;
tbl->data = &pidns->pid_max;
pidns->pid_max = min(pid_max_max, max_t(int, pidns->pid_max,
PIDS_PER_CPU_DEFAULT * num_possible_cpus()));

pidns->sysctls = __register_sysctl_table(&pidns->set, "kernel", tbl,
ARRAY_SIZE(pid_table));
if (!pidns->sysctls) {
kfree(tbl);
retire_sysctl_set(&pidns->set);
return -ENOMEM;
}
#endif
return 0;
}

void unregister_pidns_sysctls(struct pid_namespace *pidns)
{
#ifdef CONFIG_SYSCTL
const struct ctl_table *tbl;

tbl = pidns->sysctls->ctl_table_arg;
unregister_sysctl_table(pidns->sysctls);
retire_sysctl_set(&pidns->set);
kfree(tbl);
#endif
}

void __init pid_idr_init(void)
{
/* Verify no one has done anything silly: */
BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_ADDING);

/* bump default and minimum pid_max based on number of cpus */
pid_max = min(pid_max_max, max_t(int, pid_max,
PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
init_pid_ns.pid_max = min(pid_max_max, max_t(int, init_pid_ns.pid_max,
PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
pid_max_min = max_t(int, pid_max_min,
PIDS_PER_CPU_MIN * num_possible_cpus());
pr_info("pid_max: default: %u minimum: %u\n", pid_max, pid_max_min);
pr_info("pid_max: default: %u minimum: %u\n", init_pid_ns.pid_max, pid_max_min);

idr_init(&init_pid_ns.idr);

@@ -665,6 +766,16 @@ void __init pid_idr_init(void)
NULL);
}

static __init int pid_namespace_sysctl_init(void)
{
#ifdef CONFIG_SYSCTL
/* "kernel" directory will have already been initialized. */
BUG_ON(register_pidns_sysctls(&init_pid_ns));
#endif
return 0;
}
subsys_initcall(pid_namespace_sysctl_init);

static struct file *__pidfd_fget(struct task_struct *task, int fd)
{
struct file *file;
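With pid_table now registered against each pid namespace's ctl_table_set, /proc/sys/kernel/pid_max is resolved through the reader's active pid namespace rather than a single global. The sketch below illustrates the expected behaviour; it is an assumption drawn from the hunks above rather than part of the diff, it needs CAP_SYS_ADMIN, and the value 4096 is an arbitrary smaller limit chosen for the example.

    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/wait.h>

    static void show(const char *who)
    {
            char buf[64] = "";
            FILE *f = fopen("/proc/sys/kernel/pid_max", "r");

            if (f) {
                    if (fgets(buf, sizeof(buf), f))
                            printf("%s: pid_max = %s", who, buf);
                    fclose(f);
            }
    }

    int main(void)
    {
            pid_t pid;

            show("parent before");

            if (unshare(CLONE_NEWPID) < 0) {        /* children enter a fresh pid ns */
                    perror("unshare");
                    return 1;
            }

            pid = fork();
            if (pid == 0) {
                    /* The child is PID 1 of the new namespace; its sysctl view
                     * follows its active pid namespace. */
                    FILE *f = fopen("/proc/sys/kernel/pid_max", "w");

                    if (f) {
                            fputs("4096\n", f);
                            fclose(f);
                    }
                    show("child ns after write");
                    _exit(0);
            }
            waitpid(pid, NULL, 0);

            show("parent after");   /* expected to be unchanged */
            return 0;
    }

The child only tightens its own namespace's limit, so the parent's final read is expected to show the original value.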
@@ -70,6 +70,8 @@ static void dec_pid_namespaces(struct ucounts *ucounts)
dec_ucount(ucounts, UCOUNT_PID_NAMESPACES);
}

static void destroy_pid_namespace_work(struct work_struct *work);

static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns,
struct pid_namespace *parent_pid_ns)
{
@@ -105,17 +107,27 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
goto out_free_idr;
ns->ns.ops = &pidns_operations;

ns->pid_max = parent_pid_ns->pid_max;
err = register_pidns_sysctls(ns);
if (err)
goto out_free_inum;

refcount_set(&ns->ns.count, 1);
ns->level = level;
ns->parent = get_pid_ns(parent_pid_ns);
ns->user_ns = get_user_ns(user_ns);
ns->ucounts = ucounts;
ns->pid_allocated = PIDNS_ADDING;
INIT_WORK(&ns->work, destroy_pid_namespace_work);

#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
ns->memfd_noexec_scope = pidns_memfd_noexec_scope(parent_pid_ns);
#endif

return ns;

out_free_inum:
ns_free_inum(&ns->ns);
out_free_idr:
idr_destroy(&ns->idr);
kmem_cache_free(pid_ns_cachep, ns);
@@ -137,12 +149,28 @@ static void delayed_free_pidns(struct rcu_head *p)

static void destroy_pid_namespace(struct pid_namespace *ns)
{
unregister_pidns_sysctls(ns);

ns_free_inum(&ns->ns);

idr_destroy(&ns->idr);
call_rcu(&ns->rcu, delayed_free_pidns);
}

static void destroy_pid_namespace_work(struct work_struct *work)
{
struct pid_namespace *ns =
container_of(work, struct pid_namespace, work);

do {
struct pid_namespace *parent;

parent = ns->parent;
destroy_pid_namespace(ns);
ns = parent;
} while (ns != &init_pid_ns && refcount_dec_and_test(&ns->ns.count));
}

struct pid_namespace *copy_pid_ns(unsigned long flags,
struct user_namespace *user_ns, struct pid_namespace *old_ns)
{
@@ -155,15 +183,8 @@ struct pid_namespace *copy_pid_ns(unsigned long flags,

void put_pid_ns(struct pid_namespace *ns)
{
struct pid_namespace *parent;

while (ns != &init_pid_ns) {
parent = ns->parent;
if (!refcount_dec_and_test(&ns->ns.count))
break;
destroy_pid_namespace(ns);
ns = parent;
}
if (ns && ns != &init_pid_ns && refcount_dec_and_test(&ns->ns.count))
schedule_work(&ns->work);
}
EXPORT_SYMBOL_GPL(put_pid_ns);

@@ -274,6 +295,7 @@ static int pid_ns_ctl_handler(const struct ctl_table *table, int write,
next = idr_get_cursor(&pid_ns->idr) - 1;

tmp.data = &next;
tmp.extra2 = &pid_ns->pid_max;
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
if (!ret && write)
idr_set_cursor(&pid_ns->idr, next + 1);
@@ -281,7 +303,6 @@ static int pid_ns_ctl_handler(const struct ctl_table *table, int write,
return ret;
}

extern int pid_max;
static struct ctl_table pid_ns_ctl_table[] = {
{
.procname = "ns_last_pid",
@@ -289,7 +310,7 @@ static struct ctl_table pid_ns_ctl_table[] = {
.mode = 0666, /* permissions are checked in the handler */
.proc_handler = pid_ns_ctl_handler,
.extra1 = SYSCTL_ZERO,
.extra2 = &pid_max,
.extra2 = &init_pid_ns.pid_max,
},
};
#endif /* CONFIG_CHECKPOINT_RESTORE */
@@ -1803,15 +1803,6 @@ static struct ctl_table kern_table[] = {
.proc_handler = proc_dointvec,
},
#endif
{
.procname = "pid_max",
.data = &pid_max,
.maxlen = sizeof (int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &pid_max_min,
.extra2 = &pid_max_max,
},
{
.procname = "panic_on_oops",
.data = &panic_on_oops,
@@ -414,7 +414,7 @@ struct trace_pid_list *trace_pid_list_alloc(void)
int i;

/* According to linux/thread.h, pids can be no bigger that 30 bits */
WARN_ON_ONCE(pid_max > (1 << 30));
WARN_ON_ONCE(init_pid_ns.pid_max > (1 << 30));

pid_list = kzalloc(sizeof(*pid_list), GFP_KERNEL);
if (!pid_list)
@@ -717,8 +717,6 @@ extern unsigned long tracing_thresh;

/* PID filtering */

extern int pid_max;

bool trace_find_filtered_pid(struct trace_pid_list *filtered_pids,
pid_t search_pid);
bool trace_ignore_this_task(struct trace_pid_list *filtered_pids,