fscache fixes

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEqG5UsNXhtOCrfGQP+7dXa6fLC2sFAmHrJ4UACgkQ+7dXa6fL
 C2taFg/+K0nKY3TDKc7JVxe9BEUa/uUvNba5j16qKWJKnjCrjqU0RWJSUnVOxcwH
 eBwxjaoZTmtGqS3snvKMYNIGLf7FAEeRY3a1JO81sdoCA2X7P/4HtfDO+DdaCFpB
 tYr9+Hfjy+2baMLEimTWp2SYsfVKD/J4gHeqdRF12QMQJ01m+X9fcV7TqpkyPo5C
 L35Gi1RVDXzwn81qOV9yrmkOBhPK4hiREmCmD0BGEGT/+FYlYVBOv92n31CQMl1Q
 ef8eMNiDYgXQ0iWoUYIJqMBs88M7DwAmeaA/qi85gzfGM0Z5pBE3SNy02F/81b8b
 PHgmzDfj42AuAQ9ZOuwREwtvQrpGsCsw5+60klzZ6PiFtO+q/PtS/ODTdi4foDDd
 ura6RM3wqlE9P4Vi7gj9NYhdUlp5p0YYPlJOGr6CepYS63O3DgwXyoVMPUE5s5fF
 Boc3Ef7fKy25Wc1s/ZVSbvvgh9KYDJOKqxs4ELeNSW8GD7uA2d0XWkhjzZIsVdKZ
 kd7VEYr2/j6gpPR63yw2MVOfymJN1WYuu6ittuL40hSYW8hbBg/m8hVE1E7b7NlP
 +u10xvCwN9LBK+757I733wV3hB4rYa6pg9KBjHDXyCLQ1uCKVE0QaILTSjqC/mCM
 OTPFmi50y1oa/9BxooEg5+ShXOH95EE4jTBCaL1lv7JYfYNUbYA=
 =CgMV
 -----END PGP SIGNATURE-----

Merge tag 'fscache-fixes-20220121' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs

Pull more fscache updates from David Howells:
 "A set of fixes and minor updates for the fscache rewrite:

   - Fix mishandling of volume collisions (the wait condition is
     inverted and so it was only waiting if the volume collision was
     already resolved).

   - Fix miscalculation of whether there's space available in
     cachefiles.

   - Make sure a default cache name is set on a cache if the user hasn't
     set one by the time they bind the cache.

   - Adjust the way the backing inode is presented in tracepoints, add a
     tracepoint for mkdir and trace directory lookup.

   - Add a tracepoint for failure to set the active file mark.

   - Add an explanation of the checks made on the backing filesystem.

   - Check that the backing filesystem supports tmpfile.

   - Document how the page-release cancellation of the read-skip
     optimisation works.

  And I've included a change for netfslib:

   - Make ops->init_rreq() optional"

* tag 'fscache-fixes-20220121' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs:
  netfs: Make ops->init_rreq() optional
  fscache: Add a comment explaining how page-release optimisation works
  cachefiles: Check that the backing filesystem supports tmpfiles
  cachefiles: Explain checks in a comment
  cachefiles: Trace active-mark failure
  cachefiles: Make some tracepoint adjustments
  cachefiles: set default tag name if it's unspecified
  cachefiles: Calculate the blockshift in terms of bytes, not pages
  fscache: Fix the volume collision wait condition
This commit is contained in:
Linus Torvalds 2022-01-22 10:59:32 +02:00
commit 7fd350f6ff
10 changed files with 113 additions and 51 deletions

View File

@ -49,11 +49,19 @@ int cachefiles_add_cache(struct cachefiles_cache *cache)
goto error_unsupported;
}
/* check parameters */
/* Check features of the backing filesystem:
* - Directories must support looking up and directory creation
* - We create tmpfiles to handle invalidation
* - We use xattrs to store metadata
* - We need to be able to query the amount of space available
* - We want to be able to sync the filesystem when stopping the cache
* - We use DIO to/from pages, so the blocksize mustn't be too big.
*/
ret = -EOPNOTSUPP;
if (d_is_negative(root) ||
!d_backing_inode(root)->i_op->lookup ||
!d_backing_inode(root)->i_op->mkdir ||
!d_backing_inode(root)->i_op->tmpfile ||
!(d_backing_inode(root)->i_opflags & IOP_XATTR) ||
!root->d_sb->s_op->statfs ||
!root->d_sb->s_op->sync_fs ||
@ -84,9 +92,7 @@ int cachefiles_add_cache(struct cachefiles_cache *cache)
goto error_unsupported;
cache->bsize = stats.f_bsize;
cache->bshift = 0;
if (stats.f_bsize < PAGE_SIZE)
cache->bshift = PAGE_SHIFT - ilog2(stats.f_bsize);
cache->bshift = ilog2(stats.f_bsize);
_debug("blksize %u (shift %u)",
cache->bsize, cache->bshift);
@ -106,7 +112,6 @@ int cachefiles_add_cache(struct cachefiles_cache *cache)
(unsigned long long) cache->fcull,
(unsigned long long) cache->fstop);
stats.f_blocks >>= cache->bshift;
do_div(stats.f_blocks, 100);
cache->bstop = stats.f_blocks * cache->bstop_percent;
cache->bcull = stats.f_blocks * cache->bcull_percent;
@ -209,7 +214,7 @@ int cachefiles_has_space(struct cachefiles_cache *cache,
return ret;
}
b_avail = stats.f_bavail >> cache->bshift;
b_avail = stats.f_bavail;
b_writing = atomic_long_read(&cache->b_writing);
if (b_avail > b_writing)
b_avail -= b_writing;

View File

@ -703,6 +703,17 @@ static int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args)
return -EBUSY;
}
/* Make sure we have copies of the tag string */
if (!cache->tag) {
/*
* The tag string is released by the fops->release()
* function, so we don't release it on error here
*/
cache->tag = kstrdup("CacheFiles", GFP_KERNEL);
if (!cache->tag)
return -ENOMEM;
}
return cachefiles_add_cache(cache);
}

View File

@ -86,7 +86,7 @@ struct cachefiles_cache {
unsigned bcull_percent; /* when to start culling (% blocks) */
unsigned bstop_percent; /* when to stop allocating (% blocks) */
unsigned bsize; /* cache's block size */
unsigned bshift; /* min(ilog2(PAGE_SIZE / bsize), 0) */
unsigned bshift; /* ilog2(bsize) */
uint64_t frun; /* when to stop culling */
uint64_t fcull; /* when to start culling */
uint64_t fstop; /* when to stop allocating */

View File

@ -264,7 +264,7 @@ static int cachefiles_write(struct netfs_cache_resources *cres,
ki->term_func = term_func;
ki->term_func_priv = term_func_priv;
ki->was_async = true;
ki->b_writing = (len + (1 << cache->bshift)) >> cache->bshift;
ki->b_writing = (len + (1 << cache->bshift) - 1) >> cache->bshift;
if (ki->term_func)
ki->iocb.ki_complete = cachefiles_write_complete;

View File

@ -25,7 +25,9 @@ static bool __cachefiles_mark_inode_in_use(struct cachefiles_object *object,
trace_cachefiles_mark_active(object, inode);
can_use = true;
} else {
pr_notice("cachefiles: Inode already in use: %pd\n", dentry);
trace_cachefiles_mark_failed(object, inode);
pr_notice("cachefiles: Inode already in use: %pd (B=%lx)\n",
dentry, inode->i_ino);
}
return can_use;
@ -101,6 +103,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
subdir = lookup_one_len(dirname, dir, strlen(dirname));
else
subdir = ERR_PTR(ret);
trace_cachefiles_lookup(NULL, dir, subdir);
if (IS_ERR(subdir)) {
trace_cachefiles_vfs_error(NULL, d_backing_inode(dir),
PTR_ERR(subdir),
@ -135,6 +138,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
cachefiles_trace_mkdir_error);
goto mkdir_error;
}
trace_cachefiles_mkdir(dir, subdir);
if (unlikely(d_unhashed(subdir))) {
cachefiles_put_directory(subdir);
@ -233,7 +237,7 @@ static int cachefiles_unlink(struct cachefiles_cache *cache,
};
int ret;
trace_cachefiles_unlink(object, dentry, why);
trace_cachefiles_unlink(object, d_inode(dentry)->i_ino, why);
ret = security_path_unlink(&path, dentry);
if (ret < 0) {
cachefiles_io_error(cache, "Unlink security error");
@ -386,7 +390,7 @@ int cachefiles_bury_object(struct cachefiles_cache *cache,
.new_dir = d_inode(cache->graveyard),
.new_dentry = grave,
};
trace_cachefiles_rename(object, rep, grave, why);
trace_cachefiles_rename(object, d_inode(rep)->i_ino, why);
ret = cachefiles_inject_read_error();
if (ret == 0)
ret = vfs_rename(&rd);
@ -617,7 +621,7 @@ bool cachefiles_look_up_object(struct cachefiles_object *object)
object->d_name_len);
else
dentry = ERR_PTR(ret);
trace_cachefiles_lookup(object, dentry);
trace_cachefiles_lookup(object, fan, dentry);
if (IS_ERR(dentry)) {
if (dentry == ERR_PTR(-ENOENT))
goto new_file;

View File

@ -297,10 +297,6 @@ static void ceph_netfs_issue_op(struct netfs_read_subrequest *subreq)
dout("%s: result %d\n", __func__, err);
}
static void ceph_init_rreq(struct netfs_read_request *rreq, struct file *file)
{
}
static void ceph_readahead_cleanup(struct address_space *mapping, void *priv)
{
struct inode *inode = mapping->host;
@ -312,7 +308,6 @@ static void ceph_readahead_cleanup(struct address_space *mapping, void *priv)
}
static const struct netfs_read_request_ops ceph_netfs_read_ops = {
.init_rreq = ceph_init_rreq,
.is_cache_enabled = ceph_is_cache_enabled,
.begin_cache_operation = ceph_begin_cache_operation,
.issue_op = ceph_netfs_issue_op,

View File

@ -142,12 +142,12 @@ static void fscache_wait_on_volume_collision(struct fscache_volume *candidate,
unsigned int collidee_debug_id)
{
wait_var_event_timeout(&candidate->flags,
fscache_is_acquire_pending(candidate), 20 * HZ);
!fscache_is_acquire_pending(candidate), 20 * HZ);
if (!fscache_is_acquire_pending(candidate)) {
pr_notice("Potential volume collision new=%08x old=%08x",
candidate->debug_id, collidee_debug_id);
fscache_stat(&fscache_n_volumes_collision);
wait_var_event(&candidate->flags, fscache_is_acquire_pending(candidate));
wait_var_event(&candidate->flags, !fscache_is_acquire_pending(candidate));
}
}

View File

@ -55,7 +55,8 @@ static struct netfs_read_request *netfs_alloc_read_request(
INIT_WORK(&rreq->work, netfs_rreq_work);
refcount_set(&rreq->usage, 1);
__set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
ops->init_rreq(rreq, file);
if (ops->init_rreq)
ops->init_rreq(rreq, file);
netfs_stat(&netfs_n_rh_rreq);
}

View File

@ -665,6 +665,11 @@ static inline void fscache_clear_inode_writeback(struct fscache_cookie *cookie,
static inline
void fscache_note_page_release(struct fscache_cookie *cookie)
{
/* If we've written data to the cache (HAVE_DATA) and there wasn't any
* data in the cache when we started (NO_DATA_TO_READ), it may no
* longer be true that we can skip reading from the cache - so clear
* the flag that causes reads to be skipped.
*/
if (cookie &&
test_bit(FSCACHE_COOKIE_HAVE_DATA, &cookie->flags) &&
test_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags))

View File

@ -233,25 +233,48 @@ TRACE_EVENT(cachefiles_ref,
TRACE_EVENT(cachefiles_lookup,
TP_PROTO(struct cachefiles_object *obj,
struct dentry *dir,
struct dentry *de),
TP_ARGS(obj, de),
TP_ARGS(obj, dir, de),
TP_STRUCT__entry(
__field(unsigned int, obj )
__field(short, error )
__field(unsigned long, dino )
__field(unsigned long, ino )
),
TP_fast_assign(
__entry->obj = obj->debug_id;
__entry->obj = obj ? obj->debug_id : 0;
__entry->dino = d_backing_inode(dir)->i_ino;
__entry->ino = (!IS_ERR(de) && d_backing_inode(de) ?
d_backing_inode(de)->i_ino : 0);
__entry->error = IS_ERR(de) ? PTR_ERR(de) : 0;
),
TP_printk("o=%08x i=%lx e=%d",
__entry->obj, __entry->ino, __entry->error)
TP_printk("o=%08x dB=%lx B=%lx e=%d",
__entry->obj, __entry->dino, __entry->ino, __entry->error)
);
/*
 * Trace a directory creation in the backing filesystem.
 *
 * @dir:    dentry of the parent directory
 * @subdir: dentry of the directory being reported
 *
 * Records the backing inode number of each dentry.  The fields are
 * unsigned long (printed with %lx) so that inode numbers wider than 32 bits
 * are not truncated, matching how the cachefiles_lookup and
 * cachefiles_mark_failed tracepoints store and print i_ino.
 */
TRACE_EVENT(cachefiles_mkdir,
	    TP_PROTO(struct dentry *dir, struct dentry *subdir),

	    TP_ARGS(dir, subdir),

	    TP_STRUCT__entry(
		    __field(unsigned long,		dir		)
		    __field(unsigned long,		subdir		)
			     ),

	    TP_fast_assign(
		    __entry->dir	= d_backing_inode(dir)->i_ino;
		    __entry->subdir	= d_backing_inode(subdir)->i_ino;
			   ),

	    /* dB = parent dir backing inode, sB = subdir backing inode */
	    TP_printk("dB=%lx sB=%lx",
		      __entry->dir,
		      __entry->subdir)
	    );
TRACE_EVENT(cachefiles_tmpfile,
@ -269,7 +292,7 @@ TRACE_EVENT(cachefiles_tmpfile,
__entry->backer = backer->i_ino;
),
TP_printk("o=%08x b=%08x",
TP_printk("o=%08x B=%x",
__entry->obj,
__entry->backer)
);
@ -289,61 +312,58 @@ TRACE_EVENT(cachefiles_link,
__entry->backer = backer->i_ino;
),
TP_printk("o=%08x b=%08x",
TP_printk("o=%08x B=%x",
__entry->obj,
__entry->backer)
);
TRACE_EVENT(cachefiles_unlink,
TP_PROTO(struct cachefiles_object *obj,
struct dentry *de,
ino_t ino,
enum fscache_why_object_killed why),
TP_ARGS(obj, de, why),
TP_ARGS(obj, ino, why),
/* Note that obj may be NULL */
TP_STRUCT__entry(
__field(unsigned int, obj )
__field(struct dentry *, de )
__field(unsigned int, ino )
__field(enum fscache_why_object_killed, why )
),
TP_fast_assign(
__entry->obj = obj ? obj->debug_id : UINT_MAX;
__entry->de = de;
__entry->ino = ino;
__entry->why = why;
),
TP_printk("o=%08x d=%p w=%s",
__entry->obj, __entry->de,
TP_printk("o=%08x B=%x w=%s",
__entry->obj, __entry->ino,
__print_symbolic(__entry->why, cachefiles_obj_kill_traces))
);
TRACE_EVENT(cachefiles_rename,
TP_PROTO(struct cachefiles_object *obj,
struct dentry *de,
struct dentry *to,
ino_t ino,
enum fscache_why_object_killed why),
TP_ARGS(obj, de, to, why),
TP_ARGS(obj, ino, why),
/* Note that obj may be NULL */
TP_STRUCT__entry(
__field(unsigned int, obj )
__field(struct dentry *, de )
__field(struct dentry *, to )
__field(unsigned int, ino )
__field(enum fscache_why_object_killed, why )
),
TP_fast_assign(
__entry->obj = obj ? obj->debug_id : UINT_MAX;
__entry->de = de;
__entry->to = to;
__entry->ino = ino;
__entry->why = why;
),
TP_printk("o=%08x d=%p t=%p w=%s",
__entry->obj, __entry->de, __entry->to,
TP_printk("o=%08x B=%x w=%s",
__entry->obj, __entry->ino,
__print_symbolic(__entry->why, cachefiles_obj_kill_traces))
);
@ -370,7 +390,7 @@ TRACE_EVENT(cachefiles_coherency,
__entry->ino = ino;
),
TP_printk("o=%08x %s i=%llx c=%u",
TP_printk("o=%08x %s B=%llx c=%u",
__entry->obj,
__print_symbolic(__entry->why, cachefiles_coherency_traces),
__entry->ino,
@ -397,7 +417,7 @@ TRACE_EVENT(cachefiles_vol_coherency,
__entry->ino = ino;
),
TP_printk("V=%08x %s i=%llx",
TP_printk("V=%08x %s B=%llx",
__entry->vol,
__print_symbolic(__entry->why, cachefiles_coherency_traces),
__entry->ino)
@ -435,7 +455,7 @@ TRACE_EVENT(cachefiles_prep_read,
__entry->cache_inode = cache_inode;
),
TP_printk("R=%08x[%u] %s %s f=%02x s=%llx %zx ni=%x b=%x",
TP_printk("R=%08x[%u] %s %s f=%02x s=%llx %zx ni=%x B=%x",
__entry->rreq, __entry->index,
__print_symbolic(__entry->source, netfs_sreq_sources),
__print_symbolic(__entry->why, cachefiles_prepare_read_traces),
@ -466,7 +486,7 @@ TRACE_EVENT(cachefiles_read,
__entry->len = len;
),
TP_printk("o=%08x b=%08x s=%llx l=%zx",
TP_printk("o=%08x B=%x s=%llx l=%zx",
__entry->obj,
__entry->backer,
__entry->start,
@ -495,7 +515,7 @@ TRACE_EVENT(cachefiles_write,
__entry->len = len;
),
TP_printk("o=%08x b=%08x s=%llx l=%zx",
TP_printk("o=%08x B=%x s=%llx l=%zx",
__entry->obj,
__entry->backer,
__entry->start,
@ -524,7 +544,7 @@ TRACE_EVENT(cachefiles_trunc,
__entry->why = why;
),
TP_printk("o=%08x b=%08x %s l=%llx->%llx",
TP_printk("o=%08x B=%x %s l=%llx->%llx",
__entry->obj,
__entry->backer,
__print_symbolic(__entry->why, cachefiles_trunc_traces),
@ -549,7 +569,28 @@ TRACE_EVENT(cachefiles_mark_active,
__entry->inode = inode->i_ino;
),
TP_printk("o=%08x i=%lx",
TP_printk("o=%08x B=%lx",
__entry->obj, __entry->inode)
);
/*
 * Trace a failure to mark a backing inode as in use.
 *
 * @obj:   cache object the inode was being marked for (may be NULL)
 * @inode: backing inode that could not be marked
 *
 * Emitted from the "Inode already in use" branch of
 * __cachefiles_mark_inode_in_use().
 */
TRACE_EVENT(cachefiles_mark_failed,
TP_PROTO(struct cachefiles_object *obj,
struct inode *inode),
TP_ARGS(obj, inode),
/* Note that obj may be NULL */
TP_STRUCT__entry(
__field(unsigned int, obj )
__field(ino_t, inode )
),
TP_fast_assign(
/* A NULL object is recorded as debug ID 0 */
__entry->obj = obj ? obj->debug_id : 0;
__entry->inode = inode->i_ino;
),
/* o = object debug ID, B = backing inode number */
TP_printk("o=%08x B=%lx",
__entry->obj, __entry->inode)
);
@ -570,7 +611,7 @@ TRACE_EVENT(cachefiles_mark_inactive,
__entry->inode = inode->i_ino;
),
TP_printk("o=%08x i=%lx",
TP_printk("o=%08x B=%lx",
__entry->obj, __entry->inode)
);
@ -594,7 +635,7 @@ TRACE_EVENT(cachefiles_vfs_error,
__entry->where = where;
),
TP_printk("o=%08x b=%08x %s e=%d",
TP_printk("o=%08x B=%x %s e=%d",
__entry->obj,
__entry->backer,
__print_symbolic(__entry->where, cachefiles_error_traces),
@ -621,7 +662,7 @@ TRACE_EVENT(cachefiles_io_error,
__entry->where = where;
),
TP_printk("o=%08x b=%08x %s e=%d",
TP_printk("o=%08x B=%x %s e=%d",
__entry->obj,
__entry->backer,
__print_symbolic(__entry->where, cachefiles_error_traces),