mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-18 19:05:39 +00:00
3288666c72
fscache_create_volume_work() uses wake_up_bit() to wake up the processes which are waiting for the completion of volume creation. According to comments in wake_up_bit() and waitqueue_active(), an extra smp_mb() is needed to guarantee the memory order between FSCACHE_VOLUME_CREATING flag and waitqueue_active() before invoking wake_up_bit(). Fixing it by using clear_and_wake_up_bit() to add the missing memory barrier. Reviewed-by: Jingbo Xu <jefflexu@linux.alibaba.com> Signed-off-by: Hou Tao <houtao1@huawei.com> Signed-off-by: David Howells <dhowells@redhat.com> Reviewed-by: Jeff Layton <jlayton@kernel.org> Link: https://lore.kernel.org/r/20230113115211.2895845-3-houtao@huaweicloud.com/ # v3
521 lines
15 KiB
C
521 lines
15 KiB
C
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
/* Volume-level cache cookie handling.
|
|
*
|
|
* Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
|
|
* Written by David Howells (dhowells@redhat.com)
|
|
*/
|
|
|
|
#define FSCACHE_DEBUG_LEVEL COOKIE
|
|
#include <linux/export.h>
|
|
#include <linux/slab.h>
|
|
#include "internal.h"
|
|
|
|
#define fscache_volume_hash_shift 10
|
|
static struct hlist_bl_head fscache_volume_hash[1 << fscache_volume_hash_shift];
|
|
static atomic_t fscache_volume_debug_id;
|
|
static LIST_HEAD(fscache_volumes);
|
|
|
|
static void fscache_create_volume_work(struct work_struct *work);
|
|
|
|
struct fscache_volume *fscache_get_volume(struct fscache_volume *volume,
|
|
enum fscache_volume_trace where)
|
|
{
|
|
int ref;
|
|
|
|
__refcount_inc(&volume->ref, &ref);
|
|
trace_fscache_volume(volume->debug_id, ref + 1, where);
|
|
return volume;
|
|
}
|
|
|
|
static void fscache_see_volume(struct fscache_volume *volume,
|
|
enum fscache_volume_trace where)
|
|
{
|
|
int ref = refcount_read(&volume->ref);
|
|
|
|
trace_fscache_volume(volume->debug_id, ref, where);
|
|
}
|
|
|
|
/*
|
|
* Pin the cache behind a volume so that we can access it.
|
|
*/
|
|
static void __fscache_begin_volume_access(struct fscache_volume *volume,
|
|
struct fscache_cookie *cookie,
|
|
enum fscache_access_trace why)
|
|
{
|
|
int n_accesses;
|
|
|
|
n_accesses = atomic_inc_return(&volume->n_accesses);
|
|
smp_mb__after_atomic();
|
|
trace_fscache_access_volume(volume->debug_id, cookie ? cookie->debug_id : 0,
|
|
refcount_read(&volume->ref),
|
|
n_accesses, why);
|
|
}
|
|
|
|
/**
|
|
* fscache_begin_volume_access - Pin a cache so a volume can be accessed
|
|
* @volume: The volume cookie
|
|
* @cookie: A datafile cookie for a tracing reference (or NULL)
|
|
* @why: An indication of the circumstances of the access for tracing
|
|
*
|
|
* Attempt to pin the cache to prevent it from going away whilst we're
|
|
* accessing a volume and returns true if successful. This works as follows:
|
|
*
|
|
* (1) If the cache tests as not live (state is not FSCACHE_CACHE_IS_ACTIVE),
|
|
* then we return false to indicate access was not permitted.
|
|
*
|
|
* (2) If the cache tests as live, then we increment the volume's n_accesses
|
|
* count and then recheck the cache liveness, ending the access if it
|
|
* ceased to be live.
|
|
*
|
|
* (3) When we end the access, we decrement the volume's n_accesses and wake
|
|
* up the any waiters if it reaches 0.
|
|
*
|
|
* (4) Whilst the cache is caching, the volume's n_accesses is kept
|
|
* artificially incremented to prevent wakeups from happening.
|
|
*
|
|
* (5) When the cache is taken offline, the state is changed to prevent new
|
|
* accesses, the volume's n_accesses is decremented and we wait for it to
|
|
* become 0.
|
|
*
|
|
* The datafile @cookie and the @why indicator are merely provided for tracing
|
|
* purposes.
|
|
*/
|
|
bool fscache_begin_volume_access(struct fscache_volume *volume,
|
|
struct fscache_cookie *cookie,
|
|
enum fscache_access_trace why)
|
|
{
|
|
if (!fscache_cache_is_live(volume->cache))
|
|
return false;
|
|
__fscache_begin_volume_access(volume, cookie, why);
|
|
if (!fscache_cache_is_live(volume->cache)) {
|
|
fscache_end_volume_access(volume, cookie, fscache_access_unlive);
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* fscache_end_volume_access - Unpin a cache at the end of an access.
|
|
* @volume: The volume cookie
|
|
* @cookie: A datafile cookie for a tracing reference (or NULL)
|
|
* @why: An indication of the circumstances of the access for tracing
|
|
*
|
|
* Unpin a cache volume after we've accessed it. The datafile @cookie and the
|
|
* @why indicator are merely provided for tracing purposes.
|
|
*/
|
|
void fscache_end_volume_access(struct fscache_volume *volume,
|
|
struct fscache_cookie *cookie,
|
|
enum fscache_access_trace why)
|
|
{
|
|
int n_accesses;
|
|
|
|
smp_mb__before_atomic();
|
|
n_accesses = atomic_dec_return(&volume->n_accesses);
|
|
trace_fscache_access_volume(volume->debug_id, cookie ? cookie->debug_id : 0,
|
|
refcount_read(&volume->ref),
|
|
n_accesses, why);
|
|
if (n_accesses == 0)
|
|
wake_up_var(&volume->n_accesses);
|
|
}
|
|
EXPORT_SYMBOL(fscache_end_volume_access);
|
|
|
|
static bool fscache_volume_same(const struct fscache_volume *a,
|
|
const struct fscache_volume *b)
|
|
{
|
|
size_t klen;
|
|
|
|
if (a->key_hash != b->key_hash ||
|
|
a->cache != b->cache ||
|
|
a->key[0] != b->key[0])
|
|
return false;
|
|
|
|
klen = round_up(a->key[0] + 1, sizeof(__le32));
|
|
return memcmp(a->key, b->key, klen) == 0;
|
|
}
|
|
|
|
static bool fscache_is_acquire_pending(struct fscache_volume *volume)
|
|
{
|
|
return test_bit(FSCACHE_VOLUME_ACQUIRE_PENDING, &volume->flags);
|
|
}
|
|
|
|
static void fscache_wait_on_volume_collision(struct fscache_volume *candidate,
|
|
unsigned int collidee_debug_id)
|
|
{
|
|
wait_on_bit_timeout(&candidate->flags, FSCACHE_VOLUME_ACQUIRE_PENDING,
|
|
TASK_UNINTERRUPTIBLE, 20 * HZ);
|
|
if (fscache_is_acquire_pending(candidate)) {
|
|
pr_notice("Potential volume collision new=%08x old=%08x",
|
|
candidate->debug_id, collidee_debug_id);
|
|
fscache_stat(&fscache_n_volumes_collision);
|
|
wait_on_bit(&candidate->flags, FSCACHE_VOLUME_ACQUIRE_PENDING,
|
|
TASK_UNINTERRUPTIBLE);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Attempt to insert the new volume into the hash. If there's a collision, we
|
|
* wait for the old volume to complete if it's being relinquished and an error
|
|
* otherwise.
|
|
*/
|
|
static bool fscache_hash_volume(struct fscache_volume *candidate)
|
|
{
|
|
struct fscache_volume *cursor;
|
|
struct hlist_bl_head *h;
|
|
struct hlist_bl_node *p;
|
|
unsigned int bucket, collidee_debug_id = 0;
|
|
|
|
bucket = candidate->key_hash & (ARRAY_SIZE(fscache_volume_hash) - 1);
|
|
h = &fscache_volume_hash[bucket];
|
|
|
|
hlist_bl_lock(h);
|
|
hlist_bl_for_each_entry(cursor, p, h, hash_link) {
|
|
if (fscache_volume_same(candidate, cursor)) {
|
|
if (!test_bit(FSCACHE_VOLUME_RELINQUISHED, &cursor->flags))
|
|
goto collision;
|
|
fscache_see_volume(cursor, fscache_volume_get_hash_collision);
|
|
set_bit(FSCACHE_VOLUME_COLLIDED_WITH, &cursor->flags);
|
|
set_bit(FSCACHE_VOLUME_ACQUIRE_PENDING, &candidate->flags);
|
|
collidee_debug_id = cursor->debug_id;
|
|
break;
|
|
}
|
|
}
|
|
|
|
hlist_bl_add_head(&candidate->hash_link, h);
|
|
hlist_bl_unlock(h);
|
|
|
|
if (fscache_is_acquire_pending(candidate))
|
|
fscache_wait_on_volume_collision(candidate, collidee_debug_id);
|
|
return true;
|
|
|
|
collision:
|
|
fscache_see_volume(cursor, fscache_volume_collision);
|
|
hlist_bl_unlock(h);
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* Allocate and initialise a volume representation cookie.
|
|
*/
|
|
static struct fscache_volume *fscache_alloc_volume(const char *volume_key,
|
|
const char *cache_name,
|
|
const void *coherency_data,
|
|
size_t coherency_len)
|
|
{
|
|
struct fscache_volume *volume;
|
|
struct fscache_cache *cache;
|
|
size_t klen, hlen;
|
|
u8 *key;
|
|
|
|
klen = strlen(volume_key);
|
|
if (klen > NAME_MAX)
|
|
return NULL;
|
|
|
|
if (!coherency_data)
|
|
coherency_len = 0;
|
|
|
|
cache = fscache_lookup_cache(cache_name, false);
|
|
if (IS_ERR(cache))
|
|
return NULL;
|
|
|
|
volume = kzalloc(struct_size(volume, coherency, coherency_len),
|
|
GFP_KERNEL);
|
|
if (!volume)
|
|
goto err_cache;
|
|
|
|
volume->cache = cache;
|
|
volume->coherency_len = coherency_len;
|
|
if (coherency_data)
|
|
memcpy(volume->coherency, coherency_data, coherency_len);
|
|
INIT_LIST_HEAD(&volume->proc_link);
|
|
INIT_WORK(&volume->work, fscache_create_volume_work);
|
|
refcount_set(&volume->ref, 1);
|
|
spin_lock_init(&volume->lock);
|
|
|
|
/* Stick the length on the front of the key and pad it out to make
|
|
* hashing easier.
|
|
*/
|
|
hlen = round_up(1 + klen + 1, sizeof(__le32));
|
|
key = kzalloc(hlen, GFP_KERNEL);
|
|
if (!key)
|
|
goto err_vol;
|
|
key[0] = klen;
|
|
memcpy(key + 1, volume_key, klen);
|
|
|
|
volume->key = key;
|
|
volume->key_hash = fscache_hash(0, key, hlen);
|
|
|
|
volume->debug_id = atomic_inc_return(&fscache_volume_debug_id);
|
|
down_write(&fscache_addremove_sem);
|
|
atomic_inc(&cache->n_volumes);
|
|
list_add_tail(&volume->proc_link, &fscache_volumes);
|
|
fscache_see_volume(volume, fscache_volume_new_acquire);
|
|
fscache_stat(&fscache_n_volumes);
|
|
up_write(&fscache_addremove_sem);
|
|
_leave(" = v=%x", volume->debug_id);
|
|
return volume;
|
|
|
|
err_vol:
|
|
kfree(volume);
|
|
err_cache:
|
|
fscache_put_cache(cache, fscache_cache_put_alloc_volume);
|
|
fscache_stat(&fscache_n_volumes_nomem);
|
|
return NULL;
|
|
}
|
|
|
|
/*
|
|
* Create a volume's representation on disk. Have a volume ref and a cache
|
|
* access we have to release.
|
|
*/
|
|
static void fscache_create_volume_work(struct work_struct *work)
|
|
{
|
|
const struct fscache_cache_ops *ops;
|
|
struct fscache_volume *volume =
|
|
container_of(work, struct fscache_volume, work);
|
|
|
|
fscache_see_volume(volume, fscache_volume_see_create_work);
|
|
|
|
ops = volume->cache->ops;
|
|
if (ops->acquire_volume)
|
|
ops->acquire_volume(volume);
|
|
fscache_end_cache_access(volume->cache,
|
|
fscache_access_acquire_volume_end);
|
|
|
|
clear_and_wake_up_bit(FSCACHE_VOLUME_CREATING, &volume->flags);
|
|
fscache_put_volume(volume, fscache_volume_put_create_work);
|
|
}
|
|
|
|
/*
|
|
* Dispatch a worker thread to create a volume's representation on disk.
|
|
*/
|
|
void fscache_create_volume(struct fscache_volume *volume, bool wait)
|
|
{
|
|
if (test_and_set_bit(FSCACHE_VOLUME_CREATING, &volume->flags))
|
|
goto maybe_wait;
|
|
if (volume->cache_priv)
|
|
goto no_wait; /* We raced */
|
|
if (!fscache_begin_cache_access(volume->cache,
|
|
fscache_access_acquire_volume))
|
|
goto no_wait;
|
|
|
|
fscache_get_volume(volume, fscache_volume_get_create_work);
|
|
if (!schedule_work(&volume->work))
|
|
fscache_put_volume(volume, fscache_volume_put_create_work);
|
|
|
|
maybe_wait:
|
|
if (wait) {
|
|
fscache_see_volume(volume, fscache_volume_wait_create_work);
|
|
wait_on_bit(&volume->flags, FSCACHE_VOLUME_CREATING,
|
|
TASK_UNINTERRUPTIBLE);
|
|
}
|
|
return;
|
|
no_wait:
|
|
clear_bit_unlock(FSCACHE_VOLUME_CREATING, &volume->flags);
|
|
wake_up_bit(&volume->flags, FSCACHE_VOLUME_CREATING);
|
|
}
|
|
|
|
/*
|
|
* Acquire a volume representation cookie and link it to a (proposed) cache.
|
|
*/
|
|
struct fscache_volume *__fscache_acquire_volume(const char *volume_key,
|
|
const char *cache_name,
|
|
const void *coherency_data,
|
|
size_t coherency_len)
|
|
{
|
|
struct fscache_volume *volume;
|
|
|
|
volume = fscache_alloc_volume(volume_key, cache_name,
|
|
coherency_data, coherency_len);
|
|
if (!volume)
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
if (!fscache_hash_volume(volume)) {
|
|
fscache_put_volume(volume, fscache_volume_put_hash_collision);
|
|
return ERR_PTR(-EBUSY);
|
|
}
|
|
|
|
fscache_create_volume(volume, false);
|
|
return volume;
|
|
}
|
|
EXPORT_SYMBOL(__fscache_acquire_volume);
|
|
|
|
static void fscache_wake_pending_volume(struct fscache_volume *volume,
|
|
struct hlist_bl_head *h)
|
|
{
|
|
struct fscache_volume *cursor;
|
|
struct hlist_bl_node *p;
|
|
|
|
hlist_bl_for_each_entry(cursor, p, h, hash_link) {
|
|
if (fscache_volume_same(cursor, volume)) {
|
|
fscache_see_volume(cursor, fscache_volume_see_hash_wake);
|
|
clear_and_wake_up_bit(FSCACHE_VOLUME_ACQUIRE_PENDING,
|
|
&cursor->flags);
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Remove a volume cookie from the hash table.
|
|
*/
|
|
static void fscache_unhash_volume(struct fscache_volume *volume)
|
|
{
|
|
struct hlist_bl_head *h;
|
|
unsigned int bucket;
|
|
|
|
bucket = volume->key_hash & (ARRAY_SIZE(fscache_volume_hash) - 1);
|
|
h = &fscache_volume_hash[bucket];
|
|
|
|
hlist_bl_lock(h);
|
|
hlist_bl_del(&volume->hash_link);
|
|
if (test_bit(FSCACHE_VOLUME_COLLIDED_WITH, &volume->flags))
|
|
fscache_wake_pending_volume(volume, h);
|
|
hlist_bl_unlock(h);
|
|
}
|
|
|
|
/*
|
|
* Drop a cache's volume attachments.
|
|
*/
|
|
static void fscache_free_volume(struct fscache_volume *volume)
|
|
{
|
|
struct fscache_cache *cache = volume->cache;
|
|
|
|
if (volume->cache_priv) {
|
|
__fscache_begin_volume_access(volume, NULL,
|
|
fscache_access_relinquish_volume);
|
|
if (volume->cache_priv)
|
|
cache->ops->free_volume(volume);
|
|
fscache_end_volume_access(volume, NULL,
|
|
fscache_access_relinquish_volume_end);
|
|
}
|
|
|
|
down_write(&fscache_addremove_sem);
|
|
list_del_init(&volume->proc_link);
|
|
atomic_dec(&volume->cache->n_volumes);
|
|
up_write(&fscache_addremove_sem);
|
|
|
|
if (!hlist_bl_unhashed(&volume->hash_link))
|
|
fscache_unhash_volume(volume);
|
|
|
|
trace_fscache_volume(volume->debug_id, 0, fscache_volume_free);
|
|
kfree(volume->key);
|
|
kfree(volume);
|
|
fscache_stat_d(&fscache_n_volumes);
|
|
fscache_put_cache(cache, fscache_cache_put_volume);
|
|
}
|
|
|
|
/*
|
|
* Drop a reference to a volume cookie.
|
|
*/
|
|
void fscache_put_volume(struct fscache_volume *volume,
|
|
enum fscache_volume_trace where)
|
|
{
|
|
if (volume) {
|
|
unsigned int debug_id = volume->debug_id;
|
|
bool zero;
|
|
int ref;
|
|
|
|
zero = __refcount_dec_and_test(&volume->ref, &ref);
|
|
trace_fscache_volume(debug_id, ref - 1, where);
|
|
if (zero)
|
|
fscache_free_volume(volume);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Relinquish a volume representation cookie.
|
|
*/
|
|
void __fscache_relinquish_volume(struct fscache_volume *volume,
|
|
const void *coherency_data,
|
|
bool invalidate)
|
|
{
|
|
if (WARN_ON(test_and_set_bit(FSCACHE_VOLUME_RELINQUISHED, &volume->flags)))
|
|
return;
|
|
|
|
if (invalidate) {
|
|
set_bit(FSCACHE_VOLUME_INVALIDATE, &volume->flags);
|
|
} else if (coherency_data) {
|
|
memcpy(volume->coherency, coherency_data, volume->coherency_len);
|
|
}
|
|
|
|
fscache_put_volume(volume, fscache_volume_put_relinquish);
|
|
}
|
|
EXPORT_SYMBOL(__fscache_relinquish_volume);
|
|
|
|
/**
|
|
* fscache_withdraw_volume - Withdraw a volume from being cached
|
|
* @volume: Volume cookie
|
|
*
|
|
* Withdraw a cache volume from service, waiting for all accesses to complete
|
|
* before returning.
|
|
*/
|
|
void fscache_withdraw_volume(struct fscache_volume *volume)
|
|
{
|
|
int n_accesses;
|
|
|
|
_debug("withdraw V=%x", volume->debug_id);
|
|
|
|
/* Allow wakeups on dec-to-0 */
|
|
n_accesses = atomic_dec_return(&volume->n_accesses);
|
|
trace_fscache_access_volume(volume->debug_id, 0,
|
|
refcount_read(&volume->ref),
|
|
n_accesses, fscache_access_cache_unpin);
|
|
|
|
wait_var_event(&volume->n_accesses,
|
|
atomic_read(&volume->n_accesses) == 0);
|
|
}
|
|
EXPORT_SYMBOL(fscache_withdraw_volume);
|
|
|
|
#ifdef CONFIG_PROC_FS
|
|
/*
|
|
* Generate a list of volumes in /proc/fs/fscache/volumes
|
|
*/
|
|
static int fscache_volumes_seq_show(struct seq_file *m, void *v)
|
|
{
|
|
struct fscache_volume *volume;
|
|
|
|
if (v == &fscache_volumes) {
|
|
seq_puts(m,
|
|
"VOLUME REF nCOOK ACC FL CACHE KEY\n"
|
|
"======== ===== ===== === == =============== ================\n");
|
|
return 0;
|
|
}
|
|
|
|
volume = list_entry(v, struct fscache_volume, proc_link);
|
|
seq_printf(m,
|
|
"%08x %5d %5d %3d %02lx %-15.15s %s\n",
|
|
volume->debug_id,
|
|
refcount_read(&volume->ref),
|
|
atomic_read(&volume->n_cookies),
|
|
atomic_read(&volume->n_accesses),
|
|
volume->flags,
|
|
volume->cache->name ?: "-",
|
|
volume->key + 1);
|
|
return 0;
|
|
}
|
|
|
|
static void *fscache_volumes_seq_start(struct seq_file *m, loff_t *_pos)
|
|
__acquires(&fscache_addremove_sem)
|
|
{
|
|
down_read(&fscache_addremove_sem);
|
|
return seq_list_start_head(&fscache_volumes, *_pos);
|
|
}
|
|
|
|
static void *fscache_volumes_seq_next(struct seq_file *m, void *v, loff_t *_pos)
|
|
{
|
|
return seq_list_next(v, &fscache_volumes, _pos);
|
|
}
|
|
|
|
static void fscache_volumes_seq_stop(struct seq_file *m, void *v)
|
|
__releases(&fscache_addremove_sem)
|
|
{
|
|
up_read(&fscache_addremove_sem);
|
|
}
|
|
|
|
const struct seq_operations fscache_volumes_seq_ops = {
|
|
.start = fscache_volumes_seq_start,
|
|
.next = fscache_volumes_seq_next,
|
|
.stop = fscache_volumes_seq_stop,
|
|
.show = fscache_volumes_seq_show,
|
|
};
|
|
#endif /* CONFIG_PROC_FS */
|