dm vdo: add the block allocators and physical zones
Each slab is independent of every other. They are assigned to "physical zones" in round-robin fashion. If there are P physical zones, then slab n is assigned to zone n mod P. The set of slabs in each physical zone is managed by a block allocator.

Co-developed-by: J. corwin Coburn <corwin@hurlbutnet.net>
Signed-off-by: J. corwin Coburn <corwin@hurlbutnet.net>
Co-developed-by: Michael Sclafani <dm-devel@lists.linux.dev>
Signed-off-by: Michael Sclafani <dm-devel@lists.linux.dev>
Co-developed-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
Signed-off-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
Signed-off-by: Matthew Sakai <msakai@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@kernel.org>
parent 883069e30e
commit c9ba9fd33c

646	drivers/md/dm-vdo/physical-zone.c (new file)
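As a quick illustration of the round-robin mapping described in the commit message, here is a minimal standalone sketch (not part of the patch; the zone and slab counts are made-up values):

/*
 * Standalone illustration of the assignment rule above: slab n is handled
 * by physical zone n mod P. The counts below are arbitrary examples.
 */
#include <stdio.h>

int main(void)
{
        unsigned int zone_count = 3;   /* P: assumed number of physical zones */
        unsigned int slab_count = 10;  /* assumed number of slabs */
        unsigned int n;

        for (n = 0; n < slab_count; n++)
                printf("slab %u -> physical zone %u\n", n, n % zone_count);

        return 0;
}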
@@ -0,0 +1,646 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2023 Red Hat
 */

#include "physical-zone.h"

#include <linux/list.h>

#include "logger.h"
#include "memory-alloc.h"
#include "permassert.h"

#include "block-map.h"
#include "completion.h"
#include "constants.h"
#include "data-vio.h"
#include "dedupe.h"
#include "encodings.h"
#include "flush.h"
#include "int-map.h"
#include "slab-depot.h"
#include "status-codes.h"
#include "vdo.h"

enum {
        /* Each user data_vio needs a PBN read lock and write lock. */
        LOCK_POOL_CAPACITY = 2 * MAXIMUM_VDO_USER_VIOS,
};

struct pbn_lock_implementation {
        enum pbn_lock_type type;
        const char *name;
        const char *release_reason;
};

/* This array must have an entry for every pbn_lock_type value. */
static const struct pbn_lock_implementation LOCK_IMPLEMENTATIONS[] = {
        [VIO_READ_LOCK] = {
                .type = VIO_READ_LOCK,
                .name = "read",
                .release_reason = "candidate duplicate",
        },
        [VIO_WRITE_LOCK] = {
                .type = VIO_WRITE_LOCK,
                .name = "write",
                .release_reason = "newly allocated",
        },
        [VIO_BLOCK_MAP_WRITE_LOCK] = {
                .type = VIO_BLOCK_MAP_WRITE_LOCK,
                .name = "block map write",
                .release_reason = "block map write",
        },
};

static inline bool has_lock_type(const struct pbn_lock *lock, enum pbn_lock_type type)
{
        return (lock->implementation == &LOCK_IMPLEMENTATIONS[type]);
}

/**
 * vdo_is_pbn_read_lock() - Check whether a pbn_lock is a read lock.
 * @lock: The lock to check.
 *
 * Return: true if the lock is a read lock.
 */
bool vdo_is_pbn_read_lock(const struct pbn_lock *lock)
{
        return has_lock_type(lock, VIO_READ_LOCK);
}

static inline void set_pbn_lock_type(struct pbn_lock *lock, enum pbn_lock_type type)
{
        lock->implementation = &LOCK_IMPLEMENTATIONS[type];
}

/**
 * vdo_downgrade_pbn_write_lock() - Downgrade a PBN write lock to a PBN read lock.
 * @lock: The PBN write lock to downgrade.
 *
 * The lock holder count is cleared and the caller is responsible for setting the new count.
 */
void vdo_downgrade_pbn_write_lock(struct pbn_lock *lock, bool compressed_write)
{
        ASSERT_LOG_ONLY(!vdo_is_pbn_read_lock(lock),
                        "PBN lock must not already have been downgraded");
        ASSERT_LOG_ONLY(!has_lock_type(lock, VIO_BLOCK_MAP_WRITE_LOCK),
                        "must not downgrade block map write locks");
        ASSERT_LOG_ONLY(lock->holder_count == 1,
                        "PBN write lock should have one holder but has %u",
                        lock->holder_count);
        /*
         * data_vio write locks are downgraded in place--the writer retains the hold on the lock.
         * If this was a compressed write, the holder has not yet journaled its own inc ref,
         * otherwise, it has.
         */
        lock->increment_limit =
                (compressed_write ? MAXIMUM_REFERENCE_COUNT : MAXIMUM_REFERENCE_COUNT - 1);
        set_pbn_lock_type(lock, VIO_READ_LOCK);
}

/**
 * vdo_claim_pbn_lock_increment() - Try to claim one of the available reference count increments
 *                                  on a read lock.
 * @lock: The PBN read lock from which to claim an increment.
 *
 * Claims may be attempted from any thread. A claim is only valid until the PBN lock is released.
 *
 * Return: true if the claim succeeded, guaranteeing one increment can be made without overflowing
 *         the PBN's reference count.
 */
bool vdo_claim_pbn_lock_increment(struct pbn_lock *lock)
{
        /*
         * Claim the next free reference atomically since hash locks from multiple hash zone
         * threads might be concurrently deduplicating against a single PBN lock on a compressed
         * block. As long as hitting the increment limit will lead to the PBN lock being released
         * in a sane time-frame, we won't overflow a 32-bit claim counter, allowing a simple add
         * instead of a compare-and-swap.
         */
        u32 claim_number = (u32) atomic_add_return(1, &lock->increments_claimed);

        return (claim_number <= lock->increment_limit);
}

/**
 * vdo_assign_pbn_lock_provisional_reference() - Inform a PBN lock that it is responsible for a
 *                                               provisional reference.
 * @lock: The PBN lock.
 */
void vdo_assign_pbn_lock_provisional_reference(struct pbn_lock *lock)
{
        ASSERT_LOG_ONLY(!lock->has_provisional_reference,
                        "lock does not have a provisional reference");
        lock->has_provisional_reference = true;
}

/**
 * vdo_unassign_pbn_lock_provisional_reference() - Inform a PBN lock that it is no longer
 *                                                 responsible for a provisional reference.
 * @lock: The PBN lock.
 */
void vdo_unassign_pbn_lock_provisional_reference(struct pbn_lock *lock)
{
        lock->has_provisional_reference = false;
}

/**
 * release_pbn_lock_provisional_reference() - If the lock is responsible for a provisional
 *                                            reference, release that reference.
 * @lock: The lock.
 * @locked_pbn: The PBN covered by the lock.
 * @allocator: The block allocator from which to release the reference.
 *
 * This method is called when the lock is released.
 */
static void release_pbn_lock_provisional_reference(struct pbn_lock *lock,
                                                   physical_block_number_t locked_pbn,
                                                   struct block_allocator *allocator)
{
        int result;

        if (!vdo_pbn_lock_has_provisional_reference(lock))
                return;

        result = vdo_release_block_reference(allocator, locked_pbn);
        if (result != VDO_SUCCESS) {
                uds_log_error_strerror(result,
                                       "Failed to release reference to %s physical block %llu",
                                       lock->implementation->release_reason,
                                       (unsigned long long) locked_pbn);
        }

        vdo_unassign_pbn_lock_provisional_reference(lock);
}

/**
 * union idle_pbn_lock - PBN lock list entries.
 *
 * Unused (idle) PBN locks are kept in a list. Just like in a malloc implementation, the lock
 * structure is unused memory, so we can save a bit of space (and not pollute the lock structure
 * proper) by using a union to overlay the lock structure with the free list.
 */
typedef union {
        /** @entry: Only used while locks are in the pool. */
        struct list_head entry;
        /** @lock: Only used while locks are not in the pool. */
        struct pbn_lock lock;
} idle_pbn_lock;

/**
 * struct pbn_lock_pool - list of PBN locks.
 *
 * The lock pool is little more than the memory allocated for the locks.
 */
struct pbn_lock_pool {
        /** @capacity: The number of locks allocated for the pool. */
        size_t capacity;
        /** @borrowed: The number of locks currently borrowed from the pool. */
        size_t borrowed;
        /** @idle_list: A list containing all idle PBN lock instances. */
        struct list_head idle_list;
        /** @locks: The memory for all the locks allocated by this pool. */
        idle_pbn_lock locks[];
};

/**
 * return_pbn_lock_to_pool() - Return a pbn lock to its pool.
 * @pool: The pool from which the lock was borrowed.
 * @lock: The last reference to the lock being returned.
 *
 * It must be the last live reference, as if the memory were being freed (the lock memory will be
 * re-initialized or zeroed).
 */
static void return_pbn_lock_to_pool(struct pbn_lock_pool *pool, struct pbn_lock *lock)
{
        idle_pbn_lock *idle;

        /* A bit expensive, but will promptly catch some use-after-free errors. */
        memset(lock, 0, sizeof(*lock));

        idle = container_of(lock, idle_pbn_lock, lock);
        INIT_LIST_HEAD(&idle->entry);
        list_add_tail(&idle->entry, &pool->idle_list);

        ASSERT_LOG_ONLY(pool->borrowed > 0, "shouldn't return more than borrowed");
        pool->borrowed -= 1;
}

/**
 * make_pbn_lock_pool() - Create a new PBN lock pool and all the lock instances it can loan out.
 *
 * @capacity: The number of PBN locks to allocate for the pool.
 * @pool_ptr: A pointer to receive the new pool.
 *
 * Return: VDO_SUCCESS or an error code.
 */
static int make_pbn_lock_pool(size_t capacity, struct pbn_lock_pool **pool_ptr)
{
        size_t i;
        struct pbn_lock_pool *pool;
        int result;

        result = uds_allocate_extended(struct pbn_lock_pool, capacity, idle_pbn_lock,
                                       __func__, &pool);
        if (result != VDO_SUCCESS)
                return result;

        pool->capacity = capacity;
        pool->borrowed = capacity;
        INIT_LIST_HEAD(&pool->idle_list);

        for (i = 0; i < capacity; i++)
                return_pbn_lock_to_pool(pool, &pool->locks[i].lock);

        *pool_ptr = pool;
        return VDO_SUCCESS;
}

/**
 * free_pbn_lock_pool() - Free a PBN lock pool.
 * @pool: The lock pool to free.
 *
 * This also frees all the PBN locks it allocated, so the caller must ensure that all locks have
 * been returned to the pool.
 */
static void free_pbn_lock_pool(struct pbn_lock_pool *pool)
{
        if (pool == NULL)
                return;

        ASSERT_LOG_ONLY(pool->borrowed == 0,
                        "All PBN locks must be returned to the pool before it is freed, but %zu locks are still on loan",
                        pool->borrowed);
        uds_free(pool);
}

/**
 * borrow_pbn_lock_from_pool() - Borrow a PBN lock from the pool and initialize it with the
 *                               provided type.
 * @pool: The pool from which to borrow.
 * @type: The type with which to initialize the lock.
 * @lock_ptr: A pointer to receive the borrowed lock.
 *
 * Pools do not grow on demand or allocate memory, so this will fail if the pool is empty. Borrowed
 * locks are still associated with this pool and must be returned to only this pool.
 *
 * Return: VDO_SUCCESS, or VDO_LOCK_ERROR if the pool is empty.
 */
static int __must_check borrow_pbn_lock_from_pool(struct pbn_lock_pool *pool,
                                                  enum pbn_lock_type type,
                                                  struct pbn_lock **lock_ptr)
{
        int result;
        struct list_head *idle_entry;
        idle_pbn_lock *idle;

        if (pool->borrowed >= pool->capacity)
                return uds_log_error_strerror(VDO_LOCK_ERROR,
                                              "no free PBN locks left to borrow");
        pool->borrowed += 1;

        result = ASSERT(!list_empty(&pool->idle_list),
                        "idle list should not be empty if pool not at capacity");
        if (result != VDO_SUCCESS)
                return result;

        idle_entry = pool->idle_list.prev;
        list_del(idle_entry);
        memset(idle_entry, 0, sizeof(*idle_entry));

        idle = list_entry(idle_entry, idle_pbn_lock, entry);
        idle->lock.holder_count = 0;
        set_pbn_lock_type(&idle->lock, type);

        *lock_ptr = &idle->lock;
        return VDO_SUCCESS;
}

/**
 * initialize_zone() - Initialize a physical zone.
 * @vdo: The vdo to which the zone will belong.
 * @zones: The physical_zones to which the zone being initialized belongs.
 *
 * Return: VDO_SUCCESS or an error code.
 */
static int initialize_zone(struct vdo *vdo, struct physical_zones *zones)
{
        int result;
        zone_count_t zone_number = zones->zone_count;
        struct physical_zone *zone = &zones->zones[zone_number];

        result = vdo_make_int_map(VDO_LOCK_MAP_CAPACITY, 0, &zone->pbn_operations);
        if (result != VDO_SUCCESS)
                return result;

        result = make_pbn_lock_pool(LOCK_POOL_CAPACITY, &zone->lock_pool);
        if (result != VDO_SUCCESS) {
                vdo_free_int_map(zone->pbn_operations);
                return result;
        }

        zone->zone_number = zone_number;
        zone->thread_id = vdo->thread_config.physical_threads[zone_number];
        zone->allocator = &vdo->depot->allocators[zone_number];
        zone->next = &zones->zones[(zone_number + 1) % vdo->thread_config.physical_zone_count];
        result = vdo_make_default_thread(vdo, zone->thread_id);
        if (result != VDO_SUCCESS) {
                free_pbn_lock_pool(uds_forget(zone->lock_pool));
                vdo_free_int_map(zone->pbn_operations);
                return result;
        }
        return result;
}

/**
 * vdo_make_physical_zones() - Make the physical zones for a vdo.
 * @vdo: The vdo being constructed.
 * @zones_ptr: A pointer to hold the zones.
 *
 * Return: VDO_SUCCESS or an error code.
 */
int vdo_make_physical_zones(struct vdo *vdo, struct physical_zones **zones_ptr)
{
        struct physical_zones *zones;
        int result;
        zone_count_t zone_count = vdo->thread_config.physical_zone_count;

        if (zone_count == 0)
                return VDO_SUCCESS;

        result = uds_allocate_extended(struct physical_zones, zone_count,
                                       struct physical_zone, __func__, &zones);
        if (result != VDO_SUCCESS)
                return result;

        for (zones->zone_count = 0; zones->zone_count < zone_count; zones->zone_count++) {
                result = initialize_zone(vdo, zones);
                if (result != VDO_SUCCESS) {
                        vdo_free_physical_zones(zones);
                        return result;
                }
        }

        *zones_ptr = zones;
        return VDO_SUCCESS;
}

/**
 * vdo_free_physical_zones() - Destroy the physical zones.
 * @zones: The zones to free.
 */
void vdo_free_physical_zones(struct physical_zones *zones)
{
        zone_count_t index;

        if (zones == NULL)
                return;

        for (index = 0; index < zones->zone_count; index++) {
                struct physical_zone *zone = &zones->zones[index];

                free_pbn_lock_pool(uds_forget(zone->lock_pool));
                vdo_free_int_map(uds_forget(zone->pbn_operations));
        }

        uds_free(zones);
}

/**
 * vdo_get_physical_zone_pbn_lock() - Get the lock on a PBN if one exists.
 * @zone: The physical zone responsible for the PBN.
 * @pbn: The physical block number whose lock is desired.
 *
 * Return: The lock or NULL if the PBN is not locked.
 */
struct pbn_lock *vdo_get_physical_zone_pbn_lock(struct physical_zone *zone,
                                                physical_block_number_t pbn)
{
        return ((zone == NULL) ? NULL : vdo_int_map_get(zone->pbn_operations, pbn));
}

/**
 * vdo_attempt_physical_zone_pbn_lock() - Attempt to lock a physical block in the zone responsible
 *                                        for it.
 * @zone: The physical zone responsible for the PBN.
 * @pbn: The physical block number to lock.
 * @type: The type with which to initialize a new lock.
 * @lock_ptr: A pointer to receive the lock, existing or new.
 *
 * If the PBN is already locked, the existing lock will be returned. Otherwise, a new lock instance
 * will be borrowed from the pool, initialized, and returned. The lock owner will be NULL for a new
 * lock acquired by the caller, who is responsible for setting that field promptly. The lock owner
 * will be non-NULL when there is already an existing lock on the PBN.
 *
 * Return: VDO_SUCCESS or an error.
 */
int vdo_attempt_physical_zone_pbn_lock(struct physical_zone *zone,
                                       physical_block_number_t pbn,
                                       enum pbn_lock_type type,
                                       struct pbn_lock **lock_ptr)
{
        /*
         * Borrow and prepare a lock from the pool so we don't have to do two int_map accesses in
         * the common case of no lock contention.
         */
        struct pbn_lock *lock, *new_lock = NULL;
        int result;

        result = borrow_pbn_lock_from_pool(zone->lock_pool, type, &new_lock);
        if (result != VDO_SUCCESS) {
                ASSERT_LOG_ONLY(false, "must always be able to borrow a PBN lock");
                return result;
        }

        result = vdo_int_map_put(zone->pbn_operations, pbn, new_lock, false,
                                 (void **) &lock);
        if (result != VDO_SUCCESS) {
                return_pbn_lock_to_pool(zone->lock_pool, new_lock);
                return result;
        }

        if (lock != NULL) {
                /* The lock is already held, so we don't need the borrowed one. */
                return_pbn_lock_to_pool(zone->lock_pool, uds_forget(new_lock));
                result = ASSERT(lock->holder_count > 0, "physical block %llu lock held",
                                (unsigned long long) pbn);
                if (result != VDO_SUCCESS)
                        return result;
                *lock_ptr = lock;
        } else {
                *lock_ptr = new_lock;
        }
        return VDO_SUCCESS;
}

/**
 * allocate_and_lock_block() - Attempt to allocate a block from this zone.
 * @allocation: The struct allocation of the data_vio attempting to allocate.
 *
 * If a block is allocated, the recipient will also hold a lock on it.
 *
 * Return: VDO_SUCCESS if a block was allocated, or an error code.
 */
static int allocate_and_lock_block(struct allocation *allocation)
{
        int result;
        struct pbn_lock *lock;

        ASSERT_LOG_ONLY(allocation->lock == NULL,
                        "must not allocate a block while already holding a lock on one");

        result = vdo_allocate_block(allocation->zone->allocator, &allocation->pbn);
        if (result != VDO_SUCCESS)
                return result;

        result = vdo_attempt_physical_zone_pbn_lock(allocation->zone, allocation->pbn,
                                                    allocation->write_lock_type, &lock);
        if (result != VDO_SUCCESS)
                return result;

        if (lock->holder_count > 0) {
                /* This block is already locked, which should be impossible. */
                return uds_log_error_strerror(VDO_LOCK_ERROR,
                                              "Newly allocated block %llu was spuriously locked (holder_count=%u)",
                                              (unsigned long long) allocation->pbn,
                                              lock->holder_count);
        }

        /* We've successfully acquired a new lock, so mark it as ours. */
        lock->holder_count += 1;
        allocation->lock = lock;
        vdo_assign_pbn_lock_provisional_reference(lock);
        return VDO_SUCCESS;
}

/**
 * retry_allocation() - Retry allocating a block now that we're done waiting for scrubbing.
 * @waiter: The data_vio that was waiting to allocate.
 * @context: The context (unused).
 */
static void retry_allocation(struct waiter *waiter, void *context __always_unused)
{
        struct data_vio *data_vio = waiter_as_data_vio(waiter);

        /* Now that some slab has scrubbed, restart the allocation process. */
        data_vio->allocation.wait_for_clean_slab = false;
        data_vio->allocation.first_allocation_zone = data_vio->allocation.zone->zone_number;
        continue_data_vio(data_vio);
}

/**
 * continue_allocating() - Continue searching for an allocation by enqueuing to wait for scrubbing
 *                         or switching to the next zone.
 * @data_vio: The data_vio attempting to get an allocation.
 *
 * This method should only be called from the error handler set in data_vio_allocate_data_block.
 *
 * Return: true if the allocation process has continued in another zone.
 */
static bool continue_allocating(struct data_vio *data_vio)
{
        struct allocation *allocation = &data_vio->allocation;
        struct physical_zone *zone = allocation->zone;
        struct vdo_completion *completion = &data_vio->vio.completion;
        int result = VDO_SUCCESS;
        bool was_waiting = allocation->wait_for_clean_slab;
        bool tried_all = (allocation->first_allocation_zone == zone->next->zone_number);

        vdo_reset_completion(completion);

        if (tried_all && !was_waiting) {
                /*
                 * We've already looked in all the zones, and found nothing. So go through the
                 * zones again, and wait for each to scrub before trying to allocate.
                 */
                allocation->wait_for_clean_slab = true;
                allocation->first_allocation_zone = zone->zone_number;
        }

        if (allocation->wait_for_clean_slab) {
                data_vio->waiter.callback = retry_allocation;
                result = vdo_enqueue_clean_slab_waiter(zone->allocator,
                                                       &data_vio->waiter);
                if (result == VDO_SUCCESS) {
                        /* We've enqueued to wait for a slab to be scrubbed. */
                        return true;
                }

                if ((result != VDO_NO_SPACE) || (was_waiting && tried_all)) {
                        vdo_set_completion_result(completion, result);
                        return false;
                }
        }

        allocation->zone = zone->next;
        completion->callback_thread_id = allocation->zone->thread_id;
        vdo_launch_completion(completion);
        return true;
}

/**
 * vdo_allocate_block_in_zone() - Attempt to allocate a block in the current physical zone, and if
 *                                that fails try the next if possible.
 * @data_vio: The data_vio needing an allocation.
 *
 * Return: true if a block was allocated; if not, the data_vio will have been dispatched so the
 *         caller must not touch it.
 */
bool vdo_allocate_block_in_zone(struct data_vio *data_vio)
{
        int result = allocate_and_lock_block(&data_vio->allocation);

        if (result == VDO_SUCCESS)
                return true;

        if ((result != VDO_NO_SPACE) || !continue_allocating(data_vio))
                continue_data_vio_with_error(data_vio, result);

        return false;
}

/**
 * vdo_release_physical_zone_pbn_lock() - Release a physical block lock if it is held and return it
 *                                        to the lock pool.
 * @zone: The physical zone in which the lock was obtained.
 * @locked_pbn: The physical block number to unlock.
 * @lock: The lock being released.
 *
 * It must be the last live reference, as if the memory were being freed (the lock memory will be
 * re-initialized or zeroed).
 */
void vdo_release_physical_zone_pbn_lock(struct physical_zone *zone,
                                        physical_block_number_t locked_pbn,
                                        struct pbn_lock *lock)
{
        struct pbn_lock *holder;

        if (lock == NULL)
                return;

        ASSERT_LOG_ONLY(lock->holder_count > 0,
                        "should not be releasing a lock that is not held");

        lock->holder_count -= 1;
        if (lock->holder_count > 0) {
                /* The lock was shared and is still referenced, so don't release it yet. */
                return;
        }

        holder = vdo_int_map_remove(zone->pbn_operations, locked_pbn);
        ASSERT_LOG_ONLY((lock == holder), "physical block lock mismatch for block %llu",
                        (unsigned long long) locked_pbn);

        release_pbn_lock_provisional_reference(lock, locked_pbn, zone->allocator);
        return_pbn_lock_to_pool(zone->lock_pool, lock);
}

/**
 * vdo_dump_physical_zone() - Dump information about a physical zone to the log for debugging.
 * @zone: The zone to dump.
 */
void vdo_dump_physical_zone(const struct physical_zone *zone)
{
        vdo_dump_block_allocator(zone->allocator);
}
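The pbn_lock_pool above never allocates on demand: every lock is allocated up front, and idle locks overlay a free-list entry on the lock memory itself. A simplified standalone sketch of that pattern (illustrative names only, a plain pointer free list instead of the kernel's struct list_head, and no error logging) might look like this:

/*
 * Simplified, standalone sketch of the fixed-capacity lock pool idea:
 * idle slots reuse their own memory as free-list links, and borrowing
 * never allocates. Not the kernel code; names are made up.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct fake_lock { int holder_count; };

union idle_slot {
        union idle_slot *next;   /* used only while the slot is idle */
        struct fake_lock lock;   /* used only while the slot is borrowed */
};

struct fake_pool {
        size_t capacity, borrowed;
        union idle_slot *idle;   /* head of the idle free list */
        union idle_slot slots[];
};

static struct fake_pool *make_pool(size_t capacity)
{
        struct fake_pool *pool =
                calloc(1, sizeof(*pool) + (capacity * sizeof(union idle_slot)));
        size_t i;

        if (pool == NULL)
                return NULL;
        pool->capacity = capacity;
        for (i = 0; i < capacity; i++) {
                pool->slots[i].next = pool->idle;
                pool->idle = &pool->slots[i];
        }
        return pool;
}

static struct fake_lock *borrow(struct fake_pool *pool)
{
        union idle_slot *slot = pool->idle;

        if (slot == NULL)
                return NULL;    /* pool is empty: no growth on demand */
        pool->idle = slot->next;
        pool->borrowed++;
        memset(&slot->lock, 0, sizeof(slot->lock));
        return &slot->lock;
}

static void give_back(struct fake_pool *pool, struct fake_lock *lock)
{
        /* The lock sits at offset 0 of its slot, so we can recover the slot. */
        union idle_slot *slot = (union idle_slot *) lock;

        slot->next = pool->idle;
        pool->idle = slot;
        pool->borrowed--;
}

int main(void)
{
        struct fake_pool *pool = make_pool(4);
        struct fake_lock *lock = borrow(pool);

        printf("borrowed=%zu\n", pool->borrowed);
        give_back(pool, lock);
        printf("borrowed=%zu\n", pool->borrowed);
        free(pool);
        return 0;
}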
115	drivers/md/dm-vdo/physical-zone.h (new file)
@@ -0,0 +1,115 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2023 Red Hat
 */

#ifndef VDO_PHYSICAL_ZONE_H
#define VDO_PHYSICAL_ZONE_H

#include <linux/atomic.h>

#include "types.h"

/*
 * The type of a PBN lock.
 */
enum pbn_lock_type {
        VIO_READ_LOCK,
        VIO_WRITE_LOCK,
        VIO_BLOCK_MAP_WRITE_LOCK,
};

struct pbn_lock_implementation;

/*
 * A PBN lock.
 */
struct pbn_lock {
        /* The implementation of the lock */
        const struct pbn_lock_implementation *implementation;

        /* The number of VIOs holding or sharing this lock */
        data_vio_count_t holder_count;
        /*
         * The number of compressed block writers holding a share of this lock while they are
         * acquiring a reference to the PBN.
         */
        u8 fragment_locks;

        /* Whether the locked PBN has been provisionally referenced on behalf of the lock holder. */
        bool has_provisional_reference;

        /*
         * For read locks, the number of references that were known to be available on the locked
         * block at the time the lock was acquired.
         */
        u8 increment_limit;

        /*
         * For read locks, the number of data_vios that have tried to claim one of the available
         * increments during the lifetime of the lock. Each claim will first increment this
         * counter, so it can exceed the increment limit.
         */
        atomic_t increments_claimed;
};

struct physical_zone {
        /* Which physical zone this is */
        zone_count_t zone_number;
        /* The thread ID for this zone */
        thread_id_t thread_id;
        /* In progress operations keyed by PBN */
        struct int_map *pbn_operations;
        /* Pool of unused pbn_lock instances */
        struct pbn_lock_pool *lock_pool;
        /* The block allocator for this zone */
        struct block_allocator *allocator;
        /* The next zone from which to attempt an allocation */
        struct physical_zone *next;
};

struct physical_zones {
        /* The number of zones */
        zone_count_t zone_count;
        /* The physical zones themselves */
        struct physical_zone zones[];
};

bool __must_check vdo_is_pbn_read_lock(const struct pbn_lock *lock);
void vdo_downgrade_pbn_write_lock(struct pbn_lock *lock, bool compressed_write);
bool __must_check vdo_claim_pbn_lock_increment(struct pbn_lock *lock);

/**
 * vdo_pbn_lock_has_provisional_reference() - Check whether a PBN lock has a provisional reference.
 * @lock: The PBN lock.
 */
static inline bool vdo_pbn_lock_has_provisional_reference(struct pbn_lock *lock)
{
        return ((lock != NULL) && lock->has_provisional_reference);
}

void vdo_assign_pbn_lock_provisional_reference(struct pbn_lock *lock);
void vdo_unassign_pbn_lock_provisional_reference(struct pbn_lock *lock);

int __must_check vdo_make_physical_zones(struct vdo *vdo,
                                         struct physical_zones **zones_ptr);

void vdo_free_physical_zones(struct physical_zones *zones);

struct pbn_lock * __must_check vdo_get_physical_zone_pbn_lock(struct physical_zone *zone,
                                                              physical_block_number_t pbn);

int __must_check vdo_attempt_physical_zone_pbn_lock(struct physical_zone *zone,
                                                    physical_block_number_t pbn,
                                                    enum pbn_lock_type type,
                                                    struct pbn_lock **lock_ptr);

bool __must_check vdo_allocate_block_in_zone(struct data_vio *data_vio);

void vdo_release_physical_zone_pbn_lock(struct physical_zone *zone,
                                        physical_block_number_t locked_pbn,
                                        struct pbn_lock *lock);

void vdo_dump_physical_zone(const struct physical_zone *zone);

#endif /* VDO_PHYSICAL_ZONE_H */
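For the increment_limit and increments_claimed fields declared above, here is a standalone C11 sketch of the claim scheme used by vdo_claim_pbn_lock_increment() (illustrative names and limit value, user-space atomics rather than the kernel's atomic_t):

/*
 * Standalone sketch of the increment-claim pattern: a plain atomic add is
 * sufficient because callers stop once the limit is exceeded, so the
 * counter cannot wrap in practice. Not the kernel code.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct demo_read_lock {
        uint32_t increment_limit;       /* references known to be available */
        atomic_uint increments_claimed; /* claims attempted so far */
};

static bool claim_increment(struct demo_read_lock *lock)
{
        /* fetch_add returns the old value, so add 1 to get this claim's number. */
        uint32_t claim_number = atomic_fetch_add(&lock->increments_claimed, 1) + 1;

        return claim_number <= lock->increment_limit;
}

int main(void)
{
        struct demo_read_lock lock = { .increment_limit = 2 };
        int i;

        for (i = 0; i < 4; i++)
                printf("claim %d: %s\n", i + 1,
                       claim_increment(&lock) ? "granted" : "refused");

        return 0;
}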
(One file's diff is suppressed because it is too large.)
drivers/md/dm-vdo/slab-depot.h

@@ -257,6 +257,54 @@ struct vdo_slab {
        struct reference_block *reference_blocks;
};

enum block_allocator_drain_step {
        VDO_DRAIN_ALLOCATOR_START,
        VDO_DRAIN_ALLOCATOR_STEP_SCRUBBER,
        VDO_DRAIN_ALLOCATOR_STEP_SLABS,
        VDO_DRAIN_ALLOCATOR_STEP_SUMMARY,
        VDO_DRAIN_ALLOCATOR_STEP_FINISHED,
};

struct slab_scrubber {
        /* The queue of slabs to scrub first */
        struct list_head high_priority_slabs;
        /* The queue of slabs to scrub once there are no high_priority_slabs */
        struct list_head slabs;
        /* The queue of VIOs waiting for a slab to be scrubbed */
        struct wait_queue waiters;

        /*
         * The number of slabs that are unrecovered or being scrubbed. This field is modified by
         * the physical zone thread, but is queried by other threads.
         */
        slab_count_t slab_count;

        /* The administrative state of the scrubber */
        struct admin_state admin_state;
        /* Whether to only scrub high-priority slabs */
        bool high_priority_only;
        /* The slab currently being scrubbed */
        struct vdo_slab *slab;
        /* The vio for loading slab journal blocks */
        struct vio vio;
};

/* A sub-structure for applying actions in parallel to all an allocator's slabs. */
struct slab_actor {
        /* The number of slabs performing a slab action */
        slab_count_t slab_action_count;
        /* The method to call when a slab action has been completed by all slabs */
        vdo_action_fn callback;
};

/* A slab_iterator is a structure for iterating over a set of slabs. */
struct slab_iterator {
        struct vdo_slab **slabs;
        struct vdo_slab *next;
        slab_count_t end;
        slab_count_t stride;
};

/*
 * The slab_summary provides hints during load and recovery about the state of the slabs in order
 * to avoid the need to read the slab journals in their entirety before a VDO can come online.

@@ -314,6 +362,81 @@ struct atomic_slab_summary_statistics {
        atomic64_t blocks_written;
};

struct block_allocator {
        struct vdo_completion completion;
        /* The slab depot for this allocator */
        struct slab_depot *depot;
        /* The nonce of the VDO */
        nonce_t nonce;
        /* The physical zone number of this allocator */
        zone_count_t zone_number;
        /* The thread ID for this allocator's physical zone */
        thread_id_t thread_id;
        /* The number of slabs in this allocator */
        slab_count_t slab_count;
        /* The number of the last slab owned by this allocator */
        slab_count_t last_slab;
        /* The reduced priority level used to preserve unopened slabs */
        unsigned int unopened_slab_priority;
        /* The state of this allocator */
        struct admin_state state;
        /* The actor for applying an action to all slabs */
        struct slab_actor slab_actor;

        /* The slab from which blocks are currently being allocated */
        struct vdo_slab *open_slab;
        /* A priority queue containing all slabs available for allocation */
        struct priority_table *prioritized_slabs;
        /* The slab scrubber */
        struct slab_scrubber scrubber;
        /* What phase of the close operation the allocator is to perform */
        enum block_allocator_drain_step drain_step;

        /*
         * These statistics are all mutated only by the physical zone thread, but are read by other
         * threads when gathering statistics for the entire depot.
         */
        /*
         * The count of allocated blocks in this zone. Not in block_allocator_statistics for
         * historical reasons.
         */
        u64 allocated_blocks;
        /* Statistics for this block allocator */
        struct block_allocator_statistics statistics;
        /* Cumulative statistics for the slab journals in this zone */
        struct slab_journal_statistics slab_journal_statistics;
        /* Cumulative statistics for the reference counters in this zone */
        struct ref_counts_statistics ref_counts_statistics;

        /*
         * This is the head of a queue of slab journals which have entries in their tail blocks
         * which have not yet started to commit. When the recovery journal is under space pressure,
         * slab journals which have uncommitted entries holding a lock on the recovery journal head
         * are forced to commit their blocks early. This list is kept in order, with the tail
         * containing the slab journal holding the most recent recovery journal lock.
         */
        struct list_head dirty_slab_journals;

        /* The vio pool for reading and writing block allocator metadata */
        struct vio_pool *vio_pool;
        /* The dm_kcopyd client for erasing slab journals */
        struct dm_kcopyd_client *eraser;
        /* Iterator over the slabs to be erased */
        struct slab_iterator slabs_to_erase;

        /* The portion of the slab summary managed by this allocator */
        /* The state of the slab summary */
        struct admin_state summary_state;
        /* The number of outstanding summary writes */
        block_count_t summary_write_count;
        /* The array (owned by the blocks) of all entries */
        struct slab_summary_entry *summary_entries;
        /* The array of slab_summary_blocks */
        struct slab_summary_block *summary_blocks;
};

struct reference_updater;

bool __must_check vdo_attempt_replay_into_slab(struct vdo_slab *slab,
                                               physical_block_number_t pbn,
                                               enum journal_operation operation,

@@ -321,6 +444,30 @@ bool __must_check vdo_attempt_replay_into_slab(struct vdo_slab *slab,
                                               struct journal_point *recovery_point,
                                               struct vdo_completion *parent);

static inline struct block_allocator *vdo_as_block_allocator(struct vdo_completion *completion)
{
        vdo_assert_completion_type(completion, VDO_BLOCK_ALLOCATOR_COMPLETION);
        return container_of(completion, struct block_allocator, completion);
}

int __must_check vdo_acquire_provisional_reference(struct vdo_slab *slab,
                                                   physical_block_number_t pbn,
                                                   struct pbn_lock *lock);

int __must_check vdo_allocate_block(struct block_allocator *allocator,
                                    physical_block_number_t *block_number_ptr);

int vdo_enqueue_clean_slab_waiter(struct block_allocator *allocator,
                                  struct waiter *waiter);

void vdo_modify_reference_count(struct vdo_completion *completion,
                                struct reference_updater *updater);

int __must_check vdo_release_block_reference(struct block_allocator *allocator,
                                             physical_block_number_t pbn);

void vdo_notify_slab_journals_are_recovered(struct vdo_completion *completion);

void vdo_dump_block_allocator(const struct block_allocator *allocator);

#endif /* VDO_SLAB_DEPOT_H */