linux/fs/afs/vl_rotate.c

400 lines
11 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-or-later
/* Handle vlserver selection and rotation.
*
* Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*/
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include "internal.h"
#include "afs_vl.h"
/*
 * Begin an operation on a volume location server.
 *
 * The cursor is wiped and then primed with the cell and key to use, a default
 * cumulative error of -EDESTADDRREQ and an iteration count of -1.
 *
 * Returns true if the operation may proceed, in which case the cursor has
 * been given a fresh debug ID.  Returns false if a signal is pending for the
 * current task; the cursor is then marked AFS_VL_CURSOR_STOP and its
 * cumulative error is set to -EINTR.
 */
bool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cell,
				  struct key *key)
{
	static atomic_t debug_ids;

	memset(vc, 0, sizeof(*vc));
	vc->nr_iterations = -1;
	vc->cumul_error.error = -EDESTADDRREQ;
	vc->key = key;
	vc->cell = cell;

	if (!signal_pending(current)) {
		vc->debug_id = atomic_inc_return(&debug_ids);
		return true;
	}

	/* Interrupted before we even started: make the cursor refuse to run. */
	vc->flags |= AFS_VL_CURSOR_STOP;
	vc->cumul_error.error = -EINTR;
	return false;
}
/*
 * Begin iteration through the cell's vlserver list.
 *
 * If the cell has no usable DNS record, or the record has expired, a fresh
 * lookup is requested first.  On success the cursor holds the cell's current
 * server list in vc->server_list, every server in it is marked untried and
 * no server is selected yet (vc->server_index == -1).
 *
 * Returns true if there is at least one server to try.  Returns false
 * otherwise; vc->cumul_error.error is set to -ERESTARTSYS, -ENOENT or
 * -EDESTADDRREQ on the DNS failure paths, but is left untouched if the server
 * list is simply empty.
 */
static bool afs_start_vl_iteration(struct afs_vl_cursor *vc)
{
struct afs_cell *cell = vc->cell;
unsigned int dns_lookup_count;
/* Kick off a lookup if we have no record at all or the one we have has
 * passed its expiry time.
 */
if (cell->dns_source == DNS_RECORD_UNAVAILABLE ||
cell->dns_expiry <= ktime_get_real_seconds()) {
/* Snapshot the lookup counter before requesting the lookup so that a
 * change in the counter can be detected below.
 */
dns_lookup_count = smp_load_acquire(&cell->dns_lookup_count);
set_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags);
afs_queue_cell(cell, afs_cell_trace_get_queue_dns);
/* Only wait if there is nothing at all to work with; an expired record
 * is still used whilst the lookup proceeds.
 */
if (cell->dns_source == DNS_RECORD_UNAVAILABLE) {
if (wait_var_event_interruptible(
&cell->dns_lookup_count,
smp_load_acquire(&cell->dns_lookup_count)
!= dns_lookup_count) < 0) {
/* The wait was interrupted before the counter changed. */
vc->cumul_error.error = -ERESTARTSYS;
return false;
}
}
/* Status load is ordered after lookup counter load */
if (cell->dns_status == DNS_LOOKUP_GOT_NOT_FOUND) {
pr_warn("No record of cell %s\n", cell->name);
vc->cumul_error.error = -ENOENT;
return false;
}
/* Still no record after the lookup was requested (and waited for). */
if (cell->dns_source == DNS_RECORD_UNAVAILABLE) {
vc->cumul_error.error = -EDESTADDRREQ;
return false;
}
}
/* Pick up the cell's current server list under the list lock.
 * NOTE(review): afs_get_vlserverlist() presumably takes a reference that
 * is later dropped with afs_put_vlserverlist() - confirm.
 */
read_lock(&cell->vl_servers_lock);
vc->server_list = afs_get_vlserverlist(
rcu_dereference_protected(cell->vl_servers,
lockdep_is_held(&cell->vl_servers_lock)));
read_unlock(&cell->vl_servers_lock);
/* An empty list leaves nothing to try; cumul_error is not altered here. */
if (!vc->server_list->nr_servers)
return false;
/* One bit per server in the list, all initially untried. */
vc->untried_servers = (1UL << vc->server_list->nr_servers) - 1;
vc->server_index = -1;
return true;
}
/*
 * Select the vlserver to use.  May be called multiple times to rotate
 * through the vlservers.
 *
 * The first call on a cursor (nr_iterations becomes 0) starts iteration over
 * the cell's vlserver list.  Each later call first evaluates vc->call_error
 * and vc->call_abort_code from the previous attempt and then either stops,
 * tries another address on the same server or moves on to another server.
 *
 * Returns true if a server and address were selected, in which case
 * vc->server, vc->alist and vc->addr_index identify the target.  Returns
 * false when iteration is over; AFS_VL_CURSOR_STOP is then set in vc->flags
 * and the outcome is recorded in vc->cumul_error.
 */
bool afs_select_vlserver(struct afs_vl_cursor *vc)
{
	struct afs_addr_list *alist = vc->alist;
	struct afs_vlserver *vlserver;
	unsigned long set, failed;
	unsigned int rtt;
	s32 abort_code = vc->call_abort_code;
	int error = vc->call_error, i;

	vc->nr_iterations++;

	_enter("VC=%x+%x,%d{%lx},%d{%lx},%d,%d",
	       vc->debug_id, vc->nr_iterations, vc->server_index, vc->untried_servers,
	       vc->addr_index, vc->addr_tried,
	       error, abort_code);

	if (vc->flags & AFS_VL_CURSOR_STOP) {
		_leave(" = f [stopped]");
		return false;
	}

	/* First pass: there is no previous call result to evaluate. */
	if (vc->nr_iterations == 0)
		goto start;

	/* Record the outcome against the address that was just used. */
	WRITE_ONCE(alist->addrs[vc->addr_index].last_error, error);

	/* Evaluate the result of the previous operation, if there was one. */
	switch (error) {
	default:
	case 0:
		/* Success or local failure.  Stop. */
		vc->cumul_error.error = error;
		vc->flags |= AFS_VL_CURSOR_STOP;
		_leave(" = f [okay/local %d]", vc->cumul_error.error);
		return false;

	case -ECONNABORTED:
		/* The far side rejected the operation on some grounds.  This
		 * might involve the server being busy or the volume having been moved.
		 */
		switch (abort_code) {
		case AFSVL_IO:
		case AFSVL_BADVOLOPER:
		case AFSVL_NOMEM:
			/* The server went weird. */
			afs_prioritise_error(&vc->cumul_error, -EREMOTEIO, abort_code);
			//write_lock(&vc->cell->vl_servers_lock);
			//vc->server_list->weird_mask |= 1 << vc->server_index;
			//write_unlock(&vc->cell->vl_servers_lock);
			goto next_server;

		default:
			afs_prioritise_error(&vc->cumul_error, error, abort_code);
			goto failed;
		}

	case -ERFKILL:
	case -EADDRNOTAVAIL:
	case -ENETUNREACH:
	case -EHOSTUNREACH:
	case -EHOSTDOWN:
	case -ECONNREFUSED:
	case -ETIMEDOUT:
	case -ETIME:
		/* Couldn't reach this address; try another on the same server. */
		_debug("no conn %d", error);
		afs_prioritise_error(&vc->cumul_error, error, 0);
		goto iterate_address;

	case -ECONNRESET:
		/* Note that a retry of the whole list is wanted if every
		 * server ends up being exhausted.
		 */
		_debug("call reset");
		afs_prioritise_error(&vc->cumul_error, error, 0);
		vc->flags |= AFS_VL_CURSOR_RETRY;
		goto next_server;

	case -EOPNOTSUPP:
		_debug("notsupp");
		goto next_server;
	}

restart_from_beginning:
	_debug("restart");
	/* The only jump to this label comes from no_more_servers, by which
	 * time the address list has already been released and alist is NULL.
	 * Check alist before looking at its preferred index, as the failed:
	 * path does, so that a NULL list is never dereferenced when the last
	 * call got a response.
	 */
	if (alist &&
	    vc->call_responded &&
	    vc->addr_index != alist->preferred &&
	    test_bit(alist->preferred, &vc->addr_tried))
		WRITE_ONCE(alist->preferred, vc->addr_index);
	afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_restart);
	alist = vc->alist = NULL;

	afs_put_vlserverlist(vc->cell->net, vc->server_list);
	vc->server_list = NULL;
	/* Only one restart is permitted per operation. */
	if (vc->flags & AFS_VL_CURSOR_RETRIED)
		goto failed;
	vc->flags |= AFS_VL_CURSOR_RETRIED;
start:
	_debug("start");
	ASSERTCMP(alist, ==, NULL);

	if (!afs_start_vl_iteration(vc))
		goto failed;

	error = afs_send_vl_probes(vc->cell->net, vc->key, vc->server_list);
	if (error < 0) {
		afs_prioritise_error(&vc->cumul_error, error, 0);
		goto failed;
	}

pick_server:
	_debug("pick [%lx]", vc->untried_servers);
	ASSERTCMP(alist, ==, NULL);

	error = afs_wait_for_vl_probes(vc->server_list, vc->untried_servers);
	if (error < 0) {
		afs_prioritise_error(&vc->cumul_error, error, 0);
		goto failed;
	}

	/* Use the list's preferred server if it has not been tried yet;
	 * otherwise pick the untried, responding server with the lowest RTT.
	 */
	vc->server_index = vc->server_list->preferred;
	if (test_bit(vc->server_index, &vc->untried_servers))
		goto selected_server;

	vc->server_index = -1;
	rtt = UINT_MAX;
	for (i = 0; i < vc->server_list->nr_servers; i++) {
		struct afs_vlserver *s = vc->server_list->servers[i].server;

		if (!test_bit(i, &vc->untried_servers) ||
		    !test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags))
			continue;
		if (s->probe.rtt <= rtt) {
			vc->server_index = i;
			rtt = s->probe.rtt;
		}
	}

	if (vc->server_index == -1)
		goto no_more_servers;

selected_server:
	_debug("use %d", vc->server_index);
	__clear_bit(vc->server_index, &vc->untried_servers);

	/* We're starting on a different vlserver from the list.  We need to
	 * check it, find its address list and probe its capabilities before we
	 * use it.
	 */
	vlserver = vc->server_list->servers[vc->server_index].server;
	vc->server = vlserver;

	_debug("USING VLSERVER: %s", vlserver->name);

	read_lock(&vlserver->lock);
	alist = rcu_dereference_protected(vlserver->addresses,
					  lockdep_is_held(&vlserver->lock));
	vc->alist = afs_get_addrlist(alist, afs_alist_trace_get_vlrotate_set);
	read_unlock(&vlserver->lock);

	vc->addr_tried = 0;
	vc->addr_index = -1;

iterate_address:
	/* Iterate over the current server's address list to try and find an
	 * address on which it will respond to us.
	 */
	set = READ_ONCE(alist->responded);
	failed = READ_ONCE(alist->probe_failed);
	vc->addr_index = READ_ONCE(alist->preferred);

	_debug("%lx-%lx-%lx,%d", set, failed, vc->addr_tried, vc->addr_index);

	/* Candidates are addresses that responded to a probe, did not fail
	 * one and have not yet been tried in this pass.
	 */
	set &= ~(failed | vc->addr_tried);

	if (!set)
		goto next_server;

	/* Fall back to the lowest-numbered candidate if the preferred address
	 * is not amongst them.
	 */
	if (!test_bit(vc->addr_index, &set))
		vc->addr_index = __ffs(set);

	set_bit(vc->addr_index, &vc->addr_tried);
	vc->alist = alist;

	_debug("VL address %d/%d", vc->addr_index, alist->nr_addrs);

	vc->call_responded = false;
	_leave(" = t %pISpc", rxrpc_kernel_remote_addr(alist->addrs[vc->addr_index].peer));
	return true;

next_server:
	_debug("next");
	ASSERT(alist);
	/* If a non-preferred address answered after the preferred one had
	 * been tried, make the one that answered the new preference.
	 */
	if (vc->call_responded &&
	    vc->addr_index != alist->preferred &&
	    test_bit(alist->preferred, &vc->addr_tried))
		WRITE_ONCE(alist->preferred, vc->addr_index);
	afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_next);
	alist = vc->alist = NULL;
	goto pick_server;

no_more_servers:
	/* That's all the servers poked to no good effect.  Try again if some
	 * of them were busy.
	 */
	if (vc->flags & AFS_VL_CURSOR_RETRY)
		goto restart_from_beginning;

	/* Fold each server's probe result into the cumulative error. */
	for (i = 0; i < vc->server_list->nr_servers; i++) {
		struct afs_vlserver *s = vc->server_list->servers[i].server;

		if (test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags))
			vc->cumul_error.responded = true;
		afs_prioritise_error(&vc->cumul_error, READ_ONCE(s->probe.error),
				     s->probe.abort_code);
	}

failed:
	if (alist) {
		if (vc->call_responded &&
		    vc->addr_index != alist->preferred &&
		    test_bit(alist->preferred, &vc->addr_tried))
			WRITE_ONCE(alist->preferred, vc->addr_index);
		afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_fail);
		alist = vc->alist = NULL;
	}

	vc->flags |= AFS_VL_CURSOR_STOP;
	_leave(" = f [failed %d]", vc->cumul_error.error);
	return false;
}
/*
 * Dump cursor state in the case of the error being EDESTADDRREQ.
 *
 * Nothing is printed unless CONFIG_AFS_DEBUG_CURSOR is enabled, and the dump
 * is only produced for the first four calls.  The cell's DNS state, the
 * cursor's iteration state and, if a server list is attached, each server
 * and its address list are logged at notice level.
 */
static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc)
{
	struct afs_cell *cell = vc->cell;
	static int count;
	int i;

	if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3)
		return;
	count++;

	/* The per-server address lists are RCU-managed. */
	rcu_read_lock();

	pr_notice("EDESTADDR occurred\n");
	pr_notice("CELL: %s err=%d\n", cell->name, cell->error);
	pr_notice("DNS: src=%u st=%u lc=%x\n",
		  cell->dns_source, cell->dns_status, cell->dns_lookup_count);
	pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n",
		  vc->untried_servers, vc->server_index, vc->nr_iterations,
		  vc->flags, vc->cumul_error.error);
	pr_notice("VC: call er=%d ac=%d r=%u\n",
		  vc->call_error, vc->call_abort_code, vc->call_responded);

	if (vc->server_list) {
		const struct afs_vlserver_list *sl = vc->server_list;

		pr_notice("VC: SL nr=%u ix=%u\n",
			  sl->nr_servers, sl->index);
		for (i = 0; i < sl->nr_servers; i++) {
			const struct afs_vlserver *s = sl->servers[i].server;
			const struct afs_addr_list *a;

			pr_notice("VC: server %s+%hu fl=%lx E=%hd\n",
				  s->name, s->port, s->flags, s->probe.error);

			/* Fetch the pointer exactly once so that the value
			 * tested for NULL is the value that gets dereferenced.
			 */
			a = rcu_dereference(s->addresses);
			if (!a)
				continue;

			pr_notice("VC: - nr=%u/%u/%u pf=%u\n",
				  a->nr_ipv4, a->nr_addrs, a->max_addrs,
				  a->preferred);
			pr_notice("VC: - R=%lx F=%lx\n",
				  a->responded, a->probe_failed);
			if (a == vc->alist)
				pr_notice("VC: - current\n");
		}
	}

	pr_notice("AC: t=%lx ax=%u\n", vc->addr_tried, vc->addr_index);
	rcu_read_unlock();
}
/*
* Tidy up a volume location server cursor and unlock the vnode.
*/
int afs_end_vlserver_operation(struct afs_vl_cursor *vc)
{
struct afs_net *net = vc->cell->net;
_enter("VC=%x+%x", vc->debug_id, vc->nr_iterations);
switch (vc->cumul_error.error) {
case -EDESTADDRREQ:
case -EADDRNOTAVAIL:
case -ENETUNREACH:
case -EHOSTUNREACH:
afs_vl_dump_edestaddrreq(vc);
break;
}
if (vc->alist) {
if (vc->call_responded &&
vc->addr_index != vc->alist->preferred &&
test_bit(vc->alist->preferred, &vc->addr_tried))
WRITE_ONCE(vc->alist->preferred, vc->addr_index);
afs_put_addrlist(vc->alist, afs_alist_trace_put_vlrotate_end);
vc->alist = NULL;
}
afs_put_vlserverlist(net, vc->server_list);
return vc->cumul_error.error;
}