Merge branch 'mptcp-add-mixed-v4-v6-support-for-the-in-kernel-pm'

Matthieu Baerts says:

====================
mptcp: add mixed v4/v6 support for the in-kernel PM

Before these patches, the in-kernel Path-Manager would not allow, for
the same MPTCP connection, having a mix of subflows in v4 and v6.

MPTCP's RFC 8684 doesn't forbid that and it is even recommended to do so
as the path in v4 and v6 are likely different. Some networks are also
v4 or v6 only, we cannot assume they all have both v4 and v6 support.

Patch 1 then removes this artificial constraint in the in-kernel PM
currently enforcing there are no mixed subflows in place, either in
address announcement or in subflow creation areas.

Patch 2 makes sure the sk_ipv6only attribute is also propagated to
subflows, just in case a new PM wouldn't respect it.

Some selftests have also been added for the in-kernel PM (patch 3).

Patches 4 to 8 are just some cleanups and small improvements in the
printed messages in the userspace PM. It is not linked to the rest but
identified when working on a related patch modifying this selftest,
already in -net:

  commit 4656d72c1efa ("selftests: mptcp: userspace: validate v4-v6 subflows mix")
---
====================

Link: https://lore.kernel.org/r/20230123-upstream-net-next-pm-v4-v6-v1-0-43fac502bfbf@tessares.net
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
Paolo Abeni 2023-01-26 13:33:32 +01:00
commit 97f7d3dd76
5 changed files with 171 additions and 99 deletions

View File

@ -152,7 +152,6 @@ static struct mptcp_pm_addr_entry *
select_local_address(const struct pm_nl_pernet *pernet,
const struct mptcp_sock *msk)
{
const struct sock *sk = (const struct sock *)msk;
struct mptcp_pm_addr_entry *entry, *ret = NULL;
msk_owned_by_me(msk);
@ -165,16 +164,6 @@ select_local_address(const struct pm_nl_pernet *pernet,
if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
continue;
if (entry->addr.family != sk->sk_family) {
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
if ((entry->addr.family == AF_INET &&
!ipv6_addr_v4mapped(&sk->sk_v6_daddr)) ||
(sk->sk_family == AF_INET &&
!ipv6_addr_v4mapped(&entry->addr.addr6)))
#endif
continue;
}
ret = entry;
break;
}
@ -423,7 +412,9 @@ static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned
/* Fill all the remote addresses into the array addrs[],
* and return the array size.
*/
static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullmesh,
static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk,
struct mptcp_addr_info *local,
bool fullmesh,
struct mptcp_addr_info *addrs)
{
bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0);
@ -443,6 +434,9 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm
if (deny_id0)
return 0;
if (!mptcp_pm_addr_families_match(sk, local, &remote))
return 0;
msk->pm.subflows++;
addrs[i++] = remote;
} else {
@ -453,6 +447,9 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm
if (deny_id0 && !addrs[i].id)
continue;
if (!mptcp_pm_addr_families_match(sk, local, &addrs[i]))
continue;
if (!lookup_address_in_vec(addrs, i, &addrs[i]) &&
msk->pm.subflows < subflows_max) {
msk->pm.subflows++;
@ -603,9 +600,11 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH);
msk->pm.local_addr_used++;
nr = fill_remote_addresses_vec(msk, fullmesh, addrs);
if (nr)
__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
nr = fill_remote_addresses_vec(msk, &local->addr, fullmesh, addrs);
if (nr == 0)
continue;
spin_unlock_bh(&msk->pm.lock);
for (i = 0; i < nr; i++)
__mptcp_subflow_connect(sk, &local->addr, &addrs[i]);
@ -628,11 +627,11 @@ static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk)
* and return the array size.
*/
static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
struct mptcp_addr_info *remote,
struct mptcp_addr_info *addrs)
{
struct sock *sk = (struct sock *)msk;
struct mptcp_pm_addr_entry *entry;
struct mptcp_addr_info local;
struct pm_nl_pernet *pernet;
unsigned int subflows_max;
int i = 0;
@ -645,15 +644,8 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH))
continue;
if (entry->addr.family != sk->sk_family) {
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
if ((entry->addr.family == AF_INET &&
!ipv6_addr_v4mapped(&sk->sk_v6_daddr)) ||
(sk->sk_family == AF_INET &&
!ipv6_addr_v4mapped(&entry->addr.addr6)))
#endif
continue;
}
if (!mptcp_pm_addr_families_match(sk, &entry->addr, remote))
continue;
if (msk->pm.subflows < subflows_max) {
msk->pm.subflows++;
@ -666,8 +658,18 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
* 'IPADDRANY' local address
*/
if (!i) {
struct mptcp_addr_info local;
memset(&local, 0, sizeof(local));
local.family = msk->pm.remote.family;
local.family =
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
remote->family == AF_INET6 &&
ipv6_addr_v4mapped(&remote->addr6) ? AF_INET :
#endif
remote->family;
if (!mptcp_pm_addr_families_match(sk, &local, remote))
return 0;
msk->pm.subflows++;
addrs[i++] = local;
@ -706,7 +708,9 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
/* connect to the specified remote address, using whatever
* local address the routing configuration will pick.
*/
nr = fill_local_addresses_vec(msk, addrs);
nr = fill_local_addresses_vec(msk, &remote, addrs);
if (nr == 0)
return;
msk->pm.add_addr_accepted++;
if (msk->pm.add_addr_accepted >= add_addr_accept_max ||

View File

@ -59,8 +59,8 @@ int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
*/
e = sock_kmalloc(sk, sizeof(*e), GFP_ATOMIC);
if (!e) {
spin_unlock_bh(&msk->pm.lock);
return -ENOMEM;
ret = -ENOMEM;
goto append_err;
}
*e = *entry;
@ -74,6 +74,7 @@ int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
ret = entry->addr.id;
}
append_err:
spin_unlock_bh(&msk->pm.lock);
return ret;
}

View File

@ -1255,6 +1255,7 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
ssk->sk_priority = sk->sk_priority;
ssk->sk_bound_dev_if = sk->sk_bound_dev_if;
ssk->sk_incoming_cpu = sk->sk_incoming_cpu;
ssk->sk_ipv6only = sk->sk_ipv6only;
__ip_sock_set_tos(ssk, inet_sk(sk)->tos);
if (sk->sk_userlocks & tx_rx_locks) {

View File

@ -774,24 +774,17 @@ do_transfer()
addr_nr_ns2=${addr_nr_ns2:9}
fi
local local_addr
if is_v6 "${connect_addr}"; then
local_addr="::"
else
local_addr="0.0.0.0"
fi
extra_srv_args="$extra_args $extra_srv_args"
if [ "$test_link_fail" -gt 1 ];then
timeout ${timeout_test} \
ip netns exec ${listener_ns} \
./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
$extra_srv_args ${local_addr} < "$sinfail" > "$sout" &
$extra_srv_args "::" < "$sinfail" > "$sout" &
else
timeout ${timeout_test} \
ip netns exec ${listener_ns} \
./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
$extra_srv_args ${local_addr} < "$sin" > "$sout" &
$extra_srv_args "::" < "$sin" > "$sout" &
fi
local spid=$!
@ -2448,6 +2441,47 @@ v4mapped_tests()
fi
}
mixed_tests()
{
if reset "IPv4 sockets do not use IPv6 addresses"; then
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 1 1
pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
chk_join_nr 0 0 0
fi
# Need an IPv6 mptcp socket to allow subflows of both families
if reset "simult IPv4 and IPv6 subflows"; then
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 1 1
pm_nl_add_endpoint $ns1 10.0.1.1 flags signal
run_tests $ns1 $ns2 dead:beef:2::1 0 0 0 slow
chk_join_nr 1 1 1
fi
# cross families subflows will not be created even in fullmesh mode
if reset "simult IPv4 and IPv6 subflows, fullmesh 1x1"; then
pm_nl_set_limits $ns1 0 4
pm_nl_set_limits $ns2 1 4
pm_nl_add_endpoint $ns2 dead:beef:2::2 flags subflow,fullmesh
pm_nl_add_endpoint $ns1 10.0.1.1 flags signal
run_tests $ns1 $ns2 dead:beef:2::1 0 0 0 slow
chk_join_nr 1 1 1
fi
# fullmesh still tries to create all the possibly subflows with
# matching family
if reset "simult IPv4 and IPv6 subflows, fullmesh 2x2"; then
pm_nl_set_limits $ns1 0 4
pm_nl_set_limits $ns2 2 4
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
run_tests $ns1 $ns2 dead:beef:1::1 0 0 fullmesh_1 slow
chk_join_nr 4 4 4
fi
}
backup_tests()
{
# single subflow, backup
@ -3120,6 +3154,7 @@ all_tests_sorted=(
a@add_tests
6@ipv6_tests
4@v4mapped_tests
M@mixed_tests
b@backup_tests
p@add_addr_ports_tests
k@syncookies_tests

View File

@ -43,41 +43,40 @@ rndh=$(printf %x "$sec")-$(mktemp -u XXXXXX)
ns1="ns1-$rndh"
ns2="ns2-$rndh"
print_title()
{
stdbuf -o0 -e0 printf "INFO: %s\n" "${1}"
}
kill_wait()
{
[ $1 -eq 0 ] && return 0
kill -SIGUSR1 $1 > /dev/null 2>&1
kill $1 > /dev/null 2>&1
wait $1 2>/dev/null
}
cleanup()
{
echo "cleanup"
rm -rf $file $client_evts $server_evts
print_title "Cleanup"
# Terminate the MPTCP connection and related processes
if [ $client4_pid -ne 0 ]; then
kill -SIGUSR1 $client4_pid > /dev/null 2>&1
fi
if [ $server4_pid -ne 0 ]; then
kill_wait $server4_pid
fi
if [ $client6_pid -ne 0 ]; then
kill -SIGUSR1 $client6_pid > /dev/null 2>&1
fi
if [ $server6_pid -ne 0 ]; then
kill_wait $server6_pid
fi
if [ $server_evts_pid -ne 0 ]; then
kill_wait $server_evts_pid
fi
if [ $client_evts_pid -ne 0 ]; then
kill_wait $client_evts_pid
fi
local pid
for pid in $client4_pid $server4_pid $client6_pid $server6_pid\
$server_evts_pid $client_evts_pid
do
kill_wait $pid
done
local netns
for netns in "$ns1" "$ns2" ;do
ip netns del "$netns"
done
rm -rf $file $client_evts $server_evts
stdbuf -o0 -e0 printf "Done\n"
}
trap cleanup EXIT
@ -108,6 +107,7 @@ ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad
ip -net "$ns2" addr add dead:beef:2::2/64 dev ns2eth1 nodad
ip -net "$ns2" link set ns2eth1 up
print_title "Init"
stdbuf -o0 -e0 printf "Created network namespaces ns1, ns2 \t\t\t[OK]\n"
make_file()
@ -193,11 +193,16 @@ make_connection()
server_serverside=$(grep "type:1," "$server_evts" |
sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q')
stdbuf -o0 -e0 printf "Established IP%s MPTCP Connection ns2 => ns1 \t\t" $is_v6
if [ "$client_token" != "" ] && [ "$server_token" != "" ] && [ "$client_serverside" = 0 ] &&
[ "$server_serverside" = 1 ]
then
stdbuf -o0 -e0 printf "Established IP%s MPTCP Connection ns2 => ns1 \t\t[OK]\n" $is_v6
stdbuf -o0 -e0 printf "[OK]\n"
else
stdbuf -o0 -e0 printf "[FAIL]\n"
stdbuf -o0 -e0 printf "\tExpected tokens (c:%s - s:%s) and server (c:%d - s:%d)\n" \
"${client_token}" "${server_token}" \
"${client_serverside}" "${server_serverside}"
exit 1
fi
@ -217,6 +222,48 @@ make_connection()
fi
}
# $1: var name ; $2: prev ret
check_expected_one()
{
local var="${1}"
local exp="e_${var}"
local prev_ret="${2}"
if [ "${!var}" = "${!exp}" ]
then
return 0
fi
if [ "${prev_ret}" = "0" ]
then
stdbuf -o0 -e0 printf "[FAIL]\n"
fi
stdbuf -o0 -e0 printf "\tExpected value for '%s': '%s', got '%s'.\n" \
"${var}" "${!var}" "${!exp}"
return 1
}
# $@: all var names to check
check_expected()
{
local ret=0
local var
for var in "${@}"
do
check_expected_one "${var}" "${ret}" || ret=1
done
if [ ${ret} -eq 0 ]
then
stdbuf -o0 -e0 printf "[OK]\n"
return 0
fi
exit 1
}
verify_announce_event()
{
local evt=$1
@ -242,19 +289,14 @@ verify_announce_event()
fi
dport=$(sed --unbuffered -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
id=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
if [ "$type" = "$e_type" ] && [ "$token" = "$e_token" ] &&
[ "$addr" = "$e_addr" ] && [ "$dport" = "$e_dport" ] &&
[ "$id" = "$e_id" ]
then
stdbuf -o0 -e0 printf "[OK]\n"
return 0
fi
stdbuf -o0 -e0 printf "[FAIL]\n"
exit 1
check_expected "type" "token" "addr" "dport" "id"
}
test_announce()
{
print_title "Announce tests"
# Capture events on the network namespace running the server
:>"$server_evts"
@ -270,7 +312,7 @@ test_announce()
then
stdbuf -o0 -e0 printf "[OK]\n"
else
stdbuf -o0 -e0 printf "[FAIL]\n"
stdbuf -o0 -e0 printf "[FAIL]\n\ttype defined: %s\n" "${type}"
exit 1
fi
@ -347,18 +389,14 @@ verify_remove_event()
type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
id=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
if [ "$type" = "$e_type" ] && [ "$token" = "$e_token" ] &&
[ "$id" = "$e_id" ]
then
stdbuf -o0 -e0 printf "[OK]\n"
return 0
fi
stdbuf -o0 -e0 printf "[FAIL]\n"
exit 1
check_expected "type" "token" "id"
}
test_remove()
{
print_title "Remove tests"
# Capture events on the network namespace running the server
:>"$server_evts"
@ -507,20 +545,13 @@ verify_subflow_events()
daddr=$(sed --unbuffered -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evt")
fi
if [ "$type" = "$e_type" ] && [ "$token" = "$e_token" ] &&
[ "$daddr" = "$e_daddr" ] && [ "$e_dport" = "$dport" ] &&
[ "$family" = "$e_family" ] && [ "$saddr" = "$e_saddr" ] &&
[ "$e_locid" = "$locid" ] && [ "$e_remid" = "$remid" ]
then
stdbuf -o0 -e0 printf "[OK]\n"
return 0
fi
stdbuf -o0 -e0 printf "[FAIL]\n"
exit 1
check_expected "type" "token" "daddr" "dport" "family" "saddr" "locid" "remid"
}
test_subflows()
{
print_title "Subflows v4 or v6 only tests"
# Capture events on the network namespace running the server
:>"$server_evts"
@ -754,6 +785,8 @@ test_subflows()
test_subflows_v4_v6_mix()
{
print_title "Subflows v4 and v6 mix tests"
# Attempt to add a listener at 10.0.2.1:<subflow-port>
ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\
$app6_port > /dev/null 2>&1 &
@ -800,6 +833,8 @@ test_subflows_v4_v6_mix()
test_prio()
{
print_title "Prio tests"
local count
# Send MP_PRIO signal from client to server machine
@ -811,7 +846,7 @@ test_prio()
count=$(ip netns exec "$ns2" nstat -as | grep MPTcpExtMPPrioTx | awk '{print $2}')
[ -z "$count" ] && count=0
if [ $count != 1 ]; then
stdbuf -o0 -e0 printf "[FAIL]\n"
stdbuf -o0 -e0 printf "[FAIL]\n\tCount != 1: %d\n" "${count}"
exit 1
else
stdbuf -o0 -e0 printf "[OK]\n"
@ -822,7 +857,7 @@ test_prio()
count=$(ip netns exec "$ns1" nstat -as | grep MPTcpExtMPPrioRx | awk '{print $2}')
[ -z "$count" ] && count=0
if [ $count != 1 ]; then
stdbuf -o0 -e0 printf "[FAIL]\n"
stdbuf -o0 -e0 printf "[FAIL]\n\tCount != 1: %d\n" "${count}"
exit 1
else
stdbuf -o0 -e0 printf "[OK]\n"
@ -863,19 +898,13 @@ verify_listener_events()
sed --unbuffered -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q')
fi
if [ $type ] && [ $type = $e_type ] &&
[ $family ] && [ $family = $e_family ] &&
[ $saddr ] && [ $saddr = $e_saddr ] &&
[ $sport ] && [ $sport = $e_sport ]; then
stdbuf -o0 -e0 printf "[OK]\n"
return 0
fi
stdbuf -o0 -e0 printf "[FAIL]\n"
exit 1
check_expected "type" "family" "saddr" "sport"
}
test_listener()
{
print_title "Listener tests"
# Capture events on the network namespace running the client
:>$client_evts
@ -902,8 +931,10 @@ test_listener()
verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port
}
print_title "Make connections"
make_connection
make_connection "v6"
test_announce
test_remove
test_subflows