inet: implement lockless IP_TTL

ip_select_ttl() is racy because it reads inet->uc_ttl without
holding the socket lock, while setsockopt(IP_TTL) can change it
concurrently.

Add READ_ONCE()/WRITE_ONCE() annotations to the inet->uc_ttl
accesses, and move the IP_TTL handling so that the socket option
can be set and read without holding the socket lock.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Eric Dumazet 2023-08-16 08:15:46 +00:00 committed by David S. Miller
parent 08e39c0dfa
commit 10f42426e5
2 changed files with 13 additions and 16 deletions

View File

@ -133,7 +133,7 @@ EXPORT_SYMBOL_GPL(ip_local_out);
static inline int ip_select_ttl(const struct inet_sock *inet, static inline int ip_select_ttl(const struct inet_sock *inet,
const struct dst_entry *dst) const struct dst_entry *dst)
{ {
int ttl = inet->uc_ttl; int ttl = READ_ONCE(inet->uc_ttl);
if (ttl < 0) if (ttl < 0)
ttl = ip4_dst_hoplimit(dst); ttl = ip4_dst_hoplimit(dst);

View File

@ -1023,6 +1023,13 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
case IP_BIND_ADDRESS_NO_PORT: case IP_BIND_ADDRESS_NO_PORT:
inet_assign_bit(BIND_ADDRESS_NO_PORT, sk, val); inet_assign_bit(BIND_ADDRESS_NO_PORT, sk, val);
return 0; return 0;
case IP_TTL:
if (optlen < 1)
return -EINVAL;
if (val != -1 && (val < 1 || val > 255))
return -EINVAL;
WRITE_ONCE(inet->uc_ttl, val);
return 0;
} }
err = 0; err = 0;
@ -1080,13 +1087,6 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
case IP_TOS: /* This sets both TOS and Precedence */ case IP_TOS: /* This sets both TOS and Precedence */
__ip_sock_set_tos(sk, val); __ip_sock_set_tos(sk, val);
break; break;
case IP_TTL:
if (optlen < 1)
goto e_inval;
if (val != -1 && (val < 1 || val > 255))
goto e_inval;
inet->uc_ttl = val;
break;
case IP_MTU_DISCOVER: case IP_MTU_DISCOVER:
if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT) if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
goto e_inval; goto e_inval;
@ -1590,6 +1590,11 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
case IP_BIND_ADDRESS_NO_PORT: case IP_BIND_ADDRESS_NO_PORT:
val = inet_test_bit(BIND_ADDRESS_NO_PORT, sk); val = inet_test_bit(BIND_ADDRESS_NO_PORT, sk);
goto copyval; goto copyval;
case IP_TTL:
val = READ_ONCE(inet->uc_ttl);
if (val < 0)
val = READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_default_ttl);
goto copyval;
} }
if (needs_rtnl) if (needs_rtnl)
@ -1629,14 +1634,6 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
case IP_TOS: case IP_TOS:
val = inet->tos; val = inet->tos;
break; break;
case IP_TTL:
{
struct net *net = sock_net(sk);
val = (inet->uc_ttl == -1 ?
READ_ONCE(net->ipv4.sysctl_ip_default_ttl) :
inet->uc_ttl);
break;
}
case IP_MTU_DISCOVER: case IP_MTU_DISCOVER:
val = inet->pmtudisc; val = inet->pmtudisc;
break; break;