net: implement lockless SO_PRIORITY

This is a follow-up to commit 8bf43be799 ("net: annotate data-races
around sk->sk_priority").

sk->sk_priority can be read and written without holding the socket lock.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Eric Dumazet 2023-09-21 20:28:11 +00:00 committed by David S. Miller
parent 06bc3668cc
commit 10bbf1652c
24 changed files with 36 additions and 35 deletions

View File

@ -877,7 +877,7 @@ static int pppoe_sendmsg(struct socket *sock, struct msghdr *m,
skb->dev = dev; skb->dev = dev;
skb->priority = sk->sk_priority; skb->priority = READ_ONCE(sk->sk_priority);
skb->protocol = cpu_to_be16(ETH_P_PPP_SES); skb->protocol = cpu_to_be16(ETH_P_PPP_SES);
ph = skb_put(skb, total_len + sizeof(struct pppoe_hdr)); ph = skb_put(skb, total_len + sizeof(struct pppoe_hdr));

View File

@ -541,7 +541,7 @@ static inline struct sk_buff *bt_skb_sendmsg(struct sock *sk,
return ERR_PTR(-EFAULT); return ERR_PTR(-EFAULT);
} }
skb->priority = sk->sk_priority; skb->priority = READ_ONCE(sk->sk_priority);
return skb; return skb;
} }

View File

@ -664,7 +664,7 @@ int aarp_send_ddp(struct net_device *dev, struct sk_buff *skb,
sendit: sendit:
if (skb->sk) if (skb->sk)
skb->priority = skb->sk->sk_priority; skb->priority = READ_ONCE(skb->sk->sk_priority);
if (dev_queue_xmit(skb)) if (dev_queue_xmit(skb))
goto drop; goto drop;
sent: sent:

View File

@ -939,7 +939,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev)
sock_init_data(NULL, sk); sock_init_data(NULL, sk);
sk->sk_type = osk->sk_type; sk->sk_type = osk->sk_type;
sk->sk_priority = osk->sk_priority; sk->sk_priority = READ_ONCE(osk->sk_priority);
sk->sk_protocol = osk->sk_protocol; sk->sk_protocol = osk->sk_protocol;
sk->sk_rcvbuf = osk->sk_rcvbuf; sk->sk_rcvbuf = osk->sk_rcvbuf;
sk->sk_sndbuf = osk->sk_sndbuf; sk->sk_sndbuf = osk->sk_sndbuf;

View File

@ -1615,7 +1615,7 @@ static struct sk_buff *l2cap_sock_alloc_skb_cb(struct l2cap_chan *chan,
return ERR_PTR(-ENOTCONN); return ERR_PTR(-ENOTCONN);
} }
skb->priority = sk->sk_priority; skb->priority = READ_ONCE(sk->sk_priority);
bt_cb(skb)->l2cap.chan = chan; bt_cb(skb)->l2cap.chan = chan;

View File

@ -884,7 +884,7 @@ static struct sk_buff *j1939_sk_alloc_skb(struct net_device *ndev,
skcb = j1939_skb_to_cb(skb); skcb = j1939_skb_to_cb(skb);
memset(skcb, 0, sizeof(*skcb)); memset(skcb, 0, sizeof(*skcb));
skcb->addr = jsk->addr; skcb->addr = jsk->addr;
skcb->priority = j1939_prio(sk->sk_priority); skcb->priority = j1939_prio(READ_ONCE(sk->sk_priority));
if (msg->msg_name) { if (msg->msg_name) {
struct sockaddr_can *addr = msg->msg_name; struct sockaddr_can *addr = msg->msg_name;

View File

@ -881,7 +881,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
} }
skb->dev = dev; skb->dev = dev;
skb->priority = sk->sk_priority; skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = READ_ONCE(sk->sk_mark); skb->mark = READ_ONCE(sk->sk_mark);
skb->tstamp = sockc.transmit_time; skb->tstamp = sockc.transmit_time;

View File

@ -806,9 +806,7 @@ EXPORT_SYMBOL(sock_no_linger);
void sock_set_priority(struct sock *sk, u32 priority) void sock_set_priority(struct sock *sk, u32 priority)
{ {
lock_sock(sk);
WRITE_ONCE(sk->sk_priority, priority); WRITE_ONCE(sk->sk_priority, priority);
release_sock(sk);
} }
EXPORT_SYMBOL(sock_set_priority); EXPORT_SYMBOL(sock_set_priority);
@ -1118,6 +1116,18 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
valbool = val ? 1 : 0; valbool = val ? 1 : 0;
/* handle options which do not require locking the socket. */
switch (optname) {
case SO_PRIORITY:
if ((val >= 0 && val <= 6) ||
sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) ||
sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
sock_set_priority(sk, val);
return 0;
}
return -EPERM;
}
sockopt_lock_sock(sk); sockopt_lock_sock(sk);
switch (optname) { switch (optname) {
@ -1213,15 +1223,6 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
sk->sk_no_check_tx = valbool; sk->sk_no_check_tx = valbool;
break; break;
case SO_PRIORITY:
if ((val >= 0 && val <= 6) ||
sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) ||
sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
WRITE_ONCE(sk->sk_priority, val);
else
ret = -EPERM;
break;
case SO_LINGER: case SO_LINGER:
if (optlen < sizeof(ling)) { if (optlen < sizeof(ling)) {
ret = -EINVAL; /* 1003.1g */ ret = -EINVAL; /* 1003.1g */

View File

@ -239,7 +239,7 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req
if (!opt) if (!opt)
opt = rcu_dereference(np->opt); opt = rcu_dereference(np->opt);
err = ip6_xmit(sk, skb, &fl6, READ_ONCE(sk->sk_mark), opt, err = ip6_xmit(sk, skb, &fl6, READ_ONCE(sk->sk_mark), opt,
np->tclass, sk->sk_priority); np->tclass, READ_ONCE(sk->sk_priority));
rcu_read_unlock(); rcu_read_unlock();
err = net_xmit_eval(err); err = net_xmit_eval(err);
} }

View File

@ -165,7 +165,7 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
* For cgroup2 classid is always zero. * For cgroup2 classid is always zero.
*/ */
if (!classid) if (!classid)
classid = sk->sk_priority; classid = READ_ONCE(sk->sk_priority);
if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid)) if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid))
goto errout; goto errout;

View File

@ -1449,7 +1449,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
ip_options_build(skb, opt, cork->addr, rt); ip_options_build(skb, opt, cork->addr, rt);
} }
skb->priority = (cork->tos != -1) ? cork->priority: sk->sk_priority; skb->priority = (cork->tos != -1) ? cork->priority: READ_ONCE(sk->sk_priority);
skb->mark = cork->mark; skb->mark = cork->mark;
skb->tstamp = cork->transmit_time; skb->tstamp = cork->transmit_time;
/* /*

View File

@ -828,7 +828,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ? ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
inet_twsk(sk)->tw_mark : sk->sk_mark; inet_twsk(sk)->tw_mark : sk->sk_mark;
ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ? ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
inet_twsk(sk)->tw_priority : sk->sk_priority; inet_twsk(sk)->tw_priority : READ_ONCE(sk->sk_priority);
transmit_time = tcp_transmit_time(sk); transmit_time = tcp_transmit_time(sk);
xfrm_sk_clone_policy(ctl_sk, sk); xfrm_sk_clone_policy(ctl_sk, sk);
txhash = (sk->sk_state == TCP_TIME_WAIT) ? txhash = (sk->sk_state == TCP_TIME_WAIT) ?

View File

@ -292,7 +292,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tw->tw_transparent = inet_test_bit(TRANSPARENT, sk); tw->tw_transparent = inet_test_bit(TRANSPARENT, sk);
tw->tw_mark = sk->sk_mark; tw->tw_mark = sk->sk_mark;
tw->tw_priority = sk->sk_priority; tw->tw_priority = READ_ONCE(sk->sk_priority);
tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale;
tcptw->tw_rcv_nxt = tp->rcv_nxt; tcptw->tw_rcv_nxt = tp->rcv_nxt;
tcptw->tw_snd_nxt = tp->snd_nxt; tcptw->tw_snd_nxt = tp->snd_nxt;

View File

@ -133,7 +133,7 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused
fl6.daddr = sk->sk_v6_daddr; fl6.daddr = sk->sk_v6_daddr;
res = ip6_xmit(sk, skb, &fl6, sk->sk_mark, rcu_dereference(np->opt), res = ip6_xmit(sk, skb, &fl6, sk->sk_mark, rcu_dereference(np->opt),
np->tclass, sk->sk_priority); np->tclass, READ_ONCE(sk->sk_priority));
rcu_read_unlock(); rcu_read_unlock();
return res; return res;
} }

View File

@ -1984,7 +1984,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
hdr->saddr = fl6->saddr; hdr->saddr = fl6->saddr;
hdr->daddr = *final_dst; hdr->daddr = *final_dst;
skb->priority = sk->sk_priority; skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = cork->base.mark; skb->mark = cork->base.mark;
skb->tstamp = cork->base.transmit_time; skb->tstamp = cork->base.transmit_time;

View File

@ -565,7 +565,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
if (!opt) if (!opt)
opt = rcu_dereference(np->opt); opt = rcu_dereference(np->opt);
err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark), err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
opt, tclass, sk->sk_priority); opt, tclass, READ_ONCE(sk->sk_priority));
rcu_read_unlock(); rcu_read_unlock();
err = net_xmit_eval(err); err = net_xmit_eval(err);
} }
@ -1058,7 +1058,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
trace_tcp_send_reset(sk, skb); trace_tcp_send_reset(sk, skb);
if (inet6_test_bit(REPFLOW, sk)) if (inet6_test_bit(REPFLOW, sk))
label = ip6_flowlabel(ipv6h); label = ip6_flowlabel(ipv6h);
priority = sk->sk_priority; priority = READ_ONCE(sk->sk_priority);
txhash = sk->sk_txhash; txhash = sk->sk_txhash;
} }
if (sk->sk_state == TCP_TIME_WAIT) { if (sk->sk_state == TCP_TIME_WAIT) {

View File

@ -89,7 +89,7 @@ static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, in
sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val); sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val);
break; break;
case SO_PRIORITY: case SO_PRIORITY:
ssk->sk_priority = val; WRITE_ONCE(ssk->sk_priority, val);
break; break;
case SO_SNDBUF: case SO_SNDBUF:
case SO_SNDBUFFORCE: case SO_SNDBUFFORCE:

View File

@ -487,7 +487,7 @@ static struct sock *nr_make_new(struct sock *osk)
sock_init_data(NULL, sk); sock_init_data(NULL, sk);
sk->sk_type = osk->sk_type; sk->sk_type = osk->sk_type;
sk->sk_priority = osk->sk_priority; sk->sk_priority = READ_ONCE(osk->sk_priority);
sk->sk_protocol = osk->sk_protocol; sk->sk_protocol = osk->sk_protocol;
sk->sk_rcvbuf = osk->sk_rcvbuf; sk->sk_rcvbuf = osk->sk_rcvbuf;
sk->sk_sndbuf = osk->sk_sndbuf; sk->sk_sndbuf = osk->sk_sndbuf;

View File

@ -583,7 +583,7 @@ static struct sock *rose_make_new(struct sock *osk)
#endif #endif
sk->sk_type = osk->sk_type; sk->sk_type = osk->sk_type;
sk->sk_priority = osk->sk_priority; sk->sk_priority = READ_ONCE(osk->sk_priority);
sk->sk_protocol = osk->sk_protocol; sk->sk_protocol = osk->sk_protocol;
sk->sk_rcvbuf = osk->sk_rcvbuf; sk->sk_rcvbuf = osk->sk_rcvbuf;
sk->sk_sndbuf = osk->sk_sndbuf; sk->sk_sndbuf = osk->sk_sndbuf;

View File

@ -546,7 +546,7 @@ META_COLLECTOR(int_sk_prio)
*err = -1; *err = -1;
return; return;
} }
dst->value = sk->sk_priority; dst->value = READ_ONCE(sk->sk_priority);
} }
META_COLLECTOR(int_sk_rcvlowat) META_COLLECTOR(int_sk_rcvlowat)

View File

@ -247,7 +247,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *t)
rcu_read_lock(); rcu_read_lock();
res = ip6_xmit(sk, skb, fl6, sk->sk_mark, res = ip6_xmit(sk, skb, fl6, sk->sk_mark,
rcu_dereference(np->opt), rcu_dereference(np->opt),
tclass, sk->sk_priority); tclass, READ_ONCE(sk->sk_priority));
rcu_read_unlock(); rcu_read_unlock();
return res; return res;
} }

View File

@ -493,7 +493,7 @@ static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
nsk->sk_sndtimeo = osk->sk_sndtimeo; nsk->sk_sndtimeo = osk->sk_sndtimeo;
nsk->sk_rcvtimeo = osk->sk_rcvtimeo; nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
nsk->sk_mark = READ_ONCE(osk->sk_mark); nsk->sk_mark = READ_ONCE(osk->sk_mark);
nsk->sk_priority = osk->sk_priority; nsk->sk_priority = READ_ONCE(osk->sk_priority);
nsk->sk_rcvlowat = osk->sk_rcvlowat; nsk->sk_rcvlowat = osk->sk_rcvlowat;
nsk->sk_bound_dev_if = osk->sk_bound_dev_if; nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
nsk->sk_err = osk->sk_err; nsk->sk_err = osk->sk_err;

View File

@ -598,7 +598,7 @@ static struct sock *x25_make_new(struct sock *osk)
x25 = x25_sk(sk); x25 = x25_sk(sk);
sk->sk_type = osk->sk_type; sk->sk_type = osk->sk_type;
sk->sk_priority = osk->sk_priority; sk->sk_priority = READ_ONCE(osk->sk_priority);
sk->sk_protocol = osk->sk_protocol; sk->sk_protocol = osk->sk_protocol;
sk->sk_rcvbuf = osk->sk_rcvbuf; sk->sk_rcvbuf = osk->sk_rcvbuf;
sk->sk_sndbuf = osk->sk_sndbuf; sk->sk_sndbuf = osk->sk_sndbuf;

View File

@ -684,7 +684,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
} }
skb->dev = dev; skb->dev = dev;
skb->priority = xs->sk.sk_priority; skb->priority = READ_ONCE(xs->sk.sk_priority);
skb->mark = READ_ONCE(xs->sk.sk_mark); skb->mark = READ_ONCE(xs->sk.sk_mark);
skb->destructor = xsk_destruct_skb; skb->destructor = xsk_destruct_skb;
xsk_set_destructor_arg(skb); xsk_set_destructor_arg(skb);