mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-12-28 16:56:26 +00:00
fcc79e1714
The most significant set of changes is the per netns RTNL. The new behavior is disabled by default, regression risk should be contained. Notably the new config knob PTP_1588_CLOCK_VMCLOCK will inherit its default value from PTP_1588_CLOCK_KVM, as the first is intended to be a more reliable replacement for the latter. Core ---- - Started a very large, in-progress, effort to make the RTNL lock scope per network-namespace, thus reducing the lock contention significantly in the containerized use-case, comprising: - RCU-ified some relevant slices of the FIB control path - introduce basic per netns locking helpers - namespacified the IPv4 address hash table - remove rtnl_register{,_module}() in favour of rtnl_register_many() - refactor rtnl_{new,del,set}link() moving as much validation as possible out of RTNL lock - convert all phonet doit() and dumpit() handlers to RCU - convert IPv4 addresses manipulation to per-netns RTNL - convert virtual interface creation to per-netns RTNL the per-netns lock infra is guarded by the CONFIG_DEBUG_NET_SMALL_RTNL knob, disabled by default ad interim. - Introduce NAPI suspension, to efficiently switching between busy polling (NAPI processing suspended) and normal processing. - Migrate the IPv4 routing input, output and control path from direct ToS usage to DSCP macros. This is a work in progress to make ECN handling consistent and reliable. - Add drop reasons support to the IPv4 rotue input path, allowing better introspection in case of packets drop. - Make FIB seqnum lockless, dropping RTNL protection for read access. - Make inet{,v6} addresses hashing less predicable. - Allow providing timestamp OPT_ID via cmsg, to correlate TX packets and timestamps Things we sprinkled into general kernel code -------------------------------------------- - Add small file operations for debugfs, to reduce the struct ops size. - Refactoring and optimization for the implementation of page_frag API, This is a preparatory work to consolidate the page_frag implementation. Netfilter --------- - Optimize set element transactions to reduce memory consumption - Extended netlink error reporting for attribute parser failure. - Make legacy xtables configs user selectable, giving users the option to configure iptables without enabling any other config. - Address a lot of false-positive RCU issues, pointed by recent CI improvements. BPF --- - Put xsk sockets on a struct diet and add various cleanups. Overall, this helps to bump performance by 12% for some workloads. - Extend BPF selftests to increase coverage of XDP features in combination with BPF cpumap. - Optimize and homogenize bpf_csum_diff helper for all archs and also add a batch of new BPF selftests for it. - Extend netkit with an option to delegate skb->{mark,priority} scrubbing to its BPF program. - Make the bpf_get_netns_cookie() helper available also to tc(x) BPF programs. Protocols --------- - Introduces 4-tuple hash for connected udp sockets, speeding-up significantly connected sockets lookup. - Add a fastpath for some TCP timers that usually expires after close, the socket lock contention. - Add inbound and outbound xfrm state caches to speed up state lookups. - Avoid sending MPTCP advertisements on stale subflows, reducing risks on loosing them. - Make neighbours table flushing more scalable, maintaining per device neigh lists. Driver API ---------- - Introduce a unified interface to configure transmission H/W shaping, and expose it to user-space via generic-netlink. - Add support for per-NAPI config via netlink. This makes napi configuration persistent across queues removal and re-creation. Requires driver updates, currently supported drivers are: nVidia/Mellanox mlx4 and mlx5, Broadcom brcm and Intel ice. - Add ethtool support for writing SFP / PHY firmware blocks. - Track RSS context allocation from ethtool core. - Implement support for mirroring to DSA CPU port, via TC mirror offload. - Consolidate FDB updates notification, to avoid duplicates on device-specific entries. - Expose DPLL clock quality level to the user-space. - Support master-slave PHY config via device tree. Tests and tooling ----------------- - forwarding: introduce deferred commands, to simplify the cleanup phase Drivers ------- - Updated several drivers - Amazon vNic, Google vNic, Microsoft vNic, Intel e1000e and Broadcom Tigon3 - to use netdev-genl to link the IRQs and queues to NAPI IDs, allowing busy polling and better introspection. - Ethernet high-speed NICs: - nVidia/Mellanox: - mlx5: - a large refactor to implement support for cross E-Switch scheduling - refactor H/W conter management to let it scale better - H/W GRO cleanups - Intel (100G, ice):: - adds support for ethtool reset - implement support for per TX queue H/W shaping - AMD/Solarflare: - implement per device queue stats support - Broadcom (bnxt): - improve wildcard l4proto on IPv4/IPv6 ntuple rules - Marvell Octeon: - Adds representor support for each Resource Virtualization Unit (RVU) device. - Hisilicon: - adds support for the BMC Gigabit Ethernet - IBM (EMAC): - driver cleanup and modernization - Cisco (VIC): - raise the queues number limit to 256 - Ethernet virtual: - Google vNIC: - implements page pool support - macsec: - inherit lower device's features and TSO limits when offloading - virtio_net: - enable premapped mode by default - support for XDP socket(AF_XDP) zerocopy TX - wireguard: - set the TSO max size to be GSO_MAX_SIZE, to aggregate larger packets. - Ethernet NICs embedded and virtual: - Broadcom ASP: - enable software timestamping - Freescale: - add enetc4 PF driver - MediaTek: Airoha SoC: - implement BQL support - RealTek r8169: - enable TSO by default on r8168/r8125 - implement extended ethtool stats - Renesas AVB: - enable TX checksum offload - Synopsys (stmmac): - support header splitting for vlan tagged packets - move common code for DWMAC4 and DWXGMAC into a separate FPE module. - Add the dwmac driver support for T-HEAD TH1520 SoC - Synopsys (xpcs): - driver refactor and cleanup - TI: - icssg_prueth: add VLAN offload support - Xilinx emaclite: - adds clock support - Ethernet switches: - Microchip: - implement support for the lan969x Ethernet switch family - add LAN9646 switch support to KSZ DSA driver - Ethernet PHYs: - Marvel: 88q2x: enable auto negotiation - Microchip: add support for LAN865X Rev B1 and LAN867X Rev C1/C2 - PTP: - Add support for the Amazon virtual clock device - Add PtP driver for s390 clocks - WiFi: - mac80211 - EHT 1024 aggregation size for transmissions - new operation to indicate that a new interface is to be added - support radio separation of multi-band devices - move wireless extension spy implementation to libiw - Broadcom: - brcmfmac: optional LPO clock support - Microchip: - add support for Atmel WILC3000 - Qualcomm (ath12k): - firmware coredump collection support - add debugfs support for a multitude of statistics - Qualcomm (ath5k): - Arcadyan ARV45XX AR2417 & Gigaset SX76[23] AR241[34]A support - Realtek: - rtw88: 8821au and 8812au USB adapters support - rtw89: add thermal protection - rtw89: fine tune BT-coexsitence to improve user experience - rtw89: firmware secure boot for WiFi 6 chip - Bluetooth - add Qualcomm WCN785x support for ids Foxconn 0xe0fc/0xe0f3 and 0x13d3:0x3623 - add Realtek RTL8852BE support for id Foxconn 0xe123 - add MediaTek MT7920 support for wireless module ids - btintel_pcie: add handshake between driver and firmware - btintel_pcie: add recovery mechanism - btnxpuart: add GPIO support to power save feature Signed-off-by: Paolo Abeni <pabeni@redhat.com> -----BEGIN PGP SIGNATURE----- iQJGBAABCAAwFiEEg1AjqC77wbdLX2LbKSR5jcyPE6QFAmc8sukSHHBhYmVuaUBy ZWRoYXQuY29tAAoJECkkeY3MjxOkLEYQAIMM6Qjh0bh3Byr3gOS1xZzXG+APLjP4 9Jr0p3i+X53i90jvVqzeVO5FTc95MVHSKZ3kvPkDMXSLUaEJxocNHCI5Dzl/2/qL wWdpUB6/ou+jKB4Bn6Z8OvVODT7qrr0tVa9M2/fuKWrIsOU/ntIhG8EhnGddk5U/ vKPSf5PUIb81uNRnF58VusY3wrT1dEoh9VfJYxL+ST+inPxjEAMy6Y+lmlsjGaSX jrS+Pp9KYiUwl3Qt0AQs+cG4OHkJdjbnChrfosWwpkiyddO8klVq06+wX/TiSzfF b9VZtBfy/GZs3lkE1mQkcILdtX5pP3YHQdpsuxFfVI0JHVszx2ck7WdoRux/8F0v kKZsYcO7bH9I1wMFP66Ff9hIbdEQaeucK+KdDkXyPNMfP91Vzmfjii8IBxOC36Ie BbOeFUrXyTxxJ2u0vf/X9JtIq8bcrkNrSd1n1jlGPMqG3FVzsY95+Oi4qfsyeUbl lS1PlVTqPMPFdX54HnxM3y2rJjhd7iXhkvmtuXNjRFThXlOiK3maAPWlM1aZ3b8u Vjs4JFUsW0tleZG+RzANjsGjXbf7AiPUGLZt+acem0K+fcjG4i5aGIAJrxwa/ORx eG74IZRt5cOI371W7gNLGHjwnuge8tFPgOWcRP2eozNm7jvMYALBejYS7eWUTvaf THcvVM+bupEZ =GzPr -----END PGP SIGNATURE----- Merge tag 'net-next-6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next Pull networking updates from Paolo Abeni: "The most significant set of changes is the per netns RTNL. The new behavior is disabled by default, regression risk should be contained. Notably the new config knob PTP_1588_CLOCK_VMCLOCK will inherit its default value from PTP_1588_CLOCK_KVM, as the first is intended to be a more reliable replacement for the latter. Core: - Started a very large, in-progress, effort to make the RTNL lock scope per network-namespace, thus reducing the lock contention significantly in the containerized use-case, comprising: - RCU-ified some relevant slices of the FIB control path - introduce basic per netns locking helpers - namespacified the IPv4 address hash table - remove rtnl_register{,_module}() in favour of rtnl_register_many() - refactor rtnl_{new,del,set}link() moving as much validation as possible out of RTNL lock - convert all phonet doit() and dumpit() handlers to RCU - convert IPv4 addresses manipulation to per-netns RTNL - convert virtual interface creation to per-netns RTNL the per-netns lock infrastructure is guarded by the CONFIG_DEBUG_NET_SMALL_RTNL knob, disabled by default ad interim. - Introduce NAPI suspension, to efficiently switching between busy polling (NAPI processing suspended) and normal processing. - Migrate the IPv4 routing input, output and control path from direct ToS usage to DSCP macros. This is a work in progress to make ECN handling consistent and reliable. - Add drop reasons support to the IPv4 rotue input path, allowing better introspection in case of packets drop. - Make FIB seqnum lockless, dropping RTNL protection for read access. - Make inet{,v6} addresses hashing less predicable. - Allow providing timestamp OPT_ID via cmsg, to correlate TX packets and timestamps Things we sprinkled into general kernel code: - Add small file operations for debugfs, to reduce the struct ops size. - Refactoring and optimization for the implementation of page_frag API, This is a preparatory work to consolidate the page_frag implementation. Netfilter: - Optimize set element transactions to reduce memory consumption - Extended netlink error reporting for attribute parser failure. - Make legacy xtables configs user selectable, giving users the option to configure iptables without enabling any other config. - Address a lot of false-positive RCU issues, pointed by recent CI improvements. BPF: - Put xsk sockets on a struct diet and add various cleanups. Overall, this helps to bump performance by 12% for some workloads. - Extend BPF selftests to increase coverage of XDP features in combination with BPF cpumap. - Optimize and homogenize bpf_csum_diff helper for all archs and also add a batch of new BPF selftests for it. - Extend netkit with an option to delegate skb->{mark,priority} scrubbing to its BPF program. - Make the bpf_get_netns_cookie() helper available also to tc(x) BPF programs. Protocols: - Introduces 4-tuple hash for connected udp sockets, speeding-up significantly connected sockets lookup. - Add a fastpath for some TCP timers that usually expires after close, the socket lock contention. - Add inbound and outbound xfrm state caches to speed up state lookups. - Avoid sending MPTCP advertisements on stale subflows, reducing risks on loosing them. - Make neighbours table flushing more scalable, maintaining per device neigh lists. Driver API: - Introduce a unified interface to configure transmission H/W shaping, and expose it to user-space via generic-netlink. - Add support for per-NAPI config via netlink. This makes napi configuration persistent across queues removal and re-creation. Requires driver updates, currently supported drivers are: nVidia/Mellanox mlx4 and mlx5, Broadcom brcm and Intel ice. - Add ethtool support for writing SFP / PHY firmware blocks. - Track RSS context allocation from ethtool core. - Implement support for mirroring to DSA CPU port, via TC mirror offload. - Consolidate FDB updates notification, to avoid duplicates on device-specific entries. - Expose DPLL clock quality level to the user-space. - Support master-slave PHY config via device tree. Tests and tooling: - forwarding: introduce deferred commands, to simplify the cleanup phase Drivers: - Updated several drivers - Amazon vNic, Google vNic, Microsoft vNic, Intel e1000e and Broadcom Tigon3 - to use netdev-genl to link the IRQs and queues to NAPI IDs, allowing busy polling and better introspection. - Ethernet high-speed NICs: - nVidia/Mellanox: - mlx5: - a large refactor to implement support for cross E-Switch scheduling - refactor H/W conter management to let it scale better - H/W GRO cleanups - Intel (100G, ice):: - add support for ethtool reset - implement support for per TX queue H/W shaping - AMD/Solarflare: - implement per device queue stats support - Broadcom (bnxt): - improve wildcard l4proto on IPv4/IPv6 ntuple rules - Marvell Octeon: - Add representor support for each Resource Virtualization Unit (RVU) device. - Hisilicon: - add support for the BMC Gigabit Ethernet - IBM (EMAC): - driver cleanup and modernization - Cisco (VIC): - raise the queues number limit to 256 - Ethernet virtual: - Google vNIC: - implement page pool support - macsec: - inherit lower device's features and TSO limits when offloading - virtio_net: - enable premapped mode by default - support for XDP socket(AF_XDP) zerocopy TX - wireguard: - set the TSO max size to be GSO_MAX_SIZE, to aggregate larger packets. - Ethernet NICs embedded and virtual: - Broadcom ASP: - enable software timestamping - Freescale: - add enetc4 PF driver - MediaTek: Airoha SoC: - implement BQL support - RealTek r8169: - enable TSO by default on r8168/r8125 - implement extended ethtool stats - Renesas AVB: - enable TX checksum offload - Synopsys (stmmac): - support header splitting for vlan tagged packets - move common code for DWMAC4 and DWXGMAC into a separate FPE module. - add dwmac driver support for T-HEAD TH1520 SoC - Synopsys (xpcs): - driver refactor and cleanup - TI: - icssg_prueth: add VLAN offload support - Xilinx emaclite: - add clock support - Ethernet switches: - Microchip: - implement support for the lan969x Ethernet switch family - add LAN9646 switch support to KSZ DSA driver - Ethernet PHYs: - Marvel: 88q2x: enable auto negotiation - Microchip: add support for LAN865X Rev B1 and LAN867X Rev C1/C2 - PTP: - Add support for the Amazon virtual clock device - Add PtP driver for s390 clocks - WiFi: - mac80211 - EHT 1024 aggregation size for transmissions - new operation to indicate that a new interface is to be added - support radio separation of multi-band devices - move wireless extension spy implementation to libiw - Broadcom: - brcmfmac: optional LPO clock support - Microchip: - add support for Atmel WILC3000 - Qualcomm (ath12k): - firmware coredump collection support - add debugfs support for a multitude of statistics - Qualcomm (ath5k): - Arcadyan ARV45XX AR2417 & Gigaset SX76[23] AR241[34]A support - Realtek: - rtw88: 8821au and 8812au USB adapters support - rtw89: add thermal protection - rtw89: fine tune BT-coexsitence to improve user experience - rtw89: firmware secure boot for WiFi 6 chip - Bluetooth - add Qualcomm WCN785x support for ids Foxconn 0xe0fc/0xe0f3 and 0x13d3:0x3623 - add Realtek RTL8852BE support for id Foxconn 0xe123 - add MediaTek MT7920 support for wireless module ids - btintel_pcie: add handshake between driver and firmware - btintel_pcie: add recovery mechanism - btnxpuart: add GPIO support to power save feature" * tag 'net-next-6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1475 commits) mm: page_frag: fix a compile error when kernel is not compiled Documentation: tipc: fix formatting issue in tipc.rst selftests: nic_performance: Add selftest for performance of NIC driver selftests: nic_link_layer: Add selftest case for speed and duplex states selftests: nic_link_layer: Add link layer selftest for NIC driver bnxt_en: Add FW trace coredump segments to the coredump bnxt_en: Add a new ethtool -W dump flag bnxt_en: Add 2 parameters to bnxt_fill_coredump_seg_hdr() bnxt_en: Add functions to copy host context memory bnxt_en: Do not free FW log context memory bnxt_en: Manage the FW trace context memory bnxt_en: Allocate backing store memory for FW trace logs bnxt_en: Add a 'force' parameter to bnxt_free_ctx_mem() bnxt_en: Refactor bnxt_free_ctx_mem() bnxt_en: Add mem_valid bit to struct bnxt_ctx_mem_type bnxt_en: Update firmware interface spec to 1.10.3.85 selftests/bpf: Add some tests with sockmap SK_PASS bpf: fix recursive lock when verdict program return SK_PASS wireguard: device: support big tcp GSO wireguard: selftests: load nf_conntrack if not present ...
361 lines
11 KiB
C
361 lines
11 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef __LINUX_NETLINK_H
|
|
#define __LINUX_NETLINK_H
|
|
|
|
|
|
#include <linux/capability.h>
|
|
#include <linux/skbuff.h>
|
|
#include <linux/export.h>
|
|
#include <net/scm.h>
|
|
#include <uapi/linux/netlink.h>
|
|
|
|
struct net;
|
|
|
|
void do_trace_netlink_extack(const char *msg);
|
|
|
|
static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb)
|
|
{
|
|
return (struct nlmsghdr *)skb->data;
|
|
}
|
|
|
|
enum netlink_skb_flags {
|
|
NETLINK_SKB_DST = 0x8, /* Dst set in sendto or sendmsg */
|
|
};
|
|
|
|
struct netlink_skb_parms {
|
|
struct scm_creds creds; /* Skb credentials */
|
|
__u32 portid;
|
|
__u32 dst_group;
|
|
__u32 flags;
|
|
struct sock *sk;
|
|
bool nsid_is_set;
|
|
int nsid;
|
|
};
|
|
|
|
#define NETLINK_CB(skb) (*(struct netlink_skb_parms*)&((skb)->cb))
|
|
#define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds)
|
|
#define NETLINK_CTX_SIZE 48
|
|
|
|
|
|
void netlink_table_grab(void);
|
|
void netlink_table_ungrab(void);
|
|
|
|
#define NL_CFG_F_NONROOT_RECV (1 << 0)
|
|
#define NL_CFG_F_NONROOT_SEND (1 << 1)
|
|
|
|
/* optional Netlink kernel configuration parameters */
|
|
struct netlink_kernel_cfg {
|
|
unsigned int groups;
|
|
unsigned int flags;
|
|
void (*input)(struct sk_buff *skb);
|
|
int (*bind)(struct net *net, int group);
|
|
void (*unbind)(struct net *net, int group);
|
|
void (*release) (struct sock *sk, unsigned long *groups);
|
|
};
|
|
|
|
struct sock *__netlink_kernel_create(struct net *net, int unit,
|
|
struct module *module,
|
|
struct netlink_kernel_cfg *cfg);
|
|
static inline struct sock *
|
|
netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg)
|
|
{
|
|
return __netlink_kernel_create(net, unit, THIS_MODULE, cfg);
|
|
}
|
|
|
|
/* this can be increased when necessary - don't expose to userland */
|
|
#define NETLINK_MAX_COOKIE_LEN 20
|
|
#define NETLINK_MAX_FMTMSG_LEN 80
|
|
|
|
/**
|
|
* struct netlink_ext_ack - netlink extended ACK report struct
|
|
* @_msg: message string to report - don't access directly, use
|
|
* %NL_SET_ERR_MSG
|
|
* @bad_attr: attribute with error
|
|
* @policy: policy for a bad attribute
|
|
* @miss_type: attribute type which was missing
|
|
* @miss_nest: nest missing an attribute (%NULL if missing top level attr)
|
|
* @cookie: cookie data to return to userspace (for success)
|
|
* @cookie_len: actual cookie data length
|
|
* @_msg_buf: output buffer for formatted message strings - don't access
|
|
* directly, use %NL_SET_ERR_MSG_FMT
|
|
*/
|
|
struct netlink_ext_ack {
|
|
const char *_msg;
|
|
const struct nlattr *bad_attr;
|
|
const struct nla_policy *policy;
|
|
const struct nlattr *miss_nest;
|
|
u16 miss_type;
|
|
u8 cookie[NETLINK_MAX_COOKIE_LEN];
|
|
u8 cookie_len;
|
|
char _msg_buf[NETLINK_MAX_FMTMSG_LEN];
|
|
};
|
|
|
|
/* Always use this macro, this allows later putting the
|
|
* message into a separate section or such for things
|
|
* like translation or listing all possible messages.
|
|
* If string formatting is needed use NL_SET_ERR_MSG_FMT.
|
|
*/
|
|
#define NL_SET_ERR_MSG(extack, msg) do { \
|
|
static const char __msg[] = msg; \
|
|
struct netlink_ext_ack *__extack = (extack); \
|
|
\
|
|
do_trace_netlink_extack(__msg); \
|
|
\
|
|
if (__extack) \
|
|
__extack->_msg = __msg; \
|
|
} while (0)
|
|
|
|
/* We splice fmt with %s at each end even in the snprintf so that both calls
|
|
* can use the same string constant, avoiding its duplication in .ro
|
|
*/
|
|
#define NL_SET_ERR_MSG_FMT(extack, fmt, args...) do { \
|
|
struct netlink_ext_ack *__extack = (extack); \
|
|
\
|
|
if (!__extack) \
|
|
break; \
|
|
if (snprintf(__extack->_msg_buf, NETLINK_MAX_FMTMSG_LEN, \
|
|
"%s" fmt "%s", "", ##args, "") >= \
|
|
NETLINK_MAX_FMTMSG_LEN) \
|
|
net_warn_ratelimited("%s" fmt "%s", "truncated extack: ", \
|
|
##args, "\n"); \
|
|
\
|
|
do_trace_netlink_extack(__extack->_msg_buf); \
|
|
\
|
|
__extack->_msg = __extack->_msg_buf; \
|
|
} while (0)
|
|
|
|
#define NL_SET_ERR_MSG_MOD(extack, msg) \
|
|
NL_SET_ERR_MSG((extack), KBUILD_MODNAME ": " msg)
|
|
|
|
#define NL_SET_ERR_MSG_FMT_MOD(extack, fmt, args...) \
|
|
NL_SET_ERR_MSG_FMT((extack), KBUILD_MODNAME ": " fmt, ##args)
|
|
|
|
#define NL_SET_ERR_MSG_WEAK(extack, msg) do { \
|
|
if ((extack) && !(extack)->_msg) \
|
|
NL_SET_ERR_MSG((extack), msg); \
|
|
} while (0)
|
|
|
|
#define NL_SET_ERR_MSG_WEAK_MOD(extack, msg) do { \
|
|
if ((extack) && !(extack)->_msg) \
|
|
NL_SET_ERR_MSG_MOD((extack), msg); \
|
|
} while (0)
|
|
|
|
#define NL_SET_BAD_ATTR_POLICY(extack, attr, pol) do { \
|
|
if ((extack)) { \
|
|
(extack)->bad_attr = (attr); \
|
|
(extack)->policy = (pol); \
|
|
} \
|
|
} while (0)
|
|
|
|
#define NL_SET_BAD_ATTR(extack, attr) NL_SET_BAD_ATTR_POLICY(extack, attr, NULL)
|
|
|
|
#define NL_SET_ERR_MSG_ATTR_POL(extack, attr, pol, msg) do { \
|
|
static const char __msg[] = msg; \
|
|
struct netlink_ext_ack *__extack = (extack); \
|
|
\
|
|
do_trace_netlink_extack(__msg); \
|
|
\
|
|
if (__extack) { \
|
|
__extack->_msg = __msg; \
|
|
__extack->bad_attr = (attr); \
|
|
__extack->policy = (pol); \
|
|
} \
|
|
} while (0)
|
|
|
|
#define NL_SET_ERR_MSG_ATTR_POL_FMT(extack, attr, pol, fmt, args...) do { \
|
|
struct netlink_ext_ack *__extack = (extack); \
|
|
\
|
|
if (!__extack) \
|
|
break; \
|
|
\
|
|
if (snprintf(__extack->_msg_buf, NETLINK_MAX_FMTMSG_LEN, \
|
|
"%s" fmt "%s", "", ##args, "") >= \
|
|
NETLINK_MAX_FMTMSG_LEN) \
|
|
net_warn_ratelimited("%s" fmt "%s", "truncated extack: ", \
|
|
##args, "\n"); \
|
|
\
|
|
do_trace_netlink_extack(__extack->_msg_buf); \
|
|
\
|
|
__extack->_msg = __extack->_msg_buf; \
|
|
__extack->bad_attr = (attr); \
|
|
__extack->policy = (pol); \
|
|
} while (0)
|
|
|
|
#define NL_SET_ERR_MSG_ATTR(extack, attr, msg) \
|
|
NL_SET_ERR_MSG_ATTR_POL(extack, attr, NULL, msg)
|
|
|
|
#define NL_SET_ERR_MSG_ATTR_FMT(extack, attr, msg, args...) \
|
|
NL_SET_ERR_MSG_ATTR_POL_FMT(extack, attr, NULL, msg, ##args)
|
|
|
|
#define NL_SET_ERR_ATTR_MISS(extack, nest, type) do { \
|
|
struct netlink_ext_ack *__extack = (extack); \
|
|
\
|
|
if (__extack) { \
|
|
__extack->miss_nest = (nest); \
|
|
__extack->miss_type = (type); \
|
|
} \
|
|
} while (0)
|
|
|
|
#define NL_REQ_ATTR_CHECK(extack, nest, tb, type) ({ \
|
|
struct nlattr **__tb = (tb); \
|
|
u32 __attr = (type); \
|
|
int __retval; \
|
|
\
|
|
__retval = !__tb[__attr]; \
|
|
if (__retval) \
|
|
NL_SET_ERR_ATTR_MISS((extack), (nest), __attr); \
|
|
__retval; \
|
|
})
|
|
|
|
static inline void nl_set_extack_cookie_u64(struct netlink_ext_ack *extack,
|
|
u64 cookie)
|
|
{
|
|
if (!extack)
|
|
return;
|
|
memcpy(extack->cookie, &cookie, sizeof(cookie));
|
|
extack->cookie_len = sizeof(cookie);
|
|
}
|
|
|
|
void netlink_kernel_release(struct sock *sk);
|
|
int __netlink_change_ngroups(struct sock *sk, unsigned int groups);
|
|
int netlink_change_ngroups(struct sock *sk, unsigned int groups);
|
|
void __netlink_clear_multicast_users(struct sock *sk, unsigned int group);
|
|
void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
|
|
const struct netlink_ext_ack *extack);
|
|
int netlink_has_listeners(struct sock *sk, unsigned int group);
|
|
bool netlink_strict_get_check(struct sk_buff *skb);
|
|
|
|
int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock);
|
|
int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid,
|
|
__u32 group, gfp_t allocation);
|
|
|
|
typedef int (*netlink_filter_fn)(struct sock *dsk, struct sk_buff *skb, void *data);
|
|
|
|
int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb,
|
|
__u32 portid, __u32 group, gfp_t allocation,
|
|
netlink_filter_fn filter,
|
|
void *filter_data);
|
|
int netlink_set_err(struct sock *ssk, __u32 portid, __u32 group, int code);
|
|
int netlink_register_notifier(struct notifier_block *nb);
|
|
int netlink_unregister_notifier(struct notifier_block *nb);
|
|
|
|
/* finegrained unicast helpers: */
|
|
struct sock *netlink_getsockbyfd(int fd);
|
|
int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
|
|
long *timeo, struct sock *ssk);
|
|
void netlink_detachskb(struct sock *sk, struct sk_buff *skb);
|
|
int netlink_sendskb(struct sock *sk, struct sk_buff *skb);
|
|
|
|
static inline struct sk_buff *
|
|
netlink_skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
|
|
{
|
|
struct sk_buff *nskb;
|
|
|
|
nskb = skb_clone(skb, gfp_mask);
|
|
if (!nskb)
|
|
return NULL;
|
|
|
|
/* This is a large skb, set destructor callback to release head */
|
|
if (is_vmalloc_addr(skb->head))
|
|
nskb->destructor = skb->destructor;
|
|
|
|
return nskb;
|
|
}
|
|
|
|
/*
|
|
* skb should fit one page. This choice is good for headerless malloc.
|
|
* But we should limit to 8K so that userspace does not have to
|
|
* use enormous buffer sizes on recvmsg() calls just to avoid
|
|
* MSG_TRUNC when PAGE_SIZE is very large.
|
|
*/
|
|
#if PAGE_SIZE < 8192UL
|
|
#define NLMSG_GOODSIZE SKB_WITH_OVERHEAD(PAGE_SIZE)
|
|
#else
|
|
#define NLMSG_GOODSIZE SKB_WITH_OVERHEAD(8192UL)
|
|
#endif
|
|
|
|
#define NLMSG_DEFAULT_SIZE (NLMSG_GOODSIZE - NLMSG_HDRLEN)
|
|
|
|
|
|
struct netlink_callback {
|
|
struct sk_buff *skb;
|
|
const struct nlmsghdr *nlh;
|
|
int (*dump)(struct sk_buff * skb,
|
|
struct netlink_callback *cb);
|
|
int (*done)(struct netlink_callback *cb);
|
|
void *data;
|
|
/* the module that dump function belong to */
|
|
struct module *module;
|
|
struct netlink_ext_ack *extack;
|
|
u16 family;
|
|
u16 answer_flags;
|
|
u32 min_dump_alloc;
|
|
unsigned int prev_seq, seq;
|
|
int flags;
|
|
bool strict_check;
|
|
union {
|
|
u8 ctx[NETLINK_CTX_SIZE];
|
|
|
|
/* args is deprecated. Cast a struct over ctx instead
|
|
* for proper type safety.
|
|
*/
|
|
long args[6];
|
|
};
|
|
};
|
|
|
|
#define NL_ASSERT_CTX_FITS(type_name) \
|
|
BUILD_BUG_ON(sizeof(type_name) > \
|
|
sizeof_field(struct netlink_callback, ctx))
|
|
|
|
struct netlink_notify {
|
|
struct net *net;
|
|
u32 portid;
|
|
int protocol;
|
|
};
|
|
|
|
struct nlmsghdr *
|
|
__nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags);
|
|
|
|
struct netlink_dump_control {
|
|
int (*start)(struct netlink_callback *);
|
|
int (*dump)(struct sk_buff *skb, struct netlink_callback *);
|
|
int (*done)(struct netlink_callback *);
|
|
struct netlink_ext_ack *extack;
|
|
void *data;
|
|
struct module *module;
|
|
u32 min_dump_alloc;
|
|
int flags;
|
|
};
|
|
|
|
int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
|
|
const struct nlmsghdr *nlh,
|
|
struct netlink_dump_control *control);
|
|
static inline int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
|
|
const struct nlmsghdr *nlh,
|
|
struct netlink_dump_control *control)
|
|
{
|
|
if (!control->module)
|
|
control->module = THIS_MODULE;
|
|
|
|
return __netlink_dump_start(ssk, skb, nlh, control);
|
|
}
|
|
|
|
struct netlink_tap {
|
|
struct net_device *dev;
|
|
struct module *module;
|
|
struct list_head list;
|
|
};
|
|
|
|
int netlink_add_tap(struct netlink_tap *nt);
|
|
int netlink_remove_tap(struct netlink_tap *nt);
|
|
|
|
bool __netlink_ns_capable(const struct netlink_skb_parms *nsp,
|
|
struct user_namespace *ns, int cap);
|
|
bool netlink_ns_capable(const struct sk_buff *skb,
|
|
struct user_namespace *ns, int cap);
|
|
bool netlink_capable(const struct sk_buff *skb, int cap);
|
|
bool netlink_net_capable(const struct sk_buff *skb, int cap);
|
|
struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast);
|
|
|
|
#endif /* __LINUX_NETLINK_H */
|