Tanner Love 9c661b0b85 net/packet: make packet_fanout.arr size configurable up to 64K
One use case of PACKET_FANOUT is lockless reception with one socket
per CPU. 256 is a practical limit on increasingly many machines.

Increase PACKET_FANOUT_MAX to 64K. Expand setsockopt PACKET_FANOUT to
take an extra argument max_num_members. Also explicitly define a
fanout_args struct, instead of implicitly casting to an integer. This
documents the API and simplifies the control flow.

If max_num_members is not specified or is set to 0, then 256 is used,
same as before.

Signed-off-by: Tanner Love <tannerlove@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-11-09 16:41:40 -08:00

317 lines
8.0 KiB
C

/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef __LINUX_IF_PACKET_H
#define __LINUX_IF_PACKET_H
#include <asm/byteorder.h>
#include <linux/types.h>
struct sockaddr_pkt {
unsigned short spkt_family;
unsigned char spkt_device[14];
__be16 spkt_protocol;
};
struct sockaddr_ll {
unsigned short sll_family;
__be16 sll_protocol;
int sll_ifindex;
unsigned short sll_hatype;
unsigned char sll_pkttype;
unsigned char sll_halen;
unsigned char sll_addr[8];
};
/* Packet types */
#define PACKET_HOST 0 /* To us */
#define PACKET_BROADCAST 1 /* To all */
#define PACKET_MULTICAST 2 /* To group */
#define PACKET_OTHERHOST 3 /* To someone else */
#define PACKET_OUTGOING 4 /* Outgoing of any type */
#define PACKET_LOOPBACK 5 /* MC/BRD frame looped back */
#define PACKET_USER 6 /* To user space */
#define PACKET_KERNEL 7 /* To kernel space */
/* Unused, PACKET_FASTROUTE and PACKET_LOOPBACK are invisible to user space */
#define PACKET_FASTROUTE 6 /* Fastrouted frame */
/* Packet socket options */
#define PACKET_ADD_MEMBERSHIP 1
#define PACKET_DROP_MEMBERSHIP 2
#define PACKET_RECV_OUTPUT 3
/* Value 4 is still used by obsolete turbo-packet. */
#define PACKET_RX_RING 5
#define PACKET_STATISTICS 6
#define PACKET_COPY_THRESH 7
#define PACKET_AUXDATA 8
#define PACKET_ORIGDEV 9
#define PACKET_VERSION 10
#define PACKET_HDRLEN 11
#define PACKET_RESERVE 12
#define PACKET_TX_RING 13
#define PACKET_LOSS 14
#define PACKET_VNET_HDR 15
#define PACKET_TX_TIMESTAMP 16
#define PACKET_TIMESTAMP 17
#define PACKET_FANOUT 18
#define PACKET_TX_HAS_OFF 19
#define PACKET_QDISC_BYPASS 20
#define PACKET_ROLLOVER_STATS 21
#define PACKET_FANOUT_DATA 22
#define PACKET_IGNORE_OUTGOING 23
#define PACKET_FANOUT_HASH 0
#define PACKET_FANOUT_LB 1
#define PACKET_FANOUT_CPU 2
#define PACKET_FANOUT_ROLLOVER 3
#define PACKET_FANOUT_RND 4
#define PACKET_FANOUT_QM 5
#define PACKET_FANOUT_CBPF 6
#define PACKET_FANOUT_EBPF 7
#define PACKET_FANOUT_FLAG_ROLLOVER 0x1000
#define PACKET_FANOUT_FLAG_UNIQUEID 0x2000
#define PACKET_FANOUT_FLAG_DEFRAG 0x8000
struct tpacket_stats {
unsigned int tp_packets;
unsigned int tp_drops;
};
struct tpacket_stats_v3 {
unsigned int tp_packets;
unsigned int tp_drops;
unsigned int tp_freeze_q_cnt;
};
struct tpacket_rollover_stats {
__aligned_u64 tp_all;
__aligned_u64 tp_huge;
__aligned_u64 tp_failed;
};
union tpacket_stats_u {
struct tpacket_stats stats1;
struct tpacket_stats_v3 stats3;
};
struct tpacket_auxdata {
__u32 tp_status;
__u32 tp_len;
__u32 tp_snaplen;
__u16 tp_mac;
__u16 tp_net;
__u16 tp_vlan_tci;
__u16 tp_vlan_tpid;
};
/* Rx ring - header status */
#define TP_STATUS_KERNEL 0
#define TP_STATUS_USER (1 << 0)
#define TP_STATUS_COPY (1 << 1)
#define TP_STATUS_LOSING (1 << 2)
#define TP_STATUS_CSUMNOTREADY (1 << 3)
#define TP_STATUS_VLAN_VALID (1 << 4) /* auxdata has valid tp_vlan_tci */
#define TP_STATUS_BLK_TMO (1 << 5)
#define TP_STATUS_VLAN_TPID_VALID (1 << 6) /* auxdata has valid tp_vlan_tpid */
#define TP_STATUS_CSUM_VALID (1 << 7)
/* Tx ring - header status */
#define TP_STATUS_AVAILABLE 0
#define TP_STATUS_SEND_REQUEST (1 << 0)
#define TP_STATUS_SENDING (1 << 1)
#define TP_STATUS_WRONG_FORMAT (1 << 2)
/* Rx and Tx ring - header status */
#define TP_STATUS_TS_SOFTWARE (1 << 29)
#define TP_STATUS_TS_SYS_HARDWARE (1 << 30) /* deprecated, never set */
#define TP_STATUS_TS_RAW_HARDWARE (1U << 31)
/* Rx ring - feature request bits */
#define TP_FT_REQ_FILL_RXHASH 0x1
struct tpacket_hdr {
unsigned long tp_status;
unsigned int tp_len;
unsigned int tp_snaplen;
unsigned short tp_mac;
unsigned short tp_net;
unsigned int tp_sec;
unsigned int tp_usec;
};
#define TPACKET_ALIGNMENT 16
#define TPACKET_ALIGN(x) (((x)+TPACKET_ALIGNMENT-1)&~(TPACKET_ALIGNMENT-1))
#define TPACKET_HDRLEN (TPACKET_ALIGN(sizeof(struct tpacket_hdr)) + sizeof(struct sockaddr_ll))
struct tpacket2_hdr {
__u32 tp_status;
__u32 tp_len;
__u32 tp_snaplen;
__u16 tp_mac;
__u16 tp_net;
__u32 tp_sec;
__u32 tp_nsec;
__u16 tp_vlan_tci;
__u16 tp_vlan_tpid;
__u8 tp_padding[4];
};
struct tpacket_hdr_variant1 {
__u32 tp_rxhash;
__u32 tp_vlan_tci;
__u16 tp_vlan_tpid;
__u16 tp_padding;
};
struct tpacket3_hdr {
__u32 tp_next_offset;
__u32 tp_sec;
__u32 tp_nsec;
__u32 tp_snaplen;
__u32 tp_len;
__u32 tp_status;
__u16 tp_mac;
__u16 tp_net;
/* pkt_hdr variants */
union {
struct tpacket_hdr_variant1 hv1;
};
__u8 tp_padding[8];
};
struct tpacket_bd_ts {
unsigned int ts_sec;
union {
unsigned int ts_usec;
unsigned int ts_nsec;
};
};
struct tpacket_hdr_v1 {
__u32 block_status;
__u32 num_pkts;
__u32 offset_to_first_pkt;
/* Number of valid bytes (including padding)
* blk_len <= tp_block_size
*/
__u32 blk_len;
/*
* Quite a few uses of sequence number:
* 1. Make sure cache flush etc worked.
* Well, one can argue - why not use the increasing ts below?
* But look at 2. below first.
* 2. When you pass around blocks to other user space decoders,
* you can see which blk[s] is[are] outstanding etc.
* 3. Validate kernel code.
*/
__aligned_u64 seq_num;
/*
* ts_last_pkt:
*
* Case 1. Block has 'N'(N >=1) packets and TMO'd(timed out)
* ts_last_pkt == 'time-stamp of last packet' and NOT the
* time when the timer fired and the block was closed.
* By providing the ts of the last packet we can absolutely
* guarantee that time-stamp wise, the first packet in the
* next block will never precede the last packet of the
* previous block.
* Case 2. Block has zero packets and TMO'd
* ts_last_pkt = time when the timer fired and the block
* was closed.
* Case 3. Block has 'N' packets and NO TMO.
* ts_last_pkt = time-stamp of the last pkt in the block.
*
* ts_first_pkt:
* Is always the time-stamp when the block was opened.
* Case a) ZERO packets
* No packets to deal with but atleast you know the
* time-interval of this block.
* Case b) Non-zero packets
* Use the ts of the first packet in the block.
*
*/
struct tpacket_bd_ts ts_first_pkt, ts_last_pkt;
};
union tpacket_bd_header_u {
struct tpacket_hdr_v1 bh1;
};
struct tpacket_block_desc {
__u32 version;
__u32 offset_to_priv;
union tpacket_bd_header_u hdr;
};
#define TPACKET2_HDRLEN (TPACKET_ALIGN(sizeof(struct tpacket2_hdr)) + sizeof(struct sockaddr_ll))
#define TPACKET3_HDRLEN (TPACKET_ALIGN(sizeof(struct tpacket3_hdr)) + sizeof(struct sockaddr_ll))
enum tpacket_versions {
TPACKET_V1,
TPACKET_V2,
TPACKET_V3
};
/*
Frame structure:
- Start. Frame must be aligned to TPACKET_ALIGNMENT=16
- struct tpacket_hdr
- pad to TPACKET_ALIGNMENT=16
- struct sockaddr_ll
- Gap, chosen so that packet data (Start+tp_net) alignes to TPACKET_ALIGNMENT=16
- Start+tp_mac: [ Optional MAC header ]
- Start+tp_net: Packet data, aligned to TPACKET_ALIGNMENT=16.
- Pad to align to TPACKET_ALIGNMENT=16
*/
struct tpacket_req {
unsigned int tp_block_size; /* Minimal size of contiguous block */
unsigned int tp_block_nr; /* Number of blocks */
unsigned int tp_frame_size; /* Size of frame */
unsigned int tp_frame_nr; /* Total number of frames */
};
struct tpacket_req3 {
unsigned int tp_block_size; /* Minimal size of contiguous block */
unsigned int tp_block_nr; /* Number of blocks */
unsigned int tp_frame_size; /* Size of frame */
unsigned int tp_frame_nr; /* Total number of frames */
unsigned int tp_retire_blk_tov; /* timeout in msecs */
unsigned int tp_sizeof_priv; /* offset to private data area */
unsigned int tp_feature_req_word;
};
union tpacket_req_u {
struct tpacket_req req;
struct tpacket_req3 req3;
};
struct packet_mreq {
int mr_ifindex;
unsigned short mr_type;
unsigned short mr_alen;
unsigned char mr_address[8];
};
struct fanout_args {
#if defined(__LITTLE_ENDIAN_BITFIELD)
__u16 id;
__u16 type_flags;
#else
__u16 type_flags;
__u16 id;
#endif
__u32 max_num_members;
};
#define PACKET_MR_MULTICAST 0
#define PACKET_MR_PROMISC 1
#define PACKET_MR_ALLMULTI 2
#define PACKET_MR_UNICAST 3
#endif