mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-10 07:00:48 +00:00
77162022ab
This adds two new flags NTF_MASTER and NTF_SELF that can now be used to specify where PF_BRIDGE netlink commands should be sent. NTF_MASTER sends the commands to the 'dev->master' device for parsing. Typically this will be the linux net/bridge, or open-vswitch devices. Also without any flags set the command will be handled by the master device as well so that current user space tools continue to work as expected.

The NTF_SELF flag will push the PF_BRIDGE commands to the device. In the basic example below the commands are then parsed and programmed in the embedded bridge.

Note if both NTF_SELF and NTF_MASTER bits are set then the command will be sent to both 'dev->master' and 'dev'; this allows user space to easily keep the embedded bridge and software bridge in sync.

There is a slight complication in the case with both flags set when an error occurs. To resolve this the rtnl handler clears the NTF_ flag in the netlink ack to indicate which sets completed successfully. The add/del handlers will abort as soon as any error occurs.

To support this, new net device ops were added to call into the device and the existing bridging code was refactored to use these. There should be no required changes in user space to support the current bridge behavior.

A basic setup with a SR-IOV enabled NIC looks like this,

          veth0  veth2
            |      |
          ------------
          |  bridge0 |   <---- software bridging
          ------------
               /
               /
  ethx.y      ethx
    VF         PF
     \         \          <---- propagate FDB entries to HW
     \         \
  --------------------
  |  Embedded Bridge |    <---- hardware offloaded switching
  --------------------

In this case the embedded bridge must be managed to allow 'veth0' to communicate with 'ethx.y' correctly. At present drivers managing the embedded bridge either send frames onto the network which then get dropped by the switch OR the embedded bridge will flood these frames. With this patch we have a mechanism to manage the embedded bridge correctly from user space.
This example is specific to SR-IOV but replacing the VF with another PF or dropping this into the DSA framework generates similar management issues.

Example session using the 'br'[1] tool to add, dump and then delete a mac address with a new "embedded" option and enabled ixgbe driver:

  # br fdb add 22:35:19:ac:60:59 dev eth3
  # br fdb
  port    mac addr                flags
  veth0   22:35:19:ac:60:58       static
  veth0   9a:5f:81:f7:f6:ec       local
  eth3    00:1b:21:55:23:59       local
  eth3    22:35:19:ac:60:59       static
  veth0   22:35:19:ac:60:57       static
  # br fdb add 22:35:19:ac:60:59 embedded dev eth3
  # br fdb
  port    mac addr                flags
  veth0   22:35:19:ac:60:58       static
  veth0   9a:5f:81:f7:f6:ec       local
  eth3    00:1b:21:55:23:59       local
  eth3    22:35:19:ac:60:59       static
  veth0   22:35:19:ac:60:57       static
  eth3    22:35:19:ac:60:59       local embedded
  # br fdb del 22:35:19:ac:60:59 embedded dev eth3

I added a couple lines to 'br' to set the flags correctly is all. It is my opinion that the merit of this patch is now embedded and SW bridges can both be modeled correctly in user space using very nearly the same message passing.

[1] 'br' tool was published as an RFC here and will be renamed 'bridge'
    http://patchwork.ozlabs.org/patch/117664/

Thanks to Jamal Hadi Salim, Stephen Hemminger and Ben Hutchings for valuable feedback, suggestions, and review.

v2: fixed api descriptions and error case with both NTF_SELF and NTF_MASTER set plus updated patch description.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
160 lines
4.0 KiB
C
160 lines
4.0 KiB
C
#ifndef __LINUX_NEIGHBOUR_H
|
|
#define __LINUX_NEIGHBOUR_H
|
|
|
|
#include <linux/types.h>
|
|
#include <linux/netlink.h>
|
|
|
|
/*
 * Neighbour message header.
 *
 * The two pad members fill the gap after ndm_family so that ndm_ifindex
 * starts at offset 4; the whole structure is 12 bytes.
 *
 * NOTE(review): going by the names, ndm_state and ndm_flags carry the
 * NUD_* and NTF_* values defined in this header -- confirm with the users.
 */
struct ndmsg {
	__u8		ndm_family;
	__u8		ndm_pad1;	/* padding */
	__u16		ndm_pad2;	/* padding */
	__s32		ndm_ifindex;
	__u16		ndm_state;
	__u8		ndm_flags;
	__u8		ndm_type;
};
|
|
|
|
/*
 * NDA_* attribute identifiers.
 *
 * New identifiers must be added immediately before __NDA_MAX so that the
 * existing numeric values stay unchanged and NDA_MAX keeps tracking the
 * highest valid identifier.
 */
enum {
	NDA_UNSPEC,
	NDA_DST,
	NDA_LLADDR,
	NDA_CACHEINFO,
	NDA_PROBES,
	__NDA_MAX	/* sentinel, not an attribute */
};

/* Highest valid NDA_* identifier. */
#define NDA_MAX (__NDA_MAX - 1)
|
|
|
|
/*
 *	Neighbor Cache Entry Flags
 *
 *	Each flag is a distinct single bit, so flags can be OR-ed together.
 */

#define NTF_USE		0x01
#define NTF_PROXY	0x08	/* == ATF_PUBL */
#define NTF_ROUTER	0x80

/*
 * Routing of PF_BRIDGE netlink commands:
 *   NTF_SELF   - push the command to the device itself
 *   NTF_MASTER - send the command to the 'dev->master' device
 * Both bits may be set, in which case the command goes to both.
 */
#define NTF_SELF	0x02
#define NTF_MASTER	0x04
|
|
|
|
/*
 *	Neighbor Cache Entry States.
 *
 *	Every state except NUD_NONE is a distinct single bit.
 */

#define NUD_INCOMPLETE	0x01
#define NUD_REACHABLE	0x02
#define NUD_STALE	0x04
#define NUD_DELAY	0x08
#define NUD_PROBE	0x10
#define NUD_FAILED	0x20

/*
 * Dummy states.
 *
 * NUD_NOARP and NUD_PERMANENT are pseudostates: they never change and
 * perform no address resolution or NUD.  NUD_PERMANENT entries
 * additionally cannot be deleted by garbage collectors.
 */
#define NUD_NOARP	0x40
#define NUD_PERMANENT	0x80
#define NUD_NONE	0x00
|
|
|
|
/*
 * Cache information for a neighbour entry: four 32-bit counters,
 * 16 bytes in total.
 *
 * NOTE(review): presumably the payload of the NDA_CACHEINFO attribute;
 * the units of the first three members are not stated here -- confirm.
 */
struct nda_cacheinfo {
	__u32		ndm_confirmed;
	__u32		ndm_used;
	__u32		ndm_updated;
	__u32		ndm_refcnt;
};
|
|
|
|
/*****************************************************************
 *		Neighbour tables specific messages.
 *
 * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the
 * NLM_F_DUMP flag set. Every neighbour table configuration is
 * spread over multiple messages to avoid running into message
 * size limits on systems with many interfaces. The first message
 * in the sequence transports all non-device-specific data such as
 * statistics, configuration, and the default parameter set.
 * This message is followed by 0..n messages carrying device
 * specific parameter sets.
 * Although the ordering should be sufficient, NDTA_NAME can be
 * used to identify sequences. The initial message can be identified
 * by checking for NDTA_CONFIG. The device specific messages do
 * not contain this TLV but have NDTPA_IFINDEX set to the
 * corresponding interface index.
 *
 * To change neighbour table attributes, send RTM_SETNEIGHTBL
 * with NDTA_NAME set. Changeable attributes include NDTA_THRESH[1-3],
 * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked
 * otherwise. Device specific parameter sets can be changed by
 * setting NDTPA_IFINDEX to the interface index of the corresponding
 * device.
 ****/
|
|
|
|
/*
 * Neighbour table statistics, carried read-only in the NDTA_STATS
 * attribute.  Ten 64-bit counters, 80 bytes in total.
 */
struct ndt_stats {
	__u64		ndts_allocs;
	__u64		ndts_destroys;
	__u64		ndts_hash_grows;
	__u64		ndts_res_failed;
	__u64		ndts_lookups;
	__u64		ndts_hits;
	__u64		ndts_rcv_probes_mcast;
	__u64		ndts_rcv_probes_ucast;
	__u64		ndts_periodic_gc_runs;
	__u64		ndts_forced_gc_runs;
};
|
|
|
|
/*
 * NDTPA_* identifiers: the TLVs nested inside NDTA_PARMS (one neighbour
 * table parameter set).  Each comment gives the payload type and, where
 * applicable, the unit and whether the value can be changed.
 *
 * New identifiers must be added immediately before __NDTPA_MAX so the
 * existing numeric values stay unchanged.
 */
enum {
	NDTPA_UNSPEC,
	NDTPA_IFINDEX,			/* u32, unchangeable */
	NDTPA_REFCNT,			/* u32, read-only */
	NDTPA_REACHABLE_TIME,		/* u64, read-only, msecs */
	NDTPA_BASE_REACHABLE_TIME,	/* u64, msecs */
	NDTPA_RETRANS_TIME,		/* u64, msecs */
	NDTPA_GC_STALETIME,		/* u64, msecs */
	NDTPA_DELAY_PROBE_TIME,		/* u64, msecs */
	NDTPA_QUEUE_LEN,		/* u32 */
	NDTPA_APP_PROBES,		/* u32 */
	NDTPA_UCAST_PROBES,		/* u32 */
	NDTPA_MCAST_PROBES,		/* u32 */
	NDTPA_ANYCAST_DELAY,		/* u64, msecs */
	NDTPA_PROXY_DELAY,		/* u64, msecs */
	NDTPA_PROXY_QLEN,		/* u32 */
	NDTPA_LOCKTIME,			/* u64, msecs */
	NDTPA_QUEUE_LENBYTES,		/* u32 */
	__NDTPA_MAX			/* sentinel, not an attribute */
};

/* Highest valid NDTPA_* identifier. */
#define NDTPA_MAX (__NDTPA_MAX - 1)
|
|
|
|
/*
 * Neighbour table message header: a family byte followed by three bytes
 * of padding, 4 bytes in total.
 *
 * NOTE(review): presumably the fixed header of the RTM_GETNEIGHTBL /
 * RTM_SETNEIGHTBL messages mentioned in this header -- confirm.
 */
struct ndtmsg {
	__u8		ndtm_family;
	__u8		ndtm_pad1;	/* padding */
	__u16		ndtm_pad2;	/* padding */
};
|
|
|
|
/*
 * Neighbour table configuration, carried read-only in the NDTA_CONFIG
 * attribute.  Two 16-bit members followed by seven 32-bit members,
 * 32 bytes in total.
 */
struct ndt_config {
	__u16		ndtc_key_len;
	__u16		ndtc_entry_size;
	__u32		ndtc_entries;
	__u32		ndtc_last_flush;	/* delta to now in msecs */
	__u32		ndtc_last_rand;		/* delta to now in msecs */
	__u32		ndtc_hash_rnd;
	__u32		ndtc_hash_mask;
	__u32		ndtc_hash_chain_gc;
	__u32		ndtc_proxy_qlen;
};
|
|
|
|
/*
 * NDTA_* identifiers: top-level attributes of a neighbour table message.
 * Each comment gives the payload type and whether the value can be
 * changed.
 *
 * New identifiers must be added immediately before __NDTA_MAX so the
 * existing numeric values stay unchanged.
 */
enum {
	NDTA_UNSPEC,
	NDTA_NAME,			/* char *, unchangeable */
	NDTA_THRESH1,			/* u32 */
	NDTA_THRESH2,			/* u32 */
	NDTA_THRESH3,			/* u32 */
	NDTA_CONFIG,			/* struct ndt_config, read-only */
	NDTA_PARMS,			/* nested TLV NDTPA_* */
	NDTA_STATS,			/* struct ndt_stats, read-only */
	NDTA_GC_INTERVAL,		/* u64, msecs */
	__NDTA_MAX			/* sentinel, not an attribute */
};

/* Highest valid NDTA_* identifier. */
#define NDTA_MAX (__NDTA_MAX - 1)
|
|
|
|
#endif
|