2017-11-01 15:09:13 +01:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
2013-03-29 14:46:52 +01:00
|
|
|
|
|
|
|
/*
|
openvswitch: Add original direction conntrack tuple to sw_flow_key.
Add the fields of the conntrack original direction 5-tuple to struct
sw_flow_key. The new fields are initially marked as non-existent, and
are populated whenever a conntrack action is executed and either finds
or generates a conntrack entry. This means that these fields exist
for all packets that were not rejected by conntrack as untrackable.
The original tuple fields in the sw_flow_key are filled from the
original direction tuple of the conntrack entry relating to the
current packet, or from the original direction tuple of the master
conntrack entry, if the current conntrack entry has a master.
Generally, expected connections of connections having an assigned
helper (e.g., FTP), have a master conntrack entry.
The main purpose of the new conntrack original tuple fields is to
allow matching on them for policy decision purposes, with the premise
that the admissibility of tracked connections reply packets (as well
as original direction packets), and both direction packets of any
related connections may be based on ACL rules applying to the master
connection's original direction 5-tuple. This also makes it easier to
make policy decisions when the actual packet headers might have been
transformed by NAT, as the original direction 5-tuple represents the
packet headers before any such transformation.
When using the original direction 5-tuple the admissibility of return
and/or related packets need not be based on the mere existence of a
conntrack entry, allowing separation of admission policy from the
established conntrack state. While existence of a conntrack entry is
required for admission of the return or related packets, policy
changes can render connections that were initially admitted to be
rejected or dropped afterwards. If the admission of the return and
related packets was based on mere conntrack state (e.g., connection
being in an established state), a policy change that would make the
connection rejected or dropped would need to find and delete all
conntrack entries affected by such a change. When using the original
direction 5-tuple matching the affected conntrack entries can be
allowed to time out instead, as the established state of the
connection would not need to be the basis for packet admission any
more.
It should be noted that the directionality of related connections may
be the same or different than that of the master connection, and
neither the original direction 5-tuple nor the conntrack state bits
carry this information. If needed, the directionality of the master
connection can be stored in master's conntrack mark or labels, which
are automatically inherited by the expected related connections.
The fact that neither ARP nor ND packets are trackable by conntrack
allows mutual exclusion between ARP/ND and the new conntrack original
tuple fields. Hence, the IP addresses are overlaid in union with ARP
and ND fields. This allows the sw_flow_key to not grow much due to
this patch, but it also means that we must be careful to never use the
new key fields with ARP or ND packets. ARP is easy to distinguish and
keep mutually exclusive based on the ethernet type, but ND being an
ICMPv6 protocol requires a bit more attention.
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joe@ovn.org>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-02-09 11:21:59 -08:00
|
|
|
* Copyright (c) 2007-2017 Nicira, Inc.
|
2013-03-29 14:46:52 +01:00
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of version 2 of the GNU General Public
|
|
|
|
* License as published by the Free Software Foundation.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful, but
|
|
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
|
|
* 02110-1301, USA
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef _UAPI__LINUX_OPENVSWITCH_H
|
|
|
|
#define _UAPI__LINUX_OPENVSWITCH_H 1
|
|
|
|
|
|
|
|
#include <linux/types.h>
|
|
|
|
#include <linux/if_ether.h>
|
|
|
|
|
|
|
|
/**
 * struct ovs_header - header for OVS Generic Netlink messages.
 * @dp_ifindex: ifindex of local port for datapath (0 to make a request not
 * specific to a datapath).
 *
 * Attributes following the header are specific to a particular OVS Generic
 * Netlink family, but all of the OVS families use this header.
 */

struct ovs_header {
	int dp_ifindex;
};
|
|
|
|
|
|
|
|
/* Datapaths. */

/* Generic Netlink family and multicast group names for datapath messages. */
#define OVS_DATAPATH_FAMILY  "ovs_datapath"
#define OVS_DATAPATH_MCGROUP "ovs_datapath"

/* V2:
 *   - API users are expected to provide OVS_DP_ATTR_USER_FEATURES
 *     when creating the datapath.
 */
#define OVS_DATAPATH_VERSION 2

/* First OVS datapath version to support features */
#define OVS_DP_VER_FEATURES 2
|
2013-03-29 14:46:52 +01:00
|
|
|
|
|
|
|
/*
 * Generic Netlink commands of the datapath family.  The numeric values are
 * implied by declaration order, so new commands must only be appended.
 */
enum ovs_datapath_cmd {
	OVS_DP_CMD_UNSPEC,
	OVS_DP_CMD_NEW,
	OVS_DP_CMD_DEL,
	OVS_DP_CMD_GET,
	OVS_DP_CMD_SET
};
|
|
|
|
|
|
|
|
/**
 * enum ovs_datapath_attr - attributes for %OVS_DP_* commands.
 * @OVS_DP_ATTR_NAME: Name of the network device that serves as the "local
 * port".  This is the name of the network device whose dp_ifindex is given in
 * the &struct ovs_header.  Always present in notifications.  Required in
 * %OVS_DP_CMD_NEW requests.  May be used as an alternative to specifying
 * dp_ifindex in other requests (with a dp_ifindex of 0).
 * @OVS_DP_ATTR_UPCALL_PID: The Netlink socket in userspace that is initially
 * set on the datapath port (for %OVS_PACKET_CMD_MISS).  Only valid on
 * %OVS_DP_CMD_NEW requests.  A value of zero indicates that upcalls should
 * not be sent.
 * @OVS_DP_ATTR_PER_CPU_PIDS: Per-cpu array of PIDs for upcalls when
 * OVS_DP_F_DISPATCH_UPCALL_PER_CPU feature is set.
 * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the
 * datapath.  Always present in notifications.
 * @OVS_DP_ATTR_MEGAFLOW_STATS: Statistics about mega flow masks usage for the
 * datapath.  Always present in notifications.
 * @OVS_DP_ATTR_USER_FEATURES: %OVS_DP_F_* feature flags.
 * @OVS_DP_ATTR_IFINDEX: Interface index for a new datapath netdev.  Only
 * valid for %OVS_DP_CMD_NEW requests.
 *
 * These attributes follow the &struct ovs_header within the Generic Netlink
 * payload for %OVS_DP_* commands.
 */
enum ovs_datapath_attr {
	OVS_DP_ATTR_UNSPEC,
	OVS_DP_ATTR_NAME,		/* name of dp_ifindex netdev */
	OVS_DP_ATTR_UPCALL_PID,		/* Netlink PID to receive upcalls */
	OVS_DP_ATTR_STATS,		/* struct ovs_dp_stats */
	OVS_DP_ATTR_MEGAFLOW_STATS,	/* struct ovs_dp_megaflow_stats */
	OVS_DP_ATTR_USER_FEATURES,	/* OVS_DP_F_* */
	OVS_DP_ATTR_PAD,
	OVS_DP_ATTR_MASKS_CACHE_SIZE,
	OVS_DP_ATTR_PER_CPU_PIDS,	/* Netlink PIDS to receive upcalls in
					 * per-cpu dispatch mode
					 */
	OVS_DP_ATTR_IFINDEX,
	__OVS_DP_ATTR_MAX
};

/* Highest valid attribute number; tracks the sentinel above automatically. */
#define OVS_DP_ATTR_MAX (__OVS_DP_ATTR_MAX - 1)
|
|
|
|
|
|
|
|
/* Datapath-wide counters, carried in the OVS_DP_ATTR_STATS attribute. */
struct ovs_dp_stats {
	__u64 n_hit;		/* Number of flow table matches. */
	__u64 n_missed;		/* Number of flow table misses. */
	__u64 n_lost;		/* Number of misses not sent to userspace. */
	__u64 n_flows;		/* Number of flows present */
};
|
|
|
|
|
2013-10-22 10:42:46 -07:00
|
|
|
/* Megaflow mask counters, carried in the OVS_DP_ATTR_MEGAFLOW_STATS attribute. */
struct ovs_dp_megaflow_stats {
	__u64 n_mask_hit;	/* Number of masks used for flow lookups. */
	__u32 n_masks;		/* Number of masks for the datapath. */
	__u32 pad0;		/* Pad for future expansion. */
	__u64 n_cache_hit;	/* Number of cache matches for flow lookups. */
	__u64 pad1;		/* Pad for future expansion. */
};
|
|
|
|
|
2013-03-29 14:46:52 +01:00
|
|
|
/* Per-vport traffic counters, carried in the OVS_VPORT_ATTR_STATS attribute. */
struct ovs_vport_stats {
	__u64 rx_packets;	/* total packets received */
	__u64 tx_packets;	/* total packets transmitted */
	__u64 rx_bytes;		/* total bytes received */
	__u64 tx_bytes;		/* total bytes transmitted */
	__u64 rx_errors;	/* bad packets received */
	__u64 tx_errors;	/* packet transmit problems */
	__u64 rx_dropped;	/* no space in linux buffers */
	__u64 tx_dropped;	/* no space available in linux */
};
|
|
|
|
|
2013-12-13 15:22:18 +01:00
|
|
|
/*
 * Datapath feature flags (OVS_DP_F_*).  Each flag occupies its own bit so
 * that several of them can be combined in a single value.
 */

/* Allow last Netlink attribute to be unaligned */
#define OVS_DP_F_UNALIGNED	(1 << 0)

/* Allow datapath to associate multiple Netlink PIDs to each vport */
#define OVS_DP_F_VPORT_PIDS	(1 << 1)

/* Allow tc offload recirc sharing */
#define OVS_DP_F_TC_RECIRC_SHARING	(1 << 2)

/* Allow per-cpu dispatch of upcalls */
#define OVS_DP_F_DISPATCH_UPCALL_PER_CPU	(1 << 3)
|
|
|
|
|
2013-03-29 14:46:52 +01:00
|
|
|
/* Fixed logical ports. */
#define OVSP_LOCAL      ((__u32)0)
|
|
|
|
|
|
|
|
/* Packet transfer. */

/* Generic Netlink family name and version for packet messages. */
#define OVS_PACKET_FAMILY "ovs_packet"
#define OVS_PACKET_VERSION 0x1
|
|
|
|
|
|
|
|
/*
 * Generic Netlink commands of the packet family.  The numeric values are
 * implied by declaration order, so new commands must only be appended.
 */
enum ovs_packet_cmd {
	OVS_PACKET_CMD_UNSPEC,

	/* Kernel-to-user notifications. */
	OVS_PACKET_CMD_MISS,    /* Flow table miss. */
	OVS_PACKET_CMD_ACTION,  /* OVS_ACTION_ATTR_USERSPACE action. */

	/* Userspace commands. */
	OVS_PACKET_CMD_EXECUTE  /* Apply actions to a packet. */
};
|
|
|
|
|
|
|
|
/**
 * enum ovs_packet_attr - attributes for %OVS_PACKET_* commands.
 * @OVS_PACKET_ATTR_PACKET: Present for all notifications.  Contains the entire
 * packet as received, from the start of the Ethernet header onward.  For
 * %OVS_PACKET_CMD_ACTION, %OVS_PACKET_ATTR_PACKET reflects changes made by
 * actions preceding %OVS_ACTION_ATTR_USERSPACE, but %OVS_PACKET_ATTR_KEY is
 * the flow key extracted from the packet as originally received.
 * @OVS_PACKET_ATTR_KEY: Present for all notifications.  Contains the flow key
 * extracted from the packet as nested %OVS_KEY_ATTR_* attributes.  This allows
 * userspace to adapt its flow setup strategy by comparing its notion of the
 * flow key against the kernel's.
 * @OVS_PACKET_ATTR_ACTIONS: Contains actions for the packet.  Used
 * for %OVS_PACKET_CMD_EXECUTE.  It has nested %OVS_ACTION_ATTR_* attributes.
 * Also used in upcall when %OVS_ACTION_ATTR_USERSPACE has optional
 * %OVS_USERSPACE_ATTR_ACTIONS attribute.
 * @OVS_PACKET_ATTR_USERDATA: Present for an %OVS_PACKET_CMD_ACTION
 * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an
 * %OVS_USERSPACE_ATTR_USERDATA attribute, with the same length and content
 * specified there.
 * @OVS_PACKET_ATTR_EGRESS_TUN_KEY: Present for an %OVS_PACKET_CMD_ACTION
 * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an
 * %OVS_USERSPACE_ATTR_EGRESS_TUN_PORT attribute, which is sent only if the
 * output port is actually a tunnel port.  Contains the output tunnel key
 * extracted from the packet as nested %OVS_TUNNEL_KEY_ATTR_* attributes.
 * @OVS_PACKET_ATTR_PROBE: Packet operation is a feature probe; error logging
 * should be suppressed.
 * @OVS_PACKET_ATTR_MRU: Present for an %OVS_PACKET_CMD_ACTION and
 * %OVS_ACTION_ATTR_USERSPACE action; specifies the maximum received fragment
 * size.
 * @OVS_PACKET_ATTR_LEN: Packet size before truncation.
 * @OVS_PACKET_ATTR_HASH: Packet hash info (e.g. hash, sw_hash and l4_hash in skb).
 *
 * These attributes follow the &struct ovs_header within the Generic Netlink
 * payload for %OVS_PACKET_* commands.
 */
enum ovs_packet_attr {
	OVS_PACKET_ATTR_UNSPEC,
	OVS_PACKET_ATTR_PACKET,      /* Packet data. */
	OVS_PACKET_ATTR_KEY,         /* Nested OVS_KEY_ATTR_* attributes. */
	OVS_PACKET_ATTR_ACTIONS,     /* Nested OVS_ACTION_ATTR_* attributes. */
	OVS_PACKET_ATTR_USERDATA,    /* OVS_ACTION_ATTR_USERSPACE arg. */
	OVS_PACKET_ATTR_EGRESS_TUN_KEY,  /* Nested OVS_TUNNEL_KEY_ATTR_*
					    attributes. */
	OVS_PACKET_ATTR_UNUSED1,
	OVS_PACKET_ATTR_UNUSED2,
	OVS_PACKET_ATTR_PROBE,      /* Packet operation is a feature probe,
				       error logging should be suppressed. */
	OVS_PACKET_ATTR_MRU,	    /* Maximum received IP fragment size. */
	OVS_PACKET_ATTR_LEN,	    /* Packet size before truncation. */
	OVS_PACKET_ATTR_HASH,	    /* Packet hash. */
	__OVS_PACKET_ATTR_MAX
};

/* Highest valid attribute number; tracks the sentinel above automatically. */
#define OVS_PACKET_ATTR_MAX (__OVS_PACKET_ATTR_MAX - 1)
|
|
|
|
|
|
|
|
/* Virtual ports. */

/* Generic Netlink family, multicast group and version for vport messages. */
#define OVS_VPORT_FAMILY  "ovs_vport"
#define OVS_VPORT_MCGROUP "ovs_vport"
#define OVS_VPORT_VERSION 0x1
|
|
|
|
|
|
|
|
/*
 * Generic Netlink commands of the vport family.  The numeric values are
 * implied by declaration order, so new commands must only be appended.
 */
enum ovs_vport_cmd {
	OVS_VPORT_CMD_UNSPEC,
	OVS_VPORT_CMD_NEW,
	OVS_VPORT_CMD_DEL,
	OVS_VPORT_CMD_GET,
	OVS_VPORT_CMD_SET
};
|
|
|
|
|
|
|
|
/* Values of the OVS_VPORT_ATTR_TYPE attribute (OVS_VPORT_TYPE_* constants). */
enum ovs_vport_type {
	OVS_VPORT_TYPE_UNSPEC,
	OVS_VPORT_TYPE_NETDEV,   /* network device */
	OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */
	OVS_VPORT_TYPE_GRE,      /* GRE tunnel. */
	OVS_VPORT_TYPE_VXLAN,	 /* VXLAN tunnel. */
	OVS_VPORT_TYPE_GENEVE,	 /* Geneve tunnel. */
	__OVS_VPORT_TYPE_MAX
};

/* Highest valid vport type; tracks the sentinel above automatically. */
#define OVS_VPORT_TYPE_MAX (__OVS_VPORT_TYPE_MAX - 1)
|
|
|
|
|
|
|
|
/**
 * enum ovs_vport_attr - attributes for %OVS_VPORT_* commands.
 * @OVS_VPORT_ATTR_PORT_NO: 32-bit port number within datapath.
 * @OVS_VPORT_ATTR_TYPE: 32-bit %OVS_VPORT_TYPE_* constant describing the type
 * of vport.
 * @OVS_VPORT_ATTR_NAME: Name of vport.  For a vport based on a network device
 * this is the name of the network device.  Maximum length %IFNAMSIZ-1 bytes
 * plus a null terminator.
 * @OVS_VPORT_ATTR_OPTIONS: Vport-specific configuration information.
 * @OVS_VPORT_ATTR_UPCALL_PID: The array of Netlink socket pids in userspace
 * among which OVS_PACKET_CMD_MISS upcalls will be distributed for packets
 * received on this port.  If this is a single-element array of value 0,
 * upcalls should not be sent.
 * @OVS_VPORT_ATTR_STATS: A &struct ovs_vport_stats giving statistics for
 * packets sent or received through the vport.
 *
 * These attributes follow the &struct ovs_header within the Generic Netlink
 * payload for %OVS_VPORT_* commands.
 *
 * For %OVS_VPORT_CMD_NEW requests, the %OVS_VPORT_ATTR_TYPE and
 * %OVS_VPORT_ATTR_NAME attributes are required.  %OVS_VPORT_ATTR_PORT_NO is
 * optional; if not specified a free port number is automatically selected.
 * Whether %OVS_VPORT_ATTR_OPTIONS is required or optional depends on the type
 * of vport.
 *
 * For other requests, if %OVS_VPORT_ATTR_NAME is specified then it is used to
 * look up the vport to operate on; otherwise dp_ifindex from the &struct
 * ovs_header plus %OVS_VPORT_ATTR_PORT_NO determine the vport.
 */
enum ovs_vport_attr {
	OVS_VPORT_ATTR_UNSPEC,
	OVS_VPORT_ATTR_PORT_NO,	/* u32 port number within datapath */
	OVS_VPORT_ATTR_TYPE,	/* u32 OVS_VPORT_TYPE_* constant. */
	OVS_VPORT_ATTR_NAME,	/* string name, up to IFNAMSIZ bytes long */
	OVS_VPORT_ATTR_OPTIONS, /* nested attributes, varies by vport type */
	OVS_VPORT_ATTR_UPCALL_PID, /* array of u32 Netlink socket PIDs for */
				/* receiving upcalls */
	OVS_VPORT_ATTR_STATS,	/* struct ovs_vport_stats */
	OVS_VPORT_ATTR_PAD,
	OVS_VPORT_ATTR_IFINDEX,
	OVS_VPORT_ATTR_NETNSID,
	OVS_VPORT_ATTR_UPCALL_STATS,
	__OVS_VPORT_ATTR_MAX
};

/* Highest valid attribute number; tracks the sentinel above automatically. */
#define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1)
|
|
|
|
|
2022-12-06 20:38:57 -05:00
|
|
|
/**
 * enum ovs_vport_upcall_attr - attributes for %OVS_VPORT_UPCALL* commands
 * @OVS_VPORT_UPCALL_ATTR_SUCCESS: 64-bit upcall success packets.
 * @OVS_VPORT_UPCALL_ATTR_FAIL: 64-bit upcall fail packets.
 */
enum ovs_vport_upcall_attr {
	OVS_VPORT_UPCALL_ATTR_SUCCESS,
	OVS_VPORT_UPCALL_ATTR_FAIL,
	__OVS_VPORT_UPCALL_ATTR_MAX
};

/* Highest valid attribute number; tracks the sentinel above automatically. */
#define OVS_VPORT_UPCALL_ATTR_MAX (__OVS_VPORT_UPCALL_ATTR_MAX - 1)
|
|
|
|
|
2015-01-15 03:53:59 +01:00
|
|
|
/* OVS_VXLAN_EXT_* constants. */
enum {
	OVS_VXLAN_EXT_UNSPEC,
	OVS_VXLAN_EXT_GBP,	/* Flag or __u32 */
	__OVS_VXLAN_EXT_MAX,
};

/* Highest valid extension number; tracks the sentinel above automatically. */
#define OVS_VXLAN_EXT_MAX (__OVS_VXLAN_EXT_MAX - 1)
|
|
|
|
|
|
|
|
|
2013-08-19 11:23:34 -07:00
|
|
|
/* OVS_VPORT_ATTR_OPTIONS attributes for tunnels.
 */
enum {
	OVS_TUNNEL_ATTR_UNSPEC,
	OVS_TUNNEL_ATTR_DST_PORT, /* 16-bit UDP port, used by L4 tunnels. */
	OVS_TUNNEL_ATTR_EXTENSION,
	__OVS_TUNNEL_ATTR_MAX
};

/* Highest valid attribute number; tracks the sentinel above automatically. */
#define OVS_TUNNEL_ATTR_MAX (__OVS_TUNNEL_ATTR_MAX - 1)
|
|
|
|
|
2013-03-29 14:46:52 +01:00
|
|
|
/* Flows. */

/* Generic Netlink family, multicast group and version for flow messages. */
#define OVS_FLOW_FAMILY  "ovs_flow"
#define OVS_FLOW_MCGROUP "ovs_flow"
#define OVS_FLOW_VERSION 0x1
|
|
|
|
|
|
|
|
/*
 * Generic Netlink commands of the flow family.  The numeric values are
 * implied by declaration order, so new commands must only be appended.
 */
enum ovs_flow_cmd {
	OVS_FLOW_CMD_UNSPEC,
	OVS_FLOW_CMD_NEW,
	OVS_FLOW_CMD_DEL,
	OVS_FLOW_CMD_GET,
	OVS_FLOW_CMD_SET
};
|
|
|
|
|
|
|
|
/* Per-flow match counters. */
struct ovs_flow_stats {
	__u64 n_packets;	/* Number of matched packets. */
	__u64 n_bytes;		/* Number of matched bytes. */
};
|
|
|
|
|
|
|
|
/*
 * Flow key attribute types.  The numeric values are part of the netlink ABI,
 * so the order of the enumerators must never change.
 */
enum ovs_key_attr {
	OVS_KEY_ATTR_UNSPEC,
	OVS_KEY_ATTR_ENCAP,	/* Nested set of encapsulated attributes. */
	OVS_KEY_ATTR_PRIORITY,  /* u32 skb->priority */
	OVS_KEY_ATTR_IN_PORT,   /* u32 OVS dp port number */
	OVS_KEY_ATTR_ETHERNET,  /* struct ovs_key_ethernet */
	OVS_KEY_ATTR_VLAN,	/* be16 VLAN TCI */
	OVS_KEY_ATTR_ETHERTYPE,	/* be16 Ethernet type */
	OVS_KEY_ATTR_IPV4,      /* struct ovs_key_ipv4 */
	OVS_KEY_ATTR_IPV6,      /* struct ovs_key_ipv6 */
	OVS_KEY_ATTR_TCP,       /* struct ovs_key_tcp */
	OVS_KEY_ATTR_UDP,       /* struct ovs_key_udp */
	OVS_KEY_ATTR_ICMP,      /* struct ovs_key_icmp */
	OVS_KEY_ATTR_ICMPV6,    /* struct ovs_key_icmpv6 */
	OVS_KEY_ATTR_ARP,       /* struct ovs_key_arp */
	OVS_KEY_ATTR_ND,        /* struct ovs_key_nd */
	OVS_KEY_ATTR_SKB_MARK,  /* u32 skb mark */
	OVS_KEY_ATTR_TUNNEL,    /* Nested set of ovs_tunnel attributes */
	OVS_KEY_ATTR_SCTP,      /* struct ovs_key_sctp */
	OVS_KEY_ATTR_TCP_FLAGS,	/* be16 TCP flags. */
	OVS_KEY_ATTR_DP_HASH,      /* u32 hash value. Value 0 indicates the hash
				   is not computed by the datapath. */
	OVS_KEY_ATTR_RECIRC_ID, /* u32 recirc id */
	OVS_KEY_ATTR_MPLS,      /* array of struct ovs_key_mpls.
				 * The implementation may restrict
				 * the accepted length of the array. */
	OVS_KEY_ATTR_CT_STATE,	/* u32 bitmask of OVS_CS_F_* */
	OVS_KEY_ATTR_CT_ZONE,	/* u16 connection tracking zone. */
	OVS_KEY_ATTR_CT_MARK,	/* u32 connection tracking mark */
	OVS_KEY_ATTR_CT_LABELS,	/* 16-octet connection tracking label */
	OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4,   /* struct ovs_key_ct_tuple_ipv4 */
	OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6,   /* struct ovs_key_ct_tuple_ipv6 */
	OVS_KEY_ATTR_NSH,       /* Nested set of ovs_nsh_key_* */

	/* User space decided to squat on types 29 and 30.  They are defined
	 * below, but should not be sent to the kernel.
	 *
	 * WARNING: No new types should be added unless they are defined
	 *          for both kernel and user space (no 'ifdef's).  It's hard
	 *          to keep compatibility otherwise.
	 */
	OVS_KEY_ATTR_PACKET_TYPE,   /* be32 packet type */
	OVS_KEY_ATTR_ND_EXTENSIONS, /* IPv6 Neighbor Discovery extensions */

	OVS_KEY_ATTR_TUNNEL_INFO,   /* struct ip_tunnel_info.
				     * For in-kernel use only.
				     */
	OVS_KEY_ATTR_IPV6_EXTHDRS,  /* struct ovs_key_ipv6_exthdrs */
	__OVS_KEY_ATTR_MAX
};

/* Highest valid OVS_KEY_ATTR_* attribute number. */
#define OVS_KEY_ATTR_MAX (__OVS_KEY_ATTR_MAX - 1)
|
|
|
|
|
2013-06-17 17:50:18 -07:00
|
|
|
/* Attributes nested inside OVS_KEY_ATTR_TUNNEL. */
enum ovs_tunnel_key_attr {
	/* OVS_TUNNEL_KEY_ATTR_NONE, standard nl API requires this attribute! */
	OVS_TUNNEL_KEY_ATTR_ID               = 0,  /* be64 Tunnel ID */
	OVS_TUNNEL_KEY_ATTR_IPV4_SRC         = 1,  /* be32 src IP address. */
	OVS_TUNNEL_KEY_ATTR_IPV4_DST         = 2,  /* be32 dst IP address. */
	OVS_TUNNEL_KEY_ATTR_TOS              = 3,  /* u8 Tunnel IP ToS. */
	OVS_TUNNEL_KEY_ATTR_TTL              = 4,  /* u8 Tunnel IP TTL. */
	OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT    = 5,  /* No argument, set DF. */
	OVS_TUNNEL_KEY_ATTR_CSUM             = 6,  /* No argument. CSUM packet. */
	OVS_TUNNEL_KEY_ATTR_OAM              = 7,  /* No argument. OAM frame. */
	OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS      = 8,  /* Array of Geneve options. */
	OVS_TUNNEL_KEY_ATTR_TP_SRC           = 9,  /* be16 src Transport Port. */
	OVS_TUNNEL_KEY_ATTR_TP_DST           = 10, /* be16 dst Transport Port. */
	OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS       = 11, /* Nested OVS_VXLAN_EXT_* */
	OVS_TUNNEL_KEY_ATTR_IPV6_SRC         = 12, /* struct in6_addr src IPv6 address. */
	OVS_TUNNEL_KEY_ATTR_IPV6_DST         = 13, /* struct in6_addr dst IPv6 address. */
	OVS_TUNNEL_KEY_ATTR_PAD              = 14,
	OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS      = 15, /* struct erspan_metadata */
	OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE = 16, /* No argument. IPV4_INFO_BRIDGE mode.*/
	__OVS_TUNNEL_KEY_ATTR_MAX
};

/* Highest valid OVS_TUNNEL_KEY_ATTR_* attribute number. */
#define OVS_TUNNEL_KEY_ATTR_MAX (__OVS_TUNNEL_KEY_ATTR_MAX - 1)
|
|
|
|
|
2013-03-29 14:46:52 +01:00
|
|
|
/**
 * enum ovs_frag_type - IPv4 and IPv6 fragment type
 * @OVS_FRAG_TYPE_NONE: Packet is not a fragment.
 * @OVS_FRAG_TYPE_FIRST: Packet is a fragment with offset 0.
 * @OVS_FRAG_TYPE_LATER: Packet is a fragment with nonzero offset.
 *
 * Stored in the @ipv4_frag member of &struct ovs_key_ipv4 and in the
 * @ipv6_frag member of &struct ovs_key_ipv6.
 */
enum ovs_frag_type {
	OVS_FRAG_TYPE_NONE  = 0,
	OVS_FRAG_TYPE_FIRST = 1,
	OVS_FRAG_TYPE_LATER = 2,
	__OVS_FRAG_TYPE_MAX
};

/* Highest valid enum ovs_frag_type value. */
#define OVS_FRAG_TYPE_MAX (__OVS_FRAG_TYPE_MAX - 1)
|
|
|
|
|
|
|
|
struct ovs_key_ethernet {
|
|
|
|
__u8 eth_src[ETH_ALEN];
|
|
|
|
__u8 eth_dst[ETH_ALEN];
|
|
|
|
};
|
|
|
|
|
2014-10-06 05:05:13 -07:00
|
|
|
struct ovs_key_mpls {
|
|
|
|
__be32 mpls_lse;
|
|
|
|
};
|
|
|
|
|
2013-03-29 14:46:52 +01:00
|
|
|
struct ovs_key_ipv4 {
|
|
|
|
__be32 ipv4_src;
|
|
|
|
__be32 ipv4_dst;
|
|
|
|
__u8 ipv4_proto;
|
|
|
|
__u8 ipv4_tos;
|
|
|
|
__u8 ipv4_ttl;
|
|
|
|
__u8 ipv4_frag; /* One of OVS_FRAG_TYPE_*. */
|
|
|
|
};
|
|
|
|
|
|
|
|
struct ovs_key_ipv6 {
|
|
|
|
__be32 ipv6_src[4];
|
|
|
|
__be32 ipv6_dst[4];
|
|
|
|
__be32 ipv6_label; /* 20-bits in least-significant bits. */
|
|
|
|
__u8 ipv6_proto;
|
|
|
|
__u8 ipv6_tclass;
|
|
|
|
__u8 ipv6_hlimit;
|
|
|
|
__u8 ipv6_frag; /* One of OVS_FRAG_TYPE_*. */
|
|
|
|
};
|
|
|
|
|
2022-02-23 16:54:09 -08:00
|
|
|
/* separate structure to support backward compatibility with older user space */
|
|
|
|
struct ovs_key_ipv6_exthdrs {
|
|
|
|
__u16 hdrs;
|
|
|
|
};
|
|
|
|
|
2013-03-29 14:46:52 +01:00
|
|
|
struct ovs_key_tcp {
|
|
|
|
__be16 tcp_src;
|
|
|
|
__be16 tcp_dst;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct ovs_key_udp {
|
|
|
|
__be16 udp_src;
|
|
|
|
__be16 udp_dst;
|
|
|
|
};
|
|
|
|
|
2013-08-22 12:30:48 -07:00
|
|
|
struct ovs_key_sctp {
|
|
|
|
__be16 sctp_src;
|
|
|
|
__be16 sctp_dst;
|
|
|
|
};
|
|
|
|
|
2013-03-29 14:46:52 +01:00
|
|
|
struct ovs_key_icmp {
|
|
|
|
__u8 icmp_type;
|
|
|
|
__u8 icmp_code;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct ovs_key_icmpv6 {
|
|
|
|
__u8 icmpv6_type;
|
|
|
|
__u8 icmpv6_code;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct ovs_key_arp {
|
|
|
|
__be32 arp_sip;
|
|
|
|
__be32 arp_tip;
|
|
|
|
__be16 arp_op;
|
|
|
|
__u8 arp_sha[ETH_ALEN];
|
|
|
|
__u8 arp_tha[ETH_ALEN];
|
|
|
|
};
|
|
|
|
|
|
|
|
struct ovs_key_nd {
|
2014-09-30 10:52:32 -07:00
|
|
|
__be32 nd_target[4];
|
|
|
|
__u8 nd_sll[ETH_ALEN];
|
|
|
|
__u8 nd_tll[ETH_ALEN];
|
2013-03-29 14:46:52 +01:00
|
|
|
};
|
|
|
|
|
2017-02-09 11:21:55 -08:00
|
|
|
#define OVS_CT_LABELS_LEN_32 4
|
|
|
|
#define OVS_CT_LABELS_LEN (OVS_CT_LABELS_LEN_32 * sizeof(__u32))
|
2015-10-01 15:00:37 -07:00
|
|
|
struct ovs_key_ct_labels {
|
2017-02-09 11:21:55 -08:00
|
|
|
union {
|
|
|
|
__u8 ct_labels[OVS_CT_LABELS_LEN];
|
|
|
|
__u32 ct_labels_32[OVS_CT_LABELS_LEN_32];
|
|
|
|
};
|
2015-08-26 11:31:52 -07:00
|
|
|
};
|
|
|
|
|
2015-08-26 11:31:48 -07:00
|
|
|
/* OVS_KEY_ATTR_CT_STATE flags */
#define OVS_CS_F_NEW		(1 << 0) /* Beginning of a new connection. */
#define OVS_CS_F_ESTABLISHED	(1 << 1) /* Part of an existing connection. */
#define OVS_CS_F_RELATED	(1 << 2) /* Related to an established
					  * connection. */
#define OVS_CS_F_REPLY_DIR	(1 << 3) /* Flow is in the reply direction. */
#define OVS_CS_F_INVALID	(1 << 4) /* Could not track connection. */
#define OVS_CS_F_TRACKED	(1 << 5) /* Conntrack has occurred. */
#define OVS_CS_F_SRC_NAT	(1 << 6) /* Packet's source address/port was
					  * mangled by NAT.
					  */
#define OVS_CS_F_DST_NAT	(1 << 7) /* Packet's destination address/port
					  * was mangled by NAT.
					  */

/* Both NAT state bits together. */
#define OVS_CS_F_NAT_MASK (OVS_CS_F_SRC_NAT | OVS_CS_F_DST_NAT)
|
2015-08-26 11:31:48 -07:00
|
|
|
|
openvswitch: Add original direction conntrack tuple to sw_flow_key.
Add the fields of the conntrack original direction 5-tuple to struct
sw_flow_key. The new fields are initially marked as non-existent, and
are populated whenever a conntrack action is executed and either finds
or generates a conntrack entry. This means that these fields exist
for all packets that were not rejected by conntrack as untrackable.
The original tuple fields in the sw_flow_key are filled from the
original direction tuple of the conntrack entry relating to the
current packet, or from the original direction tuple of the master
conntrack entry, if the current conntrack entry has a master.
Generally, expected connections of connections having an assigned
helper (e.g., FTP), have a master conntrack entry.
The main purpose of the new conntrack original tuple fields is to
allow matching on them for policy decision purposes, with the premise
that the admissibility of tracked connections reply packets (as well
as original direction packets), and both direction packets of any
related connections may be based on ACL rules applying to the master
connection's original direction 5-tuple. This also makes it easier to
make policy decisions when the actual packet headers might have been
transformed by NAT, as the original direction 5-tuple represents the
packet headers before any such transformation.
When using the original direction 5-tuple the admissibility of return
and/or related packets need not be based on the mere existence of a
conntrack entry, allowing separation of admission policy from the
established conntrack state. While existence of a conntrack entry is
required for admission of the return or related packets, policy
changes can render connections that were initially admitted to be
rejected or dropped afterwards. If the admission of the return and
related packets was based on mere conntrack state (e.g., connection
being in an established state), a policy change that would make the
connection rejected or dropped would need to find and delete all
conntrack entries affected by such a change. When using the original
direction 5-tuple matching the affected conntrack entries can be
allowed to time out instead, as the established state of the
connection would not need to be the basis for packet admission any
more.
It should be noted that the directionality of related connections may
be the same or different than that of the master connection, and
neither the original direction 5-tuple nor the conntrack state bits
carry this information. If needed, the directionality of the master
connection can be stored in master's conntrack mark or labels, which
are automatically inherited by the expected related connections.
The fact that neither ARP nor ND packets are trackable by conntrack
allows mutual exclusion between ARP/ND and the new conntrack original
tuple fields. Hence, the IP addresses are overlaid in union with ARP
and ND fields. This allows the sw_flow_key to not grow much due to
this patch, but it also means that we must be careful to never use the
new key fields with ARP or ND packets. ARP is easy to distinguish and
keep mutually exclusive based on the ethernet type, but ND being an
ICMPv6 protocol requires a bit more attention.
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joe@ovn.org>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-02-09 11:21:59 -08:00
|
|
|
struct ovs_key_ct_tuple_ipv4 {
|
|
|
|
__be32 ipv4_src;
|
|
|
|
__be32 ipv4_dst;
|
|
|
|
__be16 src_port;
|
|
|
|
__be16 dst_port;
|
|
|
|
__u8 ipv4_proto;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct ovs_key_ct_tuple_ipv6 {
|
|
|
|
__be32 ipv6_src[4];
|
|
|
|
__be32 ipv6_dst[4];
|
|
|
|
__be16 src_port;
|
|
|
|
__be16 dst_port;
|
|
|
|
__u8 ipv6_proto;
|
|
|
|
};
|
|
|
|
|
2017-11-07 21:07:02 +08:00
|
|
|
/* Attributes nested inside OVS_KEY_ATTR_NSH. */
enum ovs_nsh_key_attr {
	OVS_NSH_KEY_ATTR_UNSPEC = 0,
	OVS_NSH_KEY_ATTR_BASE   = 1,  /* struct ovs_nsh_key_base. */
	OVS_NSH_KEY_ATTR_MD1    = 2,  /* struct ovs_nsh_key_md1. */
	OVS_NSH_KEY_ATTR_MD2    = 3,  /* variable-length octets for MD type 2. */
	__OVS_NSH_KEY_ATTR_MAX
};

/* Highest valid OVS_NSH_KEY_ATTR_* attribute number. */
#define OVS_NSH_KEY_ATTR_MAX (__OVS_NSH_KEY_ATTR_MAX - 1)
|
|
|
|
|
|
|
|
struct ovs_nsh_key_base {
|
|
|
|
__u8 flags;
|
|
|
|
__u8 ttl;
|
|
|
|
__u8 mdtype;
|
|
|
|
__u8 np;
|
|
|
|
__be32 path_hdr;
|
|
|
|
};
|
|
|
|
|
|
|
|
#define NSH_MD1_CONTEXT_SIZE 4
|
|
|
|
|
|
|
|
struct ovs_nsh_key_md1 {
|
|
|
|
__be32 context[NSH_MD1_CONTEXT_SIZE];
|
|
|
|
};
|
|
|
|
|
2013-03-29 14:46:52 +01:00
|
|
|
/**
 * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands.
 * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow
 * key.  Always present in notifications.  Required for all requests (except
 * dumps).
 * @OVS_FLOW_ATTR_ACTIONS: Nested %OVS_ACTION_ATTR_* attributes specifying
 * the actions to take for packets that match the key.  Always present in
 * notifications.  Required for %OVS_FLOW_CMD_NEW requests, optional for
 * %OVS_FLOW_CMD_SET requests.  An %OVS_FLOW_CMD_SET without
 * %OVS_FLOW_ATTR_ACTIONS will not modify the actions.  To clear the actions,
 * an %OVS_FLOW_ATTR_ACTIONS without any nested attributes must be given.
 * @OVS_FLOW_ATTR_STATS: &struct ovs_flow_stats giving statistics for this
 * flow.  Present in notifications if the stats would be nonzero.  Ignored in
 * requests.
 * @OVS_FLOW_ATTR_TCP_FLAGS: An 8-bit value giving the OR'd value of all of the
 * TCP flags seen on packets in this flow.  Only present in notifications for
 * TCP flows, and only if it would be nonzero.  Ignored in requests.
 * @OVS_FLOW_ATTR_USED: A 64-bit integer giving the time, in milliseconds on
 * the system monotonic clock, at which a packet was last processed for this
 * flow.  Only present in notifications if a packet has been processed for this
 * flow.  Ignored in requests.
 * @OVS_FLOW_ATTR_CLEAR: If present in a %OVS_FLOW_CMD_SET request, clears the
 * last-used time, accumulated TCP flags, and statistics for this flow.
 * Otherwise ignored in requests.  Never present in notifications.
 * @OVS_FLOW_ATTR_MASK: Nested %OVS_KEY_ATTR_* attributes specifying the
 * mask bits for wildcarded flow match.  Mask bit value '1' specifies exact
 * match with corresponding flow key bit, while mask bit value '0' specifies
 * a wildcarded match.  Omitting attribute is treated as wildcarding all
 * corresponding fields.  Optional for all requests.  If not present,
 * all flow key bits are exact match bits.
 * @OVS_FLOW_ATTR_PROBE: Marks the flow operation as a feature probe; error
 * logging should be suppressed.
 * @OVS_FLOW_ATTR_UFID: A value between 1-16 octets specifying a unique
 * identifier for the flow.  Causes the flow to be indexed by this value rather
 * than the value of the %OVS_FLOW_ATTR_KEY attribute.  Optional for all
 * requests.  Present in notifications if the flow was created with this
 * attribute.
 * @OVS_FLOW_ATTR_UFID_FLAGS: A 32-bit value of OR'd %OVS_UFID_F_*
 * flags that provide alternative semantics for flow installation and
 * retrieval.  Optional for all requests.
 *
 * These attributes follow the &struct ovs_header within the Generic Netlink
 * payload for %OVS_FLOW_* commands.
 */
enum ovs_flow_attr {
	OVS_FLOW_ATTR_UNSPEC     = 0,
	OVS_FLOW_ATTR_KEY        = 1,  /* Sequence of OVS_KEY_ATTR_* attributes. */
	OVS_FLOW_ATTR_ACTIONS    = 2,  /* Nested OVS_ACTION_ATTR_* attributes. */
	OVS_FLOW_ATTR_STATS      = 3,  /* struct ovs_flow_stats. */
	OVS_FLOW_ATTR_TCP_FLAGS  = 4,  /* 8-bit OR'd TCP flags. */
	OVS_FLOW_ATTR_USED       = 5,  /* u64 msecs last used in monotonic time. */
	OVS_FLOW_ATTR_CLEAR      = 6,  /* Flag to clear stats, tcp_flags, used. */
	OVS_FLOW_ATTR_MASK       = 7,  /* Sequence of OVS_KEY_ATTR_* attributes. */
	OVS_FLOW_ATTR_PROBE      = 8,  /* Flow operation is a feature probe, error
					* logging should be suppressed. */
	OVS_FLOW_ATTR_UFID       = 9,  /* Variable length unique flow identifier. */
	OVS_FLOW_ATTR_UFID_FLAGS = 10, /* u32 of OVS_UFID_F_*. */
	OVS_FLOW_ATTR_PAD        = 11,
	__OVS_FLOW_ATTR_MAX
};

/* Highest valid OVS_FLOW_ATTR_* attribute number. */
#define OVS_FLOW_ATTR_MAX (__OVS_FLOW_ATTR_MAX - 1)
|
|
|
|
|
2015-01-21 16:42:52 -08:00
|
|
|
/**
 * Omit attributes for notifications.
 *
 * When a datapath request carries one of these %OVS_UFID_F_OMIT_* flags, the
 * datapath may leave the corresponding %OVS_FLOW_ATTR_* out of the response.
 */
#define OVS_UFID_F_OMIT_KEY      (1 << 0)
#define OVS_UFID_F_OMIT_MASK     (1 << 1)
#define OVS_UFID_F_OMIT_ACTIONS  (1 << 2)
|
|
|
|
|
2013-03-29 14:46:52 +01:00
|
|
|
/**
 * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action.
 * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with
 * %OVS_ACTION_ATTR_SAMPLE.  A value of 0 samples no packets, a value of
 * %UINT32_MAX samples all packets and intermediate values sample intermediate
 * fractions of packets.
 * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event.
 * Actions are passed as nested attributes.
 * @OVS_SAMPLE_ATTR_ARG: Only declared when __KERNEL__ is defined; placed
 * after __OVS_SAMPLE_ATTR_MAX, so %OVS_SAMPLE_ATTR_MAX does not count it.
 *
 * Executes the specified actions with the given probability on a per-packet
 * basis.
 */
enum ovs_sample_attr {
	OVS_SAMPLE_ATTR_UNSPEC      = 0,
	OVS_SAMPLE_ATTR_PROBABILITY = 1, /* u32 number */
	OVS_SAMPLE_ATTR_ACTIONS     = 2, /* Nested OVS_ACTION_ATTR_* attributes. */
	__OVS_SAMPLE_ATTR_MAX,

#ifdef __KERNEL__
	OVS_SAMPLE_ATTR_ARG          /* struct sample_arg  */
#endif
};

/* Highest attribute number in enum ovs_sample_attr visible to user space. */
#define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1)
|
|
|
|
|
openvswitch: Optimize sample action for the clone use cases
With the introduction of open flow 'clone' action, the OVS user space
can now translate the 'clone' action into kernel datapath 'sample'
action, with 100% probability, to ensure that the clone semantics,
which is that the packet seen by the clone action is the same as the
packet seen by the action after clone, is faithfully carried out
in the datapath.
While the sample action in the datapath has the matching semantics,
its implementation is only optimized for its original use.
Specifically, there are two limitations: First, there is a 3-level
nesting restriction, enforced at the flow downloading time. This
limit turns out to be too restrictive for the 'clone' use case.
Second, the implementation avoids recursive calls only if the sample
action list has a single userspace action.
The main optimization implemented in this series removes the static
nesting limit check, instead, implement the run time recursion limit
check, and recursion avoidance similar to that of the 'recirc' action.
This optimization solves both issues #1 and #2 above.
One related optimization attempts to avoid copying flow key as
long as the actions enclosed does not change the flow key. The
detection is performed only once at the flow downloading time.
Another related optimization is to rewrite the action list
at flow downloading time in order to save the fast path from parsing
the sample action list in its original form repeatedly.
Signed-off-by: Andy Zhou <azhou@ovn.org>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-03-20 16:32:29 -07:00
|
|
|
#ifdef __KERNEL__
/**
 * struct sample_arg - kernel-only argument of %OVS_SAMPLE_ATTR_ARG.
 * @exec: When true, actions in sample will not change flow keys.
 * False otherwise.
 * @probability: Same value as 'OVS_SAMPLE_ATTR_PROBABILITY'.
 */
struct sample_arg {
	bool exec;
	u32  probability;
};
#endif
|
|
|
|
|
2013-03-29 14:46:52 +01:00
|
|
|
/**
 * enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action.
 * @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION
 * message should be sent.  Required.
 * @OVS_USERSPACE_ATTR_USERDATA: If present, its variable-length argument is
 * copied to the %OVS_PACKET_CMD_ACTION message as %OVS_PACKET_ATTR_USERDATA.
 * @OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: If present, u32 output port to get
 * tunnel info.
 * @OVS_USERSPACE_ATTR_ACTIONS: If present, send actions with upcall.
 */
enum ovs_userspace_attr {
	OVS_USERSPACE_ATTR_UNSPEC          = 0,
	OVS_USERSPACE_ATTR_PID             = 1, /* u32 Netlink PID to receive upcalls. */
	OVS_USERSPACE_ATTR_USERDATA        = 2, /* Optional user-specified cookie. */
	OVS_USERSPACE_ATTR_EGRESS_TUN_PORT = 3, /* Optional, u32 output port
						 * to get tunnel info. */
	OVS_USERSPACE_ATTR_ACTIONS         = 4, /* Optional flag to get actions. */
	__OVS_USERSPACE_ATTR_MAX
};

/* Highest valid OVS_USERSPACE_ATTR_* attribute number. */
#define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1)
|
|
|
|
|
2016-06-10 11:49:33 -07:00
|
|
|
struct ovs_action_trunc {
|
2016-08-22 20:33:19 +02:00
|
|
|
__u32 max_len; /* Max packet size in bytes. */
|
2016-06-10 11:49:33 -07:00
|
|
|
};
|
|
|
|
|
2014-10-06 05:05:13 -07:00
|
|
|
/**
|
|
|
|
* struct ovs_action_push_mpls - %OVS_ACTION_ATTR_PUSH_MPLS action argument.
|
|
|
|
* @mpls_lse: MPLS label stack entry to push.
|
|
|
|
* @mpls_ethertype: Ethertype to set in the encapsulating ethernet frame.
|
|
|
|
*
|
|
|
|
* The only values @mpls_ethertype should ever be given are %ETH_P_MPLS_UC and
|
|
|
|
* %ETH_P_MPLS_MC, indicating MPLS unicast or multicast. Other are rejected.
|
|
|
|
*/
|
|
|
|
struct ovs_action_push_mpls {
|
|
|
|
__be32 mpls_lse;
|
|
|
|
__be16 mpls_ethertype; /* Either %ETH_P_MPLS_UC or %ETH_P_MPLS_MC */
|
|
|
|
};
|
|
|
|
|
2019-12-21 08:50:46 +05:30
|
|
|
/**
|
|
|
|
* struct ovs_action_add_mpls - %OVS_ACTION_ATTR_ADD_MPLS action
|
|
|
|
* argument.
|
|
|
|
* @mpls_lse: MPLS label stack entry to push.
|
|
|
|
* @mpls_ethertype: Ethertype to set in the encapsulating ethernet frame.
|
|
|
|
* @tun_flags: MPLS tunnel attributes.
|
|
|
|
*
|
|
|
|
* The only values @mpls_ethertype should ever be given are %ETH_P_MPLS_UC and
|
|
|
|
* %ETH_P_MPLS_MC, indicating MPLS unicast or multicast. Other are rejected.
|
|
|
|
*/
|
|
|
|
struct ovs_action_add_mpls {
|
|
|
|
__be32 mpls_lse;
|
|
|
|
__be16 mpls_ethertype; /* Either %ETH_P_MPLS_UC or %ETH_P_MPLS_MC */
|
|
|
|
__u16 tun_flags;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Flag to specify the place of insertion of the MPLS header.
 * When false, the MPLS header will be inserted at the start of the packet.
 * When true, the MPLS header will be inserted at the start of the l3 header.
 */
#define OVS_MPLS_L3_TUNNEL_FLAG_MASK  (1 << 0)
|
|
|
|
|
2013-03-29 14:46:52 +01:00
|
|
|
/**
|
|
|
|
* struct ovs_action_push_vlan - %OVS_ACTION_ATTR_PUSH_VLAN action argument.
|
|
|
|
* @vlan_tpid: Tag protocol identifier (TPID) to push.
|
|
|
|
* @vlan_tci: Tag control identifier (TCI) to push. The CFI bit must be set
|
|
|
|
* (but it will not be set in the 802.1Q header that is pushed).
|
|
|
|
*
|
2016-09-07 12:56:57 -04:00
|
|
|
* The @vlan_tpid value is typically %ETH_P_8021Q or %ETH_P_8021AD.
|
|
|
|
* The only acceptable TPID values are those that the kernel module also parses
|
|
|
|
* as 802.1Q or 802.1AD headers, to prevent %OVS_ACTION_ATTR_PUSH_VLAN followed
|
|
|
|
* by %OVS_ACTION_ATTR_POP_VLAN from having surprising results.
|
2013-03-29 14:46:52 +01:00
|
|
|
*/
|
|
|
|
struct ovs_action_push_vlan {
|
2016-09-07 12:56:57 -04:00
|
|
|
__be16 vlan_tpid; /* 802.1Q or 802.1ad TPID. */
|
2013-03-29 14:46:52 +01:00
|
|
|
__be16 vlan_tci; /* 802.1Q TCI (VLAN ID and priority). */
|
|
|
|
};
|
|
|
|
|
2014-09-15 19:37:25 -07:00
|
|
|
/* Data path hash algorithm for computing Datapath hash.
 *
 * The algorithm type only specifies which fields in a flow
 * will be used as part of the hash. Each datapath is free
 * to use its own hash algorithm. The hash value will be
 * opaque to the user space daemon.
 */
enum ovs_hash_alg {
	OVS_HASH_ALG_L4     = 0,
	OVS_HASH_ALG_SYM_L4 = 1,
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* struct ovs_action_hash - %OVS_ACTION_ATTR_HASH action argument.
|
|
|
|
* @hash_alg: Algorithm used to compute hash prior to recirculation.
|
|
|
|
* @hash_basis: basis used for computing hash.
|
|
|
|
*/
|
|
|
|
struct ovs_action_hash {
|
2016-08-22 20:32:55 +02:00
|
|
|
__u32 hash_alg; /* One of ovs_hash_alg. */
|
|
|
|
__u32 hash_basis;
|
2014-09-15 19:37:25 -07:00
|
|
|
};
|
|
|
|
|
2015-08-26 11:31:48 -07:00
|
|
|
/**
|
|
|
|
* enum ovs_ct_attr - Attributes for %OVS_ACTION_ATTR_CT action.
|
2015-10-06 11:00:01 -07:00
|
|
|
* @OVS_CT_ATTR_COMMIT: If present, commits the connection to the conntrack
|
|
|
|
* table. This allows future packets for the same connection to be identified
|
2015-10-19 19:18:58 -07:00
|
|
|
* as 'established' or 'related'. The flow key for the current packet will
|
|
|
|
* retain the pre-commit connection state.
|
2015-08-26 11:31:48 -07:00
|
|
|
* @OVS_CT_ATTR_ZONE: u16 connection tracking zone.
|
2015-08-26 11:31:49 -07:00
|
|
|
* @OVS_CT_ATTR_MARK: u32 value followed by u32 mask. For each bit set in the
|
|
|
|
* mask, the corresponding bit in the value is copied to the connection
|
|
|
|
* tracking mark field in the connection.
|
2015-12-14 14:29:58 +01:00
|
|
|
* @OVS_CT_ATTR_LABELS: %OVS_CT_LABELS_LEN value followed by %OVS_CT_LABELS_LEN
|
2015-08-26 11:31:52 -07:00
|
|
|
* mask. For each bit set in the mask, the corresponding bit in the value is
|
|
|
|
* copied to the connection tracking label field in the connection.
|
openvswitch: Allow attaching helpers to ct action
Add support for using conntrack helpers to assist protocol detection.
The new OVS_CT_ATTR_HELPER attribute of the CT action specifies a helper
to be used for this connection. If no helper is specified, then helpers
will be automatically applied as per the sysctl configuration of
net.netfilter.nf_conntrack_helper.
The helper may be specified as part of the conntrack action, eg:
ct(helper=ftp). Initial packets for related connections should be
committed to allow later packets for the flow to be considered
established.
Example ovs-ofctl flows allowing FTP connections from ports 1->2:
in_port=1,tcp,action=ct(helper=ftp,commit),2
in_port=2,tcp,ct_state=-trk,action=ct(recirc)
in_port=2,tcp,ct_state=+trk-new+est,action=1
in_port=2,tcp,ct_state=+trk+rel,action=1
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-08-26 11:31:53 -07:00
|
|
|
* @OVS_CT_ATTR_HELPER: variable length string defining conntrack ALG.
|
2016-03-10 10:54:23 -08:00
|
|
|
* @OVS_CT_ATTR_NAT: Nested OVS_NAT_ATTR_* for performing L3 network address
|
|
|
|
* translation (NAT) on the packet.
|
2017-02-09 11:22:00 -08:00
|
|
|
* @OVS_CT_ATTR_FORCE_COMMIT: Like %OVS_CT_ATTR_COMMIT, but instead of doing
|
|
|
|
* nothing if the connection is already committed, will check that the current
|
|
|
|
* packet is in conntrack entry's original direction. If directionality does
|
|
|
|
* not match, will delete the existing conntrack entry and commit a new one.
|
2017-04-21 16:48:06 -07:00
|
|
|
* @OVS_CT_ATTR_EVENTMASK: Mask of bits indicating which conntrack event types
|
|
|
|
* (enum ip_conntrack_events IPCT_*) should be reported. For any bit set to
|
|
|
|
* zero, the corresponding event type is not generated. Default behavior
|
|
|
|
* depends on system configuration, but typically all event types are
|
|
|
|
* generated, hence listening on NFNLGRP_CONNTRACK_UPDATE events may get a lot
|
|
|
|
* of events. Explicitly passing this attribute allows limiting the updates
|
|
|
|
* received to the events of interest. The bits 1 << IPCT_NEW, 1 <<
|
|
|
|
* IPCT_RELATED, and 1 << IPCT_DESTROY must be set to ones for those events to
|
|
|
|
* be received on NFNLGRP_CONNTRACK_NEW and NFNLGRP_CONNTRACK_DESTROY groups,
|
|
|
|
* respectively. Remaining bits control the changes for which an event is
|
|
|
|
* delivered on the NFNLGRP_CONNTRACK_UPDATE group.
|
2019-03-26 11:31:14 -07:00
|
|
|
* @OVS_CT_ATTR_TIMEOUT: Variable length string defining conntrack timeout.
|
2015-08-26 11:31:48 -07:00
|
|
|
*/
|
|
|
|
enum ovs_ct_attr {
|
|
|
|
OVS_CT_ATTR_UNSPEC,
|
2015-10-06 11:00:01 -07:00
|
|
|
OVS_CT_ATTR_COMMIT, /* No argument, commits connection. */
|
2015-08-26 11:31:48 -07:00
|
|
|
OVS_CT_ATTR_ZONE, /* u16 zone id. */
|
2015-08-26 11:31:49 -07:00
|
|
|
OVS_CT_ATTR_MARK, /* mark to associate with this connection. */
|
2015-10-01 15:00:37 -07:00
|
|
|
OVS_CT_ATTR_LABELS, /* labels to associate with this connection. */
|
openvswitch: Allow attaching helpers to ct action
Add support for using conntrack helpers to assist protocol detection.
The new OVS_CT_ATTR_HELPER attribute of the CT action specifies a helper
to be used for this connection. If no helper is specified, then helpers
will be automatically applied as per the sysctl configuration of
net.netfilter.nf_conntrack_helper.
The helper may be specified as part of the conntrack action, eg:
ct(helper=ftp). Initial packets for related connections should be
committed to allow later packets for the flow to be considered
established.
Example ovs-ofctl flows allowing FTP connections from ports 1->2:
in_port=1,tcp,action=ct(helper=ftp,commit),2
in_port=2,tcp,ct_state=-trk,action=ct(recirc)
in_port=2,tcp,ct_state=+trk-new+est,action=1
in_port=2,tcp,ct_state=+trk+rel,action=1
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-08-26 11:31:53 -07:00
|
|
|
OVS_CT_ATTR_HELPER, /* netlink helper to assist detection of
|
|
|
|
related connections. */
|
2016-03-10 10:54:23 -08:00
|
|
|
OVS_CT_ATTR_NAT, /* Nested OVS_NAT_ATTR_* */
|
2017-02-09 11:22:00 -08:00
|
|
|
OVS_CT_ATTR_FORCE_COMMIT, /* No argument */
|
2017-04-21 16:48:06 -07:00
|
|
|
OVS_CT_ATTR_EVENTMASK, /* u32 mask of IPCT_* events. */
|
2019-03-26 11:31:14 -07:00
|
|
|
OVS_CT_ATTR_TIMEOUT, /* Associate timeout with this connection for
|
|
|
|
* fine-grain timeout tuning. */
|
2015-08-26 11:31:48 -07:00
|
|
|
__OVS_CT_ATTR_MAX
|
|
|
|
};
|
|
|
|
|
|
|
|
#define OVS_CT_ATTR_MAX (__OVS_CT_ATTR_MAX - 1)
|
|
|
|
|
2016-03-10 10:54:23 -08:00
|
|
|
/**
|
|
|
|
* enum ovs_nat_attr - Attributes for %OVS_CT_ATTR_NAT.
|
|
|
|
*
|
|
|
|
* @OVS_NAT_ATTR_SRC: Flag for Source NAT (mangle source address/port).
|
|
|
|
* @OVS_NAT_ATTR_DST: Flag for Destination NAT (mangle destination
|
|
|
|
* address/port). Only one of (@OVS_NAT_ATTR_SRC, @OVS_NAT_ATTR_DST) may be
|
|
|
|
* specified. Effective only for packets for ct_state NEW connections.
|
|
|
|
* Packets of committed connections are mangled by the NAT action according to
|
|
|
|
* the committed NAT type regardless of the flags specified. As a corollary, a
|
|
|
|
* NAT action without a NAT type flag will only mangle packets of committed
|
|
|
|
* connections. The following NAT attributes only apply for NEW
|
|
|
|
* (non-committed) connections, and they may be included only when the CT
|
|
|
|
* action has the @OVS_CT_ATTR_COMMIT flag and either @OVS_NAT_ATTR_SRC or
|
|
|
|
* @OVS_NAT_ATTR_DST is also included.
|
|
|
|
* @OVS_NAT_ATTR_IP_MIN: struct in_addr or struct in6_addr
|
|
|
|
* @OVS_NAT_ATTR_IP_MAX: struct in_addr or struct in6_addr
|
|
|
|
* @OVS_NAT_ATTR_PROTO_MIN: u16 L4 protocol specific lower boundary (port)
|
|
|
|
* @OVS_NAT_ATTR_PROTO_MAX: u16 L4 protocol specific upper boundary (port)
|
|
|
|
* @OVS_NAT_ATTR_PERSISTENT: Flag for persistent IP mapping across reboots
|
|
|
|
* @OVS_NAT_ATTR_PROTO_HASH: Flag for pseudo random L4 port mapping (MD5)
|
|
|
|
* @OVS_NAT_ATTR_PROTO_RANDOM: Flag for fully randomized L4 port mapping
|
|
|
|
*/
|
|
|
|
enum ovs_nat_attr {
|
|
|
|
OVS_NAT_ATTR_UNSPEC,
|
|
|
|
OVS_NAT_ATTR_SRC,
|
|
|
|
OVS_NAT_ATTR_DST,
|
|
|
|
OVS_NAT_ATTR_IP_MIN,
|
|
|
|
OVS_NAT_ATTR_IP_MAX,
|
|
|
|
OVS_NAT_ATTR_PROTO_MIN,
|
|
|
|
OVS_NAT_ATTR_PROTO_MAX,
|
|
|
|
OVS_NAT_ATTR_PERSISTENT,
|
|
|
|
OVS_NAT_ATTR_PROTO_HASH,
|
|
|
|
OVS_NAT_ATTR_PROTO_RANDOM,
|
|
|
|
__OVS_NAT_ATTR_MAX,
|
|
|
|
};
|
|
|
|
|
|
|
|
#define OVS_NAT_ATTR_MAX (__OVS_NAT_ATTR_MAX - 1)
|
|
|
|
|
2016-11-10 16:28:23 +01:00
|
|
|
/*
|
|
|
|
* struct ovs_action_push_eth - %OVS_ACTION_ATTR_PUSH_ETH action argument.
|
|
|
|
* @addresses: Source and destination MAC addresses.
|
|
|
|
* @eth_type: Ethernet type
|
|
|
|
*/
|
|
|
|
struct ovs_action_push_eth {
|
|
|
|
struct ovs_key_ethernet addresses;
|
|
|
|
};
|
|
|
|
|
2019-03-26 06:13:46 +05:30
|
|
|
/*
|
|
|
|
* enum ovs_check_pkt_len_attr - Attributes for %OVS_ACTION_ATTR_CHECK_PKT_LEN.
|
|
|
|
*
|
|
|
|
* @OVS_CHECK_PKT_LEN_ATTR_PKT_LEN: u16 Packet length to check for.
|
|
|
|
* @OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER: Nested OVS_ACTION_ATTR_*
|
|
|
|
* actions to apply if the packet length is greater than the specified
|
|
|
|
* length in the attr - OVS_CHECK_PKT_LEN_ATTR_PKT_LEN.
|
|
|
|
* @OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL: Nested OVS_ACTION_ATTR_*
|
|
|
|
* actions to apply if the packet length is less than or equal to the specified
|
|
|
|
* length in the attr - OVS_CHECK_PKT_LEN_ATTR_PKT_LEN.
|
|
|
|
*/
|
|
|
|
enum ovs_check_pkt_len_attr {
|
|
|
|
OVS_CHECK_PKT_LEN_ATTR_UNSPEC,
|
|
|
|
OVS_CHECK_PKT_LEN_ATTR_PKT_LEN,
|
|
|
|
OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER,
|
|
|
|
OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL,
|
|
|
|
__OVS_CHECK_PKT_LEN_ATTR_MAX,
|
|
|
|
|
|
|
|
#ifdef __KERNEL__
|
|
|
|
OVS_CHECK_PKT_LEN_ATTR_ARG /* struct check_pkt_len_arg */
|
|
|
|
#endif
|
|
|
|
};
|
|
|
|
|
|
|
|
#define OVS_CHECK_PKT_LEN_ATTR_MAX (__OVS_CHECK_PKT_LEN_ATTR_MAX - 1)
|
|
|
|
|
|
|
|
#ifdef __KERNEL__
|
|
|
|
struct check_pkt_len_arg {
|
|
|
|
u16 pkt_len; /* Same value as OVS_CHECK_PKT_LEN_ATTR_PKT_LEN. */
|
|
|
|
bool exec_for_greater; /* When true, actions in IF_GREATER will
|
|
|
|
* not change flow keys. False otherwise.
|
|
|
|
*/
|
|
|
|
bool exec_for_lesser_equal; /* When true, actions in IF_LESS_EQUAL
|
|
|
|
* will not change flow keys. False
|
|
|
|
* otherwise.
|
|
|
|
*/
|
|
|
|
};
|
|
|
|
#endif
|
|
|
|
|
2013-03-29 14:46:52 +01:00
|
|
|
/**
|
|
|
|
* enum ovs_action_attr - Action types.
|
|
|
|
*
|
|
|
|
* @OVS_ACTION_ATTR_OUTPUT: Output packet to port.
|
2016-06-10 11:49:33 -07:00
|
|
|
* @OVS_ACTION_ATTR_TRUNC: Output packet to port with truncated packet size.
|
2013-03-29 14:46:52 +01:00
|
|
|
* @OVS_ACTION_ATTR_USERSPACE: Send packet to userspace according to nested
|
|
|
|
* %OVS_USERSPACE_ATTR_* attributes.
|
|
|
|
* @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header. The
|
|
|
|
* single nested %OVS_KEY_ATTR_* attribute specifies a header to modify and its
|
|
|
|
* value.
|
2015-02-05 13:40:49 -08:00
|
|
|
* @OVS_ACTION_ATTR_SET_MASKED: Replaces the contents of an existing header. A
|
|
|
|
* nested %OVS_KEY_ATTR_* attribute specifies a header to modify, its value,
|
|
|
|
* and a mask. For every bit set in the mask, the corresponding bit value
|
|
|
|
* is copied from the value to the packet header field, rest of the bits are
|
|
|
|
* left unchanged. The non-masked value bits must be passed in as zeroes.
|
|
|
|
* Masking is not supported for the %OVS_KEY_ATTR_TUNNEL attribute.
|
2016-09-07 12:56:57 -04:00
|
|
|
* @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q or 802.1ad header
|
|
|
|
* onto the packet.
|
|
|
|
* @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q or 802.1ad header
|
|
|
|
* from the packet.
|
2013-03-29 14:46:52 +01:00
|
|
|
* @OVS_ACTION_ATTR_SAMPLE: Probabilistically executes actions, as specified in
|
|
|
|
* the nested %OVS_SAMPLE_ATTR_* attributes.
|
2014-10-06 05:05:13 -07:00
|
|
|
* @OVS_ACTION_ATTR_PUSH_MPLS: Push a new MPLS label stack entry onto the
|
|
|
|
* top of the packet's MPLS label stack. Set the ethertype of the
|
|
|
|
* encapsulating frame to either %ETH_P_MPLS_UC or %ETH_P_MPLS_MC to
|
|
|
|
* indicate the new packet contents.
|
|
|
|
* @OVS_ACTION_ATTR_POP_MPLS: Pop an MPLS label stack entry off of the
|
|
|
|
* packet's MPLS label stack. Set the encapsulating frame's ethertype to
|
|
|
|
* indicate the new packet contents. This could potentially still be
|
|
|
|
* %ETH_P_MPLS if the resulting MPLS label stack is not empty. If there
|
|
|
|
* is no MPLS label stack, as determined by ethertype, no action is taken.
|
2015-08-26 11:31:48 -07:00
|
|
|
* @OVS_ACTION_ATTR_CT: Track the connection. Populate the conntrack-related
|
|
|
|
* entries in the flow key.
|
2016-11-10 16:28:23 +01:00
|
|
|
* @OVS_ACTION_ATTR_PUSH_ETH: Push a new outermost Ethernet header onto the
|
|
|
|
* packet.
|
|
|
|
* @OVS_ACTION_ATTR_POP_ETH: Pop the outermost Ethernet header off the
|
|
|
|
* packet.
|
2017-10-10 16:54:44 -04:00
|
|
|
* @OVS_ACTION_ATTR_CT_CLEAR: Clear conntrack state from the packet.
|
2017-11-07 21:07:02 +08:00
|
|
|
* @OVS_ACTION_ATTR_PUSH_NSH: push NSH header to the packet.
|
|
|
|
* @OVS_ACTION_ATTR_POP_NSH: pop the outermost NSH header off the packet.
|
2017-11-10 12:09:43 -08:00
|
|
|
* @OVS_ACTION_ATTR_METER: Run packet through a meter, which may drop the
|
|
|
|
* packet, or modify the packet (e.g., change the DSCP field).
|
2018-07-02 08:18:03 -07:00
|
|
|
* @OVS_ACTION_ATTR_CLONE: make a copy of the packet and execute a list of
|
|
|
|
* actions without affecting the original packet and key.
|
2019-03-26 06:13:46 +05:30
|
|
|
* @OVS_ACTION_ATTR_CHECK_PKT_LEN: Check the packet length and execute a set
|
|
|
|
* of actions if greater than the specified packet length, else execute
|
|
|
|
* another set of actions.
|
2019-12-21 08:50:46 +05:30
|
|
|
* @OVS_ACTION_ATTR_ADD_MPLS: Push a new MPLS label stack entry at the
|
|
|
|
* start of the packet or at the start of the l3 header depending on the value
|
|
|
|
* of l3 tunnel flag in the tun_flags field of OVS_ACTION_ATTR_ADD_MPLS
|
|
|
|
* argument.
|
2023-08-11 16:12:50 +02:00
|
|
|
* @OVS_ACTION_ATTR_DROP: Explicit drop action.
|
2013-03-29 14:46:52 +01:00
|
|
|
*
|
|
|
|
* Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all
|
|
|
|
* fields within a header are modifiable, e.g. the IPv4 protocol and fragment
|
|
|
|
* type may not be changed.
|
2015-02-05 13:40:49 -08:00
|
|
|
*
|
|
|
|
* @OVS_ACTION_ATTR_SET_TO_MASKED: Kernel internal masked set action translated
|
|
|
|
* from the @OVS_ACTION_ATTR_SET.
|
2013-03-29 14:46:52 +01:00
|
|
|
*/
|
|
|
|
|
|
|
|
enum ovs_action_attr {
|
|
|
|
OVS_ACTION_ATTR_UNSPEC,
|
|
|
|
OVS_ACTION_ATTR_OUTPUT, /* u32 port number. */
|
|
|
|
OVS_ACTION_ATTR_USERSPACE, /* Nested OVS_USERSPACE_ATTR_*. */
|
|
|
|
OVS_ACTION_ATTR_SET, /* One nested OVS_KEY_ATTR_*. */
|
|
|
|
OVS_ACTION_ATTR_PUSH_VLAN, /* struct ovs_action_push_vlan. */
|
|
|
|
OVS_ACTION_ATTR_POP_VLAN, /* No argument. */
|
|
|
|
OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */
|
2014-09-15 19:37:25 -07:00
|
|
|
OVS_ACTION_ATTR_RECIRC, /* u32 recirc_id. */
|
|
|
|
OVS_ACTION_ATTR_HASH, /* struct ovs_action_hash. */
|
2014-10-06 05:05:13 -07:00
|
|
|
OVS_ACTION_ATTR_PUSH_MPLS, /* struct ovs_action_push_mpls. */
|
|
|
|
OVS_ACTION_ATTR_POP_MPLS, /* __be16 ethertype. */
|
2015-02-05 13:40:49 -08:00
|
|
|
OVS_ACTION_ATTR_SET_MASKED, /* One nested OVS_KEY_ATTR_* including
|
|
|
|
* data immediately followed by a mask.
|
|
|
|
* The data must be zero for the unmasked
|
|
|
|
* bits. */
|
2015-10-06 10:59:56 -07:00
|
|
|
OVS_ACTION_ATTR_CT, /* Nested OVS_CT_ATTR_* . */
|
2016-06-10 11:49:33 -07:00
|
|
|
OVS_ACTION_ATTR_TRUNC, /* u32 struct ovs_action_trunc. */
|
2016-11-10 16:28:23 +01:00
|
|
|
OVS_ACTION_ATTR_PUSH_ETH, /* struct ovs_action_push_eth. */
|
|
|
|
OVS_ACTION_ATTR_POP_ETH, /* No argument. */
|
2017-10-10 16:54:44 -04:00
|
|
|
OVS_ACTION_ATTR_CT_CLEAR, /* No argument. */
|
2017-11-07 21:07:02 +08:00
|
|
|
OVS_ACTION_ATTR_PUSH_NSH, /* Nested OVS_NSH_KEY_ATTR_*. */
|
|
|
|
OVS_ACTION_ATTR_POP_NSH, /* No argument. */
|
2017-11-10 12:09:43 -08:00
|
|
|
OVS_ACTION_ATTR_METER, /* u32 meter ID. */
|
2018-07-02 08:18:03 -07:00
|
|
|
OVS_ACTION_ATTR_CLONE, /* Nested OVS_CLONE_ATTR_*. */
|
2019-03-26 06:13:46 +05:30
|
|
|
OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */
|
2019-12-21 08:50:46 +05:30
|
|
|
OVS_ACTION_ATTR_ADD_MPLS, /* struct ovs_action_add_mpls. */
|
openvswitch: add TTL decrement action
New action to decrement TTL instead of setting it to a fixed value.
This action will decrement the TTL and, in case of expired TTL, drop it
or execute an action passed via a nested attribute.
The default TTL expired action is to drop the packet.
Supports both IPv4 and IPv6 via the ttl and hop_limit fields, respectively.
Tested with a corresponding change in the userspace:
# ovs-dpctl dump-flows
in_port(2),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1
in_port(1),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},2
in_port(1),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:2
in_port(2),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:1
# ping -c1 192.168.0.2 -t 42
IP (tos 0x0, ttl 41, id 61647, offset 0, flags [DF], proto ICMP (1), length 84)
192.168.0.1 > 192.168.0.2: ICMP echo request, id 386, seq 1, length 64
# ping -c1 192.168.0.2 -t 120
IP (tos 0x0, ttl 119, id 62070, offset 0, flags [DF], proto ICMP (1), length 84)
192.168.0.1 > 192.168.0.2: ICMP echo request, id 388, seq 1, length 64
# ping -c1 192.168.0.2 -t 1
#
Co-developed-by: Bindiya Kurle <bindiyakurle@gmail.com>
Signed-off-by: Bindiya Kurle <bindiyakurle@gmail.com>
Signed-off-by: Matteo Croce <mcroce@redhat.com>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-02-15 14:20:56 +01:00
|
|
|
OVS_ACTION_ATTR_DEC_TTL, /* Nested OVS_DEC_TTL_ATTR_*. */
|
2023-08-11 16:12:50 +02:00
|
|
|
OVS_ACTION_ATTR_DROP, /* u32 error code. */
|
2015-02-05 13:40:49 -08:00
|
|
|
|
|
|
|
__OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted
|
|
|
|
* from userspace. */
|
2014-10-06 05:05:13 -07:00
|
|
|
|
2015-02-05 13:40:49 -08:00
|
|
|
#ifdef __KERNEL__
|
|
|
|
OVS_ACTION_ATTR_SET_TO_MASKED, /* Kernel module internal masked
|
|
|
|
* set action converted from
|
|
|
|
* OVS_ACTION_ATTR_SET. */
|
|
|
|
#endif
|
2013-03-29 14:46:52 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
#define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1)
|
|
|
|
|
2017-11-10 12:09:40 -08:00
|
|
|
/* Meters. */
|
|
|
|
#define OVS_METER_FAMILY "ovs_meter"
|
|
|
|
#define OVS_METER_MCGROUP "ovs_meter"
|
|
|
|
#define OVS_METER_VERSION 0x1
|
|
|
|
|
|
|
|
enum ovs_meter_cmd {
|
|
|
|
OVS_METER_CMD_UNSPEC,
|
|
|
|
OVS_METER_CMD_FEATURES, /* Get features supported by the datapath. */
|
|
|
|
OVS_METER_CMD_SET, /* Add or modify a meter. */
|
|
|
|
OVS_METER_CMD_DEL, /* Delete a meter. */
|
|
|
|
OVS_METER_CMD_GET /* Get meter stats. */
|
|
|
|
};
|
|
|
|
|
|
|
|
enum ovs_meter_attr {
|
|
|
|
OVS_METER_ATTR_UNSPEC,
|
|
|
|
OVS_METER_ATTR_ID, /* u32 meter ID within datapath. */
|
|
|
|
OVS_METER_ATTR_KBPS, /* No argument. If set, units in kilobits
|
|
|
|
* per second. Otherwise, units in
|
|
|
|
* packets per second.
|
|
|
|
*/
|
|
|
|
OVS_METER_ATTR_STATS, /* struct ovs_flow_stats for the meter. */
|
|
|
|
OVS_METER_ATTR_BANDS, /* Nested attributes for meter bands. */
|
|
|
|
OVS_METER_ATTR_USED, /* u64 msecs last used in monotonic time. */
|
|
|
|
OVS_METER_ATTR_CLEAR, /* Flag to clear stats, used. */
|
|
|
|
OVS_METER_ATTR_MAX_METERS, /* u32 number of meters supported. */
|
|
|
|
OVS_METER_ATTR_MAX_BANDS, /* u32 max number of bands per meter. */
|
|
|
|
OVS_METER_ATTR_PAD,
|
|
|
|
__OVS_METER_ATTR_MAX
|
|
|
|
};
|
|
|
|
|
|
|
|
#define OVS_METER_ATTR_MAX (__OVS_METER_ATTR_MAX - 1)
|
|
|
|
|
|
|
|
enum ovs_band_attr {
|
|
|
|
OVS_BAND_ATTR_UNSPEC,
|
|
|
|
OVS_BAND_ATTR_TYPE, /* u32 OVS_METER_BAND_TYPE_* constant. */
|
|
|
|
OVS_BAND_ATTR_RATE, /* u32 band rate in meter units (see above). */
|
|
|
|
OVS_BAND_ATTR_BURST, /* u32 burst size in meter units. */
|
|
|
|
OVS_BAND_ATTR_STATS, /* struct ovs_flow_stats for the band. */
|
|
|
|
__OVS_BAND_ATTR_MAX
|
|
|
|
};
|
|
|
|
|
|
|
|
#define OVS_BAND_ATTR_MAX (__OVS_BAND_ATTR_MAX - 1)
|
|
|
|
|
|
|
|
enum ovs_meter_band_type {
|
|
|
|
OVS_METER_BAND_TYPE_UNSPEC,
|
|
|
|
OVS_METER_BAND_TYPE_DROP, /* Drop exceeding packets. */
|
|
|
|
__OVS_METER_BAND_TYPE_MAX
|
|
|
|
};
|
|
|
|
|
|
|
|
#define OVS_METER_BAND_TYPE_MAX (__OVS_METER_BAND_TYPE_MAX - 1)
|
|
|
|
|
2018-05-24 17:56:42 -07:00
|
|
|
/* Conntrack limit */
|
|
|
|
#define OVS_CT_LIMIT_FAMILY "ovs_ct_limit"
|
|
|
|
#define OVS_CT_LIMIT_MCGROUP "ovs_ct_limit"
|
|
|
|
#define OVS_CT_LIMIT_VERSION 0x1
|
|
|
|
|
|
|
|
enum ovs_ct_limit_cmd {
|
|
|
|
OVS_CT_LIMIT_CMD_UNSPEC,
|
|
|
|
OVS_CT_LIMIT_CMD_SET, /* Add or modify ct limit. */
|
|
|
|
OVS_CT_LIMIT_CMD_DEL, /* Delete ct limit. */
|
|
|
|
OVS_CT_LIMIT_CMD_GET /* Get ct limit. */
|
|
|
|
};
|
|
|
|
|
|
|
|
enum ovs_ct_limit_attr {
|
|
|
|
OVS_CT_LIMIT_ATTR_UNSPEC,
|
|
|
|
OVS_CT_LIMIT_ATTR_ZONE_LIMIT, /* Nested struct ovs_zone_limit. */
|
|
|
|
__OVS_CT_LIMIT_ATTR_MAX
|
|
|
|
};
|
|
|
|
|
|
|
|
#define OVS_CT_LIMIT_ATTR_MAX (__OVS_CT_LIMIT_ATTR_MAX - 1)
|
|
|
|
|
|
|
|
/* Parenthesized so the negative constant expands as a single operand. */
#define OVS_ZONE_LIMIT_DEFAULT_ZONE (-1)
|
|
|
|
|
|
|
|
struct ovs_zone_limit {
|
|
|
|
int zone_id;
|
|
|
|
__u32 limit;
|
|
|
|
__u32 count;
|
|
|
|
};
|
|
|
|
|
openvswitch: add TTL decrement action
New action to decrement TTL instead of setting it to a fixed value.
This action will decrement the TTL and, in case of expired TTL, drop it
or execute an action passed via a nested attribute.
The default TTL expired action is to drop the packet.
Supports both IPv4 and IPv6 via the ttl and hop_limit fields, respectively.
Tested with a corresponding change in the userspace:
# ovs-dpctl dump-flows
in_port(2),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1
in_port(1),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},2
in_port(1),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:2
in_port(2),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:1
# ping -c1 192.168.0.2 -t 42
IP (tos 0x0, ttl 41, id 61647, offset 0, flags [DF], proto ICMP (1), length 84)
192.168.0.1 > 192.168.0.2: ICMP echo request, id 386, seq 1, length 64
# ping -c1 192.168.0.2 -t 120
IP (tos 0x0, ttl 119, id 62070, offset 0, flags [DF], proto ICMP (1), length 84)
192.168.0.1 > 192.168.0.2: ICMP echo request, id 388, seq 1, length 64
# ping -c1 192.168.0.2 -t 1
#
Co-developed-by: Bindiya Kurle <bindiyakurle@gmail.com>
Signed-off-by: Bindiya Kurle <bindiyakurle@gmail.com>
Signed-off-by: Matteo Croce <mcroce@redhat.com>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-02-15 14:20:56 +01:00
|
|
|
enum ovs_dec_ttl_attr {
|
|
|
|
OVS_DEC_TTL_ATTR_UNSPEC,
|
|
|
|
OVS_DEC_TTL_ATTR_ACTION, /* Nested struct nlattr */
|
|
|
|
__OVS_DEC_TTL_ATTR_MAX
|
|
|
|
};
|
|
|
|
|
2020-11-24 07:34:44 -05:00
|
|
|
#define OVS_DEC_TTL_ATTR_MAX (__OVS_DEC_TTL_ATTR_MAX - 1)
|
|
|
|
|
2013-03-29 14:46:52 +01:00
|
|
|
#endif /* _LINUX_OPENVSWITCH_H */
|