xfrm: add TX datapath support for IPsec packet offload mode

In IPsec packet mode, the device is going to encrypt and encapsulate
packets that are associated with offloaded policy. After successful
policy lookup to indicate if packets should be offloaded or not,
the stack forwards packets to the device to do the magic.

Signed-off-by: Raed Salem <raeds@nvidia.com>
Signed-off-by: Huy Nguyen <huyn@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
This commit is contained in:
Leon Romanovsky 2022-12-02 20:41:30 +02:00 committed by Steffen Klassert
parent 919e43fad5
commit f8a70afafc
3 changed files with 141 additions and 5 deletions

View File

@ -132,6 +132,16 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur
if (xo->flags & XFRM_GRO || x->xso.dir == XFRM_DEV_OFFLOAD_IN)
return skb;
/* The packet was sent to HW IPsec packet offload engine,
* but to wrong device. Drop the packet, so it won't skip
* XFRM stack.
*/
if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET && x->xso.dev != dev) {
kfree_skb(skb);
dev_core_stats_tx_dropped_inc(dev);
return NULL;
}
/* This skb was already validated on the upper/virtual dev */
if ((x->xso.dev != dev) && (x->xso.real_dev == dev))
return skb;
@ -398,8 +408,9 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
if (!x->type_offload || x->encap)
return false;
if ((!dev || (dev == xfrm_dst_path(dst)->dev)) &&
(!xdst->child->xfrm)) {
if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET ||
((!dev || (dev == xfrm_dst_path(dst)->dev)) &&
!xdst->child->xfrm)) {
mtu = xfrm_state_mtu(x, xdst->child_mtu_cached);
if (skb->len <= mtu)
goto ok;

View File

@ -492,7 +492,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
struct xfrm_state *x = dst->xfrm;
struct net *net = xs_net(x);
if (err <= 0)
if (err <= 0 || x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
goto resume;
do {
@ -717,6 +717,16 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
break;
}
if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET) {
if (!xfrm_dev_offload_ok(skb, x)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
kfree_skb(skb);
return -EHOSTUNREACH;
}
return xfrm_output_resume(sk, skb, 0);
}
secpath_reset(skb);
if (xfrm_dev_offload_ok(skb, x)) {

View File

@ -951,6 +951,49 @@ xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl,
x->props.family = tmpl->encap_family;
}
static struct xfrm_state *__xfrm_state_lookup_all(struct net *net, u32 mark,
const xfrm_address_t *daddr,
__be32 spi, u8 proto,
unsigned short family,
struct xfrm_dev_offload *xdo)
{
unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
struct xfrm_state *x;
hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) {
#ifdef CONFIG_XFRM_OFFLOAD
if (xdo->type == XFRM_DEV_OFFLOAD_PACKET) {
if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
/* HW states are in the head of list, there is
* no need to iterate further.
*/
break;
/* Packet offload: both policy and SA should
* have same device.
*/
if (xdo->dev != x->xso.dev)
continue;
} else if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
/* Skip HW policy for SW lookups */
continue;
#endif
if (x->props.family != family ||
x->id.spi != spi ||
x->id.proto != proto ||
!xfrm_addr_equal(&x->id.daddr, daddr, family))
continue;
if ((mark & x->mark.m) != x->mark.v)
continue;
if (!xfrm_state_hold_rcu(x))
continue;
return x;
}
return NULL;
}
static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
const xfrm_address_t *daddr,
__be32 spi, u8 proto,
@ -1092,6 +1135,23 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
rcu_read_lock();
h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
#ifdef CONFIG_XFRM_OFFLOAD
if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) {
if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
/* HW states are in the head of list, there is
* no need to iterate further.
*/
break;
/* Packet offload: both policy and SA should
* have same device.
*/
if (pol->xdo.dev != x->xso.dev)
continue;
} else if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
/* Skip HW policy for SW lookups */
continue;
#endif
if (x->props.family == encap_family &&
x->props.reqid == tmpl->reqid &&
(mark & x->mark.m) == x->mark.v &&
@ -1109,6 +1169,23 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family);
hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h_wildcard, bydst) {
#ifdef CONFIG_XFRM_OFFLOAD
if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) {
if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
/* HW states are in the head of list, there is
* no need to iterate further.
*/
break;
/* Packet offload: both policy and SA should
* have same device.
*/
if (pol->xdo.dev != x->xso.dev)
continue;
} else if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
/* Skip HW policy for SW lookups */
continue;
#endif
if (x->props.family == encap_family &&
x->props.reqid == tmpl->reqid &&
(mark & x->mark.m) == x->mark.v &&
@ -1126,8 +1203,10 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
x = best;
if (!x && !error && !acquire_in_progress) {
if (tmpl->id.spi &&
(x0 = __xfrm_state_lookup(net, mark, daddr, tmpl->id.spi,
tmpl->id.proto, encap_family)) != NULL) {
(x0 = __xfrm_state_lookup_all(net, mark, daddr,
tmpl->id.spi, tmpl->id.proto,
encap_family,
&pol->xdo)) != NULL) {
to_put = x0;
error = -EEXIST;
goto out;
@ -1161,7 +1240,31 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
x = NULL;
goto out;
}
#ifdef CONFIG_XFRM_OFFLOAD
if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) {
struct xfrm_dev_offload *xdo = &pol->xdo;
struct xfrm_dev_offload *xso = &x->xso;
xso->type = XFRM_DEV_OFFLOAD_PACKET;
xso->dir = xdo->dir;
xso->dev = xdo->dev;
xso->real_dev = xdo->real_dev;
netdev_tracker_alloc(xso->dev, &xso->dev_tracker,
GFP_ATOMIC);
error = xso->dev->xfrmdev_ops->xdo_dev_state_add(x);
if (error) {
xso->dir = 0;
netdev_put(xso->dev, &xso->dev_tracker);
xso->dev = NULL;
xso->real_dev = NULL;
xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
x->km.state = XFRM_STATE_DEAD;
to_put = x;
x = NULL;
goto out;
}
}
#endif
if (km_query(x, tmpl, pol) == 0) {
spin_lock_bh(&net->xfrm.xfrm_state_lock);
x->km.state = XFRM_STATE_ACQ;
@ -1185,6 +1288,18 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
xfrm_hash_grow_check(net, x->bydst.next != NULL);
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
} else {
#ifdef CONFIG_XFRM_OFFLOAD
struct xfrm_dev_offload *xso = &x->xso;
if (xso->type == XFRM_DEV_OFFLOAD_PACKET) {
xso->dev->xfrmdev_ops->xdo_dev_state_delete(x);
xso->dir = 0;
netdev_put(xso->dev, &xso->dev_tracker);
xso->dev = NULL;
xso->real_dev = NULL;
xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
}
#endif
x->km.state = XFRM_STATE_DEAD;
to_put = x;
x = NULL;