xfrm: Cache used outbound xfrm states at the policy.

Now that we can have per-CPU xfrm states, the number of active
states might increase. To get better lookup performance,
we cache the used xfrm states at the policy for outbound
IPsec traffic.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Tested-by: Antony Antony <antony.antony@secunet.com>
Tested-by: Tobias Brunner <tobias@strongswan.org>
This commit is contained in:
Steffen Klassert 2024-10-23 12:53:43 +02:00
parent 1ddf9916ac
commit 0045e3d806
3 changed files with 71 additions and 0 deletions

View File

@ -184,6 +184,7 @@ struct xfrm_state {
};
struct hlist_node byspi;
struct hlist_node byseq;
struct hlist_node state_cache;
refcount_t refcnt;
spinlock_t lock;
@ -537,6 +538,7 @@ struct xfrm_policy_queue {
* @xp_net: network namespace the policy lives in
* @bydst: hlist node for SPD hash table or rbtree list
* @byidx: hlist node for index hash table
* @state_cache_list: hlist head for policy cached xfrm states
* @lock: serialize changes to policy structure members
* @refcnt: reference count, freed once it reaches 0
* @pos: kernel internal tie-breaker to determine age of policy
@ -567,6 +569,8 @@ struct xfrm_policy {
struct hlist_node bydst;
struct hlist_node byidx;
struct hlist_head state_cache_list;
/* This lock only affects elements except for entry. */
rwlock_t lock;
refcount_t refcnt;

View File

@ -434,6 +434,7 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
if (policy) {
write_pnet(&policy->xp_net, net);
INIT_LIST_HEAD(&policy->walk.all);
INIT_HLIST_HEAD(&policy->state_cache_list);
INIT_HLIST_NODE(&policy->bydst);
INIT_HLIST_NODE(&policy->byidx);
rwlock_init(&policy->lock);
@ -475,6 +476,9 @@ EXPORT_SYMBOL(xfrm_policy_destroy);
static void xfrm_policy_kill(struct xfrm_policy *policy)
{
struct net *net = xp_net(policy);
struct xfrm_state *x;
xfrm_dev_policy_delete(policy);
write_lock_bh(&policy->lock);
@ -490,6 +494,13 @@ static void xfrm_policy_kill(struct xfrm_policy *policy)
if (del_timer(&policy->timer))
xfrm_pol_put(policy);
/* XXX: Flush state cache */
spin_lock_bh(&net->xfrm.xfrm_state_lock);
hlist_for_each_entry_rcu(x, &policy->state_cache_list, state_cache) {
hlist_del_init_rcu(&x->state_cache);
}
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
xfrm_pol_put(policy);
}
@ -3275,6 +3286,7 @@ struct dst_entry *xfrm_lookup_with_ifid(struct net *net,
dst_release(dst);
dst = dst_orig;
}
ok:
xfrm_pols_put(pols, drop_pols);
if (dst && dst->xfrm &&

View File

@ -665,6 +665,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
refcount_set(&x->refcnt, 1);
atomic_set(&x->tunnel_users, 0);
INIT_LIST_HEAD(&x->km.all);
INIT_HLIST_NODE(&x->state_cache);
INIT_HLIST_NODE(&x->bydst);
INIT_HLIST_NODE(&x->bysrc);
INIT_HLIST_NODE(&x->byspi);
@ -744,12 +745,15 @@ int __xfrm_state_delete(struct xfrm_state *x)
if (x->km.state != XFRM_STATE_DEAD) {
x->km.state = XFRM_STATE_DEAD;
spin_lock(&net->xfrm.xfrm_state_lock);
list_del(&x->km.all);
hlist_del_rcu(&x->bydst);
hlist_del_rcu(&x->bysrc);
if (x->km.seq)
hlist_del_rcu(&x->byseq);
if (!hlist_unhashed(&x->state_cache))
hlist_del_rcu(&x->state_cache);
if (x->id.spi)
hlist_del_rcu(&x->byspi);
net->xfrm.state_num--;
@ -1222,6 +1226,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
unsigned int sequence;
struct km_event c;
unsigned int pcpu_id;
bool cached = false;
/* We need the cpu id just as a lookup key,
* we don't require it to be stable.
@ -1234,6 +1239,46 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);
rcu_read_lock();
hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) {
if (x->props.family == encap_family &&
x->props.reqid == tmpl->reqid &&
(mark & x->mark.m) == x->mark.v &&
x->if_id == if_id &&
!(x->props.flags & XFRM_STATE_WILDRECV) &&
xfrm_state_addr_check(x, daddr, saddr, encap_family) &&
tmpl->mode == x->props.mode &&
tmpl->id.proto == x->id.proto &&
(tmpl->id.spi == x->id.spi || !tmpl->id.spi))
xfrm_state_look_at(pol, x, fl, encap_family,
&best, &acquire_in_progress, &error);
}
if (best)
goto cached;
hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) {
if (x->props.family == encap_family &&
x->props.reqid == tmpl->reqid &&
(mark & x->mark.m) == x->mark.v &&
x->if_id == if_id &&
!(x->props.flags & XFRM_STATE_WILDRECV) &&
xfrm_addr_equal(&x->id.daddr, daddr, encap_family) &&
tmpl->mode == x->props.mode &&
tmpl->id.proto == x->id.proto &&
(tmpl->id.spi == x->id.spi || !tmpl->id.spi))
xfrm_state_look_at(pol, x, fl, family,
&best, &acquire_in_progress, &error);
}
cached:
cached = true;
if (best)
goto found;
else if (error)
best = NULL;
else if (acquire_in_progress) /* XXX: acquire_in_progress should not happen */
WARN_ON(1);
h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
#ifdef CONFIG_XFRM_OFFLOAD
@ -1383,6 +1428,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
XFRM_STATE_INSERT(bysrc, &x->bysrc,
net->xfrm.state_bysrc + h,
x->xso.type);
INIT_HLIST_NODE(&x->state_cache);
if (x->id.spi) {
h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
XFRM_STATE_INSERT(byspi, &x->byspi,
@ -1431,6 +1477,15 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
} else {
*err = acquire_in_progress ? -EAGAIN : error;
}
if (x && x->km.state == XFRM_STATE_VALID && !cached &&
(!(pol->flags & XFRM_POLICY_CPU_ACQUIRE) || x->pcpu_num == pcpu_id)) {
spin_lock_bh(&net->xfrm.xfrm_state_lock);
if (hlist_unhashed(&x->state_cache))
hlist_add_head_rcu(&x->state_cache, &pol->state_cache_list);
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
}
rcu_read_unlock();
if (to_put)
xfrm_state_put(to_put);