mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-09 22:50:41 +00:00
netfilter: conntrack: remove timer from ecache extension
This brings the (per-conntrack) ecache extension back to 24 bytes in size (was 152 bytes on x86_64 with lockdep on). When event delivery fails, re-delivery is attempted via work queue. Redelivery is attempted at least every 0.1 seconds, but can happen more frequently if userspace is not congested. The nf_ct_release_dying_list() function is removed. With this patch, ownership of the to-be-redelivered conntracks (on-dying-list-with-DYING-bit not yet set) is with the work queue, which will release the references once the event is out. Joint work with Pablo Neira Ayuso. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
This commit is contained in:
parent
f6b50824f7
commit
9500507c61
@ -18,7 +18,6 @@ struct nf_conntrack_ecache {
|
|||||||
u16 ctmask; /* bitmask of ct events to be delivered */
|
u16 ctmask; /* bitmask of ct events to be delivered */
|
||||||
u16 expmask; /* bitmask of expect events to be delivered */
|
u16 expmask; /* bitmask of expect events to be delivered */
|
||||||
u32 portid; /* netlink portid of destroyer */
|
u32 portid; /* netlink portid of destroyer */
|
||||||
struct timer_list timeout;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline struct nf_conntrack_ecache *
|
static inline struct nf_conntrack_ecache *
|
||||||
@ -216,8 +215,23 @@ void nf_conntrack_ecache_pernet_fini(struct net *net);
|
|||||||
|
|
||||||
int nf_conntrack_ecache_init(void);
|
int nf_conntrack_ecache_init(void);
|
||||||
void nf_conntrack_ecache_fini(void);
|
void nf_conntrack_ecache_fini(void);
|
||||||
#else /* CONFIG_NF_CONNTRACK_EVENTS */
|
|
||||||
|
|
||||||
|
/*
 * Arm the per-netns destroy-event redelivery work.
 *
 * If net->ct.ecache_dwork is not already pending, schedule it to run
 * after HZ jiffies and record that in net->ct.ecache_dwork_pending.
 * nf_ct_delete() calls this when the IPCT_DESTROY event could not be
 * delivered.
 *
 * NOTE(review): the first attempt is delayed by HZ, whereas the work
 * handler re-arms itself with ECACHE_RETRY_WAIT (HZ/10) on congestion -
 * confirm the longer initial delay is intended.
 */
static inline void nf_conntrack_ecache_delayed_work(struct net *net)
|
||||||
|
{
|
||||||
|
if (!delayed_work_pending(&net->ct.ecache_dwork)) {
|
||||||
|
schedule_delayed_work(&net->ct.ecache_dwork, HZ);
|
||||||
|
/* flag is read by nf_conntrack_ecache_work() to decide on a kick */
net->ct.ecache_dwork_pending = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Kick the redelivery work early.
 *
 * If net->ct.ecache_dwork_pending is set, clear it and re-time the
 * delayed work on system_wq with a delay of 0. nf_ct_delete() calls this
 * after a destroy event was reported without error.
 */
static inline void nf_conntrack_ecache_work(struct net *net)
|
||||||
|
{
|
||||||
|
if (net->ct.ecache_dwork_pending) {
|
||||||
|
/* clear first so that later callers do not re-time the work again */
net->ct.ecache_dwork_pending = false;
|
||||||
|
mod_delayed_work(system_wq, &net->ct.ecache_dwork, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else /* CONFIG_NF_CONNTRACK_EVENTS */
|
||||||
static inline void nf_conntrack_event_cache(enum ip_conntrack_events event,
|
static inline void nf_conntrack_event_cache(enum ip_conntrack_events event,
|
||||||
struct nf_conn *ct) {}
|
struct nf_conn *ct) {}
|
||||||
static inline int nf_conntrack_eventmask_report(unsigned int eventmask,
|
static inline int nf_conntrack_eventmask_report(unsigned int eventmask,
|
||||||
@ -255,6 +269,14 @@ static inline int nf_conntrack_ecache_init(void)
|
|||||||
static inline void nf_conntrack_ecache_fini(void)
|
static inline void nf_conntrack_ecache_fini(void)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* No-op stub used when CONFIG_NF_CONNTRACK_EVENTS is not set. */
static inline void nf_conntrack_ecache_delayed_work(struct net *net)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
/* No-op stub used when CONFIG_NF_CONNTRACK_EVENTS is not set. */
static inline void nf_conntrack_ecache_work(struct net *net)
|
||||||
|
{
|
||||||
|
}
|
||||||
#endif /* CONFIG_NF_CONNTRACK_EVENTS */
|
#endif /* CONFIG_NF_CONNTRACK_EVENTS */
|
||||||
|
|
||||||
#endif /*_NF_CONNTRACK_ECACHE_H*/
|
#endif /*_NF_CONNTRACK_ECACHE_H*/
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
#include <linux/list.h>
|
#include <linux/list.h>
|
||||||
#include <linux/list_nulls.h>
|
#include <linux/list_nulls.h>
|
||||||
#include <linux/atomic.h>
|
#include <linux/atomic.h>
|
||||||
|
#include <linux/workqueue.h>
|
||||||
#include <linux/netfilter/nf_conntrack_tcp.h>
|
#include <linux/netfilter/nf_conntrack_tcp.h>
|
||||||
#include <linux/seqlock.h>
|
#include <linux/seqlock.h>
|
||||||
|
|
||||||
@ -73,6 +74,10 @@ struct ct_pcpu {
|
|||||||
struct netns_ct {
|
struct netns_ct {
|
||||||
atomic_t count;
|
atomic_t count;
|
||||||
unsigned int expect_count;
|
unsigned int expect_count;
|
||||||
|
#ifdef CONFIG_NF_CONNTRACK_EVENTS
|
||||||
|
struct delayed_work ecache_dwork;
|
||||||
|
bool ecache_dwork_pending;
|
||||||
|
#endif
|
||||||
#ifdef CONFIG_SYSCTL
|
#ifdef CONFIG_SYSCTL
|
||||||
struct ctl_table_header *sysctl_header;
|
struct ctl_table_header *sysctl_header;
|
||||||
struct ctl_table_header *acct_sysctl_header;
|
struct ctl_table_header *acct_sysctl_header;
|
||||||
@ -82,7 +87,6 @@ struct netns_ct {
|
|||||||
#endif
|
#endif
|
||||||
char *slabname;
|
char *slabname;
|
||||||
unsigned int sysctl_log_invalid; /* Log invalid packets */
|
unsigned int sysctl_log_invalid; /* Log invalid packets */
|
||||||
unsigned int sysctl_events_retry_timeout;
|
|
||||||
int sysctl_events;
|
int sysctl_events;
|
||||||
int sysctl_acct;
|
int sysctl_acct;
|
||||||
int sysctl_auto_assign_helper;
|
int sysctl_auto_assign_helper;
|
||||||
|
@ -352,40 +352,6 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct)
|
|||||||
local_bh_enable();
|
local_bh_enable();
|
||||||
}
|
}
|
||||||
|
|
||||||
static void death_by_event(unsigned long ul_conntrack)
|
|
||||||
{
|
|
||||||
struct nf_conn *ct = (void *)ul_conntrack;
|
|
||||||
struct net *net = nf_ct_net(ct);
|
|
||||||
struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct);
|
|
||||||
|
|
||||||
BUG_ON(ecache == NULL);
|
|
||||||
|
|
||||||
if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) {
|
|
||||||
/* bad luck, let's retry again */
|
|
||||||
ecache->timeout.expires = jiffies +
|
|
||||||
(prandom_u32() % net->ct.sysctl_events_retry_timeout);
|
|
||||||
add_timer(&ecache->timeout);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
/* we've got the event delivered, now it's dying */
|
|
||||||
set_bit(IPS_DYING_BIT, &ct->status);
|
|
||||||
nf_ct_put(ct);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void nf_ct_dying_timeout(struct nf_conn *ct)
|
|
||||||
{
|
|
||||||
struct net *net = nf_ct_net(ct);
|
|
||||||
struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct);
|
|
||||||
|
|
||||||
BUG_ON(ecache == NULL);
|
|
||||||
|
|
||||||
/* set a new timer to retry event delivery */
|
|
||||||
setup_timer(&ecache->timeout, death_by_event, (unsigned long)ct);
|
|
||||||
ecache->timeout.expires = jiffies +
|
|
||||||
(prandom_u32() % net->ct.sysctl_events_retry_timeout);
|
|
||||||
add_timer(&ecache->timeout);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
|
bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
|
||||||
{
|
{
|
||||||
struct nf_conn_tstamp *tstamp;
|
struct nf_conn_tstamp *tstamp;
|
||||||
@ -394,15 +360,20 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
|
|||||||
if (tstamp && tstamp->stop == 0)
|
if (tstamp && tstamp->stop == 0)
|
||||||
tstamp->stop = ktime_to_ns(ktime_get_real());
|
tstamp->stop = ktime_to_ns(ktime_get_real());
|
||||||
|
|
||||||
if (!nf_ct_is_dying(ct) &&
|
if (nf_ct_is_dying(ct))
|
||||||
unlikely(nf_conntrack_event_report(IPCT_DESTROY, ct,
|
goto delete;
|
||||||
portid, report) < 0)) {
|
|
||||||
|
if (nf_conntrack_event_report(IPCT_DESTROY, ct,
|
||||||
|
portid, report) < 0) {
|
||||||
/* destroy event was not delivered */
|
/* destroy event was not delivered */
|
||||||
nf_ct_delete_from_lists(ct);
|
nf_ct_delete_from_lists(ct);
|
||||||
nf_ct_dying_timeout(ct);
|
nf_conntrack_ecache_delayed_work(nf_ct_net(ct));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
nf_conntrack_ecache_work(nf_ct_net(ct));
|
||||||
set_bit(IPS_DYING_BIT, &ct->status);
|
set_bit(IPS_DYING_BIT, &ct->status);
|
||||||
|
delete:
|
||||||
nf_ct_delete_from_lists(ct);
|
nf_ct_delete_from_lists(ct);
|
||||||
nf_ct_put(ct);
|
nf_ct_put(ct);
|
||||||
return true;
|
return true;
|
||||||
@ -1464,26 +1435,6 @@ void nf_conntrack_flush_report(struct net *net, u32 portid, int report)
|
|||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(nf_conntrack_flush_report);
|
EXPORT_SYMBOL_GPL(nf_conntrack_flush_report);
|
||||||
|
|
||||||
static void nf_ct_release_dying_list(struct net *net)
|
|
||||||
{
|
|
||||||
struct nf_conntrack_tuple_hash *h;
|
|
||||||
struct nf_conn *ct;
|
|
||||||
struct hlist_nulls_node *n;
|
|
||||||
int cpu;
|
|
||||||
|
|
||||||
for_each_possible_cpu(cpu) {
|
|
||||||
struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
|
|
||||||
|
|
||||||
spin_lock_bh(&pcpu->lock);
|
|
||||||
hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) {
|
|
||||||
ct = nf_ct_tuplehash_to_ctrack(h);
|
|
||||||
/* never fails to remove them, no listeners at this point */
|
|
||||||
nf_ct_kill(ct);
|
|
||||||
}
|
|
||||||
spin_unlock_bh(&pcpu->lock);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static int untrack_refs(void)
|
static int untrack_refs(void)
|
||||||
{
|
{
|
||||||
int cnt = 0, cpu;
|
int cnt = 0, cpu;
|
||||||
@ -1548,7 +1499,6 @@ i_see_dead_people:
|
|||||||
busy = 0;
|
busy = 0;
|
||||||
list_for_each_entry(net, net_exit_list, exit_list) {
|
list_for_each_entry(net, net_exit_list, exit_list) {
|
||||||
nf_ct_iterate_cleanup(net, kill_all, NULL, 0, 0);
|
nf_ct_iterate_cleanup(net, kill_all, NULL, 0, 0);
|
||||||
nf_ct_release_dying_list(net);
|
|
||||||
if (atomic_read(&net->ct.count) != 0)
|
if (atomic_read(&net->ct.count) != 0)
|
||||||
busy = 1;
|
busy = 1;
|
||||||
}
|
}
|
||||||
|
@ -29,6 +29,90 @@
|
|||||||
|
|
||||||
static DEFINE_MUTEX(nf_ct_ecache_mutex);
|
static DEFINE_MUTEX(nf_ct_ecache_mutex);
|
||||||
|
|
||||||
|
#define ECACHE_RETRY_WAIT (HZ/10)
|
||||||
|
|
||||||
|
/*
 * Outcome of one ecache_work_evict_list() pass over a per-cpu dying list:
 *
 * STATE_CONGESTED - delivering an IPCT_DESTROY event failed, walk aborted
 * STATE_RESTART   - a full batch of entries was handled, walk cut short
 * STATE_DONE      - the walk reached the end of the list
 */
enum retry_state {
|
||||||
|
STATE_CONGESTED,
|
||||||
|
STATE_RESTART,
|
||||||
|
STATE_DONE,
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Try to deliver pending IPCT_DESTROY events for one per-cpu dying list.
 *
 * Walks pcpu->dying with pcpu->lock held. Entries that already have the
 * dying bit set are skipped. For every other entry the destroy event is
 * sent; on success the entry gets IPS_DYING_BIT and is remembered in a
 * local batch of at most ARRAY_SIZE(refs) conntracks. After the lock is
 * dropped, nf_ct_put() is called on every batched conntrack.
 *
 * Returns STATE_CONGESTED if an event could not be delivered,
 * STATE_RESTART if the batch filled up before the walk finished, and
 * STATE_DONE otherwise.
 */
static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
|
||||||
|
{
|
||||||
|
/* batch of conntracks whose reference is dropped after unlocking */
struct nf_conn *refs[16];
|
||||||
|
struct nf_conntrack_tuple_hash *h;
|
||||||
|
struct hlist_nulls_node *n;
|
||||||
|
unsigned int evicted = 0;
|
||||||
|
enum retry_state ret = STATE_DONE;
|
||||||
|
|
||||||
|
/* plain spin_lock: the caller, ecache_work(), has disabled BHs */
spin_lock(&pcpu->lock);
|
||||||
|
|
||||||
|
hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) {
|
||||||
|
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
|
||||||
|
|
||||||
|
/* dying bit already set: not waiting for redelivery, leave it alone */
if (nf_ct_is_dying(ct))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* any non-zero result is treated as "could not deliver": stop here */
if (nf_conntrack_event(IPCT_DESTROY, ct)) {
|
||||||
|
ret = STATE_CONGESTED;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* we've got the event delivered, now it's dying */
|
||||||
|
set_bit(IPS_DYING_BIT, &ct->status);
|
||||||
|
refs[evicted] = ct;
|
||||||
|
|
||||||
|
/* batch is full: stop so the references can be dropped unlocked */
if (++evicted >= ARRAY_SIZE(refs)) {
|
||||||
|
ret = STATE_RESTART;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
spin_unlock(&pcpu->lock);
|
||||||
|
|
||||||
|
/* can't _put while holding lock */
|
||||||
|
while (evicted)
|
||||||
|
nf_ct_put(refs[--evicted]);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Delayed-work handler that retries destroy-event delivery.
 *
 * With bottom halves disabled, runs ecache_work_evict_list() on the
 * per-cpu list of every possible CPU of the owning netns_ct:
 *  - on STATE_CONGESTED the scan stops and the work is re-armed after
 *    ECACHE_RETRY_WAIT;
 *  - on STATE_RESTART the scan continues, and the work is re-armed with
 *    a delay of 0 unless a later list turns out to be congested;
 *  - if every list returned STATE_DONE the work is not re-armed.
 *
 * ecache_dwork_pending ends up true only when a non-zero delay was
 * chosen, i.e. in the congested case.
 */
static void ecache_work(struct work_struct *work)
|
||||||
|
{
|
||||||
|
struct netns_ct *ctnet =
|
||||||
|
container_of(work, struct netns_ct, ecache_dwork.work);
|
||||||
|
/* delay < 0 means "do not reschedule" */
int cpu, delay = -1;
|
||||||
|
struct ct_pcpu *pcpu;
|
||||||
|
|
||||||
|
local_bh_disable();
|
||||||
|
|
||||||
|
for_each_possible_cpu(cpu) {
|
||||||
|
enum retry_state ret;
|
||||||
|
|
||||||
|
pcpu = per_cpu_ptr(ctnet->pcpu_lists, cpu);
|
||||||
|
|
||||||
|
ret = ecache_work_evict_list(pcpu);
|
||||||
|
|
||||||
|
switch (ret) {
|
||||||
|
case STATE_CONGESTED:
|
||||||
|
delay = ECACHE_RETRY_WAIT;
|
||||||
|
/* skip the remaining CPUs and retry after the wait */
goto out;
|
||||||
|
case STATE_RESTART:
|
||||||
|
delay = 0;
|
||||||
|
break;
|
||||||
|
case STATE_DONE:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
out:
|
||||||
|
local_bh_enable();
|
||||||
|
|
||||||
|
/* only a timed retry counts as pending; an immediate rerun does not */
ctnet->ecache_dwork_pending = delay > 0;
|
||||||
|
if (delay >= 0)
|
||||||
|
schedule_delayed_work(&ctnet->ecache_dwork, delay);
|
||||||
|
}
|
||||||
|
|
||||||
/* deliver cached events and clear cache entry - must be called with locally
|
/* deliver cached events and clear cache entry - must be called with locally
|
||||||
* disabled softirqs */
|
* disabled softirqs */
|
||||||
void nf_ct_deliver_cached_events(struct nf_conn *ct)
|
void nf_ct_deliver_cached_events(struct nf_conn *ct)
|
||||||
@ -157,7 +241,6 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
|
|||||||
|
|
||||||
#define NF_CT_EVENTS_DEFAULT 1
|
#define NF_CT_EVENTS_DEFAULT 1
|
||||||
static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT;
|
static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT;
|
||||||
static int nf_ct_events_retry_timeout __read_mostly = 15*HZ;
|
|
||||||
|
|
||||||
#ifdef CONFIG_SYSCTL
|
#ifdef CONFIG_SYSCTL
|
||||||
static struct ctl_table event_sysctl_table[] = {
|
static struct ctl_table event_sysctl_table[] = {
|
||||||
@ -168,13 +251,6 @@ static struct ctl_table event_sysctl_table[] = {
|
|||||||
.mode = 0644,
|
.mode = 0644,
|
||||||
.proc_handler = proc_dointvec,
|
.proc_handler = proc_dointvec,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
.procname = "nf_conntrack_events_retry_timeout",
|
|
||||||
.data = &init_net.ct.sysctl_events_retry_timeout,
|
|
||||||
.maxlen = sizeof(unsigned int),
|
|
||||||
.mode = 0644,
|
|
||||||
.proc_handler = proc_dointvec_jiffies,
|
|
||||||
},
|
|
||||||
{}
|
{}
|
||||||
};
|
};
|
||||||
#endif /* CONFIG_SYSCTL */
|
#endif /* CONFIG_SYSCTL */
|
||||||
@ -196,7 +272,6 @@ static int nf_conntrack_event_init_sysctl(struct net *net)
|
|||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
table[0].data = &net->ct.sysctl_events;
|
table[0].data = &net->ct.sysctl_events;
|
||||||
table[1].data = &net->ct.sysctl_events_retry_timeout;
|
|
||||||
|
|
||||||
/* Don't export sysctls to unprivileged users */
|
/* Don't export sysctls to unprivileged users */
|
||||||
if (net->user_ns != &init_user_ns)
|
if (net->user_ns != &init_user_ns)
|
||||||
@ -238,12 +313,13 @@ static void nf_conntrack_event_fini_sysctl(struct net *net)
|
|||||||
int nf_conntrack_ecache_pernet_init(struct net *net)
|
int nf_conntrack_ecache_pernet_init(struct net *net)
|
||||||
{
|
{
|
||||||
net->ct.sysctl_events = nf_ct_events;
|
net->ct.sysctl_events = nf_ct_events;
|
||||||
net->ct.sysctl_events_retry_timeout = nf_ct_events_retry_timeout;
|
INIT_DELAYED_WORK(&net->ct.ecache_dwork, ecache_work);
|
||||||
return nf_conntrack_event_init_sysctl(net);
|
return nf_conntrack_event_init_sysctl(net);
|
||||||
}
|
}
|
||||||
|
|
||||||
void nf_conntrack_ecache_pernet_fini(struct net *net)
|
void nf_conntrack_ecache_pernet_fini(struct net *net)
|
||||||
{
|
{
|
||||||
|
cancel_delayed_work_sync(&net->ct.ecache_dwork);
|
||||||
nf_conntrack_event_fini_sysctl(net);
|
nf_conntrack_event_fini_sysctl(net);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user