linux-next/include/net/busy_poll.h
Peter Zijlstra 3708983452 sched, net: Fixup busy_loop_us_clock()
The only valid use of preempt_enable_no_resched() is if the very next
line is schedule() or if we know preemption cannot actually be enabled
by that statement due to known more preempt_count 'refs'.

This busy_poll stuff looks to be completely and utterly broken,
sched_clock() can return utter garbage with interrupts enabled (rare
but still) and it can drift unbounded between CPUs.

This means that if you get preempted/migrated and your new CPU is
years behind on the previous CPU we get to busy spin for a _very_ long
time.

There is a _REASON_ sched_clock() warns about preemptability -
papering over it with a preempt_disable()/preempt_enable_no_resched()
is just terminal brain damage on so many levels.

Replace sched_clock() usage with local_clock() which has a bounded
drift between CPUs (<2 jiffies).

There is a further problem with the entire busy wait poll thing in
that the spin time is additive to the syscall timeout, not inclusive.

Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: rui.zhang@intel.com
Cc: jacob.jun.pan@linux.intel.com
Cc: Mike Galbraith <bitbucket@online.de>
Cc: hpa@zytor.com
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: lenb@kernel.org
Cc: rjw@rjwysocki.net
Cc: Eliezer Tamir <eliezer.tamir@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/20131119151338.GF3694@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2014-01-13 17:39:11 +01:00

170 lines
4.0 KiB
C

/*
* net busy poll support
* Copyright(c) 2013 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*
* Author: Eliezer Tamir
*
* Contact Information:
* e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
*/
#ifndef _LINUX_NET_BUSY_POLL_H
#define _LINUX_NET_BUSY_POLL_H
#include <linux/netdevice.h>
#include <net/ip.h>
#ifdef CONFIG_NET_RX_BUSY_POLL
struct napi_struct;
extern unsigned int sysctl_net_busy_read __read_mostly;
extern unsigned int sysctl_net_busy_poll __read_mostly;
/* Negative return values from the ndo_busy_poll() driver hook.
 * sk_busy_loop() treats LL_FLUSH_FAILED as a permanent failure and stops
 * polling; LL_FLUSH_BUSY gets no special handling in this header.
 * NOTE(review): LL_FLUSH_BUSY is presumably a transient "try again" result -
 * confirm against the drivers implementing the hook.
 * The values are parenthesized so they expand safely inside any expression.
 */
#define LL_FLUSH_FAILED		(-1)
#define LL_FLUSH_BUSY		(-2)
/* Tell whether busy polling is switched on globally, i.e. whether the
 * sysctl_net_busy_poll value is non-zero.
 */
static inline bool net_busy_loop_on(void)
{
	return sysctl_net_busy_poll != 0;
}
/* Cheap timestamp used for busy-poll deadlines: the local_clock() reading
 * shifted right by 10 bits, i.e. divided by 1024 rather than by 1000.
 * NOTE(review): presumably local_clock() reports nanoseconds, making this an
 * approximate microsecond count - confirm.
 */
static inline u64 busy_loop_us_clock(void)
{
	u64 now = local_clock();

	return now >> 10;
}
/* Deadline for busy polling on behalf of @sk: the current
 * busy_loop_us_clock() reading plus the socket's own sk_ll_usec budget.
 * sk_ll_usec is sampled exactly once via ACCESS_ONCE(). The sum is returned
 * as an unsigned long, so it is meant to be compared with
 * busy_loop_timeout() rather than read as an absolute time.
 */
static inline unsigned long sk_busy_loop_end_time(struct sock *sk)
{
	return ACCESS_ONCE(sk->sk_ll_usec) + busy_loop_us_clock();
}
/* Deadline variant for poll/select: there the budget is the global
 * sysctl_net_busy_poll value (sampled once via ACCESS_ONCE()) instead of a
 * per-socket setting. It is added to the current busy_loop_us_clock()
 * reading.
 */
static inline unsigned long busy_loop_end_time(void)
{
	return ACCESS_ONCE(sysctl_net_busy_poll) + busy_loop_us_clock();
}
/* Decide whether busy polling may be attempted for @sk right now.
 * All of the following must hold: the socket has a non-zero sk_ll_usec
 * budget, it has a non-zero sk_napi_id, no reschedule is needed, and the
 * current task has no signal pending.
 */
static inline bool sk_can_busy_loop(struct sock *sk)
{
	/* socket is not set up for busy polling */
	if (!sk->sk_ll_usec || !sk->sk_napi_id)
		return false;

	/* never spin when the task should yield or handle a signal */
	return !(need_resched() || signal_pending(current));
}
/* Check whether the busy-poll deadline @end_time has been passed.
 * The current busy_loop_us_clock() reading is narrowed to unsigned long and
 * compared with time_after(); returns true when "now" is after @end_time.
 */
static inline bool busy_loop_timeout(unsigned long end_time)
{
	unsigned long cur = busy_loop_us_clock();

	return time_after(cur, end_time);
}
/* Busy-poll the NAPI context recorded for @sk until there is a reason to stop.
 *
 * The NAPI context is looked up by sk->sk_napi_id and the driver's
 * ndo_busy_poll() hook is called on it repeatedly. Polling ends when the
 * hook returns LL_FLUSH_FAILED, when @nonblock is set (single pass), when
 * sk->sk_receive_queue is no longer empty, when a reschedule is needed, or
 * when the deadline from sk_busy_loop_end_time() has passed.
 *
 * When used in sock_poll() nonblock is known at compile time to be true,
 * so the loop and the deadline computation will be optimized out.
 *
 * Returns false without polling if no NAPI context or no ndo_busy_poll hook
 * is found; otherwise returns true iff sk->sk_receive_queue is non-empty
 * once polling has ended.
 */
static inline bool sk_busy_loop(struct sock *sk, int nonblock)
{
	const struct net_device_ops *dev_ops;
	struct napi_struct *napi;
	unsigned long deadline = 0;
	bool got_data = false;
	int polled;

	/* only a blocking caller spins, so only it needs a deadline */
	if (!nonblock)
		deadline = sk_busy_loop_end_time(sk);

	/*
	 * rcu read lock for napi hash
	 * bh so we don't race with net_rx_action
	 */
	rcu_read_lock_bh();

	napi = napi_by_id(sk->sk_napi_id);
	if (!napi)
		goto out;

	dev_ops = napi->dev->netdev_ops;
	if (!dev_ops->ndo_busy_poll)
		goto out;

	for (;;) {
		polled = dev_ops->ndo_busy_poll(napi);

		/* permanent failure */
		if (polled == LL_FLUSH_FAILED)
			break;

		if (polled > 0) {
			/* local bh are disabled so it is ok to use _BH */
			NET_ADD_STATS_BH(sock_net(sk),
					 LINUX_MIB_BUSYPOLLRXPACKETS, polled);
		}

		cpu_relax();

		/* stop conditions, checked in this exact order */
		if (nonblock)
			break;
		if (!skb_queue_empty(&sk->sk_receive_queue))
			break;
		if (need_resched())
			break;
		if (busy_loop_timeout(deadline))
			break;
	}

	got_data = !skb_queue_empty(&sk->sk_receive_queue);
out:
	rcu_read_unlock_bh();
	return got_data;
}
/* Called from the NIC receive handler: stamp @skb with the napi_id of the
 * NAPI context (@napi) it was received on.
 */
static inline void skb_mark_napi_id(struct sk_buff *skb,
				    struct napi_struct *napi)
{
	skb->napi_id = napi->napi_id;
}
/* Called from the protocol handler: propagate the napi_id carried by @skb
 * to the socket @sk by storing it in sk->sk_napi_id.
 */
static inline void sk_mark_napi_id(struct sock *sk, struct sk_buff *skb)
{
	sk->sk_napi_id = skb->napi_id;
}
#else /* CONFIG_NET_RX_BUSY_POLL */
/* Stub for kernels built without CONFIG_NET_RX_BUSY_POLL: busy polling is
 * never enabled. Returns bool so the signature matches the variant used
 * when CONFIG_NET_RX_BUSY_POLL is set; the value is still zero/false.
 */
static inline bool net_busy_loop_on(void)
{
	return false;
}
/* Stub for kernels built without CONFIG_NET_RX_BUSY_POLL: there is no
 * busy-poll deadline, so the end time is always 0.
 */
static inline unsigned long busy_loop_end_time(void)
{
	return 0UL;
}
/* Stub for kernels built without CONFIG_NET_RX_BUSY_POLL: no socket can
 * busy poll, so @sk is ignored and the answer is always false.
 */
static inline bool sk_can_busy_loop(struct sock *sk)
{
	/* busy polling is compiled out */
	return false;
}
/* Stub for kernels built without CONFIG_NET_RX_BUSY_POLL: nothing is
 * recorded on @skb.
 */
static inline void skb_mark_napi_id(struct sk_buff *skb,
				    struct napi_struct *napi)
{
	/* intentionally empty: busy polling is compiled out */
}
/* Stub for kernels built without CONFIG_NET_RX_BUSY_POLL: nothing is
 * propagated from @skb to @sk.
 */
static inline void sk_mark_napi_id(struct sock *sk, struct sk_buff *skb)
{
	/* intentionally empty: busy polling is compiled out */
}
/* Stub for kernels built without CONFIG_NET_RX_BUSY_POLL: @end_time is
 * ignored and every deadline is reported as already expired.
 */
static inline bool busy_loop_timeout(unsigned long end_time)
{
	/* busy polling is compiled out, so there is nothing to wait for */
	return true;
}
/* Stub for kernels built without CONFIG_NET_RX_BUSY_POLL: no polling is
 * done; always returns false regardless of @sk and @nonblock.
 */
static inline bool sk_busy_loop(struct sock *sk, int nonblock)
{
	/* busy polling is compiled out */
	return false;
}
#endif /* CONFIG_NET_RX_BUSY_POLL */
#endif /* _LINUX_NET_BUSY_POLL_H */