mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-16 09:56:46 +00:00
Merge git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched
* git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched: (96 commits) sched: keep total / count stats in addition to the max for sched, futex: detach sched.h and futex.h sched: fix: don't take a mutex from interrupt context sched: print backtrace of running tasks too printk: use ktime_get() softlockup: fix signedness sched: latencytop support sched: fix goto retry in pick_next_task_rt() timers: don't #error on higher HZ values sched: monitor clock underflows in /proc/sched_debug sched: fix rq->clock warps on frequency changes sched: fix, always create kernel threads with normal priority debug: clean up kernel/profile.c sched: remove the !PREEMPT_BKL code sched: make PREEMPT_BKL the default debug: track and print last unloaded module in the oops trace debug: show being-loaded/being-unloaded indicator for modules sched: rt-watchdog: fix .rlim_max = RLIM_INFINITY sched: rt-group: reduce rescheduling hrtimer: unlock hrtimer_wakeup ...
This commit is contained in:
commit
0008bf5440
@ -9,8 +9,8 @@ The first thing resembling RCU was published in 1980, when Kung and Lehman
|
||||
[Kung80] recommended use of a garbage collector to defer destruction
|
||||
of nodes in a parallel binary search tree in order to simplify its
|
||||
implementation. This works well in environments that have garbage
|
||||
collectors, but current production garbage collectors incur significant
|
||||
read-side overhead.
|
||||
collectors, but most production garbage collectors incur significant
|
||||
overhead.
|
||||
|
||||
In 1982, Manber and Ladner [Manber82,Manber84] recommended deferring
|
||||
destruction until all threads running at that time have terminated, again
|
||||
@ -99,16 +99,25 @@ locking, reduces contention, reduces memory latency for readers, and
|
||||
parallelizes pipeline stalls and memory latency for writers. However,
|
||||
these techniques still impose significant read-side overhead in the
|
||||
form of memory barriers. Researchers at Sun worked along similar lines
|
||||
in the same timeframe [HerlihyLM02,HerlihyLMS03]. These techniques
|
||||
can be thought of as inside-out reference counts, where the count is
|
||||
represented by the number of hazard pointers referencing a given data
|
||||
structure (rather than the more conventional counter field within the
|
||||
data structure itself).
|
||||
in the same timeframe [HerlihyLM02]. These techniques can be thought
|
||||
of as inside-out reference counts, where the count is represented by the
|
||||
number of hazard pointers referencing a given data structure (rather than
|
||||
the more conventional counter field within the data structure itself).
|
||||
|
||||
By the same token, RCU can be thought of as a "bulk reference count",
|
||||
where some form of reference counter covers all references by a given CPU
|
||||
or thread during a set timeframe. This timeframe is related to, but
|
||||
not necessarily exactly the same as, an RCU grace period. In classic
|
||||
RCU, the reference counter is the per-CPU bit in the "bitmask" field,
|
||||
and each such bit covers all references that might have been made by
|
||||
the corresponding CPU during the prior grace period. Of course, RCU
|
||||
can be thought of in other terms as well.
|
||||
|
||||
In 2003, the K42 group described how RCU could be used to create
|
||||
hot-pluggable implementations of operating-system functions. Later that
|
||||
year saw a paper describing an RCU implementation of System V IPC
|
||||
[Arcangeli03], and an introduction to RCU in Linux Journal [McKenney03a].
|
||||
hot-pluggable implementations of operating-system functions [Appavoo03a].
|
||||
Later that year saw a paper describing an RCU implementation of System
|
||||
V IPC [Arcangeli03], and an introduction to RCU in Linux Journal
|
||||
[McKenney03a].
|
||||
|
||||
2004 has seen a Linux-Journal article on use of RCU in dcache
|
||||
[McKenney04a], a performance comparison of locking to RCU on several
|
||||
@ -117,10 +126,19 @@ number of operating-system kernels [PaulEdwardMcKenneyPhD], a paper
|
||||
describing how to make RCU safe for soft-realtime applications [Sarma04c],
|
||||
and a paper describing SELinux performance with RCU [JamesMorris04b].
|
||||
|
||||
2005 has seen further adaptation of RCU to realtime use, permitting
|
||||
2005 brought further adaptation of RCU to realtime use, permitting
|
||||
preemption of RCU realtime critical sections [PaulMcKenney05a,
|
||||
PaulMcKenney05b].
|
||||
|
||||
2006 saw the first best-paper award for an RCU paper [ThomasEHart2006a],
|
||||
as well as further work on efficient implementations of preemptible
|
||||
RCU [PaulEMcKenney2006b], but priority-boosting of RCU read-side critical
|
||||
sections proved elusive. An RCU implementation permitting general
|
||||
blocking in read-side critical sections appeared [PaulEMcKenney2006c], and
|
||||
Robert Olsson described an RCU-protected trie-hash combination
|
||||
[RobertOlsson2006a].
|
||||
|
||||
|
||||
Bibtex Entries
|
||||
|
||||
@article{Kung80
|
||||
@ -203,6 +221,41 @@ Bibtex Entries
|
||||
,Address="New Orleans, LA"
|
||||
}
|
||||
|
||||
@conference{Pu95a,
|
||||
Author = "Calton Pu and Tito Autrey and Andrew Black and Charles Consel and
|
||||
Crispin Cowan and Jon Inouye and Lakshmi Kethana and Jonathan Walpole and
|
||||
Ke Zhang",
|
||||
Title = "Optimistic Incremental Specialization: Streamlining a Commercial
|
||||
Operating System",
|
||||
Booktitle = "15\textsuperscript{th} ACM Symposium on
|
||||
Operating Systems Principles (SOSP'95)",
|
||||
address = "Copper Mountain, CO",
|
||||
month="December",
|
||||
year="1995",
|
||||
pages="314-321",
|
||||
annotation="
|
||||
Uses a replugger, but with a flag to signal when people are
|
||||
using the resource at hand. Only one reader at a time.
|
||||
"
|
||||
}
|
||||
|
||||
@conference{Cowan96a,
|
||||
Author = "Crispin Cowan and Tito Autrey and Charles Krasic and
|
||||
Calton Pu and Jonathan Walpole",
|
||||
Title = "Fast Concurrent Dynamic Linking for an Adaptive Operating System",
|
||||
Booktitle = "International Conference on Configurable Distributed Systems
|
||||
(ICCDS'96)",
|
||||
address = "Annapolis, MD",
|
||||
month="May",
|
||||
year="1996",
|
||||
pages="108",
|
||||
isbn="0-8186-7395-8",
|
||||
annotation="
|
||||
Uses a replugger, but with a counter to signal when people are
|
||||
using the resource at hand. Allows multiple readers.
|
||||
"
|
||||
}
|
||||
|
||||
@techreport{Slingwine95
|
||||
,author="John D. Slingwine and Paul E. McKenney"
|
||||
,title="Apparatus and Method for Achieving Reduced Overhead Mutual
|
||||
@ -312,6 +365,49 @@ Andrea Arcangeli and Andi Kleen and Orran Krieger and Rusty Russell"
|
||||
[Viewed June 23, 2004]"
|
||||
}
|
||||
|
||||
@conference{Michael02a
|
||||
,author="Maged M. Michael"
|
||||
,title="Safe Memory Reclamation for Dynamic Lock-Free Objects Using Atomic
|
||||
Reads and Writes"
|
||||
,Year="2002"
|
||||
,Month="August"
|
||||
,booktitle="{Proceedings of the 21\textsuperscript{st} Annual ACM
|
||||
Symposium on Principles of Distributed Computing}"
|
||||
,pages="21-30"
|
||||
,annotation="
|
||||
Each thread keeps an array of pointers to items that it is
|
||||
currently referencing. Sort of an inside-out garbage collection
|
||||
mechanism, but one that requires the accessing code to explicitly
|
||||
state its needs. Also requires read-side memory barriers on
|
||||
most architectures.
|
||||
"
|
||||
}
|
||||
|
||||
@conference{Michael02b
|
||||
,author="Maged M. Michael"
|
||||
,title="High Performance Dynamic Lock-Free Hash Tables and List-Based Sets"
|
||||
,Year="2002"
|
||||
,Month="August"
|
||||
,booktitle="{Proceedings of the 14\textsuperscript{th} Annual ACM
|
||||
Symposium on Parallel
|
||||
Algorithms and Architecture}"
|
||||
,pages="73-82"
|
||||
,annotation="
|
||||
Like the title says...
|
||||
"
|
||||
}
|
||||
|
||||
@InProceedings{HerlihyLM02
|
||||
,author={Maurice Herlihy and Victor Luchangco and Mark Moir}
|
||||
,title="The Repeat Offender Problem: A Mechanism for Supporting Dynamic-Sized,
|
||||
Lock-Free Data Structures"
|
||||
,booktitle={Proceedings of 16\textsuperscript{th} International
|
||||
Symposium on Distributed Computing}
|
||||
,year=2002
|
||||
,month="October"
|
||||
,pages="339-353"
|
||||
}
|
||||
|
||||
@article{Appavoo03a
|
||||
,author="J. Appavoo and K. Hui and C. A. N. Soules and R. W. Wisniewski and
|
||||
D. M. {Da Silva} and O. Krieger and M. A. Auslander and D. J. Edelsohn and
|
||||
@ -447,3 +543,95 @@ Oregon Health and Sciences University"
|
||||
Realtime turns into making RCU yet more realtime friendly.
|
||||
"
|
||||
}
|
||||
|
||||
@conference{ThomasEHart2006a
|
||||
,Author="Thomas E. Hart and Paul E. McKenney and Angela Demke Brown"
|
||||
,Title="Making Lockless Synchronization Fast: Performance Implications
|
||||
of Memory Reclamation"
|
||||
,Booktitle="20\textsuperscript{th} {IEEE} International Parallel and
|
||||
Distributed Processing Symposium"
|
||||
,month="April"
|
||||
,year="2006"
|
||||
,day="25-29"
|
||||
,address="Rhodes, Greece"
|
||||
,annotation={
|
||||
Compares QSBR (AKA "classic RCU"), HPBR, EBR, and lock-free
|
||||
reference counting.
|
||||
}
|
||||
}
|
||||
|
||||
@Conference{PaulEMcKenney2006b
|
||||
,Author="Paul E. McKenney and Dipankar Sarma and Ingo Molnar and
|
||||
Suparna Bhattacharya"
|
||||
,Title="Extending RCU for Realtime and Embedded Workloads"
|
||||
,Booktitle="{Ottawa Linux Symposium}"
|
||||
,Month="July"
|
||||
,Year="2006"
|
||||
,pages="v2 123-138"
|
||||
,note="Available:
|
||||
\url{http://www.linuxsymposium.org/2006/view_abstract.php?content_key=184}
|
||||
\url{http://www.rdrop.com/users/paulmck/RCU/OLSrtRCU.2006.08.11a.pdf}
|
||||
[Viewed January 1, 2007]"
|
||||
,annotation="
|
||||
Described how to improve the -rt implementation of realtime RCU.
|
||||
"
|
||||
}
|
||||
|
||||
@unpublished{PaulEMcKenney2006c
|
||||
,Author="Paul E. McKenney"
|
||||
,Title="Sleepable {RCU}"
|
||||
,month="October"
|
||||
,day="9"
|
||||
,year="2006"
|
||||
,note="Available:
|
||||
\url{http://lwn.net/Articles/202847/}
|
||||
Revised:
|
||||
\url{http://www.rdrop.com/users/paulmck/RCU/srcu.2007.01.14a.pdf}
|
||||
[Viewed August 21, 2006]"
|
||||
,annotation="
|
||||
LWN article introducing SRCU.
|
||||
"
|
||||
}
|
||||
|
||||
@unpublished{RobertOlsson2006a
|
||||
,Author="Robert Olsson and Stefan Nilsson"
|
||||
,Title="{TRASH}: A dynamic {LC}-trie and hash data structure"
|
||||
,month="August"
|
||||
,day="18"
|
||||
,year="2006"
|
||||
,note="Available:
|
||||
\url{http://www.nada.kth.se/~snilsson/public/papers/trash/trash.pdf}
|
||||
[Viewed February 24, 2007]"
|
||||
,annotation="
|
||||
RCU-protected dynamic trie-hash combination.
|
||||
"
|
||||
}
|
||||
|
||||
@unpublished{ThomasEHart2007a
|
||||
,Author="Thomas E. Hart and Paul E. McKenney and Angela Demke Brown and Jonathan Walpole"
|
||||
,Title="Performance of memory reclamation for lockless synchronization"
|
||||
,journal="J. Parallel Distrib. Comput."
|
||||
,year="2007"
|
||||
,note="To appear in J. Parallel Distrib. Comput.
|
||||
\url{doi=10.1016/j.jpdc.2007.04.010}"
|
||||
,annotation={
|
||||
Compares QSBR (AKA "classic RCU"), HPBR, EBR, and lock-free
|
||||
reference counting. Journal version of ThomasEHart2006a.
|
||||
}
|
||||
}
|
||||
|
||||
@unpublished{PaulEMcKenney2007QRCUspin
|
||||
,Author="Paul E. McKenney"
|
||||
,Title="Using Promela and Spin to verify parallel algorithms"
|
||||
,month="August"
|
||||
,day="1"
|
||||
,year="2007"
|
||||
,note="Available:
|
||||
\url{http://lwn.net/Articles/243851/}
|
||||
[Viewed September 8, 2007]"
|
||||
,annotation="
|
||||
LWN article describing Promela and spin, and also using Oleg
|
||||
Nesterov's QRCU as an example (with Paul McKenney's fastpath).
|
||||
"
|
||||
}
|
||||
|
||||
|
@ -36,6 +36,14 @@ o How can the updater tell when a grace period has completed
|
||||
executed in user mode, or executed in the idle loop, we can
|
||||
safely free up that item.
|
||||
|
||||
Preemptible variants of RCU (CONFIG_PREEMPT_RCU) get the
|
||||
same effect, but require that the readers manipulate CPU-local
|
||||
counters. These counters allow limited types of blocking
|
||||
within RCU read-side critical sections. SRCU also uses
|
||||
CPU-local counters, and permits general blocking within
|
||||
RCU read-side critical sections. These two variants of
|
||||
RCU detect grace periods by sampling these counters.
|
||||
|
||||
o If I am running on a uniprocessor kernel, which can only do one
|
||||
thing at a time, why should I wait for a grace period?
|
||||
|
||||
@ -46,7 +54,10 @@ o How can I see where RCU is currently used in the Linux kernel?
|
||||
Search for "rcu_read_lock", "rcu_read_unlock", "call_rcu",
|
||||
"rcu_read_lock_bh", "rcu_read_unlock_bh", "call_rcu_bh",
|
||||
"srcu_read_lock", "srcu_read_unlock", "synchronize_rcu",
|
||||
"synchronize_net", and "synchronize_srcu".
|
||||
"synchronize_net", "synchronize_srcu", and the other RCU
|
||||
primitives. Or grab one of the cscope databases from:
|
||||
|
||||
http://www.rdrop.com/users/paulmck/RCU/linuxusage/rculocktab.html
|
||||
|
||||
o What guidelines should I follow when writing code that uses RCU?
|
||||
|
||||
@ -67,7 +78,11 @@ o I hear that RCU is patented? What is with that?
|
||||
|
||||
o I hear that RCU needs work in order to support realtime kernels?
|
||||
|
||||
Yes, work in progress.
|
||||
This work is largely completed. Realtime-friendly RCU can be
|
||||
enabled via the CONFIG_PREEMPT_RCU kernel configuration parameter.
|
||||
However, work is in progress for enabling priority boosting of
|
||||
preempted RCU read-side critical sections.  This is needed if you
|
||||
have CPU-bound realtime threads.
|
||||
|
||||
o Where can I find more information on RCU?
|
||||
|
||||
|
@ -46,12 +46,13 @@ stat_interval The number of seconds between output of torture
|
||||
|
||||
shuffle_interval
|
||||
The number of seconds to keep the test threads affinitied
|
||||
to a particular subset of the CPUs. Used in conjunction
|
||||
with test_no_idle_hz.
|
||||
to a particular subset of the CPUs, defaults to 5 seconds.
|
||||
Used in conjunction with test_no_idle_hz.
|
||||
|
||||
test_no_idle_hz Whether or not to test the ability of RCU to operate in
|
||||
a kernel that disables the scheduling-clock interrupt to
|
||||
idle CPUs. Boolean parameter, "1" to test, "0" otherwise.
|
||||
Defaults to omitting this test.
|
||||
|
||||
torture_type The type of RCU to test: "rcu" for the rcu_read_lock() API,
|
||||
"rcu_sync" for rcu_read_lock() with synchronous reclamation,
|
||||
@ -82,8 +83,6 @@ be evident. ;-)
|
||||
|
||||
The entries are as follows:
|
||||
|
||||
o "ggp": The number of counter flips (or batches) since boot.
|
||||
|
||||
o "rtc": The hexadecimal address of the structure currently visible
|
||||
to readers.
|
||||
|
||||
@ -117,8 +116,8 @@ o "Reader Pipe": Histogram of "ages" of structures seen by readers.
|
||||
o "Reader Batch": Another histogram of "ages" of structures seen
|
||||
by readers, but in terms of counter flips (or batches) rather
|
||||
than in terms of grace periods. The legal number of non-zero
|
||||
entries is again two. The reason for this separate view is
|
||||
that it is easier to get the third entry to show up in the
|
||||
entries is again two. The reason for this separate view is that
|
||||
it is sometimes easier to get the third entry to show up in the
|
||||
"Reader Batch" list than in the "Reader Pipe" list.
|
||||
|
||||
o "Free-Block Circulation": Shows the number of torture structures
|
||||
|
@ -109,12 +109,13 @@ Never use anything other than cpumask_t to represent bitmap of CPUs.
|
||||
for_each_cpu_mask(x,mask) - Iterate over some random collection of cpu mask.
|
||||
|
||||
#include <linux/cpu.h>
|
||||
lock_cpu_hotplug() and unlock_cpu_hotplug():
|
||||
get_online_cpus() and put_online_cpus():
|
||||
|
||||
The above calls are used to inhibit cpu hotplug operations. While holding the
|
||||
cpucontrol mutex, cpu_online_map will not change. If you merely need to avoid
|
||||
cpus going away, you could also use preempt_disable() and preempt_enable()
|
||||
for those sections. Just remember the critical section cannot call any
|
||||
The above calls are used to inhibit cpu hotplug operations. While the
|
||||
cpu_hotplug.refcount is non-zero, the cpu_online_map will not change.
|
||||
If you merely need to avoid cpus going away, you could also use
|
||||
preempt_disable() and preempt_enable() for those sections.
|
||||
Just remember the critical section cannot call any
|
||||
function that can sleep or schedule this process away. The preempt_disable()
|
||||
will work as long as stop_machine_run() is used to take a cpu down.
|
||||
|
||||
|
@ -79,17 +79,6 @@ static unsigned long dummy_gettimeoffset(void)
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* An implementation of printk_clock() independent from
|
||||
* sched_clock(). This avoids non-bootable kernels when
|
||||
* printk_clock is enabled.
|
||||
*/
|
||||
unsigned long long printk_clock(void)
|
||||
{
|
||||
return (unsigned long long)(jiffies - INITIAL_JIFFIES) *
|
||||
(1000000000 / HZ);
|
||||
}
|
||||
|
||||
static unsigned long next_rtc_update;
|
||||
|
||||
/*
|
||||
|
@ -71,8 +71,6 @@ unsigned long __per_cpu_offset[NR_CPUS];
|
||||
EXPORT_SYMBOL(__per_cpu_offset);
|
||||
#endif
|
||||
|
||||
extern void ia64_setup_printk_clock(void);
|
||||
|
||||
DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info);
|
||||
DEFINE_PER_CPU(unsigned long, local_per_cpu_offset);
|
||||
unsigned long ia64_cycles_per_usec;
|
||||
@ -507,8 +505,6 @@ setup_arch (char **cmdline_p)
|
||||
/* process SAL system table: */
|
||||
ia64_sal_init(__va(efi.sal_systab));
|
||||
|
||||
ia64_setup_printk_clock();
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
cpu_physical_id(0) = hard_smp_processor_id();
|
||||
#endif
|
||||
|
@ -344,33 +344,6 @@ udelay (unsigned long usecs)
|
||||
}
|
||||
EXPORT_SYMBOL(udelay);
|
||||
|
||||
static unsigned long long ia64_itc_printk_clock(void)
|
||||
{
|
||||
if (ia64_get_kr(IA64_KR_PER_CPU_DATA))
|
||||
return sched_clock();
|
||||
return 0;
|
||||
}
|
||||
|
||||
static unsigned long long ia64_default_printk_clock(void)
|
||||
{
|
||||
return (unsigned long long)(jiffies_64 - INITIAL_JIFFIES) *
|
||||
(1000000000/HZ);
|
||||
}
|
||||
|
||||
unsigned long long (*ia64_printk_clock)(void) = &ia64_default_printk_clock;
|
||||
|
||||
unsigned long long printk_clock(void)
|
||||
{
|
||||
return ia64_printk_clock();
|
||||
}
|
||||
|
||||
void __init
|
||||
ia64_setup_printk_clock(void)
|
||||
{
|
||||
if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT))
|
||||
ia64_printk_clock = ia64_itc_printk_clock;
|
||||
}
|
||||
|
||||
/* IA64 doesn't cache the timezone */
|
||||
void update_vsyscall_tz(void)
|
||||
{
|
||||
|
@ -64,7 +64,6 @@ extern void sn_timer_init(void);
|
||||
extern unsigned long last_time_offset;
|
||||
extern void (*ia64_mark_idle) (int);
|
||||
extern void snidle(int);
|
||||
extern unsigned long long (*ia64_printk_clock)(void);
|
||||
|
||||
unsigned long sn_rtc_cycles_per_second;
|
||||
EXPORT_SYMBOL(sn_rtc_cycles_per_second);
|
||||
@ -360,14 +359,6 @@ sn_scan_pcdp(void)
|
||||
|
||||
static unsigned long sn2_rtc_initial;
|
||||
|
||||
static unsigned long long ia64_sn2_printk_clock(void)
|
||||
{
|
||||
unsigned long rtc_now = rtc_time();
|
||||
|
||||
return (rtc_now - sn2_rtc_initial) *
|
||||
(1000000000 / sn_rtc_cycles_per_second);
|
||||
}
|
||||
|
||||
/**
|
||||
* sn_setup - SN platform setup routine
|
||||
* @cmdline_p: kernel command line
|
||||
@ -468,8 +459,6 @@ void __init sn_setup(char **cmdline_p)
|
||||
|
||||
platform_intr_list[ACPI_INTERRUPT_CPEI] = IA64_CPE_VECTOR;
|
||||
|
||||
ia64_printk_clock = ia64_sn2_printk_clock;
|
||||
|
||||
printk("SGI SAL version %x.%02x\n", version >> 8, version & 0x00FF);
|
||||
|
||||
/*
|
||||
|
@ -58,13 +58,13 @@ asmlinkage long mipsmt_sys_sched_setaffinity(pid_t pid, unsigned int len,
|
||||
if (copy_from_user(&new_mask, user_mask_ptr, sizeof(new_mask)))
|
||||
return -EFAULT;
|
||||
|
||||
lock_cpu_hotplug();
|
||||
get_online_cpus();
|
||||
read_lock(&tasklist_lock);
|
||||
|
||||
p = find_process_by_pid(pid);
|
||||
if (!p) {
|
||||
read_unlock(&tasklist_lock);
|
||||
unlock_cpu_hotplug();
|
||||
put_online_cpus();
|
||||
return -ESRCH;
|
||||
}
|
||||
|
||||
@ -106,7 +106,7 @@ asmlinkage long mipsmt_sys_sched_setaffinity(pid_t pid, unsigned int len,
|
||||
|
||||
out_unlock:
|
||||
put_task_struct(p);
|
||||
unlock_cpu_hotplug();
|
||||
put_online_cpus();
|
||||
return retval;
|
||||
}
|
||||
|
||||
@ -125,7 +125,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len,
|
||||
if (len < real_len)
|
||||
return -EINVAL;
|
||||
|
||||
lock_cpu_hotplug();
|
||||
get_online_cpus();
|
||||
read_lock(&tasklist_lock);
|
||||
|
||||
retval = -ESRCH;
|
||||
@ -140,7 +140,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len,
|
||||
|
||||
out_unlock:
|
||||
read_unlock(&tasklist_lock);
|
||||
unlock_cpu_hotplug();
|
||||
put_online_cpus();
|
||||
if (retval)
|
||||
return retval;
|
||||
if (copy_to_user(user_mask_ptr, &mask, real_len))
|
||||
|
@ -153,7 +153,7 @@ static int pseries_add_processor(struct device_node *np)
|
||||
for (i = 0; i < nthreads; i++)
|
||||
cpu_set(i, tmp);
|
||||
|
||||
lock_cpu_hotplug();
|
||||
cpu_maps_update_begin();
|
||||
|
||||
BUG_ON(!cpus_subset(cpu_present_map, cpu_possible_map));
|
||||
|
||||
@ -190,7 +190,7 @@ static int pseries_add_processor(struct device_node *np)
|
||||
}
|
||||
err = 0;
|
||||
out_unlock:
|
||||
unlock_cpu_hotplug();
|
||||
cpu_maps_update_done();
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -211,7 +211,7 @@ static void pseries_remove_processor(struct device_node *np)
|
||||
|
||||
nthreads = len / sizeof(u32);
|
||||
|
||||
lock_cpu_hotplug();
|
||||
cpu_maps_update_begin();
|
||||
for (i = 0; i < nthreads; i++) {
|
||||
for_each_present_cpu(cpu) {
|
||||
if (get_hard_smp_processor_id(cpu) != intserv[i])
|
||||
@ -225,7 +225,7 @@ static void pseries_remove_processor(struct device_node *np)
|
||||
printk(KERN_WARNING "Could not find cpu to remove "
|
||||
"with physical id 0x%x\n", intserv[i]);
|
||||
}
|
||||
unlock_cpu_hotplug();
|
||||
cpu_maps_update_done();
|
||||
}
|
||||
|
||||
static int pseries_smp_notifier(struct notifier_block *nb,
|
||||
|
@ -382,7 +382,7 @@ static void do_event_scan_all_cpus(long delay)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
lock_cpu_hotplug();
|
||||
get_online_cpus();
|
||||
cpu = first_cpu(cpu_online_map);
|
||||
for (;;) {
|
||||
set_cpus_allowed(current, cpumask_of_cpu(cpu));
|
||||
@ -390,15 +390,15 @@ static void do_event_scan_all_cpus(long delay)
|
||||
set_cpus_allowed(current, CPU_MASK_ALL);
|
||||
|
||||
/* Drop hotplug lock, and sleep for the specified delay */
|
||||
unlock_cpu_hotplug();
|
||||
put_online_cpus();
|
||||
msleep_interruptible(delay);
|
||||
lock_cpu_hotplug();
|
||||
get_online_cpus();
|
||||
|
||||
cpu = next_cpu(cpu, cpu_online_map);
|
||||
if (cpu == NR_CPUS)
|
||||
break;
|
||||
}
|
||||
unlock_cpu_hotplug();
|
||||
put_online_cpus();
|
||||
}
|
||||
|
||||
static int rtasd(void *unused)
|
||||
|
@ -349,7 +349,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
|
||||
replace = -1;
|
||||
|
||||
/* No CPU hotplug when we change MTRR entries */
|
||||
lock_cpu_hotplug();
|
||||
get_online_cpus();
|
||||
/* Search for existing MTRR */
|
||||
mutex_lock(&mtrr_mutex);
|
||||
for (i = 0; i < num_var_ranges; ++i) {
|
||||
@ -405,7 +405,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
|
||||
error = i;
|
||||
out:
|
||||
mutex_unlock(&mtrr_mutex);
|
||||
unlock_cpu_hotplug();
|
||||
put_online_cpus();
|
||||
return error;
|
||||
}
|
||||
|
||||
@ -495,7 +495,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
|
||||
|
||||
max = num_var_ranges;
|
||||
/* No CPU hotplug when we change MTRR entries */
|
||||
lock_cpu_hotplug();
|
||||
get_online_cpus();
|
||||
mutex_lock(&mtrr_mutex);
|
||||
if (reg < 0) {
|
||||
/* Search for existing MTRR */
|
||||
@ -536,7 +536,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
|
||||
error = reg;
|
||||
out:
|
||||
mutex_unlock(&mtrr_mutex);
|
||||
unlock_cpu_hotplug();
|
||||
put_online_cpus();
|
||||
return error;
|
||||
}
|
||||
/**
|
||||
|
@ -283,7 +283,7 @@ sysret_careful:
|
||||
sysret_signal:
|
||||
TRACE_IRQS_ON
|
||||
sti
|
||||
testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
|
||||
testl $_TIF_DO_NOTIFY_MASK,%edx
|
||||
jz 1f
|
||||
|
||||
/* Really a signal */
|
||||
@ -377,7 +377,7 @@ int_very_careful:
|
||||
jmp int_restore_rest
|
||||
|
||||
int_signal:
|
||||
testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
|
||||
testl $_TIF_DO_NOTIFY_MASK,%edx
|
||||
jz 1f
|
||||
movq %rsp,%rdi # &ptregs -> arg1
|
||||
xorl %esi,%esi # oldset -> arg2
|
||||
@ -603,7 +603,7 @@ retint_careful:
|
||||
jmp retint_check
|
||||
|
||||
retint_signal:
|
||||
testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
|
||||
testl $_TIF_DO_NOTIFY_MASK,%edx
|
||||
jz retint_swapgs
|
||||
TRACE_IRQS_ON
|
||||
sti
|
||||
|
@ -436,7 +436,7 @@ static ssize_t microcode_write (struct file *file, const char __user *buf, size_
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
lock_cpu_hotplug();
|
||||
get_online_cpus();
|
||||
mutex_lock(&microcode_mutex);
|
||||
|
||||
user_buffer = (void __user *) buf;
|
||||
@ -447,7 +447,7 @@ static ssize_t microcode_write (struct file *file, const char __user *buf, size_
|
||||
ret = (ssize_t)len;
|
||||
|
||||
mutex_unlock(&microcode_mutex);
|
||||
unlock_cpu_hotplug();
|
||||
put_online_cpus();
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -658,14 +658,14 @@ static ssize_t reload_store(struct sys_device *dev, const char *buf, size_t sz)
|
||||
|
||||
old = current->cpus_allowed;
|
||||
|
||||
lock_cpu_hotplug();
|
||||
get_online_cpus();
|
||||
set_cpus_allowed(current, cpumask_of_cpu(cpu));
|
||||
|
||||
mutex_lock(&microcode_mutex);
|
||||
if (uci->valid)
|
||||
err = cpu_request_microcode(cpu);
|
||||
mutex_unlock(&microcode_mutex);
|
||||
unlock_cpu_hotplug();
|
||||
put_online_cpus();
|
||||
set_cpus_allowed(current, old);
|
||||
}
|
||||
if (err)
|
||||
@ -817,9 +817,9 @@ static int __init microcode_init (void)
|
||||
return PTR_ERR(microcode_pdev);
|
||||
}
|
||||
|
||||
lock_cpu_hotplug();
|
||||
get_online_cpus();
|
||||
error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver);
|
||||
unlock_cpu_hotplug();
|
||||
put_online_cpus();
|
||||
if (error) {
|
||||
microcode_dev_exit();
|
||||
platform_device_unregister(microcode_pdev);
|
||||
@ -839,9 +839,9 @@ static void __exit microcode_exit (void)
|
||||
|
||||
unregister_hotcpu_notifier(&mc_cpu_notifier);
|
||||
|
||||
lock_cpu_hotplug();
|
||||
get_online_cpus();
|
||||
sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver);
|
||||
unlock_cpu_hotplug();
|
||||
put_online_cpus();
|
||||
|
||||
platform_device_unregister(microcode_pdev);
|
||||
}
|
||||
|
@ -658,6 +658,9 @@ void do_notify_resume(struct pt_regs *regs, void *_unused,
|
||||
/* deal with pending signal delivery */
|
||||
if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
|
||||
do_signal(regs);
|
||||
|
||||
if (thread_info_flags & _TIF_HRTICK_RESCHED)
|
||||
hrtick_resched();
|
||||
|
||||
clear_thread_flag(TIF_IRET);
|
||||
}
|
||||
|
@ -480,6 +480,9 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
|
||||
/* deal with pending signal delivery */
|
||||
if (thread_info_flags & (_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK))
|
||||
do_signal(regs);
|
||||
|
||||
if (thread_info_flags & _TIF_HRTICK_RESCHED)
|
||||
hrtick_resched();
|
||||
}
|
||||
|
||||
void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
|
||||
|
@ -33,6 +33,19 @@ static void save_stack_address(void *data, unsigned long addr)
|
||||
trace->entries[trace->nr_entries++] = addr;
|
||||
}
|
||||
|
||||
static void save_stack_address_nosched(void *data, unsigned long addr)
|
||||
{
|
||||
struct stack_trace *trace = (struct stack_trace *)data;
|
||||
if (in_sched_functions(addr))
|
||||
return;
|
||||
if (trace->skip > 0) {
|
||||
trace->skip--;
|
||||
return;
|
||||
}
|
||||
if (trace->nr_entries < trace->max_entries)
|
||||
trace->entries[trace->nr_entries++] = addr;
|
||||
}
|
||||
|
||||
static const struct stacktrace_ops save_stack_ops = {
|
||||
.warning = save_stack_warning,
|
||||
.warning_symbol = save_stack_warning_symbol,
|
||||
@ -40,6 +53,13 @@ static const struct stacktrace_ops save_stack_ops = {
|
||||
.address = save_stack_address,
|
||||
};
|
||||
|
||||
static const struct stacktrace_ops save_stack_ops_nosched = {
|
||||
.warning = save_stack_warning,
|
||||
.warning_symbol = save_stack_warning_symbol,
|
||||
.stack = save_stack_stack,
|
||||
.address = save_stack_address_nosched,
|
||||
};
|
||||
|
||||
/*
|
||||
* Save stack-backtrace addresses into a stack_trace buffer.
|
||||
*/
|
||||
@ -50,3 +70,10 @@ void save_stack_trace(struct stack_trace *trace)
|
||||
trace->entries[trace->nr_entries++] = ULONG_MAX;
|
||||
}
|
||||
EXPORT_SYMBOL(save_stack_trace);
|
||||
|
||||
void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
|
||||
{
|
||||
dump_trace(tsk, NULL, NULL, &save_stack_ops_nosched, trace);
|
||||
if (trace->nr_entries < trace->max_entries)
|
||||
trace->entries[trace->nr_entries++] = ULONG_MAX;
|
||||
}
|
||||
|
@ -459,7 +459,7 @@ void __init lguest_arch_host_init(void)
|
||||
|
||||
/* We don't need the complexity of CPUs coming and going while we're
|
||||
* doing this. */
|
||||
lock_cpu_hotplug();
|
||||
get_online_cpus();
|
||||
if (cpu_has_pge) { /* We have a broader idea of "global". */
|
||||
/* Remember that this was originally set (for cleanup). */
|
||||
cpu_had_pge = 1;
|
||||
@ -469,20 +469,20 @@ void __init lguest_arch_host_init(void)
|
||||
/* Turn off the feature in the global feature set. */
|
||||
clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
|
||||
}
|
||||
unlock_cpu_hotplug();
|
||||
put_online_cpus();
|
||||
};
|
||||
/*:*/
|
||||
|
||||
void __exit lguest_arch_host_fini(void)
|
||||
{
|
||||
/* If we had PGE before we started, turn it back on now. */
|
||||
lock_cpu_hotplug();
|
||||
get_online_cpus();
|
||||
if (cpu_had_pge) {
|
||||
set_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
|
||||
/* adjust_pge's argument "1" means set PGE. */
|
||||
on_each_cpu(adjust_pge, (void *)1, 0, 1);
|
||||
}
|
||||
unlock_cpu_hotplug();
|
||||
put_online_cpus();
|
||||
}
|
||||
|
||||
|
||||
|
@ -29,12 +29,12 @@ static void sclp_cpu_capability_notify(struct work_struct *work)
|
||||
struct sys_device *sysdev;
|
||||
|
||||
printk(KERN_WARNING TAG "cpu capability changed.\n");
|
||||
lock_cpu_hotplug();
|
||||
get_online_cpus();
|
||||
for_each_online_cpu(cpu) {
|
||||
sysdev = get_cpu_sysdev(cpu);
|
||||
kobject_uevent(&sysdev->kobj, KOBJ_CHANGE);
|
||||
}
|
||||
unlock_cpu_hotplug();
|
||||
put_online_cpus();
|
||||
}
|
||||
|
||||
static void sclp_conf_receiver_fn(struct evbuf_header *evbuf)
|
||||
|
@ -2130,4 +2130,3 @@ source "fs/nls/Kconfig"
|
||||
source "fs/dlm/Kconfig"
|
||||
|
||||
endmenu
|
||||
|
||||
|
@ -310,6 +310,77 @@ static int proc_pid_schedstat(struct task_struct *task, char *buffer)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_LATENCYTOP
|
||||
static int lstats_show_proc(struct seq_file *m, void *v)
|
||||
{
|
||||
int i;
|
||||
struct task_struct *task = m->private;
|
||||
seq_puts(m, "Latency Top version : v0.1\n");
|
||||
|
||||
for (i = 0; i < 32; i++) {
|
||||
if (task->latency_record[i].backtrace[0]) {
|
||||
int q;
|
||||
seq_printf(m, "%i %li %li ",
|
||||
task->latency_record[i].count,
|
||||
task->latency_record[i].time,
|
||||
task->latency_record[i].max);
|
||||
for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
|
||||
char sym[KSYM_NAME_LEN];
|
||||
char *c;
|
||||
if (!task->latency_record[i].backtrace[q])
|
||||
break;
|
||||
if (task->latency_record[i].backtrace[q] == ULONG_MAX)
|
||||
break;
|
||||
sprint_symbol(sym, task->latency_record[i].backtrace[q]);
|
||||
c = strchr(sym, '+');
|
||||
if (c)
|
||||
*c = 0;
|
||||
seq_printf(m, "%s ", sym);
|
||||
}
|
||||
seq_printf(m, "\n");
|
||||
}
|
||||
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int lstats_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
int ret;
|
||||
struct seq_file *m;
|
||||
struct task_struct *task = get_proc_task(inode);
|
||||
|
||||
ret = single_open(file, lstats_show_proc, NULL);
|
||||
if (!ret) {
|
||||
m = file->private_data;
|
||||
m->private = task;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * write() handler for the per-task "latency" proc file.
 *
 * The written data is ignored: any write clears the latency records of
 * the task stored in the seq_file's private pointer. The whole write is
 * reported as consumed.
 */
static ssize_t lstats_write(struct file *file, const char __user *buf,
			    size_t count, loff_t *offs)
{
	struct seq_file *seq = file->private_data;
	struct task_struct *target = seq->private;

	clear_all_latency_tracing(target);
	return count;
}
|
||||
|
||||
static const struct file_operations proc_lstats_operations = {
|
||||
.open = lstats_open,
|
||||
.read = seq_read,
|
||||
.write = lstats_write,
|
||||
.llseek = seq_lseek,
|
||||
.release = single_release,
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
/* The badness from the OOM killer */
|
||||
unsigned long badness(struct task_struct *p, unsigned long uptime);
|
||||
static int proc_oom_score(struct task_struct *task, char *buffer)
|
||||
@ -1020,6 +1091,7 @@ static const struct file_operations proc_fault_inject_operations = {
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef CONFIG_SCHED_DEBUG
|
||||
/*
|
||||
* Print out various scheduling related per-task fields:
|
||||
@ -2230,6 +2302,9 @@ static const struct pid_entry tgid_base_stuff[] = {
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
INF("schedstat", S_IRUGO, pid_schedstat),
|
||||
#endif
|
||||
#ifdef CONFIG_LATENCYTOP
|
||||
REG("latency", S_IRUGO, lstats),
|
||||
#endif
|
||||
#ifdef CONFIG_PROC_PID_CPUSET
|
||||
REG("cpuset", S_IRUGO, cpuset),
|
||||
#endif
|
||||
@ -2555,6 +2630,9 @@ static const struct pid_entry tid_base_stuff[] = {
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
INF("schedstat", S_IRUGO, pid_schedstat),
|
||||
#endif
|
||||
#ifdef CONFIG_LATENCYTOP
|
||||
REG("latency", S_IRUGO, lstats),
|
||||
#endif
|
||||
#ifdef CONFIG_PROC_PID_CPUSET
|
||||
REG("cpuset", S_IRUGO, cpuset),
|
||||
#endif
|
||||
|
@ -44,8 +44,8 @@
|
||||
#define RLIMIT_NICE 13 /* max nice prio allowed to raise to
|
||||
0-39 for nice level 19 .. -20 */
|
||||
#define RLIMIT_RTPRIO 14 /* maximum realtime priority */
|
||||
|
||||
#define RLIM_NLIMITS 15
|
||||
#define RLIMIT_RTTIME 15 /* timeout for RT tasks in us */
|
||||
#define RLIM_NLIMITS 16
|
||||
|
||||
/*
|
||||
* SuS says limits have to be unsigned.
|
||||
@ -86,6 +86,7 @@
|
||||
[RLIMIT_MSGQUEUE] = { MQ_BYTES_MAX, MQ_BYTES_MAX }, \
|
||||
[RLIMIT_NICE] = { 0, 0 }, \
|
||||
[RLIMIT_RTPRIO] = { 0, 0 }, \
|
||||
[RLIMIT_RTTIME] = { RLIM_INFINITY, RLIM_INFINITY }, \
|
||||
}
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
|
@ -132,6 +132,7 @@ static inline struct thread_info *current_thread_info(void)
|
||||
#define TIF_SYSCALL_AUDIT 6 /* syscall auditing active */
|
||||
#define TIF_SECCOMP 7 /* secure computing */
|
||||
#define TIF_RESTORE_SIGMASK 8 /* restore signal mask in do_signal() */
|
||||
#define TIF_HRTICK_RESCHED 9 /* reprogram hrtick timer */
|
||||
#define TIF_MEMDIE 16
|
||||
#define TIF_DEBUG 17 /* uses debug registers */
|
||||
#define TIF_IO_BITMAP 18 /* uses I/O bitmap */
|
||||
@ -147,6 +148,7 @@ static inline struct thread_info *current_thread_info(void)
|
||||
#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
|
||||
#define _TIF_SECCOMP (1<<TIF_SECCOMP)
|
||||
#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
|
||||
#define _TIF_HRTICK_RESCHED (1<<TIF_HRTICK_RESCHED)
|
||||
#define _TIF_DEBUG (1<<TIF_DEBUG)
|
||||
#define _TIF_IO_BITMAP (1<<TIF_IO_BITMAP)
|
||||
#define _TIF_FREEZE (1<<TIF_FREEZE)
|
||||
|
@ -115,6 +115,7 @@ static inline struct thread_info *stack_thread_info(void)
|
||||
#define TIF_SECCOMP 8 /* secure computing */
|
||||
#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */
|
||||
#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */
|
||||
#define TIF_HRTICK_RESCHED 11 /* reprogram hrtick timer */
|
||||
/* 16 free */
|
||||
#define TIF_IA32 17 /* 32bit process */
|
||||
#define TIF_FORK 18 /* ret_from_fork */
|
||||
@ -133,6 +134,7 @@ static inline struct thread_info *stack_thread_info(void)
|
||||
#define _TIF_SECCOMP (1<<TIF_SECCOMP)
|
||||
#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
|
||||
#define _TIF_MCE_NOTIFY (1<<TIF_MCE_NOTIFY)
|
||||
#define _TIF_HRTICK_RESCHED (1<<TIF_HRTICK_RESCHED)
|
||||
#define _TIF_IA32 (1<<TIF_IA32)
|
||||
#define _TIF_FORK (1<<TIF_FORK)
|
||||
#define _TIF_ABI_PENDING (1<<TIF_ABI_PENDING)
|
||||
@ -146,6 +148,9 @@ static inline struct thread_info *stack_thread_info(void)
|
||||
/* work to do on any return to user space */
|
||||
#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP)
|
||||
|
||||
#define _TIF_DO_NOTIFY_MASK \
|
||||
(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED)
|
||||
|
||||
/* flags to check in __switch_to() */
|
||||
#define _TIF_WORK_CTXSW (_TIF_DEBUG|_TIF_IO_BITMAP)
|
||||
|
||||
|
@ -71,18 +71,27 @@ static inline void unregister_cpu_notifier(struct notifier_block *nb)
|
||||
|
||||
int cpu_up(unsigned int cpu);
|
||||
|
||||
extern void cpu_hotplug_init(void);
|
||||
|
||||
#else
|
||||
|
||||
static inline int register_cpu_notifier(struct notifier_block *nb)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void unregister_cpu_notifier(struct notifier_block *nb)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void cpu_hotplug_init(void)
|
||||
{
|
||||
}
|
||||
|
||||
#endif /* CONFIG_SMP */
|
||||
extern struct sysdev_class cpu_sysdev_class;
|
||||
extern void cpu_maps_update_begin(void);
|
||||
extern void cpu_maps_update_done(void);
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
/* Stop CPUs going up and down. */
|
||||
@ -97,8 +106,8 @@ static inline void cpuhotplug_mutex_unlock(struct mutex *cpu_hp_mutex)
|
||||
mutex_unlock(cpu_hp_mutex);
|
||||
}
|
||||
|
||||
extern void lock_cpu_hotplug(void);
|
||||
extern void unlock_cpu_hotplug(void);
|
||||
extern void get_online_cpus(void);
|
||||
extern void put_online_cpus(void);
|
||||
#define hotcpu_notifier(fn, pri) { \
|
||||
static struct notifier_block fn##_nb = \
|
||||
{ .notifier_call = fn, .priority = pri }; \
|
||||
@ -115,8 +124,8 @@ static inline void cpuhotplug_mutex_lock(struct mutex *cpu_hp_mutex)
|
||||
static inline void cpuhotplug_mutex_unlock(struct mutex *cpu_hp_mutex)
|
||||
{ }
|
||||
|
||||
#define lock_cpu_hotplug() do { } while (0)
|
||||
#define unlock_cpu_hotplug() do { } while (0)
|
||||
#define get_online_cpus() do { } while (0)
|
||||
#define put_online_cpus() do { } while (0)
|
||||
#define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0)
|
||||
/* These aren't inline functions due to a GCC bug. */
|
||||
#define register_hotcpu_notifier(nb) ({ (void)(nb); 0; })
|
||||
|
@ -47,6 +47,7 @@ struct task_struct;
|
||||
|
||||
#ifdef CONFIG_LOCKDEP
|
||||
extern void debug_show_all_locks(void);
|
||||
extern void __debug_show_held_locks(struct task_struct *task);
|
||||
extern void debug_show_held_locks(struct task_struct *task);
|
||||
extern void debug_check_no_locks_freed(const void *from, unsigned long len);
|
||||
extern void debug_check_no_locks_held(struct task_struct *task);
|
||||
@ -55,6 +56,10 @@ static inline void debug_show_all_locks(void)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void __debug_show_held_locks(struct task_struct *task)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void debug_show_held_locks(struct task_struct *task)
|
||||
{
|
||||
}
|
||||
|
@ -1,8 +1,12 @@
|
||||
#ifndef _LINUX_FUTEX_H
|
||||
#define _LINUX_FUTEX_H
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
struct inode;
|
||||
struct mm_struct;
|
||||
struct task_struct;
|
||||
union ktime;
|
||||
|
||||
/* Second argument to futex syscall */
|
||||
|
@ -72,11 +72,7 @@
|
||||
#define in_softirq() (softirq_count())
|
||||
#define in_interrupt() (irq_count())
|
||||
|
||||
#if defined(CONFIG_PREEMPT) && !defined(CONFIG_PREEMPT_BKL)
|
||||
# define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != kernel_locked())
|
||||
#else
|
||||
# define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0)
|
||||
#endif
|
||||
#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0)
|
||||
|
||||
#ifdef CONFIG_PREEMPT
|
||||
# define PREEMPT_CHECK_OFFSET 1
|
||||
|
@ -115,10 +115,8 @@ struct hrtimer {
|
||||
enum hrtimer_restart (*function)(struct hrtimer *);
|
||||
struct hrtimer_clock_base *base;
|
||||
unsigned long state;
|
||||
#ifdef CONFIG_HIGH_RES_TIMERS
|
||||
enum hrtimer_cb_mode cb_mode;
|
||||
struct list_head cb_entry;
|
||||
#endif
|
||||
#ifdef CONFIG_TIMER_STATS
|
||||
void *start_site;
|
||||
char start_comm[16];
|
||||
@ -194,10 +192,10 @@ struct hrtimer_cpu_base {
|
||||
spinlock_t lock;
|
||||
struct lock_class_key lock_key;
|
||||
struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
|
||||
struct list_head cb_pending;
|
||||
#ifdef CONFIG_HIGH_RES_TIMERS
|
||||
ktime_t expires_next;
|
||||
int hres_active;
|
||||
struct list_head cb_pending;
|
||||
unsigned long nr_events;
|
||||
#endif
|
||||
};
|
||||
@ -217,6 +215,11 @@ static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer)
|
||||
return timer->base->get_time();
|
||||
}
|
||||
|
||||
static inline int hrtimer_is_hres_active(struct hrtimer *timer)
|
||||
{
|
||||
return timer->base->cpu_base->hres_active;
|
||||
}
|
||||
|
||||
/*
|
||||
* The resolution of the clocks. The resolution value is returned in
|
||||
* the clock_getres() system call to give application programmers an
|
||||
@ -248,6 +251,10 @@ static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer)
|
||||
return timer->base->softirq_time;
|
||||
}
|
||||
|
||||
static inline int hrtimer_is_hres_active(struct hrtimer *timer)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
extern ktime_t ktime_get(void);
|
||||
@ -310,6 +317,7 @@ extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
|
||||
|
||||
/* Soft interrupt function to run the hrtimer queues: */
|
||||
extern void hrtimer_run_queues(void);
|
||||
extern void hrtimer_run_pending(void);
|
||||
|
||||
/* Bootup initialization: */
|
||||
extern void __init hrtimers_init(void);
|
||||
|
@ -132,9 +132,12 @@ extern struct group_info init_groups;
|
||||
.cpus_allowed = CPU_MASK_ALL, \
|
||||
.mm = NULL, \
|
||||
.active_mm = &init_mm, \
|
||||
.run_list = LIST_HEAD_INIT(tsk.run_list), \
|
||||
.rt = { \
|
||||
.run_list = LIST_HEAD_INIT(tsk.rt.run_list), \
|
||||
.time_slice = HZ, \
|
||||
.nr_cpus_allowed = NR_CPUS, \
|
||||
}, \
|
||||
.ioprio = 0, \
|
||||
.time_slice = HZ, \
|
||||
.tasks = LIST_HEAD_INIT(tsk.tasks), \
|
||||
.ptrace_children= LIST_HEAD_INIT(tsk.ptrace_children), \
|
||||
.ptrace_list = LIST_HEAD_INIT(tsk.ptrace_list), \
|
||||
|
@ -256,6 +256,7 @@ enum
|
||||
#ifdef CONFIG_HIGH_RES_TIMERS
|
||||
HRTIMER_SOFTIRQ,
|
||||
#endif
|
||||
RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */
|
||||
};
|
||||
|
||||
/* softirq mask and active fields moved to irq_cpustat_t in
|
||||
|
@ -29,6 +29,12 @@
|
||||
# define SHIFT_HZ 9
|
||||
#elif HZ >= 768 && HZ < 1536
|
||||
# define SHIFT_HZ 10
|
||||
#elif HZ >= 1536 && HZ < 3072
|
||||
# define SHIFT_HZ 11
|
||||
#elif HZ >= 3072 && HZ < 6144
|
||||
# define SHIFT_HZ 12
|
||||
#elif HZ >= 6144 && HZ < 12288
|
||||
# define SHIFT_HZ 13
|
||||
#else
|
||||
# error You lose.
|
||||
#endif
|
||||
|
@ -105,8 +105,8 @@ struct user;
|
||||
* supposed to.
|
||||
*/
|
||||
#ifdef CONFIG_PREEMPT_VOLUNTARY
|
||||
extern int cond_resched(void);
|
||||
# define might_resched() cond_resched()
|
||||
extern int _cond_resched(void);
|
||||
# define might_resched() _cond_resched()
|
||||
#else
|
||||
# define might_resched() do { } while (0)
|
||||
#endif
|
||||
|
44
include/linux/latencytop.h
Normal file
44
include/linux/latencytop.h
Normal file
@ -0,0 +1,44 @@
|
||||
/*
|
||||
* latencytop.h: Infrastructure for displaying latency
|
||||
*
|
||||
* (C) Copyright 2008 Intel Corporation
|
||||
* Author: Arjan van de Ven <arjan@linux.intel.com>
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _INCLUDE_GUARD_LATENCYTOP_H_
|
||||
#define _INCLUDE_GUARD_LATENCYTOP_H_
|
||||
|
||||
#ifdef CONFIG_LATENCYTOP
|
||||
|
||||
#define LT_SAVECOUNT 32
|
||||
#define LT_BACKTRACEDEPTH 12
|
||||
|
||||
struct latency_record {
|
||||
unsigned long backtrace[LT_BACKTRACEDEPTH];
|
||||
unsigned int count;
|
||||
unsigned long time;
|
||||
unsigned long max;
|
||||
};
|
||||
|
||||
|
||||
struct task_struct;
|
||||
|
||||
void account_scheduler_latency(struct task_struct *task, int usecs, int inter);
|
||||
|
||||
void clear_all_latency_tracing(struct task_struct *p);
|
||||
|
||||
#else
|
||||
|
||||
static inline void
|
||||
account_scheduler_latency(struct task_struct *task, int usecs, int inter)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void clear_all_latency_tracing(struct task_struct *p)
|
||||
{
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
@ -207,9 +207,7 @@ static inline int notifier_to_errno(int ret)
|
||||
#define CPU_DOWN_PREPARE 0x0005 /* CPU (unsigned)v going down */
|
||||
#define CPU_DOWN_FAILED 0x0006 /* CPU (unsigned)v NOT going down */
|
||||
#define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */
|
||||
#define CPU_LOCK_ACQUIRE 0x0008 /* Acquire all hotcpu locks */
|
||||
#define CPU_LOCK_RELEASE 0x0009 /* Release all hotcpu locks */
|
||||
#define CPU_DYING 0x000A /* CPU (unsigned)v not running any task,
|
||||
#define CPU_DYING 0x0008 /* CPU (unsigned)v not running any task,
|
||||
* not handling interrupts, soon dead */
|
||||
|
||||
/* Used for CPU hotplug events occurring while tasks are frozen due to a suspend
|
||||
|
164
include/linux/rcuclassic.h
Normal file
164
include/linux/rcuclassic.h
Normal file
@ -0,0 +1,164 @@
|
||||
/*
|
||||
* Read-Copy Update mechanism for mutual exclusion (classic version)
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*
|
||||
* Copyright IBM Corporation, 2001
|
||||
*
|
||||
* Author: Dipankar Sarma <dipankar@in.ibm.com>
|
||||
*
|
||||
* Based on the original work by Paul McKenney <paulmck@us.ibm.com>
|
||||
* and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
|
||||
* Papers:
|
||||
* http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
|
||||
* http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
|
||||
*
|
||||
* For detailed explanation of Read-Copy Update mechanism see -
|
||||
* Documentation/RCU
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef __LINUX_RCUCLASSIC_H
|
||||
#define __LINUX_RCUCLASSIC_H
|
||||
|
||||
#ifdef __KERNEL__
|
||||
|
||||
#include <linux/cache.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/threads.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/seqlock.h>
|
||||
|
||||
|
||||
/* Global control variables for rcupdate callback mechanism. */
|
||||
struct rcu_ctrlblk {
|
||||
long cur; /* Current batch number. */
|
||||
long completed; /* Number of the last completed batch */
|
||||
int next_pending; /* Is the next batch already waiting? */
|
||||
|
||||
int signaled;
|
||||
|
||||
spinlock_t lock ____cacheline_internodealigned_in_smp;
|
||||
cpumask_t cpumask; /* CPUs that need to switch in order */
|
||||
/* for current batch to proceed. */
|
||||
} ____cacheline_internodealigned_in_smp;
|
||||
|
||||
/*
 * Is batch a before batch b ?
 *
 * Batch numbers are free-running counters, so the comparison is done on
 * the wrapped difference rather than on the raw values. The subtraction
 * is carried out in unsigned arithmetic because signed overflow is
 * undefined behaviour; the result is then reinterpreted as signed.
 *
 * Returns non-zero if @a precedes @b, 0 otherwise (including a == b).
 */
static inline int rcu_batch_before(long a, long b)
{
	return (long)((unsigned long)a - (unsigned long)b) < 0;
}
|
||||
|
||||
/*
 * Is batch a after batch b ?
 *
 * Batch numbers are free-running counters, so the comparison is done on
 * the wrapped difference rather than on the raw values. The subtraction
 * is carried out in unsigned arithmetic because signed overflow is
 * undefined behaviour; the result is then reinterpreted as signed.
 *
 * Returns non-zero if @a follows @b, 0 otherwise (including a == b).
 */
static inline int rcu_batch_after(long a, long b)
{
	return (long)((unsigned long)a - (unsigned long)b) > 0;
}
|
||||
|
||||
/*
|
||||
* Per-CPU data for Read-Copy Update.
|
||||
* nxtlist - new callbacks are added here
|
||||
* curlist - current batch for which quiescent cycle started if any
|
||||
*/
|
||||
struct rcu_data {
|
||||
/* 1) quiescent state handling : */
|
||||
long quiescbatch; /* Batch # for grace period */
|
||||
int passed_quiesc; /* User-mode/idle loop etc. */
|
||||
int qs_pending; /* core waits for quiesc state */
|
||||
|
||||
/* 2) batch handling */
|
||||
long batch; /* Batch # for current RCU batch */
|
||||
struct rcu_head *nxtlist;
|
||||
struct rcu_head **nxttail;
|
||||
long qlen; /* # of queued callbacks */
|
||||
struct rcu_head *curlist;
|
||||
struct rcu_head **curtail;
|
||||
struct rcu_head *donelist;
|
||||
struct rcu_head **donetail;
|
||||
long blimit; /* Upper limit on a processed batch */
|
||||
int cpu;
|
||||
struct rcu_head barrier;
|
||||
};
|
||||
|
||||
DECLARE_PER_CPU(struct rcu_data, rcu_data);
|
||||
DECLARE_PER_CPU(struct rcu_data, rcu_bh_data);
|
||||
|
||||
/*
|
||||
* Increment the quiescent state counter.
|
||||
* The counter is a bit degenerated: We do not need to know
|
||||
* how many quiescent states passed, just if there was at least
|
||||
* one since the start of the grace period. Thus just a flag.
|
||||
*/
|
||||
static inline void rcu_qsctr_inc(int cpu)
|
||||
{
|
||||
struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
|
||||
rdp->passed_quiesc = 1;
|
||||
}
|
||||
static inline void rcu_bh_qsctr_inc(int cpu)
|
||||
{
|
||||
struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
|
||||
rdp->passed_quiesc = 1;
|
||||
}
|
||||
|
||||
extern int rcu_pending(int cpu);
|
||||
extern int rcu_needs_cpu(int cpu);
|
||||
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
extern struct lockdep_map rcu_lock_map;
|
||||
# define rcu_read_acquire() \
|
||||
lock_acquire(&rcu_lock_map, 0, 0, 2, 1, _THIS_IP_)
|
||||
# define rcu_read_release() lock_release(&rcu_lock_map, 1, _THIS_IP_)
|
||||
#else
|
||||
# define rcu_read_acquire() do { } while (0)
|
||||
# define rcu_read_release() do { } while (0)
|
||||
#endif
|
||||
|
||||
#define __rcu_read_lock() \
|
||||
do { \
|
||||
preempt_disable(); \
|
||||
__acquire(RCU); \
|
||||
rcu_read_acquire(); \
|
||||
} while (0)
|
||||
#define __rcu_read_unlock() \
|
||||
do { \
|
||||
rcu_read_release(); \
|
||||
__release(RCU); \
|
||||
preempt_enable(); \
|
||||
} while (0)
|
||||
#define __rcu_read_lock_bh() \
|
||||
do { \
|
||||
local_bh_disable(); \
|
||||
__acquire(RCU_BH); \
|
||||
rcu_read_acquire(); \
|
||||
} while (0)
|
||||
#define __rcu_read_unlock_bh() \
|
||||
do { \
|
||||
rcu_read_release(); \
|
||||
__release(RCU_BH); \
|
||||
local_bh_enable(); \
|
||||
} while (0)
|
||||
|
||||
#define __synchronize_sched() synchronize_rcu()
|
||||
|
||||
extern void __rcu_init(void);
|
||||
extern void rcu_check_callbacks(int cpu, int user);
|
||||
extern void rcu_restart_cpu(int cpu);
|
||||
|
||||
extern long rcu_batches_completed(void);
|
||||
extern long rcu_batches_completed_bh(void);
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
#endif /* __LINUX_RCUCLASSIC_H */
|
@ -15,7 +15,7 @@
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*
|
||||
* Copyright (C) IBM Corporation, 2001
|
||||
* Copyright IBM Corporation, 2001
|
||||
*
|
||||
* Author: Dipankar Sarma <dipankar@in.ibm.com>
|
||||
*
|
||||
@ -53,96 +53,18 @@ struct rcu_head {
|
||||
void (*func)(struct rcu_head *head);
|
||||
};
|
||||
|
||||
#ifdef CONFIG_CLASSIC_RCU
|
||||
#include <linux/rcuclassic.h>
|
||||
#else /* #ifdef CONFIG_CLASSIC_RCU */
|
||||
#include <linux/rcupreempt.h>
|
||||
#endif /* #else #ifdef CONFIG_CLASSIC_RCU */
|
||||
|
||||
#define RCU_HEAD_INIT { .next = NULL, .func = NULL }
|
||||
#define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT
|
||||
#define INIT_RCU_HEAD(ptr) do { \
|
||||
(ptr)->next = NULL; (ptr)->func = NULL; \
|
||||
} while (0)
|
||||
|
||||
|
||||
|
||||
/* Global control variables for rcupdate callback mechanism. */
|
||||
struct rcu_ctrlblk {
|
||||
long cur; /* Current batch number. */
|
||||
long completed; /* Number of the last completed batch */
|
||||
int next_pending; /* Is the next batch already waiting? */
|
||||
|
||||
int signaled;
|
||||
|
||||
spinlock_t lock ____cacheline_internodealigned_in_smp;
|
||||
cpumask_t cpumask; /* CPUs that need to switch in order */
|
||||
/* for current batch to proceed. */
|
||||
} ____cacheline_internodealigned_in_smp;
|
||||
|
||||
/*
 * Is batch a before batch b ?
 *
 * Batch numbers are free-running counters, so the comparison is done on
 * the wrapped difference rather than on the raw values. The subtraction
 * is carried out in unsigned arithmetic because signed overflow is
 * undefined behaviour; the result is then reinterpreted as signed.
 *
 * Returns non-zero if @a precedes @b, 0 otherwise (including a == b).
 */
static inline int rcu_batch_before(long a, long b)
{
	return (long)((unsigned long)a - (unsigned long)b) < 0;
}
|
||||
|
||||
/*
 * Is batch a after batch b ?
 *
 * Batch numbers are free-running counters, so the comparison is done on
 * the wrapped difference rather than on the raw values. The subtraction
 * is carried out in unsigned arithmetic because signed overflow is
 * undefined behaviour; the result is then reinterpreted as signed.
 *
 * Returns non-zero if @a follows @b, 0 otherwise (including a == b).
 */
static inline int rcu_batch_after(long a, long b)
{
	return (long)((unsigned long)a - (unsigned long)b) > 0;
}
|
||||
|
||||
/*
|
||||
* Per-CPU data for Read-Copy Update.
|
||||
* nxtlist - new callbacks are added here
|
||||
* curlist - current batch for which quiescent cycle started if any
|
||||
*/
|
||||
struct rcu_data {
|
||||
/* 1) quiescent state handling : */
|
||||
long quiescbatch; /* Batch # for grace period */
|
||||
int passed_quiesc; /* User-mode/idle loop etc. */
|
||||
int qs_pending; /* core waits for quiesc state */
|
||||
|
||||
/* 2) batch handling */
|
||||
long batch; /* Batch # for current RCU batch */
|
||||
struct rcu_head *nxtlist;
|
||||
struct rcu_head **nxttail;
|
||||
long qlen; /* # of queued callbacks */
|
||||
struct rcu_head *curlist;
|
||||
struct rcu_head **curtail;
|
||||
struct rcu_head *donelist;
|
||||
struct rcu_head **donetail;
|
||||
long blimit; /* Upper limit on a processed batch */
|
||||
int cpu;
|
||||
struct rcu_head barrier;
|
||||
};
|
||||
|
||||
DECLARE_PER_CPU(struct rcu_data, rcu_data);
|
||||
DECLARE_PER_CPU(struct rcu_data, rcu_bh_data);
|
||||
|
||||
/*
|
||||
* Increment the quiescent state counter.
|
||||
* The counter is a bit degenerated: We do not need to know
|
||||
* how many quiescent states passed, just if there was at least
|
||||
* one since the start of the grace period. Thus just a flag.
|
||||
*/
|
||||
static inline void rcu_qsctr_inc(int cpu)
|
||||
{
|
||||
struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
|
||||
rdp->passed_quiesc = 1;
|
||||
}
|
||||
static inline void rcu_bh_qsctr_inc(int cpu)
|
||||
{
|
||||
struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
|
||||
rdp->passed_quiesc = 1;
|
||||
}
|
||||
|
||||
extern int rcu_pending(int cpu);
|
||||
extern int rcu_needs_cpu(int cpu);
|
||||
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
extern struct lockdep_map rcu_lock_map;
|
||||
# define rcu_read_acquire() lock_acquire(&rcu_lock_map, 0, 0, 2, 1, _THIS_IP_)
|
||||
# define rcu_read_release() lock_release(&rcu_lock_map, 1, _THIS_IP_)
|
||||
#else
|
||||
# define rcu_read_acquire() do { } while (0)
|
||||
# define rcu_read_release() do { } while (0)
|
||||
#endif
|
||||
|
||||
/**
|
||||
* rcu_read_lock - mark the beginning of an RCU read-side critical section.
|
||||
*
|
||||
@ -172,24 +94,13 @@ extern struct lockdep_map rcu_lock_map;
|
||||
*
|
||||
* It is illegal to block while in an RCU read-side critical section.
|
||||
*/
|
||||
#define rcu_read_lock() \
|
||||
do { \
|
||||
preempt_disable(); \
|
||||
__acquire(RCU); \
|
||||
rcu_read_acquire(); \
|
||||
} while(0)
|
||||
#define rcu_read_lock() __rcu_read_lock()
|
||||
|
||||
/**
|
||||
* rcu_read_unlock - marks the end of an RCU read-side critical section.
|
||||
*
|
||||
* See rcu_read_lock() for more information.
|
||||
*/
|
||||
#define rcu_read_unlock() \
|
||||
do { \
|
||||
rcu_read_release(); \
|
||||
__release(RCU); \
|
||||
preempt_enable(); \
|
||||
} while(0)
|
||||
|
||||
/*
|
||||
* So where is rcu_write_lock()? It does not exist, as there is no
|
||||
@ -200,6 +111,7 @@ extern struct lockdep_map rcu_lock_map;
|
||||
* used as well. RCU does not care how the writers keep out of each
|
||||
* others' way, as long as they do so.
|
||||
*/
|
||||
#define rcu_read_unlock() __rcu_read_unlock()
|
||||
|
||||
/**
|
||||
* rcu_read_lock_bh - mark the beginning of a softirq-only RCU critical section
|
||||
@ -212,24 +124,14 @@ extern struct lockdep_map rcu_lock_map;
|
||||
* can use just rcu_read_lock().
|
||||
*
|
||||
*/
|
||||
#define rcu_read_lock_bh() \
|
||||
do { \
|
||||
local_bh_disable(); \
|
||||
__acquire(RCU_BH); \
|
||||
rcu_read_acquire(); \
|
||||
} while(0)
|
||||
#define rcu_read_lock_bh() __rcu_read_lock_bh()
|
||||
|
||||
/*
|
||||
* rcu_read_unlock_bh - marks the end of a softirq-only RCU critical section
|
||||
*
|
||||
* See rcu_read_lock_bh() for more information.
|
||||
*/
|
||||
#define rcu_read_unlock_bh() \
|
||||
do { \
|
||||
rcu_read_release(); \
|
||||
__release(RCU_BH); \
|
||||
local_bh_enable(); \
|
||||
} while(0)
|
||||
#define rcu_read_unlock_bh() __rcu_read_unlock_bh()
|
||||
|
||||
/*
|
||||
* Prevent the compiler from merging or refetching accesses. The compiler
|
||||
@ -293,21 +195,52 @@ extern struct lockdep_map rcu_lock_map;
|
||||
* In "classic RCU", these two guarantees happen to be one and
|
||||
* the same, but can differ in realtime RCU implementations.
|
||||
*/
|
||||
#define synchronize_sched() synchronize_rcu()
|
||||
#define synchronize_sched() __synchronize_sched()
|
||||
|
||||
extern void rcu_init(void);
|
||||
extern void rcu_check_callbacks(int cpu, int user);
|
||||
extern void rcu_restart_cpu(int cpu);
|
||||
/**
|
||||
* call_rcu - Queue an RCU callback for invocation after a grace period.
|
||||
* @head: structure to be used for queueing the RCU updates.
|
||||
* @func: actual update function to be invoked after the grace period
|
||||
*
|
||||
* The update function will be invoked some time after a full grace
|
||||
* period elapses, in other words after all currently executing RCU
|
||||
* read-side critical sections have completed. RCU read-side critical
|
||||
* sections are delimited by rcu_read_lock() and rcu_read_unlock(),
|
||||
* and may be nested.
|
||||
*/
|
||||
extern void call_rcu(struct rcu_head *head,
|
||||
void (*func)(struct rcu_head *head));
|
||||
|
||||
/**
|
||||
* call_rcu_bh - Queue an RCU for invocation after a quicker grace period.
|
||||
* @head: structure to be used for queueing the RCU updates.
|
||||
* @func: actual update function to be invoked after the grace period
|
||||
*
|
||||
* The update function will be invoked some time after a full grace
|
||||
* period elapses, in other words after all currently executing RCU
|
||||
* read-side critical sections have completed. call_rcu_bh() assumes
|
||||
* that the read-side critical sections end on completion of a softirq
|
||||
* handler. This means that read-side critical sections in process
|
||||
* context must not be interrupted by softirqs. This interface is to be
|
||||
* used when most of the read-side critical sections are in softirq context.
|
||||
* RCU read-side critical sections are delimited by :
|
||||
* - rcu_read_lock() and rcu_read_unlock(), if in interrupt context.
|
||||
* OR
|
||||
* - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
|
||||
* These may be nested.
|
||||
*/
|
||||
extern void call_rcu_bh(struct rcu_head *head,
|
||||
void (*func)(struct rcu_head *head));
|
||||
|
||||
/* Exported common interfaces */
|
||||
extern void synchronize_rcu(void);
|
||||
extern void rcu_barrier(void);
|
||||
extern long rcu_batches_completed(void);
|
||||
extern long rcu_batches_completed_bh(void);
|
||||
|
||||
/* Exported interfaces */
|
||||
extern void FASTCALL(call_rcu(struct rcu_head *head,
|
||||
void (*func)(struct rcu_head *head)));
|
||||
extern void FASTCALL(call_rcu_bh(struct rcu_head *head,
|
||||
void (*func)(struct rcu_head *head)));
|
||||
extern void synchronize_rcu(void);
|
||||
extern void rcu_barrier(void);
|
||||
/* Internal to kernel */
|
||||
extern void rcu_init(void);
|
||||
extern int rcu_needs_cpu(int cpu);
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
#endif /* __LINUX_RCUPDATE_H */
|
||||
|
86
include/linux/rcupreempt.h
Normal file
86
include/linux/rcupreempt.h
Normal file
@ -0,0 +1,86 @@
|
||||
/*
|
||||
* Read-Copy Update mechanism for mutual exclusion (RT implementation)
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*
|
||||
* Copyright (C) IBM Corporation, 2006
|
||||
*
|
||||
* Author: Paul McKenney <paulmck@us.ibm.com>
|
||||
*
|
||||
* Based on the original work by Paul McKenney <paul.mckenney@us.ibm.com>
|
||||
* and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
|
||||
* Papers:
|
||||
* http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
|
||||
* http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
|
||||
*
|
||||
* For detailed explanation of Read-Copy Update mechanism see -
|
||||
* Documentation/RCU
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef __LINUX_RCUPREEMPT_H
|
||||
#define __LINUX_RCUPREEMPT_H
|
||||
|
||||
#ifdef __KERNEL__
|
||||
|
||||
#include <linux/cache.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/threads.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/seqlock.h>
|
||||
|
||||
#define rcu_qsctr_inc(cpu)
|
||||
#define rcu_bh_qsctr_inc(cpu)
|
||||
#define call_rcu_bh(head, rcu) call_rcu(head, rcu)
|
||||
|
||||
extern void __rcu_read_lock(void);
|
||||
extern void __rcu_read_unlock(void);
|
||||
extern int rcu_pending(int cpu);
|
||||
extern int rcu_needs_cpu(int cpu);
|
||||
|
||||
/*
 * _bh read-side primitives: an ordinary RCU read-side critical section
 * with bottom halves disabled inside it. The unlock macro undoes the two
 * steps in reverse order.
 *
 * Wrapped in do { } while (0) so that each macro expands to exactly one
 * statement and can be used as "if (x) __rcu_read_lock_bh(); else ...".
 */
#define __rcu_read_lock_bh() \
	do { \
		rcu_read_lock(); \
		local_bh_disable(); \
	} while (0)
#define __rcu_read_unlock_bh() \
	do { \
		local_bh_enable(); \
		rcu_read_unlock(); \
	} while (0)
|
||||
|
||||
extern void __synchronize_sched(void);
|
||||
|
||||
extern void __rcu_init(void);
|
||||
extern void rcu_check_callbacks(int cpu, int user);
|
||||
extern void rcu_restart_cpu(int cpu);
|
||||
extern long rcu_batches_completed(void);
|
||||
|
||||
/*
|
||||
* Return the number of RCU batches processed thus far. Useful for debug
|
||||
* and statistics. The _bh variant is identical to straight RCU.
|
||||
*/
|
||||
static inline long rcu_batches_completed_bh(void)
|
||||
{
|
||||
return rcu_batches_completed();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_RCU_TRACE
|
||||
struct rcupreempt_trace;
|
||||
extern long *rcupreempt_flipctr(int cpu);
|
||||
extern long rcupreempt_data_completed(void);
|
||||
extern int rcupreempt_flip_flag(int cpu);
|
||||
extern int rcupreempt_mb_flag(int cpu);
|
||||
extern char *rcupreempt_try_flip_state_name(void);
|
||||
extern struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu);
|
||||
#endif
|
||||
|
||||
struct softirq_action;
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
#endif /* __LINUX_RCUPREEMPT_H */
|
99
include/linux/rcupreempt_trace.h
Normal file
99
include/linux/rcupreempt_trace.h
Normal file
@ -0,0 +1,99 @@
|
||||
/*
|
||||
* Read-Copy Update mechanism for mutual exclusion (RT implementation)
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*
|
||||
* Copyright (C) IBM Corporation, 2006
|
||||
*
|
||||
* Author: Paul McKenney <paulmck@us.ibm.com>
|
||||
*
|
||||
* Based on the original work by Paul McKenney <paul.mckenney@us.ibm.com>
|
||||
* and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
|
||||
* Papers:
|
||||
* http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
|
||||
* http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
|
||||
*
|
||||
* For detailed explanation of the Preemptible Read-Copy Update mechanism see -
|
||||
* http://lwn.net/Articles/253651/
|
||||
*/
|
||||
|
||||
#ifndef __LINUX_RCUPREEMPT_TRACE_H
|
||||
#define __LINUX_RCUPREEMPT_TRACE_H
|
||||
|
||||
#ifdef __KERNEL__
|
||||
#include <linux/types.h>
|
||||
#include <linux/kernel.h>
|
||||
|
||||
#include <asm/atomic.h>
|
||||
|
||||
/*
|
||||
* PREEMPT_RCU data structures.
|
||||
*/
|
||||
|
||||
struct rcupreempt_trace {
|
||||
long next_length;
|
||||
long next_add;
|
||||
long wait_length;
|
||||
long wait_add;
|
||||
long done_length;
|
||||
long done_add;
|
||||
long done_remove;
|
||||
atomic_t done_invoked;
|
||||
long rcu_check_callbacks;
|
||||
atomic_t rcu_try_flip_1;
|
||||
atomic_t rcu_try_flip_e1;
|
||||
long rcu_try_flip_i1;
|
||||
long rcu_try_flip_ie1;
|
||||
long rcu_try_flip_g1;
|
||||
long rcu_try_flip_a1;
|
||||
long rcu_try_flip_ae1;
|
||||
long rcu_try_flip_a2;
|
||||
long rcu_try_flip_z1;
|
||||
long rcu_try_flip_ze1;
|
||||
long rcu_try_flip_z2;
|
||||
long rcu_try_flip_m1;
|
||||
long rcu_try_flip_me1;
|
||||
long rcu_try_flip_m2;
|
||||
};
|
||||
|
||||
#ifdef CONFIG_RCU_TRACE
|
||||
#define RCU_TRACE(fn, arg) fn(arg);
|
||||
#else
|
||||
#define RCU_TRACE(fn, arg)
|
||||
#endif
|
||||
|
||||
extern void rcupreempt_trace_move2done(struct rcupreempt_trace *trace);
|
||||
extern void rcupreempt_trace_move2wait(struct rcupreempt_trace *trace);
|
||||
extern void rcupreempt_trace_try_flip_1(struct rcupreempt_trace *trace);
|
||||
extern void rcupreempt_trace_try_flip_e1(struct rcupreempt_trace *trace);
|
||||
extern void rcupreempt_trace_try_flip_i1(struct rcupreempt_trace *trace);
|
||||
extern void rcupreempt_trace_try_flip_ie1(struct rcupreempt_trace *trace);
|
||||
extern void rcupreempt_trace_try_flip_g1(struct rcupreempt_trace *trace);
|
||||
extern void rcupreempt_trace_try_flip_a1(struct rcupreempt_trace *trace);
|
||||
extern void rcupreempt_trace_try_flip_ae1(struct rcupreempt_trace *trace);
|
||||
extern void rcupreempt_trace_try_flip_a2(struct rcupreempt_trace *trace);
|
||||
extern void rcupreempt_trace_try_flip_z1(struct rcupreempt_trace *trace);
|
||||
extern void rcupreempt_trace_try_flip_ze1(struct rcupreempt_trace *trace);
|
||||
extern void rcupreempt_trace_try_flip_z2(struct rcupreempt_trace *trace);
|
||||
extern void rcupreempt_trace_try_flip_m1(struct rcupreempt_trace *trace);
|
||||
extern void rcupreempt_trace_try_flip_me1(struct rcupreempt_trace *trace);
|
||||
extern void rcupreempt_trace_try_flip_m2(struct rcupreempt_trace *trace);
|
||||
extern void rcupreempt_trace_check_callbacks(struct rcupreempt_trace *trace);
|
||||
extern void rcupreempt_trace_done_remove(struct rcupreempt_trace *trace);
|
||||
extern void rcupreempt_trace_invoke(struct rcupreempt_trace *trace);
|
||||
extern void rcupreempt_trace_next_add(struct rcupreempt_trace *trace);
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
#endif /* __LINUX_RCUPREEMPT_TRACE_H */
|
@ -78,7 +78,6 @@ struct sched_param {
|
||||
#include <linux/proportions.h>
|
||||
#include <linux/seccomp.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/futex.h>
|
||||
#include <linux/rtmutex.h>
|
||||
|
||||
#include <linux/time.h>
|
||||
@ -88,11 +87,13 @@ struct sched_param {
|
||||
#include <linux/hrtimer.h>
|
||||
#include <linux/task_io_accounting.h>
|
||||
#include <linux/kobject.h>
|
||||
#include <linux/latencytop.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
|
||||
struct exec_domain;
|
||||
struct futex_pi_state;
|
||||
struct robust_list_head;
|
||||
struct bio;
|
||||
|
||||
/*
|
||||
@ -230,6 +231,8 @@ static inline int select_nohz_load_balancer(int cpu)
|
||||
}
|
||||
#endif
|
||||
|
||||
extern unsigned long rt_needs_cpu(int cpu);
|
||||
|
||||
/*
|
||||
* Only dump TASK_* tasks. (0 for all tasks)
|
||||
*/
|
||||
@ -257,13 +260,19 @@ extern void trap_init(void);
|
||||
extern void account_process_tick(struct task_struct *task, int user);
|
||||
extern void update_process_times(int user);
|
||||
extern void scheduler_tick(void);
|
||||
extern void hrtick_resched(void);
|
||||
|
||||
extern void sched_show_task(struct task_struct *p);
|
||||
|
||||
#ifdef CONFIG_DETECT_SOFTLOCKUP
|
||||
extern void softlockup_tick(void);
|
||||
extern void spawn_softlockup_task(void);
|
||||
extern void touch_softlockup_watchdog(void);
|
||||
extern void touch_all_softlockup_watchdogs(void);
|
||||
extern int softlockup_thresh;
|
||||
extern unsigned long softlockup_thresh;
|
||||
extern unsigned long sysctl_hung_task_check_count;
|
||||
extern unsigned long sysctl_hung_task_timeout_secs;
|
||||
extern unsigned long sysctl_hung_task_warnings;
|
||||
#else
|
||||
static inline void softlockup_tick(void)
|
||||
{
|
||||
@ -822,6 +831,7 @@ struct sched_class {
|
||||
void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);
|
||||
void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
|
||||
void (*yield_task) (struct rq *rq);
|
||||
int (*select_task_rq)(struct task_struct *p, int sync);
|
||||
|
||||
void (*check_preempt_curr) (struct rq *rq, struct task_struct *p);
|
||||
|
||||
@ -837,11 +847,25 @@ struct sched_class {
|
||||
int (*move_one_task) (struct rq *this_rq, int this_cpu,
|
||||
struct rq *busiest, struct sched_domain *sd,
|
||||
enum cpu_idle_type idle);
|
||||
void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
|
||||
void (*post_schedule) (struct rq *this_rq);
|
||||
void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
|
||||
#endif
|
||||
|
||||
void (*set_curr_task) (struct rq *rq);
|
||||
void (*task_tick) (struct rq *rq, struct task_struct *p);
|
||||
void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
|
||||
void (*task_new) (struct rq *rq, struct task_struct *p);
|
||||
void (*set_cpus_allowed)(struct task_struct *p, cpumask_t *newmask);
|
||||
|
||||
void (*join_domain)(struct rq *rq);
|
||||
void (*leave_domain)(struct rq *rq);
|
||||
|
||||
void (*switched_from) (struct rq *this_rq, struct task_struct *task,
|
||||
int running);
|
||||
void (*switched_to) (struct rq *this_rq, struct task_struct *task,
|
||||
int running);
|
||||
void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
|
||||
int oldprio, int running);
|
||||
};
|
||||
|
||||
struct load_weight {
|
||||
@ -871,6 +895,8 @@ struct sched_entity {
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
u64 wait_start;
|
||||
u64 wait_max;
|
||||
u64 wait_count;
|
||||
u64 wait_sum;
|
||||
|
||||
u64 sleep_start;
|
||||
u64 sleep_max;
|
||||
@ -909,6 +935,21 @@ struct sched_entity {
|
||||
#endif
|
||||
};
|
||||
|
||||
struct sched_rt_entity {
|
||||
struct list_head run_list;
|
||||
unsigned int time_slice;
|
||||
unsigned long timeout;
|
||||
int nr_cpus_allowed;
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
struct sched_rt_entity *parent;
|
||||
/* rq on which this entity is (to be) queued: */
|
||||
struct rt_rq *rt_rq;
|
||||
/* rq "owned" by this entity/group: */
|
||||
struct rt_rq *my_q;
|
||||
#endif
|
||||
};
|
||||
|
||||
struct task_struct {
|
||||
volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
|
||||
void *stack;
|
||||
@ -925,9 +966,9 @@ struct task_struct {
|
||||
#endif
|
||||
|
||||
int prio, static_prio, normal_prio;
|
||||
struct list_head run_list;
|
||||
const struct sched_class *sched_class;
|
||||
struct sched_entity se;
|
||||
struct sched_rt_entity rt;
|
||||
|
||||
#ifdef CONFIG_PREEMPT_NOTIFIERS
|
||||
/* list of struct preempt_notifier: */
|
||||
@ -951,7 +992,11 @@ struct task_struct {
|
||||
|
||||
unsigned int policy;
|
||||
cpumask_t cpus_allowed;
|
||||
unsigned int time_slice;
|
||||
|
||||
#ifdef CONFIG_PREEMPT_RCU
|
||||
int rcu_read_lock_nesting;
|
||||
int rcu_flipctr_idx;
|
||||
#endif /* #ifdef CONFIG_PREEMPT_RCU */
|
||||
|
||||
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
|
||||
struct sched_info sched_info;
|
||||
@ -1041,6 +1086,11 @@ struct task_struct {
|
||||
/* ipc stuff */
|
||||
struct sysv_sem sysvsem;
|
||||
#endif
|
||||
#ifdef CONFIG_DETECT_SOFTLOCKUP
|
||||
/* hung task detection */
|
||||
unsigned long last_switch_timestamp;
|
||||
unsigned long last_switch_count;
|
||||
#endif
|
||||
/* CPU-specific state of this task */
|
||||
struct thread_struct thread;
|
||||
/* filesystem information */
|
||||
@ -1173,6 +1223,10 @@ struct task_struct {
|
||||
int make_it_fail;
|
||||
#endif
|
||||
struct prop_local_single dirties;
|
||||
#ifdef CONFIG_LATENCYTOP
|
||||
int latency_record_count;
|
||||
struct latency_record latency_record[LT_SAVECOUNT];
|
||||
#endif
|
||||
};
|
||||
|
||||
/*
|
||||
@ -1453,6 +1507,12 @@ extern unsigned int sysctl_sched_child_runs_first;
|
||||
extern unsigned int sysctl_sched_features;
|
||||
extern unsigned int sysctl_sched_migration_cost;
|
||||
extern unsigned int sysctl_sched_nr_migrate;
|
||||
extern unsigned int sysctl_sched_rt_period;
|
||||
extern unsigned int sysctl_sched_rt_ratio;
|
||||
#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
|
||||
extern unsigned int sysctl_sched_min_bal_int_shares;
|
||||
extern unsigned int sysctl_sched_max_bal_int_shares;
|
||||
#endif
|
||||
|
||||
int sched_nr_latency_handler(struct ctl_table *table, int write,
|
||||
struct file *file, void __user *buffer, size_t *length,
|
||||
@ -1845,7 +1905,18 @@ static inline int need_resched(void)
|
||||
* cond_resched_lock() will drop the spinlock before scheduling,
|
||||
* cond_resched_softirq() will enable bhs before scheduling.
|
||||
*/
|
||||
extern int cond_resched(void);
|
||||
#ifdef CONFIG_PREEMPT
|
||||
static inline int cond_resched(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
extern int _cond_resched(void);
|
||||
static inline int cond_resched(void)
|
||||
{
|
||||
return _cond_resched();
|
||||
}
|
||||
#endif
|
||||
extern int cond_resched_lock(spinlock_t * lock);
|
||||
extern int cond_resched_softirq(void);
|
||||
|
||||
|
@ -17,22 +17,10 @@ extern void __lockfunc __release_kernel_lock(void);
|
||||
__release_kernel_lock(); \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* Non-SMP kernels will never block on the kernel lock,
|
||||
* so we are better off returning a constant zero from
|
||||
* reacquire_kernel_lock() so that the compiler can see
|
||||
* it at compile-time.
|
||||
*/
|
||||
#if defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_BKL)
|
||||
# define return_value_on_smp return
|
||||
#else
|
||||
# define return_value_on_smp
|
||||
#endif
|
||||
|
||||
static inline int reacquire_kernel_lock(struct task_struct *task)
|
||||
{
|
||||
if (unlikely(task->lock_depth >= 0))
|
||||
return_value_on_smp __reacquire_kernel_lock();
|
||||
return __reacquire_kernel_lock();
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -9,10 +9,13 @@ struct stack_trace {
|
||||
};
|
||||
|
||||
extern void save_stack_trace(struct stack_trace *trace);
|
||||
extern void save_stack_trace_tsk(struct task_struct *tsk,
|
||||
struct stack_trace *trace);
|
||||
|
||||
extern void print_stack_trace(struct stack_trace *trace, int spaces);
|
||||
#else
|
||||
# define save_stack_trace(trace) do { } while (0)
|
||||
# define save_stack_trace_tsk(tsk, trace) do { } while (0)
|
||||
# define print_stack_trace(trace, spaces) do { } while (0)
|
||||
#endif
|
||||
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* Copyright (C) 2002, IBM Corp.
|
||||
*
|
||||
* All rights reserved.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@ -103,6 +103,7 @@
|
||||
.forkexec_idx = 0, \
|
||||
.flags = SD_LOAD_BALANCE \
|
||||
| SD_BALANCE_NEWIDLE \
|
||||
| SD_BALANCE_FORK \
|
||||
| SD_BALANCE_EXEC \
|
||||
| SD_WAKE_AFFINE \
|
||||
| SD_WAKE_IDLE \
|
||||
@ -134,6 +135,7 @@
|
||||
.forkexec_idx = 1, \
|
||||
.flags = SD_LOAD_BALANCE \
|
||||
| SD_BALANCE_NEWIDLE \
|
||||
| SD_BALANCE_FORK \
|
||||
| SD_BALANCE_EXEC \
|
||||
| SD_WAKE_AFFINE \
|
||||
| SD_WAKE_IDLE \
|
||||
@ -165,6 +167,7 @@
|
||||
.forkexec_idx = 1, \
|
||||
.flags = SD_LOAD_BALANCE \
|
||||
| SD_BALANCE_NEWIDLE \
|
||||
| SD_BALANCE_FORK \
|
||||
| SD_BALANCE_EXEC \
|
||||
| SD_WAKE_AFFINE \
|
||||
| BALANCE_FOR_PKG_POWER,\
|
||||
|
28
init/Kconfig
28
init/Kconfig
@ -763,3 +763,31 @@ source "block/Kconfig"
|
||||
|
||||
config PREEMPT_NOTIFIERS
|
||||
bool
|
||||
|
||||
choice
|
||||
prompt "RCU implementation type:"
|
||||
default CLASSIC_RCU
|
||||
|
||||
config CLASSIC_RCU
|
||||
bool "Classic RCU"
|
||||
help
|
||||
This option selects the classic RCU implementation that is
|
||||
designed for best read-side performance on non-realtime
|
||||
systems.
|
||||
|
||||
Say Y if you are unsure.
|
||||
|
||||
config PREEMPT_RCU
|
||||
bool "Preemptible RCU"
|
||||
depends on PREEMPT
|
||||
help
|
||||
This option reduces the latency of the kernel by making certain
|
||||
RCU sections preemptible. Normally RCU code is non-preemptible; if
|
||||
this option is selected then read-only RCU sections become
|
||||
preemptible. This helps latency, but may expose bugs due to
|
||||
now-naive assumptions about each RCU read-side critical section
|
||||
remaining on a given CPU through its execution.
|
||||
|
||||
Say N if you are unsure.
|
||||
|
||||
endchoice
|
||||
|
@ -607,6 +607,7 @@ asmlinkage void __init start_kernel(void)
|
||||
vfs_caches_init_early();
|
||||
cpuset_init_early();
|
||||
mem_init();
|
||||
cpu_hotplug_init();
|
||||
kmem_cache_init();
|
||||
setup_per_cpu_pageset();
|
||||
numa_policy_init();
|
||||
|
@ -54,3 +54,5 @@ config HZ
|
||||
default 300 if HZ_300
|
||||
default 1000 if HZ_1000
|
||||
|
||||
config SCHED_HRTICK
|
||||
def_bool HIGH_RES_TIMERS && X86
|
||||
|
@ -52,14 +52,13 @@ config PREEMPT
|
||||
|
||||
endchoice
|
||||
|
||||
config PREEMPT_BKL
|
||||
bool "Preempt The Big Kernel Lock"
|
||||
depends on SMP || PREEMPT
|
||||
config RCU_TRACE
|
||||
bool "Enable tracing for RCU - currently stats in debugfs"
|
||||
select DEBUG_FS
|
||||
default y
|
||||
help
|
||||
This option reduces the latency of the kernel by making the
|
||||
big kernel lock preemptible.
|
||||
This option provides tracing in RCU which presents stats
|
||||
in debugfs for debugging RCU implementation.
|
||||
|
||||
Say Y here if you are building a kernel for a desktop system.
|
||||
Say Y here if you want to enable RCU tracing.
|
||||
Say N if you are unsure.
|
||||
|
||||
|
@ -52,11 +52,17 @@ obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
|
||||
obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
|
||||
obj-$(CONFIG_SECCOMP) += seccomp.o
|
||||
obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
|
||||
obj-$(CONFIG_CLASSIC_RCU) += rcuclassic.o
|
||||
obj-$(CONFIG_PREEMPT_RCU) += rcupreempt.o
|
||||
ifeq ($(CONFIG_PREEMPT_RCU),y)
|
||||
obj-$(CONFIG_RCU_TRACE) += rcupreempt_trace.o
|
||||
endif
|
||||
obj-$(CONFIG_RELAY) += relay.o
|
||||
obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
|
||||
obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
|
||||
obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
|
||||
obj-$(CONFIG_MARKERS) += marker.o
|
||||
obj-$(CONFIG_LATENCYTOP) += latencytop.o
|
||||
|
||||
ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
|
||||
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
|
||||
|
166
kernel/cpu.c
166
kernel/cpu.c
@ -15,9 +15,8 @@
|
||||
#include <linux/stop_machine.h>
|
||||
#include <linux/mutex.h>
|
||||
|
||||
/* This protects CPUs going up and down... */
|
||||
/* Serializes the updates to cpu_online_map, cpu_present_map */
|
||||
static DEFINE_MUTEX(cpu_add_remove_lock);
|
||||
static DEFINE_MUTEX(cpu_bitmask_lock);
|
||||
|
||||
static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);
|
||||
|
||||
@ -26,52 +25,123 @@ static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);
|
||||
*/
|
||||
static int cpu_hotplug_disabled;
|
||||
|
||||
static struct {
|
||||
struct task_struct *active_writer;
|
||||
struct mutex lock; /* Synchronizes accesses to refcount, */
|
||||
/*
|
||||
* Also blocks the new readers during
|
||||
* an ongoing cpu hotplug operation.
|
||||
*/
|
||||
int refcount;
|
||||
wait_queue_head_t writer_queue;
|
||||
} cpu_hotplug;
|
||||
|
||||
#define writer_exists() (cpu_hotplug.active_writer != NULL)
|
||||
|
||||
void __init cpu_hotplug_init(void)
|
||||
{
|
||||
cpu_hotplug.active_writer = NULL;
|
||||
mutex_init(&cpu_hotplug.lock);
|
||||
cpu_hotplug.refcount = 0;
|
||||
init_waitqueue_head(&cpu_hotplug.writer_queue);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
|
||||
/* Crappy recursive lock-takers in cpufreq! Complain loudly about idiots */
|
||||
static struct task_struct *recursive;
|
||||
static int recursive_depth;
|
||||
|
||||
void lock_cpu_hotplug(void)
|
||||
void get_online_cpus(void)
|
||||
{
|
||||
struct task_struct *tsk = current;
|
||||
|
||||
if (tsk == recursive) {
|
||||
static int warnings = 10;
|
||||
if (warnings) {
|
||||
printk(KERN_ERR "Lukewarm IQ detected in hotplug locking\n");
|
||||
WARN_ON(1);
|
||||
warnings--;
|
||||
}
|
||||
recursive_depth++;
|
||||
might_sleep();
|
||||
if (cpu_hotplug.active_writer == current)
|
||||
return;
|
||||
}
|
||||
mutex_lock(&cpu_bitmask_lock);
|
||||
recursive = tsk;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(lock_cpu_hotplug);
|
||||
mutex_lock(&cpu_hotplug.lock);
|
||||
cpu_hotplug.refcount++;
|
||||
mutex_unlock(&cpu_hotplug.lock);
|
||||
|
||||
void unlock_cpu_hotplug(void)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(get_online_cpus);
|
||||
|
||||
void put_online_cpus(void)
|
||||
{
|
||||
WARN_ON(recursive != current);
|
||||
if (recursive_depth) {
|
||||
recursive_depth--;
|
||||
if (cpu_hotplug.active_writer == current)
|
||||
return;
|
||||
}
|
||||
recursive = NULL;
|
||||
mutex_unlock(&cpu_bitmask_lock);
|
||||
mutex_lock(&cpu_hotplug.lock);
|
||||
cpu_hotplug.refcount--;
|
||||
|
||||
if (unlikely(writer_exists()) && !cpu_hotplug.refcount)
|
||||
wake_up(&cpu_hotplug.writer_queue);
|
||||
|
||||
mutex_unlock(&cpu_hotplug.lock);
|
||||
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(unlock_cpu_hotplug);
|
||||
EXPORT_SYMBOL_GPL(put_online_cpus);
|
||||
|
||||
#endif /* CONFIG_HOTPLUG_CPU */
|
||||
|
||||
/*
|
||||
* The following two APIs must be used when attempting
|
||||
* to serialize the updates to cpu_online_map, cpu_present_map.
|
||||
*/
|
||||
void cpu_maps_update_begin(void)
|
||||
{
|
||||
mutex_lock(&cpu_add_remove_lock);
|
||||
}
|
||||
|
||||
void cpu_maps_update_done(void)
|
||||
{
|
||||
mutex_unlock(&cpu_add_remove_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* This ensures that the hotplug operation can begin only when the
|
||||
* refcount goes to zero.
|
||||
*
|
||||
* Note that during a cpu-hotplug operation, the new readers, if any,
|
||||
* will be blocked by the cpu_hotplug.lock
|
||||
*
|
||||
* Since cpu_hotplug_begin() is always called after invoking
|
||||
* cpu_maps_update_begin, we can be sure that only one writer is active.
|
||||
*
|
||||
* Note that theoretically, there is a possibility of a livelock:
|
||||
* - Refcount goes to zero, last reader wakes up the sleeping
|
||||
* writer.
|
||||
* - Last reader unlocks the cpu_hotplug.lock.
|
||||
* - A new reader arrives at this moment, bumps up the refcount.
|
||||
* - The writer acquires the cpu_hotplug.lock, finds the refcount
|
||||
* non zero and goes to sleep again.
|
||||
*
|
||||
* However, this is very difficult to achieve in practice since
|
||||
* get_online_cpus() is not an API which is called all that often.
|
||||
*
|
||||
*/
|
||||
static void cpu_hotplug_begin(void)
|
||||
{
|
||||
DECLARE_WAITQUEUE(wait, current);
|
||||
|
||||
mutex_lock(&cpu_hotplug.lock);
|
||||
|
||||
cpu_hotplug.active_writer = current;
|
||||
add_wait_queue_exclusive(&cpu_hotplug.writer_queue, &wait);
|
||||
while (cpu_hotplug.refcount) {
|
||||
set_current_state(TASK_UNINTERRUPTIBLE);
|
||||
mutex_unlock(&cpu_hotplug.lock);
|
||||
schedule();
|
||||
mutex_lock(&cpu_hotplug.lock);
|
||||
}
|
||||
remove_wait_queue_locked(&cpu_hotplug.writer_queue, &wait);
|
||||
}
|
||||
|
||||
static void cpu_hotplug_done(void)
|
||||
{
|
||||
cpu_hotplug.active_writer = NULL;
|
||||
mutex_unlock(&cpu_hotplug.lock);
|
||||
}
|
||||
/* Need to know about CPUs going up/down? */
|
||||
int __cpuinit register_cpu_notifier(struct notifier_block *nb)
|
||||
{
|
||||
int ret;
|
||||
mutex_lock(&cpu_add_remove_lock);
|
||||
cpu_maps_update_begin();
|
||||
ret = raw_notifier_chain_register(&cpu_chain, nb);
|
||||
mutex_unlock(&cpu_add_remove_lock);
|
||||
cpu_maps_update_done();
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -81,9 +151,9 @@ EXPORT_SYMBOL(register_cpu_notifier);
|
||||
|
||||
void unregister_cpu_notifier(struct notifier_block *nb)
|
||||
{
|
||||
mutex_lock(&cpu_add_remove_lock);
|
||||
cpu_maps_update_begin();
|
||||
raw_notifier_chain_unregister(&cpu_chain, nb);
|
||||
mutex_unlock(&cpu_add_remove_lock);
|
||||
cpu_maps_update_done();
|
||||
}
|
||||
EXPORT_SYMBOL(unregister_cpu_notifier);
|
||||
|
||||
@ -147,7 +217,7 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
|
||||
if (!cpu_online(cpu))
|
||||
return -EINVAL;
|
||||
|
||||
raw_notifier_call_chain(&cpu_chain, CPU_LOCK_ACQUIRE, hcpu);
|
||||
cpu_hotplug_begin();
|
||||
err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
|
||||
hcpu, -1, &nr_calls);
|
||||
if (err == NOTIFY_BAD) {
|
||||
@ -166,9 +236,7 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
|
||||
cpu_clear(cpu, tmp);
|
||||
set_cpus_allowed(current, tmp);
|
||||
|
||||
mutex_lock(&cpu_bitmask_lock);
|
||||
p = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
|
||||
mutex_unlock(&cpu_bitmask_lock);
|
||||
|
||||
if (IS_ERR(p) || cpu_online(cpu)) {
|
||||
/* CPU didn't die: tell everyone. Can't complain. */
|
||||
@ -202,7 +270,7 @@ out_thread:
|
||||
out_allowed:
|
||||
set_cpus_allowed(current, old_allowed);
|
||||
out_release:
|
||||
raw_notifier_call_chain(&cpu_chain, CPU_LOCK_RELEASE, hcpu);
|
||||
cpu_hotplug_done();
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -210,13 +278,13 @@ int cpu_down(unsigned int cpu)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
mutex_lock(&cpu_add_remove_lock);
|
||||
cpu_maps_update_begin();
|
||||
if (cpu_hotplug_disabled)
|
||||
err = -EBUSY;
|
||||
else
|
||||
err = _cpu_down(cpu, 0);
|
||||
|
||||
mutex_unlock(&cpu_add_remove_lock);
|
||||
cpu_maps_update_done();
|
||||
return err;
|
||||
}
|
||||
#endif /*CONFIG_HOTPLUG_CPU*/
|
||||
@ -231,7 +299,7 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
|
||||
if (cpu_online(cpu) || !cpu_present(cpu))
|
||||
return -EINVAL;
|
||||
|
||||
raw_notifier_call_chain(&cpu_chain, CPU_LOCK_ACQUIRE, hcpu);
|
||||
cpu_hotplug_begin();
|
||||
ret = __raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE | mod, hcpu,
|
||||
-1, &nr_calls);
|
||||
if (ret == NOTIFY_BAD) {
|
||||
@ -243,9 +311,7 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
|
||||
}
|
||||
|
||||
/* Arch-specific enabling code. */
|
||||
mutex_lock(&cpu_bitmask_lock);
|
||||
ret = __cpu_up(cpu);
|
||||
mutex_unlock(&cpu_bitmask_lock);
|
||||
if (ret != 0)
|
||||
goto out_notify;
|
||||
BUG_ON(!cpu_online(cpu));
|
||||
@ -257,7 +323,7 @@ out_notify:
|
||||
if (ret != 0)
|
||||
__raw_notifier_call_chain(&cpu_chain,
|
||||
CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
|
||||
raw_notifier_call_chain(&cpu_chain, CPU_LOCK_RELEASE, hcpu);
|
||||
cpu_hotplug_done();
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -275,13 +341,13 @@ int __cpuinit cpu_up(unsigned int cpu)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
mutex_lock(&cpu_add_remove_lock);
|
||||
cpu_maps_update_begin();
|
||||
if (cpu_hotplug_disabled)
|
||||
err = -EBUSY;
|
||||
else
|
||||
err = _cpu_up(cpu, 0);
|
||||
|
||||
mutex_unlock(&cpu_add_remove_lock);
|
||||
cpu_maps_update_done();
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -292,7 +358,7 @@ int disable_nonboot_cpus(void)
|
||||
{
|
||||
int cpu, first_cpu, error = 0;
|
||||
|
||||
mutex_lock(&cpu_add_remove_lock);
|
||||
cpu_maps_update_begin();
|
||||
first_cpu = first_cpu(cpu_online_map);
|
||||
/* We take down all of the non-boot CPUs in one shot to avoid races
|
||||
* with the userspace trying to use the CPU hotplug at the same time
|
||||
@ -319,7 +385,7 @@ int disable_nonboot_cpus(void)
|
||||
} else {
|
||||
printk(KERN_ERR "Non-boot CPUs are not disabled\n");
|
||||
}
|
||||
mutex_unlock(&cpu_add_remove_lock);
|
||||
cpu_maps_update_done();
|
||||
return error;
|
||||
}
|
||||
|
||||
@ -328,7 +394,7 @@ void enable_nonboot_cpus(void)
|
||||
int cpu, error;
|
||||
|
||||
/* Allow everyone to use the CPU hotplug again */
|
||||
mutex_lock(&cpu_add_remove_lock);
|
||||
cpu_maps_update_begin();
|
||||
cpu_hotplug_disabled = 0;
|
||||
if (cpus_empty(frozen_cpus))
|
||||
goto out;
|
||||
@ -344,6 +410,6 @@ void enable_nonboot_cpus(void)
|
||||
}
|
||||
cpus_clear(frozen_cpus);
|
||||
out:
|
||||
mutex_unlock(&cpu_add_remove_lock);
|
||||
cpu_maps_update_done();
|
||||
}
|
||||
#endif /* CONFIG_PM_SLEEP_SMP */
|
||||
|
@ -537,10 +537,10 @@ static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
|
||||
*
|
||||
* Call with cgroup_mutex held. May take callback_mutex during
|
||||
* call due to the kfifo_alloc() and kmalloc() calls. May nest
|
||||
* a call to the lock_cpu_hotplug()/unlock_cpu_hotplug() pair.
|
||||
* a call to the get_online_cpus()/put_online_cpus() pair.
|
||||
* Must not be called holding callback_mutex, because we must not
|
||||
* call lock_cpu_hotplug() while holding callback_mutex. Elsewhere
|
||||
* the kernel nests callback_mutex inside lock_cpu_hotplug() calls.
|
||||
* call get_online_cpus() while holding callback_mutex. Elsewhere
|
||||
* the kernel nests callback_mutex inside get_online_cpus() calls.
|
||||
* So the reverse nesting would risk an ABBA deadlock.
|
||||
*
|
||||
* The three key local variables below are:
|
||||
@ -691,9 +691,9 @@ restart:
|
||||
|
||||
rebuild:
|
||||
/* Have scheduler rebuild sched domains */
|
||||
lock_cpu_hotplug();
|
||||
get_online_cpus();
|
||||
partition_sched_domains(ndoms, doms);
|
||||
unlock_cpu_hotplug();
|
||||
put_online_cpus();
|
||||
|
||||
done:
|
||||
if (q && !IS_ERR(q))
|
||||
@ -1617,10 +1617,10 @@ static struct cgroup_subsys_state *cpuset_create(
|
||||
*
|
||||
* If the cpuset being removed has its flag 'sched_load_balance'
|
||||
* enabled, then simulate turning sched_load_balance off, which
|
||||
* will call rebuild_sched_domains(). The lock_cpu_hotplug()
|
||||
* will call rebuild_sched_domains(). The get_online_cpus()
|
||||
* call in rebuild_sched_domains() must not be made while holding
|
||||
* callback_mutex. Elsewhere the kernel nests callback_mutex inside
|
||||
* lock_cpu_hotplug() calls. So the reverse nesting would risk an
|
||||
* get_online_cpus() calls. So the reverse nesting would risk an
|
||||
* ABBA deadlock.
|
||||
*/
|
||||
|
||||
|
@ -1045,6 +1045,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
|
||||
copy_flags(clone_flags, p);
|
||||
INIT_LIST_HEAD(&p->children);
|
||||
INIT_LIST_HEAD(&p->sibling);
|
||||
#ifdef CONFIG_PREEMPT_RCU
|
||||
p->rcu_read_lock_nesting = 0;
|
||||
p->rcu_flipctr_idx = 0;
|
||||
#endif /* #ifdef CONFIG_PREEMPT_RCU */
|
||||
p->vfork_done = NULL;
|
||||
spin_lock_init(&p->alloc_lock);
|
||||
|
||||
@ -1059,6 +1063,11 @@ static struct task_struct *copy_process(unsigned long clone_flags,
|
||||
p->prev_utime = cputime_zero;
|
||||
p->prev_stime = cputime_zero;
|
||||
|
||||
#ifdef CONFIG_DETECT_SOFTLOCKUP
|
||||
p->last_switch_count = 0;
|
||||
p->last_switch_timestamp = 0;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_TASK_XACCT
|
||||
p->rchar = 0; /* I/O counter: bytes read */
|
||||
p->wchar = 0; /* I/O counter: bytes written */
|
||||
@ -1196,6 +1205,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
|
||||
#ifdef TIF_SYSCALL_EMU
|
||||
clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
|
||||
#endif
|
||||
clear_all_latency_tracing(p);
|
||||
|
||||
/* Our parent execution domain becomes current domain
|
||||
These must match for thread signalling to apply */
|
||||
@ -1237,6 +1247,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
|
||||
* parent's CPU). This avoids a lot of nasty races.
|
||||
*/
|
||||
p->cpus_allowed = current->cpus_allowed;
|
||||
p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed;
|
||||
if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
|
||||
!cpu_online(task_cpu(p))))
|
||||
set_task_cpu(p, smp_processor_id());
|
||||
|
284
kernel/hrtimer.c
284
kernel/hrtimer.c
@ -325,6 +325,22 @@ unsigned long ktime_divns(const ktime_t kt, s64 div)
|
||||
}
|
||||
#endif /* BITS_PER_LONG >= 64 */
|
||||
|
||||
/*
|
||||
* Check whether the timer is on the callback pending list
|
||||
*/
|
||||
static inline int hrtimer_cb_pending(const struct hrtimer *timer)
|
||||
{
|
||||
return timer->state & HRTIMER_STATE_PENDING;
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove a timer from the callback pending list
|
||||
*/
|
||||
static inline void hrtimer_remove_cb_pending(struct hrtimer *timer)
|
||||
{
|
||||
list_del_init(&timer->cb_entry);
|
||||
}
|
||||
|
||||
/* High resolution timer related functions */
|
||||
#ifdef CONFIG_HIGH_RES_TIMERS
|
||||
|
||||
@ -493,22 +509,6 @@ void hres_timers_resume(void)
|
||||
retrigger_next_event(NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check, whether the timer is on the callback pending list
|
||||
*/
|
||||
static inline int hrtimer_cb_pending(const struct hrtimer *timer)
|
||||
{
|
||||
return timer->state & HRTIMER_STATE_PENDING;
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove a timer from the callback pending list
|
||||
*/
|
||||
static inline void hrtimer_remove_cb_pending(struct hrtimer *timer)
|
||||
{
|
||||
list_del_init(&timer->cb_entry);
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize the high resolution related parts of cpu_base
|
||||
*/
|
||||
@ -516,7 +516,6 @@ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
|
||||
{
|
||||
base->expires_next.tv64 = KTIME_MAX;
|
||||
base->hres_active = 0;
|
||||
INIT_LIST_HEAD(&base->cb_pending);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -524,7 +523,6 @@ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
|
||||
*/
|
||||
static inline void hrtimer_init_timer_hres(struct hrtimer *timer)
|
||||
{
|
||||
INIT_LIST_HEAD(&timer->cb_entry);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -618,10 +616,13 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
/*
 * No-op stand-ins: every function below either does nothing or returns 0.
 * NOTE(review): these appear to be the !CONFIG_HIGH_RES_TIMERS variants
 * (the matching #endif follows); the #else itself is not visible here.
 */
static inline int hrtimer_cb_pending(struct hrtimer *timer)
{
	return 0;
}

static inline void hrtimer_remove_cb_pending(struct hrtimer *timer)
{
}

static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
{
}

static inline void hrtimer_init_timer_hres(struct hrtimer *timer)
{
}

/* Nothing to reprogram in this configuration: always reports 0. */
static inline int hrtimer_reprogram(struct hrtimer *timer,
				    struct hrtimer_clock_base *base)
{
	return 0;
}
|
||||
|
||||
#endif /* CONFIG_HIGH_RES_TIMERS */
|
||||
|
||||
@ -1001,6 +1002,7 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
|
||||
clock_id = CLOCK_MONOTONIC;
|
||||
|
||||
timer->base = &cpu_base->clock_base[clock_id];
|
||||
INIT_LIST_HEAD(&timer->cb_entry);
|
||||
hrtimer_init_timer_hres(timer);
|
||||
|
||||
#ifdef CONFIG_TIMER_STATS
|
||||
@ -1030,6 +1032,85 @@ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hrtimer_get_res);
|
||||
|
||||
static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base)
|
||||
{
|
||||
spin_lock_irq(&cpu_base->lock);
|
||||
|
||||
while (!list_empty(&cpu_base->cb_pending)) {
|
||||
enum hrtimer_restart (*fn)(struct hrtimer *);
|
||||
struct hrtimer *timer;
|
||||
int restart;
|
||||
|
||||
timer = list_entry(cpu_base->cb_pending.next,
|
||||
struct hrtimer, cb_entry);
|
||||
|
||||
timer_stats_account_hrtimer(timer);
|
||||
|
||||
fn = timer->function;
|
||||
__remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0);
|
||||
spin_unlock_irq(&cpu_base->lock);
|
||||
|
||||
restart = fn(timer);
|
||||
|
||||
spin_lock_irq(&cpu_base->lock);
|
||||
|
||||
timer->state &= ~HRTIMER_STATE_CALLBACK;
|
||||
if (restart == HRTIMER_RESTART) {
|
||||
BUG_ON(hrtimer_active(timer));
|
||||
/*
|
||||
* Enqueue the timer, allow reprogramming of the event
|
||||
* device
|
||||
*/
|
||||
enqueue_hrtimer(timer, timer->base, 1);
|
||||
} else if (hrtimer_active(timer)) {
|
||||
/*
|
||||
* If the timer was rearmed on another CPU, reprogram
|
||||
* the event device.
|
||||
*/
|
||||
if (timer->base->first == &timer->node)
|
||||
hrtimer_reprogram(timer, timer->base);
|
||||
}
|
||||
}
|
||||
spin_unlock_irq(&cpu_base->lock);
|
||||
}
|
||||
|
||||
static void __run_hrtimer(struct hrtimer *timer)
|
||||
{
|
||||
struct hrtimer_clock_base *base = timer->base;
|
||||
struct hrtimer_cpu_base *cpu_base = base->cpu_base;
|
||||
enum hrtimer_restart (*fn)(struct hrtimer *);
|
||||
int restart;
|
||||
|
||||
__remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
|
||||
timer_stats_account_hrtimer(timer);
|
||||
|
||||
fn = timer->function;
|
||||
if (timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ) {
|
||||
/*
|
||||
* Used for scheduler timers, avoid lock inversion with
|
||||
* rq->lock and tasklist_lock.
|
||||
*
|
||||
* These timers are required to deal with enqueue expiry
|
||||
* themselves and are not allowed to migrate.
|
||||
*/
|
||||
spin_unlock(&cpu_base->lock);
|
||||
restart = fn(timer);
|
||||
spin_lock(&cpu_base->lock);
|
||||
} else
|
||||
restart = fn(timer);
|
||||
|
||||
/*
|
||||
* Note: We clear the CALLBACK bit after enqueue_hrtimer to avoid
|
||||
* reprogramming of the event hardware. This happens at the end of this
|
||||
* function anyway.
|
||||
*/
|
||||
if (restart != HRTIMER_NORESTART) {
|
||||
BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
|
||||
enqueue_hrtimer(timer, base, 0);
|
||||
}
|
||||
timer->state &= ~HRTIMER_STATE_CALLBACK;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HIGH_RES_TIMERS
|
||||
|
||||
/*
|
||||
@ -1087,21 +1168,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
|
||||
continue;
|
||||
}
|
||||
|
||||
__remove_hrtimer(timer, base,
|
||||
HRTIMER_STATE_CALLBACK, 0);
|
||||
timer_stats_account_hrtimer(timer);
|
||||
|
||||
/*
|
||||
* Note: We clear the CALLBACK bit after
|
||||
* enqueue_hrtimer to avoid reprogramming of
|
||||
* the event hardware. This happens at the end
|
||||
* of this function anyway.
|
||||
*/
|
||||
if (timer->function(timer) != HRTIMER_NORESTART) {
|
||||
BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
|
||||
enqueue_hrtimer(timer, base, 0);
|
||||
}
|
||||
timer->state &= ~HRTIMER_STATE_CALLBACK;
|
||||
__run_hrtimer(timer);
|
||||
}
|
||||
spin_unlock(&cpu_base->lock);
|
||||
base++;
|
||||
@ -1122,98 +1189,11 @@ void hrtimer_interrupt(struct clock_event_device *dev)
|
||||
|
||||
static void run_hrtimer_softirq(struct softirq_action *h)
|
||||
{
|
||||
struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
|
||||
|
||||
spin_lock_irq(&cpu_base->lock);
|
||||
|
||||
while (!list_empty(&cpu_base->cb_pending)) {
|
||||
enum hrtimer_restart (*fn)(struct hrtimer *);
|
||||
struct hrtimer *timer;
|
||||
int restart;
|
||||
|
||||
timer = list_entry(cpu_base->cb_pending.next,
|
||||
struct hrtimer, cb_entry);
|
||||
|
||||
timer_stats_account_hrtimer(timer);
|
||||
|
||||
fn = timer->function;
|
||||
__remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0);
|
||||
spin_unlock_irq(&cpu_base->lock);
|
||||
|
||||
restart = fn(timer);
|
||||
|
||||
spin_lock_irq(&cpu_base->lock);
|
||||
|
||||
timer->state &= ~HRTIMER_STATE_CALLBACK;
|
||||
if (restart == HRTIMER_RESTART) {
|
||||
BUG_ON(hrtimer_active(timer));
|
||||
/*
|
||||
* Enqueue the timer, allow reprogramming of the event
|
||||
* device
|
||||
*/
|
||||
enqueue_hrtimer(timer, timer->base, 1);
|
||||
} else if (hrtimer_active(timer)) {
|
||||
/*
|
||||
* If the timer was rearmed on another CPU, reprogram
|
||||
* the event device.
|
||||
*/
|
||||
if (timer->base->first == &timer->node)
|
||||
hrtimer_reprogram(timer, timer->base);
|
||||
}
|
||||
}
|
||||
spin_unlock_irq(&cpu_base->lock);
|
||||
run_hrtimer_pending(&__get_cpu_var(hrtimer_bases));
|
||||
}
|
||||
|
||||
#endif /* CONFIG_HIGH_RES_TIMERS */
|
||||
|
||||
/*
|
||||
* Expire the per base hrtimer-queue:
|
||||
*/
|
||||
static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base,
|
||||
int index)
|
||||
{
|
||||
struct rb_node *node;
|
||||
struct hrtimer_clock_base *base = &cpu_base->clock_base[index];
|
||||
|
||||
if (!base->first)
|
||||
return;
|
||||
|
||||
if (base->get_softirq_time)
|
||||
base->softirq_time = base->get_softirq_time();
|
||||
|
||||
spin_lock_irq(&cpu_base->lock);
|
||||
|
||||
while ((node = base->first)) {
|
||||
struct hrtimer *timer;
|
||||
enum hrtimer_restart (*fn)(struct hrtimer *);
|
||||
int restart;
|
||||
|
||||
timer = rb_entry(node, struct hrtimer, node);
|
||||
if (base->softirq_time.tv64 <= timer->expires.tv64)
|
||||
break;
|
||||
|
||||
#ifdef CONFIG_HIGH_RES_TIMERS
|
||||
WARN_ON_ONCE(timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ);
|
||||
#endif
|
||||
timer_stats_account_hrtimer(timer);
|
||||
|
||||
fn = timer->function;
|
||||
__remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
|
||||
spin_unlock_irq(&cpu_base->lock);
|
||||
|
||||
restart = fn(timer);
|
||||
|
||||
spin_lock_irq(&cpu_base->lock);
|
||||
|
||||
timer->state &= ~HRTIMER_STATE_CALLBACK;
|
||||
if (restart != HRTIMER_NORESTART) {
|
||||
BUG_ON(hrtimer_active(timer));
|
||||
enqueue_hrtimer(timer, base, 0);
|
||||
}
|
||||
}
|
||||
spin_unlock_irq(&cpu_base->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called from timer softirq every jiffy, expire hrtimers:
|
||||
*
|
||||
@ -1221,10 +1201,9 @@ static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base,
|
||||
* softirq context in case the hrtimer initialization failed or has
|
||||
* not been done yet.
|
||||
*/
|
||||
void hrtimer_run_queues(void)
|
||||
void hrtimer_run_pending(void)
|
||||
{
|
||||
struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
|
||||
int i;
|
||||
|
||||
if (hrtimer_hres_active())
|
||||
return;
|
||||
@ -1238,8 +1217,54 @@ void hrtimer_run_queues(void)
|
||||
* deadlock vs. xtime_lock.
|
||||
*/
|
||||
if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()))
|
||||
if (hrtimer_switch_to_hres())
|
||||
return;
|
||||
hrtimer_switch_to_hres();
|
||||
|
||||
run_hrtimer_pending(cpu_base);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called from hardirq context every jiffy
|
||||
*/
|
||||
static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base,
|
||||
int index)
|
||||
{
|
||||
struct rb_node *node;
|
||||
struct hrtimer_clock_base *base = &cpu_base->clock_base[index];
|
||||
|
||||
if (!base->first)
|
||||
return;
|
||||
|
||||
if (base->get_softirq_time)
|
||||
base->softirq_time = base->get_softirq_time();
|
||||
|
||||
spin_lock(&cpu_base->lock);
|
||||
|
||||
while ((node = base->first)) {
|
||||
struct hrtimer *timer;
|
||||
|
||||
timer = rb_entry(node, struct hrtimer, node);
|
||||
if (base->softirq_time.tv64 <= timer->expires.tv64)
|
||||
break;
|
||||
|
||||
if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) {
|
||||
__remove_hrtimer(timer, base, HRTIMER_STATE_PENDING, 0);
|
||||
list_add_tail(&timer->cb_entry,
|
||||
&base->cpu_base->cb_pending);
|
||||
continue;
|
||||
}
|
||||
|
||||
__run_hrtimer(timer);
|
||||
}
|
||||
spin_unlock(&cpu_base->lock);
|
||||
}
|
||||
|
||||
void hrtimer_run_queues(void)
|
||||
{
|
||||
struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
|
||||
int i;
|
||||
|
||||
if (hrtimer_hres_active())
|
||||
return;
|
||||
|
||||
hrtimer_get_softirq_time(cpu_base);
|
||||
|
||||
@ -1268,7 +1293,7 @@ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
|
||||
sl->timer.function = hrtimer_wakeup;
|
||||
sl->task = task;
|
||||
#ifdef CONFIG_HIGH_RES_TIMERS
|
||||
sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_RESTART;
|
||||
sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -1279,6 +1304,8 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod
|
||||
do {
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
hrtimer_start(&t->timer, t->timer.expires, mode);
|
||||
if (!hrtimer_active(&t->timer))
|
||||
t->task = NULL;
|
||||
|
||||
if (likely(t->task))
|
||||
schedule();
|
||||
@ -1389,6 +1416,7 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
|
||||
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
|
||||
cpu_base->clock_base[i].cpu_base = cpu_base;
|
||||
|
||||
INIT_LIST_HEAD(&cpu_base->cb_pending);
|
||||
hrtimer_init_hres(cpu_base);
|
||||
}
|
||||
|
||||
|
@ -15,6 +15,8 @@
|
||||
#include <linux/mutex.h>
|
||||
#include <asm/semaphore.h>
|
||||
|
||||
#define KTHREAD_NICE_LEVEL (-5)
|
||||
|
||||
static DEFINE_SPINLOCK(kthread_create_lock);
|
||||
static LIST_HEAD(kthread_create_list);
|
||||
struct task_struct *kthreadd_task;
|
||||
@ -94,10 +96,18 @@ static void create_kthread(struct kthread_create_info *create)
|
||||
if (pid < 0) {
|
||||
create->result = ERR_PTR(pid);
|
||||
} else {
|
||||
struct sched_param param = { .sched_priority = 0 };
|
||||
wait_for_completion(&create->started);
|
||||
read_lock(&tasklist_lock);
|
||||
create->result = find_task_by_pid(pid);
|
||||
read_unlock(&tasklist_lock);
|
||||
/*
|
||||
* root may have changed our (kthreadd's) priority or CPU mask.
|
||||
* The kernel thread should not inherit these properties.
|
||||
*/
|
||||
sched_setscheduler(create->result, SCHED_NORMAL, &param);
|
||||
set_user_nice(create->result, KTHREAD_NICE_LEVEL);
|
||||
set_cpus_allowed(create->result, CPU_MASK_ALL);
|
||||
}
|
||||
complete(&create->done);
|
||||
}
|
||||
@ -221,7 +231,7 @@ int kthreadd(void *unused)
|
||||
/* Setup a clean context for our children to inherit. */
|
||||
set_task_comm(tsk, "kthreadd");
|
||||
ignore_signals(tsk);
|
||||
set_user_nice(tsk, -5);
|
||||
set_user_nice(tsk, KTHREAD_NICE_LEVEL);
|
||||
set_cpus_allowed(tsk, CPU_MASK_ALL);
|
||||
|
||||
current->flags |= PF_NOFREEZE;
|
||||
|
239
kernel/latencytop.c
Normal file
239
kernel/latencytop.c
Normal file
@ -0,0 +1,239 @@
|
||||
/*
|
||||
* latencytop.c: Latency display infrastructure
|
||||
*
|
||||
* (C) Copyright 2008 Intel Corporation
|
||||
* Author: Arjan van de Ven <arjan@linux.intel.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; version 2
|
||||
* of the License.
|
||||
*/
|
||||
#include <linux/latencytop.h>
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/stacktrace.h>
|
||||
|
||||
static DEFINE_SPINLOCK(latency_lock);
|
||||
|
||||
#define MAXLR 128
|
||||
static struct latency_record latency_record[MAXLR];
|
||||
|
||||
int latencytop_enabled;
|
||||
|
||||
void clear_all_latency_tracing(struct task_struct *p)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
if (!latencytop_enabled)
|
||||
return;
|
||||
|
||||
spin_lock_irqsave(&latency_lock, flags);
|
||||
memset(&p->latency_record, 0, sizeof(p->latency_record));
|
||||
p->latency_record_count = 0;
|
||||
spin_unlock_irqrestore(&latency_lock, flags);
|
||||
}
|
||||
|
||||
static void clear_global_latency_tracing(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&latency_lock, flags);
|
||||
memset(&latency_record, 0, sizeof(latency_record));
|
||||
spin_unlock_irqrestore(&latency_lock, flags);
|
||||
}
|
||||
|
||||
static void __sched
|
||||
account_global_scheduler_latency(struct task_struct *tsk, struct latency_record *lat)
|
||||
{
|
||||
int firstnonnull = MAXLR + 1;
|
||||
int i;
|
||||
|
||||
if (!latencytop_enabled)
|
||||
return;
|
||||
|
||||
/* skip kernel threads for now */
|
||||
if (!tsk->mm)
|
||||
return;
|
||||
|
||||
for (i = 0; i < MAXLR; i++) {
|
||||
int q;
|
||||
int same = 1;
|
||||
/* Nothing stored: */
|
||||
if (!latency_record[i].backtrace[0]) {
|
||||
if (firstnonnull > i)
|
||||
firstnonnull = i;
|
||||
continue;
|
||||
}
|
||||
for (q = 0 ; q < LT_BACKTRACEDEPTH ; q++) {
|
||||
if (latency_record[i].backtrace[q] !=
|
||||
lat->backtrace[q])
|
||||
same = 0;
|
||||
if (same && lat->backtrace[q] == 0)
|
||||
break;
|
||||
if (same && lat->backtrace[q] == ULONG_MAX)
|
||||
break;
|
||||
}
|
||||
if (same) {
|
||||
latency_record[i].count++;
|
||||
latency_record[i].time += lat->time;
|
||||
if (lat->time > latency_record[i].max)
|
||||
latency_record[i].max = lat->time;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
i = firstnonnull;
|
||||
if (i >= MAXLR - 1)
|
||||
return;
|
||||
|
||||
/* Allocted a new one: */
|
||||
memcpy(&latency_record[i], lat, sizeof(struct latency_record));
|
||||
}
|
||||
|
||||
static inline void store_stacktrace(struct task_struct *tsk, struct latency_record *lat)
|
||||
{
|
||||
struct stack_trace trace;
|
||||
|
||||
memset(&trace, 0, sizeof(trace));
|
||||
trace.max_entries = LT_BACKTRACEDEPTH;
|
||||
trace.entries = &lat->backtrace[0];
|
||||
trace.skip = 0;
|
||||
save_stack_trace_tsk(tsk, &trace);
|
||||
}
|
||||
|
||||
void __sched
|
||||
account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
|
||||
{
|
||||
unsigned long flags;
|
||||
int i, q;
|
||||
struct latency_record lat;
|
||||
|
||||
if (!latencytop_enabled)
|
||||
return;
|
||||
|
||||
/* Long interruptible waits are generally user requested... */
|
||||
if (inter && usecs > 5000)
|
||||
return;
|
||||
|
||||
memset(&lat, 0, sizeof(lat));
|
||||
lat.count = 1;
|
||||
lat.time = usecs;
|
||||
lat.max = usecs;
|
||||
store_stacktrace(tsk, &lat);
|
||||
|
||||
spin_lock_irqsave(&latency_lock, flags);
|
||||
|
||||
account_global_scheduler_latency(tsk, &lat);
|
||||
|
||||
/*
|
||||
* short term hack; if we're > 32 we stop; future we recycle:
|
||||
*/
|
||||
tsk->latency_record_count++;
|
||||
if (tsk->latency_record_count >= LT_SAVECOUNT)
|
||||
goto out_unlock;
|
||||
|
||||
for (i = 0; i < LT_SAVECOUNT ; i++) {
|
||||
struct latency_record *mylat;
|
||||
int same = 1;
|
||||
mylat = &tsk->latency_record[i];
|
||||
for (q = 0 ; q < LT_BACKTRACEDEPTH ; q++) {
|
||||
if (mylat->backtrace[q] !=
|
||||
lat.backtrace[q])
|
||||
same = 0;
|
||||
if (same && lat.backtrace[q] == 0)
|
||||
break;
|
||||
if (same && lat.backtrace[q] == ULONG_MAX)
|
||||
break;
|
||||
}
|
||||
if (same) {
|
||||
mylat->count++;
|
||||
mylat->time += lat.time;
|
||||
if (lat.time > mylat->max)
|
||||
mylat->max = lat.time;
|
||||
goto out_unlock;
|
||||
}
|
||||
}
|
||||
|
||||
/* Allocated a new one: */
|
||||
i = tsk->latency_record_count;
|
||||
memcpy(&tsk->latency_record[i], &lat, sizeof(struct latency_record));
|
||||
|
||||
out_unlock:
|
||||
spin_unlock_irqrestore(&latency_lock, flags);
|
||||
}
|
||||
|
||||
static int lstats_show(struct seq_file *m, void *v)
|
||||
{
|
||||
int i;
|
||||
|
||||
seq_puts(m, "Latency Top version : v0.1\n");
|
||||
|
||||
for (i = 0; i < MAXLR; i++) {
|
||||
if (latency_record[i].backtrace[0]) {
|
||||
int q;
|
||||
seq_printf(m, "%i %li %li ",
|
||||
latency_record[i].count,
|
||||
latency_record[i].time,
|
||||
latency_record[i].max);
|
||||
for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
|
||||
char sym[KSYM_NAME_LEN];
|
||||
char *c;
|
||||
if (!latency_record[i].backtrace[q])
|
||||
break;
|
||||
if (latency_record[i].backtrace[q] == ULONG_MAX)
|
||||
break;
|
||||
sprint_symbol(sym, latency_record[i].backtrace[q]);
|
||||
c = strchr(sym, '+');
|
||||
if (c)
|
||||
*c = 0;
|
||||
seq_printf(m, "%s ", sym);
|
||||
}
|
||||
seq_printf(m, "\n");
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
lstats_write(struct file *file, const char __user *buf, size_t count,
|
||||
loff_t *offs)
|
||||
{
|
||||
clear_global_latency_tracing();
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static int lstats_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
return single_open(filp, lstats_show, NULL);
|
||||
}
|
||||
|
||||
static struct file_operations lstats_fops = {
|
||||
.open = lstats_open,
|
||||
.read = seq_read,
|
||||
.write = lstats_write,
|
||||
.llseek = seq_lseek,
|
||||
.release = single_release,
|
||||
};
|
||||
|
||||
/*
 * Create the "latency_stats" proc entry (mode 0644) and attach
 * lstats_fops to it.
 *
 * Returns 0 on success, -ENOMEM when the entry cannot be created.
 */
static int __init init_lstats_procfs(void)
{
	struct proc_dir_entry *entry;

	entry = create_proc_entry("latency_stats", 0644, NULL);
	if (entry == NULL)
		return -ENOMEM;

	entry->proc_fops = &lstats_fops;
	return 0;
}
__initcall(init_lstats_procfs);
|
@ -3206,7 +3206,11 @@ retry:
|
||||
|
||||
EXPORT_SYMBOL_GPL(debug_show_all_locks);
|
||||
|
||||
void debug_show_held_locks(struct task_struct *task)
|
||||
/*
|
||||
* Careful: only use this function if you are sure that
|
||||
* the task cannot run in parallel!
|
||||
*/
|
||||
void __debug_show_held_locks(struct task_struct *task)
|
||||
{
|
||||
if (unlikely(!debug_locks)) {
|
||||
printk("INFO: lockdep is turned off.\n");
|
||||
@ -3214,6 +3218,12 @@ void debug_show_held_locks(struct task_struct *task)
|
||||
}
|
||||
lockdep_print_held_locks(task);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__debug_show_held_locks);
|
||||
|
||||
/*
 * Print the locks held by @task.
 *
 * Currently a plain pass-through to __debug_show_held_locks().
 */
void debug_show_held_locks(struct task_struct *task)
{
	__debug_show_held_locks(task);
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(debug_show_held_locks);
|
||||
|
||||
|
@ -496,6 +496,8 @@ static struct module_attribute modinfo_##field = { \
|
||||
MODINFO_ATTR(version);
|
||||
MODINFO_ATTR(srcversion);
|
||||
|
||||
static char last_unloaded_module[MODULE_NAME_LEN+1];
|
||||
|
||||
#ifdef CONFIG_MODULE_UNLOAD
|
||||
/* Init the unload section of the module. */
|
||||
static void module_unload_init(struct module *mod)
|
||||
@ -719,6 +721,8 @@ sys_delete_module(const char __user *name_user, unsigned int flags)
|
||||
mod->exit();
|
||||
mutex_lock(&module_mutex);
|
||||
}
|
||||
/* Store the name of the last unloaded module for diagnostic purposes */
|
||||
sprintf(last_unloaded_module, mod->name);
|
||||
free_module(mod);
|
||||
|
||||
out:
|
||||
@ -2357,21 +2361,30 @@ static void m_stop(struct seq_file *m, void *p)
|
||||
mutex_unlock(&module_mutex);
|
||||
}
|
||||
|
||||
static char *taint_flags(unsigned int taints, char *buf)
|
||||
static char *module_flags(struct module *mod, char *buf)
|
||||
{
|
||||
int bx = 0;
|
||||
|
||||
if (taints) {
|
||||
if (mod->taints ||
|
||||
mod->state == MODULE_STATE_GOING ||
|
||||
mod->state == MODULE_STATE_COMING) {
|
||||
buf[bx++] = '(';
|
||||
if (taints & TAINT_PROPRIETARY_MODULE)
|
||||
if (mod->taints & TAINT_PROPRIETARY_MODULE)
|
||||
buf[bx++] = 'P';
|
||||
if (taints & TAINT_FORCED_MODULE)
|
||||
if (mod->taints & TAINT_FORCED_MODULE)
|
||||
buf[bx++] = 'F';
|
||||
/*
|
||||
* TAINT_FORCED_RMMOD: could be added.
|
||||
* TAINT_UNSAFE_SMP, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't
|
||||
* apply to modules.
|
||||
*/
|
||||
|
||||
/* Show a - for module-is-being-unloaded */
|
||||
if (mod->state == MODULE_STATE_GOING)
|
||||
buf[bx++] = '-';
|
||||
/* Show a + for module-is-being-loaded */
|
||||
if (mod->state == MODULE_STATE_COMING)
|
||||
buf[bx++] = '+';
|
||||
buf[bx++] = ')';
|
||||
}
|
||||
buf[bx] = '\0';
|
||||
@ -2398,7 +2411,7 @@ static int m_show(struct seq_file *m, void *p)
|
||||
|
||||
/* Taints info */
|
||||
if (mod->taints)
|
||||
seq_printf(m, " %s", taint_flags(mod->taints, buf));
|
||||
seq_printf(m, " %s", module_flags(mod, buf));
|
||||
|
||||
seq_printf(m, "\n");
|
||||
return 0;
|
||||
@ -2493,7 +2506,9 @@ void print_modules(void)
|
||||
|
||||
printk("Modules linked in:");
|
||||
list_for_each_entry(mod, &modules, list)
|
||||
printk(" %s%s", mod->name, taint_flags(mod->taints, buf));
|
||||
printk(" %s%s", mod->name, module_flags(mod, buf));
|
||||
if (last_unloaded_module[0])
|
||||
printk(" [last unloaded: %s]", last_unloaded_module);
|
||||
printk("\n");
|
||||
}
|
||||
|
||||
|
@ -967,6 +967,7 @@ static void check_thread_timers(struct task_struct *tsk,
|
||||
{
|
||||
int maxfire;
|
||||
struct list_head *timers = tsk->cpu_timers;
|
||||
struct signal_struct *const sig = tsk->signal;
|
||||
|
||||
maxfire = 20;
|
||||
tsk->it_prof_expires = cputime_zero;
|
||||
@ -1011,6 +1012,35 @@ static void check_thread_timers(struct task_struct *tsk,
|
||||
t->firing = 1;
|
||||
list_move_tail(&t->entry, firing);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check for the special case thread timers.
|
||||
*/
|
||||
if (sig->rlim[RLIMIT_RTTIME].rlim_cur != RLIM_INFINITY) {
|
||||
unsigned long hard = sig->rlim[RLIMIT_RTTIME].rlim_max;
|
||||
unsigned long *soft = &sig->rlim[RLIMIT_RTTIME].rlim_cur;
|
||||
|
||||
if (hard != RLIM_INFINITY &&
|
||||
tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) {
|
||||
/*
|
||||
* At the hard limit, we just die.
|
||||
* No need to calculate anything else now.
|
||||
*/
|
||||
__group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
|
||||
return;
|
||||
}
|
||||
if (tsk->rt.timeout > DIV_ROUND_UP(*soft, USEC_PER_SEC/HZ)) {
|
||||
/*
|
||||
* At the soft limit, send a SIGXCPU every second.
|
||||
*/
|
||||
if (sig->rlim[RLIMIT_RTTIME].rlim_cur
|
||||
< sig->rlim[RLIMIT_RTTIME].rlim_max) {
|
||||
sig->rlim[RLIMIT_RTTIME].rlim_cur +=
|
||||
USEC_PER_SEC;
|
||||
}
|
||||
__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -573,11 +573,6 @@ static int __init printk_time_setup(char *str)
|
||||
|
||||
__setup("time", printk_time_setup);
|
||||
|
||||
__attribute__((weak)) unsigned long long printk_clock(void)
|
||||
{
|
||||
return sched_clock();
|
||||
}
|
||||
|
||||
/* Check if we have any console registered that can be called early in boot. */
|
||||
static int have_callable_console(void)
|
||||
{
|
||||
@ -628,30 +623,57 @@ asmlinkage int printk(const char *fmt, ...)
|
||||
/* cpu currently holding logbuf_lock */
|
||||
static volatile unsigned int printk_cpu = UINT_MAX;
|
||||
|
||||
const char printk_recursion_bug_msg [] =
|
||||
KERN_CRIT "BUG: recent printk recursion!\n";
|
||||
static int printk_recursion_bug;
|
||||
|
||||
asmlinkage int vprintk(const char *fmt, va_list args)
|
||||
{
|
||||
unsigned long flags;
|
||||
int printed_len;
|
||||
char *p;
|
||||
static char printk_buf[1024];
|
||||
static int log_level_unknown = 1;
|
||||
static char printk_buf[1024];
|
||||
|
||||
unsigned long flags;
|
||||
int printed_len = 0;
|
||||
int this_cpu;
|
||||
char *p;
|
||||
|
||||
boot_delay_msec();
|
||||
|
||||
preempt_disable();
|
||||
if (unlikely(oops_in_progress) && printk_cpu == smp_processor_id())
|
||||
/* If a crash is occurring during printk() on this CPU,
|
||||
* make sure we can't deadlock */
|
||||
zap_locks();
|
||||
|
||||
/* This stops the holder of console_sem just where we want him */
|
||||
raw_local_irq_save(flags);
|
||||
this_cpu = smp_processor_id();
|
||||
|
||||
/*
|
||||
* Ouch, printk recursed into itself!
|
||||
*/
|
||||
if (unlikely(printk_cpu == this_cpu)) {
|
||||
/*
|
||||
* If a crash is occurring during printk() on this CPU,
|
||||
* then try to get the crash message out but make sure
|
||||
* we can't deadlock. Otherwise just return to avoid the
|
||||
* recursion and return - but flag the recursion so that
|
||||
* it can be printed at the next appropriate moment:
|
||||
*/
|
||||
if (!oops_in_progress) {
|
||||
printk_recursion_bug = 1;
|
||||
goto out_restore_irqs;
|
||||
}
|
||||
zap_locks();
|
||||
}
|
||||
|
||||
lockdep_off();
|
||||
spin_lock(&logbuf_lock);
|
||||
printk_cpu = smp_processor_id();
|
||||
printk_cpu = this_cpu;
|
||||
|
||||
if (printk_recursion_bug) {
|
||||
printk_recursion_bug = 0;
|
||||
strcpy(printk_buf, printk_recursion_bug_msg);
|
||||
printed_len = sizeof(printk_recursion_bug_msg);
|
||||
}
|
||||
/* Emit the output into the temporary buffer */
|
||||
printed_len = vscnprintf(printk_buf, sizeof(printk_buf), fmt, args);
|
||||
/*
 * Output starts printed_len bytes into printk_buf, so only the remaining
 * space may be handed to vscnprintf().
 */
printed_len += vscnprintf(printk_buf + printed_len,
			  sizeof(printk_buf) - printed_len, fmt, args);
|
||||
|
||||
/*
|
||||
* Copy the output into log_buf. If the caller didn't provide
|
||||
@ -680,7 +702,9 @@ asmlinkage int vprintk(const char *fmt, va_list args)
|
||||
loglev_char = default_message_loglevel
|
||||
+ '0';
|
||||
}
|
||||
t = printk_clock();
|
||||
t = 0;
|
||||
if (system_state != SYSTEM_BOOTING)
|
||||
t = ktime_to_ns(ktime_get());
|
||||
nanosec_rem = do_div(t, 1000000000);
|
||||
tlen = sprintf(tbuf,
|
||||
"<%c>[%5lu.%06lu] ",
|
||||
@ -744,6 +768,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
|
||||
printk_cpu = UINT_MAX;
|
||||
spin_unlock(&logbuf_lock);
|
||||
lockdep_on();
|
||||
out_restore_irqs:
|
||||
raw_local_irq_restore(flags);
|
||||
}
|
||||
|
||||
|
111
kernel/profile.c
111
kernel/profile.c
@ -52,7 +52,7 @@ static DEFINE_PER_CPU(int, cpu_profile_flip);
|
||||
static DEFINE_MUTEX(profile_flip_mutex);
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
static int __init profile_setup(char * str)
|
||||
static int __init profile_setup(char *str)
|
||||
{
|
||||
static char __initdata schedstr[] = "schedule";
|
||||
static char __initdata sleepstr[] = "sleep";
|
||||
@ -104,28 +104,28 @@ __setup("profile=", profile_setup);
|
||||
|
||||
void __init profile_init(void)
|
||||
{
|
||||
if (!prof_on)
|
||||
if (!prof_on)
|
||||
return;
|
||||
|
||||
|
||||
/* only text is profiled */
|
||||
prof_len = (_etext - _stext) >> prof_shift;
|
||||
prof_buffer = alloc_bootmem(prof_len*sizeof(atomic_t));
|
||||
}
|
||||
|
||||
/* Profile event notifications */
|
||||
|
||||
|
||||
#ifdef CONFIG_PROFILING
|
||||
|
||||
|
||||
static BLOCKING_NOTIFIER_HEAD(task_exit_notifier);
|
||||
static ATOMIC_NOTIFIER_HEAD(task_free_notifier);
|
||||
static BLOCKING_NOTIFIER_HEAD(munmap_notifier);
|
||||
|
||||
void profile_task_exit(struct task_struct * task)
|
||||
|
||||
void profile_task_exit(struct task_struct *task)
|
||||
{
|
||||
blocking_notifier_call_chain(&task_exit_notifier, 0, task);
|
||||
}
|
||||
|
||||
int profile_handoff_task(struct task_struct * task)
|
||||
|
||||
int profile_handoff_task(struct task_struct *task)
|
||||
{
|
||||
int ret;
|
||||
ret = atomic_notifier_call_chain(&task_free_notifier, 0, task);
|
||||
@ -137,52 +137,55 @@ void profile_munmap(unsigned long addr)
|
||||
blocking_notifier_call_chain(&munmap_notifier, 0, (void *)addr);
|
||||
}
|
||||
|
||||
int task_handoff_register(struct notifier_block * n)
|
||||
int task_handoff_register(struct notifier_block *n)
|
||||
{
|
||||
return atomic_notifier_chain_register(&task_free_notifier, n);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(task_handoff_register);
|
||||
|
||||
int task_handoff_unregister(struct notifier_block * n)
|
||||
int task_handoff_unregister(struct notifier_block *n)
|
||||
{
|
||||
return atomic_notifier_chain_unregister(&task_free_notifier, n);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(task_handoff_unregister);
|
||||
|
||||
int profile_event_register(enum profile_type type, struct notifier_block * n)
|
||||
int profile_event_register(enum profile_type type, struct notifier_block *n)
|
||||
{
|
||||
int err = -EINVAL;
|
||||
|
||||
switch (type) {
|
||||
case PROFILE_TASK_EXIT:
|
||||
err = blocking_notifier_chain_register(
|
||||
&task_exit_notifier, n);
|
||||
break;
|
||||
case PROFILE_MUNMAP:
|
||||
err = blocking_notifier_chain_register(
|
||||
&munmap_notifier, n);
|
||||
break;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
int profile_event_unregister(enum profile_type type, struct notifier_block * n)
|
||||
{
|
||||
int err = -EINVAL;
|
||||
|
||||
switch (type) {
|
||||
case PROFILE_TASK_EXIT:
|
||||
err = blocking_notifier_chain_unregister(
|
||||
&task_exit_notifier, n);
|
||||
break;
|
||||
case PROFILE_MUNMAP:
|
||||
err = blocking_notifier_chain_unregister(
|
||||
&munmap_notifier, n);
|
||||
break;
|
||||
case PROFILE_TASK_EXIT:
|
||||
err = blocking_notifier_chain_register(
|
||||
&task_exit_notifier, n);
|
||||
break;
|
||||
case PROFILE_MUNMAP:
|
||||
err = blocking_notifier_chain_register(
|
||||
&munmap_notifier, n);
|
||||
break;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(profile_event_register);
|
||||
|
||||
int profile_event_unregister(enum profile_type type, struct notifier_block *n)
|
||||
{
|
||||
int err = -EINVAL;
|
||||
|
||||
switch (type) {
|
||||
case PROFILE_TASK_EXIT:
|
||||
err = blocking_notifier_chain_unregister(
|
||||
&task_exit_notifier, n);
|
||||
break;
|
||||
case PROFILE_MUNMAP:
|
||||
err = blocking_notifier_chain_unregister(
|
||||
&munmap_notifier, n);
|
||||
break;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(profile_event_unregister);
|
||||
|
||||
int register_timer_hook(int (*hook)(struct pt_regs *))
|
||||
{
|
||||
@ -191,6 +194,7 @@ int register_timer_hook(int (*hook)(struct pt_regs *))
|
||||
timer_hook = hook;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(register_timer_hook);
|
||||
|
||||
void unregister_timer_hook(int (*hook)(struct pt_regs *))
|
||||
{
|
||||
@ -199,13 +203,7 @@ void unregister_timer_hook(int (*hook)(struct pt_regs *))
|
||||
/* make sure all CPUs see the NULL hook */
|
||||
synchronize_sched(); /* Allow ongoing interrupts to complete. */
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(register_timer_hook);
|
||||
EXPORT_SYMBOL_GPL(unregister_timer_hook);
|
||||
EXPORT_SYMBOL_GPL(task_handoff_register);
|
||||
EXPORT_SYMBOL_GPL(task_handoff_unregister);
|
||||
EXPORT_SYMBOL_GPL(profile_event_register);
|
||||
EXPORT_SYMBOL_GPL(profile_event_unregister);
|
||||
|
||||
#endif /* CONFIG_PROFILING */
|
||||
|
||||
@ -366,7 +364,7 @@ static int __devinit profile_cpu_callback(struct notifier_block *info,
|
||||
per_cpu(cpu_profile_hits, cpu)[0] = page_address(page);
|
||||
}
|
||||
break;
|
||||
out_free:
|
||||
out_free:
|
||||
page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]);
|
||||
per_cpu(cpu_profile_hits, cpu)[1] = NULL;
|
||||
__free_page(page);
|
||||
@ -409,7 +407,6 @@ void profile_hits(int type, void *__pc, unsigned int nr_hits)
|
||||
atomic_add(nr_hits, &prof_buffer[min(pc, prof_len - 1)]);
|
||||
}
|
||||
#endif /* !CONFIG_SMP */
|
||||
|
||||
EXPORT_SYMBOL_GPL(profile_hits);
|
||||
|
||||
void profile_tick(int type)
|
||||
@ -427,7 +424,7 @@ void profile_tick(int type)
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/ptrace.h>
|
||||
|
||||
static int prof_cpu_mask_read_proc (char *page, char **start, off_t off,
|
||||
static int prof_cpu_mask_read_proc(char *page, char **start, off_t off,
|
||||
int count, int *eof, void *data)
|
||||
{
|
||||
int len = cpumask_scnprintf(page, count, *(cpumask_t *)data);
|
||||
@ -437,8 +434,8 @@ static int prof_cpu_mask_read_proc (char *page, char **start, off_t off,
|
||||
return len;
|
||||
}
|
||||
|
||||
static int prof_cpu_mask_write_proc (struct file *file, const char __user *buffer,
|
||||
unsigned long count, void *data)
|
||||
static int prof_cpu_mask_write_proc(struct file *file,
|
||||
const char __user *buffer, unsigned long count, void *data)
|
||||
{
|
||||
cpumask_t *mask = (cpumask_t *)data;
|
||||
unsigned long full_count = count, err;
|
||||
@ -457,7 +454,8 @@ void create_prof_cpu_mask(struct proc_dir_entry *root_irq_dir)
|
||||
struct proc_dir_entry *entry;
|
||||
|
||||
/* create /proc/irq/prof_cpu_mask */
|
||||
if (!(entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir)))
|
||||
entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir);
|
||||
if (!entry)
|
||||
return;
|
||||
entry->data = (void *)&prof_cpu_mask;
|
||||
entry->read_proc = prof_cpu_mask_read_proc;
|
||||
@ -475,7 +473,7 @@ read_profile(struct file *file, char __user *buf, size_t count, loff_t *ppos)
|
||||
{
|
||||
unsigned long p = *ppos;
|
||||
ssize_t read;
|
||||
char * pnt;
|
||||
char *pnt;
|
||||
unsigned int sample_step = 1 << prof_shift;
|
||||
|
||||
profile_flip_buffers();
|
||||
@ -486,12 +484,12 @@ read_profile(struct file *file, char __user *buf, size_t count, loff_t *ppos)
|
||||
read = 0;
|
||||
|
||||
while (p < sizeof(unsigned int) && count > 0) {
|
||||
if (put_user(*((char *)(&sample_step)+p),buf))
|
||||
if (put_user(*((char *)(&sample_step)+p), buf))
|
||||
return -EFAULT;
|
||||
buf++; p++; count--; read++;
|
||||
}
|
||||
pnt = (char *)prof_buffer + p - sizeof(atomic_t);
|
||||
if (copy_to_user(buf,(void *)pnt,count))
|
||||
if (copy_to_user(buf, (void *)pnt, count))
|
||||
return -EFAULT;
|
||||
read += count;
|
||||
*ppos += read;
|
||||
@ -508,7 +506,7 @@ static ssize_t write_profile(struct file *file, const char __user *buf,
|
||||
size_t count, loff_t *ppos)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
extern int setup_profiling_timer (unsigned int multiplier);
|
||||
extern int setup_profiling_timer(unsigned int multiplier);
|
||||
|
||||
if (count == sizeof(int)) {
|
||||
unsigned int multiplier;
|
||||
@ -591,7 +589,8 @@ static int __init create_proc_profile(void)
|
||||
return 0;
|
||||
if (create_hash_tables())
|
||||
return -1;
|
||||
if (!(entry = create_proc_entry("profile", S_IWUSR | S_IRUGO, NULL)))
|
||||
entry = create_proc_entry("profile", S_IWUSR | S_IRUGO, NULL);
|
||||
if (!entry)
|
||||
return 0;
|
||||
entry->proc_fops = &proc_profile_operations;
|
||||
entry->size = (1+prof_len) * sizeof(atomic_t);
|
||||
|
575
kernel/rcuclassic.c
Normal file
575
kernel/rcuclassic.c
Normal file
@ -0,0 +1,575 @@
|
||||
/*
|
||||
* Read-Copy Update mechanism for mutual exclusion
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*
|
||||
* Copyright IBM Corporation, 2001
|
||||
*
|
||||
* Authors: Dipankar Sarma <dipankar@in.ibm.com>
|
||||
* Manfred Spraul <manfred@colorfullife.com>
|
||||
*
|
||||
* Based on the original work by Paul McKenney <paulmck@us.ibm.com>
|
||||
* and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
|
||||
* Papers:
|
||||
* http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
|
||||
* http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
|
||||
*
|
||||
* For detailed explanation of Read-Copy Update mechanism see -
|
||||
* Documentation/RCU
|
||||
*
|
||||
*/
|
||||
#include <linux/types.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/sched.h>
|
||||
#include <asm/atomic.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/completion.h>
|
||||
#include <linux/moduleparam.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/mutex.h>
|
||||
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
static struct lock_class_key rcu_lock_key;
|
||||
struct lockdep_map rcu_lock_map =
|
||||
STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
|
||||
EXPORT_SYMBOL_GPL(rcu_lock_map);
|
||||
#endif
|
||||
|
||||
|
||||
/* Definition for rcupdate control block. */
|
||||
static struct rcu_ctrlblk rcu_ctrlblk = {
|
||||
.cur = -300,
|
||||
.completed = -300,
|
||||
.lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock),
|
||||
.cpumask = CPU_MASK_NONE,
|
||||
};
|
||||
static struct rcu_ctrlblk rcu_bh_ctrlblk = {
|
||||
.cur = -300,
|
||||
.completed = -300,
|
||||
.lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock),
|
||||
.cpumask = CPU_MASK_NONE,
|
||||
};
|
||||
|
||||
DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
|
||||
DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
|
||||
|
||||
static int blimit = 10;
|
||||
static int qhimark = 10000;
|
||||
static int qlowmark = 100;
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static void force_quiescent_state(struct rcu_data *rdp,
|
||||
struct rcu_ctrlblk *rcp)
|
||||
{
|
||||
int cpu;
|
||||
cpumask_t cpumask;
|
||||
set_need_resched();
|
||||
if (unlikely(!rcp->signaled)) {
|
||||
rcp->signaled = 1;
|
||||
/*
|
||||
* Don't send IPI to itself. With irqs disabled,
|
||||
* rdp->cpu is the current cpu.
|
||||
*/
|
||||
cpumask = rcp->cpumask;
|
||||
cpu_clear(rdp->cpu, cpumask);
|
||||
for_each_cpu_mask(cpu, cpumask)
|
||||
smp_send_reschedule(cpu);
|
||||
}
|
||||
}
|
||||
#else
|
||||
/*
 * Variant built when CONFIG_SMP is not set: there are no other CPUs to
 * signal, so only the local reschedule request is made.  Both arguments
 * are unused here.
 */
static inline void force_quiescent_state(struct rcu_data *rdp,
					 struct rcu_ctrlblk *rcp)
{
	set_need_resched();
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* call_rcu - Queue an RCU callback for invocation after a grace period.
|
||||
* @head: structure to be used for queueing the RCU updates.
|
||||
* @func: actual update function to be invoked after the grace period
|
||||
*
|
||||
* The update function will be invoked some time after a full grace
|
||||
* period elapses, in other words after all currently executing RCU
|
||||
* read-side critical sections have completed. RCU read-side critical
|
||||
* sections are delimited by rcu_read_lock() and rcu_read_unlock(),
|
||||
* and may be nested.
|
||||
*/
|
||||
void call_rcu(struct rcu_head *head,
|
||||
void (*func)(struct rcu_head *rcu))
|
||||
{
|
||||
unsigned long flags;
|
||||
struct rcu_data *rdp;
|
||||
|
||||
head->func = func;
|
||||
head->next = NULL;
|
||||
local_irq_save(flags);
|
||||
rdp = &__get_cpu_var(rcu_data);
|
||||
*rdp->nxttail = head;
|
||||
rdp->nxttail = &head->next;
|
||||
if (unlikely(++rdp->qlen > qhimark)) {
|
||||
rdp->blimit = INT_MAX;
|
||||
force_quiescent_state(rdp, &rcu_ctrlblk);
|
||||
}
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(call_rcu);
|
||||
|
||||
/**
|
||||
* call_rcu_bh - Queue an RCU for invocation after a quicker grace period.
|
||||
* @head: structure to be used for queueing the RCU updates.
|
||||
* @func: actual update function to be invoked after the grace period
|
||||
*
|
||||
* The update function will be invoked some time after a full grace
|
||||
* period elapses, in other words after all currently executing RCU
|
||||
* read-side critical sections have completed. call_rcu_bh() assumes
|
||||
* that the read-side critical sections end on completion of a softirq
|
||||
* handler. This means that read-side critical sections in process
|
||||
* context must not be interrupted by softirqs. This interface is to be
|
||||
* used when most of the read-side critical sections are in softirq context.
|
||||
* RCU read-side critical sections are delimited by rcu_read_lock() and
|
||||
* rcu_read_unlock(), * if in interrupt context or rcu_read_lock_bh()
|
||||
* and rcu_read_unlock_bh(), if in process context. These may be nested.
|
||||
*/
|
||||
void call_rcu_bh(struct rcu_head *head,
|
||||
void (*func)(struct rcu_head *rcu))
|
||||
{
|
||||
unsigned long flags;
|
||||
struct rcu_data *rdp;
|
||||
|
||||
head->func = func;
|
||||
head->next = NULL;
|
||||
local_irq_save(flags);
|
||||
rdp = &__get_cpu_var(rcu_bh_data);
|
||||
*rdp->nxttail = head;
|
||||
rdp->nxttail = &head->next;
|
||||
|
||||
if (unlikely(++rdp->qlen > qhimark)) {
|
||||
rdp->blimit = INT_MAX;
|
||||
force_quiescent_state(rdp, &rcu_bh_ctrlblk);
|
||||
}
|
||||
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(call_rcu_bh);
|
||||
|
||||
/*
|
||||
* Return the number of RCU batches processed thus far. Useful
|
||||
* for debug and statistics.
|
||||
*/
|
||||
long rcu_batches_completed(void)
|
||||
{
|
||||
return rcu_ctrlblk.completed;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_batches_completed);
|
||||
|
||||
/*
|
||||
* Return the number of RCU batches processed thus far. Useful
|
||||
* for debug and statistics.
|
||||
*/
|
||||
long rcu_batches_completed_bh(void)
|
||||
{
|
||||
return rcu_bh_ctrlblk.completed;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
|
||||
|
||||
/* Raises the softirq for processing rcu_callbacks. */
|
||||
static inline void raise_rcu_softirq(void)
|
||||
{
|
||||
raise_softirq(RCU_SOFTIRQ);
|
||||
/*
|
||||
* The smp_mb() here is required to ensure that this cpu's
|
||||
* __rcu_process_callbacks() reads the most recently updated
|
||||
* value of rcu->cur.
|
||||
*/
|
||||
smp_mb();
|
||||
}
|
||||
|
||||
/*
|
||||
* Invoke the completed RCU callbacks. They are expected to be in
|
||||
* a per-cpu list.
|
||||
*/
|
||||
static void rcu_do_batch(struct rcu_data *rdp)
|
||||
{
|
||||
struct rcu_head *next, *list;
|
||||
int count = 0;
|
||||
|
||||
list = rdp->donelist;
|
||||
while (list) {
|
||||
next = list->next;
|
||||
prefetch(next);
|
||||
list->func(list);
|
||||
list = next;
|
||||
if (++count >= rdp->blimit)
|
||||
break;
|
||||
}
|
||||
rdp->donelist = list;
|
||||
|
||||
local_irq_disable();
|
||||
rdp->qlen -= count;
|
||||
local_irq_enable();
|
||||
if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark)
|
||||
rdp->blimit = blimit;
|
||||
|
||||
if (!rdp->donelist)
|
||||
rdp->donetail = &rdp->donelist;
|
||||
else
|
||||
raise_rcu_softirq();
|
||||
}
|
||||
|
||||
/*
|
||||
* Grace period handling:
|
||||
* The grace period handling consists of two steps:
|
||||
* - A new grace period is started.
|
||||
* This is done by rcu_start_batch. The start is not broadcasted to
|
||||
* all cpus, they must pick this up by comparing rcp->cur with
|
||||
* rdp->quiescbatch. All cpus are recorded in the
|
||||
* rcu_ctrlblk.cpumask bitmap.
|
||||
* - All cpus must go through a quiescent state.
|
||||
* Since the start of the grace period is not broadcasted, at least two
|
||||
* calls to rcu_check_quiescent_state are required:
|
||||
* The first call just notices that a new grace period is running. The
|
||||
* following calls check if there was a quiescent state since the beginning
|
||||
* of the grace period. If so, it updates rcu_ctrlblk.cpumask. If
|
||||
* the bitmap is empty, then the grace period is completed.
|
||||
* rcu_check_quiescent_state calls rcu_start_batch() (via cpu_quiet()) to start the next grace
|
||||
* period (if necessary).
|
||||
*/
|
||||
/*
|
||||
* Register a new batch of callbacks, and start it up if there is currently no
|
||||
* active batch and the batch to be registered has not already occurred.
|
||||
* Caller must hold rcu_ctrlblk.lock.
|
||||
*/
|
||||
static void rcu_start_batch(struct rcu_ctrlblk *rcp)
|
||||
{
|
||||
if (rcp->next_pending &&
|
||||
rcp->completed == rcp->cur) {
|
||||
rcp->next_pending = 0;
|
||||
/*
|
||||
* next_pending == 0 must be visible in
|
||||
* __rcu_process_callbacks() before it can see new value of cur.
|
||||
*/
|
||||
smp_wmb();
|
||||
rcp->cur++;
|
||||
|
||||
/*
|
||||
* Accessing nohz_cpu_mask before incrementing rcp->cur needs a
|
||||
* Barrier Otherwise it can cause tickless idle CPUs to be
|
||||
* included in rcp->cpumask, which will extend graceperiods
|
||||
* unnecessarily.
|
||||
*/
|
||||
smp_mb();
|
||||
cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask);
|
||||
|
||||
rcp->signaled = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* cpu went through a quiescent state since the beginning of the grace period.
|
||||
* Clear it from the cpu mask and complete the grace period if it was the last
|
||||
* cpu. Start another grace period if someone has further entries pending
|
||||
*/
|
||||
static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
|
||||
{
|
||||
cpu_clear(cpu, rcp->cpumask);
|
||||
if (cpus_empty(rcp->cpumask)) {
|
||||
/* batch completed ! */
|
||||
rcp->completed = rcp->cur;
|
||||
rcu_start_batch(rcp);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if the cpu has gone through a quiescent state (say context
|
||||
* switch). If so and if it already hasn't done so in this RCU
|
||||
* quiescent cycle, then indicate that it has done so.
|
||||
*/
|
||||
static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
|
||||
struct rcu_data *rdp)
|
||||
{
|
||||
if (rdp->quiescbatch != rcp->cur) {
|
||||
/* start new grace period: */
|
||||
rdp->qs_pending = 1;
|
||||
rdp->passed_quiesc = 0;
|
||||
rdp->quiescbatch = rcp->cur;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Grace period already completed for this cpu?
|
||||
* qs_pending is checked instead of the actual bitmap to avoid
|
||||
* cacheline trashing.
|
||||
*/
|
||||
if (!rdp->qs_pending)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Was there a quiescent state since the beginning of the grace
|
||||
* period? If no, then exit and wait for the next call.
|
||||
*/
|
||||
if (!rdp->passed_quiesc)
|
||||
return;
|
||||
rdp->qs_pending = 0;
|
||||
|
||||
spin_lock(&rcp->lock);
|
||||
/*
|
||||
* rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
|
||||
* during cpu startup. Ignore the quiescent state.
|
||||
*/
|
||||
if (likely(rdp->quiescbatch == rcp->cur))
|
||||
cpu_quiet(rdp->cpu, rcp);
|
||||
|
||||
spin_unlock(&rcp->lock);
|
||||
}
|
||||
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
|
||||
/* warning! helper for rcu_offline_cpu. do not use elsewhere without reviewing
|
||||
* locking requirements, the list it's pulling from has to belong to a cpu
|
||||
* which is dead and hence not processing interrupts.
|
||||
*/
|
||||
static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
|
||||
struct rcu_head **tail)
|
||||
{
|
||||
local_irq_disable();
|
||||
*this_rdp->nxttail = list;
|
||||
if (list)
|
||||
this_rdp->nxttail = tail;
|
||||
local_irq_enable();
|
||||
}
|
||||
|
||||
static void __rcu_offline_cpu(struct rcu_data *this_rdp,
|
||||
struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
|
||||
{
|
||||
/* if the cpu going offline owns the grace period
|
||||
* we can block indefinitely waiting for it, so flush
|
||||
* it here
|
||||
*/
|
||||
spin_lock_bh(&rcp->lock);
|
||||
if (rcp->cur != rcp->completed)
|
||||
cpu_quiet(rdp->cpu, rcp);
|
||||
spin_unlock_bh(&rcp->lock);
|
||||
rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail);
|
||||
rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
|
||||
rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
|
||||
}
|
||||
|
||||
static void rcu_offline_cpu(int cpu)
|
||||
{
|
||||
struct rcu_data *this_rdp = &get_cpu_var(rcu_data);
|
||||
struct rcu_data *this_bh_rdp = &get_cpu_var(rcu_bh_data);
|
||||
|
||||
__rcu_offline_cpu(this_rdp, &rcu_ctrlblk,
|
||||
&per_cpu(rcu_data, cpu));
|
||||
__rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk,
|
||||
&per_cpu(rcu_bh_data, cpu));
|
||||
put_cpu_var(rcu_data);
|
||||
put_cpu_var(rcu_bh_data);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Stub used when CONFIG_HOTPLUG_CPU is not set: there is nothing to do. */
static void rcu_offline_cpu(int cpu)
{
	/* Intentionally empty. */
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This does the RCU processing work from softirq context.
|
||||
*/
|
||||
static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
|
||||
struct rcu_data *rdp)
|
||||
{
|
||||
if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) {
|
||||
*rdp->donetail = rdp->curlist;
|
||||
rdp->donetail = rdp->curtail;
|
||||
rdp->curlist = NULL;
|
||||
rdp->curtail = &rdp->curlist;
|
||||
}
|
||||
|
||||
if (rdp->nxtlist && !rdp->curlist) {
|
||||
local_irq_disable();
|
||||
rdp->curlist = rdp->nxtlist;
|
||||
rdp->curtail = rdp->nxttail;
|
||||
rdp->nxtlist = NULL;
|
||||
rdp->nxttail = &rdp->nxtlist;
|
||||
local_irq_enable();
|
||||
|
||||
/*
|
||||
* start the next batch of callbacks
|
||||
*/
|
||||
|
||||
/* determine batch number */
|
||||
rdp->batch = rcp->cur + 1;
|
||||
/* see the comment and corresponding wmb() in
|
||||
* the rcu_start_batch()
|
||||
*/
|
||||
smp_rmb();
|
||||
|
||||
if (!rcp->next_pending) {
|
||||
/* and start it/schedule start if it's a new batch */
|
||||
spin_lock(&rcp->lock);
|
||||
rcp->next_pending = 1;
|
||||
rcu_start_batch(rcp);
|
||||
spin_unlock(&rcp->lock);
|
||||
}
|
||||
}
|
||||
|
||||
rcu_check_quiescent_state(rcp, rdp);
|
||||
if (rdp->donelist)
|
||||
rcu_do_batch(rdp);
|
||||
}
|
||||
|
||||
static void rcu_process_callbacks(struct softirq_action *unused)
|
||||
{
|
||||
__rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data));
|
||||
__rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
|
||||
}
|
||||
|
||||
static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
|
||||
{
|
||||
/* This cpu has pending rcu entries and the grace period
|
||||
* for them has completed.
|
||||
*/
|
||||
if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch))
|
||||
return 1;
|
||||
|
||||
/* This cpu has no pending entries, but there are new entries */
|
||||
if (!rdp->curlist && rdp->nxtlist)
|
||||
return 1;
|
||||
|
||||
/* This cpu has finished callbacks to invoke */
|
||||
if (rdp->donelist)
|
||||
return 1;
|
||||
|
||||
/* The rcu core waits for a quiescent state from the cpu */
|
||||
if (rdp->quiescbatch != rcp->cur || rdp->qs_pending)
|
||||
return 1;
|
||||
|
||||
/* nothing to do */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check to see if there is any immediate RCU-related work to be done
|
||||
* by the current CPU, returning 1 if so. This function is part of the
|
||||
* RCU implementation; it is -not- an exported member of the RCU API.
|
||||
*/
|
||||
int rcu_pending(int cpu)
|
||||
{
|
||||
return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) ||
|
||||
__rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu));
|
||||
}
|
||||
|
||||
/*
|
||||
* Check to see if any future RCU-related work will need to be done
|
||||
* by the current CPU, even if none need be done immediately, returning
|
||||
* 1 if so. This function is part of the RCU implementation; it is -not-
|
||||
* an exported member of the RCU API.
|
||||
*/
|
||||
int rcu_needs_cpu(int cpu)
|
||||
{
|
||||
struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
|
||||
struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu);
|
||||
|
||||
return (!!rdp->curlist || !!rdp_bh->curlist || rcu_pending(cpu));
|
||||
}
|
||||
|
||||
void rcu_check_callbacks(int cpu, int user)
|
||||
{
|
||||
if (user ||
|
||||
(idle_cpu(cpu) && !in_softirq() &&
|
||||
hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
|
||||
rcu_qsctr_inc(cpu);
|
||||
rcu_bh_qsctr_inc(cpu);
|
||||
} else if (!in_softirq())
|
||||
rcu_bh_qsctr_inc(cpu);
|
||||
raise_rcu_softirq();
|
||||
}
|
||||
|
||||
static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
|
||||
struct rcu_data *rdp)
|
||||
{
|
||||
memset(rdp, 0, sizeof(*rdp));
|
||||
rdp->curtail = &rdp->curlist;
|
||||
rdp->nxttail = &rdp->nxtlist;
|
||||
rdp->donetail = &rdp->donelist;
|
||||
rdp->quiescbatch = rcp->completed;
|
||||
rdp->qs_pending = 0;
|
||||
rdp->cpu = cpu;
|
||||
rdp->blimit = blimit;
|
||||
}
|
||||
|
||||
/*
 * Initialise the per-CPU RCU data of @cpu for both flavours and install
 * rcu_process_callbacks() as the RCU_SOFTIRQ handler.
 */
static void __cpuinit rcu_online_cpu(int cpu)
{
	rcu_init_percpu_data(cpu, &rcu_ctrlblk, &per_cpu(rcu_data, cpu));
	rcu_init_percpu_data(cpu, &rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu));

	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL);
}
|
||||
|
||||
/*
 * CPU notifier callback for RCU.
 *
 * @self:   the notifier block (unused).
 * @action: the CPU event being reported.
 * @hcpu:   the cpu number, carried in a pointer-sized value.
 *
 * Sets up the per-CPU data before a cpu comes up and collects its
 * callbacks after it has died; every other event is ignored.  Always
 * returns NOTIFY_OK.
 */
static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
				unsigned long action, void *hcpu)
{
	long cpu = (long)hcpu;

	if (action == CPU_UP_PREPARE || action == CPU_UP_PREPARE_FROZEN)
		rcu_online_cpu(cpu);
	else if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
		rcu_offline_cpu(cpu);

	return NOTIFY_OK;
}
|
||||
|
||||
static struct notifier_block __cpuinitdata rcu_nb = {
|
||||
.notifier_call = rcu_cpu_notify,
|
||||
};
|
||||
|
||||
/*
|
||||
* Initializes rcu mechanism. Assumed to be called early.
|
||||
* That is before local timer(SMP) or jiffie timer (uniproc) is setup.
|
||||
* Note that rcu_qsctr and friends are implicitly
|
||||
* initialized due to the choice of ``0'' for RCU_CTR_INVALID.
|
||||
*/
|
||||
void __init __rcu_init(void)
|
||||
{
|
||||
rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
|
||||
(void *)(long)smp_processor_id());
|
||||
/* Register notifier for non-boot CPUs */
|
||||
register_cpu_notifier(&rcu_nb);
|
||||
}
|
||||
|
||||
module_param(blimit, int, 0);
|
||||
module_param(qhimark, int, 0);
|
||||
module_param(qlowmark, int, 0);
|
@ -15,7 +15,7 @@
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*
|
||||
* Copyright (C) IBM Corporation, 2001
|
||||
* Copyright IBM Corporation, 2001
|
||||
*
|
||||
* Authors: Dipankar Sarma <dipankar@in.ibm.com>
|
||||
* Manfred Spraul <manfred@colorfullife.com>
|
||||
@ -35,572 +35,27 @@
|
||||
#include <linux/init.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/sched.h>
|
||||
#include <asm/atomic.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/completion.h>
|
||||
#include <linux/moduleparam.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/mutex.h>
|
||||
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
static struct lock_class_key rcu_lock_key;
|
||||
struct lockdep_map rcu_lock_map =
|
||||
STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
|
||||
|
||||
EXPORT_SYMBOL_GPL(rcu_lock_map);
|
||||
#endif
|
||||
|
||||
/* Definition for rcupdate control block. */
|
||||
static struct rcu_ctrlblk rcu_ctrlblk = {
|
||||
.cur = -300,
|
||||
.completed = -300,
|
||||
.lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock),
|
||||
.cpumask = CPU_MASK_NONE,
|
||||
};
|
||||
static struct rcu_ctrlblk rcu_bh_ctrlblk = {
|
||||
.cur = -300,
|
||||
.completed = -300,
|
||||
.lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock),
|
||||
.cpumask = CPU_MASK_NONE,
|
||||
};
|
||||
|
||||
DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
|
||||
DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
|
||||
|
||||
/* Fake initialization required by compiler */
|
||||
static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL};
|
||||
static int blimit = 10;
|
||||
static int qhimark = 10000;
|
||||
static int qlowmark = 100;
|
||||
|
||||
static atomic_t rcu_barrier_cpu_count;
|
||||
static DEFINE_MUTEX(rcu_barrier_mutex);
|
||||
static struct completion rcu_barrier_completion;
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static void force_quiescent_state(struct rcu_data *rdp,
|
||||
struct rcu_ctrlblk *rcp)
|
||||
{
|
||||
int cpu;
|
||||
cpumask_t cpumask;
|
||||
set_need_resched();
|
||||
if (unlikely(!rcp->signaled)) {
|
||||
rcp->signaled = 1;
|
||||
/*
|
||||
* Don't send IPI to itself. With irqs disabled,
|
||||
* rdp->cpu is the current cpu.
|
||||
*/
|
||||
cpumask = rcp->cpumask;
|
||||
cpu_clear(rdp->cpu, cpumask);
|
||||
for_each_cpu_mask(cpu, cpumask)
|
||||
smp_send_reschedule(cpu);
|
||||
}
|
||||
}
|
||||
#else
|
||||
static inline void force_quiescent_state(struct rcu_data *rdp,
|
||||
struct rcu_ctrlblk *rcp)
|
||||
{
|
||||
set_need_resched();
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* call_rcu - Queue an RCU callback for invocation after a grace period.
|
||||
* @head: structure to be used for queueing the RCU updates.
|
||||
* @func: actual update function to be invoked after the grace period
|
||||
*
|
||||
* The update function will be invoked some time after a full grace
|
||||
* period elapses, in other words after all currently executing RCU
|
||||
* read-side critical sections have completed. RCU read-side critical
|
||||
* sections are delimited by rcu_read_lock() and rcu_read_unlock(),
|
||||
* and may be nested.
|
||||
*/
|
||||
void fastcall call_rcu(struct rcu_head *head,
|
||||
void (*func)(struct rcu_head *rcu))
|
||||
{
|
||||
unsigned long flags;
|
||||
struct rcu_data *rdp;
|
||||
|
||||
head->func = func;
|
||||
head->next = NULL;
|
||||
local_irq_save(flags);
|
||||
rdp = &__get_cpu_var(rcu_data);
|
||||
*rdp->nxttail = head;
|
||||
rdp->nxttail = &head->next;
|
||||
if (unlikely(++rdp->qlen > qhimark)) {
|
||||
rdp->blimit = INT_MAX;
|
||||
force_quiescent_state(rdp, &rcu_ctrlblk);
|
||||
}
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
/**
|
||||
* call_rcu_bh - Queue an RCU for invocation after a quicker grace period.
|
||||
* @head: structure to be used for queueing the RCU updates.
|
||||
* @func: actual update function to be invoked after the grace period
|
||||
*
|
||||
* The update function will be invoked some time after a full grace
|
||||
* period elapses, in other words after all currently executing RCU
|
||||
* read-side critical sections have completed. call_rcu_bh() assumes
|
||||
* that the read-side critical sections end on completion of a softirq
|
||||
* handler. This means that read-side critical sections in process
|
||||
* context must not be interrupted by softirqs. This interface is to be
|
||||
* used when most of the read-side critical sections are in softirq context.
|
||||
* RCU read-side critical sections are delimited by rcu_read_lock() and
|
||||
* rcu_read_unlock(), * if in interrupt context or rcu_read_lock_bh()
|
||||
* and rcu_read_unlock_bh(), if in process context. These may be nested.
|
||||
*/
|
||||
void fastcall call_rcu_bh(struct rcu_head *head,
|
||||
void (*func)(struct rcu_head *rcu))
|
||||
{
|
||||
unsigned long flags;
|
||||
struct rcu_data *rdp;
|
||||
|
||||
head->func = func;
|
||||
head->next = NULL;
|
||||
local_irq_save(flags);
|
||||
rdp = &__get_cpu_var(rcu_bh_data);
|
||||
*rdp->nxttail = head;
|
||||
rdp->nxttail = &head->next;
|
||||
|
||||
if (unlikely(++rdp->qlen > qhimark)) {
|
||||
rdp->blimit = INT_MAX;
|
||||
force_quiescent_state(rdp, &rcu_bh_ctrlblk);
|
||||
}
|
||||
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the number of RCU batches processed thus far. Useful
|
||||
* for debug and statistics.
|
||||
*/
|
||||
long rcu_batches_completed(void)
|
||||
{
|
||||
return rcu_ctrlblk.completed;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the number of RCU batches processed thus far. Useful
|
||||
* for debug and statistics.
|
||||
*/
|
||||
long rcu_batches_completed_bh(void)
|
||||
{
|
||||
return rcu_bh_ctrlblk.completed;
|
||||
}
|
||||
|
||||
static void rcu_barrier_callback(struct rcu_head *notused)
|
||||
{
|
||||
if (atomic_dec_and_test(&rcu_barrier_cpu_count))
|
||||
complete(&rcu_barrier_completion);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called with preemption disabled, and from cross-cpu IRQ context.
|
||||
*/
|
||||
static void rcu_barrier_func(void *notused)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
|
||||
struct rcu_head *head;
|
||||
|
||||
head = &rdp->barrier;
|
||||
atomic_inc(&rcu_barrier_cpu_count);
|
||||
call_rcu(head, rcu_barrier_callback);
|
||||
}
|
||||
|
||||
/**
|
||||
* rcu_barrier - Wait until all the in-flight RCUs are complete.
|
||||
*/
|
||||
void rcu_barrier(void)
|
||||
{
|
||||
BUG_ON(in_interrupt());
|
||||
/* Take cpucontrol mutex to protect against CPU hotplug */
|
||||
mutex_lock(&rcu_barrier_mutex);
|
||||
init_completion(&rcu_barrier_completion);
|
||||
atomic_set(&rcu_barrier_cpu_count, 0);
|
||||
on_each_cpu(rcu_barrier_func, NULL, 0, 1);
|
||||
wait_for_completion(&rcu_barrier_completion);
|
||||
mutex_unlock(&rcu_barrier_mutex);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_barrier);
|
||||
|
||||
/*
|
||||
* Invoke the completed RCU callbacks. They are expected to be in
|
||||
* a per-cpu list.
|
||||
*/
|
||||
static void rcu_do_batch(struct rcu_data *rdp)
|
||||
{
|
||||
struct rcu_head *next, *list;
|
||||
int count = 0;
|
||||
|
||||
list = rdp->donelist;
|
||||
while (list) {
|
||||
next = list->next;
|
||||
prefetch(next);
|
||||
list->func(list);
|
||||
list = next;
|
||||
if (++count >= rdp->blimit)
|
||||
break;
|
||||
}
|
||||
rdp->donelist = list;
|
||||
|
||||
local_irq_disable();
|
||||
rdp->qlen -= count;
|
||||
local_irq_enable();
|
||||
if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark)
|
||||
rdp->blimit = blimit;
|
||||
|
||||
if (!rdp->donelist)
|
||||
rdp->donetail = &rdp->donelist;
|
||||
else
|
||||
tasklet_schedule(&per_cpu(rcu_tasklet, rdp->cpu));
|
||||
}
|
||||
|
||||
/*
|
||||
* Grace period handling:
|
||||
* The grace period handling consists out of two steps:
|
||||
* - A new grace period is started.
|
||||
* This is done by rcu_start_batch. The start is not broadcasted to
|
||||
* all cpus, they must pick this up by comparing rcp->cur with
|
||||
* rdp->quiescbatch. All cpus are recorded in the
|
||||
* rcu_ctrlblk.cpumask bitmap.
|
||||
* - All cpus must go through a quiescent state.
|
||||
* Since the start of the grace period is not broadcasted, at least two
|
||||
* calls to rcu_check_quiescent_state are required:
|
||||
* The first call just notices that a new grace period is running. The
|
||||
* following calls check if there was a quiescent state since the beginning
|
||||
* of the grace period. If so, it updates rcu_ctrlblk.cpumask. If
|
||||
* the bitmap is empty, then the grace period is completed.
|
||||
* rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace
|
||||
* period (if necessary).
|
||||
*/
|
||||
/*
|
||||
* Register a new batch of callbacks, and start it up if there is currently no
|
||||
* active batch and the batch to be registered has not already occurred.
|
||||
* Caller must hold rcu_ctrlblk.lock.
|
||||
*/
|
||||
static void rcu_start_batch(struct rcu_ctrlblk *rcp)
|
||||
{
|
||||
if (rcp->next_pending &&
|
||||
rcp->completed == rcp->cur) {
|
||||
rcp->next_pending = 0;
|
||||
/*
|
||||
* next_pending == 0 must be visible in
|
||||
* __rcu_process_callbacks() before it can see new value of cur.
|
||||
*/
|
||||
smp_wmb();
|
||||
rcp->cur++;
|
||||
|
||||
/*
|
||||
* Accessing nohz_cpu_mask before incrementing rcp->cur needs a
|
||||
* Barrier Otherwise it can cause tickless idle CPUs to be
|
||||
* included in rcp->cpumask, which will extend graceperiods
|
||||
* unnecessarily.
|
||||
*/
|
||||
smp_mb();
|
||||
cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask);
|
||||
|
||||
rcp->signaled = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* cpu went through a quiescent state since the beginning of the grace period.
|
||||
* Clear it from the cpu mask and complete the grace period if it was the last
|
||||
* cpu. Start another grace period if someone has further entries pending
|
||||
*/
|
||||
static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
|
||||
{
|
||||
cpu_clear(cpu, rcp->cpumask);
|
||||
if (cpus_empty(rcp->cpumask)) {
|
||||
/* batch completed ! */
|
||||
rcp->completed = rcp->cur;
|
||||
rcu_start_batch(rcp);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if the cpu has gone through a quiescent state (say context
|
||||
* switch). If so and if it already hasn't done so in this RCU
|
||||
* quiescent cycle, then indicate that it has done so.
|
||||
*/
|
||||
static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
|
||||
struct rcu_data *rdp)
|
||||
{
|
||||
if (rdp->quiescbatch != rcp->cur) {
|
||||
/* start new grace period: */
|
||||
rdp->qs_pending = 1;
|
||||
rdp->passed_quiesc = 0;
|
||||
rdp->quiescbatch = rcp->cur;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Grace period already completed for this cpu?
|
||||
* qs_pending is checked instead of the actual bitmap to avoid
|
||||
* cacheline trashing.
|
||||
*/
|
||||
if (!rdp->qs_pending)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Was there a quiescent state since the beginning of the grace
|
||||
* period? If no, then exit and wait for the next call.
|
||||
*/
|
||||
if (!rdp->passed_quiesc)
|
||||
return;
|
||||
rdp->qs_pending = 0;
|
||||
|
||||
spin_lock(&rcp->lock);
|
||||
/*
|
||||
* rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
|
||||
* during cpu startup. Ignore the quiescent state.
|
||||
*/
|
||||
if (likely(rdp->quiescbatch == rcp->cur))
|
||||
cpu_quiet(rdp->cpu, rcp);
|
||||
|
||||
spin_unlock(&rcp->lock);
|
||||
}
|
||||
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
|
||||
/* warning! helper for rcu_offline_cpu. do not use elsewhere without reviewing
|
||||
* locking requirements, the list it's pulling from has to belong to a cpu
|
||||
* which is dead and hence not processing interrupts.
|
||||
*/
|
||||
static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
|
||||
struct rcu_head **tail)
|
||||
{
|
||||
local_irq_disable();
|
||||
*this_rdp->nxttail = list;
|
||||
if (list)
|
||||
this_rdp->nxttail = tail;
|
||||
local_irq_enable();
|
||||
}
|
||||
|
||||
static void __rcu_offline_cpu(struct rcu_data *this_rdp,
|
||||
struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
|
||||
{
|
||||
/* if the cpu going offline owns the grace period
|
||||
* we can block indefinitely waiting for it, so flush
|
||||
* it here
|
||||
*/
|
||||
spin_lock_bh(&rcp->lock);
|
||||
if (rcp->cur != rcp->completed)
|
||||
cpu_quiet(rdp->cpu, rcp);
|
||||
spin_unlock_bh(&rcp->lock);
|
||||
rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
|
||||
rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
|
||||
rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail);
|
||||
}
|
||||
|
||||
static void rcu_offline_cpu(int cpu)
|
||||
{
|
||||
struct rcu_data *this_rdp = &get_cpu_var(rcu_data);
|
||||
struct rcu_data *this_bh_rdp = &get_cpu_var(rcu_bh_data);
|
||||
|
||||
__rcu_offline_cpu(this_rdp, &rcu_ctrlblk,
|
||||
&per_cpu(rcu_data, cpu));
|
||||
__rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk,
|
||||
&per_cpu(rcu_bh_data, cpu));
|
||||
put_cpu_var(rcu_data);
|
||||
put_cpu_var(rcu_bh_data);
|
||||
tasklet_kill_immediate(&per_cpu(rcu_tasklet, cpu), cpu);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* No-op stand-in compiled when CONFIG_HOTPLUG_CPU is not set. */
static void rcu_offline_cpu(int cpu)
|
||||
{
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This does the RCU processing work from tasklet context.
|
||||
*/
|
||||
static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
|
||||
struct rcu_data *rdp)
|
||||
{
|
||||
if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) {
|
||||
*rdp->donetail = rdp->curlist;
|
||||
rdp->donetail = rdp->curtail;
|
||||
rdp->curlist = NULL;
|
||||
rdp->curtail = &rdp->curlist;
|
||||
}
|
||||
|
||||
if (rdp->nxtlist && !rdp->curlist) {
|
||||
local_irq_disable();
|
||||
rdp->curlist = rdp->nxtlist;
|
||||
rdp->curtail = rdp->nxttail;
|
||||
rdp->nxtlist = NULL;
|
||||
rdp->nxttail = &rdp->nxtlist;
|
||||
local_irq_enable();
|
||||
|
||||
/*
|
||||
* start the next batch of callbacks
|
||||
*/
|
||||
|
||||
/* determine batch number */
|
||||
rdp->batch = rcp->cur + 1;
|
||||
/* see the comment and corresponding wmb() in
|
||||
* the rcu_start_batch()
|
||||
*/
|
||||
smp_rmb();
|
||||
|
||||
if (!rcp->next_pending) {
|
||||
/* and start it/schedule start if it's a new batch */
|
||||
spin_lock(&rcp->lock);
|
||||
rcp->next_pending = 1;
|
||||
rcu_start_batch(rcp);
|
||||
spin_unlock(&rcp->lock);
|
||||
}
|
||||
}
|
||||
|
||||
rcu_check_quiescent_state(rcp, rdp);
|
||||
if (rdp->donelist)
|
||||
rcu_do_batch(rdp);
|
||||
}
|
||||
|
||||
static void rcu_process_callbacks(unsigned long unused)
|
||||
{
|
||||
__rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data));
|
||||
__rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
|
||||
}
|
||||
|
||||
static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
|
||||
{
|
||||
/* This cpu has pending rcu entries and the grace period
|
||||
* for them has completed.
|
||||
*/
|
||||
if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch))
|
||||
return 1;
|
||||
|
||||
/* This cpu has no pending entries, but there are new entries */
|
||||
if (!rdp->curlist && rdp->nxtlist)
|
||||
return 1;
|
||||
|
||||
/* This cpu has finished callbacks to invoke */
|
||||
if (rdp->donelist)
|
||||
return 1;
|
||||
|
||||
/* The rcu core waits for a quiescent state from the cpu */
|
||||
if (rdp->quiescbatch != rcp->cur || rdp->qs_pending)
|
||||
return 1;
|
||||
|
||||
/* nothing to do */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check to see if there is any immediate RCU-related work to be done
|
||||
* by the current CPU, returning 1 if so. This function is part of the
|
||||
* RCU implementation; it is -not- an exported member of the RCU API.
|
||||
*/
|
||||
int rcu_pending(int cpu)
|
||||
{
|
||||
return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) ||
|
||||
__rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu));
|
||||
}
|
||||
|
||||
/*
|
||||
* Check to see if any future RCU-related work will need to be done
|
||||
* by the current CPU, even if none need be done immediately, returning
|
||||
* 1 if so. This function is part of the RCU implementation; it is -not-
|
||||
* an exported member of the RCU API.
|
||||
*/
|
||||
int rcu_needs_cpu(int cpu)
|
||||
{
|
||||
struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
|
||||
struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu);
|
||||
|
||||
return (!!rdp->curlist || !!rdp_bh->curlist || rcu_pending(cpu));
|
||||
}
|
||||
|
||||
void rcu_check_callbacks(int cpu, int user)
|
||||
{
|
||||
if (user ||
|
||||
(idle_cpu(cpu) && !in_softirq() &&
|
||||
hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
|
||||
rcu_qsctr_inc(cpu);
|
||||
rcu_bh_qsctr_inc(cpu);
|
||||
} else if (!in_softirq())
|
||||
rcu_bh_qsctr_inc(cpu);
|
||||
tasklet_schedule(&per_cpu(rcu_tasklet, cpu));
|
||||
}
|
||||
|
||||
static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
|
||||
struct rcu_data *rdp)
|
||||
{
|
||||
memset(rdp, 0, sizeof(*rdp));
|
||||
rdp->curtail = &rdp->curlist;
|
||||
rdp->nxttail = &rdp->nxtlist;
|
||||
rdp->donetail = &rdp->donelist;
|
||||
rdp->quiescbatch = rcp->completed;
|
||||
rdp->qs_pending = 0;
|
||||
rdp->cpu = cpu;
|
||||
rdp->blimit = blimit;
|
||||
}
|
||||
|
||||
static void __cpuinit rcu_online_cpu(int cpu)
{
	/* Reset both flavours' per-cpu state, then set up the tasklet. */
	rcu_init_percpu_data(cpu, &rcu_ctrlblk, &per_cpu(rcu_data, cpu));
	rcu_init_percpu_data(cpu, &rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu));
	tasklet_init(&per_cpu(rcu_tasklet, cpu), rcu_process_callbacks, 0UL);
}
|
||||
|
||||
static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
				unsigned long action, void *hcpu)
{
	long cpu = (long)hcpu;

	/* Only bring-up preparation and death are of interest here. */
	if (action == CPU_UP_PREPARE || action == CPU_UP_PREPARE_FROZEN)
		rcu_online_cpu(cpu);
	else if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
		rcu_offline_cpu(cpu);

	return NOTIFY_OK;
}
|
||||
|
||||
static struct notifier_block __cpuinitdata rcu_nb = {
|
||||
.notifier_call = rcu_cpu_notify,
|
||||
};
|
||||
|
||||
/*
|
||||
* Initializes rcu mechanism. Assumed to be called early.
|
||||
* That is before local timer(SMP) or jiffie timer (uniproc) is setup.
|
||||
* Note that rcu_qsctr and friends are implicitly
|
||||
* initialized due to the choice of ``0'' for RCU_CTR_INVALID.
|
||||
*/
|
||||
void __init rcu_init(void)
{
	void *this_cpu = (void *)(long)smp_processor_id();

	/* Prepare the CPU we are running on by invoking the notifier directly. */
	rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, this_cpu);

	/* All other CPUs are handled through the registered notifier. */
	register_cpu_notifier(&rcu_nb);
}
|
||||
#include <linux/module.h>
|
||||
|
||||
struct rcu_synchronize {
|
||||
struct rcu_head head;
|
||||
struct completion completion;
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
|
||||
static atomic_t rcu_barrier_cpu_count;
|
||||
static DEFINE_MUTEX(rcu_barrier_mutex);
|
||||
static struct completion rcu_barrier_completion;
|
||||
|
||||
/* Because of FASTCALL declaration of complete, we use this wrapper */
|
||||
static void wakeme_after_rcu(struct rcu_head *head)
|
||||
{
|
||||
@ -618,9 +73,6 @@ static void wakeme_after_rcu(struct rcu_head *head)
|
||||
* read-side critical sections have completed. RCU read-side critical
|
||||
* sections are delimited by rcu_read_lock() and rcu_read_unlock(),
|
||||
* and may be nested.
|
||||
*
|
||||
* If your read-side code is not protected by rcu_read_lock(), do -not-
|
||||
* use synchronize_rcu().
|
||||
*/
|
||||
void synchronize_rcu(void)
|
||||
{
|
||||
@ -633,12 +85,54 @@ void synchronize_rcu(void)
|
||||
/* Wait for it */
|
||||
wait_for_completion(&rcu.completion);
|
||||
}
|
||||
|
||||
module_param(blimit, int, 0);
|
||||
module_param(qhimark, int, 0);
|
||||
module_param(qlowmark, int, 0);
|
||||
EXPORT_SYMBOL_GPL(rcu_batches_completed);
|
||||
EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
|
||||
EXPORT_SYMBOL_GPL(call_rcu);
|
||||
EXPORT_SYMBOL_GPL(call_rcu_bh);
|
||||
EXPORT_SYMBOL_GPL(synchronize_rcu);
|
||||
|
||||
static void rcu_barrier_callback(struct rcu_head *notused)
|
||||
{
|
||||
if (atomic_dec_and_test(&rcu_barrier_cpu_count))
|
||||
complete(&rcu_barrier_completion);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called with preemption disabled, and from cross-cpu IRQ context.
|
||||
*/
|
||||
static void rcu_barrier_func(void *notused)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu);
|
||||
|
||||
atomic_inc(&rcu_barrier_cpu_count);
|
||||
call_rcu(head, rcu_barrier_callback);
|
||||
}
|
||||
|
||||
/**
|
||||
* rcu_barrier - Wait until all the in-flight RCUs are complete.
|
||||
*/
|
||||
void rcu_barrier(void)
|
||||
{
|
||||
BUG_ON(in_interrupt());
|
||||
/* Take cpucontrol mutex to protect against CPU hotplug */
|
||||
mutex_lock(&rcu_barrier_mutex);
|
||||
init_completion(&rcu_barrier_completion);
|
||||
atomic_set(&rcu_barrier_cpu_count, 0);
|
||||
/*
|
||||
* The queueing of callbacks in all CPUs must be atomic with
|
||||
* respect to RCU, otherwise one CPU may queue a callback,
|
||||
* wait for a grace period, decrement barrier count and call
|
||||
* complete(), while other CPUs have not yet queued anything.
|
||||
* So, we need to make sure that grace periods cannot complete
|
||||
* until all the callbacks are queued.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
on_each_cpu(rcu_barrier_func, NULL, 0, 1);
|
||||
rcu_read_unlock();
|
||||
wait_for_completion(&rcu_barrier_completion);
|
||||
mutex_unlock(&rcu_barrier_mutex);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_barrier);
|
||||
|
||||
/* Init-time entry point; all of the work is delegated to __rcu_init(). */
void __init rcu_init(void)
|
||||
{
|
||||
__rcu_init();
|
||||
}
|
||||
|
||||
|
953
kernel/rcupreempt.c
Normal file
953
kernel/rcupreempt.c
Normal file
@ -0,0 +1,953 @@
|
||||
/*
|
||||
* Read-Copy Update mechanism for mutual exclusion, realtime implementation
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*
|
||||
* Copyright IBM Corporation, 2006
|
||||
*
|
||||
* Authors: Paul E. McKenney <paulmck@us.ibm.com>
|
||||
* With thanks to Esben Nielsen, Bill Huey, and Ingo Molnar
|
||||
* for pushing me away from locks and towards counters, and
|
||||
* to Suparna Bhattacharya for pushing me completely away
|
||||
* from atomic instructions on the read side.
|
||||
*
|
||||
* Papers: http://www.rdrop.com/users/paulmck/RCU
|
||||
*
|
||||
* Design Document: http://lwn.net/Articles/253651/
|
||||
*
|
||||
* For detailed explanation of Read-Copy Update mechanism see -
|
||||
* Documentation/RCU/ *.txt
|
||||
*
|
||||
*/
|
||||
#include <linux/types.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/sched.h>
|
||||
#include <asm/atomic.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/completion.h>
|
||||
#include <linux/moduleparam.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/byteorder/swabb.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/rcupreempt_trace.h>
|
||||
|
||||
/*
|
||||
* Macro that prevents the compiler from reordering accesses, but does
|
||||
* absolutely -nothing- to prevent CPUs from reordering. This is used
|
||||
* only to mediate communication between mainline code and hardware
|
||||
* interrupt and NMI handlers.
|
||||
*/
|
||||
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
|
||||
|
||||
/*
|
||||
* PREEMPT_RCU data structures.
|
||||
*/
|
||||
|
||||
/*
|
||||
* GP_STAGES specifies the number of times the state machine has
|
||||
* to go through all the rcu_try_flip_states (see below)
|
||||
* in a single Grace Period.
|
||||
*
|
||||
* GP in GP_STAGES stands for Grace Period ;)
|
||||
*/
|
||||
#define GP_STAGES 2
|
||||
/*
 * Per-CPU state for the preemptible RCU implementation: the callback
 * lists a callback moves through (next -> wait stages -> done) and the
 * pair of read-side counters used by __rcu_read_lock()/__rcu_read_unlock().
 */
struct rcu_data {
|
||||
spinlock_t lock; /* Protect rcu_data fields. */
|
||||
long completed; /* Number of last completed batch. */
/* (value of rcu_ctrlblk.completed when callbacks were last advanced) */
|
||||
int waitlistcount; /* Number of non-empty wait lists after the last advance. */
|
||||
struct tasklet_struct rcu_tasklet; /* NOTE(review): use not visible in this chunk. */
|
||||
struct rcu_head *nextlist; /* Callbacks not yet in a wait stage. */
|
||||
struct rcu_head **nexttail; /* Tail pointer of nextlist. */
|
||||
struct rcu_head *waitlist[GP_STAGES]; /* One list per grace-period stage. */
|
||||
struct rcu_head **waittail[GP_STAGES]; /* Tail pointer of each waitlist[]. */
|
||||
struct rcu_head *donelist; /* Callbacks that have left the last wait stage. */
|
||||
struct rcu_head **donetail; /* Tail pointer of donelist. */
|
||||
long rcu_flipctr[2]; /* Read-side counters, indexed by completed & 0x1. */
|
||||
#ifdef CONFIG_RCU_TRACE
|
||||
struct rcupreempt_trace trace;
|
||||
#endif /* #ifdef CONFIG_RCU_TRACE */
|
||||
};
|
||||
|
||||
/*
|
||||
* States for rcu_try_flip() and friends.
|
||||
*/
|
||||
|
||||
enum rcu_try_flip_states {
|
||||
|
||||
/*
|
||||
* Stay here if nothing is happening. Flip the counter if something
|
||||
* starts happening. Denoted by "I"
|
||||
*/
|
||||
rcu_try_flip_idle_state,
|
||||
|
||||
/*
|
||||
* Wait here for all CPUs to notice that the counter has flipped. This
|
||||
* prevents the old set of counters from ever being incremented once
|
||||
* we leave this state, which in turn is necessary because we cannot
|
||||
* test any individual counter for zero -- we can only check the sum.
|
||||
* Denoted by "A".
|
||||
*/
|
||||
rcu_try_flip_waitack_state,
|
||||
|
||||
/*
|
||||
* Wait here for the sum of the old per-CPU counters to reach zero.
|
||||
* Denoted by "Z".
|
||||
*/
|
||||
rcu_try_flip_waitzero_state,
|
||||
|
||||
/*
|
||||
* Wait here for each of the other CPUs to execute a memory barrier.
|
||||
* This is necessary to ensure that these other CPUs really have
|
||||
* completed executing their RCU read-side critical sections, despite
|
||||
* their CPUs wildly reordering memory. Denoted by "M".
|
||||
*/
|
||||
rcu_try_flip_waitmb_state,
|
||||
};
|
||||
|
||||
struct rcu_ctrlblk {
|
||||
spinlock_t fliplock; /* Protect state-machine transitions. */
|
||||
long completed; /* Number of last completed batch. */
|
||||
enum rcu_try_flip_states rcu_try_flip_state; /* The current state of
|
||||
the rcu state machine */
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(struct rcu_data, rcu_data);
|
||||
static struct rcu_ctrlblk rcu_ctrlblk = {
|
||||
.fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock),
|
||||
.completed = 0,
|
||||
.rcu_try_flip_state = rcu_try_flip_idle_state,
|
||||
};
|
||||
|
||||
|
||||
#ifdef CONFIG_RCU_TRACE
|
||||
static char *rcu_try_flip_state_names[] =
|
||||
{ "idle", "waitack", "waitzero", "waitmb" };
|
||||
#endif /* #ifdef CONFIG_RCU_TRACE */
|
||||
|
||||
static cpumask_t rcu_cpu_online_map __read_mostly = CPU_MASK_NONE;
|
||||
|
||||
/*
|
||||
* Enum and per-CPU flag to determine when each CPU has seen
|
||||
* the most recent counter flip.
|
||||
*/
|
||||
|
||||
enum rcu_flip_flag_values {
|
||||
rcu_flip_seen, /* Steady/initial state, last flip seen. */
|
||||
/* Only GP detector can update. */
|
||||
rcu_flipped /* Flip just completed, need confirmation. */
|
||||
/* Only corresponding CPU can update. */
|
||||
};
|
||||
static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_flip_flag_values, rcu_flip_flag)
|
||||
= rcu_flip_seen;
|
||||
|
||||
/*
|
||||
* Enum and per-CPU flag to determine when each CPU has executed the
|
||||
* needed memory barrier to fence in memory references from its last RCU
|
||||
* read-side critical section in the just-completed grace period.
|
||||
*/
|
||||
|
||||
enum rcu_mb_flag_values {
|
||||
rcu_mb_done, /* Steady/initial state, no mb()s required. */
|
||||
/* Only GP detector can update. */
|
||||
rcu_mb_needed /* Flip just completed, need an mb(). */
|
||||
/* Only corresponding CPU can update. */
|
||||
};
|
||||
static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_mb_flag_values, rcu_mb_flag)
|
||||
= rcu_mb_done;
|
||||
|
||||
/*
|
||||
* RCU_DATA_ME: find the current CPU's rcu_data structure.
|
||||
* RCU_DATA_CPU: find the specified CPU's rcu_data structure.
|
||||
*/
|
||||
#define RCU_DATA_ME() (&__get_cpu_var(rcu_data))
|
||||
#define RCU_DATA_CPU(cpu) (&per_cpu(rcu_data, cpu))
|
||||
|
||||
/*
 * Tracing helpers.  Each one forwards to RCU_TRACE() with the address
 * of the ->trace member of the relevant rcu_data structure.
 *
 * None of the expansions ends in a semicolon: the caller supplies it,
 * so that an invocation behaves like a single statement and can be
 * used safely as the unbraced body of an if/else.
 */

/*
 * Helper macro for tracing when the appropriate rcu_data is not
 * cached in a local variable, but where the CPU number is so cached.
 */
#define RCU_TRACE_CPU(f, cpu) RCU_TRACE(f, &(RCU_DATA_CPU(cpu)->trace))

/*
 * Helper macro for tracing when the appropriate rcu_data is not
 * cached in a local variable.
 */
#define RCU_TRACE_ME(f) RCU_TRACE(f, &(RCU_DATA_ME()->trace))

/*
 * Helper macro for tracing when the appropriate rcu_data is pointed
 * to by a local variable.
 */
#define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace))
|
||||
|
||||
/*
|
||||
* Return the number of RCU batches processed thus far. Useful
|
||||
* for debug and statistics.
|
||||
*/
|
||||
long rcu_batches_completed(void)
|
||||
{
|
||||
/* Plain read of the global counter; fliplock is not taken here. */
return rcu_ctrlblk.completed;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_batches_completed);
|
||||
|
||||
EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
|
||||
|
||||
/*
 * Enter an RCU read-side critical section.
 *
 * A nested call only bumps the task's ->rcu_read_lock_nesting.  The
 * outermost call additionally increments this CPU's rcu_flipctr[]
 * element selected by the low bit of rcu_ctrlblk.completed, and stores
 * that index in ->rcu_flipctr_idx for the matching __rcu_read_unlock().
 * The order of the three stores in the outermost path matters because
 * NMI handlers may themselves call rcu_read_lock().
 */
void __rcu_read_lock(void)
|
||||
{
|
||||
int idx;
|
||||
struct task_struct *t = current;
|
||||
int nesting;
|
||||
|
||||
nesting = ACCESS_ONCE(t->rcu_read_lock_nesting);
|
||||
if (nesting != 0) {
|
||||
|
||||
/* An earlier rcu_read_lock() covers us, just count it. */
|
||||
|
||||
t->rcu_read_lock_nesting = nesting + 1;
|
||||
|
||||
} else {
|
||||
unsigned long flags;
|
||||
|
||||
/*
|
||||
* We disable interrupts for the following reasons:
|
||||
* - If we get scheduling clock interrupt here, and we
|
||||
* end up acking the counter flip, it's like a promise
|
||||
* that we will never increment the old counter again.
|
||||
* Thus we will break that promise if that
|
||||
* scheduling clock interrupt happens between the time
|
||||
* we pick the .completed field and the time that we
|
||||
* increment our counter.
|
||||
*
|
||||
* - We don't want to be preempted out here.
|
||||
*
|
||||
* NMIs can still occur, of course, and might themselves
|
||||
* contain rcu_read_lock().
|
||||
*/
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
/*
|
||||
* Outermost nesting of rcu_read_lock(), so increment
|
||||
* the current counter for the current CPU. Use volatile
|
||||
* casts to prevent the compiler from reordering.
|
||||
*/
|
||||
|
||||
idx = ACCESS_ONCE(rcu_ctrlblk.completed) & 0x1;
|
||||
ACCESS_ONCE(RCU_DATA_ME()->rcu_flipctr[idx])++;
|
||||
|
||||
/*
|
||||
* Now that the per-CPU counter has been incremented, we
|
||||
* are protected from races with rcu_read_lock() invoked
|
||||
* from NMI handlers on this CPU. We can therefore safely
|
||||
* increment the nesting counter, relieving further NMIs
|
||||
* of the need to increment the per-CPU counter.
|
||||
*/
|
||||
|
||||
ACCESS_ONCE(t->rcu_read_lock_nesting) = nesting + 1;
|
||||
|
||||
/*
|
||||
* Now that we have prevented any NMIs from storing
|
||||
* to the ->rcu_flipctr_idx, we can safely use it to
|
||||
* remember which counter to decrement in the matching
|
||||
* rcu_read_unlock().
|
||||
*/
|
||||
|
||||
ACCESS_ONCE(t->rcu_flipctr_idx) = idx;
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__rcu_read_lock);
|
||||
|
||||
/*
 * Leave an RCU read-side critical section.
 *
 * A nested call (nesting > 1) only decrements the task's
 * ->rcu_read_lock_nesting.  Otherwise, with interrupts disabled, the
 * saved ->rcu_flipctr_idx is fetched first, then the nesting count is
 * decremented, and only then is this CPU's rcu_flipctr[idx] decremented.
 */
void __rcu_read_unlock(void)
|
||||
{
|
||||
int idx;
|
||||
struct task_struct *t = current;
|
||||
int nesting;
|
||||
|
||||
nesting = ACCESS_ONCE(t->rcu_read_lock_nesting);
|
||||
if (nesting > 1) {
|
||||
|
||||
/*
|
||||
* We are still protected by the enclosing rcu_read_lock(),
|
||||
* so simply decrement the counter.
|
||||
*/
|
||||
|
||||
t->rcu_read_lock_nesting = nesting - 1;
|
||||
|
||||
} else {
|
||||
unsigned long flags;
|
||||
|
||||
/*
|
||||
* Disable local interrupts to prevent the grace-period
|
||||
* detection state machine from seeing us half-done.
|
||||
* NMIs can still occur, of course, and might themselves
|
||||
* contain rcu_read_lock() and rcu_read_unlock().
|
||||
*/
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
/*
|
||||
* Outermost nesting of rcu_read_unlock(), so we must
|
||||
* decrement the current counter for the current CPU.
|
||||
* This must be done carefully, because NMIs can
|
||||
* occur at any point in this code, and any rcu_read_lock()
|
||||
* and rcu_read_unlock() pairs in the NMI handlers
|
||||
* must interact non-destructively with this code.
|
||||
* Lots of volatile casts, and -very- careful ordering.
|
||||
*
|
||||
* Changes to this code, including this one, must be
|
||||
* inspected, validated, and tested extremely carefully!!!
|
||||
*/
|
||||
|
||||
/*
|
||||
* First, pick up the index.
|
||||
*/
|
||||
|
||||
idx = ACCESS_ONCE(t->rcu_flipctr_idx);
|
||||
|
||||
/*
|
||||
* Now that we have fetched the counter index, it is
|
||||
* safe to decrement the per-task RCU nesting counter.
|
||||
* After this, any interrupts or NMIs will increment and
|
||||
* decrement the per-CPU counters.
|
||||
*/
|
||||
ACCESS_ONCE(t->rcu_read_lock_nesting) = nesting - 1;
|
||||
|
||||
/*
|
||||
* The task's nesting count has now been decremented.
|
||||
* NMIs that occur after that store (and find the count
|
||||
* at zero) will route their rcu_read_lock() calls through
|
||||
* the "else" clause of __rcu_read_lock(), and will thus
|
||||
* start incrementing the per-CPU counter on their own.
|
||||
* They will also clobber ->rcu_flipctr_idx, but that is
|
||||
* OK, since we have already fetched it. All that is left
|
||||
* is to decrement the per-CPU counter.
|
||||
*/
|
||||
|
||||
ACCESS_ONCE(RCU_DATA_ME()->rcu_flipctr[idx])--;
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__rcu_read_unlock);
|
||||
|
||||
/*
|
||||
* If a global counter flip has occurred since the last time that we
|
||||
* advanced callbacks, advance them. Hardware interrupts must be
|
||||
* disabled when calling this function.
|
||||
*/
|
||||
static void __rcu_advance_callbacks(struct rcu_data *rdp)
|
||||
{
|
||||
int cpu;
|
||||
int i;
|
||||
int wlc = 0;
|
||||
|
||||
if (rdp->completed != rcu_ctrlblk.completed) {
|
||||
if (rdp->waitlist[GP_STAGES - 1] != NULL) {
|
||||
*rdp->donetail = rdp->waitlist[GP_STAGES - 1];
|
||||
rdp->donetail = rdp->waittail[GP_STAGES - 1];
|
||||
RCU_TRACE_RDP(rcupreempt_trace_move2done, rdp);
|
||||
}
|
||||
for (i = GP_STAGES - 2; i >= 0; i--) {
|
||||
if (rdp->waitlist[i] != NULL) {
|
||||
rdp->waitlist[i + 1] = rdp->waitlist[i];
|
||||
rdp->waittail[i + 1] = rdp->waittail[i];
|
||||
wlc++;
|
||||
} else {
|
||||
rdp->waitlist[i + 1] = NULL;
|
||||
rdp->waittail[i + 1] =
|
||||
&rdp->waitlist[i + 1];
|
||||
}
|
||||
}
|
||||
if (rdp->nextlist != NULL) {
|
||||
rdp->waitlist[0] = rdp->nextlist;
|
||||
rdp->waittail[0] = rdp->nexttail;
|
||||
wlc++;
|
||||
rdp->nextlist = NULL;
|
||||
rdp->nexttail = &rdp->nextlist;
|
||||
RCU_TRACE_RDP(rcupreempt_trace_move2wait, rdp);
|
||||
} else {
|
||||
rdp->waitlist[0] = NULL;
|
||||
rdp->waittail[0] = &rdp->waitlist[0];
|
||||
}
|
||||
rdp->waitlistcount = wlc;
|
||||
rdp->completed = rcu_ctrlblk.completed;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check to see if this CPU needs to report that it has seen
|
||||
* the most recent counter flip, thereby declaring that all
|
||||
* subsequent rcu_read_lock() invocations will respect this flip.
|
||||
*/
|
||||
|
||||
cpu = raw_smp_processor_id();
|
||||
if (per_cpu(rcu_flip_flag, cpu) == rcu_flipped) {
|
||||
smp_mb(); /* Subsequent counter accesses must see new value */
|
||||
per_cpu(rcu_flip_flag, cpu) = rcu_flip_seen;
|
||||
smp_mb(); /* Subsequent RCU read-side critical sections */
|
||||
/* seen -after- acknowledgement. */
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Get here when RCU is idle. Decide whether we need to
|
||||
* move out of idle state, and return non-zero if so.
|
||||
* "Straightforward" approach for the moment, might later
|
||||
* use callback-list lengths, grace-period duration, or
|
||||
* some such to determine when to exit idle state.
|
||||
* Might also need a pre-idle test that does not acquire
|
||||
* the lock, but let's get the simple case working first...
|
||||
*/
|
||||
|
||||
static int
|
||||
rcu_try_flip_idle(void)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
RCU_TRACE_ME(rcupreempt_trace_try_flip_i1);
|
||||
if (!rcu_pending(smp_processor_id())) {
|
||||
RCU_TRACE_ME(rcupreempt_trace_try_flip_ie1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Do the flip.
|
||||
*/
|
||||
|
||||
RCU_TRACE_ME(rcupreempt_trace_try_flip_g1);
|
||||
rcu_ctrlblk.completed++; /* stands in for rcu_try_flip_g2 */
|
||||
|
||||
/*
|
||||
* Need a memory barrier so that other CPUs see the new
|
||||
* counter value before they see the subsequent change of all
|
||||
* the rcu_flip_flag instances to rcu_flipped.
|
||||
*/
|
||||
|
||||
smp_mb(); /* see above block comment. */
|
||||
|
||||
/* Now ask each CPU for acknowledgement of the flip. */
|
||||
|
||||
for_each_cpu_mask(cpu, rcu_cpu_online_map)
|
||||
per_cpu(rcu_flip_flag, cpu) = rcu_flipped;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Wait for CPUs to acknowledge the flip.
|
||||
*/
|
||||
|
||||
static int
|
||||
rcu_try_flip_waitack(void)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
RCU_TRACE_ME(rcupreempt_trace_try_flip_a1);
|
||||
for_each_cpu_mask(cpu, rcu_cpu_online_map)
|
||||
if (per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) {
|
||||
RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Make sure our checks above don't bleed into subsequent
|
||||
* waiting for the sum of the counters to reach zero.
|
||||
*/
|
||||
|
||||
smp_mb(); /* see above block comment. */
|
||||
RCU_TRACE_ME(rcupreempt_trace_try_flip_a2);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Wait for collective ``last'' counter to reach zero,
|
||||
* then tell all CPUs to do an end-of-grace-period memory barrier.
|
||||
*/
|
||||
|
||||
static int
|
||||
rcu_try_flip_waitzero(void)
|
||||
{
|
||||
int cpu;
|
||||
int lastidx = !(rcu_ctrlblk.completed & 0x1);
|
||||
int sum = 0;
|
||||
|
||||
/* Check to see if the sum of the "last" counters is zero. */
|
||||
|
||||
RCU_TRACE_ME(rcupreempt_trace_try_flip_z1);
|
||||
for_each_cpu_mask(cpu, rcu_cpu_online_map)
|
||||
sum += RCU_DATA_CPU(cpu)->rcu_flipctr[lastidx];
|
||||
if (sum != 0) {
|
||||
RCU_TRACE_ME(rcupreempt_trace_try_flip_ze1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* This ensures that the other CPUs see the call for
|
||||
* memory barriers -after- the sum to zero has been
|
||||
* detected here
|
||||
*/
|
||||
smp_mb(); /* ^^^^^^^^^^^^ */
|
||||
|
||||
/* Call for a memory barrier from each CPU. */
|
||||
for_each_cpu_mask(cpu, rcu_cpu_online_map)
|
||||
per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed;
|
||||
|
||||
RCU_TRACE_ME(rcupreempt_trace_try_flip_z2);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Wait for all CPUs to do their end-of-grace-period memory barrier.
|
||||
* Return 1 once all CPUs have done so, 0 if any CPU has yet to do so.
|
||||
*/
|
||||
|
||||
static int
|
||||
rcu_try_flip_waitmb(void)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
RCU_TRACE_ME(rcupreempt_trace_try_flip_m1);
|
||||
for_each_cpu_mask(cpu, rcu_cpu_online_map)
|
||||
if (per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) {
|
||||
RCU_TRACE_ME(rcupreempt_trace_try_flip_me1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
smp_mb(); /* Ensure that the above checks precede any following flip. */
|
||||
RCU_TRACE_ME(rcupreempt_trace_try_flip_m2);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Attempt a single flip of the counters. Remember, a single flip does
|
||||
* -not- constitute a grace period. Instead, the interval between
|
||||
* at least GP_STAGES consecutive flips is a grace period.
|
||||
*
|
||||
* If anyone is nuts enough to run this CONFIG_PREEMPT_RCU implementation
|
||||
* on a large SMP, they might want to use a hierarchical organization of
|
||||
* the per-CPU-counter pairs.
|
||||
*/
|
||||
static void rcu_try_flip(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
RCU_TRACE_ME(rcupreempt_trace_try_flip_1);
|
||||
if (unlikely(!spin_trylock_irqsave(&rcu_ctrlblk.fliplock, flags))) {
|
||||
RCU_TRACE_ME(rcupreempt_trace_try_flip_e1);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Take the next transition(s) through the RCU grace-period
|
||||
* flip-counter state machine.
|
||||
*/
|
||||
|
||||
switch (rcu_ctrlblk.rcu_try_flip_state) {
|
||||
case rcu_try_flip_idle_state:
|
||||
if (rcu_try_flip_idle())
|
||||
rcu_ctrlblk.rcu_try_flip_state =
|
||||
rcu_try_flip_waitack_state;
|
||||
break;
|
||||
case rcu_try_flip_waitack_state:
|
||||
if (rcu_try_flip_waitack())
|
||||
rcu_ctrlblk.rcu_try_flip_state =
|
||||
rcu_try_flip_waitzero_state;
|
||||
break;
|
||||
case rcu_try_flip_waitzero_state:
|
||||
if (rcu_try_flip_waitzero())
|
||||
rcu_ctrlblk.rcu_try_flip_state =
|
||||
rcu_try_flip_waitmb_state;
|
||||
break;
|
||||
case rcu_try_flip_waitmb_state:
|
||||
if (rcu_try_flip_waitmb())
|
||||
rcu_ctrlblk.rcu_try_flip_state =
|
||||
rcu_try_flip_idle_state;
|
||||
}
|
||||
spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check to see if this CPU needs to do a memory barrier in order to
|
||||
* ensure that any prior RCU read-side critical sections have committed
|
||||
* their counter manipulations and critical-section memory references
|
||||
* before declaring the grace period to be completed.
|
||||
*/
|
||||
static void rcu_check_mb(int cpu)
|
||||
{
|
||||
if (per_cpu(rcu_mb_flag, cpu) == rcu_mb_needed) {
|
||||
smp_mb(); /* Ensure RCU read-side accesses are visible. */
|
||||
per_cpu(rcu_mb_flag, cpu) = rcu_mb_done;
|
||||
}
|
||||
}
|
||||
|
||||
void rcu_check_callbacks(int cpu, int user)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct rcu_data *rdp = RCU_DATA_CPU(cpu);
|
||||
|
||||
rcu_check_mb(cpu);
|
||||
if (rcu_ctrlblk.completed == rdp->completed)
|
||||
rcu_try_flip();
|
||||
spin_lock_irqsave(&rdp->lock, flags);
|
||||
RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp);
|
||||
__rcu_advance_callbacks(rdp);
|
||||
if (rdp->donelist == NULL) {
|
||||
spin_unlock_irqrestore(&rdp->lock, flags);
|
||||
} else {
|
||||
spin_unlock_irqrestore(&rdp->lock, flags);
|
||||
raise_softirq(RCU_SOFTIRQ);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Needed by dynticks, to make sure all RCU processing has finished
|
||||
* when we go idle:
|
||||
*/
|
||||
void rcu_advance_callbacks(int cpu, int user)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct rcu_data *rdp = RCU_DATA_CPU(cpu);
|
||||
|
||||
if (rcu_ctrlblk.completed == rdp->completed) {
|
||||
rcu_try_flip();
|
||||
if (rcu_ctrlblk.completed == rdp->completed)
|
||||
return;
|
||||
}
|
||||
spin_lock_irqsave(&rdp->lock, flags);
|
||||
RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp);
|
||||
__rcu_advance_callbacks(rdp);
|
||||
spin_unlock_irqrestore(&rdp->lock, flags);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
/*
 * Splice the callback list (srclist, srctail) onto the end of the list
 * whose tail pointer is dsttail, then leave the source list empty.
 * All four arguments must be lvalues: the macro assigns to srclist,
 * srctail and dsttail.  dstlist is accepted for symmetry but unused.
 */
#define rcu_offline_cpu_enqueue(srclist, srctail, dstlist, dsttail) \
	do { \
		*(dsttail) = (srclist); \
		if ((srclist) != NULL) { \
			(dsttail) = (srctail); \
			(srclist) = NULL; \
			(srctail) = &(srclist); \
		} \
	} while (0)
|
||||
|
||||
void rcu_offline_cpu(int cpu)
|
||||
{
|
||||
int i;
|
||||
struct rcu_head *list = NULL;
|
||||
unsigned long flags;
|
||||
struct rcu_data *rdp = RCU_DATA_CPU(cpu);
|
||||
struct rcu_head **tail = &list;
|
||||
|
||||
/*
|
||||
* Remove all callbacks from the newly dead CPU, retaining order.
|
||||
* Otherwise rcu_barrier() will fail
|
||||
*/
|
||||
|
||||
spin_lock_irqsave(&rdp->lock, flags);
|
||||
rcu_offline_cpu_enqueue(rdp->donelist, rdp->donetail, list, tail);
|
||||
for (i = GP_STAGES - 1; i >= 0; i--)
|
||||
rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i],
|
||||
list, tail);
|
||||
rcu_offline_cpu_enqueue(rdp->nextlist, rdp->nexttail, list, tail);
|
||||
spin_unlock_irqrestore(&rdp->lock, flags);
|
||||
rdp->waitlistcount = 0;
|
||||
|
||||
/* Disengage the newly dead CPU from the grace-period computation. */
|
||||
|
||||
spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
|
||||
rcu_check_mb(cpu);
|
||||
if (per_cpu(rcu_flip_flag, cpu) == rcu_flipped) {
|
||||
smp_mb(); /* Subsequent counter accesses must see new value */
|
||||
per_cpu(rcu_flip_flag, cpu) = rcu_flip_seen;
|
||||
smp_mb(); /* Subsequent RCU read-side critical sections */
|
||||
/* seen -after- acknowledgement. */
|
||||
}
|
||||
|
||||
RCU_DATA_ME()->rcu_flipctr[0] += RCU_DATA_CPU(cpu)->rcu_flipctr[0];
|
||||
RCU_DATA_ME()->rcu_flipctr[1] += RCU_DATA_CPU(cpu)->rcu_flipctr[1];
|
||||
|
||||
RCU_DATA_CPU(cpu)->rcu_flipctr[0] = 0;
|
||||
RCU_DATA_CPU(cpu)->rcu_flipctr[1] = 0;
|
||||
|
||||
cpu_clear(cpu, rcu_cpu_online_map);
|
||||
|
||||
spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
|
||||
|
||||
/*
|
||||
* Place the removed callbacks on the current CPU's queue.
|
||||
* Make them all start a new grace period: simple approach,
|
||||
* in theory could starve a given set of callbacks, but
|
||||
* you would need to be doing some serious CPU hotplugging
|
||||
* to make this happen. If this becomes a problem, adding
|
||||
* a synchronize_rcu() to the hotplug path would be a simple
|
||||
* fix.
|
||||
*/
|
||||
|
||||
rdp = RCU_DATA_ME();
|
||||
spin_lock_irqsave(&rdp->lock, flags);
|
||||
*rdp->nexttail = list;
|
||||
if (list)
|
||||
rdp->nexttail = tail;
|
||||
spin_unlock_irqrestore(&rdp->lock, flags);
|
||||
}
|
||||
|
||||
/*
 * Add @cpu to rcu_cpu_online_map, the set of CPUs the flip state
 * machine iterates over.  The map is updated under
 * rcu_ctrlblk.fliplock.
 */
void __devinit rcu_online_cpu(int cpu)
{
	unsigned long irqflags;

	spin_lock_irqsave(&rcu_ctrlblk.fliplock, irqflags);
	cpu_set(cpu, rcu_cpu_online_map);
	spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, irqflags);
}
|
||||
|
||||
#else /* #ifdef CONFIG_HOTPLUG_CPU */
|
||||
|
||||
/* Without CONFIG_HOTPLUG_CPU there is no offline processing to do. */
void rcu_offline_cpu(int cpu)
{
	/* Intentionally empty. */
}
|
||||
|
||||
/*
 * Add @cpu to rcu_cpu_online_map under rcu_ctrlblk.fliplock.
 *
 * This must happen even without CONFIG_HOTPLUG_CPU: __rcu_init() relies
 * on rcu_online_cpu() to populate rcu_cpu_online_map for the CPUs that
 * are already online, and the flip state machine only polls CPUs that
 * are present in that map.  An empty stub would leave the map unset.
 */
void __devinit rcu_online_cpu(int cpu)
{
	unsigned long flags;

	spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
	cpu_set(cpu, rcu_cpu_online_map);
	spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
}
|
||||
|
||||
#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
|
||||
|
||||
static void rcu_process_callbacks(struct softirq_action *unused)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct rcu_head *next, *list;
|
||||
struct rcu_data *rdp = RCU_DATA_ME();
|
||||
|
||||
spin_lock_irqsave(&rdp->lock, flags);
|
||||
list = rdp->donelist;
|
||||
if (list == NULL) {
|
||||
spin_unlock_irqrestore(&rdp->lock, flags);
|
||||
return;
|
||||
}
|
||||
rdp->donelist = NULL;
|
||||
rdp->donetail = &rdp->donelist;
|
||||
RCU_TRACE_RDP(rcupreempt_trace_done_remove, rdp);
|
||||
spin_unlock_irqrestore(&rdp->lock, flags);
|
||||
while (list) {
|
||||
next = list->next;
|
||||
list->func(list);
|
||||
list = next;
|
||||
RCU_TRACE_ME(rcupreempt_trace_invoke);
|
||||
}
|
||||
}
|
||||
|
||||
void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
|
||||
{
|
||||
unsigned long flags;
|
||||
struct rcu_data *rdp;
|
||||
|
||||
head->func = func;
|
||||
head->next = NULL;
|
||||
local_irq_save(flags);
|
||||
rdp = RCU_DATA_ME();
|
||||
spin_lock(&rdp->lock);
|
||||
__rcu_advance_callbacks(rdp);
|
||||
*rdp->nexttail = head;
|
||||
rdp->nexttail = &head->next;
|
||||
RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp);
|
||||
spin_unlock(&rdp->lock);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(call_rcu);
|
||||
|
||||
/*
|
||||
* Wait until all currently running preempt_disable() code segments
|
||||
* (including hardware-irq-disable segments) complete. Note that
|
||||
* in -rt this does -not- necessarily result in all currently executing
|
||||
* interrupt -handlers- having completed.
|
||||
*/
|
||||
void __synchronize_sched(void)
|
||||
{
|
||||
cpumask_t oldmask;
|
||||
int cpu;
|
||||
|
||||
if (sched_getaffinity(0, &oldmask) < 0)
|
||||
oldmask = cpu_possible_map;
|
||||
for_each_online_cpu(cpu) {
|
||||
sched_setaffinity(0, cpumask_of_cpu(cpu));
|
||||
schedule();
|
||||
}
|
||||
sched_setaffinity(0, oldmask);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__synchronize_sched);
|
||||
|
||||
/*
|
||||
* Check to see if any future RCU-related work will need to be done
|
||||
* by the current CPU, even if none need be done immediately, returning
|
||||
* 1 if so. Assumes that notifiers would take care of handling any
|
||||
* outstanding requests from the RCU core.
|
||||
*
|
||||
* This function is part of the RCU implementation; it is -not-
|
||||
* an exported member of the RCU API.
|
||||
*/
|
||||
int rcu_needs_cpu(int cpu)
|
||||
{
|
||||
struct rcu_data *rdp = RCU_DATA_CPU(cpu);
|
||||
|
||||
return (rdp->donelist != NULL ||
|
||||
!!rdp->waitlistcount ||
|
||||
rdp->nextlist != NULL);
|
||||
}
|
||||
|
||||
int rcu_pending(int cpu)
|
||||
{
|
||||
struct rcu_data *rdp = RCU_DATA_CPU(cpu);
|
||||
|
||||
/* The CPU has at least one callback queued somewhere. */
|
||||
|
||||
if (rdp->donelist != NULL ||
|
||||
!!rdp->waitlistcount ||
|
||||
rdp->nextlist != NULL)
|
||||
return 1;
|
||||
|
||||
/* The RCU core needs an acknowledgement from this CPU. */
|
||||
|
||||
if ((per_cpu(rcu_flip_flag, cpu) == rcu_flipped) ||
|
||||
(per_cpu(rcu_mb_flag, cpu) == rcu_mb_needed))
|
||||
return 1;
|
||||
|
||||
/* This CPU has fallen behind the global grace-period number. */
|
||||
|
||||
if (rdp->completed != rcu_ctrlblk.completed)
|
||||
return 1;
|
||||
|
||||
/* Nothing needed from this CPU. */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * CPU hotplug notifier: bring a CPU into RCU's bookkeeping when it is
 * about to come up, and take it out again when it has died or its
 * bring-up was cancelled.  All other events are ignored.  Always
 * returns NOTIFY_OK.
 */
static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
				    unsigned long action, void *hcpu)
{
	long cpu = (long)hcpu;	/* CPU number is passed in the pointer. */

	switch (action) {
	case CPU_UP_PREPARE:		/* fall through */
	case CPU_UP_PREPARE_FROZEN:
		rcu_online_cpu(cpu);
		break;

	case CPU_UP_CANCELED:		/* fall through */
	case CPU_UP_CANCELED_FROZEN:	/* fall through */
	case CPU_DEAD:			/* fall through */
	case CPU_DEAD_FROZEN:
		rcu_offline_cpu(cpu);
		break;

	default:
		break;
	}

	return NOTIFY_OK;
}
|
||||
|
||||
/* Notifier block that routes CPU hotplug events to rcu_cpu_notify(). */
static struct notifier_block __cpuinitdata rcu_nb = {
	.notifier_call	= rcu_cpu_notify,
};
|
||||
|
||||
/*
 * Boot-time initialization: reset the per-CPU RCU state of every
 * possible CPU, register the hotplug notifier, mark the CPUs that are
 * already online, and install the RCU softirq handler.
 */
void __init __rcu_init(void)
{
	struct rcu_data *rdp;
	int cpu;
	int stage;

	printk(KERN_NOTICE "Preemptible RCU implementation.\n");

	for_each_possible_cpu(cpu) {
		rdp = RCU_DATA_CPU(cpu);
		spin_lock_init(&rdp->lock);
		rdp->completed = 0;
		rdp->waitlistcount = 0;

		/* Every list starts out empty, with its tail at its head. */
		rdp->nextlist = NULL;
		rdp->nexttail = &rdp->nextlist;
		for (stage = 0; stage < GP_STAGES; stage++) {
			rdp->waitlist[stage] = NULL;
			rdp->waittail[stage] = &rdp->waitlist[stage];
		}
		rdp->donelist = NULL;
		rdp->donetail = &rdp->donelist;

		rdp->rcu_flipctr[0] = 0;
		rdp->rcu_flipctr[1] = 0;
	}
	register_cpu_notifier(&rcu_nb);

	/*
	 * We don't need protection against CPU-Hotplug here
	 * since
	 * a) If a CPU comes online while we are iterating over the
	 *    cpu_online_map below, we would only end up making a
	 *    duplicate call to rcu_online_cpu() which sets the corresponding
	 *    CPU's mask in the rcu_cpu_online_map.
	 *
	 * b) A CPU cannot go offline at this point in time since the user
	 *    does not have access to the sysfs interface, nor do we
	 *    suspend the system.
	 */
	for_each_online_cpu(cpu)
		rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long) cpu);

	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL);
}
|
||||
|
||||
/*
 * Deprecated wrapper that simply calls synchronize_rcu().
 * Use synchronize_rcu() or synchronize_sched() instead.
 */
void synchronize_kernel(void)
{
	synchronize_rcu();
}
|
||||
|
||||
#ifdef CONFIG_RCU_TRACE
|
||||
long *rcupreempt_flipctr(int cpu)
|
||||
{
|
||||
return &RCU_DATA_CPU(cpu)->rcu_flipctr[0];
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcupreempt_flipctr);
|
||||
|
||||
int rcupreempt_flip_flag(int cpu)
|
||||
{
|
||||
return per_cpu(rcu_flip_flag, cpu);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcupreempt_flip_flag);
|
||||
|
||||
int rcupreempt_mb_flag(int cpu)
|
||||
{
|
||||
return per_cpu(rcu_mb_flag, cpu);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcupreempt_mb_flag);
|
||||
|
||||
char *rcupreempt_try_flip_state_name(void)
|
||||
{
|
||||
return rcu_try_flip_state_names[rcu_ctrlblk.rcu_try_flip_state];
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcupreempt_try_flip_state_name);
|
||||
|
||||
struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu)
|
||||
{
|
||||
struct rcu_data *rdp = RCU_DATA_CPU(cpu);
|
||||
|
||||
return &rdp->trace;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcupreempt_trace_cpu);
|
||||
|
||||
#endif /* #ifdef CONFIG_RCU_TRACE */
|
330
kernel/rcupreempt_trace.c
Normal file
330
kernel/rcupreempt_trace.c
Normal file
@ -0,0 +1,330 @@
|
||||
/*
|
||||
* Read-Copy Update tracing for realtime implementation
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*
|
||||
* Copyright IBM Corporation, 2006
|
||||
*
|
||||
* Papers: http://www.rdrop.com/users/paulmck/RCU
|
||||
*
|
||||
* For detailed explanation of Read-Copy Update mechanism see -
|
||||
* Documentation/RCU/ *.txt
|
||||
*
|
||||
*/
|
||||
#include <linux/types.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/sched.h>
|
||||
#include <asm/atomic.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/completion.h>
|
||||
#include <linux/moduleparam.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/rcupreempt_trace.h>
|
||||
#include <linux/debugfs.h>
|
||||
|
||||
/* Size, in bytes, of the shared formatting buffer below. */
#define RCUPREEMPT_TRACE_BUF_SIZE 4096

/* Held by the debugfs read handlers while they use rcupreempt_trace_buf. */
static struct mutex rcupreempt_trace_mutex;

/* Formatting buffer, allocated in rcupreempt_trace_init(). */
static char *rcupreempt_trace_buf;
|
||||
|
||||
void rcupreempt_trace_move2done(struct rcupreempt_trace *trace)
|
||||
{
|
||||
trace->done_length += trace->wait_length;
|
||||
trace->done_add += trace->wait_length;
|
||||
trace->wait_length = 0;
|
||||
}
|
||||
void rcupreempt_trace_move2wait(struct rcupreempt_trace *trace)
|
||||
{
|
||||
trace->wait_length += trace->next_length;
|
||||
trace->wait_add += trace->next_length;
|
||||
trace->next_length = 0;
|
||||
}
|
||||
void rcupreempt_trace_try_flip_1(struct rcupreempt_trace *trace)
|
||||
{
|
||||
atomic_inc(&trace->rcu_try_flip_1);
|
||||
}
|
||||
void rcupreempt_trace_try_flip_e1(struct rcupreempt_trace *trace)
|
||||
{
|
||||
atomic_inc(&trace->rcu_try_flip_e1);
|
||||
}
|
||||
void rcupreempt_trace_try_flip_i1(struct rcupreempt_trace *trace)
|
||||
{
|
||||
trace->rcu_try_flip_i1++;
|
||||
}
|
||||
void rcupreempt_trace_try_flip_ie1(struct rcupreempt_trace *trace)
|
||||
{
|
||||
trace->rcu_try_flip_ie1++;
|
||||
}
|
||||
void rcupreempt_trace_try_flip_g1(struct rcupreempt_trace *trace)
|
||||
{
|
||||
trace->rcu_try_flip_g1++;
|
||||
}
|
||||
void rcupreempt_trace_try_flip_a1(struct rcupreempt_trace *trace)
|
||||
{
|
||||
trace->rcu_try_flip_a1++;
|
||||
}
|
||||
void rcupreempt_trace_try_flip_ae1(struct rcupreempt_trace *trace)
|
||||
{
|
||||
trace->rcu_try_flip_ae1++;
|
||||
}
|
||||
void rcupreempt_trace_try_flip_a2(struct rcupreempt_trace *trace)
|
||||
{
|
||||
trace->rcu_try_flip_a2++;
|
||||
}
|
||||
void rcupreempt_trace_try_flip_z1(struct rcupreempt_trace *trace)
|
||||
{
|
||||
trace->rcu_try_flip_z1++;
|
||||
}
|
||||
void rcupreempt_trace_try_flip_ze1(struct rcupreempt_trace *trace)
|
||||
{
|
||||
trace->rcu_try_flip_ze1++;
|
||||
}
|
||||
void rcupreempt_trace_try_flip_z2(struct rcupreempt_trace *trace)
|
||||
{
|
||||
trace->rcu_try_flip_z2++;
|
||||
}
|
||||
void rcupreempt_trace_try_flip_m1(struct rcupreempt_trace *trace)
|
||||
{
|
||||
trace->rcu_try_flip_m1++;
|
||||
}
|
||||
void rcupreempt_trace_try_flip_me1(struct rcupreempt_trace *trace)
|
||||
{
|
||||
trace->rcu_try_flip_me1++;
|
||||
}
|
||||
void rcupreempt_trace_try_flip_m2(struct rcupreempt_trace *trace)
|
||||
{
|
||||
trace->rcu_try_flip_m2++;
|
||||
}
|
||||
void rcupreempt_trace_check_callbacks(struct rcupreempt_trace *trace)
|
||||
{
|
||||
trace->rcu_check_callbacks++;
|
||||
}
|
||||
void rcupreempt_trace_done_remove(struct rcupreempt_trace *trace)
|
||||
{
|
||||
trace->done_remove += trace->done_length;
|
||||
trace->done_length = 0;
|
||||
}
|
||||
void rcupreempt_trace_invoke(struct rcupreempt_trace *trace)
|
||||
{
|
||||
atomic_inc(&trace->done_invoked);
|
||||
}
|
||||
void rcupreempt_trace_next_add(struct rcupreempt_trace *trace)
|
||||
{
|
||||
trace->next_add++;
|
||||
trace->next_length++;
|
||||
}
|
||||
|
||||
static void rcupreempt_trace_sum(struct rcupreempt_trace *sp)
|
||||
{
|
||||
struct rcupreempt_trace *cp;
|
||||
int cpu;
|
||||
|
||||
memset(sp, 0, sizeof(*sp));
|
||||
for_each_possible_cpu(cpu) {
|
||||
cp = rcupreempt_trace_cpu(cpu);
|
||||
sp->next_length += cp->next_length;
|
||||
sp->next_add += cp->next_add;
|
||||
sp->wait_length += cp->wait_length;
|
||||
sp->wait_add += cp->wait_add;
|
||||
sp->done_length += cp->done_length;
|
||||
sp->done_add += cp->done_add;
|
||||
sp->done_remove += cp->done_remove;
|
||||
atomic_set(&sp->done_invoked, atomic_read(&cp->done_invoked));
|
||||
sp->rcu_check_callbacks += cp->rcu_check_callbacks;
|
||||
atomic_set(&sp->rcu_try_flip_1,
|
||||
atomic_read(&cp->rcu_try_flip_1));
|
||||
atomic_set(&sp->rcu_try_flip_e1,
|
||||
atomic_read(&cp->rcu_try_flip_e1));
|
||||
sp->rcu_try_flip_i1 += cp->rcu_try_flip_i1;
|
||||
sp->rcu_try_flip_ie1 += cp->rcu_try_flip_ie1;
|
||||
sp->rcu_try_flip_g1 += cp->rcu_try_flip_g1;
|
||||
sp->rcu_try_flip_a1 += cp->rcu_try_flip_a1;
|
||||
sp->rcu_try_flip_ae1 += cp->rcu_try_flip_ae1;
|
||||
sp->rcu_try_flip_a2 += cp->rcu_try_flip_a2;
|
||||
sp->rcu_try_flip_z1 += cp->rcu_try_flip_z1;
|
||||
sp->rcu_try_flip_ze1 += cp->rcu_try_flip_ze1;
|
||||
sp->rcu_try_flip_z2 += cp->rcu_try_flip_z2;
|
||||
sp->rcu_try_flip_m1 += cp->rcu_try_flip_m1;
|
||||
sp->rcu_try_flip_me1 += cp->rcu_try_flip_me1;
|
||||
sp->rcu_try_flip_m2 += cp->rcu_try_flip_m2;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * debugfs read handler for "rcustats": format the system-wide sums of
 * the trace counters into the shared buffer and copy the requested part
 * to user space.  Returns whatever simple_read_from_buffer() returns.
 *
 * The whole report is produced by a single snprintf() call.  Formatting
 * the "ggp=/rcc=" line separately at offset 0 and then formatting the
 * rest at offset 0 again would overwrite the first line.
 */
static ssize_t rcustats_read(struct file *filp, char __user *buffer,
				size_t count, loff_t *ppos)
{
	struct rcupreempt_trace trace;
	ssize_t bcount;

	rcupreempt_trace_sum(&trace);
	mutex_lock(&rcupreempt_trace_mutex);
	snprintf(rcupreempt_trace_buf, RCUPREEMPT_TRACE_BUF_SIZE,
		 "ggp=%ld rcc=%ld\n"
		 "na=%ld nl=%ld wa=%ld wl=%ld da=%ld dl=%ld dr=%ld di=%d\n"
		 "1=%d e1=%d i1=%ld ie1=%ld g1=%ld a1=%ld ae1=%ld a2=%ld\n"
		 "z1=%ld ze1=%ld z2=%ld m1=%ld me1=%ld m2=%ld\n",

		 rcu_batches_completed(),
		 trace.rcu_check_callbacks,
		 trace.next_add, trace.next_length,
		 trace.wait_add, trace.wait_length,
		 trace.done_add, trace.done_length,
		 trace.done_remove, atomic_read(&trace.done_invoked),
		 atomic_read(&trace.rcu_try_flip_1),
		 atomic_read(&trace.rcu_try_flip_e1),
		 trace.rcu_try_flip_i1, trace.rcu_try_flip_ie1,
		 trace.rcu_try_flip_g1,
		 trace.rcu_try_flip_a1, trace.rcu_try_flip_ae1,
			 trace.rcu_try_flip_a2,
		 trace.rcu_try_flip_z1, trace.rcu_try_flip_ze1,
			 trace.rcu_try_flip_z2,
		 trace.rcu_try_flip_m1, trace.rcu_try_flip_me1,
			trace.rcu_try_flip_m2);
	bcount = simple_read_from_buffer(buffer, count, ppos,
			rcupreempt_trace_buf, strlen(rcupreempt_trace_buf));
	mutex_unlock(&rcupreempt_trace_mutex);
	return bcount;
}
|
||||
|
||||
/*
 * debugfs read handler for "rcugp": sample rcu_batches_completed(),
 * wait for synchronize_rcu() to return, sample it again, and report
 * both values.  The shared buffer mutex is held across the wait.
 * Returns whatever simple_read_from_buffer() returns.
 */
static ssize_t rcugp_read(struct file *filp, char __user *buffer,
				size_t count, loff_t *ppos)
{
	ssize_t nread;
	long gp_before = rcu_batches_completed();

	mutex_lock(&rcupreempt_trace_mutex);
	synchronize_rcu();
	snprintf(rcupreempt_trace_buf, RCUPREEMPT_TRACE_BUF_SIZE,
		 "oldggp=%ld newggp=%ld\n",
		 gp_before, rcu_batches_completed());
	nread = simple_read_from_buffer(buffer, count, ppos,
					rcupreempt_trace_buf,
					strlen(rcupreempt_trace_buf));
	mutex_unlock(&rcupreempt_trace_mutex);

	return nread;
}
|
||||
|
||||
/*
 * debugfs read handler for "rcuctrs": print one line per online CPU
 * with its two flip counters ("last" and "cur", selected by the low bit
 * of rcu_batches_completed()), its flip flag and its memory-barrier
 * flag, followed by the grace-period number and the state-machine
 * state.  Returns whatever simple_read_from_buffer() returns.
 *
 * scnprintf() is used instead of snprintf() because it returns the
 * number of characters actually stored.  snprintf() returns the length
 * that -would- have been written, so with enough CPUs "cnt" could run
 * past RCUPREEMPT_TRACE_BUF_SIZE, making both the write pointer and the
 * remaining-size argument invalid.  With scnprintf() the output is
 * simply truncated at the end of the buffer.
 */
static ssize_t rcuctrs_read(struct file *filp, char __user *buffer,
				size_t count, loff_t *ppos)
{
	int cnt = 0;
	int cpu;
	int f = rcu_batches_completed() & 0x1;
	ssize_t bcount;

	mutex_lock(&rcupreempt_trace_mutex);

	cnt += scnprintf(&rcupreempt_trace_buf[cnt],
			 RCUPREEMPT_TRACE_BUF_SIZE - cnt,
			 "CPU last cur F M\n");
	for_each_online_cpu(cpu) {
		long *flipctr = rcupreempt_flipctr(cpu);

		cnt += scnprintf(&rcupreempt_trace_buf[cnt],
				 RCUPREEMPT_TRACE_BUF_SIZE - cnt,
				 "%3d %4ld %3ld %d %d\n",
				 cpu,
				 flipctr[!f],
				 flipctr[f],
				 rcupreempt_flip_flag(cpu),
				 rcupreempt_mb_flag(cpu));
	}
	cnt += scnprintf(&rcupreempt_trace_buf[cnt],
			 RCUPREEMPT_TRACE_BUF_SIZE - cnt,
			 "ggp = %ld, state = %s\n",
			 rcu_batches_completed(),
			 rcupreempt_try_flip_state_name());
	cnt += scnprintf(&rcupreempt_trace_buf[cnt],
			 RCUPREEMPT_TRACE_BUF_SIZE - cnt,
			 "\n");
	bcount = simple_read_from_buffer(buffer, count, ppos,
			rcupreempt_trace_buf, strlen(rcupreempt_trace_buf));
	mutex_unlock(&rcupreempt_trace_mutex);
	return bcount;
}
|
||||
|
||||
static struct file_operations rcustats_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.read = rcustats_read,
|
||||
};
|
||||
|
||||
static struct file_operations rcugp_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.read = rcugp_read,
|
||||
};
|
||||
|
||||
static struct file_operations rcuctrs_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.read = rcuctrs_read,
|
||||
};
|
||||
|
||||
static struct dentry *rcudir, *statdir, *ctrsdir, *gpdir;
|
||||
static int rcupreempt_debugfs_init(void)
|
||||
{
|
||||
rcudir = debugfs_create_dir("rcu", NULL);
|
||||
if (!rcudir)
|
||||
goto out;
|
||||
statdir = debugfs_create_file("rcustats", 0444, rcudir,
|
||||
NULL, &rcustats_fops);
|
||||
if (!statdir)
|
||||
goto free_out;
|
||||
|
||||
gpdir = debugfs_create_file("rcugp", 0444, rcudir, NULL, &rcugp_fops);
|
||||
if (!gpdir)
|
||||
goto free_out;
|
||||
|
||||
ctrsdir = debugfs_create_file("rcuctrs", 0444, rcudir,
|
||||
NULL, &rcuctrs_fops);
|
||||
if (!ctrsdir)
|
||||
goto free_out;
|
||||
return 0;
|
||||
free_out:
|
||||
if (statdir)
|
||||
debugfs_remove(statdir);
|
||||
if (gpdir)
|
||||
debugfs_remove(gpdir);
|
||||
debugfs_remove(rcudir);
|
||||
out:
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * Module init: set up the buffer mutex, allocate the shared formatting
 * buffer and create the debugfs files.
 *
 * Returns 0 on success and nonzero on failure.  If the debugfs setup
 * fails, the buffer allocated here is freed again so it does not leak.
 */
static int __init rcupreempt_trace_init(void)
{
	int ret;

	mutex_init(&rcupreempt_trace_mutex);
	rcupreempt_trace_buf = kmalloc(RCUPREEMPT_TRACE_BUF_SIZE, GFP_KERNEL);
	if (!rcupreempt_trace_buf)
		return 1;

	ret = rcupreempt_debugfs_init();
	if (ret) {
		kfree(rcupreempt_trace_buf);
		rcupreempt_trace_buf = NULL;
	}
	return ret;
}
|
||||
|
||||
/*
 * Module exit: remove the three debugfs files, then the "rcu" directory
 * that held them, and finally free the shared formatting buffer.
 */
static void __exit rcupreempt_trace_cleanup(void)
{
	/* Files must go before their parent directory. */
	debugfs_remove(statdir);
	debugfs_remove(gpdir);
	debugfs_remove(ctrsdir);

	debugfs_remove(rcudir);

	kfree(rcupreempt_trace_buf);
}
|
||||
|
||||
|
||||
module_init(rcupreempt_trace_init);
|
||||
module_exit(rcupreempt_trace_cleanup);
|
@ -726,11 +726,11 @@ static void rcu_torture_shuffle_tasks(void)
|
||||
cpumask_t tmp_mask = CPU_MASK_ALL;
|
||||
int i;
|
||||
|
||||
lock_cpu_hotplug();
|
||||
get_online_cpus();
|
||||
|
||||
/* No point in shuffling if there is only one online CPU (ex: UP) */
|
||||
if (num_online_cpus() == 1) {
|
||||
unlock_cpu_hotplug();
|
||||
put_online_cpus();
|
||||
return;
|
||||
}
|
||||
|
||||
@ -762,7 +762,7 @@ static void rcu_torture_shuffle_tasks(void)
|
||||
else
|
||||
rcu_idle_cpu--;
|
||||
|
||||
unlock_cpu_hotplug();
|
||||
put_online_cpus();
|
||||
}
|
||||
|
||||
/* Shuffle tasks across CPUs, with the intent of allowing each CPU in the
|
||||
|
1390
kernel/sched.c
1390
kernel/sched.c
File diff suppressed because it is too large
Load Diff
@ -179,6 +179,7 @@ static void print_cpu(struct seq_file *m, int cpu)
|
||||
PN(prev_clock_raw);
|
||||
P(clock_warps);
|
||||
P(clock_overflows);
|
||||
P(clock_underflows);
|
||||
P(clock_deep_idle_events);
|
||||
PN(clock_max_delta);
|
||||
P(cpu_load[0]);
|
||||
@ -299,6 +300,8 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
|
||||
PN(se.exec_max);
|
||||
PN(se.slice_max);
|
||||
PN(se.wait_max);
|
||||
PN(se.wait_sum);
|
||||
P(se.wait_count);
|
||||
P(sched_info.bkl_count);
|
||||
P(se.nr_migrations);
|
||||
P(se.nr_migrations_cold);
|
||||
@ -366,6 +369,8 @@ void proc_sched_set_task(struct task_struct *p)
|
||||
{
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
p->se.wait_max = 0;
|
||||
p->se.wait_sum = 0;
|
||||
p->se.wait_count = 0;
|
||||
p->se.sleep_max = 0;
|
||||
p->se.sum_sleep_runtime = 0;
|
||||
p->se.block_max = 0;
|
||||
|
@ -20,6 +20,8 @@
|
||||
* Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
|
||||
*/
|
||||
|
||||
#include <linux/latencytop.h>
|
||||
|
||||
/*
|
||||
* Targeted preemption latency for CPU-bound tasks:
|
||||
* (default: 20ms * (1 + ilog(ncpus)), units: nanoseconds)
|
||||
@ -248,8 +250,8 @@ static u64 __sched_period(unsigned long nr_running)
|
||||
unsigned long nr_latency = sched_nr_latency;
|
||||
|
||||
if (unlikely(nr_running > nr_latency)) {
|
||||
period = sysctl_sched_min_granularity;
|
||||
period *= nr_running;
|
||||
do_div(period, nr_latency);
|
||||
}
|
||||
|
||||
return period;
|
||||
@ -383,6 +385,9 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
{
|
||||
schedstat_set(se->wait_max, max(se->wait_max,
|
||||
rq_of(cfs_rq)->clock - se->wait_start));
|
||||
schedstat_set(se->wait_count, se->wait_count + 1);
|
||||
schedstat_set(se->wait_sum, se->wait_sum +
|
||||
rq_of(cfs_rq)->clock - se->wait_start);
|
||||
schedstat_set(se->wait_start, 0);
|
||||
}
|
||||
|
||||
@ -434,6 +439,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
if (se->sleep_start) {
|
||||
u64 delta = rq_of(cfs_rq)->clock - se->sleep_start;
|
||||
struct task_struct *tsk = task_of(se);
|
||||
|
||||
if ((s64)delta < 0)
|
||||
delta = 0;
|
||||
@ -443,9 +449,12 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
|
||||
se->sleep_start = 0;
|
||||
se->sum_sleep_runtime += delta;
|
||||
|
||||
account_scheduler_latency(tsk, delta >> 10, 1);
|
||||
}
|
||||
if (se->block_start) {
|
||||
u64 delta = rq_of(cfs_rq)->clock - se->block_start;
|
||||
struct task_struct *tsk = task_of(se);
|
||||
|
||||
if ((s64)delta < 0)
|
||||
delta = 0;
|
||||
@ -462,11 +471,11 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
* time that the task spent sleeping:
|
||||
*/
|
||||
if (unlikely(prof_on == SLEEP_PROFILING)) {
|
||||
struct task_struct *tsk = task_of(se);
|
||||
|
||||
profile_hits(SLEEP_PROFILING, (void *)get_wchan(tsk),
|
||||
delta >> 20);
|
||||
}
|
||||
account_scheduler_latency(tsk, delta >> 10, 0);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
@ -642,13 +651,29 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
|
||||
cfs_rq->curr = NULL;
|
||||
}
|
||||
|
||||
static void entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
|
||||
static void
|
||||
entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
|
||||
{
|
||||
/*
|
||||
* Update run-time statistics of the 'current'.
|
||||
*/
|
||||
update_curr(cfs_rq);
|
||||
|
||||
#ifdef CONFIG_SCHED_HRTICK
|
||||
/*
|
||||
* queued ticks are scheduled to match the slice, so don't bother
|
||||
* validating it and just reschedule.
|
||||
*/
|
||||
if (queued)
|
||||
return resched_task(rq_of(cfs_rq)->curr);
|
||||
/*
|
||||
* don't let the period tick interfere with the hrtick preemption
|
||||
*/
|
||||
if (!sched_feat(DOUBLE_TICK) &&
|
||||
hrtimer_active(&rq_of(cfs_rq)->hrtick_timer))
|
||||
return;
|
||||
#endif
|
||||
|
||||
if (cfs_rq->nr_running > 1 || !sched_feat(WAKEUP_PREEMPT))
|
||||
check_preempt_tick(cfs_rq, curr);
|
||||
}
|
||||
@ -690,7 +715,7 @@ static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu)
|
||||
|
||||
/* Iterate thr' all leaf cfs_rq's on a runqueue */
|
||||
#define for_each_leaf_cfs_rq(rq, cfs_rq) \
|
||||
list_for_each_entry(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list)
|
||||
list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list)
|
||||
|
||||
/* Do the two (enqueued) entities belong to the same group ? */
|
||||
static inline int
|
||||
@ -707,6 +732,8 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
|
||||
return se->parent;
|
||||
}
|
||||
|
||||
#define GROUP_IMBALANCE_PCT 20
|
||||
|
||||
#else /* CONFIG_FAIR_GROUP_SCHED */
|
||||
|
||||
#define for_each_sched_entity(se) \
|
||||
@ -752,6 +779,43 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
|
||||
|
||||
#endif /* CONFIG_FAIR_GROUP_SCHED */
|
||||
|
||||
#ifdef CONFIG_SCHED_HRTICK
|
||||
static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
int requeue = rq->curr == p;
|
||||
struct sched_entity *se = &p->se;
|
||||
struct cfs_rq *cfs_rq = cfs_rq_of(se);
|
||||
|
||||
WARN_ON(task_rq(p) != rq);
|
||||
|
||||
if (hrtick_enabled(rq) && cfs_rq->nr_running > 1) {
|
||||
u64 slice = sched_slice(cfs_rq, se);
|
||||
u64 ran = se->sum_exec_runtime - se->prev_sum_exec_runtime;
|
||||
s64 delta = slice - ran;
|
||||
|
||||
if (delta < 0) {
|
||||
if (rq->curr == p)
|
||||
resched_task(p);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Don't schedule slices shorter than 10000ns, that just
|
||||
* doesn't make sense. Rely on vruntime for fairness.
|
||||
*/
|
||||
if (!requeue)
|
||||
delta = max(10000LL, delta);
|
||||
|
||||
hrtick_start(rq, delta, requeue);
|
||||
}
|
||||
}
|
||||
#else
|
||||
/* No-op stand-in used when CONFIG_SCHED_HRTICK is not set. */
static inline void hrtick_start_fair(struct rq *rq, struct task_struct *p)
{
	/* Intentionally empty. */
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The enqueue_task method is called before nr_running is
|
||||
* increased. Here we update the fair scheduling stats and
|
||||
@ -760,15 +824,28 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
|
||||
static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
|
||||
{
|
||||
struct cfs_rq *cfs_rq;
|
||||
struct sched_entity *se = &p->se;
|
||||
struct sched_entity *se = &p->se,
|
||||
*topse = NULL; /* Highest schedulable entity */
|
||||
int incload = 1;
|
||||
|
||||
for_each_sched_entity(se) {
|
||||
if (se->on_rq)
|
||||
topse = se;
|
||||
if (se->on_rq) {
|
||||
incload = 0;
|
||||
break;
|
||||
}
|
||||
cfs_rq = cfs_rq_of(se);
|
||||
enqueue_entity(cfs_rq, se, wakeup);
|
||||
wakeup = 1;
|
||||
}
|
||||
/* Increment cpu load if we just enqueued the first task of a group on
|
||||
* 'rq->cpu'. 'topse' represents the group to which task 'p' belongs
|
||||
* at the highest grouping level.
|
||||
*/
|
||||
if (incload)
|
||||
inc_cpu_load(rq, topse->load.weight);
|
||||
|
||||
hrtick_start_fair(rq, rq->curr);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -779,16 +856,30 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
|
||||
static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
|
||||
{
|
||||
struct cfs_rq *cfs_rq;
|
||||
struct sched_entity *se = &p->se;
|
||||
struct sched_entity *se = &p->se,
|
||||
*topse = NULL; /* Highest schedulable entity */
|
||||
int decload = 1;
|
||||
|
||||
for_each_sched_entity(se) {
|
||||
topse = se;
|
||||
cfs_rq = cfs_rq_of(se);
|
||||
dequeue_entity(cfs_rq, se, sleep);
|
||||
/* Don't dequeue parent if it has other entities besides us */
|
||||
if (cfs_rq->load.weight)
|
||||
if (cfs_rq->load.weight) {
|
||||
if (parent_entity(se))
|
||||
decload = 0;
|
||||
break;
|
||||
}
|
||||
sleep = 1;
|
||||
}
|
||||
/* Decrement cpu load if we just dequeued the last task of a group on
|
||||
* 'rq->cpu'. 'topse' represents the group to which task 'p' belongs
|
||||
* at the highest grouping level.
|
||||
*/
|
||||
if (decload)
|
||||
dec_cpu_load(rq, topse->load.weight);
|
||||
|
||||
hrtick_start_fair(rq, rq->curr);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -835,6 +926,154 @@ static void yield_task_fair(struct rq *rq)
|
||||
se->vruntime = rightmost->vruntime + 1;
|
||||
}
|
||||
|
||||
/*
 * wake_idle() - try to place a waking task on an idle CPU.
 * @cpu: the CPU the task would otherwise be woken on
 * @p:   the task being woken
 *
 * If @cpu is not idle and an idle CPU that @p is allowed to run on is
 * available, pick that one instead.  Domains are visited in
 * for_each_domain() order, so closer CPUs are considered before more
 * distant ones, and the walk stops at the first domain that does not
 * have SD_WAKE_IDLE set.
 *
 * Returns the CPU we should wake onto.
 */
#if defined(ARCH_HAS_SCHED_WAKE_IDLE)
static int wake_idle(int cpu, struct task_struct *p)
{
	struct sched_domain *domain;
	cpumask_t candidates;
	int candidate;

	/* An idle CPU is already the best place to run this task. */
	if (idle_cpu(cpu))
		return cpu;

	/*
	 * A CPU with more than one task queued is also kept as-is: its
	 * siblings are (in most cases) busy as well, otherwise they would
	 * already have picked up the extra load.  Skipping the search
	 * avoids the checks and cache miss penalties of inspecting the
	 * sibling runqueues.
	 */
	if (cpu_rq(cpu)->nr_running > 1)
		return cpu;

	for_each_domain(cpu, domain) {
		if (!(domain->flags & SD_WAKE_IDLE))
			break;

		/* Only CPUs of this domain that the task may run on. */
		cpus_and(candidates, domain->span, p->cpus_allowed);
		for_each_cpu_mask(candidate, candidates) {
			if (!idle_cpu(candidate))
				continue;

			/* Count the wakeup only if it moves the task. */
			if (candidate != task_cpu(p))
				schedstat_inc(p, se.nr_wakeups_idle);
			return candidate;
		}
	}

	return cpu;
}
#else
/* No idle-CPU search on this architecture: keep the suggested CPU. */
static inline int wake_idle(int cpu, struct task_struct *p)
{
	return cpu;
}
#endif
|
||||
|
||||
#ifdef CONFIG_SMP
/*
 * select_task_rq_fair() - choose the CPU a waking task should be queued on.
 * @p:    the task being woken
 * @sync: non-zero for a sync wakeup; the load of the currently running
 *        task is then discounted from the waking CPU's load
 *
 * The candidates are the CPU recorded for @p (task_cpu(p)) and the CPU
 * executing the wakeup.  The waking CPU is chosen when @p may run there
 * and either the affine-wakeup or the passive-balancing test succeeds in
 * the first domain of the waking CPU (in for_each_domain() order) whose
 * span contains the task's CPU.  The result is finally passed through
 * wake_idle().
 */
static int select_task_rq_fair(struct task_struct *p, int sync)
{
	struct sched_domain *sd, *this_sd = NULL;
	int prev_cpu = task_cpu(p);
	struct rq *prev_rq = task_rq(p);
	int this_cpu = smp_processor_id();
	int new_cpu = prev_cpu;
	unsigned long load, this_load;
	unsigned int imbalance;
	int idx;

	if (prev_cpu == this_cpu)
		goto out_set_cpu;

	/* Find the first domain of this_cpu that also covers prev_cpu. */
	for_each_domain(this_cpu, sd) {
		if (!cpu_isset(prev_cpu, sd->span))
			continue;
		this_sd = sd;
		break;
	}

	if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
		goto out_set_cpu;

	/* No common domain: new_cpu still names prev_cpu, keep it. */
	if (!this_sd)
		goto out_set_cpu;

	/*
	 * Check for affine wakeup and passive balancing possibilities.
	 */
	idx = this_sd->wake_idx;
	imbalance = 100 + (this_sd->imbalance_pct - 100) / 2;

	load = source_load(prev_cpu, idx);
	this_load = target_load(this_cpu, idx);

	/* Wake to this CPU if we can */
	new_cpu = this_cpu;

	if (this_sd->flags & SD_WAKE_AFFINE) {
		unsigned long tl = this_load;
		unsigned long tl_per_task;
		int affine;

		/* Attract cache-cold tasks on sync wakeups. */
		if (sync && !task_hot(p, prev_rq->clock, this_sd))
			goto out_set_cpu;

		schedstat_inc(p, se.nr_wakeups_affine_attempts);
		tl_per_task = cpu_avg_load_per_task(this_cpu);

		/*
		 * On a sync wakeup, subtract the (maximum possible) effect
		 * of the currently running task from this CPU's load.
		 */
		if (sync)
			tl -= current->se.load.weight;

		affine = tl <= load &&
			 tl + target_load(prev_cpu, idx) <= tl_per_task;
		if (!affine)
			affine = 100 * (tl + p->se.load.weight) <=
				 imbalance * load;

		if (affine) {
			/*
			 * This domain has SD_WAKE_AFFINE, p is cache cold
			 * in this domain, and there is no bad imbalance.
			 */
			schedstat_inc(this_sd, ttwu_move_affine);
			schedstat_inc(p, se.nr_wakeups_affine);
			goto out_set_cpu;
		}
	}

	/*
	 * Start passive balancing when half the imbalance_pct limit is
	 * reached.
	 */
	if ((this_sd->flags & SD_WAKE_BALANCE) &&
	    imbalance * this_load <= 100 * load) {
		schedstat_inc(this_sd, ttwu_move_balance);
		schedstat_inc(p, se.nr_wakeups_passive);
		goto out_set_cpu;
	}

	/* Could not wake to this_cpu. Wake to the task's own CPU instead. */
	new_cpu = prev_cpu;
out_set_cpu:
	return wake_idle(new_cpu, p);
}
#endif /* CONFIG_SMP */
|
||||
|
||||
|
||||
/*
|
||||
* Preempt the current task with a newly woken task if needed:
|
||||
*/
|
||||
@ -876,6 +1115,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
|
||||
|
||||
static struct task_struct *pick_next_task_fair(struct rq *rq)
|
||||
{
|
||||
struct task_struct *p;
|
||||
struct cfs_rq *cfs_rq = &rq->cfs;
|
||||
struct sched_entity *se;
|
||||
|
||||
@ -887,7 +1127,10 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)
|
||||
cfs_rq = group_cfs_rq(se);
|
||||
} while (cfs_rq);
|
||||
|
||||
return task_of(se);
|
||||
p = task_of(se);
|
||||
hrtick_start_fair(rq, p);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -944,25 +1187,6 @@ static struct task_struct *load_balance_next_fair(void *arg)
|
||||
return __load_balance_iterator(cfs_rq, cfs_rq->rb_load_balance_curr);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
|
||||
{
|
||||
struct sched_entity *curr;
|
||||
struct task_struct *p;
|
||||
|
||||
if (!cfs_rq->nr_running)
|
||||
return MAX_PRIO;
|
||||
|
||||
curr = cfs_rq->curr;
|
||||
if (!curr)
|
||||
curr = __pick_next_entity(cfs_rq);
|
||||
|
||||
p = task_of(curr);
|
||||
|
||||
return p->prio;
|
||||
}
|
||||
#endif
|
||||
|
||||
static unsigned long
|
||||
load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
unsigned long max_load_move,
|
||||
@ -972,28 +1196,45 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
struct cfs_rq *busy_cfs_rq;
|
||||
long rem_load_move = max_load_move;
|
||||
struct rq_iterator cfs_rq_iterator;
|
||||
unsigned long load_moved;
|
||||
|
||||
cfs_rq_iterator.start = load_balance_start_fair;
|
||||
cfs_rq_iterator.next = load_balance_next_fair;
|
||||
|
||||
for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
struct cfs_rq *this_cfs_rq;
|
||||
long imbalance;
|
||||
unsigned long maxload;
|
||||
struct cfs_rq *this_cfs_rq = busy_cfs_rq->tg->cfs_rq[this_cpu];
|
||||
unsigned long maxload, task_load, group_weight;
|
||||
unsigned long thisload, per_task_load;
|
||||
struct sched_entity *se = busy_cfs_rq->tg->se[busiest->cpu];
|
||||
|
||||
this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu);
|
||||
task_load = busy_cfs_rq->load.weight;
|
||||
group_weight = se->load.weight;
|
||||
|
||||
imbalance = busy_cfs_rq->load.weight - this_cfs_rq->load.weight;
|
||||
/* Don't pull if this_cfs_rq has more load than busy_cfs_rq */
|
||||
if (imbalance <= 0)
|
||||
/*
|
||||
* 'group_weight' is contributed by tasks of total weight
|
||||
* 'task_load'. To move 'rem_load_move' worth of weight only,
|
||||
* we need to move a maximum task load of:
|
||||
*
|
||||
* maxload = (remload / group_weight) * task_load;
|
||||
*/
|
||||
maxload = (rem_load_move * task_load) / group_weight;
|
||||
|
||||
if (!maxload || !task_load)
|
||||
continue;
|
||||
|
||||
/* Don't pull more than imbalance/2 */
|
||||
imbalance /= 2;
|
||||
maxload = min(rem_load_move, imbalance);
|
||||
per_task_load = task_load / busy_cfs_rq->nr_running;
|
||||
/*
|
||||
* balance_tasks will try to forcibly move at least one task if
|
||||
* possible (because of SCHED_LOAD_SCALE_FUZZ). Avoid that if
|
||||
* maxload is less than GROUP_IMBALANCE_PCT% of the per_task_load.
|
||||
*/
|
||||
if (100 * maxload < GROUP_IMBALANCE_PCT * per_task_load)
|
||||
continue;
|
||||
|
||||
*this_best_prio = cfs_rq_best_prio(this_cfs_rq);
|
||||
/* Disable priority-based load balance */
|
||||
*this_best_prio = 0;
|
||||
thisload = this_cfs_rq->load.weight;
|
||||
#else
|
||||
# define maxload rem_load_move
|
||||
#endif
|
||||
@ -1002,11 +1243,33 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
* load_balance_[start|next]_fair iterators
|
||||
*/
|
||||
cfs_rq_iterator.arg = busy_cfs_rq;
|
||||
rem_load_move -= balance_tasks(this_rq, this_cpu, busiest,
|
||||
load_moved = balance_tasks(this_rq, this_cpu, busiest,
|
||||
maxload, sd, idle, all_pinned,
|
||||
this_best_prio,
|
||||
&cfs_rq_iterator);
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
/*
|
||||
* load_moved holds the task load that was moved. The
|
||||
* effective (group) weight moved would be:
|
||||
* load_moved_eff = load_moved/task_load * group_weight;
|
||||
*/
|
||||
load_moved = (group_weight * load_moved) / task_load;
|
||||
|
||||
/* Adjust shares on both cpus to reflect load_moved */
|
||||
group_weight -= load_moved;
|
||||
set_se_shares(se, group_weight);
|
||||
|
||||
se = busy_cfs_rq->tg->se[this_cpu];
|
||||
if (!thisload)
|
||||
group_weight = load_moved;
|
||||
else
|
||||
group_weight = se->load.weight + load_moved;
|
||||
set_se_shares(se, group_weight);
|
||||
#endif
|
||||
|
||||
rem_load_move -= load_moved;
|
||||
|
||||
if (rem_load_move <= 0)
|
||||
break;
|
||||
}
|
||||
@ -1042,14 +1305,14 @@ move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
/*
|
||||
* scheduler tick hitting a task of our scheduling class:
|
||||
*/
|
||||
static void task_tick_fair(struct rq *rq, struct task_struct *curr)
|
||||
static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
|
||||
{
|
||||
struct cfs_rq *cfs_rq;
|
||||
struct sched_entity *se = &curr->se;
|
||||
|
||||
for_each_sched_entity(se) {
|
||||
cfs_rq = cfs_rq_of(se);
|
||||
entity_tick(cfs_rq, se);
|
||||
entity_tick(cfs_rq, se, queued);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1087,6 +1350,42 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
|
||||
resched_task(rq->curr);
|
||||
}
|
||||
|
||||
/*
|
||||
* Priority of the task has changed. Check to see if we preempt
|
||||
* the current task.
|
||||
*/
|
||||
static void prio_changed_fair(struct rq *rq, struct task_struct *p,
|
||||
int oldprio, int running)
|
||||
{
|
||||
/*
|
||||
* Reschedule if we are currently running on this runqueue and
|
||||
* our priority decreased, or if we are not currently running on
|
||||
* this runqueue and our priority is higher than the current's
|
||||
*/
|
||||
if (running) {
|
||||
if (p->prio > oldprio)
|
||||
resched_task(rq->curr);
|
||||
} else
|
||||
check_preempt_curr(rq, p);
|
||||
}
|
||||
|
||||
/*
|
||||
* We switched to the sched_fair class.
|
||||
*/
|
||||
static void switched_to_fair(struct rq *rq, struct task_struct *p,
|
||||
int running)
|
||||
{
|
||||
/*
|
||||
* We were most likely switched from sched_rt, so
|
||||
* kick off the schedule if running, otherwise just see
|
||||
* if we can still preempt the current task.
|
||||
*/
|
||||
if (running)
|
||||
resched_task(rq->curr);
|
||||
else
|
||||
check_preempt_curr(rq, p);
|
||||
}
|
||||
|
||||
/* Account for a task changing its policy or group.
|
||||
*
|
||||
* This routine is mostly called to set cfs_rq->curr field when a task
|
||||
@ -1108,6 +1407,9 @@ static const struct sched_class fair_sched_class = {
|
||||
.enqueue_task = enqueue_task_fair,
|
||||
.dequeue_task = dequeue_task_fair,
|
||||
.yield_task = yield_task_fair,
|
||||
#ifdef CONFIG_SMP
|
||||
.select_task_rq = select_task_rq_fair,
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
.check_preempt_curr = check_preempt_wakeup,
|
||||
|
||||
@ -1122,6 +1424,9 @@ static const struct sched_class fair_sched_class = {
|
||||
.set_curr_task = set_curr_task_fair,
|
||||
.task_tick = task_tick_fair,
|
||||
.task_new = task_new_fair,
|
||||
|
||||
.prio_changed = prio_changed_fair,
|
||||
.switched_to = switched_to_fair,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_SCHED_DEBUG
|
||||
@ -1132,7 +1437,9 @@ static void print_cfs_stats(struct seq_file *m, int cpu)
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
print_cfs_rq(m, cpu, &cpu_rq(cpu)->cfs);
|
||||
#endif
|
||||
rcu_read_lock();
|
||||
for_each_leaf_cfs_rq(cpu_rq(cpu), cfs_rq)
|
||||
print_cfs_rq(m, cpu, cfs_rq);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
#endif
|
||||
|
@ -5,6 +5,12 @@
|
||||
* handled in sched_fair.c)
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_SMP
/*
 * Wakeup CPU selection for the idle class: idle tasks are never
 * migrated, so the answer is always the CPU the task is already on.
 */
static int select_task_rq_idle(struct task_struct *p, int sync)
{
	int home_cpu = task_cpu(p);

	return home_cpu;
}
#endif /* CONFIG_SMP */
|
||||
/*
|
||||
* Idle tasks are unconditionally rescheduled:
|
||||
*/
|
||||
@ -55,7 +61,7 @@ move_one_task_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
}
|
||||
#endif
|
||||
|
||||
static void task_tick_idle(struct rq *rq, struct task_struct *curr)
|
||||
static void task_tick_idle(struct rq *rq, struct task_struct *curr, int queued)
|
||||
{
|
||||
}
|
||||
|
||||
@ -63,6 +69,33 @@ static void set_curr_task_idle(struct rq *rq)
|
||||
{
|
||||
}
|
||||
|
||||
static void switched_to_idle(struct rq *rq, struct task_struct *p,
|
||||
int running)
|
||||
{
|
||||
/* Can this actually happen?? */
|
||||
if (running)
|
||||
resched_task(rq->curr);
|
||||
else
|
||||
check_preempt_curr(rq, p);
|
||||
}
|
||||
|
||||
static void prio_changed_idle(struct rq *rq, struct task_struct *p,
|
||||
int oldprio, int running)
|
||||
{
|
||||
/* This can happen for hot plug CPUS */
|
||||
|
||||
/*
|
||||
* Reschedule if we are currently running on this runqueue and
|
||||
* our priority decreased, or if we are not currently running on
|
||||
* this runqueue and our priority is higher than the current's
|
||||
*/
|
||||
if (running) {
|
||||
if (p->prio > oldprio)
|
||||
resched_task(rq->curr);
|
||||
} else
|
||||
check_preempt_curr(rq, p);
|
||||
}
|
||||
|
||||
/*
|
||||
* Simple, special scheduling class for the per-CPU idle tasks:
|
||||
*/
|
||||
@ -72,6 +105,9 @@ const struct sched_class idle_sched_class = {
|
||||
|
||||
/* dequeue is not valid, we print a debug message there: */
|
||||
.dequeue_task = dequeue_task_idle,
|
||||
#ifdef CONFIG_SMP
|
||||
.select_task_rq = select_task_rq_idle,
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
.check_preempt_curr = check_preempt_curr_idle,
|
||||
|
||||
@ -85,5 +121,9 @@ const struct sched_class idle_sched_class = {
|
||||
|
||||
.set_curr_task = set_curr_task_idle,
|
||||
.task_tick = task_tick_idle,
|
||||
|
||||
.prio_changed = prio_changed_idle,
|
||||
.switched_to = switched_to_idle,
|
||||
|
||||
/* no .task_new for idle tasks */
|
||||
};
|
||||
|
1142
kernel/sched_rt.c
1142
kernel/sched_rt.c
File diff suppressed because it is too large
Load Diff
@ -8,6 +8,7 @@
|
||||
*/
|
||||
#include <linux/mm.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/nmi.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/freezer.h>
|
||||
@ -23,8 +24,8 @@ static DEFINE_PER_CPU(unsigned long, touch_timestamp);
|
||||
static DEFINE_PER_CPU(unsigned long, print_timestamp);
|
||||
static DEFINE_PER_CPU(struct task_struct *, watchdog_task);
|
||||
|
||||
static int did_panic;
|
||||
int softlockup_thresh = 10;
|
||||
static int __read_mostly did_panic;
|
||||
unsigned long __read_mostly softlockup_thresh = 60;
|
||||
|
||||
static int
|
||||
softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
|
||||
@ -45,7 +46,7 @@ static struct notifier_block panic_block = {
|
||||
*/
|
||||
static unsigned long get_timestamp(int this_cpu)
|
||||
{
|
||||
return cpu_clock(this_cpu) >> 30; /* 2^30 ~= 10^9 */
|
||||
return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */
|
||||
}
|
||||
|
||||
void touch_softlockup_watchdog(void)
|
||||
@ -100,11 +101,7 @@ void softlockup_tick(void)
|
||||
|
||||
now = get_timestamp(this_cpu);
|
||||
|
||||
/* Wake up the high-prio watchdog task every second: */
|
||||
if (now > (touch_timestamp + 1))
|
||||
wake_up_process(per_cpu(watchdog_task, this_cpu));
|
||||
|
||||
/* Warn about unreasonable 10+ seconds delays: */
|
||||
/* Warn about unreasonable delays: */
|
||||
if (now <= (touch_timestamp + softlockup_thresh))
|
||||
return;
|
||||
|
||||
@ -121,12 +118,94 @@ void softlockup_tick(void)
|
||||
spin_unlock(&print_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Have a reasonable limit on the number of tasks checked:
|
||||
*/
|
||||
unsigned long __read_mostly sysctl_hung_task_check_count = 1024;
|
||||
|
||||
/*
|
||||
* Zero means infinite timeout - no checking done:
|
||||
*/
|
||||
unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;
|
||||
|
||||
unsigned long __read_mostly sysctl_hung_task_warnings = 10;
|
||||
|
||||
/*
|
||||
* Only do the hung-tasks check on one CPU:
|
||||
*/
|
||||
static int check_cpu __read_mostly = -1;
|
||||
|
||||
static void check_hung_task(struct task_struct *t, unsigned long now)
|
||||
{
|
||||
unsigned long switch_count = t->nvcsw + t->nivcsw;
|
||||
|
||||
if (t->flags & PF_FROZEN)
|
||||
return;
|
||||
|
||||
if (switch_count != t->last_switch_count || !t->last_switch_timestamp) {
|
||||
t->last_switch_count = switch_count;
|
||||
t->last_switch_timestamp = now;
|
||||
return;
|
||||
}
|
||||
if ((long)(now - t->last_switch_timestamp) <
|
||||
sysctl_hung_task_timeout_secs)
|
||||
return;
|
||||
if (sysctl_hung_task_warnings < 0)
|
||||
return;
|
||||
sysctl_hung_task_warnings--;
|
||||
|
||||
/*
|
||||
* Ok, the task did not get scheduled for more than 2 minutes,
|
||||
* complain:
|
||||
*/
|
||||
printk(KERN_ERR "INFO: task %s:%d blocked for more than "
|
||||
"%ld seconds.\n", t->comm, t->pid,
|
||||
sysctl_hung_task_timeout_secs);
|
||||
printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
|
||||
" disables this message.\n");
|
||||
sched_show_task(t);
|
||||
__debug_show_held_locks(t);
|
||||
|
||||
t->last_switch_timestamp = now;
|
||||
touch_nmi_watchdog();
|
||||
}
|
||||
|
||||
/*
|
||||
* Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
|
||||
* a really long time (120 seconds). If that happens, print out
|
||||
* a warning.
|
||||
*/
|
||||
static void check_hung_uninterruptible_tasks(int this_cpu)
|
||||
{
|
||||
int max_count = sysctl_hung_task_check_count;
|
||||
unsigned long now = get_timestamp(this_cpu);
|
||||
struct task_struct *g, *t;
|
||||
|
||||
/*
|
||||
* If the system crashed already then all bets are off,
|
||||
* do not report extra hung tasks:
|
||||
*/
|
||||
if ((tainted & TAINT_DIE) || did_panic)
|
||||
return;
|
||||
|
||||
read_lock(&tasklist_lock);
|
||||
do_each_thread(g, t) {
|
||||
if (!--max_count)
|
||||
break;
|
||||
if (t->state & TASK_UNINTERRUPTIBLE)
|
||||
check_hung_task(t, now);
|
||||
} while_each_thread(g, t);
|
||||
|
||||
read_unlock(&tasklist_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* The watchdog thread - runs every second and touches the timestamp.
|
||||
*/
|
||||
static int watchdog(void *__bind_cpu)
|
||||
{
|
||||
struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
|
||||
int this_cpu = (long)__bind_cpu;
|
||||
|
||||
sched_setscheduler(current, SCHED_FIFO, &param);
|
||||
|
||||
@ -135,13 +214,18 @@ static int watchdog(void *__bind_cpu)
|
||||
|
||||
/*
|
||||
* Run briefly once per second to reset the softlockup timestamp.
|
||||
* If this gets delayed for more than 10 seconds then the
|
||||
* If this gets delayed for more than 60 seconds then the
|
||||
* debug-printout triggers in softlockup_tick().
|
||||
*/
|
||||
while (!kthread_should_stop()) {
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
touch_softlockup_watchdog();
|
||||
schedule();
|
||||
msleep_interruptible(10000);
|
||||
|
||||
if (this_cpu != check_cpu)
|
||||
continue;
|
||||
|
||||
if (sysctl_hung_task_timeout_secs)
|
||||
check_hung_uninterruptible_tasks(this_cpu);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -171,6 +255,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
|
||||
break;
|
||||
case CPU_ONLINE:
|
||||
case CPU_ONLINE_FROZEN:
|
||||
check_cpu = any_online_cpu(cpu_online_map);
|
||||
wake_up_process(per_cpu(watchdog_task, hotcpu));
|
||||
break;
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
@ -181,6 +266,15 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
|
||||
/* Unbind so it can run. Fall thru. */
|
||||
kthread_bind(per_cpu(watchdog_task, hotcpu),
|
||||
any_online_cpu(cpu_online_map));
|
||||
case CPU_DOWN_PREPARE:
|
||||
case CPU_DOWN_PREPARE_FROZEN:
|
||||
if (hotcpu == check_cpu) {
|
||||
cpumask_t temp_cpu_online_map = cpu_online_map;
|
||||
|
||||
cpu_clear(hotcpu, temp_cpu_online_map);
|
||||
check_cpu = any_online_cpu(temp_cpu_online_map);
|
||||
}
|
||||
break;
|
||||
case CPU_DEAD:
|
||||
case CPU_DEAD_FROZEN:
|
||||
p = per_cpu(watchdog_task, hotcpu);
|
||||
|
@ -203,13 +203,13 @@ int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
|
||||
int ret;
|
||||
|
||||
/* No CPUs can come up or down during this. */
|
||||
lock_cpu_hotplug();
|
||||
get_online_cpus();
|
||||
p = __stop_machine_run(fn, data, cpu);
|
||||
if (!IS_ERR(p))
|
||||
ret = kthread_stop(p);
|
||||
else
|
||||
ret = PTR_ERR(p);
|
||||
unlock_cpu_hotplug();
|
||||
put_online_cpus();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -81,6 +81,7 @@ extern int compat_log;
|
||||
extern int maps_protect;
|
||||
extern int sysctl_stat_interval;
|
||||
extern int audit_argv_kb;
|
||||
extern int latencytop_enabled;
|
||||
|
||||
/* Constants used for minimum and maximum */
|
||||
#ifdef CONFIG_DETECT_SOFTLOCKUP
|
||||
@ -306,9 +307,43 @@ static struct ctl_table kern_table[] = {
|
||||
.procname = "sched_nr_migrate",
|
||||
.data = &sysctl_sched_nr_migrate,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 644,
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dointvec,
|
||||
},
|
||||
{
|
||||
.ctl_name = CTL_UNNUMBERED,
|
||||
.procname = "sched_rt_period_ms",
|
||||
.data = &sysctl_sched_rt_period,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dointvec,
|
||||
},
|
||||
{
|
||||
.ctl_name = CTL_UNNUMBERED,
|
||||
.procname = "sched_rt_ratio",
|
||||
.data = &sysctl_sched_rt_ratio,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dointvec,
|
||||
},
|
||||
#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
|
||||
{
|
||||
.ctl_name = CTL_UNNUMBERED,
|
||||
.procname = "sched_min_bal_int_shares",
|
||||
.data = &sysctl_sched_min_bal_int_shares,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dointvec,
|
||||
},
|
||||
{
|
||||
.ctl_name = CTL_UNNUMBERED,
|
||||
.procname = "sched_max_bal_int_shares",
|
||||
.data = &sysctl_sched_max_bal_int_shares,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dointvec,
|
||||
},
|
||||
#endif
|
||||
#endif
|
||||
{
|
||||
.ctl_name = CTL_UNNUMBERED,
|
||||
@ -382,6 +417,15 @@ static struct ctl_table kern_table[] = {
|
||||
.proc_handler = &proc_dointvec_taint,
|
||||
},
|
||||
#endif
|
||||
#ifdef CONFIG_LATENCYTOP
|
||||
{
|
||||
.procname = "latencytop",
|
||||
.data = &latencytop_enabled,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dointvec,
|
||||
},
|
||||
#endif
|
||||
#ifdef CONFIG_SECURITY_CAPABILITIES
|
||||
{
|
||||
.procname = "cap-bound",
|
||||
@ -728,13 +772,40 @@ static struct ctl_table kern_table[] = {
|
||||
.ctl_name = CTL_UNNUMBERED,
|
||||
.procname = "softlockup_thresh",
|
||||
.data = &softlockup_thresh,
|
||||
.maxlen = sizeof(int),
|
||||
.maxlen = sizeof(unsigned long),
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dointvec_minmax,
|
||||
.proc_handler = &proc_doulongvec_minmax,
|
||||
.strategy = &sysctl_intvec,
|
||||
.extra1 = &one,
|
||||
.extra2 = &sixty,
|
||||
},
|
||||
{
|
||||
.ctl_name = CTL_UNNUMBERED,
|
||||
.procname = "hung_task_check_count",
|
||||
.data = &sysctl_hung_task_check_count,
|
||||
.maxlen = sizeof(unsigned long),
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_doulongvec_minmax,
|
||||
.strategy = &sysctl_intvec,
|
||||
},
|
||||
{
|
||||
.ctl_name = CTL_UNNUMBERED,
|
||||
.procname = "hung_task_timeout_secs",
|
||||
.data = &sysctl_hung_task_timeout_secs,
|
||||
.maxlen = sizeof(unsigned long),
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_doulongvec_minmax,
|
||||
.strategy = &sysctl_intvec,
|
||||
},
|
||||
{
|
||||
.ctl_name = CTL_UNNUMBERED,
|
||||
.procname = "hung_task_warnings",
|
||||
.data = &sysctl_hung_task_warnings,
|
||||
.maxlen = sizeof(unsigned long),
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_doulongvec_minmax,
|
||||
.strategy = &sysctl_intvec,
|
||||
},
|
||||
#endif
|
||||
#ifdef CONFIG_COMPAT
|
||||
{
|
||||
|
@ -153,6 +153,7 @@ void tick_nohz_update_jiffies(void)
|
||||
void tick_nohz_stop_sched_tick(void)
|
||||
{
|
||||
unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
|
||||
unsigned long rt_jiffies;
|
||||
struct tick_sched *ts;
|
||||
ktime_t last_update, expires, now, delta;
|
||||
struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
|
||||
@ -216,6 +217,10 @@ void tick_nohz_stop_sched_tick(void)
|
||||
next_jiffies = get_next_timer_interrupt(last_jiffies);
|
||||
delta_jiffies = next_jiffies - last_jiffies;
|
||||
|
||||
rt_jiffies = rt_needs_cpu(cpu);
|
||||
if (rt_jiffies && rt_jiffies < delta_jiffies)
|
||||
delta_jiffies = rt_jiffies;
|
||||
|
||||
if (rcu_needs_cpu(cpu))
|
||||
delta_jiffies = 1;
|
||||
/*
|
||||
@ -509,7 +514,6 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
|
||||
{
|
||||
struct tick_sched *ts =
|
||||
container_of(timer, struct tick_sched, sched_timer);
|
||||
struct hrtimer_cpu_base *base = timer->base->cpu_base;
|
||||
struct pt_regs *regs = get_irq_regs();
|
||||
ktime_t now = ktime_get();
|
||||
int cpu = smp_processor_id();
|
||||
@ -547,15 +551,8 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
|
||||
touch_softlockup_watchdog();
|
||||
ts->idle_jiffies++;
|
||||
}
|
||||
/*
|
||||
* update_process_times() might take tasklist_lock, hence
|
||||
* drop the base lock. sched-tick hrtimers are per-CPU and
|
||||
* never accessible by userspace APIs, so this is safe to do.
|
||||
*/
|
||||
spin_unlock(&base->lock);
|
||||
update_process_times(user_mode(regs));
|
||||
profile_tick(CPU_PROFILING);
|
||||
spin_lock(&base->lock);
|
||||
}
|
||||
|
||||
/* Do not restart, when we are in the idle loop */
|
||||
|
@ -896,7 +896,7 @@ static void run_timer_softirq(struct softirq_action *h)
|
||||
{
|
||||
tvec_base_t *base = __get_cpu_var(tvec_bases);
|
||||
|
||||
hrtimer_run_queues();
|
||||
hrtimer_run_pending();
|
||||
|
||||
if (time_after_eq(jiffies, base->timer_jiffies))
|
||||
__run_timers(base);
|
||||
@ -907,6 +907,7 @@ static void run_timer_softirq(struct softirq_action *h)
|
||||
*/
|
||||
void run_local_timers(void)
|
||||
{
|
||||
hrtimer_run_queues();
|
||||
raise_softirq(TIMER_SOFTIRQ);
|
||||
softlockup_tick();
|
||||
}
|
||||
|
@ -319,7 +319,7 @@ void free_uid(struct user_struct *up)
|
||||
struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
|
||||
{
|
||||
struct hlist_head *hashent = uidhashentry(ns, uid);
|
||||
struct user_struct *up;
|
||||
struct user_struct *up, *new;
|
||||
|
||||
/* Make uid_hash_find() + uids_user_create() + uid_hash_insert()
|
||||
* atomic.
|
||||
@ -331,13 +331,9 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
|
||||
spin_unlock_irq(&uidhash_lock);
|
||||
|
||||
if (!up) {
|
||||
struct user_struct *new;
|
||||
|
||||
new = kmem_cache_alloc(uid_cachep, GFP_KERNEL);
|
||||
if (!new) {
|
||||
uids_mutex_unlock();
|
||||
return NULL;
|
||||
}
|
||||
if (!new)
|
||||
goto out_unlock;
|
||||
|
||||
new->uid = uid;
|
||||
atomic_set(&new->__count, 1);
|
||||
@ -353,28 +349,14 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
|
||||
#endif
|
||||
new->locked_shm = 0;
|
||||
|
||||
if (alloc_uid_keyring(new, current) < 0) {
|
||||
kmem_cache_free(uid_cachep, new);
|
||||
uids_mutex_unlock();
|
||||
return NULL;
|
||||
}
|
||||
if (alloc_uid_keyring(new, current) < 0)
|
||||
goto out_free_user;
|
||||
|
||||
if (sched_create_user(new) < 0) {
|
||||
key_put(new->uid_keyring);
|
||||
key_put(new->session_keyring);
|
||||
kmem_cache_free(uid_cachep, new);
|
||||
uids_mutex_unlock();
|
||||
return NULL;
|
||||
}
|
||||
if (sched_create_user(new) < 0)
|
||||
goto out_put_keys;
|
||||
|
||||
if (uids_user_create(new)) {
|
||||
sched_destroy_user(new);
|
||||
key_put(new->uid_keyring);
|
||||
key_put(new->session_keyring);
|
||||
kmem_cache_free(uid_cachep, new);
|
||||
uids_mutex_unlock();
|
||||
return NULL;
|
||||
}
|
||||
if (uids_user_create(new))
|
||||
goto out_destoy_sched;
|
||||
|
||||
/*
|
||||
* Before adding this, check whether we raced
|
||||
@ -402,6 +384,17 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
|
||||
uids_mutex_unlock();
|
||||
|
||||
return up;
|
||||
|
||||
out_destoy_sched:
|
||||
sched_destroy_user(new);
|
||||
out_put_keys:
|
||||
key_put(new->uid_keyring);
|
||||
key_put(new->session_keyring);
|
||||
out_free_user:
|
||||
kmem_cache_free(uid_cachep, new);
|
||||
out_unlock:
|
||||
uids_mutex_unlock();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void switch_uid(struct user_struct *new_user)
|
||||
|
@ -67,9 +67,8 @@ struct workqueue_struct {
|
||||
#endif
|
||||
};
|
||||
|
||||
/* All the per-cpu workqueues on the system, for hotplug cpu to add/remove
|
||||
threads to each one as cpus come/go. */
|
||||
static DEFINE_MUTEX(workqueue_mutex);
|
||||
/* Serializes the accesses to the list of workqueues. */
|
||||
static DEFINE_SPINLOCK(workqueue_lock);
|
||||
static LIST_HEAD(workqueues);
|
||||
|
||||
static int singlethread_cpu __read_mostly;
|
||||
@ -592,8 +591,6 @@ EXPORT_SYMBOL(schedule_delayed_work_on);
|
||||
* Returns zero on success.
|
||||
* Returns -ve errno on failure.
|
||||
*
|
||||
* Appears to be racy against CPU hotplug.
|
||||
*
|
||||
* schedule_on_each_cpu() is very slow.
|
||||
*/
|
||||
int schedule_on_each_cpu(work_func_t func)
|
||||
@ -605,7 +602,7 @@ int schedule_on_each_cpu(work_func_t func)
|
||||
if (!works)
|
||||
return -ENOMEM;
|
||||
|
||||
preempt_disable(); /* CPU hotplug */
|
||||
get_online_cpus();
|
||||
for_each_online_cpu(cpu) {
|
||||
struct work_struct *work = per_cpu_ptr(works, cpu);
|
||||
|
||||
@ -613,8 +610,8 @@ int schedule_on_each_cpu(work_func_t func)
|
||||
set_bit(WORK_STRUCT_PENDING, work_data_bits(work));
|
||||
__queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu), work);
|
||||
}
|
||||
preempt_enable();
|
||||
flush_workqueue(keventd_wq);
|
||||
put_online_cpus();
|
||||
free_percpu(works);
|
||||
return 0;
|
||||
}
|
||||
@ -750,8 +747,10 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
|
||||
err = create_workqueue_thread(cwq, singlethread_cpu);
|
||||
start_workqueue_thread(cwq, -1);
|
||||
} else {
|
||||
mutex_lock(&workqueue_mutex);
|
||||
get_online_cpus();
|
||||
spin_lock(&workqueue_lock);
|
||||
list_add(&wq->list, &workqueues);
|
||||
spin_unlock(&workqueue_lock);
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
cwq = init_cpu_workqueue(wq, cpu);
|
||||
@ -760,7 +759,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
|
||||
err = create_workqueue_thread(cwq, cpu);
|
||||
start_workqueue_thread(cwq, cpu);
|
||||
}
|
||||
mutex_unlock(&workqueue_mutex);
|
||||
put_online_cpus();
|
||||
}
|
||||
|
||||
if (err) {
|
||||
@ -775,7 +774,7 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
|
||||
{
|
||||
/*
|
||||
* Our caller is either destroy_workqueue() or CPU_DEAD,
|
||||
* workqueue_mutex protects cwq->thread
|
||||
* get_online_cpus() protects cwq->thread.
|
||||
*/
|
||||
if (cwq->thread == NULL)
|
||||
return;
|
||||
@ -810,9 +809,11 @@ void destroy_workqueue(struct workqueue_struct *wq)
|
||||
struct cpu_workqueue_struct *cwq;
|
||||
int cpu;
|
||||
|
||||
mutex_lock(&workqueue_mutex);
|
||||
get_online_cpus();
|
||||
spin_lock(&workqueue_lock);
|
||||
list_del(&wq->list);
|
||||
mutex_unlock(&workqueue_mutex);
|
||||
spin_unlock(&workqueue_lock);
|
||||
put_online_cpus();
|
||||
|
||||
for_each_cpu_mask(cpu, *cpu_map) {
|
||||
cwq = per_cpu_ptr(wq->cpu_wq, cpu);
|
||||
@ -835,13 +836,6 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
|
||||
action &= ~CPU_TASKS_FROZEN;
|
||||
|
||||
switch (action) {
|
||||
case CPU_LOCK_ACQUIRE:
|
||||
mutex_lock(&workqueue_mutex);
|
||||
return NOTIFY_OK;
|
||||
|
||||
case CPU_LOCK_RELEASE:
|
||||
mutex_unlock(&workqueue_mutex);
|
||||
return NOTIFY_OK;
|
||||
|
||||
case CPU_UP_PREPARE:
|
||||
cpu_set(cpu, cpu_populated_map);
|
||||
@ -854,7 +848,8 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
|
||||
case CPU_UP_PREPARE:
|
||||
if (!create_workqueue_thread(cwq, cpu))
|
||||
break;
|
||||
printk(KERN_ERR "workqueue for %i failed\n", cpu);
|
||||
printk(KERN_ERR "workqueue [%s] for %i failed\n",
|
||||
wq->name, cpu);
|
||||
return NOTIFY_BAD;
|
||||
|
||||
case CPU_ONLINE:
|
||||
|
@ -517,4 +517,18 @@ config FAULT_INJECTION_STACKTRACE_FILTER
|
||||
help
|
||||
Provide stacktrace filter for fault-injection capabilities
|
||||
|
||||
config LATENCYTOP
|
||||
bool "Latency measuring infrastructure"
|
||||
select FRAME_POINTER if !MIPS
|
||||
select KALLSYMS
|
||||
select KALLSYMS_ALL
|
||||
select STACKTRACE
|
||||
select SCHEDSTATS
|
||||
select SCHED_DEBUG
|
||||
depends on X86 || X86_64
|
||||
help
|
||||
Enable this option if you want to use the LatencyTOP tool
|
||||
to find out which userspace is blocking on what kernel operations.
|
||||
|
||||
|
||||
source "samples/Kconfig"
|
||||
|
@ -9,7 +9,6 @@
|
||||
#include <linux/module.h>
|
||||
#include <linux/kallsyms.h>
|
||||
|
||||
#ifdef CONFIG_PREEMPT_BKL
|
||||
/*
|
||||
* The 'big kernel semaphore'
|
||||
*
|
||||
@ -86,128 +85,6 @@ void __lockfunc unlock_kernel(void)
|
||||
up(&kernel_sem);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/*
|
||||
* The 'big kernel lock'
|
||||
*
|
||||
* This spinlock is taken and released recursively by lock_kernel()
|
||||
* and unlock_kernel(). It is transparently dropped and reacquired
|
||||
* over schedule(). It is used to protect legacy code that hasn't
|
||||
* been migrated to a proper locking design yet.
|
||||
*
|
||||
* Don't use in new code.
|
||||
*/
|
||||
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kernel_flag);
|
||||
|
||||
|
||||
/*
|
||||
* Acquire/release the underlying lock from the scheduler.
|
||||
*
|
||||
* This is called with preemption disabled, and should
|
||||
* return an error value if it cannot get the lock and
|
||||
* TIF_NEED_RESCHED gets set.
|
||||
*
|
||||
* If it successfully gets the lock, it should increment
|
||||
* the preemption count like any spinlock does.
|
||||
*
|
||||
* (This works on UP too - _raw_spin_trylock will never
|
||||
* return false in that case)
|
||||
*/
|
||||
int __lockfunc __reacquire_kernel_lock(void)
|
||||
{
|
||||
while (!_raw_spin_trylock(&kernel_flag)) {
|
||||
if (test_thread_flag(TIF_NEED_RESCHED))
|
||||
return -EAGAIN;
|
||||
cpu_relax();
|
||||
}
|
||||
preempt_disable();
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __lockfunc __release_kernel_lock(void)
|
||||
{
|
||||
_raw_spin_unlock(&kernel_flag);
|
||||
preempt_enable_no_resched();
|
||||
}
|
||||
|
||||
/*
|
||||
* These are the BKL spinlocks - we try to be polite about preemption.
|
||||
* If SMP is not on (ie UP preemption), this all goes away because the
|
||||
* _raw_spin_trylock() will always succeed.
|
||||
*/
|
||||
#ifdef CONFIG_PREEMPT
|
||||
static inline void __lock_kernel(void)
|
||||
{
|
||||
preempt_disable();
|
||||
if (unlikely(!_raw_spin_trylock(&kernel_flag))) {
|
||||
/*
|
||||
* If preemption was disabled even before this
|
||||
* was called, there's nothing we can be polite
|
||||
* about - just spin.
|
||||
*/
|
||||
if (preempt_count() > 1) {
|
||||
_raw_spin_lock(&kernel_flag);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Otherwise, let's wait for the kernel lock
|
||||
* with preemption enabled..
|
||||
*/
|
||||
do {
|
||||
preempt_enable();
|
||||
while (spin_is_locked(&kernel_flag))
|
||||
cpu_relax();
|
||||
preempt_disable();
|
||||
} while (!_raw_spin_trylock(&kernel_flag));
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/*
|
||||
* Non-preemption case - just get the spinlock
|
||||
*/
|
||||
static inline void __lock_kernel(void)
|
||||
{
|
||||
_raw_spin_lock(&kernel_flag);
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline void __unlock_kernel(void)
|
||||
{
|
||||
/*
|
||||
* the BKL is not covered by lockdep, so we open-code the
|
||||
* unlocking sequence (and thus avoid the dep-chain ops):
|
||||
*/
|
||||
_raw_spin_unlock(&kernel_flag);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
/*
|
||||
* Getting the big kernel lock.
|
||||
*
|
||||
* This cannot happen asynchronously, so we only need to
|
||||
* worry about other CPU's.
|
||||
*/
|
||||
void __lockfunc lock_kernel(void)
|
||||
{
|
||||
int depth = current->lock_depth+1;
|
||||
if (likely(!depth))
|
||||
__lock_kernel();
|
||||
current->lock_depth = depth;
|
||||
}
|
||||
|
||||
void __lockfunc unlock_kernel(void)
|
||||
{
|
||||
BUG_ON(current->lock_depth < 0);
|
||||
if (likely(--current->lock_depth < 0))
|
||||
__unlock_kernel();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
EXPORT_SYMBOL(lock_kernel);
|
||||
EXPORT_SYMBOL(unlock_kernel);
|
||||
|
||||
|
@ -286,7 +286,7 @@ static void __oom_kill_task(struct task_struct *p, int verbose)
|
||||
* all the memory it needs. That way it should be able to
|
||||
* exit() and clear out its resources quickly...
|
||||
*/
|
||||
p->time_slice = HZ;
|
||||
p->rt.time_slice = HZ;
|
||||
set_tsk_thread_flag(p, TIF_MEMDIE);
|
||||
|
||||
force_sig(SIGKILL, p);
|
||||
|
18
mm/slab.c
18
mm/slab.c
@ -730,8 +730,7 @@ static inline void init_lock_keys(void)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* 1. Guard access to the cache-chain.
|
||||
* 2. Protect sanity of cpu_online_map against cpu hotplug events
|
||||
* Guard access to the cache-chain.
|
||||
*/
|
||||
static DEFINE_MUTEX(cache_chain_mutex);
|
||||
static struct list_head cache_chain;
|
||||
@ -1331,12 +1330,11 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
|
||||
int err = 0;
|
||||
|
||||
switch (action) {
|
||||
case CPU_LOCK_ACQUIRE:
|
||||
mutex_lock(&cache_chain_mutex);
|
||||
break;
|
||||
case CPU_UP_PREPARE:
|
||||
case CPU_UP_PREPARE_FROZEN:
|
||||
mutex_lock(&cache_chain_mutex);
|
||||
err = cpuup_prepare(cpu);
|
||||
mutex_unlock(&cache_chain_mutex);
|
||||
break;
|
||||
case CPU_ONLINE:
|
||||
case CPU_ONLINE_FROZEN:
|
||||
@ -1373,9 +1371,8 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
|
||||
#endif
|
||||
case CPU_UP_CANCELED:
|
||||
case CPU_UP_CANCELED_FROZEN:
|
||||
mutex_lock(&cache_chain_mutex);
|
||||
cpuup_canceled(cpu);
|
||||
break;
|
||||
case CPU_LOCK_RELEASE:
|
||||
mutex_unlock(&cache_chain_mutex);
|
||||
break;
|
||||
}
|
||||
@ -2170,6 +2167,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
|
||||
* We use cache_chain_mutex to ensure a consistent view of
|
||||
* cpu_online_map as well. Please see cpuup_callback
|
||||
*/
|
||||
get_online_cpus();
|
||||
mutex_lock(&cache_chain_mutex);
|
||||
|
||||
list_for_each_entry(pc, &cache_chain, next) {
|
||||
@ -2396,6 +2394,7 @@ oops:
|
||||
panic("kmem_cache_create(): failed to create slab `%s'\n",
|
||||
name);
|
||||
mutex_unlock(&cache_chain_mutex);
|
||||
put_online_cpus();
|
||||
return cachep;
|
||||
}
|
||||
EXPORT_SYMBOL(kmem_cache_create);
|
||||
@ -2547,9 +2546,11 @@ int kmem_cache_shrink(struct kmem_cache *cachep)
|
||||
int ret;
|
||||
BUG_ON(!cachep || in_interrupt());
|
||||
|
||||
get_online_cpus();
|
||||
mutex_lock(&cache_chain_mutex);
|
||||
ret = __cache_shrink(cachep);
|
||||
mutex_unlock(&cache_chain_mutex);
|
||||
put_online_cpus();
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(kmem_cache_shrink);
|
||||
@ -2575,6 +2576,7 @@ void kmem_cache_destroy(struct kmem_cache *cachep)
|
||||
BUG_ON(!cachep || in_interrupt());
|
||||
|
||||
/* Find the cache in the chain of caches. */
|
||||
get_online_cpus();
|
||||
mutex_lock(&cache_chain_mutex);
|
||||
/*
|
||||
* the chain is never empty, cache_cache is never destroyed
|
||||
@ -2584,6 +2586,7 @@ void kmem_cache_destroy(struct kmem_cache *cachep)
|
||||
slab_error(cachep, "Can't free all objects");
|
||||
list_add(&cachep->next, &cache_chain);
|
||||
mutex_unlock(&cache_chain_mutex);
|
||||
put_online_cpus();
|
||||
return;
|
||||
}
|
||||
|
||||
@ -2592,6 +2595,7 @@ void kmem_cache_destroy(struct kmem_cache *cachep)
|
||||
|
||||
__kmem_cache_destroy(cachep);
|
||||
mutex_unlock(&cache_chain_mutex);
|
||||
put_online_cpus();
|
||||
}
|
||||
EXPORT_SYMBOL(kmem_cache_destroy);
|
||||
|
||||
|
@ -293,7 +293,7 @@ void flow_cache_flush(void)
|
||||
static DEFINE_MUTEX(flow_flush_sem);
|
||||
|
||||
/* Don't want cpus going down or up during this. */
|
||||
lock_cpu_hotplug();
|
||||
get_online_cpus();
|
||||
mutex_lock(&flow_flush_sem);
|
||||
atomic_set(&info.cpuleft, num_online_cpus());
|
||||
init_completion(&info.completion);
|
||||
@ -305,7 +305,7 @@ void flow_cache_flush(void)
|
||||
|
||||
wait_for_completion(&info.completion);
|
||||
mutex_unlock(&flow_flush_sem);
|
||||
unlock_cpu_hotplug();
|
||||
put_online_cpus();
|
||||
}
|
||||
|
||||
static void __devinit flow_cache_cpu_prepare(int cpu)
|
||||
|
Loading…
x
Reference in New Issue
Block a user