mirror of https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
kthread: Default affine kthread to its preferred NUMA node
Kthreads attached to a preferred NUMA node for their task structure allocation can also be assumed to run preferably within that same node. A more precise affinity is usually notified by calling kthread_create_on_cpu() or kthread_bind[_mask]() before the first wakeup.

For the others, a default affinity to the node is desired and sometimes implemented with more or less success when it comes to dealing with hotplug events and nohz_full / CPU isolation interactions:

- kcompactd is affine to its node and handles hotplug but not CPU isolation
- kswapd is affine to its node and ignores both hotplug and CPU isolation
- A bunch of drivers create their kthreads on a specific node and don't take care of affining them further.

Handle that default node affinity preference at the generic level instead, provided a kthread is created on an actual node and doesn't apply any specific affinity, such as a given CPU or a custom cpumask, to bind to before its first wake-up.

This generic handling is aware of CPU hotplug events and CPU isolation such that:

* When a housekeeping CPU that is part of the node of a given kthread goes up, the related task is re-affined to its own node if it was previously running on the default last-resort online housekeeping set from other nodes.

* When a housekeeping CPU that is part of the node of a kthread goes down, the running task is migrated (or the sleeping task is woken up) automatically by the scheduler to other housekeepers within the same node or, as a last resort, to all housekeepers from other nodes.

Acked-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
This commit is contained in:
parent 73382d8781
commit 0fea1a6b7a
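For illustration only (not part of this change): a minimal, hypothetical module sketch of the kind of caller this commit targets. A kthread created with kthread_create_on_node() and never explicitly bound is now expected to start affined to the housekeeping CPUs of its preferred node instead of the whole housekeeping set. The names example_threadfn / example_kthread and the hard-coded node 0 are assumptions made for the example.

/*
 * Hypothetical example module: relies on the generic default node
 * affinity introduced by this commit, rather than re-affining by hand.
 */
#include <linux/err.h>
#include <linux/jiffies.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/sched.h>

static struct task_struct *example_task;

static int example_threadfn(void *data)
{
	/*
	 * No kthread_bind()/set_cpus_allowed_ptr() here: the generic kthread
	 * code now affines this thread to the housekeeping CPUs of its
	 * preferred node before the first wakeup and keeps that affinity
	 * up to date across CPU hotplug.
	 */
	while (!kthread_should_stop())
		schedule_timeout_interruptible(HZ);
	return 0;
}

static int __init example_init(void)
{
	/* Node 0 is arbitrary; a real driver would pass e.g. dev_to_node(dev). */
	example_task = kthread_create_on_node(example_threadfn, NULL, 0,
					      "example_kthread");
	if (IS_ERR(example_task))
		return PTR_ERR(example_task);
	wake_up_process(example_task);
	return 0;
}

static void __exit example_exit(void)
{
	kthread_stop(example_task);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");

Before this commit, such a thread would keep kthreadd's wide housekeeping affinity unless the driver re-affined it by hand, which is exactly the per-driver boilerplate the generic handling removes.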
include/linux/cpuhotplug.h
@@ -240,6 +240,7 @@ enum cpuhp_state {
 	CPUHP_AP_WORKQUEUE_ONLINE,
 	CPUHP_AP_RANDOM_ONLINE,
 	CPUHP_AP_RCUTREE_ONLINE,
+	CPUHP_AP_KTHREADS_ONLINE,
 	CPUHP_AP_BASE_CACHEINFO_ONLINE,
 	CPUHP_AP_ONLINE_DYN,
 	CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 40,
kernel/kthread.c | 106
@@ -35,6 +35,9 @@ static DEFINE_SPINLOCK(kthread_create_lock);
 static LIST_HEAD(kthread_create_list);
 struct task_struct *kthreadd_task;
 
+static LIST_HEAD(kthreads_hotplug);
+static DEFINE_MUTEX(kthreads_hotplug_lock);
+
 struct kthread_create_info
 {
 	/* Information passed to kthread() from kthreadd. */
@@ -53,6 +56,7 @@ struct kthread_create_info
 struct kthread {
 	unsigned long flags;
 	unsigned int cpu;
+	unsigned int node;
 	int started;
 	int result;
 	int (*threadfn)(void *);
@@ -64,6 +68,8 @@ struct kthread {
 #endif
 	/* To store the full name if task comm is truncated. */
 	char *full_name;
+	struct task_struct *task;
+	struct list_head hotplug_node;
 };
 
 enum KTHREAD_BITS {
@@ -122,8 +128,11 @@ bool set_kthread_struct(struct task_struct *p)
 
 	init_completion(&kthread->exited);
 	init_completion(&kthread->parked);
+	INIT_LIST_HEAD(&kthread->hotplug_node);
 	p->vfork_done = &kthread->exited;
 
+	kthread->task = p;
+	kthread->node = tsk_fork_get_node(current);
 	p->worker_private = kthread;
 	return true;
 }
@@ -314,6 +323,11 @@ void __noreturn kthread_exit(long result)
 {
 	struct kthread *kthread = to_kthread(current);
 	kthread->result = result;
+	if (!list_empty(&kthread->hotplug_node)) {
+		mutex_lock(&kthreads_hotplug_lock);
+		list_del(&kthread->hotplug_node);
+		mutex_unlock(&kthreads_hotplug_lock);
+	}
 	do_exit(0);
 }
 EXPORT_SYMBOL(kthread_exit);
@@ -339,6 +353,48 @@ void __noreturn kthread_complete_and_exit(struct completion *comp, long code)
 }
 EXPORT_SYMBOL(kthread_complete_and_exit);
 
+static void kthread_fetch_affinity(struct kthread *kthread, struct cpumask *cpumask)
+{
+	cpumask_and(cpumask, cpumask_of_node(kthread->node),
+		    housekeeping_cpumask(HK_TYPE_KTHREAD));
+
+	if (cpumask_empty(cpumask))
+		cpumask_copy(cpumask, housekeeping_cpumask(HK_TYPE_KTHREAD));
+}
+
+static void kthread_affine_node(void)
+{
+	struct kthread *kthread = to_kthread(current);
+	cpumask_var_t affinity;
+
+	WARN_ON_ONCE(kthread_is_per_cpu(current));
+
+	if (kthread->node == NUMA_NO_NODE) {
+		housekeeping_affine(current, HK_TYPE_KTHREAD);
+	} else {
+		if (!zalloc_cpumask_var(&affinity, GFP_KERNEL)) {
+			WARN_ON_ONCE(1);
+			return;
+		}
+
+		mutex_lock(&kthreads_hotplug_lock);
+		WARN_ON_ONCE(!list_empty(&kthread->hotplug_node));
+		list_add_tail(&kthread->hotplug_node, &kthreads_hotplug);
+		/*
+		 * The node cpumask is racy when read from kthread() but:
+		 * - a racing CPU going down will either fail on the subsequent
+		 *   call to set_cpus_allowed_ptr() or be migrated to housekeepers
+		 *   afterwards by the scheduler.
+		 * - a racing CPU going up will be handled by kthreads_online_cpu()
+		 */
+		kthread_fetch_affinity(kthread, affinity);
+		set_cpus_allowed_ptr(current, affinity);
+		mutex_unlock(&kthreads_hotplug_lock);
+
+		free_cpumask_var(affinity);
+	}
+}
+
 static int kthread(void *_create)
 {
 	static const struct sched_param param = { .sched_priority = 0 };
@@ -369,7 +425,6 @@ static int kthread(void *_create)
 	 * back to default in case they have been changed.
 	 */
 	sched_setscheduler_nocheck(current, SCHED_NORMAL, &param);
-	set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_TYPE_KTHREAD));
 
 	/* OK, tell user we're spawned, wait for stop or wakeup */
 	__set_current_state(TASK_UNINTERRUPTIBLE);
@@ -385,6 +440,9 @@ static int kthread(void *_create)
 
 	self->started = 1;
 
+	if (!(current->flags & PF_NO_SETAFFINITY))
+		kthread_affine_node();
+
 	ret = -EINTR;
 	if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
 		cgroup_kthread_ready();
@@ -781,6 +839,52 @@ int kthreadd(void *unused)
 	return 0;
 }
 
+/*
+ * Re-affine kthreads according to their preferences
+ * and the newly online CPU. The CPU down part is handled
+ * by select_fallback_rq() which default re-affines to
+ * housekeepers in case the preferred affinity doesn't
+ * apply anymore.
+ */
+static int kthreads_online_cpu(unsigned int cpu)
+{
+	cpumask_var_t affinity;
+	struct kthread *k;
+	int ret;
+
+	guard(mutex)(&kthreads_hotplug_lock);
+
+	if (list_empty(&kthreads_hotplug))
+		return 0;
+
+	if (!zalloc_cpumask_var(&affinity, GFP_KERNEL))
+		return -ENOMEM;
+
+	ret = 0;
+
+	list_for_each_entry(k, &kthreads_hotplug, hotplug_node) {
+		if (WARN_ON_ONCE((k->task->flags & PF_NO_SETAFFINITY) ||
+				 kthread_is_per_cpu(k->task) ||
+				 k->node == NUMA_NO_NODE)) {
+			ret = -EINVAL;
+			continue;
+		}
+		kthread_fetch_affinity(k, affinity);
+		set_cpus_allowed_ptr(k->task, affinity);
+	}
+
+	free_cpumask_var(affinity);
+
+	return ret;
+}
+
+static int kthreads_init(void)
+{
+	return cpuhp_setup_state(CPUHP_AP_KTHREADS_ONLINE, "kthreads:online",
+				 kthreads_online_cpu, NULL);
+}
+early_initcall(kthreads_init);
+
 void __kthread_init_worker(struct kthread_worker *worker,
 				const char *name,
 				struct lock_class_key *key)