diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 205dc9edcaa9..528285a91499 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -3692,15 +3692,38 @@ void __init cpuset_init_smp(void) * Description: Returns the cpumask_var_t cpus_allowed of the cpuset * attached to the specified @tsk. Guaranteed to return some non-empty * subset of cpu_online_mask, even if this means going outside the - * tasks cpuset. + * tasks cpuset, except when the task is in the top cpuset. **/ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) { unsigned long flags; + struct cpuset *cs; spin_lock_irqsave(&callback_lock, flags); - guarantee_online_cpus(tsk, pmask); + rcu_read_lock(); + + cs = task_cs(tsk); + if (cs != &top_cpuset) + guarantee_online_cpus(tsk, pmask); + /* + * Tasks in the top cpuset won't get update to their cpumasks + * when a hotplug online/offline event happens. So we include all + * offline cpus in the allowed cpu list. + */ + if ((cs == &top_cpuset) || cpumask_empty(pmask)) { + const struct cpumask *possible_mask = task_cpu_possible_mask(tsk); + + /* + * We first exclude cpus allocated to partitions. If there is no + * allowable online cpu left, we fall back to all possible cpus. + */ + cpumask_andnot(pmask, possible_mask, top_cpuset.subparts_cpus); + if (!cpumask_intersects(pmask, cpu_online_mask)) + cpumask_copy(pmask, possible_mask); + } + + rcu_read_unlock(); spin_unlock_irqrestore(&callback_lock, flags); }