cgroup: Fixes for v6.13-rc6

All are cpuset changes:
 
 - Fix isolated CPUs leaking into sched domains.
 
 - Remove now-unnecessary kernfs active break, which can trigger a warning.
 
 - Comment updates.
 -----BEGIN PGP SIGNATURE-----
 
 iIQEABYKACwWIQTfIjM1kS57o3GsC/uxYfJx3gVYGQUCZ4Gkug4cdGpAa2VybmVs
 Lm9yZwAKCRCxYfJx3gVYGXRGAQCf9aL+UWZZiVqcvRjBt8z3gxW9HQOCXYXNGlLF
 EKFFuAD+KLox+flPLbgNv9IwZnswv9+SdOTCE1TlT0GQFBPZcQU=
 =suPy
 -----END PGP SIGNATURE-----

Merge tag 'cgroup-for-6.13-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup fixes from Tejun Heo:
 "Cpuset fixes:

   - Fix isolated CPUs leaking into sched domains

   - Remove now-unnecessary kernfs active break, which can trigger a
     warning

   - Comment updates"

* tag 'cgroup-for-6.13-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup/cpuset: remove kernfs active break
  cgroup/cpuset: Prevent leakage of isolated CPUs into sched domains
  cgroup/cpuset: Remove stale text
This commit is contained in:
Linus Torvalds 2025-01-10 15:03:02 -08:00
commit 58624e4bc8
2 changed files with 30 additions and 47 deletions

View File

@ -197,10 +197,8 @@ static struct cpuset top_cpuset = {
/* /*
* There are two global locks guarding cpuset structures - cpuset_mutex and * There are two global locks guarding cpuset structures - cpuset_mutex and
* callback_lock. We also require taking task_lock() when dereferencing a * callback_lock. The cpuset code uses only cpuset_mutex. Other kernel
* task's cpuset pointer. See "The task_lock() exception", at the end of this * subsystems can use cpuset_lock()/cpuset_unlock() to prevent change to cpuset
* comment. The cpuset code uses only cpuset_mutex. Other kernel subsystems
* can use cpuset_lock()/cpuset_unlock() to prevent change to cpuset
* structures. Note that cpuset_mutex needs to be a mutex as it is used in * structures. Note that cpuset_mutex needs to be a mutex as it is used in
* paths that rely on priority inheritance (e.g. scheduler - on RT) for * paths that rely on priority inheritance (e.g. scheduler - on RT) for
* correctness. * correctness.
@ -229,9 +227,6 @@ static struct cpuset top_cpuset = {
* The cpuset_common_seq_show() handlers only hold callback_lock across * The cpuset_common_seq_show() handlers only hold callback_lock across
* small pieces of code, such as when reading out possibly multi-word * small pieces of code, such as when reading out possibly multi-word
* cpumasks and nodemasks. * cpumasks and nodemasks.
*
* Accessing a task's cpuset should be done in accordance with the
* guidelines for accessing subsystem state in kernel/cgroup.c
*/ */
static DEFINE_MUTEX(cpuset_mutex); static DEFINE_MUTEX(cpuset_mutex);
@ -890,7 +885,15 @@ v2:
*/ */
if (cgrpv2) { if (cgrpv2) {
for (i = 0; i < ndoms; i++) { for (i = 0; i < ndoms; i++) {
cpumask_copy(doms[i], csa[i]->effective_cpus); /*
* The top cpuset may contain some boot time isolated
* CPUs that need to be excluded from the sched domain.
*/
if (csa[i] == &top_cpuset)
cpumask_and(doms[i], csa[i]->effective_cpus,
housekeeping_cpumask(HK_TYPE_DOMAIN));
else
cpumask_copy(doms[i], csa[i]->effective_cpus);
if (dattr) if (dattr)
dattr[i] = SD_ATTR_INIT; dattr[i] = SD_ATTR_INIT;
} }
@ -3121,29 +3124,6 @@ ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
int retval = -ENODEV; int retval = -ENODEV;
buf = strstrip(buf); buf = strstrip(buf);
/*
* CPU or memory hotunplug may leave @cs w/o any execution
* resources, in which case the hotplug code asynchronously updates
* configuration and transfers all tasks to the nearest ancestor
* which can execute.
*
* As writes to "cpus" or "mems" may restore @cs's execution
* resources, wait for the previously scheduled operations before
* proceeding, so that we don't end up keep removing tasks added
* after execution capability is restored.
*
* cpuset_handle_hotplug may call back into cgroup core asynchronously
* via cgroup_transfer_tasks() and waiting for it from a cgroupfs
* operation like this one can lead to a deadlock through kernfs
* active_ref protection. Let's break the protection. Losing the
* protection is okay as we check whether @cs is online after
* grabbing cpuset_mutex anyway. This only happens on the legacy
* hierarchies.
*/
css_get(&cs->css);
kernfs_break_active_protection(of->kn);
cpus_read_lock(); cpus_read_lock();
mutex_lock(&cpuset_mutex); mutex_lock(&cpuset_mutex);
if (!is_cpuset_online(cs)) if (!is_cpuset_online(cs))
@ -3176,8 +3156,6 @@ ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
out_unlock: out_unlock:
mutex_unlock(&cpuset_mutex); mutex_unlock(&cpuset_mutex);
cpus_read_unlock(); cpus_read_unlock();
kernfs_unbreak_active_protection(of->kn);
css_put(&cs->css);
flush_workqueue(cpuset_migrate_mm_wq); flush_workqueue(cpuset_migrate_mm_wq);
return retval ?: nbytes; return retval ?: nbytes;
} }

View File

@ -86,15 +86,15 @@ echo "" > test/cpuset.cpus
# #
# If isolated CPUs have been reserved at boot time (as shown in # If isolated CPUs have been reserved at boot time (as shown in
# cpuset.cpus.isolated), these isolated CPUs should be outside of CPUs 0-7 # cpuset.cpus.isolated), these isolated CPUs should be outside of CPUs 0-8
# that will be used by this script for testing purpose. If not, some of # that will be used by this script for testing purpose. If not, some of
# the tests may fail incorrectly. These isolated CPUs will also be removed # the tests may fail incorrectly. These pre-isolated CPUs should stay in
# before being compared with the expected results. # an isolated state throughout the testing process for now.
# #
BOOT_ISOLCPUS=$(cat $CGROUP2/cpuset.cpus.isolated) BOOT_ISOLCPUS=$(cat $CGROUP2/cpuset.cpus.isolated)
if [[ -n "$BOOT_ISOLCPUS" ]] if [[ -n "$BOOT_ISOLCPUS" ]]
then then
[[ $(echo $BOOT_ISOLCPUS | sed -e "s/[,-].*//") -le 7 ]] && [[ $(echo $BOOT_ISOLCPUS | sed -e "s/[,-].*//") -le 8 ]] &&
skip_test "Pre-isolated CPUs ($BOOT_ISOLCPUS) overlap CPUs to be tested" skip_test "Pre-isolated CPUs ($BOOT_ISOLCPUS) overlap CPUs to be tested"
echo "Pre-isolated CPUs: $BOOT_ISOLCPUS" echo "Pre-isolated CPUs: $BOOT_ISOLCPUS"
fi fi
@ -683,15 +683,19 @@ check_isolcpus()
EXPECT_VAL2=$EXPECT_VAL EXPECT_VAL2=$EXPECT_VAL
fi fi
#
# Appending pre-isolated CPUs
# Even though CPU #8 isn't used for testing, it can't be pre-isolated
# to make appending those CPUs easier.
#
[[ -n "$BOOT_ISOLCPUS" ]] && {
EXPECT_VAL=${EXPECT_VAL:+${EXPECT_VAL},}${BOOT_ISOLCPUS}
EXPECT_VAL2=${EXPECT_VAL2:+${EXPECT_VAL2},}${BOOT_ISOLCPUS}
}
# #
# Check cpuset.cpus.isolated cpumask # Check cpuset.cpus.isolated cpumask
# #
if [[ -z "$BOOT_ISOLCPUS" ]]
then
ISOLCPUS=$(cat $ISCPUS)
else
ISOLCPUS=$(cat $ISCPUS | sed -e "s/,*$BOOT_ISOLCPUS//")
fi
[[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && { [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && {
# Take a 50ms pause and try again # Take a 50ms pause and try again
pause 0.05 pause 0.05
@ -731,8 +735,6 @@ check_isolcpus()
fi fi
done done
[[ "$ISOLCPUS" = *- ]] && ISOLCPUS=${ISOLCPUS}$LASTISOLCPU [[ "$ISOLCPUS" = *- ]] && ISOLCPUS=${ISOLCPUS}$LASTISOLCPU
[[ -n "BOOT_ISOLCPUS" ]] &&
ISOLCPUS=$(echo $ISOLCPUS | sed -e "s/,*$BOOT_ISOLCPUS//")
[[ "$EXPECT_VAL" = "$ISOLCPUS" ]] [[ "$EXPECT_VAL" = "$ISOLCPUS" ]]
} }
@ -836,8 +838,11 @@ run_state_test()
# if available # if available
[[ -n "$ICPUS" ]] && { [[ -n "$ICPUS" ]] && {
check_isolcpus $ICPUS check_isolcpus $ICPUS
[[ $? -ne 0 ]] && test_fail $I "isolated CPU" \ [[ $? -ne 0 ]] && {
"Expect $ICPUS, get $ISOLCPUS instead" [[ -n "$BOOT_ISOLCPUS" ]] && ICPUS=${ICPUS},${BOOT_ISOLCPUS}
test_fail $I "isolated CPU" \
"Expect $ICPUS, get $ISOLCPUS instead"
}
} }
reset_cgroup_states reset_cgroup_states
# #