Merge branch 'for-6.10' into test-merge-for-6.10

This commit is contained in:
Tejun Heo 2024-05-15 11:40:33 -10:00
commit a2a58909cf
4 changed files with 334 additions and 200 deletions

View File

@ -51,20 +51,23 @@ enum work_bits {
* data contains off-queue information when !WORK_STRUCT_PWQ. * data contains off-queue information when !WORK_STRUCT_PWQ.
* *
* MSB * MSB
* [ pool ID ] [ OFFQ flags ] [ STRUCT flags ] * [ pool ID ] [ disable depth ] [ OFFQ flags ] [ STRUCT flags ]
* 1 bit 4 or 5 bits * 16 bits 1 bit 4 or 5 bits
*/ */
WORK_OFFQ_FLAG_SHIFT = WORK_STRUCT_FLAG_BITS, WORK_OFFQ_FLAG_SHIFT = WORK_STRUCT_FLAG_BITS,
WORK_OFFQ_CANCELING_BIT = WORK_OFFQ_FLAG_SHIFT, WORK_OFFQ_BH_BIT = WORK_OFFQ_FLAG_SHIFT,
WORK_OFFQ_FLAG_END, WORK_OFFQ_FLAG_END,
WORK_OFFQ_FLAG_BITS = WORK_OFFQ_FLAG_END - WORK_OFFQ_FLAG_SHIFT, WORK_OFFQ_FLAG_BITS = WORK_OFFQ_FLAG_END - WORK_OFFQ_FLAG_SHIFT,
WORK_OFFQ_DISABLE_SHIFT = WORK_OFFQ_FLAG_SHIFT + WORK_OFFQ_FLAG_BITS,
WORK_OFFQ_DISABLE_BITS = 16,
/* /*
* When a work item is off queue, the high bits encode off-queue flags * When a work item is off queue, the high bits encode off-queue flags
* and the last pool it was on. Cap pool ID to 31 bits and use the * and the last pool it was on. Cap pool ID to 31 bits and use the
* highest number to indicate that no pool is associated. * highest number to indicate that no pool is associated.
*/ */
WORK_OFFQ_POOL_SHIFT = WORK_OFFQ_FLAG_SHIFT + WORK_OFFQ_FLAG_BITS, WORK_OFFQ_POOL_SHIFT = WORK_OFFQ_DISABLE_SHIFT + WORK_OFFQ_DISABLE_BITS,
WORK_OFFQ_LEFT = BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT, WORK_OFFQ_LEFT = BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT,
WORK_OFFQ_POOL_BITS = WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31, WORK_OFFQ_POOL_BITS = WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31,
}; };
@ -96,7 +99,9 @@ enum wq_misc_consts {
}; };
/* Convenience constants - of type 'unsigned long', not 'enum'! */ /* Convenience constants - of type 'unsigned long', not 'enum'! */
#define WORK_OFFQ_CANCELING (1ul << WORK_OFFQ_CANCELING_BIT) #define WORK_OFFQ_BH (1ul << WORK_OFFQ_BH_BIT)
#define WORK_OFFQ_FLAG_MASK (((1ul << WORK_OFFQ_FLAG_BITS) - 1) << WORK_OFFQ_FLAG_SHIFT)
#define WORK_OFFQ_DISABLE_MASK (((1ul << WORK_OFFQ_DISABLE_BITS) - 1) << WORK_OFFQ_DISABLE_SHIFT)
#define WORK_OFFQ_POOL_NONE ((1ul << WORK_OFFQ_POOL_BITS) - 1) #define WORK_OFFQ_POOL_NONE ((1ul << WORK_OFFQ_POOL_BITS) - 1)
#define WORK_STRUCT_NO_POOL (WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT) #define WORK_STRUCT_NO_POOL (WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT)
#define WORK_STRUCT_PWQ_MASK (~((1ul << WORK_STRUCT_PWQ_SHIFT) - 1)) #define WORK_STRUCT_PWQ_MASK (~((1ul << WORK_STRUCT_PWQ_SHIFT) - 1))
@ -180,6 +185,9 @@ struct workqueue_attrs {
* Below fields aren't properties of a worker_pool. They only modify how * Below fields aren't properties of a worker_pool. They only modify how
* :c:func:`apply_workqueue_attrs` select pools and thus don't * :c:func:`apply_workqueue_attrs` select pools and thus don't
* participate in pool hash calculations or equality comparisons. * participate in pool hash calculations or equality comparisons.
*
* If @affn_strict is set, @cpumask isn't a property of a worker_pool
* either.
*/ */
/** /**
@ -465,7 +473,7 @@ void workqueue_softirq_dead(unsigned int cpu);
* @fmt: printf format for the name of the workqueue * @fmt: printf format for the name of the workqueue
* @flags: WQ_* flags * @flags: WQ_* flags
* @max_active: max in-flight work items, 0 for default * @max_active: max in-flight work items, 0 for default
* remaining args: args for @fmt * @...: args for @fmt
* *
* For a per-cpu workqueue, @max_active limits the number of in-flight work * For a per-cpu workqueue, @max_active limits the number of in-flight work
* items for each CPU. e.g. @max_active of 1 indicates that each CPU can be * items for each CPU. e.g. @max_active of 1 indicates that each CPU can be
@ -559,6 +567,14 @@ extern bool flush_delayed_work(struct delayed_work *dwork);
extern bool cancel_delayed_work(struct delayed_work *dwork); extern bool cancel_delayed_work(struct delayed_work *dwork);
extern bool cancel_delayed_work_sync(struct delayed_work *dwork); extern bool cancel_delayed_work_sync(struct delayed_work *dwork);
extern bool disable_work(struct work_struct *work);
extern bool disable_work_sync(struct work_struct *work);
extern bool enable_work(struct work_struct *work);
extern bool disable_delayed_work(struct delayed_work *dwork);
extern bool disable_delayed_work_sync(struct delayed_work *dwork);
extern bool enable_delayed_work(struct delayed_work *dwork);
extern bool flush_rcu_work(struct rcu_work *rwork); extern bool flush_rcu_work(struct rcu_work *rwork);
extern void workqueue_set_max_active(struct workqueue_struct *wq, extern void workqueue_set_max_active(struct workqueue_struct *wq,
@ -666,6 +682,32 @@ static inline bool schedule_work(struct work_struct *work)
return queue_work(system_wq, work); return queue_work(system_wq, work);
} }
/**
 * enable_and_queue_work - Enable a work item and queue it once fully enabled
 * @wq: The target workqueue
 * @work: The work item to be enabled and queued
 *
 * Calls enable_work() on @work. If that reports that the disable depth dropped
 * to 0, @work is queued on @wq with queue_work() and %true is returned. If the
 * work item is still disabled, nothing is queued and %false is returned.
 *
 * Queueing is unconditional once the disable depth reaches zero. Callers that
 * only want to queue when some event occurred while @work was disabled have to
 * track that state themselves and queue explicitly after enable_work().
 */
static inline bool enable_and_queue_work(struct workqueue_struct *wq,
					 struct work_struct *work)
{
	/* still disabled by someone else - leave it alone */
	if (!enable_work(work))
		return false;

	queue_work(wq, work);
	return true;
}
/* /*
* Detect attempt to flush system-wide workqueues at compile time when possible. * Detect attempt to flush system-wide workqueues at compile time when possible.
* Warn attempt to flush system-wide workqueues at runtime. * Warn attempt to flush system-wide workqueues at runtime.

View File

@ -64,13 +64,15 @@ TRACE_EVENT(workqueue_activate_work,
TP_STRUCT__entry( TP_STRUCT__entry(
__field( void *, work ) __field( void *, work )
__field( void *, function)
), ),
TP_fast_assign( TP_fast_assign(
__entry->work = work; __entry->work = work;
__entry->function = work->func;
), ),
TP_printk("work struct %p", __entry->work) TP_printk("work struct %p function=%ps ", __entry->work, __entry->function)
); );
/** /**

View File

@ -99,6 +99,7 @@ enum worker_flags {
enum work_cancel_flags { enum work_cancel_flags {
WORK_CANCEL_DELAYED = 1 << 0, /* canceling a delayed_work */ WORK_CANCEL_DELAYED = 1 << 0, /* canceling a delayed_work */
WORK_CANCEL_DISABLE = 1 << 1, /* canceling to disable */
}; };
enum wq_internal_consts { enum wq_internal_consts {
@ -392,6 +393,12 @@ struct wq_pod_type {
int *cpu_pod; /* cpu -> pod */ int *cpu_pod; /* cpu -> pod */
}; };
/*
 * Unpacked view of the off-queue fields of a work item's data word, as filled
 * in by work_offqd_unpack().
 */
struct work_offq_data {
/* pool ID field: WORK_OFFQ_POOL_BITS wide, starting at WORK_OFFQ_POOL_SHIFT */
u32 pool_id;
/* disable depth: WORK_OFFQ_DISABLE_BITS wide, starting at WORK_OFFQ_DISABLE_SHIFT */
u32 disable;
/* data & WORK_OFFQ_FLAG_MASK; the flag bits stay at their in-word position */
u32 flags;
};
static const char *wq_affn_names[WQ_AFFN_NR_TYPES] = { static const char *wq_affn_names[WQ_AFFN_NR_TYPES] = {
[WQ_AFFN_DFL] = "default", [WQ_AFFN_DFL] = "default",
[WQ_AFFN_CPU] = "cpu", [WQ_AFFN_CPU] = "cpu",
@ -489,12 +496,6 @@ static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS];
/* I: attributes used when instantiating ordered pools on demand */ /* I: attributes used when instantiating ordered pools on demand */
static struct workqueue_attrs *ordered_wq_attrs[NR_STD_WORKER_POOLS]; static struct workqueue_attrs *ordered_wq_attrs[NR_STD_WORKER_POOLS];
/*
* Used to synchronize multiple cancel_sync attempts on the same work item. See
* work_grab_pending() and __cancel_work_sync().
*/
static DECLARE_WAIT_QUEUE_HEAD(wq_cancel_waitq);
/* /*
* I: kthread_worker to release pwq's. pwq release needs to be bounced to a * I: kthread_worker to release pwq's. pwq release needs to be bounced to a
* process context while holding a pool lock. Bounce to a dedicated kthread * process context while holding a pool lock. Bounce to a dedicated kthread
@ -763,6 +764,11 @@ static int work_next_color(int color)
return (color + 1) % WORK_NR_COLORS; return (color + 1) % WORK_NR_COLORS;
} }
/*
 * Off-queue flag bits that describe @pool: %WORK_OFFQ_BH when the pool has
 * %POOL_BH set, otherwise no bits at all.
 */
static unsigned long pool_offq_flags(struct worker_pool *pool)
{
	if (pool->flags & POOL_BH)
		return WORK_OFFQ_BH;

	return 0;
}
/* /*
* While queued, %WORK_STRUCT_PWQ is set and non flag bits of a work's data * While queued, %WORK_STRUCT_PWQ is set and non flag bits of a work's data
* contain the pointer to the queued pwq. Once execution starts, the flag * contain the pointer to the queued pwq. Once execution starts, the flag
@ -776,11 +782,6 @@ static int work_next_color(int color)
* corresponding to a work. Pool is available once the work has been * corresponding to a work. Pool is available once the work has been
* queued anywhere after initialization until it is sync canceled. pwq is * queued anywhere after initialization until it is sync canceled. pwq is
* available only while the work item is queued. * available only while the work item is queued.
*
* %WORK_OFFQ_CANCELING is used to mark a work item which is being
* canceled. While being canceled, a work item may have its PENDING set
* but stay off timer and worklist for arbitrarily long and nobody should
* try to steal the PENDING bit.
*/ */
static inline void set_work_data(struct work_struct *work, unsigned long data) static inline void set_work_data(struct work_struct *work, unsigned long data)
{ {
@ -892,36 +893,26 @@ static struct worker_pool *get_work_pool(struct work_struct *work)
return idr_find(&worker_pool_idr, pool_id); return idr_find(&worker_pool_idr, pool_id);
} }
/*
 * Extract the @bits wide field that starts at bit @shift of @v.
 *
 * The mask is built from an unsigned long constant on purpose. With a plain
 * int, "1 << bits" overflows a signed int once @bits reaches 31 - which
 * WORK_OFFQ_POOL_BITS can be - and that is undefined behavior. @bits must be
 * smaller than the width of unsigned long.
 */
static unsigned long shift_and_mask(unsigned long v, u32 shift, u32 bits)
{
	return (v >> shift) & ((1UL << bits) - 1);
}
static void mark_work_canceling(struct work_struct *work) static void work_offqd_unpack(struct work_offq_data *offqd, unsigned long data)
{ {
unsigned long pool_id = get_work_pool_id(work); WARN_ON_ONCE(data & WORK_STRUCT_PWQ);
pool_id <<= WORK_OFFQ_POOL_SHIFT; offqd->pool_id = shift_and_mask(data, WORK_OFFQ_POOL_SHIFT,
set_work_data(work, pool_id | WORK_STRUCT_PENDING | WORK_OFFQ_CANCELING); WORK_OFFQ_POOL_BITS);
offqd->disable = shift_and_mask(data, WORK_OFFQ_DISABLE_SHIFT,
WORK_OFFQ_DISABLE_BITS);
offqd->flags = data & WORK_OFFQ_FLAG_MASK;
} }
static bool work_is_canceling(struct work_struct *work) static unsigned long work_offqd_pack_flags(struct work_offq_data *offqd)
{ {
unsigned long data = atomic_long_read(&work->data); return ((unsigned long)offqd->disable << WORK_OFFQ_DISABLE_SHIFT) |
((unsigned long)offqd->flags);
return !(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_CANCELING);
} }
/* /*
@ -2067,8 +2058,6 @@ out_put:
* 1 if @work was pending and we successfully stole PENDING * 1 if @work was pending and we successfully stole PENDING
* 0 if @work was idle and we claimed PENDING * 0 if @work was idle and we claimed PENDING
* -EAGAIN if PENDING couldn't be grabbed at the moment, safe to busy-retry * -EAGAIN if PENDING couldn't be grabbed at the moment, safe to busy-retry
* -ENOENT if someone else is canceling @work, this state may persist
* for arbitrarily long
* ======== ================================================================ * ======== ================================================================
* *
* Note: * Note:
@ -2151,7 +2140,8 @@ static int try_to_grab_pending(struct work_struct *work, u32 cflags,
* this destroys work->data needed by the next step, stash it. * this destroys work->data needed by the next step, stash it.
*/ */
work_data = *work_data_bits(work); work_data = *work_data_bits(work);
set_work_pool_and_keep_pending(work, pool->id, 0); set_work_pool_and_keep_pending(work, pool->id,
pool_offq_flags(pool));
/* must be the last step, see the function comment */ /* must be the last step, see the function comment */
pwq_dec_nr_in_flight(pwq, work_data); pwq_dec_nr_in_flight(pwq, work_data);
@ -2164,26 +2154,9 @@ static int try_to_grab_pending(struct work_struct *work, u32 cflags,
fail: fail:
rcu_read_unlock(); rcu_read_unlock();
local_irq_restore(*irq_flags); local_irq_restore(*irq_flags);
if (work_is_canceling(work))
return -ENOENT;
cpu_relax();
return -EAGAIN; return -EAGAIN;
} }
struct cwt_wait {
wait_queue_entry_t wait;
struct work_struct *work;
};
static int cwt_wakefn(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait);
if (cwait->work != key)
return 0;
return autoremove_wake_function(wait, mode, sync, key);
}
/** /**
* work_grab_pending - steal work item from worklist and disable irq * work_grab_pending - steal work item from worklist and disable irq
* @work: work item to steal * @work: work item to steal
@ -2193,7 +2166,7 @@ static int cwt_wakefn(wait_queue_entry_t *wait, unsigned mode, int sync, void *k
* Grab PENDING bit of @work. @work can be in any stable state - idle, on timer * Grab PENDING bit of @work. @work can be in any stable state - idle, on timer
* or on worklist. * or on worklist.
* *
* Must be called in process context. IRQ is disabled on return with IRQ state * Can be called from any context. IRQ is disabled on return with IRQ state
* stored in *@irq_flags. The caller is responsible for re-enabling it using * stored in *@irq_flags. The caller is responsible for re-enabling it using
* local_irq_restore(). * local_irq_restore().
* *
@ -2202,41 +2175,14 @@ static int cwt_wakefn(wait_queue_entry_t *wait, unsigned mode, int sync, void *k
static bool work_grab_pending(struct work_struct *work, u32 cflags, static bool work_grab_pending(struct work_struct *work, u32 cflags,
unsigned long *irq_flags) unsigned long *irq_flags)
{ {
struct cwt_wait cwait;
int ret; int ret;
might_sleep(); while (true) {
repeat: ret = try_to_grab_pending(work, cflags, irq_flags);
ret = try_to_grab_pending(work, cflags, irq_flags); if (ret >= 0)
if (likely(ret >= 0)) return ret;
return ret; cpu_relax();
if (ret != -ENOENT) }
goto repeat;
/*
* Someone is already canceling. Wait for it to finish. flush_work()
* doesn't work for PREEMPT_NONE because we may get woken up between
* @work's completion and the other canceling task resuming and clearing
* CANCELING - flush_work() will return false immediately as @work is no
* longer busy, try_to_grab_pending() will return -ENOENT as @work is
* still being canceled and the other canceling task won't be able to
* clear CANCELING as we're hogging the CPU.
*
* Let's wait for completion using a waitqueue. As this may lead to the
* thundering herd problem, use a custom wake function which matches
* @work along with exclusive wait and wakeup.
*/
init_wait(&cwait.wait);
cwait.wait.func = cwt_wakefn;
cwait.work = work;
prepare_to_wait_exclusive(&wq_cancel_waitq, &cwait.wait,
TASK_UNINTERRUPTIBLE);
if (work_is_canceling(work))
schedule();
finish_wait(&wq_cancel_waitq, &cwait.wait);
goto repeat;
} }
/** /**
@ -2422,6 +2368,21 @@ out:
rcu_read_unlock(); rcu_read_unlock();
} }
static bool clear_pending_if_disabled(struct work_struct *work)
{
unsigned long data = *work_data_bits(work);
struct work_offq_data offqd;
if (likely((data & WORK_STRUCT_PWQ) ||
!(data & WORK_OFFQ_DISABLE_MASK)))
return false;
work_offqd_unpack(&offqd, data);
set_work_pool_and_clear_pending(work, offqd.pool_id,
work_offqd_pack_flags(&offqd));
return true;
}
/** /**
* queue_work_on - queue work on specific cpu * queue_work_on - queue work on specific cpu
* @cpu: CPU number to execute work on * @cpu: CPU number to execute work on
@ -2444,7 +2405,8 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq,
local_irq_save(irq_flags); local_irq_save(irq_flags);
if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)) &&
!clear_pending_if_disabled(work)) {
__queue_work(cpu, wq, work); __queue_work(cpu, wq, work);
ret = true; ret = true;
} }
@ -2522,7 +2484,8 @@ bool queue_work_node(int node, struct workqueue_struct *wq,
local_irq_save(irq_flags); local_irq_save(irq_flags);
if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)) &&
!clear_pending_if_disabled(work)) {
int cpu = select_numa_node_cpu(node); int cpu = select_numa_node_cpu(node);
__queue_work(cpu, wq, work); __queue_work(cpu, wq, work);
@ -2604,7 +2567,8 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
/* read the comment in __queue_work() */ /* read the comment in __queue_work() */
local_irq_save(irq_flags); local_irq_save(irq_flags);
if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)) &&
!clear_pending_if_disabled(work)) {
__queue_delayed_work(cpu, wq, dwork, delay); __queue_delayed_work(cpu, wq, dwork, delay);
ret = true; ret = true;
} }
@ -2636,19 +2600,14 @@ bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
struct delayed_work *dwork, unsigned long delay) struct delayed_work *dwork, unsigned long delay)
{ {
unsigned long irq_flags; unsigned long irq_flags;
int ret; bool ret;
do { ret = work_grab_pending(&dwork->work, WORK_CANCEL_DELAYED, &irq_flags);
ret = try_to_grab_pending(&dwork->work, WORK_CANCEL_DELAYED,
&irq_flags);
} while (unlikely(ret == -EAGAIN));
if (likely(ret >= 0)) { if (!clear_pending_if_disabled(&dwork->work))
__queue_delayed_work(cpu, wq, dwork, delay); __queue_delayed_work(cpu, wq, dwork, delay);
local_irq_restore(irq_flags);
}
/* -ENOENT from try_to_grab_pending() becomes %true */ local_irq_restore(irq_flags);
return ret; return ret;
} }
EXPORT_SYMBOL_GPL(mod_delayed_work_on); EXPORT_SYMBOL_GPL(mod_delayed_work_on);
@ -2677,7 +2636,12 @@ bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork)
{ {
struct work_struct *work = &rwork->work; struct work_struct *work = &rwork->work;
if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { /*
* rcu_work can't be canceled or disabled. Warn if the user reached
* inside @rwork and disabled the inner work.
*/
if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)) &&
!WARN_ON_ONCE(clear_pending_if_disabled(work))) {
rwork->wq = wq; rwork->wq = wq;
call_rcu_hurry(&rwork->rcu, rcu_work_rcufn); call_rcu_hurry(&rwork->rcu, rcu_work_rcufn);
return true; return true;
@ -2953,7 +2917,7 @@ static void idle_worker_timeout(struct timer_list *t)
unsigned long expires; unsigned long expires;
/* idle_list is kept in LIFO order, check the last one */ /* idle_list is kept in LIFO order, check the last one */
worker = list_entry(pool->idle_list.prev, struct worker, entry); worker = list_last_entry(&pool->idle_list, struct worker, entry);
expires = worker->last_active + IDLE_WORKER_TIMEOUT; expires = worker->last_active + IDLE_WORKER_TIMEOUT;
do_cull = !time_before(jiffies, expires); do_cull = !time_before(jiffies, expires);
@ -2995,7 +2959,7 @@ static void idle_cull_fn(struct work_struct *work)
struct worker *worker; struct worker *worker;
unsigned long expires; unsigned long expires;
worker = list_entry(pool->idle_list.prev, struct worker, entry); worker = list_last_entry(&pool->idle_list, struct worker, entry);
expires = worker->last_active + IDLE_WORKER_TIMEOUT; expires = worker->last_active + IDLE_WORKER_TIMEOUT;
if (time_before(jiffies, expires)) { if (time_before(jiffies, expires)) {
@ -3230,7 +3194,7 @@ __acquires(&pool->lock)
* PENDING and queued state changes happen together while IRQ is * PENDING and queued state changes happen together while IRQ is
* disabled. * disabled.
*/ */
set_work_pool_and_clear_pending(work, pool->id, 0); set_work_pool_and_clear_pending(work, pool->id, pool_offq_flags(pool));
pwq->stats[PWQ_STAT_STARTED]++; pwq->stats[PWQ_STAT_STARTED]++;
raw_spin_unlock_irq(&pool->lock); raw_spin_unlock_irq(&pool->lock);
@ -3700,7 +3664,7 @@ void workqueue_softirq_dead(unsigned int cpu)
if (!need_more_worker(pool)) if (!need_more_worker(pool))
continue; continue;
INIT_WORK(&dead_work.work, drain_dead_softirq_workfn); INIT_WORK_ONSTACK(&dead_work.work, drain_dead_softirq_workfn);
dead_work.pool = pool; dead_work.pool = pool;
init_completion(&dead_work.done); init_completion(&dead_work.done);
@ -3710,6 +3674,7 @@ void workqueue_softirq_dead(unsigned int cpu)
queue_work(system_bh_wq, &dead_work.work); queue_work(system_bh_wq, &dead_work.work);
wait_for_completion(&dead_work.done); wait_for_completion(&dead_work.done);
destroy_work_on_stack(&dead_work.work);
} }
} }
@ -4154,8 +4119,6 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
struct pool_workqueue *pwq; struct pool_workqueue *pwq;
struct workqueue_struct *wq; struct workqueue_struct *wq;
might_sleep();
rcu_read_lock(); rcu_read_lock();
pool = get_work_pool(work); pool = get_work_pool(work);
if (!pool) { if (!pool) {
@ -4207,6 +4170,7 @@ already_gone:
static bool __flush_work(struct work_struct *work, bool from_cancel) static bool __flush_work(struct work_struct *work, bool from_cancel)
{ {
struct wq_barrier barr; struct wq_barrier barr;
unsigned long data;
if (WARN_ON(!wq_online)) if (WARN_ON(!wq_online))
return false; return false;
@ -4214,13 +4178,41 @@ static bool __flush_work(struct work_struct *work, bool from_cancel)
if (WARN_ON(!work->func)) if (WARN_ON(!work->func))
return false; return false;
if (start_flush_work(work, &barr, from_cancel)) { if (!start_flush_work(work, &barr, from_cancel))
wait_for_completion(&barr.done);
destroy_work_on_stack(&barr.work);
return true;
} else {
return false; return false;
/*
* start_flush_work() returned %true. If @from_cancel is set, we know
* that @work must have been executing during start_flush_work() and
* can't currently be queued. Its data must contain OFFQ bits. If @work
* was queued on a BH workqueue, we also know that it was running in the
* BH context and thus can be busy-waited.
*/
data = *work_data_bits(work);
if (from_cancel &&
!WARN_ON_ONCE(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_BH)) {
/*
* On RT, prevent a live lock when %current preempted soft
* interrupt processing or prevents ksoftirqd from running, by
* repeatedly flipping BH off and on. If the BH work item runs on
* a different CPU, this has no effect other than doing the BH
* disable/enable dance for nothing. This is copied from
* kernel/softirq.c::tasklet_unlock_spin_wait().
*/
while (!try_wait_for_completion(&barr.done)) {
if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
local_bh_disable();
local_bh_enable();
} else {
cpu_relax();
}
}
} else {
wait_for_completion(&barr.done);
} }
destroy_work_on_stack(&barr.work);
return true;
} }
/** /**
@ -4236,6 +4228,7 @@ static bool __flush_work(struct work_struct *work, bool from_cancel)
*/ */
bool flush_work(struct work_struct *work) bool flush_work(struct work_struct *work)
{ {
might_sleep();
return __flush_work(work, false); return __flush_work(work, false);
} }
EXPORT_SYMBOL_GPL(flush_work); EXPORT_SYMBOL_GPL(flush_work);
@ -4282,32 +4275,53 @@ bool flush_rcu_work(struct rcu_work *rwork)
} }
EXPORT_SYMBOL(flush_rcu_work); EXPORT_SYMBOL(flush_rcu_work);
/*
 * Bump the disable depth held in @offqd by one. The depth saturates at the
 * largest value that fits in %WORK_OFFQ_DISABLE_BITS; an attempt to go past
 * it leaves the depth unchanged and triggers a one-time warning.
 */
static void work_offqd_disable(struct work_offq_data *offqd)
{
	const unsigned long max = (1lu << WORK_OFFQ_DISABLE_BITS) - 1;

	if (unlikely(offqd->disable >= max)) {
		WARN_ONCE(true, "workqueue: work disable count overflowed\n");
		return;
	}

	offqd->disable++;
}
/*
 * Drop the disable depth held in @offqd by one. A depth that is already zero
 * is left untouched and triggers a one-time warning.
 */
static void work_offqd_enable(struct work_offq_data *offqd)
{
	if (unlikely(offqd->disable == 0)) {
		WARN_ONCE(true, "workqueue: work disable count underflowed\n");
		return;
	}

	offqd->disable--;
}
static bool __cancel_work(struct work_struct *work, u32 cflags) static bool __cancel_work(struct work_struct *work, u32 cflags)
{ {
struct work_offq_data offqd;
unsigned long irq_flags; unsigned long irq_flags;
int ret; int ret;
do { ret = work_grab_pending(work, cflags, &irq_flags);
ret = try_to_grab_pending(work, cflags, &irq_flags);
} while (unlikely(ret == -EAGAIN));
if (unlikely(ret < 0)) work_offqd_unpack(&offqd, *work_data_bits(work));
return false;
set_work_pool_and_clear_pending(work, get_work_pool_id(work), 0); if (cflags & WORK_CANCEL_DISABLE)
work_offqd_disable(&offqd);
set_work_pool_and_clear_pending(work, offqd.pool_id,
work_offqd_pack_flags(&offqd));
local_irq_restore(irq_flags); local_irq_restore(irq_flags);
return ret; return ret;
} }
static bool __cancel_work_sync(struct work_struct *work, u32 cflags) static bool __cancel_work_sync(struct work_struct *work, u32 cflags)
{ {
unsigned long irq_flags;
bool ret; bool ret;
/* claim @work and tell other tasks trying to grab @work to back off */ ret = __cancel_work(work, cflags | WORK_CANCEL_DISABLE);
ret = work_grab_pending(work, cflags, &irq_flags);
mark_work_canceling(work); if (*work_data_bits(work) & WORK_OFFQ_BH)
local_irq_restore(irq_flags); WARN_ON_ONCE(in_hardirq());
else
might_sleep();
/* /*
* Skip __flush_work() during early boot when we know that @work isn't * Skip __flush_work() during early boot when we know that @work isn't
@ -4316,15 +4330,8 @@ static bool __cancel_work_sync(struct work_struct *work, u32 cflags)
if (wq_online) if (wq_online)
__flush_work(work, true); __flush_work(work, true);
/* if (!(cflags & WORK_CANCEL_DISABLE))
* smp_mb() at the end of set_work_pool_and_clear_pending() is paired enable_work(work);
* with prepare_to_wait() above so that either waitqueue_active() is
* visible here or !work_is_canceling() is visible there.
*/
set_work_pool_and_clear_pending(work, WORK_OFFQ_POOL_NONE, 0);
if (waitqueue_active(&wq_cancel_waitq))
__wake_up(&wq_cancel_waitq, TASK_NORMAL, 1, work);
return ret; return ret;
} }
@ -4342,19 +4349,19 @@ EXPORT_SYMBOL(cancel_work);
* cancel_work_sync - cancel a work and wait for it to finish * cancel_work_sync - cancel a work and wait for it to finish
* @work: the work to cancel * @work: the work to cancel
* *
* Cancel @work and wait for its execution to finish. This function * Cancel @work and wait for its execution to finish. This function can be used
* can be used even if the work re-queues itself or migrates to * even if the work re-queues itself or migrates to another workqueue. On return
* another workqueue. On return from this function, @work is * from this function, @work is guaranteed to be not pending or executing on any
* guaranteed to be not pending or executing on any CPU. * CPU as long as there aren't racing enqueues.
* *
* cancel_work_sync(&delayed_work->work) must not be used for * cancel_work_sync(&delayed_work->work) must not be used for delayed_work's.
* delayed_work's. Use cancel_delayed_work_sync() instead. * Use cancel_delayed_work_sync() instead.
* *
* The caller must ensure that the workqueue on which @work was last * Must be called from a sleepable context if @work was last queued on a non-BH
* queued can't be destroyed before this function returns. * workqueue. Can also be called from non-hardirq atomic contexts including BH
* if @work was last queued on a BH workqueue.
* *
* Return: * Returns %true if @work was pending, %false otherwise.
* %true if @work was pending, %false otherwise.
*/ */
bool cancel_work_sync(struct work_struct *work) bool cancel_work_sync(struct work_struct *work)
{ {
@ -4399,6 +4406,108 @@ bool cancel_delayed_work_sync(struct delayed_work *dwork)
} }
EXPORT_SYMBOL(cancel_delayed_work_sync); EXPORT_SYMBOL(cancel_delayed_work_sync);
/**
* disable_work - Disable and cancel a work item
* @work: work item to disable
*
* Disable @work by incrementing its disable count and cancel it if currently
* pending. As long as the disable count is non-zero, any attempt to queue @work
* will fail and return %false. The maximum supported disable depth is 2 to the
* power of %WORK_OFFQ_DISABLE_BITS, currently 65536.
*
* Can be called from any context. Returns %true if @work was pending, %false
* otherwise.
*/
bool disable_work(struct work_struct *work)
{
return __cancel_work(work, WORK_CANCEL_DISABLE);
}
EXPORT_SYMBOL_GPL(disable_work);
/**
* disable_work_sync - Disable, cancel and drain a work item
* @work: work item to disable
*
* Similar to disable_work() but also wait for @work to finish if currently
* executing.
*
* Must be called from a sleepable context if @work was last queued on a non-BH
* workqueue. Can also be called from non-hardirq atomic contexts including BH
* if @work was last queued on a BH workqueue.
*
* Returns %true if @work was pending, %false otherwise.
*/
bool disable_work_sync(struct work_struct *work)
{
return __cancel_work_sync(work, WORK_CANCEL_DISABLE);
}
EXPORT_SYMBOL_GPL(disable_work_sync);
/**
 * enable_work - Enable a work item
 * @work: work item to enable
 *
 * Reverse one disable_work[_sync]() call by decrementing @work's disable
 * count. @work can be queued again only once the count is back at 0. Calling
 * this with a count that is already 0 leaves the count alone and triggers a
 * one-time warning.
 *
 * Can be called from any context. Returns %true if the disable count is 0
 * afterwards. Otherwise, %false.
 */
bool enable_work(struct work_struct *work)
{
	unsigned long irq_flags;
	struct work_offq_data offqd;
	bool fully_enabled;

	/* own PENDING so that the off-queue data can be rewritten safely */
	work_grab_pending(work, 0, &irq_flags);

	work_offqd_unpack(&offqd, *work_data_bits(work));
	work_offqd_enable(&offqd);
	fully_enabled = offqd.disable == 0;

	set_work_pool_and_clear_pending(work, offqd.pool_id,
					work_offqd_pack_flags(&offqd));
	local_irq_restore(irq_flags);

	return fully_enabled;
}
EXPORT_SYMBOL_GPL(enable_work);
/**
* disable_delayed_work - Disable and cancel a delayed work item
* @dwork: delayed work item to disable
*
* disable_work() for delayed work items.
*/
bool disable_delayed_work(struct delayed_work *dwork)
{
return __cancel_work(&dwork->work,
WORK_CANCEL_DELAYED | WORK_CANCEL_DISABLE);
}
EXPORT_SYMBOL_GPL(disable_delayed_work);
/**
* disable_delayed_work_sync - Disable, cancel and drain a delayed work item
* @dwork: delayed work item to disable
*
* disable_work_sync() for delayed work items.
*/
bool disable_delayed_work_sync(struct delayed_work *dwork)
{
return __cancel_work_sync(&dwork->work,
WORK_CANCEL_DELAYED | WORK_CANCEL_DISABLE);
}
EXPORT_SYMBOL_GPL(disable_delayed_work_sync);
/**
 * enable_delayed_work - Enable a delayed work item
 * @dwork: delayed work item to enable
 *
 * The delayed work counterpart of enable_work(); operates on the work item
 * embedded in @dwork.
 */
bool enable_delayed_work(struct delayed_work *dwork)
{
	struct work_struct *work = &dwork->work;

	return enable_work(work);
}
EXPORT_SYMBOL_GPL(enable_delayed_work);
/** /**
* schedule_on_each_cpu - execute a function synchronously on each online CPU * schedule_on_each_cpu - execute a function synchronously on each online CPU
* @func: the function to call * @func: the function to call
@ -4530,6 +4639,8 @@ static void wqattrs_clear_for_pool(struct workqueue_attrs *attrs)
{ {
attrs->affn_scope = WQ_AFFN_NR_TYPES; attrs->affn_scope = WQ_AFFN_NR_TYPES;
attrs->ordered = false; attrs->ordered = false;
if (attrs->affn_strict)
cpumask_copy(attrs->cpumask, cpu_possible_mask);
} }
/* hash value of the content of @attr */ /* hash value of the content of @attr */
@ -4538,11 +4649,12 @@ static u32 wqattrs_hash(const struct workqueue_attrs *attrs)
u32 hash = 0; u32 hash = 0;
hash = jhash_1word(attrs->nice, hash); hash = jhash_1word(attrs->nice, hash);
hash = jhash(cpumask_bits(attrs->cpumask), hash = jhash_1word(attrs->affn_strict, hash);
BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), hash);
hash = jhash(cpumask_bits(attrs->__pod_cpumask), hash = jhash(cpumask_bits(attrs->__pod_cpumask),
BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), hash); BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), hash);
hash = jhash_1word(attrs->affn_strict, hash); if (!attrs->affn_strict)
hash = jhash(cpumask_bits(attrs->cpumask),
BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), hash);
return hash; return hash;
} }
@ -4552,11 +4664,11 @@ static bool wqattrs_equal(const struct workqueue_attrs *a,
{ {
if (a->nice != b->nice) if (a->nice != b->nice)
return false; return false;
if (!cpumask_equal(a->cpumask, b->cpumask)) if (a->affn_strict != b->affn_strict)
return false; return false;
if (!cpumask_equal(a->__pod_cpumask, b->__pod_cpumask)) if (!cpumask_equal(a->__pod_cpumask, b->__pod_cpumask))
return false; return false;
if (a->affn_strict != b->affn_strict) if (!a->affn_strict && !cpumask_equal(a->cpumask, b->cpumask))
return false; return false;
return true; return true;
} }
@ -7148,25 +7260,27 @@ static ssize_t __wq_cpumask_show(struct device *dev,
return written; return written;
} }
static ssize_t wq_unbound_cpumask_show(struct device *dev, static ssize_t cpumask_requested_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
return __wq_cpumask_show(dev, attr, buf, wq_requested_unbound_cpumask);
}
static DEVICE_ATTR_RO(cpumask_requested);
static ssize_t cpumask_isolated_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
return __wq_cpumask_show(dev, attr, buf, wq_isolated_cpumask);
}
static DEVICE_ATTR_RO(cpumask_isolated);
static ssize_t cpumask_show(struct device *dev,
struct device_attribute *attr, char *buf) struct device_attribute *attr, char *buf)
{ {
return __wq_cpumask_show(dev, attr, buf, wq_unbound_cpumask); return __wq_cpumask_show(dev, attr, buf, wq_unbound_cpumask);
} }
static ssize_t wq_requested_cpumask_show(struct device *dev, static ssize_t cpumask_store(struct device *dev,
struct device_attribute *attr, char *buf)
{
return __wq_cpumask_show(dev, attr, buf, wq_requested_unbound_cpumask);
}
static ssize_t wq_isolated_cpumask_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
return __wq_cpumask_show(dev, attr, buf, wq_isolated_cpumask);
}
static ssize_t wq_unbound_cpumask_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count) struct device_attribute *attr, const char *buf, size_t count)
{ {
cpumask_var_t cpumask; cpumask_var_t cpumask;
@ -7182,36 +7296,19 @@ static ssize_t wq_unbound_cpumask_store(struct device *dev,
free_cpumask_var(cpumask); free_cpumask_var(cpumask);
return ret ? ret : count; return ret ? ret : count;
} }
static DEVICE_ATTR_RW(cpumask);
static struct device_attribute wq_sysfs_cpumask_attrs[] = { static struct attribute *wq_sysfs_cpumask_attrs[] = {
__ATTR(cpumask, 0644, wq_unbound_cpumask_show, &dev_attr_cpumask.attr,
wq_unbound_cpumask_store), &dev_attr_cpumask_requested.attr,
__ATTR(cpumask_requested, 0444, wq_requested_cpumask_show, NULL), &dev_attr_cpumask_isolated.attr,
__ATTR(cpumask_isolated, 0444, wq_isolated_cpumask_show, NULL), NULL,
__ATTR_NULL,
}; };
ATTRIBUTE_GROUPS(wq_sysfs_cpumask);
static int __init wq_sysfs_init(void) static int __init wq_sysfs_init(void)
{ {
struct device *dev_root; return subsys_virtual_register(&wq_subsys, wq_sysfs_cpumask_groups);
int err;
err = subsys_virtual_register(&wq_subsys, NULL);
if (err)
return err;
dev_root = bus_get_dev_root(&wq_subsys);
if (dev_root) {
struct device_attribute *attr;
for (attr = wq_sysfs_cpumask_attrs; attr->attr.name; attr++) {
err = device_create_file(dev_root, attr);
if (err)
break;
}
put_device(dev_root);
}
return err;
} }
core_initcall(wq_sysfs_init); core_initcall(wq_sysfs_init);

View File

@ -32,16 +32,13 @@ https://github.com/osandov/drgn.
rescued The number of work items executed by the rescuer. rescued The number of work items executed by the rescuer.
""" """
import sys
import signal import signal
import os
import re import re
import time import time
import json import json
import drgn import drgn
from drgn.helpers.linux.list import list_for_each_entry,list_empty from drgn.helpers.linux.list import list_for_each_entry
from drgn.helpers.linux.cpumask import for_each_possible_cpu
import argparse import argparse
parser = argparse.ArgumentParser(description=desc, parser = argparse.ArgumentParser(description=desc,
@ -54,10 +51,6 @@ parser.add_argument('-j', '--json', action='store_true',
help='Output in json') help='Output in json')
args = parser.parse_args() args = parser.parse_args()
def err(s):
print(s, file=sys.stderr, flush=True)
sys.exit(1)
workqueues = prog['workqueues'] workqueues = prog['workqueues']
WQ_UNBOUND = prog['WQ_UNBOUND'] WQ_UNBOUND = prog['WQ_UNBOUND']