linux-next/kernel/bpf/mmap_unlock_work.h
Song Liu 7c7e3d31e7 bpf: Introduce helper bpf_find_vma
In some profiler use cases, it is necessary to map an address to the
backing file, e.g., a shared library. bpf_find_vma helper provides a
flexible way to achieve this. bpf_find_vma maps an address of a task to
the vma (vm_area_struct) for this address, and feed the vma to an callback
BPF function. The callback function is necessary here, as we need to
ensure mmap_sem is unlocked.

It is necessary to lock mmap_sem for find_vma. To lock and unlock mmap_sem
safely when irqs are disable, we use the same mechanism as stackmap with
build_id. Specifically, when irqs are disabled, the unlocked is postponed
in an irq_work. Refactor stackmap.c so that the irq_work is shared among
bpf_find_vma and stackmap helpers.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Tested-by: Hengqi Chen <hengqi.chen@gmail.com>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20211105232330.1936330-2-songliubraving@fb.com
2021-11-07 11:54:51 -08:00

66 lines
1.7 KiB
C

/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (c) 2021 Facebook
*/
#ifndef __MMAP_UNLOCK_WORK_H__
#define __MMAP_UNLOCK_WORK_H__
#include <linux/irq_work.h>
/* irq_work to run mmap_read_unlock() in irq_work */
struct mmap_unlock_irq_work {
struct irq_work irq_work;
struct mm_struct *mm;
};
DECLARE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work);
/*
* We cannot do mmap_read_unlock() when the irq is disabled, because of
* risk to deadlock with rq_lock. To look up vma when the irqs are
* disabled, we need to run mmap_read_unlock() in irq_work. We use a
* percpu variable to do the irq_work. If the irq_work is already used
* by another lookup, we fall over.
*/
static inline bool bpf_mmap_unlock_get_irq_work(struct mmap_unlock_irq_work **work_ptr)
{
struct mmap_unlock_irq_work *work = NULL;
bool irq_work_busy = false;
if (irqs_disabled()) {
if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
work = this_cpu_ptr(&mmap_unlock_work);
if (irq_work_is_busy(&work->irq_work)) {
/* cannot queue more up_read, fallback */
irq_work_busy = true;
}
} else {
/*
* PREEMPT_RT does not allow to trylock mmap sem in
* interrupt disabled context. Force the fallback code.
*/
irq_work_busy = true;
}
}
*work_ptr = work;
return irq_work_busy;
}
static inline void bpf_mmap_unlock_mm(struct mmap_unlock_irq_work *work, struct mm_struct *mm)
{
if (!work) {
mmap_read_unlock(mm);
} else {
work->mm = mm;
/* The lock will be released once we're out of interrupt
* context. Tell lockdep that we've released it now so
* it doesn't complain that we forgot to release it.
*/
rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_);
irq_work_queue(&work->irq_work);
}
}
#endif /* __MMAP_UNLOCK_WORK_H__ */