Merge branch 'fixes' into next

Merge our fixes branch to bring in some changes that are prerequisites
for work in next.
This commit is contained in:
Michael Ellerman 2022-11-30 21:46:06 +11:00
commit 611c020239
21 changed files with 264 additions and 84 deletions

View File

@ -147,6 +147,7 @@ config PPC
select ARCH_MIGHT_HAVE_PC_SERIO
select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT
select ARCH_SPLIT_ARG64 if PPC32
select ARCH_STACKWALK
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC_BOOK3S || PPC_8xx || 40x
@ -285,7 +286,7 @@ config PPC
#
config PPC_LONG_DOUBLE_128
depends on PPC64
depends on PPC64 && ALTIVEC
def_bool $(success,test "$(shell,echo __LONG_DOUBLE_128__ | $(CC) -E -P -)" = 1)
config PPC_BARRIER_NOSPEC

View File

@ -32,6 +32,11 @@ static inline void arch_enter_lazy_mmu_mode(void)
if (radix_enabled())
return;
/*
* apply_to_page_range can call us this preempt enabled when
* operating on kernel page tables.
*/
preempt_disable();
batch = this_cpu_ptr(&ppc64_tlb_batch);
batch->active = 1;
}
@ -47,6 +52,7 @@ static inline void arch_leave_lazy_mmu_mode(void)
if (batch->index)
__flush_tlb_pending(batch);
batch->active = 0;
preempt_enable();
}
#define arch_flush_lazy_mmu_mode() do {} while (0)

View File

@ -602,6 +602,7 @@ ____##func(struct pt_regs *regs)
/* kernel/traps.c */
DECLARE_INTERRUPT_HANDLER_NMI(system_reset_exception);
#ifdef CONFIG_PPC_BOOK3S_64
DECLARE_INTERRUPT_HANDLER_RAW(machine_check_early_boot);
DECLARE_INTERRUPT_HANDLER_ASYNC(machine_check_exception_async);
#endif
DECLARE_INTERRUPT_HANDLER_NMI(machine_check_exception);

View File

@ -104,6 +104,13 @@ long sys_ppc_ftruncate64(unsigned int fd, u32 reg4,
unsigned long len1, unsigned long len2);
long sys_ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2,
size_t len, int advice);
long sys_ppc_sync_file_range2(int fd, unsigned int flags,
unsigned int offset1,
unsigned int offset2,
unsigned int nbytes1,
unsigned int nbytes2);
long sys_ppc_fallocate(int fd, int mode, u32 offset1, u32 offset2,
u32 len1, u32 len2);
#endif
#ifdef CONFIG_COMPAT
long compat_sys_mmap2(unsigned long addr, size_t len,

View File

@ -813,6 +813,13 @@ kernel_dbg_exc:
EXCEPTION_COMMON(0x260)
CHECK_NAPPING()
addi r3,r1,STACK_FRAME_OVERHEAD
/*
* XXX: Returning from performance_monitor_exception taken as a
* soft-NMI (Linux irqs disabled) may be risky to use interrupt_return
* and could cause bugs in return or elsewhere. That case should just
* restore registers and return. There is a workaround for one known
* problem in interrupt_exit_kernel_prepare().
*/
bl performance_monitor_exception
b interrupt_return

View File

@ -2357,9 +2357,21 @@ EXC_VIRT_END(performance_monitor, 0x4f00, 0x20)
EXC_COMMON_BEGIN(performance_monitor_common)
GEN_COMMON performance_monitor
addi r3,r1,STACK_FRAME_OVERHEAD
bl performance_monitor_exception
lbz r4,PACAIRQSOFTMASK(r13)
cmpdi r4,IRQS_ENABLED
bne 1f
bl performance_monitor_exception_async
b interrupt_return_srr
1:
bl performance_monitor_exception_nmi
/* Clear MSR_RI before setting SRR0 and SRR1. */
li r9,0
mtmsrd r9,1
kuap_kernel_restore r9, r10
EXCEPTION_RESTORE_REGS hsrr=0
RFI_TO_KERNEL
/**
* Interrupt 0xf20 - Vector Unavailable Interrupt.

View File

@ -374,10 +374,18 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
if (regs_is_unrecoverable(regs))
unrecoverable_exception(regs);
/*
* CT_WARN_ON comes here via program_check_exception,
* so avoid recursion.
* CT_WARN_ON comes here via program_check_exception, so avoid
* recursion.
*
* Skip the assertion on PMIs on 64e to work around a problem caused
* by NMI PMIs incorrectly taking this interrupt return path, it's
* possible for this to hit after interrupt exit to user switches
* context to user. See also the comment in the performance monitor
* handler in exceptions-64e.S
*/
if (TRAP(regs) != INTERRUPT_PROGRAM)
if (!IS_ENABLED(CONFIG_PPC_BOOK3E_64) &&
TRAP(regs) != INTERRUPT_PROGRAM &&
TRAP(regs) != INTERRUPT_PERFMON)
CT_WARN_ON(ct_state() == CONTEXT_USER);
kuap = kuap_get_and_assert_locked();

View File

@ -532,15 +532,24 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel)
* Returning to soft-disabled context.
* Check if a MUST_HARD_MASK interrupt has become pending, in which
* case we need to disable MSR[EE] in the return context.
*
* The MSR[EE] check catches among other things the short incoherency
* in hard_irq_disable() between clearing MSR[EE] and setting
* PACA_IRQ_HARD_DIS.
*/
ld r12,_MSR(r1)
andi. r10,r12,MSR_EE
beq .Lfast_kernel_interrupt_return_\srr\() // EE already disabled
lbz r11,PACAIRQHAPPENED(r13)
andi. r10,r11,PACA_IRQ_MUST_HARD_MASK
beq .Lfast_kernel_interrupt_return_\srr\() // No HARD_MASK pending
bne 1f // HARD_MASK is pending
// No HARD_MASK pending, clear possible HARD_DIS set by interrupt
andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l
stb r11,PACAIRQHAPPENED(r13)
b .Lfast_kernel_interrupt_return_\srr\()
/* Must clear MSR_EE from _MSR */
1: /* Must clear MSR_EE from _MSR */
#ifdef CONFIG_PPC_BOOK3S
li r10,0
/* Clear valid before changing _MSR */

View File

@ -112,7 +112,7 @@ PPC32_SYSCALL_DEFINE6(ppc32_fadvise64,
advice);
}
COMPAT_SYSCALL_DEFINE6(ppc_sync_file_range2,
PPC32_SYSCALL_DEFINE6(ppc_sync_file_range2,
int, fd, unsigned int, flags,
unsigned int, offset1, unsigned int, offset2,
unsigned int, nbytes1, unsigned int, nbytes2)
@ -122,3 +122,14 @@ COMPAT_SYSCALL_DEFINE6(ppc_sync_file_range2,
return ksys_sync_file_range(fd, offset, nbytes, flags);
}
#ifdef CONFIG_PPC32
SYSCALL_DEFINE6(ppc_fallocate,
int, fd, int, mode,
u32, offset1, u32, offset2, u32, len1, u32, len2)
{
return ksys_fallocate(fd, mode,
merge_64(offset1, offset2),
merge_64(len1, len2));
}
#endif

View File

@ -394,8 +394,11 @@
305 common signalfd sys_signalfd compat_sys_signalfd
306 common timerfd_create sys_timerfd_create
307 common eventfd sys_eventfd
308 common sync_file_range2 sys_sync_file_range2 compat_sys_ppc_sync_file_range2
309 nospu fallocate sys_fallocate compat_sys_fallocate
308 32 sync_file_range2 sys_ppc_sync_file_range2 compat_sys_ppc_sync_file_range2
308 64 sync_file_range2 sys_sync_file_range2
308 spu sync_file_range2 sys_sync_file_range2
309 32 fallocate sys_ppc_fallocate compat_sys_fallocate
309 64 fallocate sys_fallocate
310 nospu subpage_prot sys_subpage_prot
311 32 timerfd_settime sys_timerfd_settime32
311 64 timerfd_settime sys_timerfd_settime

View File

@ -142,7 +142,7 @@ SECTIONS
#endif
.data.rel.ro : AT(ADDR(.data.rel.ro) - LOAD_OFFSET) {
*(.data.rel.ro*)
*(.data.rel.ro .data.rel.ro.*)
}
.branch_lt : AT(ADDR(.branch_lt) - LOAD_OFFSET) {

View File

@ -51,6 +51,7 @@ config KVM_BOOK3S_HV_POSSIBLE
config KVM_BOOK3S_32
tristate "KVM support for PowerPC book3s_32 processors"
depends on PPC_BOOK3S_32 && !SMP && !PTE_64BIT
depends on !CONTEXT_TRACKING_USER
select KVM
select KVM_BOOK3S_32_HANDLER
select KVM_BOOK3S_PR_POSSIBLE
@ -105,6 +106,7 @@ config KVM_BOOK3S_64_HV
config KVM_BOOK3S_64_PR
tristate "KVM support without using hypervisor mode in host"
depends on KVM_BOOK3S_64
depends on !CONTEXT_TRACKING_USER
select KVM_BOOK3S_PR_POSSIBLE
help
Support running guest kernels in virtual machines on processors
@ -190,6 +192,7 @@ config KVM_EXIT_TIMING
config KVM_E500V2
bool "KVM support for PowerPC E500v2 processors"
depends on PPC_E500 && !PPC_E500MC
depends on !CONTEXT_TRACKING_USER
select KVM
select KVM_MMIO
select MMU_NOTIFIER
@ -205,6 +208,7 @@ config KVM_E500V2
config KVM_E500MC
bool "KVM support for PowerPC E500MC/E5500/E6500 processors"
depends on PPC_E500MC
depends on !CONTEXT_TRACKING_USER
select KVM
select KVM_MMIO
select KVM_BOOKE_HV

View File

@ -36,7 +36,17 @@ int exit_vmx_usercopy(void)
{
disable_kernel_altivec();
pagefault_enable();
preempt_enable();
preempt_enable_no_resched();
/*
* Must never explicitly call schedule (including preempt_enable())
* while in a kuap-unlocked user copy, because the AMR register will
* not be saved and restored across context switch. However preempt
* kernels need to be preempted as soon as possible if need_resched is
* set and we are preemptible. The hack here is to schedule a
* decrementer to fire here and reschedule for us if necessary.
*/
if (IS_ENABLED(CONFIG_PREEMPT) && need_resched())
set_dec(1);
return 0;
}

View File

@ -43,6 +43,29 @@
static DEFINE_RAW_SPINLOCK(native_tlbie_lock);
#ifdef CONFIG_LOCKDEP
static struct lockdep_map hpte_lock_map =
STATIC_LOCKDEP_MAP_INIT("hpte_lock", &hpte_lock_map);
static void acquire_hpte_lock(void)
{
lock_map_acquire(&hpte_lock_map);
}
static void release_hpte_lock(void)
{
lock_map_release(&hpte_lock_map);
}
#else
static void acquire_hpte_lock(void)
{
}
static void release_hpte_lock(void)
{
}
#endif
static inline unsigned long ___tlbie(unsigned long vpn, int psize,
int apsize, int ssize)
{
@ -220,6 +243,7 @@ static inline void native_lock_hpte(struct hash_pte *hptep)
{
unsigned long *word = (unsigned long *)&hptep->v;
acquire_hpte_lock();
while (1) {
if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word))
break;
@ -234,6 +258,7 @@ static inline void native_unlock_hpte(struct hash_pte *hptep)
{
unsigned long *word = (unsigned long *)&hptep->v;
release_hpte_lock();
clear_bit_unlock(HPTE_LOCK_BIT, word);
}
@ -243,8 +268,11 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
{
struct hash_pte *hptep = htab_address + hpte_group;
unsigned long hpte_v, hpte_r;
unsigned long flags;
int i;
local_irq_save(flags);
if (!(vflags & HPTE_V_BOLTED)) {
DBG_LOW(" insert(group=%lx, vpn=%016lx, pa=%016lx,"
" rflags=%lx, vflags=%lx, psize=%d)\n",
@ -263,8 +291,10 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
hptep++;
}
if (i == HPTES_PER_GROUP)
if (i == HPTES_PER_GROUP) {
local_irq_restore(flags);
return -1;
}
hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
@ -286,10 +316,13 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
* Now set the first dword including the valid bit
* NOTE: this also unlocks the hpte
*/
release_hpte_lock();
hptep->v = cpu_to_be64(hpte_v);
__asm__ __volatile__ ("ptesync" : : : "memory");
local_irq_restore(flags);
return i | (!!(vflags & HPTE_V_SECONDARY) << 3);
}
@ -327,6 +360,7 @@ static long native_hpte_remove(unsigned long hpte_group)
return -1;
/* Invalidate the hpte. NOTE: this also unlocks it */
release_hpte_lock();
hptep->v = 0;
return i;
@ -339,6 +373,9 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
struct hash_pte *hptep = htab_address + slot;
unsigned long hpte_v, want_v;
int ret = 0, local = 0;
unsigned long irqflags;
local_irq_save(irqflags);
want_v = hpte_encode_avpn(vpn, bpsize, ssize);
@ -382,6 +419,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
if (!(flags & HPTE_NOHPTE_UPDATE))
tlbie(vpn, bpsize, apsize, ssize, local);
local_irq_restore(irqflags);
return ret;
}
@ -445,6 +484,9 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
unsigned long vsid;
long slot;
struct hash_pte *hptep;
unsigned long flags;
local_irq_save(flags);
vsid = get_kernel_vsid(ea, ssize);
vpn = hpt_vpn(ea, vsid, ssize);
@ -463,6 +505,8 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
* actual page size will be same.
*/
tlbie(vpn, psize, psize, ssize, 0);
local_irq_restore(flags);
}
/*
@ -476,6 +520,9 @@ static int native_hpte_removebolted(unsigned long ea, int psize, int ssize)
unsigned long vsid;
long slot;
struct hash_pte *hptep;
unsigned long flags;
local_irq_save(flags);
vsid = get_kernel_vsid(ea, ssize);
vpn = hpt_vpn(ea, vsid, ssize);
@ -493,6 +540,9 @@ static int native_hpte_removebolted(unsigned long ea, int psize, int ssize)
/* Invalidate the TLB */
tlbie(vpn, psize, psize, ssize, 0);
local_irq_restore(flags);
return 0;
}
@ -517,10 +567,11 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
/* recheck with locks held */
hpte_v = hpte_get_old_v(hptep);
if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
/* Invalidate the hpte. NOTE: this also unlocks it */
release_hpte_lock();
hptep->v = 0;
else
} else
native_unlock_hpte(hptep);
}
/*
@ -580,10 +631,8 @@ static void native_hugepage_invalidate(unsigned long vsid,
hpte_v = hpte_get_old_v(hptep);
if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
/*
* Invalidate the hpte. NOTE: this also unlocks it
*/
/* Invalidate the hpte. NOTE: this also unlocks it */
release_hpte_lock();
hptep->v = 0;
} else
native_unlock_hpte(hptep);
@ -765,8 +814,10 @@ static void native_flush_hash_range(unsigned long number, int local)
if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
native_unlock_hpte(hptep);
else
else {
release_hpte_lock();
hptep->v = 0;
}
} pte_iterate_hashed_end();
}

View File

@ -404,7 +404,8 @@ EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage);
struct change_memory_parms {
unsigned long start, end, newpp;
unsigned int step, nr_cpus, master_cpu;
unsigned int step, nr_cpus;
atomic_t master_cpu;
atomic_t cpu_counter;
};
@ -478,7 +479,8 @@ static int change_memory_range_fn(void *data)
{
struct change_memory_parms *parms = data;
if (parms->master_cpu != smp_processor_id())
// First CPU goes through, all others wait.
if (atomic_xchg(&parms->master_cpu, 1) == 1)
return chmem_secondary_loop(parms);
// Wait for all but one CPU (this one) to call-in
@ -516,7 +518,7 @@ static bool hash__change_memory_range(unsigned long start, unsigned long end,
chmem_parms.end = end;
chmem_parms.step = step;
chmem_parms.newpp = newpp;
chmem_parms.master_cpu = smp_processor_id();
atomic_set(&chmem_parms.master_cpu, 0);
cpus_read_lock();

View File

@ -1981,7 +1981,7 @@ repeat:
}
#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
static DEFINE_SPINLOCK(linear_map_hash_lock);
static DEFINE_RAW_SPINLOCK(linear_map_hash_lock);
static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
{
@ -2005,10 +2005,10 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
mmu_linear_psize, mmu_kernel_ssize);
BUG_ON (ret < 0);
spin_lock(&linear_map_hash_lock);
raw_spin_lock(&linear_map_hash_lock);
BUG_ON(linear_map_hash_slots[lmi] & 0x80);
linear_map_hash_slots[lmi] = ret | 0x80;
spin_unlock(&linear_map_hash_lock);
raw_spin_unlock(&linear_map_hash_lock);
}
static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
@ -2018,14 +2018,14 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
spin_lock(&linear_map_hash_lock);
raw_spin_lock(&linear_map_hash_lock);
if (!(linear_map_hash_slots[lmi] & 0x80)) {
spin_unlock(&linear_map_hash_lock);
raw_spin_unlock(&linear_map_hash_lock);
return;
}
hidx = linear_map_hash_slots[lmi] & 0x7f;
linear_map_hash_slots[lmi] = 0;
spin_unlock(&linear_map_hash_lock);
raw_spin_unlock(&linear_map_hash_lock);
if (hidx & _PTEIDX_SECONDARY)
hash = ~hash;
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;

View File

@ -113,23 +113,19 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
{
int i;
/* Initialize tail_call_cnt, to be skipped if we do tail calls. */
EMIT(PPC_RAW_LI(_R4, 0));
#define BPF_TAILCALL_PROLOGUE_SIZE 4
EMIT(PPC_RAW_STWU(_R1, _R1, -BPF_PPC_STACKFRAME(ctx)));
if (ctx->seen & SEEN_TAILCALL)
EMIT(PPC_RAW_STW(_R4, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
/* First arg comes in as a 32 bits pointer. */
EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_1), _R3));
EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_1) - 1, 0));
EMIT(PPC_RAW_STWU(_R1, _R1, -BPF_PPC_STACKFRAME(ctx)));
/*
* Initialize tail_call_cnt in stack frame if we do tail calls.
* Otherwise, put in NOPs so that it can be skipped when we are
* invoked through a tail call.
*/
if (ctx->seen & SEEN_TAILCALL)
EMIT(PPC_RAW_STW(bpf_to_ppc(BPF_REG_1) - 1, _R1,
bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
else
EMIT(PPC_RAW_NOP());
#define BPF_TAILCALL_PROLOGUE_SIZE 16
/*
* We need a stack frame, but we don't necessarily need to
@ -170,6 +166,16 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
for (i = BPF_PPC_NVR_MIN; i <= 31; i++)
if (bpf_is_seen_register(ctx, i))
EMIT(PPC_RAW_LWZ(i, _R1, bpf_jit_stack_offsetof(ctx, i)));
if (ctx->seen & SEEN_FUNC)
EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
/* Tear down our stack frame */
EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME(ctx)));
if (ctx->seen & SEEN_FUNC)
EMIT(PPC_RAW_MTLR(_R0));
}
void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
@ -178,16 +184,6 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
bpf_jit_emit_common_epilogue(image, ctx);
/* Tear down our stack frame */
if (ctx->seen & SEEN_FUNC)
EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME(ctx)));
if (ctx->seen & SEEN_FUNC)
EMIT(PPC_RAW_MTLR(_R0));
EMIT(PPC_RAW_BLR());
}
@ -244,7 +240,6 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o
EMIT(PPC_RAW_RLWINM(_R3, b2p_index, 2, 0, 29));
EMIT(PPC_RAW_ADD(_R3, _R3, b2p_bpf_array));
EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_array, ptrs)));
EMIT(PPC_RAW_STW(_R0, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
/*
* if (prog == NULL)
@ -255,19 +250,14 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o
/* goto *(prog->bpf_func + prologue_size); */
EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_prog, bpf_func)));
if (ctx->seen & SEEN_FUNC)
EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
EMIT(PPC_RAW_ADDIC(_R3, _R3, BPF_TAILCALL_PROLOGUE_SIZE));
if (ctx->seen & SEEN_FUNC)
EMIT(PPC_RAW_MTLR(_R0));
EMIT(PPC_RAW_MTCTR(_R3));
EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_1)));
/* Put tail_call_cnt in r4 */
EMIT(PPC_RAW_MR(_R4, _R0));
/* tear restore NVRs, ... */
bpf_jit_emit_common_epilogue(image, ctx);

View File

@ -35,6 +35,7 @@
#include <asm/drmem.h>
#include "pseries.h"
#include "vas.h" /* pseries_vas_dlpar_cpu() */
/*
* This isn't a module but we expose that to userspace
@ -748,6 +749,16 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf,
return -EINVAL;
retval = update_ppp(new_entitled_ptr, NULL);
if (retval == H_SUCCESS || retval == H_CONSTRAINED) {
/*
* The hypervisor assigns VAS resources based
* on entitled capacity for shared mode.
* Reconfig VAS windows based on DLPAR CPU events.
*/
if (pseries_vas_dlpar_cpu() != 0)
retval = H_HARDWARE;
}
} else if (!strcmp(kbuf, "capacity_weight")) {
char *endp;
*new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);

View File

@ -200,16 +200,41 @@ static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
struct vas_user_win_ref *tsk_ref;
int rc;
rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
if (!rc) {
tsk_ref = &txwin->vas_win.task_ref;
vas_dump_crb(&crb);
vas_update_csb(&crb, tsk_ref);
while (atomic_read(&txwin->pending_faults)) {
rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
if (!rc) {
tsk_ref = &txwin->vas_win.task_ref;
vas_dump_crb(&crb);
vas_update_csb(&crb, tsk_ref);
}
atomic_dec(&txwin->pending_faults);
}
return IRQ_HANDLED;
}
/*
* irq_default_primary_handler() can be used only with IRQF_ONESHOT
* which disables IRQ before executing the thread handler and enables
* it after. But this disabling interrupt sets the VAS IRQ OFF
* state in the hypervisor. If the NX generates fault interrupt
* during this window, the hypervisor will not deliver this
* interrupt to the LPAR. So use VAS specific IRQ handler instead
* of calling the default primary handler.
*/
static irqreturn_t pseries_vas_irq_handler(int irq, void *data)
{
struct pseries_vas_window *txwin = data;
/*
* The thread hanlder will process this interrupt if it is
* already running.
*/
atomic_inc(&txwin->pending_faults);
return IRQ_WAKE_THREAD;
}
/*
* Allocate window and setup IRQ mapping.
*/
@ -240,8 +265,9 @@ static int allocate_setup_window(struct pseries_vas_window *txwin,
goto out_irq;
}
rc = request_threaded_irq(txwin->fault_virq, NULL,
pseries_vas_fault_thread_fn, IRQF_ONESHOT,
rc = request_threaded_irq(txwin->fault_virq,
pseries_vas_irq_handler,
pseries_vas_fault_thread_fn, 0,
txwin->name, txwin);
if (rc) {
pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n",
@ -826,6 +852,25 @@ int vas_reconfig_capabilties(u8 type, int new_nr_creds)
mutex_unlock(&vas_pseries_mutex);
return rc;
}
int pseries_vas_dlpar_cpu(void)
{
int new_nr_creds, rc;
rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat,
(u64)virt_to_phys(&hv_cop_caps));
if (!rc) {
new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE, new_nr_creds);
}
if (rc)
pr_err("Failed reconfig VAS capabilities with DLPAR\n");
return rc;
}
/*
* Total number of default credits available (target_credits)
* in LPAR depends on number of cores configured. It varies based on
@ -840,7 +885,15 @@ static int pseries_vas_notifier(struct notifier_block *nb,
struct of_reconfig_data *rd = data;
struct device_node *dn = rd->dn;
const __be32 *intserv = NULL;
int new_nr_creds, len, rc = 0;
int len;
/*
* For shared CPU partition, the hypervisor assigns total credits
* based on entitled core capacity. So updating VAS windows will
* be called from lparcfg_write().
*/
if (is_shared_processor())
return NOTIFY_OK;
if ((action == OF_RECONFIG_ATTACH_NODE) ||
(action == OF_RECONFIG_DETACH_NODE))
@ -852,19 +905,7 @@ static int pseries_vas_notifier(struct notifier_block *nb,
if (!intserv)
return NOTIFY_OK;
rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat,
(u64)virt_to_phys(&hv_cop_caps));
if (!rc) {
new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE,
new_nr_creds);
}
if (rc)
pr_err("Failed reconfig VAS capabilities with DLPAR\n");
return rc;
return pseries_vas_dlpar_cpu();
}
static struct notifier_block pseries_vas_nb = {

View File

@ -132,6 +132,7 @@ struct pseries_vas_window {
u64 flags;
char *name;
int fault_virq;
atomic_t pending_faults; /* Number of pending faults */
};
int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps);
@ -140,10 +141,15 @@ int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps);
#ifdef CONFIG_PPC_VAS
int vas_migration_handler(int action);
int pseries_vas_dlpar_cpu(void);
#else
static inline int vas_migration_handler(int action)
{
return 0;
}
static inline int pseries_vas_dlpar_cpu(void)
{
return 0;
}
#endif
#endif /* _VAS_H */

View File

@ -15,7 +15,7 @@
#endif
#ifndef compat_arg_u64
#ifdef CONFIG_CPU_BIG_ENDIAN
#ifndef CONFIG_CPU_BIG_ENDIAN
#define compat_arg_u64(name) u32 name##_lo, u32 name##_hi
#define compat_arg_u64_dual(name) u32, name##_lo, u32, name##_hi
#else