mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-12 08:48:48 +00:00
0cef77c779
When a single-threaded process has a non-local mm_cpumask, try to use that point to flush the TLBs out of other CPUs in the cpumask. An IPI is used for clearing remote CPUs for a few reasons: - An IPI can end lazy TLB use of the mm, which is required to prevent TLB entries being created on the remote CPU. The alternative is to drop lazy TLB switching completely, which costs 7.5% in a context switch ping-pong test between a process and kernel idle thread. - An IPI can have remote CPUs flush the entire PID, but the local CPU can flush a specific VA. tlbie would require over-flushing of the local CPU (where the process is running). - A single threaded process that is migrated to a different CPU is likely to have a relatively small mm_cpumask, so IPI is reasonable. No other thread can concurrently switch to this mm, because it must have been given a reference to mm_users by the current thread before it can use_mm. mm_users can be asynchronously incremented (by mm_activate or mmget_not_zero), but those users must use remote mm access and can't use_mm or access user address space. Existing code makes this assumption already, for example sparc64 has reset mm_cpumask using this condition since the start of history, see arch/sparc/kernel/smp_64.c. This reduces tlbies for a kernel compile workload from 0.90M to 0.12M, tlbiels are increased significantly due to the PID flushing for cleaning up remote CPUs, and increased local flushes (PID flushes take 128 tlbiels vs 1 tlbie). Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
114 lines
2.9 KiB
C
114 lines
2.9 KiB
C
/*
|
|
* TLB shootdown specifics for powerpc
|
|
*
|
|
* Copyright (C) 2002 Anton Blanchard, IBM Corp.
|
|
* Copyright (C) 2002 Paul Mackerras, IBM Corp.
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*/
|
|
#ifndef _ASM_POWERPC_TLB_H
|
|
#define _ASM_POWERPC_TLB_H
|
|
#ifdef __KERNEL__
|
|
|
|
#ifndef __powerpc64__
|
|
#include <asm/pgtable.h>
|
|
#endif
|
|
#include <asm/pgalloc.h>
|
|
#include <asm/tlbflush.h>
|
|
#ifndef __powerpc64__
|
|
#include <asm/page.h>
|
|
#include <asm/mmu.h>
|
|
#endif
|
|
|
|
#include <linux/pagemap.h>
|
|
|
|
/* Per-VMA start/end hooks: both expand to an empty statement here. */
#define tlb_start_vma(tlb, vma) do { } while (0)
#define tlb_end_vma(tlb, vma) do { } while (0)

/*
 * Each name below is defined to itself, so preprocessor checks such as
 * #ifdef/#ifndef on the name succeed while uses of the name still resolve
 * to the static inline function of the same name in this header.
 * NOTE(review): presumably tested by <asm-generic/tlb.h> to skip its own
 * default definitions -- confirm against that header.
 */
#define __tlb_remove_tlb_entry __tlb_remove_tlb_entry
#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
|
|
|
|
extern void tlb_flush(struct mmu_gather *tlb);
|
|
|
|
/* Get the generic bits... */
|
|
#include <asm-generic/tlb.h>
|
|
|
|
extern void flush_hash_entry(struct mm_struct *mm, pte_t *ptep,
|
|
unsigned long address);
|
|
|
|
/*
 * powerpc implementation of __tlb_remove_tlb_entry().
 *
 * With CONFIG_PPC_STD_MMU_32 set, a PTE that has _PAGE_HASHPTE set gets
 * its hash entry flushed through flush_hash_entry() for tlb->mm at
 * @address.  In every other configuration the body is empty.
 */
static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep,
					  unsigned long address)
{
#ifdef CONFIG_PPC_STD_MMU_32
	/* Nothing to flush unless the PTE is marked _PAGE_HASHPTE. */
	if (!(pte_val(*ptep) & _PAGE_HASHPTE))
		return;

	flush_hash_entry(tlb->mm, ptep, address);
#endif
}
|
|
|
|
static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
|
|
unsigned int page_size)
|
|
{
|
|
if (!tlb->page_size)
|
|
tlb->page_size = page_size;
|
|
else if (tlb->page_size != page_size) {
|
|
tlb_flush_mmu(tlb);
|
|
/*
|
|
* update the page size after flush for the new
|
|
* mmu_gather.
|
|
*/
|
|
tlb->page_size = page_size;
|
|
}
|
|
}
|
|
|
|
#ifdef CONFIG_SMP
|
|
/*
 * SMP: non-zero when mm_cpumask(mm) is a subset of the sibling mask
 * (topology_sibling_cpumask()) of the CPU this code is running on.
 */
static inline int mm_is_core_local(struct mm_struct *mm)
{
	int this_cpu = smp_processor_id();

	return cpumask_subset(mm_cpumask(mm),
			      topology_sibling_cpumask(this_cpu));
}
|
|
|
|
#ifdef CONFIG_PPC_BOOK3S_64
|
|
static inline int mm_is_thread_local(struct mm_struct *mm)
|
|
{
|
|
if (atomic_read(&mm->context.active_cpus) > 1)
|
|
return false;
|
|
return cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm));
|
|
}
|
|
static inline void mm_reset_thread_local(struct mm_struct *mm)
|
|
{
|
|
WARN_ON(atomic_read(&mm->context.copros) > 0);
|
|
/*
|
|
* It's possible for mm_access to take a reference on mm_users to
|
|
* access the remote mm from another thread, but it's not allowed
|
|
* to set mm_cpumask, so mm_users may be > 1 here.
|
|
*/
|
|
WARN_ON(current->mm != mm);
|
|
atomic_set(&mm->context.active_cpus, 1);
|
|
cpumask_clear(mm_cpumask(mm));
|
|
cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
|
|
}
|
|
#else /* CONFIG_PPC_BOOK3S_64 */
|
|
/*
 * SMP, non-Book3S-64: non-zero when mm_cpumask(mm) is exactly the
 * single-CPU mask of the CPU this code is running on.
 */
static inline int mm_is_thread_local(struct mm_struct *mm)
{
	int this_cpu = smp_processor_id();

	return cpumask_equal(mm_cpumask(mm), cpumask_of(this_cpu));
}
|
|
#endif /* !CONFIG_PPC_BOOK3S_64 */
|
|
|
|
#else /* CONFIG_SMP */
|
|
/*
 * !CONFIG_SMP variant: unconditionally reports the mm as core-local.
 * @mm is not examined.
 */
static inline int mm_is_core_local(struct mm_struct *mm)
{
	return 1;
}
|
|
|
|
/*
 * !CONFIG_SMP variant: unconditionally reports the mm as thread-local.
 * @mm is not examined.
 */
static inline int mm_is_thread_local(struct mm_struct *mm)
{
	return 1;
}
|
|
#endif
|
|
|
|
#endif /* __KERNEL__ */
|
|
#endif /* _ASM_POWERPC_TLB_H */
|