Merge tag 's390-6.13-2' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux

Pull more s390 updates from Heiko Carstens:

 - Add swap entry for hugetlbfs support

 - Add PTE_MARKER support for hugetlbfs mappings; this fixes a regression
   (possible page fault loop) which was introduced when support for
   UFFDIO_POISON for hugetlbfs was added

 - Add ARCH_HAS_PREEMPT_LAZY and PREEMPT_DYNAMIC support

 - Mark IRQ entries in entry code, so that stack tracers can filter out
   the non-IRQ parts of stack traces. This fixes stack depot capacity
   limit warnings, since without filtering the number of unique stack
   traces is huge

 - In PCI code fix leak of struct zpci_dev object, and fix potential
   double remove of hotplug slot

 - Fix pagefault_disable() / pagefault_enable() unbalance in
   arch_stack_walk_user_common() (a balanced-usage sketch follows this
   list)

 - A couple of inline assembly optimizations, more cmpxchg() to
   try_cmpxchg() conversions (a before/after sketch follows this list),
   and removal of usages of xchg() and cmpxchg() on one and two byte
   memory areas

 - Various other small improvements and cleanups
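
A balanced-usage sketch for the pagefault fix mentioned above. This is
hedged: walk_one_frame() and the consume callback are illustrative
stand-ins, not the kernel's actual signatures; only
pagefault_disable()/pagefault_enable() are the real APIs:

    /* Every exit path must fall through to pagefault_enable(). */
    static void walk_user_stack(bool (*consume)(unsigned long ip))
    {
            unsigned long ip;

            pagefault_disable();
            while (walk_one_frame(&ip)) {          /* hypothetical helper */
                    if (!consume(ip))
                            break;  /* "return" here would skip pagefault_enable() */
            }
            pagefault_enable();
    }

The point of the fix below in arch_stack_walk_user_common() is exactly
this: an early return inside the disabled section becomes a break.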
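
The cmpxchg() to try_cmpxchg() conversions all follow one shape; a
minimal before/after sketch (set_bits_old()/set_bits_new() are
illustrative wrappers, while READ_ONCE(), cmpxchg() and try_cmpxchg()
are the real kernel APIs):

    /* Before: re-read inside the loop, compare the returned value. */
    static void set_bits_old(unsigned long *word, unsigned long bits)
    {
            unsigned long old, new;

            do {
                    old = READ_ONCE(*word);
                    new = old | bits;
            } while (cmpxchg(word, old, new) != old);
    }

    /* After: try_cmpxchg() updates "old" on failure, so the re-read
     * moves out of the loop and the compiler can use the condition
     * code directly.
     */
    static void set_bits_new(unsigned long *word, unsigned long bits)
    {
            unsigned long old = READ_ONCE(*word), new;

            do {
                    new = old | bits;
            } while (!try_cmpxchg(word, &old, new));
    }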

* tag 's390-6.13-2' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: (27 commits)
  Revert "s390/mm: Allow large pages for KASAN shadow mapping"
  s390/spinlock: Use flag output constraint for arch_cmpxchg_niai8()
  s390/spinlock: Use R constraint for arch_load_niai4()
  s390/spinlock: Generate shorter code for arch_spin_unlock()
  s390/spinlock: Remove condition code clobber from arch_spin_unlock()
  s390/spinlock: Use symbolic names in inline assemblies
  s390: Support PREEMPT_DYNAMIC
  s390/pci: Fix potential double remove of hotplug slot
  s390/pci: Fix leak of struct zpci_dev when zpci_add_device() fails
  s390/mm/hugetlbfs: Add missing includes
  s390/mm: Add PTE_MARKER support for hugetlbfs mappings
  s390/mm: Introduce region-third and segment table swap entries
  s390/mm: Introduce region-third and segment table entry present bits
  s390/mm: Rearrange region-third and segment table entry SW bits
  KVM: s390: Increase size of union sca_utility to four bytes
  KVM: s390: Remove one byte cmpxchg() usage
  KVM: s390: Use try_cmpxchg() instead of cmpxchg() loops
  s390/ap: Replace xchg() with WRITE_ONCE()
  s390/mm: Allow large pages for KASAN shadow mapping
  s390: Add ARCH_HAS_PREEMPT_LAZY support
  ...
Commit 509f806f7f by Linus Torvalds, 2024-11-29 10:40:52 -08:00
27 changed files with 359 additions and 208 deletions


@@ -87,6 +87,7 @@ config S390
 select ARCH_HAS_MEMBARRIER_SYNC_CORE
 select ARCH_HAS_MEM_ENCRYPT
 select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
+select ARCH_HAS_PREEMPT_LAZY
 select ARCH_HAS_PTE_SPECIAL
 select ARCH_HAS_SCALED_CPUTIME
 select ARCH_HAS_SET_DIRECT_MAP
@@ -218,6 +219,7 @@ config S390
 select HAVE_PERF_EVENTS
 select HAVE_PERF_REGS
 select HAVE_PERF_USER_STACK_DUMP
+select HAVE_PREEMPT_DYNAMIC_KEY
 select HAVE_REGS_AND_STACK_ACCESS_API
 select HAVE_RELIABLE_STACKTRACE
 select HAVE_RETHOOK


@@ -66,14 +66,15 @@ typedef int (debug_header_proc_t) (debug_info_t *id,
 struct debug_view *view,
 int area,
 debug_entry_t *entry,
-char *out_buf);
+char *out_buf, size_t out_buf_size);
 typedef int (debug_format_proc_t) (debug_info_t *id,
 struct debug_view *view, char *out_buf,
 size_t out_buf_size,
 const char *in_buf);
 typedef int (debug_prolog_proc_t) (debug_info_t *id,
 struct debug_view *view,
-char *out_buf);
+char *out_buf, size_t out_buf_size);
 typedef int (debug_input_proc_t) (debug_info_t *id,
 struct debug_view *view,
 struct file *file,
@@ -81,7 +82,8 @@ typedef int (debug_input_proc_t) (debug_info_t *id,
 size_t in_buf_size, loff_t *offset);
 int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view,
-int area, debug_entry_t *entry, char *out_buf);
+int area, debug_entry_t *entry,
+char *out_buf, size_t out_buf_size);
 struct debug_view {
 char name[DEBUG_MAX_NAME_LEN];


@@ -17,8 +17,8 @@
 #define GMAP_NOTIFY_MPROT 0x1
 /* Status bits only for huge segment entries */
-#define _SEGMENT_ENTRY_GMAP_IN 0x8000 /* invalidation notify bit */
-#define _SEGMENT_ENTRY_GMAP_UC 0x4000 /* dirty (migration) */
+#define _SEGMENT_ENTRY_GMAP_IN 0x0800 /* invalidation notify bit */
+#define _SEGMENT_ENTRY_GMAP_UC 0x0002 /* dirty (migration) */
 /**
  * struct gmap_struct - guest address space


@@ -10,6 +10,8 @@
 #define _ASM_S390_HUGETLB_H
 #include <linux/pgtable.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
 #include <asm/page.h>
 #define hugepages_supported() (MACHINE_HAS_EDAT1)
@@ -78,7 +80,7 @@ static inline int huge_pte_none(pte_t pte)
 #define __HAVE_ARCH_HUGE_PTE_NONE_MOSTLY
 static inline int huge_pte_none_mostly(pte_t pte)
 {
-return huge_pte_none(pte);
+return huge_pte_none(pte) || is_pte_marker(pte);
 }
 #define __HAVE_ARCH_HUGE_PTE_MKUFFD_WP


@@ -94,11 +94,16 @@ union ipte_control {
 };
 };
+/*
+ * Utility is defined as two bytes but having it four bytes wide
+ * generates more efficient code. Since the following bytes are
+ * reserved this makes no functional difference.
+ */
 union sca_utility {
-__u16 val;
+__u32 val;
 struct {
-__u16 mtcr : 1;
-__u16 reserved : 15;
+__u32 mtcr : 1;
+__u32 : 31;
 };
 };
@@ -107,7 +112,7 @@ struct bsca_block {
 __u64 reserved[5];
 __u64 mcn;
 union sca_utility utility;
-__u8 reserved2[6];
+__u8 reserved2[4];
 struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS];
 };
@@ -115,7 +120,7 @@ struct esca_block {
 union ipte_control ipte_control;
 __u64 reserved1[6];
 union sca_utility utility;
-__u8 reserved2[6];
+__u8 reserved2[4];
 __u64 mcn[4];
 __u64 reserved3[20];
 struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];


@@ -277,7 +277,8 @@ static inline int is_module_addr(void *addr)
 #define _REGION1_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID)
 #define _REGION2_ENTRY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_LENGTH)
 #define _REGION2_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID)
-#define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH)
+#define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH | \
+ _REGION3_ENTRY_PRESENT)
 #define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID)
 #define _REGION3_ENTRY_HARDWARE_BITS 0xfffffffffffff6ffUL
@@ -285,18 +286,27 @@ static inline int is_module_addr(void *addr)
 #define _REGION3_ENTRY_ORIGIN_LARGE ~0x7fffffffUL /* large page address */
 #define _REGION3_ENTRY_DIRTY 0x2000 /* SW region dirty bit */
 #define _REGION3_ENTRY_YOUNG 0x1000 /* SW region young bit */
+#define _REGION3_ENTRY_COMM 0x0010 /* Common-Region, marks swap entry */
 #define _REGION3_ENTRY_LARGE 0x0400 /* RTTE-format control, large page */
-#define _REGION3_ENTRY_WRITE 0x0002 /* SW region write bit */
-#define _REGION3_ENTRY_READ 0x0001 /* SW region read bit */
+#define _REGION3_ENTRY_WRITE 0x8000 /* SW region write bit */
+#define _REGION3_ENTRY_READ 0x4000 /* SW region read bit */
 #ifdef CONFIG_MEM_SOFT_DIRTY
-#define _REGION3_ENTRY_SOFT_DIRTY 0x4000 /* SW region soft dirty bit */
+#define _REGION3_ENTRY_SOFT_DIRTY 0x0002 /* SW region soft dirty bit */
 #else
 #define _REGION3_ENTRY_SOFT_DIRTY 0x0000 /* SW region soft dirty bit */
 #endif
 #define _REGION_ENTRY_BITS 0xfffffffffffff22fUL
+/*
+ * SW region present bit. For non-leaf region-third-table entries, bits 62-63
+ * indicate the TABLE LENGTH and both must be set to 1. But such entries
+ * would always be considered as present, so it is safe to use bit 63 as
+ * PRESENT bit for PUD.
+ */
+#define _REGION3_ENTRY_PRESENT 0x0001
 /* Bits in the segment table entry */
 #define _SEGMENT_ENTRY_BITS 0xfffffffffffffe3fUL
 #define _SEGMENT_ENTRY_HARDWARE_BITS 0xfffffffffffffe3cUL
@@ -308,21 +318,29 @@ static inline int is_module_addr(void *addr)
 #define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */
 #define _SEGMENT_ENTRY_TYPE_MASK 0x0c /* segment table type mask */
-#define _SEGMENT_ENTRY (0)
+#define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PRESENT)
 #define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID)
 #define _SEGMENT_ENTRY_DIRTY 0x2000 /* SW segment dirty bit */
 #define _SEGMENT_ENTRY_YOUNG 0x1000 /* SW segment young bit */
+#define _SEGMENT_ENTRY_COMM 0x0010 /* Common-Segment, marks swap entry */
 #define _SEGMENT_ENTRY_LARGE 0x0400 /* STE-format control, large page */
-#define _SEGMENT_ENTRY_WRITE 0x0002 /* SW segment write bit */
-#define _SEGMENT_ENTRY_READ 0x0001 /* SW segment read bit */
+#define _SEGMENT_ENTRY_WRITE 0x8000 /* SW segment write bit */
+#define _SEGMENT_ENTRY_READ 0x4000 /* SW segment read bit */
 #ifdef CONFIG_MEM_SOFT_DIRTY
-#define _SEGMENT_ENTRY_SOFT_DIRTY 0x4000 /* SW segment soft dirty bit */
+#define _SEGMENT_ENTRY_SOFT_DIRTY 0x0002 /* SW segment soft dirty bit */
 #else
 #define _SEGMENT_ENTRY_SOFT_DIRTY 0x0000 /* SW segment soft dirty bit */
 #endif
+#define _SEGMENT_ENTRY_PRESENT 0x0001 /* SW segment present bit */
+/* Common bits in region and segment table entries, for swap entries */
+#define _RST_ENTRY_COMM 0x0010 /* Common-Region/Segment, marks swap entry */
+#define _RST_ENTRY_INVALID 0x0020 /* invalid region/segment table entry */
 #define _CRST_ENTRIES 2048 /* number of region/segment table entries */
 #define _PAGE_ENTRIES 256 /* number of page table entries */
@@ -454,17 +472,22 @@ static inline int is_module_addr(void *addr)
 /*
  * Segment entry (large page) protection definitions.
  */
-#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_INVALID | \
+#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_PRESENT | \
+ _SEGMENT_ENTRY_INVALID | \
 _SEGMENT_ENTRY_PROTECT)
-#define SEGMENT_RO __pgprot(_SEGMENT_ENTRY_PROTECT | \
+#define SEGMENT_RO __pgprot(_SEGMENT_ENTRY_PRESENT | \
+ _SEGMENT_ENTRY_PROTECT | \
 _SEGMENT_ENTRY_READ | \
 _SEGMENT_ENTRY_NOEXEC)
-#define SEGMENT_RX __pgprot(_SEGMENT_ENTRY_PROTECT | \
+#define SEGMENT_RX __pgprot(_SEGMENT_ENTRY_PRESENT | \
+ _SEGMENT_ENTRY_PROTECT | \
 _SEGMENT_ENTRY_READ)
-#define SEGMENT_RW __pgprot(_SEGMENT_ENTRY_READ | \
+#define SEGMENT_RW __pgprot(_SEGMENT_ENTRY_PRESENT | \
+ _SEGMENT_ENTRY_READ | \
 _SEGMENT_ENTRY_WRITE | \
 _SEGMENT_ENTRY_NOEXEC)
-#define SEGMENT_RWX __pgprot(_SEGMENT_ENTRY_READ | \
+#define SEGMENT_RWX __pgprot(_SEGMENT_ENTRY_PRESENT | \
+ _SEGMENT_ENTRY_READ | \
 _SEGMENT_ENTRY_WRITE)
 #define SEGMENT_KERNEL __pgprot(_SEGMENT_ENTRY | \
 _SEGMENT_ENTRY_LARGE | \
@@ -491,6 +514,7 @@ static inline int is_module_addr(void *addr)
 */
 #define REGION3_KERNEL __pgprot(_REGION_ENTRY_TYPE_R3 | \
+ _REGION3_ENTRY_PRESENT | \
 _REGION3_ENTRY_LARGE | \
 _REGION3_ENTRY_READ | \
 _REGION3_ENTRY_WRITE | \
@@ -498,12 +522,14 @@ static inline int is_module_addr(void *addr)
 _REGION3_ENTRY_DIRTY | \
 _REGION_ENTRY_NOEXEC)
 #define REGION3_KERNEL_RO __pgprot(_REGION_ENTRY_TYPE_R3 | \
+ _REGION3_ENTRY_PRESENT | \
 _REGION3_ENTRY_LARGE | \
 _REGION3_ENTRY_READ | \
 _REGION3_ENTRY_YOUNG | \
 _REGION_ENTRY_PROTECT | \
 _REGION_ENTRY_NOEXEC)
 #define REGION3_KERNEL_EXEC __pgprot(_REGION_ENTRY_TYPE_R3 | \
+ _REGION3_ENTRY_PRESENT | \
 _REGION3_ENTRY_LARGE | \
 _REGION3_ENTRY_READ | \
 _REGION3_ENTRY_WRITE | \
@@ -746,7 +772,7 @@ static inline int pud_present(pud_t pud)
 {
 if (pud_folded(pud))
 return 1;
-return (pud_val(pud) & _REGION_ENTRY_ORIGIN) != 0UL;
+return (pud_val(pud) & _REGION3_ENTRY_PRESENT) != 0;
 }
 static inline int pud_none(pud_t pud)
@@ -761,13 +787,18 @@ static inline bool pud_leaf(pud_t pud)
 {
 if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) != _REGION_ENTRY_TYPE_R3)
 return 0;
-return !!(pud_val(pud) & _REGION3_ENTRY_LARGE);
+return (pud_present(pud) && (pud_val(pud) & _REGION3_ENTRY_LARGE) != 0);
 }
+static inline int pmd_present(pmd_t pmd)
+{
+return (pmd_val(pmd) & _SEGMENT_ENTRY_PRESENT) != 0;
+}
 #define pmd_leaf pmd_leaf
 static inline bool pmd_leaf(pmd_t pmd)
 {
-return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0;
+return (pmd_present(pmd) && (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0);
 }
 static inline int pmd_bad(pmd_t pmd)
@@ -799,11 +830,6 @@ static inline int p4d_bad(p4d_t p4d)
 return (p4d_val(p4d) & ~_REGION_ENTRY_BITS) != 0;
 }
-static inline int pmd_present(pmd_t pmd)
-{
-return pmd_val(pmd) != _SEGMENT_ENTRY_EMPTY;
-}
 static inline int pmd_none(pmd_t pmd)
 {
 return pmd_val(pmd) == _SEGMENT_ENTRY_EMPTY;
@@ -1851,7 +1877,7 @@ static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
 static inline int pmd_trans_huge(pmd_t pmd)
 {
-return pmd_val(pmd) & _SEGMENT_ENTRY_LARGE;
+return pmd_leaf(pmd);
 }
 #define has_transparent_hugepage has_transparent_hugepage
@@ -1911,6 +1937,53 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
 #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+/*
+ * 64 bit swap entry format for REGION3 and SEGMENT table entries (RSTE)
+ * Bits 59 and 63 are used to indicate the swap entry. Bit 58 marks the rste
+ * as invalid.
+ * A swap entry is indicated by bit pattern (rste & 0x011) == 0x010
+ * | offset |Xtype |11TT|S0|
+ * |0000000000111111111122222222223333333333444444444455|555555|5566|66|
+ * |0123456789012345678901234567890123456789012345678901|234567|8901|23|
+ *
+ * Bits 0-51 store the offset.
+ * Bits 53-57 store the type.
+ * Bit 62 (S) is used for softdirty tracking.
+ * Bits 60-61 (TT) indicate the table type: 0x01 for REGION3 and 0x00 for SEGMENT.
+ * Bit 52 (X) is unused.
+ */
+#define __SWP_OFFSET_MASK_RSTE ((1UL << 52) - 1)
+#define __SWP_OFFSET_SHIFT_RSTE 12
+#define __SWP_TYPE_MASK_RSTE ((1UL << 5) - 1)
+#define __SWP_TYPE_SHIFT_RSTE 6
+/*
+ * TT bits set to 0x00 == SEGMENT. For REGION3 entries, caller must add R3
+ * bits 0x01. See also __set_huge_pte_at().
+ */
+static inline unsigned long mk_swap_rste(unsigned long type, unsigned long offset)
+{
+unsigned long rste;
+rste = _RST_ENTRY_INVALID | _RST_ENTRY_COMM;
+rste |= (offset & __SWP_OFFSET_MASK_RSTE) << __SWP_OFFSET_SHIFT_RSTE;
+rste |= (type & __SWP_TYPE_MASK_RSTE) << __SWP_TYPE_SHIFT_RSTE;
+return rste;
+}
+static inline unsigned long __swp_type_rste(swp_entry_t entry)
+{
+return (entry.val >> __SWP_TYPE_SHIFT_RSTE) & __SWP_TYPE_MASK_RSTE;
+}
+static inline unsigned long __swp_offset_rste(swp_entry_t entry)
+{
+return (entry.val >> __SWP_OFFSET_SHIFT_RSTE) & __SWP_OFFSET_MASK_RSTE;
+}
+#define __rste_to_swp_entry(rste) ((swp_entry_t) { rste })
 extern int vmem_add_mapping(unsigned long start, unsigned long size);
 extern void vmem_remove_mapping(unsigned long start, unsigned long size);
 extern int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc);


@@ -130,10 +130,24 @@ static __always_inline bool should_resched(int preempt_offset)
 #define init_idle_preempt_count(p, cpu) do { } while (0)
 #ifdef CONFIG_PREEMPTION
-extern void preempt_schedule(void);
-#define __preempt_schedule() preempt_schedule()
-extern void preempt_schedule_notrace(void);
-#define __preempt_schedule_notrace() preempt_schedule_notrace()
+void preempt_schedule(void);
+void preempt_schedule_notrace(void);
+#ifdef CONFIG_PREEMPT_DYNAMIC
+void dynamic_preempt_schedule(void);
+void dynamic_preempt_schedule_notrace(void);
+#define __preempt_schedule() dynamic_preempt_schedule()
+#define __preempt_schedule_notrace() dynamic_preempt_schedule_notrace()
+#else /* CONFIG_PREEMPT_DYNAMIC */
+#define __preempt_schedule() preempt_schedule()
+#define __preempt_schedule_notrace() preempt_schedule_notrace()
+#endif /* CONFIG_PREEMPT_DYNAMIC */
 #endif /* CONFIG_PREEMPTION */
 #endif /* __ASM_PREEMPT_H */


@@ -82,9 +82,10 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp)
 kcsan_release();
 asm_inline volatile(
 ALTERNATIVE("nop", ".insn rre,0xb2fa0000,7,0", ALT_FACILITY(49)) /* NIAI 7 */
-" sth %1,%0\n"
-: "=R" (((unsigned short *) &lp->lock)[1])
-: "d" (0) : "cc", "memory");
+" mvhhi %[lock],0\n"
+: [lock] "=Q" (((unsigned short *)&lp->lock)[1])
+:
+: "memory");
 }
 /*


@@ -61,44 +61,45 @@ void arch_setup_new_exec(void);
 /*
  * thread information flags bit numbers
  */
+/* _TIF_WORK bits */
 #define TIF_NOTIFY_RESUME 0 /* callback before returning to user */
 #define TIF_SIGPENDING 1 /* signal pending */
 #define TIF_NEED_RESCHED 2 /* rescheduling necessary */
-#define TIF_UPROBE 3 /* breakpointed or single-stepping */
-#define TIF_GUARDED_STORAGE 4 /* load guarded storage control block */
+#define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling needed */
+#define TIF_UPROBE 4 /* breakpointed or single-stepping */
 #define TIF_PATCH_PENDING 5 /* pending live patching update */
 #define TIF_PGSTE 6 /* New mm's will use 4K page tables */
 #define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */
+#define TIF_GUARDED_STORAGE 8 /* load guarded storage control block */
 #define TIF_ISOLATE_BP_GUEST 9 /* Run KVM guests with isolated BP */
 #define TIF_PER_TRAP 10 /* Need to handle PER trap on exit to usermode */
 #define TIF_31BIT 16 /* 32bit process */
 #define TIF_MEMDIE 17 /* is terminating due to OOM killer */
 #define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */
 #define TIF_SINGLE_STEP 19 /* This task is single stepped */
 #define TIF_BLOCK_STEP 20 /* This task is block stepped */
 #define TIF_UPROBE_SINGLESTEP 21 /* This task is uprobe single stepped */
+/* _TIF_TRACE bits */
 #define TIF_SYSCALL_TRACE 24 /* syscall trace active */
 #define TIF_SYSCALL_AUDIT 25 /* syscall auditing active */
 #define TIF_SECCOMP 26 /* secure computing */
 #define TIF_SYSCALL_TRACEPOINT 27 /* syscall tracepoint instrumentation */
 #define _TIF_NOTIFY_RESUME BIT(TIF_NOTIFY_RESUME)
-#define _TIF_NOTIFY_SIGNAL BIT(TIF_NOTIFY_SIGNAL)
 #define _TIF_SIGPENDING BIT(TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED BIT(TIF_NEED_RESCHED)
+#define _TIF_NEED_RESCHED_LAZY BIT(TIF_NEED_RESCHED_LAZY)
 #define _TIF_UPROBE BIT(TIF_UPROBE)
-#define _TIF_GUARDED_STORAGE BIT(TIF_GUARDED_STORAGE)
 #define _TIF_PATCH_PENDING BIT(TIF_PATCH_PENDING)
 #define _TIF_PGSTE BIT(TIF_PGSTE)
+#define _TIF_NOTIFY_SIGNAL BIT(TIF_NOTIFY_SIGNAL)
+#define _TIF_GUARDED_STORAGE BIT(TIF_GUARDED_STORAGE)
 #define _TIF_ISOLATE_BP_GUEST BIT(TIF_ISOLATE_BP_GUEST)
 #define _TIF_PER_TRAP BIT(TIF_PER_TRAP)
 #define _TIF_31BIT BIT(TIF_31BIT)
 #define _TIF_MEMDIE BIT(TIF_MEMDIE)
 #define _TIF_RESTORE_SIGMASK BIT(TIF_RESTORE_SIGMASK)
 #define _TIF_SINGLE_STEP BIT(TIF_SINGLE_STEP)
 #define _TIF_BLOCK_STEP BIT(TIF_BLOCK_STEP)
 #define _TIF_UPROBE_SINGLESTEP BIT(TIF_UPROBE_SINGLESTEP)
 #define _TIF_SYSCALL_TRACE BIT(TIF_SYSCALL_TRACE)
 #define _TIF_SYSCALL_AUDIT BIT(TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP BIT(TIF_SECCOMP)


@@ -46,11 +46,6 @@ static inline void __tlb_flush_mm(struct mm_struct *mm)
 {
 unsigned long gmap_asce;
-/*
- * If the machine has IDTE we prefer to do a per mm flush
- * on all cpus instead of doing a local flush if the mm
- * only ran on the local cpu.
- */
 preempt_disable();
 atomic_inc(&mm->context.flush_count);
 /* Reset TLB flush mask */


@@ -77,12 +77,14 @@ static debug_info_t *debug_info_create(const char *name, int pages_per_area,
 static void debug_info_get(debug_info_t *);
 static void debug_info_put(debug_info_t *);
 static int debug_prolog_level_fn(debug_info_t *id,
-struct debug_view *view, char *out_buf);
+struct debug_view *view, char *out_buf,
+size_t out_buf_size);
 static int debug_input_level_fn(debug_info_t *id, struct debug_view *view,
 struct file *file, const char __user *user_buf,
 size_t user_buf_size, loff_t *offset);
 static int debug_prolog_pages_fn(debug_info_t *id,
-struct debug_view *view, char *out_buf);
+struct debug_view *view, char *out_buf,
+size_t out_buf_size);
 static int debug_input_pages_fn(debug_info_t *id, struct debug_view *view,
 struct file *file, const char __user *user_buf,
 size_t user_buf_size, loff_t *offset);
@@ -90,9 +92,11 @@ static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view,
 struct file *file, const char __user *user_buf,
 size_t user_buf_size, loff_t *offset);
 static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
-char *out_buf, const char *in_buf);
+char *out_buf, size_t out_buf_size,
+const char *in_buf);
 static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
-char *out_buf, const char *inbuf);
+char *out_buf, size_t out_buf_size,
+const char *inbuf);
 static void debug_areas_swap(debug_info_t *a, debug_info_t *b);
 static void debug_events_append(debug_info_t *dest, debug_info_t *src);
@@ -391,8 +395,10 @@ static int debug_format_entry(file_private_info_t *p_info)
 if (p_info->act_entry == DEBUG_PROLOG_ENTRY) {
 /* print prolog */
-if (view->prolog_proc)
-len += view->prolog_proc(id_snap, view, p_info->temp_buf);
+if (view->prolog_proc) {
+len += view->prolog_proc(id_snap, view, p_info->temp_buf,
+sizeof(p_info->temp_buf));
+}
 goto out;
 }
 if (!id_snap->areas) /* this is true, if we have a prolog only view */
@@ -402,12 +408,16 @@ static int debug_format_entry(file_private_info_t *p_info)
 if (act_entry->clock == 0LL)
 goto out; /* empty entry */
-if (view->header_proc)
+if (view->header_proc) {
 len += view->header_proc(id_snap, view, p_info->act_area,
-act_entry, p_info->temp_buf + len);
-if (view->format_proc)
+act_entry, p_info->temp_buf + len,
+sizeof(p_info->temp_buf) - len);
+}
+if (view->format_proc) {
 len += view->format_proc(id_snap, view, p_info->temp_buf + len,
+sizeof(p_info->temp_buf) - len,
 DEBUG_DATA(act_entry));
+}
 out:
 return len;
 }
@@ -1292,9 +1302,9 @@ static inline int debug_get_uint(char *buf)
  */
 static int debug_prolog_pages_fn(debug_info_t *id, struct debug_view *view,
-char *out_buf)
+char *out_buf, size_t out_buf_size)
 {
-return sprintf(out_buf, "%i\n", id->pages_per_area);
+return scnprintf(out_buf, out_buf_size, "%i\n", id->pages_per_area);
 }
 /*
@@ -1341,14 +1351,14 @@ static int debug_input_pages_fn(debug_info_t *id, struct debug_view *view,
  * prints out actual debug level
  */
 static int debug_prolog_level_fn(debug_info_t *id, struct debug_view *view,
-char *out_buf)
+char *out_buf, size_t out_buf_size)
 {
 int rc = 0;
 if (id->level == DEBUG_OFF_LEVEL)
-rc = sprintf(out_buf, "-\n");
+rc = scnprintf(out_buf, out_buf_size, "-\n");
 else
-rc = sprintf(out_buf, "%i\n", id->level);
+rc = scnprintf(out_buf, out_buf_size, "%i\n", id->level);
 return rc;
 }
@@ -1465,22 +1475,24 @@ static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view,
  * prints debug data in hex/ascii format
  */
 static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
-char *out_buf, const char *in_buf)
+char *out_buf, size_t out_buf_size, const char *in_buf)
 {
 int i, rc = 0;
-for (i = 0; i < id->buf_size; i++)
-rc += sprintf(out_buf + rc, "%02x ", ((unsigned char *) in_buf)[i]);
-rc += sprintf(out_buf + rc, "| ");
+for (i = 0; i < id->buf_size; i++) {
+rc += scnprintf(out_buf + rc, out_buf_size - rc,
+"%02x ", ((unsigned char *)in_buf)[i]);
+}
+rc += scnprintf(out_buf + rc, out_buf_size - rc, "| ");
 for (i = 0; i < id->buf_size; i++) {
 unsigned char c = in_buf[i];
 if (isascii(c) && isprint(c))
-rc += sprintf(out_buf + rc, "%c", c);
+rc += scnprintf(out_buf + rc, out_buf_size - rc, "%c", c);
 else
-rc += sprintf(out_buf + rc, ".");
+rc += scnprintf(out_buf + rc, out_buf_size - rc, ".");
 }
-rc += sprintf(out_buf + rc, "\n");
+rc += scnprintf(out_buf + rc, out_buf_size - rc, "\n");
 return rc;
 }
@@ -1488,7 +1500,8 @@ static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
  * prints header for debug entry
  */
 int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view,
-int area, debug_entry_t *entry, char *out_buf)
+int area, debug_entry_t *entry, char *out_buf,
+size_t out_buf_size)
 {
 unsigned long sec, usec;
 unsigned long caller;
@@ -1505,9 +1518,9 @@ int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view,
 else
 except_str = "-";
 caller = (unsigned long) entry->caller;
-rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %04u %px ",
-area, sec, usec, level, except_str,
-entry->cpu, (void *)caller);
+rc += scnprintf(out_buf, out_buf_size, "%02i %011ld:%06lu %1u %1s %04u %px ",
+area, sec, usec, level, except_str,
+entry->cpu, (void *)caller);
 return rc;
 }
 EXPORT_SYMBOL(debug_dflt_header_fn);
@@ -1520,7 +1533,7 @@ EXPORT_SYMBOL(debug_dflt_header_fn);
 #define DEBUG_SPRINTF_MAX_ARGS 10
 static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
-char *out_buf, const char *inbuf)
+char *out_buf, size_t out_buf_size, const char *inbuf)
 {
 debug_sprintf_entry_t *curr_event = (debug_sprintf_entry_t *)inbuf;
 int num_longs, num_used_args = 0, i, rc = 0;
@@ -1533,8 +1546,9 @@ static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
 goto out; /* bufsize of entry too small */
 if (num_longs == 1) {
 /* no args, we use only the string */
-strcpy(out_buf, curr_event->string);
-rc = strlen(curr_event->string);
+rc = strscpy(out_buf, curr_event->string, out_buf_size);
+if (rc == -E2BIG)
+rc = out_buf_size;
 goto out;
 }
@@ -1546,12 +1560,13 @@ static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
 for (i = 0; i < num_used_args; i++)
 index[i] = i;
-rc = sprintf(out_buf, curr_event->string, curr_event->args[index[0]],
-curr_event->args[index[1]], curr_event->args[index[2]],
-curr_event->args[index[3]], curr_event->args[index[4]],
-curr_event->args[index[5]], curr_event->args[index[6]],
-curr_event->args[index[7]], curr_event->args[index[8]],
-curr_event->args[index[9]]);
+rc = scnprintf(out_buf, out_buf_size,
+curr_event->string, curr_event->args[index[0]],
+curr_event->args[index[1]], curr_event->args[index[2]],
+curr_event->args[index[3]], curr_event->args[index[4]],
+curr_event->args[index[5]], curr_event->args[index[6]],
+curr_event->args[index[7]], curr_event->args[index[8]],
+curr_event->args[index[9]]);
 out:
 return rc;
 }


@@ -430,9 +430,13 @@ SYM_CODE_START(\name)
 SYM_CODE_END(\name)
 .endm
+.section .irqentry.text, "ax"
 INT_HANDLER ext_int_handler,__LC_EXT_OLD_PSW,do_ext_irq
 INT_HANDLER io_int_handler,__LC_IO_OLD_PSW,do_io_irq
+.section .kprobes.text, "ax"
 /*
  * Machine check handler routines
  */


@@ -489,6 +489,12 @@ int __init arch_init_kprobes(void)
 return 0;
 }
+int __init arch_populate_kprobe_blacklist(void)
+{
+return kprobe_add_area_blacklist((unsigned long)__irqentry_text_start,
+(unsigned long)__irqentry_text_end);
+}
 int arch_trampoline_kprobe(struct kprobe *p)
 {
 return 0;


@@ -180,39 +180,27 @@ static int sf_buffer_available(struct cpu_hw_sf *cpuhw)
  */
 static void free_sampling_buffer(struct sf_buffer *sfb)
 {
-unsigned long *sdbt, *curr;
-if (!sfb->sdbt)
-return;
+unsigned long *sdbt, *curr, *head;
 sdbt = sfb->sdbt;
-curr = sdbt;
+if (!sdbt)
+return;
+sfb->sdbt = NULL;
 /* Free the SDBT after all SDBs are processed... */
-while (1) {
-if (!*curr || !sdbt)
-break;
-/* Process table-link entries */
+head = sdbt;
+curr = sdbt;
+do {
 if (is_link_entry(curr)) {
+/* Process table-link entries */
 curr = get_next_sdbt(curr);
-if (sdbt)
-free_page((unsigned long)sdbt);
-/* If the origin is reached, sampling buffer is freed */
-if (curr == sfb->sdbt)
-break;
-else
-sdbt = curr;
+free_page((unsigned long)sdbt);
+sdbt = curr;
 } else {
 /* Process SDB pointer */
-if (*curr) {
-free_page((unsigned long)phys_to_virt(*curr));
-curr++;
-}
+free_page((unsigned long)phys_to_virt(*curr));
+curr++;
 }
-}
+} while (curr != head);
 memset(sfb, 0, sizeof(*sfb));
 }


@@ -151,7 +151,7 @@ void arch_stack_walk_user_common(stack_trace_consume_fn consume_entry, void *coo
 break;
 }
 if (!store_ip(consume_entry, cookie, entry, perf, ip))
-return;
+break;
 first = false;
 }
 pagefault_enable();


@@ -129,8 +129,8 @@ static void ipte_lock_simple(struct kvm *kvm)
 retry:
 read_lock(&kvm->arch.sca_lock);
 ic = kvm_s390_get_ipte_control(kvm);
+old = READ_ONCE(*ic);
 do {
-old = READ_ONCE(*ic);
 if (old.k) {
 read_unlock(&kvm->arch.sca_lock);
 cond_resched();
@@ -138,7 +138,7 @@ static void ipte_lock_simple(struct kvm *kvm)
 }
 new = old;
 new.k = 1;
-} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+} while (!try_cmpxchg(&ic->val, &old.val, new.val));
 read_unlock(&kvm->arch.sca_lock);
 out:
 mutex_unlock(&kvm->arch.ipte_mutex);
@@ -154,11 +154,11 @@ static void ipte_unlock_simple(struct kvm *kvm)
 goto out;
 read_lock(&kvm->arch.sca_lock);
 ic = kvm_s390_get_ipte_control(kvm);
+old = READ_ONCE(*ic);
 do {
-old = READ_ONCE(*ic);
 new = old;
 new.k = 0;
-} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+} while (!try_cmpxchg(&ic->val, &old.val, new.val));
 read_unlock(&kvm->arch.sca_lock);
 wake_up(&kvm->arch.ipte_wq);
 out:
@@ -172,8 +172,8 @@ static void ipte_lock_siif(struct kvm *kvm)
 retry:
 read_lock(&kvm->arch.sca_lock);
 ic = kvm_s390_get_ipte_control(kvm);
+old = READ_ONCE(*ic);
 do {
-old = READ_ONCE(*ic);
 if (old.kg) {
 read_unlock(&kvm->arch.sca_lock);
 cond_resched();
@@ -182,7 +182,7 @@ static void ipte_lock_siif(struct kvm *kvm)
 new = old;
 new.k = 1;
 new.kh++;
-} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+} while (!try_cmpxchg(&ic->val, &old.val, new.val));
 read_unlock(&kvm->arch.sca_lock);
 }
@@ -192,13 +192,13 @@ static void ipte_unlock_siif(struct kvm *kvm)
 read_lock(&kvm->arch.sca_lock);
 ic = kvm_s390_get_ipte_control(kvm);
+old = READ_ONCE(*ic);
 do {
-old = READ_ONCE(*ic);
 new = old;
 new.kh--;
 if (!new.kh)
 new.k = 0;
-} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+} while (!try_cmpxchg(&ic->val, &old.val, new.val));
 read_unlock(&kvm->arch.sca_lock);
 if (!new.kh)
 wake_up(&kvm->arch.ipte_wq);


@@ -118,8 +118,6 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id)
 static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
 {
-int rc, expect;
 if (!kvm_s390_use_sca_entries())
 return;
 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_ECALL_PEND);
@@ -128,23 +126,16 @@ static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
 struct esca_block *sca = vcpu->kvm->arch.sca;
 union esca_sigp_ctrl *sigp_ctrl =
 &(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
-union esca_sigp_ctrl old;
-old = READ_ONCE(*sigp_ctrl);
-expect = old.value;
-rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
+WRITE_ONCE(sigp_ctrl->value, 0);
 } else {
 struct bsca_block *sca = vcpu->kvm->arch.sca;
 union bsca_sigp_ctrl *sigp_ctrl =
 &(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
-union bsca_sigp_ctrl old;
-old = READ_ONCE(*sigp_ctrl);
-expect = old.value;
-rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
+WRITE_ONCE(sigp_ctrl->value, 0);
 }
 read_unlock(&vcpu->kvm->arch.sca_lock);
-WARN_ON(rc != expect); /* cannot clear? */
 }
@@ -247,12 +238,12 @@ static inline int gisa_set_iam(struct kvm_s390_gisa *gisa, u8 iam)
 {
 u64 word, _word;
+word = READ_ONCE(gisa->u64.word[0]);
 do {
-word = READ_ONCE(gisa->u64.word[0]);
 if ((u64)gisa != word >> 32)
 return -EBUSY;
 _word = (word & ~0xffUL) | iam;
-} while (cmpxchg(&gisa->u64.word[0], word, _word) != word);
+} while (!try_cmpxchg(&gisa->u64.word[0], &word, _word));
 return 0;
 }
@@ -270,10 +261,10 @@ static inline void gisa_clear_ipm(struct kvm_s390_gisa *gisa)
 {
 u64 word, _word;
+word = READ_ONCE(gisa->u64.word[0]);
 do {
-word = READ_ONCE(gisa->u64.word[0]);
 _word = word & ~(0xffUL << 24);
-} while (cmpxchg(&gisa->u64.word[0], word, _word) != word);
+} while (!try_cmpxchg(&gisa->u64.word[0], &word, _word));
 }
 /**
@@ -291,14 +282,14 @@ static inline u8 gisa_get_ipm_or_restore_iam(struct kvm_s390_gisa_interrupt *gi)
 u8 pending_mask, alert_mask;
 u64 word, _word;
+word = READ_ONCE(gi->origin->u64.word[0]);
 do {
-word = READ_ONCE(gi->origin->u64.word[0]);
 alert_mask = READ_ONCE(gi->alert.mask);
 pending_mask = (u8)(word >> 24) & alert_mask;
 if (pending_mask)
 return pending_mask;
 _word = (word & ~0xffUL) | alert_mask;
-} while (cmpxchg(&gi->origin->u64.word[0], word, _word) != word);
+} while (!try_cmpxchg(&gi->origin->u64.word[0], &word, _word));
 return 0;
 }


@@ -1937,11 +1937,11 @@ static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
 read_lock(&kvm->arch.sca_lock);
 sca = kvm->arch.sca;
+old = READ_ONCE(sca->utility);
 do {
-old = READ_ONCE(sca->utility);
 new = old;
 new.mtcr = val;
-} while (cmpxchg(&sca->utility.val, old.val, new.val) != old.val);
+} while (!try_cmpxchg(&sca->utility.val, &old.val, new.val));
 read_unlock(&kvm->arch.sca_lock);
 }


@@ -208,13 +208,12 @@ static inline int account_mem(unsigned long nr_pages)
 page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+cur_pages = atomic_long_read(&user->locked_vm);
 do {
-cur_pages = atomic_long_read(&user->locked_vm);
 new_pages = cur_pages + nr_pages;
 if (new_pages > page_limit)
 return -ENOMEM;
-} while (atomic_long_cmpxchg(&user->locked_vm, cur_pages,
-new_pages) != cur_pages);
+} while (!atomic_long_try_cmpxchg(&user->locked_vm, &cur_pages, new_pages));
 atomic64_add(nr_pages, &current->mm->pinned_vm);


@@ -15,6 +15,7 @@
 #include <linux/percpu.h>
 #include <linux/io.h>
 #include <asm/alternative.h>
+#include <asm/asm.h>
 int spin_retry = -1;
@@ -76,24 +77,43 @@ static inline int arch_load_niai4(int *lock)
 asm_inline volatile(
 ALTERNATIVE("nop", ".insn rre,0xb2fa0000,4,0", ALT_FACILITY(49)) /* NIAI 4 */
-" l %0,%1\n"
-: "=d" (owner) : "Q" (*lock) : "memory");
+" l %[owner],%[lock]\n"
+: [owner] "=d" (owner) : [lock] "R" (*lock) : "memory");
 return owner;
 }
-static inline int arch_cmpxchg_niai8(int *lock, int old, int new)
+#ifdef __HAVE_ASM_FLAG_OUTPUTS__
+static inline int arch_try_cmpxchg_niai8(int *lock, int old, int new)
+{
+int cc;
+asm_inline volatile(
+ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", ALT_FACILITY(49)) /* NIAI 8 */
+" cs %[old],%[new],%[lock]\n"
+: [old] "+d" (old), [lock] "+Q" (*lock), "=@cc" (cc)
+: [new] "d" (new)
+: "memory");
+return cc == 0;
+}
+#else /* __HAVE_ASM_FLAG_OUTPUTS__ */
+static inline int arch_try_cmpxchg_niai8(int *lock, int old, int new)
 {
 int expected = old;
 asm_inline volatile(
 ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", ALT_FACILITY(49)) /* NIAI 8 */
-" cs %0,%3,%1\n"
-: "=d" (old), "=Q" (*lock)
-: "0" (old), "d" (new), "Q" (*lock)
+" cs %[old],%[new],%[lock]\n"
+: [old] "+d" (old), [lock] "+Q" (*lock)
+: [new] "d" (new)
 : "cc", "memory");
 return expected == old;
 }
+#endif /* __HAVE_ASM_FLAG_OUTPUTS__ */
 static inline struct spin_wait *arch_spin_decode_tail(int lock)
 {
 int ix, cpu;
@@ -226,7 +246,7 @@ static inline void arch_spin_lock_classic(arch_spinlock_t *lp)
 /* Try to get the lock if it is free. */
 if (!owner) {
 new = (old & _Q_TAIL_MASK) | lockval;
-if (arch_cmpxchg_niai8(&lp->lock, old, new)) {
+if (arch_try_cmpxchg_niai8(&lp->lock, old, new)) {
 /* Got the lock */
 return;
 }


@@ -338,7 +338,8 @@ static void do_exception(struct pt_regs *regs, int access)
 handle_fault_error_nolock(regs, 0);
 else
 do_sigsegv(regs, SEGV_MAPERR);
-} else if (fault & (VM_FAULT_SIGBUS | VM_FAULT_HWPOISON)) {
+} else if (fault & (VM_FAULT_SIGBUS | VM_FAULT_HWPOISON |
+ VM_FAULT_HWPOISON_LARGE)) {
 if (!user_mode(regs))
 handle_fault_error_nolock(regs, 0);
 else


@@ -587,7 +587,8 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
 if (pmd_leaf(*pmd)) {
 *table = (pmd_val(*pmd) &
 _SEGMENT_ENTRY_HARDWARE_BITS_LARGE)
-| _SEGMENT_ENTRY_GMAP_UC;
+| _SEGMENT_ENTRY_GMAP_UC
+| _SEGMENT_ENTRY;
 } else
 *table = pmd_val(*pmd) &
 _SEGMENT_ENTRY_HARDWARE_BITS;
@@ -2396,7 +2397,8 @@ static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr,
 gaddr = __gmap_segment_gaddr((unsigned long *)pmdp);
 pmdp_notify_gmap(gmap, pmdp, gaddr);
 WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
-_SEGMENT_ENTRY_GMAP_UC));
+_SEGMENT_ENTRY_GMAP_UC |
+_SEGMENT_ENTRY));
 if (purge)
 __pmdp_csp(pmdp);
 set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
@@ -2450,7 +2452,8 @@ void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr)
 gaddr = __gmap_segment_gaddr(entry);
 pmdp_notify_gmap(gmap, pmdp, gaddr);
 WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
-_SEGMENT_ENTRY_GMAP_UC));
+_SEGMENT_ENTRY_GMAP_UC |
+_SEGMENT_ENTRY));
 if (MACHINE_HAS_TLB_GUEST)
 __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
 gmap->asce, IDTE_LOCAL);
@@ -2485,7 +2488,8 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr)
 gaddr = __gmap_segment_gaddr(entry);
 pmdp_notify_gmap(gmap, pmdp, gaddr);
 WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
-_SEGMENT_ENTRY_GMAP_UC));
+_SEGMENT_ENTRY_GMAP_UC |
+_SEGMENT_ENTRY));
 if (MACHINE_HAS_TLB_GUEST)
 __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
 gmap->asce, IDTE_GLOBAL);


@@ -24,6 +24,7 @@
 static inline unsigned long __pte_to_rste(pte_t pte)
 {
+swp_entry_t arch_entry;
 unsigned long rste;
 /*
@@ -48,6 +49,7 @@ static inline unsigned long __pte_to_rste(pte_t pte)
 */
 if (pte_present(pte)) {
 rste = pte_val(pte) & PAGE_MASK;
+rste |= _SEGMENT_ENTRY_PRESENT;
 rste |= move_set_bit(pte_val(pte), _PAGE_READ,
 _SEGMENT_ENTRY_READ);
 rste |= move_set_bit(pte_val(pte), _PAGE_WRITE,
@@ -66,6 +68,10 @@ static inline unsigned long __pte_to_rste(pte_t pte)
 #endif
 rste |= move_set_bit(pte_val(pte), _PAGE_NOEXEC,
 _SEGMENT_ENTRY_NOEXEC);
+} else if (!pte_none(pte)) {
+/* swap pte */
+arch_entry = __pte_to_swp_entry(pte);
+rste = mk_swap_rste(__swp_type(arch_entry), __swp_offset(arch_entry));
 } else
 rste = _SEGMENT_ENTRY_EMPTY;
 return rste;
@@ -73,13 +79,18 @@ static inline unsigned long __pte_to_rste(pte_t pte)
 static inline pte_t __rste_to_pte(unsigned long rste)
 {
+swp_entry_t arch_entry;
 unsigned long pteval;
-int present;
+int present, none;
 pte_t pte;
-if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
+if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) {
 present = pud_present(__pud(rste));
-else
+none = pud_none(__pud(rste));
+} else {
 present = pmd_present(__pmd(rste));
+none = pmd_none(__pmd(rste));
+}
 /*
 * Convert encoding pmd / pud bits pte bits
@@ -114,6 +125,11 @@ static inline pte_t __rste_to_pte(unsigned long rste)
 pteval |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY, _PAGE_SOFT_DIRTY);
 #endif
 pteval |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC, _PAGE_NOEXEC);
+} else if (!none) {
+/* swap rste */
+arch_entry = __rste_to_swp_entry(rste);
+pte = mk_swap_pte(__swp_type_rste(arch_entry), __swp_offset_rste(arch_entry));
+pteval = pte_val(pte);
 } else
 pteval = _PAGE_INVALID;
 return __pte(pteval);
@@ -148,8 +164,6 @@ void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 unsigned long rste;
 rste = __pte_to_rste(pte);
-if (!MACHINE_HAS_NX)
-rste &= ~_SEGMENT_ENTRY_NOEXEC;
 /* Set correct table type for 2G hugepages */
 if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) {
@@ -223,11 +237,10 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
 p4dp = p4d_offset(pgdp, addr);
 if (p4d_present(*p4dp)) {
 pudp = pud_offset(p4dp, addr);
-if (pud_present(*pudp)) {
-if (pud_leaf(*pudp))
-return (pte_t *) pudp;
+if (sz == PUD_SIZE)
+return (pte_t *)pudp;
+if (pud_present(*pudp))
 pmdp = pmd_offset(pudp, addr);
-}
 }
 }
 return (pte_t *) pmdp;


@@ -779,8 +779,9 @@ int zpci_hot_reset_device(struct zpci_dev *zdev)
 * @fh: Current Function Handle of the device to be created
 * @state: Initial state after creation either Standby or Configured
 *
- * Creates a new zpci device and adds it to its, possibly newly created, zbus
- * as well as zpci_list.
+ * Allocates a new struct zpci_dev and queries the platform for its details.
+ * If successful the device can subsequently be added to the zPCI subsystem
+ * using zpci_add_device().
 *
 * Returns: the zdev on success or an error pointer otherwise
 */
@@ -803,7 +804,6 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state)
 goto error;
 zdev->state = state;
-kref_init(&zdev->kref);
 mutex_init(&zdev->state_lock);
 mutex_init(&zdev->fmb_lock);
 mutex_init(&zdev->kzdev_lock);
@@ -816,6 +816,17 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state)
 return ERR_PTR(rc);
 }
+/**
+ * zpci_add_device() - Add a previously created zPCI device to the zPCI subsystem
+ * @zdev: The zPCI device to be added
+ *
+ * A struct zpci_dev is added to the zPCI subsystem and to a virtual PCI bus creating
+ * a new one as necessary. A hotplug slot is created and events start to be handled.
+ * If successful from this point on zpci_zdev_get() and zpci_zdev_put() must be used.
+ * If adding the struct zpci_dev fails the device was not added and should be freed.
+ *
+ * Return: 0 on success, or an error code otherwise
+ */
 int zpci_add_device(struct zpci_dev *zdev)
 {
 int rc;
@@ -829,6 +840,7 @@ int zpci_add_device(struct zpci_dev *zdev)
 if (rc)
 goto error_destroy_iommu;
+kref_init(&zdev->kref);
 spin_lock(&zpci_list_lock);
 list_add_tail(&zdev->entry, &zpci_list);
 spin_unlock(&zpci_list_lock);
@@ -928,10 +940,8 @@ void zpci_device_reserved(struct zpci_dev *zdev)
 void zpci_release_device(struct kref *kref)
 {
 struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref);
-int ret;
-if (zdev->has_hp_slot)
-zpci_exit_slot(zdev);
+WARN_ON(zdev->state != ZPCI_FN_STATE_RESERVED);
 if (zdev->zbus->bus)
 zpci_bus_remove_device(zdev, false);
@@ -939,28 +949,14 @@ void zpci_release_device(struct kref *kref)
 if (zdev_enabled(zdev))
 zpci_disable_device(zdev);
-switch (zdev->state) {
-case ZPCI_FN_STATE_CONFIGURED:
-ret = sclp_pci_deconfigure(zdev->fid);
-zpci_dbg(3, "deconf fid:%x, rc:%d\n", zdev->fid, ret);
-fallthrough;
-case ZPCI_FN_STATE_STANDBY:
-if (zdev->has_hp_slot)
-zpci_exit_slot(zdev);
-spin_lock(&zpci_list_lock);
-list_del(&zdev->entry);
-spin_unlock(&zpci_list_lock);
-zpci_dbg(3, "rsv fid:%x\n", zdev->fid);
-fallthrough;
-case ZPCI_FN_STATE_RESERVED:
-if (zdev->has_resources)
-zpci_cleanup_bus_resources(zdev);
-zpci_bus_device_unregister(zdev);
-zpci_destroy_iommu(zdev);
-fallthrough;
-default:
-break;
-}
+if (zdev->has_hp_slot)
+zpci_exit_slot(zdev);
+if (zdev->has_resources)
+zpci_cleanup_bus_resources(zdev);
+zpci_bus_device_unregister(zdev);
+zpci_destroy_iommu(zdev);
 zpci_dbg(3, "rem fid:%x\n", zdev->fid);
 kfree_rcu(zdev, rcu);
 }
@@ -1121,7 +1117,8 @@ static void zpci_add_devices(struct list_head *scan_list)
 list_sort(NULL, scan_list, &zpci_cmp_rid);
 list_for_each_entry_safe(zdev, tmp, scan_list, entry) {
 list_del_init(&zdev->entry);
-zpci_add_device(zdev);
+if (zpci_add_device(zdev))
+kfree(zdev);
 }
 }


@@ -340,7 +340,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 zdev = zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_CONFIGURED);
 if (IS_ERR(zdev))
 break;
-zpci_add_device(zdev);
+if (zpci_add_device(zdev)) {
+kfree(zdev);
+break;
+}
 } else {
 /* the configuration request may be stale */
 if (zdev->state != ZPCI_FN_STATE_STANDBY)
@@ -354,7 +357,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 zdev = zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY);
 if (IS_ERR(zdev))
 break;
-zpci_add_device(zdev);
+if (zpci_add_device(zdev)) {
+kfree(zdev);
+break;
+}
 } else {
 zpci_update_fh(zdev, ccdf->fh);
 }


@@ -453,7 +453,7 @@ static void ap_tasklet_fn(unsigned long dummy)
 * important that no requests on any AP get lost.
 */
 if (ap_irq_flag)
-xchg(ap_airq.lsi_ptr, 0);
+WRITE_ONCE(*ap_airq.lsi_ptr, 0);
 spin_lock_bh(&ap_queues_lock);
 hash_for_each(ap_queues, bkt, aq, hnode) {


@@ -360,10 +360,26 @@ static int vfio_ap_validate_nib(struct kvm_vcpu *vcpu, dma_addr_t *nib)
 return 0;
 }
-static int ensure_nib_shared(unsigned long addr, struct gmap *gmap)
+/**
+ * ensure_nib_shared() - Ensure the address of the NIB is secure and shared
+ * @addr: the physical (absolute) address of the NIB
+ *
+ * This function checks whether the NIB page, which has been pinned with
+ * vfio_pin_pages(), is a shared page belonging to a secure guest.
+ *
+ * It will call uv_pin_shared() on it; if the page was already pinned shared
+ * (i.e. if the NIB belongs to a secure guest and is shared), then 0
+ * (success) is returned. If the NIB was not shared, vfio_pin_pages() had
+ * exported it and now it does not belong to the secure guest anymore. In
+ * that case, an error is returned.
+ *
+ * Context: the NIB (at physical address @addr) has to be pinned with
+ * vfio_pin_pages() before calling this function.
+ *
+ * Return: 0 in case of success, otherwise an error < 0.
+ */
+static int ensure_nib_shared(unsigned long addr)
 {
-int ret;
 /*
 * The nib has to be located in shared storage since guest and
 * host access it. vfio_pin_pages() will do a pin shared and
@@ -374,12 +390,7 @@ static int ensure_nib_shared(unsigned long addr, struct gmap *gmap)
 *
 * If the page is already pinned shared the UV will return a success.
 */
-ret = uv_pin_shared(addr);
-if (ret) {
-/* vfio_pin_pages() likely exported the page so let's re-import */
-gmap_convert_to_secure(gmap, addr);
-}
-return ret;
+return uv_pin_shared(addr);
 }
 /**
@@ -425,6 +436,7 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
 return status;
 }
+/* The pin will probably be successful even if the NIB was not shared */
 ret = vfio_pin_pages(&q->matrix_mdev->vdev, nib, 1,
 IOMMU_READ | IOMMU_WRITE, &h_page);
 switch (ret) {
@@ -447,7 +459,7 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
 /* NIB in non-shared storage is a rc 6 for PV guests */
 if (kvm_s390_pv_cpu_is_protected(vcpu) &&
-ensure_nib_shared(h_nib & PAGE_MASK, kvm->arch.gmap)) {
+ensure_nib_shared(h_nib & PAGE_MASK)) {
 vfio_unpin_pages(&q->matrix_mdev->vdev, nib, 1);
 status.response_code = AP_RESPONSE_INVALID_ADDRESS;
 return status;