diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index c64b2987d108..0077969170e8 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -87,6 +87,7 @@ config S390 select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_MEM_ENCRYPT select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS + select ARCH_HAS_PREEMPT_LAZY select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SCALED_CPUTIME select ARCH_HAS_SET_DIRECT_MAP @@ -218,6 +219,7 @@ config S390 select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_PREEMPT_DYNAMIC_KEY select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE select HAVE_RETHOOK diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index ccd4e148b5ed..a7f7bdc9e19c 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -66,14 +66,15 @@ typedef int (debug_header_proc_t) (debug_info_t *id, struct debug_view *view, int area, debug_entry_t *entry, - char *out_buf); + char *out_buf, size_t out_buf_size); typedef int (debug_format_proc_t) (debug_info_t *id, struct debug_view *view, char *out_buf, + size_t out_buf_size, const char *in_buf); typedef int (debug_prolog_proc_t) (debug_info_t *id, struct debug_view *view, - char *out_buf); + char *out_buf, size_t out_buf_size); typedef int (debug_input_proc_t) (debug_info_t *id, struct debug_view *view, struct file *file, @@ -81,7 +82,8 @@ typedef int (debug_input_proc_t) (debug_info_t *id, size_t in_buf_size, loff_t *offset); int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view, - int area, debug_entry_t *entry, char *out_buf); + int area, debug_entry_t *entry, + char *out_buf, size_t out_buf_size); struct debug_view { char name[DEBUG_MAX_NAME_LEN]; diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h index 64761c78f774..13f51a6a5bb1 100644 --- a/arch/s390/include/asm/gmap.h +++ b/arch/s390/include/asm/gmap.h @@ -17,8 +17,8 @@ #define GMAP_NOTIFY_MPROT 0x1 /* Status bits only for huge segment entries */ -#define _SEGMENT_ENTRY_GMAP_IN 0x8000 /* invalidation notify bit */ -#define _SEGMENT_ENTRY_GMAP_UC 0x4000 /* dirty (migration) */ +#define _SEGMENT_ENTRY_GMAP_IN 0x0800 /* invalidation notify bit */ +#define _SEGMENT_ENTRY_GMAP_UC 0x0002 /* dirty (migration) */ /** * struct gmap_struct - guest address space diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index 6f815d4ba0ca..a40664b236e9 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -10,6 +10,8 @@ #define _ASM_S390_HUGETLB_H #include +#include +#include #include #define hugepages_supported() (MACHINE_HAS_EDAT1) @@ -78,7 +80,7 @@ static inline int huge_pte_none(pte_t pte) #define __HAVE_ARCH_HUGE_PTE_NONE_MOSTLY static inline int huge_pte_none_mostly(pte_t pte) { - return huge_pte_none(pte); + return huge_pte_none(pte) || is_pte_marker(pte); } #define __HAVE_ARCH_HUGE_PTE_MKUFFD_WP diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 1cd8eaebd3c0..97c7c8127543 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -94,11 +94,16 @@ union ipte_control { }; }; +/* + * Utility is defined as two bytes but having it four bytes wide + * generates more efficient code. Since the following bytes are + * reserved this makes no functional difference. + */ union sca_utility { - __u16 val; + __u32 val; struct { - __u16 mtcr : 1; - __u16 reserved : 15; + __u32 mtcr : 1; + __u32 : 31; }; }; @@ -107,7 +112,7 @@ struct bsca_block { __u64 reserved[5]; __u64 mcn; union sca_utility utility; - __u8 reserved2[6]; + __u8 reserved2[4]; struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS]; }; @@ -115,7 +120,7 @@ struct esca_block { union ipte_control ipte_control; __u64 reserved1[6]; union sca_utility utility; - __u8 reserved2[6]; + __u8 reserved2[4]; __u64 mcn[4]; __u64 reserved3[20]; struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS]; diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 8b67036edb69..48268095b0a3 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -277,7 +277,8 @@ static inline int is_module_addr(void *addr) #define _REGION1_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID) #define _REGION2_ENTRY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_LENGTH) #define _REGION2_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID) -#define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH) +#define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH | \ + _REGION3_ENTRY_PRESENT) #define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID) #define _REGION3_ENTRY_HARDWARE_BITS 0xfffffffffffff6ffUL @@ -285,18 +286,27 @@ static inline int is_module_addr(void *addr) #define _REGION3_ENTRY_ORIGIN_LARGE ~0x7fffffffUL /* large page address */ #define _REGION3_ENTRY_DIRTY 0x2000 /* SW region dirty bit */ #define _REGION3_ENTRY_YOUNG 0x1000 /* SW region young bit */ +#define _REGION3_ENTRY_COMM 0x0010 /* Common-Region, marks swap entry */ #define _REGION3_ENTRY_LARGE 0x0400 /* RTTE-format control, large page */ -#define _REGION3_ENTRY_WRITE 0x0002 /* SW region write bit */ -#define _REGION3_ENTRY_READ 0x0001 /* SW region read bit */ +#define _REGION3_ENTRY_WRITE 0x8000 /* SW region write bit */ +#define _REGION3_ENTRY_READ 0x4000 /* SW region read bit */ #ifdef CONFIG_MEM_SOFT_DIRTY -#define _REGION3_ENTRY_SOFT_DIRTY 0x4000 /* SW region soft dirty bit */ +#define _REGION3_ENTRY_SOFT_DIRTY 0x0002 /* SW region soft dirty bit */ #else #define _REGION3_ENTRY_SOFT_DIRTY 0x0000 /* SW region soft dirty bit */ #endif #define _REGION_ENTRY_BITS 0xfffffffffffff22fUL +/* + * SW region present bit. For non-leaf region-third-table entries, bits 62-63 + * indicate the TABLE LENGTH and both must be set to 1. But such entries + * would always be considered as present, so it is safe to use bit 63 as + * PRESENT bit for PUD. + */ +#define _REGION3_ENTRY_PRESENT 0x0001 + /* Bits in the segment table entry */ #define _SEGMENT_ENTRY_BITS 0xfffffffffffffe3fUL #define _SEGMENT_ENTRY_HARDWARE_BITS 0xfffffffffffffe3cUL @@ -308,21 +318,29 @@ static inline int is_module_addr(void *addr) #define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */ #define _SEGMENT_ENTRY_TYPE_MASK 0x0c /* segment table type mask */ -#define _SEGMENT_ENTRY (0) +#define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PRESENT) #define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID) #define _SEGMENT_ENTRY_DIRTY 0x2000 /* SW segment dirty bit */ #define _SEGMENT_ENTRY_YOUNG 0x1000 /* SW segment young bit */ + +#define _SEGMENT_ENTRY_COMM 0x0010 /* Common-Segment, marks swap entry */ #define _SEGMENT_ENTRY_LARGE 0x0400 /* STE-format control, large page */ -#define _SEGMENT_ENTRY_WRITE 0x0002 /* SW segment write bit */ -#define _SEGMENT_ENTRY_READ 0x0001 /* SW segment read bit */ +#define _SEGMENT_ENTRY_WRITE 0x8000 /* SW segment write bit */ +#define _SEGMENT_ENTRY_READ 0x4000 /* SW segment read bit */ #ifdef CONFIG_MEM_SOFT_DIRTY -#define _SEGMENT_ENTRY_SOFT_DIRTY 0x4000 /* SW segment soft dirty bit */ +#define _SEGMENT_ENTRY_SOFT_DIRTY 0x0002 /* SW segment soft dirty bit */ #else #define _SEGMENT_ENTRY_SOFT_DIRTY 0x0000 /* SW segment soft dirty bit */ #endif +#define _SEGMENT_ENTRY_PRESENT 0x0001 /* SW segment present bit */ + +/* Common bits in region and segment table entries, for swap entries */ +#define _RST_ENTRY_COMM 0x0010 /* Common-Region/Segment, marks swap entry */ +#define _RST_ENTRY_INVALID 0x0020 /* invalid region/segment table entry */ + #define _CRST_ENTRIES 2048 /* number of region/segment table entries */ #define _PAGE_ENTRIES 256 /* number of page table entries */ @@ -454,17 +472,22 @@ static inline int is_module_addr(void *addr) /* * Segment entry (large page) protection definitions. */ -#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_INVALID | \ +#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_PRESENT | \ + _SEGMENT_ENTRY_INVALID | \ _SEGMENT_ENTRY_PROTECT) -#define SEGMENT_RO __pgprot(_SEGMENT_ENTRY_PROTECT | \ +#define SEGMENT_RO __pgprot(_SEGMENT_ENTRY_PRESENT | \ + _SEGMENT_ENTRY_PROTECT | \ _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_NOEXEC) -#define SEGMENT_RX __pgprot(_SEGMENT_ENTRY_PROTECT | \ +#define SEGMENT_RX __pgprot(_SEGMENT_ENTRY_PRESENT | \ + _SEGMENT_ENTRY_PROTECT | \ _SEGMENT_ENTRY_READ) -#define SEGMENT_RW __pgprot(_SEGMENT_ENTRY_READ | \ +#define SEGMENT_RW __pgprot(_SEGMENT_ENTRY_PRESENT | \ + _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_WRITE | \ _SEGMENT_ENTRY_NOEXEC) -#define SEGMENT_RWX __pgprot(_SEGMENT_ENTRY_READ | \ +#define SEGMENT_RWX __pgprot(_SEGMENT_ENTRY_PRESENT | \ + _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_WRITE) #define SEGMENT_KERNEL __pgprot(_SEGMENT_ENTRY | \ _SEGMENT_ENTRY_LARGE | \ @@ -491,6 +514,7 @@ static inline int is_module_addr(void *addr) */ #define REGION3_KERNEL __pgprot(_REGION_ENTRY_TYPE_R3 | \ + _REGION3_ENTRY_PRESENT | \ _REGION3_ENTRY_LARGE | \ _REGION3_ENTRY_READ | \ _REGION3_ENTRY_WRITE | \ @@ -498,12 +522,14 @@ static inline int is_module_addr(void *addr) _REGION3_ENTRY_DIRTY | \ _REGION_ENTRY_NOEXEC) #define REGION3_KERNEL_RO __pgprot(_REGION_ENTRY_TYPE_R3 | \ + _REGION3_ENTRY_PRESENT | \ _REGION3_ENTRY_LARGE | \ _REGION3_ENTRY_READ | \ _REGION3_ENTRY_YOUNG | \ _REGION_ENTRY_PROTECT | \ _REGION_ENTRY_NOEXEC) #define REGION3_KERNEL_EXEC __pgprot(_REGION_ENTRY_TYPE_R3 | \ + _REGION3_ENTRY_PRESENT | \ _REGION3_ENTRY_LARGE | \ _REGION3_ENTRY_READ | \ _REGION3_ENTRY_WRITE | \ @@ -746,7 +772,7 @@ static inline int pud_present(pud_t pud) { if (pud_folded(pud)) return 1; - return (pud_val(pud) & _REGION_ENTRY_ORIGIN) != 0UL; + return (pud_val(pud) & _REGION3_ENTRY_PRESENT) != 0; } static inline int pud_none(pud_t pud) @@ -761,13 +787,18 @@ static inline bool pud_leaf(pud_t pud) { if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) != _REGION_ENTRY_TYPE_R3) return 0; - return !!(pud_val(pud) & _REGION3_ENTRY_LARGE); + return (pud_present(pud) && (pud_val(pud) & _REGION3_ENTRY_LARGE) != 0); +} + +static inline int pmd_present(pmd_t pmd) +{ + return (pmd_val(pmd) & _SEGMENT_ENTRY_PRESENT) != 0; } #define pmd_leaf pmd_leaf static inline bool pmd_leaf(pmd_t pmd) { - return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0; + return (pmd_present(pmd) && (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0); } static inline int pmd_bad(pmd_t pmd) @@ -799,11 +830,6 @@ static inline int p4d_bad(p4d_t p4d) return (p4d_val(p4d) & ~_REGION_ENTRY_BITS) != 0; } -static inline int pmd_present(pmd_t pmd) -{ - return pmd_val(pmd) != _SEGMENT_ENTRY_EMPTY; -} - static inline int pmd_none(pmd_t pmd) { return pmd_val(pmd) == _SEGMENT_ENTRY_EMPTY; @@ -1851,7 +1877,7 @@ static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, static inline int pmd_trans_huge(pmd_t pmd) { - return pmd_val(pmd) & _SEGMENT_ENTRY_LARGE; + return pmd_leaf(pmd); } #define has_transparent_hugepage has_transparent_hugepage @@ -1911,6 +1937,53 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) +/* + * 64 bit swap entry format for REGION3 and SEGMENT table entries (RSTE) + * Bits 59 and 63 are used to indicate the swap entry. Bit 58 marks the rste + * as invalid. + * A swap entry is indicated by bit pattern (rste & 0x011) == 0x010 + * | offset |Xtype |11TT|S0| + * |0000000000111111111122222222223333333333444444444455|555555|5566|66| + * |0123456789012345678901234567890123456789012345678901|234567|8901|23| + * + * Bits 0-51 store the offset. + * Bits 53-57 store the type. + * Bit 62 (S) is used for softdirty tracking. + * Bits 60-61 (TT) indicate the table type: 0x01 for REGION3 and 0x00 for SEGMENT. + * Bit 52 (X) is unused. + */ + +#define __SWP_OFFSET_MASK_RSTE ((1UL << 52) - 1) +#define __SWP_OFFSET_SHIFT_RSTE 12 +#define __SWP_TYPE_MASK_RSTE ((1UL << 5) - 1) +#define __SWP_TYPE_SHIFT_RSTE 6 + +/* + * TT bits set to 0x00 == SEGMENT. For REGION3 entries, caller must add R3 + * bits 0x01. See also __set_huge_pte_at(). + */ +static inline unsigned long mk_swap_rste(unsigned long type, unsigned long offset) +{ + unsigned long rste; + + rste = _RST_ENTRY_INVALID | _RST_ENTRY_COMM; + rste |= (offset & __SWP_OFFSET_MASK_RSTE) << __SWP_OFFSET_SHIFT_RSTE; + rste |= (type & __SWP_TYPE_MASK_RSTE) << __SWP_TYPE_SHIFT_RSTE; + return rste; +} + +static inline unsigned long __swp_type_rste(swp_entry_t entry) +{ + return (entry.val >> __SWP_TYPE_SHIFT_RSTE) & __SWP_TYPE_MASK_RSTE; +} + +static inline unsigned long __swp_offset_rste(swp_entry_t entry) +{ + return (entry.val >> __SWP_OFFSET_SHIFT_RSTE) & __SWP_OFFSET_MASK_RSTE; +} + +#define __rste_to_swp_entry(rste) ((swp_entry_t) { rste }) + extern int vmem_add_mapping(unsigned long start, unsigned long size); extern void vmem_remove_mapping(unsigned long start, unsigned long size); extern int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc); diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index 0cde7e240373..2c29bdf12127 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -130,10 +130,24 @@ static __always_inline bool should_resched(int preempt_offset) #define init_idle_preempt_count(p, cpu) do { } while (0) #ifdef CONFIG_PREEMPTION -extern void preempt_schedule(void); -#define __preempt_schedule() preempt_schedule() -extern void preempt_schedule_notrace(void); -#define __preempt_schedule_notrace() preempt_schedule_notrace() + +void preempt_schedule(void); +void preempt_schedule_notrace(void); + +#ifdef CONFIG_PREEMPT_DYNAMIC + +void dynamic_preempt_schedule(void); +void dynamic_preempt_schedule_notrace(void); +#define __preempt_schedule() dynamic_preempt_schedule() +#define __preempt_schedule_notrace() dynamic_preempt_schedule_notrace() + +#else /* CONFIG_PREEMPT_DYNAMIC */ + +#define __preempt_schedule() preempt_schedule() +#define __preempt_schedule_notrace() preempt_schedule_notrace() + +#endif /* CONFIG_PREEMPT_DYNAMIC */ + #endif /* CONFIG_PREEMPTION */ #endif /* __ASM_PREEMPT_H */ diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index ac868a9bb0d1..f87dd0a84855 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -82,9 +82,10 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp) kcsan_release(); asm_inline volatile( ALTERNATIVE("nop", ".insn rre,0xb2fa0000,7,0", ALT_FACILITY(49)) /* NIAI 7 */ - " sth %1,%0\n" - : "=R" (((unsigned short *) &lp->lock)[1]) - : "d" (0) : "cc", "memory"); + " mvhhi %[lock],0\n" + : [lock] "=Q" (((unsigned short *)&lp->lock)[1]) + : + : "memory"); } /* diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 00ac01874a12..c33f7144d1b9 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -61,44 +61,45 @@ void arch_setup_new_exec(void); /* * thread information flags bit numbers */ -/* _TIF_WORK bits */ #define TIF_NOTIFY_RESUME 0 /* callback before returning to user */ #define TIF_SIGPENDING 1 /* signal pending */ #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ -#define TIF_UPROBE 3 /* breakpointed or single-stepping */ -#define TIF_GUARDED_STORAGE 4 /* load guarded storage control block */ +#define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling needed */ +#define TIF_UPROBE 4 /* breakpointed or single-stepping */ #define TIF_PATCH_PENDING 5 /* pending live patching update */ #define TIF_PGSTE 6 /* New mm's will use 4K page tables */ #define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */ +#define TIF_GUARDED_STORAGE 8 /* load guarded storage control block */ #define TIF_ISOLATE_BP_GUEST 9 /* Run KVM guests with isolated BP */ #define TIF_PER_TRAP 10 /* Need to handle PER trap on exit to usermode */ - #define TIF_31BIT 16 /* 32bit process */ #define TIF_MEMDIE 17 /* is terminating due to OOM killer */ #define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */ #define TIF_SINGLE_STEP 19 /* This task is single stepped */ #define TIF_BLOCK_STEP 20 /* This task is block stepped */ #define TIF_UPROBE_SINGLESTEP 21 /* This task is uprobe single stepped */ - -/* _TIF_TRACE bits */ #define TIF_SYSCALL_TRACE 24 /* syscall trace active */ #define TIF_SYSCALL_AUDIT 25 /* syscall auditing active */ #define TIF_SECCOMP 26 /* secure computing */ #define TIF_SYSCALL_TRACEPOINT 27 /* syscall tracepoint instrumentation */ #define _TIF_NOTIFY_RESUME BIT(TIF_NOTIFY_RESUME) -#define _TIF_NOTIFY_SIGNAL BIT(TIF_NOTIFY_SIGNAL) #define _TIF_SIGPENDING BIT(TIF_SIGPENDING) #define _TIF_NEED_RESCHED BIT(TIF_NEED_RESCHED) +#define _TIF_NEED_RESCHED_LAZY BIT(TIF_NEED_RESCHED_LAZY) #define _TIF_UPROBE BIT(TIF_UPROBE) -#define _TIF_GUARDED_STORAGE BIT(TIF_GUARDED_STORAGE) #define _TIF_PATCH_PENDING BIT(TIF_PATCH_PENDING) +#define _TIF_PGSTE BIT(TIF_PGSTE) +#define _TIF_NOTIFY_SIGNAL BIT(TIF_NOTIFY_SIGNAL) +#define _TIF_GUARDED_STORAGE BIT(TIF_GUARDED_STORAGE) #define _TIF_ISOLATE_BP_GUEST BIT(TIF_ISOLATE_BP_GUEST) #define _TIF_PER_TRAP BIT(TIF_PER_TRAP) - #define _TIF_31BIT BIT(TIF_31BIT) +#define _TIF_MEMDIE BIT(TIF_MEMDIE) +#define _TIF_RESTORE_SIGMASK BIT(TIF_RESTORE_SIGMASK) #define _TIF_SINGLE_STEP BIT(TIF_SINGLE_STEP) - +#define _TIF_BLOCK_STEP BIT(TIF_BLOCK_STEP) +#define _TIF_UPROBE_SINGLESTEP BIT(TIF_UPROBE_SINGLESTEP) #define _TIF_SYSCALL_TRACE BIT(TIF_SYSCALL_TRACE) #define _TIF_SYSCALL_AUDIT BIT(TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP BIT(TIF_SECCOMP) diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index a6e2cd89b609..9dfd46dd03c6 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -46,11 +46,6 @@ static inline void __tlb_flush_mm(struct mm_struct *mm) { unsigned long gmap_asce; - /* - * If the machine has IDTE we prefer to do a per mm flush - * on all cpus instead of doing a local flush if the mm - * only ran on the local cpu. - */ preempt_disable(); atomic_inc(&mm->context.flush_count); /* Reset TLB flush mask */ diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index b3f2103694e4..de19fd8a6a95 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -77,12 +77,14 @@ static debug_info_t *debug_info_create(const char *name, int pages_per_area, static void debug_info_get(debug_info_t *); static void debug_info_put(debug_info_t *); static int debug_prolog_level_fn(debug_info_t *id, - struct debug_view *view, char *out_buf); + struct debug_view *view, char *out_buf, + size_t out_buf_size); static int debug_input_level_fn(debug_info_t *id, struct debug_view *view, struct file *file, const char __user *user_buf, size_t user_buf_size, loff_t *offset); static int debug_prolog_pages_fn(debug_info_t *id, - struct debug_view *view, char *out_buf); + struct debug_view *view, char *out_buf, + size_t out_buf_size); static int debug_input_pages_fn(debug_info_t *id, struct debug_view *view, struct file *file, const char __user *user_buf, size_t user_buf_size, loff_t *offset); @@ -90,9 +92,11 @@ static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view, struct file *file, const char __user *user_buf, size_t user_buf_size, loff_t *offset); static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, const char *in_buf); + char *out_buf, size_t out_buf_size, + const char *in_buf); static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, const char *inbuf); + char *out_buf, size_t out_buf_size, + const char *inbuf); static void debug_areas_swap(debug_info_t *a, debug_info_t *b); static void debug_events_append(debug_info_t *dest, debug_info_t *src); @@ -391,8 +395,10 @@ static int debug_format_entry(file_private_info_t *p_info) if (p_info->act_entry == DEBUG_PROLOG_ENTRY) { /* print prolog */ - if (view->prolog_proc) - len += view->prolog_proc(id_snap, view, p_info->temp_buf); + if (view->prolog_proc) { + len += view->prolog_proc(id_snap, view, p_info->temp_buf, + sizeof(p_info->temp_buf)); + } goto out; } if (!id_snap->areas) /* this is true, if we have a prolog only view */ @@ -402,12 +408,16 @@ static int debug_format_entry(file_private_info_t *p_info) if (act_entry->clock == 0LL) goto out; /* empty entry */ - if (view->header_proc) + if (view->header_proc) { len += view->header_proc(id_snap, view, p_info->act_area, - act_entry, p_info->temp_buf + len); - if (view->format_proc) + act_entry, p_info->temp_buf + len, + sizeof(p_info->temp_buf) - len); + } + if (view->format_proc) { len += view->format_proc(id_snap, view, p_info->temp_buf + len, + sizeof(p_info->temp_buf) - len, DEBUG_DATA(act_entry)); + } out: return len; } @@ -1292,9 +1302,9 @@ static inline int debug_get_uint(char *buf) */ static int debug_prolog_pages_fn(debug_info_t *id, struct debug_view *view, - char *out_buf) + char *out_buf, size_t out_buf_size) { - return sprintf(out_buf, "%i\n", id->pages_per_area); + return scnprintf(out_buf, out_buf_size, "%i\n", id->pages_per_area); } /* @@ -1341,14 +1351,14 @@ static int debug_input_pages_fn(debug_info_t *id, struct debug_view *view, * prints out actual debug level */ static int debug_prolog_level_fn(debug_info_t *id, struct debug_view *view, - char *out_buf) + char *out_buf, size_t out_buf_size) { int rc = 0; if (id->level == DEBUG_OFF_LEVEL) - rc = sprintf(out_buf, "-\n"); + rc = scnprintf(out_buf, out_buf_size, "-\n"); else - rc = sprintf(out_buf, "%i\n", id->level); + rc = scnprintf(out_buf, out_buf_size, "%i\n", id->level); return rc; } @@ -1465,22 +1475,24 @@ static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view, * prints debug data in hex/ascii format */ static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, const char *in_buf) + char *out_buf, size_t out_buf_size, const char *in_buf) { int i, rc = 0; - for (i = 0; i < id->buf_size; i++) - rc += sprintf(out_buf + rc, "%02x ", ((unsigned char *) in_buf)[i]); - rc += sprintf(out_buf + rc, "| "); + for (i = 0; i < id->buf_size; i++) { + rc += scnprintf(out_buf + rc, out_buf_size - rc, + "%02x ", ((unsigned char *)in_buf)[i]); + } + rc += scnprintf(out_buf + rc, out_buf_size - rc, "| "); for (i = 0; i < id->buf_size; i++) { unsigned char c = in_buf[i]; if (isascii(c) && isprint(c)) - rc += sprintf(out_buf + rc, "%c", c); + rc += scnprintf(out_buf + rc, out_buf_size - rc, "%c", c); else - rc += sprintf(out_buf + rc, "."); + rc += scnprintf(out_buf + rc, out_buf_size - rc, "."); } - rc += sprintf(out_buf + rc, "\n"); + rc += scnprintf(out_buf + rc, out_buf_size - rc, "\n"); return rc; } @@ -1488,7 +1500,8 @@ static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, * prints header for debug entry */ int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view, - int area, debug_entry_t *entry, char *out_buf) + int area, debug_entry_t *entry, char *out_buf, + size_t out_buf_size) { unsigned long sec, usec; unsigned long caller; @@ -1505,9 +1518,9 @@ int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view, else except_str = "-"; caller = (unsigned long) entry->caller; - rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %04u %px ", - area, sec, usec, level, except_str, - entry->cpu, (void *)caller); + rc += scnprintf(out_buf, out_buf_size, "%02i %011ld:%06lu %1u %1s %04u %px ", + area, sec, usec, level, except_str, + entry->cpu, (void *)caller); return rc; } EXPORT_SYMBOL(debug_dflt_header_fn); @@ -1520,7 +1533,7 @@ EXPORT_SYMBOL(debug_dflt_header_fn); #define DEBUG_SPRINTF_MAX_ARGS 10 static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, const char *inbuf) + char *out_buf, size_t out_buf_size, const char *inbuf) { debug_sprintf_entry_t *curr_event = (debug_sprintf_entry_t *)inbuf; int num_longs, num_used_args = 0, i, rc = 0; @@ -1533,8 +1546,9 @@ static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, goto out; /* bufsize of entry too small */ if (num_longs == 1) { /* no args, we use only the string */ - strcpy(out_buf, curr_event->string); - rc = strlen(curr_event->string); + rc = strscpy(out_buf, curr_event->string, out_buf_size); + if (rc == -E2BIG) + rc = out_buf_size; goto out; } @@ -1546,12 +1560,13 @@ static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, for (i = 0; i < num_used_args; i++) index[i] = i; - rc = sprintf(out_buf, curr_event->string, curr_event->args[index[0]], - curr_event->args[index[1]], curr_event->args[index[2]], - curr_event->args[index[3]], curr_event->args[index[4]], - curr_event->args[index[5]], curr_event->args[index[6]], - curr_event->args[index[7]], curr_event->args[index[8]], - curr_event->args[index[9]]); + rc = scnprintf(out_buf, out_buf_size, + curr_event->string, curr_event->args[index[0]], + curr_event->args[index[1]], curr_event->args[index[2]], + curr_event->args[index[3]], curr_event->args[index[4]], + curr_event->args[index[5]], curr_event->args[index[6]], + curr_event->args[index[7]], curr_event->args[index[8]], + curr_event->args[index[9]]); out: return rc; } diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 1ff13239d4e5..960c08700cf6 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -430,9 +430,13 @@ SYM_CODE_START(\name) SYM_CODE_END(\name) .endm + .section .irqentry.text, "ax" + INT_HANDLER ext_int_handler,__LC_EXT_OLD_PSW,do_ext_irq INT_HANDLER io_int_handler,__LC_IO_OLD_PSW,do_io_irq + .section .kprobes.text, "ax" + /* * Machine check handler routines */ diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 6295faf0987d..8b80ea57125f 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -489,6 +489,12 @@ int __init arch_init_kprobes(void) return 0; } +int __init arch_populate_kprobe_blacklist(void) +{ + return kprobe_add_area_blacklist((unsigned long)__irqentry_text_start, + (unsigned long)__irqentry_text_end); +} + int arch_trampoline_kprobe(struct kprobe *p) { return 0; diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 0cde42f8af6e..1e99514fb7ae 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -180,39 +180,27 @@ static int sf_buffer_available(struct cpu_hw_sf *cpuhw) */ static void free_sampling_buffer(struct sf_buffer *sfb) { - unsigned long *sdbt, *curr; - - if (!sfb->sdbt) - return; + unsigned long *sdbt, *curr, *head; sdbt = sfb->sdbt; - curr = sdbt; - + if (!sdbt) + return; + sfb->sdbt = NULL; /* Free the SDBT after all SDBs are processed... */ - while (1) { - if (!*curr || !sdbt) - break; - - /* Process table-link entries */ + head = sdbt; + curr = sdbt; + do { if (is_link_entry(curr)) { + /* Process table-link entries */ curr = get_next_sdbt(curr); - if (sdbt) - free_page((unsigned long)sdbt); - - /* If the origin is reached, sampling buffer is freed */ - if (curr == sfb->sdbt) - break; - else - sdbt = curr; + free_page((unsigned long)sdbt); + sdbt = curr; } else { /* Process SDB pointer */ - if (*curr) { - free_page((unsigned long)phys_to_virt(*curr)); - curr++; - } + free_page((unsigned long)phys_to_virt(*curr)); + curr++; } - } - + } while (curr != head); memset(sfb, 0, sizeof(*sfb)); } diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 9f59837d159e..40edfde25f5b 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -151,7 +151,7 @@ void arch_stack_walk_user_common(stack_trace_consume_fn consume_entry, void *coo break; } if (!store_ip(consume_entry, cookie, entry, perf, ip)) - return; + break; first = false; } pagefault_enable(); diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index a688351f4ab5..9816b0060fbe 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -129,8 +129,8 @@ static void ipte_lock_simple(struct kvm *kvm) retry: read_lock(&kvm->arch.sca_lock); ic = kvm_s390_get_ipte_control(kvm); + old = READ_ONCE(*ic); do { - old = READ_ONCE(*ic); if (old.k) { read_unlock(&kvm->arch.sca_lock); cond_resched(); @@ -138,7 +138,7 @@ static void ipte_lock_simple(struct kvm *kvm) } new = old; new.k = 1; - } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + } while (!try_cmpxchg(&ic->val, &old.val, new.val)); read_unlock(&kvm->arch.sca_lock); out: mutex_unlock(&kvm->arch.ipte_mutex); @@ -154,11 +154,11 @@ static void ipte_unlock_simple(struct kvm *kvm) goto out; read_lock(&kvm->arch.sca_lock); ic = kvm_s390_get_ipte_control(kvm); + old = READ_ONCE(*ic); do { - old = READ_ONCE(*ic); new = old; new.k = 0; - } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + } while (!try_cmpxchg(&ic->val, &old.val, new.val)); read_unlock(&kvm->arch.sca_lock); wake_up(&kvm->arch.ipte_wq); out: @@ -172,8 +172,8 @@ static void ipte_lock_siif(struct kvm *kvm) retry: read_lock(&kvm->arch.sca_lock); ic = kvm_s390_get_ipte_control(kvm); + old = READ_ONCE(*ic); do { - old = READ_ONCE(*ic); if (old.kg) { read_unlock(&kvm->arch.sca_lock); cond_resched(); @@ -182,7 +182,7 @@ static void ipte_lock_siif(struct kvm *kvm) new = old; new.k = 1; new.kh++; - } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + } while (!try_cmpxchg(&ic->val, &old.val, new.val)); read_unlock(&kvm->arch.sca_lock); } @@ -192,13 +192,13 @@ static void ipte_unlock_siif(struct kvm *kvm) read_lock(&kvm->arch.sca_lock); ic = kvm_s390_get_ipte_control(kvm); + old = READ_ONCE(*ic); do { - old = READ_ONCE(*ic); new = old; new.kh--; if (!new.kh) new.k = 0; - } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + } while (!try_cmpxchg(&ic->val, &old.val, new.val)); read_unlock(&kvm->arch.sca_lock); if (!new.kh) wake_up(&kvm->arch.ipte_wq); diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 4f0e7f61edf7..ea8dce299954 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -118,8 +118,6 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) static void sca_clear_ext_call(struct kvm_vcpu *vcpu) { - int rc, expect; - if (!kvm_s390_use_sca_entries()) return; kvm_s390_clear_cpuflags(vcpu, CPUSTAT_ECALL_PEND); @@ -128,23 +126,16 @@ static void sca_clear_ext_call(struct kvm_vcpu *vcpu) struct esca_block *sca = vcpu->kvm->arch.sca; union esca_sigp_ctrl *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); - union esca_sigp_ctrl old; - old = READ_ONCE(*sigp_ctrl); - expect = old.value; - rc = cmpxchg(&sigp_ctrl->value, old.value, 0); + WRITE_ONCE(sigp_ctrl->value, 0); } else { struct bsca_block *sca = vcpu->kvm->arch.sca; union bsca_sigp_ctrl *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); - union bsca_sigp_ctrl old; - old = READ_ONCE(*sigp_ctrl); - expect = old.value; - rc = cmpxchg(&sigp_ctrl->value, old.value, 0); + WRITE_ONCE(sigp_ctrl->value, 0); } read_unlock(&vcpu->kvm->arch.sca_lock); - WARN_ON(rc != expect); /* cannot clear? */ } int psw_extint_disabled(struct kvm_vcpu *vcpu) @@ -247,12 +238,12 @@ static inline int gisa_set_iam(struct kvm_s390_gisa *gisa, u8 iam) { u64 word, _word; + word = READ_ONCE(gisa->u64.word[0]); do { - word = READ_ONCE(gisa->u64.word[0]); if ((u64)gisa != word >> 32) return -EBUSY; _word = (word & ~0xffUL) | iam; - } while (cmpxchg(&gisa->u64.word[0], word, _word) != word); + } while (!try_cmpxchg(&gisa->u64.word[0], &word, _word)); return 0; } @@ -270,10 +261,10 @@ static inline void gisa_clear_ipm(struct kvm_s390_gisa *gisa) { u64 word, _word; + word = READ_ONCE(gisa->u64.word[0]); do { - word = READ_ONCE(gisa->u64.word[0]); _word = word & ~(0xffUL << 24); - } while (cmpxchg(&gisa->u64.word[0], word, _word) != word); + } while (!try_cmpxchg(&gisa->u64.word[0], &word, _word)); } /** @@ -291,14 +282,14 @@ static inline u8 gisa_get_ipm_or_restore_iam(struct kvm_s390_gisa_interrupt *gi) u8 pending_mask, alert_mask; u64 word, _word; + word = READ_ONCE(gi->origin->u64.word[0]); do { - word = READ_ONCE(gi->origin->u64.word[0]); alert_mask = READ_ONCE(gi->alert.mask); pending_mask = (u8)(word >> 24) & alert_mask; if (pending_mask) return pending_mask; _word = (word & ~0xffUL) | alert_mask; - } while (cmpxchg(&gi->origin->u64.word[0], word, _word) != word); + } while (!try_cmpxchg(&gi->origin->u64.word[0], &word, _word)); return 0; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 442d4a227c0e..d8080c27d45b 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1937,11 +1937,11 @@ static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val) read_lock(&kvm->arch.sca_lock); sca = kvm->arch.sca; + old = READ_ONCE(sca->utility); do { - old = READ_ONCE(sca->utility); new = old; new.mtcr = val; - } while (cmpxchg(&sca->utility.val, old.val, new.val) != old.val); + } while (!try_cmpxchg(&sca->utility.val, &old.val, new.val)); read_unlock(&kvm->arch.sca_lock); } diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c index a61518b549f0..9b9e7fdd5380 100644 --- a/arch/s390/kvm/pci.c +++ b/arch/s390/kvm/pci.c @@ -208,13 +208,12 @@ static inline int account_mem(unsigned long nr_pages) page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + cur_pages = atomic_long_read(&user->locked_vm); do { - cur_pages = atomic_long_read(&user->locked_vm); new_pages = cur_pages + nr_pages; if (new_pages > page_limit) return -ENOMEM; - } while (atomic_long_cmpxchg(&user->locked_vm, cur_pages, - new_pages) != cur_pages); + } while (!atomic_long_try_cmpxchg(&user->locked_vm, &cur_pages, new_pages)); atomic64_add(nr_pages, ¤t->mm->pinned_vm); diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index 09d735010ee1..a81a01c44927 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -15,6 +15,7 @@ #include #include #include +#include int spin_retry = -1; @@ -76,24 +77,43 @@ static inline int arch_load_niai4(int *lock) asm_inline volatile( ALTERNATIVE("nop", ".insn rre,0xb2fa0000,4,0", ALT_FACILITY(49)) /* NIAI 4 */ - " l %0,%1\n" - : "=d" (owner) : "Q" (*lock) : "memory"); + " l %[owner],%[lock]\n" + : [owner] "=d" (owner) : [lock] "R" (*lock) : "memory"); return owner; } -static inline int arch_cmpxchg_niai8(int *lock, int old, int new) +#ifdef __HAVE_ASM_FLAG_OUTPUTS__ + +static inline int arch_try_cmpxchg_niai8(int *lock, int old, int new) +{ + int cc; + + asm_inline volatile( + ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", ALT_FACILITY(49)) /* NIAI 8 */ + " cs %[old],%[new],%[lock]\n" + : [old] "+d" (old), [lock] "+Q" (*lock), "=@cc" (cc) + : [new] "d" (new) + : "memory"); + return cc == 0; +} + +#else /* __HAVE_ASM_FLAG_OUTPUTS__ */ + +static inline int arch_try_cmpxchg_niai8(int *lock, int old, int new) { int expected = old; asm_inline volatile( ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", ALT_FACILITY(49)) /* NIAI 8 */ - " cs %0,%3,%1\n" - : "=d" (old), "=Q" (*lock) - : "0" (old), "d" (new), "Q" (*lock) + " cs %[old],%[new],%[lock]\n" + : [old] "+d" (old), [lock] "+Q" (*lock) + : [new] "d" (new) : "cc", "memory"); return expected == old; } +#endif /* __HAVE_ASM_FLAG_OUTPUTS__ */ + static inline struct spin_wait *arch_spin_decode_tail(int lock) { int ix, cpu; @@ -226,7 +246,7 @@ static inline void arch_spin_lock_classic(arch_spinlock_t *lp) /* Try to get the lock if it is free. */ if (!owner) { new = (old & _Q_TAIL_MASK) | lockval; - if (arch_cmpxchg_niai8(&lp->lock, old, new)) { + if (arch_try_cmpxchg_niai8(&lp->lock, old, new)) { /* Got the lock */ return; } diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 646326fa0fad..9b681f74dccc 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -338,7 +338,8 @@ static void do_exception(struct pt_regs *regs, int access) handle_fault_error_nolock(regs, 0); else do_sigsegv(regs, SEGV_MAPERR); - } else if (fault & (VM_FAULT_SIGBUS | VM_FAULT_HWPOISON)) { + } else if (fault & (VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | + VM_FAULT_HWPOISON_LARGE)) { if (!user_mode(regs)) handle_fault_error_nolock(regs, 0); else diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 643e47bfaddc..16b8a36c56de 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -587,7 +587,8 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) if (pmd_leaf(*pmd)) { *table = (pmd_val(*pmd) & _SEGMENT_ENTRY_HARDWARE_BITS_LARGE) - | _SEGMENT_ENTRY_GMAP_UC; + | _SEGMENT_ENTRY_GMAP_UC + | _SEGMENT_ENTRY; } else *table = pmd_val(*pmd) & _SEGMENT_ENTRY_HARDWARE_BITS; @@ -2396,7 +2397,8 @@ static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr, gaddr = __gmap_segment_gaddr((unsigned long *)pmdp); pmdp_notify_gmap(gmap, pmdp, gaddr); WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | - _SEGMENT_ENTRY_GMAP_UC)); + _SEGMENT_ENTRY_GMAP_UC | + _SEGMENT_ENTRY)); if (purge) __pmdp_csp(pmdp); set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); @@ -2450,7 +2452,8 @@ void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr) gaddr = __gmap_segment_gaddr(entry); pmdp_notify_gmap(gmap, pmdp, gaddr); WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | - _SEGMENT_ENTRY_GMAP_UC)); + _SEGMENT_ENTRY_GMAP_UC | + _SEGMENT_ENTRY)); if (MACHINE_HAS_TLB_GUEST) __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_LOCAL); @@ -2485,7 +2488,8 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr) gaddr = __gmap_segment_gaddr(entry); pmdp_notify_gmap(gmap, pmdp, gaddr); WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | - _SEGMENT_ENTRY_GMAP_UC)); + _SEGMENT_ENTRY_GMAP_UC | + _SEGMENT_ENTRY)); if (MACHINE_HAS_TLB_GUEST) __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_GLOBAL); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 7c79cf1bc7d7..d9ce199953de 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -24,6 +24,7 @@ static inline unsigned long __pte_to_rste(pte_t pte) { + swp_entry_t arch_entry; unsigned long rste; /* @@ -48,6 +49,7 @@ static inline unsigned long __pte_to_rste(pte_t pte) */ if (pte_present(pte)) { rste = pte_val(pte) & PAGE_MASK; + rste |= _SEGMENT_ENTRY_PRESENT; rste |= move_set_bit(pte_val(pte), _PAGE_READ, _SEGMENT_ENTRY_READ); rste |= move_set_bit(pte_val(pte), _PAGE_WRITE, @@ -66,6 +68,10 @@ static inline unsigned long __pte_to_rste(pte_t pte) #endif rste |= move_set_bit(pte_val(pte), _PAGE_NOEXEC, _SEGMENT_ENTRY_NOEXEC); + } else if (!pte_none(pte)) { + /* swap pte */ + arch_entry = __pte_to_swp_entry(pte); + rste = mk_swap_rste(__swp_type(arch_entry), __swp_offset(arch_entry)); } else rste = _SEGMENT_ENTRY_EMPTY; return rste; @@ -73,13 +79,18 @@ static inline unsigned long __pte_to_rste(pte_t pte) static inline pte_t __rste_to_pte(unsigned long rste) { + swp_entry_t arch_entry; unsigned long pteval; - int present; + int present, none; + pte_t pte; - if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) + if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) { present = pud_present(__pud(rste)); - else + none = pud_none(__pud(rste)); + } else { present = pmd_present(__pmd(rste)); + none = pmd_none(__pmd(rste)); + } /* * Convert encoding pmd / pud bits pte bits @@ -114,6 +125,11 @@ static inline pte_t __rste_to_pte(unsigned long rste) pteval |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY, _PAGE_SOFT_DIRTY); #endif pteval |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC, _PAGE_NOEXEC); + } else if (!none) { + /* swap rste */ + arch_entry = __rste_to_swp_entry(rste); + pte = mk_swap_pte(__swp_type_rste(arch_entry), __swp_offset_rste(arch_entry)); + pteval = pte_val(pte); } else pteval = _PAGE_INVALID; return __pte(pteval); @@ -148,8 +164,6 @@ void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, unsigned long rste; rste = __pte_to_rste(pte); - if (!MACHINE_HAS_NX) - rste &= ~_SEGMENT_ENTRY_NOEXEC; /* Set correct table type for 2G hugepages */ if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) { @@ -223,11 +237,10 @@ pte_t *huge_pte_offset(struct mm_struct *mm, p4dp = p4d_offset(pgdp, addr); if (p4d_present(*p4dp)) { pudp = pud_offset(p4dp, addr); - if (pud_present(*pudp)) { - if (pud_leaf(*pudp)) - return (pte_t *) pudp; + if (sz == PUD_SIZE) + return (pte_t *)pudp; + if (pud_present(*pudp)) pmdp = pmd_offset(pudp, addr); - } } } return (pte_t *) pmdp; diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index cbff587dc4e3..88f72745fa59 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -779,8 +779,9 @@ int zpci_hot_reset_device(struct zpci_dev *zdev) * @fh: Current Function Handle of the device to be created * @state: Initial state after creation either Standby or Configured * - * Creates a new zpci device and adds it to its, possibly newly created, zbus - * as well as zpci_list. + * Allocates a new struct zpci_dev and queries the platform for its details. + * If successful the device can subsequently be added to the zPCI subsystem + * using zpci_add_device(). * * Returns: the zdev on success or an error pointer otherwise */ @@ -803,7 +804,6 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state) goto error; zdev->state = state; - kref_init(&zdev->kref); mutex_init(&zdev->state_lock); mutex_init(&zdev->fmb_lock); mutex_init(&zdev->kzdev_lock); @@ -816,6 +816,17 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state) return ERR_PTR(rc); } +/** + * zpci_add_device() - Add a previously created zPCI device to the zPCI subsystem + * @zdev: The zPCI device to be added + * + * A struct zpci_dev is added to the zPCI subsystem and to a virtual PCI bus creating + * a new one as necessary. A hotplug slot is created and events start to be handled. + * If successful from this point on zpci_zdev_get() and zpci_zdev_put() must be used. + * If adding the struct zpci_dev fails the device was not added and should be freed. + * + * Return: 0 on success, or an error code otherwise + */ int zpci_add_device(struct zpci_dev *zdev) { int rc; @@ -829,6 +840,7 @@ int zpci_add_device(struct zpci_dev *zdev) if (rc) goto error_destroy_iommu; + kref_init(&zdev->kref); spin_lock(&zpci_list_lock); list_add_tail(&zdev->entry, &zpci_list); spin_unlock(&zpci_list_lock); @@ -928,10 +940,8 @@ void zpci_device_reserved(struct zpci_dev *zdev) void zpci_release_device(struct kref *kref) { struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref); - int ret; - if (zdev->has_hp_slot) - zpci_exit_slot(zdev); + WARN_ON(zdev->state != ZPCI_FN_STATE_RESERVED); if (zdev->zbus->bus) zpci_bus_remove_device(zdev, false); @@ -939,28 +949,14 @@ void zpci_release_device(struct kref *kref) if (zdev_enabled(zdev)) zpci_disable_device(zdev); - switch (zdev->state) { - case ZPCI_FN_STATE_CONFIGURED: - ret = sclp_pci_deconfigure(zdev->fid); - zpci_dbg(3, "deconf fid:%x, rc:%d\n", zdev->fid, ret); - fallthrough; - case ZPCI_FN_STATE_STANDBY: - if (zdev->has_hp_slot) - zpci_exit_slot(zdev); - spin_lock(&zpci_list_lock); - list_del(&zdev->entry); - spin_unlock(&zpci_list_lock); - zpci_dbg(3, "rsv fid:%x\n", zdev->fid); - fallthrough; - case ZPCI_FN_STATE_RESERVED: - if (zdev->has_resources) - zpci_cleanup_bus_resources(zdev); - zpci_bus_device_unregister(zdev); - zpci_destroy_iommu(zdev); - fallthrough; - default: - break; - } + if (zdev->has_hp_slot) + zpci_exit_slot(zdev); + + if (zdev->has_resources) + zpci_cleanup_bus_resources(zdev); + + zpci_bus_device_unregister(zdev); + zpci_destroy_iommu(zdev); zpci_dbg(3, "rem fid:%x\n", zdev->fid); kfree_rcu(zdev, rcu); } @@ -1121,7 +1117,8 @@ static void zpci_add_devices(struct list_head *scan_list) list_sort(NULL, scan_list, &zpci_cmp_rid); list_for_each_entry_safe(zdev, tmp, scan_list, entry) { list_del_init(&zdev->entry); - zpci_add_device(zdev); + if (zpci_add_device(zdev)) + kfree(zdev); } } diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 47f934f4e828..7f7b732b3f3e 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -340,7 +340,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) zdev = zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_CONFIGURED); if (IS_ERR(zdev)) break; - zpci_add_device(zdev); + if (zpci_add_device(zdev)) { + kfree(zdev); + break; + } } else { /* the configuration request may be stale */ if (zdev->state != ZPCI_FN_STATE_STANDBY) @@ -354,7 +357,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) zdev = zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY); if (IS_ERR(zdev)) break; - zpci_add_device(zdev); + if (zpci_add_device(zdev)) { + kfree(zdev); + break; + } } else { zpci_update_fh(zdev, ccdf->fh); } diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index e14638936de6..26e1ea1940ec 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -453,7 +453,7 @@ static void ap_tasklet_fn(unsigned long dummy) * important that no requests on any AP get lost. */ if (ap_irq_flag) - xchg(ap_airq.lsi_ptr, 0); + WRITE_ONCE(*ap_airq.lsi_ptr, 0); spin_lock_bh(&ap_queues_lock); hash_for_each(ap_queues, bkt, aq, hnode) { diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 8c0b40d8eb39..a52c2690933f 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -360,10 +360,26 @@ static int vfio_ap_validate_nib(struct kvm_vcpu *vcpu, dma_addr_t *nib) return 0; } -static int ensure_nib_shared(unsigned long addr, struct gmap *gmap) +/** + * ensure_nib_shared() - Ensure the address of the NIB is secure and shared + * @addr: the physical (absolute) address of the NIB + * + * This function checks whether the NIB page, which has been pinned with + * vfio_pin_pages(), is a shared page belonging to a secure guest. + * + * It will call uv_pin_shared() on it; if the page was already pinned shared + * (i.e. if the NIB belongs to a secure guest and is shared), then 0 + * (success) is returned. If the NIB was not shared, vfio_pin_pages() had + * exported it and now it does not belong to the secure guest anymore. In + * that case, an error is returned. + * + * Context: the NIB (at physical address @addr) has to be pinned with + * vfio_pin_pages() before calling this function. + * + * Return: 0 in case of success, otherwise an error < 0. + */ +static int ensure_nib_shared(unsigned long addr) { - int ret; - /* * The nib has to be located in shared storage since guest and * host access it. vfio_pin_pages() will do a pin shared and @@ -374,12 +390,7 @@ static int ensure_nib_shared(unsigned long addr, struct gmap *gmap) * * If the page is already pinned shared the UV will return a success. */ - ret = uv_pin_shared(addr); - if (ret) { - /* vfio_pin_pages() likely exported the page so let's re-import */ - gmap_convert_to_secure(gmap, addr); - } - return ret; + return uv_pin_shared(addr); } /** @@ -425,6 +436,7 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, return status; } + /* The pin will probably be successful even if the NIB was not shared */ ret = vfio_pin_pages(&q->matrix_mdev->vdev, nib, 1, IOMMU_READ | IOMMU_WRITE, &h_page); switch (ret) { @@ -447,7 +459,7 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, /* NIB in non-shared storage is a rc 6 for PV guests */ if (kvm_s390_pv_cpu_is_protected(vcpu) && - ensure_nib_shared(h_nib & PAGE_MASK, kvm->arch.gmap)) { + ensure_nib_shared(h_nib & PAGE_MASK)) { vfio_unpin_pages(&q->matrix_mdev->vdev, nib, 1); status.response_code = AP_RESPONSE_INVALID_ADDRESS; return status;