Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git

This commit is contained in:
Stephen Rothwell 2024-12-20 10:41:48 +11:00
commit a7fac4ab97
46 changed files with 1101 additions and 397 deletions

View File

@ -233,6 +233,7 @@ config S390
select HAVE_VIRT_CPU_ACCOUNTING_IDLE
select IOMMU_HELPER if PCI
select IOMMU_SUPPORT if PCI
select KASAN_VMALLOC if KASAN
select LOCK_MM_AND_FIND_VMA
select MMU_GATHER_MERGE_VMAS
select MMU_GATHER_NO_GATHER
@ -256,6 +257,7 @@ config S390
select USER_STACKTRACE_SUPPORT
select VDSO_GETRANDOM
select VIRT_CPU_ACCOUNTING
select VMAP_STACK
select ZONE_DMA
# Note: keep the above list sorted alphabetically
@ -689,32 +691,6 @@ config MAX_PHYSMEM_BITS
Increasing the number of bits also increases the kernel image size.
By default 46 bits (64TB) are supported.
config CHECK_STACK
def_bool y
depends on !VMAP_STACK
prompt "Detect kernel stack overflow"
help
This option enables the compiler options -mstack-guard and
-mstack-size if they are available. If the compiler supports them
it will emit additional code to each function prolog to trigger
an illegal operation if the kernel stack is about to overflow.
Say N if you are unsure.
config STACK_GUARD
int "Size of the guard area (128-1024)"
range 128 1024
depends on CHECK_STACK
default "256"
help
This allows you to specify the size of the guard area at the lower
end of the kernel stack. If the kernel stack points into the guard
area on function entry an illegal operation is triggered. The size
needs to be a power of 2. Please keep in mind that the size of an
interrupt frame is 184 bytes for 31 bit and 328 bytes on 64 bit.
The minimum size for the stack guard should be 256 for 31 bit and
512 for 64 bit.
endmenu
menu "I/O subsystem"

View File

@ -72,15 +72,6 @@ cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls
KBUILD_AFLAGS_DECOMPRESSOR += $(aflags-y)
KBUILD_CFLAGS_DECOMPRESSOR += $(cflags-y)
ifneq ($(call cc-option,-mstack-size=8192 -mstack-guard=128),)
CC_FLAGS_CHECK_STACK := -mstack-size=$(STACK_SIZE)
ifeq ($(call cc-option,-mstack-size=8192),)
CC_FLAGS_CHECK_STACK += -mstack-guard=$(CONFIG_STACK_GUARD)
endif
export CC_FLAGS_CHECK_STACK
cflags-$(CONFIG_CHECK_STACK) += $(CC_FLAGS_CHECK_STACK)
endif
ifdef CONFIG_EXPOLINE
ifdef CONFIG_EXPOLINE_EXTERN
CC_FLAGS_EXPOLINE := -mindirect-branch=thunk-extern

View File

@ -13,7 +13,6 @@
struct machine_info {
unsigned char has_edat1 : 1;
unsigned char has_edat2 : 1;
unsigned char has_nx : 1;
};
struct vmlinux_info {

View File

@ -30,6 +30,9 @@ unsigned long __bootdata_preserved(vmemmap_size);
unsigned long __bootdata_preserved(MODULES_VADDR);
unsigned long __bootdata_preserved(MODULES_END);
unsigned long __bootdata_preserved(max_mappable);
unsigned long __bootdata_preserved(page_noexec_mask);
unsigned long __bootdata_preserved(segment_noexec_mask);
unsigned long __bootdata_preserved(region_noexec_mask);
int __bootdata_preserved(relocate_lowcore);
u64 __bootdata_preserved(stfle_fac_list[16]);
@ -51,8 +54,14 @@ static void detect_facilities(void)
}
if (test_facility(78))
machine.has_edat2 = 1;
if (test_facility(130))
machine.has_nx = 1;
page_noexec_mask = -1UL;
segment_noexec_mask = -1UL;
region_noexec_mask = -1UL;
if (!test_facility(130)) {
page_noexec_mask &= ~_PAGE_NOEXEC;
segment_noexec_mask &= ~_SEGMENT_ENTRY_NOEXEC;
region_noexec_mask &= ~_REGION_ENTRY_NOEXEC;
}
}
static int cmma_test_essa(void)

View File

@ -63,13 +63,10 @@ static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kern
pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY);
p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY);
unsigned long memgap_start = 0;
unsigned long untracked_end;
unsigned long start, end;
int i;
pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO));
if (!machine.has_nx)
pte_z = clear_pte_bit(pte_z, __pgprot(_PAGE_NOEXEC));
crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z));
crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z));
crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z));
@ -93,15 +90,10 @@ static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kern
kasan_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KASAN_MAP_SHADOW);
kasan_populate(0, (unsigned long)__identity_va(0), POPULATE_KASAN_ZERO_SHADOW);
kasan_populate(AMODE31_START, AMODE31_END, POPULATE_KASAN_ZERO_SHADOW);
if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
untracked_end = VMALLOC_START;
/* shallowly populate kasan shadow for vmalloc and modules */
kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW);
} else {
untracked_end = MODULES_VADDR;
}
/* populate kasan shadow for untracked memory */
kasan_populate((unsigned long)__identity_va(ident_map_size), untracked_end,
kasan_populate((unsigned long)__identity_va(ident_map_size), VMALLOC_START,
POPULATE_KASAN_ZERO_SHADOW);
kasan_populate(kernel_end, _REGION1_SIZE, POPULATE_KASAN_ZERO_SHADOW);
}
@ -300,8 +292,6 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e
continue;
entry = __pte(_pa(addr, PAGE_SIZE, mode));
entry = set_pte_bit(entry, PAGE_KERNEL);
if (!machine.has_nx)
entry = clear_pte_bit(entry, __pgprot(_PAGE_NOEXEC));
set_pte(pte, entry);
pages++;
}
@ -326,8 +316,6 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e
if (can_large_pmd(pmd, addr, next, mode)) {
entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode));
entry = set_pmd_bit(entry, SEGMENT_KERNEL);
if (!machine.has_nx)
entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC));
set_pmd(pmd, entry);
pages++;
continue;
@ -359,8 +347,6 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e
if (can_large_pud(pud, addr, next, mode)) {
entry = __pud(_pa(addr, _REGION3_SIZE, mode));
entry = set_pud_bit(entry, REGION3_KERNEL);
if (!machine.has_nx)
entry = clear_pud_bit(entry, __pgprot(_REGION_ENTRY_NOEXEC));
set_pud(pud, entry);
pages++;
continue;

View File

@ -1,4 +1,4 @@
# Help: Enable KASan for debugging
CONFIG_KASAN=y
CONFIG_KASAN_INLINE=y
CONFIG_KASAN_VMALLOC=y
CONFIG_KERNEL_IMAGE_BASE=0x7FFFE0000000

View File

@ -2,7 +2,7 @@
#ifndef _ASM_S390_ABS_LOWCORE_H
#define _ASM_S390_ABS_LOWCORE_H
#include <asm/sections.h>
#include <linux/smp.h>
#include <asm/lowcore.h>
#define ABS_LOWCORE_MAP_SIZE (NR_CPUS * sizeof(struct lowcore))
@ -25,7 +25,7 @@ static inline void put_abs_lowcore(struct lowcore *lc)
put_cpu();
}
extern int __bootdata_preserved(relocate_lowcore);
extern int relocate_lowcore;
static inline int have_relocated_lowcore(void)
{

View File

@ -17,13 +17,13 @@
static __always_inline int arch_atomic_read(const atomic_t *v)
{
return __atomic_read(v);
return __atomic_read(&v->counter);
}
#define arch_atomic_read arch_atomic_read
static __always_inline void arch_atomic_set(atomic_t *v, int i)
{
__atomic_set(v, i);
__atomic_set(&v->counter, i);
}
#define arch_atomic_set arch_atomic_set
@ -45,6 +45,36 @@ static __always_inline void arch_atomic_add(int i, atomic_t *v)
}
#define arch_atomic_add arch_atomic_add
/*
 * arch_atomic_inc - add the constant 1 to @v->counter.
 *
 * Thin wrapper around __atomic_add_const(); the plain (non "_barrier")
 * helper is used and nothing is returned.
 */
static __always_inline void arch_atomic_inc(atomic_t *v)
{
__atomic_add_const(1, &v->counter);
}
/* Self-referential define; presumably signals to generic code that an arch version exists. */
#define arch_atomic_inc arch_atomic_inc
/*
 * arch_atomic_dec - add the constant -1 to @v->counter.
 *
 * Thin wrapper around __atomic_add_const(); the plain (non "_barrier")
 * helper is used and nothing is returned.
 */
static __always_inline void arch_atomic_dec(atomic_t *v)
{
__atomic_add_const(-1, &v->counter);
}
/* Self-referential define; presumably signals to generic code that an arch version exists. */
#define arch_atomic_dec arch_atomic_dec
/*
 * arch_atomic_sub_and_test - subtract @i from @v->counter and test the result.
 *
 * Implemented as an addition of -@i through __atomic_add_and_test_barrier()
 * and returns that helper's boolean result unchanged.
 * NOTE(review): by its name the result is expected to be true when the new
 * counter value is zero - confirm against the helper's definition.
 */
static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
{
return __atomic_add_and_test_barrier(-i, &v->counter);
}
/* Self-referential define; presumably signals to generic code that an arch version exists. */
#define arch_atomic_sub_and_test arch_atomic_sub_and_test
/*
 * arch_atomic_dec_and_test - add the constant -1 to @v->counter and test.
 *
 * Returns the boolean result of __atomic_add_const_and_test_barrier()
 * unchanged. The literal -1 is passed directly so that it can serve as an
 * immediate operand in the helper.
 * NOTE(review): by its name the result is expected to be true when the new
 * counter value is zero - confirm against the helper's definition.
 */
static __always_inline bool arch_atomic_dec_and_test(atomic_t *v)
{
return __atomic_add_const_and_test_barrier(-1, &v->counter);
}
/* Self-referential define; presumably signals to generic code that an arch version exists. */
#define arch_atomic_dec_and_test arch_atomic_dec_and_test
/*
 * arch_atomic_inc_and_test - add the constant 1 to @v->counter and test.
 *
 * Returns the boolean result of __atomic_add_const_and_test_barrier()
 * unchanged. The literal 1 is passed directly so that it can serve as an
 * immediate operand in the helper.
 * NOTE(review): by its name the result is expected to be true when the new
 * counter value is zero - confirm against the helper's definition.
 */
static __always_inline bool arch_atomic_inc_and_test(atomic_t *v)
{
return __atomic_add_const_and_test_barrier(1, &v->counter);
}
/* Self-referential define; presumably signals to generic code that an arch version exists. */
#define arch_atomic_inc_and_test arch_atomic_inc_and_test
#define arch_atomic_sub(_i, _v) arch_atomic_add(-(int)(_i), _v)
#define arch_atomic_sub_return(_i, _v) arch_atomic_add_return(-(int)(_i), _v)
#define arch_atomic_fetch_sub(_i, _v) arch_atomic_fetch_add(-(int)(_i), _v)
@ -94,13 +124,13 @@ static __always_inline bool arch_atomic_try_cmpxchg(atomic_t *v, int *old, int n
static __always_inline s64 arch_atomic64_read(const atomic64_t *v)
{
return __atomic64_read(v);
return __atomic64_read((long *)&v->counter);
}
#define arch_atomic64_read arch_atomic64_read
static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
{
__atomic64_set(v, i);
__atomic64_set((long *)&v->counter, i);
}
#define arch_atomic64_set arch_atomic64_set
@ -122,6 +152,36 @@ static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v)
}
#define arch_atomic64_add arch_atomic64_add
/*
 * arch_atomic64_inc - add the constant 1 to the 64 bit counter of @v.
 *
 * Thin wrapper around __atomic64_add_const(); nothing is returned.
 * The cast adapts the address of v->counter to the pointer type the
 * helper is called with here (long *).
 */
static __always_inline void arch_atomic64_inc(atomic64_t *v)
{
__atomic64_add_const(1, (long *)&v->counter);
}
/* Self-referential define; presumably signals to generic code that an arch version exists. */
#define arch_atomic64_inc arch_atomic64_inc
/*
 * arch_atomic64_dec - add the constant -1 to the 64 bit counter of @v.
 *
 * Thin wrapper around __atomic64_add_const(); nothing is returned.
 * The cast adapts the address of v->counter to the pointer type the
 * helper is called with here (long *).
 */
static __always_inline void arch_atomic64_dec(atomic64_t *v)
{
__atomic64_add_const(-1, (long *)&v->counter);
}
/* Self-referential define; presumably signals to generic code that an arch version exists. */
#define arch_atomic64_dec arch_atomic64_dec
/*
 * arch_atomic64_sub_and_test - subtract @i from the 64 bit counter of @v
 * and test the result.
 *
 * Implemented as an addition of -@i through
 * __atomic64_add_and_test_barrier(); that helper's boolean result is
 * returned unchanged.
 * NOTE(review): by its name the result is expected to be true when the new
 * counter value is zero - confirm against the helper's definition.
 */
static __always_inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v)
{
return __atomic64_add_and_test_barrier(-i, (long *)&v->counter);
}
/* Self-referential define; presumably signals to generic code that an arch version exists. */
#define arch_atomic64_sub_and_test arch_atomic64_sub_and_test
/*
 * arch_atomic64_dec_and_test - add the constant -1 to the 64 bit counter
 * of @v and test the result.
 *
 * Returns the boolean result of __atomic64_add_const_and_test_barrier()
 * unchanged. The literal -1 is passed directly so that it can serve as an
 * immediate operand in the helper.
 * NOTE(review): by its name the result is expected to be true when the new
 * counter value is zero - confirm against the helper's definition.
 */
static __always_inline bool arch_atomic64_dec_and_test(atomic64_t *v)
{
return __atomic64_add_const_and_test_barrier(-1, (long *)&v->counter);
}
/* Self-referential define; presumably signals to generic code that an arch version exists. */
#define arch_atomic64_dec_and_test arch_atomic64_dec_and_test
/*
 * arch_atomic64_inc_and_test - add the constant 1 to the 64 bit counter
 * of @v and test the result.
 *
 * Returns the boolean result of __atomic64_add_const_and_test_barrier()
 * unchanged. The literal 1 is passed directly so that it can serve as an
 * immediate operand in the helper.
 * NOTE(review): by its name the result is expected to be true when the new
 * counter value is zero - confirm against the helper's definition.
 */
static __always_inline bool arch_atomic64_inc_and_test(atomic64_t *v)
{
return __atomic64_add_const_and_test_barrier(1, (long *)&v->counter);
}
/* Self-referential define; presumably signals to generic code that an arch version exists. */
#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test
static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new)
{
return arch_xchg(&v->counter, new);

View File

@ -10,50 +10,51 @@
#include <linux/limits.h>
#include <asm/march.h>
#include <asm/asm.h>
static __always_inline int __atomic_read(const atomic_t *v)
static __always_inline int __atomic_read(const int *ptr)
{
int c;
int val;
asm volatile(
" l %[c],%[counter]\n"
: [c] "=d" (c) : [counter] "R" (v->counter));
return c;
" l %[val],%[ptr]\n"
: [val] "=d" (val) : [ptr] "R" (*ptr));
return val;
}
static __always_inline void __atomic_set(atomic_t *v, int i)
static __always_inline void __atomic_set(int *ptr, int val)
{
if (__builtin_constant_p(i) && i >= S16_MIN && i <= S16_MAX) {
if (__builtin_constant_p(val) && val >= S16_MIN && val <= S16_MAX) {
asm volatile(
" mvhi %[counter], %[i]\n"
: [counter] "=Q" (v->counter) : [i] "K" (i));
" mvhi %[ptr],%[val]\n"
: [ptr] "=Q" (*ptr) : [val] "K" (val));
} else {
asm volatile(
" st %[i],%[counter]\n"
: [counter] "=R" (v->counter) : [i] "d" (i));
" st %[val],%[ptr]\n"
: [ptr] "=R" (*ptr) : [val] "d" (val));
}
}
static __always_inline s64 __atomic64_read(const atomic64_t *v)
static __always_inline long __atomic64_read(const long *ptr)
{
s64 c;
long val;
asm volatile(
" lg %[c],%[counter]\n"
: [c] "=d" (c) : [counter] "RT" (v->counter));
return c;
" lg %[val],%[ptr]\n"
: [val] "=d" (val) : [ptr] "RT" (*ptr));
return val;
}
static __always_inline void __atomic64_set(atomic64_t *v, s64 i)
static __always_inline void __atomic64_set(long *ptr, long val)
{
if (__builtin_constant_p(i) && i >= S16_MIN && i <= S16_MAX) {
if (__builtin_constant_p(val) && val >= S16_MIN && val <= S16_MAX) {
asm volatile(
" mvghi %[counter], %[i]\n"
: [counter] "=Q" (v->counter) : [i] "K" (i));
" mvghi %[ptr],%[val]\n"
: [ptr] "=Q" (*ptr) : [val] "K" (val));
} else {
asm volatile(
" stg %[i],%[counter]\n"
: [counter] "=RT" (v->counter) : [i] "d" (i));
" stg %[val],%[ptr]\n"
: [ptr] "=RT" (*ptr) : [val] "d" (val));
}
}
@ -73,7 +74,7 @@ static __always_inline op_type op_name(op_type val, op_type *ptr) \
} \
#define __ATOMIC_OPS(op_name, op_type, op_string) \
__ATOMIC_OP(op_name, op_type, op_string, "\n") \
__ATOMIC_OP(op_name, op_type, op_string, "") \
__ATOMIC_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n")
__ATOMIC_OPS(__atomic_add, int, "laa")
@ -99,7 +100,7 @@ static __always_inline void op_name(op_type val, op_type *ptr) \
}
#define __ATOMIC_CONST_OPS(op_name, op_type, op_string) \
__ATOMIC_CONST_OP(op_name, op_type, op_string, "\n") \
__ATOMIC_CONST_OP(op_name, op_type, op_string, "") \
__ATOMIC_CONST_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n")
__ATOMIC_CONST_OPS(__atomic_add_const, int, "asi")
@ -169,4 +170,76 @@ __ATOMIC64_OPS(__atomic64_xor, "xgr")
#endif /* MARCH_HAS_Z196_FEATURES */
#if defined(MARCH_HAS_Z196_FEATURES) && defined(__HAVE_ASM_FLAG_OUTPUTS__)
/*
 * __ATOMIC_TEST_OP - generate an "add and test" helper named @op_name.
 *
 * @op_name:    name of the generated function
 * @op_type:    C type of the value and of the object pointed to
 * @op_string:  instruction mnemonic, emitted as "<op> tmp,val,ptr"
 * @op_barrier: extra assembler text emitted after the instruction
 *              ("" or "bcr 14,0\n" in the instantiations below)
 *
 * The generated function applies the instruction to *ptr and captures the
 * resulting condition code through the "=@cc" flag output. It returns true
 * for condition codes 0 and 2; for the add-logical instructions used with
 * this macro both codes denote a zero result (without / with carry), see
 * the z/Architecture Principles of Operation. The register output tmp is
 * only needed as an operand and is otherwise unused.
 *
 * The macro body ends at the closing brace without a line continuation, so
 * that it does not depend on a blank line following the definition.
 */
#define __ATOMIC_TEST_OP(op_name, op_type, op_string, op_barrier) \
static __always_inline bool op_name(op_type val, op_type *ptr) \
{ \
	op_type tmp; \
	int cc; \
	\
	asm volatile( \
		op_string " %[tmp],%[val],%[ptr]\n" \
		op_barrier \
		: "=@cc" (cc), [tmp] "=d" (tmp), [ptr] "+QS" (*ptr) \
		: [val] "d" (val) \
		: "memory"); \
	return (cc == 0) || (cc == 2); \
}
#define __ATOMIC_TEST_OPS(op_name, op_type, op_string) \
__ATOMIC_TEST_OP(op_name, op_type, op_string, "") \
__ATOMIC_TEST_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n")
__ATOMIC_TEST_OPS(__atomic_add_and_test, int, "laal")
__ATOMIC_TEST_OPS(__atomic64_add_and_test, long, "laalg")
#undef __ATOMIC_TEST_OPS
#undef __ATOMIC_TEST_OP
/*
 * __ATOMIC_CONST_TEST_OP - generate an "add constant and test" helper.
 *
 * @op_name:    name of the generated function
 * @op_type:    C type of the value and of the object pointed to
 * @op_string:  instruction mnemonic, emitted as "<op> ptr,val"
 * @op_barrier: extra assembler text emitted after the instruction
 *
 * @val is passed with the "i" constraint, i.e. it must be usable as an
 * immediate operand at the call site. The condition code is captured via
 * the "=@cc" flag output and the generated function returns true for
 * condition codes 0 and 2.
 * NOTE(review): for add-logical-immediate instructions these two codes are
 * understood to mean "result is zero" (without / with carry) - confirm
 * against the z/Architecture Principles of Operation.
 */
#define __ATOMIC_CONST_TEST_OP(op_name, op_type, op_string, op_barrier) \
static __always_inline bool op_name(op_type val, op_type *ptr) \
{ \
int cc; \
\
asm volatile( \
op_string " %[ptr],%[val]\n" \
op_barrier \
: "=@cc" (cc), [ptr] "+QS" (*ptr) \
: [val] "i" (val) \
: "memory"); \
return (cc == 0) || (cc == 2); \
}
#define __ATOMIC_CONST_TEST_OPS(op_name, op_type, op_string) \
__ATOMIC_CONST_TEST_OP(op_name, op_type, op_string, "") \
__ATOMIC_CONST_TEST_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n")
__ATOMIC_CONST_TEST_OPS(__atomic_add_const_and_test, int, "alsi")
__ATOMIC_CONST_TEST_OPS(__atomic64_add_const_and_test, long, "algsi")
#undef __ATOMIC_CONST_TEST_OPS
#undef __ATOMIC_CONST_TEST_OP
#else /* defined(MARCH_HAS_Z196_FEATURES) && defined(__HAVE_ASM_FLAG_OUTPUTS__) */
/*
 * Fallback __ATOMIC_TEST_OP, used when the flag-output based variants are
 * not available: generate @op_name as a wrapper around @op_func.
 *
 * The generated function returns true when op_func(val, ptr) yields -val.
 * NOTE(review): this presumably relies on @op_func returning the value
 * *ptr had before the addition, so that "old == -val" is equivalent to
 * "old + val == 0" - confirm against the @op_func definitions.
 */
#define __ATOMIC_TEST_OP(op_name, op_func, op_type) \
static __always_inline bool op_name(op_type val, op_type *ptr) \
{ \
return op_func(val, ptr) == -val; \
}
__ATOMIC_TEST_OP(__atomic_add_and_test, __atomic_add, int)
__ATOMIC_TEST_OP(__atomic_add_and_test_barrier, __atomic_add_barrier, int)
__ATOMIC_TEST_OP(__atomic_add_const_and_test, __atomic_add, int)
__ATOMIC_TEST_OP(__atomic_add_const_and_test_barrier, __atomic_add_barrier, int)
__ATOMIC_TEST_OP(__atomic64_add_and_test, __atomic64_add, long)
__ATOMIC_TEST_OP(__atomic64_add_and_test_barrier, __atomic64_add_barrier, long)
__ATOMIC_TEST_OP(__atomic64_add_const_and_test, __atomic64_add, long)
__ATOMIC_TEST_OP(__atomic64_add_const_and_test_barrier, __atomic64_add_barrier, long)
#undef __ATOMIC_TEST_OP
#endif /* defined(MARCH_HAS_Z196_FEATURES) && defined(__HAVE_ASM_FLAG_OUTPUTS__) */
#endif /* __ARCH_S390_ATOMIC_OPS__ */

View File

@ -25,7 +25,7 @@ static inline __wsum cksm(const void *buff, int len, __wsum sum)
instrument_read(buff, len);
kmsan_check_memory(buff, len);
asm volatile("\n"
asm volatile(
"0: cksm %[sum],%[rp]\n"
" jo 0b\n"
: [sum] "+&d" (sum), [rp] "+&d" (rp.pair) : : "cc", "memory");

View File

@ -85,6 +85,10 @@ int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view,
int area, debug_entry_t *entry,
char *out_buf, size_t out_buf_size);
#define DEBUG_SPRINTF_MAX_ARGS 10
int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
char *out_buf, size_t out_buf_size,
const char *inbuf);
struct debug_view {
char name[DEBUG_MAX_NAME_LEN];
debug_prolog_proc_t *prolog_proc;
@ -114,6 +118,9 @@ debug_info_t *debug_register_mode(const char *name, int pages, int nr_areas,
int buf_size, umode_t mode, uid_t uid,
gid_t gid);
ssize_t debug_dump(debug_info_t *id, struct debug_view *view,
char *buf, size_t buf_size, bool reverse);
void debug_unregister(debug_info_t *id);
void debug_set_level(debug_info_t *id, int new_level);

View File

@ -38,6 +38,7 @@ enum diag_stat_enum {
DIAG_STAT_X308,
DIAG_STAT_X318,
DIAG_STAT_X320,
DIAG_STAT_X324,
DIAG_STAT_X49C,
DIAG_STAT_X500,
NR_DIAG_STAT

View File

@ -103,7 +103,7 @@ static inline void fpu_lfpc_safe(unsigned int *fpc)
u32 tmp;
instrument_read(fpc, sizeof(*fpc));
asm volatile("\n"
asm_inline volatile(
"0: lfpc %[fpc]\n"
"1: nopr %%r7\n"
".pushsection .fixup, \"ax\"\n"
@ -188,7 +188,7 @@ static __always_inline void fpu_vgfmg(u8 v1, u8 v2, u8 v3)
static __always_inline void fpu_vl(u8 v1, const void *vxr)
{
instrument_read(vxr, sizeof(__vector128));
asm volatile("\n"
asm volatile(
" la 1,%[vxr]\n"
" VL %[v1],0,,1\n"
:
@ -246,7 +246,7 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
size = min(index + 1, sizeof(__vector128));
instrument_read(vxr, size);
asm volatile("\n"
asm volatile(
" la 1,%[vxr]\n"
" VLL %[v1],%[index],0,1\n"
:
@ -284,7 +284,7 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
} *_v = (void *)(_vxrs); \
\
instrument_read(_v, size); \
asm volatile("\n" \
asm volatile( \
" la 1,%[vxrs]\n" \
" VLM %[v1],%[v3],0,1\n" \
: \
@ -367,7 +367,7 @@ static __always_inline void fpu_vsrlb(u8 v1, u8 v2, u8 v3)
static __always_inline void fpu_vst(u8 v1, const void *vxr)
{
instrument_write(vxr, sizeof(__vector128));
asm volatile("\n"
asm volatile(
" la 1,%[vxr]\n"
" VST %[v1],0,,1\n"
: [vxr] "=R" (*(__vector128 *)vxr)
@ -396,7 +396,7 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
size = min(index + 1, sizeof(__vector128));
instrument_write(vxr, size);
asm volatile("\n"
asm volatile(
" la 1,%[vxr]\n"
" VSTL %[v1],%[index],0,1\n"
: [vxr] "=R" (*(u8 *)vxr)
@ -430,7 +430,7 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
} *_v = (void *)(_vxrs); \
\
instrument_write(_v, size); \
asm volatile("\n" \
asm volatile( \
" la 1,%[vxrs]\n" \
" VSTM %[v1],%[v3],0,1\n" \
: [vxrs] "=R" (*_v) \

View File

@ -21,11 +21,12 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte, unsigned long sz);
void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte);
#define __HAVE_ARCH_HUGE_PTEP_GET
extern pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep);
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
static inline void arch_clear_hugetlb_flags(struct folio *folio)
{
@ -56,6 +57,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
pte_t pte, int dirty)
{
int changed = !pte_same(huge_ptep_get(vma->vm_mm, addr, ptep), pte);
if (changed) {
huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
__set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
@ -68,21 +70,10 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep);
__set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte));
}
#define __HAVE_ARCH_HUGE_PTE_NONE
static inline int huge_pte_none(pte_t pte)
{
return pte_none(pte);
}
#define __HAVE_ARCH_HUGE_PTE_NONE_MOSTLY
static inline int huge_pte_none_mostly(pte_t pte)
{
return huge_pte_none(pte) || is_pte_marker(pte);
}
#define __HAVE_ARCH_HUGE_PTE_MKUFFD_WP
static inline pte_t huge_pte_mkuffd_wp(pte_t pte)
{

View File

@ -7,7 +7,6 @@
#ifndef PAGE_STATES_H
#define PAGE_STATES_H
#include <asm/sections.h>
#include <asm/page.h>
#define ESSA_GET_STATE 0
@ -21,7 +20,7 @@
#define ESSA_MAX ESSA_SET_STABLE_NODAT
extern int __bootdata_preserved(cmma_flag);
extern int cmma_flag;
static __always_inline unsigned long essa(unsigned long paddr, unsigned char cmd)
{

View File

@ -17,7 +17,6 @@
#include <linux/page-flags.h>
#include <linux/radix-tree.h>
#include <linux/atomic.h>
#include <asm/sections.h>
#include <asm/ctlreg.h>
#include <asm/bug.h>
#include <asm/page.h>
@ -35,7 +34,7 @@ enum {
PG_DIRECT_MAP_MAX
};
extern atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
extern atomic_long_t direct_pages_count[PG_DIRECT_MAP_MAX];
static inline void update_page_count(int level, long count)
{
@ -85,14 +84,14 @@ extern unsigned long zero_page_mask;
* happen without trampolines and in addition the placement within a
* 2GB frame is branch prediction unit friendly.
*/
extern unsigned long __bootdata_preserved(VMALLOC_START);
extern unsigned long __bootdata_preserved(VMALLOC_END);
extern unsigned long VMALLOC_START;
extern unsigned long VMALLOC_END;
#define VMALLOC_DEFAULT_SIZE ((512UL << 30) - MODULES_LEN)
extern struct page *__bootdata_preserved(vmemmap);
extern unsigned long __bootdata_preserved(vmemmap_size);
extern struct page *vmemmap;
extern unsigned long vmemmap_size;
extern unsigned long __bootdata_preserved(MODULES_VADDR);
extern unsigned long __bootdata_preserved(MODULES_END);
extern unsigned long MODULES_VADDR;
extern unsigned long MODULES_END;
#define MODULES_VADDR MODULES_VADDR
#define MODULES_END MODULES_END
#define MODULES_LEN (1UL << 31)
@ -125,6 +124,8 @@ static inline int is_module_addr(void *addr)
#define KASLR_LEN 0UL
#endif
void setup_protection_map(void);
/*
* A 64 bit pagetable entry of S390 has following format:
* | PFRA |0IPC| OS |
@ -443,77 +444,86 @@ static inline int is_module_addr(void *addr)
/*
* Page protection definitions.
*/
#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_INVALID | _PAGE_PROTECT)
#define PAGE_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | \
#define __PAGE_NONE (_PAGE_PRESENT | _PAGE_INVALID | _PAGE_PROTECT)
#define __PAGE_RO (_PAGE_PRESENT | _PAGE_READ | \
_PAGE_NOEXEC | _PAGE_INVALID | _PAGE_PROTECT)
#define PAGE_RX __pgprot(_PAGE_PRESENT | _PAGE_READ | \
#define __PAGE_RX (_PAGE_PRESENT | _PAGE_READ | \
_PAGE_INVALID | _PAGE_PROTECT)
#define PAGE_RW __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
#define __PAGE_RW (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
_PAGE_NOEXEC | _PAGE_INVALID | _PAGE_PROTECT)
#define PAGE_RWX __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
#define __PAGE_RWX (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
_PAGE_INVALID | _PAGE_PROTECT)
#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
#define __PAGE_SHARED (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
_PAGE_YOUNG | _PAGE_DIRTY | _PAGE_NOEXEC)
#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
#define __PAGE_KERNEL (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
_PAGE_YOUNG | _PAGE_DIRTY | _PAGE_NOEXEC)
#define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \
#define __PAGE_KERNEL_RO (_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \
_PAGE_PROTECT | _PAGE_NOEXEC)
#define PAGE_KERNEL_EXEC __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
_PAGE_YOUNG | _PAGE_DIRTY)
/*
* On s390 the page table entry has an invalid bit and a read-only bit.
* Read permission implies execute permission and write permission
* implies read permission.
*/
/*xwr*/
extern unsigned long page_noexec_mask;
#define __pgprot_page_mask(x) __pgprot((x) & page_noexec_mask)
#define PAGE_NONE __pgprot_page_mask(__PAGE_NONE)
#define PAGE_RO __pgprot_page_mask(__PAGE_RO)
#define PAGE_RX __pgprot_page_mask(__PAGE_RX)
#define PAGE_RW __pgprot_page_mask(__PAGE_RW)
#define PAGE_RWX __pgprot_page_mask(__PAGE_RWX)
#define PAGE_SHARED __pgprot_page_mask(__PAGE_SHARED)
#define PAGE_KERNEL __pgprot_page_mask(__PAGE_KERNEL)
#define PAGE_KERNEL_RO __pgprot_page_mask(__PAGE_KERNEL_RO)
/*
* Segment entry (large page) protection definitions.
*/
#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_PRESENT | \
#define __SEGMENT_NONE (_SEGMENT_ENTRY_PRESENT | \
_SEGMENT_ENTRY_INVALID | \
_SEGMENT_ENTRY_PROTECT)
#define SEGMENT_RO __pgprot(_SEGMENT_ENTRY_PRESENT | \
#define __SEGMENT_RO (_SEGMENT_ENTRY_PRESENT | \
_SEGMENT_ENTRY_PROTECT | \
_SEGMENT_ENTRY_READ | \
_SEGMENT_ENTRY_NOEXEC)
#define SEGMENT_RX __pgprot(_SEGMENT_ENTRY_PRESENT | \
#define __SEGMENT_RX (_SEGMENT_ENTRY_PRESENT | \
_SEGMENT_ENTRY_PROTECT | \
_SEGMENT_ENTRY_READ)
#define SEGMENT_RW __pgprot(_SEGMENT_ENTRY_PRESENT | \
#define __SEGMENT_RW (_SEGMENT_ENTRY_PRESENT | \
_SEGMENT_ENTRY_READ | \
_SEGMENT_ENTRY_WRITE | \
_SEGMENT_ENTRY_NOEXEC)
#define SEGMENT_RWX __pgprot(_SEGMENT_ENTRY_PRESENT | \
#define __SEGMENT_RWX (_SEGMENT_ENTRY_PRESENT | \
_SEGMENT_ENTRY_READ | \
_SEGMENT_ENTRY_WRITE)
#define SEGMENT_KERNEL __pgprot(_SEGMENT_ENTRY | \
#define __SEGMENT_KERNEL (_SEGMENT_ENTRY | \
_SEGMENT_ENTRY_LARGE | \
_SEGMENT_ENTRY_READ | \
_SEGMENT_ENTRY_WRITE | \
_SEGMENT_ENTRY_YOUNG | \
_SEGMENT_ENTRY_DIRTY | \
_SEGMENT_ENTRY_NOEXEC)
#define SEGMENT_KERNEL_RO __pgprot(_SEGMENT_ENTRY | \
#define __SEGMENT_KERNEL_RO (_SEGMENT_ENTRY | \
_SEGMENT_ENTRY_LARGE | \
_SEGMENT_ENTRY_READ | \
_SEGMENT_ENTRY_YOUNG | \
_SEGMENT_ENTRY_PROTECT | \
_SEGMENT_ENTRY_NOEXEC)
#define SEGMENT_KERNEL_EXEC __pgprot(_SEGMENT_ENTRY | \
_SEGMENT_ENTRY_LARGE | \
_SEGMENT_ENTRY_READ | \
_SEGMENT_ENTRY_WRITE | \
_SEGMENT_ENTRY_YOUNG | \
_SEGMENT_ENTRY_DIRTY)
extern unsigned long segment_noexec_mask;
#define __pgprot_segment_mask(x) __pgprot((x) & segment_noexec_mask)
#define SEGMENT_NONE __pgprot_segment_mask(__SEGMENT_NONE)
#define SEGMENT_RO __pgprot_segment_mask(__SEGMENT_RO)
#define SEGMENT_RX __pgprot_segment_mask(__SEGMENT_RX)
#define SEGMENT_RW __pgprot_segment_mask(__SEGMENT_RW)
#define SEGMENT_RWX __pgprot_segment_mask(__SEGMENT_RWX)
#define SEGMENT_KERNEL __pgprot_segment_mask(__SEGMENT_KERNEL)
#define SEGMENT_KERNEL_RO __pgprot_segment_mask(__SEGMENT_KERNEL_RO)
/*
* Region3 entry (large page) protection definitions.
*/
#define REGION3_KERNEL __pgprot(_REGION_ENTRY_TYPE_R3 | \
#define __REGION3_KERNEL (_REGION_ENTRY_TYPE_R3 | \
_REGION3_ENTRY_PRESENT | \
_REGION3_ENTRY_LARGE | \
_REGION3_ENTRY_READ | \
@ -521,20 +531,20 @@ static inline int is_module_addr(void *addr)
_REGION3_ENTRY_YOUNG | \
_REGION3_ENTRY_DIRTY | \
_REGION_ENTRY_NOEXEC)
#define REGION3_KERNEL_RO __pgprot(_REGION_ENTRY_TYPE_R3 | \
#define __REGION3_KERNEL_RO (_REGION_ENTRY_TYPE_R3 | \
_REGION3_ENTRY_PRESENT | \
_REGION3_ENTRY_LARGE | \
_REGION3_ENTRY_READ | \
_REGION3_ENTRY_YOUNG | \
_REGION_ENTRY_PROTECT | \
_REGION_ENTRY_NOEXEC)
#define REGION3_KERNEL_EXEC __pgprot(_REGION_ENTRY_TYPE_R3 | \
_REGION3_ENTRY_PRESENT | \
_REGION3_ENTRY_LARGE | \
_REGION3_ENTRY_READ | \
_REGION3_ENTRY_WRITE | \
_REGION3_ENTRY_YOUNG | \
_REGION3_ENTRY_DIRTY)
extern unsigned long region_noexec_mask;
#define __pgprot_region_mask(x) __pgprot((x) & region_noexec_mask)
#define REGION3_KERNEL __pgprot_region_mask(__REGION3_KERNEL)
#define REGION3_KERNEL_RO __pgprot_region_mask(__REGION3_KERNEL_RO)
static inline bool mm_p4d_folded(struct mm_struct *mm)
{
@ -1435,8 +1445,6 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
pte_t __pte;
__pte = __pte(physpage | pgprot_val(pgprot));
if (!MACHINE_HAS_NX)
__pte = clear_pte_bit(__pte, __pgprot(_PAGE_NOEXEC));
return pte_mkyoung(__pte);
}
@ -1804,8 +1812,6 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t entry)
{
if (!MACHINE_HAS_NX)
entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC));
set_pmd(pmdp, entry);
}

View File

@ -8,12 +8,19 @@
#include <asm/cmpxchg.h>
#include <asm/march.h>
#ifdef MARCH_HAS_Z196_FEATURES
/* We use the MSB mostly because it's available */
#define PREEMPT_NEED_RESCHED 0x80000000
/*
* We use the PREEMPT_NEED_RESCHED bit as an inverted NEED_RESCHED such
* that a decrement hitting 0 means we can and should reschedule.
*/
#define PREEMPT_ENABLED (0 + PREEMPT_NEED_RESCHED)
/*
* We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users
* that think a non-zero value indicates we cannot preempt.
*/
static __always_inline int preempt_count(void)
{
return READ_ONCE(get_lowcore()->preempt_count) & ~PREEMPT_NEED_RESCHED;
@ -29,6 +36,15 @@ static __always_inline void preempt_count_set(int pc)
} while (!arch_try_cmpxchg(&get_lowcore()->preempt_count, &old, new));
}
/*
* We fold the NEED_RESCHED bit into the preempt count such that
* preempt_enable() can decrement and test for needing to reschedule with a
* short instruction sequence.
*
* We invert the actual bit, so that when the decrement hits 0 we know we both
* need to resched (the bit is cleared) and can resched (no preempt count).
*/
static __always_inline void set_preempt_need_resched(void)
{
__atomic_and(~PREEMPT_NEED_RESCHED, &get_lowcore()->preempt_count);
@ -64,67 +80,24 @@ static __always_inline void __preempt_count_sub(int val)
__preempt_count_add(-val);
}
/*
* Because we keep PREEMPT_NEED_RESCHED set when we do _not_ need to reschedule
* a decrement which hits zero means we have no preempt_count and should
* reschedule.
*/
static __always_inline bool __preempt_count_dec_and_test(void)
{
return __atomic_add(-1, &get_lowcore()->preempt_count) == 1;
return __atomic_add_const_and_test(-1, &get_lowcore()->preempt_count);
}
/*
* Returns true when we need to resched and can (barring IRQ state).
*/
static __always_inline bool should_resched(int preempt_offset)
{
return unlikely(READ_ONCE(get_lowcore()->preempt_count) ==
preempt_offset);
return unlikely(READ_ONCE(get_lowcore()->preempt_count) == preempt_offset);
}
#else /* MARCH_HAS_Z196_FEATURES */
#define PREEMPT_ENABLED (0)
static __always_inline int preempt_count(void)
{
return READ_ONCE(get_lowcore()->preempt_count);
}
static __always_inline void preempt_count_set(int pc)
{
get_lowcore()->preempt_count = pc;
}
static __always_inline void set_preempt_need_resched(void)
{
}
static __always_inline void clear_preempt_need_resched(void)
{
}
static __always_inline bool test_preempt_need_resched(void)
{
return false;
}
static __always_inline void __preempt_count_add(int val)
{
get_lowcore()->preempt_count += val;
}
static __always_inline void __preempt_count_sub(int val)
{
get_lowcore()->preempt_count -= val;
}
static __always_inline bool __preempt_count_dec_and_test(void)
{
return !--get_lowcore()->preempt_count && tif_need_resched();
}
static __always_inline bool should_resched(int preempt_offset)
{
return unlikely(preempt_count() == preempt_offset &&
tif_need_resched());
}
#endif /* MARCH_HAS_Z196_FEATURES */
#define init_task_preempt_count(p) do { } while (0)
/* Deferred to CPU bringup time */
#define init_idle_preempt_count(p, cpu) do { } while (0)

View File

@ -16,6 +16,11 @@
/* 24 + 16 * SCLP_MAX_CORES */
#define EXT_SCCB_READ_CPU (3 * PAGE_SIZE)
#define SCLP_ERRNOTIFY_AQ_RESET 0
#define SCLP_ERRNOTIFY_AQ_REPAIR 1
#define SCLP_ERRNOTIFY_AQ_INFO_LOG 2
#define SCLP_ERRNOTIFY_AQ_OPTICS_DATA 3
#ifndef __ASSEMBLY__
#include <linux/uio.h>
#include <asm/chpid.h>
@ -89,6 +94,7 @@ struct sclp_info {
unsigned char has_gisaf : 1;
unsigned char has_diag318 : 1;
unsigned char has_diag320 : 1;
unsigned char has_diag324 : 1;
unsigned char has_sipl : 1;
unsigned char has_sipl_eckd : 1;
unsigned char has_dirq : 1;
@ -111,6 +117,34 @@ struct sclp_info {
};
extern struct sclp_info sclp;
/*
 * Header placed at the start of an SCCB (see struct err_notify_sccb).
 * Declared __packed, so the members are laid out without padding:
 * 2 + 1 + 3 + 2 = 8 bytes.
 * NOTE(review): presumably this mirrors a firmware-defined layout - do not
 * reorder or resize members without checking the interface description.
 */
struct sccb_header {
u16 length;
u8 function_code;
u8 control_mask[3];
u16 response_code;
} __packed;
/*
 * Header placed at the start of an event buffer (see struct
 * err_notify_evbuf). Declared __packed: 2 + 1 + 1 + 2 = 6 bytes without
 * padding.
 * NOTE(review): presumably this mirrors a firmware-defined layout - confirm
 * before changing.
 */
struct evbuf_header {
u16 length;
u8 type;
u8 flags;
u16 _reserved;
} __packed;
/*
 * Error notification event buffer: an event buffer header followed by
 * fixed fields and a variable amount of payload.
 *
 * NOTE(review): @action presumably carries one of the SCLP_ERRNOTIFY_AQ_*
 * values, and @fh / @fid presumably identify a PCI function (handle / id) -
 * confirm against the users of this structure.
 */
struct err_notify_evbuf {
struct evbuf_header header;
u8 action;
u8 atype;
u32 fh;
u32 fid;
u8 data[];	/* flexible array member: must remain the last member */
} __packed;
/*
 * SCCB carrying a single error notification event buffer.
 *
 * @evbuf ends in a flexible array member, so it has to stay the last member
 * of this structure; the storage for evbuf.data[] must be provided by
 * whoever allocates the SCCB.
 */
struct err_notify_sccb {
struct sccb_header header;
struct err_notify_evbuf evbuf;
} __packed;
struct zpci_report_error_header {
u8 version; /* Interface version byte */
u8 action; /* Action qualifier byte

View File

@ -0,0 +1,24 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* Diag ioctls and their associated structure definitions.
*
* Copyright IBM Corp. 2024
*/
#ifndef __S390_UAPI_ASM_DIAG_H
#define __S390_UAPI_ASM_DIAG_H
#include <linux/types.h>
#define DIAG_MAGIC_STR 'D'
/*
 * Argument structure of the DIAG324_GET_PIBBUF ioctl (declared _IOWR, i.e.
 * data is passed in both directions). Part of the user space ABI: two
 * 64 bit members, 16 bytes in total - the layout must not change.
 *
 * NOTE(review): @address presumably holds the address of a user space
 * buffer and @sequence a sequence number associated with the buffer
 * contents - confirm against the ioctl implementation.
 */
struct diag324_pib {
__u64 address;
__u64 sequence;
};
/* Diag ioctl definitions */
#define DIAG324_GET_PIBBUF _IOWR(DIAG_MAGIC_STR, 0x77, struct diag324_pib)
#define DIAG324_GET_PIBLEN _IOR(DIAG_MAGIC_STR, 0x78, size_t)
#endif /* __S390_UAPI_ASM_DIAG_H */

View File

@ -38,12 +38,13 @@ CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls
obj-y := head64.o traps.o time.o process.o early.o setup.o idle.o vtime.o
obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o cpufeature.o
obj-y += debug.o irq.o ipl.o dis.o vdso.o cpufeature.o
obj-y += sysinfo.o lgr.o os_info.o ctlreg.o
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
obj-y += entry.o reipl.o kdebugfs.o alternative.o
obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o uv.o wti.o
obj-y += diag/
extra-y += vmlinux.lds

View File

@ -2,6 +2,7 @@
#include <linux/pgtable.h>
#include <asm/abs_lowcore.h>
#include <asm/sections.h>
unsigned long __bootdata_preserved(__abs_lowcore);
int __bootdata_preserved(relocate_lowcore);

View File

@ -24,6 +24,7 @@
#include <linux/export.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/math.h>
#include <linux/minmax.h>
#include <linux/debugfs.h>
@ -94,9 +95,6 @@ static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view,
static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
char *out_buf, size_t out_buf_size,
const char *in_buf);
static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
char *out_buf, size_t out_buf_size,
const char *inbuf);
static void debug_areas_swap(debug_info_t *a, debug_info_t *b);
static void debug_events_append(debug_info_t *dest, debug_info_t *src);
@ -354,7 +352,10 @@ static debug_info_t *debug_info_copy(debug_info_t *in, int mode)
for (i = 0; i < in->nr_areas; i++) {
for (j = 0; j < in->pages_per_area; j++)
memcpy(rc->areas[i][j], in->areas[i][j], PAGE_SIZE);
rc->active_pages[i] = in->active_pages[i];
rc->active_entries[i] = in->active_entries[i];
}
rc->active_area = in->active_area;
out:
spin_unlock_irqrestore(&in->lock, flags);
return rc;
@ -422,11 +423,17 @@ static int debug_format_entry(file_private_info_t *p_info)
return len;
}
/*
* debug_next_entry:
* - goto next entry in p_info
/**
* debug_next_entry - Go to the next entry
* @p_info: Private info that is manipulated
*
* Sets the current position in @p_info to the next entry. If no further entry
* exists the current position is set to one after the end and the return value
* indicates that no further entries exist.
*
* Return: True if there are more following entries, false otherwise
*/
static inline int debug_next_entry(file_private_info_t *p_info)
static inline bool debug_next_entry(file_private_info_t *p_info)
{
debug_info_t *id;
@ -434,10 +441,10 @@ static inline int debug_next_entry(file_private_info_t *p_info)
if (p_info->act_entry == DEBUG_PROLOG_ENTRY) {
p_info->act_entry = 0;
p_info->act_page = 0;
goto out;
return true;
}
if (!id->areas)
return 1;
return false;
p_info->act_entry += id->entry_size;
/* switch to next page, if we reached the end of the page */
if (p_info->act_entry > (PAGE_SIZE - id->entry_size)) {
@ -450,10 +457,87 @@ static inline int debug_next_entry(file_private_info_t *p_info)
p_info->act_page = 0;
}
if (p_info->act_area >= id->nr_areas)
return 1;
return false;
}
out:
return 0;
return true;
}
/**
 * debug_to_act_entry - Position on the active entry of the snapshot
 * @p_info: Private info whose position is updated
 *
 * Copies the active area of @p_info->debug_info_snap, together with the
 * active page and active entry recorded for that area, into the current
 * position of @p_info.
 */
static void debug_to_act_entry(file_private_info_t *p_info)
{
	debug_info_t *snap = p_info->debug_info_snap;

	p_info->act_area = snap->active_area;
	p_info->act_page = snap->active_pages[p_info->act_area];
	p_info->act_entry = snap->active_entries[p_info->act_area];
}
/**
 * debug_prev_entry - Go to the previous entry
 * @p_info: Private info that is manipulated
 *
 * Sets the current position in @p_info to the previous entry. If the current
 * position is DEBUG_PROLOG_ENTRY, the position is first moved to the active
 * entry of the snapshot and the step back is taken from there. Stepping back
 * wraps from the start of a page to the last entry of the previous page, from
 * the first page of an area to the last page of the previous area, and from
 * the first area to the last one.
 *
 * Return: True if there are more previous entries, false if the snapshot has
 * no areas or the position has come full circle back to the active entry.
 */
static inline bool debug_prev_entry(file_private_info_t *p_info)
{
debug_info_t *id;
id = p_info->debug_info_snap;
/* A walk that still sits on the prolog starts at the active entry */
if (p_info->act_entry == DEBUG_PROLOG_ENTRY)
debug_to_act_entry(p_info);
if (!id->areas)
return false;
p_info->act_entry -= id->entry_size;
/* switch to prev page, if we reached the beginning of the page */
if (p_info->act_entry < 0) {
/* end of previous page */
/* offset of the last entry that fits completely into a page */
p_info->act_entry = rounddown(PAGE_SIZE, id->entry_size) - id->entry_size;
p_info->act_page--;
if (p_info->act_page < 0) {
/* previous area */
p_info->act_area--;
p_info->act_page = id->pages_per_area - 1;
}
/* wrap around from the first area to the last one */
if (p_info->act_area < 0)
p_info->act_area = (id->nr_areas - 1) % id->nr_areas;
}
/* check full circle */
if (id->active_area == p_info->act_area &&
id->active_pages[id->active_area] == p_info->act_page &&
id->active_entries[id->active_area] == p_info->act_entry)
return false;
return true;
}
/**
 * debug_move_entry - Step to the neighbouring entry in the given direction
 * @p_info: Private info that is manipulated
 * @reverse: Step backwards (previous entry) if true, forwards otherwise
 *
 * Thin dispatcher around debug_prev_entry() and debug_next_entry().
 *
 * Return: True if there are further entries in the chosen direction,
 * false otherwise.
 */
static bool debug_move_entry(file_private_info_t *p_info, bool reverse)
{
	return reverse ? debug_prev_entry(p_info) : debug_next_entry(p_info);
}
/*
@ -495,7 +579,7 @@ static ssize_t debug_output(struct file *file, /* file descriptor */
}
if (copy_size == formatted_line_residue) {
entry_offset = 0;
if (debug_next_entry(p_info))
if (!debug_next_entry(p_info))
goto out;
}
}
@ -530,6 +614,42 @@ static ssize_t debug_input(struct file *file, const char __user *user_buf,
return rc; /* number of input characters */
}
/*
 * Allocate and set up the private data needed to read @debug_info through
 * @view. The result holds a snapshot of @debug_info, starts at the prolog
 * entry and owns a reference on @debug_info. Returns NULL if the snapshot
 * or the private data cannot be allocated.
 */
static file_private_info_t *debug_file_private_alloc(debug_info_t *debug_info,
						     struct debug_view *view)
{
	file_private_info_t *p_info;
	debug_info_t *snap;
	int mode;

	/*
	 * A snapshot keeps the data consistent while it is being read.
	 * The areas themselves only need to be copied for views that have
	 * a format or a header callback.
	 */
	mode = (view->format_proc || view->header_proc) ? ALL_AREAS : NO_AREAS;
	snap = debug_info_copy(debug_info, mode);
	if (!snap)
		return NULL;

	p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL);
	if (!p_info) {
		debug_info_free(snap);
		return NULL;
	}

	p_info->view = view;
	p_info->debug_info_org = debug_info;
	p_info->debug_info_snap = snap;
	p_info->offset = 0;
	p_info->act_area = 0;
	p_info->act_page = 0;
	p_info->act_entry = DEBUG_PROLOG_ENTRY;
	p_info->act_entry_offset = 0;

	debug_info_get(debug_info);
	return p_info;
}
/*
* debug_open:
* - called for user open()
@ -538,7 +658,7 @@ static ssize_t debug_input(struct file *file, const char __user *user_buf,
*/
static int debug_open(struct inode *inode, struct file *file)
{
debug_info_t *debug_info, *debug_info_snapshot;
debug_info_t *debug_info;
file_private_info_t *p_info;
int i, rc = 0;
@ -556,42 +676,26 @@ static int debug_open(struct inode *inode, struct file *file)
goto out;
found:
/* Make snapshot of current debug areas to get it consistent. */
/* To copy all the areas is only needed, if we have a view which */
/* formats the debug areas. */
if (!debug_info->views[i]->format_proc && !debug_info->views[i]->header_proc)
debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS);
else
debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS);
if (!debug_info_snapshot) {
rc = -ENOMEM;
goto out;
}
p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL);
p_info = debug_file_private_alloc(debug_info, debug_info->views[i]);
if (!p_info) {
debug_info_free(debug_info_snapshot);
rc = -ENOMEM;
goto out;
}
p_info->offset = 0;
p_info->debug_info_snap = debug_info_snapshot;
p_info->debug_info_org = debug_info;
p_info->view = debug_info->views[i];
p_info->act_area = 0;
p_info->act_page = 0;
p_info->act_entry = DEBUG_PROLOG_ENTRY;
p_info->act_entry_offset = 0;
file->private_data = p_info;
debug_info_get(debug_info);
nonseekable_open(inode, file);
out:
mutex_unlock(&debug_mutex);
return rc;
}
/*
 * Release private data obtained from debug_file_private_alloc(): free the
 * snapshot if there is one, drop the reference on the original debug info
 * and free @p_info itself.
 */
static void debug_file_private_free(file_private_info_t *p_info)
{
	debug_info_t *snap = p_info->debug_info_snap;

	if (snap)
		debug_info_free(snap);
	debug_info_put(p_info->debug_info_org);
	kfree(p_info);
}
/*
* debug_close:
* - called for user close()
@ -602,13 +706,59 @@ static int debug_close(struct inode *inode, struct file *file)
file_private_info_t *p_info;
p_info = (file_private_info_t *) file->private_data;
if (p_info->debug_info_snap)
debug_info_free(p_info->debug_info_snap);
debug_info_put(p_info->debug_info_org);
kfree(file->private_data);
debug_file_private_free(p_info);
file->private_data = NULL;
return 0; /* success */
}
/**
 * debug_dump - Get a textual representation of debug info, or as much as fits
 * @id: Debug information to use
 * @view: View with which to dump the debug information
 * @buf: Buffer the textual debug data representation is written to
 * @buf_size: Size of the buffer, including the trailing '\0' byte
 * @reverse: Go backwards from the last written entry
 *
 * This function may be used whenever a textual representation of the debug
 * information is required without using an s390dbf file.
 *
 * Note: It is the caller's responsibility to supply a view that is compatible
 * with the debug information data.
 *
 * Return: On success returns the number of bytes written to the buffer not
 * including the trailing '\0' byte. If buf_size == 0 the function returns 0.
 * On failure an error code less than 0 is returned.
 */
ssize_t debug_dump(debug_info_t *id, struct debug_view *view,
char *buf, size_t buf_size, bool reverse)
{
file_private_info_t *p_info;
size_t size, offset = 0;
/* Need space for '\0' byte */
if (buf_size < 1)
return 0;
buf_size--;
p_info = debug_file_private_alloc(id, view);
if (!p_info)
return -ENOMEM;
/* There is always at least the DEBUG_PROLOG_ENTRY */
do {
size = debug_format_entry(p_info);
/* Truncate the formatted entry to the space that is left */
size = min(size, buf_size - offset);
memcpy(buf + offset, p_info->temp_buf, size);
offset += size;
/* Stop as soon as the buffer is full */
if (offset >= buf_size)
break;
} while (debug_move_entry(p_info, reverse));
debug_file_private_free(p_info);
buf[offset] = '\0';
return offset;
}
/* Create debugfs entries and add to internal list. */
static void _debug_register(debug_info_t *id)
{
@ -1532,7 +1682,7 @@ EXPORT_SYMBOL(debug_dflt_header_fn);
#define DEBUG_SPRINTF_MAX_ARGS 10
static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
char *out_buf, size_t out_buf_size, const char *inbuf)
{
debug_sprintf_entry_t *curr_event = (debug_sprintf_entry_t *)inbuf;
@ -1570,6 +1720,7 @@ static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
out:
return rc;
}
EXPORT_SYMBOL(debug_sprintf_format_fn);
/*
* debug_init:

View File

@ -0,0 +1 @@
obj-y := diag_misc.o diag324.o diag.o

View File

@ -17,7 +17,7 @@
#include <asm/trace/diag.h>
#include <asm/sections.h>
#include <asm/asm.h>
#include "entry.h"
#include "../entry.h"
struct diag_stat {
unsigned int counter[NR_DIAG_STAT];
@ -53,6 +53,7 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = {
[DIAG_STAT_X308] = { .code = 0x308, .name = "List-Directed IPL" },
[DIAG_STAT_X318] = { .code = 0x318, .name = "CP Name and Version Codes" },
[DIAG_STAT_X320] = { .code = 0x320, .name = "Certificate Store" },
[DIAG_STAT_X324] = { .code = 0x324, .name = "Power Information Block" },
[DIAG_STAT_X49C] = { .code = 0x49c, .name = "Warning-Track Interruption" },
[DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" },
};

View File

@ -0,0 +1,224 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Request power readings for resources in a computing environment via
* diag 0x324. diag 0x324 stores the power readings in the power information
* block (pib).
*
* Copyright IBM Corp. 2024
*/
#define pr_fmt(fmt) "diag324: " fmt
#include <linux/fs.h>
#include <linux/gfp.h>
#include <linux/ioctl.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/ktime.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <asm/diag.h>
#include <asm/sclp.h>
#include <asm/timex.h>
#include <uapi/asm/diag.h>
#include "diag_ioctl.h"
/*
 * Subcodes passed to diag 0x324, as used in this file:
 * - subcode 0 is issued first by diag324_init(); its response is checked for
 *   the bits of subcodes 1 and 2
 * - subcode 1 is issued by diag324_init(); its response carries the length
 *   of the power information block
 * - subcode 2 is issued by pib_update() to have the readings stored into the
 *   supplied buffer
 */
enum subcode {
DIAG324_SUBC_0 = 0,
DIAG324_SUBC_1 = 1,
DIAG324_SUBC_2 = 2,
};
/*
 * Return codes found in the rc field of a diag 0x324 response. pib_update()
 * maps them to 0, -ENOENT, -EMSGSIZE and -EBUSY respectively; any other value
 * is mapped to -EINVAL.
 */
enum retcode {
DIAG324_RET_SUCCESS = 0x0001,
DIAG324_RET_SUBC_NOTAVAIL = 0x0103,
DIAG324_RET_INSUFFICIENT_SIZE = 0x0104,
DIAG324_RET_READING_UNAVAILABLE = 0x0105,
};
/*
 * Value returned by diag324(), overlaid with one bit-field layout per
 * subcode. Every layout declares a 16-bit rc field as its last member.
 */
union diag324_response {
u64 response;
/* subcode 0: installed subcodes plus return code */
struct {
u64 installed : 32;
u64 : 16;
u64 rc : 16;
} sc0;
/* subcode 1: pib_len is taken over as pibdata.len by diag324_init() */
struct {
u64 format : 16;
u64 : 16;
u64 pib_len : 16;
u64 rc : 16;
} sc1;
/* subcode 2: return code only */
struct {
u64 : 48;
u64 rc : 16;
} sc2;
};
/*
 * Value passed to diag324() as the subcode operand. For subcode 2 it carries
 * the size of the supplied buffer (allocated) next to the subcode (sc).
 */
union diag324_request {
u64 request;
struct {
u64 : 32;
u64 allocated : 16;
u64 : 12;
u64 sc : 4;
} sc2;
};
/*
 * Power information block as stored by diag 0x324 subcode 2. This file only
 * reads two fields: len is used as the number of bytes copied to user space
 * and intv is converted with tod_to_ns() to compute when the block expires.
 * NOTE(review): num, hlen and the trailing r[] are not interpreted here;
 * presumably entry count, header length and the readings - confirm against
 * the architecture documentation.
 */
struct pib {
u32 : 8;
u32 num : 8;
u32 len : 16;
u32 : 24;
u32 hlen : 8;
u64 : 64;
u64 intv;
u8 r[];
} __packed;
/*
 * State of the single cached power information block.
 *
 * @pib:      vmalloc'ed buffer holding the cached block, NULL when released
 * @expire:   point in time after which the cached block is refreshed
 * @sequence: incremented on every refresh and reported to user space
 * @len:      buffer size; set once by diag324_init(), 0 if unsupported
 * @rc:       result of the most recent pib_update()
 */
struct pibdata {
struct pib *pib;
ktime_t expire;
u64 sequence;
size_t len;
int rc;
};
static DEFINE_MUTEX(pibmutex);
static struct pibdata pibdata;
#define PIBWORK_DELAY (5 * NSEC_PER_SEC)
static void pibwork_handler(struct work_struct *work);
static DECLARE_DELAYED_WORK(pibwork, pibwork_handler);
/*
 * Issue diag 0x324.
 *
 * @subcode: value used as the subcode operand; callers pass either a bare
 *           subcode or the raw value of a union diag324_request
 * @addr:    buffer address placed in the even register of the register
 *           pair; callers pass NULL for subcodes 0 and 1
 *
 * Increments the DIAG_STAT_X324 counter, executes the instruction and
 * returns the content of the odd register of the pair, which callers
 * interpret as a union diag324_response.
 */
static unsigned long diag324(unsigned long subcode, void *addr)
{
union register_pair rp = { .even = (unsigned long)addr };
diag_stat_inc(DIAG_STAT_X324);
asm volatile("diag %[rp],%[subcode],0x324\n"
: [rp] "+d" (rp.pair)
: [subcode] "d" (subcode)
: "memory");
return rp.odd;
}
/*
 * Delayed work that releases the cached power information block once it has
 * been expired for at least PIBWORK_DELAY. If that point has not been
 * reached yet the work re-arms itself and checks again later.
 */
static void pibwork_handler(struct work_struct *work)
{
	struct pibdata *data = &pibdata;
	ktime_t release_time;

	mutex_lock(&pibmutex);
	release_time = ktime_add_ns(data->expire, PIBWORK_DELAY);
	if (ktime_before(ktime_get(), release_time)) {
		/* Too early, look again after another delay */
		mod_delayed_work(system_wq, &pibwork, nsecs_to_jiffies(PIBWORK_DELAY));
	} else {
		vfree(data->pib);
		data->pib = NULL;
	}
	mutex_unlock(&pibmutex);
}
/*
 * Refresh the cached power information block: clear the buffer, have diag
 * 0x324 subcode 2 store the current readings into it and record the outcome
 * in @data->rc as 0 or a negative error code.
 */
static void pib_update(struct pibdata *data)
{
	union diag324_request req = { .sc2.sc = DIAG324_SUBC_2, .sc2.allocated = data->len };
	union diag324_response res;

	memset(data->pib, 0, data->len);
	res.response = diag324(req.request, data->pib);

	/* Translate the diag return code into an errno value */
	switch (res.sc2.rc) {
	case DIAG324_RET_SUCCESS:
		data->rc = 0;
		return;
	case DIAG324_RET_SUBC_NOTAVAIL:
		data->rc = -ENOENT;
		return;
	case DIAG324_RET_INSUFFICIENT_SIZE:
		data->rc = -EMSGSIZE;
		return;
	case DIAG324_RET_READING_UNAVAILABLE:
		data->rc = -EBUSY;
		return;
	default:
		data->rc = -EINVAL;
		return;
	}
}
/*
 * Handler for the DIAG324_GET_PIBBUF ioctl.
 *
 * @arg: user space pointer to a struct diag324_pib; its address member names
 *       the user buffer that receives the power information block, its
 *       sequence member is written by this function
 *
 * The block is cached: it is only refreshed via pib_update() on the very
 * first call or once the previous reading has expired. Every refresh bumps
 * the sequence number, computes the next expiry from the interval found in
 * the block and (re)arms the work that eventually frees the cache.
 *
 * Returns 0 on success, -EOPNOTSUPP if no block length was determined at
 * init time, -ENOMEM if the cache buffer cannot be allocated, -EFAULT if user
 * memory cannot be accessed, or the error recorded by pib_update(). A
 * recorded -EBUSY is not treated as an error: the (cleared) buffer is still
 * copied out and 0 is returned if the copy succeeds.
 */
long diag324_pibbuf(unsigned long arg)
{
struct diag324_pib __user *udata = (struct diag324_pib __user *)arg;
struct pibdata *data = &pibdata;
static bool first = true;
u64 address;
int rc;
if (!data->len)
return -EOPNOTSUPP;
if (get_user(address, &udata->address))
return -EFAULT;
mutex_lock(&pibmutex);
rc = -ENOMEM;
/* The buffer may have been released by pibwork_handler() in the meantime */
if (!data->pib)
data->pib = vmalloc(data->len);
if (!data->pib)
goto out;
if (first || ktime_after(ktime_get(), data->expire)) {
pib_update(data);
data->sequence++;
data->expire = ktime_add_ns(ktime_get(), tod_to_ns(data->pib->intv));
mod_delayed_work(system_wq, &pibwork, nsecs_to_jiffies(PIBWORK_DELAY));
first = false;
}
rc = data->rc;
if (rc != 0 && rc != -EBUSY)
goto out;
/*
 * NOTE(review): the copy length is the len field stored in the block by
 * diag 0x324 and is not clamped to data->len, the size of the buffer -
 * confirm that the stored length can never exceed the allocated size.
 */
rc = copy_to_user((void __user *)address, data->pib, data->pib->len);
rc |= put_user(data->sequence, &udata->sequence);
/* Any failed user access is reported as -EFAULT */
if (rc)
rc = -EFAULT;
out:
mutex_unlock(&pibmutex);
return rc;
}
long diag324_piblen(unsigned long arg)
{
struct pibdata *data = &pibdata;
if (!data->len)
return -EOPNOTSUPP;
if (put_user(data->len, (size_t __user *)arg))
return -EFAULT;
return 0;
}
/*
 * Probe diag 0x324 at boot: require the SCLP facility bit, a successful
 * subcode 0 query that reports both subcode 1 and subcode 2, and a
 * successful subcode 1 query. On success the block length reported by
 * subcode 1 is remembered in pibdata.len; every failure yields -EOPNOTSUPP.
 */
static int __init diag324_init(void)
{
	union diag324_response res;
	unsigned long installed;

	if (!sclp.has_diag324)
		return -EOPNOTSUPP;

	/* Subcode 0: which subcodes are available? */
	res.response = diag324(DIAG324_SUBC_0, NULL);
	if (res.sc0.rc != DIAG324_RET_SUCCESS)
		return -EOPNOTSUPP;
	installed = res.response;
	if (!test_bit_inv(DIAG324_SUBC_1, &installed) ||
	    !test_bit_inv(DIAG324_SUBC_2, &installed))
		return -EOPNOTSUPP;

	/* Subcode 1: how large is the power information block? */
	res.response = diag324(DIAG324_SUBC_1, NULL);
	if (res.sc1.rc != DIAG324_RET_SUCCESS)
		return -EOPNOTSUPP;
	pibdata.len = res.sc1.pib_len;
	return 0;
}
device_initcall(diag324_init);

View File

@ -0,0 +1,10 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _DIAG_IOCTL_H
#define _DIAG_IOCTL_H
#include <linux/types.h>
long diag324_pibbuf(unsigned long arg);
long diag324_piblen(unsigned long arg);
#endif /* _DIAG_IOCTL_H */

View File

@ -0,0 +1,54 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Provide diagnose information via misc device /dev/diag.
*
* Copyright IBM Corp. 2024
*/
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/ioctl.h>
#include <linux/kernel.h>
#include <linux/miscdevice.h>
#include <linux/types.h>
#include <uapi/asm/diag.h>
#include "diag_ioctl.h"
/*
 * ioctl entry point of the diag misc device: forward the known diag 0x324
 * commands to their handlers and reject everything else with -ENOIOCTLCMD.
 */
static long diag_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case DIAG324_GET_PIBLEN:
		return diag324_piblen(arg);
	case DIAG324_GET_PIBBUF:
		return diag324_pibbuf(arg);
	default:
		return -ENOIOCTLCMD;
	}
}
static const struct file_operations fops = {
.owner = THIS_MODULE,
.open = nonseekable_open,
.unlocked_ioctl = diag_ioctl,
};
static struct miscdevice diagdev = {
.name = "diag",
.minor = MISC_DYNAMIC_MINOR,
.fops = &fops,
.mode = 0444,
};
static int diag_init(void)
{
return misc_register(&diagdev);
}
device_initcall(diag_init);

View File

@ -52,16 +52,7 @@ _LPP_OFFSET = __LC_LPP
ALT_FACILITY(193)
.endm
.macro CHECK_STACK savearea, lowcore
#ifdef CONFIG_CHECK_STACK
tml %r15,THREAD_SIZE - CONFIG_STACK_GUARD
la %r14,\savearea(\lowcore)
jz stack_overflow
#endif
.endm
.macro CHECK_VMAP_STACK savearea, lowcore, oklabel
#ifdef CONFIG_VMAP_STACK
lgr %r14,%r15
nill %r14,0x10000 - THREAD_SIZE
oill %r14,STACK_INIT_OFFSET
@ -77,9 +68,6 @@ _LPP_OFFSET = __LC_LPP
je \oklabel
la %r14,\savearea(\lowcore)
j stack_overflow
#else
j \oklabel
#endif
.endm
/*
@ -326,8 +314,7 @@ SYM_CODE_START(pgm_check_handler)
jnz 2f # -> enabled, can't be a double fault
tm __LC_PGM_ILC+3(%r13),0x80 # check for per exception
jnz .Lpgm_svcper # -> single stepped svc
2: CHECK_STACK __LC_SAVE_AREA,%r13
aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
2: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
# CHECK_VMAP_STACK branches to stack_overflow or 4f
CHECK_VMAP_STACK __LC_SAVE_AREA,%r13,4f
3: lg %r15,__LC_KERNEL_STACK(%r13)
@ -394,8 +381,7 @@ SYM_CODE_START(\name)
BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
SIEEXIT __SF_SIE_CONTROL(%r15),%r13
#endif
0: CHECK_STACK __LC_SAVE_AREA,%r13
aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
0: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
j 2f
1: lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
lg %r15,__LC_KERNEL_STACK(%r13)
@ -603,7 +589,6 @@ SYM_CODE_END(early_pgm_check_handler)
.section .kprobes.text, "ax"
#if defined(CONFIG_CHECK_STACK) || defined(CONFIG_VMAP_STACK)
/*
* The synchronous or the asynchronous stack overflowed. We are dead.
* No need to properly save the registers, we are going to panic anyway.
@ -621,7 +606,6 @@ SYM_CODE_START(stack_overflow)
lgr %r2,%r11 # pass pointer to pt_regs
jg kernel_stack_overflow
SYM_CODE_END(stack_overflow)
#endif
.section .data, "aw"
.balign 4

View File

@ -18,6 +18,7 @@
#include <asm/physmem_info.h>
#include <asm/maccess.h>
#include <asm/asm-offsets.h>
#include <asm/sections.h>
#include <asm/ipl.h>
/*

View File

@ -157,18 +157,18 @@ u64 __bootdata_preserved(stfle_fac_list[16]);
EXPORT_SYMBOL(stfle_fac_list);
struct oldmem_data __bootdata_preserved(oldmem_data);
unsigned long VMALLOC_START;
unsigned long __bootdata_preserved(VMALLOC_START);
EXPORT_SYMBOL(VMALLOC_START);
unsigned long VMALLOC_END;
unsigned long __bootdata_preserved(VMALLOC_END);
EXPORT_SYMBOL(VMALLOC_END);
struct page *vmemmap;
struct page *__bootdata_preserved(vmemmap);
EXPORT_SYMBOL(vmemmap);
unsigned long vmemmap_size;
unsigned long __bootdata_preserved(vmemmap_size);
unsigned long MODULES_VADDR;
unsigned long MODULES_END;
unsigned long __bootdata_preserved(MODULES_VADDR);
unsigned long __bootdata_preserved(MODULES_END);
/* An array with a pointer to the lowcore of every CPU. */
struct lowcore *lowcore_ptr[NR_CPUS];
@ -359,25 +359,17 @@ void *restart_stack;
unsigned long stack_alloc(void)
{
#ifdef CONFIG_VMAP_STACK
void *ret;
void *stack;
ret = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP,
stack = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP,
NUMA_NO_NODE, __builtin_return_address(0));
kmemleak_not_leak(ret);
return (unsigned long)ret;
#else
return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
#endif
kmemleak_not_leak(stack);
return (unsigned long)stack;
}
void stack_free(unsigned long stack)
{
#ifdef CONFIG_VMAP_STACK
vfree((void *) stack);
#else
free_pages(stack, THREAD_SIZE_ORDER);
#endif
vfree((void *)stack);
}
static unsigned long __init stack_alloc_early(void)
@ -979,6 +971,7 @@ void __init setup_arch(char **cmdline_p)
if (test_facility(193))
static_branch_enable(&cpu_has_bear);
setup_protection_map();
/*
* Create kernel page tables.
*/

View File

@ -5,7 +5,7 @@
include $(srctree)/lib/vdso/Makefile
obj-vdso64 = vdso_user_wrapper.o note.o vgetrandom-chacha.o
obj-cvdso64 = vdso64_generic.o getcpu.o vgetrandom.o
VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) $(CC_FLAGS_CHECK_STACK)
VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE)
CFLAGS_REMOVE_getcpu.o = $(VDSO_CFLAGS_REMOVE)
CFLAGS_REMOVE_vgetrandom.o = $(VDSO_CFLAGS_REMOVE)
CFLAGS_REMOVE_vdso64_generic.o = $(VDSO_CFLAGS_REMOVE)

View File

@ -1,8 +1,9 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/vmcore_info.h>
#include <asm/abs_lowcore.h>
#include <linux/mm.h>
#include <asm/abs_lowcore.h>
#include <asm/sections.h>
#include <asm/setup.h>
void arch_crash_save_vmcoreinfo(void)

View File

@ -34,8 +34,7 @@ SYM_FUNC_START(__memmove)
la %r3,256(%r3)
brctg %r0,.Lmemmove_forward_loop
.Lmemmove_forward_remainder:
larl %r5,.Lmemmove_mvc
ex %r4,0(%r5)
exrl %r4,.Lmemmove_mvc
.Lmemmove_exit:
BR_EX %r14
.Lmemmove_reverse:
@ -83,8 +82,7 @@ SYM_FUNC_START(__memset)
la %r1,256(%r1)
brctg %r3,.Lmemset_clear_loop
.Lmemset_clear_remainder:
larl %r3,.Lmemset_xc
ex %r4,0(%r3)
exrl %r4,.Lmemset_xc
.Lmemset_exit:
BR_EX %r14
.Lmemset_fill:
@ -102,8 +100,7 @@ SYM_FUNC_START(__memset)
brctg %r5,.Lmemset_fill_loop
.Lmemset_fill_remainder:
stc %r3,0(%r1)
larl %r5,.Lmemset_mvc
ex %r4,0(%r5)
exrl %r4,.Lmemset_mvc
BR_EX %r14
.Lmemset_fill_exit:
stc %r3,0(%r1)
@ -132,8 +129,7 @@ SYM_FUNC_START(__memcpy)
lgr %r1,%r2
jnz .Lmemcpy_loop
.Lmemcpy_remainder:
larl %r5,.Lmemcpy_mvc
ex %r4,0(%r5)
exrl %r4,.Lmemcpy_mvc
.Lmemcpy_exit:
BR_EX %r14
.Lmemcpy_loop:
@ -175,8 +171,7 @@ SYM_FUNC_START(__memset\bits)
brctg %r5,.L__memset_loop\bits
.L__memset_remainder\bits:
\insn %r3,0(%r1)
larl %r5,.L__memset_mvc\bits
ex %r4,0(%r5)
exrl %r4,.L__memset_mvc\bits
BR_EX %r14
.L__memset_store\bits:
\insn %r3,0(%r2)

View File

@ -56,6 +56,15 @@ pgd_t invalid_pg_dir[PTRS_PER_PGD] __section(".bss..invalid_pg_dir");
struct ctlreg __bootdata_preserved(s390_invalid_asce);
unsigned long __bootdata_preserved(page_noexec_mask);
EXPORT_SYMBOL(page_noexec_mask);
unsigned long __bootdata_preserved(segment_noexec_mask);
EXPORT_SYMBOL(segment_noexec_mask);
unsigned long __bootdata_preserved(region_noexec_mask);
EXPORT_SYMBOL(region_noexec_mask);
unsigned long empty_zero_page, zero_page_mask;
EXPORT_SYMBOL(empty_zero_page);
EXPORT_SYMBOL(zero_page_mask);

View File

@ -17,6 +17,7 @@
#include <asm/asm-extable.h>
#include <asm/abs_lowcore.h>
#include <asm/stacktrace.h>
#include <asm/sections.h>
#include <asm/maccess.h>
#include <asm/ctlreg.h>

View File

@ -196,22 +196,28 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
}
}
static const pgprot_t protection_map[16] = {
[VM_NONE] = PAGE_NONE,
[VM_READ] = PAGE_RO,
[VM_WRITE] = PAGE_RO,
[VM_WRITE | VM_READ] = PAGE_RO,
[VM_EXEC] = PAGE_RX,
[VM_EXEC | VM_READ] = PAGE_RX,
[VM_EXEC | VM_WRITE] = PAGE_RX,
[VM_EXEC | VM_WRITE | VM_READ] = PAGE_RX,
[VM_SHARED] = PAGE_NONE,
[VM_SHARED | VM_READ] = PAGE_RO,
[VM_SHARED | VM_WRITE] = PAGE_RW,
[VM_SHARED | VM_WRITE | VM_READ] = PAGE_RW,
[VM_SHARED | VM_EXEC] = PAGE_RX,
[VM_SHARED | VM_EXEC | VM_READ] = PAGE_RX,
[VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_RWX,
[VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_RWX
};
static pgprot_t protection_map[16] __ro_after_init;
/*
 * Fill protection_map[], which is indexed by the combination of the VM_READ,
 * VM_WRITE, VM_EXEC and VM_SHARED flags.
 *
 * Entries without VM_SHARED never get a writable protection: VM_WRITE maps
 * to PAGE_RO, or to PAGE_RX when VM_EXEC is set as well. Writable protections
 * (PAGE_RW, PAGE_RWX) are only used together with VM_SHARED.
 *
 * NOTE(review): presumably the table is filled at runtime rather than by a
 * static initializer because the PAGE_* values are no longer compile-time
 * constants - confirm against the PAGE_* definitions.
 */
void __init setup_protection_map(void)
{
pgprot_t *pm = protection_map;
pm[VM_NONE] = PAGE_NONE;
pm[VM_READ] = PAGE_RO;
pm[VM_WRITE] = PAGE_RO;
pm[VM_WRITE | VM_READ] = PAGE_RO;
pm[VM_EXEC] = PAGE_RX;
pm[VM_EXEC | VM_READ] = PAGE_RX;
pm[VM_EXEC | VM_WRITE] = PAGE_RX;
pm[VM_EXEC | VM_WRITE | VM_READ] = PAGE_RX;
pm[VM_SHARED] = PAGE_NONE;
pm[VM_SHARED | VM_READ] = PAGE_RO;
pm[VM_SHARED | VM_WRITE] = PAGE_RW;
pm[VM_SHARED | VM_WRITE | VM_READ] = PAGE_RW;
pm[VM_SHARED | VM_EXEC] = PAGE_RX;
pm[VM_SHARED | VM_EXEC | VM_READ] = PAGE_RX;
pm[VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_RWX;
pm[VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_RWX;
}
DECLARE_VM_GET_PAGE_PROT

View File

@ -109,8 +109,6 @@ static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end,
} else if (flags & SET_MEMORY_DEF) {
new = __pte(pte_val(new) & PAGE_MASK);
new = set_pte_bit(new, PAGE_KERNEL);
if (!MACHINE_HAS_NX)
new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC));
}
pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE);
ptep++;
@ -167,8 +165,6 @@ static void modify_pmd_page(pmd_t *pmdp, unsigned long addr,
} else if (flags & SET_MEMORY_DEF) {
new = __pmd(pmd_val(new) & PMD_MASK);
new = set_pmd_bit(new, SEGMENT_KERNEL);
if (!MACHINE_HAS_NX)
new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
}
pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
}
@ -256,8 +252,6 @@ static void modify_pud_page(pud_t *pudp, unsigned long addr,
} else if (flags & SET_MEMORY_DEF) {
new = __pud(pud_val(new) & PUD_MASK);
new = set_pud_bit(new, REGION3_KERNEL);
if (!MACHINE_HAS_NX)
new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
}
pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
}

View File

@ -360,8 +360,6 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
pgste_t pgste;
struct mm_struct *mm = vma->vm_mm;
if (!MACHINE_HAS_NX)
pte = clear_pte_bit(pte, __pgprot(_PAGE_NOEXEC));
if (mm_has_pgste(mm)) {
pgste = pgste_get(ptep);
pgste_set_key(ptep, pgste, pte, mm);

View File

@ -171,9 +171,6 @@ static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
pte_t *pte;
prot = pgprot_val(PAGE_KERNEL);
if (!MACHINE_HAS_NX)
prot &= ~_PAGE_NOEXEC;
pte = pte_offset_kernel(pmd, addr);
for (; addr < end; addr += PAGE_SIZE, pte++) {
if (!add) {
@ -230,9 +227,6 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
pte_t *pte;
prot = pgprot_val(SEGMENT_KERNEL);
if (!MACHINE_HAS_NX)
prot &= ~_SEGMENT_ENTRY_NOEXEC;
pmd = pmd_offset(pud, addr);
for (; addr < end; addr = next, pmd++) {
next = pmd_addr_end(addr, end);
@ -324,8 +318,6 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
pmd_t *pmd;
prot = pgprot_val(REGION3_KERNEL);
if (!MACHINE_HAS_NX)
prot &= ~_REGION_ENTRY_NOEXEC;
pud = pud_offset(p4d, addr);
for (; addr < end; addr = next, pud++) {
next = pud_addr_end(addr, end);

View File

@ -5,6 +5,6 @@
obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_clp.o \
pci_event.o pci_debug.o pci_insn.o pci_mmio.o \
pci_bus.o pci_kvm_hook.o
pci_bus.o pci_kvm_hook.o pci_report.o
obj-$(CONFIG_PCI_IOV) += pci_iov.o
obj-$(CONFIG_SYSFS) += pci_sysfs.o

View File

@ -16,6 +16,7 @@
#include <asm/sclp.h>
#include "pci_bus.h"
#include "pci_report.h"
/* Content Code Description for PCI Function Error */
struct zpci_ccdf_err {
@ -169,6 +170,8 @@ static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
{
pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
struct zpci_dev *zdev = to_zpci(pdev);
char *status_str = "success";
struct pci_driver *driver;
/*
@ -186,30 +189,38 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
if (is_passed_through(pdev)) {
pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n",
pci_name(pdev));
status_str = "failed (pass-through)";
goto out_unlock;
}
driver = to_pci_driver(pdev->dev.driver);
if (!is_driver_supported(driver)) {
if (!driver)
if (!driver) {
pr_info("%s: Cannot be recovered because no driver is bound to the device\n",
pci_name(pdev));
else
status_str = "failed (no driver)";
} else {
pr_info("%s: The %s driver bound to the device does not support error recovery\n",
pci_name(pdev),
driver->name);
status_str = "failed (no driver support)";
}
goto out_unlock;
}
ers_res = zpci_event_notify_error_detected(pdev, driver);
if (ers_result_indicates_abort(ers_res))
if (ers_result_indicates_abort(ers_res)) {
status_str = "failed (abort on detection)";
goto out_unlock;
}
if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) {
ers_res = zpci_event_do_error_state_clear(pdev, driver);
if (ers_result_indicates_abort(ers_res))
if (ers_result_indicates_abort(ers_res)) {
status_str = "failed (abort on MMIO enable)";
goto out_unlock;
}
}
if (ers_res == PCI_ERS_RESULT_NEED_RESET)
ers_res = zpci_event_do_reset(pdev, driver);
@ -217,6 +228,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
if (ers_res != PCI_ERS_RESULT_RECOVERED) {
pr_err("%s: Automatic recovery failed; operator intervention is required\n",
pci_name(pdev));
status_str = "failed (driver can't recover)";
goto out_unlock;
}
@ -225,6 +237,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
driver->err_handler->resume(pdev);
out_unlock:
pci_dev_unlock(pdev);
zpci_report_status(zdev, "recovery", status_str);
return ers_res;
}

158
arch/s390/pci/pci_report.c Normal file
View File

@ -0,0 +1,158 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright IBM Corp. 2024
*
* Author(s):
* Niklas Schnelle <schnelle@linux.ibm.com>
*
*/
#define KMSG_COMPONENT "zpci"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/kernel.h>
#include <linux/sprintf.h>
#include <linux/pci.h>
#include <asm/sclp.h>
#include <asm/debug.h>
#include <asm/pci_debug.h>
#include "pci_report.h"
#define ZPCI_ERR_LOG_ID_KERNEL_REPORT 0x4714
struct zpci_report_error_data {
u64 timestamp;
u64 err_log_id;
char log_data[];
} __packed;
#define ZPCI_REPORT_SIZE (PAGE_SIZE - sizeof(struct err_notify_sccb))
#define ZPCI_REPORT_DATA_SIZE (ZPCI_REPORT_SIZE - sizeof(struct zpci_report_error_data))
struct zpci_report_error {
struct zpci_report_error_header header;
struct zpci_report_error_data data;
} __packed;
/*
 * Translate a PCI channel state into a short human readable string for the
 * report. Any state other than normal, frozen or permanent failure yields
 * "invalid".
 */
static const char *zpci_state_str(pci_channel_state_t state)
{
	switch (state) {
	case pci_channel_io_normal:
		return "normal";
	case pci_channel_io_frozen:
		return "frozen";
	case pci_channel_io_perm_failure:
		return "permanent-failure";
	default:
		return "invalid";
	}
}
/*
 * Per-entry header callback of debug_log_view: format the timestamp, level,
 * exception marker ("*" or "-") and CPU of @entry into @out_buf.
 *
 * The clock value of the entry is split with do_div() by USEC_PER_SEC into
 * the part printed before and the part printed after the colon.
 *
 * Returns the number of characters written, as reported by scnprintf().
 */
static int debug_log_header_fn(debug_info_t *id, struct debug_view *view,
			       int area, debug_entry_t *entry, char *out_buf,
			       size_t out_buf_size)
{
	char *except_str = entry->exception ? "*" : "-";
	unsigned int level = entry->level;
	unsigned long sec = entry->clock;
	unsigned long usec;

	/* do_div() leaves the quotient in sec and returns the remainder */
	usec = do_div(sec, USEC_PER_SEC);
	return scnprintf(out_buf, out_buf_size, "%011ld:%06lu %1u %1s %04u ",
			 sec, usec, level, except_str,
			 entry->cpu);
}
/*
 * Prolog callback of debug_log_view: write the column heading line that
 * matches the fields emitted by debug_log_header_fn() into @out_buf.
 * Returns the number of characters written, as reported by scnprintf().
 */
static int debug_prolog_header(debug_info_t *id, struct debug_view *view,
char *out_buf, size_t out_buf_size)
{
return scnprintf(out_buf, out_buf_size, "sec:usec level except cpu msg\n");
}
/*
 * Debug view passed to debug_dump() by zpci_report_status() to render the
 * pci_debug_msg_id log into the report. The initializer is positional:
 * the view name, then the prolog, header and format callbacks defined or
 * referenced in this file. NOTE(review): the two trailing NULL entries
 * depend on the field order of struct debug_view, which is declared
 * elsewhere - confirm their meaning there.
 */
static struct debug_view debug_log_view = {
"pci_msg_log",
&debug_prolog_header,
&debug_log_header_fn,
&debug_sprintf_format_fn,
NULL,
NULL
};
/**
 * zpci_report_status - Report the status of operations on a PCI device
 * @zdev: The PCI device for which to report status
 * @operation: A string representing the operation reported
 * @status: A string representing the status of the operation
 *
 * This function creates a human readable report about an operation such as
 * PCI device recovery and forwards this to the platform using the SCLP Write
 * Event Data mechanism. Besides the operation and status strings the report
 * also contains additional information about the device deemed useful for
 * debug such as the currently bound device driver, if any, and error state.
 * Additionally a string representation of pci_debug_msg_id, or as much as fits,
 * is also included.
 *
 * Return: 0 on success, an error code < 0 otherwise: -ENODEV if @zdev or its
 * bus is missing, -ENODATA when prot_virt_guest is set, -ENOMEM if the report
 * page cannot be allocated, else the result of sclp_pci_report().
 */
int zpci_report_status(struct zpci_dev *zdev, const char *operation, const char *status)
{
	struct zpci_report_error *report;
	struct pci_driver *driver = NULL;
	struct pci_dev *pdev = NULL;
	char *buf, *end;
	int ret;

	if (!zdev || !zdev->zbus)
		return -ENODEV;

	/* Protected virtualization hosts get nothing from us */
	if (prot_virt_guest)
		return -ENODATA;

	/* The whole report, header included, is assembled in one zeroed page */
	report = (void *)get_zeroed_page(GFP_KERNEL);
	if (!report)
		return -ENOMEM;

	/* pci_get_slot() takes a reference on the device; dropped below */
	if (zdev->zbus->bus)
		pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
	if (pdev)
		driver = to_pci_driver(pdev->dev.driver);

	/* scnprintf() returns what was actually written, so buf never passes end */
	buf = report->data.log_data;
	end = report->data.log_data + ZPCI_REPORT_DATA_SIZE;
	buf += scnprintf(buf, end - buf, "report: %s\n", operation);
	buf += scnprintf(buf, end - buf, "status: %s\n", status);
	buf += scnprintf(buf, end - buf, "state: %s\n",
			 (pdev) ? zpci_state_str(pdev->error_state) : "n/a");
	buf += scnprintf(buf, end - buf, "driver: %s\n", (driver) ? driver->name : "n/a");

	/* Append the PCI debug log; a failure here does not stop the report */
	ret = debug_dump(pci_debug_msg_id, &debug_log_view, buf, end - buf, true);
	if (ret < 0)
		pr_err("Reading PCI debug messages failed with code %d\n", ret);
	else
		buf += ret;

	report->header.version = 1;
	report->header.action = SCLP_ERRNOTIFY_AQ_INFO_LOG;
	/* Length covers the fixed data fields plus the text written so far */
	report->header.length = buf - (char *)&report->data;
	report->data.timestamp = ktime_get_clocktai_seconds();
	report->data.err_log_id = ZPCI_ERR_LOG_ID_KERNEL_REPORT;

	ret = sclp_pci_report(&report->header, zdev->fh, zdev->fid);
	if (ret)
		pr_err("Reporting PCI status failed with code %d\n", ret);
	else
		pr_info("Reported PCI device status\n");

	free_page((unsigned long)report);
	/* Release the reference taken by pci_get_slot(); NULL is accepted */
	pci_dev_put(pdev);

	return ret;
}

View File

@ -0,0 +1,16 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright IBM Corp. 2024
*
* Author(s):
* Niklas Schnelle <schnelle@linux.ibm.com>
*
*/
#ifndef __S390_PCI_REPORT_H
#define __S390_PCI_REPORT_H
/* Forward declaration; only a pointer to struct zpci_dev is needed here. */
struct zpci_dev;
/*
 * Build a textual status report for @operation / @status on @zdev and
 * forward it to the platform. Returns 0 on success or an error code.
 */
int zpci_report_status(struct zpci_dev *zdev, const char *operation, const char *status);
#endif /* __S390_PCI_REPORT_H */

View File

@ -85,13 +85,6 @@ typedef unsigned int sclp_cmdw_t;
typedef u64 sccb_mask_t;
/*
 * Header embedded as the first member of SCCB structures such as
 * struct init_sccb. Packed: 8 bytes with no padding between fields.
 */
struct sccb_header {
u16 length;
u8 function_code;
u8 control_mask[3];
u16 response_code;
} __attribute__((packed));
struct init_sccb {
struct sccb_header header;
u16 _reserved;
@ -196,7 +189,9 @@ struct read_info_sccb {
u8 byte_134; /* 134 */
u8 cpudirq; /* 135 */
u16 cbl; /* 136-137 */
u8 _pad_138[EXT_SCCB_READ_SCP - 138];
u8 byte_138; /* 138 */
u8 byte_139; /* 139 */
u8 _pad_140[EXT_SCCB_READ_SCP - 140];
} __packed __aligned(PAGE_SIZE);
struct read_storage_sccb {
@ -238,13 +233,6 @@ struct gds_vector {
u16 gds_id;
} __attribute__((packed));
/*
 * Header placed at the start of an event buffer. Packed: 6 bytes with no
 * padding between fields.
 */
struct evbuf_header {
u16 length;
u8 type;
u8 flags;
u16 _reserved;
} __attribute__((packed));
struct sclp_req {
struct list_head list; /* list_head for request queueing. */
sclp_cmdw_t command; /* sclp command to execute */

View File

@ -64,6 +64,8 @@ static void __init sclp_early_facilities_detect(void)
sclp.has_sipl = !!(sccb->cbl & 0x4000);
sclp.has_sipl_eckd = !!(sccb->cbl & 0x2000);
}
if (sccb->cpuoff > 139)
sclp.has_diag324 = !!(sccb->byte_139 & 0x80);
sclp.rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2;
sclp.rzm = sccb->rnsize ? sccb->rnsize : sccb->rnsize2;
sclp.rzm <<= 20;

View File

@ -24,30 +24,11 @@
#define SCLP_ATYPE_PCI 2
/*
 * Action values for error notifications (presumably "AQ" stands for
 * action qualifier - confirm). zpci_report_status() uses
 * SCLP_ERRNOTIFY_AQ_INFO_LOG as the action of its report header.
 */
#define SCLP_ERRNOTIFY_AQ_RESET 0
#define SCLP_ERRNOTIFY_AQ_REPAIR 1
#define SCLP_ERRNOTIFY_AQ_INFO_LOG 2
#define SCLP_ERRNOTIFY_AQ_OPTICS_DATA 3
/* NOTE(review): users of this mutex are outside the visible hunk - confirm what it serializes. */
static DEFINE_MUTEX(sclp_pci_mutex);
/* SCLP registration for this file; only the send mask is set, to EVTYP_ERRNOTIFY_MASK. */
static struct sclp_register sclp_pci_event = {
.send_mask = EVTYP_ERRNOTIFY_MASK,
};
/*
 * Error notification event buffer: the common event buffer header, then
 * action, adapter type, function handle (fh) and function id (fid),
 * followed by variable-length data. Packed, so there is no padding
 * between the fields.
 */
struct err_notify_evbuf {
struct evbuf_header header;
u8 action;
u8 atype;
u32 fh;
u32 fid;
u8 data[];
} __packed;
/*
 * SCCB carrying one error notification event buffer: the SCCB header
 * directly followed by the event buffer (packed). Its size is what
 * ZPCI_REPORT_SIZE subtracts from PAGE_SIZE in pci_report.c.
 */
struct err_notify_sccb {
struct sccb_header header;
struct err_notify_evbuf evbuf;
} __packed;
struct pci_cfg_sccb {
struct sccb_header header;
u8 atype; /* adapter type */