mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-16 21:35:07 +00:00
Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Ingo Molnar: "This is bigger than usual - the reason is partly a pent-up stream of fixes after the merge window and partly accidental. The fixes are: - five patches to fix a boot failure on Andy Lutomirsky's laptop - four SGI UV platform fixes - KASAN fix - warning fix - documentation update - swap entry definition fix - pkeys fix - irq stats fix" * 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/apic/x2apic, smp/hotplug: Don't use before alloc in x2apic_cluster_probe() x86/efi: Allocate a trampoline if needed in efi_free_boot_services() x86/boot: Rework reserve_real_mode() to allow multiple tries x86/boot: Defer setup_real_mode() to early_initcall time x86/boot: Synchronize trampoline_cr4_features and mmu_cr4_features directly x86/boot: Run reserve_bios_regions() after we initialize the memory map x86/irq: Do not substract irq_tlb_count from irq_call_count x86/mm: Fix swap entry comment and macro x86/mm/kaslr: Fix -Wformat-security warning x86/mm/pkeys: Fix compact mode by removing protection keys' XSAVE buffer manipulation x86/build: Reduce the W=1 warnings noise when compiling x86 syscall tables x86/platform/UV: Fix kernel panic running RHEL kdump kernel on UV systems x86/platform/UV: Fix problem with UV4 BIOS providing incorrect PXM values x86/platform/UV: Fix bug with iounmap() of the UV4 EFI System Table causing a crash x86/platform/UV: Fix problem with UV4 Socket IDs not being contiguous x86/entry: Clarify the RF saving/restoring situation with SYSCALL/SYSRET x86/mm: Disable preemption during CR3 read+write x86/mm/KASLR: Increase BRK pages for KASLR memory randomization x86/mm/KASLR: Fix physical memory calculation on KASLR memory randomization x86, kasan, ftrace: Put APIC interrupt handlers into .irqentry.text
This commit is contained in:
commit
01ea443982
@ -5,6 +5,8 @@
|
||||
OBJECT_FILES_NON_STANDARD_entry_$(BITS).o := y
|
||||
OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y
|
||||
|
||||
CFLAGS_syscall_64.o += -Wno-override-init
|
||||
CFLAGS_syscall_32.o += -Wno-override-init
|
||||
obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
|
||||
obj-y += common.o
|
||||
|
||||
|
@ -288,11 +288,15 @@ return_from_SYSCALL_64:
|
||||
jne opportunistic_sysret_failed
|
||||
|
||||
/*
|
||||
* SYSRET can't restore RF. SYSRET can restore TF, but unlike IRET,
|
||||
* restoring TF results in a trap from userspace immediately after
|
||||
* SYSRET. This would cause an infinite loop whenever #DB happens
|
||||
* with register state that satisfies the opportunistic SYSRET
|
||||
* conditions. For example, single-stepping this user code:
|
||||
* SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
|
||||
* restore RF properly. If the slowpath sets it for whatever reason, we
|
||||
* need to restore it correctly.
|
||||
*
|
||||
* SYSRET can restore TF, but unlike IRET, restoring TF results in a
|
||||
* trap from userspace immediately after SYSRET. This would cause an
|
||||
* infinite loop whenever #DB happens with register state that satisfies
|
||||
* the opportunistic SYSRET conditions. For example, single-stepping
|
||||
* this user code:
|
||||
*
|
||||
* movq $stuck_here, %rcx
|
||||
* pushfq
|
||||
@ -601,9 +605,20 @@ apicinterrupt3 \num trace(\sym) smp_trace(\sym)
|
||||
.endm
|
||||
#endif
|
||||
|
||||
/* Make sure APIC interrupt handlers end up in the irqentry section: */
|
||||
#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
|
||||
# define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax"
|
||||
# define POP_SECTION_IRQENTRY .popsection
|
||||
#else
|
||||
# define PUSH_SECTION_IRQENTRY
|
||||
# define POP_SECTION_IRQENTRY
|
||||
#endif
|
||||
|
||||
.macro apicinterrupt num sym do_sym
|
||||
PUSH_SECTION_IRQENTRY
|
||||
apicinterrupt3 \num \sym \do_sym
|
||||
trace_apicinterrupt \num \sym
|
||||
POP_SECTION_IRQENTRY
|
||||
.endm
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
@ -22,10 +22,6 @@ typedef struct {
|
||||
#ifdef CONFIG_SMP
|
||||
unsigned int irq_resched_count;
|
||||
unsigned int irq_call_count;
|
||||
/*
|
||||
* irq_tlb_count is double-counted in irq_call_count, so it must be
|
||||
* subtracted from irq_call_count when displaying irq_call_count
|
||||
*/
|
||||
unsigned int irq_tlb_count;
|
||||
#endif
|
||||
#ifdef CONFIG_X86_THERMAL_VECTOR
|
||||
|
@ -145,7 +145,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
|
||||
*
|
||||
* | ... | 11| 10| 9|8|7|6|5| 4| 3|2|1|0| <- bit number
|
||||
* | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| <- bit names
|
||||
* | OFFSET (14->63) | TYPE (10-13) |0|X|X|X| X| X|X|X|0| <- swp entry
|
||||
* | OFFSET (14->63) | TYPE (9-13) |0|X|X|X| X| X|X|X|0| <- swp entry
|
||||
*
|
||||
* G (8) is aliased and used as a PROT_NONE indicator for
|
||||
* !present ptes. We need to start storing swap entries above
|
||||
@ -156,7 +156,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
|
||||
#define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
|
||||
#define SWP_TYPE_BITS 5
|
||||
/* Place the offset above the type: */
|
||||
#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS + 1)
|
||||
#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS)
|
||||
|
||||
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
|
||||
|
||||
|
@ -58,7 +58,15 @@ extern unsigned char boot_gdt[];
|
||||
extern unsigned char secondary_startup_64[];
|
||||
#endif
|
||||
|
||||
static inline size_t real_mode_size_needed(void)
|
||||
{
|
||||
if (real_mode_header)
|
||||
return 0; /* already allocated. */
|
||||
|
||||
return ALIGN(real_mode_blob_end - real_mode_blob, PAGE_SIZE);
|
||||
}
|
||||
|
||||
void set_real_mode_mem(phys_addr_t mem, size_t size);
|
||||
void reserve_real_mode(void);
|
||||
void setup_real_mode(void);
|
||||
|
||||
#endif /* _ARCH_X86_REALMODE_H */
|
||||
|
@ -135,7 +135,14 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
|
||||
|
||||
static inline void __native_flush_tlb(void)
|
||||
{
|
||||
/*
|
||||
* If current->mm == NULL then we borrow a mm which may change during a
|
||||
* task switch and therefore we must not be preempted while we write CR3
|
||||
* back:
|
||||
*/
|
||||
preempt_disable();
|
||||
native_write_cr3(native_read_cr3());
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static inline void __native_flush_tlb_global_irq_disabled(void)
|
||||
|
@ -79,7 +79,7 @@ struct uv_gam_range_entry {
|
||||
u16 nasid; /* HNasid */
|
||||
u16 sockid; /* Socket ID, high bits of APIC ID */
|
||||
u16 pnode; /* Index to MMR and GRU spaces */
|
||||
u32 pxm; /* ACPI proximity domain number */
|
||||
u32 unused2;
|
||||
u32 limit; /* PA bits 56:26 (UV_GAM_RANGE_SHFT) */
|
||||
};
|
||||
|
||||
@ -88,7 +88,8 @@ struct uv_gam_range_entry {
|
||||
#define UV_SYSTAB_VERSION_UV4 0x400 /* UV4 BIOS base version */
|
||||
#define UV_SYSTAB_VERSION_UV4_1 0x401 /* + gpa_shift */
|
||||
#define UV_SYSTAB_VERSION_UV4_2 0x402 /* + TYPE_NVRAM/WINDOW/MBOX */
|
||||
#define UV_SYSTAB_VERSION_UV4_LATEST UV_SYSTAB_VERSION_UV4_2
|
||||
#define UV_SYSTAB_VERSION_UV4_3 0x403 /* - GAM Range PXM Value */
|
||||
#define UV_SYSTAB_VERSION_UV4_LATEST UV_SYSTAB_VERSION_UV4_3
|
||||
|
||||
#define UV_SYSTAB_TYPE_UNUSED 0 /* End of table (offset == 0) */
|
||||
#define UV_SYSTAB_TYPE_GAM_PARAMS 1 /* GAM PARAM conversions */
|
||||
|
@ -155,7 +155,7 @@ static void init_x2apic_ldr(void)
|
||||
/*
|
||||
* At CPU state changes, update the x2apic cluster sibling info.
|
||||
*/
|
||||
int x2apic_prepare_cpu(unsigned int cpu)
|
||||
static int x2apic_prepare_cpu(unsigned int cpu)
|
||||
{
|
||||
if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL))
|
||||
return -ENOMEM;
|
||||
@ -168,7 +168,7 @@ int x2apic_prepare_cpu(unsigned int cpu)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int x2apic_dead_cpu(unsigned int this_cpu)
|
||||
static int x2apic_dead_cpu(unsigned int this_cpu)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
@ -186,13 +186,18 @@ int x2apic_dead_cpu(unsigned int this_cpu)
|
||||
static int x2apic_cluster_probe(void)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
int ret;
|
||||
|
||||
if (!x2apic_mode)
|
||||
return 0;
|
||||
|
||||
ret = cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE",
|
||||
x2apic_prepare_cpu, x2apic_dead_cpu);
|
||||
if (ret < 0) {
|
||||
pr_err("Failed to register X2APIC_PREPARE\n");
|
||||
return 0;
|
||||
}
|
||||
cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu));
|
||||
cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE",
|
||||
x2apic_prepare_cpu, x2apic_dead_cpu);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -223,6 +223,11 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
|
||||
if (strncmp(oem_id, "SGI", 3) != 0)
|
||||
return 0;
|
||||
|
||||
if (numa_off) {
|
||||
pr_err("UV: NUMA is off, disabling UV support\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Setup early hub type field in uv_hub_info for Node 0 */
|
||||
uv_cpu_info->p_uv_hub_info = &uv_hub_info_node0;
|
||||
|
||||
@ -325,7 +330,7 @@ static __init void build_uv_gr_table(void)
|
||||
struct uv_gam_range_entry *gre = uv_gre_table;
|
||||
struct uv_gam_range_s *grt;
|
||||
unsigned long last_limit = 0, ram_limit = 0;
|
||||
int bytes, i, sid, lsid = -1;
|
||||
int bytes, i, sid, lsid = -1, indx = 0, lindx = -1;
|
||||
|
||||
if (!gre)
|
||||
return;
|
||||
@ -356,11 +361,12 @@ static __init void build_uv_gr_table(void)
|
||||
}
|
||||
sid = gre->sockid - _min_socket;
|
||||
if (lsid < sid) { /* new range */
|
||||
grt = &_gr_table[sid];
|
||||
grt->base = lsid;
|
||||
grt = &_gr_table[indx];
|
||||
grt->base = lindx;
|
||||
grt->nasid = gre->nasid;
|
||||
grt->limit = last_limit = gre->limit;
|
||||
lsid = sid;
|
||||
lindx = indx++;
|
||||
continue;
|
||||
}
|
||||
if (lsid == sid && !ram_limit) { /* update range */
|
||||
@ -371,7 +377,7 @@ static __init void build_uv_gr_table(void)
|
||||
}
|
||||
if (!ram_limit) { /* non-contiguous ram range */
|
||||
grt++;
|
||||
grt->base = sid - 1;
|
||||
grt->base = lindx;
|
||||
grt->nasid = gre->nasid;
|
||||
grt->limit = last_limit = gre->limit;
|
||||
continue;
|
||||
@ -1155,19 +1161,18 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
|
||||
for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
|
||||
if (!index) {
|
||||
pr_info("UV: GAM Range Table...\n");
|
||||
pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s %3s\n",
|
||||
pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s\n",
|
||||
"Range", "", "Size", "Type", "NASID",
|
||||
"SID", "PN", "PXM");
|
||||
"SID", "PN");
|
||||
}
|
||||
pr_info(
|
||||
"UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x %3d\n",
|
||||
"UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x\n",
|
||||
index++,
|
||||
(unsigned long)lgre << UV_GAM_RANGE_SHFT,
|
||||
(unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
|
||||
((unsigned long)(gre->limit - lgre)) >>
|
||||
(30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */
|
||||
gre->type, gre->nasid, gre->sockid,
|
||||
gre->pnode, gre->pxm);
|
||||
gre->type, gre->nasid, gre->sockid, gre->pnode);
|
||||
|
||||
lgre = gre->limit;
|
||||
if (sock_min > gre->sockid)
|
||||
@ -1286,7 +1291,7 @@ static void __init build_socket_tables(void)
|
||||
_pnode_to_socket[i] = SOCK_EMPTY;
|
||||
|
||||
/* fill in pnode/node/addr conversion list values */
|
||||
pr_info("UV: GAM Building socket/pnode/pxm conversion tables\n");
|
||||
pr_info("UV: GAM Building socket/pnode conversion tables\n");
|
||||
for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
|
||||
if (gre->type == UV_GAM_RANGE_TYPE_HOLE)
|
||||
continue;
|
||||
@ -1294,20 +1299,18 @@ static void __init build_socket_tables(void)
|
||||
if (_socket_to_pnode[i] != SOCK_EMPTY)
|
||||
continue; /* duplicate */
|
||||
_socket_to_pnode[i] = gre->pnode;
|
||||
_socket_to_node[i] = gre->pxm;
|
||||
|
||||
i = gre->pnode - minpnode;
|
||||
_pnode_to_socket[i] = gre->sockid;
|
||||
|
||||
pr_info(
|
||||
"UV: sid:%02x type:%d nasid:%04x pn:%02x pxm:%2d pn2s:%2x\n",
|
||||
"UV: sid:%02x type:%d nasid:%04x pn:%02x pn2s:%2x\n",
|
||||
gre->sockid, gre->type, gre->nasid,
|
||||
_socket_to_pnode[gre->sockid - minsock],
|
||||
_socket_to_node[gre->sockid - minsock],
|
||||
_pnode_to_socket[gre->pnode - minpnode]);
|
||||
}
|
||||
|
||||
/* check socket -> node values */
|
||||
/* Set socket -> node values */
|
||||
lnid = -1;
|
||||
for_each_present_cpu(cpu) {
|
||||
int nid = cpu_to_node(cpu);
|
||||
@ -1318,14 +1321,9 @@ static void __init build_socket_tables(void)
|
||||
lnid = nid;
|
||||
apicid = per_cpu(x86_cpu_to_apicid, cpu);
|
||||
sockid = apicid >> uv_cpuid.socketid_shift;
|
||||
i = sockid - minsock;
|
||||
|
||||
if (nid != _socket_to_node[i]) {
|
||||
pr_warn(
|
||||
"UV: %02x: type:%d socket:%02x PXM:%02x != node:%2d\n",
|
||||
i, sockid, gre->type, _socket_to_node[i], nid);
|
||||
_socket_to_node[i] = nid;
|
||||
}
|
||||
_socket_to_node[sockid - minsock] = nid;
|
||||
pr_info("UV: sid:%02x: apicid:%04x node:%2d\n",
|
||||
sockid, apicid, nid);
|
||||
}
|
||||
|
||||
/* Setup physical blade to pnode translation from GAM Range Table */
|
||||
|
@ -866,105 +866,17 @@ const void *get_xsave_field_ptr(int xsave_state)
|
||||
return get_xsave_addr(&fpu->state.xsave, xsave_state);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Set xfeatures (aka XSTATE_BV) bit for a feature that we want
|
||||
* to take out of its "init state". This will ensure that an
|
||||
* XRSTOR actually restores the state.
|
||||
*/
|
||||
static void fpu__xfeature_set_non_init(struct xregs_state *xsave,
|
||||
int xstate_feature_mask)
|
||||
{
|
||||
xsave->header.xfeatures |= xstate_feature_mask;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is safe to call whether the FPU is in use or not.
|
||||
*
|
||||
* Note that this only works on the current task.
|
||||
*
|
||||
* Inputs:
|
||||
* @xsave_state: state which is defined in xsave.h (e.g. XFEATURE_MASK_FP,
|
||||
* XFEATURE_MASK_SSE, etc...)
|
||||
* @xsave_state_ptr: a pointer to a copy of the state that you would
|
||||
* like written in to the current task's FPU xsave state. This pointer
|
||||
* must not be located in the current tasks's xsave area.
|
||||
* Output:
|
||||
* address of the state in the xsave area or NULL if the state
|
||||
* is not present or is in its 'init state'.
|
||||
*/
|
||||
static void fpu__xfeature_set_state(int xstate_feature_mask,
|
||||
void *xstate_feature_src, size_t len)
|
||||
{
|
||||
struct xregs_state *xsave = ¤t->thread.fpu.state.xsave;
|
||||
struct fpu *fpu = ¤t->thread.fpu;
|
||||
void *dst;
|
||||
|
||||
if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
|
||||
WARN_ONCE(1, "%s() attempted with no xsave support", __func__);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Tell the FPU code that we need the FPU state to be in
|
||||
* 'fpu' (not in the registers), and that we need it to
|
||||
* be stable while we write to it.
|
||||
*/
|
||||
fpu__current_fpstate_write_begin();
|
||||
|
||||
/*
|
||||
* This method *WILL* *NOT* work for compact-format
|
||||
* buffers. If the 'xstate_feature_mask' is unset in
|
||||
* xcomp_bv then we may need to move other feature state
|
||||
* "up" in the buffer.
|
||||
*/
|
||||
if (xsave->header.xcomp_bv & xstate_feature_mask) {
|
||||
WARN_ON_ONCE(1);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* find the location in the xsave buffer of the desired state */
|
||||
dst = __raw_xsave_addr(&fpu->state.xsave, xstate_feature_mask);
|
||||
|
||||
/*
|
||||
* Make sure that the pointer being passed in did not
|
||||
* come from the xsave buffer itself.
|
||||
*/
|
||||
WARN_ONCE(xstate_feature_src == dst, "set from xsave buffer itself");
|
||||
|
||||
/* put the caller-provided data in the location */
|
||||
memcpy(dst, xstate_feature_src, len);
|
||||
|
||||
/*
|
||||
* Mark the xfeature so that the CPU knows there is state
|
||||
* in the buffer now.
|
||||
*/
|
||||
fpu__xfeature_set_non_init(xsave, xstate_feature_mask);
|
||||
out:
|
||||
/*
|
||||
* We are done writing to the 'fpu'. Reenable preeption
|
||||
* and (possibly) move the fpstate back in to the fpregs.
|
||||
*/
|
||||
fpu__current_fpstate_write_end();
|
||||
}
|
||||
|
||||
#define NR_VALID_PKRU_BITS (CONFIG_NR_PROTECTION_KEYS * 2)
|
||||
#define PKRU_VALID_MASK (NR_VALID_PKRU_BITS - 1)
|
||||
|
||||
/*
|
||||
* This will go out and modify the XSAVE buffer so that PKRU is
|
||||
* set to a particular state for access to 'pkey'.
|
||||
*
|
||||
* PKRU state does affect kernel access to user memory. We do
|
||||
* not modfiy PKRU *itself* here, only the XSAVE state that will
|
||||
* be restored in to PKRU when we return back to userspace.
|
||||
* This will go out and modify PKRU register to set the access
|
||||
* rights for @pkey to @init_val.
|
||||
*/
|
||||
int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
|
||||
unsigned long init_val)
|
||||
{
|
||||
struct xregs_state *xsave = &tsk->thread.fpu.state.xsave;
|
||||
struct pkru_state *old_pkru_state;
|
||||
struct pkru_state new_pkru_state;
|
||||
u32 old_pkru;
|
||||
int pkey_shift = (pkey * PKRU_BITS_PER_PKEY);
|
||||
u32 new_pkru_bits = 0;
|
||||
|
||||
@ -974,6 +886,15 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
|
||||
*/
|
||||
if (!boot_cpu_has(X86_FEATURE_OSPKE))
|
||||
return -EINVAL;
|
||||
/*
|
||||
* For most XSAVE components, this would be an arduous task:
|
||||
* brining fpstate up to date with fpregs, updating fpstate,
|
||||
* then re-populating fpregs. But, for components that are
|
||||
* never lazily managed, we can just access the fpregs
|
||||
* directly. PKRU is never managed lazily, so we can just
|
||||
* manipulate it directly. Make sure it stays that way.
|
||||
*/
|
||||
WARN_ON_ONCE(!use_eager_fpu());
|
||||
|
||||
/* Set the bits we need in PKRU: */
|
||||
if (init_val & PKEY_DISABLE_ACCESS)
|
||||
@ -984,37 +905,12 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
|
||||
/* Shift the bits in to the correct place in PKRU for pkey: */
|
||||
new_pkru_bits <<= pkey_shift;
|
||||
|
||||
/* Locate old copy of the state in the xsave buffer: */
|
||||
old_pkru_state = get_xsave_addr(xsave, XFEATURE_MASK_PKRU);
|
||||
/* Get old PKRU and mask off any old bits in place: */
|
||||
old_pkru = read_pkru();
|
||||
old_pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
|
||||
|
||||
/*
|
||||
* When state is not in the buffer, it is in the init
|
||||
* state, set it manually. Otherwise, copy out the old
|
||||
* state.
|
||||
*/
|
||||
if (!old_pkru_state)
|
||||
new_pkru_state.pkru = 0;
|
||||
else
|
||||
new_pkru_state.pkru = old_pkru_state->pkru;
|
||||
|
||||
/* Mask off any old bits in place: */
|
||||
new_pkru_state.pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
|
||||
|
||||
/* Set the newly-requested bits: */
|
||||
new_pkru_state.pkru |= new_pkru_bits;
|
||||
|
||||
/*
|
||||
* We could theoretically live without zeroing pkru.pad.
|
||||
* The current XSAVE feature state definition says that
|
||||
* only bytes 0->3 are used. But we do not want to
|
||||
* chance leaking kernel stack out to userspace in case a
|
||||
* memcpy() of the whole xsave buffer was done.
|
||||
*
|
||||
* They're in the same cacheline anyway.
|
||||
*/
|
||||
new_pkru_state.pad = 0;
|
||||
|
||||
fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state, sizeof(new_pkru_state));
|
||||
/* Write old part along with new part: */
|
||||
write_pkru(old_pkru | new_pkru_bits);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -25,8 +25,6 @@ static void __init i386_default_early_setup(void)
|
||||
/* Initialize 32bit specific setup functions */
|
||||
x86_init.resources.reserve_resources = i386_reserve_resources;
|
||||
x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc;
|
||||
|
||||
reserve_bios_regions();
|
||||
}
|
||||
|
||||
asmlinkage __visible void __init i386_start_kernel(void)
|
||||
|
@ -183,7 +183,6 @@ void __init x86_64_start_reservations(char *real_mode_data)
|
||||
copy_bootdata(__va(real_mode_data));
|
||||
|
||||
x86_early_init_platform_quirks();
|
||||
reserve_bios_regions();
|
||||
|
||||
switch (boot_params.hdr.hardware_subarch) {
|
||||
case X86_SUBARCH_INTEL_MID:
|
||||
|
@ -102,8 +102,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
|
||||
seq_puts(p, " Rescheduling interrupts\n");
|
||||
seq_printf(p, "%*s: ", prec, "CAL");
|
||||
for_each_online_cpu(j)
|
||||
seq_printf(p, "%10u ", irq_stats(j)->irq_call_count -
|
||||
irq_stats(j)->irq_tlb_count);
|
||||
seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
|
||||
seq_puts(p, " Function call interrupts\n");
|
||||
seq_printf(p, "%*s: ", prec, "TLB");
|
||||
for_each_online_cpu(j)
|
||||
|
@ -936,8 +936,6 @@ void __init setup_arch(char **cmdline_p)
|
||||
|
||||
x86_init.oem.arch_setup();
|
||||
|
||||
kernel_randomize_memory();
|
||||
|
||||
iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
|
||||
setup_memory_map();
|
||||
parse_setup_data();
|
||||
@ -1055,6 +1053,12 @@ void __init setup_arch(char **cmdline_p)
|
||||
|
||||
max_possible_pfn = max_pfn;
|
||||
|
||||
/*
|
||||
* Define random base addresses for memory sections after max_pfn is
|
||||
* defined and before each memory section base is used.
|
||||
*/
|
||||
kernel_randomize_memory();
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/* max_low_pfn get updated here */
|
||||
find_low_pfn_range();
|
||||
@ -1097,6 +1101,8 @@ void __init setup_arch(char **cmdline_p)
|
||||
efi_find_mirror();
|
||||
}
|
||||
|
||||
reserve_bios_regions();
|
||||
|
||||
/*
|
||||
* The EFI specification says that boot service code won't be called
|
||||
* after ExitBootServices(). This is, in fact, a lie.
|
||||
@ -1125,7 +1131,15 @@ void __init setup_arch(char **cmdline_p)
|
||||
|
||||
early_trap_pf_init();
|
||||
|
||||
setup_real_mode();
|
||||
/*
|
||||
* Update mmu_cr4_features (and, indirectly, trampoline_cr4_features)
|
||||
* with the current CR4 value. This may not be necessary, but
|
||||
* auditing all the early-boot CR4 manipulation would be needed to
|
||||
* rule it out.
|
||||
*/
|
||||
if (boot_cpu_data.cpuid_level >= 0)
|
||||
/* A CPU has %cr4 if and only if it has CPUID. */
|
||||
mmu_cr4_features = __read_cr4();
|
||||
|
||||
memblock_set_current_limit(get_max_mapped());
|
||||
|
||||
@ -1174,13 +1188,6 @@ void __init setup_arch(char **cmdline_p)
|
||||
|
||||
kasan_init();
|
||||
|
||||
if (boot_cpu_data.cpuid_level >= 0) {
|
||||
/* A CPU has %cr4 if and only if it has CPUID */
|
||||
mmu_cr4_features = __read_cr4();
|
||||
if (trampoline_cr4_features)
|
||||
*trampoline_cr4_features = mmu_cr4_features;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/* sync back kernel address range */
|
||||
clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
|
||||
|
@ -19,7 +19,7 @@
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/setup.h>
|
||||
|
||||
#define debug_putstr(v) early_printk(v)
|
||||
#define debug_putstr(v) early_printk("%s", v)
|
||||
#define has_cpuflag(f) boot_cpu_has(f)
|
||||
#define get_boot_seed() kaslr_offset()
|
||||
#endif
|
||||
|
@ -122,8 +122,18 @@ __ref void *alloc_low_pages(unsigned int num)
|
||||
return __va(pfn << PAGE_SHIFT);
|
||||
}
|
||||
|
||||
/* need 3 4k for initial PMD_SIZE, 3 4k for 0-ISA_END_ADDRESS */
|
||||
#define INIT_PGT_BUF_SIZE (6 * PAGE_SIZE)
|
||||
/*
|
||||
* By default need 3 4k for initial PMD_SIZE, 3 4k for 0-ISA_END_ADDRESS.
|
||||
* With KASLR memory randomization, depending on the machine e820 memory
|
||||
* and the PUD alignment. We may need twice more pages when KASLR memory
|
||||
* randomization is enabled.
|
||||
*/
|
||||
#ifndef CONFIG_RANDOMIZE_MEMORY
|
||||
#define INIT_PGD_PAGE_COUNT 6
|
||||
#else
|
||||
#define INIT_PGD_PAGE_COUNT 12
|
||||
#endif
|
||||
#define INIT_PGT_BUF_SIZE (INIT_PGD_PAGE_COUNT * PAGE_SIZE)
|
||||
RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE);
|
||||
void __init early_alloc_pgt_buf(void)
|
||||
{
|
||||
|
@ -97,7 +97,7 @@ void __init kernel_randomize_memory(void)
|
||||
* add padding if needed (especially for memory hotplug support).
|
||||
*/
|
||||
BUG_ON(kaslr_regions[0].base != &page_offset_base);
|
||||
memory_tb = ((max_pfn << PAGE_SHIFT) >> TB_SHIFT) +
|
||||
memory_tb = DIV_ROUND_UP(max_pfn << PAGE_SHIFT, 1UL << TB_SHIFT) +
|
||||
CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING;
|
||||
|
||||
/* Adapt phyiscal memory region size based on available memory */
|
||||
|
@ -254,6 +254,7 @@ void __init efi_free_boot_services(void)
|
||||
for_each_efi_memory_desc(md) {
|
||||
unsigned long long start = md->phys_addr;
|
||||
unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
|
||||
size_t rm_size;
|
||||
|
||||
if (md->type != EFI_BOOT_SERVICES_CODE &&
|
||||
md->type != EFI_BOOT_SERVICES_DATA)
|
||||
@ -263,6 +264,26 @@ void __init efi_free_boot_services(void)
|
||||
if (md->attribute & EFI_MEMORY_RUNTIME)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Nasty quirk: if all sub-1MB memory is used for boot
|
||||
* services, we can get here without having allocated the
|
||||
* real mode trampoline. It's too late to hand boot services
|
||||
* memory back to the memblock allocator, so instead
|
||||
* try to manually allocate the trampoline if needed.
|
||||
*
|
||||
* I've seen this on a Dell XPS 13 9350 with firmware
|
||||
* 1.4.4 with SGX enabled booting Linux via Fedora 24's
|
||||
* grub2-efi on a hard disk. (And no, I don't know why
|
||||
* this happened, but Linux should still try to boot rather
|
||||
* panicing early.)
|
||||
*/
|
||||
rm_size = real_mode_size_needed();
|
||||
if (rm_size && (start + rm_size) < (1<<20) && size >= rm_size) {
|
||||
set_real_mode_mem(start, rm_size);
|
||||
start += rm_size;
|
||||
size -= rm_size;
|
||||
}
|
||||
|
||||
free_bootmem_late(start, size);
|
||||
}
|
||||
|
||||
|
@ -200,12 +200,14 @@ void uv_bios_init(void)
|
||||
return;
|
||||
}
|
||||
|
||||
/* Starting with UV4 the UV systab size is variable */
|
||||
if (uv_systab->revision >= UV_SYSTAB_VERSION_UV4) {
|
||||
int size = uv_systab->size;
|
||||
|
||||
iounmap(uv_systab);
|
||||
uv_systab = ioremap(efi.uv_systab, uv_systab->size);
|
||||
uv_systab = ioremap(efi.uv_systab, size);
|
||||
if (!uv_systab) {
|
||||
pr_err("UV: UVsystab: ioremap(%d) failed!\n",
|
||||
uv_systab->size);
|
||||
pr_err("UV: UVsystab: ioremap(%d) failed!\n", size);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -1,9 +1,11 @@
|
||||
#include <linux/io.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/memblock.h>
|
||||
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/realmode.h>
|
||||
#include <asm/tlbflush.h>
|
||||
|
||||
struct real_mode_header *real_mode_header;
|
||||
u32 *trampoline_cr4_features;
|
||||
@ -11,25 +13,37 @@ u32 *trampoline_cr4_features;
|
||||
/* Hold the pgd entry used on booting additional CPUs */
|
||||
pgd_t trampoline_pgd_entry;
|
||||
|
||||
void __init reserve_real_mode(void)
|
||||
void __init set_real_mode_mem(phys_addr_t mem, size_t size)
|
||||
{
|
||||
phys_addr_t mem;
|
||||
unsigned char *base;
|
||||
size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob);
|
||||
void *base = __va(mem);
|
||||
|
||||
/* Has to be under 1M so we can execute real-mode AP code. */
|
||||
mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
|
||||
if (!mem)
|
||||
panic("Cannot allocate trampoline\n");
|
||||
|
||||
base = __va(mem);
|
||||
memblock_reserve(mem, size);
|
||||
real_mode_header = (struct real_mode_header *) base;
|
||||
printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
|
||||
base, (unsigned long long)mem, size);
|
||||
}
|
||||
|
||||
void __init setup_real_mode(void)
|
||||
void __init reserve_real_mode(void)
|
||||
{
|
||||
phys_addr_t mem;
|
||||
size_t size = real_mode_size_needed();
|
||||
|
||||
if (!size)
|
||||
return;
|
||||
|
||||
WARN_ON(slab_is_available());
|
||||
|
||||
/* Has to be under 1M so we can execute real-mode AP code. */
|
||||
mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
|
||||
if (!mem) {
|
||||
pr_info("No sub-1M memory is available for the trampoline\n");
|
||||
return;
|
||||
}
|
||||
|
||||
memblock_reserve(mem, size);
|
||||
set_real_mode_mem(mem, size);
|
||||
}
|
||||
|
||||
static void __init setup_real_mode(void)
|
||||
{
|
||||
u16 real_mode_seg;
|
||||
const u32 *rel;
|
||||
@ -84,7 +98,7 @@ void __init setup_real_mode(void)
|
||||
|
||||
trampoline_header->start = (u64) secondary_startup_64;
|
||||
trampoline_cr4_features = &trampoline_header->cr4;
|
||||
*trampoline_cr4_features = __read_cr4();
|
||||
*trampoline_cr4_features = mmu_cr4_features;
|
||||
|
||||
trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
|
||||
trampoline_pgd[0] = trampoline_pgd_entry.pgd;
|
||||
@ -100,7 +114,7 @@ void __init setup_real_mode(void)
|
||||
* need to mark it executable at do_pre_smp_initcalls() at least,
|
||||
* thus run it as a early_initcall().
|
||||
*/
|
||||
static int __init set_real_mode_permissions(void)
|
||||
static void __init set_real_mode_permissions(void)
|
||||
{
|
||||
unsigned char *base = (unsigned char *) real_mode_header;
|
||||
size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob);
|
||||
@ -119,7 +133,16 @@ static int __init set_real_mode_permissions(void)
|
||||
set_memory_nx((unsigned long) base, size >> PAGE_SHIFT);
|
||||
set_memory_ro((unsigned long) base, ro_size >> PAGE_SHIFT);
|
||||
set_memory_x((unsigned long) text_start, text_size >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
static int __init init_real_mode(void)
|
||||
{
|
||||
if (!real_mode_header)
|
||||
panic("Real mode trampoline was not allocated");
|
||||
|
||||
setup_real_mode();
|
||||
set_real_mode_permissions();
|
||||
|
||||
return 0;
|
||||
}
|
||||
early_initcall(set_real_mode_permissions);
|
||||
early_initcall(init_real_mode);
|
||||
|
Loading…
x
Reference in New Issue
Block a user