Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc

Pull sparc updates from David Miller:
 "Mostly VDSO cleanups and optimizations"

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc:
  sparc: Several small VDSO vclock_gettime.c improvements.
  sparc: Validate VDSO for undefined symbols.
  sparc: Really use linker with LDFLAGS.
  sparc: Improve VDSO CFLAGS.
  sparc: Set DISABLE_BRANCH_PROFILING in VDSO CFLAGS.
  sparc: Don't bother masking out TICK_PRIV_BIT in VDSO code.
  sparc: Inline VDSO gettime code aggressively.
  sparc: Improve VDSO instruction patching.
  sparc: Fix parport build warnings.
This commit is contained in:
Linus Torvalds 2018-10-24 06:42:00 +01:00
commit a97a2d4d56
11 changed files with 122 additions and 106 deletions

View File

@ -21,6 +21,7 @@
*/
#define HAS_DMA
#ifdef CONFIG_PARPORT_PC_FIFO
static DEFINE_SPINLOCK(dma_spin_lock);
#define claim_dma_lock() \
@ -31,6 +32,7 @@ static DEFINE_SPINLOCK(dma_spin_lock);
#define release_dma_lock(__flags) \
spin_unlock_irqrestore(&dma_spin_lock, __flags);
#endif
static struct sparc_ebus_info {
struct ebus_dma_info info;

View File

@ -121,8 +121,12 @@ struct thread_info {
}
/* how to get the thread information struct from C */
/*
 * Regular builds bind a global register variable to g6 and make
 * current_thread_info() expand to it.
 *
 * When BUILD_VDSO is defined no register variable is declared; only a
 * prototype for current_thread_info() is provided, so code that merely
 * mentions it still compiles.
 * NOTE(review): no definition of that function is visible here, so any
 * VDSO code that really calls it would presumably be left with an
 * undefined symbol at link time -- confirm.
 */
#ifndef BUILD_VDSO
register struct thread_info *current_thread_info_reg asm("g6");
#define current_thread_info() (current_thread_info_reg)
#else
extern struct thread_info *current_thread_info(void);
#endif
/* thread information allocation */
#if PAGE_SHIFT == 13

View File

@ -8,10 +8,10 @@
struct vdso_image {
void *data;
unsigned long size; /* Always a multiple of PAGE_SIZE */
unsigned long tick_patch, tick_patch_len;
long sym_vvar_start; /* Negative offset to the vvar area */
long sym_vread_tick; /* Start of vread_tick section */
long sym_vread_tick_patch_start; /* Start of tick read */
long sym_vread_tick_patch_end; /* End of tick read */
};
#ifdef CONFIG_SPARC64

View File

@ -53,8 +53,6 @@
DEFINE_SPINLOCK(rtc_lock);
unsigned int __read_mostly vdso_fix_stick;
#ifdef CONFIG_SMP
unsigned long profile_pc(struct pt_regs *regs)
{
@ -838,7 +836,6 @@ void __init time_init_early(void)
} else {
init_tick_ops(&tick_operations);
clocksource_tick.archdata.vclock_mode = VCLOCK_TICK;
vdso_fix_stick = 1;
}
} else {
init_tick_ops(&stick_operations);

View File

@ -33,10 +33,8 @@ targets += $(vdso_img_sodbg) $(vdso_img-y:%=vdso%.so)
CPPFLAGS_vdso.lds += -P -C
VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
-Wl,--no-undefined \
-Wl,-z,max-page-size=8192 -Wl,-z,common-page-size=8192 \
$(DISABLE_LTO)
VDSO_LDFLAGS_vdso.lds = -m elf64_sparc -soname linux-vdso.so.1 --no-undefined \
-z max-page-size=8192 -z common-page-size=8192
$(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE
$(call if_changed,vdso)
@ -54,13 +52,14 @@ $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE
# Don't omit frame pointers for ease of userspace debugging, but do
# optimize sibling calls.
#
CFL := $(PROFILING) -mcmodel=medlow -fPIC -O2 -fasynchronous-unwind-tables \
-m64 -ffixed-g2 -ffixed-g3 -fcall-used-g4 -fcall-used-g5 -ffixed-g6 \
-ffixed-g7 $(filter -g%,$(KBUILD_CFLAGS)) \
$(call cc-option, -fno-stack-protector) -fno-omit-frame-pointer \
-foptimize-sibling-calls -DBUILD_VDSO
CFL := $(PROFILING) -mcmodel=medlow -fPIC -O2 -fasynchronous-unwind-tables -m64 \
$(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \
-fno-omit-frame-pointer -foptimize-sibling-calls \
-DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
$(vobjs): KBUILD_CFLAGS += $(CFL)
SPARC_REG_CFLAGS = -ffixed-g4 -ffixed-g5 -fcall-used-g5 -fcall-used-g7
$(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(SPARC_REG_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
#
# vDSO code runs in userspace and -pg doesn't help with profiling anyway.
@ -73,7 +72,7 @@ $(obj)/%.so: $(obj)/%.so.dbg
$(call if_changed,objcopy)
CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf32_sparc,-soname=linux-gate.so.1
VDSO_LDFLAGS_vdso32.lds = -m elf32_sparc -soname linux-gate.so.1
#This makes sure the $(obj) subdirectory exists even though vdso32/
#is not a kbuild sub-make subdirectory
@ -91,7 +90,8 @@ KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
KBUILD_CFLAGS_32 := $(filter-out -mcmodel=medlow,$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 += -m32 -msoft-float -fpic -mno-app-regs -ffixed-g7
KBUILD_CFLAGS_32 := $(filter-out $(SPARC_REG_CFLAGS),$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 += -m32 -msoft-float -fpic
KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector)
KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
KBUILD_CFLAGS_32 += -fno-omit-frame-pointer
@ -109,12 +109,13 @@ $(obj)/vdso32.so.dbg: FORCE \
# The DSO images are built using a special linker script.
#
quiet_cmd_vdso = VDSO $@
cmd_vdso = $(CC) -nostdlib -o $@ \
cmd_vdso = $(LD) -nostdlib -o $@ \
$(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \
-Wl,-T,$(filter %.lds,$^) $(filter %.o,$^)
-T $(filter %.lds,$^) $(filter %.o,$^) && \
sh $(srctree)/$(src)/checkundef.sh '$(OBJDUMP)' '$@'
VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \
$(call cc-ldoption, -Wl$(comma)--build-id) -Wl,-Bsymbolic
VDSO_LDFLAGS = -shared $(call ld-option, --hash-style=both) \
$(call ld-option, --build-id) -Bsymbolic
GCOV_PROFILE := n
#

View File

@ -0,0 +1,10 @@
#!/bin/sh
# Report whether an object file still contains undefined symbols.
#
# Usage: checkundef.sh <objdump command> <file>
#
# Exit status: 0 when no undefined symbol is found, 1 otherwise (a
# diagnostic naming the file is then written to stderr).
objdump="$1"
file="$2"
# "objdump -t" prints the literal section name "*UND*" for undefined
# symbols.  Match it as a fixed string (-F): the previous pattern
# '*UUND*' could never match that marker, so the pipeline always ended
# with status 1 and this check passed unconditionally.
# Lines mentioning '#scratch' are deliberately ignored (presumably the
# entries produced by SPARC ".register ..., #scratch" directives -- TODO
# confirm).
# $objdump is expanded unquoted, so the shell word-splits it.
$objdump -t "$file" | grep -F '*UND*' | grep -v '#scratch' > /dev/null 2>&1
# $? is the status of the last grep: 1 means no line survived both filters.
if [ $? -eq 1 ]; then
	exit 0
else
	echo "$file: undefined symbols found" >&2
	exit 1
fi

View File

@ -12,11 +12,6 @@
* Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved.
*/
/* Disable profiling for userspace code: */
#ifndef DISABLE_BRANCH_PROFILING
#define DISABLE_BRANCH_PROFILING
#endif
#include <linux/kernel.h>
#include <linux/time.h>
#include <linux/string.h>
@ -26,13 +21,6 @@
#include <asm/clocksource.h>
#include <asm/vvar.h>
#undef TICK_PRIV_BIT
#ifdef CONFIG_SPARC64
#define TICK_PRIV_BIT (1UL << 63)
#else
#define TICK_PRIV_BIT (1ULL << 63)
#endif
#ifdef CONFIG_SPARC64
#define SYSCALL_STRING \
"ta 0x6d;" \
@ -60,24 +48,22 @@
* Compute the vvar page's address in the process address space, and return it
* as a pointer to the vvar_data.
*/
static notrace noinline struct vvar_data *
get_vvar_data(void)
notrace static __always_inline struct vvar_data *get_vvar_data(void)
{
unsigned long ret;
/*
* vdso data page is the first vDSO page so grab the return address
* vdso data page is the first vDSO page so grab the PC
* and move up a page to get to the data page.
*/
ret = (unsigned long)__builtin_return_address(0);
__asm__("rd %%pc, %0" : "=r" (ret));
ret &= ~(8192 - 1);
ret -= 8192;
return (struct vvar_data *) ret;
}
static notrace long
vdso_fallback_gettime(long clock, struct timespec *ts)
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
register long num __asm__("g1") = __NR_clock_gettime;
register long o0 __asm__("o0") = clock;
@ -88,8 +74,7 @@ vdso_fallback_gettime(long clock, struct timespec *ts)
return o0;
}
static notrace __always_inline long
vdso_fallback_gettimeofday(struct timeval *tv, struct timezone *tz)
notrace static long vdso_fallback_gettimeofday(struct timeval *tv, struct timezone *tz)
{
register long num __asm__("g1") = __NR_gettimeofday;
register long o0 __asm__("o0") = (long) tv;
@ -101,38 +86,43 @@ vdso_fallback_gettimeofday(struct timeval *tv, struct timezone *tz)
}
#ifdef CONFIG_SPARC64
static notrace noinline u64
vread_tick(void) {
notrace static __always_inline u64 vread_tick(void)
{
u64 ret;
__asm__ __volatile__("rd %%asr24, %0 \n"
".section .vread_tick_patch, \"ax\" \n"
"rd %%tick, %0 \n"
".previous \n"
: "=&r" (ret));
return ret & ~TICK_PRIV_BIT;
__asm__ __volatile__("1:\n\t"
"rd %%tick, %0\n\t"
".pushsection .tick_patch, \"a\"\n\t"
".word 1b - ., 1f - .\n\t"
".popsection\n\t"
".pushsection .tick_patch_replacement, \"ax\"\n\t"
"1:\n\t"
"rd %%asr24, %0\n\t"
".popsection\n"
: "=r" (ret));
return ret;
}
#else
static notrace noinline u64
vread_tick(void)
notrace static __always_inline u64 vread_tick(void)
{
unsigned int lo, hi;
register unsigned long long ret asm("o4");
__asm__ __volatile__("rd %%asr24, %%g1\n\t"
"srlx %%g1, 32, %1\n\t"
"srl %%g1, 0, %0\n"
".section .vread_tick_patch, \"ax\" \n"
"rd %%tick, %%g1\n"
".previous \n"
: "=&r" (lo), "=&r" (hi)
:
: "g1");
return lo | ((u64)hi << 32);
__asm__ __volatile__("1:\n\t"
"rd %%tick, %L0\n\t"
"srlx %L0, 32, %H0\n\t"
".pushsection .tick_patch, \"a\"\n\t"
".word 1b - ., 1f - .\n\t"
".popsection\n\t"
".pushsection .tick_patch_replacement, \"ax\"\n\t"
"1:\n\t"
"rd %%asr24, %L0\n\t"
".popsection\n"
: "=r" (ret));
return ret;
}
#endif
static notrace inline u64
vgetsns(struct vvar_data *vvar)
notrace static __always_inline u64 vgetsns(struct vvar_data *vvar)
{
u64 v;
u64 cycles;
@ -142,13 +132,12 @@ vgetsns(struct vvar_data *vvar)
return v * vvar->clock.mult;
}
static notrace noinline int
do_realtime(struct vvar_data *vvar, struct timespec *ts)
notrace static __always_inline int do_realtime(struct vvar_data *vvar,
struct timespec *ts)
{
unsigned long seq;
u64 ns;
ts->tv_nsec = 0;
do {
seq = vvar_read_begin(vvar);
ts->tv_sec = vvar->wall_time_sec;
@ -157,18 +146,18 @@ do_realtime(struct vvar_data *vvar, struct timespec *ts)
ns >>= vvar->clock.shift;
} while (unlikely(vvar_read_retry(vvar, seq)));
timespec_add_ns(ts, ns);
ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
ts->tv_nsec = ns;
return 0;
}
static notrace noinline int
do_monotonic(struct vvar_data *vvar, struct timespec *ts)
notrace static __always_inline int do_monotonic(struct vvar_data *vvar,
struct timespec *ts)
{
unsigned long seq;
u64 ns;
ts->tv_nsec = 0;
do {
seq = vvar_read_begin(vvar);
ts->tv_sec = vvar->monotonic_time_sec;
@ -177,13 +166,14 @@ do_monotonic(struct vvar_data *vvar, struct timespec *ts)
ns >>= vvar->clock.shift;
} while (unlikely(vvar_read_retry(vvar, seq)));
timespec_add_ns(ts, ns);
ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
ts->tv_nsec = ns;
return 0;
}
static notrace noinline int
do_realtime_coarse(struct vvar_data *vvar, struct timespec *ts)
notrace static int do_realtime_coarse(struct vvar_data *vvar,
struct timespec *ts)
{
unsigned long seq;
@ -195,8 +185,8 @@ do_realtime_coarse(struct vvar_data *vvar, struct timespec *ts)
return 0;
}
static notrace noinline int
do_monotonic_coarse(struct vvar_data *vvar, struct timespec *ts)
notrace static int do_monotonic_coarse(struct vvar_data *vvar,
struct timespec *ts)
{
unsigned long seq;

View File

@ -73,11 +73,8 @@ SECTIONS
.text : { *(.text*) } :text =0x90909090,
.vread_tick_patch : {
vread_tick_patch_start = .;
*(.vread_tick_patch)
vread_tick_patch_end = .;
}
/* Table of (original, replacement) offset pairs emitted by vread_tick(). */
.tick_patch : { *(.tick_patch) } :text
/*
 * Replacement instructions referenced by the table above.  The inline
 * assembly emits them into ".tick_patch_replacement", so collect that
 * section here as well; otherwise this rule matches nothing and the
 * replacement code is left as an orphan section whose placement is up to
 * the linker.  The original input name is kept for compatibility.
 */
.tick_patch_insns : { *(.tick_patch_insns) *(.tick_patch_replacement) } :text
/DISCARD/ : {
*(.discard)

View File

@ -63,9 +63,6 @@ enum {
sym_vvar_start,
sym_VDSO_FAKE_SECTION_TABLE_START,
sym_VDSO_FAKE_SECTION_TABLE_END,
sym_vread_tick,
sym_vread_tick_patch_start,
sym_vread_tick_patch_end
};
struct vdso_sym {
@ -81,9 +78,6 @@ struct vdso_sym required_syms[] = {
[sym_VDSO_FAKE_SECTION_TABLE_END] = {
"VDSO_FAKE_SECTION_TABLE_END", 0
},
[sym_vread_tick] = {"vread_tick", 1},
[sym_vread_tick_patch_start] = {"vread_tick_patch_start", 1},
[sym_vread_tick_patch_end] = {"vread_tick_patch_end", 1}
};
__attribute__((format(printf, 1, 2))) __attribute__((noreturn))

View File

@ -17,10 +17,11 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
unsigned long mapping_size;
int i;
unsigned long j;
ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr;
ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr, *secstrings_hdr,
*patch_sec = NULL;
ELF(Ehdr) *hdr = (ELF(Ehdr) *)raw_addr;
ELF(Dyn) *dyn = 0, *dyn_end = 0;
const char *secstrings;
INT_BITS syms[NSYMS] = {};
ELF(Phdr) *pt = (ELF(Phdr) *)(raw_addr + GET_BE(&hdr->e_phoff));
@ -63,11 +64,18 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
}
/* Walk the section table */
secstrings_hdr = raw_addr + GET_BE(&hdr->e_shoff) +
GET_BE(&hdr->e_shentsize)*GET_BE(&hdr->e_shstrndx);
secstrings = raw_addr + GET_BE(&secstrings_hdr->sh_offset);
for (i = 0; i < GET_BE(&hdr->e_shnum); i++) {
ELF(Shdr) *sh = raw_addr + GET_BE(&hdr->e_shoff) +
GET_BE(&hdr->e_shentsize) * i;
if (GET_BE(&sh->sh_type) == SHT_SYMTAB)
symtab_hdr = sh;
if (!strcmp(secstrings + GET_BE(&sh->sh_name),
".tick_patch"))
patch_sec = sh;
}
if (!symtab_hdr)
@ -134,6 +142,12 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
fprintf(outfile, "const struct vdso_image %s_builtin = {\n", name);
fprintf(outfile, "\t.data = raw_data,\n");
fprintf(outfile, "\t.size = %lu,\n", mapping_size);
if (patch_sec) {
fprintf(outfile, "\t.tick_patch = %lu,\n",
(unsigned long)GET_BE(&patch_sec->sh_offset));
fprintf(outfile, "\t.tick_patch_len = %lu,\n",
(unsigned long)GET_BE(&patch_sec->sh_size));
}
for (i = 0; i < NSYMS; i++) {
if (required_syms[i].export && syms[i])
fprintf(outfile, "\t.sym_%s = %" PRIi64 ",\n",

View File

@ -16,6 +16,8 @@
#include <linux/linkage.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <asm/cacheflush.h>
#include <asm/spitfire.h>
#include <asm/vdso.h>
#include <asm/vvar.h>
#include <asm/page.h>
@ -40,7 +42,25 @@ static struct vm_special_mapping vdso_mapping32 = {
struct vvar_data *vvar_data;
#define SAVE_INSTR_SIZE 4
/*
 * One record of the tick patch table.  Each field is a signed 32-bit
 * offset measured from the address of the field itself.
 */
struct tick_patch_entry {
	s32 orig;	/* offset to the instruction that gets overwritten */
	s32 repl;	/* offset to the instruction copied over it */
};
/*
 * Apply every entry of the image's tick patch table: copy the replacement
 * instruction word over the original one, then flush that instruction.
 *
 * The table starts image->tick_patch bytes into image->data and is
 * image->tick_patch_len bytes long.
 */
static void stick_patch(const struct vdso_image *image)
{
	struct tick_patch_entry *entry = image->data + image->tick_patch;
	struct tick_patch_entry *end = (void *)entry + image->tick_patch_len;

	for (; entry < end; entry++) {
		/* Both offsets are relative to the field that stores them. */
		u32 *target = (void *)&entry->orig + entry->orig;
		u32 *source = (void *)&entry->repl + entry->repl;

		*target = *source;
		flushi(target);
	}
}
/*
* Allocate pages for the vdso and vvar, and copy in the vdso text from the
@ -68,21 +88,8 @@ int __init init_vdso_image(const struct vdso_image *image,
if (!cpp)
goto oom;
if (vdso_fix_stick) {
/*
* If the system uses %tick instead of %stick, patch the VDSO
* with instruction reading %tick instead of %stick.
*/
unsigned int j, k = SAVE_INSTR_SIZE;
unsigned char *data = image->data;
for (j = image->sym_vread_tick_patch_start;
j < image->sym_vread_tick_patch_end; j++) {
data[image->sym_vread_tick + k] = data[j];
k++;
}
}
if (tlb_type != spitfire)
stick_patch(image);
for (i = 0; i < cnpages; i++) {
cp = alloc_page(GFP_KERNEL);