mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-08 14:23:19 +00:00
s390/delay: simplify udelay
udelay is implemented using quite subtle details that make it possible to load an idle psw and wait for an interrupt even in irq context or when interrupts are disabled. Handling (or better: not handling) of softirqs is also taken into account. All this is done to optimize for something which should never happen under normal circumstances: calling udelay to busy wait. Therefore get rid of the whole complexity and just busy loop, like other architectures do. It would have been possible to use diag 0x44 instead of cpu_relax() in the busy loop; however, we have seen so many bad things happen with diag 0x44 that it seems better to simply busy loop. Also note that with this new implementation kernel preemption works while within the udelay loop. This did not work before. To get a feeling for what the former code optimizes for: IPL'ing a kernel with 'defconfig' and afterwards compiling a kernel ends with a total of zero udelay calls. Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
This commit is contained in:
parent
91c2bad6ae
commit
dd6cfe5532
@ -14,13 +14,13 @@
|
|||||||
#define _S390_DELAY_H
|
#define _S390_DELAY_H
|
||||||
|
|
||||||
void udelay_enable(void);
|
void udelay_enable(void);
|
||||||
void __ndelay(unsigned long long nsecs);
|
void __ndelay(unsigned long nsecs);
|
||||||
void __udelay(unsigned long long usecs);
|
void __udelay(unsigned long usecs);
|
||||||
void udelay_simple(unsigned long long usecs);
|
void udelay_simple(unsigned long usecs);
|
||||||
void __delay(unsigned long loops);
|
void __delay(unsigned long loops);
|
||||||
|
|
||||||
#define ndelay(n) __ndelay((unsigned long long) (n))
|
#define ndelay(n) __ndelay((unsigned long)(n))
|
||||||
#define udelay(n) __udelay((unsigned long long) (n))
|
#define udelay(n) __udelay((unsigned long)(n))
|
||||||
#define mdelay(n) __udelay((unsigned long long) (n) * 1000)
|
#define mdelay(n) __udelay((unsigned long)(n) * 1000)
|
||||||
|
|
||||||
#endif /* defined(_S390_DELAY_H) */
|
#endif /* defined(_S390_DELAY_H) */
|
||||||
|
@ -16,14 +16,12 @@
|
|||||||
|
|
||||||
#define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */
|
#define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */
|
||||||
#define CIF_FPU 3 /* restore FPU registers */
|
#define CIF_FPU 3 /* restore FPU registers */
|
||||||
#define CIF_IGNORE_IRQ 4 /* ignore interrupt (for udelay) */
|
|
||||||
#define CIF_ENABLED_WAIT 5 /* in enabled wait state */
|
#define CIF_ENABLED_WAIT 5 /* in enabled wait state */
|
||||||
#define CIF_MCCK_GUEST 6 /* machine check happening in guest */
|
#define CIF_MCCK_GUEST 6 /* machine check happening in guest */
|
||||||
#define CIF_DEDICATED_CPU 7 /* this CPU is dedicated */
|
#define CIF_DEDICATED_CPU 7 /* this CPU is dedicated */
|
||||||
|
|
||||||
#define _CIF_NOHZ_DELAY BIT(CIF_NOHZ_DELAY)
|
#define _CIF_NOHZ_DELAY BIT(CIF_NOHZ_DELAY)
|
||||||
#define _CIF_FPU BIT(CIF_FPU)
|
#define _CIF_FPU BIT(CIF_FPU)
|
||||||
#define _CIF_IGNORE_IRQ BIT(CIF_IGNORE_IRQ)
|
|
||||||
#define _CIF_ENABLED_WAIT BIT(CIF_ENABLED_WAIT)
|
#define _CIF_ENABLED_WAIT BIT(CIF_ENABLED_WAIT)
|
||||||
#define _CIF_MCCK_GUEST BIT(CIF_MCCK_GUEST)
|
#define _CIF_MCCK_GUEST BIT(CIF_MCCK_GUEST)
|
||||||
#define _CIF_DEDICATED_CPU BIT(CIF_DEDICATED_CPU)
|
#define _CIF_DEDICATED_CPU BIT(CIF_DEDICATED_CPU)
|
||||||
|
@ -779,8 +779,6 @@ ENTRY(io_int_handler)
|
|||||||
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
|
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
|
||||||
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
|
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
|
||||||
TRACE_IRQS_OFF
|
TRACE_IRQS_OFF
|
||||||
TSTMSK __LC_CPU_FLAGS,_CIF_IGNORE_IRQ
|
|
||||||
jo .Lio_restore
|
|
||||||
.Lio_loop:
|
.Lio_loop:
|
||||||
lgr %r2,%r11 # pass pointer to pt_regs
|
lgr %r2,%r11 # pass pointer to pt_regs
|
||||||
lghi %r3,IO_INTERRUPT
|
lghi %r3,IO_INTERRUPT
|
||||||
@ -967,8 +965,6 @@ ENTRY(ext_int_handler)
|
|||||||
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
|
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
|
||||||
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
|
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
|
||||||
TRACE_IRQS_OFF
|
TRACE_IRQS_OFF
|
||||||
TSTMSK __LC_CPU_FLAGS,_CIF_IGNORE_IRQ
|
|
||||||
jo .Lio_restore
|
|
||||||
lgr %r2,%r11 # pass pointer to pt_regs
|
lgr %r2,%r11 # pass pointer to pt_regs
|
||||||
lghi %r3,EXT_INTERRUPT
|
lghi %r3,EXT_INTERRUPT
|
||||||
brasl %r14,do_IRQ
|
brasl %r14,do_IRQ
|
||||||
|
@ -39,79 +39,24 @@ void __delay(unsigned long loops)
|
|||||||
}
|
}
|
||||||
EXPORT_SYMBOL(__delay);
|
EXPORT_SYMBOL(__delay);
|
||||||
|
|
||||||
static void __udelay_disabled(unsigned long long usecs)
|
static void delay_loop(unsigned long delta, bool simple)
|
||||||
{
|
{
|
||||||
unsigned long cr0, cr0_new, psw_mask;
|
unsigned long end;
|
||||||
struct s390_idle_data idle;
|
|
||||||
u64 end;
|
|
||||||
|
|
||||||
end = get_tod_clock() + (usecs << 12);
|
if (static_branch_likely(&udelay_ready) && !simple) {
|
||||||
__ctl_store(cr0, 0, 0);
|
end = get_tod_clock_monotonic() + delta;
|
||||||
cr0_new = cr0 & ~CR0_IRQ_SUBCLASS_MASK;
|
while (!tod_after(get_tod_clock_monotonic(), end))
|
||||||
cr0_new |= (1UL << (63 - 52)); /* enable clock comparator irq */
|
cpu_relax();
|
||||||
__ctl_load(cr0_new, 0, 0);
|
} else {
|
||||||
psw_mask = __extract_psw() | PSW_MASK_EXT | PSW_MASK_WAIT;
|
end = get_tod_clock() + delta;
|
||||||
set_clock_comparator(end);
|
while (!tod_after(get_tod_clock(), end))
|
||||||
set_cpu_flag(CIF_IGNORE_IRQ);
|
cpu_relax();
|
||||||
psw_idle(&idle, psw_mask);
|
}
|
||||||
trace_hardirqs_off();
|
|
||||||
clear_cpu_flag(CIF_IGNORE_IRQ);
|
|
||||||
set_clock_comparator(S390_lowcore.clock_comparator);
|
|
||||||
__ctl_load(cr0, 0, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __udelay_enabled(unsigned long long usecs)
|
void __udelay(unsigned long usecs)
|
||||||
{
|
{
|
||||||
u64 clock_saved, end;
|
delay_loop(usecs << 12, 0);
|
||||||
|
|
||||||
end = get_tod_clock_fast() + (usecs << 12);
|
|
||||||
do {
|
|
||||||
clock_saved = 0;
|
|
||||||
if (tod_after(S390_lowcore.clock_comparator, end)) {
|
|
||||||
clock_saved = local_tick_disable();
|
|
||||||
set_clock_comparator(end);
|
|
||||||
}
|
|
||||||
enabled_wait();
|
|
||||||
if (clock_saved)
|
|
||||||
local_tick_enable(clock_saved);
|
|
||||||
} while (get_tod_clock_fast() < end);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Waits for 'usecs' microseconds using the TOD clock comparator.
|
|
||||||
*/
|
|
||||||
void __udelay(unsigned long long usecs)
|
|
||||||
{
|
|
||||||
unsigned long flags;
|
|
||||||
|
|
||||||
if (!static_branch_likely(&udelay_ready)) {
|
|
||||||
udelay_simple(usecs);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
preempt_disable();
|
|
||||||
local_irq_save(flags);
|
|
||||||
if (in_irq()) {
|
|
||||||
__udelay_disabled(usecs);
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
if (in_softirq()) {
|
|
||||||
if (raw_irqs_disabled_flags(flags))
|
|
||||||
__udelay_disabled(usecs);
|
|
||||||
else
|
|
||||||
__udelay_enabled(usecs);
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
if (raw_irqs_disabled_flags(flags)) {
|
|
||||||
local_bh_disable();
|
|
||||||
__udelay_disabled(usecs);
|
|
||||||
_local_bh_enable();
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
__udelay_enabled(usecs);
|
|
||||||
out:
|
|
||||||
local_irq_restore(flags);
|
|
||||||
preempt_enable();
|
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(__udelay);
|
EXPORT_SYMBOL(__udelay);
|
||||||
|
|
||||||
@ -119,25 +64,15 @@ EXPORT_SYMBOL(__udelay);
|
|||||||
* Simple udelay variant. To be used on startup and reboot
|
* Simple udelay variant. To be used on startup and reboot
|
||||||
* when the interrupt handler isn't working.
|
* when the interrupt handler isn't working.
|
||||||
*/
|
*/
|
||||||
void udelay_simple(unsigned long long usecs)
|
void udelay_simple(unsigned long usecs)
|
||||||
{
|
{
|
||||||
u64 end;
|
delay_loop(usecs << 12, 1);
|
||||||
|
|
||||||
end = get_tod_clock_fast() + (usecs << 12);
|
|
||||||
while (get_tod_clock_fast() < end)
|
|
||||||
cpu_relax();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void __ndelay(unsigned long long nsecs)
|
void __ndelay(unsigned long nsecs)
|
||||||
{
|
{
|
||||||
u64 end;
|
|
||||||
|
|
||||||
nsecs <<= 9;
|
nsecs <<= 9;
|
||||||
do_div(nsecs, 125);
|
do_div(nsecs, 125);
|
||||||
end = get_tod_clock_fast() + nsecs;
|
delay_loop(nsecs, 0);
|
||||||
if (nsecs & ~0xfffUL)
|
|
||||||
__udelay(nsecs >> 12);
|
|
||||||
while (get_tod_clock_fast() < end)
|
|
||||||
barrier();
|
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(__ndelay);
|
EXPORT_SYMBOL(__ndelay);
|
||||||
|
Loading…
Reference in New Issue
Block a user