2019-06-04 10:11:33 +02:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0-only */
|
2005-04-16 15:20:36 -07:00
|
|
|
/*
|
|
|
|
* linux/arch/arm/lib/delay-loop.S
|
|
|
|
*
|
|
|
|
* Copyright (C) 1995, 1996 Russell King
|
|
|
|
*/
|
|
|
|
#include <linux/linkage.h>
|
2024-04-23 08:50:38 +01:00
|
|
|
#include <linux/cfi_types.h>
|
2005-04-16 15:20:36 -07:00
|
|
|
#include <asm/assembler.h>
|
2012-07-06 15:47:17 +01:00
|
|
|
#include <asm/delay.h>
|
2015-02-25 22:50:39 +01:00
|
|
|
|
ARM: 9263/1: use .arch directives instead of assembler command line flags
Similar to commit a6c30873ee4a ("ARM: 8989/1: use .fpu assembler
directives instead of assembler arguments").
GCC and GNU binutils support setting the "sub arch" via -march=,
-Wa,-march, target function attribute, and .arch assembler directive.
Clang was missing support for -Wa,-march=, but this was implemented in
clang-13.
The behavior of both GCC and Clang is to
prefer -Wa,-march= over -march= for assembler and assembler-with-cpp
sources, but Clang will warn about the -march= being unused.
clang: warning: argument unused during compilation: '-march=armv6k'
[-Wunused-command-line-argument]
Since most assembler is unconditionally assembled with one sub arch
(modulo arch/arm/lib/delay-loop.S, which is conditionally assembled as armv4
based on CONFIG_ARCH_RPC, and arch/arm/mach-at91/pm-suspend.S, which is
conditionally assembled as armv7-a based on CONFIG_CPU_V7), prefer the
.arch assembler directive.
Add a few more instances found in compile testing as found by Arnd and
Nathan.
Link: https://github.com/llvm/llvm-project/commit/1d51c699b9e2ebc5bcfdbe85c74cc871426333d4
Link: https://bugs.llvm.org/show_bug.cgi?id=48894
Link: https://github.com/ClangBuiltLinux/linux/issues/1195
Link: https://github.com/ClangBuiltLinux/linux/issues/1315
Suggested-by: Arnd Bergmann <arnd@arndb.de>
Suggested-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Tested-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
2022-10-24 20:44:41 +01:00
|
|
|
/*
 * When CONFIG_ARCH_RPC is defined, assemble this file for the armv4
 * sub-architecture. The selection is made with an in-file .arch
 * directive rather than an assembler command-line flag.
 */
#ifdef CONFIG_ARCH_RPC
|
|
|
|
.arch armv4
|
|
|
|
#endif
|
|
|
|
|
2005-04-16 15:20:36 -07:00
|
|
|
/*
 * Literal words placed in .text so the routines below can reach them
 * with a plain "ldr rX, .LCn".
 *
 *   .LC0 - a word holding the address of loops_per_jiffy; the code
 *          loads this word and then dereferences it to get the value.
 *   .LC1 - a word holding the constant UDELAY_MULT (presumably defined
 *          in <asm/delay.h> - confirm).
 */
.text
|
|
|
|
|
2005-11-11 21:51:49 +00:00
|
|
|
.LC0: .word loops_per_jiffy
|
2012-07-06 15:47:17 +01:00
|
|
|
.LC1: .word UDELAY_MULT
|
2005-04-16 15:20:36 -07:00
|
|
|
|
|
|
|
/*
|
2016-10-07 05:38:35 +01:00
|
|
|
* loops = r0 * HZ * loops_per_jiffy / 1000000
|
|
|
|
*
|
2006-03-20 17:10:09 +00:00
|
|
|
* r0 <= 2000
|
|
|
|
* HZ <= 1000
|
2005-04-16 15:20:36 -07:00
|
|
|
*/
|
2006-03-20 17:10:09 +00:00
|
|
|
|
2024-04-23 08:50:38 +01:00
|
|
|
/*
 * __loop_udelay - busy-wait entry point taking a microsecond count.
 *
 * In:       r0 = delay in microseconds
 * Clobbers: r0, r2 in this routine, plus whatever __loop_const_udelay
 *           and __loop_delay clobber.
 *
 * Scales r0 by UDELAY_MULT and continues in __loop_const_udelay. The
 * transfer is a plain branch (lr is not modified), so the eventual
 * return goes straight back to this routine's caller.
 */
SYM_TYPED_FUNC_START(__loop_udelay)
|
2006-03-20 17:10:09 +00:00
|
|
|
/* r2 = UDELAY_MULT, fetched from the literal word .LC1 */
ldr r2, .LC1
|
2016-10-07 05:38:35 +01:00
|
|
|
/* 32-bit multiply; only the low 32 bits of the product are kept */
mul r0, r2, r0 @ r0 = delay_us * UDELAY_MULT
|
2024-04-23 08:50:38 +01:00
|
|
|
/* continue with the scaled value in r0 (no link: tail branch) */
b __loop_const_udelay
|
|
|
|
SYM_FUNC_END(__loop_udelay)
|
|
|
|
|
|
|
|
/*
 * __loop_const_udelay - turn a pre-scaled delay value into a loop count
 * and spin for that many iterations.
 *
 * In:       r0 = pre-scaled delay value (__loop_udelay passes
 *                microseconds * UDELAY_MULT)
 * Clobbers: r0, r1, r2, condition flags
 *
 * Computes the 64-bit product r0 * loops_per_jiffy, reduces it to a
 * 32-bit loop count, and returns immediately if that count is zero.
 * Otherwise it branches to __loop_delay with the count in r0.
 */
SYM_TYPED_FUNC_START(__loop_const_udelay) @ 0 <= r0 <= 0xfffffaf0
|
2005-11-11 21:51:49 +00:00
|
|
|
/* r2 = address of loops_per_jiffy (contents of literal word .LC0) */
ldr r2, .LC0
|
2015-02-25 22:50:39 +01:00
|
|
|
/* r2 = current value of loops_per_jiffy */
ldr r2, [r2]
|
2016-10-07 05:38:35 +01:00
|
|
|
/* unsigned 32x32->64 multiply: r1 = low word, r0 = high word */
umull r1, r0, r2, r0 @ r0-r1 = r0 * loops_per_jiffy
|
|
|
|
/* carry out is set exactly when the low word r1 was non-zero */
adds r1, r1, #0xffffffff @ rounding up ...
|
|
|
|
/* r0 = 2 * r0 + carry; Z is set if the resulting count is zero */
adcs r0, r0, r0 @ and right shift by 31
|
2014-06-30 16:29:12 +01:00
|
|
|
/* zero loop count: nothing to wait for, return to the caller */
reteq lr
|
2024-04-23 08:50:38 +01:00
|
|
|
/* non-zero count in r0: spin in __loop_delay (tail branch) */
b __loop_delay
|
|
|
|
SYM_FUNC_END(__loop_const_udelay)
|
2005-04-16 15:20:36 -07:00
|
|
|
|
ARM: 7907/1: lib: delay-loop: Add align directive to fix BogoMIPS calculation
Currently mx53 (CortexA8) running at 1GHz reports:
Calibrating delay loop... 663.55 BogoMIPS (lpj=3317760)
Tom Evans verified that alignments of 0x0 and 0x8 run the two instructions of __loop_delay in one clock cycle (1 clock/loop), while alignments of 0x4 and 0xc take 3 clocks to run the loop twice (1.5 clocks/loop).
The original object code looks like this:
00000010 <__loop_const_udelay>:
10: e3e01000 mvn r1, #0
14: e51f201c ldr r2, [pc, #-28] ; 0 <__loop_udelay-0x8>
18: e5922000 ldr r2, [r2]
1c: e0800921 add r0, r0, r1, lsr #18
20: e1a00720 lsr r0, r0, #14
24: e0822b21 add r2, r2, r1, lsr #22
28: e1a02522 lsr r2, r2, #10
2c: e0000092 mul r0, r2, r0
30: e0800d21 add r0, r0, r1, lsr #26
34: e1b00320 lsrs r0, r0, #6
38: 01a0f00e moveq pc, lr
0000003c <__loop_delay>:
3c: e2500001 subs r0, r0, #1
40: 8afffffe bhi 3c <__loop_delay>
44: e1a0f00e mov pc, lr
After adding the 'align 3' directive to __loop_delay (align to 8 bytes):
00000010 <__loop_const_udelay>:
10: e3e01000 mvn r1, #0
14: e51f201c ldr r2, [pc, #-28] ; 0 <__loop_udelay-0x8>
18: e5922000 ldr r2, [r2]
1c: e0800921 add r0, r0, r1, lsr #18
20: e1a00720 lsr r0, r0, #14
24: e0822b21 add r2, r2, r1, lsr #22
28: e1a02522 lsr r2, r2, #10
2c: e0000092 mul r0, r2, r0
30: e0800d21 add r0, r0, r1, lsr #26
34: e1b00320 lsrs r0, r0, #6
38: 01a0f00e moveq pc, lr
3c: e320f000 nop {0}
00000040 <__loop_delay>:
40: e2500001 subs r0, r0, #1
44: 8afffffe bhi 40 <__loop_delay>
48: e1a0f00e mov pc, lr
4c: e320f000 nop {0}
, which now reports:
Calibrating delay loop... 996.14 BogoMIPS (lpj=4980736)
Some more test results:
On mx31 (ARM1136) running at 532 MHz, before the patch:
Calibrating delay loop... 351.43 BogoMIPS (lpj=1757184)
On mx31 (ARM1136) running at 532 MHz after the patch:
Calibrating delay loop... 528.79 BogoMIPS (lpj=2643968)
Also tested on mx6 (CortexA9) and on mx27 (ARM926), which shows the same
BogoMIPS value before and after this patch.
Reported-by: Tom Evans <tom_usenet@optusnet.com.au>
Suggested-by: Tom Evans <tom_usenet@optusnet.com.au>
Signed-off-by: Fabio Estevam <fabio.estevam@freescale.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2013-11-30 15:24:42 +01:00
|
|
|
/*
 * Align the loop entry to 8 bytes (2^3). The alignment of the two-
 * instruction loop below affects how many cycles each iteration takes
 * on some cores, so it is fixed here to keep the calibrated
 * loops-per-jiffy value consistent.
 */
.align 3
|
2005-04-16 15:20:36 -07:00
|
|
|
|
|
|
|
|
@ Delay routine
|
2024-04-23 08:50:38 +01:00
|
|
|
/*
 * __loop_delay - spin for r0 loop iterations.
 *
 * In:       r0 = loop count
 * Clobbers: r0, condition flags
 *
 * Each pass decrements r0 and loops while the value before the
 * decrement was greater than 1 (unsigned). A count of n >= 1 therefore
 * executes the decrement n times; a count of 0 falls through after a
 * single decrement.
 */
SYM_TYPED_FUNC_START(__loop_delay)
|
2005-04-16 15:20:36 -07:00
|
|
|
/* decrement the remaining count and set the flags for bhi below */
subs r0, r0, #1
|
|
|
|
/*
 * Disabled (compiled out by "#if 0"): an unrolled variant that repeats
 * the decrement with an early "retls lr" exit after each one.
 */
#if 0
|
2014-06-30 16:29:12 +01:00
|
|
|
retls lr
|
2005-04-16 15:20:36 -07:00
|
|
|
subs r0, r0, #1
|
2014-06-30 16:29:12 +01:00
|
|
|
retls lr
|
2005-04-16 15:20:36 -07:00
|
|
|
subs r0, r0, #1
|
2014-06-30 16:29:12 +01:00
|
|
|
retls lr
|
2005-04-16 15:20:36 -07:00
|
|
|
subs r0, r0, #1
|
2014-06-30 16:29:12 +01:00
|
|
|
retls lr
|
2005-04-16 15:20:36 -07:00
|
|
|
subs r0, r0, #1
|
2014-06-30 16:29:12 +01:00
|
|
|
retls lr
|
2005-04-16 15:20:36 -07:00
|
|
|
subs r0, r0, #1
|
2014-06-30 16:29:12 +01:00
|
|
|
retls lr
|
2005-04-16 15:20:36 -07:00
|
|
|
subs r0, r0, #1
|
2014-06-30 16:29:12 +01:00
|
|
|
retls lr
|
2005-04-16 15:20:36 -07:00
|
|
|
subs r0, r0, #1
|
|
|
|
#endif
|
2012-07-06 15:47:17 +01:00
|
|
|
/* loop while the decrement produced no borrow and a non-zero result */
bhi __loop_delay
|
2014-06-30 16:29:12 +01:00
|
|
|
/* count exhausted: return to the caller */
ret lr
|
2024-04-23 08:50:38 +01:00
|
|
|
SYM_FUNC_END(__loop_delay)
|