216 lines
5.1 KiB
Plaintext
Raw Normal View History

config NIOS2
def_bool y
select CLKSRC_OF
select GENERIC_ATOMIC64
select GENERIC_CLOCKEVENTS
select GENERIC_CPU_DEVICES
select GENERIC_IRQ_PROBE
select GENERIC_IRQ_SHOW
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_KGDB
select IRQ_DOMAIN
select MODULES_USE_ELF_RELA
select OF
select OF_EARLY_FLATTREE
select SOC_BUS
select SPARSE_IRQ
select USB_ARCH_HAS_HCD if USB_SUPPORT
lib/GCD.c: use binary GCD algorithm instead of Euclidean The binary GCD algorithm is based on the following facts: 1. If a and b are all evens, then gcd(a,b) = 2 * gcd(a/2, b/2) 2. If a is even and b is odd, then gcd(a,b) = gcd(a/2, b) 3. If a and b are all odds, then gcd(a,b) = gcd((a-b)/2, b) = gcd((a+b)/2, b) Even on x86 machines with reasonable division hardware, the binary algorithm runs about 25% faster (80% the execution time) than the division-based Euclidian algorithm. On platforms like Alpha and ARMv6 where division is a function call to emulation code, it's even more significant. There are two variants of the code here, depending on whether a fast __ffs (find least significant set bit) instruction is available. This allows the unpredictable branches in the bit-at-a-time shifting loop to be eliminated. If fast __ffs is not available, the "even/odd" GCD variant is used. I use the following code to benchmark: #include <stdio.h> #include <stdlib.h> #include <stdint.h> #include <string.h> #include <time.h> #include <unistd.h> #define swap(a, b) \ do { \ a ^= b; \ b ^= a; \ a ^= b; \ } while (0) unsigned long gcd0(unsigned long a, unsigned long b) { unsigned long r; if (a < b) { swap(a, b); } if (b == 0) return a; while ((r = a % b) != 0) { a = b; b = r; } return b; } unsigned long gcd1(unsigned long a, unsigned long b) { unsigned long r = a | b; if (!a || !b) return r; b >>= __builtin_ctzl(b); for (;;) { a >>= __builtin_ctzl(a); if (a == b) return a << __builtin_ctzl(r); if (a < b) swap(a, b); a -= b; } } unsigned long gcd2(unsigned long a, unsigned long b) { unsigned long r = a | b; if (!a || !b) return r; r &= -r; while (!(b & r)) b >>= 1; for (;;) { while (!(a & r)) a >>= 1; if (a == b) return a; if (a < b) swap(a, b); a -= b; a >>= 1; if (a & r) a += b; a >>= 1; } } unsigned long gcd3(unsigned long a, unsigned long b) { unsigned long r = a | b; if (!a || !b) return r; b >>= __builtin_ctzl(b); if (b == 1) return r & -r; for (;;) { a >>= __builtin_ctzl(a); if (a == 1) return r & -r; if (a == b) return a << __builtin_ctzl(r); if (a < b) swap(a, b); a -= b; } } unsigned long gcd4(unsigned long a, unsigned long b) { unsigned long r = a | b; if (!a || !b) return r; r &= -r; while (!(b & r)) b >>= 1; if (b == r) return r; for (;;) { while (!(a & r)) a >>= 1; if (a == r) return r; if (a == b) return a; if (a < b) swap(a, b); a -= b; a >>= 1; if (a & r) a += b; a >>= 1; } } static unsigned long (*gcd_func[])(unsigned long a, unsigned long b) = { gcd0, gcd1, gcd2, gcd3, gcd4, }; #define TEST_ENTRIES (sizeof(gcd_func) / sizeof(gcd_func[0])) #if defined(__x86_64__) #define rdtscll(val) do { \ unsigned long __a,__d; \ __asm__ __volatile__("rdtsc" : "=a" (__a), "=d" (__d)); \ (val) = ((unsigned long long)__a) | (((unsigned long long)__d)<<32); \ } while(0) static unsigned long long benchmark_gcd_func(unsigned long (*gcd)(unsigned long, unsigned long), unsigned long a, unsigned long b, unsigned long *res) { unsigned long long start, end; unsigned long long ret; unsigned long gcd_res; rdtscll(start); gcd_res = gcd(a, b); rdtscll(end); if (end >= start) ret = end - start; else ret = ~0ULL - start + 1 + end; *res = gcd_res; return ret; } #else static inline struct timespec read_time(void) { struct timespec time; clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time); return time; } static inline unsigned long long diff_time(struct timespec start, struct timespec end) { struct timespec temp; if ((end.tv_nsec - start.tv_nsec) < 0) { temp.tv_sec = end.tv_sec - start.tv_sec - 1; temp.tv_nsec = 1000000000ULL + end.tv_nsec - start.tv_nsec; } else { temp.tv_sec = end.tv_sec - start.tv_sec; temp.tv_nsec = end.tv_nsec - start.tv_nsec; } return temp.tv_sec * 1000000000ULL + temp.tv_nsec; } static unsigned long long benchmark_gcd_func(unsigned long (*gcd)(unsigned long, unsigned long), unsigned long a, unsigned long b, unsigned long *res) { struct timespec start, end; unsigned long gcd_res; start = read_time(); gcd_res = gcd(a, b); end = read_time(); *res = gcd_res; return diff_time(start, end); } #endif static inline unsigned long get_rand() { if (sizeof(long) == 8) return (unsigned long)rand() << 32 | rand(); else return rand(); } int main(int argc, char **argv) { unsigned int seed = time(0); int loops = 100; int repeats = 1000; unsigned long (*res)[TEST_ENTRIES]; unsigned long long elapsed[TEST_ENTRIES]; int i, j, k; for (;;) { int opt = getopt(argc, argv, "n:r:s:"); /* End condition always first */ if (opt == -1) break; switch (opt) { case 'n': loops = atoi(optarg); break; case 'r': repeats = atoi(optarg); break; case 's': seed = strtoul(optarg, NULL, 10); break; default: /* You won't actually get here. */ break; } } res = malloc(sizeof(unsigned long) * TEST_ENTRIES * loops); memset(elapsed, 0, sizeof(elapsed)); srand(seed); for (j = 0; j < loops; j++) { unsigned long a = get_rand(); /* Do we have args? */ unsigned long b = argc > optind ? strtoul(argv[optind], NULL, 10) : get_rand(); unsigned long long min_elapsed[TEST_ENTRIES]; for (k = 0; k < repeats; k++) { for (i = 0; i < TEST_ENTRIES; i++) { unsigned long long tmp = benchmark_gcd_func(gcd_func[i], a, b, &res[j][i]); if (k == 0 || min_elapsed[i] > tmp) min_elapsed[i] = tmp; } } for (i = 0; i < TEST_ENTRIES; i++) elapsed[i] += min_elapsed[i]; } for (i = 0; i < TEST_ENTRIES; i++) printf("gcd%d: elapsed %llu\n", i, elapsed[i]); k = 0; srand(seed); for (j = 0; j < loops; j++) { unsigned long a = get_rand(); unsigned long b = argc > optind ? strtoul(argv[optind], NULL, 10) : get_rand(); for (i = 1; i < TEST_ENTRIES; i++) { if (res[j][i] != res[j][0]) break; } if (i < TEST_ENTRIES) { if (k == 0) { k = 1; fprintf(stderr, "Error:\n"); } fprintf(stderr, "gcd(%lu, %lu): ", a, b); for (i = 0; i < TEST_ENTRIES; i++) fprintf(stderr, "%ld%s", res[j][i], i < TEST_ENTRIES - 1 ? ", " : "\n"); } } if (k == 0) fprintf(stderr, "PASS\n"); free(res); return 0; } Compiled with "-O2", on "VirtualBox 4.4.0-22-generic #38-Ubuntu x86_64" got: zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10 gcd0: elapsed 10174 gcd1: elapsed 2120 gcd2: elapsed 2902 gcd3: elapsed 2039 gcd4: elapsed 2812 PASS zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10 gcd0: elapsed 9309 gcd1: elapsed 2280 gcd2: elapsed 2822 gcd3: elapsed 2217 gcd4: elapsed 2710 PASS zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10 gcd0: elapsed 9589 gcd1: elapsed 2098 gcd2: elapsed 2815 gcd3: elapsed 2030 gcd4: elapsed 2718 PASS zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10 gcd0: elapsed 9914 gcd1: elapsed 2309 gcd2: elapsed 2779 gcd3: elapsed 2228 gcd4: elapsed 2709 PASS [akpm@linux-foundation.org: avoid #defining a CONFIG_ variable] Signed-off-by: Zhaoxiu Zeng <zhaoxiu.zeng@gmail.com> Signed-off-by: George Spelvin <linux@horizon.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-05-20 17:03:57 -07:00
select CPU_NO_EFFICIENT_FFS
config GENERIC_CSUM
def_bool y
config GENERIC_HWEIGHT
def_bool y
config GENERIC_CALIBRATE_DELAY
def_bool y
config NO_IOPORT_MAP
def_bool y
config HAS_DMA
def_bool y
config FPU
def_bool n
config SWAP
def_bool n
config RWSEM_GENERIC_SPINLOCK
def_bool y
config TRACE_IRQFLAGS_SUPPORT
def_bool n
source "init/Kconfig"
menu "Kernel features"
source "kernel/Kconfig.preempt"
source "kernel/Kconfig.freezer"
source "kernel/Kconfig.hz"
source "mm/Kconfig"
config FORCE_MAX_ZONEORDER
int "Maximum zone order"
range 9 20
default "11"
help
The kernel memory allocator divides physically contiguous memory
blocks into "zones", where each zone is a power of two number of
pages. This option selects the largest power of two that the kernel
keeps in the memory allocator. If you need to allocate very large
blocks of physically contiguous memory, then you may need to
increase this value.
This config option is actually maximum order plus one. For example,
a value of 11 means that the largest free memory block is 2^10 pages.
endmenu
source "arch/nios2/platform/Kconfig.platform"
menu "Processor type and features"
config MMU
def_bool y
config NR_CPUS
int
default "1"
config NIOS2_ALIGNMENT_TRAP
bool "Catch alignment trap"
default y
help
Nios II CPUs cannot fetch/store data which is not bus aligned,
i.e., a 2 or 4 byte fetch must start at an address divisible by
2 or 4. Any non-aligned load/store instructions will be trapped and
emulated in software if you say Y here, which has a performance
impact.
comment "Boot options"
config CMDLINE_BOOL
bool "Default bootloader kernel arguments"
default y
config CMDLINE
string "Default kernel command string"
default ""
depends on CMDLINE_BOOL
help
On some platforms, there is currently no way for the boot loader to
pass arguments to the kernel. For these platforms, you can supply
some command-line options at build time by entering them here. In
other cases you can specify kernel args so that you don't have
to set them up in board prom initialization routines.
config CMDLINE_FORCE
bool "Force default kernel command string"
depends on CMDLINE_BOOL
help
Set this to have arguments from the default kernel command string
override those passed by the boot loader.
config NIOS2_CMDLINE_IGNORE_DTB
bool "Ignore kernel command string from DTB"
depends on CMDLINE_BOOL
depends on !CMDLINE_FORCE
default y
help
Set this to ignore the bootargs property from the devicetree's
chosen node and fall back to CMDLINE if nothing is passed.
config NIOS2_PASS_CMDLINE
bool "Passed kernel command line from u-boot"
default n
help
Use bootargs env variable from u-boot for kernel command line.
will override "Default kernel command string".
Say N if you are unsure.
config NIOS2_BOOT_LINK_OFFSET
hex "Link address offset for booting"
default "0x00500000"
help
This option allows you to set the link address offset of the zImage.
This can be useful if you are on a board which has a small amount of
memory.
endmenu
menu "Advanced setup"
config ADVANCED_OPTIONS
bool "Prompt for advanced kernel configuration options"
help
comment "Default settings for advanced configuration options are used"
depends on !ADVANCED_OPTIONS
config NIOS2_KERNEL_MMU_REGION_BASE_BOOL
bool "Set custom kernel MMU region base address"
depends on ADVANCED_OPTIONS
help
This option allows you to set the virtual address of the kernel MMU region.
Say N here unless you know what you are doing.
config NIOS2_KERNEL_MMU_REGION_BASE
hex "Virtual base address of the kernel MMU region " if NIOS2_KERNEL_MMU_REGION_BASE_BOOL
default "0x80000000"
help
This option allows you to set the virtual base address of the kernel MMU region.
config NIOS2_KERNEL_REGION_BASE_BOOL
bool "Set custom kernel region base address"
depends on ADVANCED_OPTIONS
help
This option allows you to set the virtual address of the kernel region.
Say N here unless you know what you are doing.
config NIOS2_KERNEL_REGION_BASE
hex "Virtual base address of the kernel region " if NIOS2_KERNEL_REGION_BASE_BOOL
default "0xc0000000"
config NIOS2_IO_REGION_BASE_BOOL
bool "Set custom I/O region base address"
depends on ADVANCED_OPTIONS
help
This option allows you to set the virtual address of the I/O region.
Say N here unless you know what you are doing.
config NIOS2_IO_REGION_BASE
hex "Virtual base address of the I/O region" if NIOS2_IO_REGION_BASE_BOOL
default "0xe0000000"
endmenu
menu "Executable file formats"
source "fs/Kconfig.binfmt"
endmenu
source "net/Kconfig"
source "drivers/Kconfig"
source "fs/Kconfig"
source "arch/nios2/Kconfig.debug"
source "security/Kconfig"
source "crypto/Kconfig"
source "lib/Kconfig"