diff --git a/include/asm-generic/div64.h b/include/asm-generic/div64.h index 5a1bf1aff502..408856a9aba1 100644 --- a/include/asm-generic/div64.h +++ b/include/asm-generic/div64.h @@ -63,7 +63,7 @@ * do the trick here). \ */ \ uint64_t ___res, ___x, ___t, ___m, ___n = (n); \ - uint32_t ___p, ___bias, ___m_lo, ___m_hi, ___n_lo, ___n_hi; \ + uint32_t ___p, ___bias; \ \ /* determine MSB of b */ \ ___p = 1 << ilog2(___b); \ @@ -138,41 +138,62 @@ * 2) whether or not there might be an overflow in the cross \ * product determined by (___m & ((1 << 63) | (1 << 31))). \ * \ - * Select the best way to do (m_bias + m * n) / (p << 64). \ + * Select the best way to do (m_bias + m * n) / (1 << 64). \ * From now on there will be actual runtime code generated. \ */ \ - \ - ___m_lo = ___m; \ - ___m_hi = ___m >> 32; \ - ___n_lo = ___n; \ - ___n_hi = ___n >> 32; \ - \ - if (!___bias) { \ - ___res = ((uint64_t)___m_lo * ___n_lo) >> 32; \ - } else if (!(___m & ((1ULL << 63) | (1ULL << 31)))) { \ - ___res = (___m + (uint64_t)___m_lo * ___n_lo) >> 32; \ - } else { \ - ___res = ___m + (uint64_t)___m_lo * ___n_lo; \ - ___t = (___res < ___m) ? (1ULL << 32) : 0; \ - ___res = (___res >> 32) + ___t; \ - } \ - \ - if (!(___m & ((1ULL << 63) | (1ULL << 31)))) { \ - ___res += (uint64_t)___m_lo * ___n_hi; \ - ___res += (uint64_t)___m_hi * ___n_lo; \ - ___res >>= 32; \ - } else { \ - ___t = ___res += (uint64_t)___m_lo * ___n_hi; \ - ___res += (uint64_t)___m_hi * ___n_lo; \ - ___t = (___res < ___t) ? (1ULL << 32) : 0; \ - ___res = (___res >> 32) + ___t; \ - } \ - \ - ___res += (uint64_t)___m_hi * ___n_hi; \ + ___res = __arch_xprod_64(___m, ___n, ___bias); \ \ ___res /= ___p; \ }) +#ifndef __arch_xprod_64 +/* + * Default C implementation for __arch_xprod_64() + * + * Prototype: uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias) + * Semantic: retval = ((bias ? m : 0) + m * n) >> 64 + * + * The product is a 128-bit value, scaled down to 64 bits. + * Assuming constant propagation to optimize away unused conditional code. + * Architectures may provide their own optimized assembly implementation. + */ +static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias) +{ + uint32_t m_lo = m; + uint32_t m_hi = m >> 32; + uint32_t n_lo = n; + uint32_t n_hi = n >> 32; + uint64_t res, tmp; + + if (!bias) { + res = ((uint64_t)m_lo * n_lo) >> 32; + } else if (!(m & ((1ULL << 63) | (1ULL << 31)))) { + /* there can't be any overflow here */ + res = (m + (uint64_t)m_lo * n_lo) >> 32; + } else { + res = m + (uint64_t)m_lo * n_lo; + tmp = (res < m) ? (1ULL << 32) : 0; + res = (res >> 32) + tmp; + } + + if (!(m & ((1ULL << 63) | (1ULL << 31)))) { + /* there can't be any overflow here */ + res += (uint64_t)m_lo * n_hi; + res += (uint64_t)m_hi * n_lo; + res >>= 32; + } else { + tmp = res += (uint64_t)m_lo * n_hi; + res += (uint64_t)m_hi * n_lo; + tmp = (res < tmp) ? (1ULL << 32) : 0; + res = (res >> 32) + tmp; + } + + res += (uint64_t)m_hi * n_hi; + + return res; +} +#endif + extern uint32_t __div64_32(uint64_t *dividend, uint32_t divisor); /* The unnecessary pointer compare is there