at master 8.1 kB view raw
1// SPDX-License-Identifier: GPL-2.0 2/* 3 * Copyright (C) 2003 Bernardo Innocenti <bernie@develer.com> 4 * 5 * Based on former do_div() implementation from asm-parisc/div64.h: 6 * Copyright (C) 1999 Hewlett-Packard Co 7 * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com> 8 * 9 * 10 * Generic C version of 64bit/32bit division and modulo, with 11 * 64bit result and 32bit remainder. 12 * 13 * The fast case for (n>>32 == 0) is handled inline by do_div(). 14 * 15 * Code generated for this function might be very inefficient 16 * for some CPUs. __div64_32() can be overridden by linking arch-specific 17 * assembly versions such as arch/ppc/lib/div64.S and arch/sh/lib/div64.S 18 * or by defining a preprocessor macro in arch/include/asm/div64.h. 19 */ 20 21#include <linux/bitops.h> 22#include <linux/export.h> 23#include <linux/math.h> 24#include <linux/math64.h> 25#include <linux/minmax.h> 26#include <linux/log2.h> 27 28/* Not needed on 64bit architectures */ 29#if BITS_PER_LONG == 32 30 31#ifndef __div64_32 32uint32_t __attribute__((weak)) __div64_32(uint64_t *n, uint32_t base) 33{ 34 uint64_t rem = *n; 35 uint64_t b = base; 36 uint64_t res, d = 1; 37 uint32_t high = rem >> 32; 38 39 /* Reduce the thing a bit first */ 40 res = 0; 41 if (high >= base) { 42 high /= base; 43 res = (uint64_t) high << 32; 44 rem -= (uint64_t) (high*base) << 32; 45 } 46 47 while ((int64_t)b > 0 && b < rem) { 48 b = b+b; 49 d = d+d; 50 } 51 52 do { 53 if (rem >= b) { 54 rem -= b; 55 res += d; 56 } 57 b >>= 1; 58 d >>= 1; 59 } while (d); 60 61 *n = res; 62 return rem; 63} 64EXPORT_SYMBOL(__div64_32); 65#endif 66 67#ifndef div_s64_rem 68s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder) 69{ 70 u64 quotient; 71 72 if (dividend < 0) { 73 quotient = div_u64_rem(-dividend, abs(divisor), (u32 *)remainder); 74 *remainder = -*remainder; 75 if (divisor > 0) 76 quotient = -quotient; 77 } else { 78 quotient = div_u64_rem(dividend, abs(divisor), (u32 *)remainder); 79 if (divisor < 0) 80 quotient = -quotient; 81 } 82 return quotient; 83} 84EXPORT_SYMBOL(div_s64_rem); 85#endif 86 87/* 88 * div64_u64_rem - unsigned 64bit divide with 64bit divisor and remainder 89 * @dividend: 64bit dividend 90 * @divisor: 64bit divisor 91 * @remainder: 64bit remainder 92 * 93 * This implementation is a comparable to algorithm used by div64_u64. 94 * But this operation, which includes math for calculating the remainder, 95 * is kept distinct to avoid slowing down the div64_u64 operation on 32bit 96 * systems. 97 */ 98#ifndef div64_u64_rem 99u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder) 100{ 101 u32 high = divisor >> 32; 102 u64 quot; 103 104 if (high == 0) { 105 u32 rem32; 106 quot = div_u64_rem(dividend, divisor, &rem32); 107 *remainder = rem32; 108 } else { 109 int n = fls(high); 110 quot = div_u64(dividend >> n, divisor >> n); 111 112 if (quot != 0) 113 quot--; 114 115 *remainder = dividend - quot * divisor; 116 if (*remainder >= divisor) { 117 quot++; 118 *remainder -= divisor; 119 } 120 } 121 122 return quot; 123} 124EXPORT_SYMBOL(div64_u64_rem); 125#endif 126 127/* 128 * div64_u64 - unsigned 64bit divide with 64bit divisor 129 * @dividend: 64bit dividend 130 * @divisor: 64bit divisor 131 * 132 * This implementation is a modified version of the algorithm proposed 133 * by the book 'Hacker's Delight'. The original source and full proof 134 * can be found here and is available for use without restriction. 135 * 136 * 'http://www.hackersdelight.org/hdcodetxt/divDouble.c.txt' 137 */ 138#ifndef div64_u64 139u64 div64_u64(u64 dividend, u64 divisor) 140{ 141 u32 high = divisor >> 32; 142 u64 quot; 143 144 if (high == 0) { 145 quot = div_u64(dividend, divisor); 146 } else { 147 int n = fls(high); 148 quot = div_u64(dividend >> n, divisor >> n); 149 150 if (quot != 0) 151 quot--; 152 if ((dividend - quot * divisor) >= divisor) 153 quot++; 154 } 155 156 return quot; 157} 158EXPORT_SYMBOL(div64_u64); 159#endif 160 161#ifndef div64_s64 162s64 div64_s64(s64 dividend, s64 divisor) 163{ 164 s64 quot, t; 165 166 quot = div64_u64(abs(dividend), abs(divisor)); 167 t = (dividend ^ divisor) >> 63; 168 169 return (quot ^ t) - t; 170} 171EXPORT_SYMBOL(div64_s64); 172#endif 173 174#endif /* BITS_PER_LONG == 32 */ 175 176/* 177 * Iterative div/mod for use when dividend is not expected to be much 178 * bigger than divisor. 179 */ 180#ifndef iter_div_u64_rem 181u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder) 182{ 183 return __iter_div_u64_rem(dividend, divisor, remainder); 184} 185EXPORT_SYMBOL(iter_div_u64_rem); 186#endif 187 188#if !defined(mul_u64_add_u64_div_u64) || defined(test_mul_u64_add_u64_div_u64) 189 190#define mul_add(a, b, c) add_u64_u32(mul_u32_u32(a, b), c) 191 192#if defined(__SIZEOF_INT128__) && !defined(test_mul_u64_add_u64_div_u64) 193static inline u64 mul_u64_u64_add_u64(u64 *p_lo, u64 a, u64 b, u64 c) 194{ 195 /* native 64x64=128 bits multiplication */ 196 u128 prod = (u128)a * b + c; 197 198 *p_lo = prod; 199 return prod >> 64; 200} 201#else 202static inline u64 mul_u64_u64_add_u64(u64 *p_lo, u64 a, u64 b, u64 c) 203{ 204 /* perform a 64x64=128 bits multiplication in 32bit chunks */ 205 u64 x, y, z; 206 207 /* Since (x-1)(x-1) + 2(x-1) == x.x - 1 two u32 can be added to a u64 */ 208 x = mul_add(a, b, c); 209 y = mul_add(a, b >> 32, c >> 32); 210 y = add_u64_u32(y, x >> 32); 211 z = mul_add(a >> 32, b >> 32, y >> 32); 212 y = mul_add(a >> 32, b, y); 213 *p_lo = (y << 32) + (u32)x; 214 return add_u64_u32(z, y >> 32); 215} 216#endif 217 218#ifndef BITS_PER_ITER 219#define BITS_PER_ITER (__LONG_WIDTH__ >= 64 ? 32 : 16) 220#endif 221 222#if BITS_PER_ITER == 32 223#define mul_u64_long_add_u64(p_lo, a, b, c) mul_u64_u64_add_u64(p_lo, a, b, c) 224#define add_u64_long(a, b) ((a) + (b)) 225#else 226#undef BITS_PER_ITER 227#define BITS_PER_ITER 16 228static inline u32 mul_u64_long_add_u64(u64 *p_lo, u64 a, u32 b, u64 c) 229{ 230 u64 n_lo = mul_add(a, b, c); 231 u64 n_med = mul_add(a >> 32, b, c >> 32); 232 233 n_med = add_u64_u32(n_med, n_lo >> 32); 234 *p_lo = n_med << 32 | (u32)n_lo; 235 return n_med >> 32; 236} 237 238#define add_u64_long(a, b) add_u64_u32(a, b) 239#endif 240 241u64 mul_u64_add_u64_div_u64(u64 a, u64 b, u64 c, u64 d) 242{ 243 unsigned long d_msig, q_digit; 244 unsigned int reps, d_z_hi; 245 u64 quotient, n_lo, n_hi; 246 u32 overflow; 247 248 n_hi = mul_u64_u64_add_u64(&n_lo, a, b, c); 249 250 if (!n_hi) 251 return div64_u64(n_lo, d); 252 253 if (unlikely(n_hi >= d)) { 254 /* trigger runtime exception if divisor is zero */ 255 if (d == 0) { 256 unsigned long zero = 0; 257 258 OPTIMIZER_HIDE_VAR(zero); 259 return ~0UL/zero; 260 } 261 /* overflow: result is unrepresentable in a u64 */ 262 return ~0ULL; 263 } 264 265 /* Left align the divisor, shifting the dividend to match */ 266 d_z_hi = __builtin_clzll(d); 267 if (d_z_hi) { 268 d <<= d_z_hi; 269 n_hi = n_hi << d_z_hi | n_lo >> (64 - d_z_hi); 270 n_lo <<= d_z_hi; 271 } 272 273 reps = 64 / BITS_PER_ITER; 274 /* Optimise loop count for small dividends */ 275 if (!(u32)(n_hi >> 32)) { 276 reps -= 32 / BITS_PER_ITER; 277 n_hi = n_hi << 32 | n_lo >> 32; 278 n_lo <<= 32; 279 } 280#if BITS_PER_ITER == 16 281 if (!(u32)(n_hi >> 48)) { 282 reps--; 283 n_hi = add_u64_u32(n_hi << 16, n_lo >> 48); 284 n_lo <<= 16; 285 } 286#endif 287 288 /* Invert the dividend so we can use add instead of subtract. */ 289 n_lo = ~n_lo; 290 n_hi = ~n_hi; 291 292 /* 293 * Get the most significant BITS_PER_ITER bits of the divisor. 294 * This is used to get a low 'guestimate' of the quotient digit. 295 */ 296 d_msig = (d >> (64 - BITS_PER_ITER)) + 1; 297 298 /* 299 * Now do a 'long division' with BITS_PER_ITER bit 'digits'. 300 * The 'guess' quotient digit can be low and BITS_PER_ITER+1 bits. 301 * The worst case is dividing ~0 by 0x8000 which requires two subtracts. 302 */ 303 quotient = 0; 304 while (reps--) { 305 q_digit = (unsigned long)(~n_hi >> (64 - 2 * BITS_PER_ITER)) / d_msig; 306 /* Shift 'n' left to align with the product q_digit * d */ 307 overflow = n_hi >> (64 - BITS_PER_ITER); 308 n_hi = add_u64_u32(n_hi << BITS_PER_ITER, n_lo >> (64 - BITS_PER_ITER)); 309 n_lo <<= BITS_PER_ITER; 310 /* Add product to negated divisor */ 311 overflow += mul_u64_long_add_u64(&n_hi, d, q_digit, n_hi); 312 /* Adjust for the q_digit 'guestimate' being low */ 313 while (overflow < 0xffffffff >> (32 - BITS_PER_ITER)) { 314 q_digit++; 315 n_hi += d; 316 overflow += n_hi < d; 317 } 318 quotient = add_u64_long(quotient << BITS_PER_ITER, q_digit); 319 } 320 321 /* 322 * The above only ensures the remainder doesn't overflow, 323 * it can still be possible to add (aka subtract) another copy 324 * of the divisor. 325 */ 326 if ((n_hi + d) > n_hi) 327 quotient++; 328 return quotient; 329} 330#if !defined(test_mul_u64_add_u64_div_u64) 331EXPORT_SYMBOL(mul_u64_add_u64_div_u64); 332#endif 333#endif