lib/math/div64.c at master · tjh.dev/kernel

tjh.dev / kernel
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
kernel / lib / math / div64.c
at master 8.1 kB view raw
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * Copyright (C) 2003 Bernardo Innocenti <bernie@develer.com>
  4 *
  5 * Based on former do_div() implementation from asm-parisc/div64.h:
  6 *	Copyright (C) 1999 Hewlett-Packard Co
  7 *	Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
  8 *
  9 *
 10 * Generic C version of 64bit/32bit division and modulo, with
 11 * 64bit result and 32bit remainder.
 12 *
 13 * The fast case for (n>>32 == 0) is handled inline by do_div().
 14 *
 15 * Code generated for this function might be very inefficient
 16 * for some CPUs. __div64_32() can be overridden by linking arch-specific
 17 * assembly versions such as arch/ppc/lib/div64.S and arch/sh/lib/div64.S
 18 * or by defining a preprocessor macro in arch/include/asm/div64.h.
 19 */
 20
 21#include <linux/bitops.h>
 22#include <linux/export.h>
 23#include <linux/math.h>
 24#include <linux/math64.h>
 25#include <linux/minmax.h>
 26#include <linux/log2.h>
 27
 28/* Not needed on 64bit architectures */
 29#if BITS_PER_LONG == 32
 30
 31#ifndef __div64_32
 32uint32_t __attribute__((weak)) __div64_32(uint64_t *n, uint32_t base)
 33{
 34	uint64_t rem = *n;
 35	uint64_t b = base;
 36	uint64_t res, d = 1;
 37	uint32_t high = rem >> 32;
 38
 39	/* Reduce the thing a bit first */
 40	res = 0;
 41	if (high >= base) {
 42		high /= base;
 43		res = (uint64_t) high << 32;
 44		rem -= (uint64_t) (high*base) << 32;
 45	}
 46
 47	while ((int64_t)b > 0 && b < rem) {
 48		b = b+b;
 49		d = d+d;
 50	}
 51
 52	do {
 53		if (rem >= b) {
 54			rem -= b;
 55			res += d;
 56		}
 57		b >>= 1;
 58		d >>= 1;
 59	} while (d);
 60
 61	*n = res;
 62	return rem;
 63}
 64EXPORT_SYMBOL(__div64_32);
 65#endif
 66
 67#ifndef div_s64_rem
 68s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder)
 69{
 70	u64 quotient;
 71
 72	if (dividend < 0) {
 73		quotient = div_u64_rem(-dividend, abs(divisor), (u32 *)remainder);
 74		*remainder = -*remainder;
 75		if (divisor > 0)
 76			quotient = -quotient;
 77	} else {
 78		quotient = div_u64_rem(dividend, abs(divisor), (u32 *)remainder);
 79		if (divisor < 0)
 80			quotient = -quotient;
 81	}
 82	return quotient;
 83}
 84EXPORT_SYMBOL(div_s64_rem);
 85#endif
 86
 87/*
 88 * div64_u64_rem - unsigned 64bit divide with 64bit divisor and remainder
 89 * @dividend:	64bit dividend
 90 * @divisor:	64bit divisor
 91 * @remainder:  64bit remainder
 92 *
 93 * This implementation is a comparable to algorithm used by div64_u64.
 94 * But this operation, which includes math for calculating the remainder,
 95 * is kept distinct to avoid slowing down the div64_u64 operation on 32bit
 96 * systems.
 97 */
 98#ifndef div64_u64_rem
 99u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder)
100{
101	u32 high = divisor >> 32;
102	u64 quot;
103
104	if (high == 0) {
105		u32 rem32;
106		quot = div_u64_rem(dividend, divisor, &rem32);
107		*remainder = rem32;
108	} else {
109		int n = fls(high);
110		quot = div_u64(dividend >> n, divisor >> n);
111
112		if (quot != 0)
113			quot--;
114
115		*remainder = dividend - quot * divisor;
116		if (*remainder >= divisor) {
117			quot++;
118			*remainder -= divisor;
119		}
120	}
121
122	return quot;
123}
124EXPORT_SYMBOL(div64_u64_rem);
125#endif
126
127/*
128 * div64_u64 - unsigned 64bit divide with 64bit divisor
129 * @dividend:	64bit dividend
130 * @divisor:	64bit divisor
131 *
132 * This implementation is a modified version of the algorithm proposed
133 * by the book 'Hacker's Delight'.  The original source and full proof
134 * can be found here and is available for use without restriction.
135 *
136 * 'http://www.hackersdelight.org/hdcodetxt/divDouble.c.txt'
137 */
138#ifndef div64_u64
139u64 div64_u64(u64 dividend, u64 divisor)
140{
141	u32 high = divisor >> 32;
142	u64 quot;
143
144	if (high == 0) {
145		quot = div_u64(dividend, divisor);
146	} else {
147		int n = fls(high);
148		quot = div_u64(dividend >> n, divisor >> n);
149
150		if (quot != 0)
151			quot--;
152		if ((dividend - quot * divisor) >= divisor)
153			quot++;
154	}
155
156	return quot;
157}
158EXPORT_SYMBOL(div64_u64);
159#endif
160
161#ifndef div64_s64
162s64 div64_s64(s64 dividend, s64 divisor)
163{
164	s64 quot, t;
165
166	quot = div64_u64(abs(dividend), abs(divisor));
167	t = (dividend ^ divisor) >> 63;
168
169	return (quot ^ t) - t;
170}
171EXPORT_SYMBOL(div64_s64);
172#endif
173
174#endif /* BITS_PER_LONG == 32 */
175
176/*
177 * Iterative div/mod for use when dividend is not expected to be much
178 * bigger than divisor.
179 */
180#ifndef iter_div_u64_rem
181u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
182{
183	return __iter_div_u64_rem(dividend, divisor, remainder);
184}
185EXPORT_SYMBOL(iter_div_u64_rem);
186#endif
187
188#if !defined(mul_u64_add_u64_div_u64) || defined(test_mul_u64_add_u64_div_u64)
189
190#define mul_add(a, b, c) add_u64_u32(mul_u32_u32(a, b), c)
191
192#if defined(__SIZEOF_INT128__) && !defined(test_mul_u64_add_u64_div_u64)
193static inline u64 mul_u64_u64_add_u64(u64 *p_lo, u64 a, u64 b, u64 c)
194{
195	/* native 64x64=128 bits multiplication */
196	u128 prod = (u128)a * b + c;
197
198	*p_lo = prod;
199	return prod >> 64;
200}
201#else
202static inline u64 mul_u64_u64_add_u64(u64 *p_lo, u64 a, u64 b, u64 c)
203{
204	/* perform a 64x64=128 bits multiplication in 32bit chunks */
205	u64 x, y, z;
206
207	/* Since (x-1)(x-1) + 2(x-1) == x.x - 1 two u32 can be added to a u64 */
208	x = mul_add(a, b, c);
209	y = mul_add(a, b >> 32, c >> 32);
210	y = add_u64_u32(y, x >> 32);
211	z = mul_add(a >> 32, b >> 32, y >> 32);
212	y = mul_add(a >> 32, b, y);
213	*p_lo = (y << 32) + (u32)x;
214	return add_u64_u32(z, y >> 32);
215}
216#endif
217
218#ifndef BITS_PER_ITER
219#define BITS_PER_ITER (__LONG_WIDTH__ >= 64 ? 32 : 16)
220#endif
221
222#if BITS_PER_ITER == 32
223#define mul_u64_long_add_u64(p_lo, a, b, c) mul_u64_u64_add_u64(p_lo, a, b, c)
224#define add_u64_long(a, b) ((a) + (b))
225#else
226#undef BITS_PER_ITER
227#define BITS_PER_ITER 16
228static inline u32 mul_u64_long_add_u64(u64 *p_lo, u64 a, u32 b, u64 c)
229{
230	u64 n_lo = mul_add(a, b, c);
231	u64 n_med = mul_add(a >> 32, b, c >> 32);
232
233	n_med = add_u64_u32(n_med, n_lo >> 32);
234	*p_lo = n_med << 32 | (u32)n_lo;
235	return n_med >> 32;
236}
237
238#define add_u64_long(a, b) add_u64_u32(a, b)
239#endif
240
241u64 mul_u64_add_u64_div_u64(u64 a, u64 b, u64 c, u64 d)
242{
243	unsigned long d_msig, q_digit;
244	unsigned int reps, d_z_hi;
245	u64 quotient, n_lo, n_hi;
246	u32 overflow;
247
248	n_hi = mul_u64_u64_add_u64(&n_lo, a, b, c);
249
250	if (!n_hi)
251		return div64_u64(n_lo, d);
252
253	if (unlikely(n_hi >= d)) {
254		/* trigger runtime exception if divisor is zero */
255		if (d == 0) {
256			unsigned long zero = 0;
257
258			OPTIMIZER_HIDE_VAR(zero);
259			return ~0UL/zero;
260		}
261		/* overflow: result is unrepresentable in a u64 */
262		return ~0ULL;
263	}
264
265	/* Left align the divisor, shifting the dividend to match */
266	d_z_hi = __builtin_clzll(d);
267	if (d_z_hi) {
268		d <<= d_z_hi;
269		n_hi = n_hi << d_z_hi | n_lo >> (64 - d_z_hi);
270		n_lo <<= d_z_hi;
271	}
272
273	reps = 64 / BITS_PER_ITER;
274	/* Optimise loop count for small dividends */
275	if (!(u32)(n_hi >> 32)) {
276		reps -= 32 / BITS_PER_ITER;
277		n_hi = n_hi << 32 | n_lo >> 32;
278		n_lo <<= 32;
279	}
280#if BITS_PER_ITER == 16
281	if (!(u32)(n_hi >> 48)) {
282		reps--;
283		n_hi = add_u64_u32(n_hi << 16, n_lo >> 48);
284		n_lo <<= 16;
285	}
286#endif
287
288	/* Invert the dividend so we can use add instead of subtract. */
289	n_lo = ~n_lo;
290	n_hi = ~n_hi;
291
292	/*
293	 * Get the most significant BITS_PER_ITER bits of the divisor.
294	 * This is used to get a low 'guestimate' of the quotient digit.
295	 */
296	d_msig = (d >> (64 - BITS_PER_ITER)) + 1;
297
298	/*
299	 * Now do a 'long division' with BITS_PER_ITER bit 'digits'.
300	 * The 'guess' quotient digit can be low and BITS_PER_ITER+1 bits.
301	 * The worst case is dividing ~0 by 0x8000 which requires two subtracts.
302	 */
303	quotient = 0;
304	while (reps--) {
305		q_digit = (unsigned long)(~n_hi >> (64 - 2 * BITS_PER_ITER)) / d_msig;
306		/* Shift 'n' left to align with the product q_digit * d */
307		overflow = n_hi >> (64 - BITS_PER_ITER);
308		n_hi = add_u64_u32(n_hi << BITS_PER_ITER, n_lo >> (64 - BITS_PER_ITER));
309		n_lo <<= BITS_PER_ITER;
310		/* Add product to negated divisor */
311		overflow += mul_u64_long_add_u64(&n_hi, d, q_digit, n_hi);
312		/* Adjust for the q_digit 'guestimate' being low */
313		while (overflow < 0xffffffff >> (32 - BITS_PER_ITER)) {
314			q_digit++;
315			n_hi += d;
316			overflow += n_hi < d;
317		}
318		quotient = add_u64_long(quotient << BITS_PER_ITER, q_digit);
319	}
320
321	/*
322	 * The above only ensures the remainder doesn't overflow,
323	 * it can still be possible to add (aka subtract) another copy
324	 * of the divisor.
325	 */
326	if ((n_hi + d) > n_hi)
327		quotient++;
328	return quotient;
329}
330#if !defined(test_mul_u64_add_u64_div_u64)
331EXPORT_SYMBOL(mul_u64_add_u64_div_u64);
332#endif
333#endif