arch/x86/include/asm/div64.h at nocache-cleanup

tjh.dev / kernel
fork atom
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
fork atom
kernel / arch / x86 / include / asm / div64.h
at nocache-cleanup 130 lines 3.1 kB view raw
wrap content
  1/* SPDX-License-Identifier: GPL-2.0 */
  2#ifndef _ASM_X86_DIV64_H
  3#define _ASM_X86_DIV64_H
  4
  5#ifdef CONFIG_X86_32
  6
  7#include <linux/types.h>
  8#include <linux/log2.h>
  9
 10/*
 11 * do_div() is NOT a C function. It wants to return
 12 * two values (the quotient and the remainder), but
 13 * since that doesn't work very well in C, what it
 14 * does is:
 15 *
 16 * - modifies the 64-bit dividend _in_place_
 17 * - returns the 32-bit remainder
 18 *
 19 * This ends up being the most efficient "calling
 20 * convention" on x86.
 21 */
 22#define do_div(n, base)						\
 23({								\
 24	unsigned long __upper, __low, __high, __mod, __base;	\
 25	__base = (base);					\
 26	if (__builtin_constant_p(__base) && is_power_of_2(__base)) { \
 27		__mod = n & (__base - 1);			\
 28		n >>= ilog2(__base);				\
 29	} else {						\
 30		asm("" : "=a" (__low), "=d" (__high) : "A" (n));\
 31		__upper = __high;				\
 32		if (__high) {					\
 33			__upper = __high % (__base);		\
 34			__high = __high / (__base);		\
 35		}						\
 36		asm("divl %2" : "=a" (__low), "=d" (__mod)	\
 37			: "rm" (__base), "0" (__low), "1" (__upper));	\
 38		asm("" : "=A" (n) : "a" (__low), "d" (__high));	\
 39	}							\
 40	__mod;							\
 41})
 42
 43static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
 44{
 45	union {
 46		u64 v64;
 47		u32 v32[2];
 48	} d = { dividend };
 49	u32 upper;
 50
 51	upper = d.v32[1];
 52	d.v32[1] = 0;
 53	if (upper >= divisor) {
 54		d.v32[1] = upper / divisor;
 55		upper %= divisor;
 56	}
 57	asm ("divl %2" : "=a" (d.v32[0]), "=d" (*remainder) :
 58		"rm" (divisor), "0" (d.v32[0]), "1" (upper));
 59	return d.v64;
 60}
 61#define div_u64_rem	div_u64_rem
 62
 63/*
 64 * gcc tends to zero extend 32bit values and do full 64bit maths.
 65 * Define asm functions that avoid this.
 66 * (clang generates better code for the C versions.)
 67 */
 68#ifndef __clang__
 69static inline u64 mul_u32_u32(u32 a, u32 b)
 70{
 71	u32 high, low;
 72
 73	asm ("mull %[b]" : "=a" (low), "=d" (high)
 74			 : [a] "a" (a), [b] "rm" (b) );
 75
 76	return low | ((u64)high) << 32;
 77}
 78#define mul_u32_u32 mul_u32_u32
 79
 80static inline u64 add_u64_u32(u64 a, u32 b)
 81{
 82	u32 high = a >> 32, low = a;
 83
 84	asm ("addl %[b], %[low]; adcl $0, %[high]"
 85		: [low] "+r" (low), [high] "+r" (high)
 86		: [b] "rm" (b) );
 87
 88	return low | (u64)high << 32;
 89}
 90#define add_u64_u32 add_u64_u32
 91#endif
 92
 93/*
 94 * __div64_32() is never called on x86, so prevent the
 95 * generic definition from getting built.
 96 */
 97#define __div64_32
 98
 99#else
100# include <asm-generic/div64.h>
101
102/*
103 * Will generate an #DE when the result doesn't fit u64, could fix with an
104 * __ex_table[] entry when it becomes an issue.
105 */
106static inline u64 mul_u64_add_u64_div_u64(u64 rax, u64 mul, u64 add, u64 div)
107{
108	u64 rdx;
109
110	asm ("mulq %[mul]" : "+a" (rax), "=d" (rdx) : [mul] "rm" (mul));
111
112	if (!statically_true(!add))
113		asm ("addq %[add], %[lo]; adcq $0, %[hi]" :
114			[lo] "+r" (rax), [hi] "+r" (rdx) : [add] "irm" (add));
115
116	asm ("divq %[div]" : "+a" (rax), "+d" (rdx) : [div] "rm" (div));
117
118	return rax;
119}
120#define mul_u64_add_u64_div_u64 mul_u64_add_u64_div_u64
121
122static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 div)
123{
124	return mul_u64_add_u64_div_u64(a, mul, 0, div);
125}
126#define mul_u64_u32_div	mul_u64_u32_div
127
128#endif /* CONFIG_X86_32 */
129
130#endif /* _ASM_X86_DIV64_H */