common implementation of iterative div/mod

We have a few instances of the open-coded iterative div/mod loop, used
when we don't expect the dividend to be much bigger than the divisor.
Unfortunately, modern gccs have the tendency to strength-"reduce" this
into a full mod operation, which isn't necessarily any faster, and
which, even if it were, may not even link: gcc implements 64-bit
mod via a helper in libgcc, and the kernel doesn't link against libgcc.
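
As an illustration of the pattern (a hypothetical userspace sketch,
with made-up names, not code from the tree): on a 32-bit target gcc
has no 64-bit divide instruction, so the "reduced" loop turns into a
call to libgcc's __umoddi3/__udivdi3 helpers.

#include <stdint.h>

/* Open-coded iterative div/mod: cheap when the quotient is small,
   but gcc may strength-reduce the loop into a full div/mod. */
static uint32_t ticks_naive(uint64_t stolen, uint64_t ns_per_tick)
{
        uint32_t ticks = 0;

        while (stolen >= ns_per_tick) {
                ticks++;
                stolen -= ns_per_tick;
        }
        return ticks;
}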

The workaround is to put a dummy asm statement in the loop to prevent
gcc from performing the transformation.
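
A minimal sketch of the trick (again hypothetical userspace code,
using gcc-style inline asm): the empty asm() claims to read and write
the dividend, so its value is opaque to the optimizer on every
iteration and gcc can no longer prove the loop equivalent to a
modulo; at runtime it emits no instructions.

#include <stdint.h>

static uint32_t ticks_loop(uint64_t dividend, uint32_t divisor)
{
        uint32_t ret = 0;

        while (dividend >= divisor) {
                /* Barrier: forces the compiler to keep the loop. */
                asm("" : "+rm"(dividend));
                dividend -= divisor;
                ret++;
        }
        return ret;
}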

This patch creates a single implementation of this loop, and uses it
to replace the open-coded versions I know about.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Segher Boessenkool <segher@kernel.crashing.org>
Cc: Christian Kujau <lists@nerdbynature.de>
Cc: Robert Hancock <hancockr@shaw.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

Authored by Jeremy Fitzhardinge, committed by Ingo Molnar (f595ec96, 5e70b7f3)

4 files changed, 30 insertions(+), 19 deletions(-)

arch/x86/xen/time.c (+3, -10)

--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -12,6 +12,7 @@
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
 #include <linux/kernel_stat.h>
+#include <linux/math64.h>
 
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
@@ -150,11 +151,7 @@
         if (stolen < 0)
                 stolen = 0;
 
-        ticks = 0;
-        while (stolen >= NS_PER_TICK) {
-                ticks++;
-                stolen -= NS_PER_TICK;
-        }
+        ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
         __get_cpu_var(residual_stolen) = stolen;
         account_steal_time(NULL, ticks);
 
@@ -166,11 +163,7 @@
         if (blocked < 0)
                 blocked = 0;
 
-        ticks = 0;
-        while (blocked >= NS_PER_TICK) {
-                ticks++;
-                blocked -= NS_PER_TICK;
-        }
+        ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked);
         __get_cpu_var(residual_blocked) = blocked;
         account_steal_time(idle_task(smp_processor_id()), ticks);
 }
include/linux/math64.h (+2)

--- a/include/linux/math64.h
+++ b/include/linux/math64.h
@@ -81,4 +81,6 @@
 }
 #endif
 
+u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder);
+
 #endif /* _LINUX_MATH64_H */
include/linux/time.h (+2, -9)

--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -6,6 +6,7 @@
 #ifdef __KERNEL__
 # include <linux/cache.h>
 # include <linux/seqlock.h>
+# include <linux/math64.h>
 #endif
 
 #ifndef _STRUCT_TIMESPEC
@@ -172,15 +173,7 @@
  */
 static inline void timespec_add_ns(struct timespec *a, u64 ns)
 {
-        ns += a->tv_nsec;
-        while(unlikely(ns >= NSEC_PER_SEC)) {
-                /* The following asm() prevents the compiler from
-                 * optimising this loop into a modulo operation. */
-                asm("" : "+r"(ns));
-
-                ns -= NSEC_PER_SEC;
-                a->tv_sec++;
-        }
+        a->tv_sec += iter_div_u64_rem(a->tv_nsec + ns, NSEC_PER_SEC, &ns);
         a->tv_nsec = ns;
 }
 #endif /* __KERNEL__ */
lib/div64.c (+23)

--- a/lib/div64.c
+++ b/lib/div64.c
@@ -98,3 +98,26 @@
 #endif
 
 #endif /* BITS_PER_LONG == 32 */
+
+/*
+ * Iterative div/mod for use when dividend is not expected to be much
+ * bigger than divisor.
+ */
+u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
+{
+        u32 ret = 0;
+
+        while (dividend >= divisor) {
+                /* The following asm() prevents the compiler from
+                   optimising this loop into a modulo operation. */
+                asm("" : "+rm"(dividend));
+
+                dividend -= divisor;
+                ret++;
+        }
+
+        *remainder = dividend;
+
+        return ret;
+}
+EXPORT_SYMBOL(iter_div_u64_rem);
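
As a usage note, a hypothetical standalone harness showing the
intended semantics (NSEC_PER_SEC and the local copy of the helper are
assumptions made so the sketch compiles on its own; kernel code just
includes <linux/math64.h>):

#include <assert.h>
#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

/* Local copy of the patch's helper so the sketch stands alone. */
static uint32_t iter_div_u64_rem(uint64_t dividend, uint32_t divisor,
                                 uint64_t *remainder)
{
        uint32_t ret = 0;

        while (dividend >= divisor) {
                asm("" : "+rm"(dividend));      /* keep the loop a loop */
                dividend -= divisor;
                ret++;
        }
        *remainder = dividend;
        return ret;
}

int main(void)
{
        uint64_t rem;

        /* 3s + 123ns splits into 3 whole seconds plus a 123ns
           residual, as in timespec_add_ns(). */
        assert(iter_div_u64_rem(3 * NSEC_PER_SEC + 123,
                                NSEC_PER_SEC, &rem) == 3);
        assert(rem == 123);
        return 0;
}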