AK/Math.h at master · jcs.org/serenity

jcs.org / serenity
fork atom
Serenity Operating System
fork atom
serenity / AK / Math.h
at master 796 lines 18 kB view raw
wrap content
kleines Filmröllchen AK: Add constexpr floor and round 3y ago
b38beb10
  1/*
  2 * Copyright (c) 2021, Leon Albrecht <leon2002.la@gmail.com>.
  3 *
  4 * SPDX-License-Identifier: BSD-2-Clause
  5 */
  6
  7#pragma once
  8
  9#include <AK/BuiltinWrappers.h>
 10#include <AK/Concepts.h>
 11#include <AK/NumericLimits.h>
 12#include <AK/StdLibExtraDetails.h>
 13#include <AK/Types.h>
 14
 15#ifdef KERNEL
 16#    error "Including AK/Math.h from the Kernel is never correct! Floating point is disabled."
 17#endif
 18
 19namespace AK {
 20
 21template<FloatingPoint T>
 22constexpr T NaN = __builtin_nan("");
 23template<FloatingPoint T>
 24constexpr T Pi = 3.141592653589793238462643383279502884L;
 25template<FloatingPoint T>
 26constexpr T E = 2.718281828459045235360287471352662498L;
 27template<FloatingPoint T>
 28constexpr T Sqrt2 = 1.414213562373095048801688724209698079L;
 29template<FloatingPoint T>
 30constexpr T Sqrt1_2 = 0.707106781186547524400844362104849039L;
 31
 32namespace Details {
 33template<size_t>
 34constexpr size_t product_even();
 35template<>
 36constexpr size_t product_even<2>() { return 2; }
 37template<size_t value>
 38constexpr size_t product_even() { return value * product_even<value - 2>(); }
 39
 40template<size_t>
 41constexpr size_t product_odd();
 42template<>
 43constexpr size_t product_odd<1>() { return 1; }
 44template<size_t value>
 45constexpr size_t product_odd() { return value * product_odd<value - 2>(); }
 46}
 47
 48#define CONSTEXPR_STATE(function, args...)        \
 49    if (is_constant_evaluated()) {                \
 50        if (IsSame<T, long double>)               \
 51            return __builtin_##function##l(args); \
 52        if (IsSame<T, double>)                    \
 53            return __builtin_##function(args);    \
 54        if (IsSame<T, float>)                     \
 55            return __builtin_##function##f(args); \
 56    }
 57
 58#define AARCH64_INSTRUCTION(instruction, arg) \
 59    if constexpr (IsSame<T, long double>)     \
 60        TODO();                               \
 61    if constexpr (IsSame<T, double>) {        \
 62        double res;                           \
 63        asm(#instruction " %d0, %d1"          \
 64            : "=w"(res)                       \
 65            : "w"(arg));                      \
 66        return res;                           \
 67    }                                         \
 68    if constexpr (IsSame<T, float>) {         \
 69        float res;                            \
 70        asm(#instruction " %s0, %s1"          \
 71            : "=w"(res)                       \
 72            : "w"(arg));                      \
 73        return res;                           \
 74    }
 75
 76namespace Division {
 77template<FloatingPoint T>
 78constexpr T fmod(T x, T y)
 79{
 80    CONSTEXPR_STATE(fmod, x, y);
 81
 82#if ARCH(X86_64)
 83    u16 fpu_status;
 84    do {
 85        asm(
 86            "fprem\n"
 87            "fnstsw %%ax\n"
 88            : "+t"(x), "=a"(fpu_status)
 89            : "u"(y));
 90    } while (fpu_status & 0x400);
 91    return x;
 92#else
 93    return __builtin_fmod(x, y);
 94#endif
 95}
 96template<FloatingPoint T>
 97constexpr T remainder(T x, T y)
 98{
 99    CONSTEXPR_STATE(remainder, x, y);
100
101#if ARCH(X86_64)
102    u16 fpu_status;
103    do {
104        asm(
105            "fprem1\n"
106            "fnstsw %%ax\n"
107            : "+t"(x), "=a"(fpu_status)
108            : "u"(y));
109    } while (fpu_status & 0x400);
110    return x;
111#else
112    return __builtin_fmod(x, y);
113#endif
114}
115}
116
117using Division::fmod;
118using Division::remainder;
119
120template<FloatingPoint T>
121constexpr T sqrt(T x)
122{
123    CONSTEXPR_STATE(sqrt, x);
124
125#if ARCH(X86_64)
126    if constexpr (IsSame<T, float>) {
127        float res;
128        asm("sqrtss %1, %0"
129            : "=x"(res)
130            : "x"(x));
131        return res;
132    }
133    if constexpr (IsSame<T, double>) {
134        double res;
135        asm("sqrtsd %1, %0"
136            : "=x"(res)
137            : "x"(x));
138        return res;
139    }
140    T res;
141    asm("fsqrt"
142        : "=t"(res)
143        : "0"(x));
144    return res;
145#elif ARCH(AARCH64)
146    AARCH64_INSTRUCTION(fsqrt, x);
147#else
148    return __builtin_sqrt(x);
149#endif
150}
151
152template<FloatingPoint T>
153constexpr T rsqrt(T x)
154{
155#if ARCH(AARCH64)
156    AARCH64_INSTRUCTION(frsqrte, x);
157#elif ARCH(X86_64)
158    if constexpr (IsSame<T, float>) {
159        float res;
160        asm("rsqrtss %1, %0"
161            : "=x"(res)
162            : "x"(x));
163        return res;
164    }
165#endif
166    return (T)1. / sqrt(x);
167}
168
169template<FloatingPoint T>
170constexpr T cbrt(T x)
171{
172    CONSTEXPR_STATE(cbrt, x);
173    if (__builtin_isinf(x) || x == 0)
174        return x;
175    if (x < 0)
176        return -cbrt(-x);
177
178    T r = x;
179    T ex = 0;
180
181    while (r < 0.125l) {
182        r *= 8;
183        ex--;
184    }
185    while (r > 1.0l) {
186        r *= 0.125l;
187        ex++;
188    }
189
190    r = (-0.46946116l * r + 1.072302l) * r + 0.3812513l;
191
192    while (ex < 0) {
193        r *= 0.5l;
194        ex++;
195    }
196    while (ex > 0) {
197        r *= 2.0l;
198        ex--;
199    }
200
201    r = (2.0l / 3.0l) * r + (1.0l / 3.0l) * x / (r * r);
202    r = (2.0l / 3.0l) * r + (1.0l / 3.0l) * x / (r * r);
203    r = (2.0l / 3.0l) * r + (1.0l / 3.0l) * x / (r * r);
204    r = (2.0l / 3.0l) * r + (1.0l / 3.0l) * x / (r * r);
205
206    return r;
207}
208
209template<FloatingPoint T>
210constexpr T fabs(T x)
211{
212    if (is_constant_evaluated())
213        return x < 0 ? -x : x;
214#if ARCH(X86_64)
215    asm(
216        "fabs"
217        : "+t"(x));
218    return x;
219#elif ARCH(AARCH64)
220    AARCH64_INSTRUCTION(fabs, x);
221#else
222    return __builtin_fabs(x);
223#endif
224}
225
226namespace Trigonometry {
227
228template<FloatingPoint T>
229constexpr T hypot(T x, T y)
230{
231    return sqrt(x * x + y * y);
232}
233
234template<FloatingPoint T>
235constexpr T sin(T angle)
236{
237    CONSTEXPR_STATE(sin, angle);
238
239#if ARCH(X86_64)
240    T ret;
241    asm(
242        "fsin"
243        : "=t"(ret)
244        : "0"(angle));
245    return ret;
246#else
247    return __builtin_sin(angle);
248#endif
249}
250
251template<FloatingPoint T>
252constexpr T cos(T angle)
253{
254    CONSTEXPR_STATE(cos, angle);
255
256#if ARCH(X86_64)
257    T ret;
258    asm(
259        "fcos"
260        : "=t"(ret)
261        : "0"(angle));
262    return ret;
263#else
264    return __builtin_cos(angle);
265#endif
266}
267
268template<FloatingPoint T>
269constexpr void sincos(T angle, T& sin_val, T& cos_val)
270{
271    if (is_constant_evaluated()) {
272        sin_val = sin(angle);
273        cos_val = cos(angle);
274        return;
275    }
276#if ARCH(X86_64)
277    asm(
278        "fsincos"
279        : "=t"(cos_val), "=u"(sin_val)
280        : "0"(angle));
281#else
282    sin_val = sin(angle);
283    cos_val = cos(angle);
284#endif
285}
286
287template<FloatingPoint T>
288constexpr T tan(T angle)
289{
290    CONSTEXPR_STATE(tan, angle);
291
292#if ARCH(X86_64)
293    T ret, one;
294    asm(
295        "fptan"
296        : "=t"(one), "=u"(ret)
297        : "0"(angle));
298
299    return ret;
300#else
301    return __builtin_tan(angle);
302#endif
303}
304
305template<FloatingPoint T>
306constexpr T atan(T value)
307{
308    CONSTEXPR_STATE(atan, value);
309
310#if ARCH(X86_64)
311    T ret;
312    asm(
313        "fld1\n"
314        "fpatan\n"
315        : "=t"(ret)
316        : "0"(value));
317    return ret;
318#else
319    return __builtin_atan(value);
320#endif
321}
322
323template<FloatingPoint T>
324constexpr T asin(T x)
325{
326    CONSTEXPR_STATE(asin, x);
327    if (x > 1 || x < -1)
328        return NaN<T>;
329    if (x > (T)0.5 || x < (T)-0.5)
330        return 2 * atan<T>(x / (1 + sqrt<T>(1 - x * x)));
331    T squared = x * x;
332    T value = x;
333    T i = x * squared;
334    value += i * Details::product_odd<1>() / Details::product_even<2>() / 3;
335    i *= squared;
336    value += i * Details::product_odd<3>() / Details::product_even<4>() / 5;
337    i *= squared;
338    value += i * Details::product_odd<5>() / Details::product_even<6>() / 7;
339    i *= squared;
340    value += i * Details::product_odd<7>() / Details::product_even<8>() / 9;
341    i *= squared;
342    value += i * Details::product_odd<9>() / Details::product_even<10>() / 11;
343    i *= squared;
344    value += i * Details::product_odd<11>() / Details::product_even<12>() / 13;
345    i *= squared;
346    value += i * Details::product_odd<13>() / Details::product_even<14>() / 15;
347    i *= squared;
348    value += i * Details::product_odd<15>() / Details::product_even<16>() / 17;
349    return value;
350}
351
352template<FloatingPoint T>
353constexpr T acos(T value)
354{
355    CONSTEXPR_STATE(acos, value);
356
357    // FIXME: I am naive
358    return static_cast<T>(0.5) * Pi<T> - asin<T>(value);
359}
360
361template<FloatingPoint T>
362constexpr T atan2(T y, T x)
363{
364    CONSTEXPR_STATE(atan2, y, x);
365
366#if ARCH(X86_64)
367    T ret;
368    asm("fpatan"
369        : "=t"(ret)
370        : "0"(x), "u"(y)
371        : "st(1)");
372    return ret;
373#else
374    return __builtin_atan2(y, x);
375#endif
376}
377
378}
379
380using Trigonometry::acos;
381using Trigonometry::asin;
382using Trigonometry::atan;
383using Trigonometry::atan2;
384using Trigonometry::cos;
385using Trigonometry::hypot;
386using Trigonometry::sin;
387using Trigonometry::sincos;
388using Trigonometry::tan;
389
390namespace Exponentials {
391
392template<FloatingPoint T>
393constexpr T log(T x)
394{
395    CONSTEXPR_STATE(log, x);
396
397#if ARCH(X86_64)
398    T ret;
399    asm(
400        "fldln2\n"
401        "fxch %%st(1)\n"
402        "fyl2x\n"
403        : "=t"(ret)
404        : "0"(x));
405    return ret;
406#else
407    return __builtin_log(x);
408#endif
409}
410
411template<FloatingPoint T>
412constexpr T log2(T x)
413{
414    CONSTEXPR_STATE(log2, x);
415
416#if ARCH(X86_64)
417    T ret;
418    asm(
419        "fld1\n"
420        "fxch %%st(1)\n"
421        "fyl2x\n"
422        : "=t"(ret)
423        : "0"(x));
424    return ret;
425#else
426    return __builtin_log2(x);
427#endif
428}
429
430template<FloatingPoint T>
431constexpr T log10(T x)
432{
433    CONSTEXPR_STATE(log10, x);
434
435#if ARCH(X86_64)
436    T ret;
437    asm(
438        "fldlg2\n"
439        "fxch %%st(1)\n"
440        "fyl2x\n"
441        : "=t"(ret)
442        : "0"(x));
443    return ret;
444#else
445    return __builtin_log10(x);
446#endif
447}
448
449template<FloatingPoint T>
450constexpr T exp(T exponent)
451{
452    CONSTEXPR_STATE(exp, exponent);
453
454#if ARCH(X86_64)
455    T res;
456    asm("fldl2e\n"
457        "fmulp\n"
458        "fld1\n"
459        "fld %%st(1)\n"
460        "fprem\n"
461        "f2xm1\n"
462        "faddp\n"
463        "fscale\n"
464        "fstp %%st(1)"
465        : "=t"(res)
466        : "0"(exponent));
467    return res;
468#else
469    return __builtin_exp(exponent);
470#endif
471}
472
473template<FloatingPoint T>
474constexpr T exp2(T exponent)
475{
476    CONSTEXPR_STATE(exp2, exponent);
477
478#if ARCH(X86_64)
479    T res;
480    asm("fld1\n"
481        "fld %%st(1)\n"
482        "fprem\n"
483        "f2xm1\n"
484        "faddp\n"
485        "fscale\n"
486        "fstp %%st(1)"
487        : "=t"(res)
488        : "0"(exponent));
489    return res;
490#else
491    return __builtin_exp2(exponent);
492#endif
493}
494
495}
496
497using Exponentials::exp;
498using Exponentials::exp2;
499using Exponentials::log;
500using Exponentials::log10;
501using Exponentials::log2;
502
503namespace Hyperbolic {
504
505template<FloatingPoint T>
506constexpr T sinh(T x)
507{
508    T exponentiated = exp<T>(x);
509    if (x > 0)
510        return (exponentiated * exponentiated - 1) / 2 / exponentiated;
511    return (exponentiated - 1 / exponentiated) / 2;
512}
513
514template<FloatingPoint T>
515constexpr T cosh(T x)
516{
517    CONSTEXPR_STATE(cosh, x);
518
519    T exponentiated = exp(-x);
520    if (x < 0)
521        return (1 + exponentiated * exponentiated) / 2 / exponentiated;
522    return (1 / exponentiated + exponentiated) / 2;
523}
524
525template<FloatingPoint T>
526constexpr T tanh(T x)
527{
528    if (x > 0) {
529        T exponentiated = exp<T>(2 * x);
530        return (exponentiated - 1) / (exponentiated + 1);
531    }
532    T plusX = exp<T>(x);
533    T minusX = 1 / plusX;
534    return (plusX - minusX) / (plusX + minusX);
535}
536
537template<FloatingPoint T>
538constexpr T asinh(T x)
539{
540    return log<T>(x + sqrt<T>(x * x + 1));
541}
542
543template<FloatingPoint T>
544constexpr T acosh(T x)
545{
546    return log<T>(x + sqrt<T>(x * x - 1));
547}
548
549template<FloatingPoint T>
550constexpr T atanh(T x)
551{
552    return log<T>((1 + x) / (1 - x)) / (T)2.0l;
553}
554
555}
556
557using Hyperbolic::acosh;
558using Hyperbolic::asinh;
559using Hyperbolic::atanh;
560using Hyperbolic::cosh;
561using Hyperbolic::sinh;
562using Hyperbolic::tanh;
563
564template<Integral I, FloatingPoint P>
565ALWAYS_INLINE I round_to(P value);
566
567#if ARCH(X86_64)
568template<Integral I>
569ALWAYS_INLINE I round_to(long double value)
570{
571    // Note: fistps outputs into a signed integer location (i16, i32, i64),
572    //       so lets be nice and tell the compiler that.
573    Conditional<sizeof(I) >= sizeof(i16), MakeSigned<I>, i16> ret;
574    if constexpr (sizeof(I) == sizeof(i64)) {
575        asm("fistpll %0"
576            : "=m"(ret)
577            : "t"(value)
578            : "st");
579    } else if constexpr (sizeof(I) == sizeof(i32)) {
580        asm("fistpl %0"
581            : "=m"(ret)
582            : "t"(value)
583            : "st");
584    } else {
585        asm("fistps %0"
586            : "=m"(ret)
587            : "t"(value)
588            : "st");
589    }
590    return static_cast<I>(ret);
591}
592
593template<Integral I>
594ALWAYS_INLINE I round_to(float value)
595{
596    // FIXME: round_to<u64> might will cause issues, aka the indefinite value being set,
597    //        if the value surpasses the i64 limit, even if the result could fit into an u64
598    //        To solve this we would either need to detect that value or do a range check and
599    //        then do a more specialized conversion, which might include a division (which is expensive)
600    if constexpr (sizeof(I) == sizeof(i64) || IsSame<I, u32>) {
601        i64 ret;
602        asm("cvtss2si %1, %0"
603            : "=r"(ret)
604            : "xm"(value));
605        return static_cast<I>(ret);
606    }
607    i32 ret;
608    asm("cvtss2si %1, %0"
609        : "=r"(ret)
610        : "xm"(value));
611    return static_cast<I>(ret);
612}
613
614template<Integral I>
615ALWAYS_INLINE I round_to(double value)
616{
617    // FIXME: round_to<u64> might will cause issues, aka the indefinite value being set,
618    //        if the value surpasses the i64 limit, even if the result could fit into an u64
619    //        To solve this we would either need to detect that value or do a range check and
620    //        then do a more specialized conversion, which might include a division (which is expensive)
621    if constexpr (sizeof(I) == sizeof(i64) || IsSame<I, u32>) {
622        i64 ret;
623        asm("cvtsd2si %1, %0"
624            : "=r"(ret)
625            : "xm"(value));
626        return static_cast<I>(ret);
627    }
628    i32 ret;
629    asm("cvtsd2si %1, %0"
630        : "=r"(ret)
631        : "xm"(value));
632    return static_cast<I>(ret);
633}
634
635#elif ARCH(AARCH64)
636template<Signed I>
637ALWAYS_INLINE I round_to(float value)
638{
639    if constexpr (sizeof(I) <= sizeof(u32)) {
640        i32 res;
641        asm("fcvtns %w0, %s1"
642            : "=r"(res)
643            : "w"(value));
644        return static_cast<I>(res);
645    }
646    i64 res;
647    asm("fcvtns %0, %s1"
648        : "=r"(res)
649        : "w"(value));
650    return static_cast<I>(res);
651}
652
653template<Signed I>
654ALWAYS_INLINE I round_to(double value)
655{
656    if constexpr (sizeof(I) <= sizeof(u32)) {
657        i32 res;
658        asm("fcvtns %w0, %d1"
659            : "=r"(res)
660            : "w"(value));
661        return static_cast<I>(res);
662    }
663    i64 res;
664    asm("fcvtns %0, %d1"
665        : "=r"(res)
666        : "w"(value));
667    return static_cast<I>(res);
668}
669
670template<Unsigned U>
671ALWAYS_INLINE U round_to(float value)
672{
673    if constexpr (sizeof(U) <= sizeof(u32)) {
674        u32 res;
675        asm("fcvtnu %w0, %s1"
676            : "=r"(res)
677            : "w"(value));
678        return static_cast<U>(res);
679    }
680    i64 res;
681    asm("fcvtnu %0, %s1"
682        : "=r"(res)
683        : "w"(value));
684    return static_cast<U>(res);
685}
686
687template<Unsigned U>
688ALWAYS_INLINE U round_to(double value)
689{
690    if constexpr (sizeof(U) <= sizeof(u32)) {
691        u32 res;
692        asm("fcvtns %w0, %d1"
693            : "=r"(res)
694            : "w"(value));
695        return static_cast<U>(res);
696    }
697    i64 res;
698    asm("fcvtns %0, %d1"
699        : "=r"(res)
700        : "w"(value));
701    return static_cast<U>(res);
702}
703
704#else
705template<Integral I, FloatingPoint P>
706ALWAYS_INLINE I round_to(P value)
707{
708    if constexpr (IsSame<P, long double>)
709        return static_cast<I>(__builtin_llrintl(value));
710    if constexpr (IsSame<P, double>)
711        return static_cast<I>(__builtin_llrint(value));
712    if constexpr (IsSame<P, float>)
713        return static_cast<I>(__builtin_llrintf(value));
714}
715#endif
716
717template<FloatingPoint T>
718constexpr T pow(T x, T y)
719{
720    CONSTEXPR_STATE(pow, x, y);
721    // fixme I am naive
722    if (__builtin_isnan(y))
723        return y;
724    if (y == 0)
725        return 1;
726    if (x == 0)
727        return 0;
728    if (y == 1)
729        return x;
730    int y_as_int = (int)y;
731    if (y == (T)y_as_int) {
732        T result = x;
733        for (int i = 0; i < fabs<T>(y) - 1; ++i)
734            result *= x;
735        if (y < 0)
736            result = 1.0l / result;
737        return result;
738    }
739
740    return exp2<T>(y * log2<T>(x));
741}
742
743template<FloatingPoint T>
744constexpr T ceil(T num)
745{
746    if (is_constant_evaluated()) {
747        if (num < NumericLimits<i64>::min() || num > NumericLimits<i64>::max())
748            return num;
749        return (static_cast<T>(static_cast<i64>(num)) == num)
750            ? static_cast<i64>(num)
751            : static_cast<i64>(num) + ((num > 0) ? 1 : 0);
752    }
753#if ARCH(AARCH64)
754    AARCH64_INSTRUCTION(frintp, num);
755#else
756    return __builtin_ceil(num);
757#endif
758}
759
760template<FloatingPoint T>
761constexpr T floor(T num)
762{
763    if (is_constant_evaluated()) {
764        if (num < NumericLimits<i64>::min() || num > NumericLimits<i64>::max())
765            return num;
766        return (static_cast<T>(static_cast<i64>(num)) == num)
767            ? static_cast<i64>(num)
768            : static_cast<i64>(num) - ((num > 0) ? 0 : 1);
769    }
770#if ARCH(AARCH64)
771    AARCH64_INSTRUCTION(frintm, num);
772#else
773    return __builtin_floor(num);
774#endif
775}
776
777template<FloatingPoint T>
778constexpr T round(T x)
779{
780    CONSTEXPR_STATE(round, x);
781    // Note: This is break-tie-away-from-zero, so not the hw's understanding of
782    //       "nearest", which would be towards even.
783    if (x == 0.)
784        return x;
785    if (x > 0.)
786        return floor(x + .5);
787    return ceil(x - .5);
788}
789
790#undef CONSTEXPR_STATE
791#undef AARCH64_INSTRUCTION
792}
793
794#if USING_AK_GLOBALLY
795using AK::round_to;
796#endif