// Serenity Operating System
1/*
2 * Copyright (c) 2021, Stephan Unverwerth <s.unverwerth@serenityos.org>
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 */
6
7#pragma once
8
9#include <AK/Math.h>
10#include <AK/SIMD.h>
11#include <AK/SIMDExtras.h>
12#include <math.h>
13
// Functions returning vectors or accepting vector arguments have different calling conventions
// depending on whether the target architecture supports SSE or not. GCC emits a "-Wpsabi" warning
// when compiling such functions for non-SSE architectures. We disable this warning because these
// functions are static and should never be visible outside the translation unit that includes this header.
18#pragma GCC diagnostic push
19#pragma GCC diagnostic ignored "-Wpsabi"
20
21namespace AK::SIMD {
22
23// Functions ending in "_int_range" only accept arguments within range [INT_MIN, INT_MAX].
24// Other inputs will generate unexpected results.
25
26ALWAYS_INLINE static f32x4 truncate_int_range(f32x4 v)
27{
28 return to_f32x4(to_i32x4(v));
29}
30
31ALWAYS_INLINE static f32x4 floor_int_range(f32x4 v)
32{
33 auto t = truncate_int_range(v);
34 return t > v ? t - 1.0f : t;
35}
36
37ALWAYS_INLINE static f32x4 ceil_int_range(f32x4 v)
38{
39 auto t = truncate_int_range(v);
40 return t < v ? t + 1.0f : t;
41}
42
43ALWAYS_INLINE static f32x4 frac_int_range(f32x4 v)
44{
45 return v - floor_int_range(v);
46}
47
48ALWAYS_INLINE static f32x4 clamp(f32x4 v, f32x4 min, f32x4 max)
49{
50 return v < min ? min : (v > max ? max : v);
51}
52
53ALWAYS_INLINE static f32x4 clamp(f32x4 v, float min, float max)
54{
55 return v < min ? min : (v > max ? max : v);
56}
57
58ALWAYS_INLINE static f32x4 exp(f32x4 v)
59{
60 // FIXME: This should be replaced with a vectorized algorithm instead of calling the scalar expf 4 times
61 return f32x4 {
62 expf(v[0]),
63 expf(v[1]),
64 expf(v[2]),
65 expf(v[3]),
66 };
67}
68
69ALWAYS_INLINE static f32x4 exp_approximate(f32x4 v)
70{
71 static constexpr int number_of_iterations = 10;
72 auto result = 1.f + v / (1 << number_of_iterations);
73 for (int i = 0; i < number_of_iterations; ++i)
74 result *= result;
75 return result;
76}
77
78ALWAYS_INLINE static f32x4 sqrt(f32x4 v)
79{
80#if ARCH(X86_64)
81 return __builtin_ia32_sqrtps(v);
82#else
83 return f32x4 {
84 AK::sqrt(v[0]),
85 AK::sqrt(v[1]),
86 AK::sqrt(v[2]),
87 AK::sqrt(v[3]),
88 };
89#endif
90}
91
92ALWAYS_INLINE static f32x4 rsqrt(f32x4 v)
93{
94#if ARCH(X86_64)
95 return __builtin_ia32_rsqrtps(v);
96#else
97 return f32x4 {
98 1.f / AK::sqrt(v[0]),
99 1.f / AK::sqrt(v[1]),
100 1.f / AK::sqrt(v[2]),
101 1.f / AK::sqrt(v[3]),
102 };
103#endif
104}
105
106}
107
108#pragma GCC diagnostic pop