// Serenity Operating System
// at master 108 lines 2.6 kB view raw
1/* 2 * Copyright (c) 2021, Stephan Unverwerth <s.unverwerth@serenityos.org> 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 */ 6 7#pragma once 8 9#include <AK/Math.h> 10#include <AK/SIMD.h> 11#include <AK/SIMDExtras.h> 12#include <math.h> 13 14// Functions returning vectors or accepting vector arguments have different calling conventions 15// depending on whether the target architecture supports SSE or not. GCC generates warning "psabi" 16// when compiling for non-SSE architectures. We disable this warning because these functions 17// are static and should never be visible from outside the translation unit that includes this header. 18#pragma GCC diagnostic push 19#pragma GCC diagnostic ignored "-Wpsabi" 20 21namespace AK::SIMD { 22 23// Functions ending in "_int_range" only accept arguments within range [INT_MIN, INT_MAX]. 24// Other inputs will generate unexpected results. 25 26ALWAYS_INLINE static f32x4 truncate_int_range(f32x4 v) 27{ 28 return to_f32x4(to_i32x4(v)); 29} 30 31ALWAYS_INLINE static f32x4 floor_int_range(f32x4 v) 32{ 33 auto t = truncate_int_range(v); 34 return t > v ? t - 1.0f : t; 35} 36 37ALWAYS_INLINE static f32x4 ceil_int_range(f32x4 v) 38{ 39 auto t = truncate_int_range(v); 40 return t < v ? t + 1.0f : t; 41} 42 43ALWAYS_INLINE static f32x4 frac_int_range(f32x4 v) 44{ 45 return v - floor_int_range(v); 46} 47 48ALWAYS_INLINE static f32x4 clamp(f32x4 v, f32x4 min, f32x4 max) 49{ 50 return v < min ? min : (v > max ? max : v); 51} 52 53ALWAYS_INLINE static f32x4 clamp(f32x4 v, float min, float max) 54{ 55 return v < min ? min : (v > max ? 
max : v); 56} 57 58ALWAYS_INLINE static f32x4 exp(f32x4 v) 59{ 60 // FIXME: This should be replaced with a vectorized algorithm instead of calling the scalar expf 4 times 61 return f32x4 { 62 expf(v[0]), 63 expf(v[1]), 64 expf(v[2]), 65 expf(v[3]), 66 }; 67} 68 69ALWAYS_INLINE static f32x4 exp_approximate(f32x4 v) 70{ 71 static constexpr int number_of_iterations = 10; 72 auto result = 1.f + v / (1 << number_of_iterations); 73 for (int i = 0; i < number_of_iterations; ++i) 74 result *= result; 75 return result; 76} 77 78ALWAYS_INLINE static f32x4 sqrt(f32x4 v) 79{ 80#if ARCH(X86_64) 81 return __builtin_ia32_sqrtps(v); 82#else 83 return f32x4 { 84 AK::sqrt(v[0]), 85 AK::sqrt(v[1]), 86 AK::sqrt(v[2]), 87 AK::sqrt(v[3]), 88 }; 89#endif 90} 91 92ALWAYS_INLINE static f32x4 rsqrt(f32x4 v) 93{ 94#if ARCH(X86_64) 95 return __builtin_ia32_rsqrtps(v); 96#else 97 return f32x4 { 98 1.f / AK::sqrt(v[0]), 99 1.f / AK::sqrt(v[1]), 100 1.f / AK::sqrt(v[2]), 101 1.f / AK::sqrt(v[3]), 102 }; 103#endif 104} 105 106} 107 108#pragma GCC diagnostic pop