Serenity Operating System
at master 243 lines 8.9 kB view raw
1/* 2 * Copyright (c) 2021, Stephan Unverwerth <s.unverwerth@serenityos.org> 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 */ 6 7#include <AK/SIMDExtras.h> 8#include <AK/SIMDMath.h> 9#include <LibSoftGPU/Config.h> 10#include <LibSoftGPU/Image.h> 11#include <LibSoftGPU/SIMD.h> 12#include <LibSoftGPU/Sampler.h> 13#include <math.h> 14 15namespace SoftGPU { 16 17using AK::SIMD::f32x4; 18using AK::SIMD::i32x4; 19using AK::SIMD::u32x4; 20 21using AK::SIMD::clamp; 22using AK::SIMD::expand4; 23using AK::SIMD::floor_int_range; 24using AK::SIMD::frac_int_range; 25using AK::SIMD::maskbits; 26using AK::SIMD::to_f32x4; 27using AK::SIMD::to_i32x4; 28using AK::SIMD::to_u32x4; 29 30static f32x4 wrap_repeat(f32x4 value) 31{ 32 return frac_int_range(value); 33} 34 35[[maybe_unused]] static f32x4 wrap_clamp(f32x4 value) 36{ 37 return clamp(value, expand4(0.0f), expand4(1.0f)); 38} 39 40static f32x4 wrap_clamp_to_edge(f32x4 value, f32x4 num_texels) 41{ 42 f32x4 const clamp_limit = .5f / num_texels; 43 return clamp(value, clamp_limit, 1.f - clamp_limit); 44} 45 46static f32x4 wrap_mirrored_repeat(f32x4 value, f32x4 num_texels) 47{ 48 f32x4 integer = floor_int_range(value); 49 f32x4 frac = value - integer; 50 auto is_odd = to_i32x4(integer) & 1; 51 return wrap_clamp_to_edge(is_odd ? 1 - frac : frac, num_texels); 52} 53 54static f32x4 wrap(f32x4 value, GPU::TextureWrapMode mode, f32x4 num_texels) 55{ 56 switch (mode) { 57 case GPU::TextureWrapMode::Repeat: 58 return wrap_repeat(value); 59 case GPU::TextureWrapMode::MirroredRepeat: 60 return wrap_mirrored_repeat(value, num_texels); 61 case GPU::TextureWrapMode::Clamp: 62 if constexpr (CLAMP_DEPRECATED_BEHAVIOR) { 63 return wrap_clamp(value); 64 } 65 return wrap_clamp_to_edge(value, num_texels); 66 case GPU::TextureWrapMode::ClampToBorder: 67 case GPU::TextureWrapMode::ClampToEdge: 68 return wrap_clamp_to_edge(value, num_texels); 69 default: 70 VERIFY_NOT_REACHED(); 71 } 72} 73 74ALWAYS_INLINE static Vector4<f32x4> texel4(Image const& image, u32x4 level, u32x4 x, u32x4 y) 75{ 76 auto const& t0 = image.texel(level[0], x[0], y[0], 0); 77 auto const& t1 = image.texel(level[1], x[1], y[1], 0); 78 auto const& t2 = image.texel(level[2], x[2], y[2], 0); 79 auto const& t3 = image.texel(level[3], x[3], y[3], 0); 80 81 return Vector4<f32x4> { 82 f32x4 { t0.x(), t1.x(), t2.x(), t3.x() }, 83 f32x4 { t0.y(), t1.y(), t2.y(), t3.y() }, 84 f32x4 { t0.z(), t1.z(), t2.z(), t3.z() }, 85 f32x4 { t0.w(), t1.w(), t2.w(), t3.w() }, 86 }; 87} 88 89ALWAYS_INLINE static Vector4<f32x4> texel4border(Image const& image, u32x4 level, u32x4 x, u32x4 y, FloatVector4 const& border, u32x4 w, u32x4 h) 90{ 91 auto border_mask = maskbits(x < 0 || x >= w || y < 0 || y >= h); 92 93 auto const& t0 = (border_mask & 1) > 0 ? border : image.texel(level[0], x[0], y[0], 0); 94 auto const& t1 = (border_mask & 2) > 0 ? border : image.texel(level[1], x[1], y[1], 0); 95 auto const& t2 = (border_mask & 4) > 0 ? border : image.texel(level[2], x[2], y[2], 0); 96 auto const& t3 = (border_mask & 8) > 0 ? border : image.texel(level[3], x[3], y[3], 0); 97 98 return Vector4<f32x4> { 99 f32x4 { t0.x(), t1.x(), t2.x(), t3.x() }, 100 f32x4 { t0.y(), t1.y(), t2.y(), t3.y() }, 101 f32x4 { t0.z(), t1.z(), t2.z(), t3.z() }, 102 f32x4 { t0.w(), t1.w(), t2.w(), t3.w() }, 103 }; 104} 105 106Vector4<AK::SIMD::f32x4> Sampler::sample_2d(Vector2<AK::SIMD::f32x4> const& uv) const 107{ 108 if (m_config.bound_image.is_null()) 109 return expand4(FloatVector4 { 1, 0, 0, 1 }); 110 111 auto const& image = *static_ptr_cast<Image>(m_config.bound_image); 112 113 // FIXME: Make base level configurable with glTexParameteri(GL_TEXTURE_BASE_LEVEL, base_level) 114 constexpr unsigned base_level = 0; 115 116 // Determine the texture scale factor. See OpenGL 1.5 spec chapter 3.8.8. 117 // FIXME: Static casting from u32 to float could silently truncate here. 118 // u16 should be plenty enough for texture dimensions and would allow textures of up to 65536x65536x65536 pixels. 119 auto texel_coordinates = uv; 120 texel_coordinates.set_x(texel_coordinates.x() * static_cast<float>(image.width_at_level(base_level))); 121 texel_coordinates.set_y(texel_coordinates.y() * static_cast<float>(image.height_at_level(base_level))); 122 auto dtdx = ddx(texel_coordinates); 123 auto dtdy = ddy(texel_coordinates); 124 auto scale_factor = max(dtdx.dot(dtdx), dtdy.dot(dtdy)); 125 126 // FIXME: Here we simply determine the filter based on the single scale factor of the upper left pixel. 127 // Actually, we could end up with different scale factors for each pixel. This however would break our 128 // parallelisation as we could also end up with different filter modes per pixel. 129 130 // Note: scale_factor approximates texels per pixel. This means a scale factor less than 1 indicates texture magnification. 131 if (scale_factor[0] <= 1.f) 132 return sample_2d_lod(uv, expand4(base_level), m_config.texture_mag_filter); 133 134 if (m_config.mipmap_filter == GPU::MipMapFilter::None) 135 return sample_2d_lod(uv, expand4(base_level), m_config.texture_min_filter); 136 137 auto texture_lod_bias = AK::clamp(m_config.level_of_detail_bias, -MAX_TEXTURE_LOD_BIAS, MAX_TEXTURE_LOD_BIAS); 138 // FIXME: Instead of clamping to num_levels - 1, actually make the max mipmap level configurable with glTexParameteri(GL_TEXTURE_MAX_LEVEL, max_level) 139 auto min_level = expand4(static_cast<float>(base_level)); 140 auto max_level = expand4(static_cast<float>(image.number_of_levels()) - 1.f); 141 auto lambda_xy = log2_approximate(scale_factor) * .5f + texture_lod_bias; 142 auto level = clamp(lambda_xy, min_level, max_level); 143 144 auto lower_level_texel = sample_2d_lod(uv, to_u32x4(level), m_config.texture_min_filter); 145 146 if (m_config.mipmap_filter == GPU::MipMapFilter::Nearest) 147 return lower_level_texel; 148 149 auto higher_level_texel = sample_2d_lod(uv, to_u32x4(min(level + 1.f, max_level)), m_config.texture_min_filter); 150 151 return mix(lower_level_texel, higher_level_texel, frac_int_range(level)); 152} 153 154Vector4<AK::SIMD::f32x4> Sampler::sample_2d_lod(Vector2<AK::SIMD::f32x4> const& uv, AK::SIMD::u32x4 level, GPU::TextureFilter filter) const 155{ 156 auto const& image = *static_ptr_cast<Image>(m_config.bound_image); 157 158 u32x4 const width = { 159 image.width_at_level(level[0]), 160 image.width_at_level(level[1]), 161 image.width_at_level(level[2]), 162 image.width_at_level(level[3]), 163 }; 164 u32x4 const height = { 165 image.height_at_level(level[0]), 166 image.height_at_level(level[1]), 167 image.height_at_level(level[2]), 168 image.height_at_level(level[3]), 169 }; 170 171 auto f_width = to_f32x4(width); 172 auto f_height = to_f32x4(height); 173 174 u32x4 width_mask = width - 1; 175 u32x4 height_mask = height - 1; 176 177 f32x4 u = wrap(uv.x(), m_config.texture_wrap_u, f_width) * f_width; 178 f32x4 v = wrap(uv.y(), m_config.texture_wrap_v, f_height) * f_height; 179 180 if (filter == GPU::TextureFilter::Nearest) { 181 u32x4 i = to_u32x4(u); 182 u32x4 j = to_u32x4(v); 183 184 i = image.width_is_power_of_two() ? i & width_mask : i % width; 185 j = image.height_is_power_of_two() ? j & height_mask : j % height; 186 187 return texel4(image, level, i, j); 188 } 189 190 u -= 0.5f; 191 v -= 0.5f; 192 193 f32x4 const floored_u = floor_int_range(u); 194 f32x4 const floored_v = floor_int_range(v); 195 196 u32x4 i0 = to_u32x4(floored_u); 197 u32x4 i1 = i0 + 1; 198 u32x4 j0 = to_u32x4(floored_v); 199 u32x4 j1 = j0 + 1; 200 201 if (m_config.texture_wrap_u == GPU::TextureWrapMode::Repeat) { 202 if (image.width_is_power_of_two()) { 203 i0 = i0 & width_mask; 204 i1 = i1 & width_mask; 205 } else { 206 i0 = i0 % width; 207 i1 = i1 % width; 208 } 209 } 210 211 if (m_config.texture_wrap_v == GPU::TextureWrapMode::Repeat) { 212 if (image.height_is_power_of_two()) { 213 j0 = j0 & height_mask; 214 j1 = j1 & height_mask; 215 } else { 216 j0 = j0 % height; 217 j1 = j1 % height; 218 } 219 } 220 221 Vector4<f32x4> t0, t1, t2, t3; 222 223 if (m_config.texture_wrap_u == GPU::TextureWrapMode::Repeat && m_config.texture_wrap_v == GPU::TextureWrapMode::Repeat) { 224 t0 = texel4(image, level, i0, j0); 225 t1 = texel4(image, level, i1, j0); 226 t2 = texel4(image, level, i0, j1); 227 t3 = texel4(image, level, i1, j1); 228 } else { 229 t0 = texel4border(image, level, i0, j0, m_config.border_color, width, height); 230 t1 = texel4border(image, level, i1, j0, m_config.border_color, width, height); 231 t2 = texel4border(image, level, i0, j1, m_config.border_color, width, height); 232 t3 = texel4border(image, level, i1, j1, m_config.border_color, width, height); 233 } 234 235 f32x4 const alpha = u - floored_u; 236 f32x4 const beta = v - floored_v; 237 238 auto const lerp_0 = mix(t0, t1, alpha); 239 auto const lerp_1 = mix(t2, t3, alpha); 240 return mix(lerp_0, lerp_1, beta); 241} 242 243}