A game about forced loneliness, made by TACStudios
// Include guard for the common shader library header (the matching #endif lies beyond this section).
#ifndef UNITY_COMMON_INCLUDED
#define UNITY_COMMON_INCLUDED

#if SHADER_API_MOBILE || SHADER_API_GLES3 || SHADER_API_SWITCH || defined(UNITY_UNIFIED_SHADER_PRECISION_MODEL)
#pragma warning (disable : 3205) // conversion of larger type to smaller
#endif

// Convention:

// Unity is Y up and left-handed in world space.
// Caution: When going from world space to view space, Unity is right-handed in view space and the determinant of the matrix is negative.
// For cubemap capture (reflection probe) view space is still left-handed (cubemap convention) and the determinant is positive.

// The lighting code assumes that 1 Unity unit (1uu) == 1 meter. This is very important regarding physically based light units and inverse square attenuation.

// The space is given at the end of the variable name:
// WS: world space
// RWS: Camera-Relative world space. A space where the translation of the camera has already been subtracted in order to improve precision
// VS: view space
// OS: object space
// CS: Homogeneous clip space
// TS: tangent space
// TXS: texture space
// Example: NormalWS

// normalized / unnormalized vector
// normalized directions are almost everywhere, we tag unnormalized vectors with 'un'.
// Example: unL for unnormalized light vector

// Use a capital letter for regular vectors; vectors always point outward from the current pixel position (ready for the lighting equation).
// A capital letter means the vector is normalized, unless we put 'un' in front of it.
// V: View vector  (no eye vector)
// L: Light vector
// N: Normal vector
// H: Half vector

// Input/output structs are in PascalCase and prefixed by the entry type
// struct AttributesDefault
// struct VaryingsDefault
// use input/output as variable name when using these structures

// Entry program name
// VertDefault
// FragDefault / FragForward / FragDeferred

// Constant floating-point numbers are written as 1.0  (not 1, not 1.0f, not 1.0h)

// Uniforms have _ as prefix + uppercase: _LowercaseThenCamelCase

// Do not use "in", only "out" or "inout" as qualifier; no "inline" keyword either, it is useless.
// When declaring "out" arguments of a function, they always come last

// Headers from ShaderLibrary do not include "common.hlsl"; this should be included in the .shader using it (or Material.hlsl)

// All uniforms should be in a constant buffer (nothing in the global namespace).
// The reason is that for compute shaders we need to guarantee that the layout of CBs is consistent across kernels. Something that we can't control with the global namespace (uniforms get optimized out if not used, modifying the global CBuffer layout per kernel)

// Structure definitions that are shared between C# and hlsl.
// These structures need to be aligned on float4 to respect various packing rules from the shader language. This means that these structures need to be padded.
// Rules: When doing an array for constant buffer variables, we always use float4 to avoid any packing issue, particularly between compute shaders and pixel shaders
// i.e. don't use SetGlobalFloatArray or SetComputeFloatParams
// The array can be aliased in hlsl. Example:
// uniform float4 packedArray[3];
// static float unpackedArray[12] = (float[12])packedArray;

// The functions of the shader library are stateless, no uniform is declared in it.
// Any function that requires an explicit precision uses the float or half qualifier; when the function can support both, it uses real (see below)
// If a function requires both a half and a float version, then both need to be explicitly defined

///
/// Hardware Support for Wave Operations
///

// Support for wave operations is intentionally limited to the compute shader stage in order to make this functionality available to a wider range of hardware.
// Outputs of this section (compute stage only):
//   UNITY_HW_WAVE_SIZE      - wave width, when a fixed size was advertised
//   UNITY_HW_SUPPORTS_WAVE  - defined to 1 when either the platform or the device path matched
#if defined(SHADER_STAGE_COMPUTE)
    //
    // Platform Support
    //
    // Platforms may indicate support for wave operations at compile-time.
    // Shaders on these platforms may not always be compiled with a compiler that supports wave operations.
    // To simplify usage, we check for a supported compiler here before indicating that wave operations are supported.
    #if ((defined(UNITY_PLATFORM_SUPPORTS_WAVE_32) || defined(UNITY_PLATFORM_SUPPORTS_WAVE_64)) && (defined(UNITY_COMPILER_DXC) || defined(SHADER_API_PSSL)))
        #if defined(UNITY_PLATFORM_SUPPORTS_WAVE_32)
            #define UNITY_HW_WAVE_SIZE 32
        #elif defined(UNITY_PLATFORM_SUPPORTS_WAVE_64)
            #define UNITY_HW_WAVE_SIZE 64
        #endif

        #define UNITY_PLATFORM_SUPPORTS_WAVE 1
    //
    // Device Support
    //
    // Devices may indicate support for wave operations at run-time.
    // Shaders compiled with these defines are always compiled with a compiler that supports wave operations.
    #elif (defined(UNITY_DEVICE_SUPPORTS_WAVE_ANY) || defined(UNITY_DEVICE_SUPPORTS_WAVE_8) || defined(UNITY_DEVICE_SUPPORTS_WAVE_16) || defined(UNITY_DEVICE_SUPPORTS_WAVE_32) || defined(UNITY_DEVICE_SUPPORTS_WAVE_64) || defined(UNITY_DEVICE_SUPPORTS_WAVE_128))
        // Note: with only UNITY_DEVICE_SUPPORTS_WAVE_ANY defined, UNITY_HW_WAVE_SIZE is left undefined.
        #if defined(UNITY_DEVICE_SUPPORTS_WAVE_8)
            #define UNITY_HW_WAVE_SIZE 8
        #elif defined(UNITY_DEVICE_SUPPORTS_WAVE_16)
            #define UNITY_HW_WAVE_SIZE 16
        #elif defined(UNITY_DEVICE_SUPPORTS_WAVE_32)
            #define UNITY_HW_WAVE_SIZE 32
        #elif defined(UNITY_DEVICE_SUPPORTS_WAVE_64)
            #define UNITY_HW_WAVE_SIZE 64
        #elif defined(UNITY_DEVICE_SUPPORTS_WAVE_128)
            #define UNITY_HW_WAVE_SIZE 128
        #endif

        #define UNITY_DEVICE_SUPPORTS_WAVE 1
    #endif

    #if (defined(UNITY_PLATFORM_SUPPORTS_WAVE) || defined(UNITY_DEVICE_SUPPORTS_WAVE))
        #define UNITY_HW_SUPPORTS_WAVE 1
    #endif
#endif

// 'real' is the library's precision-agnostic scalar: it aliases half when the target
// can use half precision and the including shader prefers it, and float otherwise.
// Everything below is skipped when the including shader already defined 'real'.
#ifndef real

// The including shader should define whether half
// precision is suitable for its needs.  The shader
// API (for now) can indicate whether half is possible.
#if defined(SHADER_API_MOBILE) || defined(SHADER_API_SWITCH) || defined(UNITY_UNIFIED_SHADER_PRECISION_MODEL)
#define HAS_HALF 1
#else
#define HAS_HALF 0
#endif

#ifndef PREFER_HALF
#define PREFER_HALF 1
#endif

#if HAS_HALF && PREFER_HALF
#define REAL_IS_HALF 1
#else
#define REAL_IS_HALF 0
#endif // Do we have half?

#if REAL_IS_HALF
#define HALF_IS_FLOAT 0
// Remap every half type used by the 'real' aliases below to the minimum-precision min16float family.
#define half min16float
#define half2 min16float2
#define half3 min16float3
#define half4 min16float4
#define half2x2 min16float2x2
#define half2x3 min16float2x3
// half2x4 was missing from this list although real2x4 aliases it below; without the remap
// real2x4 was the only 'real' matrix type that did not resolve to a min16float type.
#define half2x4 min16float2x4
#define half3x2 min16float3x2
#define half3x3 min16float3x3
#define half3x4 min16float3x4
#define half4x3 min16float4x3
#define half4x4 min16float4x4
#else
// NOTE(review): relies on 'defined' being evaluated when this macro is expanded inside an #if - confirm for all target compilers.
#define HALF_IS_FLOAT (!defined(UNITY_DEVICE_SUPPORTS_NATIVE_16BIT))
#endif

#if REAL_IS_HALF
#define real half
#define real2 half2
#define real3 half3
#define real4 half4

#define real2x2 half2x2
#define real2x3 half2x3
#define real2x4 half2x4
#define real3x2 half3x2
#define real3x3 half3x3
#define real3x4 half3x4
#define real4x3 half4x3
#define real4x4 half4x4

#define REAL_MIN HALF_MIN
#define REAL_MAX HALF_MAX
#define REAL_EPS HALF_EPS

#else

#define real float
#define real2 float2
#define real3 float3
#define real4 float4

#define real2x2 float2x2
#define real2x3 float2x3
#define real2x4 float2x4
#define real3x2 float3x2
#define real3x3 float3x3
#define real3x4 float3x4
#define real4x3 float4x3
#define real4x4 float4x4

#define REAL_MIN FLT_MIN
#define REAL_MAX FLT_MAX
#define REAL_EPS FLT_EPS

#endif // REAL_IS_HALF

#endif // #ifndef real

// Target in compute shader are supported in 2018.2, for now define ours
// (Note only 45 and above support compute shader)
#ifdef SHADER_STAGE_COMPUTE
#   ifndef SHADER_TARGET
#       if defined(SHADER_API_METAL)
#       define SHADER_TARGET 45
#       else
#       define SHADER_TARGET 50
#       endif
#   endif
#endif

// This is the default keyword combination and needs to be overridden by the platforms that need specific behaviors
// when enabling conservative depth overrides
#define SV_POSITION_QUALIFIERS
#define DEPTH_OFFSET_SEMANTIC SV_Depth

// Include language
// header
// Exactly one platform API header is selected from the SHADER_API_* define; an unknown API is a hard error.
#if defined (SHADER_API_GAMECORE)
#include "Packages/com.unity.render-pipelines.gamecore/ShaderLibrary/API/GameCore.hlsl"
#elif defined(SHADER_API_XBOXONE)
#include "Packages/com.unity.render-pipelines.xboxone/ShaderLibrary/API/XBoxOne.hlsl"
#elif defined(SHADER_API_PS4)
#include "Packages/com.unity.render-pipelines.ps4/ShaderLibrary/API/PSSL.hlsl"
#elif defined(SHADER_API_PS5)
#include "Packages/com.unity.render-pipelines.ps5/ShaderLibrary/API/PSSL.hlsl"
#elif defined(SHADER_API_D3D11)
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/API/D3D11.hlsl"
#elif defined(SHADER_API_METAL)
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/API/Metal.hlsl"
#elif defined(SHADER_API_VULKAN)
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/API/Vulkan.hlsl"
#elif defined(SHADER_API_SWITCH)
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/API/Switch.hlsl"
#elif defined(SHADER_API_GLCORE)
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/API/GLCore.hlsl"
#elif defined(SHADER_API_GLES3)
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/API/GLES3.hlsl"
#elif defined(SHADER_API_WEBGPU)
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/API/WebGPU.hlsl"
#else
#error unsupported shader api
#endif
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/API/Validate.hlsl"

#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Macros.hlsl"
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Random.hlsl"

// On every API except PS5 the marker macros expand to nothing.
#if !defined(SHADER_API_PS5)
#define PushMarker(str)
#define PopMarker()
#endif

#ifdef SHADER_API_XBOXONE // TODO: to move in .nda package in 21.1
#define PLATFORM_SUPPORTS_PRIMITIVE_ID_IN_PIXEL_SHADER
#endif

// Framebuffer-input (render pass) macros: three implementations follow, chosen by platform/compiler.
#if defined(PLATFORM_SUPPORTS_NATIVE_RENDERPASS)

    #if defined(UNITY_COMPILER_DXC)

    //Subpass inputs are
// disallowed in non-fragment shader stages with DXC so we need some dummy value to use in the fragment function while it's not being compiled
        #if defined(SHADER_STAGE_FRAGMENT)
            // Real subpass inputs: a 4-component SubpassInput bound to input attachment 'idx'.
            #define UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX(type, idx) [[vk::input_attachment_index(idx)]] SubpassInput<type##4> hlslcc_fbinput_##idx
            #define UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX_MS(type, idx) [[vk::input_attachment_index(idx)]] SubpassInputMS<type##4> hlslcc_fbinput_##idx
        #else
            //declaring dummy resources here so that non-fragment shader stage automatic bindings wouldn't diverge from the fragment shader (important for vulkan)
            // The dummy variable is a 4-component vector so that LOAD_FRAMEBUFFER_INPUT() has the same type
            // (type##4) in every stage; code that type-checks in the fragment stage then also type-checks here.
            // Neither macro ends with ';' - the caller supplies it, exactly as for the fragment-stage versions.
            #define UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX(type, idx) Texture2D dxc_dummy_fbinput_resource##idx; static type##4 DXC_DummySubpassVariable##idx = type(0).xxxx
            #define UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX_MS(type, idx) Texture2D dxc_dummy_fbinput_resource##idx; static type##4 DXC_DummySubpassVariable##idx = type(0).xxxx
        #endif
        // Renderpass inputs: Vulkan/Metal subpass input
        #define FRAMEBUFFER_INPUT_FLOAT(idx) UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX(float, idx)
        #define FRAMEBUFFER_INPUT_FLOAT_MS(idx) UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX_MS(float, idx)
        // For halfs
        #define FRAMEBUFFER_INPUT_HALF(idx) UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX(half, idx)
        #define FRAMEBUFFER_INPUT_HALF_MS(idx) UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX_MS(half, idx)
        // For ints
        #define FRAMEBUFFER_INPUT_INT(idx) UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX(int, idx)
        #define FRAMEBUFFER_INPUT_INT_MS(idx) UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX_MS(int, idx)
        // For uints
        #define FRAMEBUFFER_INPUT_UINT(idx) UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX(uint, idx)
        #define FRAMEBUFFER_INPUT_UINT_MS(idx) UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX_MS(uint, idx)

        // Loads: 'v2fname' is unused on this path; the subpass input is read at the current pixel.
        #if defined(SHADER_STAGE_FRAGMENT)
            #define LOAD_FRAMEBUFFER_INPUT(idx, v2fname) hlslcc_fbinput_##idx.SubpassLoad()
            #define LOAD_FRAMEBUFFER_INPUT_MS(idx, sampleIdx, v2fname) hlslcc_fbinput_##idx.SubpassLoad(sampleIdx)
        #else
            #define LOAD_FRAMEBUFFER_INPUT(idx, v2fname) DXC_DummySubpassVariable##idx
            #define LOAD_FRAMEBUFFER_INPUT_MS(idx, sampleIdx, v2fname) DXC_DummySubpassVariable##idx
        #endif

    #elif defined(SHADER_API_METAL) && defined(UNITY_NEEDS_RENDERPASS_FBFETCH_FALLBACK)

    // On desktop metal we need special magic due to the need to support both intel and apple silicon
    //   since the former does not support framebuffer fetch
    // Due to this we have special considerations:
    // 1. since we might need to bind the copy texture, to simplify our lives we always declare _UnityFBInput texture
    //    in metal translation we will add function_constant, but we still want to generate binding in hlsl
    //    so that unity knows about the possibility
    // 2. hlsl do not have anything like function constants, hence we will add bool to the fake cbuffer for subpass
    //    again, this is done only for hlsl to generate proper code - in translation it will be changed to
    //    a proper function constant (i.e. hlslcc_SubpassInput_f_ cbuffer is just "metadata" and is absent in metal code)
    // 3.
// we want to generate an actual if command (not conditional move), hence we need to have an interim function
    //    alas we are not able to hide in it the texture coords: we are guaranteed to have just one "declare fb input"
    //    per index, but nothing stops users to have several "read fb input", hence we need to generate function code
    //    in the former, where we do not know the source of uv coords
    //    while the usage looks weird (we pass hlslcc_fbfetch_ in the function), it is ok due to the way hlsl compiler works
    //    it will generate an actual if and access hlslcc_fbfetch_ only if framebuffer fetch is available
    //    and when creating metal program, compiler takes care of this (function_constant magic)

    // Declares the fallback texture for input 'idx' plus ReadFBInput_<idx>(), which returns the
    // framebuffer-fetch value when 'var' is true and otherwise loads the texel at 'coord'.
    #define RENDERPASS_DECLARE_FALLBACK(T, idx) \
        Texture2D<T> _UnityFBInput##idx; float4 _UnityFBInput##idx##_TexelSize; \
        inline T ReadFBInput_##idx(bool var, uint2 coord) { \
            [branch]if(var) { return hlslcc_fbinput_##idx; } \
            else { return _UnityFBInput##idx.Load(uint3(coord,0)); } \
        }
    // Multisampled variant: additionally selects sample 'sampleIdx'.
    #define RENDERPASS_DECLARE_FALLBACK_MS(T, idx) \
        Texture2DMS<T> _UnityFBInput##idx; float4 _UnityFBInput##idx##_TexelSize; \
        inline T ReadFBInput_##idx(bool var, uint2 coord, uint sampleIdx) { \
            [branch]if(var) { return hlslcc_fbinput_##idx[sampleIdx]; } \
            else { return _UnityFBInput##idx.Load(coord,sampleIdx); } \
        }

    #define FRAMEBUFFER_INPUT_FLOAT(idx) \
        cbuffer hlslcc_SubpassInput_f_##idx { float4 hlslcc_fbinput_##idx; bool hlslcc_fbfetch_##idx; }; \
        RENDERPASS_DECLARE_FALLBACK(float4, idx)

    #define FRAMEBUFFER_INPUT_FLOAT_MS(idx) \
        cbuffer hlslcc_SubpassInput_F_##idx { float4 hlslcc_fbinput_##idx[8]; bool hlslcc_fbfetch_##idx; }; \
        RENDERPASS_DECLARE_FALLBACK_MS(float4, idx)

    #define FRAMEBUFFER_INPUT_HALF(idx) \
        cbuffer hlslcc_SubpassInput_h_##idx { half4 hlslcc_fbinput_##idx; bool hlslcc_fbfetch_##idx; }; \
        RENDERPASS_DECLARE_FALLBACK(half4, idx)

    #define FRAMEBUFFER_INPUT_HALF_MS(idx) \
        cbuffer hlslcc_SubpassInput_H_##idx { half4 hlslcc_fbinput_##idx[8]; bool hlslcc_fbfetch_##idx; }; \
        RENDERPASS_DECLARE_FALLBACK_MS(half4, idx)

    #define FRAMEBUFFER_INPUT_INT(idx) \
        cbuffer hlslcc_SubpassInput_i_##idx { int4 hlslcc_fbinput_##idx; bool hlslcc_fbfetch_##idx; }; \
        RENDERPASS_DECLARE_FALLBACK(int4, idx)

    #define FRAMEBUFFER_INPUT_INT_MS(idx) \
        cbuffer hlslcc_SubpassInput_I_##idx { int4 hlslcc_fbinput_##idx[8]; bool hlslcc_fbfetch_##idx; }; \
        RENDERPASS_DECLARE_FALLBACK_MS(int4, idx)

    #define FRAMEBUFFER_INPUT_UINT(idx) \
        cbuffer hlslcc_SubpassInput_u_##idx { uint4 hlslcc_fbinput_##idx; bool hlslcc_fbfetch_##idx; }; \
        RENDERPASS_DECLARE_FALLBACK(uint4, idx)

    // Uses RENDERPASS_DECLARE_FALLBACK_MS like every other *_MS variant; the previous
    // UNITY_-prefixed name is not defined in this header, so the macro could not expand.
    #define FRAMEBUFFER_INPUT_UINT_MS(idx) \
        cbuffer hlslcc_SubpassInput_U_##idx { uint4 hlslcc_fbinput_##idx[8]; bool hlslcc_fbfetch_##idx; }; \
        RENDERPASS_DECLARE_FALLBACK_MS(uint4, idx)

    #define LOAD_FRAMEBUFFER_INPUT(idx, v2fname) ReadFBInput_##idx(hlslcc_fbfetch_##idx, uint2(v2fname.xy))
    #define LOAD_FRAMEBUFFER_INPUT_MS(idx, sampleIdx, v2fname) ReadFBInput_##idx(hlslcc_fbfetch_##idx, uint2(v2fname.xy), sampleIdx)

    #else

    // For floats
    #define FRAMEBUFFER_INPUT_FLOAT(idx) cbuffer hlslcc_SubpassInput_f_##idx { float4 hlslcc_fbinput_##idx; }
    #define FRAMEBUFFER_INPUT_FLOAT_MS(idx) cbuffer hlslcc_SubpassInput_F_##idx { float4 hlslcc_fbinput_##idx[8]; }
    // For halfs
    #define FRAMEBUFFER_INPUT_HALF(idx) cbuffer hlslcc_SubpassInput_h_##idx { half4 hlslcc_fbinput_##idx; }
    #define FRAMEBUFFER_INPUT_HALF_MS(idx) cbuffer hlslcc_SubpassInput_H_##idx { half4 hlslcc_fbinput_##idx[8]; }
    // For ints
    #define FRAMEBUFFER_INPUT_INT(idx) cbuffer hlslcc_SubpassInput_i_##idx { int4 hlslcc_fbinput_##idx; }
    #define FRAMEBUFFER_INPUT_INT_MS(idx) cbuffer hlslcc_SubpassInput_I_##idx { int4 hlslcc_fbinput_##idx[8]; }
    // For uints
    #define FRAMEBUFFER_INPUT_UINT(idx) cbuffer hlslcc_SubpassInput_u_##idx { uint4 hlslcc_fbinput_##idx; }
    #define FRAMEBUFFER_INPUT_UINT_MS(idx) cbuffer hlslcc_SubpassInput_U_##idx { uint4 hlslcc_fbinput_##idx[8]; }

    #define LOAD_FRAMEBUFFER_INPUT(idx, v2fname) hlslcc_fbinput_##idx
    #define LOAD_FRAMEBUFFER_INPUT_MS(idx, sampleIdx, v2fname) hlslcc_fbinput_##idx[sampleIdx]

    #endif

#else

    // Renderpass inputs: General fallback paths
    // The input is a regular texture; loads address it with the pixel position passed by the caller.
    #define FRAMEBUFFER_INPUT_FLOAT(idx) TEXTURE2D_FLOAT(_UnityFBInput##idx); float4 _UnityFBInput##idx##_TexelSize
    #define FRAMEBUFFER_INPUT_HALF(idx) TEXTURE2D_HALF(_UnityFBInput##idx); float4 _UnityFBInput##idx##_TexelSize
    #define FRAMEBUFFER_INPUT_INT(idx) TEXTURE2D_INT(_UnityFBInput##idx); float4 _UnityFBInput##idx##_TexelSize
    #define FRAMEBUFFER_INPUT_UINT(idx) TEXTURE2D_UINT(_UnityFBInput##idx); float4 _UnityFBInput##idx##_TexelSize

    #define LOAD_FRAMEBUFFER_INPUT(idx, v2fvertexname) _UnityFBInput##idx.Load(uint3(v2fvertexname.xy, 0))

    #define FRAMEBUFFER_INPUT_FLOAT_MS(idx) Texture2DMS<float4> _UnityFBInput##idx; float4 _UnityFBInput##idx##_TexelSize
    #define FRAMEBUFFER_INPUT_HALF_MS(idx) Texture2DMS<float4> _UnityFBInput##idx; float4 _UnityFBInput##idx##_TexelSize
    #define FRAMEBUFFER_INPUT_INT_MS(idx) Texture2DMS<int4> _UnityFBInput##idx; float4 _UnityFBInput##idx##_TexelSize
    #define FRAMEBUFFER_INPUT_UINT_MS(idx) Texture2DMS<uint4> _UnityFBInput##idx; float4 _UnityFBInput##idx##_TexelSize

    #define LOAD_FRAMEBUFFER_INPUT_MS(idx, sampleIdx, v2fvertexname) _UnityFBInput##idx.Load(uint2(v2fvertexname.xy), sampleIdx)

#endif

// ----------------------------------------------------------------------------
// Global resources API definitions for Ray Tracing
// ----------------------------------------------------------------------------
// In ray tracing stages with global resources enabled, every global is pinned to an explicit
// register in space1; otherwise the 'reg' argument is ignored.
#if (SHADER_STAGE_RAY_TRACING && UNITY_RAY_TRACING_GLOBAL_RESOURCES)
    #define GLOBAL_RESOURCE(type, name, reg) type name : register(reg, space1);
    #define GLOBAL_CBUFFER_START(name, reg) cbuffer name : register(reg, space1) {
    #define GLOBAL_TEXTURE2D(name, reg) TEXTURE2D(name) : register(reg, space1)
    #define GLOBAL_TEXTURE2D_ARRAY(name, reg) TEXTURE2D_ARRAY(name) : register(reg, space1)
    #define GLOBAL_TEXTURECUBE_ARRAY(name, reg) TEXTURECUBE_ARRAY(name) : register(reg, space1)
#else
    #define GLOBAL_RESOURCE(type, name, reg) type name;
    #define GLOBAL_CBUFFER_START(name, reg) CBUFFER_START(name)
    #define GLOBAL_TEXTURE2D(name, reg) TEXTURE2D(name)
    #define GLOBAL_TEXTURE2D_ARRAY(name, reg) TEXTURE2D_ARRAY(name)
    #define GLOBAL_TEXTURECUBE_ARRAY(name, reg) TEXTURECUBE_ARRAY(name)
#endif

// ----------------------------------------------------------------------------
// Common intrinsic (general implementation of intrinsic available on some platform)
// ----------------------------------------------------------------------------

#if !defined(PLATFORM_SUPPORTS_WAVE_INTRINSICS) && !defined(UNITY_COMPILER_DXC) && !defined(UNITY_HW_SUPPORTS_WAVE)
// Intercept wave functions when they aren't supported to provide better error messages
#define WaveActiveAllTrue ERROR_ON_UNSUPPORTED_FUNCTION(WaveActiveAllTrue)
#define WaveActiveAnyTrue ERROR_ON_UNSUPPORTED_FUNCTION(WaveActiveAnyTrue)
#define WaveGetLaneIndex ERROR_ON_UNSUPPORTED_FUNCTION(WaveGetLaneIndex)
#define WaveIsFirstLane ERROR_ON_UNSUPPORTED_FUNCTION(WaveIsFirstLane)
#define GetWaveID ERROR_ON_UNSUPPORTED_FUNCTION(GetWaveID)
#define WaveActiveMin ERROR_ON_UNSUPPORTED_FUNCTION(WaveActiveMin)
#define WaveActiveMax ERROR_ON_UNSUPPORTED_FUNCTION(WaveActiveMax)
#define WaveActiveBallot ERROR_ON_UNSUPPORTED_FUNCTION(WaveActiveBallot)
#define WaveActiveSum ERROR_ON_UNSUPPORTED_FUNCTION(WaveActiveSum)
#define WaveActiveBitAnd ERROR_ON_UNSUPPORTED_FUNCTION(WaveActiveBitAnd)
#define WaveActiveBitOr ERROR_ON_UNSUPPORTED_FUNCTION(WaveActiveBitOr)
#define WaveGetLaneCount ERROR_ON_UNSUPPORTED_FUNCTION(WaveGetLaneCount)
#define WaveIsHelperLane ERROR_ON_UNSUPPORTED_FUNCTION(WaveIsHelperLane)
#endif

#if defined(PLATFORM_SUPPORTS_WAVE_INTRINSICS)
// Helper macro to compute lane swizzle offset starting from andMask, orMask and xorMask.
// IMPORTANT, to guarantee compatibility with all platforms, the masks need to be constant literals (constants at compile time)
// Each argument is parenthesized so that an expression argument (e.g. A | B) is shifted as a whole.
#define LANE_SWIZZLE_OFFSET(andMask, orMask, xorMask) ((andMask) | ((orMask) << 5) | ((xorMask) << 10))
#endif

#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/CommonDeprecated.hlsl"

#ifndef INTRINSIC_BITFIELD_EXTRACT
// Unsigned integer bit field extraction.
// Returns the 'numBits' bits of 'data' that start at bit 'offset', right-aligned.
// Note that the intrinsic itself generates a vector instruction.
// Wrap this function with WaveReadLaneFirst() to get scalar output.
// NOTE(review): numBits == 32 depends on how the target handles a shift by 32 - confirm before relying on it.
uint BitFieldExtract(uint data, uint offset, uint numBits)
{
    uint mask = (1u << numBits) - 1u;
    return (data >> offset) & mask;
}
#endif // INTRINSIC_BITFIELD_EXTRACT

#ifndef INTRINSIC_BITFIELD_EXTRACT_SIGN_EXTEND
// Integer bit field extraction with sign extension.
// Returns the 'numBits' bits of 'data' that start at bit 'offset'; the field's top bit is
// replicated into all higher bits of the result.
// Note that the intrinsic itself generates a vector instruction.
// Wrap this function with WaveReadLaneFirst() to get scalar output.
int BitFieldExtractSignExtend(int data, uint offset, uint numBits)
{
    int  shifted = data >> offset;      // Sign-extending (arithmetic) shift
    int  signBit = shifted & (1u << (numBits - 1u));
    uint mask    = (1u << numBits) - 1u;

    return -signBit | (shifted & mask); // Use 2-complement for negation to replicate the sign bit
}
#endif // INTRINSIC_BITFIELD_EXTRACT_SIGN_EXTEND

#ifndef INTRINSIC_BITFIELD_INSERT
// Inserts the bits indicated by 'mask' from 'src' into 'dst'.
uint BitFieldInsert(uint mask, uint src, uint dst)
{
    uint insertedBits  = src & mask;   // bits taken from 'src' (inside the mask)
    uint preservedBits = dst & ~mask;  // bits kept from 'dst' (outside the mask)
    return insertedBits | preservedBits;
}
#endif // INTRINSIC_BITFIELD_INSERT

// Returns true when bit number 'offset' of 'data' is 1.
bool IsBitSet(uint data, uint offset)
{
    uint bit = BitFieldExtract(data, offset, 1u);
    return bit != 0;
}

// Forces bit number 'offset' of 'data' to 1.
void SetBit(inout uint data, uint offset)
{
    uint bitMask = 1u << offset;
    data = data | bitMask;
}

// Forces bit number 'offset' of 'data' to 0.
void ClearBit(inout uint data, uint offset)
{
    uint bitMask = 1u << offset;
    data = data & ~bitMask;
}

// Inverts bit number 'offset' of 'data'.
void ToggleBit(inout uint data, uint offset)
{
    uint bitMask = 1u << offset;
    data = data ^ bitMask;
}

// Fallbacks for platforms that do not provide the intrinsics below.
#ifndef INTRINSIC_WAVEREADFIRSTLANE
    // Warning: for correctness, the argument's value must be the same across all lanes of the wave.
    // The fallback simply returns its argument.
    TEMPLATE_1_FLT_HALF(WaveReadLaneFirst, scalarValue, return scalarValue)
    TEMPLATE_1_INT(WaveReadLaneFirst, scalarValue, return scalarValue)
#endif

#ifndef INTRINSIC_MUL24
    // Fallback: plain integer multiply.
    TEMPLATE_2_INT(Mul24, a, b, return a * b)
#endif // INTRINSIC_MUL24

#ifndef INTRINSIC_MAD24
    // Fallback: plain integer multiply-add.
    TEMPLATE_3_INT(Mad24, a, b, c, return a * b + c)
#endif // INTRINSIC_MAD24

#ifndef INTRINSIC_MINMAX3
    // Three-operand min/max built from the two-operand intrinsics.
    TEMPLATE_3_FLT_HALF(Min3, a, b, c, return min(min(a, b), c))
    TEMPLATE_3_INT(Min3, a, b, c, return min(min(a, b), c))
    TEMPLATE_3_FLT_HALF(Max3, a, b, c, return max(max(a, b), c))
    TEMPLATE_3_INT(Max3, a, b, c, return max(max(a, b), c))
#endif // INTRINSIC_MINMAX3

// Average of three values (sum scaled by an approximation of 1/3).
TEMPLATE_3_FLT_HALF(Avg3, a, b, c, return (a + b + c) * 0.33333333)

// Important! Quad functions only valid in pixel shaders!
// Returns, per axis, -1 when the pixel coordinate is even and +1 when it is odd.
float2 GetQuadOffset(int2 screenPos)
{
    float2 parity = float2(float(screenPos.x & 1), float(screenPos.y & 1));
    return parity * 2.0 - 1.0;
}

#ifndef INTRINSIC_QUAD_SHUFFLE
// Derivative-based fallbacks: the neighbour's value is rebuilt from the fine derivative and
// the parity of the pixel coordinate.
float QuadReadAcrossX(float value, int2 screenPos)
{
    float dirX = float(screenPos.x & 1) * 2.0 - 1.0;
    float dX   = ddx_fine(value);
    return value - (dX * dirX);
}

float QuadReadAcrossY(float value, int2 screenPos)
{
    float dirY = float(screenPos.y & 1) * 2.0 - 1.0;
    float dY   = ddy_fine(value);
    return value - (dY * dirY);
}

float QuadReadAcrossDiagonal(float value, int2 screenPos)
{
    float2 dir = GetQuadOffset(screenPos);
    // First step across X, then take the Y derivative of that intermediate result.
    float acrossX = value - (ddx_fine(value) * dir.x);
    float dY      = ddy_fine(acrossX);
    return acrossX - (dY * dir.y);
}
#endif

// Vector versions: each component is read independently through the scalar function.
float3 QuadReadFloat3AcrossX(float3 val, int2 positionSS)
{
    float3 result;
    result.x = QuadReadAcrossX(val.x, positionSS);
    result.y = QuadReadAcrossX(val.y, positionSS);
    result.z = QuadReadAcrossX(val.z, positionSS);
    return result;
}

float4 QuadReadFloat4AcrossX(float4 val, int2 positionSS)
{
    float4 result;
    result.x = QuadReadAcrossX(val.x, positionSS);
    result.y = QuadReadAcrossX(val.y, positionSS);
    result.z = QuadReadAcrossX(val.z, positionSS);
    result.w = QuadReadAcrossX(val.w, positionSS);
    return result;
}

float3 QuadReadFloat3AcrossY(float3 val, int2 positionSS)
{
    float3 result;
    result.x = QuadReadAcrossY(val.x, positionSS);
    result.y = QuadReadAcrossY(val.y, positionSS);
    result.z = QuadReadAcrossY(val.z, positionSS);
    return result;
}

float4 QuadReadFloat4AcrossY(float4 val, int2 positionSS)
{
    float4 result;
    result.x = QuadReadAcrossY(val.x, positionSS);
    result.y = QuadReadAcrossY(val.y, positionSS);
    result.z = QuadReadAcrossY(val.z, positionSS);
    result.w = QuadReadAcrossY(val.w, positionSS);
    return result;
}

float3 QuadReadFloat3AcrossDiagonal(float3 val, int2 positionSS)
{
    float3 result;
    result.x = QuadReadAcrossDiagonal(val.x, positionSS);
    result.y = QuadReadAcrossDiagonal(val.y, positionSS);
    result.z = QuadReadAcrossDiagonal(val.z, positionSS);
    return result;
}

float4 QuadReadFloat4AcrossDiagonal(float4 val, int2 positionSS)
{
    float4 result;
    result.x = QuadReadAcrossDiagonal(val.x, positionSS);
    result.y = QuadReadAcrossDiagonal(val.y, positionSS);
    result.z = QuadReadAcrossDiagonal(val.z, positionSS);
    result.w = QuadReadAcrossDiagonal(val.w, positionSS);
    return result;
}

TEMPLATE_SWAP(Swap) // Define a Swap(a, b) function for all types

#define CUBEMAPFACE_POSITIVE_X 0
#define CUBEMAPFACE_NEGATIVE_X 1
#define CUBEMAPFACE_POSITIVE_Y 2
#define CUBEMAPFACE_NEGATIVE_Y 3
#define CUBEMAPFACE_POSITIVE_Z 4
#define CUBEMAPFACE_NEGATIVE_Z 5

#ifndef INTRINSIC_CUBEMAP_FACE_ID
// Returns the CUBEMAPFACE_* index of the face addressed by 'dir': the axis with the largest
// magnitude wins; ties are resolved in favour of Z, then Y, then X.
float CubeMapFaceID(float3 dir)
{
    float3 absDir = abs(dir);

    if (absDir.z >= absDir.x && absDir.z >= absDir.y)
    {
        return (dir.z < 0.0) ? CUBEMAPFACE_NEGATIVE_Z : CUBEMAPFACE_POSITIVE_Z;
    }

    if (absDir.y >= absDir.x)
    {
        return (dir.y < 0.0) ? CUBEMAPFACE_NEGATIVE_Y : CUBEMAPFACE_POSITIVE_Y;
    }

    return (dir.x < 0.0) ? CUBEMAPFACE_NEGATIVE_X : CUBEMAPFACE_POSITIVE_X;
}
#endif // INTRINSIC_CUBEMAP_FACE_ID

// Intrinsic isnan can't be used because it require /Gic to be enabled on fxc that we can't do. So use AnyIsNan instead
// NaN test on the raw bits: with the sign bit cleared, the value is above the infinity pattern.
bool IsNaN(float x)
{
    uint magnitudeBits = asuint(x) & 0x7FFFFFFF;
    return magnitudeBits > 0x7F800000;
}

bool AnyIsNaN(float2 v)
{
    return IsNaN(v.x) || IsNaN(v.y);
}

bool AnyIsNaN(float3 v)
{
    return IsNaN(v.x) || IsNaN(v.y) || IsNaN(v.z);
}

bool AnyIsNaN(float4 v)
{
    return IsNaN(v.x) || IsNaN(v.y) || IsNaN(v.z) || IsNaN(v.w);
}

// Infinity test on the raw bits: with the sign bit cleared, the value equals the infinity pattern.
bool IsInf(float x)
{
    uint magnitudeBits = asuint(x) & 0x7FFFFFFF;
    return magnitudeBits == 0x7F800000;
}

bool AnyIsInf(float2 v)
{
    return IsInf(v.x) || IsInf(v.y);
}

bool AnyIsInf(float3 v)
{
    return IsInf(v.x) || IsInf(v.y) || IsInf(v.z);
}

bool AnyIsInf(float4 v)
{
    return IsInf(v.x) || IsInf(v.y) || IsInf(v.z) || IsInf(v.w);
}

// True unless all exponent bits are set, i.e. false for both infinities and NaNs.
bool IsFinite(float x)
{
    uint exponentBits = asuint(x) & 0x7F800000;
    return exponentBits != 0x7F800000;
}

// Replaces a non-finite value with 0.
float SanitizeFinite(float x)
{
    if (IsFinite(x))
    {
        return x;
    }
    return 0;
}

// True when the raw bits are below the +infinity pattern (sign bit clear, not Inf, not NaN).
bool IsPositiveFinite(float x)
{
    uint bits = asuint(x);
    return bits < 0x7F800000;
}

// Replaces anything that is not a positive finite value with 0.
float SanitizePositiveFinite(float x)
{
    if (IsPositiveFinite(x))
    {
        return x;
    }
    return 0;
}

// ----------------------------------------------------------------------------
// Common math functions
// ----------------------------------------------------------------------------

real DegToRad(real deg)
{
    return deg * (PI / 180.0);
}

real RadToDeg(real rad)
{
    return rad * (180.0 / PI);
}

// Square functions for cleaner code
TEMPLATE_1_FLT_HALF(Sq, x, return (x) * (x))
TEMPLATE_1_INT(Sq, x, return (x) * (x))

// True when at most one bit of 'x' is set. Note that this also returns true for x == 0.
bool IsPower2(uint x)
{
    uint withoutLowestBit = x & (x - 1);
    return withoutLowestBit == 0;
}

// Input [0, 1] and output [0, PI/2]
// 9 VALU
real FastACosPos(real inX)
{
    real x = abs(inX);
    real result = (0.0468878 * x + -0.203471) * x + 1.570796; // p(x)
    result = result * sqrt(1.0 - x);

    return result;
}

// Ref: https://seblagarde.wordpress.com/2014/12/01/inverse-trigonometric-functions-gpu-optimization-for-amd-gcn-architecture/
// Input [-1, 1] and output [0, PI]
// 12 VALU
real FastACos(real inX)
{
    real positiveResult = FastACosPos(inX);

    // Undo range reduction
    if (inX >= 0)
    {
        return positiveResult;
    }
    return PI - positiveResult;
}

// Same cost as Acos + 1 FR
// Same error
// input [-1, 1] and output [-PI/2, PI/2]
real FastASin(real x)
{
    return HALF_PI - FastACos(x);
}

// max absolute error 1.3x10^-3
// Eberly's odd polynomial degree 5 - respect bounds
// 4 VGPR, 14 FR (10 FR, 1 QR), 2 scalar
// input [0, infinity] and output [0, PI/2]
real FastATanPos(real x)
{
    // Reduce the argument to [0, 1]; the result is mirrored around PI/2 at the end when x >= 1.
    real reduced   = (x < 1.0) ? x : 1.0 / x;
    real reducedSq = reduced * reduced;
    real approx    = 0.0872929;
    approx = -0.301895 + approx * reducedSq;
    approx = 1.0 + approx * reducedSq;
    approx = approx * reduced;
    return (x < 1.0) ? approx : HALF_PI - approx;
}

// 4 VGPR, 16 FR (12 FR, 1 QR), 2 scalar
// input [-infinity, infinity] and output [-PI/2, PI/2]
real FastATan(real x)
{
    real magnitude = FastATanPos(abs(x));
    if (x < 0.0)
    {
        return -magnitude;
    }
    return magnitude;
}

// Two-argument arctangent built on FastATan: for x < 0 the result is shifted by +PI or -PI
// according to the sign of y.
real FastAtan2(real y, real x)
{
    real halfPlaneShift = real(y >= 0.0 ? PI : -PI) * (x < 0.0);
    return FastATan(y / x) + halfPlaneShift;
}

#if (SHADER_TARGET >= 45)
// Integer log2 as the index of the highest set bit.
uint FastLog2(uint x)
{
    return firstbithigh(x);
}
#endif

// Using pow often result to a warning like this
// "pow(f, e) will not work for negative f, use abs(f) or conditionally handle negative values if you expect them"
// PositivePow remove this warning when you know the value is positive or 0 and avoid inf/NAN.
// Note: https://msdn.microsoft.com/en-us/library/windows/desktop/bb509636(v=vs.85).aspx pow(0, >0) == 0
TEMPLATE_2_FLT_HALF(PositivePow, base, power, return pow(abs(base), power))

// SafePositivePow: Same as pow(x,y) but considers x always positive and never exactly 0 such that
// SafePositivePow(0,y) will numerically converge to 1 as y -> 0, including SafePositivePow(0,0) returning 1.
//
// First, like PositivePow, SafePositivePow removes this warning for when you know the x value is positive or 0 and you know
// you avoid a NaN:
// ie you know that x == 0 and y > 0, such that pow(x,y) == pow(0, >0) == 0
// SafePositivePow(0, y) will however return close to 1 as y -> 0, see below.
//
// Also, pow(x,y) is most probably approximated as exp2(log2(x) * y), so pow(0,0) will give exp2(-inf * 0) == exp2(NaN) == NaN.
//
// SafePositivePow avoids NaN in allowing SafePositivePow(x,y) where (x,y) == (0,y) for any y including 0 by clamping x to a
// minimum of FLT_EPS. The consequences are:
//
// -As a replacement for pow(0,y) where y >= 1, the result of SafePositivePow(x,y) should be close enough to 0.
// -For cases where we substitute for pow(0,y) where 0 < y < 1, SafePositivePow(x,y) will quickly reach 1 as y -> 0, while
// normally pow(0,y) would give 0 instead of 1 for all 0 < y.
// eg: if we #define FLT_EPS 5.960464478e-8 (for fp32),
// SafePositivePow(0, 0.1) = 0.1894646
// SafePositivePow(0, 0.01) = 0.8467453
// SafePositivePow(0, 0.001) = 0.9835021
//
// Depending on the intended usage of pow(), this difference in behavior might be a moot point since:
// 1) by leaving "y" free to get to 0, we get NaNs
// 2) the behavior of SafePositivePow() has more continuity when both x and y get closer together to 0, since
// when x is assured to be positive non-zero, pow(x,x) -> 1 as x -> 0.
//
// TL;DR: SafePositivePow(x,y) avoids NaN and is safe for positive (x,y) including (x,y) == (0,0),
// but SafePositivePow(0, y) will return close to 1 as y -> 0, instead of 0, so watch out
// for behavior depending on pow(0, y) giving always 0, especially for 0 < y < 1.
//
// Ref: https://msdn.microsoft.com/en-us/library/windows/desktop/bb509636(v=vs.85).aspx
TEMPLATE_2_FLT(SafePositivePow, base, power, return pow(max(abs(base), float(FLT_EPS)), power))
TEMPLATE_2_HALF(SafePositivePow, base, power, return pow(max(abs(base), min16float(HALF_EPS)), power))

// Helpers for making shadergraph functions consider precision spec through the same $precision token used for variable types
TEMPLATE_2_FLT(SafePositivePow_float, base, power, return pow(max(abs(base), float(FLT_EPS)), power))
TEMPLATE_2_HALF(SafePositivePow_half, base, power, return pow(max(abs(base), min16float(HALF_EPS)), power))

// Function-style accessors for the float limit constants.
float Eps_float()
{
    return FLT_EPS;
}

float Min_float()
{
    return FLT_MIN;
}

float Max_float()
{
    return FLT_MAX;
}

// Function-style accessors for the half limit constants.
half Eps_half()
{
    return HALF_EPS;
}

half Min_half()
{
    return HALF_MIN;
}

half Max_half()
{
    return HALF_MAX;
}

// Compute the 'epsilon equal' relative to the scale of 'a' & 'b'.
// The farther 'a' or 'b' are from 0.0, the larger the epsilon has to be.
// The test is abs(a - b) / (abs(a) + abs(b)) < epsilon.
// The denominator is clamped to FLT_MIN: without the clamp, a == b == 0 evaluates 0 / 0 = NaN,
// and since any comparison against NaN is false, two exact zeros would be reported as "not nearly equal".
// Whenever abs(a) + abs(b) >= FLT_MIN the clamp has no effect on the result.
bool NearlyEqual(float a, float b, float epsilon)
{
    float scale = max(abs(a) + abs(b), float(FLT_MIN));
    return abs(a - b) / scale < epsilon;
}

// Fixed-epsilon variants (FLT_EPS / HALF_EPS) using the same clamped denominator as NearlyEqual().
TEMPLATE_2_FLT(NearlyEqual_Float, a, b, return abs(a - b) / max(abs(a) + abs(b), float(FLT_MIN)) < float(FLT_EPS))
TEMPLATE_2_HALF(NearlyEqual_Half, a, b, return abs(a - b) / max(abs(a) + abs(b), min16float(HALF_MIN)) < min16float(HALF_EPS))

// Composes a floating point value with the magnitude of 'x' and the sign of 's'.
// See the comment about FastSign() below.
// With (ignoreNegZero = true) the sign is decided by (s >= 0), so -0 counts as positive.
// Otherwise the raw sign bit of 's' is transferred onto the bits of 'x'.
float CopySign(float x, float s, bool ignoreNegZero = true)
{
    if (ignoreNegZero)
    {
        return (s >= 0) ? abs(x) : -abs(x);
    }
    else
    {
        uint negZero = 0x80000000u;          // Bit pattern of -0.0: only the sign bit is set
        uint signBit = negZero & asuint(s);  // Isolate the sign bit of 's'
        return asfloat(BitFieldInsert(negZero, signBit, asuint(x)));
    }
}

// Returns -1 for negative numbers and 1 for positive numbers.
// 0 can be handled in 2 different ways.
// The IEEE floating point standard defines 0 as signed: +0 and -0.
// However, mathematics typically treats 0 as unsigned.
// Therefore, we treat -0 as +0 by default: FastSign(+0) = FastSign(-0) = 1.
// If (ignoreNegZero = false), FastSign(-0, false) = -1.
// Note that the sign() function in HLSL implements signum, which returns 0 for 0.
float FastSign(float s, bool ignoreNegZero = true)
{
    return CopySign(1.0, s, ignoreNegZero);
}

// Orthonormalizes the tangent frame using the Gram-Schmidt process.
// We assume that the normal is normalized and that the two vectors
// aren't collinear.
// Returns the new tangent (the normal is unaffected).
real3 Orthonormalize(real3 tangent, real3 normal)
{
    // Remove the component of the tangent along the normal, then renormalize.
    // TODO: use SafeNormalize()?
    return normalize(tangent - dot(tangent, normal) * normal);
}

// [start, end] -> [0, 1] : (x - start) / (end - start) = x * rcpLength - (start * rcpLength)
TEMPLATE_3_FLT_HALF(Remap01, x, rcpLength, startTimesRcpLength, return saturate(x * rcpLength - startTimesRcpLength))

// [start, end] -> [1, 0] : (end - x) / (end - start) = (end * rcpLength) - x * rcpLength
TEMPLATE_3_FLT_HALF(Remap10, x, rcpLength, endTimesRcpLength, return saturate(endTimesRcpLength - x * rcpLength))

// Remap: [0.5 / size, 1 - 0.5 / size] -> [0, 1]
// Here start = 0.5 / size and length = (size - 1) / size, hence
// rcpLength = size / (size - 1) and start * rcpLength = 0.5 / (size - 1).
real2 RemapHalfTexelCoordTo01(real2 coord, real2 size)
{
    const real2 rcpLen = size * rcp(size - 1);
    const real2 startTimesRcpLength = 0.5 * rcp(size - 1);

    return Remap01(coord, rcpLen, startTimesRcpLength);
}

// Remap: [0, 1] -> [0.5 / size, 1 - 0.5 / size]
real2 Remap01ToHalfTexelCoord(real2 coord, real2 size)
{
    const real2 start = 0.5 * rcp(size);
    const real2 len = 1 - rcp(size);

    return coord * len + start;
}

// smoothstep that assumes that 'x' lies within the [0, 1] interval.
real Smoothstep01(real x)
{
    // Cubic Hermite polynomial 3x^2 - 2x^3; 'x' is not clamped here.
    return x * x * (3 - (2 * x));
}

// Quintic variant 6x^5 - 15x^4 + 10x^3; 'x' is not clamped here.
real Smootherstep01(real x)
{
    return x * x * x * (x * (x * 6 - 15) + 10);
}

// Smootherstep over the interval [a, b]: 't' is first remapped (and saturated by Remap01) to [0, 1].
// The reciprocal of (b - a) is taken without any guard.
real Smootherstep(real a, real b, real t)
{
    real r = rcp(b - a);
    real x = Remap01(t, r, a * r);
    return Smootherstep01(x);
}

// Normalized linear interpolation between A and B.
float3 NLerp(float3 A, float3 B, float t)
{
    return normalize(lerp(A, B, t));
}

// Squared length of 'v'.
float Length2(float3 v)
{
    return dot(v, v);
}

#ifndef BUILTIN_TARGET_API
// x^4 computed as (x^2) * (x^2).
real Pow4(real x)
{
    return (x * x) * (x * x);
}
#endif

// RangeRemap:       [min, max] -> [0, 1], saturated.
// RangeRemapFrom01: [0, 1] -> [min, max], not clamped.
TEMPLATE_3_FLT(RangeRemap, min, max, t, return saturate((t - min) / (max - min)))
TEMPLATE_3_FLT(RangeRemapFrom01, min, max, t, return (max - min) * t + min)

// General 4x4 matrix inverse, computed as cofactor terms divided by the determinant.
// There is no guard against a zero determinant (idet = 1 / det).
float4x4 Inverse(float4x4 m)
{
    // Local 'nRC' reads m[C - 1][R - 1].
    float n11 = m[0][0], n12 = m[1][0], n13 = m[2][0], n14 = m[3][0];
    float n21 = m[0][1], n22 = m[1][1], n23 = m[2][1], n24 = m[3][1];
    float n31 = m[0][2], n32 = m[1][2], n33 = m[2][2], n34 = m[3][2];
    float n41 = m[0][3], n42 = m[1][3], n43 = m[2][3], n44 = m[3][3];

    // These four terms are shared between the determinant and the first entry of each output row.
    float t11 = n23 * n34 * n42 - n24 * n33 * n42 + n24 * n32 * n43 - n22 * n34 * n43 - n23 * n32 * n44 + n22 * n33 * n44;
    float t12 = n14 * n33 * n42 - n13 * n34 * n42 - n14 * n32 * n43 + n12 * n34 * n43 + n13 * n32 * n44 - n12 * n33 * n44;
    float t13 = n13 * n24 * n42 - n14 * n23 * n42 + n14 * n22 * n43 - n12 * n24 * n43 - n13 * n22 * n44 + n12 * n23 * n44;
    float t14 = n14 * n23 * n32 - n13 * n24 * n32 - n14 * n22 * n33 + n12 * n24 * n33 + n13 * n22 * n34 - n12 * n23 * n34;

    float det = n11 * t11 + n21 * t12 + n31 * t13 + n41 * t14;
    float idet = 1.0f / det;

    float4x4 ret;

    ret[0][0] = t11 * idet;
    ret[0][1] = (n24 * n33 * n41 - n23 * n34 * n41 - n24 * n31 * n43 + n21 * n34 * n43 + n23 * n31 * n44 - n21 * n33 * n44) * idet;
    ret[0][2] = (n22 * n34 * n41 - n24 * n32 * n41 + n24 * n31 * n42 - n21 * n34 * n42 - n22 * n31 * n44 + n21 * n32 * n44) * idet;
    ret[0][3] = (n23 * n32 * n41 - n22 * n33 * n41 - n23 * n31 * n42 + n21 * n33 * n42 + n22 * n31 * n43 - n21 * n32 * n43) * idet;

    ret[1][0] = t12 * idet;
    ret[1][1] = (n13 * n34 * n41 - n14 * n33 * n41 + n14 * n31 * n43 - n11 * n34 * n43 - n13 * n31 * n44 + n11 * n33 * n44) * idet;
    ret[1][2] = (n14 * n32 * n41 - n12 * n34 * n41 - n14 * n31 * n42 + n11 * n34 * n42 + n12 * n31 * n44 - n11 * n32 * n44) * idet;
    ret[1][3] = (n12 * n33 * n41 - n13 * n32 * n41 + n13 * n31 * n42 - n11 * n33 * n42 - n12 * n31 * n43 + n11 * n32 * n43) * idet;

    ret[2][0] = t13 * idet;
    ret[2][1] = (n14 * n23 * n41 - n13 * n24 * n41 - n14 * n21 * n43 + n11 * n24 * n43 + n13 * n21 * n44 - n11 * n23 * n44) * idet;
    ret[2][2] = (n12 * n24 * n41 - n14 * n22 * n41 + n14 * n21 * n42 - n11 * n24 * n42 - n12 * n21 * n44 + n11 * n22 * n44) * idet;
    ret[2][3] = (n13 * n22 * n41 - n12 * n23 * n41 - n13 * n21 * n42 + n11 * n23 * n42 + n12 * n21 * n43 - n11 * n22 * n43) * idet;

    ret[3][0] = t14 * idet;
    ret[3][1] = (n13 * n24 * n31 - n14 * n23 * n31 + n14 * n21 * n33 - n11 * n24 * n33 - n13 * n21 * n34 + n11 * n23 * n34) * idet;
    ret[3][2] = (n14 * n22 * n31 - n12 * n24 * n31 - n14 * n21 * n32 + n11 * n24 * n32 + n12 * n21 * n34 - n11 * n22 * n34) * idet;
    ret[3][3] = (n12 * n23 * n31 - n13 * n22 * n31 + n13 * n21 * n32 - n11 * n23 * n32 - n12 * n21 * n33 + n11 * n22 * n33) * idet;

    return ret;
}

// Linearly remaps 'value' from [origFrom, origTo] to [targetFrom, targetTo].
// The result is not clamped, and (origTo - origFrom) is divided by without any guard.
float Remap(float origFrom, float origTo, float targetFrom, float targetTo, float value)
{
    return lerp(targetFrom, targetTo, (value - origFrom) / (origTo - origFrom));
}

// ----------------------------------------------------------------------------
// Texture utilities
// ----------------------------------------------------------------------------

// LOD from explicit UV derivatives: 0.5 * log2 of the larger squared length of the two
// scaled derivatives, minus 'bias'. The result is clamped to be >= 0.
float ComputeTextureLOD(float2 uvdx, float2 uvdy, float2 scale, float bias = 0.0)
{
    float2 ddx_ = scale * uvdx;
    float2 ddy_ = scale * uvdy;
    float d = max(dot(ddx_, ddx_), dot(ddy_, ddy_));

    return max(0.5 * log2(d) - bias, 0.0);
}

// Same as above, with the derivatives taken from ddx(uv) / ddy(uv) and a scale of 1.
float ComputeTextureLOD(float2 uv, float bias = 0.0)
{
    float2 ddx_ = ddx(uv);
    float2 ddy_ = ddy(uv);

    return ComputeTextureLOD(ddx_, ddy_, 1.0, bias);
}

// 'uv' is multiplied by 'texelSize' before the derivatives are taken.
// NOTE(review): 'texelSize' presumably holds the texture (width, height) in (x, y),
// e.g. the .zw of a Unity _TexelSize vector - confirm against callers.
float ComputeTextureLOD(float2 uv, float2 texelSize, float bias = 0.0)
{
    uv *= texelSize;

    return ComputeTextureLOD(uv, bias);
}

// 3D variant taking three derivative vectors and a single uniform scale.
// Like the 2D variants, the result is clamped to be >= 0 inside the function.
float ComputeTextureLOD(float3 duvw_dx, float3 duvw_dy, float3 duvw_dz, float scale, float bias = 0.0)
{
    float d = Max3(dot(duvw_dx, duvw_dx), dot(duvw_dy, duvw_dy), dot(duvw_dz, duvw_dz));

    return max(0.5f * log2(d * (scale * scale)) - bias, 0.0);
}

// MIP_COUNT_SUPPORTED is defined on the graphics APIs for which GetMipCount() queries the texture.
#if defined(SHADER_API_D3D11) || defined(SHADER_API_D3D12) || defined(SHADER_API_D3D11_9X) || defined(SHADER_API_XBOXONE) || defined(SHADER_API_PSSL) || defined(SHADER_API_METAL)
    #define MIP_COUNT_SUPPORTED 1
#endif
    // TODO: Bug workaround, switch defines GLCORE when it shouldn't
#if ((defined(SHADER_API_GLCORE) && !defined(SHADER_API_SWITCH)) || defined(SHADER_API_VULKAN)) && !defined(SHADER_STAGE_COMPUTE)
    // OpenGL only supports textureSize for width, height, depth
    // textureQueryLevels (GL_ARB_texture_query_levels) needs OpenGL 4.3 or above and doesn't compile in compute shaders
    // tex.GetDimensions converted to textureQueryLevels
    #define MIP_COUNT_SUPPORTED 1
#endif
    // Metal doesn't support high enough OpenGL version

// Returns the mip count reported by tex.GetDimensions() when MIP_COUNT_SUPPORTED is defined,
// and 0 otherwise.
uint GetMipCount(TEXTURE2D_PARAM(tex, smp))
{
#if defined(MIP_COUNT_SUPPORTED)
    uint mipLevel, width, height, mipCount;
    mipLevel = width = height = mipCount = 0;
    tex.GetDimensions(mipLevel, width, height, mipCount);
    return mipCount;
#else
    return 0;
#endif
}

// ----------------------------------------------------------------------------
// Texture format sampling
// ----------------------------------------------------------------------------

// DXC no longer supports DX9-style HLSL syntax for sampler2D, tex2D and the like.
// These are emulated for backwards compatibility using our own small structs and functions which manually combine samplers and textures.
#if defined(UNITY_COMPILER_DXC) && !defined(DXC_SAMPLER_COMPATIBILITY)
#define DXC_SAMPLER_COMPATIBILITY 1

// On DXC platforms which don't care about explicit sampler precision we want the emulated types to work directly, e.g. without needing to redefine 'sampler2D' to 'sampler2D_f'.
// These defines must stay ahead of the struct declarations below so that the structs end up being declared under the plain DX9 names.
#if !defined(SHADER_API_GLES3) && !defined(SHADER_API_VULKAN) && !defined(SHADER_API_METAL) && !defined(SHADER_API_SWITCH) && !defined(SHADER_API_WEBGPU)
    #define sampler1D_f sampler1D
    #define sampler2D_f sampler2D
    #define sampler3D_f sampler3D
    #define samplerCUBE_f samplerCUBE
#endif

// Full precision emulated samplers: a texture object 't' paired with its sampler state 's'.
struct sampler1D_f { Texture1D<float4> t; SamplerState s; };
struct sampler2D_f { Texture2D<float4> t; SamplerState s; };
struct sampler3D_f { Texture3D<float4> t; SamplerState s; };
struct samplerCUBE_f { TextureCube<float4> t; SamplerState s; };

// Plain sampling.
float4 tex1D(sampler1D_f x, float v) { return x.t.Sample(x.s, v); }
float4 tex2D(sampler2D_f x, float2 v) { return x.t.Sample(x.s, v); }
float4 tex3D(sampler3D_f x, float3 v) { return x.t.Sample(x.s, v); }
float4 texCUBE(samplerCUBE_f x, float3 v) { return x.t.Sample(x.s, v); }

// Biased sampling: the coordinate is taken from the leading components of 't', the bias from t.w.
float4 tex1Dbias(sampler1D_f x, in float4 t) { return x.t.SampleBias(x.s, t.x, t.w); }
float4 tex2Dbias(sampler2D_f x, in float4 t) { return x.t.SampleBias(x.s, t.xy, t.w); }
float4 tex3Dbias(sampler3D_f x, in float4 t) { return x.t.SampleBias(x.s, t.xyz, t.w); }
float4 texCUBEbias(samplerCUBE_f x, in float4 t) { return x.t.SampleBias(x.s, t.xyz, t.w); }

// Explicit LOD sampling: the coordinate is taken from the leading components of 't', the LOD from t.w.
float4 tex1Dlod(sampler1D_f x, in float4 t) { return x.t.SampleLevel(x.s, t.x, t.w); }
float4 tex2Dlod(sampler2D_f x, in float4 t) { return x.t.SampleLevel(x.s, t.xy, t.w); }
float4 tex3Dlod(sampler3D_f x, in float4 t) { return x.t.SampleLevel(x.s, t.xyz, t.w); }
float4 texCUBElod(samplerCUBE_f x, in float4 t) { return x.t.SampleLevel(x.s, t.xyz, t.w); }

// Sampling with explicit gradients.
float4 tex1Dgrad(sampler1D_f x, float t, float dx, float dy) { return x.t.SampleGrad(x.s, t, dx, dy); }
float4 tex2Dgrad(sampler2D_f x, float2 t, float2 dx, float2 dy) { return x.t.SampleGrad(x.s, t, dx, dy); }
float4 tex3Dgrad(sampler3D_f x, float3 t, float3 dx, float3 dy) { return x.t.SampleGrad(x.s, t, dx, dy); }
float4 texCUBEgrad(samplerCUBE_f x, float3 t, float3 dx, float3 dy) { return x.t.SampleGrad(x.s, t, dx, dy); }

// Overloads of the plain names that take gradients; they behave like the *grad functions above.
float4 tex1D(sampler1D_f x, float t, float dx, float dy) { return x.t.SampleGrad(x.s, t, dx, dy); }
float4 tex2D(sampler2D_f x, float2 t, float2 dx, float2 dy) { return x.t.SampleGrad(x.s, t, dx, dy); }
float4 tex3D(sampler3D_f x, float3 t, float3 dx, float3 dy) { return x.t.SampleGrad(x.s, t, dx, dy); }
float4 texCUBE(samplerCUBE_f x, float3 t, float3 dx, float3 dy) { return x.t.SampleGrad(x.s, t, dx, dy); }

// Projective sampling: the coordinate is divided by the last component of 't' before sampling.
float4 tex1Dproj(sampler1D_f s, in float2 t) { return tex1D(s, t.x / t.y); }
float4 tex1Dproj(sampler1D_f s, in float4 t) { return tex1D(s, t.x / t.w); }
float4 tex2Dproj(sampler2D_f s, in float3 t) { return tex2D(s, t.xy / t.z); }
float4 tex2Dproj(sampler2D_f s, in float4 t) { return tex2D(s, t.xy / t.w); }
float4 tex3Dproj(sampler3D_f s, in float4 t) { return tex3D(s, t.xyz / t.w); }
float4 texCUBEproj(samplerCUBE_f s, in float4 t) { return texCUBE(s, t.xyz / t.w); }

// Half precision emulated samplers, used instead of the sampler.*_half unity types.
// The function set below mirrors the full precision one, returning min16float4.
struct sampler1D_h { Texture1D<min16float4> t; SamplerState s; };
struct sampler2D_h { Texture2D<min16float4> t; SamplerState s; };
struct sampler3D_h { Texture3D<min16float4> t; SamplerState s; };
struct samplerCUBE_h { TextureCube<min16float4> t; SamplerState s; };

// Plain sampling.
min16float4 tex1D(sampler1D_h x, float v) { return x.t.Sample(x.s, v); }
min16float4 tex2D(sampler2D_h x, float2 v) { return x.t.Sample(x.s, v); }
min16float4 tex3D(sampler3D_h x, float3 v) { return x.t.Sample(x.s, v); }
min16float4 texCUBE(samplerCUBE_h x, float3 v) { return x.t.Sample(x.s, v); }

// Biased sampling (bias in t.w).
min16float4 tex1Dbias(sampler1D_h x, in float4 t) { return x.t.SampleBias(x.s, t.x, t.w); }
min16float4 tex2Dbias(sampler2D_h x, in float4 t) { return x.t.SampleBias(x.s, t.xy, t.w); }
min16float4 tex3Dbias(sampler3D_h x, in float4 t) { return x.t.SampleBias(x.s, t.xyz, t.w); }
min16float4 texCUBEbias(samplerCUBE_h x, in float4 t) { return x.t.SampleBias(x.s, t.xyz, t.w); }

// Explicit LOD sampling (LOD in t.w).
min16float4 tex1Dlod(sampler1D_h x, in float4 t) { return x.t.SampleLevel(x.s, t.x, t.w); }
min16float4 tex2Dlod(sampler2D_h x, in float4 t) { return x.t.SampleLevel(x.s, t.xy, t.w); }
min16float4 tex3Dlod(sampler3D_h x, in float4 t) { return x.t.SampleLevel(x.s, t.xyz, t.w); }
min16float4 texCUBElod(samplerCUBE_h x, in float4 t) { return x.t.SampleLevel(x.s, t.xyz, t.w); }

// Sampling with explicit gradients.
min16float4 tex1Dgrad(sampler1D_h x, float t, float dx, float dy) { return x.t.SampleGrad(x.s, t, dx, dy); }
min16float4 tex2Dgrad(sampler2D_h x, float2 t, float2 dx, float2 dy) { return x.t.SampleGrad(x.s, t, dx, dy); }
min16float4 tex3Dgrad(sampler3D_h x, float3 t, float3 dx, float3 dy) { return x.t.SampleGrad(x.s, t, dx, dy); }
min16float4 texCUBEgrad(samplerCUBE_h x, float3 t, float3 dx, float3 dy) { return x.t.SampleGrad(x.s, t, dx, dy); }

// Overloads of the plain names that take gradients.
min16float4 tex1D(sampler1D_h x, float t, float dx, float dy) { return x.t.SampleGrad(x.s, t, dx, dy); }
min16float4 tex2D(sampler2D_h x, float2 t, float2 dx, float2 dy) { return x.t.SampleGrad(x.s, t, dx, dy); }
min16float4 tex3D(sampler3D_h x, float3 t, float3 dx, float3 dy) { return x.t.SampleGrad(x.s, t, dx, dy); }
min16float4 texCUBE(samplerCUBE_h x, float3 t, float3 dx, float3 dy) { return x.t.SampleGrad(x.s, t, dx, dy); }

// Projective sampling (division by the last component of 't').
min16float4 tex1Dproj(sampler1D_h s, in float2 t) { return tex1D(s, t.x / t.y); }
min16float4 tex1Dproj(sampler1D_h s, in float4 t) { return tex1D(s, t.x / t.w); }
min16float4 tex2Dproj(sampler2D_h s, in float3 t) { return tex2D(s, t.xy / t.z); }
min16float4 tex2Dproj(sampler2D_h s, in float4 t) { return tex2D(s, t.xy / t.w); }
min16float4 tex3Dproj(sampler3D_h s, in float4 t) { return tex3D(s, t.xyz / t.w); }
min16float4 texCUBEproj(samplerCUBE_h s, in float4 t) { return texCUBE(s, t.xyz / t.w); }
#endif

// Maps a (not necessarily normalized) direction to lat-long texture coordinates:
// u = 1 - atan2(dir.x, -dir.z) / (2 * PI), v = asin(dir.y) / PI + 0.5.
float2 DirectionToLatLongCoordinate(float3 unDir)
{
    float3 dir = normalize(unDir);
    // coordinate frame is (-Z, X) meaning negative Z is primary axis and X is secondary axis.
    return float2(1.0 - 0.5 * INV_PI * atan2(dir.x, -dir.z), asin(dir.y) * INV_PI + 0.5);
}

// Builds a direction from lat-long coordinates, with theta = coord.y * PI and
// phi = coord.x * 2 * PI - PI / 2.
float3 LatlongToDirectionCoordinate(float2 coord)
{
    float theta = coord.y * PI;
    float phi = (coord.x * 2.f * PI - PI*0.5f);

    float cosTheta = cos(theta);
    // min() keeps the sqrt() argument from going negative.
    float sinTheta = sqrt(1.0 - min(1.0, cosTheta*cosTheta));
    float cosPhi = cos(phi);
    float sinPhi = sin(phi);

    float3 direction = float3(sinTheta*cosPhi, cosTheta, sinTheta*sinPhi);
    // The X and Y components are negated.
    direction.xy *= -1.0;
    return direction;
}

// Unit 2D direction (cos, sin) for an angle; sin()/cos() take the angle in radians.
float2 OrientationToDirection(float orientation)
{
    return float2(cos(orientation), sin(orientation));
}

// ----------------------------------------------------------------------------
// Depth encoding/decoding
// ----------------------------------------------------------------------------

// Z buffer to linear 0..1 depth (0 at near plane, 1 at far plane).
// Does NOT correctly handle oblique view frustums.
// Does NOT work with orthographic projection.
// zBufferParam (UNITY_REVERSED_Z) = { f/n - 1,   1, (1/n - 1/f), 1/f }
// zBufferParam                    = { 1 - f/n, f/n, (1/f - 1/n), 1/n }
float Linear01DepthFromNear(float depth, float4 zBufferParam)
{
    float denominator = zBufferParam.x * depth + zBufferParam.y;

    // With a reversed Z buffer the numerator is (1 - depth) instead of depth.
#if UNITY_REVERSED_Z
    float numerator = 1.0 - depth;
#else
    float numerator = depth;
#endif

    return numerator / denominator;
}

// Z buffer to linear 0..1 depth (0 at camera position, 1 at far plane).
// Does NOT work with orthographic projections.
// Does NOT correctly handle oblique view frustums.
// zBufferParam (UNITY_REVERSED_Z) = { f/n - 1,   1, (1/n - 1/f), 1/f }
// zBufferParam                    = { 1 - f/n, f/n, (1/f - 1/n), 1/n }
float Linear01Depth(float depth, float4 zBufferParam)
{
    float denominator = zBufferParam.x * depth + zBufferParam.y;
    return 1.0 / denominator;
}

// Z buffer to linear view space (eye) depth.
// Does NOT correctly handle oblique view frustums.
// Does NOT work with orthographic projection.
// zBufferParam (UNITY_REVERSED_Z) = { f/n - 1,   1, (1/n - 1/f), 1/f }
// zBufferParam                    = { 1 - f/n, f/n, (1/f - 1/n), 1/n }
float LinearEyeDepth(float depth, float4 zBufferParam)
{
    float denominator = zBufferParam.z * depth + zBufferParam.w;
    return 1.0 / denominator;
}

// Z buffer to linear depth.
// Correctly handles oblique view frustums.
// Does NOT work with orthographic projection.
// Ref: An Efficient Depth Linearization Method for Oblique View Frustums, Eq. 6.
float LinearEyeDepth(float2 positionNDC, float deviceDepth, float4 invProjParam)
{
    float4 ndcWithDepth = float4(positionNDC, deviceDepth, 1.0);
    float viewSpaceZ = rcp(dot(ndcWithDepth, invProjParam));

    // If the matrix is right-handed, we have to flip the Z axis to get a positive value.
    return abs(viewSpaceZ);
}

// Z buffer to linear depth.
// Works in all cases.
// Typically, this is the cheapest variant, provided you've already computed 'positionWS'.
// Assumes that the 'positionWS' is in front of the camera.
float LinearEyeDepth(float3 positionWS, float4x4 viewMatrix)
{
    float4 positionVS = mul(viewMatrix, float4(positionWS, 1.0));

    // If the matrix is right-handed, we have to flip the Z axis to get a positive value.
    return abs(positionVS.z);
}

// 'z' is the view space Z position (linear depth).
// Apply saturate() to the output of the function to clamp it to the [0, 1] range.
// d = log2(c * (z - n) + 1) / log2(c * (f - n) + 1)
//   = log2(c * (z - n + 1/c)) / log2(c * (f - n) + 1)
//   = log2(c) / log2(c * (f - n) + 1) + log2(z - (n - 1/c)) / log2(c * (f - n) + 1)
//   = E + F * log2(z - G)
// encodingParams = { E, F, G, 0 }
float EncodeLogarithmicDepthGeneralized(float z, float4 encodingParams)
{
    // Use max() to avoid NaNs.
    float shiftedZ = max(0, z - encodingParams.z);

    return encodingParams.x + encodingParams.y * log2(shiftedZ);
}

// 'd' is the logarithmically encoded depth value.
// Apply saturate() to 'd' to clamp the output of the function to the [n, f] range.
// z = 1/c * (pow(c * (f - n) + 1, d) - 1) + n
//   = 1/c * pow(c * (f - n) + 1, d) + n - 1/c
//   = 1/c * exp2(d * log2(c * (f - n) + 1)) + (n - 1/c)
//   = L * exp2(d * M) + N
// decodingParams = { L, M, N, 0 }
// Graph: https://www.desmos.com/calculator/qrtatrlrba
float DecodeLogarithmicDepthGeneralized(float d, float4 decodingParams)
{
    float scaledExp = decodingParams.x * exp2(d * decodingParams.y);

    return scaledExp + decodingParams.z;
}

// 'z' is the view-space Z position (linear depth).
// Apply saturate() to the output of the function to clamp it to the [0, 1] range.
// encodingParams = { n, log2(f/n), 1/n, 1/log2(f/n) }
// This is an optimized version of EncodeLogarithmicDepthGeneralized() for (c = 2).
float EncodeLogarithmicDepth(float z, float4 encodingParams)
{
    // Use max() to avoid NaNs.
    // TODO: optimize to (log2(z) - log2(n)) / (log2(f) - log2(n)).
    float zOverNear = max(0, z * encodingParams.z);

    return log2(zOverNear) * encodingParams.w;
}

// 'd' is the logarithmically encoded depth value.
// Apply saturate() to 'd' to clamp the output of the function to the [n, f] range.
// encodingParams = { n, log2(f/n), 1/n, 1/log2(f/n) }
// This is an optimized version of DecodeLogarithmicDepthGeneralized() for (c = 2).
// Graph: https://www.desmos.com/calculator/qrtatrlrba
float DecodeLogarithmicDepth(float d, float4 encodingParams)
{
    // TODO: optimize to exp2(d * y + log2(x)).
    return encodingParams.x * exp2(d * encodingParams.y);
}

// Use an infinite far plane
// https://chaosinmotion.com/2010/09/06/goodbye-far-clipping-plane/
// 'depth' is the linear depth (view-space Z position)
float EncodeInfiniteDepth(float depth, float near)
{
    float nearOverDepth = near / depth;

    return saturate(nearOverDepth);
}

// 'z' is the depth encoded in the depth buffer (1 at near plane, 0 at far plane)
// 'z' is clamped to at least FLT_EPS before the division.
float DecodeInfiniteDepth(float z, float near)
{
    float clampedZ = max(z, FLT_EPS);

    return near / clampedZ;
}

// "Over" compositing: front + (1 - front.a) * back.
real4 CompositeOver(real4 front, real4 back)
{
    real backWeight = 1 - front.a;

    return front + backWeight * back;
}

// "Over" compositing with a per-channel alpha; color and alpha are returned separately.
void CompositeOver(real3 colorFront, real3 alphaFront,
                   real3 colorBack,  real3 alphaBack,
                   out real3 color,  out real3 alpha)
{
    real3 backWeight = 1 - alphaFront;

    color = colorFront + backWeight * colorBack;
    alpha = alphaFront + backWeight * alphaBack;
}

// ----------------------------------------------------------------------------
// Space transformations
// ----------------------------------------------------------------------------

static const float3x3 k_identity3x3 = {1, 0, 0,
                                       0, 1, 0,
                                       0, 0, 1};

static const float4x4 k_identity4x4 = {1, 0, 0, 0,
                                       0, 1, 0, 0,
                                       0, 0, 1, 0,
                                       0, 0, 0, 1};

// Builds a clip space position (w = 1) from NDC coordinates in [0, 1] and a device depth.
float4 ComputeClipSpacePosition(float2 positionNDC, float deviceDepth)
{
    float2 xyCS = positionNDC * 2.0 - 1.0;

#if UNITY_UV_STARTS_AT_TOP
    // Our world space, view space, screen space and NDC space are Y-up.
    // Our clip space is flipped upside-down due to poor legacy Unity design.
    // The flip is baked into the projection matrix, so we only have to flip
    // manually when going from CS to NDC and back.
    xyCS.y = -xyCS.y;
#endif

    return float4(xyCS, deviceDepth, 1.0);
}

// Use case examples:
// (position = positionCS) => (clipSpaceTransform = use default)
// (position = positionVS) => (clipSpaceTransform = UNITY_MATRIX_P)
// (position = positionWS) => (clipSpaceTransform = UNITY_MATRIX_VP)
float4 ComputeClipSpacePosition(float3 position, float4x4 clipSpaceTransform = k_identity4x4)
{
    float4 hposition = float4(position, 1.0);

    return mul(clipSpaceTransform, hposition);
}

// The returned Z value is the depth buffer value (and NOT linear view space Z value).
// Use case examples:
// (position = positionCS) => (clipSpaceTransform = use default)
// (position = positionVS) => (clipSpaceTransform = UNITY_MATRIX_P)
// (position = positionWS) => (clipSpaceTransform = UNITY_MATRIX_VP)
float3 ComputeNormalizedDeviceCoordinatesWithZ(float3 position, float4x4 clipSpaceTransform = k_identity4x4)
{
    float4 hpos = ComputeClipSpacePosition(position, clipSpaceTransform);

#if UNITY_UV_STARTS_AT_TOP
    // Our world space, view space, screen space and NDC space are Y-up.
    // Our clip space is flipped upside-down due to poor legacy Unity design.
    // The flip is baked into the projection matrix, so we only have to flip
    // manually when going from CS to NDC and back.
    hpos.y = -hpos.y;
#endif

    // Perspective divide, then [-1, 1] -> [0, 1] for XY only.
    float invW = rcp(hpos.w);
    hpos *= invW;
    hpos.xy = hpos.xy * 0.5 + 0.5;

    return hpos.xyz;
}

// Use case examples:
// (position = positionCS) => (clipSpaceTransform = use default)
// (position = positionVS) => (clipSpaceTransform = UNITY_MATRIX_P)
// (position = positionWS) => (clipSpaceTransform = UNITY_MATRIX_VP)
float2 ComputeNormalizedDeviceCoordinates(float3 position, float4x4 clipSpaceTransform = k_identity4x4)
{
    float3 ndcWithZ = ComputeNormalizedDeviceCoordinatesWithZ(position, clipSpaceTransform);

    return ndcWithZ.xy;
}

// Reconstructs the view space position from NDC coordinates and a device depth.
float3 ComputeViewSpacePosition(float2 positionNDC, float deviceDepth, float4x4 invProjMatrix)
{
    float4 positionCS = ComputeClipSpacePosition(positionNDC, deviceDepth);
    float4 hpositionVS = mul(invProjMatrix, positionCS);

    // The view space uses a right-handed coordinate system.
    float3 flippedXYZ = float3(hpositionVS.xy, -hpositionVS.z);

    return flippedXYZ / hpositionVS.w;
}

// Reconstructs the world space position from NDC coordinates and a device depth.
float3 ComputeWorldSpacePosition(float2 positionNDC, float deviceDepth, float4x4 invViewProjMatrix)
{
    float4 positionCS = ComputeClipSpacePosition(positionNDC, deviceDepth);
    float4 hpos = mul(invViewProjMatrix, positionCS);

    return hpos.xyz / hpos.w;
}

// Same as above, starting from an already built clip space position.
float3 ComputeWorldSpacePosition(float4 positionCS, float4x4 invViewProjMatrix)
{
    float4 hpos = mul(invViewProjMatrix, positionCS);

    return hpos.xyz / hpos.w;
}

// ----------------------------------------------------------------------------
// PositionInputs
// ----------------------------------------------------------------------------

// Note: if you modify this struct, be sure to update the CustomPassFullscreenShader.template
struct PositionInputs
{
    float3 positionWS;  // World space position (could be camera-relative)
    float2 positionNDC; // Normalized screen coordinates within the viewport    : [0, 1) (with the half-pixel offset)
    uint2  positionSS;  // Screen space pixel coordinates                       : [0, NumPixels)
    uint2  tileCoord;   // Screen tile coordinates                              : [0, NumTiles)
    float  deviceDepth; // Depth from the depth buffer                          : [0, 1] (typically reversed)
    float  linearDepth; // View space Z coordinate                              : [Near, Far]
};

// This function provides an easy way to sample into a screen texture, either from a pixel or a compute shader.
// This allows code to be shared easily.
// If a compute shader calls this function, positionSS is an integer usually calculated like: uint2 positionSS = groupId.xy * BLOCK_SIZE + groupThreadId.xy
// else it is the current unnormalized screen coordinate as returned by SV_Position.
// Explicit tile coordinates are specified so that we can easily make it lane invariant for compute evaluation.
PositionInputs GetPositionInput(float2 positionSS, float2 invScreenSize, uint2 tileCoord)
{
    PositionInputs posInput;
    ZERO_INITIALIZE(PositionInputs, posInput);

    posInput.tileCoord  = tileCoord;
    posInput.positionSS = uint2(positionSS);

    float2 pixelCoord = positionSS;
#if defined(SHADER_STAGE_COMPUTE) || defined(SHADER_STAGE_RAY_TRACING)
    // In case of compute shader an extra half offset is added to the screenPos to shift the integer position to pixel center.
    pixelCoord += float2(0.5, 0.5);
#endif
    posInput.positionNDC = pixelCoord * invScreenSize;

    return posInput;
}

// Same as above with tile coordinates (0, 0).
PositionInputs GetPositionInput(float2 positionSS, float2 invScreenSize)
{
    const uint2 noTile = uint2(0, 0);

    return GetPositionInput(positionSS, invScreenSize, noTile);
}

// For Raytracing only
// This function does not initialize deviceDepth and linearDepth
PositionInputs GetPositionInput(float2 positionSS, float2 invScreenSize, float3 positionWS)
{
    PositionInputs result = GetPositionInput(positionSS, invScreenSize, uint2(0, 0));
    result.positionWS = positionWS;

    return result;
}

// From forward
// deviceDepth and linearDepth come directly from .zw of SV_Position
PositionInputs GetPositionInput(float2 positionSS, float2 invScreenSize, float deviceDepth, float linearDepth, float3 positionWS, uint2 tileCoord)
{
    PositionInputs result = GetPositionInput(positionSS, invScreenSize, tileCoord);
    result.deviceDepth = deviceDepth;
    result.linearDepth = linearDepth;
    result.positionWS  = positionWS;

    return result;
}

// Forward variant with tile coordinates (0, 0).
PositionInputs GetPositionInput(float2 positionSS, float2 invScreenSize, float deviceDepth, float linearDepth, float3 positionWS)
{
    const uint2 noTile = uint2(0, 0);

    return GetPositionInput(positionSS, invScreenSize, deviceDepth, linearDepth, positionWS, noTile);
}

// From deferred or compute shader
// depth must be the depth from the raw depth buffer. This allows handling all kinds of depth automatically with the inverse view projection matrix.
// For information: in Unity, depth is always in range 0..1 (even on OpenGL) but can be reversed.
PositionInputs GetPositionInput(float2 positionSS, float2 invScreenSize, float deviceDepth,
    float4x4 invViewProjMatrix, float4x4 viewMatrix,
    uint2 tileCoord)
{
    PositionInputs result = GetPositionInput(positionSS, invScreenSize, tileCoord);

    result.deviceDepth = deviceDepth;
    // The world space position is rebuilt from NDC + depth, and the linear depth is derived from it.
    result.positionWS  = ComputeWorldSpacePosition(result.positionNDC, deviceDepth, invViewProjMatrix);
    result.linearDepth = LinearEyeDepth(result.positionWS, viewMatrix);

    return result;
}

// Deferred / compute variant with tile coordinates (0, 0).
PositionInputs GetPositionInput(float2 positionSS, float2 invScreenSize, float deviceDepth,
                                float4x4 invViewProjMatrix, float4x4 viewMatrix)
{
    const uint2 noTile = uint2(0, 0);

    return GetPositionInput(positionSS, invScreenSize, deviceDepth, invViewProjMatrix, viewMatrix, noTile);
}

// The view direction 'V' points towards the camera.
// 'depthOffsetVS' is always applied in the opposite direction (-V).
void ApplyDepthOffsetPositionInput(float3 V, float depthOffsetVS, float3 viewForwardDir, float4x4 viewProjMatrix, inout PositionInputs posInput)
{
    posInput.positionWS += depthOffsetVS * (-V);

    float3 ndcWithZ = ComputeNormalizedDeviceCoordinatesWithZ(posInput.positionWS, viewProjMatrix);
    posInput.deviceDepth = ndcWithZ.z;

    // Transform the displacement along the view vector to the displacement along the forward vector.
    // Use abs() to make sure we get the sign right.
    // 'depthOffsetVS' applies in the direction away from the camera.
    float forwardFactor = abs(dot(V, viewForwardDir));
    posInput.linearDepth += depthOffsetVS * forwardFactor;
}

// ----------------------------------------------------------------------------
// Terrain/Brush heightmap encoding/decoding
// ----------------------------------------------------------------------------

#if defined(SHADER_API_VULKAN) || defined(SHADER_API_GLES3) || defined(SHADER_API_WEBGPU)

// For the built-in target this is already a defined symbol
#ifndef BUILTIN_TARGET_API
// Quantizes 'height' to 16 bits and stores the low byte in .r and the high byte in .g (each divided by 255).
real4 PackHeightmap(real height)
{
    uint quantized = (uint)(65535.0 * height);
    uint lowByte   = (quantized >> 0) & 0xFF;
    uint highByte  = (quantized >> 8) & 0xFF;

    return real4(lowByte, highByte, 0, 0) / 255.0;
}

// Inverse of PackHeightmap().
real UnpackHeightmap(real4 height)
{
    return (height.r + height.g * 256.0) / 257.0; // (255.0 * height.r + 255.0 * 256.0 * height.g) / 65535.0
}
#endif

#else

// For the built-in target this is already a defined symbol
#ifndef BUILTIN_TARGET_API
// On these platforms the height is stored as-is in the red channel.
real4 PackHeightmap(real height)
{
    return real4(height, 0, 0, 0);
}

real UnpackHeightmap(real4 height)
{
    return height.r;
}
#endif

#endif

// ----------------------------------------------------------------------------
// Misc utilities
// ----------------------------------------------------------------------------

// Simple function to test a bitfield
bool HasFlag(uint bitfield, uint flag)
{
    uint commonBits = bitfield & flag;

    return commonBits != 0;
}

// Normalize that accounts for vectors with zero length
// (the squared length is clamped to FLT_MIN before the reciprocal square root).
float3 SafeNormalize(float3 inVec)
{
    float lengthSq = max(FLT_MIN, dot(inVec, inVec));

    return inVec * rsqrt(lengthSq);
}

// Half precision variant, clamping to HALF_MIN.
half3 SafeNormalize(half3 inVec)
{
    half lengthSq = max(HALF_MIN, dot(inVec, inVec));

    return inVec * rsqrt(lengthSq);
}

// True when the squared length of 'inVec' lies strictly between 0.9998 and 1.0002001.
bool IsNormalized(float3 inVec)
{
    float lengthSq = dot(inVec, inVec);

    return 0.9998 < lengthSq && lengthSq < 1.0002001;
}

// Half precision variant with a wider tolerance (0.998, 1.002).
bool IsNormalized(half3 inVec)
{
    half lengthSq = dot(inVec, inVec);

    return 0.998 < lengthSq && lengthSq < 1.002;
}

// Division which returns 1 for (inf/inf) and (0/0).
// If any of the input parameters are NaNs, the result is a NaN.
real SafeDiv(real numer, real denom)
{
    if (numer != denom)
    {
        return numer / denom;
    }

    return 1;
}

// Square root that clamps negative inputs to 0 instead of producing an imaginary result (NaN).
real SafeSqrt(real x)
{
    real nonNegative = max(0, x);

    return sqrt(nonNegative);
}

// Assumes that (0 <= x <= Pi).
real SinFromCos(real cosX)
{
    real sinSq = saturate(1 - cosX * cosX);

    return sqrt(sinSq);
}

// Dot product in spherical coordinates.
real SphericalDot(real cosTheta1, real phi1, real cosTheta2, real phi2)
{
    real sinTheta1 = SinFromCos(cosTheta1);
    real sinTheta2 = SinFromCos(cosTheta2);

    return sinTheta1 * sinTheta2 * cos(phi1 - phi2) + cosTheta1 * cosTheta2;
}

// Generates a triangle in homogeneous clip space, s.t.
// v0 = (-1, -1, 1), v1 = (3, -1, 1), v2 = (-1, 3, 1).
float2 GetFullScreenTriangleTexCoord(uint vertexID)
{
    uint uBits = (vertexID << 1) & 2;
    uint vBits = vertexID & 2;

#if UNITY_UV_STARTS_AT_TOP
    return float2(uBits, 1.0 - vBits);
#else
    return float2(uBits, vBits);
#endif
}

float4 GetFullScreenTriangleVertexPosition(uint vertexID, float z = UNITY_NEAR_CLIP_VALUE)
{
    // note: the triangle vertex position coordinates are x2 so the returned UV coordinates are in range -1, 1 on the screen.
IsNormalized(half3 inVec) 1535{ 1536 half squaredLength = dot(inVec, inVec); 1537 return 0.998 < squaredLength && squaredLength < 1.002; 1538} 1539 1540// Division which returns 1 for (inf/inf) and (0/0). 1541// If any of the input parameters are NaNs, the result is a NaN. 1542real SafeDiv(real numer, real denom) 1543{ 1544 return (numer != denom) ? numer / denom : 1; 1545} 1546 1547// Perform a square root safe of imaginary number. 1548real SafeSqrt(real x) 1549{ 1550 return sqrt(max(0, x)); 1551} 1552 1553// Assumes that (0 <= x <= Pi). 1554real SinFromCos(real cosX) 1555{ 1556 return sqrt(saturate(1 - cosX * cosX)); 1557} 1558 1559// Dot product in spherical coordinates. 1560real SphericalDot(real cosTheta1, real phi1, real cosTheta2, real phi2) 1561{ 1562 return SinFromCos(cosTheta1) * SinFromCos(cosTheta2) * cos(phi1 - phi2) + cosTheta1 * cosTheta2; 1563} 1564 1565// Generates a triangle in homogeneous clip space, s.t. 1566// v0 = (-1, -1, 1), v1 = (3, -1, 1), v2 = (-1, 3, 1). 1567float2 GetFullScreenTriangleTexCoord(uint vertexID) 1568{ 1569#if UNITY_UV_STARTS_AT_TOP 1570 return float2((vertexID << 1) & 2, 1.0 - (vertexID & 2)); 1571#else 1572 return float2((vertexID << 1) & 2, vertexID & 2); 1573#endif 1574} 1575 1576float4 GetFullScreenTriangleVertexPosition(uint vertexID, float z = UNITY_NEAR_CLIP_VALUE) 1577{ 1578 // note: the triangle vertex position coordinates are x2 so the returned UV coordinates are in range -1, 1 on the screen. 
1579 float2 uv = float2((vertexID << 1) & 2, vertexID & 2); 1580 float4 pos = float4(uv * 2.0 - 1.0, z, 1.0); 1581#ifdef UNITY_PRETRANSFORM_TO_DISPLAY_ORIENTATION 1582 pos = ApplyPretransformRotation(pos); 1583#endif 1584 return pos; 1585} 1586 1587 1588// draw procedural with 2 triangles has index order (0,1,2) (0,2,3) 1589 1590// 0 - 0,0 1591// 1 - 0,1 1592// 2 - 1,1 1593// 3 - 1,0 1594 1595float2 GetQuadTexCoord(uint vertexID) 1596{ 1597 uint topBit = vertexID >> 1; 1598 uint botBit = (vertexID & 1); 1599 float u = topBit; 1600 float v = (topBit + botBit) & 1; // produces 0 for indices 0,3 and 1 for 1,2 1601#if UNITY_UV_STARTS_AT_TOP 1602 v = 1.0 - v; 1603#endif 1604 return float2(u, v); 1605} 1606 1607// 0 - 0,1 1608// 1 - 0,0 1609// 2 - 1,0 1610// 3 - 1,1 1611float4 GetQuadVertexPosition(uint vertexID, float z = UNITY_NEAR_CLIP_VALUE) 1612{ 1613 uint topBit = vertexID >> 1; 1614 uint botBit = (vertexID & 1); 1615 float x = topBit; 1616 float y = 1 - (topBit + botBit) & 1; // produces 1 for indices 0,3 and 0 for 1,2 1617 float4 pos = float4(x, y, z, 1.0); 1618#ifdef UNITY_PRETRANSFORM_TO_DISPLAY_ORIENTATION 1619 pos = ApplyPretransformRotation(pos); 1620#endif 1621 return pos; 1622} 1623 1624#if !defined(SHADER_STAGE_RAY_TRACING) 1625 1626// LOD dithering transition helper 1627// LOD0 must use this function with ditherFactor 1..0 1628// LOD1 must use this function with ditherFactor -1..0 1629// This is what is provided by unity_LODFade 1630void LODDitheringTransition(uint2 fadeMaskSeed, float ditherFactor) 1631{ 1632 // Generate a spatially varying pattern. 1633 // Unfortunately, varying the pattern with time confuses the TAA, increasing the amount of noise. 1634 float p = GenerateHashedRandomFloat(fadeMaskSeed); 1635 1636 // This preserves the symmetry s.t. if LOD 0 has f = x, LOD 1 has f = -x. 
1637 float f = ditherFactor - CopySign(p, ditherFactor); 1638 clip(f); 1639} 1640 1641#endif 1642 1643// The resource that is bound when binding a stencil buffer from the depth buffer is two channel. On D3D11 the stencil value is in the green channel, 1644// while on other APIs is in the red channel. Note that on some platform, always using the green channel might work, but is not guaranteed. 1645uint GetStencilValue(uint2 stencilBufferVal) 1646{ 1647#if defined(SHADER_API_D3D11) || defined(SHADER_API_XBOXONE) || defined(SHADER_API_GAMECORE) 1648 return stencilBufferVal.y; 1649#else 1650 return stencilBufferVal.x; 1651#endif 1652} 1653 1654// Sharpens the alpha of a texture to the width of a single pixel 1655// Used for alpha to coverage 1656// source: https://medium.com/@bgolus/anti-aliased-alpha-test-the-esoteric-alpha-to-coverage-8b177335ae4f 1657float SharpenAlpha(float alpha, float alphaClipTreshold) 1658{ 1659 return saturate((alpha - alphaClipTreshold) / max(fwidth(alpha), 0.0001) + 0.5); 1660} 1661 1662// These clamping function to max of floating point 16 bit are use to prevent INF in code in case of extreme value 1663TEMPLATE_1_FLT(ClampToFloat16Max, value, return min(value, HALF_MAX)) 1664 1665#if SHADER_API_MOBILE || SHADER_API_GLES3 || SHADER_API_SWITCH 1666#pragma warning (enable : 3205) // conversion of larger type to smaller 1667#endif 1668 1669float2 RepeatOctahedralUV(float u, float v) 1670{ 1671 float2 uv; 1672 1673 if (u < 0.0f) 1674 { 1675 if (v < 0.0f) 1676 uv = float2(1.0f + u, 1.0f + v); 1677 else if (v < 1.0f) 1678 uv = float2(-u, 1.0f - v); 1679 else 1680 uv = float2(1.0f + u, v - 1.0f); 1681 } 1682 else if (u < 1.0f) 1683 { 1684 if (v < 0.0f) 1685 uv = float2(1.0f - u, -v); 1686 else if (v < 1.0f) 1687 uv = float2(u, v); 1688 else 1689 uv = float2(1.0f - u, 2.0f - v); 1690 } 1691 else 1692 { 1693 if (v < 0.0f) 1694 uv = float2(u - 1.0f, 1.0f + v); 1695 else if (v < 1.0f) 1696 uv = float2(2.0f - u, 1.0f - v); 1697 else 1698 uv = float2(u - 
1.0f, v - 1.0f); 1699 } 1700 1701 return uv; 1702} 1703 1704#endif // UNITY_COMMON_INCLUDED