Library/PackageCache/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl at master · tacstudios.tngl.sh/AloneGame

A game about forced loneliness, made by TACStudios
AloneGame / Library / PackageCache / com.unity.render-pipelines.core / ShaderLibrary / Common.hlsl
at master 1704 lines 71 kB view raw
   1#ifndef UNITY_COMMON_INCLUDED
   2#define UNITY_COMMON_INCLUDED
   3
   4#if SHADER_API_MOBILE || SHADER_API_GLES3 || SHADER_API_SWITCH || defined(UNITY_UNIFIED_SHADER_PRECISION_MODEL)
   5#pragma warning (disable : 3205) // conversion of larger type to smaller
   6#endif
   7
   8// Convention:
   9
  10// Unity is Y up and left handed in world space
  11// Caution: When going from world space to view space, unity is right handed in view space and the determinant of the matrix is negative
  12// For cubemap capture (reflection probe) view space is still left handed (cubemap convention) and the determinant is positive.
  13
  14// The lighting code assume that 1 Unity unit (1uu) == 1 meters.  This is very important regarding physically based light unit and inverse square attenuation
  15
  16// space at the end of the variable name
  17// WS: world space
// RWS: Camera-relative world space. A space where the translation of the camera has already been subtracted in order to improve precision
  19// VS: view space
  20// OS: object space
// CS: Homogeneous clip space
  22// TS: tangent space
  23// TXS: texture space
  24// Example: NormalWS
  25
// normalized / unnormalized vector
// normalized directions are used almost everywhere, so we tag unnormalized vectors with 'un'.
// Example: unL for unnormalized light vector
  29
// use capital letters for regular vectors; vectors always point outward from the current pixel position (ready for lighting equation)
// a capital letter means the vector is normalized, unless we put 'un' in front of it.
  32// V: View vector  (no eye vector)
  33// L: Light vector
  34// N: Normal vector
  35// H: Half vector
  36
  37// Input/Outputs structs in PascalCase and prefixed by entry type
  38// struct AttributesDefault
  39// struct VaryingsDefault
  40// use input/output as variable name when using these structures
  41
  42// Entry program name
  43// VertDefault
  44// FragDefault / FragForward / FragDeferred
  45
  46// constant floating number written as 1.0  (not 1, not 1.0f, not 1.0h)
  47
  48// uniform have _ as prefix + uppercase _LowercaseThenCamelCase
  49
// Do not use "in", only "out" or "inout" as qualifier, and no "inline" keyword either (it is useless).
  51// When declaring "out" argument of function, they are always last
  52
  53// headers from ShaderLibrary do not include "common.hlsl", this should be included in the .shader using it (or Material.hlsl)
  54
// All uniforms should be in constant buffers (nothing in the global namespace).
// The reason is that for compute shaders we need to guarantee that the layout of CBs is consistent across kernels. This is something we can't control with the global namespace (uniforms get optimized out if not used, modifying the global CBuffer layout per kernel)
  57
// Structure definitions that are shared between C# and hlsl.
// These structures need to be aligned on float4 to respect various packing rules from the shader language. This means that these structures need to be padded.
// Rules: When doing an array for constant buffer variables, we always use float4 to avoid any packing issue, particularly between compute shaders and pixel shaders
// i.e. don't use SetGlobalFloatArray or SetComputeFloatParams
// The array can be aliased in hlsl. Example:
  63// uniform float4 packedArray[3];
  64// static float unpackedArray[12] = (float[12])packedArray;
  65
// The functions of the shader library are stateless, no uniforms are declared in it.
// Any function that requires an explicit precision uses the float or half qualifier; when the function can support both, it uses real (see below)
// If a function requires both a half and a float version, then both need to be explicitly defined
  69
  70///
  71/// Hardware Support for Wave Operations
  72///
  73
  74// Support for wave operations is intentionally limited to the compute shader stage in order to make this functionality available to a wider range of hardware.
  75#if defined(SHADER_STAGE_COMPUTE)
  76    //
  77    // Platform Support
  78    //
  79    // Platforms may indicate support for wave operations at compile-time.
  80    // Shaders on these platforms may not always be compiled with a compiler that supports wave operations.
  81    // To simplify usage, we check for a supported compiler here before indicating that wave operations are supported.
  82    #if ((defined(UNITY_PLATFORM_SUPPORTS_WAVE_32) || defined(UNITY_PLATFORM_SUPPORTS_WAVE_64)) && (defined(UNITY_COMPILER_DXC) || defined(SHADER_API_PSSL)))
  83        #if defined(UNITY_PLATFORM_SUPPORTS_WAVE_32)
  84            #define UNITY_HW_WAVE_SIZE 32
  85        #elif defined(UNITY_PLATFORM_SUPPORTS_WAVE_64)
  86            #define UNITY_HW_WAVE_SIZE 64
  87        #endif
  88
  89        #define UNITY_PLATFORM_SUPPORTS_WAVE 1
  90    //
  91    // Device Support
  92    //
  93    // Devices may indicate support for wave operations at run-time.
  94    // Shaders compiled with these defines are always compiled with a compiler that supports wave operations.
  95    #elif (defined(UNITY_DEVICE_SUPPORTS_WAVE_ANY) || defined(UNITY_DEVICE_SUPPORTS_WAVE_8) || defined(UNITY_DEVICE_SUPPORTS_WAVE_16) || defined(UNITY_DEVICE_SUPPORTS_WAVE_32) || defined(UNITY_DEVICE_SUPPORTS_WAVE_64) || defined(UNITY_DEVICE_SUPPORTS_WAVE_128))
  96        #if defined(UNITY_DEVICE_SUPPORTS_WAVE_8)
  97            #define UNITY_HW_WAVE_SIZE 8
  98        #elif defined(UNITY_DEVICE_SUPPORTS_WAVE_16)
  99            #define UNITY_HW_WAVE_SIZE 16
 100        #elif defined(UNITY_DEVICE_SUPPORTS_WAVE_32)
 101            #define UNITY_HW_WAVE_SIZE 32
 102        #elif defined(UNITY_DEVICE_SUPPORTS_WAVE_64)
 103            #define UNITY_HW_WAVE_SIZE 64
 104        #elif defined(UNITY_DEVICE_SUPPORTS_WAVE_128)
 105            #define UNITY_HW_WAVE_SIZE 128
 106        #endif
 107
 108        #define UNITY_DEVICE_SUPPORTS_WAVE 1
 109    #endif
 110
 111    #if (defined(UNITY_PLATFORM_SUPPORTS_WAVE) || defined(UNITY_DEVICE_SUPPORTS_WAVE))
 112        #define UNITY_HW_SUPPORTS_WAVE 1
 113    #endif
 114#endif
 115
 116#ifndef real
 117
 118// The including shader should define whether half
 119// precision is suitable for its needs.  The shader
 120// API (for now) can indicate whether half is possible.
 121#if defined(SHADER_API_MOBILE) || defined(SHADER_API_SWITCH) || defined(UNITY_UNIFIED_SHADER_PRECISION_MODEL)
 122#define HAS_HALF 1
 123#else
 124#define HAS_HALF 0
 125#endif
 126
 127#ifndef PREFER_HALF
 128#define PREFER_HALF 1
 129#endif
 130
 131#if HAS_HALF && PREFER_HALF
 132#define REAL_IS_HALF 1
 133#else
 134#define REAL_IS_HALF 0
 135#endif // Do we have half?
 136
 137#if REAL_IS_HALF
 138#define HALF_IS_FLOAT 0
 139#define half min16float
 140#define half2 min16float2
 141#define half3 min16float3
 142#define half4 min16float4
 143#define half2x2 min16float2x2
 144#define half2x3 min16float2x3
 145#define half3x2 min16float3x2
 146#define half3x3 min16float3x3
 147#define half3x4 min16float3x4
 148#define half4x3 min16float4x3
 149#define half4x4 min16float4x4
 150#else
 151#define HALF_IS_FLOAT (!defined(UNITY_DEVICE_SUPPORTS_NATIVE_16BIT))
 152#endif
 153
 154#if REAL_IS_HALF
 155#define real half
 156#define real2 half2
 157#define real3 half3
 158#define real4 half4
 159
 160#define real2x2 half2x2
 161#define real2x3 half2x3
 162#define real2x4 half2x4
 163#define real3x2 half3x2
 164#define real3x3 half3x3
 165#define real3x4 half3x4
 166#define real4x3 half4x3
 167#define real4x4 half4x4
 168
 169#define REAL_MIN HALF_MIN
 170#define REAL_MAX HALF_MAX
 171#define REAL_EPS HALF_EPS
 172
 173#else
 174
 175#define real float
 176#define real2 float2
 177#define real3 float3
 178#define real4 float4
 179
 180#define real2x2 float2x2
 181#define real2x3 float2x3
 182#define real2x4 float2x4
 183#define real3x2 float3x2
 184#define real3x3 float3x3
 185#define real3x4 float3x4
 186#define real4x3 float4x3
 187#define real4x4 float4x4
 188
 189#define REAL_MIN FLT_MIN
 190#define REAL_MAX FLT_MAX
 191#define REAL_EPS FLT_EPS
 192
 193#endif // REAL_IS_HALF
 194
 195#endif // #ifndef real
 196
 197// Target in compute shader are supported in 2018.2, for now define ours
 198// (Note only 45 and above support compute shader)
 199#ifdef  SHADER_STAGE_COMPUTE
 200#   ifndef SHADER_TARGET
 201#       if defined(SHADER_API_METAL)
 202#       define SHADER_TARGET 45
 203#       else
 204#       define SHADER_TARGET 50
 205#       endif
 206#   endif
 207#endif
 208
// This is the default keyword combination and needs to be overridden by the platforms that need specific behaviors
// when enabling conservative depth overrides
 211#define SV_POSITION_QUALIFIERS
 212#define DEPTH_OFFSET_SEMANTIC SV_Depth
 213
 214// Include language header
 215#if defined (SHADER_API_GAMECORE)
 216#include "Packages/com.unity.render-pipelines.gamecore/ShaderLibrary/API/GameCore.hlsl"
 217#elif defined(SHADER_API_XBOXONE)
 218#include "Packages/com.unity.render-pipelines.xboxone/ShaderLibrary/API/XBoxOne.hlsl"
 219#elif defined(SHADER_API_PS4)
 220#include "Packages/com.unity.render-pipelines.ps4/ShaderLibrary/API/PSSL.hlsl"
 221#elif defined(SHADER_API_PS5)
 222#include "Packages/com.unity.render-pipelines.ps5/ShaderLibrary/API/PSSL.hlsl"
 223#elif defined(SHADER_API_D3D11)
 224#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/API/D3D11.hlsl"
 225#elif defined(SHADER_API_METAL)
 226#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/API/Metal.hlsl"
 227#elif defined(SHADER_API_VULKAN)
 228#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/API/Vulkan.hlsl"
 229#elif defined(SHADER_API_SWITCH)
 230#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/API/Switch.hlsl"
 231#elif defined(SHADER_API_GLCORE)
 232#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/API/GLCore.hlsl"
 233#elif defined(SHADER_API_GLES3)
 234#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/API/GLES3.hlsl"
 235#elif defined(SHADER_API_WEBGPU)
 236#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/API/WebGPU.hlsl"
 237#else
 238#error unsupported shader api
 239#endif
 240#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/API/Validate.hlsl"
 241
 242#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Macros.hlsl"
 243#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Random.hlsl"
 244
 245#if !defined(SHADER_API_PS5)
 246#define PushMarker(str)
 247#define PopMarker()
 248#endif
 249
 250#ifdef SHADER_API_XBOXONE // TODO: to move in .nda package in 21.1
 251#define PLATFORM_SUPPORTS_PRIMITIVE_ID_IN_PIXEL_SHADER
 252#endif
 253
 254#if defined(PLATFORM_SUPPORTS_NATIVE_RENDERPASS)
 255
 256    #if defined(UNITY_COMPILER_DXC)
 257
 258        //Subpass inputs are disallowed in non-fragment shader stages with DXC so we need some dummy value to use in the fragment function while it's not being compiled
 259        #if defined(SHADER_STAGE_FRAGMENT)
 260            #define UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX(type, idx) [[vk::input_attachment_index(idx)]] SubpassInput<type##4> hlslcc_fbinput_##idx
 261            #define UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX_MS(type, idx) [[vk::input_attachment_index(idx)]] SubpassInputMS<type##4> hlslcc_fbinput_##idx
 262        #else
 263            //declaring dummy resources here so that non-fragment shader stage automatic bindings wouldn't diverge from the fragment shader (important for vulkan)
 264            #define UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX(type, idx) Texture2D dxc_dummy_fbinput_resource##idx; static type DXC_DummySubpassVariable##idx = type(0).xxxx;
 265            #define UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX_MS(type, idx) Texture2D dxc_dummy_fbinput_resource##idx; static type DXC_DummySubpassVariable##idx = type(0).xxxx
 266        #endif
 267        // Renderpass inputs: Vulkan/Metal subpass input
 268        #define FRAMEBUFFER_INPUT_FLOAT(idx) UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX(float, idx)
 269        #define FRAMEBUFFER_INPUT_FLOAT_MS(idx) UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX_MS(float, idx)
 270        // For halfs
 271        #define FRAMEBUFFER_INPUT_HALF(idx) UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX(half, idx)
 272        #define FRAMEBUFFER_INPUT_HALF_MS(idx) UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX_MS(half, idx)
 273        // For ints
 274        #define FRAMEBUFFER_INPUT_INT(idx) UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX(int, idx)
 275        #define FRAMEBUFFER_INPUT_INT_MS(idx) UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX_MS(int, idx)
 276        // For uints
 277        #define FRAMEBUFFER_INPUT_UINT(idx) UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX(uint, idx)
 278        #define FRAMEBUFFER_INPUT_UINT_MS(idx) UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX_MS(uint, idx)
 279
 280        #if defined(SHADER_STAGE_FRAGMENT)
 281            #define LOAD_FRAMEBUFFER_INPUT(idx, v2fname) hlslcc_fbinput_##idx.SubpassLoad()
 282            #define LOAD_FRAMEBUFFER_INPUT_MS(idx, sampleIdx, v2fname) hlslcc_fbinput_##idx.SubpassLoad(sampleIdx)
 283        #else
 284            #define LOAD_FRAMEBUFFER_INPUT(idx, v2fname) DXC_DummySubpassVariable##idx
 285            #define LOAD_FRAMEBUFFER_INPUT_MS(idx, sampleIdx, v2fname) DXC_DummySubpassVariable##idx
 286        #endif
 287
 288    #elif defined(SHADER_API_METAL) && defined(UNITY_NEEDS_RENDERPASS_FBFETCH_FALLBACK)
 289
 290        // On desktop metal we need special magic due to the need to support both intel and apple silicon
 291        // since the former does not support framebuffer fetch
 292        // Due to this we have special considerations:
 293        // 1. since we might need to bind the copy texture, to simplify our lives we always declare _UnityFBInput texture
 294        //    in metal translation we will add function_constant, but we still want to generate binding in hlsl
 295        //    so that unity knows about the possibility
 296        // 2. hlsl do not have anything like function constants, hence we will add bool to the fake cbuffer for subpass
 297        //    again, this is done only for hlsl to generate proper code - in translation it will be changed to
 298        //    a proper function constant (i.e. hlslcc_SubpassInput_f_ cbuffer is just "metadata" and is absent in metal code)
 299        // 3. we want to generate an actual if command (not conditional move), hence we need to have an interim function
 300        //    alas we are not able to hide in it the texture coords: we are guaranteed to have just one "declare fb input"
 301        //    per index, but nothing stops users to have several "read fb input", hence we need to generate function code
 302        //    in the former, where we do not know the source of uv coords
 303        //    while the usage looks weird (we pass hlslcc_fbfetch_ in the function), it is ok due to the way hlsl compiler works
 304        //    it will generate an actual if and access hlslcc_fbfetch_ only if framebuffer fetch is available
 305        //    and when creating metal program, compiler takes care of this (function_constant magic)
 306
 307        #define RENDERPASS_DECLARE_FALLBACK(T, idx)                                                             \
 308            Texture2D<T> _UnityFBInput##idx; float4 _UnityFBInput##idx##_TexelSize;                             \
 309            inline T ReadFBInput_##idx(bool var, uint2 coord) {                                                 \
 310                [branch]if(var) { return hlslcc_fbinput_##idx; }                                                \
 311                else { return _UnityFBInput##idx.Load(uint3(coord,0)); }                                        \
 312            }
 313        #define RENDERPASS_DECLARE_FALLBACK_MS(T, idx)                                                          \
 314            Texture2DMS<T> _UnityFBInput##idx; float4 _UnityFBInput##idx##_TexelSize;                           \
 315            inline T ReadFBInput_##idx(bool var, uint2 coord, uint sampleIdx) {                                 \
 316                [branch]if(var) { return hlslcc_fbinput_##idx[sampleIdx]; }                                     \
 317                else { return _UnityFBInput##idx.Load(coord,sampleIdx); }                                       \
 318            }
 319
 320        #define FRAMEBUFFER_INPUT_FLOAT(idx)                                                                    \
 321            cbuffer hlslcc_SubpassInput_f_##idx { float4 hlslcc_fbinput_##idx; bool hlslcc_fbfetch_##idx; };    \
 322            RENDERPASS_DECLARE_FALLBACK(float4, idx)
 323
 324        #define FRAMEBUFFER_INPUT_FLOAT_MS(idx)                                                                 \
 325            cbuffer hlslcc_SubpassInput_F_##idx { float4 hlslcc_fbinput_##idx[8]; bool hlslcc_fbfetch_##idx; }; \
 326            RENDERPASS_DECLARE_FALLBACK_MS(float4, idx)
 327
 328        #define FRAMEBUFFER_INPUT_HALF(idx)                                                                     \
 329            cbuffer hlslcc_SubpassInput_h_##idx { half4 hlslcc_fbinput_##idx; bool hlslcc_fbfetch_##idx; };     \
 330            RENDERPASS_DECLARE_FALLBACK(half4, idx)
 331
 332        #define FRAMEBUFFER_INPUT_HALF_MS(idx)                                                                  \
 333            cbuffer hlslcc_SubpassInput_H_##idx { half4 hlslcc_fbinput_##idx[8]; bool hlslcc_fbfetch_##idx; };  \
 334            RENDERPASS_DECLARE_FALLBACK_MS(half4, idx)
 335
 336        #define FRAMEBUFFER_INPUT_INT(idx)                                                                      \
 337            cbuffer hlslcc_SubpassInput_i_##idx { int4 hlslcc_fbinput_##idx; bool hlslcc_fbfetch_##idx; };      \
 338            RENDERPASS_DECLARE_FALLBACK(int4, idx)
 339
 340        #define FRAMEBUFFER_INPUT_INT_MS(idx)                                                                   \
 341            cbuffer hlslcc_SubpassInput_I_##idx { int4 hlslcc_fbinput_##idx[8]; bool hlslcc_fbfetch_##idx; };   \
 342            RENDERPASS_DECLARE_FALLBACK_MS(int4, idx)
 343
 344        #define FRAMEBUFFER_INPUT_UINT(idx)                                                                     \
 345            cbuffer hlslcc_SubpassInput_u_##idx { uint4 hlslcc_fbinput_##idx; bool hlslcc_fbfetch_##idx; };     \
 346            RENDERPASS_DECLARE_FALLBACK(uint4, idx)
 347
 348        #define FRAMEBUFFER_INPUT_UINT_MS(idx)                                                                  \
 349            cbuffer hlslcc_SubpassInput_U_##idx { uint4 hlslcc_fbinput_##idx[8]; bool hlslcc_fbfetch_##idx; };  \
 350            UNITY_RENDERPASS_DECLARE_FALLBACK_MS(uint4, idx)
 351
 352        #define LOAD_FRAMEBUFFER_INPUT(idx, v2fname) ReadFBInput_##idx(hlslcc_fbfetch_##idx, uint2(v2fname.xy))
 353        #define LOAD_FRAMEBUFFER_INPUT_MS(idx, sampleIdx, v2fname) ReadFBInput_##idx(hlslcc_fbfetch_##idx, uint2(v2fname.xy), sampleIdx)
 354
 355    #else
 356
 357        // For floats
 358        #define FRAMEBUFFER_INPUT_FLOAT(idx) cbuffer hlslcc_SubpassInput_f_##idx { float4 hlslcc_fbinput_##idx; }
 359        #define FRAMEBUFFER_INPUT_FLOAT_MS(idx) cbuffer hlslcc_SubpassInput_F_##idx { float4 hlslcc_fbinput_##idx[8]; }
 360        // For halfs
 361        #define FRAMEBUFFER_INPUT_HALF(idx) cbuffer hlslcc_SubpassInput_h_##idx { half4 hlslcc_fbinput_##idx; }
 362        #define FRAMEBUFFER_INPUT_HALF_MS(idx) cbuffer hlslcc_SubpassInput_H_##idx { half4 hlslcc_fbinput_##idx[8]; }
 363        // For ints
 364        #define FRAMEBUFFER_INPUT_INT(idx) cbuffer hlslcc_SubpassInput_i_##idx { int4 hlslcc_fbinput_##idx; }
 365        #define FRAMEBUFFER_INPUT_INT_MS(idx) cbuffer hlslcc_SubpassInput_I_##idx { int4 hlslcc_fbinput_##idx[8]; }
 366        // For uints
 367        #define FRAMEBUFFER_INPUT_UINT(idx) cbuffer hlslcc_SubpassInput_u_##idx { uint4 hlslcc_fbinput_##idx; }
 368        #define FRAMEBUFFER_INPUT_UINT_MS(idx) cbuffer hlslcc_SubpassInput_U_##idx { uint4 hlslcc_fbinput_##idx[8]; }
 369
 370        #define LOAD_FRAMEBUFFER_INPUT(idx, v2fname) hlslcc_fbinput_##idx
 371        #define LOAD_FRAMEBUFFER_INPUT_MS(idx, sampleIdx, v2fname) hlslcc_fbinput_##idx[sampleIdx]
 372
 373    #endif
 374
 375#else
 376
 377    // Renderpass inputs: General fallback paths
 378    #define FRAMEBUFFER_INPUT_FLOAT(idx) TEXTURE2D_FLOAT(_UnityFBInput##idx); float4 _UnityFBInput##idx##_TexelSize
 379    #define FRAMEBUFFER_INPUT_HALF(idx) TEXTURE2D_HALF(_UnityFBInput##idx); float4 _UnityFBInput##idx##_TexelSize
 380    #define FRAMEBUFFER_INPUT_INT(idx) TEXTURE2D_INT(_UnityFBInput##idx); float4 _UnityFBInput##idx##_TexelSize
 381    #define FRAMEBUFFER_INPUT_UINT(idx) TEXTURE2D_UINT(_UnityFBInput##idx); float4 _UnityFBInput##idx##_TexelSize
 382
 383    #define LOAD_FRAMEBUFFER_INPUT(idx, v2fvertexname) _UnityFBInput##idx.Load(uint3(v2fvertexname.xy, 0))
 384
 385    #define FRAMEBUFFER_INPUT_FLOAT_MS(idx) Texture2DMS<float4> _UnityFBInput##idx; float4 _UnityFBInput##idx##_TexelSize
 386    #define FRAMEBUFFER_INPUT_HALF_MS(idx) Texture2DMS<float4> _UnityFBInput##idx; float4 _UnityFBInput##idx##_TexelSize
 387    #define FRAMEBUFFER_INPUT_INT_MS(idx) Texture2DMS<int4> _UnityFBInput##idx; float4 _UnityFBInput##idx##_TexelSize
 388    #define FRAMEBUFFER_INPUT_UINT_MS(idx) Texture2DMS<uint4> _UnityFBInput##idx; float4 _UnityFBInput##idx##_TexelSize
 389
 390    #define LOAD_FRAMEBUFFER_INPUT_MS(idx, sampleIdx, v2fvertexname) _UnityFBInput##idx.Load(uint2(v2fvertexname.xy), sampleIdx)
 391
 392#endif
 393
 394// ----------------------------------------------------------------------------
 395// Global resources API definitions for Ray Tracing
 396// ----------------------------------------------------------------------------
 397#if (SHADER_STAGE_RAY_TRACING && UNITY_RAY_TRACING_GLOBAL_RESOURCES)
 398    #define GLOBAL_RESOURCE(type, name, reg) type name : register(reg, space1);
 399    #define GLOBAL_CBUFFER_START(name, reg) cbuffer name : register(reg, space1) {
 400    #define GLOBAL_TEXTURE2D(name, reg) TEXTURE2D(name) : register(reg, space1)
 401    #define GLOBAL_TEXTURE2D_ARRAY(name, reg) TEXTURE2D_ARRAY(name) : register(reg, space1)
 402    #define GLOBAL_TEXTURECUBE_ARRAY(name, reg) TEXTURECUBE_ARRAY(name) : register(reg, space1)
 403#else
 404    #define GLOBAL_RESOURCE(type, name, reg) type name;
 405    #define GLOBAL_CBUFFER_START(name, reg) CBUFFER_START(name)
 406    #define GLOBAL_TEXTURE2D(name, reg) TEXTURE2D(name)
 407    #define GLOBAL_TEXTURE2D_ARRAY(name, reg) TEXTURE2D_ARRAY(name)
 408    #define GLOBAL_TEXTURECUBE_ARRAY(name, reg) TEXTURECUBE_ARRAY(name)
 409#endif
 410
 411// ----------------------------------------------------------------------------
 412// Common intrinsic (general implementation of intrinsic available on some platform)
 413// ----------------------------------------------------------------------------
 414
 415#if !defined(PLATFORM_SUPPORTS_WAVE_INTRINSICS) && !defined(UNITY_COMPILER_DXC) && !defined(UNITY_HW_SUPPORTS_WAVE)
 416// Intercept wave functions when they aren't supported to provide better error messages
 417#define WaveActiveAllTrue ERROR_ON_UNSUPPORTED_FUNCTION(WaveActiveAllTrue)
 418#define WaveActiveAnyTrue ERROR_ON_UNSUPPORTED_FUNCTION(WaveActiveAnyTrue)
 419#define WaveGetLaneIndex ERROR_ON_UNSUPPORTED_FUNCTION(WaveGetLaneIndex)
 420#define WaveIsFirstLane ERROR_ON_UNSUPPORTED_FUNCTION(WaveIsFirstLane)
 421#define GetWaveID ERROR_ON_UNSUPPORTED_FUNCTION(GetWaveID)
 422#define WaveActiveMin ERROR_ON_UNSUPPORTED_FUNCTION(WaveActiveMin)
 423#define WaveActiveMax ERROR_ON_UNSUPPORTED_FUNCTION(WaveActiveMax)
 424#define WaveActiveBallot ERROR_ON_UNSUPPORTED_FUNCTION(WaveActiveBallot)
 425#define WaveActiveSum ERROR_ON_UNSUPPORTED_FUNCTION(WaveActiveSum)
 426#define WaveActiveBitAnd ERROR_ON_UNSUPPORTED_FUNCTION(WaveActiveBitAnd)
 427#define WaveActiveBitOr ERROR_ON_UNSUPPORTED_FUNCTION(WaveActiveBitOr)
 428#define WaveGetLaneCount ERROR_ON_UNSUPPORTED_FUNCTION(WaveGetLaneCount)
 429#define WaveIsHelperLane ERROR_ON_UNSUPPORTED_FUNCTION(WaveIsHelperLane)
 430#endif
 431
 432#if defined(PLATFORM_SUPPORTS_WAVE_INTRINSICS)
 433// Helper macro to compute lane swizzle offset starting from andMask, orMask and xorMask.
 434// IMPORTANT, to guarantee compatibility with all platforms, the masks need to be constant literals (constants at compile time)
 435#define LANE_SWIZZLE_OFFSET(andMask, orMask, xorMask)  (andMask | (orMask << 5) | (xorMask << 10))
 436#endif
 437
 438#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/CommonDeprecated.hlsl"
 439
 440#ifndef INTRINSIC_BITFIELD_EXTRACT
 441// Unsigned integer bit field extraction.
 442// Note that the intrinsic itself generates a vector instruction.
 443// Wrap this function with WaveReadLaneFirst() to get scalar output.
 444uint BitFieldExtract(uint data, uint offset, uint numBits)
 445{
 446    uint mask = (1u << numBits) - 1u;
 447    return (data >> offset) & mask;
 448}
 449#endif // INTRINSIC_BITFIELD_EXTRACT
 450
 451#ifndef INTRINSIC_BITFIELD_EXTRACT_SIGN_EXTEND
 452// Integer bit field extraction with sign extension.
 453// Note that the intrinsic itself generates a vector instruction.
 454// Wrap this function with WaveReadLaneFirst() to get scalar output.
 455int BitFieldExtractSignExtend(int data, uint offset, uint numBits)
 456{
 457    int  shifted = data >> offset;      // Sign-extending (arithmetic) shift
 458    int  signBit = shifted & (1u << (numBits - 1u));
 459    uint mask    = (1u << numBits) - 1u;
 460
 461    return -signBit | (shifted & mask); // Use 2-complement for negation to replicate the sign bit
 462}
 463#endif // INTRINSIC_BITFIELD_EXTRACT_SIGN_EXTEND
 464
 465#ifndef INTRINSIC_BITFIELD_INSERT
 466// Inserts the bits indicated by 'mask' from 'src' into 'dst'.
 467uint BitFieldInsert(uint mask, uint src, uint dst)
 468{
 469    return (src & mask) | (dst & ~mask);
 470}
 471#endif // INTRINSIC_BITFIELD_INSERT
 472
 473bool IsBitSet(uint data, uint offset)
 474{
 475    return BitFieldExtract(data, offset, 1u) != 0;
 476}
 477
 478void SetBit(inout uint data, uint offset)
 479{
 480    data |= 1u << offset;
 481}
 482
 483void ClearBit(inout uint data, uint offset)
 484{
 485    data &= ~(1u << offset);
 486}
 487
 488void ToggleBit(inout uint data, uint offset)
 489{
 490    data ^= 1u << offset;
 491}
 492
 493#ifndef INTRINSIC_WAVEREADFIRSTLANE
 494    // Warning: for correctness, the argument's value must be the same across all lanes of the wave.
 495    TEMPLATE_1_FLT_HALF(WaveReadLaneFirst, scalarValue, return scalarValue)
 496    TEMPLATE_1_INT(WaveReadLaneFirst, scalarValue, return scalarValue)
 497#endif
 498
 499#ifndef INTRINSIC_MUL24
 500    TEMPLATE_2_INT(Mul24, a, b, return a * b)
 501#endif // INTRINSIC_MUL24
 502
 503#ifndef INTRINSIC_MAD24
 504    TEMPLATE_3_INT(Mad24, a, b, c, return a * b + c)
 505#endif // INTRINSIC_MAD24
 506
 507#ifndef INTRINSIC_MINMAX3
 508    TEMPLATE_3_FLT_HALF(Min3, a, b, c, return min(min(a, b), c))
 509    TEMPLATE_3_INT(Min3, a, b, c, return min(min(a, b), c))
 510    TEMPLATE_3_FLT_HALF(Max3, a, b, c, return max(max(a, b), c))
 511    TEMPLATE_3_INT(Max3, a, b, c, return max(max(a, b), c))
 512#endif // INTRINSIC_MINMAX3
 513
 514TEMPLATE_3_FLT_HALF(Avg3, a, b, c, return (a + b + c) * 0.33333333)
 515
 516// Important! Quad functions only valid in pixel shaders!
 517    float2 GetQuadOffset(int2 screenPos)
 518    {
 519        return float2(float(screenPos.x & 1) * 2.0 - 1.0, float(screenPos.y & 1) * 2.0 - 1.0);
 520    }
 521
 522#ifndef INTRINSIC_QUAD_SHUFFLE
 523    float QuadReadAcrossX(float value, int2 screenPos)
 524    {
 525        return value - (ddx_fine(value) * (float(screenPos.x & 1) * 2.0 - 1.0));
 526    }
 527
 528    float QuadReadAcrossY(float value, int2 screenPos)
 529    {
 530        return value - (ddy_fine(value) * (float(screenPos.y & 1) * 2.0 - 1.0));
 531    }
 532
 533    float QuadReadAcrossDiagonal(float value, int2 screenPos)
 534    {
 535        float2 quadDir = GetQuadOffset(screenPos);
 536        float dX = ddx_fine(value);
 537        float X = value - (dX * quadDir.x);
 538        return X - (ddy_fine(X) * quadDir.y);
 539    }
 540#endif
 541
 542    float3 QuadReadFloat3AcrossX(float3 val, int2 positionSS)
 543    {
 544        return float3(QuadReadAcrossX(val.x, positionSS), QuadReadAcrossX(val.y, positionSS), QuadReadAcrossX(val.z, positionSS));
 545    }
 546
 547    float4 QuadReadFloat4AcrossX(float4 val, int2 positionSS)
 548    {
 549        return float4(QuadReadAcrossX(val.x, positionSS), QuadReadAcrossX(val.y, positionSS), QuadReadAcrossX(val.z, positionSS), QuadReadAcrossX(val.w, positionSS));
 550    }
 551
 552    float3 QuadReadFloat3AcrossY(float3 val, int2 positionSS)
 553    {
 554        return float3(QuadReadAcrossY(val.x, positionSS), QuadReadAcrossY(val.y, positionSS), QuadReadAcrossY(val.z, positionSS));
 555    }
 556
 557    float4 QuadReadFloat4AcrossY(float4 val, int2 positionSS)
 558    {
 559        return float4(QuadReadAcrossY(val.x, positionSS), QuadReadAcrossY(val.y, positionSS), QuadReadAcrossY(val.z, positionSS), QuadReadAcrossY(val.w, positionSS));
 560    }
 561
 562    float3 QuadReadFloat3AcrossDiagonal(float3 val, int2 positionSS)
 563    {
 564        return float3(QuadReadAcrossDiagonal(val.x, positionSS), QuadReadAcrossDiagonal(val.y, positionSS), QuadReadAcrossDiagonal(val.z, positionSS));
 565    }
 566
 567    float4 QuadReadFloat4AcrossDiagonal(float4 val, int2 positionSS)
 568    {
 569        return float4(QuadReadAcrossDiagonal(val.x, positionSS), QuadReadAcrossDiagonal(val.y, positionSS), QuadReadAcrossDiagonal(val.z, positionSS), QuadReadAcrossDiagonal(val.w, positionSS));
 570    }
 571
 572TEMPLATE_SWAP(Swap) // Define a Swap(a, b) function for all types
 573
 574#define CUBEMAPFACE_POSITIVE_X 0
 575#define CUBEMAPFACE_NEGATIVE_X 1
 576#define CUBEMAPFACE_POSITIVE_Y 2
 577#define CUBEMAPFACE_NEGATIVE_Y 3
 578#define CUBEMAPFACE_POSITIVE_Z 4
 579#define CUBEMAPFACE_NEGATIVE_Z 5
 580
 581#ifndef INTRINSIC_CUBEMAP_FACE_ID
 582float CubeMapFaceID(float3 dir)
 583{
 584    float faceID;
 585
 586    if (abs(dir.z) >= abs(dir.x) && abs(dir.z) >= abs(dir.y))
 587    {
 588        faceID = (dir.z < 0.0) ? CUBEMAPFACE_NEGATIVE_Z : CUBEMAPFACE_POSITIVE_Z;
 589    }
 590    else if (abs(dir.y) >= abs(dir.x))
 591    {
 592        faceID = (dir.y < 0.0) ? CUBEMAPFACE_NEGATIVE_Y : CUBEMAPFACE_POSITIVE_Y;
 593    }
 594    else
 595    {
 596        faceID = (dir.x < 0.0) ? CUBEMAPFACE_NEGATIVE_X : CUBEMAPFACE_POSITIVE_X;
 597    }
 598
 599    return faceID;
 600}
 601#endif // INTRINSIC_CUBEMAP_FACE_ID
 602
 603// Intrinsic isnan can't be used because it require /Gic to be enabled on fxc that we can't do. So use AnyIsNan instead
 604bool IsNaN(float x)
 605{
 606    return (asuint(x) & 0x7FFFFFFF) > 0x7F800000;
 607}
 608
 609bool AnyIsNaN(float2 v)
 610{
 611    return (IsNaN(v.x) || IsNaN(v.y));
 612}
 613
 614bool AnyIsNaN(float3 v)
 615{
 616    return (IsNaN(v.x) || IsNaN(v.y) || IsNaN(v.z));
 617}
 618
 619bool AnyIsNaN(float4 v)
 620{
 621    return (IsNaN(v.x) || IsNaN(v.y) || IsNaN(v.z) || IsNaN(v.w));
 622}
 623
 624bool IsInf(float x)
 625{
 626    return (asuint(x) & 0x7FFFFFFF) == 0x7F800000;
 627}
 628
 629bool AnyIsInf(float2 v)
 630{
 631    return (IsInf(v.x) || IsInf(v.y));
 632}
 633
 634bool AnyIsInf(float3 v)
 635{
 636    return (IsInf(v.x) || IsInf(v.y) || IsInf(v.z));
 637}
 638
 639bool AnyIsInf(float4 v)
 640{
 641    return (IsInf(v.x) || IsInf(v.y) || IsInf(v.z) || IsInf(v.w));
 642}
 643
 644bool IsFinite(float x)
 645{
 646    return (asuint(x) & 0x7F800000) != 0x7F800000;
 647}
 648
 649float SanitizeFinite(float x)
 650{
 651    return IsFinite(x) ? x : 0;
 652}
 653
 654bool IsPositiveFinite(float x)
 655{
 656    return asuint(x) < 0x7F800000;
 657}
 658
 659float SanitizePositiveFinite(float x)
 660{
 661    return IsPositiveFinite(x) ? x : 0;
 662}
 663
 664// ----------------------------------------------------------------------------
 665// Common math functions
 666// ----------------------------------------------------------------------------
 667
 668real DegToRad(real deg)
 669{
 670    return deg * (PI / 180.0);
 671}
 672
 673real RadToDeg(real rad)
 674{
 675    return rad * (180.0 / PI);
 676}
 677
 678// Square functions for cleaner code
 679TEMPLATE_1_FLT_HALF(Sq, x, return (x) * (x))
 680TEMPLATE_1_INT(Sq, x, return (x) * (x))
 681
 682bool IsPower2(uint x)
 683{
 684    return (x & (x - 1)) == 0;
 685}
 686
 687// Input [0, 1] and output [0, PI/2]
 688// 9 VALU
 689real FastACosPos(real inX)
 690{
 691    real x = abs(inX);
 692    real res = (0.0468878 * x + -0.203471) * x + 1.570796; // p(x)
 693    res *= sqrt(1.0 - x);
 694
 695    return res;
 696}
 697
 698// Ref: https://seblagarde.wordpress.com/2014/12/01/inverse-trigonometric-functions-gpu-optimization-for-amd-gcn-architecture/
 699// Input [-1, 1] and output [0, PI]
 700// 12 VALU
 701real FastACos(real inX)
 702{
 703    real res = FastACosPos(inX);
 704
 705    return (inX >= 0) ? res : PI - res; // Undo range reduction
 706}
 707
 708// Same cost as Acos + 1 FR
 709// Same error
 710// input [-1, 1] and output [-PI/2, PI/2]
 711real FastASin(real x)
 712{
 713    return HALF_PI - FastACos(x);
 714}
 715
 716// max absolute error 1.3x10^-3
 717// Eberly's odd polynomial degree 5 - respect bounds
 718// 4 VGPR, 14 FR (10 FR, 1 QR), 2 scalar
 719// input [0, infinity] and output [0, PI/2]
 720real FastATanPos(real x)
 721{
 722    real t0 = (x < 1.0) ? x : 1.0 / x;
 723    real t1 = t0 * t0;
 724    real poly = 0.0872929;
 725    poly = -0.301895 + poly * t1;
 726    poly = 1.0 + poly * t1;
 727    poly = poly * t0;
 728    return (x < 1.0) ? poly : HALF_PI - poly;
 729}
 730
 731// 4 VGPR, 16 FR (12 FR, 1 QR), 2 scalar
 732// input [-infinity, infinity] and output [-PI/2, PI/2]
 733real FastATan(real x)
 734{
 735    real t0 = FastATanPos(abs(x));
 736    return (x < 0.0) ? -t0 : t0;
 737}
 738
 739real FastAtan2(real y, real x)
 740{
 741    return FastATan(y / x) + real(y >= 0.0 ? PI : -PI) * (x < 0.0);
 742}
 743
 744#if (SHADER_TARGET >= 45)
 745uint FastLog2(uint x)
 746{
 747    return firstbithigh(x);
 748}
 749#endif
 750
// Using pow often results in a warning like this:
// "pow(f, e) will not work for negative f, use abs(f) or conditionally handle negative values if you expect them"
// PositivePow removes this warning when you know the value is positive or 0, and avoids inf/NaN.
 754// Note: https://msdn.microsoft.com/en-us/library/windows/desktop/bb509636(v=vs.85).aspx pow(0, >0) == 0
 755TEMPLATE_2_FLT_HALF(PositivePow, base, power, return pow(abs(base), power))
 756
 757// SafePositivePow: Same as pow(x,y) but considers x always positive and never exactly 0 such that
 758// SafePositivePow(0,y) will numerically converge to 1 as y -> 0, including SafePositivePow(0,0) returning 1.
 759//
 760// First, like PositivePow, SafePositivePow removes this warning for when you know the x value is positive or 0 and you know
 761// you avoid a NaN:
 762// ie you know that x == 0 and y > 0, such that pow(x,y) == pow(0, >0) == 0
 763// SafePositivePow(0, y) will however return close to 1 as y -> 0, see below.
 764//
 765// Also, pow(x,y) is most probably approximated as exp2(log2(x) * y), so pow(0,0) will give exp2(-inf * 0) == exp2(NaN) == NaN.
 766//
 767// SafePositivePow avoids NaN in allowing SafePositivePow(x,y) where (x,y) == (0,y) for any y including 0 by clamping x to a
 768// minimum of FLT_EPS. The consequences are:
 769//
 770// -As a replacement for pow(0,y) where y >= 1, the result of SafePositivePow(x,y) should be close enough to 0.
 771// -For cases where we substitute for pow(0,y) where 0 < y < 1, SafePositivePow(x,y) will quickly reach 1 as y -> 0, while
 772// normally pow(0,y) would give 0 instead of 1 for all 0 < y.
 773// eg: if we #define FLT_EPS  5.960464478e-8 (for fp32),
 774// SafePositivePow(0, 0.1)   = 0.1894646
 775// SafePositivePow(0, 0.01)  = 0.8467453
 776// SafePositivePow(0, 0.001) = 0.9835021
 777//
 778// Depending on the intended usage of pow(), this difference in behavior might be a moot point since:
 779// 1) by leaving "y" free to get to 0, we get a NaNs
 780// 2) the behavior of SafePositivePow() has more continuity when both x and y get closer together to 0, since
 781// when x is assured to be positive non-zero, pow(x,x) -> 1 as x -> 0.
 782//
 783// TL;DR: SafePositivePow(x,y) avoids NaN and is safe for positive (x,y) including (x,y) == (0,0),
 784//        but SafePositivePow(0, y) will return close to 1 as y -> 0, instead of 0, so watch out
 785//        for behavior depending on pow(0, y) giving always 0, especially for 0 < y < 1.
 786//
 787// Ref: https://msdn.microsoft.com/en-us/library/windows/desktop/bb509636(v=vs.85).aspx
 788TEMPLATE_2_FLT(SafePositivePow, base, power, return pow(max(abs(base), float(FLT_EPS)), power))
 789TEMPLATE_2_HALF(SafePositivePow, base, power, return pow(max(abs(base), min16float(HALF_EPS)), power))
 790
 791// Helpers for making shadergraph functions consider precision spec through the same $precision token used for variable types
 792TEMPLATE_2_FLT(SafePositivePow_float, base, power, return pow(max(abs(base), float(FLT_EPS)), power))
 793TEMPLATE_2_HALF(SafePositivePow_half, base, power, return pow(max(abs(base), min16float(HALF_EPS)), power))
 794
 795float Eps_float() { return FLT_EPS; }
 796float Min_float() { return FLT_MIN; }
 797float Max_float() { return FLT_MAX; }
 798half Eps_half() { return HALF_EPS; }
 799half Min_half() { return HALF_MIN; }
 800half Max_half() { return HALF_MAX; }
 801
 802// Compute the 'epsilon equal' relative to the scale of 'a' & 'b'.
 803// Farther to 0.0f 'a' or 'b' are, larger epsilon have to be.
 804bool NearlyEqual(float a, float b, float epsilon)
 805{
 806    return abs(a - b) / (abs(a) + abs(b)) < epsilon;
 807}
 808
// Relative comparison |a - b| / (|a| + |b|) with the tolerance fixed to FLT_EPS (float variants)
// or HALF_EPS (half variants).
// NOTE(review): when 'a' and 'b' are both 0 the expression is 0 / 0 = NaN, so the comparison is false - confirm callers do not rely on that case.
TEMPLATE_2_FLT(NearlyEqual_Float, a, b, return abs(a - b) / (abs(a) + abs(b)) < float(FLT_EPS))
TEMPLATE_2_HALF(NearlyEqual_Half, a, b, return abs(a - b) / (abs(a) + abs(b)) < min16float(HALF_EPS))
 811
 812// Composes a floating point value with the magnitude of 'x' and the sign of 's'.
 813// See the comment about FastSign() below.
 814float CopySign(float x, float s, bool ignoreNegZero = true)
 815{
 816    if (ignoreNegZero)
 817    {
 818        return (s >= 0) ? abs(x) : -abs(x);
 819    }
 820    else
 821    {
 822        uint negZero = 0x80000000u;
 823        uint signBit = negZero & asuint(s);
 824        return asfloat(BitFieldInsert(negZero, signBit, asuint(x)));
 825    }
 826}
 827
 828// Returns -1 for negative numbers and 1 for positive numbers.
 829// 0 can be handled in 2 different ways.
 830// The IEEE floating point standard defines 0 as signed: +0 and -0.
 831// However, mathematics typically treats 0 as unsigned.
 832// Therefore, we treat -0 as +0 by default: FastSign(+0) = FastSign(-0) = 1.
 833// If (ignoreNegZero = false), FastSign(-0, false) = -1.
 834// Note that the sign() function in HLSL implements signum, which returns 0 for 0.
 835float FastSign(float s, bool ignoreNegZero = true)
 836{
 837    return CopySign(1.0, s, ignoreNegZero);
 838}
 839
 840// Orthonormalizes the tangent frame using the Gram-Schmidt process.
 841// We assume that the normal is normalized and that the two vectors
 842// aren't collinear.
 843// Returns the new tangent (the normal is unaffected).
 844real3 Orthonormalize(real3 tangent, real3 normal)
 845{
 846    // TODO: use SafeNormalize()?
 847    return normalize(tangent - dot(tangent, normal) * normal);
 848}
 849
 850// [start, end] -> [0, 1] : (x - start) / (end - start) = x * rcpLength - (start * rcpLength)
 851TEMPLATE_3_FLT_HALF(Remap01, x, rcpLength, startTimesRcpLength, return saturate(x * rcpLength - startTimesRcpLength))
 852
 853// [start, end] -> [1, 0] : (end - x) / (end - start) = (end * rcpLength) - x * rcpLength
 854TEMPLATE_3_FLT_HALF(Remap10, x, rcpLength, endTimesRcpLength, return saturate(endTimesRcpLength - x * rcpLength))
 855
 856// Remap: [0.5 / size, 1 - 0.5 / size] -> [0, 1]
 857real2 RemapHalfTexelCoordTo01(real2 coord, real2 size)
 858{
 859    const real2 rcpLen              = size * rcp(size - 1);
 860    const real2 startTimesRcpLength = 0.5 * rcp(size - 1);
 861
 862    return Remap01(coord, rcpLen, startTimesRcpLength);
 863}
 864
 865// Remap: [0, 1] -> [0.5 / size, 1 - 0.5 / size]
 866real2 Remap01ToHalfTexelCoord(real2 coord, real2 size)
 867{
 868    const real2 start = 0.5 * rcp(size);
 869    const real2 len   = 1 - rcp(size);
 870
 871    return coord * len + start;
 872}
 873
 874// smoothstep that assumes that 'x' lies within the [0, 1] interval.
 875real Smoothstep01(real x)
 876{
 877    return x * x * (3 - (2 * x));
 878}
 879
 880real Smootherstep01(real x)
 881{
 882  return x * x * x * (x * (x * 6 - 15) + 10);
 883}
 884
 885real Smootherstep(real a, real b, real t)
 886{
 887    real r = rcp(b - a);
 888    real x = Remap01(t, r, a * r);
 889    return Smootherstep01(x);
 890}
 891
 892float3 NLerp(float3 A, float3 B, float t)
 893{
 894    return normalize(lerp(A, B, t));
 895}
 896
 897float Length2(float3 v)
 898{
 899    return dot(v, v);
 900}
 901
 902#ifndef BUILTIN_TARGET_API
 903real Pow4(real x)
 904{
 905    return (x * x) * (x * x);
 906}
 907#endif
 908
 909TEMPLATE_3_FLT(RangeRemap, min, max, t, return saturate((t - min) / (max - min)))
 910TEMPLATE_3_FLT(RangeRemapFrom01, min, max, t,  return (max - min) * t + min)
 911
// General 4x4 matrix inverse computed from cofactor expressions divided by the determinant.
// There is no singularity check: if the determinant is 0 the division below produces inf/NaN entries.
float4x4 Inverse(float4x4 m)
{
    // Local nRC holds m[C-1][R-1], i.e. the locals are read with the two indices swapped.
    float n11 = m[0][0], n12 = m[1][0], n13 = m[2][0], n14 = m[3][0];
    float n21 = m[0][1], n22 = m[1][1], n23 = m[2][1], n24 = m[3][1];
    float n31 = m[0][2], n32 = m[1][2], n33 = m[2][2], n34 = m[3][2];
    float n41 = m[0][3], n42 = m[1][3], n43 = m[2][3], n44 = m[3][3];

    // Cofactor terms shared between the determinant and the first entry of each output row.
    float t11 = n23 * n34 * n42 - n24 * n33 * n42 + n24 * n32 * n43 - n22 * n34 * n43 - n23 * n32 * n44 + n22 * n33 * n44;
    float t12 = n14 * n33 * n42 - n13 * n34 * n42 - n14 * n32 * n43 + n12 * n34 * n43 + n13 * n32 * n44 - n12 * n33 * n44;
    float t13 = n13 * n24 * n42 - n14 * n23 * n42 + n14 * n22 * n43 - n12 * n24 * n43 - n13 * n22 * n44 + n12 * n23 * n44;
    float t14 = n14 * n23 * n32 - n13 * n24 * n32 - n14 * n22 * n33 + n12 * n24 * n33 + n13 * n22 * n34 - n12 * n23 * n34;

    // Determinant from the four shared terms; idet scales every output entry.
    float det = n11 * t11 + n21 * t12 + n31 * t13 + n41 * t14;
    float idet = 1.0f / det;

    float4x4 ret;

    ret[0][0] = t11 * idet;
    ret[0][1] = (n24 * n33 * n41 - n23 * n34 * n41 - n24 * n31 * n43 + n21 * n34 * n43 + n23 * n31 * n44 - n21 * n33 * n44) * idet;
    ret[0][2] = (n22 * n34 * n41 - n24 * n32 * n41 + n24 * n31 * n42 - n21 * n34 * n42 - n22 * n31 * n44 + n21 * n32 * n44) * idet;
    ret[0][3] = (n23 * n32 * n41 - n22 * n33 * n41 - n23 * n31 * n42 + n21 * n33 * n42 + n22 * n31 * n43 - n21 * n32 * n43) * idet;

    ret[1][0] = t12 * idet;
    ret[1][1] = (n13 * n34 * n41 - n14 * n33 * n41 + n14 * n31 * n43 - n11 * n34 * n43 - n13 * n31 * n44 + n11 * n33 * n44) * idet;
    ret[1][2] = (n14 * n32 * n41 - n12 * n34 * n41 - n14 * n31 * n42 + n11 * n34 * n42 + n12 * n31 * n44 - n11 * n32 * n44) * idet;
    ret[1][3] = (n12 * n33 * n41 - n13 * n32 * n41 + n13 * n31 * n42 - n11 * n33 * n42 - n12 * n31 * n43 + n11 * n32 * n43) * idet;

    ret[2][0] = t13 * idet;
    ret[2][1] = (n14 * n23 * n41 - n13 * n24 * n41 - n14 * n21 * n43 + n11 * n24 * n43 + n13 * n21 * n44 - n11 * n23 * n44) * idet;
    ret[2][2] = (n12 * n24 * n41 - n14 * n22 * n41 + n14 * n21 * n42 - n11 * n24 * n42 - n12 * n21 * n44 + n11 * n22 * n44) * idet;
    ret[2][3] = (n13 * n22 * n41 - n12 * n23 * n41 - n13 * n21 * n42 + n11 * n23 * n42 + n12 * n21 * n43 - n11 * n22 * n43) * idet;

    ret[3][0] = t14 * idet;
    ret[3][1] = (n13 * n24 * n31 - n14 * n23 * n31 + n14 * n21 * n33 - n11 * n24 * n33 - n13 * n21 * n34 + n11 * n23 * n34) * idet;
    ret[3][2] = (n14 * n22 * n31 - n12 * n24 * n31 - n14 * n21 * n32 + n11 * n24 * n32 + n12 * n21 * n34 - n11 * n22 * n34) * idet;
    ret[3][3] = (n12 * n23 * n31 - n13 * n22 * n31 + n13 * n21 * n32 - n11 * n23 * n32 - n12 * n21 * n33 + n11 * n22 * n33) * idet;

    return ret;
}
 951
 952float Remap(float origFrom, float origTo, float targetFrom, float targetTo, float value)
 953{
 954    return lerp(targetFrom, targetTo, (value - origFrom) / (origTo - origFrom));
 955}
 956
 957// ----------------------------------------------------------------------------
 958// Texture utilities
 959// ----------------------------------------------------------------------------
 960
 961float ComputeTextureLOD(float2 uvdx, float2 uvdy, float2 scale, float bias = 0.0)
 962{
 963    float2 ddx_ = scale * uvdx;
 964    float2 ddy_ = scale * uvdy;
 965    float  d    = max(dot(ddx_, ddx_), dot(ddy_, ddy_));
 966
 967    return max(0.5 * log2(d) - bias, 0.0);
 968}
 969
 970float ComputeTextureLOD(float2 uv, float bias = 0.0)
 971{
 972    float2 ddx_ = ddx(uv);
 973    float2 ddy_ = ddy(uv);
 974
 975    return ComputeTextureLOD(ddx_, ddy_, 1.0, bias);
 976}
 977
 978// x contains width, w contains height
 979float ComputeTextureLOD(float2 uv, float2 texelSize, float bias = 0.0)
 980{
 981    uv *= texelSize;
 982
 983    return ComputeTextureLOD(uv, bias);
 984}
 985
 986// LOD clamp is optional and happens outside the function.
 987float ComputeTextureLOD(float3 duvw_dx, float3 duvw_dy, float3 duvw_dz, float scale, float bias = 0.0)
 988{
 989    float d = Max3(dot(duvw_dx, duvw_dx), dot(duvw_dy, duvw_dy), dot(duvw_dz, duvw_dz));
 990
 991    return max(0.5f * log2(d * (scale * scale)) - bias, 0.0);
 992}
 993
 994#if defined(SHADER_API_D3D11) || defined(SHADER_API_D3D12) || defined(SHADER_API_D3D11_9X) || defined(SHADER_API_XBOXONE) || defined(SHADER_API_PSSL) || defined(SHADER_API_METAL)
 995    #define MIP_COUNT_SUPPORTED 1
 996#endif
 997    // TODO: Bug workaround, switch defines GLCORE when it shouldn't
 998#if ((defined(SHADER_API_GLCORE) && !defined(SHADER_API_SWITCH)) || defined(SHADER_API_VULKAN)) && !defined(SHADER_STAGE_COMPUTE)
 999    // OpenGL only supports textureSize for width, height, depth
1000    // textureQueryLevels (GL_ARB_texture_query_levels) needs OpenGL 4.3 or above and doesn't compile in compute shaders
1001    // tex.GetDimensions converted to textureQueryLevels
1002    #define MIP_COUNT_SUPPORTED 1
1003#endif
1004    // Metal doesn't support high enough OpenGL version
1005
// Returns the number of mip levels of 'tex' as reported by GetDimensions().
// When MIP_COUNT_SUPPORTED is not defined the query is skipped and 0 is returned.
uint GetMipCount(TEXTURE2D_PARAM(tex, smp))
{
#if defined(MIP_COUNT_SUPPORTED)
    uint mipLevel = 0;
    uint width    = 0;
    uint height   = 0;
    uint mipCount = 0;

    // Query mip 0; only the mip count output is used.
    tex.GetDimensions(mipLevel, width, height, mipCount);
    return mipCount;
#else
    return 0;
#endif
}
1017
1018// ----------------------------------------------------------------------------
1019// Texture format sampling
1020// ----------------------------------------------------------------------------
1021
1022// DXC no longer supports DX9-style HLSL syntax for sampler2D, tex2D and the like.
1023// These are emulated for backwards compatibility using our own small structs and functions which manually combine samplers and textures.
1024#if defined(UNITY_COMPILER_DXC) && !defined(DXC_SAMPLER_COMPATIBILITY)
1025#define DXC_SAMPLER_COMPATIBILITY 1
1026
// On DXC platforms which don't care about explicit sampler precision we want the emulated types to work directly, e.g. without needing to redefine 'sampler2D' to 'sampler2D_f'
1028#if !defined(SHADER_API_GLES3) && !defined(SHADER_API_VULKAN) && !defined(SHADER_API_METAL) && !defined(SHADER_API_SWITCH) && !defined(SHADER_API_WEBGPU)
1029    #define sampler1D_f sampler1D
1030    #define sampler2D_f sampler2D
1031    #define sampler3D_f sampler3D
1032    #define samplerCUBE_f samplerCUBE
1033#endif
1034
// Full precision emulated samplers: each struct pairs a float4 texture object (t) with the
// sampler state (s) used to read it.
struct sampler1D_f      { Texture1D<float4> t; SamplerState s; };
struct sampler2D_f      { Texture2D<float4> t; SamplerState s; };
struct sampler3D_f      { Texture3D<float4> t; SamplerState s; };
struct samplerCUBE_f    { TextureCube<float4> t; SamplerState s; };

// Plain sampling at coordinate 'v'.
float4 tex1D(sampler1D_f x, float v)        { return x.t.Sample(x.s, v); }
float4 tex2D(sampler2D_f x, float2 v)       { return x.t.Sample(x.s, v); }
float4 tex3D(sampler3D_f x, float3 v)       { return x.t.Sample(x.s, v); }
float4 texCUBE(samplerCUBE_f x, float3 v)   { return x.t.Sample(x.s, v); }

// Biased sampling: the leading components of 't' are the coordinate, t.w is passed as the bias.
float4 tex1Dbias(sampler1D_f x, in float4 t)        { return x.t.SampleBias(x.s, t.x, t.w); }
float4 tex2Dbias(sampler2D_f x, in float4 t)        { return x.t.SampleBias(x.s, t.xy, t.w); }
float4 tex3Dbias(sampler3D_f x, in float4 t)        { return x.t.SampleBias(x.s, t.xyz, t.w); }
float4 texCUBEbias(samplerCUBE_f x, in float4 t)    { return x.t.SampleBias(x.s, t.xyz, t.w); }

// Explicit LOD sampling: the leading components of 't' are the coordinate, t.w is passed as the level.
float4 tex1Dlod(sampler1D_f x, in float4 t)     { return x.t.SampleLevel(x.s, t.x, t.w); }
float4 tex2Dlod(sampler2D_f x, in float4 t)     { return x.t.SampleLevel(x.s, t.xy, t.w); }
float4 tex3Dlod(sampler3D_f x, in float4 t)     { return x.t.SampleLevel(x.s, t.xyz, t.w); }
float4 texCUBElod(samplerCUBE_f x, in float4 t) { return x.t.SampleLevel(x.s, t.xyz, t.w); }

// Sampling with explicit gradients 'dx' and 'dy'.
float4 tex1Dgrad(sampler1D_f x, float t, float dx, float dy)        { return x.t.SampleGrad(x.s, t, dx, dy); }
float4 tex2Dgrad(sampler2D_f x, float2 t, float2 dx, float2 dy)     { return x.t.SampleGrad(x.s, t, dx, dy); }
float4 tex3Dgrad(sampler3D_f x, float3 t, float3 dx, float3 dy)     { return x.t.SampleGrad(x.s, t, dx, dy); }
float4 texCUBEgrad(samplerCUBE_f x, float3 t, float3 dx, float3 dy) { return x.t.SampleGrad(x.s, t, dx, dy); }

// Overloads of the plain names that take gradients; they behave exactly like the *grad variants.
float4 tex1D(sampler1D_f x, float t, float dx, float dy)        { return x.t.SampleGrad(x.s, t, dx, dy); }
float4 tex2D(sampler2D_f x, float2 t, float2 dx, float2 dy)     { return x.t.SampleGrad(x.s, t, dx, dy); }
float4 tex3D(sampler3D_f x, float3 t, float3 dx, float3 dy)     { return x.t.SampleGrad(x.s, t, dx, dy); }
float4 texCUBE(samplerCUBE_f x, float3 t, float3 dx, float3 dy) { return x.t.SampleGrad(x.s, t, dx, dy); }

// Projective sampling: the coordinate is divided by the last component of 't' before sampling.
float4 tex1Dproj(sampler1D_f s, in float2 t)        { return tex1D(s, t.x / t.y); }
float4 tex1Dproj(sampler1D_f s, in float4 t)        { return tex1D(s, t.x / t.w); }
float4 tex2Dproj(sampler2D_f s, in float3 t)        { return tex2D(s, t.xy / t.z); }
float4 tex2Dproj(sampler2D_f s, in float4 t)        { return tex2D(s, t.xy / t.w); }
float4 tex3Dproj(sampler3D_f s, in float4 t)        { return tex3D(s, t.xyz / t.w); }
float4 texCUBEproj(samplerCUBE_f s, in float4 t)    { return texCUBE(s, t.xyz / t.w); }
1071
// Half precision emulated samplers, used instead of the sampler.*_half unity types.
// Each struct pairs a min16float4 texture object (t) with the sampler state (s) used to read it.
struct sampler1D_h      { Texture1D<min16float4> t; SamplerState s; };
struct sampler2D_h      { Texture2D<min16float4> t; SamplerState s; };
struct sampler3D_h      { Texture3D<min16float4> t; SamplerState s; };
struct samplerCUBE_h    { TextureCube<min16float4> t; SamplerState s; };

// Plain sampling at coordinate 'v'.
min16float4 tex1D(sampler1D_h x, float v)       { return x.t.Sample(x.s, v); }
min16float4 tex2D(sampler2D_h x, float2 v)      { return x.t.Sample(x.s, v); }
min16float4 tex3D(sampler3D_h x, float3 v)      { return x.t.Sample(x.s, v); }
min16float4 texCUBE(samplerCUBE_h x, float3 v)  { return x.t.Sample(x.s, v); }

// Biased sampling: the leading components of 't' are the coordinate, t.w is passed as the bias.
min16float4 tex1Dbias(sampler1D_h x, in float4 t)       { return x.t.SampleBias(x.s, t.x, t.w); }
min16float4 tex2Dbias(sampler2D_h x, in float4 t)       { return x.t.SampleBias(x.s, t.xy, t.w); }
min16float4 tex3Dbias(sampler3D_h x, in float4 t)       { return x.t.SampleBias(x.s, t.xyz, t.w); }
min16float4 texCUBEbias(samplerCUBE_h x, in float4 t)   { return x.t.SampleBias(x.s, t.xyz, t.w); }

// Explicit LOD sampling: the leading components of 't' are the coordinate, t.w is passed as the level.
min16float4 tex1Dlod(sampler1D_h x, in float4 t)        { return x.t.SampleLevel(x.s, t.x, t.w); }
min16float4 tex2Dlod(sampler2D_h x, in float4 t)        { return x.t.SampleLevel(x.s, t.xy, t.w); }
min16float4 tex3Dlod(sampler3D_h x, in float4 t)        { return x.t.SampleLevel(x.s, t.xyz, t.w); }
min16float4 texCUBElod(samplerCUBE_h x, in float4 t)    { return x.t.SampleLevel(x.s, t.xyz, t.w); }

// Sampling with explicit gradients 'dx' and 'dy'.
min16float4 tex1Dgrad(sampler1D_h x, float t, float dx, float dy)           { return x.t.SampleGrad(x.s, t, dx, dy); }
min16float4 tex2Dgrad(sampler2D_h x, float2 t, float2 dx, float2 dy)        { return x.t.SampleGrad(x.s, t, dx, dy); }
min16float4 tex3Dgrad(sampler3D_h x, float3 t, float3 dx, float3 dy)        { return x.t.SampleGrad(x.s, t, dx, dy); }
min16float4 texCUBEgrad(samplerCUBE_h x, float3 t, float3 dx, float3 dy)    { return x.t.SampleGrad(x.s, t, dx, dy); }

// Overloads of the plain names that take gradients; they behave exactly like the *grad variants.
min16float4 tex1D(sampler1D_h x, float t, float dx, float dy)           { return x.t.SampleGrad(x.s, t, dx, dy); }
min16float4 tex2D(sampler2D_h x, float2 t, float2 dx, float2 dy)        { return x.t.SampleGrad(x.s, t, dx, dy); }
min16float4 tex3D(sampler3D_h x, float3 t, float3 dx, float3 dy)        { return x.t.SampleGrad(x.s, t, dx, dy); }
min16float4 texCUBE(samplerCUBE_h x, float3 t, float3 dx, float3 dy)    { return x.t.SampleGrad(x.s, t, dx, dy); }

// Projective sampling: the coordinate is divided by the last component of 't' before sampling.
min16float4 tex1Dproj(sampler1D_h s, in float2 t)       { return tex1D(s, t.x / t.y); }
min16float4 tex1Dproj(sampler1D_h s, in float4 t)       { return tex1D(s, t.x / t.w); }
min16float4 tex2Dproj(sampler2D_h s, in float3 t)       { return tex2D(s, t.xy / t.z); }
min16float4 tex2Dproj(sampler2D_h s, in float4 t)       { return tex2D(s, t.xy / t.w); }
min16float4 tex3Dproj(sampler3D_h s, in float4 t)       { return tex3D(s, t.xyz / t.w); }
min16float4 texCUBEproj(samplerCUBE_h s, in float4 t)   { return texCUBE(s, t.xyz / t.w); }
1109#endif
1110
// Converts a (not necessarily normalized) direction into lat-long texture coordinates.
// The azimuth is measured in the (-Z, X) frame: negative Z is the primary axis and X is the secondary axis.
float2 DirectionToLatLongCoordinate(float3 unDir)
{
    const float3 dir = normalize(unDir);

    const float u = 1.0 - 0.5 * INV_PI * atan2(dir.x, -dir.z);
    const float v = asin(dir.y) * INV_PI + 0.5;

    return float2(u, v);
}
1117
// Converts lat-long coordinates into a direction.
// coord.y maps to the polar angle theta = coord.y * PI, coord.x maps to the azimuth
// phi = coord.x * 2 * PI - PI / 2. The X and Y components of the result are negated.
float3 LatlongToDirectionCoordinate(float2 coord)
{
    const float theta = coord.y * PI;
    const float phi   = (coord.x * 2.f * PI - PI*0.5f);

    const float cosTheta = cos(theta);
    // sin(theta) from cos(theta); the min() keeps the sqrt argument from going negative.
    const float sinTheta = sqrt(1.0 - min(1.0, cosTheta*cosTheta));
    const float cosPhi   = cos(phi);
    const float sinPhi   = sin(phi);

    return float3(-(sinTheta*cosPhi), -cosTheta, sinTheta*sinPhi);
}
1132
// Returns the 2D unit direction (cos, sin) for the angle 'orientation'.
float2 OrientationToDirection(float orientation)
{
    const float dirX = cos(orientation);
    const float dirY = sin(orientation);
    return float2(dirX, dirY);
}
1137
1138// ----------------------------------------------------------------------------
1139// Depth encoding/decoding
1140// ----------------------------------------------------------------------------
1141
// Z buffer to linear 0..1 depth (0 at near plane, 1 at far plane).
// Does NOT correctly handle oblique view frustums.
// Does NOT work with orthographic projection.
// zBufferParam (UNITY_REVERSED_Z) = { f/n - 1,   1, (1/n - 1/f), 1/f }
// zBufferParam                    = { 1 - f/n, f/n, (1/f - 1/n), 1/n }
float Linear01DepthFromNear(float depth, float4 zBufferParam)
{
    const float denominator = zBufferParam.x * depth + zBufferParam.y;

    // With reversed Z the buffer value is flipped (1 - depth) before the division.
    #if UNITY_REVERSED_Z
    const float numerator = 1.0 - depth;
    #else
    const float numerator = depth;
    #endif

    return numerator / denominator;
}
1155
// Z buffer to linear 0..1 depth (0 at camera position, 1 at far plane).
// Does NOT work with orthographic projections.
// Does NOT correctly handle oblique view frustums.
// zBufferParam (UNITY_REVERSED_Z) = { f/n - 1,   1, (1/n - 1/f), 1/f }
// zBufferParam                    = { 1 - f/n, f/n, (1/f - 1/n), 1/n }
float Linear01Depth(float depth, float4 zBufferParam)
{
    const float denominator = zBufferParam.x * depth + zBufferParam.y;
    return 1.0 / denominator;
}
1165
// Z buffer to linear view space (eye) depth.
// Does NOT correctly handle oblique view frustums.
// Does NOT work with orthographic projection.
// zBufferParam (UNITY_REVERSED_Z) = { f/n - 1,   1, (1/n - 1/f), 1/f }
// zBufferParam                    = { 1 - f/n, f/n, (1/f - 1/n), 1/n }
float LinearEyeDepth(float depth, float4 zBufferParam)
{
    const float denominator = zBufferParam.z * depth + zBufferParam.w;
    return 1.0 / denominator;
}
1175
// Z buffer to linear depth.
// Correctly handles oblique view frustums.
// Does NOT work with orthographic projection.
// Ref: An Efficient Depth Linearization Method for Oblique View Frustums, Eq. 6.
float LinearEyeDepth(float2 positionNDC, float deviceDepth, float4 invProjParam)
{
    const float4 homogeneousNDC = float4(positionNDC, deviceDepth, 1.0);
    const float  viewSpaceZ     = rcp(dot(homogeneousNDC, invProjParam));

    // If the matrix is right-handed, the Z axis has to be flipped to get a positive value.
    return abs(viewSpaceZ);
}
1187
// Z buffer to linear depth.
// Works in all cases.
// Typically, this is the cheapest variant, provided you've already computed 'positionWS'.
// Assumes that the 'positionWS' is in front of the camera.
float LinearEyeDepth(float3 positionWS, float4x4 viewMatrix)
{
    const float4 positionVS = mul(viewMatrix, float4(positionWS, 1.0));

    // If the matrix is right-handed, the Z axis has to be flipped to get a positive value.
    return abs(positionVS.z);
}
1199
// 'z' is the view space Z position (linear depth).
// Apply saturate() to the output of the function to clamp it to the [0, 1] range.
// d = log2(c * (z - n) + 1) / log2(c * (f - n) + 1)
//   = log2(c * (z - n + 1/c)) / log2(c * (f - n) + 1)
//   = log2(c) / log2(c * (f - n) + 1) + log2(z - (n - 1/c)) / log2(c * (f - n) + 1)
//   = E + F * log2(z - G)
// encodingParams = { E, F, G, 0 }
float EncodeLogarithmicDepthGeneralized(float z, float4 encodingParams)
{
    const float E = encodingParams.x;
    const float F = encodingParams.y;
    const float G = encodingParams.z;

    // Use max() to avoid NaNs.
    const float logArgument = max(0, z - G);

    return E + F * log2(logArgument);
}
1212
// 'd' is the logarithmically encoded depth value.
// saturate(d) to clamp the output of the function to the [n, f] range.
// z = 1/c * (pow(c * (f - n) + 1, d) - 1) + n
//   = 1/c * pow(c * (f - n) + 1, d) + n - 1/c
//   = 1/c * exp2(d * log2(c * (f - n) + 1)) + (n - 1/c)
//   = L * exp2(d * M) + N
// decodingParams = { L, M, N, 0 }
// Graph: https://www.desmos.com/calculator/qrtatrlrba
float DecodeLogarithmicDepthGeneralized(float d, float4 decodingParams)
{
    const float L = decodingParams.x;
    const float M = decodingParams.y;
    const float N = decodingParams.z;

    return L * exp2(d * M) + N;
}
1225
// 'z' is the view-space Z position (linear depth).
// Apply saturate() to the output of the function to clamp it to the [0, 1] range.
// encodingParams = { n, log2(f/n), 1/n, 1/log2(f/n) }
// This is an optimized version of EncodeLogarithmicDepthGeneralized() for (c = 2).
float EncodeLogarithmicDepth(float z, float4 encodingParams)
{
    // Use max() to avoid NaNs.
    const float zOverNear = max(0, z * encodingParams.z);

    // TODO: optimize to (log2(z) - log2(n)) / (log2(f) - log2(n)).
    return log2(zOverNear) * encodingParams.w;
}
1236
// 'd' is the logarithmically encoded depth value.
// saturate(d) to clamp the output of the function to the [n, f] range.
// encodingParams = { n, log2(f/n), 1/n, 1/log2(f/n) }
// This is an optimized version of DecodeLogarithmicDepthGeneralized() for (c = 2).
// Graph: https://www.desmos.com/calculator/qrtatrlrba
float DecodeLogarithmicDepth(float d, float4 encodingParams)
{
    const float scale = exp2(d * encodingParams.y);

    // TODO: optimize to exp2(d * y + log2(x)).
    return encodingParams.x * scale;
}
1247
// Depth encoding with an infinite far plane: near / depth, saturated to [0, 1].
// https://chaosinmotion.com/2010/09/06/goodbye-far-clipping-plane/
// 'depth' is the linear depth (view-space Z position)
float EncodeInfiniteDepth(float depth, float near)
{
    const float nearOverDepth = near / depth;
    return saturate(nearOverDepth);
}
1255
// 'z' is the depth encoded in the depth buffer (1 at near plane, 0 at far plane).
// 'z' is clamped to at least FLT_EPS so that the division never hits zero.
float DecodeInfiniteDepth(float z, float near)
{
    const float safeZ = max(z, FLT_EPS);
    return near / safeZ;
}
1261
// "Over" compositing of 'front' on top of 'back': 'back' is weighted by (1 - front.a) and
// 'front' is added unscaled.
real4 CompositeOver(real4 front, real4 back)
{
    real backWeight = 1 - front.a;
    return front + backWeight * back;
}
1266
// "Over" compositing with a separate alpha per color channel.
// Both the back color and the back alpha are weighted by (1 - alphaFront) before being added
// to the front values; results are written to 'color' and 'alpha'.
void CompositeOver(real3 colorFront, real3 alphaFront,
                   real3 colorBack,  real3 alphaBack,
                   out real3 color,  out real3 alpha)
{
    const real3 backWeight = 1 - alphaFront;

    color = colorFront + backWeight * colorBack;
    alpha = alphaFront + backWeight * alphaBack;
}
1274
1275// ----------------------------------------------------------------------------
1276// Space transformations
1277// ----------------------------------------------------------------------------
1278
1279static const float3x3 k_identity3x3 = {1, 0, 0,
1280                                       0, 1, 0,
1281                                       0, 0, 1};
1282
1283static const float4x4 k_identity4x4 = {1, 0, 0, 0,
1284                                       0, 1, 0, 0,
1285                                       0, 0, 1, 0,
1286                                       0, 0, 0, 1};
1287
// Builds a clip space position (w = 1) from NDC coordinates in [0, 1] and a device depth:
// XY is remapped to [-1, 1] and the depth is passed through as Z.
float4 ComputeClipSpacePosition(float2 positionNDC, float deviceDepth)
{
    float2 xyCS = positionNDC * 2.0 - 1.0;

#if UNITY_UV_STARTS_AT_TOP
    // Our world space, view space, screen space and NDC space are Y-up.
    // Our clip space is flipped upside-down due to poor legacy Unity design.
    // The flip is baked into the projection matrix, so we only have to flip
    // manually when going from CS to NDC and back.
    xyCS.y = -xyCS.y;
#endif

    return float4(xyCS, deviceDepth, 1.0);
}
1302
// Transforms 'position' (extended with w = 1) by 'clipSpaceTransform'.
// Use case examples:
// (position = positionCS) => (clipSpaceTransform = use default)
// (position = positionVS) => (clipSpaceTransform = UNITY_MATRIX_P)
// (position = positionWS) => (clipSpaceTransform = UNITY_MATRIX_VP)
float4 ComputeClipSpacePosition(float3 position, float4x4 clipSpaceTransform = k_identity4x4)
{
    const float4 homogeneousPosition = float4(position, 1.0);
    return mul(clipSpaceTransform, homogeneousPosition);
}
1311
// Projects 'position' to NDC: XY ends up in [0, 1] for visible points, Z is the value after
// the perspective divide.
// The returned Z value is the depth buffer value (and NOT linear view space Z value).
// Use case examples:
// (position = positionCS) => (clipSpaceTransform = use default)
// (position = positionVS) => (clipSpaceTransform = UNITY_MATRIX_P)
// (position = positionWS) => (clipSpaceTransform = UNITY_MATRIX_VP)
float3 ComputeNormalizedDeviceCoordinatesWithZ(float3 position, float4x4 clipSpaceTransform = k_identity4x4)
{
    const float4 positionCS = ComputeClipSpacePosition(position, clipSpaceTransform);

    // Perspective divide.
    float3 ndc = positionCS.xyz * rcp(positionCS.w);

#if UNITY_UV_STARTS_AT_TOP
    // Our world space, view space, screen space and NDC space are Y-up.
    // Our clip space is flipped upside-down due to poor legacy Unity design.
    // The flip is baked into the projection matrix, so we only have to flip
    // manually when going from CS to NDC and back.
    ndc.y = -ndc.y;
#endif

    // [-1, 1] -> [0, 1]
    ndc.xy = ndc.xy * 0.5 + 0.5;

    return ndc;
}
1334
// XY-only variant of ComputeNormalizedDeviceCoordinatesWithZ().
// Use case examples:
// (position = positionCS) => (clipSpaceTransform = use default)
// (position = positionVS) => (clipSpaceTransform = UNITY_MATRIX_P)
// (position = positionWS) => (clipSpaceTransform = UNITY_MATRIX_VP)
float2 ComputeNormalizedDeviceCoordinates(float3 position, float4x4 clipSpaceTransform = k_identity4x4)
{
    const float3 ndcWithZ = ComputeNormalizedDeviceCoordinatesWithZ(position, clipSpaceTransform);
    return ndcWithZ.xy;
}
1343
// Reconstructs a view space position from NDC coordinates and a device depth value
// by un-projecting the clip space position with 'invProjMatrix'.
float3 ComputeViewSpacePosition(float2 positionNDC, float deviceDepth, float4x4 invProjMatrix)
{
    float4 hpositionVS = mul(invProjMatrix, ComputeClipSpacePosition(positionNDC, deviceDepth));

    // The view space uses a right-handed coordinate system, hence the Z flip.
    hpositionVS.z = -hpositionVS.z;

    // Homogeneous divide.
    return hpositionVS.xyz / hpositionVS.w;
}
1352
// Reconstructs a world space position from NDC coordinates and a device depth value
// by un-projecting the clip space position with 'invViewProjMatrix'.
float3 ComputeWorldSpacePosition(float2 positionNDC, float deviceDepth, float4x4 invViewProjMatrix)
{
    float4 hpositionWS = mul(invViewProjMatrix, ComputeClipSpacePosition(positionNDC, deviceDepth));

    // Homogeneous divide.
    return hpositionWS.xyz / hpositionWS.w;
}
1359
// Reconstructs a world space position from a homogeneous clip space position
// by applying 'invViewProjMatrix' followed by the homogeneous divide.
float3 ComputeWorldSpacePosition(float4 positionCS, float4x4 invViewProjMatrix)
{
    float4 unprojected = mul(invViewProjMatrix, positionCS);
    float3 positionWS  = unprojected.xyz / unprojected.w;
    return positionWS;
}
1365
1366// ----------------------------------------------------------------------------
1367// PositionInputs
1368// ----------------------------------------------------------------------------
1369
// Note: if you modify this struct, be sure to update the CustomPassFullscreenShader.template
struct PositionInputs
{
    // World space position (could be camera-relative).
    float3 positionWS;
    // Normalized screen coordinates within the viewport: [0, 1) (with the half-pixel offset).
    float2 positionNDC;
    // Screen space pixel coordinates: [0, NumPixels).
    uint2  positionSS;
    // Screen tile coordinates: [0, NumTiles).
    uint2  tileCoord;
    // Depth from the depth buffer: [0, 1] (typically reversed).
    float  deviceDepth;
    // View space Z coordinate: [Near, Far].
    float  linearDepth;
};
1380
// Provides an easy way to sample a screen texture from either a pixel or a compute shader,
// which makes it easy to share code between the two.
// When called from a compute shader, 'positionSS' is an integer usually computed as:
//   uint2 positionSS = groupId.xy * BLOCK_SIZE + groupThreadId.xy
// Otherwise it is the current unnormalized screen coordinate, as returned by SV_Position.
// The tile coordinates are passed explicitly so that they can easily be made lane-invariant
// for compute evaluation.
// Only positionNDC, positionSS and tileCoord are filled in; the other fields stay zeroed.
PositionInputs GetPositionInput(float2 positionSS, float2 invScreenSize, uint2 tileCoord)
{
    PositionInputs posInput;
    ZERO_INITIALIZE(PositionInputs, posInput);

    float2 pixelCoord = positionSS;
#if defined(SHADER_STAGE_COMPUTE) || defined(SHADER_STAGE_RAY_TRACING)
    // Compute and ray tracing stages pass integer positions: add a half-pixel offset
    // to move the position to the pixel center.
    pixelCoord += float2(0.5, 0.5);
#endif

    posInput.positionNDC = pixelCoord * invScreenSize;
    posInput.positionSS  = uint2(positionSS);
    posInput.tileCoord   = tileCoord;

    return posInput;
}
1401
// Convenience overload for callers that do not use tiles: the tile coordinates are set to (0, 0).
PositionInputs GetPositionInput(float2 positionSS, float2 invScreenSize)
{
    uint2 noTile = uint2(0, 0);
    return GetPositionInput(positionSS, invScreenSize, noTile);
}
1406
// For ray tracing only.
// Fills in the screen-related fields plus 'positionWS'; tile coordinates are (0, 0).
// deviceDepth and linearDepth are NOT computed by this overload (they keep the zero
// value set by the base GetPositionInput).
PositionInputs GetPositionInput(float2 positionSS, float2 invScreenSize, float3 positionWS)
{
    uint2 noTile = uint2(0, 0);

    PositionInputs result = GetPositionInput(positionSS, invScreenSize, noTile);
    result.positionWS = positionWS;
    return result;
}
1416
// Forward rendering path.
// 'deviceDepth' and 'linearDepth' come directly from .zw of SV_Position and are stored as-is,
// together with the caller-provided world space position and tile coordinates.
PositionInputs GetPositionInput(float2 positionSS, float2 invScreenSize, float deviceDepth, float linearDepth, float3 positionWS, uint2 tileCoord)
{
    PositionInputs result = GetPositionInput(positionSS, invScreenSize, tileCoord);

    result.positionWS  = positionWS;
    result.deviceDepth = deviceDepth;
    result.linearDepth = linearDepth;

    return result;
}
1428
// Forward rendering path without tiles: same as the overload taking 'tileCoord',
// with the tile coordinates set to (0, 0).
PositionInputs GetPositionInput(float2 positionSS, float2 invScreenSize, float deviceDepth, float linearDepth, float3 positionWS)
{
    uint2 noTile = uint2(0, 0);
    return GetPositionInput(positionSS, invScreenSize, deviceDepth, linearDepth, positionWS, noTile);
}
1433
// Deferred rendering path or compute shader.
// 'deviceDepth' must be the raw value read from the depth buffer. Reconstructing the position
// through the inverse view-projection matrix handles all kinds of depth encodings automatically.
// For information: in Unity, depth is always in the 0..1 range (even on OpenGL), but can be reversed.
PositionInputs GetPositionInput(float2 positionSS, float2 invScreenSize, float deviceDepth,
    float4x4 invViewProjMatrix, float4x4 viewMatrix,
    uint2 tileCoord)
{
    PositionInputs result = GetPositionInput(positionSS, invScreenSize, tileCoord);

    // World position is rebuilt from NDC + depth; linear depth is then derived from it.
    float3 reconstructedWS = ComputeWorldSpacePosition(result.positionNDC, deviceDepth, invViewProjMatrix);

    result.positionWS  = reconstructedWS;
    result.deviceDepth = deviceDepth;
    result.linearDepth = LinearEyeDepth(reconstructedWS, viewMatrix);

    return result;
}
1448
// Deferred / compute path without tiles: same as the overload taking 'tileCoord',
// with the tile coordinates set to (0, 0).
PositionInputs GetPositionInput(float2 positionSS, float2 invScreenSize, float deviceDepth,
                                float4x4 invViewProjMatrix, float4x4 viewMatrix)
{
    uint2 noTile = uint2(0, 0);
    return GetPositionInput(positionSS, invScreenSize, deviceDepth, invViewProjMatrix, viewMatrix, noTile);
}
1454
// Pushes the position stored in 'posInput' away from the camera and updates its depth values.
// The view direction 'V' points towards the camera.
// 'depthOffsetVS' is always applied in the opposite direction (-V).
void ApplyDepthOffsetPositionInput(float3 V, float depthOffsetVS, float3 viewForwardDir, float4x4 viewProjMatrix, inout PositionInputs posInput)
{
    // Move the world space position along -V.
    float3 offsetWS = depthOffsetVS * (-V);
    posInput.positionWS += offsetWS;

    // Re-project the displaced position to obtain the matching depth buffer value.
    float3 ndcWithZ = ComputeNormalizedDeviceCoordinatesWithZ(posInput.positionWS, viewProjMatrix);
    posInput.deviceDepth = ndcWithZ.z;

    // Convert the displacement along the view vector into a displacement along the forward vector.
    // abs() guarantees the right sign: 'depthOffsetVS' applies in the direction away from the camera.
    float viewToForward = abs(dot(V, viewForwardDir));
    posInput.linearDepth += depthOffsetVS * viewToForward;
}
1467
1468// ----------------------------------------------------------------------------
1469// Terrain/Brush heightmap encoding/decoding
1470// ----------------------------------------------------------------------------
1471
1472#if defined(SHADER_API_VULKAN) || defined(SHADER_API_GLES3) || defined(SHADER_API_WEBGPU)
1473
1474// For the built-in target this is already a defined symbol
1475#ifndef BUILTIN_TARGET_API
// Packs a height value into the R (low byte) and G (high byte) channels as a 16-bit integer.
// The height is scaled by 65535 and truncated to an unsigned integer; B and A are set to 0.
real4 PackHeightmap(real height)
{
    uint quantized = (uint)(65535.0 * height);
    uint lowByte   = (quantized >> 0) & 0xFF;
    uint highByte  = (quantized >> 8) & 0xFF;
    return real4(lowByte, highByte, 0, 0) / 255.0;
}
1481
// Rebuilds the height from the R (low byte) and G (high byte) channels.
// Equivalent to (255.0 * r + 255.0 * 256.0 * g) / 65535.0, simplified using 65535 = 255 * 257.
real UnpackHeightmap(real4 height)
{
    real lowPart  = height.r;
    real highPart = height.g;
    return (lowPart + highPart * 256.0) / 257.0;
}
1486#endif
1487
1488#else
1489
1490// For the built-in target this is already a defined symbol
1491#ifndef BUILTIN_TARGET_API
// Pass-through variant: the height is stored unmodified in the R channel, other channels are 0.
real4 PackHeightmap(real height)
{
    real4 packed = real4(height, 0, 0, 0);
    return packed;
}
1496
// Pass-through variant: the height is read back directly from the first (R) channel.
real UnpackHeightmap(real4 height)
{
    return height.x;
}
1501#endif
1502
1503#endif
1504
1505// ----------------------------------------------------------------------------
1506// Misc utilities
1507// ----------------------------------------------------------------------------
1508
// Tests a bitfield: returns true if at least one bit of 'flag' is set in 'bitfield'.
bool HasFlag(uint bitfield, uint flag)
{
    uint masked = bitfield & flag;
    return masked != 0;
}
1514
// Normalization that accounts for zero-length vectors: the squared length is clamped
// to FLT_MIN before the reciprocal square root, so a zero vector does not divide by zero.
float3 SafeNormalize(float3 inVec)
{
    float lengthSq  = dot(inVec, inVec);
    float invLength = rsqrt(max(FLT_MIN, lengthSq));
    return inVec * invLength;
}
1521
// Half precision variant: the squared length is clamped to HALF_MIN before the
// reciprocal square root, so a zero vector does not divide by zero.
half3 SafeNormalize(half3 inVec)
{
    half lengthSq  = dot(inVec, inVec);
    half invLength = rsqrt(max(HALF_MIN, lengthSq));
    return inVec * invLength;
}
1527
// Returns true if the squared length of 'inVec' lies strictly inside (0.9998, 1.0002001).
bool IsNormalized(float3 inVec)
{
    float lengthSq = dot(inVec, inVec);
    return lengthSq > 0.9998 && lengthSq < 1.0002001;
}
1533
// Half precision variant with a wider tolerance: returns true if the squared length
// of 'inVec' lies strictly inside (0.998, 1.002).
bool IsNormalized(half3 inVec)
{
    half lengthSq = dot(inVec, inVec);
    return lengthSq > 0.998 && lengthSq < 1.002;
}
1539
// Division which returns 1 when both operands compare equal, i.e. also for (inf/inf) and (0/0).
// If any of the input parameters is a NaN, the equality test fails and the result is a NaN.
real SafeDiv(real numer, real denom)
{
    return (numer == denom) ? 1 : numer / denom;
}
1546
// Square root that is safe for negative inputs: the argument is clamped to 0 first,
// so the result is never the square root of a negative number.
real SafeSqrt(real x)
{
    real nonNegative = max(0, x);
    return sqrt(nonNegative);
}
1552
// Computes sin(x) from cos(x) as sqrt(1 - cos^2(x)).
// Assumes that (0 <= x <= Pi), where the sine is non-negative.
// The saturate() keeps the radicand inside [0, 1].
real SinFromCos(real cosX)
{
    real sinSq = saturate(1 - cosX * cosX);
    return sqrt(sinSq);
}
1558
// Dot product of two directions given in spherical coordinates
// (cosine of the polar angle, and azimuthal angle phi).
real SphericalDot(real cosTheta1, real phi1, real cosTheta2, real phi2)
{
    real sinTheta1 = SinFromCos(cosTheta1);
    real sinTheta2 = SinFromCos(cosTheta2);
    return sinTheta1 * sinTheta2 * cos(phi1 - phi2) + cosTheta1 * cosTheta2;
}
1564
// Texture coordinates for the vertices of a single full-screen triangle
// (see GetFullScreenTriangleVertexPosition for the matching positions).
// For vertexID 0, 1, 2 the result is (0, 0), (2, 0), (0, 2);
// when UNITY_UV_STARTS_AT_TOP is set, V is flipped: (0, 1), (2, 1), (0, -1).
float2 GetFullScreenTriangleTexCoord(uint vertexID)
{
    uint uBits = (vertexID << 1) & 2;
    uint vBits = vertexID & 2;
#if UNITY_UV_STARTS_AT_TOP
    return float2(uBits, 1.0 - vBits);
#else
    return float2(uBits, vBits);
#endif
}
1575
// Generates the vertices of a full-screen triangle in homogeneous clip space (w = 1):
// XY = (-1, -1), (3, -1), (-1, 3) for vertexID 0, 1, 2, at depth 'z'.
float4 GetFullScreenTriangleVertexPosition(uint vertexID, float z = UNITY_NEAR_CLIP_VALUE)
{
    // The triangle is twice the size of the screen, so the part with UVs in [0, 1]
    // maps to the [-1, 1] range on screen.
    uint xBits = (vertexID << 1) & 2;
    uint yBits = vertexID & 2;
    float2 uv = float2(xBits, yBits);

    float4 positionCS = float4(uv * 2.0 - 1.0, z, 1.0);
#ifdef UNITY_PRETRANSFORM_TO_DISPLAY_ORIENTATION
    positionCS = ApplyPretransformRotation(positionCS);
#endif
    return positionCS;
}
1586
1587
// Texture coordinates for a procedural quad drawn as 2 triangles with index order (0,1,2) (0,2,3).
// Before the optional V flip, the mapping is:
//   0 - 0,0
//   1 - 0,1
//   2 - 1,1
//   3 - 1,0
// When UNITY_UV_STARTS_AT_TOP is set, V is replaced by (1 - V).
float2 GetQuadTexCoord(uint vertexID)
{
    uint highBit = vertexID >> 1;
    uint lowBit  = vertexID & 1;

    // Parity of the two bits: 0 for indices 0,3 and 1 for indices 1,2.
    uint parity = (highBit + lowBit) & 1;

    float u = highBit;
    float v = parity;
#if UNITY_UV_STARTS_AT_TOP
    v = 1.0 - v;
#endif
    return float2(u, v);
}
1606
// Vertex positions (XY in [0, 1], w = 1) for a procedural quad, at depth 'z':
//   0 - 0,1
//   1 - 0,0
//   2 - 1,0
//   3 - 1,1
float4 GetQuadVertexPosition(uint vertexID, float z = UNITY_NEAR_CLIP_VALUE)
{
    uint highBit = vertexID >> 1;
    uint lowBit  = vertexID & 1;

    // 1 for indices 0,3 and 0 for indices 1,2.
    // Written with explicit parentheses: the unparenthesized form '1 - (a + b) & 1' parses as
    // '(1 - (a + b)) & 1', which yields the same value only because of unsigned parity.
    uint invParity = 1 - ((highBit + lowBit) & 1);

    float x = highBit;
    float y = invParity;

    float4 positionCS = float4(x, y, z, 1.0);
#ifdef UNITY_PRETRANSFORM_TO_DISPLAY_ORIENTATION
    positionCS = ApplyPretransformRotation(positionCS);
#endif
    return positionCS;
}
1623
1624#if !defined(SHADER_STAGE_RAY_TRACING)
1625
// LOD dithering transition helper.
// LOD0 must call this function with ditherFactor going 1..0.
// LOD1 must call this function with ditherFactor going -1..0.
// This is what unity_LODFade provides.
// The current pixel is discarded (via clip) when the computed value is negative.
void LODDitheringTransition(uint2 fadeMaskSeed, float ditherFactor)
{
    // Spatially varying pattern only: varying it over time as well confuses the TAA
    // and increases the amount of noise.
    float noise = GenerateHashedRandomFloat(fadeMaskSeed);

    // Giving the noise the sign of 'ditherFactor' preserves the symmetry,
    // s.t. if LOD 0 has f = x, LOD 1 has f = -x.
    float signedNoise = CopySign(noise, ditherFactor);
    clip(ditherFactor - signedNoise);
}
1640
1641#endif
1642
// The resource bound when reading the stencil buffer through the depth buffer has two channels.
// On D3D11 (and the Xbox One / GameCore targets) the stencil value is in the green channel,
// while on other APIs it is in the red channel. Always using the green channel may work on
// some platforms, but this is not guaranteed.
uint GetStencilValue(uint2 stencilBufferVal)
{
    uint stencil;
#if defined(SHADER_API_D3D11) || defined(SHADER_API_XBOXONE) || defined(SHADER_API_GAMECORE)
    stencil = stencilBufferVal.y;
#else
    stencil = stencilBufferVal.x;
#endif
    return stencil;
}
1653
// Sharpens the alpha of a texture to the width of a single pixel.
// Used for alpha to coverage.
// source: https://medium.com/@bgolus/anti-aliased-alpha-test-the-esoteric-alpha-to-coverage-8b177335ae4f
float SharpenAlpha(float alpha, float alphaClipTreshold)
{
    // Screen space rate of change of alpha, floored at 0.0001 to avoid dividing by zero.
    float alphaWidth = max(fwidth(alpha), 0.0001);
    float aboveClip  = alpha - alphaClipTreshold;
    return saturate(aboveClip / alphaWidth + 0.5);
}
1661
// These functions clamp a value to the maximum of a 16-bit floating point number (HALF_MAX).
// They are used to prevent INF from appearing in the code in case of extreme values.
// NOTE(review): TEMPLATE_1_FLT is defined elsewhere; presumably it instantiates
// ClampToFloat16Max for each floating point scalar/vector type - confirm against the macro.
TEMPLATE_1_FLT(ClampToFloat16Max, value, return min(value, HALF_MAX))
1664
// Restore warning 3205 under the same condition that disables it at the top of this file.
// The disable is also guarded by defined(UNITY_UNIFIED_SHADER_PRECISION_MODEL); without the
// matching term here, the suppression would leak into every shader that includes this header
// when only that macro is defined.
#if SHADER_API_MOBILE || SHADER_API_GLES3 || SHADER_API_SWITCH || defined(UNITY_UNIFIED_SHADER_PRECISION_MODEL)
#pragma warning (enable : 3205) // conversion of larger type to smaller
#endif
1668
// Remaps a (u, v) coordinate lying in the 3x3 neighborhood of the unit square back into it.
// Each axis is classified as "below 0", "inside [0, 1)" or "1 and above", and every one of the
// nine cells applies its own shift/mirror. Coordinates already inside the unit square are
// returned unchanged.
// NOTE(review): judging by the name, this implements wrap-around addressing for octahedral
// maps - confirm against the callers.
float2 RepeatOctahedralUV(float u, float v)
{
    if (v < 0.0f)
    {
        // Row below the unit square.
        if (u < 0.0f)
            return float2(1.0f + u, 1.0f + v);
        if (u < 1.0f)
            return float2(1.0f - u, -v);
        return float2(u - 1.0f, 1.0f + v);
    }

    if (v < 1.0f)
    {
        // Row containing the unit square.
        if (u < 0.0f)
            return float2(-u, 1.0f - v);
        if (u < 1.0f)
            return float2(u, v);
        return float2(2.0f - u, 1.0f - v);
    }

    // Row above the unit square.
    if (u < 0.0f)
        return float2(1.0f + u, v - 1.0f);
    if (u < 1.0f)
        return float2(1.0f - u, 2.0f - v);
    return float2(u - 1.0f, v - 1.0f);
}
1703
1704#endif // UNITY_COMMON_INCLUDED