A game about forced loneliness, made by TACStudios
1#ifndef UNITY_COMMON_INCLUDED
2#define UNITY_COMMON_INCLUDED
3
4#if SHADER_API_MOBILE || SHADER_API_GLES3 || SHADER_API_SWITCH || defined(UNITY_UNIFIED_SHADER_PRECISION_MODEL)
5#pragma warning (disable : 3205) // conversion of larger type to smaller
6#endif
7
8// Convention:
9
10// Unity is Y up and left handed in world space
11// Caution: When going from world space to view space, unity is right handed in view space and the determinant of the matrix is negative
12// For cubemap capture (reflection probe) view space is still left handed (cubemap convention) and the determinant is positive.
13
14// The lighting code assume that 1 Unity unit (1uu) == 1 meters. This is very important regarding physically based light unit and inverse square attenuation
15
16// space at the end of the variable name
17// WS: world space
// RWS: Camera-relative world space. A space where the translation of the camera has already been subtracted in order to improve precision
19// VS: view space
20// OS: object space
// CS: Homogeneous clip space
22// TS: tangent space
23// TXS: texture space
24// Example: NormalWS
25
// normalized / unnormalized vector
// normalized directions are almost everywhere, we tag unnormalized vectors with un.
// Example: unL for unnormalized light vector
29
// use capital letters for regular vectors, vectors always point outward from the current pixel position (ready for lighting equation)
// a capital letter means the vector is normalized, unless we put 'un' in front of it.
32// V: View vector (no eye vector)
33// L: Light vector
34// N: Normal vector
35// H: Half vector
36
37// Input/Outputs structs in PascalCase and prefixed by entry type
38// struct AttributesDefault
39// struct VaryingsDefault
40// use input/output as variable name when using these structures
41
42// Entry program name
43// VertDefault
44// FragDefault / FragForward / FragDeferred
45
46// constant floating number written as 1.0 (not 1, not 1.0f, not 1.0h)
47
48// uniform have _ as prefix + uppercase _LowercaseThenCamelCase
49
// Do not use "in", only "out" or "inout" as qualifier, no "inline" keyword either, useless.
51// When declaring "out" argument of function, they are always last
52
53// headers from ShaderLibrary do not include "common.hlsl", this should be included in the .shader using it (or Material.hlsl)
54
// All uniforms should be in constant buffers (nothing in the global namespace).
56// The reason is that for compute shader we need to guarantee that the layout of CBs is consistent across kernels. Something that we can't control with the global namespace (uniforms get optimized out if not used, modifying the global CBuffer layout per kernel)
57
// Structure definitions that are shared between C# and hlsl.
// These structures need to be aligned on float4 to respect various packing rules from shader languages. This means that these structures need to be padded.
// Rules: When doing an array for constant buffer variables, we always use float4 to avoid any packing issue, particularly between compute shaders and pixel shaders
// i.e. don't use SetGlobalFloatArray or SetComputeFloatParams
// The array can be aliased in hlsl. Example:
63// uniform float4 packedArray[3];
64// static float unpackedArray[12] = (float[12])packedArray;
65
// The functions of the shader library are stateless, no uniforms are declared in it.
// Any function that requires an explicit precision uses the float or half qualifier; when the function can support both, it uses real (see below)
// If a function requires both a half and a float version, then both need to be explicitly defined
69
70///
71/// Hardware Support for Wave Operations
72///
73
74// Support for wave operations is intentionally limited to the compute shader stage in order to make this functionality available to a wider range of hardware.
#ifdef SHADER_STAGE_COMPUTE

    // ---- Platform-level support (known at compile time) ----
    // A platform may advertise wave operations, but the shader is not necessarily built
    // with a compiler that understands them. Support is therefore only reported when a
    // capable compiler (DXC or PSSL) is in use as well.
    #if ((defined(UNITY_PLATFORM_SUPPORTS_WAVE_32) || defined(UNITY_PLATFORM_SUPPORTS_WAVE_64)) && (defined(UNITY_COMPILER_DXC) || defined(SHADER_API_PSSL)))

        #if defined(UNITY_PLATFORM_SUPPORTS_WAVE_32)
            #define UNITY_HW_WAVE_SIZE 32
        #elif defined(UNITY_PLATFORM_SUPPORTS_WAVE_64)
            #define UNITY_HW_WAVE_SIZE 64
        #endif

        #define UNITY_PLATFORM_SUPPORTS_WAVE 1

    // ---- Device-level support (selected at run time) ----
    // Shaders compiled with these defines are always built with a wave-capable compiler,
    // so no compiler check is needed. With only UNITY_DEVICE_SUPPORTS_WAVE_ANY defined,
    // UNITY_HW_WAVE_SIZE is left undefined.
    #elif (defined(UNITY_DEVICE_SUPPORTS_WAVE_ANY) || defined(UNITY_DEVICE_SUPPORTS_WAVE_8) || defined(UNITY_DEVICE_SUPPORTS_WAVE_16) || defined(UNITY_DEVICE_SUPPORTS_WAVE_32) || defined(UNITY_DEVICE_SUPPORTS_WAVE_64) || defined(UNITY_DEVICE_SUPPORTS_WAVE_128))

        #if defined(UNITY_DEVICE_SUPPORTS_WAVE_8)
            #define UNITY_HW_WAVE_SIZE 8
        #elif defined(UNITY_DEVICE_SUPPORTS_WAVE_16)
            #define UNITY_HW_WAVE_SIZE 16
        #elif defined(UNITY_DEVICE_SUPPORTS_WAVE_32)
            #define UNITY_HW_WAVE_SIZE 32
        #elif defined(UNITY_DEVICE_SUPPORTS_WAVE_64)
            #define UNITY_HW_WAVE_SIZE 64
        #elif defined(UNITY_DEVICE_SUPPORTS_WAVE_128)
            #define UNITY_HW_WAVE_SIZE 128
        #endif

        #define UNITY_DEVICE_SUPPORTS_WAVE 1

    #endif

    // Single switch the rest of the library can test, whichever path enabled it.
    #if (defined(UNITY_PLATFORM_SUPPORTS_WAVE) || defined(UNITY_DEVICE_SUPPORTS_WAVE))
        #define UNITY_HW_SUPPORTS_WAVE 1
    #endif

#endif // SHADER_STAGE_COMPUTE
115
116#ifndef real
117
118// The including shader should define whether half
119// precision is suitable for its needs. The shader
120// API (for now) can indicate whether half is possible.
121#if defined(SHADER_API_MOBILE) || defined(SHADER_API_SWITCH) || defined(UNITY_UNIFIED_SHADER_PRECISION_MODEL)
122#define HAS_HALF 1
123#else
124#define HAS_HALF 0
125#endif
126
127#ifndef PREFER_HALF
128#define PREFER_HALF 1
129#endif
130
131#if HAS_HALF && PREFER_HALF
132#define REAL_IS_HALF 1
133#else
134#define REAL_IS_HALF 0
135#endif // Do we have half?
136
// When 'real' is half precision, remap every half scalar/vector/matrix type onto the
// matching min16float type so that 'half' and 'real' agree on precision.
// half2x4 is remapped as well: real2x4 is defined as half2x4 further down, and without
// this entry it would be the only real matrix type not backed by a min16float type.
#if REAL_IS_HALF
#define HALF_IS_FLOAT 0
#define half min16float
#define half2 min16float2
#define half3 min16float3
#define half4 min16float4
#define half2x2 min16float2x2
#define half2x3 min16float2x3
#define half2x4 min16float2x4
#define half3x2 min16float3x2
#define half3x3 min16float3x3
#define half3x4 min16float3x4
#define half4x3 min16float4x3
#define half4x4 min16float4x4
#else
// Without the remap, half is treated as float unless the device reports native 16-bit support.
#define HALF_IS_FLOAT (!defined(UNITY_DEVICE_SUPPORTS_NATIVE_16BIT))
#endif
153
154#if REAL_IS_HALF
155#define real half
156#define real2 half2
157#define real3 half3
158#define real4 half4
159
160#define real2x2 half2x2
161#define real2x3 half2x3
162#define real2x4 half2x4
163#define real3x2 half3x2
164#define real3x3 half3x3
165#define real3x4 half3x4
166#define real4x3 half4x3
167#define real4x4 half4x4
168
169#define REAL_MIN HALF_MIN
170#define REAL_MAX HALF_MAX
171#define REAL_EPS HALF_EPS
172
173#else
174
175#define real float
176#define real2 float2
177#define real3 float3
178#define real4 float4
179
180#define real2x2 float2x2
181#define real2x3 float2x3
182#define real2x4 float2x4
183#define real3x2 float3x2
184#define real3x3 float3x3
185#define real3x4 float3x4
186#define real4x3 float4x3
187#define real4x4 float4x4
188
189#define REAL_MIN FLT_MIN
190#define REAL_MAX FLT_MAX
191#define REAL_EPS FLT_EPS
192
193#endif // REAL_IS_HALF
194
195#endif // #ifndef real
196
197// Target in compute shader are supported in 2018.2, for now define ours
198// (Note only 45 and above support compute shader)
199#ifdef SHADER_STAGE_COMPUTE
200# ifndef SHADER_TARGET
201# if defined(SHADER_API_METAL)
202# define SHADER_TARGET 45
203# else
204# define SHADER_TARGET 50
205# endif
206# endif
207#endif
208
209// This is the default keyword combination and needs to be overriden by the platforms that need specific behaviors
210// when enabling conservative depth overrides
211#define SV_POSITION_QUALIFIERS
212#define DEPTH_OFFSET_SEMANTIC SV_Depth
213
214// Include language header
215#if defined (SHADER_API_GAMECORE)
216#include "Packages/com.unity.render-pipelines.gamecore/ShaderLibrary/API/GameCore.hlsl"
217#elif defined(SHADER_API_XBOXONE)
218#include "Packages/com.unity.render-pipelines.xboxone/ShaderLibrary/API/XBoxOne.hlsl"
219#elif defined(SHADER_API_PS4)
220#include "Packages/com.unity.render-pipelines.ps4/ShaderLibrary/API/PSSL.hlsl"
221#elif defined(SHADER_API_PS5)
222#include "Packages/com.unity.render-pipelines.ps5/ShaderLibrary/API/PSSL.hlsl"
223#elif defined(SHADER_API_D3D11)
224#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/API/D3D11.hlsl"
225#elif defined(SHADER_API_METAL)
226#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/API/Metal.hlsl"
227#elif defined(SHADER_API_VULKAN)
228#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/API/Vulkan.hlsl"
229#elif defined(SHADER_API_SWITCH)
230#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/API/Switch.hlsl"
231#elif defined(SHADER_API_GLCORE)
232#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/API/GLCore.hlsl"
233#elif defined(SHADER_API_GLES3)
234#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/API/GLES3.hlsl"
235#elif defined(SHADER_API_WEBGPU)
236#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/API/WebGPU.hlsl"
237#else
238#error unsupported shader api
239#endif
240#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/API/Validate.hlsl"
241
242#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Macros.hlsl"
243#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Random.hlsl"
244
245#if !defined(SHADER_API_PS5)
246#define PushMarker(str)
247#define PopMarker()
248#endif
249
250#ifdef SHADER_API_XBOXONE // TODO: to move in .nda package in 21.1
251#define PLATFORM_SUPPORTS_PRIMITIVE_ID_IN_PIXEL_SHADER
252#endif
253
254#if defined(PLATFORM_SUPPORTS_NATIVE_RENDERPASS)
255
256 #if defined(UNITY_COMPILER_DXC)
257
// Subpass inputs are disallowed in non-fragment shader stages with DXC, so those stages
// get a dummy value to use in the fragment function while it is not being compiled.
#if defined(SHADER_STAGE_FRAGMENT)
#define UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX(type, idx) [[vk::input_attachment_index(idx)]] SubpassInput<type##4> hlslcc_fbinput_##idx
#define UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX_MS(type, idx) [[vk::input_attachment_index(idx)]] SubpassInputMS<type##4> hlslcc_fbinput_##idx
#else
// Dummy resources are declared so that automatic bindings in non-fragment stages do not
// diverge from the fragment shader (important for Vulkan).
// The dummy variable is a 4-component vector, matching the SubpassInput<type##4> element
// type used in the fragment stage, so code that swizzles the loaded value (e.g. .xyz)
// also type-checks here. Neither macro ends with ';', like the fragment-stage variants:
// the caller supplies it.
#define UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX(type, idx) Texture2D dxc_dummy_fbinput_resource##idx; static type##4 DXC_DummySubpassVariable##idx = type(0).xxxx
#define UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX_MS(type, idx) Texture2D dxc_dummy_fbinput_resource##idx; static type##4 DXC_DummySubpassVariable##idx = type(0).xxxx
#endif
267 // Renderpass inputs: Vulkan/Metal subpass input
268 #define FRAMEBUFFER_INPUT_FLOAT(idx) UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX(float, idx)
269 #define FRAMEBUFFER_INPUT_FLOAT_MS(idx) UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX_MS(float, idx)
270 // For halfs
271 #define FRAMEBUFFER_INPUT_HALF(idx) UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX(half, idx)
272 #define FRAMEBUFFER_INPUT_HALF_MS(idx) UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX_MS(half, idx)
273 // For ints
274 #define FRAMEBUFFER_INPUT_INT(idx) UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX(int, idx)
275 #define FRAMEBUFFER_INPUT_INT_MS(idx) UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX_MS(int, idx)
276 // For uints
277 #define FRAMEBUFFER_INPUT_UINT(idx) UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX(uint, idx)
278 #define FRAMEBUFFER_INPUT_UINT_MS(idx) UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX_MS(uint, idx)
279
280 #if defined(SHADER_STAGE_FRAGMENT)
281 #define LOAD_FRAMEBUFFER_INPUT(idx, v2fname) hlslcc_fbinput_##idx.SubpassLoad()
282 #define LOAD_FRAMEBUFFER_INPUT_MS(idx, sampleIdx, v2fname) hlslcc_fbinput_##idx.SubpassLoad(sampleIdx)
283 #else
284 #define LOAD_FRAMEBUFFER_INPUT(idx, v2fname) DXC_DummySubpassVariable##idx
285 #define LOAD_FRAMEBUFFER_INPUT_MS(idx, sampleIdx, v2fname) DXC_DummySubpassVariable##idx
286 #endif
287
288 #elif defined(SHADER_API_METAL) && defined(UNITY_NEEDS_RENDERPASS_FBFETCH_FALLBACK)
289
290 // On desktop metal we need special magic due to the need to support both intel and apple silicon
291 // since the former does not support framebuffer fetch
292 // Due to this we have special considerations:
293 // 1. since we might need to bind the copy texture, to simplify our lives we always declare _UnityFBInput texture
294 // in metal translation we will add function_constant, but we still want to generate binding in hlsl
295 // so that unity knows about the possibility
296 // 2. hlsl do not have anything like function constants, hence we will add bool to the fake cbuffer for subpass
297 // again, this is done only for hlsl to generate proper code - in translation it will be changed to
298 // a proper function constant (i.e. hlslcc_SubpassInput_f_ cbuffer is just "metadata" and is absent in metal code)
299 // 3. we want to generate an actual if command (not conditional move), hence we need to have an interim function
300 // alas we are not able to hide in it the texture coords: we are guaranteed to have just one "declare fb input"
301 // per index, but nothing stops users to have several "read fb input", hence we need to generate function code
302 // in the former, where we do not know the source of uv coords
303 // while the usage looks weird (we pass hlslcc_fbfetch_ in the function), it is ok due to the way hlsl compiler works
304 // it will generate an actual if and access hlslcc_fbfetch_ only if framebuffer fetch is available
305 // and when creating metal program, compiler takes care of this (function_constant magic)
306
307 #define RENDERPASS_DECLARE_FALLBACK(T, idx) \
308 Texture2D<T> _UnityFBInput##idx; float4 _UnityFBInput##idx##_TexelSize; \
309 inline T ReadFBInput_##idx(bool var, uint2 coord) { \
310 [branch]if(var) { return hlslcc_fbinput_##idx; } \
311 else { return _UnityFBInput##idx.Load(uint3(coord,0)); } \
312 }
313 #define RENDERPASS_DECLARE_FALLBACK_MS(T, idx) \
314 Texture2DMS<T> _UnityFBInput##idx; float4 _UnityFBInput##idx##_TexelSize; \
315 inline T ReadFBInput_##idx(bool var, uint2 coord, uint sampleIdx) { \
316 [branch]if(var) { return hlslcc_fbinput_##idx[sampleIdx]; } \
317 else { return _UnityFBInput##idx.Load(coord,sampleIdx); } \
318 }
319
320 #define FRAMEBUFFER_INPUT_FLOAT(idx) \
321 cbuffer hlslcc_SubpassInput_f_##idx { float4 hlslcc_fbinput_##idx; bool hlslcc_fbfetch_##idx; }; \
322 RENDERPASS_DECLARE_FALLBACK(float4, idx)
323
324 #define FRAMEBUFFER_INPUT_FLOAT_MS(idx) \
325 cbuffer hlslcc_SubpassInput_F_##idx { float4 hlslcc_fbinput_##idx[8]; bool hlslcc_fbfetch_##idx; }; \
326 RENDERPASS_DECLARE_FALLBACK_MS(float4, idx)
327
328 #define FRAMEBUFFER_INPUT_HALF(idx) \
329 cbuffer hlslcc_SubpassInput_h_##idx { half4 hlslcc_fbinput_##idx; bool hlslcc_fbfetch_##idx; }; \
330 RENDERPASS_DECLARE_FALLBACK(half4, idx)
331
332 #define FRAMEBUFFER_INPUT_HALF_MS(idx) \
333 cbuffer hlslcc_SubpassInput_H_##idx { half4 hlslcc_fbinput_##idx[8]; bool hlslcc_fbfetch_##idx; }; \
334 RENDERPASS_DECLARE_FALLBACK_MS(half4, idx)
335
336 #define FRAMEBUFFER_INPUT_INT(idx) \
337 cbuffer hlslcc_SubpassInput_i_##idx { int4 hlslcc_fbinput_##idx; bool hlslcc_fbfetch_##idx; }; \
338 RENDERPASS_DECLARE_FALLBACK(int4, idx)
339
340 #define FRAMEBUFFER_INPUT_INT_MS(idx) \
341 cbuffer hlslcc_SubpassInput_I_##idx { int4 hlslcc_fbinput_##idx[8]; bool hlslcc_fbfetch_##idx; }; \
342 RENDERPASS_DECLARE_FALLBACK_MS(int4, idx)
343
344 #define FRAMEBUFFER_INPUT_UINT(idx) \
345 cbuffer hlslcc_SubpassInput_u_##idx { uint4 hlslcc_fbinput_##idx; bool hlslcc_fbfetch_##idx; }; \
346 RENDERPASS_DECLARE_FALLBACK(uint4, idx)
347
// Multisampled uint framebuffer input: the fake subpass cbuffer (value array + fetch flag)
// followed by the texture fallback reader. Uses RENDERPASS_DECLARE_FALLBACK_MS, the macro
// defined above in this file and used by the other *_MS variants.
#define FRAMEBUFFER_INPUT_UINT_MS(idx) \
cbuffer hlslcc_SubpassInput_U_##idx { uint4 hlslcc_fbinput_##idx[8]; bool hlslcc_fbfetch_##idx; }; \
RENDERPASS_DECLARE_FALLBACK_MS(uint4, idx)
351
352 #define LOAD_FRAMEBUFFER_INPUT(idx, v2fname) ReadFBInput_##idx(hlslcc_fbfetch_##idx, uint2(v2fname.xy))
353 #define LOAD_FRAMEBUFFER_INPUT_MS(idx, sampleIdx, v2fname) ReadFBInput_##idx(hlslcc_fbfetch_##idx, uint2(v2fname.xy), sampleIdx)
354
355 #else
356
357 // For floats
358 #define FRAMEBUFFER_INPUT_FLOAT(idx) cbuffer hlslcc_SubpassInput_f_##idx { float4 hlslcc_fbinput_##idx; }
359 #define FRAMEBUFFER_INPUT_FLOAT_MS(idx) cbuffer hlslcc_SubpassInput_F_##idx { float4 hlslcc_fbinput_##idx[8]; }
360 // For halfs
361 #define FRAMEBUFFER_INPUT_HALF(idx) cbuffer hlslcc_SubpassInput_h_##idx { half4 hlslcc_fbinput_##idx; }
362 #define FRAMEBUFFER_INPUT_HALF_MS(idx) cbuffer hlslcc_SubpassInput_H_##idx { half4 hlslcc_fbinput_##idx[8]; }
363 // For ints
364 #define FRAMEBUFFER_INPUT_INT(idx) cbuffer hlslcc_SubpassInput_i_##idx { int4 hlslcc_fbinput_##idx; }
365 #define FRAMEBUFFER_INPUT_INT_MS(idx) cbuffer hlslcc_SubpassInput_I_##idx { int4 hlslcc_fbinput_##idx[8]; }
366 // For uints
367 #define FRAMEBUFFER_INPUT_UINT(idx) cbuffer hlslcc_SubpassInput_u_##idx { uint4 hlslcc_fbinput_##idx; }
368 #define FRAMEBUFFER_INPUT_UINT_MS(idx) cbuffer hlslcc_SubpassInput_U_##idx { uint4 hlslcc_fbinput_##idx[8]; }
369
370 #define LOAD_FRAMEBUFFER_INPUT(idx, v2fname) hlslcc_fbinput_##idx
371 #define LOAD_FRAMEBUFFER_INPUT_MS(idx, sampleIdx, v2fname) hlslcc_fbinput_##idx[sampleIdx]
372
373 #endif
374
375#else
376
377 // Renderpass inputs: General fallback paths
378 #define FRAMEBUFFER_INPUT_FLOAT(idx) TEXTURE2D_FLOAT(_UnityFBInput##idx); float4 _UnityFBInput##idx##_TexelSize
379 #define FRAMEBUFFER_INPUT_HALF(idx) TEXTURE2D_HALF(_UnityFBInput##idx); float4 _UnityFBInput##idx##_TexelSize
380 #define FRAMEBUFFER_INPUT_INT(idx) TEXTURE2D_INT(_UnityFBInput##idx); float4 _UnityFBInput##idx##_TexelSize
381 #define FRAMEBUFFER_INPUT_UINT(idx) TEXTURE2D_UINT(_UnityFBInput##idx); float4 _UnityFBInput##idx##_TexelSize
382
383 #define LOAD_FRAMEBUFFER_INPUT(idx, v2fvertexname) _UnityFBInput##idx.Load(uint3(v2fvertexname.xy, 0))
384
385 #define FRAMEBUFFER_INPUT_FLOAT_MS(idx) Texture2DMS<float4> _UnityFBInput##idx; float4 _UnityFBInput##idx##_TexelSize
386 #define FRAMEBUFFER_INPUT_HALF_MS(idx) Texture2DMS<float4> _UnityFBInput##idx; float4 _UnityFBInput##idx##_TexelSize
387 #define FRAMEBUFFER_INPUT_INT_MS(idx) Texture2DMS<int4> _UnityFBInput##idx; float4 _UnityFBInput##idx##_TexelSize
388 #define FRAMEBUFFER_INPUT_UINT_MS(idx) Texture2DMS<uint4> _UnityFBInput##idx; float4 _UnityFBInput##idx##_TexelSize
389
390 #define LOAD_FRAMEBUFFER_INPUT_MS(idx, sampleIdx, v2fvertexname) _UnityFBInput##idx.Load(uint2(v2fvertexname.xy), sampleIdx)
391
392#endif
393
394// ----------------------------------------------------------------------------
395// Global resources API definitions for Ray Tracing
396// ----------------------------------------------------------------------------
397#if (SHADER_STAGE_RAY_TRACING && UNITY_RAY_TRACING_GLOBAL_RESOURCES)
398 #define GLOBAL_RESOURCE(type, name, reg) type name : register(reg, space1);
399 #define GLOBAL_CBUFFER_START(name, reg) cbuffer name : register(reg, space1) {
400 #define GLOBAL_TEXTURE2D(name, reg) TEXTURE2D(name) : register(reg, space1)
401 #define GLOBAL_TEXTURE2D_ARRAY(name, reg) TEXTURE2D_ARRAY(name) : register(reg, space1)
402 #define GLOBAL_TEXTURECUBE_ARRAY(name, reg) TEXTURECUBE_ARRAY(name) : register(reg, space1)
403#else
404 #define GLOBAL_RESOURCE(type, name, reg) type name;
405 #define GLOBAL_CBUFFER_START(name, reg) CBUFFER_START(name)
406 #define GLOBAL_TEXTURE2D(name, reg) TEXTURE2D(name)
407 #define GLOBAL_TEXTURE2D_ARRAY(name, reg) TEXTURE2D_ARRAY(name)
408 #define GLOBAL_TEXTURECUBE_ARRAY(name, reg) TEXTURECUBE_ARRAY(name)
409#endif
410
411// ----------------------------------------------------------------------------
412// Common intrinsic (general implementation of intrinsic available on some platform)
413// ----------------------------------------------------------------------------
414
415#if !defined(PLATFORM_SUPPORTS_WAVE_INTRINSICS) && !defined(UNITY_COMPILER_DXC) && !defined(UNITY_HW_SUPPORTS_WAVE)
416// Intercept wave functions when they aren't supported to provide better error messages
417#define WaveActiveAllTrue ERROR_ON_UNSUPPORTED_FUNCTION(WaveActiveAllTrue)
418#define WaveActiveAnyTrue ERROR_ON_UNSUPPORTED_FUNCTION(WaveActiveAnyTrue)
419#define WaveGetLaneIndex ERROR_ON_UNSUPPORTED_FUNCTION(WaveGetLaneIndex)
420#define WaveIsFirstLane ERROR_ON_UNSUPPORTED_FUNCTION(WaveIsFirstLane)
421#define GetWaveID ERROR_ON_UNSUPPORTED_FUNCTION(GetWaveID)
422#define WaveActiveMin ERROR_ON_UNSUPPORTED_FUNCTION(WaveActiveMin)
423#define WaveActiveMax ERROR_ON_UNSUPPORTED_FUNCTION(WaveActiveMax)
424#define WaveActiveBallot ERROR_ON_UNSUPPORTED_FUNCTION(WaveActiveBallot)
425#define WaveActiveSum ERROR_ON_UNSUPPORTED_FUNCTION(WaveActiveSum)
426#define WaveActiveBitAnd ERROR_ON_UNSUPPORTED_FUNCTION(WaveActiveBitAnd)
427#define WaveActiveBitOr ERROR_ON_UNSUPPORTED_FUNCTION(WaveActiveBitOr)
428#define WaveGetLaneCount ERROR_ON_UNSUPPORTED_FUNCTION(WaveGetLaneCount)
429#define WaveIsHelperLane ERROR_ON_UNSUPPORTED_FUNCTION(WaveIsHelperLane)
430#endif
431
432#if defined(PLATFORM_SUPPORTS_WAVE_INTRINSICS)
// Helper macro to compute the lane swizzle offset from andMask, orMask and xorMask:
// andMask is used as is, orMask is shifted left by 5 bits and xorMask by 10 bits, then all are OR-ed.
// IMPORTANT: to guarantee compatibility with all platforms, the masks need to be constant literals (constants at compile time).
// Every argument is parenthesized so that expressions such as (1 | 2) can be passed safely.
#define LANE_SWIZZLE_OFFSET(andMask, orMask, xorMask) ((andMask) | ((orMask) << 5) | ((xorMask) << 10))
436#endif
437
438#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/CommonDeprecated.hlsl"
439
#ifndef INTRINSIC_BITFIELD_EXTRACT
// Unsigned bit field extraction: returns 'numBits' bits of 'data', starting at bit 'offset'.
// Note that the intrinsic itself generates a vector instruction;
// wrap the call with WaveReadLaneFirst() to get scalar output.
uint BitFieldExtract(uint data, uint offset, uint numBits)
{
    uint shifted  = data >> offset;         // Move the field down to bit 0
    uint keepBits = (1u << numBits) - 1u;   // 'numBits' low bits set
    return shifted & keepBits;
}
#endif // INTRINSIC_BITFIELD_EXTRACT
450
#ifndef INTRINSIC_BITFIELD_EXTRACT_SIGN_EXTEND
// Signed bit field extraction: returns 'numBits' bits of 'data', starting at bit 'offset',
// with the top bit of the field replicated into all higher bits of the result.
// Note that the intrinsic itself generates a vector instruction;
// wrap the call with WaveReadLaneFirst() to get scalar output.
int BitFieldExtractSignExtend(int data, uint offset, uint numBits)
{
    int  aligned   = data >> offset;                      // Arithmetic (sign-extending) shift
    uint fieldMask = (1u << numBits) - 1u;
    int  topBit    = aligned & (1u << (numBits - 1u));    // Isolated sign bit of the field

    // Negating the isolated sign bit (2's complement) sets it and every bit above it.
    return -topBit | (aligned & fieldMask);
}
#endif // INTRINSIC_BITFIELD_EXTRACT_SIGN_EXTEND
464
#ifndef INTRINSIC_BITFIELD_INSERT
// Returns 'dst' with the bits selected by 'mask' replaced by the corresponding bits of 'src'.
uint BitFieldInsert(uint mask, uint src, uint dst)
{
    // Flip exactly those masked bits of 'dst' that differ from 'src'.
    uint differing = (dst ^ src) & mask;
    return dst ^ differing;
}
#endif // INTRINSIC_BITFIELD_INSERT
472
// Returns true when bit number 'offset' of 'data' is 1.
bool IsBitSet(uint data, uint offset)
{
    uint bit = BitFieldExtract(data, offset, 1u);
    return bit != 0;
}
477
// Sets bit number 'offset' of 'data' to 1 (in place).
void SetBit(inout uint data, uint offset)
{
    uint bit = 1u << offset;
    data = data | bit;
}
482
// Clears bit number 'offset' of 'data' to 0 (in place).
void ClearBit(inout uint data, uint offset)
{
    uint bit = 1u << offset;
    data = data & ~bit;
}
487
// Inverts bit number 'offset' of 'data' (in place).
void ToggleBit(inout uint data, uint offset)
{
    uint bit = 1u << offset;
    data = data ^ bit;
}
492
#ifndef INTRINSIC_WAVEREADFIRSTLANE
// Fallback used when no intrinsic is provided: the argument is returned as is.
// Warning: for correctness, the argument's value must be the same across all lanes of the wave.
TEMPLATE_1_FLT_HALF(WaveReadLaneFirst, scalarValue, return scalarValue)
TEMPLATE_1_INT(WaveReadLaneFirst, scalarValue, return scalarValue)
#endif // INTRINSIC_WAVEREADFIRSTLANE
498
#ifndef INTRINSIC_MUL24
// Fallback used when no 24-bit multiply intrinsic is provided: a regular integer multiply.
TEMPLATE_2_INT(Mul24, a, b, return a * b)
#endif // INTRINSIC_MUL24
502
#ifndef INTRINSIC_MAD24
// Fallback used when no 24-bit multiply-add intrinsic is provided: a regular integer a * b + c.
TEMPLATE_3_INT(Mad24, a, b, c, return a * b + c)
#endif // INTRINSIC_MAD24
506
#ifndef INTRINSIC_MINMAX3
// Fallbacks used when no three-operand min/max intrinsics are provided:
// Min3 / Max3 are built from two nested two-operand min / max calls.
TEMPLATE_3_FLT_HALF(Min3, a, b, c, return min(min(a, b), c))
TEMPLATE_3_INT(Min3, a, b, c, return min(min(a, b), c))

TEMPLATE_3_FLT_HALF(Max3, a, b, c, return max(max(a, b), c))
TEMPLATE_3_INT(Max3, a, b, c, return max(max(a, b), c))
#endif // INTRINSIC_MINMAX3
513
514TEMPLATE_3_FLT_HALF(Avg3, a, b, c, return (a + b + c) * 0.33333333)
515
// Important! Quad functions are only valid in pixel shaders!
// Returns, per axis, -1.0 when the screen coordinate is even and +1.0 when it is odd.
float2 GetQuadOffset(int2 screenPos)
{
    int2 parity = screenPos & 1;
    return float2(parity) * 2.0 - 1.0;
}
521
#ifndef INTRINSIC_QUAD_SHUFFLE
// Fallbacks used when no quad shuffle intrinsics are provided. Each one offsets 'value'
// by its fine screen-space derivative, signed by the parity of the pixel coordinate.

// Horizontal variant: uses ddx_fine and the parity of screenPos.x.
float QuadReadAcrossX(float value, int2 screenPos)
{
    float side = float(screenPos.x & 1) * 2.0 - 1.0; // -1 on even columns, +1 on odd columns
    return value - (ddx_fine(value) * side);
}

// Vertical variant: uses ddy_fine and the parity of screenPos.y.
float QuadReadAcrossY(float value, int2 screenPos)
{
    float side = float(screenPos.y & 1) * 2.0 - 1.0; // -1 on even rows, +1 on odd rows
    return value - (ddy_fine(value) * side);
}

// Diagonal variant: applies the horizontal step first, then the vertical step on that result.
float QuadReadAcrossDiagonal(float value, int2 screenPos)
{
    float2 side    = GetQuadOffset(screenPos);
    float  acrossX = value - (ddx_fine(value) * side.x);
    return acrossX - (ddy_fine(acrossX) * side.y);
}
#endif // INTRINSIC_QUAD_SHUFFLE
541
// Component-wise QuadReadAcrossX for a float3.
float3 QuadReadFloat3AcrossX(float3 val, int2 positionSS)
{
    float3 result;
    result.x = QuadReadAcrossX(val.x, positionSS);
    result.y = QuadReadAcrossX(val.y, positionSS);
    result.z = QuadReadAcrossX(val.z, positionSS);
    return result;
}

// Component-wise QuadReadAcrossX for a float4.
float4 QuadReadFloat4AcrossX(float4 val, int2 positionSS)
{
    float4 result;
    result.x = QuadReadAcrossX(val.x, positionSS);
    result.y = QuadReadAcrossX(val.y, positionSS);
    result.z = QuadReadAcrossX(val.z, positionSS);
    result.w = QuadReadAcrossX(val.w, positionSS);
    return result;
}
551
// Component-wise QuadReadAcrossY for a float3.
float3 QuadReadFloat3AcrossY(float3 val, int2 positionSS)
{
    float3 result;
    result.x = QuadReadAcrossY(val.x, positionSS);
    result.y = QuadReadAcrossY(val.y, positionSS);
    result.z = QuadReadAcrossY(val.z, positionSS);
    return result;
}

// Component-wise QuadReadAcrossY for a float4.
float4 QuadReadFloat4AcrossY(float4 val, int2 positionSS)
{
    float4 result;
    result.x = QuadReadAcrossY(val.x, positionSS);
    result.y = QuadReadAcrossY(val.y, positionSS);
    result.z = QuadReadAcrossY(val.z, positionSS);
    result.w = QuadReadAcrossY(val.w, positionSS);
    return result;
}
561
// Component-wise QuadReadAcrossDiagonal for a float3.
float3 QuadReadFloat3AcrossDiagonal(float3 val, int2 positionSS)
{
    float3 result;
    result.x = QuadReadAcrossDiagonal(val.x, positionSS);
    result.y = QuadReadAcrossDiagonal(val.y, positionSS);
    result.z = QuadReadAcrossDiagonal(val.z, positionSS);
    return result;
}

// Component-wise QuadReadAcrossDiagonal for a float4.
float4 QuadReadFloat4AcrossDiagonal(float4 val, int2 positionSS)
{
    float4 result;
    result.x = QuadReadAcrossDiagonal(val.x, positionSS);
    result.y = QuadReadAcrossDiagonal(val.y, positionSS);
    result.z = QuadReadAcrossDiagonal(val.z, positionSS);
    result.w = QuadReadAcrossDiagonal(val.w, positionSS);
    return result;
}
571
572TEMPLATE_SWAP(Swap) // Define a Swap(a, b) function for all types
573
// Cubemap face indices.
#define CUBEMAPFACE_POSITIVE_X 0
#define CUBEMAPFACE_NEGATIVE_X 1
#define CUBEMAPFACE_POSITIVE_Y 2
#define CUBEMAPFACE_NEGATIVE_Y 3
#define CUBEMAPFACE_POSITIVE_Z 4
#define CUBEMAPFACE_NEGATIVE_Z 5

#ifndef INTRINSIC_CUBEMAP_FACE_ID
// Returns the CUBEMAPFACE_* index of the face selected by 'dir': the axis with the largest
// absolute component, with its sign choosing the positive or negative face.
// Ties are resolved in favor of Z first, then Y, then X.
float CubeMapFaceID(float3 dir)
{
    float3 magnitude = abs(dir);

    // Z is the major axis
    if (magnitude.z >= magnitude.x && magnitude.z >= magnitude.y)
    {
        return (dir.z < 0.0) ? CUBEMAPFACE_NEGATIVE_Z : CUBEMAPFACE_POSITIVE_Z;
    }

    // Y is the major axis
    if (magnitude.y >= magnitude.x)
    {
        return (dir.y < 0.0) ? CUBEMAPFACE_NEGATIVE_Y : CUBEMAPFACE_POSITIVE_Y;
    }

    // X is the major axis
    return (dir.x < 0.0) ? CUBEMAPFACE_NEGATIVE_X : CUBEMAPFACE_POSITIVE_X;
}
#endif // INTRINSIC_CUBEMAP_FACE_ID
602
// The isnan intrinsic can't be used because it requires /Gic to be enabled on fxc, which we can't do.
// Use IsNaN / AnyIsNaN instead: they test the bit pattern directly.
bool IsNaN(float x)
{
    uint magnitudeBits = asuint(x) & 0x7FFFFFFF; // Drop the sign bit
    return magnitudeBits > 0x7F800000;           // Above the bit pattern of +infinity
}
608
// Returns true when at least one component of 'v' is NaN.
// Same bit test as IsNaN, applied to all components at once.
bool AnyIsNaN(float2 v)
{
    return any((asuint(v) & 0x7FFFFFFF) > 0x7F800000);
}

bool AnyIsNaN(float3 v)
{
    return any((asuint(v) & 0x7FFFFFFF) > 0x7F800000);
}

bool AnyIsNaN(float4 v)
{
    return any((asuint(v) & 0x7FFFFFFF) > 0x7F800000);
}
623
// Returns true when 'x' is +infinity or -infinity.
bool IsInf(float x)
{
    uint magnitudeBits = asuint(x) & 0x7FFFFFFF; // Drop the sign bit
    return magnitudeBits == 0x7F800000;          // Exactly the bit pattern of +infinity
}
628
// Returns true when at least one component of 'v' is +infinity or -infinity.
// Same bit test as IsInf, applied to all components at once.
bool AnyIsInf(float2 v)
{
    return any((asuint(v) & 0x7FFFFFFF) == 0x7F800000);
}

bool AnyIsInf(float3 v)
{
    return any((asuint(v) & 0x7FFFFFFF) == 0x7F800000);
}

bool AnyIsInf(float4 v)
{
    return any((asuint(v) & 0x7FFFFFFF) == 0x7F800000);
}
643
// Returns true when 'x' is neither infinite nor NaN (its exponent bits are not all set).
bool IsFinite(float x)
{
    uint exponentBits = asuint(x) & 0x7F800000;
    return exponentBits != 0x7F800000;
}
648
// Returns 'x' unchanged when it is finite, and 0 when it is infinite or NaN.
float SanitizeFinite(float x)
{
    if (IsFinite(x))
    {
        return x;
    }

    return 0;
}
653
// Returns true when the bit pattern of 'x' is below that of +infinity,
// i.e. the sign bit is clear and 'x' is neither infinite nor NaN (+0 passes, -0 does not).
bool IsPositiveFinite(float x)
{
    uint bits = asuint(x);
    return bits < 0x7F800000;
}
658
// Returns 'x' unchanged when IsPositiveFinite(x) holds, and 0 otherwise.
float SanitizePositiveFinite(float x)
{
    if (IsPositiveFinite(x))
    {
        return x;
    }

    return 0;
}
663
664// ----------------------------------------------------------------------------
665// Common math functions
666// ----------------------------------------------------------------------------
667
// Converts an angle from degrees to radians.
real DegToRad(real deg)
{
    real rad = deg * (PI / 180.0);
    return rad;
}
672
// Converts an angle from radians to degrees.
real RadToDeg(real rad)
{
    real deg = rad * (180.0 / PI);
    return deg;
}
677
678// Square functions for cleaner code
679TEMPLATE_1_FLT_HALF(Sq, x, return (x) * (x))
680TEMPLATE_1_INT(Sq, x, return (x) * (x))
681
// Returns true when 'x' has at most one bit set.
// Note: this also returns true for x == 0.
bool IsPower2(uint x)
{
    uint withoutLowestSetBit = x & (x - 1); // Clears the lowest set bit of x
    return withoutLowestSetBit == 0;
}
686
// Polynomial approximation of acos for a non-negative argument (the absolute value of inX is used).
// Input [0, 1] and output [0, PI/2]
// 9 VALU
real FastACosPos(real inX)
{
    real x = abs(inX);
    real poly = (0.0468878 * x + -0.203471) * x + 1.570796; // p(x)

    return poly * sqrt(1.0 - x);
}
697
// Approximation of acos built on FastACosPos.
// Ref: https://seblagarde.wordpress.com/2014/12/01/inverse-trigonometric-functions-gpu-optimization-for-amd-gcn-architecture/
// Input [-1, 1] and output [0, PI]
// 12 VALU
real FastACos(real inX)
{
    real res = FastACosPos(inX);

    // Undo range reduction for negative inputs
    if (inX >= 0)
    {
        return res;
    }

    return PI - res;
}
707
// Approximation of asin, computed as HALF_PI - FastACos(x).
// Same cost as FastACos + 1 FR, same error.
// Input [-1, 1] and output [-PI/2, PI/2]
real FastASin(real x)
{
    real acosValue = FastACos(x);
    return HALF_PI - acosValue;
}
715
// Approximation of atan for a non-negative argument.
// Eberly's odd polynomial of degree 5 - respects bounds. Max absolute error 1.3x10^-3
// 4 VGPR, 14 FR (10 FR, 1 QR), 2 scalar
// Input [0, infinity] and output [0, PI/2]
real FastATanPos(real x)
{
    bool belowOne = (x < 1.0);

    // The polynomial is evaluated on x when x < 1 and on 1 / x otherwise
    real t   = belowOne ? x : 1.0 / x;
    real tSq = t * t;

    real acc = 0.0872929;
    acc = -0.301895 + acc * tSq;
    acc = 1.0 + acc * tSq;
    acc = acc * t;

    // For the reciprocal case the result is HALF_PI minus the polynomial
    return belowOne ? acc : HALF_PI - acc;
}
730
// Approximation of atan: FastATanPos on |x|, with the sign of x restored afterwards.
// 4 VGPR, 16 FR (12 FR, 1 QR), 2 scalar
// Input [-infinity, infinity] and output [-PI/2, PI/2]
real FastATan(real x)
{
    real magnitude = FastATanPos(abs(x));

    if (x < 0.0)
    {
        return -magnitude;
    }

    return magnitude;
}
738
// Approximation of atan2(y, x): FastATan(y / x), plus an offset of +PI (y >= 0) or -PI (y < 0)
// that is only applied when x is negative.
real FastAtan2(real y, real x)
{
    real halfPlaneOffset = real(y >= 0.0 ? PI : -PI) * (x < 0.0);
    return FastATan(y / x) + halfPlaneOffset;
}
743
#if (SHADER_TARGET >= 45)
// Integer log2 of 'x', taken as the index of its highest set bit (firstbithigh).
uint FastLog2(uint x)
{
    uint highestBit = firstbithigh(x);
    return highestBit;
}
#endif
750
751// Using pow often result to a warning like this
752// "pow(f, e) will not work for negative f, use abs(f) or conditionally handle negative values if you expect them"
753// PositivePow remove this warning when you know the value is positive or 0 and avoid inf/NAN.
754// Note: https://msdn.microsoft.com/en-us/library/windows/desktop/bb509636(v=vs.85).aspx pow(0, >0) == 0
755TEMPLATE_2_FLT_HALF(PositivePow, base, power, return pow(abs(base), power))
756
757// SafePositivePow: Same as pow(x,y) but considers x always positive and never exactly 0 such that
758// SafePositivePow(0,y) will numerically converge to 1 as y -> 0, including SafePositivePow(0,0) returning 1.
759//
760// First, like PositivePow, SafePositivePow removes this warning for when you know the x value is positive or 0 and you know
761// you avoid a NaN:
762// ie you know that x == 0 and y > 0, such that pow(x,y) == pow(0, >0) == 0
763// SafePositivePow(0, y) will however return close to 1 as y -> 0, see below.
764//
765// Also, pow(x,y) is most probably approximated as exp2(log2(x) * y), so pow(0,0) will give exp2(-inf * 0) == exp2(NaN) == NaN.
766//
767// SafePositivePow avoids NaN in allowing SafePositivePow(x,y) where (x,y) == (0,y) for any y including 0 by clamping x to a
768// minimum of FLT_EPS. The consequences are:
769//
770// -As a replacement for pow(0,y) where y >= 1, the result of SafePositivePow(x,y) should be close enough to 0.
771// -For cases where we substitute for pow(0,y) where 0 < y < 1, SafePositivePow(x,y) will quickly reach 1 as y -> 0, while
772// normally pow(0,y) would give 0 instead of 1 for all 0 < y.
773// eg: if we #define FLT_EPS 5.960464478e-8 (for fp32),
774// SafePositivePow(0, 0.1) = 0.1894646
775// SafePositivePow(0, 0.01) = 0.8467453
776// SafePositivePow(0, 0.001) = 0.9835021
777//
778// Depending on the intended usage of pow(), this difference in behavior might be a moot point since:
// 1) by leaving "y" free to get to 0, we get NaNs
780// 2) the behavior of SafePositivePow() has more continuity when both x and y get closer together to 0, since
781// when x is assured to be positive non-zero, pow(x,x) -> 1 as x -> 0.
782//
783// TL;DR: SafePositivePow(x,y) avoids NaN and is safe for positive (x,y) including (x,y) == (0,0),
784// but SafePositivePow(0, y) will return close to 1 as y -> 0, instead of 0, so watch out
785// for behavior depending on pow(0, y) giving always 0, especially for 0 < y < 1.
786//
787// Ref: https://msdn.microsoft.com/en-us/library/windows/desktop/bb509636(v=vs.85).aspx
// Float overloads: |base| is clamped to at least FLT_EPS before calling pow().
TEMPLATE_2_FLT(SafePositivePow, base, power, return pow(max(float(FLT_EPS), abs(base)), power))
// Half overloads: |base| is clamped to at least HALF_EPS before calling pow().
TEMPLATE_2_HALF(SafePositivePow, base, power, return pow(max(min16float(HALF_EPS), abs(base)), power))
790
// Precision-suffixed variants of SafePositivePow, so that shadergraph functions can select the
// implementation through the same $precision token that is used for variable types.
// The bodies are the same clamped pow() as SafePositivePow.
TEMPLATE_2_FLT(SafePositivePow_float, base, power, return pow(max(float(FLT_EPS), abs(base)), power))
TEMPLATE_2_HALF(SafePositivePow_half, base, power, return pow(max(min16float(HALF_EPS), abs(base)), power))
794
// Accessors returning the FLT_* / HALF_* limit constants, named with a precision suffix.

// Returns FLT_EPS.
float Eps_float()
{
    return FLT_EPS;
}

// Returns FLT_MIN.
float Min_float()
{
    return FLT_MIN;
}

// Returns FLT_MAX.
float Max_float()
{
    return FLT_MAX;
}

// Returns HALF_EPS.
half Eps_half()
{
    return HALF_EPS;
}

// Returns HALF_MIN.
half Min_half()
{
    return HALF_MIN;
}

// Returns HALF_MAX.
half Max_half()
{
    return HALF_MAX;
}
801
// Relative 'epsilon equal': compares |a - b| against 'epsilon' after scaling by (|a| + |b|).
// The farther 'a' or 'b' are from 0.0, the larger the absolute tolerance becomes.
//
// a, b    : values to compare.
// epsilon : maximum allowed relative error (exclusive upper bound).
// Returns true when the relative error is strictly below 'epsilon'.
//
// When both inputs are exactly zero the ratio would be 0/0 (NaN) and the comparison would
// always fail; the relative error is defined as 0 in that case so that two zeros compare equal.
// NaN inputs still make the function return false.
bool NearlyEqual(float a, float b, float epsilon)
{
    float difference = abs(a - b);
    float magnitude = abs(a) + abs(b);

    // (magnitude == 0) only happens when a and b are both zero. A NaN magnitude fails this
    // test and falls through to the division, which keeps the NaN (and the 'false' result).
    float relativeError = (magnitude == 0.0) ? 0.0 : (difference / magnitude);

    return relativeError < epsilon;
}
808
// Fixed-epsilon variants of the relative comparison: |a - b| / (|a| + |b|) < FLT_EPS (or HALF_EPS).
// The denominator is clamped to the smallest positive normal value (FLT_MIN / HALF_MIN) so that
// comparing two zeros evaluates 0 / MIN == 0 instead of 0 / 0 == NaN, which made zeros never compare equal.
// For any denominator at or above that minimum the result is unchanged.
TEMPLATE_2_FLT(NearlyEqual_Float, a, b, return abs(a - b) / max(abs(a) + abs(b), float(FLT_MIN)) < float(FLT_EPS))
TEMPLATE_2_HALF(NearlyEqual_Half, a, b, return abs(a - b) / max(abs(a) + abs(b), min16float(HALF_MIN)) < min16float(HALF_EPS))
811
// Builds a floating point value that has the magnitude of 'x' and the sign of 's'.
// ignoreNegZero = true : the sign is decided with (s >= 0), so -0 is handled like +0.
// ignoreNegZero = false: the sign bit of 's' is used directly, so -0 counts as negative.
// See the comment about FastSign() below.
float CopySign(float x, float s, bool ignoreNegZero = true)
{
    if (!ignoreNegZero)
    {
        // Bit 31 is the IEEE sign bit; isolate it from 's' and merge it into the bits of 'x'.
        const uint signMask = 0x80000000u;
        uint signOfS = signMask & asuint(s);
        return asfloat(BitFieldInsert(signMask, signOfS, asuint(x)));
    }

    float magnitude = abs(x);
    return (s >= 0) ? magnitude : -magnitude;
}
827
828// Returns -1 for negative numbers and 1 for positive numbers.
829// 0 can be handled in 2 different ways.
830// The IEEE floating point standard defines 0 as signed: +0 and -0.
831// However, mathematics typically treats 0 as unsigned.
832// Therefore, we treat -0 as +0 by default: FastSign(+0) = FastSign(-0) = 1.
833// If (ignoreNegZero = false), FastSign(-0, false) = -1.
834// Note that the sign() function in HLSL implements signum, which returns 0 for 0.
float FastSign(float s, bool ignoreNegZero = true)
{
    // A unit magnitude carrying the sign of 's' is exactly the -1 / +1 result we want.
    const float unitMagnitude = 1.0;
    return CopySign(unitMagnitude, s, ignoreNegZero);
}
839
840// Orthonormalizes the tangent frame using the Gram-Schmidt process.
841// We assume that the normal is normalized and that the two vectors
842// aren't collinear.
843// Returns the new tangent (the normal is unaffected).
real3 Orthonormalize(real3 tangent, real3 normal)
{
    // Part of the tangent that lies along the normal; removing it leaves a vector perpendicular to 'normal'.
    real3 alongNormal = dot(tangent, normal) * normal;

    // TODO: use SafeNormalize()?
    return normalize(tangent - alongNormal);
}
849
// Maps [start, end] -> [0, 1], saturated.
// (x - start) / (end - start) = x * rcpLength - (start * rcpLength)
// The caller passes rcpLength and (start * rcpLength) already computed.
TEMPLATE_3_FLT_HALF(Remap01, x, rcpLength, startTimesRcpLength, return saturate(rcpLength * x - startTimesRcpLength))

// Maps [start, end] -> [1, 0], saturated.
// (end - x) / (end - start) = (end * rcpLength) - x * rcpLength
// The caller passes rcpLength and (end * rcpLength) already computed.
TEMPLATE_3_FLT_HALF(Remap10, x, rcpLength, endTimesRcpLength, return saturate(endTimesRcpLength - rcpLength * x))
855
// Remap: [0.5 / size, 1 - 0.5 / size] -> [0, 1]
// The result is saturated by Remap01(). There is no guard for (size == 1), where (size - 1) is zero.
real2 RemapHalfTexelCoordTo01(real2 coord, real2 size)
{
    const real2 rcpSizeMinusOne = rcp(size - 1);

    // Same parameterization as Remap01(): scale and pre-multiplied start.
    const real2 scale = size * rcpSizeMinusOne;
    const real2 scaledStart = 0.5 * rcpSizeMinusOne;

    return Remap01(coord, scale, scaledStart);
}
864
// Remap: [0, 1] -> [0.5 / size, 1 - 0.5 / size]
// This function does not clamp its result.
real2 Remap01ToHalfTexelCoord(real2 coord, real2 size)
{
    const real2 texel = rcp(size);

    // Output range starts half a texel in and is one texel shorter than [0, 1].
    const real2 halfTexel = 0.5 * texel;
    const real2 range = 1 - texel;

    return coord * range + halfTexel;
}
873
// Cubic smoothstep, 3x^2 - 2x^3, for an 'x' that is assumed to already lie within [0, 1].
// No clamping is performed here.
real Smoothstep01(real x)
{
    real xSquared = x * x;
    return xSquared * (3 - (2 * x));
}
879
// Quintic 'smootherstep', 6x^5 - 15x^4 + 10x^3, evaluated without clamping 'x'.
real Smootherstep01(real x)
{
    real xCubed = x * x * x;
    real quadratic = x * (x * 6 - 15) + 10;
    return xCubed * quadratic;
}
884
// Smootherstep of 't' between the edges 'a' and 'b'.
// 't' is first remapped from [a, b] to [0, 1] (saturated by Remap01), then passed to Smootherstep01().
real Smootherstep(real a, real b, real t)
{
    real rcpRange = rcp(b - a);
    real normalized = Remap01(t, rcpRange, a * rcpRange);
    return Smootherstep01(normalized);
}
891
// Normalized linear interpolation: lerp between A and B by 't', then normalize the result.
float3 NLerp(float3 A, float3 B, float t)
{
    float3 blended = lerp(A, B, t);
    return normalize(blended);
}
896
// Squared length of 'v' (the dot product of the vector with itself).
float Length2(float3 v)
{
    float squaredLength = dot(v, v);
    return squaredLength;
}
901
#ifndef BUILTIN_TARGET_API
// x raised to the 4th power, computed as the square of (x * x).
real Pow4(real x)
{
    real xSquared = x * x;
    return xSquared * xSquared;
}
#endif
908
// RangeRemap: position of 't' inside [min, max], saturated to [0, 1]. No guard for (max == min).
TEMPLATE_3_FLT(RangeRemap, min, max, t, return saturate((t - min) / (max - min)))
// RangeRemapFrom01: takes 't' from [0, 1] to [min, max]; the result is not clamped.
TEMPLATE_3_FLT(RangeRemapFrom01, min, max, t, return min + t * (max - min))
911
// General 4x4 matrix inverse: every output element is a signed 3x3 cofactor divided by the determinant.
// The determinant is not checked: a singular matrix (det == 0) results in a division by zero.
float4x4 Inverse(float4x4 m)
{
    // nCR holds m[C - 1][R - 1], i.e. the first digit walks along a row of 'm'.
    float4 row0 = m[0];
    float4 row1 = m[1];
    float4 row2 = m[2];
    float4 row3 = m[3];

    float n11 = row0.x, n21 = row0.y, n31 = row0.z, n41 = row0.w;
    float n12 = row1.x, n22 = row1.y, n32 = row1.z, n42 = row1.w;
    float n13 = row2.x, n23 = row2.y, n33 = row2.z, n43 = row2.w;
    float n14 = row3.x, n24 = row3.y, n34 = row3.z, n44 = row3.w;

    // Cofactors that are needed both for the determinant and for the first column of the result.
    float t11 = n23 * n34 * n42 - n24 * n33 * n42 + n24 * n32 * n43 - n22 * n34 * n43 - n23 * n32 * n44 + n22 * n33 * n44;
    float t12 = n14 * n33 * n42 - n13 * n34 * n42 - n14 * n32 * n43 + n12 * n34 * n43 + n13 * n32 * n44 - n12 * n33 * n44;
    float t13 = n13 * n24 * n42 - n14 * n23 * n42 + n14 * n22 * n43 - n12 * n24 * n43 - n13 * n22 * n44 + n12 * n23 * n44;
    float t14 = n14 * n23 * n32 - n13 * n24 * n32 - n14 * n22 * n33 + n12 * n24 * n33 + n13 * n22 * n34 - n12 * n23 * n34;

    float det = n11 * t11 + n21 * t12 + n31 * t13 + n41 * t14;
    float invDet = 1.0 / det;

    // Remaining cofactors; cRC is the unscaled value of result[R][C].
    float c01 = n24 * n33 * n41 - n23 * n34 * n41 - n24 * n31 * n43 + n21 * n34 * n43 + n23 * n31 * n44 - n21 * n33 * n44;
    float c02 = n22 * n34 * n41 - n24 * n32 * n41 + n24 * n31 * n42 - n21 * n34 * n42 - n22 * n31 * n44 + n21 * n32 * n44;
    float c03 = n23 * n32 * n41 - n22 * n33 * n41 - n23 * n31 * n42 + n21 * n33 * n42 + n22 * n31 * n43 - n21 * n32 * n43;

    float c11 = n13 * n34 * n41 - n14 * n33 * n41 + n14 * n31 * n43 - n11 * n34 * n43 - n13 * n31 * n44 + n11 * n33 * n44;
    float c12 = n14 * n32 * n41 - n12 * n34 * n41 - n14 * n31 * n42 + n11 * n34 * n42 + n12 * n31 * n44 - n11 * n32 * n44;
    float c13 = n12 * n33 * n41 - n13 * n32 * n41 + n13 * n31 * n42 - n11 * n33 * n42 - n12 * n31 * n43 + n11 * n32 * n43;

    float c21 = n14 * n23 * n41 - n13 * n24 * n41 - n14 * n21 * n43 + n11 * n24 * n43 + n13 * n21 * n44 - n11 * n23 * n44;
    float c22 = n12 * n24 * n41 - n14 * n22 * n41 + n14 * n21 * n42 - n11 * n24 * n42 - n12 * n21 * n44 + n11 * n22 * n44;
    float c23 = n13 * n22 * n41 - n12 * n23 * n41 - n13 * n21 * n42 + n11 * n23 * n42 + n12 * n21 * n43 - n11 * n22 * n43;

    float c31 = n13 * n24 * n31 - n14 * n23 * n31 + n14 * n21 * n33 - n11 * n24 * n33 - n13 * n21 * n34 + n11 * n23 * n34;
    float c32 = n14 * n22 * n31 - n12 * n24 * n31 - n14 * n21 * n32 + n11 * n24 * n32 + n12 * n21 * n34 - n11 * n22 * n34;
    float c33 = n12 * n23 * n31 - n13 * n22 * n31 + n13 * n21 * n32 - n11 * n23 * n32 - n12 * n21 * n33 + n11 * n22 * n33;

    // Scale every cofactor by 1 / det, one row at a time.
    float4x4 result;
    result[0] = float4(t11, c01, c02, c03) * invDet;
    result[1] = float4(t12, c11, c12, c13) * invDet;
    result[2] = float4(t13, c21, c22, c23) * invDet;
    result[3] = float4(t14, c31, c32, c33) * invDet;

    return result;
}
951
// Linearly maps 'value' from the range [origFrom, origTo] to the range [targetFrom, targetTo].
// The result is not clamped, and there is no guard for (origTo == origFrom).
float Remap(float origFrom, float origTo, float targetFrom, float targetTo, float value)
{
    float t = (value - origFrom) / (origTo - origFrom);
    return lerp(targetFrom, targetTo, t);
}
956
957// ----------------------------------------------------------------------------
958// Texture utilities
959// ----------------------------------------------------------------------------
960
// Computes a mip level from explicit UV derivatives.
// uvdx, uvdy : UV derivatives along screen X and Y.
// scale      : per-axis factor applied to both derivatives.
// bias       : value subtracted from the computed level.
// The result is clamped so that it is never negative.
float ComputeTextureLOD(float2 uvdx, float2 uvdy, float2 scale, float bias = 0.0)
{
    float2 scaledDx = scale * uvdx;
    float2 scaledDy = scale * uvdy;

    // Largest squared derivative length; 0.5 * log2() of it equals log2() of the length itself.
    float maxSqrLength = max(dot(scaledDx, scaledDx), dot(scaledDy, scaledDy));
    float lod = 0.5 * log2(maxSqrLength) - bias;

    return max(lod, 0.0);
}
969
// Computes a mip level for 'uv' using the screen-space derivatives of 'uv' itself (ddx / ddy),
// with a unit scale. 'bias' is subtracted from the level; the result is never negative.
float ComputeTextureLOD(float2 uv, float bias = 0.0)
{
    float2 uvDerivX = ddx(uv);
    float2 uvDerivY = ddy(uv);
    const float unitScale = 1.0;

    return ComputeTextureLOD(uvDerivX, uvDerivY, unitScale, bias);
}
977
// Computes a mip level for 'uv' after scaling it by 'texelSize'.
// NOTE(review): the previous comment read "x contains width, w contains height", but 'texelSize'
// is a float2, so the second component is presumably '.y' - confirm against callers.
float ComputeTextureLOD(float2 uv, float2 texelSize, float bias = 0.0)
{
    float2 scaledUV = uv * texelSize;
    return ComputeTextureLOD(scaledUV, bias);
}
985
// 3D variant: picks the largest of the three squared derivative lengths, applies 'scale' and
// subtracts 'bias'.
// NOTE(review): the previous comment said the LOD clamp "is optional and happens outside the
// function", yet the result is clamped to be non-negative right here - confirm which is intended.
float ComputeTextureLOD(float3 duvw_dx, float3 duvw_dy, float3 duvw_dz, float scale, float bias = 0.0)
{
    float sqrLenX = dot(duvw_dx, duvw_dx);
    float sqrLenY = dot(duvw_dy, duvw_dy);
    float sqrLenZ = dot(duvw_dz, duvw_dz);
    float maxSqrLength = Max3(sqrLenX, sqrLenY, sqrLenZ);

    // The scale is squared because it multiplies a squared length.
    float lod = 0.5 * log2(maxSqrLength * (scale * scale)) - bias;

    return max(lod, 0.0);
}
993
// MIP_COUNT_SUPPORTED is defined on the platforms where GetMipCount() can query the mip count.
#if defined(SHADER_API_D3D11) || defined(SHADER_API_D3D12) || defined(SHADER_API_D3D11_9X) || defined(SHADER_API_XBOXONE) || defined(SHADER_API_PSSL) || defined(SHADER_API_METAL)
    #define MIP_COUNT_SUPPORTED 1
// TODO: Bug workaround, switch defines GLCORE when it shouldn't
#elif ((defined(SHADER_API_GLCORE) && !defined(SHADER_API_SWITCH)) || defined(SHADER_API_VULKAN)) && !defined(SHADER_STAGE_COMPUTE)
    // OpenGL only supports textureSize for width, height, depth
    // textureQueryLevels (GL_ARB_texture_query_levels) needs OpenGL 4.3 or above and doesn't compile in compute shaders
    // tex.GetDimensions converted to textureQueryLevels
    #define MIP_COUNT_SUPPORTED 1
#endif
// NOTE(review): an earlier comment here stated "Metal doesn't support high enough OpenGL version",
// but SHADER_API_METAL is part of the supported list above - confirm and drop whichever is stale.
1005
// Returns the number of mip levels of 'tex' as reported by GetDimensions(),
// or 0 on platforms where MIP_COUNT_SUPPORTED is not defined.
uint GetMipCount(TEXTURE2D_PARAM(tex, smp))
{
#if defined(MIP_COUNT_SUPPORTED)
    // Query mip 0; only the level count is of interest, width and height are discarded.
    uint mipLevel = 0;
    uint width = 0;
    uint height = 0;
    uint mipCount = 0;
    tex.GetDimensions(mipLevel, width, height, mipCount);
    return mipCount;
#else
    return 0;
#endif
}
1017
1018// ----------------------------------------------------------------------------
1019// Texture format sampling
1020// ----------------------------------------------------------------------------
1021
1022// DXC no longer supports DX9-style HLSL syntax for sampler2D, tex2D and the like.
1023// These are emulated for backwards compatibility using our own small structs and functions which manually combine samplers and textures.
1024#if defined(UNITY_COMPILER_DXC) && !defined(DXC_SAMPLER_COMPATIBILITY)
1025#define DXC_SAMPLER_COMPATIBILITY 1
1026
// On DXC platforms which don't care about explicit sampler precision we want the emulated types to work
// directly, e.g. without needing to redefine 'sampler2D' to 'sampler2D_f'.
// The '_f' names are therefore aliased to the plain DX9-style names on every API except the ones listed here.
#if !(defined(SHADER_API_GLES3) || defined(SHADER_API_VULKAN) || defined(SHADER_API_METAL) || defined(SHADER_API_SWITCH) || defined(SHADER_API_WEBGPU))
    #define sampler1D_f sampler1D
    #define sampler2D_f sampler2D
    #define sampler3D_f sampler3D
    #define samplerCUBE_f samplerCUBE
#endif
1034
// Full precision emulated samplers: each struct pairs a float4 texture ('t') with the sampler state ('s') used to read it.
struct sampler1D_f
{
    Texture1D<float4> t;
    SamplerState s;
};

struct sampler2D_f
{
    Texture2D<float4> t;
    SamplerState s;
};

struct sampler3D_f
{
    Texture3D<float4> t;
    SamplerState s;
};

struct samplerCUBE_f
{
    TextureCube<float4> t;
    SamplerState s;
};

// texXX(sampler, coord): regular sample -> Sample().
float4 tex1D(sampler1D_f x, float v)
{
    return x.t.Sample(x.s, v);
}

float4 tex2D(sampler2D_f x, float2 v)
{
    return x.t.Sample(x.s, v);
}

float4 tex3D(sampler3D_f x, float3 v)
{
    return x.t.Sample(x.s, v);
}

float4 texCUBE(samplerCUBE_f x, float3 v)
{
    return x.t.Sample(x.s, v);
}

// texXXbias(sampler, t): coordinate in the leading components of 't', bias in t.w -> SampleBias().
float4 tex1Dbias(sampler1D_f x, in float4 t)
{
    return x.t.SampleBias(x.s, t.x, t.w);
}

float4 tex2Dbias(sampler2D_f x, in float4 t)
{
    return x.t.SampleBias(x.s, t.xy, t.w);
}

float4 tex3Dbias(sampler3D_f x, in float4 t)
{
    return x.t.SampleBias(x.s, t.xyz, t.w);
}

float4 texCUBEbias(samplerCUBE_f x, in float4 t)
{
    return x.t.SampleBias(x.s, t.xyz, t.w);
}

// texXXlod(sampler, t): coordinate in the leading components of 't', mip level in t.w -> SampleLevel().
float4 tex1Dlod(sampler1D_f x, in float4 t)
{
    return x.t.SampleLevel(x.s, t.x, t.w);
}

float4 tex2Dlod(sampler2D_f x, in float4 t)
{
    return x.t.SampleLevel(x.s, t.xy, t.w);
}

float4 tex3Dlod(sampler3D_f x, in float4 t)
{
    return x.t.SampleLevel(x.s, t.xyz, t.w);
}

float4 texCUBElod(samplerCUBE_f x, in float4 t)
{
    return x.t.SampleLevel(x.s, t.xyz, t.w);
}

// texXXgrad(sampler, coord, dx, dy): explicit derivatives -> SampleGrad().
float4 tex1Dgrad(sampler1D_f x, float t, float dx, float dy)
{
    return x.t.SampleGrad(x.s, t, dx, dy);
}

float4 tex2Dgrad(sampler2D_f x, float2 t, float2 dx, float2 dy)
{
    return x.t.SampleGrad(x.s, t, dx, dy);
}

float4 tex3Dgrad(sampler3D_f x, float3 t, float3 dx, float3 dy)
{
    return x.t.SampleGrad(x.s, t, dx, dy);
}

float4 texCUBEgrad(samplerCUBE_f x, float3 t, float3 dx, float3 dy)
{
    return x.t.SampleGrad(x.s, t, dx, dy);
}

// texXX(sampler, coord, dx, dy): overloads of the plain names that take derivatives -> SampleGrad().
float4 tex1D(sampler1D_f x, float t, float dx, float dy)
{
    return x.t.SampleGrad(x.s, t, dx, dy);
}

float4 tex2D(sampler2D_f x, float2 t, float2 dx, float2 dy)
{
    return x.t.SampleGrad(x.s, t, dx, dy);
}

float4 tex3D(sampler3D_f x, float3 t, float3 dx, float3 dy)
{
    return x.t.SampleGrad(x.s, t, dx, dy);
}

float4 texCUBE(samplerCUBE_f x, float3 t, float3 dx, float3 dy)
{
    return x.t.SampleGrad(x.s, t, dx, dy);
}

// texXXproj(sampler, t): the coordinate is divided by the last component of 't' before a regular sample.
float4 tex1Dproj(sampler1D_f s, in float2 t)
{
    return s.t.Sample(s.s, t.x / t.y);
}

float4 tex1Dproj(sampler1D_f s, in float4 t)
{
    return s.t.Sample(s.s, t.x / t.w);
}

float4 tex2Dproj(sampler2D_f s, in float3 t)
{
    return s.t.Sample(s.s, t.xy / t.z);
}

float4 tex2Dproj(sampler2D_f s, in float4 t)
{
    return s.t.Sample(s.s, t.xy / t.w);
}

float4 tex3Dproj(sampler3D_f s, in float4 t)
{
    return s.t.Sample(s.s, t.xyz / t.w);
}

float4 texCUBEproj(samplerCUBE_f s, in float4 t)
{
    return s.t.Sample(s.s, t.xyz / t.w);
}
1071
// Half precision emulated samplers, used instead of the sampler.*_half unity types.
// Each struct pairs a min16float4 texture ('t') with the sampler state ('s') used to read it.
struct sampler1D_h
{
    Texture1D<min16float4> t;
    SamplerState s;
};

struct sampler2D_h
{
    Texture2D<min16float4> t;
    SamplerState s;
};

struct sampler3D_h
{
    Texture3D<min16float4> t;
    SamplerState s;
};

struct samplerCUBE_h
{
    TextureCube<min16float4> t;
    SamplerState s;
};

// texXX(sampler, coord): regular sample -> Sample().
min16float4 tex1D(sampler1D_h x, float v)
{
    return x.t.Sample(x.s, v);
}

min16float4 tex2D(sampler2D_h x, float2 v)
{
    return x.t.Sample(x.s, v);
}

min16float4 tex3D(sampler3D_h x, float3 v)
{
    return x.t.Sample(x.s, v);
}

min16float4 texCUBE(samplerCUBE_h x, float3 v)
{
    return x.t.Sample(x.s, v);
}

// texXXbias(sampler, t): coordinate in the leading components of 't', bias in t.w -> SampleBias().
min16float4 tex1Dbias(sampler1D_h x, in float4 t)
{
    return x.t.SampleBias(x.s, t.x, t.w);
}

min16float4 tex2Dbias(sampler2D_h x, in float4 t)
{
    return x.t.SampleBias(x.s, t.xy, t.w);
}

min16float4 tex3Dbias(sampler3D_h x, in float4 t)
{
    return x.t.SampleBias(x.s, t.xyz, t.w);
}

min16float4 texCUBEbias(samplerCUBE_h x, in float4 t)
{
    return x.t.SampleBias(x.s, t.xyz, t.w);
}

// texXXlod(sampler, t): coordinate in the leading components of 't', mip level in t.w -> SampleLevel().
min16float4 tex1Dlod(sampler1D_h x, in float4 t)
{
    return x.t.SampleLevel(x.s, t.x, t.w);
}

min16float4 tex2Dlod(sampler2D_h x, in float4 t)
{
    return x.t.SampleLevel(x.s, t.xy, t.w);
}

min16float4 tex3Dlod(sampler3D_h x, in float4 t)
{
    return x.t.SampleLevel(x.s, t.xyz, t.w);
}

min16float4 texCUBElod(samplerCUBE_h x, in float4 t)
{
    return x.t.SampleLevel(x.s, t.xyz, t.w);
}

// texXXgrad(sampler, coord, dx, dy): explicit derivatives -> SampleGrad().
min16float4 tex1Dgrad(sampler1D_h x, float t, float dx, float dy)
{
    return x.t.SampleGrad(x.s, t, dx, dy);
}

min16float4 tex2Dgrad(sampler2D_h x, float2 t, float2 dx, float2 dy)
{
    return x.t.SampleGrad(x.s, t, dx, dy);
}

min16float4 tex3Dgrad(sampler3D_h x, float3 t, float3 dx, float3 dy)
{
    return x.t.SampleGrad(x.s, t, dx, dy);
}

min16float4 texCUBEgrad(samplerCUBE_h x, float3 t, float3 dx, float3 dy)
{
    return x.t.SampleGrad(x.s, t, dx, dy);
}

// texXX(sampler, coord, dx, dy): overloads of the plain names that take derivatives -> SampleGrad().
min16float4 tex1D(sampler1D_h x, float t, float dx, float dy)
{
    return x.t.SampleGrad(x.s, t, dx, dy);
}

min16float4 tex2D(sampler2D_h x, float2 t, float2 dx, float2 dy)
{
    return x.t.SampleGrad(x.s, t, dx, dy);
}

min16float4 tex3D(sampler3D_h x, float3 t, float3 dx, float3 dy)
{
    return x.t.SampleGrad(x.s, t, dx, dy);
}

min16float4 texCUBE(samplerCUBE_h x, float3 t, float3 dx, float3 dy)
{
    return x.t.SampleGrad(x.s, t, dx, dy);
}

// texXXproj(sampler, t): the coordinate is divided by the last component of 't' before a regular sample.
min16float4 tex1Dproj(sampler1D_h s, in float2 t)
{
    return s.t.Sample(s.s, t.x / t.y);
}

min16float4 tex1Dproj(sampler1D_h s, in float4 t)
{
    return s.t.Sample(s.s, t.x / t.w);
}

min16float4 tex2Dproj(sampler2D_h s, in float3 t)
{
    return s.t.Sample(s.s, t.xy / t.z);
}

min16float4 tex2Dproj(sampler2D_h s, in float4 t)
{
    return s.t.Sample(s.s, t.xy / t.w);
}

min16float4 tex3Dproj(sampler3D_h s, in float4 t)
{
    return s.t.Sample(s.s, t.xyz / t.w);
}

min16float4 texCUBEproj(samplerCUBE_h s, in float4 t)
{
    return s.t.Sample(s.s, t.xyz / t.w);
}
1109#endif
1110
// Converts a (possibly unnormalized) direction to a 2D lat-long coordinate.
// The direction is normalized first; x comes from atan2 around the Y axis, y from asin(dir.y).
float2 DirectionToLatLongCoordinate(float3 unDir)
{
    float3 dir = normalize(unDir);

    // coordinate frame is (-Z, X) meaning negative Z is primary axis and X is secondary axis.
    float u = 1.0 - 0.5 * INV_PI * atan2(dir.x, -dir.z);
    float v = asin(dir.y) * INV_PI + 0.5;

    return float2(u, v);
}
1117
// Converts a 2D lat-long coordinate to a direction vector.
// coord.y selects the polar angle (coord.y * PI), coord.x the azimuth (coord.x * 2 * PI - PI / 2).
float3 LatlongToDirectionCoordinate(float2 coord)
{
    float polar = coord.y * PI;
    float azimuth = (coord.x * 2.0 * PI - PI * 0.5);

    float cosPolar = cos(polar);
    // sin derived from cos; the min() keeps the sqrt() argument from going negative.
    float sinPolar = sqrt(1.0 - min(1.0, cosPolar * cosPolar));
    float cosAzimuth = cos(azimuth);
    float sinAzimuth = sin(azimuth);

    // The X and Y components are negated; Z is kept as is.
    return float3(-(sinPolar * cosAzimuth), -cosPolar, sinPolar * sinAzimuth);
}
1132
// Converts an angle to the 2D unit direction (cos(angle), sin(angle)).
float2 OrientationToDirection(float orientation)
{
    float x = cos(orientation);
    float y = sin(orientation);
    return float2(x, y);
}
1137
1138// ----------------------------------------------------------------------------
1139// Depth encoding/decoding
1140// ----------------------------------------------------------------------------
1141
1142// Z buffer to linear 0..1 depth (0 at near plane, 1 at far plane).
1143// Does NOT correctly handle oblique view frustums.
1144// Does NOT work with orthographic projection.
1145// zBufferParam (UNITY_REVERSED_Z) = { f/n - 1, 1, (1/n - 1/f), 1/f }
1146// zBufferParam = { 1 - f/n, f/n, (1/f - 1/n), 1/n }
float Linear01DepthFromNear(float depth, float4 zBufferParam)
{
    // The denominator is shared by both depth conventions; only the numerator differs.
    float denominator = zBufferParam.x * depth + zBufferParam.y;

#if UNITY_REVERSED_Z
    float numerator = 1.0 - depth;
#else
    float numerator = depth;
#endif

    return numerator / denominator;
}
1155
1156// Z buffer to linear 0..1 depth (0 at camera position, 1 at far plane).
1157// Does NOT work with orthographic projections.
1158// Does NOT correctly handle oblique view frustums.
1159// zBufferParam (UNITY_REVERSED_Z) = { f/n - 1, 1, (1/n - 1/f), 1/f }
1160// zBufferParam = { 1 - f/n, f/n, (1/f - 1/n), 1/n }
float Linear01Depth(float depth, float4 zBufferParam)
{
    // Only the .xy components of 'zBufferParam' are used here.
    float denominator = zBufferParam.x * depth + zBufferParam.y;
    return 1.0 / denominator;
}
1165
1166// Z buffer to linear view space (eye) depth.
1167// Does NOT correctly handle oblique view frustums.
1168// Does NOT work with orthographic projection.
1169// zBufferParam (UNITY_REVERSED_Z) = { f/n - 1, 1, (1/n - 1/f), 1/f }
1170// zBufferParam = { 1 - f/n, f/n, (1/f - 1/n), 1/n }
float LinearEyeDepth(float depth, float4 zBufferParam)
{
    // Only the .zw components of 'zBufferParam' are used here.
    float denominator = zBufferParam.z * depth + zBufferParam.w;
    return 1.0 / denominator;
}
1175
1176// Z buffer to linear depth.
1177// Correctly handles oblique view frustums.
1178// Does NOT work with orthographic projection.
1179// Ref: An Efficient Depth Linearization Method for Oblique View Frustums, Eq. 6.
float LinearEyeDepth(float2 positionNDC, float deviceDepth, float4 invProjParam)
{
    // Homogeneous point (ndc.xy, deviceDepth, 1) dotted with 'invProjParam'; its reciprocal is the view space Z.
    float4 point = float4(positionNDC, deviceDepth, 1.0);
    float viewSpaceZ = rcp(dot(point, invProjParam));

    // If the matrix is right-handed, we have to flip the Z axis to get a positive value.
    return abs(viewSpaceZ);
}
1187
1188// Z buffer to linear depth.
1189// Works in all cases.
1190// Typically, this is the cheapest variant, provided you've already computed 'positionWS'.
1191// Assumes that the 'positionWS' is in front of the camera.
float LinearEyeDepth(float3 positionWS, float4x4 viewMatrix)
{
    // Transform to view space; only the Z component is needed.
    float4 positionVS = mul(viewMatrix, float4(positionWS, 1.0));

    // If the matrix is right-handed, we have to flip the Z axis to get a positive value.
    return abs(positionVS.z);
}
1199
1200// 'z' is the view space Z position (linear depth).
1201// saturate(z) the output of the function to clamp them to the [0, 1] range.
1202// d = log2(c * (z - n) + 1) / log2(c * (f - n) + 1)
1203// = log2(c * (z - n + 1/c)) / log2(c * (f - n) + 1)
1204// = log2(c) / log2(c * (f - n) + 1) + log2(z - (n - 1/c)) / log2(c * (f - n) + 1)
1205// = E + F * log2(z - G)
1206// encodingParams = { E, F, G, 0 }
float EncodeLogarithmicDepthGeneralized(float z, float4 encodingParams)
{
    float E = encodingParams.x;
    float F = encodingParams.y;
    float G = encodingParams.z;

    // Use max() to avoid NaNs.
    float shifted = max(0, z - G);

    return E + F * log2(shifted);
}
1212
1213// 'd' is the logarithmically encoded depth value.
1214// saturate(d) to clamp the output of the function to the [n, f] range.
1215// z = 1/c * (pow(c * (f - n) + 1, d) - 1) + n
1216// = 1/c * pow(c * (f - n) + 1, d) + n - 1/c
1217// = 1/c * exp2(d * log2(c * (f - n) + 1)) + (n - 1/c)
1218// = L * exp2(d * M) + N
1219// decodingParams = { L, M, N, 0 }
1220// Graph: https://www.desmos.com/calculator/qrtatrlrba
float DecodeLogarithmicDepthGeneralized(float d, float4 decodingParams)
{
    float L = decodingParams.x;
    float M = decodingParams.y;
    float N = decodingParams.z;

    // z = L * exp2(d * M) + N
    return L * exp2(d * M) + N;
}
1225
1226// 'z' is the view-space Z position (linear depth).
1227// saturate(z) the output of the function to clamp them to the [0, 1] range.
1228// encodingParams = { n, log2(f/n), 1/n, 1/log2(f/n) }
1229// This is an optimized version of EncodeLogarithmicDepthGeneralized() for (c = 2).
float EncodeLogarithmicDepth(float z, float4 encodingParams)
{
    // encodingParams.z = 1/n and encodingParams.w = 1/log2(f/n), so this is log2(z/n) / log2(f/n).
    // Use max() to avoid NaNs.
    float zOverNear = max(0, z * encodingParams.z);

    // TODO: optimize to (log2(z) - log2(n)) / (log2(f) - log2(n)).
    return log2(zOverNear) * encodingParams.w;
}
1236
1237// 'd' is the logarithmically encoded depth value.
1238// saturate(d) to clamp the output of the function to the [n, f] range.
1239// encodingParams = { n, log2(f/n), 1/n, 1/log2(f/n) }
1240// This is an optimized version of DecodeLogarithmicDepthGeneralized() for (c = 2).
1241// Graph: https://www.desmos.com/calculator/qrtatrlrba
float DecodeLogarithmicDepth(float d, float4 encodingParams)
{
    // encodingParams.x = n and encodingParams.y = log2(f/n), so this is n * (f/n)^d.
    float scale = exp2(d * encodingParams.y);

    // TODO: optimize to exp2(d * y + log2(x)).
    return encodingParams.x * scale;
}
1247
1248// Use an infinite far plane
1249// https://chaosinmotion.com/2010/09/06/goodbye-far-clipping-plane/
1250// 'depth' is the linear depth (view-space Z position)
float EncodeInfiniteDepth(float depth, float near)
{
    // near / depth is 1 at the near plane and decreases toward 0 with distance; saturate keeps it in [0, 1].
    float ratio = near / depth;
    return saturate(ratio);
}
1255
// Inverse of EncodeInfiniteDepth().
// 'z' is the depth encoded in the depth buffer (1 at near plane, 0 at far plane).
// 'z' is clamped to at least FLT_EPS so that the division never sees a zero denominator.
float DecodeInfiniteDepth(float z, float near)
{
    float safeZ = max(z, FLT_EPS);
    return near / safeZ;
}
1261
// 'Over' compositing of 'front' on top of 'back' using front.a as the coverage.
// 'front' is added unscaled, i.e. this is the formula for premultiplied-alpha inputs.
real4 CompositeOver(real4 front, real4 back)
{
    real backWeight = 1 - front.a;
    return front + backWeight * back;
}
1266
// 'Over' compositing with a separate alpha value per color channel.
// Both the color and the alpha of the back layer are weighted by (1 - alphaFront).
void CompositeOver(real3 colorFront, real3 alphaFront,
                   real3 colorBack,  real3 alphaBack,
                   out real3 color,  out real3 alpha)
{
    real3 backWeight = 1 - alphaFront;

    color = colorFront + backWeight * colorBack;
    alpha = alphaFront + backWeight * alphaBack;
}
1274
1275// ----------------------------------------------------------------------------
1276// Space transformations
1277// ----------------------------------------------------------------------------
1278
// 3x3 identity matrix.
static const float3x3 k_identity3x3 =
{
    1.0, 0.0, 0.0,
    0.0, 1.0, 0.0,
    0.0, 0.0, 1.0
};

// 4x4 identity matrix; used as the default 'clipSpaceTransform' of the functions below.
static const float4x4 k_identity4x4 =
{
    1.0, 0.0, 0.0, 0.0,
    0.0, 1.0, 0.0, 0.0,
    0.0, 0.0, 1.0, 0.0,
    0.0, 0.0, 0.0, 1.0
};
1287
// Builds a clip space position (with w = 1) from NDC coordinates in [0, 1] and a device depth.
float4 ComputeClipSpacePosition(float2 positionNDC, float deviceDepth)
{
    // [0, 1] -> [-1, 1]
    float2 xy = positionNDC * 2.0 - 1.0;

#if UNITY_UV_STARTS_AT_TOP
    // Our world space, view space, screen space and NDC space are Y-up.
    // Our clip space is flipped upside-down due to poor legacy Unity design.
    // The flip is baked into the projection matrix, so we only have to flip
    // manually when going from CS to NDC and back.
    xy.y = -xy.y;
#endif

    return float4(xy, deviceDepth, 1.0);
}
1302
1303// Use case examples:
1304// (position = positionCS) => (clipSpaceTransform = use default)
1305// (position = positionVS) => (clipSpaceTransform = UNITY_MATRIX_P)
1306// (position = positionWS) => (clipSpaceTransform = UNITY_MATRIX_VP)
float4 ComputeClipSpacePosition(float3 position, float4x4 clipSpaceTransform = k_identity4x4)
{
    // Promote to a homogeneous point (w = 1) before applying the transform.
    float4 homogeneousPosition = float4(position, 1.0);
    return mul(clipSpaceTransform, homogeneousPosition);
}
1311
1312// The returned Z value is the depth buffer value (and NOT linear view space Z value).
1313// Use case examples:
1314// (position = positionCS) => (clipSpaceTransform = use default)
1315// (position = positionVS) => (clipSpaceTransform = UNITY_MATRIX_P)
1316// (position = positionWS) => (clipSpaceTransform = UNITY_MATRIX_VP)
float3 ComputeNormalizedDeviceCoordinatesWithZ(float3 position, float4x4 clipSpaceTransform = k_identity4x4)
{
    float4 positionCS = ComputeClipSpacePosition(position, clipSpaceTransform);

#if UNITY_UV_STARTS_AT_TOP
    // Our world space, view space, screen space and NDC space are Y-up.
    // Our clip space is flipped upside-down due to poor legacy Unity design.
    // The flip is baked into the projection matrix, so we only have to flip
    // manually when going from CS to NDC and back.
    positionCS.y = -positionCS.y;
#endif

    // Perspective divide.
    float rcpW = rcp(positionCS.w);
    float3 ndc = positionCS.xyz * rcpW;

    // [-1, 1] -> [0, 1] for XY only; Z is left as is.
    ndc.xy = ndc.xy * 0.5 + 0.5;

    return ndc;
}
1334
1335// Use case examples:
1336// (position = positionCS) => (clipSpaceTransform = use default)
1337// (position = positionVS) => (clipSpaceTransform = UNITY_MATRIX_P)
1338// (position = positionWS) => (clipSpaceTransform = UNITY_MATRIX_VP)
float2 ComputeNormalizedDeviceCoordinates(float3 position, float4x4 clipSpaceTransform = k_identity4x4)
{
    // Same computation as the 'WithZ' variant; the Z component is simply dropped.
    float3 ndcWithZ = ComputeNormalizedDeviceCoordinatesWithZ(position, clipSpaceTransform);
    return ndcWithZ.xy;
}
1343
// Reconstructs a view space position from NDC coordinates and a device depth
// by un-projecting the matching clip space position with 'invProjMatrix'.
float3 ComputeViewSpacePosition(float2 positionNDC, float deviceDepth, float4x4 invProjMatrix)
{
    float4 positionCS = ComputeClipSpacePosition(positionNDC, deviceDepth);
    float4 hpositionVS = mul(invProjMatrix, positionCS);

    // The view space uses a right-handed coordinate system.
    float3 flipped = float3(hpositionVS.xy, -hpositionVS.z);

    // Homogeneous divide.
    return flipped / hpositionVS.w;
}
1352
// Reconstructs a world space position from NDC coordinates and a device depth
// by un-projecting the matching clip space position with 'invViewProjMatrix'.
float3 ComputeWorldSpacePosition(float2 positionNDC, float deviceDepth, float4x4 invViewProjMatrix)
{
    float4 clipPosition = ComputeClipSpacePosition(positionNDC, deviceDepth);
    float4 homogeneousWS = mul(invViewProjMatrix, clipPosition);

    // Homogeneous divide.
    return homogeneousWS.xyz / homogeneousWS.w;
}
1359
// Reconstructs a world space position from a clip space position using 'invViewProjMatrix'.
float3 ComputeWorldSpacePosition(float4 positionCS, float4x4 invViewProjMatrix)
{
    float4 homogeneousWS = mul(invViewProjMatrix, positionCS);

    // Homogeneous divide.
    return homogeneousWS.xyz / homogeneousWS.w;
}
1365
1366// ----------------------------------------------------------------------------
1367// PositionInputs
1368// ----------------------------------------------------------------------------
1369
// Position data shared between pixel and compute shaders.
// Note: if you modify this struct, be sure to update the CustomPassFullscreenShader.template
struct PositionInputs
{
    // World space position (could be camera-relative)
    float3 positionWS;
    // Normalized screen coordinates within the viewport : [0, 1) (with the half-pixel offset)
    float2 positionNDC;
    // Screen space pixel coordinates : [0, NumPixels)
    uint2  positionSS;
    // Screen tile coordinates : [0, NumTiles)
    uint2  tileCoord;
    // Depth from the depth buffer : [0, 1] (typically reversed)
    float  deviceDepth;
    // View space Z coordinate : [Near, Far]
    float  linearDepth;
};
1380
1381// This function is use to provide an easy way to sample into a screen texture, either from a pixel or a compute shaders.
1382// This allow to easily share code.
1383// If a compute shader call this function positionSS is an integer usually calculate like: uint2 positionSS = groupId.xy * BLOCK_SIZE + groupThreadId.xy
1384// else it is current unormalized screen coordinate like return by SV_Position
// Fills positionNDC, positionSS and tileCoord; every other field is left zero-initialized.
// Explicit tile coordinates are taken as a parameter so that they can easily be made lane invariant for compute evaluation.
PositionInputs GetPositionInput(float2 positionSS, float2 invScreenSize, uint2 tileCoord)
{
    PositionInputs posInput;
    ZERO_INITIALIZE(PositionInputs, posInput);

    float2 pixelPosition = positionSS;
#if defined(SHADER_STAGE_COMPUTE) || defined(SHADER_STAGE_RAY_TRACING)
    // In case of compute shader an extra half offset is added to the screenPos to shift the integer position to pixel center.
    pixelPosition.xy += float2(0.5, 0.5);
#endif

    posInput.positionNDC = pixelPosition * invScreenSize;
    // The integer pixel coordinate is taken from the incoming position, without the half offset.
    posInput.positionSS = uint2(positionSS);
    posInput.tileCoord = tileCoord;

    return posInput;
}
1401
// Convenience overload for callers without tile information: the tile coordinate is set to (0, 0).
PositionInputs GetPositionInput(float2 positionSS, float2 invScreenSize)
{
    const uint2 noTileCoord = uint2(0, 0);
    return GetPositionInput(positionSS, invScreenSize, noTileCoord);
}
1406
1407// For Raytracing only
1408// This function does not initialize deviceDepth and linearDepth
PositionInputs GetPositionInput(float2 positionSS, float2 invScreenSize, float3 positionWS)
{
    // Screen space fields come from the tile-less overload (tile coordinate (0, 0));
    // deviceDepth and linearDepth keep their zero-initialized values.
    PositionInputs result = GetPositionInput(positionSS, invScreenSize);
    result.positionWS = positionWS;

    return result;
}
1416
1417// From forward
1418// deviceDepth and linearDepth come directly from .zw of SV_Position
PositionInputs GetPositionInput(float2 positionSS, float2 invScreenSize, float deviceDepth, float linearDepth, float3 positionWS, uint2 tileCoord)
{
    // Screen space fields first, then the values supplied by the caller are stored untouched.
    PositionInputs result = GetPositionInput(positionSS, invScreenSize, tileCoord);

    result.deviceDepth = deviceDepth;
    result.linearDepth = linearDepth;
    result.positionWS = positionWS;

    return result;
}
1428
// Forward variant without tile information: the tile coordinate is set to (0, 0).
PositionInputs GetPositionInput(float2 positionSS, float2 invScreenSize, float deviceDepth, float linearDepth, float3 positionWS)
{
    const uint2 noTileCoord = uint2(0, 0);
    return GetPositionInput(positionSS, invScreenSize, deviceDepth, linearDepth, positionWS, noTileCoord);
}
1433
1434// From deferred or compute shader
1435// depth must be the depth from the raw depth buffer. This allow to handle all kind of depth automatically with the inverse view projection matrix.
1436// For information. In Unity Depth is always in range 0..1 (even on OpenGL) but can be reversed.
PositionInputs GetPositionInput(float2 positionSS, float2 invScreenSize, float deviceDepth,
                                float4x4 invViewProjMatrix, float4x4 viewMatrix,
                                uint2 tileCoord)
{
    PositionInputs result = GetPositionInput(positionSS, invScreenSize, tileCoord);

    // Reconstruct the world space position from the NDC coordinate and the raw depth,
    // then derive the linear depth from that position.
    float3 reconstructedWS = ComputeWorldSpacePosition(result.positionNDC, deviceDepth, invViewProjMatrix);

    result.positionWS = reconstructedWS;
    result.deviceDepth = deviceDepth;
    result.linearDepth = LinearEyeDepth(reconstructedWS, viewMatrix);

    return result;
}
1448
// Deferred / compute variant for callers without a tile coordinate: uses a tile coordinate of (0, 0).
PositionInputs GetPositionInput(float2 positionSS, float2 invScreenSize, float deviceDepth,
    float4x4 invViewProjMatrix, float4x4 viewMatrix)
{
    const uint2 noTile = uint2(0, 0);
    return GetPositionInput(positionSS, invScreenSize, deviceDepth, invViewProjMatrix, viewMatrix, noTile);
}
1454
// The view direction 'V' points towards the camera.
// 'depthOffsetVS' is always applied in the opposite direction (-V).
// Updates positionWS, deviceDepth and linearDepth of 'posInput' in place.
void ApplyDepthOffsetPositionInput(float3 V, float depthOffsetVS, float3 viewForwardDir, float4x4 viewProjMatrix, inout PositionInputs posInput)
{
    // Displace the world-space position along -V.
    float3 offsetWS = depthOffsetVS * (-V);
    posInput.positionWS += offsetWS;

    // Re-derive the device depth from the displaced position.
    posInput.deviceDepth = ComputeNormalizedDeviceCoordinatesWithZ(posInput.positionWS, viewProjMatrix).z;

    // Transform the displacement along the view vector to the displacement along the forward vector.
    // abs() makes sure the sign is right: 'depthOffsetVS' applies in the direction away from the camera.
    float viewToForward = abs(dot(V, viewForwardDir));
    posInput.linearDepth += depthOffsetVS * viewToForward;
}
1467
1468// ----------------------------------------------------------------------------
1469// Terrain/Brush heightmap encoding/decoding
1470// ----------------------------------------------------------------------------
1471
1472#if defined(SHADER_API_VULKAN) || defined(SHADER_API_GLES3) || defined(SHADER_API_WEBGPU)
1473
1474// For the built-in target this is already a defined symbol
1475#ifndef BUILTIN_TARGET_API
// Packs a height into two 8-bit channels: the value is scaled by 65535 and truncated to an
// integer, whose low byte goes to x and next byte to y, each divided by 255. z and w are zero.
// NOTE(review): presumably 'height' is expected in the 0..1 range - confirm against callers.
real4 PackHeightmap(real height)
{
    uint quantized = (uint)(65535.0 * height);
    uint lowByte   = quantized & 0xFF;
    uint highByte  = (quantized >> 8) & 0xFF;
    return real4(lowByte, highByte, 0, 0) / 255.0;
}
1481
// Rebuilds a height from the two channels written by the byte-splitting PackHeightmap.
// Same as (255.0 * height.r + 255.0 * 256.0 * height.g) / 65535.0, with the common factor 255 removed.
real UnpackHeightmap(real4 height)
{
    return (height.g * 256.0 + height.r) / 257.0;
}
1486#endif
1487
1488#else
1489
1490// For the built-in target this is already a defined symbol
1491#ifndef BUILTIN_TARGET_API
// Stores the height unchanged in the first channel; the remaining channels are zero.
real4 PackHeightmap(real height)
{
    real4 packed = real4(height, 0, 0, 0);
    return packed;
}
1496
// Reads the height back from the first channel.
real UnpackHeightmap(real4 height)
{
    return height.x;
}
1501#endif
1502
1503#endif
1504
1505// ----------------------------------------------------------------------------
1506// Misc utilities
1507// ----------------------------------------------------------------------------
1508
// Tests a bitfield: returns true when at least one bit of 'flag' is also set in 'bitfield'.
bool HasFlag(uint bitfield, uint flag)
{
    uint commonBits = bitfield & flag;
    return commonBits != 0;
}
1514
// Normalization that accounts for zero-length vectors: the squared length is clamped
// to at least FLT_MIN before the reciprocal square root is taken.
float3 SafeNormalize(float3 inVec)
{
    float lengthSq  = dot(inVec, inVec);
    float invLength = rsqrt(max(FLT_MIN, lengthSq));
    return inVec * invLength;
}
1521
// Half-precision variant: the squared length is clamped to at least HALF_MIN
// before the reciprocal square root is taken.
half3 SafeNormalize(half3 inVec)
{
    half lengthSq  = dot(inVec, inVec);
    half invLength = rsqrt(max(HALF_MIN, lengthSq));
    return inVec * invLength;
}
1527
// Returns true when the squared length of 'inVec' lies strictly between 0.9998 and 1.0002001.
bool IsNormalized(float3 inVec)
{
    float lengthSq = dot(inVec, inVec);

    bool aboveLowerBound = lengthSq > 0.9998;
    bool belowUpperBound = lengthSq < 1.0002001;
    return aboveLowerBound && belowUpperBound;
}
1533
// Half-precision variant with a wider tolerance: returns true when the squared length
// of 'inVec' lies strictly between 0.998 and 1.002.
bool IsNormalized(half3 inVec)
{
    half lengthSq = dot(inVec, inVec);

    bool aboveLowerBound = lengthSq > 0.998;
    bool belowUpperBound = lengthSq < 1.002;
    return aboveLowerBound && belowUpperBound;
}
1539
// Division which returns 1 whenever both operands compare equal, which covers (inf/inf) and (0/0).
// If any of the input parameters is a NaN, the operands never compare equal and the result is a NaN.
real SafeDiv(real numer, real denom)
{
    if (numer == denom)
        return 1;

    return numer / denom;
}
1546
// Square root that never produces an imaginary result: negative inputs are clamped to zero first.
real SafeSqrt(real x)
{
    real nonNegative = max(0, x);
    return sqrt(nonNegative);
}
1552
// Computes sin(x) from cos(x) as sqrt(1 - cos^2), with the radicand saturated to [0, 1].
// Assumes that (0 <= x <= Pi), so that the sine is non-negative.
real SinFromCos(real cosX)
{
    real sinSq = saturate(1 - cosX * cosX);
    return sqrt(sinSq);
}
1558
// Dot product of two directions given in spherical coordinates
// (cosine of the polar angle, azimuth): sin1 * sin2 * cos(phi1 - phi2) + cos1 * cos2.
real SphericalDot(real cosTheta1, real phi1, real cosTheta2, real phi2)
{
    real sinTheta1 = SinFromCos(cosTheta1);
    real sinTheta2 = SinFromCos(cosTheta2);

    return sinTheta1 * sinTheta2 * cos(phi1 - phi2) + cosTheta1 * cosTheta2;
}
1564
// Generates a triangle in homogeneous clip space, s.t.
// v0 = (-1, -1, 1), v1 = (3, -1, 1), v2 = (-1, 3, 1).
// This function returns the matching texture coordinates: (0, 0), (2, 0), (0, 2) for
// vertexID 0, 1, 2, with v replaced by (1 - v) when UNITY_UV_STARTS_AT_TOP is set.
float2 GetFullScreenTriangleTexCoord(uint vertexID)
{
    uint uBits = (vertexID << 1) & 2;
    uint vBits = vertexID & 2;

#if UNITY_UV_STARTS_AT_TOP
    return float2(uBits, 1.0 - vBits);
#else
    return float2(uBits, vBits);
#endif
}
1575
// Clip-space position of a full-screen triangle vertex: xy is (-1, -1), (3, -1), (-1, 3) for
// vertexID 0, 1, 2; z is the supplied depth and w is 1.
float4 GetFullScreenTriangleVertexPosition(uint vertexID, float z = UNITY_NEAR_CLIP_VALUE)
{
    // note: the triangle vertex position coordinates are x2 so the returned UV coordinates are in range -1, 1 on the screen.
    float4 pos;
    pos.x = ((vertexID << 1) & 2) * 2.0 - 1.0;
    pos.y = (vertexID & 2) * 2.0 - 1.0;
    pos.z = z;
    pos.w = 1.0;

#ifdef UNITY_PRETRANSFORM_TO_DISPLAY_ORIENTATION
    pos = ApplyPretransformRotation(pos);
#endif

    return pos;
}
1586
1587
// A procedural draw with 2 triangles uses the index order (0,1,2) (0,2,3).
// Texture coordinates per vertex ID (before the optional v flip):
// 0 - 0,0
// 1 - 0,1
// 2 - 1,1
// 3 - 1,0
// When UNITY_UV_STARTS_AT_TOP is set, v is replaced by (1 - v).
float2 GetQuadTexCoord(uint vertexID)
{
    uint highBit = vertexID >> 1;
    uint lowBit  = vertexID & 1;

    float2 uv;
    uv.x = highBit;
    uv.y = (highBit + lowBit) & 1; // produces 0 for indices 0,3 and 1 for 1,2

#if UNITY_UV_STARTS_AT_TOP
    uv.y = 1.0 - uv.y;
#endif

    return uv;
}
1606
// Quad vertex position (x, y) per vertex ID; z is the supplied depth and w is 1:
// 0 - 0,1
// 1 - 0,0
// 2 - 1,0
// 3 - 1,1
float4 GetQuadVertexPosition(uint vertexID, float z = UNITY_NEAR_CLIP_VALUE)
{
    uint highBit = vertexID >> 1;
    uint lowBit  = vertexID & 1;

    // Parity of (highBit + lowBit), inverted: produces 1 for indices 0,3 and 0 for 1,2.
    uint invertedParity = 1 - ((highBit + lowBit) & 1);

    float4 pos;
    pos.x = highBit;
    pos.y = invertedParity;
    pos.z = z;
    pos.w = 1.0;

#ifdef UNITY_PRETRANSFORM_TO_DISPLAY_ORIENTATION
    pos = ApplyPretransformRotation(pos);
#endif

    return pos;
}
1623
1624#if !defined(SHADER_STAGE_RAY_TRACING)
1625
// LOD dithering transition helper.
// LOD0 must use this function with ditherFactor 1..0
// LOD1 must use this function with ditherFactor -1..0
// This is what is provided by unity_LODFade.
// The current pixel is discarded through clip() when the computed value is negative.
void LODDitheringTransition(uint2 fadeMaskSeed, float ditherFactor)
{
    // Generate a spatially varying pattern.
    // Unfortunately, varying the pattern with time confuses the TAA, increasing the amount of noise.
    float noise = GenerateHashedRandomFloat(fadeMaskSeed);

    // Giving the noise the sign of the dither factor preserves the symmetry
    // s.t. if LOD 0 has f = x, LOD 1 has f = -x.
    float signedNoise = CopySign(noise, ditherFactor);

    clip(ditherFactor - signedNoise);
}
1640
1641#endif
1642
// The resource that is bound when binding a stencil buffer from the depth buffer has two channels.
// On D3D11 (and the Xbox One / GameCore targets handled below) the stencil value is in the green
// channel, while on other APIs it is in the red channel. Note that on some platforms always using
// the green channel might work, but this is not guaranteed.
uint GetStencilValue(uint2 stencilBufferVal)
{
    uint stencil;

#if defined(SHADER_API_D3D11) || defined(SHADER_API_XBOXONE) || defined(SHADER_API_GAMECORE)
    stencil = stencilBufferVal.g;
#else
    stencil = stencilBufferVal.r;
#endif

    return stencil;
}
1653
// Sharpens the alpha of a texture to the width of a single pixel.
// Used for alpha to coverage.
// source: https://medium.com/@bgolus/anti-aliased-alpha-test-the-esoteric-alpha-to-coverage-8b177335ae4f
float SharpenAlpha(float alpha, float alphaClipTreshold)
{
    // Screen-space rate of change of alpha, kept away from zero to avoid dividing by 0.
    float alphaWidth = max(fwidth(alpha), 0.0001);

    // Distance to the clip threshold in units of that rate, re-centered on 0.5.
    float sharpened = (alpha - alphaClipTreshold) / alphaWidth + 0.5;
    return saturate(sharpened);
}
1661
// These functions clamp a value to HALF_MAX (the maximum of 16-bit floating point); they are used
// to prevent INF from appearing in the code in case of extreme values.
// NOTE(review): TEMPLATE_1_FLT is defined elsewhere; presumably it instantiates ClampToFloat16Max
// for each floating-point type/width - confirm against the macro definition.
TEMPLATE_1_FLT(ClampToFloat16Max, value, return min(value, HALF_MAX))
1664
// Re-enables warning 3205, which is disabled at the top of this file.
// NOTE(review): the matching 'disable' at the top of the file also applies when
// UNITY_UNIFIED_SHADER_PRECISION_MODEL is defined, but this condition does not test it, so in that
// configuration the warning stays disabled after this file is included - confirm this is intended.
#if SHADER_API_MOBILE || SHADER_API_GLES3 || SHADER_API_SWITCH
#pragma warning (enable : 3205) // conversion of larger type to smaller
#endif
1668
// Remaps a (u, v) coordinate that may lie outside [0, 1) back into the unit square.
// The plane is split into a 3x3 grid of regions around the unit square (u < 0, 0 <= u < 1, u >= 1,
// and likewise for v); each region applies its own shift and/or mirror, listed below.
float2 RepeatOctahedralUV(float u, float v)
{
    // Left column: u < 0
    if (u < 0.0f)
    {
        if (v < 0.0f)
            return float2(1.0f + u, 1.0f + v);
        if (v < 1.0f)
            return float2(-u, 1.0f - v);
        return float2(1.0f + u, v - 1.0f);
    }

    // Middle column: 0 <= u < 1 (the center region is returned unchanged)
    if (u < 1.0f)
    {
        if (v < 0.0f)
            return float2(1.0f - u, -v);
        if (v < 1.0f)
            return float2(u, v);
        return float2(1.0f - u, 2.0f - v);
    }

    // Right column: everything else
    if (v < 0.0f)
        return float2(u - 1.0f, 1.0f + v);
    if (v < 1.0f)
        return float2(2.0f - u, 1.0f - v);
    return float2(u - 1.0f, v - 1.0f);
}
1703
1704#endif // UNITY_COMMON_INCLUDED