// A game about forced loneliness, made by TACStudios
// (repository listing at master: 509 lines, 22 kB)
#pragma kernel StpSetup

#pragma multi_compile _ ENABLE_DEBUG_MODE
#pragma multi_compile _ ENABLE_STENCIL_RESPONSIVE
#pragma multi_compile _ ENABLE_LARGE_KERNEL

#pragma multi_compile _ UNITY_DEVICE_SUPPORTS_NATIVE_16BIT

// TODO: Re-enable support for wave reductions (usage of UNITY_DEVICE_SUPPORTS_WAVE_ANY keyword)
//
// We've run into many platform specific problems when trying to use wave operations for STP's reductions so they're being
// disabled for now. Enabling support for wave operations also causes us to use DXC on the 32-bit path on some Qualcomm Android
// devices and this triggers visual artifacts that we have no other way to work around at the moment.

#pragma multi_compile _ DISABLE_TEXTURE2D_X_ARRAY

#pragma only_renderers d3d11 playstation xboxone xboxseries vulkan metal switch

#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Color.hlsl"
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/UnityInstancing.hlsl"

#define STP_PAT 1

#include "Packages/com.unity.render-pipelines.core/Runtime/STP/StpCommon.hlsl"

//
// Input
//

TEXTURE2D_X(_StpInputColor);
TEXTURE2D_X(_StpInputDepth);
TEXTURE2D_X(_StpInputMotion);

#if defined(ENABLE_STENCIL_RESPONSIVE)
    TYPED_TEXTURE2D_X(uint2, _StpInputStencil);
#endif

//
// Intermediate Output
//

RW_TEXTURE2D_X(float4, _StpIntermediateColor);
RW_TEXTURE2D_X(float, _StpIntermediateConvergence);

//
// History Input/Output
//

TYPED_TEXTURE2D_X(uint, _StpPriorDepthMotion);
RW_TEXTURE2D_X(uint, _StpDepthMotion);

TEXTURE2D_X(_StpPriorLuma);
RW_TEXTURE2D_X(float2, _StpLuma);

TEXTURE2D_X(_StpPriorConvergence);

TEXTURE2D_X(_StpPriorFeedback);

// Index of the first per-view constant belonging to the current slice (view).
#define STP_SETUP_PER_VIEW_CONSTANTS_STEREO_OFFSET (SLICE_ARRAY_INDEX * STPSETUPPERVIEWCONSTANTS_COUNT)

#if defined(SHADER_API_PSSL) || defined(SHADER_API_SWITCH) || (defined(SHADER_API_METAL) && !defined(SHADER_API_MOBILE))
    // Force usage of the 32-bit reduction path even in 16-bit environments
    #define STP_FORCE_32BIT_REDUCTION
#endif

#if defined(SHADER_API_PSSL) || defined(SHADER_API_GAMECORE) || defined(SHADER_API_METAL) || (defined(SHADER_API_VULKAN) && defined(SHADER_API_MOBILE))
    // Force usage of group shared memory instead using wave operations
    #define STP_FORCE_GROUPSHARED
#endif

// Enable the use of wave operations when they're supported by the current hardware and usage of groupshared hasn't been forced.
#if defined(UNITY_HW_SUPPORTS_WAVE) && !defined(STP_FORCE_GROUPSHARED)
    #define STP_ENABLE_WAVEOPS
#endif

// STP requires a 4x4 reduction which must be implemented by either wave operations, or group shared memory.
#if !defined(STP_ENABLE_WAVEOPS)
#if defined(STP_16BIT) && !defined(STP_FORCE_32BIT_REDUCTION)
// One uint4 per lane: the 16-bit path stores its values converted through StpU1_H2.
groupshared uint4 gs_StpScratch[STP_GROUP_SIZE];
#else
// Two float4 per lane: the first STP_GROUP_SIZE entries hold 'a', the second STP_GROUP_SIZE entries hold 'b'.
groupshared float4 gs_StpScratch[STP_GROUP_SIZE * 2];
#endif
#endif

// In some cases, we have to expose the 32-bit reduction code in the 16-bit path
#if defined(STP_32BIT) || defined(STP_FORCE_32BIT_REDUCTION)
// Component-wise max reduction of 'a' and 'b' (32-bit).
//
// i: lane index used to address the group shared scratch memory (not used by the wave operation path).
// a, b: on input, this lane's values; on output, the per-component maximum over the 16 lanes that share
//       the same (index & ~15) block.
//
// The wave path exchanges values with the lanes at index XOR 1, 2, 4 and 8. The group shared path first reduces
// aligned groups of 4 lanes and then combines the 4 groups of each aligned block of 16 lanes.
void StpPat4x4MaxF8(StpMU1 i, inout StpF4 a, inout StpF4 b)
{
#if defined(STP_ENABLE_WAVEOPS)
    a.x = max(a.x, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.x), WaveGetLaneIndex() ^ 1)));
    a.y = max(a.y, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.y), WaveGetLaneIndex() ^ 1)));
    a.z = max(a.z, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.z), WaveGetLaneIndex() ^ 1)));
    a.w = max(a.w, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.w), WaveGetLaneIndex() ^ 1)));
    b.x = max(b.x, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.x), WaveGetLaneIndex() ^ 1)));
    b.y = max(b.y, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.y), WaveGetLaneIndex() ^ 1)));
    b.z = max(b.z, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.z), WaveGetLaneIndex() ^ 1)));
    b.w = max(b.w, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.w), WaveGetLaneIndex() ^ 1)));

    a.x = max(a.x, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.x), WaveGetLaneIndex() ^ 2)));
    a.y = max(a.y, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.y), WaveGetLaneIndex() ^ 2)));
    a.z = max(a.z, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.z), WaveGetLaneIndex() ^ 2)));
    a.w = max(a.w, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.w), WaveGetLaneIndex() ^ 2)));
    b.x = max(b.x, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.x), WaveGetLaneIndex() ^ 2)));
    b.y = max(b.y, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.y), WaveGetLaneIndex() ^ 2)));
    b.z = max(b.z, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.z), WaveGetLaneIndex() ^ 2)));
    b.w = max(b.w, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.w), WaveGetLaneIndex() ^ 2)));

    a.x = max(a.x, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.x), WaveGetLaneIndex() ^ 4)));
    a.y = max(a.y, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.y), WaveGetLaneIndex() ^ 4)));
    a.z = max(a.z, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.z), WaveGetLaneIndex() ^ 4)));
    a.w = max(a.w, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.w), WaveGetLaneIndex() ^ 4)));
    b.x = max(b.x, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.x), WaveGetLaneIndex() ^ 4)));
    b.y = max(b.y, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.y), WaveGetLaneIndex() ^ 4)));
    b.z = max(b.z, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.z), WaveGetLaneIndex() ^ 4)));
    b.w = max(b.w, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.w), WaveGetLaneIndex() ^ 4)));

    a.x = max(a.x, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.x), WaveGetLaneIndex() ^ 8)));
    a.y = max(a.y, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.y), WaveGetLaneIndex() ^ 8)));
    a.z = max(a.z, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.z), WaveGetLaneIndex() ^ 8)));
    a.w = max(a.w, StpF1_U1(WaveReadLaneAt(StpU1_F1(a.w), WaveGetLaneIndex() ^ 8)));
    b.x = max(b.x, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.x), WaveGetLaneIndex() ^ 8)));
    b.y = max(b.y, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.y), WaveGetLaneIndex() ^ 8)));
    b.z = max(b.z, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.z), WaveGetLaneIndex() ^ 8)));
    b.w = max(b.w, StpF1_U1(WaveReadLaneAt(StpU1_F1(b.w), WaveGetLaneIndex() ^ 8)));
#else
    gs_StpScratch[i] = a;
    gs_StpScratch[i + STP_GROUP_SIZE] = b;

    GroupMemoryBarrierWithGroupSync();

    // 2x2 Reduction
    {
        // Base index of this lane's aligned group of 4; a0..a2 are the other three lanes of that group.
        StpMU1 offset = (i & ~StpMU1(3));

        StpMU1 a0 = offset + ((i + StpMU1(1)) & StpMU1(3));
        StpMU1 a1 = offset + ((i + StpMU1(2)) & StpMU1(3));
        StpMU1 a2 = offset + ((i + StpMU1(3)) & StpMU1(3));

        float4 x0 = gs_StpScratch[a0];
        float4 x1 = gs_StpScratch[a1];
        float4 x2 = gs_StpScratch[a2];

        float4 y0 = gs_StpScratch[a0 + STP_GROUP_SIZE];
        float4 y1 = gs_StpScratch[a1 + STP_GROUP_SIZE];
        float4 y2 = gs_StpScratch[a2 + STP_GROUP_SIZE];

        // All reads must complete before the scratch memory is overwritten below.
        GroupMemoryBarrierWithGroupSync();

        a = max(max(max(a, x0), x1), x2);
        b = max(max(max(b, y0), y1), y2);
    }

    gs_StpScratch[i] = a;
    gs_StpScratch[i + STP_GROUP_SIZE] = b;

    GroupMemoryBarrierWithGroupSync();

    // 4x4 Reduction
    {
        // Base index of this lane's aligned block of 16; a0..a2 are the lanes 4, 8 and 12 positions further (wrapping).
        StpMU1 offset = (i & ~StpMU1(15));

        StpMU1 a0 = offset + ((i + StpMU1(4)) & StpMU1(15));
        StpMU1 a1 = offset + ((i + StpMU1(8)) & StpMU1(15));
        StpMU1 a2 = offset + ((i + StpMU1(12)) & StpMU1(15));

        float4 x0 = gs_StpScratch[a0];
        float4 x1 = gs_StpScratch[a1];
        float4 x2 = gs_StpScratch[a2];

        float4 y0 = gs_StpScratch[a0 + STP_GROUP_SIZE];
        float4 y1 = gs_StpScratch[a1 + STP_GROUP_SIZE];
        float4 y2 = gs_StpScratch[a2 + STP_GROUP_SIZE];

        // Keeps a later reduction call from overwriting the scratch memory while other lanes still read it.
        GroupMemoryBarrierWithGroupSync();

        a = max(max(max(a, x0), x1), x2);
        b = max(max(max(b, y0), y1), y2);
    }
#endif
}
// Component-wise sum reduction of 'a' (32-bit).
//
// i: lane index used to address the group shared scratch memory (not used by the wave operation path).
// a: on input, this lane's value; on output, the per-component sum over the 16 lanes that share
//    the same (index & ~15) block.
void StpPat4x4SumF4(StpMU1 i, inout StpF4 a)
{
#if defined(STP_ENABLE_WAVEOPS)
    a.x += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.x), WaveGetLaneIndex() ^ 1));
    a.y += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.y), WaveGetLaneIndex() ^ 1));
    a.z += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.z), WaveGetLaneIndex() ^ 1));
    a.w += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.w), WaveGetLaneIndex() ^ 1));

    a.x += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.x), WaveGetLaneIndex() ^ 2));
    a.y += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.y), WaveGetLaneIndex() ^ 2));
    a.z += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.z), WaveGetLaneIndex() ^ 2));
    a.w += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.w), WaveGetLaneIndex() ^ 2));

    a.x += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.x), WaveGetLaneIndex() ^ 4));
    a.y += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.y), WaveGetLaneIndex() ^ 4));
    a.z += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.z), WaveGetLaneIndex() ^ 4));
    a.w += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.w), WaveGetLaneIndex() ^ 4));

    a.x += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.x), WaveGetLaneIndex() ^ 8));
    a.y += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.y), WaveGetLaneIndex() ^ 8));
    a.z += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.z), WaveGetLaneIndex() ^ 8));
    a.w += StpF1_U1(WaveReadLaneAt(StpU1_F1(a.w), WaveGetLaneIndex() ^ 8));
#else
    gs_StpScratch[i] = a;

    GroupMemoryBarrierWithGroupSync();

    // 2x2 Reduction
    {
        StpMU1 offset = (i & ~StpMU1(3));

        StpMU1 a0 = offset + ((i + StpMU1(1)) & StpMU1(3));
        StpMU1 a1 = offset + ((i + StpMU1(2)) & StpMU1(3));
        StpMU1 a2 = offset + ((i + StpMU1(3)) & StpMU1(3));

        float4 x0 = gs_StpScratch[a0];
        float4 x1 = gs_StpScratch[a1];
        float4 x2 = gs_StpScratch[a2];

        // All reads must complete before the scratch memory is overwritten below.
        GroupMemoryBarrierWithGroupSync();

        a = a + x0 + x1 + x2;
    }

    gs_StpScratch[i] = a;

    GroupMemoryBarrierWithGroupSync();

    // 4x4 Reduction
    {
        StpMU1 offset = (i & ~StpMU1(15));

        StpMU1 a0 = offset + ((i + StpMU1(4)) & StpMU1(15));
        StpMU1 a1 = offset + ((i + StpMU1(8)) & StpMU1(15));
        StpMU1 a2 = offset + ((i + StpMU1(12)) & StpMU1(15));

        float4 x0 = gs_StpScratch[a0];
        float4 x1 = gs_StpScratch[a1];
        float4 x2 = gs_StpScratch[a2];

        // Keeps a later reduction call from overwriting the scratch memory while other lanes still read it.
        GroupMemoryBarrierWithGroupSync();

        a = a + x0 + x1 + x2;
    }
#endif
}
#endif

#if defined(STP_16BIT)
// Component-wise max reduction of 'a' and 'b' (16-bit variant of StpPat4x4MaxF8).
//
// Forwards to the 32-bit implementation when STP_FORCE_32BIT_REDUCTION is defined. Otherwise the values are exchanged
// two components at a time as a single uint (StpU1_H2 / StpH2_U1), through wave operations or group shared memory.
void StpPat4x4MaxH8(StpW1 i, inout StpH4 a, inout StpH4 b)
{
#if defined(STP_FORCE_32BIT_REDUCTION)
    StpPat4x4MaxF8(i, a, b);
#else
#if defined(STP_ENABLE_WAVEOPS)
    a.xy = max(a.xy, StpH2_U1(WaveReadLaneAt(StpU1_H2(a.xy), WaveGetLaneIndex() ^ 1)));
    a.zw = max(a.zw, StpH2_U1(WaveReadLaneAt(StpU1_H2(a.zw), WaveGetLaneIndex() ^ 1)));
    b.xy = max(b.xy, StpH2_U1(WaveReadLaneAt(StpU1_H2(b.xy), WaveGetLaneIndex() ^ 1)));
    b.zw = max(b.zw, StpH2_U1(WaveReadLaneAt(StpU1_H2(b.zw), WaveGetLaneIndex() ^ 1)));

    a.xy = max(a.xy, StpH2_U1(WaveReadLaneAt(StpU1_H2(a.xy), WaveGetLaneIndex() ^ 2)));
    a.zw = max(a.zw, StpH2_U1(WaveReadLaneAt(StpU1_H2(a.zw), WaveGetLaneIndex() ^ 2)));
    b.xy = max(b.xy, StpH2_U1(WaveReadLaneAt(StpU1_H2(b.xy), WaveGetLaneIndex() ^ 2)));
    b.zw = max(b.zw, StpH2_U1(WaveReadLaneAt(StpU1_H2(b.zw), WaveGetLaneIndex() ^ 2)));

    a.xy = max(a.xy, StpH2_U1(WaveReadLaneAt(StpU1_H2(a.xy), WaveGetLaneIndex() ^ 4)));
    a.zw = max(a.zw, StpH2_U1(WaveReadLaneAt(StpU1_H2(a.zw), WaveGetLaneIndex() ^ 4)));
    b.xy = max(b.xy, StpH2_U1(WaveReadLaneAt(StpU1_H2(b.xy), WaveGetLaneIndex() ^ 4)));
    b.zw = max(b.zw, StpH2_U1(WaveReadLaneAt(StpU1_H2(b.zw), WaveGetLaneIndex() ^ 4)));

    a.xy = max(a.xy, StpH2_U1(WaveReadLaneAt(StpU1_H2(a.xy), WaveGetLaneIndex() ^ 8)));
    a.zw = max(a.zw, StpH2_U1(WaveReadLaneAt(StpU1_H2(a.zw), WaveGetLaneIndex() ^ 8)));
    b.xy = max(b.xy, StpH2_U1(WaveReadLaneAt(StpU1_H2(b.xy), WaveGetLaneIndex() ^ 8)));
    b.zw = max(b.zw, StpH2_U1(WaveReadLaneAt(StpU1_H2(b.zw), WaveGetLaneIndex() ^ 8)));
#else
    // Scratch layout per lane: x = a.xy, y = a.zw, z = b.xy, w = b.zw.
    gs_StpScratch[i] = StpU4(StpU1_H2(a.xy), StpU1_H2(a.zw), StpU1_H2(b.xy), StpU1_H2(b.zw));

    GroupMemoryBarrierWithGroupSync();

    // 2x2 Reduction
    {
        StpW1 offset = (i & ~StpW1(3));

        StpW1 a0 = offset + ((i + StpW1(1)) & StpW1(3));
        StpW1 a1 = offset + ((i + StpW1(2)) & StpW1(3));
        StpW1 a2 = offset + ((i + StpW1(3)) & StpW1(3));

        uint4 x0 = gs_StpScratch[a0];
        uint4 x1 = gs_StpScratch[a1];
        uint4 x2 = gs_StpScratch[a2];

        // All reads must complete before the scratch memory is overwritten below.
        GroupMemoryBarrierWithGroupSync();

        a.xy = max(max(max(a.xy, StpH2_U1(x0.x)), StpH2_U1(x1.x)), StpH2_U1(x2.x));
        a.zw = max(max(max(a.zw, StpH2_U1(x0.y)), StpH2_U1(x1.y)), StpH2_U1(x2.y));
        b.xy = max(max(max(b.xy, StpH2_U1(x0.z)), StpH2_U1(x1.z)), StpH2_U1(x2.z));
        b.zw = max(max(max(b.zw, StpH2_U1(x0.w)), StpH2_U1(x1.w)), StpH2_U1(x2.w));
    }

    gs_StpScratch[i] = StpU4(StpU1_H2(a.xy), StpU1_H2(a.zw), StpU1_H2(b.xy), StpU1_H2(b.zw));

    GroupMemoryBarrierWithGroupSync();

    // 4x4 Reduction
    {
        StpW1 offset = (i & ~StpW1(15));

        StpW1 a0 = offset + ((i + StpW1(4)) & StpW1(15));
        StpW1 a1 = offset + ((i + StpW1(8)) & StpW1(15));
        StpW1 a2 = offset + ((i + StpW1(12)) & StpW1(15));

        uint4 x0 = gs_StpScratch[a0];
        uint4 x1 = gs_StpScratch[a1];
        uint4 x2 = gs_StpScratch[a2];

        // Keeps a later reduction call from overwriting the scratch memory while other lanes still read it.
        GroupMemoryBarrierWithGroupSync();

        a.xy = max(max(max(a.xy, StpH2_U1(x0.x)), StpH2_U1(x1.x)), StpH2_U1(x2.x));
        a.zw = max(max(max(a.zw, StpH2_U1(x0.y)), StpH2_U1(x1.y)), StpH2_U1(x2.y));
        b.xy = max(max(max(b.xy, StpH2_U1(x0.z)), StpH2_U1(x1.z)), StpH2_U1(x2.z));
        b.zw = max(max(max(b.zw, StpH2_U1(x0.w)), StpH2_U1(x1.w)), StpH2_U1(x2.w));
    }
#endif
#endif
}
// Component-wise sum reduction of 'a' (16-bit variant of StpPat4x4SumF4).
//
// Forwards to the 32-bit implementation when STP_FORCE_32BIT_REDUCTION is defined. The group shared path only
// uses the .xy components of each scratch entry (x = a.xy, y = a.zw).
void StpPat4x4SumH4(StpW1 i, inout StpH4 a)
{
#if defined(STP_FORCE_32BIT_REDUCTION)
    StpPat4x4SumF4(i, a);
#else
#if defined(STP_ENABLE_WAVEOPS)
    a.xy += StpH2_U1(WaveReadLaneAt(StpU1_H2(a.xy), WaveGetLaneIndex() ^ 1));
    a.zw += StpH2_U1(WaveReadLaneAt(StpU1_H2(a.zw), WaveGetLaneIndex() ^ 1));

    a.xy += StpH2_U1(WaveReadLaneAt(StpU1_H2(a.xy), WaveGetLaneIndex() ^ 2));
    a.zw += StpH2_U1(WaveReadLaneAt(StpU1_H2(a.zw), WaveGetLaneIndex() ^ 2));

    a.xy += StpH2_U1(WaveReadLaneAt(StpU1_H2(a.xy), WaveGetLaneIndex() ^ 4));
    a.zw += StpH2_U1(WaveReadLaneAt(StpU1_H2(a.zw), WaveGetLaneIndex() ^ 4));

    a.xy += StpH2_U1(WaveReadLaneAt(StpU1_H2(a.xy), WaveGetLaneIndex() ^ 8));
    a.zw += StpH2_U1(WaveReadLaneAt(StpU1_H2(a.zw), WaveGetLaneIndex() ^ 8));
#else
    gs_StpScratch[i].xy = StpU2(StpU1_H2(a.xy), StpU1_H2(a.zw));

    GroupMemoryBarrierWithGroupSync();

    // 2x2 Reduction
    {
        StpW1 offset = (i & ~StpW1(3));

        StpW1 a0 = offset + ((i + StpW1(1)) & StpW1(3));
        StpW1 a1 = offset + ((i + StpW1(2)) & StpW1(3));
        StpW1 a2 = offset + ((i + StpW1(3)) & StpW1(3));

        uint2 x0 = gs_StpScratch[a0].xy;
        uint2 x1 = gs_StpScratch[a1].xy;
        uint2 x2 = gs_StpScratch[a2].xy;

        // All reads must complete before the scratch memory is overwritten below.
        GroupMemoryBarrierWithGroupSync();

        a.xy = a.xy + StpH2_U1(x0.x) + StpH2_U1(x1.x) + StpH2_U1(x2.x);
        a.zw = a.zw + StpH2_U1(x0.y) + StpH2_U1(x1.y) + StpH2_U1(x2.y);
    }

    gs_StpScratch[i].xy = StpU2(StpU1_H2(a.xy), StpU1_H2(a.zw));

    GroupMemoryBarrierWithGroupSync();

    // 4x4 Reduction
    {
        StpW1 offset = (i & ~StpW1(15));

        StpW1 a0 = offset + ((i + StpW1(4)) & StpW1(15));
        StpW1 a1 = offset + ((i + StpW1(8)) & StpW1(15));
        StpW1 a2 = offset + ((i + StpW1(12)) & StpW1(15));

        uint2 x0 = gs_StpScratch[a0].xy;
        uint2 x1 = gs_StpScratch[a1].xy;
        uint2 x2 = gs_StpScratch[a2].xy;

        // Keeps a later reduction call from overwriting the scratch memory while other lanes still read it.
        GroupMemoryBarrierWithGroupSync();

        a.xy = a.xy + StpH2_U1(x0.x) + StpH2_U1(x1.x) + StpH2_U1(x2.x);
        a.zw = a.zw + StpH2_U1(x0.y) + StpH2_U1(x1.y) + StpH2_U1(x2.y);
    }
#endif
#endif
}
// Samples the prior convergence texture at 'p' with the linear clamp sampler (mip 0).
StpH1 StpPatPriConH(StpF2 p) { return (StpH1)SAMPLE_TEXTURE2D_X_LOD(_StpPriorConvergence, s_linear_clamp_sampler, p, 0); }

// These are separate to support inline operation (pass merged instead of loads).
// Loads the .xy of the input motion texture at pixel 'o'.
StpF2 StpPatDatMotH(StpW2 o) { return LOAD_TEXTURE2D_X_LOD(_StpInputMotion, o, 0).xy; }
// Loads the .rgb of the input color texture at pixel 'o'.
StpH3 StpPatDatColH(StpW2 o) { return (StpH3)LOAD_TEXTURE2D_X_LOD(_StpInputColor, o, 0).rgb; }
// Loads the .x of the input depth texture at pixel 'o'.
StpF1 StpPatDatZH(StpW2 o) { return LOAD_TEXTURE2D_X_LOD(_StpInputDepth, o, 0).x; }
// This provides a place to convert Z from depth to linear if not inlined and actually loaded.
StpF1 StpPatFixZH(StpF1 z) { return 1.0 / (STP_ZBUFFER_PARAMS_Z * z + STP_ZBUFFER_PARAMS_W); }
// Loads the stencil value at pixel 'o' when ENABLE_STENCIL_RESPONSIVE is defined, otherwise returns 0.
StpU1 StpPatDatRH(StpW2 o) {
    #if defined(ENABLE_STENCIL_RESPONSIVE)
        return GetStencilValue(LOAD_TEXTURE2D_X_LOD(_StpInputStencil, o, 0).xy);
    #else
        // No stencil input is bound in this variant.
        return StpU1_(0);
    #endif // defined(ENABLE_STENCIL_RESPONSIVE)
}
// Returns 1.0 when history is valid and the pixel is not excluded by the stencil mask, 0.0 otherwise.
StpH1 StpPatFixRH(StpU1 v) {
    // Activate the "responsive" feature when we don't have valid history textures.
    bool hasValidHistory = DecodeHasValidHistory(STP_COMMON_CONSTANT);
    bool excludeTaa = false;
    #if defined(ENABLE_STENCIL_RESPONSIVE)
        excludeTaa = (v & DecodeStencilMask(STP_COMMON_CONSTANT)) != 0;
    #endif // defined(ENABLE_STENCIL_RESPONSIVE)
    return (hasValidHistory && !excludeTaa) ? StpH1_(1.0) : StpH1_(0.0); }

StpH1 StpPatDitH(StpW2 o) { return StpDitH1(o); }
// Prior feedback: bilinear sample, and per-channel gathers (red, green, blue) with the point clamp sampler.
StpH4 StpPatPriFedH(StpF2 p) { return (StpH4)SAMPLE_TEXTURE2D_X_LOD(_StpPriorFeedback, s_linear_clamp_sampler, p, 0); }
StpH4 StpPatPriFedR4H(StpF2 p) { return (StpH4)GATHER_RED_TEXTURE2D_X(_StpPriorFeedback, s_point_clamp_sampler, p); }
StpH4 StpPatPriFedG4H(StpF2 p) { return (StpH4)GATHER_GREEN_TEXTURE2D_X(_StpPriorFeedback, s_point_clamp_sampler, p); }
StpH4 StpPatPriFedB4H(StpF2 p) { return (StpH4)GATHER_BLUE_TEXTURE2D_X(_StpPriorFeedback, s_point_clamp_sampler, p); }
StpH2 StpPatPriLumH(StpF2 p) { return (StpH2)SAMPLE_TEXTURE2D_X_LOD(_StpPriorLuma, s_linear_clamp_sampler, p, 0); }
StpU4 StpPatPriMot4H(StpF2 p) { return GATHER_RED_TEXTURE2D_X(_StpPriorDepthMotion, s_point_clamp_sampler, p); }

// Stores to the output textures. The convergence texture is addressed at (p >> 2).
void StpPatStMotH(StpW2 p, StpU1 v) { _StpDepthMotion[COORD_TEXTURE2D_X(p)] = v; }
void StpPatStColH(StpW2 p, StpH4 v) { _StpIntermediateColor[COORD_TEXTURE2D_X(p)] = v; }
void StpPatStLumH(StpW2 p, StpH2 v) { _StpLuma[COORD_TEXTURE2D_X(p)] = v; }
void StpPatStCnvH(StpW2 p, StpH1 v) { _StpIntermediateConvergence[COORD_TEXTURE2D_X(p >> StpW1(2))] = v; }
#endif

#if defined(STP_32BIT)
// Samples the prior convergence texture at 'p' with the linear clamp sampler (mip 0).
StpMF1 StpPatPriConF(StpF2 p) { return (StpMF1)SAMPLE_TEXTURE2D_X_LOD(_StpPriorConvergence, s_linear_clamp_sampler, p, 0); }

// These are separate to support inline operation (pass merged instead of loads).
StpF2 StpPatDatMotF(StpMU2 o) { return LOAD_TEXTURE2D_X_LOD(_StpInputMotion, o, 0).xy; }
StpMF3 StpPatDatColF(StpMU2 o) { return (StpMF3)LOAD_TEXTURE2D_X_LOD(_StpInputColor, o, 0).rgb; }
StpF1 StpPatDatZF(StpMU2 o) { return LOAD_TEXTURE2D_X_LOD(_StpInputDepth, o, 0).x; }
// This provides a place to convert Z from depth to linear if not inlined and actually loaded.
StpF1 StpPatFixZF(StpF1 z) { return 1.0 / (STP_ZBUFFER_PARAMS_Z * z + STP_ZBUFFER_PARAMS_W); }
// Loads the stencil value at pixel 'o' when ENABLE_STENCIL_RESPONSIVE is defined, otherwise returns 0.
StpU1 StpPatDatRF(StpMU2 o) {
    #if defined(ENABLE_STENCIL_RESPONSIVE)
        return GetStencilValue(LOAD_TEXTURE2D_X_LOD(_StpInputStencil, o, 0).xy);
    #else
        // No stencil input is bound in this variant.
        return StpU1_(0);
    #endif // defined(ENABLE_STENCIL_RESPONSIVE)
}
// Returns 1.0 when history is valid and the pixel is not excluded by the stencil mask, 0.0 otherwise.
StpMF1 StpPatFixRF(StpU1 v) {
    // Activate the "responsive" feature when we don't have valid history textures.
    bool hasValidHistory = DecodeHasValidHistory(STP_COMMON_CONSTANT);
    bool excludeTaa = false;
    #if defined(ENABLE_STENCIL_RESPONSIVE)
        excludeTaa = (v & DecodeStencilMask(STP_COMMON_CONSTANT)) != 0;
    #endif // defined(ENABLE_STENCIL_RESPONSIVE)
    return (hasValidHistory && !excludeTaa) ? StpMF1_(1.0) : StpMF1_(0.0); }

StpMF1 StpPatDitF(StpMU2 o) { return (StpMF1)StpDitF1(o); }
// Prior feedback: bilinear sample, and per-channel gathers (red, green, blue) with the point clamp sampler.
StpMF4 StpPatPriFedF(StpF2 p) { return (StpMF4)SAMPLE_TEXTURE2D_X_LOD(_StpPriorFeedback, s_linear_clamp_sampler, p, 0); }
StpMF4 StpPatPriFedR4F(StpF2 p) { return (StpMF4)GATHER_RED_TEXTURE2D_X(_StpPriorFeedback, s_point_clamp_sampler, p); }
StpMF4 StpPatPriFedG4F(StpF2 p) { return (StpMF4)GATHER_GREEN_TEXTURE2D_X(_StpPriorFeedback, s_point_clamp_sampler, p); }
StpMF4 StpPatPriFedB4F(StpF2 p) { return (StpMF4)GATHER_BLUE_TEXTURE2D_X(_StpPriorFeedback, s_point_clamp_sampler, p); }
StpMF2 StpPatPriLumF(StpF2 p) { return (StpMF2)SAMPLE_TEXTURE2D_X_LOD(_StpPriorLuma, s_linear_clamp_sampler, p, 0); }
StpU4 StpPatPriMot4F(StpF2 p) { return GATHER_RED_TEXTURE2D_X(_StpPriorDepthMotion, s_point_clamp_sampler, p); }

// Stores to the output textures. The convergence texture is addressed at (p >> 2).
void StpPatStMotF(StpMU2 p, StpU1 v) { _StpDepthMotion[COORD_TEXTURE2D_X(p)] = v; }
void StpPatStColF(StpMU2 p, StpMF4 v) { _StpIntermediateColor[COORD_TEXTURE2D_X(p)] = v; }
void StpPatStLumF(StpMU2 p, StpMF2 v) { _StpLuma[COORD_TEXTURE2D_X(p)] = v; }
void StpPatStCnvF(StpMU2 p, StpMF1 v) { _StpIntermediateConvergence[COORD_TEXTURE2D_X(p >> StpMU1(2))] = v; }
#endif

#define THREADING_BLOCK_SIZE STP_GROUP_SIZE
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Threading.hlsl"

// Kernel entry point for the STP setup pass.
//
// Each thread derives its lane index from the group index and its pixel position from the group position plus the
// lane remapped by StpRemapLaneTo8x16H/F, then calls StpPatH (16-bit) or StpPatF (32-bit) with the setup constants
// and the per-view constants of the current view.
[numthreads(STP_GROUP_SIZE, 1, 1)]
void StpSetup(Threading::Group group)
{
    // The Z component of the group ID selects the view.
    UNITY_XR_ASSIGN_VIEW_INDEX(group.groupID.z);

#if defined(STP_16BIT)
    StpW1 lane = StpW1_(group.groupIndex);
    StpW2 groupPos = ComputeGroupPos(StpW2(group.groupID.xy));
    StpW2 pos = groupPos + StpRemapLaneTo8x16H(lane);
#else
    StpMU1 lane = StpMU1_(group.groupIndex);
    StpMU2 groupPos = ComputeGroupPos(StpMU2(group.groupID.xy));
    StpMU2 pos = groupPos + StpRemapLaneTo8x16F(lane);
#endif

    // Only the callee name differs between the two precision paths; the argument list below is shared.
#if defined(STP_16BIT)
    StpPatH(
        lane,
        pos,
#else
    StpPatF(
        lane,
        pos,
#endif

        asuint(_StpSetupConstants0),
        asuint(_StpSetupConstants1),
        asuint(_StpSetupConstants2),
        asuint(_StpSetupConstants3),
        asuint(_StpSetupConstants4),
        asuint(_StpSetupConstants5),

        asuint(_StpSetupPerViewConstants[STP_SETUP_PER_VIEW_CONSTANTS_STEREO_OFFSET + 0]),
        asuint(_StpSetupPerViewConstants[STP_SETUP_PER_VIEW_CONSTANTS_STEREO_OFFSET + 1]),
        asuint(_StpSetupPerViewConstants[STP_SETUP_PER_VIEW_CONSTANTS_STEREO_OFFSET + 2]),
        asuint(_StpSetupPerViewConstants[STP_SETUP_PER_VIEW_CONSTANTS_STEREO_OFFSET + 3]),
        asuint(_StpSetupPerViewConstants[STP_SETUP_PER_VIEW_CONSTANTS_STEREO_OFFSET + 4]),
        asuint(_StpSetupPerViewConstants[STP_SETUP_PER_VIEW_CONSTANTS_STEREO_OFFSET + 5]),
        asuint(_StpSetupPerViewConstants[STP_SETUP_PER_VIEW_CONSTANTS_STEREO_OFFSET + 6]),
        asuint(_StpSetupPerViewConstants[STP_SETUP_PER_VIEW_CONSTANTS_STEREO_OFFSET + 7])
    );
}