A game about forced loneliness, made by TACStudios
1#ifndef UNITY_AREA_LIGHTING_INCLUDED 2#define UNITY_AREA_LIGHTING_INCLUDED 3 4#define APPROXIMATE_POLY_LIGHT_AS_SPHERE_LIGHT 5#define APPROXIMATE_SPHERE_LIGHT_NUMERICALLY 6 7 8// 'sinSqSigma' is the sine^2 of the half-angle subtended by the sphere (aperture) as seen from the shaded point. 9// 'cosOmega' is the cosine of the angle between the normal and the direction to the center of the light. 10// This function performs horizon clipping. 11real DiffuseSphereLightIrradiance(real sinSqSigma, real cosOmega) 12{ 13#ifdef APPROXIMATE_SPHERE_LIGHT_NUMERICALLY 14 real x = sinSqSigma; 15 real y = cosOmega; 16 17 // Use a numerical fit found in Mathematica. Mean absolute error: 0.00476944. 18 // You can use the following Mathematica code to reproduce our results: 19 // t = Flatten[Table[{x, y, f[x, y]}, {x, 0, 0.999999, 0.001}, {y, -0.999999, 0.999999, 0.002}], 1] 20 // m = NonlinearModelFit[t, x * (y + e) * (0.5 + (y - e) * (a + b * x + c * x^2 + d * x^3)), {a, b, c, d, e}, {x, y}] 21 return saturate(x * (0.9245867471551246 + y) * (0.5 + (-0.9245867471551246 + y) * (0.5359050373687144 + x * (-1.0054221851257754 + x * (1.8199061187417047 - x * 1.3172081704209504))))); 22#else 23 #if 0 // Ref: Area Light Sources for Real-Time Graphics, page 4 (1996). 24 real sinSqOmega = saturate(1 - cosOmega * cosOmega); 25 real cosSqSigma = saturate(1 - sinSqSigma); 26 real sinSqGamma = saturate(cosSqSigma / sinSqOmega); 27 real cosSqGamma = saturate(1 - sinSqGamma); 28 29 real sinSigma = sqrt(sinSqSigma); 30 real sinGamma = sqrt(sinSqGamma); 31 real cosGamma = sqrt(cosSqGamma); 32 33 real sigma = asin(sinSigma); 34 real omega = acos(cosOmega); 35 real gamma = asin(sinGamma); 36 37 if (omega >= HALF_PI + sigma) 38 { 39 // Full horizon occlusion (case #4). 40 return 0; 41 } 42 43 real e = sinSqSigma * cosOmega; 44 45 UNITY_BRANCH 46 if (omega < HALF_PI - sigma) 47 { 48 // No horizon occlusion (case #1). 49 return e; 50 } 51 else 52 { 53 real g = (-2 * sqrt(sinSqOmega * cosSqSigma) + sinGamma) * cosGamma + (HALF_PI - gamma); 54 real h = cosOmega * (cosGamma * sqrt(saturate(sinSqSigma - cosSqGamma)) + sinSqSigma * asin(saturate(cosGamma / sinSigma))); 55 56 if (omega < HALF_PI) 57 { 58 // Partial horizon occlusion (case #2). 59 return saturate(e + INV_PI * (g - h)); 60 } 61 else 62 { 63 // Partial horizon occlusion (case #3). 64 return saturate(INV_PI * (g + h)); 65 } 66 } 67 #else // Ref: Moving Frostbite to Physically Based Rendering, page 47 (2015, optimized). 68 real cosSqOmega = cosOmega * cosOmega; // y^2 69 70 UNITY_BRANCH 71 if (cosSqOmega > sinSqSigma) // (y^2)>x 72 { 73 return saturate(sinSqSigma * cosOmega); // Clip[x*y,{0,1}] 74 } 75 else 76 { 77 real cotSqSigma = rcp(sinSqSigma) - 1; // 1/x-1 78 real tanSqSigma = rcp(cotSqSigma); // x/(1-x) 79 real sinSqOmega = 1 - cosSqOmega; // 1-y^2 80 81 real w = sinSqOmega * tanSqSigma; // (1-y^2)*(x/(1-x)) 82 real x = -cosOmega * rsqrt(w); // -y*Sqrt[(1/x-1)/(1-y^2)] 83 real y = sqrt(sinSqOmega * tanSqSigma - cosSqOmega); // Sqrt[(1-y^2)*(x/(1-x))-y^2] 84 real z = y * cotSqSigma; // Sqrt[(1-y^2)*(x/(1-x))-y^2]*(1/x-1) 85 86 real a = cosOmega * acos(x) - z; // y*ArcCos[-y*Sqrt[(1/x-1)/(1-y^2)]]-Sqrt[(1-y^2)*(x/(1-x))-y^2]*(1/x-1) 87 real b = atan(y); // ArcTan[Sqrt[(1-y^2)*(x/(1-x))-y^2]] 88 89 return saturate(INV_PI * (a * sinSqSigma + b)); 90 } 91 #endif 92#endif 93} 94 95// The output is *not* normalized by the factor of 1/TWO_PI (this is done by the PolygonFormFactor function). 96real3 ComputeEdgeFactor(real3 V1, real3 V2) 97{ 98 real subtendedAngle; 99 100 real V1oV2 = dot(V1, V2); 101 real3 V1xV2 = cross(V1, V2); // Plane normal (tangent to the unit sphere) 102 real sqLen = saturate(1 - V1oV2 * V1oV2); // length(V1xV2) = abs(sin(angle)) 103 real rcpLen = rsqrt(max(FLT_EPS, sqLen)); // Make sure it is finite 104#if 0 105 real y = rcpLen * acos(V1oV2); 106#else 107 // Let y[x_] = ArcCos[x] / Sqrt[1 - x^2]. 108 // Range reduction: since ArcCos[-x] == Pi - ArcCos[x], we only need to consider x on [0, 1]. 109 real x = abs(V1oV2); 110 // Limit[y[x], x -> 1] == 1, 111 // Limit[y[x], x -> 0] == Pi/2. 112 // The approximation is exact at the endpoints of [0, 1]. 113 // Max. abs. error on [0, 1] is 1.33e-6 at x = 0.0036. 114 // Max. rel. error on [0, 1] is 8.66e-7 at x = 0.0037. 115 real y = HALF_PI + x * (-0.99991 + x * (0.783393 + x * (-0.649178 + x * (0.510589 + x * (-0.326137 + x * (0.137528 + x * -0.0270813)))))); 116 117 if (V1oV2 < 0) 118 { 119 y = rcpLen * PI - y; 120 } 121 122#endif 123 124 return V1xV2 * y; 125} 126 127// Input: 3-5 vertices in the coordinate frame centered at the shaded point. 128// Output: signed vector irradiance. 129// No horizon clipping is performed. 130real3 PolygonFormFactor(real4x3 L, real3 L4, uint n) 131{ 132 // The length cannot be zero since we have already checked 133 // that the light has a non-zero effective area, 134 // and thus its plane cannot pass through the origin. 135 L[0] = normalize(L[0]); 136 L[1] = normalize(L[1]); 137 L[2] = normalize(L[2]); 138 139 switch (n) 140 { 141 case 3: 142 L[3] = L[0]; 143 break; 144 case 4: 145 L[3] = normalize(L[3]); 146 L4 = L[0]; 147 break; 148 case 5: 149 L[3] = normalize(L[3]); 150 L4 = normalize(L4); 151 break; 152 } 153 154 // If the magnitudes of a pair of edge factors are 155 // nearly the same, catastrophic cancellation may occur: 156 // https://en.wikipedia.org/wiki/Catastrophic_cancellation 157 // For the same reason, the value of the cross product of two 158 // nearly collinear vectors is prone to large errors. 159 // Therefore, the algorithm is inherently numerically unstable 160 // for area lights that shrink to a line (or a point) after 161 // projection onto the unit sphere. 162 real3 F = ComputeEdgeFactor(L[0], L[1]); 163 F += ComputeEdgeFactor(L[1], L[2]); 164 F += ComputeEdgeFactor(L[2], L[3]); 165 if (n >= 4) 166 F += ComputeEdgeFactor(L[3], L4); 167 if (n == 5) 168 F += ComputeEdgeFactor(L4, L[0]); 169 170 return INV_TWO_PI * F; // The output may be projected onto the tangent plane (F.z) to yield signed irradiance. 171} 172 173// See "Real-Time Area Lighting: a Journey from Research to Production", slide 102. 174// Turns out, despite the authors claiming that this function "calculates an approximation of 175// the clipped sphere form factor", that is simply not true. 176// First of all, above horizon, the function should then just return 'F.z', which it does not. 177// Secondly, if we use the correct function called DiffuseSphereLightIrradiance(), it results 178// in severe light leaking if the light is placed vertically behind the camera. 179// So this function is clearly a hack designed to work around these problems. 180real PolygonIrradianceFromVectorFormFactor(float3 F) 181{ 182#if 1 183 float l = length(F); 184 return max(0, (l * l + F.z) / (l + 1)); 185#else 186 real sff = saturate(dot(F, F)); 187 real sinSqAperture = sqrt(sff); 188 real cosElevationAngle = F.z * rsqrt(sff); 189 190 return DiffuseSphereLightIrradiance(sinSqAperture, cosElevationAngle); 191#endif 192} 193 194// Expects non-normalized vertex positions. 195// Output: F is the signed vector irradiance. 196real PolygonIrradiance(real4x3 L, out real3 F) 197{ 198#ifdef APPROXIMATE_POLY_LIGHT_AS_SPHERE_LIGHT 199 F = PolygonFormFactor(L, real3(0,0,1), 4); // Before horizon clipping. 200 201 return PolygonIrradianceFromVectorFormFactor(F); // Accounts for the horizon. 202#else 203 // 1. ClipQuadToHorizon 204 205 // detect clipping config 206 uint config = 0; 207 if (L[0].z > 0) config += 1; 208 if (L[1].z > 0) config += 2; 209 if (L[2].z > 0) config += 4; 210 if (L[3].z > 0) config += 8; 211 212 // The fifth vertex for cases when clipping cuts off one corner. 213 // Due to a compiler bug, copying L into a vector array with 5 rows 214 // messes something up, so we need to stick with the matrix + the L4 vertex. 215 real3 L4 = L[3]; 216 217 // This switch is surprisingly fast. Tried replacing it with a lookup array of vertices. 218 // Even though that replaced the switch with just some indexing and no branches, it became 219 // way, way slower - mem fetch stalls? 220 221 // clip 222 uint n = 0; 223 switch (config) 224 { 225 case 0: // clip all 226 break; 227 228 case 1: // V1 clip V2 V3 V4 229 n = 3; 230 L[1] = -L[1].z * L[0] + L[0].z * L[1]; 231 L[2] = -L[3].z * L[0] + L[0].z * L[3]; 232 break; 233 234 case 2: // V2 clip V1 V3 V4 235 n = 3; 236 L[0] = -L[0].z * L[1] + L[1].z * L[0]; 237 L[2] = -L[2].z * L[1] + L[1].z * L[2]; 238 break; 239 240 case 3: // V1 V2 clip V3 V4 241 n = 4; 242 L[2] = -L[2].z * L[1] + L[1].z * L[2]; 243 L[3] = -L[3].z * L[0] + L[0].z * L[3]; 244 break; 245 246 case 4: // V3 clip V1 V2 V4 247 n = 3; 248 L[0] = -L[3].z * L[2] + L[2].z * L[3]; 249 L[1] = -L[1].z * L[2] + L[2].z * L[1]; 250 break; 251 252 case 5: // V1 V3 clip V2 V4: impossible 253 break; 254 255 case 6: // V2 V3 clip V1 V4 256 n = 4; 257 L[0] = -L[0].z * L[1] + L[1].z * L[0]; 258 L[3] = -L[3].z * L[2] + L[2].z * L[3]; 259 break; 260 261 case 7: // V1 V2 V3 clip V4 262 n = 5; 263 L4 = -L[3].z * L[0] + L[0].z * L[3]; 264 L[3] = -L[3].z * L[2] + L[2].z * L[3]; 265 break; 266 267 case 8: // V4 clip V1 V2 V3 268 n = 3; 269 L[0] = -L[0].z * L[3] + L[3].z * L[0]; 270 L[1] = -L[2].z * L[3] + L[3].z * L[2]; 271 L[2] = L[3]; 272 break; 273 274 case 9: // V1 V4 clip V2 V3 275 n = 4; 276 L[1] = -L[1].z * L[0] + L[0].z * L[1]; 277 L[2] = -L[2].z * L[3] + L[3].z * L[2]; 278 break; 279 280 case 10: // V2 V4 clip V1 V3: impossible 281 break; 282 283 case 11: // V1 V2 V4 clip V3 284 n = 5; 285 L[3] = -L[2].z * L[3] + L[3].z * L[2]; 286 L[2] = -L[2].z * L[1] + L[1].z * L[2]; 287 break; 288 289 case 12: // V3 V4 clip V1 V2 290 n = 4; 291 L[1] = -L[1].z * L[2] + L[2].z * L[1]; 292 L[0] = -L[0].z * L[3] + L[3].z * L[0]; 293 break; 294 295 case 13: // V1 V3 V4 clip V2 296 n = 5; 297 L[3] = L[2]; 298 L[2] = -L[1].z * L[2] + L[2].z * L[1]; 299 L[1] = -L[1].z * L[0] + L[0].z * L[1]; 300 break; 301 302 case 14: // V2 V3 V4 clip V1 303 n = 5; 304 L4 = -L[0].z * L[3] + L[3].z * L[0]; 305 L[0] = -L[0].z * L[1] + L[1].z * L[0]; 306 break; 307 308 case 15: // V1 V2 V3 V4 309 n = 4; 310 break; 311 } 312 313 if (n == 0) return 0; 314 315 // 2. Integrate 316 F = PolygonFormFactor(L, L4, n); // After the horizon clipping. 317 318 // 3. Compute irradiance 319 return max(0, F.z); 320#endif 321} 322 323// This function assumes that inputs are well-behaved, e.i. 324// that the line does not pass through the origin and 325// that the light is (at least partially) above the surface. 326float I_diffuse_line(float3 C, float3 A, float hl) 327{ 328 // Solve C.z + h * A.z = 0. 329 float h = -C.z * rcp(A.z); // May be Inf, but never NaN 330 331 // Clip the line segment against the z-plane if necessary. 332 float h2 = (A.z >= 0) ? max( hl, h) 333 : min( hl, h); // P2 = C + h2 * A 334 float h1 = (A.z >= 0) ? max(-hl, h) 335 : min(-hl, h); // P1 = C + h1 * A 336 337 // Normalize the tangent. 338 float as = dot(A, A); // |A|^2 339 float ar = rsqrt(as); // 1/|A| 340 float a = as * ar; // |A| 341 float3 T = A * ar; // A/|A| 342 343 // Orthogonal 2D coordinates: 344 // P(n, t) = n * N + t * T. 345 float tc = dot(T, C); // C = n * N + tc * T 346 float3 P0 = C - tc * T; // P(n, 0) = n * N 347 float ns = dot(P0, P0); // |P0|^2 348 349 float nr = rsqrt(ns); // 1/|P0| 350 float n = ns * nr; // |P0| 351 float Nz = P0.z * nr; // N.z = P0.z/|P0| 352 353 // P(n, t) - C = P0 + t * T - P0 - tc * T 354 // = (t - tc) * T = h * A = (h * a) * T. 355 float t2 = tc + h2 * a; // P2.t 356 float t1 = tc + h1 * a; // P1.t 357 float s2 = ns + t2 * t2; // |P2|^2 358 float s1 = ns + t1 * t1; // |P1|^2 359 float mr = rsqrt(s1 * s2); // 1/(|P1|*|P2|) 360 float r2 = s1 * (mr * mr); // 1/|P2|^2 361 float r1 = s2 * (mr * mr); // 1/|P1|^2 362 363 // I = (i1 + i2 + i3) / Pi. 364 // i1 = N.z * (P2.t / |P2|^2 - P1.t / |P1|^2). 365 // i2 = -T.z * (P2.n / |P2|^2 - P1.n / |P1|^2). 366 // i3 = N.z * ArcCos[Dot[P1, P2] / (|P1| * |P2|)] / |P0|. 367 float i12 = (Nz * t2 - (T.z * n)) * r2 368 - (Nz * t1 - (T.z * n)) * r1; 369 // Guard against numerical errors. 370 float dt = min(1, (ns + t1 * t2) * mr); 371 float i3 = acos(dt) * (Nz * nr); // angle * cos(θ) / r^2 372 373 // Guard against numerical errors. 374 return INV_PI * max(0, i12 + i3); 375} 376 377// Computes 1 / length(mul(transpose(inverse(invM)), normalize(ortho))). 378float ComputeLineWidthFactor(float3x3 invM, float3 ortho, float orthoSq) 379{ 380 // transpose(inverse(invM)) = 1 / determinant(invM) * cofactor(invM). 381 // Take into account the sparsity of the matrix: 382 // {{a,0,b}, 383 // {0,c,0}, 384 // {d,0,1}} 385 float a = invM[0][0]; 386 float b = invM[0][2]; 387 float c = invM[1][1]; 388 float d = invM[2][0]; 389 390 float det = c * (a - b * d); 391 float3 X = float3(c * (ortho.x - d * ortho.z), 392 (ortho.y * (a - b * d)), 393 c * (-b * ortho.x + a * ortho.z)); // mul(cof, ortho) 394 395 // 1 / length(1/s * X) = abs(s) / length(X). 396 return abs(det) * rsqrt(dot(X, X) * orthoSq) * orthoSq; // rsqrt(x^2) * x^2 = x 397} 398 399float I_ltc_line(float3x3 invM, float3 center, float3 axis, float halfLength) 400{ 401 float3 ortho = cross(center, axis); 402 float orthoSq = dot(ortho, ortho); 403 404 // Check whether the line passes through the origin. 405 bool quit = (orthoSq == 0); 406 407 // Check whether the light is entirely below the surface. 408 // We must test twice, since a linear transformation 409 // may bring the light above the surface (a side-effect). 410 quit = quit || (center.z + halfLength * abs(axis.z) <= 0); 411 412 // Transform into the diffuse configuration. 413 // This is a sparse matrix multiplication. 414 // Pay attention to the multiplication order 415 // (in case your matrices are transposed). 416 float3 C = mul(invM, center); 417 float3 A = mul(invM, axis); 418 419 // Check whether the light is entirely below the surface. 420 // We must test twice, since a linear transformation 421 // may bring the light below the surface (as expected). 422 quit = quit || (C.z + halfLength * abs(A.z) <= 0); 423 424 float result = 0; 425 426 if (!quit) 427 { 428 float w = ComputeLineWidthFactor(invM, ortho, orthoSq); 429 430 result = I_diffuse_line(C, A, halfLength) * w; 431 } 432 433 return result; 434} 435 436#endif // UNITY_AREA_LIGHTING_INCLUDED