A game about forced loneliness, made by TACStudios
/*
MIT License

Copyright (c) 2022 Kleber Garcia

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

#ifndef __COVERAGE__
#define __COVERAGE__

//Utilities for coverage bit mask on an 8x8 grid.
namespace Coverage
{

//**************************************************************************************************************/
// How to use
//**************************************************************************************************************/
/*
To utilize this library, first call the GenLUT function at the beginning of your compute shader.
This function must be followed by a group sync. Example follows:

...
Coverage::GenLUT(groupThreadIndex);
GroupMemoryBarrierWithGroupSync();
...

Alternatively, you can dump the contents into a buffer. The contents of the LUT are inside gs_quadMask, which has 64 entries.

After this, use the coverage functions.

*/

//**************************************************************************************************************/
// Coordinate System
//**************************************************************************************************************/
/*
The functions in this library follow the same convention: input is a shape described by certain vertices,
output is a 64 bit mask with such shape's coverage.

The coordinate system is (0,0) for the top left of an 8x8 grid, and (1,1) for the bottom right.
The LSB represents coordinate (0,0), and sample points are centered on the pixel.
Bits are laid out row by row, 8 bits per row.

(0.0,0.0)                       (1.0,0.0)
  |                               |
  |_______________________________|
  |   |   |   |   |   |   |   |   |
  | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
  |___|___|___|___|___|___|___|___|
  |   |   |   |   |   |   |   |   |
  | 8 | 9 | 10| 11| 12| 13| 14| 15|
  |___|___|___|___|___|___|___|___|___(1.0, 2.0/8.0)

 the center of bit 0 would be at pixel coordinate (0.5, 0.5), that is (0.5/8, 0.5/8) in normalized space, and so on

any points outside of the range (0,1) means they are outside the grid.
*/

//**************************************************************************************************************/
// Masks
//**************************************************************************************************************/
/*
Masks are stored in a packed 64 bit represented by uint2.
x component represents the first 32 bits, y component the next 32 bits.
*/

//**************************************************************************************************************/
// coverage API
//**************************************************************************************************************/

/*
lut for 4x4 quad mask.
See the BuildQuadMask function.
4 states for horizontal flipping and vertical flipping, 16 entries each:
  [ 0..15] neutral
  [16..31] flipped in X
  [32..47] transposed
  [48..63] flipped in X, transposed and inverted
You can dump this lut to a buffer, and preload it manually,
or just regenerate it in your thread group.
*/
groupshared uint gs_quadMask[16 * 4];

/*
Call this function to generate the coverage 4x4 luts.
groupThreadIndex - the thread index. Only threads with an index below 16 write LUT entries,
                   so the thread group must contain at least 16 threads.
NOTE: must sync group threads after calling this.
*/
void GenLUT(uint groupThreadIndex);

/*
Call this function to get a 64 bit coverage mask for a triangle.
v0, v1, v2 - the triangle coordinates in right hand ruling order
showFrontFace - when true, the result includes the pixels covered by all three edge masks
showBackface - when true, the result also includes the pixels covered by the complement of
               all three edge masks (the same triangle with the opposite winding)
return - the coverage mask for this triangle
*/
uint2 TriangleCoverageMask(float2 v0, float2 v1, float2 v2, bool showFrontFace, bool showBackface);


/*
Call this function to get a 64 bit coverage mask for a line.
v0, v1 - the line coordinates.
thickness - thickness of line in normalized space. 1.0 means the entire 8 pixels in a tile.
            The line is widened by this amount on each side of the v0-v1 segment.
caps - extra pixels in the caps of the line in normalized space. 1.0 means 8 pixels in a tile.
       Both end points are pushed outwards along the line by this amount.
return - the coverage mask of this line
*/
uint2 LineCoverageMask(float2 v0, float2 v1, float thickness, float caps);


//**************************************************************************************************************/
// coverage implementation
//**************************************************************************************************************/

/*
function that builds a 4x4 compact bit quad for line coverage.
the line is assumed to have a positive slope <= 1.0. That means it can only be raised 1 step at most.
"incrementMask" is a bit mask specifying how much the y component of a line increments.
"incrementMask" only describes 4 bits, the rest of the bits are ignored.
For example, given this bit mask:
1 0 1 0
would generate this 4x4 coverage mask:

0 0 0 0
0 0 0 1 <- 3rd bit tells the line to raise here
0 1 1 1 <- first bit raises the line
1 1 1 1 <- low axis is always covered

Each row of the quad occupies the low nibble of one byte of the result
(row r is stored at bit r * 8), matching the 8 bit row stride of the 8x8 grid.
*/
uint BuildQuadMask(uint incrementMask)
{
    // Only 4 columns exist. Without this, a set bit 31 would lead to a shift by 32 below,
    // which is not a valid shift amount for a 32 bit value.
    incrementMask &= 0xFu;

    uint c = 0;

    uint mask = 0xF;
    for (int r = 0; r < 4; ++r)
    {
        c |= mask << (r * 8);
        if (incrementMask == 0)
            break;

        // The next row starts one column to the right of the lowest remaining raise.
        int b = firstbitlow(incrementMask);
        mask = (0xFu << (b + 1)) & 0xFu;
        incrementMask ^= 1u << b;
    }

    return c;
}

// Reverses the bit order of the 4 bit nibble located at bit "offset" of mask.
// Returns the reversed nibble at the same offset, every other bit is zero.
uint FlipNibble(uint mask, int offset)
{
    mask = (mask >> offset) & 0xF;
    uint r = ((mask << 3) & 0x8)
           | ((mask << 1) & 0x4)
           | ((mask >> 1) & 0x2)
           | ((mask >> 3) & 0x1);
    return (r << offset);
}

// Mirrors an entire 4x4 bit quad horizontally, by flipping the low nibble of each of its 4 bytes.
uint FlipQuadInX(uint mask)
{
    return FlipNibble(mask, 0) | FlipNibble(mask, 8) | FlipNibble(mask, 16) | FlipNibble(mask, 24);
}

// Transposes a 4x4 bit quad: the bit at (row i, column j) moves to (row j, column i).
// Rows are 8 bits apart, only the low nibble of each byte is read.
uint TransposeQuad(uint mask)
{
    uint result = 0;
    [unroll]
    for (int i = 0; i < 4; ++i)
    {
        for (int j = 0; j < 4; ++j)
        {
            if ((mask & (1u << (i * 8 + j))) != 0)
                result |= 1u << (j * 8 + i);
        }
    }
    return result;
}

// Builds all the luts necessary for fast bit based coverage.
// Threads 0..15 each write the 4 variants of the quad mask for increment mask "groupThreadIndex":
//   [i]      neutral
//   [i + 16] flipped in X
//   [i + 32] transposed
//   [i + 48] flipped in X, transposed and inverted
// Every thread derives its 4 entries from a local value, so no synchronization is needed
// inside this function. The caller must still sync the group before reading gs_quadMask.
void GenLUT(uint groupThreadIndex)
{
    if (groupThreadIndex < 16)
    {
        const uint neutral = BuildQuadMask(groupThreadIndex);

        gs_quadMask[groupThreadIndex]      = neutral;
        gs_quadMask[groupThreadIndex + 16] = FlipQuadInX(neutral);
        gs_quadMask[groupThreadIndex + 32] = TransposeQuad(neutral);
        gs_quadMask[groupThreadIndex + 48] = (~TransposeQuad(FlipQuadInX(neutral))) & 0x0F0F0F0F;
    }
}

//
// Represents a 2D analytical line.
// stores slope (a) and offset (b)
struct AnalyticalLine
{
    float a;
    float b;

    // Builds an analytical line based on two points.
    // NOTE: the slope is a plain division, so a vertical line (v0.x == v1.x)
    // produces a non finite slope and offset.
    void Build(float2 v0, float2 v1)
    {
        //line equation: f(x): a * x + b;
        // where a = (v1.y - v0.y)/(v1.x - v0.x)
        float2 l = v1 - v0;
        a = l.y/l.x;
        b = v1.y - a * v1.x;
    }

    // Builds a "Flipped" line.
    // A flipped line is defined as having a slope in the range [0, 1].
    // The output booleans specify the flip operators to recover the original line:
    //   outFlipX       - true when the x and y deltas have different signs; x is mirrored (x -> 1 - x).
    //   outFlipAxis    - true when |dy| > |dx|; x and y are swapped.
    //   outIsRightHand - orientation bit of the original segment, used by callers to pick the covered side.
    //   outValid       - false when both points are identical. In that case a and b are not finite.
    void BuildFlipped(float2 v0, float2 v1, out bool outFlipX, out bool outFlipAxis, out bool outIsRightHand, out bool outValid)
    {
        //build line with flip bits for lookup compression
        //This line will have a slope between 0 and 1, and always positive.
        //We output the flips as bools

        float2 ll = v1 - v0;
        outFlipAxis = abs(ll.y) > abs(ll.x);
        outFlipX = sign(ll.y) != sign(ll.x);
        outIsRightHand = ll.x >= 0 ? v0.y >= v1.y : v0.y > v1.y;
        if (outFlipAxis)
        {
            // Swap the axes so that the x delta is the dominant one, which keeps |slope| <= 1.
            ll.xy = ll.yx;
            v0.xy = v0.yx;
            v1.xy = v1.yx;
        }

        a = ll.y/ll.x;
        if (outFlipX)
        {
            // Mirror in x, which negates the slope and makes it positive.
            v0.x = 1.0 - v0.x;
            v1.x = 1.0 - v1.x;
            a *= -1;
        }
        b = v1.y - a * v1.x;
        outValid = any(v1 != v0);
    }

    // Evaluates f(x) = a * x + b for the line
    float Eval(float xval)
    {
        return xval * a + b;
    }

    // Evaluates 4 inputs of f(x) = a * x + b for the line
    float4 Eval4(float4 xvals)
    {
        return xvals * a + b;
    }

    // Returns the 2d point of the line at the given X.
    float2 PointAt(float xv)
    {
        return float2(xv, Eval(xv));
    }
};

/*
Represents a set of bits in an 8x8 grid divided by a line.
The representation is given by 2 splits of the 8x8 grid.
offsets represents how much we offset the quadCoverage on either x or y (flipped dependant axis)
the mask represents the increment mask used to look up the quadCoverage
*/
struct LineArea
{
    int offsets[2];
    uint masks[2];
    bool isValid;
    bool flipX;
    bool flipAxis;
    bool isRightHand;
    AnalyticalLine debugLine;

    // Recovers a single point in the boundary
    // of the line (where the line intersects a pixel).
    // There is a total of 8 possible points, so i must be in the range [0, 7].
    float2 GetBoundaryPoint(uint i)
    {
        int j = i & 0x3;   // column inside the half
        int m = i >> 2;    // which half of the grid
        // Row of the line at this column: the half's base offset plus the raises before column j.
        int yval = offsets[m] + (int)countbits(((1u << j) - 1) & masks[m]);
        float2 v = float2(i + 0.5, yval + 0.5) * 1.0/8.0;
        if (flipX)
            v.x = 1.0 - v.x;
        if (flipAxis)
        {
            float2 tmp = v;
            v.xy = tmp.yx;
        }
        return v;
    }

    // Creates a line area object, based on 2 points on an 8x8 quad
    // quad coordinate domain is 0.0 -> 1.0 for both axis.
    // Anything negative or greater than 1.0 is by definition outside of the 8x8 quad.
    static LineArea Create(float2 v0, float2 v1)
    {
        LineArea data;

        //line debug data
        data.debugLine.Build(v0, v1);

        AnalyticalLine l;
        l.BuildFlipped(v0, v1, data.flipX, data.flipAxis, data.isRightHand, data.isValid);

        // Xs values of 8 points
        const float4 xs0 = float4(0.5,1.5,2.5,3.5)/8.0;
        const float4 xs1 = float4(4.5,5.5,6.5,7.5)/8.0;

        // Ys values of 8 points
        float4 ys0 = l.Eval4(xs0);
        float4 ys1 = l.Eval4(xs1);

        int4 ysi0 = (int4)floor(ys0 * 8.0 - 0.5);
        int4 ysi1 = (int4)floor(ys1 * 8.0 - 0.5);

        // Incremental masks. With a slope in [0, 1] each step is 0 or 1; the clamp keeps it that way
        // when float rounding or a degenerate (non finite) line yields anything else, so that the
        // packed masks always stay inside the 16 entry LUT range.
        uint4 dysmask0 = (uint4)clamp(int4(ysi0.yzw, ysi1.x) - ysi0, 0, 1);
        uint4 dysmask1 = (uint4)clamp(int4(ysi1.yzw - ysi1.xyz, 0), 0, 1);

        // Final output, offset and mask
        data.offsets[0] = ysi0.x;
        data.masks[0] = dysmask0.x | (dysmask0.y << 1) | (dysmask0.z << 2) | (dysmask0.w << 3);
        data.offsets[1] = countbits(data.masks[0]) + data.offsets[0];
        data.masks[1] = dysmask1.x | (dysmask1.y << 1) | (dysmask1.z << 2) | (dysmask1.w << 3);
        return data;
    }
};

// Converts a LineArea into the 64 bit coverage mask of one side of its line.
// Looks up the two 4x4 quads of the line in gs_quadMask (so GenLUT plus a group sync must have
// happened before), positions them using the offsets, and finally applies the flipX and
// isRightHand inversions. Returns 0 when the line area is not valid.
uint2 CreateCoverageMask(in LineArea lineArea)
{
    const uint leftSideMask = 0x0F0F0F0F;
    const uint2 horizontalMask = uint2(leftSideMask, ~leftSideMask);

    //prepare samples, flip samples if there is mirroring in x
    int2 ii = lineArea.flipX ? int2(1,0) : int2(0,1);
    // Selects one of the 4 LUT sections: +16 for flipX, +32 for flipAxis.
    int lutOperation = ((uint)lineArea.flipX << 4) | ((uint)lineArea.flipAxis << 5);
    int2 offsets = int2(lineArea.offsets[ii.x],lineArea.offsets[ii.y]);
    // The increment masks are limited to 4 bits so the lookup can never leave the 64 entry LUT,
    // even for an invalid line area whose result gets discarded below.
    uint2 halfSamples = uint2(gs_quadMask[(lineArea.masks[ii.x] & 0xFu) + lutOperation],
                              gs_quadMask[(lineArea.masks[ii.y] & 0xFu) + lutOperation]);

    uint2 result = 0;
    if (lineArea.flipAxis)
    {
        //Case were we have flipped axis / transpose. We generate top and bottom part
        int2 tOffsets = clamp(offsets, -31, 31);
        uint2 workMask = leftSideMask << clamp(offsets, 0, 4);
        uint2 topDownMasks = uint2( tOffsets.x > 0 ?
                  ((halfSamples.x << min(4,tOffsets.x)) & leftSideMask) | ((halfSamples.x << min(8,tOffsets.x)) & ~leftSideMask)
                : ((halfSamples.x << 4) >> min(4,-tOffsets.x) & ~leftSideMask) >> 4,
            tOffsets.y > 0 ?
                  ((halfSamples.y << min(4, tOffsets.y)) & leftSideMask) | ((halfSamples.y << min(8, tOffsets.y)) & ~leftSideMask)
                : ((halfSamples.y << 4) >> min(4, -tOffsets.y) & ~leftSideMask) >> 4);

        int2 backMaskShift = lineArea.flipX ? clamp(tOffsets + 4, -31, 31) : tOffsets;
        uint2 backMaskOp = uint2((backMaskShift.x > 0 ? 1u << backMaskShift.x : 1u >> -backMaskShift.x) - 1u,
                                 (backMaskShift.y > 0 ? 1u << backMaskShift.y : 1u >> -backMaskShift.y) - 1u);
        uint2 backBite = uint2( backMaskShift.x <= 0 ? (lineArea.flipX ? ~0x0 : 0x0) : (lineArea.flipX ? (0xFF & ~backMaskOp.x) : (0xFFFF & backMaskOp.x)),
                                backMaskShift.y <= 0 ? (lineArea.flipX ? ~0x0 : 0x0) : (lineArea.flipX ? (0xFF & ~backMaskOp.y) : (0xFFFF & backMaskOp.y)));
        // backBite is a per row pattern, replicate it over the 4 rows of each 32 bit half.
        result = backBite | (backBite << 8) | (backBite << 16) | (backBite << 24) | (topDownMasks & workMask);
    }
    else
    {
        //Case were the masks are positioned horizontally. We generate 4 quads
        uint2 sideMasks = uint2(halfSamples.x, (halfSamples.y << 4));
        // Row offsets converted to bit shifts (8 bits per row). The inner clamp happens before the
        // shift so that very large offsets cannot overflow; any shift of 4 rows or more saturates to
        // +-31 exactly as before.
        int4 tOffsets = clamp(clamp(offsets.xyxy - int4(0,0,4,4), -4, 4) << 3, -31, 31);
        uint4 halfMasks = uint4( tOffsets.x > 0 ? (~sideMasks.x & horizontalMask.x) << tOffsets.x : ~(sideMasks.x >> -tOffsets.x),
                                 tOffsets.y > 0 ? (~sideMasks.y & horizontalMask.y) << tOffsets.y : ~(sideMasks.y >> -tOffsets.y),
                                 tOffsets.z > 0 ? (~sideMasks.x & horizontalMask.x) << tOffsets.z : ~(sideMasks.x >> -tOffsets.z),
                                 tOffsets.w > 0 ? (~sideMasks.y & horizontalMask.y) << tOffsets.w : ~(sideMasks.y >> -tOffsets.w)) & horizontalMask.xyxy;
        result = uint2(halfMasks.x | halfMasks.y, halfMasks.z | halfMasks.w);
    }

    result = lineArea.flipX ? ~result : result;
    result = lineArea.isRightHand ? result : ~result;
    result = lineArea.isValid ? result : 0;
    return result;
}

// Returns the 64 bit coverage mask of a triangle.
// v0, v1, v2 - the triangle coordinates in right hand ruling order.
// showFrontFace - include the pixels covered by all three edge masks.
// showBackface - include the pixels covered by the complement of all three edge masks.
//                This part is skipped when all three edge masks are empty.
uint2 TriangleCoverageMask(float2 v0, float2 v1, float2 v2, bool showFrontFace, bool showBackface)
{
    uint2 mask0 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v0, v1));
    uint2 mask1 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v1, v2));
    uint2 mask2 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v2, v0));

    uint2 frontMask = mask0 & mask1 & mask2;
    uint2 backMask = ~mask0 & ~mask1 & ~mask2;
    bool frontMaskValid = any(mask0 != 0) || any(mask1 != 0) || any(mask2 != 0);

    uint2 result = 0;
    if (showFrontFace)
        result |= frontMask;
    if (frontMaskValid && showBackface)
        result |= backMask;
    return result;
}

// Returns the 64 bit coverage mask of a line, built as the intersection of the 4 edges of a
// rectangle around the v0-v1 segment.
// thickness - distance, in normalized space, the rectangle extends to each side of the segment.
// caps - distance, in normalized space, the rectangle extends past both end points.
// A zero length segment has no direction and yields an empty mask.
uint2 LineCoverageMask(float2 v0, float2 v1, float thickness, float caps)
{
    float2 delta = v1 - v0;
    // normalize() of a zero length vector is not finite, and the resulting NaN vertices would
    // still pass the "v1 != v0" validity test of the edges.
    if (!(dot(delta, delta) > 0.0))
        return uint2(0, 0);

    float2 lineVector = normalize(delta);
    // Perpendicular of the direction, same as cross(float3(lineVector, 0), float3(0, 0, 1)).xy
    float2 D = float2(lineVector.y, -lineVector.x) * thickness;
    v0 -= caps * lineVector;
    v1 += caps * lineVector;

    uint2 mask0 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v0 - D, v1 - D));
    uint2 mask1 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v1 + D, v0 + D));
    uint2 mask2 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v0 + D, v0 - D));
    uint2 mask3 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v1 - D, v1 + D));
    return mask0 & mask1 & mask3 & mask2;
}

}

#endif