A game about forced loneliness, made by TACStudios
1/*
2MIT License
3
4Copyright (c) 2022 Kleber Garcia
5
6Permission is hereby granted, free of charge, to any person obtaining a copy
7of this software and associated documentation files (the "Software"), to deal
8in the Software without restriction, including without limitation the rights
9to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10copies of the Software, and to permit persons to whom the Software is
11furnished to do so, subject to the following conditions:
12
13The above copyright notice and this permission notice shall be included in all
14copies or substantial portions of the Software.
15
16THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22SOFTWARE.
23*/
24
25#ifndef __COVERAGE__
26#define __COVERAGE__
27
28//Utilities for coverage bit mask on an 8x8 grid.
29namespace Coverage
30{
31
32//**************************************************************************************************************/
33// How to use
34//**************************************************************************************************************/
35/*
To utilize this library, first call the GenLUT function at the beginning of your compute shader.
37This function must be followed by a group sync. Example follows:
38
39...
Coverage::GenLUT(groupThreadIndex);
41GroupMemoryBarrierWithGroupSync();
42...
43
Alternatively, you can dump the contents into a buffer. The contents of the LUT are inside gs_quadMask, which has 64 entries.

After this, use the coverage functions.
47
48*/
49
50//**************************************************************************************************************/
51// Coordinate System
52//**************************************************************************************************************/
53/*
The functions in this library follow the same convention: the input is a shape described by certain vertices,
the output is a 64 bit mask with such shape's coverage.
56
57The coordinate system is (0,0) for the top left of an 8x8 grid, and (1,1) for the bottom right.
58The LSB represents coordinate (0,0), and sample points are centered on the pixel.
59
(0.0,0.0)                    (1.0,0.0)
 |                               |
 |_______________________________|
 |   |   |   |   |   |   |   |   |
 | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
 |___|___|___|___|___|___|___|___|
 |   |   |   |   |   |   |   |   |
 | 8 | 9 | 10| 11| 12| 13| 14| 15|
 |___|___|___|___|___|___|___|___|___(1.0, 2.0/8.0)

 the center of bit 0 is at pixel coordinate (0.5, 0.5), i.e. (0.5/8.0, 0.5/8.0) in normalized space, and so on
71
Any point outside of the range (0,1) is outside the grid.
73*/
74
75//**************************************************************************************************************/
76// Masks
77//**************************************************************************************************************/
78/*
Masks are stored as a packed 64 bit value represented by a uint2.
The x component holds the first 32 bits, the y component the next 32 bits.
81*/
82
83//**************************************************************************************************************/
84// coverage API
85//**************************************************************************************************************/
86
/*
LUT of 4x4 quad masks. See the BuildQuadMask function.
It holds 4 tables of 16 entries each, one entry per 4 bit increment mask. GenLUT fills them as:
  [ 0..15] neutral quad masks
  [16..31] quad masks flipped in X
  [32..47] transposed quad masks
  [48..63] quad masks flipped in X, then transposed, then complemented (low nibble of each byte only)
You can dump this LUT to a buffer and preload it manually,
or just regenerate it in your thread group.
*/
groupshared uint gs_quadMask[16 * 4];
94
/*
Call this function to generate the 4x4 coverage LUTs stored in gs_quadMask.
groupThreadIndex - the thread index. Only thread indices below 16 write LUT entries.
NOTE: must sync group threads after calling this.
*/
void GenLUT(uint groupThreadIndex);
101
/*
Call this function to get a 64 bit coverage mask for a triangle.
v0, v1, v2 - the triangle coordinates in right hand ruling order
showFrontFace - when true, the result includes the bits covered by all three edge masks
showBackface - when true, the result also includes the bits covered by the complements of all three edge masks
return - the coverage mask for this triangle
*/
uint2 TriangleCoverageMask(float2 v0, float2 v1, float2 v2, bool showFrontFace, bool showBackface);
108
109
/*
Call this function to get a 64 bit coverage mask for a line.
v0, v1 - the line coordinates.
thickness - thickness of line in normalized space. 1.0 means the entire 8 pixels in a tile.
            The line is extruded by this amount on each side of the segment v0-v1.
caps - extra pixels in the caps of the line in normalized space. 1.0 means 8 pixels in a tile.
       The line is extended by this amount past each of its two end points.
return - the coverage mask of this line
*/
uint2 LineCoverageMask(float2 v0, float2 v1, float thickness, float caps);
118
119
120//**************************************************************************************************************/
121// coverage implementation
122//**************************************************************************************************************/
123
/*
Builds a compact 4x4 bit quad for line coverage.
Each row of the quad occupies the low nibble of one byte (row r starts at bit r * 8).
The line is assumed to have a positive slope <= 1.0, so it can rise at most one row per column.
"incrementMask" is a bit mask telling at which columns the line rises by one row.
Only the low 4 bits of "incrementMask" are used; all other bits are ignored.
For example, given this bit mask (written LSB first):
1 0 1 0
this 4x4 coverage mask is generated:

0 0 0 0
0 0 0 1 <- 3rd bit tells the line to raise here
0 1 1 1 <- first bit raises the line
1 1 1 1 <- low axis is always covered

return - the quad mask, rows packed from the lowest byte upwards.
*/
uint BuildQuadMask(uint incrementMask)
{
    // Honour the documented contract: bits above the low nibble never take part.
    // Without this a stray high bit (bit 31) could lead to a shift count of 32 below.
    incrementMask &= 0xFu;

    uint quad = 0;
    uint rowMask = 0xFu; // the lowest row is always fully covered
    for (int row = 0; row < 4; ++row)
    {
        quad |= rowMask << (row * 8);
        if (incrementMask == 0)
            break;

        // Lowest set bit = the next column at which the line rises.
        const uint lowest = incrementMask & (~incrementMask + 1u);
        // The row above is covered only in the columns strictly after that one.
        rowMask = ~((lowest << 1) - 1u) & 0xFu;
        // Consume the bit that was just handled.
        incrementMask &= incrementMask - 1u;
    }

    return quad;
}
155
// Reverses the bit order of the 4 bit nibble located at bit position "offset" of "mask".
// The reversed nibble is returned at that same position; all other bits of the result are zero.
uint FlipNibble(uint mask, int offset)
{
    uint nibble = (mask >> offset) & 0xF;
    // swap neighbouring bits, then swap the two bit pairs
    nibble = ((nibble & 0x5) << 1) | ((nibble >> 1) & 0x5);
    nibble = ((nibble & 0x3) << 2) | ((nibble >> 2) & 0x3);
    return nibble << offset;
}
166
// Mirrors an entire 4x4 bit quad in X: the low nibble of every byte gets its bit order reversed.
// Bits in the high nibble of each byte are dropped from the result.
uint FlipQuadInX(uint mask)
{
    // Work on the four row nibbles at once.
    uint rows = mask & 0x0F0F0F0F;
    // swap neighbouring bits inside every nibble
    rows = ((rows & 0x05050505) << 1) | ((rows >> 1) & 0x05050505);
    // swap the two bit pairs inside every nibble
    rows = ((rows & 0x03030303) << 2) | ((rows >> 2) & 0x03030303);
    return rows;
}
172
// Transposes a 4x4 bit quad: the bit at (row i, column j), stored at bit i * 8 + j,
// moves to (row j, column i), stored at bit j * 8 + i.
// Bits outside the low nibble of each byte are dropped from the result.
uint TransposeQuad(uint mask)
{
    // A bit on diagonal d = j - i moves by 7 * d positions, so each diagonal is
    // selected with a constant mask and shifted as a whole.
    return (mask & 0x08040201u)             // d =  0, stays in place
         | ((mask & 0x00080402u) << 7)      // d = +1
         | ((mask & 0x00000804u) << 14)     // d = +2
         | ((mask & 0x00000008u) << 21)     // d = +3
         | ((mask & 0x04020100u) >> 7)      // d = -1
         | ((mask & 0x02010000u) >> 14)     // d = -2
         | ((mask & 0x01000000u) >> 21);    // d = -3
}
187
// Builds all the LUTs necessary for fast bit based coverage.
// Fills the four 16 entry tables of gs_quadMask; the entry index inside a table is the increment mask.
void GenLUT(uint groupThreadIndex)
{
    // One thread per increment mask value. The remaining threads only take part in the barriers.
    const bool ownsEntry = groupThreadIndex < 16;

    // Table 0: neutral
    if (ownsEntry)
        gs_quadMask[groupThreadIndex] = BuildQuadMask(groupThreadIndex);

    GroupMemoryBarrierWithGroupSync();

    // Table 1: flipped in X. Table 2: transposed.
    if (ownsEntry)
    {
        const uint neutral = gs_quadMask[groupThreadIndex];
        gs_quadMask[groupThreadIndex + 16] = FlipQuadInX(neutral);
        gs_quadMask[groupThreadIndex + 32] = TransposeQuad(neutral);
    }

    GroupMemoryBarrierWithGroupSync();

    // Table 3: flipped in X, transposed, then complemented inside the 4x4 quad bits.
    if (ownsEntry)
    {
        const uint flipped = FlipQuadInX(gs_quadMask[groupThreadIndex]);
        gs_quadMask[groupThreadIndex + 48] = ~TransposeQuad(flipped) & 0x0F0F0F0F;
    }
}
209
// A 2D analytical line in slope / offset form: f(x) = a * x + b.
struct AnalyticalLine
{
    float a; // slope
    float b; // offset, i.e. f(0)

    // Builds the line passing through v0 and v1.
    // NOTE: the slope is (v1.y - v0.y) / (v1.x - v0.x); no special handling exists for v1.x == v0.x.
    void Build(float2 v0, float2 v1)
    {
        const float2 delta = v1 - v0;
        a = delta.y / delta.x;
        b = v1.y - a * v1.x;
    }

    // Builds a "flipped" line, used to compress the lookup tables.
    // The axes are swapped when the line is steeper in y than in x, and x is mirrored (x -> 1 - x)
    // when the signs of the two deltas differ, so the stored slope is non-negative and at most 1.
    // outFlipX       - true when x was mirrored
    // outFlipAxis    - true when x and y were swapped
    // outIsRightHand - orientation of the original (unflipped) segment v0 -> v1
    // outValid       - false when both (flipped) end points coincide
    // The flip outputs tell the caller how to recover the original line.
    void BuildFlipped(float2 v0, float2 v1, out bool outFlipX, out bool outFlipAxis, out bool outIsRightHand, out bool outValid)
    {
        float2 delta = v1 - v0;

        // All three flags are derived from the original, unflipped end points.
        outIsRightHand = (delta.x >= 0) ? (v0.y >= v1.y) : (v0.y > v1.y);
        outFlipX = sign(delta.y) != sign(delta.x);
        outFlipAxis = abs(delta.y) > abs(delta.x);

        if (outFlipAxis)
        {
            delta = delta.yx;
            v0 = v0.yx;
            v1 = v1.yx;
        }

        a = delta.y / delta.x;
        if (outFlipX)
        {
            v0.x = 1.0 - v0.x;
            v1.x = 1.0 - v1.x;
            a = -a;
        }

        b = v1.y - a * v1.x;
        // Evaluated on the flipped end points, after the mirroring above.
        outValid = any(v1 != v0);
    }

    // Evaluates f(x) = a * x + b for a single x.
    float Eval(float xval)
    {
        return a * xval + b;
    }

    // Evaluates f(x) = a * x + b for 4 x values at once.
    float4 Eval4(float4 xvals)
    {
        return a * xvals + b;
    }

    // Returns the 2D point of the line at the given x.
    float2 PointAt(float xv)
    {
        const float yv = Eval(xv);
        return float2(xv, yv);
    }
};
276
/*
Represents a set of bits in an 8x8 grid divided by a line.
The grid is described as 2 halves of 4 columns each (in the flipped space of the line).
offsets - for each half, the row of the line at the first column of that half
masks   - for each half, the increment mask used to look up the quad coverage
flipX, flipAxis, isRightHand, isValid - flags produced by AnalyticalLine::BuildFlipped
debugLine - the unflipped analytical line, kept for debugging
*/
struct LineArea
{
    int offsets[2];
    uint masks[2];
    bool isValid;
    bool flipX;
    bool flipAxis;
    bool isRightHand;
    AnalyticalLine debugLine;

    // Recovers a single point in the boundary
    // of the line (where the line intersects a pixel), in the original unflipped space.
    // There is a total of 8 possible points, i in [0, 7].
    float2 GetBoundaryPoint(uint i)
    {
        const int side = i >> 2;      // which half of the grid
        const int column = i & 0x3;   // column inside that half

        // Row at this column = row at the start of the half + number of rises before this column.
        const uint risesBefore = countbits(((1u << column) - 1) & masks[side]);
        const int yval = offsets[side] + (int)risesBefore;

        float2 v = float2(i + 0.5, yval + 0.5) / 8.0;
        if (flipX)
            v.x = 1.0 - v.x;
        if (flipAxis)
            v = v.yx;
        return v;
    }

    // Creates a line area object, based on 2 points on an 8x8 quad.
    // The quad coordinate domain is 0.0 -> 1.0 for both axes.
    // Anything negative or greater than 1.0 is by definition outside of the 8x8 quad.
    static LineArea Create(float2 v0, float2 v1)
    {
        LineArea area;

        // line debug data
        area.debugLine.Build(v0, v1);

        AnalyticalLine flipped;
        flipped.BuildFlipped(v0, v1, area.flipX, area.flipAxis, area.isRightHand, area.isValid);

        // x of the 8 column centers, split in two halves
        const float4 leftXs = float4(0.5, 1.5, 2.5, 3.5) / 8.0;
        const float4 rightXs = float4(4.5, 5.5, 6.5, 7.5) / 8.0;

        // row index of the line at each column center
        const int4 leftRows = (int4)floor(flipped.Eval4(leftXs) * 8.0 - 0.5);
        const int4 rightRows = (int4)floor(flipped.Eval4(rightXs) * 8.0 - 0.5);

        // row increments between consecutive columns; the last column of the right half has no successor
        const uint4 leftSteps = uint4(leftRows.yzw, rightRows.x) - leftRows.xyzw;
        const uint4 rightSteps = uint4(rightRows.yzw, 0) - uint4(rightRows.xyz, 0);

        // pack the increments into one bit per column
        area.masks[0] = leftSteps.x | (leftSteps.y << 1) | (leftSteps.z << 2) | (leftSteps.w << 3);
        area.masks[1] = rightSteps.x | (rightSteps.y << 1) | (rightSteps.z << 2) | (rightSteps.w << 3);

        area.offsets[0] = leftRows.x;
        area.offsets[1] = countbits(area.masks[0]) + area.offsets[0];
        return area;
    }
} ;
348
// Converts a LineArea into a 64 bit coverage mask (uint2) using the quad masks stored in gs_quadMask.
// gs_quadMask must already be populated (see GenLUT) before this is called.
// lineArea - the line description produced by LineArea::Create
// return - the coverage mask of one side of the line; 0 when lineArea.isValid is false
uint2 CreateCoverageMask(in LineArea lineArea)
{
    // Low nibble of every byte. Rows are 8 bits wide, so this selects the first 4 columns of each row.
    const uint leftSideMask = 0x0F0F0F0F;
    // x = first 4 columns of every row, y = last 4 columns of every row
    const uint2 horizontalMask = uint2(leftSideMask, ~leftSideMask);

    //prepare samples, flip samples if there is mirroring in x
    // ii is the order in which the two halves of lineArea are consumed (swapped when flipX is set)
    int2 ii = lineArea.flipX ? int2(1,0) : int2(0,1);
    // Table offset inside gs_quadMask: +16 when flipX is set, +32 when flipAxis is set
    int lutOperation = ((uint)lineArea.flipX << 4) | ((uint)lineArea.flipAxis << 5);
    int2 offsets = int2(lineArea.offsets[ii.x],lineArea.offsets[ii.y]);
    // The 4x4 quad mask of each half, fetched with the half's increment mask
    uint2 halfSamples = uint2(gs_quadMask[lineArea.masks[ii.x] + lutOperation], gs_quadMask[lineArea.masks[ii.y] + lutOperation]);

    uint2 result = 0;
    if (lineArea.flipAxis)
    {
        //Case where we have flipped axis / transpose. We generate top and bottom part
        // NOTE(review): the clamp to [-31, 31] presumably keeps the shift counts below valid for 32 bit values - confirm
        int2 tOffsets = clamp(offsets, -31, 31);
        uint2 workMask = leftSideMask << clamp(offsets, 0, 4);
        uint2 topDownMasks = uint2( tOffsets.x > 0 ?
            ((halfSamples.x << min(4,tOffsets.x)) & leftSideMask) | ((halfSamples.x << min(8,tOffsets.x)) & ~leftSideMask)
            : ((halfSamples.x << 4) >> min(4,-tOffsets.x) & ~leftSideMask) >> 4,
            tOffsets.y > 0 ?
            ((halfSamples.y << min(4, tOffsets.y)) & leftSideMask) | ((halfSamples.y << min(8, tOffsets.y)) & ~leftSideMask)
            : ((halfSamples.y << 4) >> min(4, -tOffsets.y) & ~leftSideMask) >> 4);
        ;
        int2 backMaskShift = lineArea.flipX ? clamp(tOffsets + 4, -31, 31) : tOffsets;
        uint2 backMaskOp = int2((backMaskShift.x > 0 ? 1u << backMaskShift.x : 1u >> -backMaskShift.x) - 1u, (backMaskShift.y > 0 ? 1u << backMaskShift.y : 1u >> -backMaskShift.y) - 1u);
        uint2 backBite = uint2( backMaskShift.x <= 0 ? (lineArea.flipX ? ~0x0 : 0x0) : (lineArea.flipX ? (0xFF & ~backMaskOp.x) : (0xFFFF & backMaskOp.x)),
            backMaskShift.y <= 0 ? (lineArea.flipX ? ~0x0 : 0x0) : (lineArea.flipX ? (0xFF & ~backMaskOp.y) : (0xFFFF & backMaskOp.y)));
        // backBite is a per row pattern; it is replicated into all 4 bytes (rows) of each 32 bit half
        result = backBite | (backBite << 8) | (backBite << 16) | (backBite << 24) | (topDownMasks & workMask);
    }
    else
    {
        //Case where the masks are positioned horizontally. We generate 4 quads
        // second half is moved into the high nibble of each byte (the last 4 columns)
        uint2 sideMasks = uint2(halfSamples.x, (halfSamples.y << 4));
        // Row offsets converted to bit shifts (8 bits per row); zw are the same offsets for the second 32 bit half (4 rows further)
        int4 tOffsets = clamp((offsets.xyxy - int4(0,0,4,4)) << 3, -31, 31);
        uint4 halfMasks = uint4( tOffsets.x > 0 ? (~sideMasks.x & horizontalMask.x) << tOffsets.x : ~(sideMasks.x >> -tOffsets.x),
            tOffsets.y > 0 ? (~sideMasks.y & horizontalMask.y) << tOffsets.y : ~(sideMasks.y >> -tOffsets.y),
            tOffsets.z > 0 ? (~sideMasks.x & horizontalMask.x) << tOffsets.z : ~(sideMasks.x >> -tOffsets.z),
            tOffsets.w > 0 ? (~sideMasks.y & horizontalMask.y) << tOffsets.w : ~(sideMasks.y >> -tOffsets.w)) & horizontalMask.xyxy;
        // x = first 32 bits (left | right columns), y = next 32 bits
        result = uint2(halfMasks.x | halfMasks.y, halfMasks.z | halfMasks.w);
    }

    // Mirroring in x complements the mask
    result = lineArea.flipX ? ~result : result;
    // Lines that are not right handed cover the opposite side
    result = lineArea.isRightHand ? result : ~result;
    // Invalid lines produce no coverage at all
    result = lineArea.isValid ? result : 0;
    return result;

}
397
/*
Computes the 64 bit coverage mask of a triangle as the intersection of its three edge masks.
v0, v1, v2 - the triangle coordinates in right hand ruling order
showFrontFace - when true, include the bits covered by all three edge masks
showBackface - when true, also include the bits covered by the complements of all three edge masks
return - the coverage mask for this triangle
*/
uint2 TriangleCoverageMask(float2 v0, float2 v1, float2 v2, bool showFrontFace, bool showBackface)
{
    const uint2 mask0 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v0, v1));
    const uint2 mask1 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v1, v2));
    const uint2 mask2 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v2, v0));

    const uint2 frontMask = mask0 & mask1 & mask2;
    const uint2 backMask = ~mask0 & ~mask1 & ~mask2;

    // When all three edge masks are empty the complemented product would be the whole tile,
    // so the back face is only reported if at least one edge produced coverage.
    const bool anyEdgeCovers = any(mask0 != 0) || any(mask1 != 0) || any(mask2 != 0);

    const uint2 noCoverage = uint2(0, 0);
    const uint2 front = showFrontFace ? frontMask : noCoverage;
    const uint2 back = (anyEdgeCovers && showBackface) ? backMask : noCoverage;
    return front | back;
}
407
/*
Computes the 64 bit coverage mask of a thick line as the intersection of the 4 edges of its bounding rectangle.
v0, v1 - the line coordinates.
thickness - distance, in normalized space, the rectangle extends on each side of the segment v0-v1
caps - distance, in normalized space, the rectangle extends past each end point along the line
return - the coverage mask of this line; empty when v0 and v1 coincide
*/
uint2 LineCoverageMask(float2 v0, float2 v1, float thickness, float caps)
{
    const float2 delta = v1 - v0;

    // A zero length segment has no direction: normalize() would divide by zero and
    // every edge built from the resulting NaNs would still be reported as valid.
    if (dot(delta, delta) <= 0.0)
        return uint2(0, 0);

    const float2 lineVector = normalize(delta);
    // Perpendicular of the line direction, scaled by the thickness.
    const float2 D = cross(float3(lineVector, 0.0), float3(0, 0, 1)).xy * thickness;

    // Extend both ends by the caps.
    v0 -= caps * lineVector;
    v1 += caps * lineVector;

    // The two long sides, then the two end caps of the rectangle.
    const uint2 mask0 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v0 - D, v1 - D));
    const uint2 mask1 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v1 + D, v0 + D));
    const uint2 mask2 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v0 + D, v0 - D));
    const uint2 mask3 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v1 - D, v1 + D));
    return mask0 & mask1 & mask3 & mask2;
}
421
422}
423
424#endif