// A game about forced loneliness, made by TACStudios
1using Unity.Burst;
2using Unity.Collections;
3using UnityEngine.Assertions;
4using Unity.Collections.LowLevel.Unsafe;
5using Unity.Jobs;
6using System;
7
8namespace UnityEngine.Rendering
9{
10 internal struct GPUInstanceDataBufferBuilder : IDisposable
11 {
/// <summary>Component descriptions queued through AddComponent, in insertion order. Allocated lazily on the first add.</summary>
private NativeList<GPUInstanceComponentDesc> m_Components;

/// <summary>
/// Packs a component's GPU address and its "overridden" flag into one metadata entry.
/// </summary>
/// <param name="nameID">Property ID stored in the entry's NameID.</param>
/// <param name="gpuAddress">Address stored in the entry's Value (reinterpreted as unsigned).</param>
/// <param name="isOverridden">When true, the top bit (0x80000000) of Value is set.</param>
/// <returns>The populated metadata entry.</returns>
private MetadataValue CreateMetadataValue(int nameID, int gpuAddress, bool isOverridden)
{
    const uint kIsOverriddenBit = 0x80000000;

    uint packedValue = (uint)gpuAddress;
    if (isOverridden)
        packedValue |= kIsOverriddenBit;

    MetadataValue metadata = new MetadataValue();
    metadata.NameID = nameID;
    metadata.Value = packedValue;
    return metadata;
}
23
/// <summary>
/// Queues a component whose byte size is the size of the unmanaged type <typeparamref name="T"/>.
/// Forwards to the non-generic overload.
/// </summary>
public void AddComponent<T>(int propertyID, bool isOverriden, bool isPerInstance, InstanceType instanceType, InstanceComponentGroup componentGroup = InstanceComponentGroup.Default) where T : unmanaged
{
    int byteSize = UnsafeUtility.SizeOf<T>();
    AddComponent(propertyID, isOverriden, byteSize, isPerInstance, instanceType, componentGroup);
}
28
/// <summary>
/// Queues a component description for the next Build call.
/// Components are expected to arrive in non-decreasing InstanceType order; this is only asserted, not enforced.
/// </summary>
/// <param name="propertyID">Shader property ID of the component.</param>
/// <param name="isOverriden">Forwarded to the component description.</param>
/// <param name="byteSize">Size in bytes of one element of the component.</param>
/// <param name="isPerInstance">Whether the component stores one element per instance.</param>
/// <param name="instanceType">Instance type the component belongs to.</param>
/// <param name="componentGroup">Group forwarded to the component description.</param>
public void AddComponent(int propertyID, bool isOverriden, int byteSize, bool isPerInstance, InstanceType instanceType, InstanceComponentGroup componentGroup)
{
    // The list is created on first use so that a default-constructed builder is valid.
    if (!m_Components.IsCreated)
        m_Components = new NativeList<GPUInstanceComponentDesc>(64, Allocator.Temp);

    int queuedCount = m_Components.Length;
    if (queuedCount > 0)
    {
        var previousType = m_Components[queuedCount - 1].instanceType;
        Assert.IsTrue(previousType <= instanceType, "Added components must be sorted by InstanceType for better memory layout.");
    }

    var description = new GPUInstanceComponentDesc(propertyID, byteSize, isOverriden, isPerInstance, instanceType, componentGroup);
    m_Components.Add(description);
}
39
/// <summary>
/// Lays out every queued component in a single raw GPU buffer and creates the side buffers
/// (component addresses, byte sizes, instance index ranges and per-instance component indices) uploaded alongside it.
/// </summary>
/// <param name="instanceNumInfo">Instance counts per InstanceType that the buffer is sized for.</param>
/// <returns>
/// The newly created buffer. Its persistent native arrays and GPU buffers are not released by this method.
/// </returns>
public unsafe GPUInstanceDataBuffer Build(in InstanceNumInfo instanceNumInfo)
{
    int componentCount = m_Components.Length;
    int instanceTypeCount = (int)InstanceType.Count;

    // Temporary CPU-side tables; their contents are copied into GPU buffers further down.
    int perInstanceComponentCounts = 0;
    var perInstanceComponentIndices = new NativeArray<int>(componentCount, Allocator.Temp);
    var componentAddresses = new NativeArray<int>(componentCount, Allocator.Temp);
    var componentByteSizes = new NativeArray<int>(componentCount, Allocator.Temp);
    var componentInstanceIndexRanges = new NativeArray<Vector2Int>(componentCount, Allocator.Temp);

    GPUInstanceDataBuffer newBuffer = new GPUInstanceDataBuffer();
    newBuffer.instanceNumInfo = instanceNumInfo;
    newBuffer.instancesNumPrefixSum = new NativeArray<int>(instanceTypeCount, Allocator.Persistent);
    newBuffer.instancesSpan = new NativeArray<int>(instanceTypeCount, Allocator.Persistent);

    // Exclusive prefix sum of the instance counts gives the first instance index of each type.
    int sum = 0;

    for (int i = 0; i < instanceTypeCount; ++i)
    {
        newBuffer.instancesNumPrefixSum[i] = sum;
        sum += instanceNumInfo.InstanceNums[i];
        newBuffer.instancesSpan[i] = instanceNumInfo.GetInstanceNumIncludingChildren((InstanceType)i);
    }

    newBuffer.layoutVersion = GPUInstanceDataBuffer.NextVersion();
    newBuffer.version = 0;
    newBuffer.defaultMetadata = new NativeArray<MetadataValue>(componentCount, Allocator.Persistent);
    newBuffer.descriptions = new NativeArray<GPUInstanceComponentDesc>(componentCount, Allocator.Persistent);
    newBuffer.nameToMetadataMap = new NativeParallelHashMap<int, int>(componentCount, Allocator.Persistent);
    newBuffer.gpuBufferComponentAddress = new NativeArray<int>(componentCount, Allocator.Persistent);

    //Initial offset, must be 0, 0, 0, 0.
    // The first four Vector4 of the buffer are reserved and zero-filled below; components start after them.
    int vec4Size = UnsafeUtility.SizeOf<Vector4>();
    int byteOffset = 4 * vec4Size;

    for (int c = 0; c < componentCount; ++c)
    {
        var componentDesc = m_Components[c];
        newBuffer.descriptions[c] = componentDesc;

        int instanceTypeIndex = (int)componentDesc.instanceType;
        int instancesBegin = newBuffer.instancesNumPrefixSum[instanceTypeIndex];
        int instancesEnd = instancesBegin + newBuffer.instancesSpan[instanceTypeIndex];

        // Components that are not per-instance occupy exactly one element.
        int instancesNum = componentDesc.isPerInstance ? instancesEnd - instancesBegin : 1;
        Assert.IsTrue(instancesNum >= 0);

        componentInstanceIndexRanges[c] = new Vector2Int(instancesBegin, instancesBegin + instancesNum);

        // The address is biased by the first instance index of the type, so that
        // address + instanceIndex * byteSize falls inside this component's region for instanceIndex >= instancesBegin.
        int componentGPUAddress = byteOffset - instancesBegin * componentDesc.byteSize;
        Assert.IsTrue(componentGPUAddress >= 0, "GPUInstanceDataBufferBuilder: GPU address is negative. This is not supported for now. See kIsOverriddenBit." +
            "In general, if there is only one root InstanceType (MeshRenderer in our case) with a component that is larger or equal in size than any component in a derived InstanceType." +
            "And the number of parent gpu instances are always larger or equal to the number of derived type gpu instances. Than GPU address cannot become negative.");

        newBuffer.gpuBufferComponentAddress[c] = componentGPUAddress;
        newBuffer.defaultMetadata[c] = CreateMetadataValue(componentDesc.propertyID, componentGPUAddress, componentDesc.isOverriden);

        componentAddresses[c] = componentGPUAddress;
        componentByteSizes[c] = componentDesc.byteSize;

        int componentByteSize = componentDesc.byteSize * instancesNum;
        byteOffset += componentByteSize;

        bool addedToMap = newBuffer.nameToMetadataMap.TryAdd(componentDesc.propertyID, c);
        Assert.IsTrue(addedToMap, "Repetitive metadata element added to object.");

        if (componentDesc.isPerInstance)
        {
            perInstanceComponentIndices[perInstanceComponentCounts] = c;
            perInstanceComponentCounts++;
        }
    }

    newBuffer.byteSize = byteOffset;
    newBuffer.gpuBuffer = new GraphicsBuffer(GraphicsBuffer.Target.Raw, newBuffer.byteSize / 4, 4);

    // Zero the reserved header. The array is cleared on allocation and released right after the upload.
    var zeroedHeader = new NativeArray<Vector4>(4, Allocator.Temp);
    newBuffer.gpuBuffer.SetData(zeroedHeader, 0, 0, 4);
    zeroedHeader.Dispose();

    newBuffer.validComponentsIndicesGpuBuffer = new GraphicsBuffer(GraphicsBuffer.Target.Raw, perInstanceComponentCounts, 4);
    newBuffer.validComponentsIndicesGpuBuffer.SetData(perInstanceComponentIndices, 0, 0, perInstanceComponentCounts);
    newBuffer.componentAddressesGpuBuffer = new GraphicsBuffer(GraphicsBuffer.Target.Raw, componentCount, 4);
    newBuffer.componentAddressesGpuBuffer.SetData(componentAddresses, 0, 0, componentCount);
    newBuffer.componentInstanceIndexRangesGpuBuffer = new GraphicsBuffer(GraphicsBuffer.Target.Raw, componentCount, 8);
    newBuffer.componentInstanceIndexRangesGpuBuffer.SetData(componentInstanceIndexRanges, 0, 0, componentCount);
    newBuffer.componentByteCountsGpuBuffer = new GraphicsBuffer(GraphicsBuffer.Target.Raw, componentCount, 4);
    newBuffer.componentByteCountsGpuBuffer.SetData(componentByteSizes, 0, 0, componentCount);
    newBuffer.perInstanceComponentCount = perInstanceComponentCounts;

    // Release every temporary table, including the index ranges that were previously left undisposed.
    perInstanceComponentIndices.Dispose();
    componentAddresses.Dispose();
    componentByteSizes.Dispose();
    componentInstanceIndexRanges.Dispose();

    return newBuffer;
}
128
/// <summary>Releases the queued component list if it was ever allocated.</summary>
public void Dispose()
{
    if (!m_Components.IsCreated)
        return;

    m_Components.Dispose();
}
134 }
135
136 internal struct GPUInstanceDataBufferUploader : IDisposable
137 {
/// <summary>
/// Cached shader property IDs for the inputs and output bound in SubmitToGpu.
/// </summary>
private static class UploadKernelIDs
{
    // Scalar parameters.
    public static readonly int _InputValidComponentCounts = Shader.PropertyToID("_InputValidComponentCounts");
    public static readonly int _InputInstanceCounts = Shader.PropertyToID("_InputInstanceCounts");
    public static readonly int _InputInstanceByteSize = Shader.PropertyToID("_InputInstanceByteSize");

    // Input buffers.
    public static readonly int _InputComponentOffsets = Shader.PropertyToID("_InputComponentOffsets");
    public static readonly int _InputInstanceData = Shader.PropertyToID("_InputInstanceData");
    public static readonly int _InputInstanceIndices = Shader.PropertyToID("_InputInstanceIndices");
    public static readonly int _InputValidComponentIndices = Shader.PropertyToID("_InputValidComponentIndices");
    public static readonly int _InputComponentAddresses = Shader.PropertyToID("_InputComponentAddresses");
    public static readonly int _InputComponentByteCounts = Shader.PropertyToID("_InputComponentByteCounts");
    public static readonly int _InputComponentInstanceIndexRanges = Shader.PropertyToID("_InputComponentInstanceIndexRanges");

    // Destination buffer.
    public static readonly int _OutputBuffer = Shader.PropertyToID("_OutputBuffer");
}
152
/// <summary>
/// Scratch GPU buffers and compute shader state reused across uploads.
/// Buffers only ever grow: they are recreated when a request exceeds the size cached from the previous allocation.
/// </summary>
public struct GPUResources : IDisposable
{
    public ComputeBuffer instanceData;
    public ComputeBuffer instanceIndices;
    public ComputeBuffer inputComponentOffsets;
    public ComputeBuffer validComponentIndices;
    public ComputeShader cs;
    public int kernelId;

    // Sizes requested when each buffer was last (re)created.
    private int m_InstanceDataByteSize;
    private int m_InstanceCount;
    private int m_ComponentCounts;
    private int m_ValidComponentIndicesCount;

    /// <summary>Fetches the upload compute shader and its kernel index the first time it is called.</summary>
    /// <param name="resources">Resource container providing the upload kernels.</param>
    public void LoadShaders(GPUResidentDrawerResources resources)
    {
        if (cs == null)
        {
            cs = resources.instanceDataBufferUploadKernels;
            kernelId = cs.FindKernel("MainUploadScatterInstances");
        }
    }

    /// <summary>
    /// Makes sure every scratch buffer exists and is large enough for the requested counts.
    /// </summary>
    /// <param name="newInstanceCount">Number of instances to upload.</param>
    /// <param name="sizePerInstance">Byte size of one instance record in the staging data.</param>
    /// <param name="newComponentCounts">Number of component offsets to upload.</param>
    /// <param name="validComponentIndicesCount">Number of valid component indices to upload.</param>
    public void CreateResources(int newInstanceCount, int sizePerInstance, int newComponentCounts, int validComponentIndicesCount)
    {
        int newInstanceDataByteSize = newInstanceCount * sizePerInstance;

        // The instance data buffer is addressed in 4-byte words, so the byte size is rounded up.
        EnsureBuffer(ref instanceData, ref m_InstanceDataByteSize, newInstanceDataByteSize, (newInstanceDataByteSize + 3) / 4);
        EnsureBuffer(ref instanceIndices, ref m_InstanceCount, newInstanceCount, newInstanceCount);
        EnsureBuffer(ref inputComponentOffsets, ref m_ComponentCounts, newComponentCounts, newComponentCounts);
        EnsureBuffer(ref validComponentIndices, ref m_ValidComponentIndicesCount, validComponentIndicesCount, validComponentIndicesCount);
    }

    /// <summary>
    /// Releases all scratch buffers and resets the cached sizes, so the struct can be reused
    /// (or disposed again) without touching an already released buffer.
    /// </summary>
    public void Dispose()
    {
        cs = null;

        ReleaseAndClear(ref instanceData);
        ReleaseAndClear(ref instanceIndices);
        ReleaseAndClear(ref inputComponentOffsets);
        ReleaseAndClear(ref validComponentIndices);

        m_InstanceDataByteSize = 0;
        m_InstanceCount = 0;
        m_ComponentCounts = 0;
        m_ValidComponentIndicesCount = 0;
    }

    // Recreates a raw buffer when it is missing or smaller than the requested size.
    private static void EnsureBuffer(ref ComputeBuffer buffer, ref int cachedSize, int requestedSize, int elementCount)
    {
        if (buffer != null && requestedSize <= cachedSize)
            return;

        if (buffer != null)
            buffer.Release();

        // A zero-length compute buffer cannot be created, so at least one element is always allocated.
        buffer = new ComputeBuffer(Math.Max(1, elementCount), 4, ComputeBufferType.Raw);
        cachedSize = requestedSize;
    }

    // Releases a buffer and drops the reference so it is never mistaken for a live one.
    private static void ReleaseAndClear(ref ComputeBuffer buffer)
    {
        if (buffer == null)
            return;

        buffer.Release();
        buffer = null;
    }
}
233
// Number of uints one instance occupies in the staging buffer (sum of the per-instance components of the uploader's type).
private int m_UintPerInstance;
// Maximum number of instances the staging buffer can hold.
private int m_Capacity;
// Number of instances in the current upload; reset to 0 after each submit.
private int m_InstanceCount;
// Per component: whether it stores one element per instance.
private NativeArray<bool> m_ComponentIsInstanced;
// Per component: uint offset inside one staged instance record, or -1 for components of another instance type.
private NativeArray<int> m_ComponentDataIndex;
// Per component: element size in uints, or 0 for components of another instance type.
private NativeArray<int> m_DescriptionsUintSize;
// CPU staging memory, m_Capacity * m_UintPerInstance uints.
private NativeArray<uint> m_TmpDataBuffer;
// Components written since the last submit.
private NativeList<int> m_WritenComponentIndices;

// Empty array passed as gather indices when no gathering is requested.
private NativeArray<int> m_DummyArray;
244
/// <summary>
/// Creates an uploader that stages per-instance data for the components of one instance type.
/// </summary>
/// <param name="descriptions">Descriptions of all components of the destination buffer.</param>
/// <param name="capacity">Maximum number of instances that can be staged at once.</param>
/// <param name="instanceType">Only components of this type receive a slot in the staged instance record.</param>
public GPUInstanceDataBufferUploader(in NativeArray<GPUInstanceComponentDesc> descriptions, int capacity, InstanceType instanceType)
{
    m_Capacity = capacity;
    m_InstanceCount = 0;
    m_UintPerInstance = 0;
    m_ComponentDataIndex = new NativeArray<int>(descriptions.Length, Allocator.TempJob, NativeArrayOptions.UninitializedMemory);
    m_ComponentIsInstanced = new NativeArray<bool>(descriptions.Length, Allocator.TempJob, NativeArrayOptions.UninitializedMemory);
    m_DescriptionsUintSize = new NativeArray<int>(descriptions.Length, Allocator.TempJob, NativeArrayOptions.UninitializedMemory);
    m_WritenComponentIndices = new NativeList<int>(descriptions.Length, Allocator.TempJob);
    m_DummyArray = new NativeArray<int>(0, Allocator.Persistent);

    int bytesPerUint = UnsafeUtility.SizeOf<uint>();

    for (int componentIndex = 0; componentIndex < descriptions.Length; ++componentIndex)
    {
        var desc = descriptions[componentIndex];
        m_ComponentIsInstanced[componentIndex] = desc.isPerInstance;

        // Components of other instance types get no slot in the staged record.
        if (!(desc.instanceType == instanceType))
        {
            m_ComponentDataIndex[componentIndex] = -1;
            m_DescriptionsUintSize[componentIndex] = 0;
            continue;
        }

        int uintCount = desc.byteSize / bytesPerUint;
        m_ComponentDataIndex[componentIndex] = m_UintPerInstance;
        m_DescriptionsUintSize[componentIndex] = uintCount;

        // Only per-instance components advance the running offset inside the record.
        if (desc.isPerInstance)
            m_UintPerInstance += uintCount;
    }

    m_TmpDataBuffer = new NativeArray<uint>(m_Capacity * m_UintPerInstance, Allocator.TempJob, NativeArrayOptions.UninitializedMemory);
}
277
/// <summary>
/// Returns the raw address of the CPU staging buffer.
/// Asserts that the buffer exists, is non-empty, and that an upload is in progress (instance count above zero).
/// </summary>
public unsafe IntPtr GetUploadBufferPtr()
{
    Assert.IsTrue(m_TmpDataBuffer.IsCreated);
    Assert.IsTrue(m_TmpDataBuffer.Length > 0 && m_InstanceCount > 0);

    void* stagingPtr = m_TmpDataBuffer.GetUnsafePtr();
    return new IntPtr(stagingPtr);
}
284
/// <summary>Returns the number of uints one instance occupies in the staging buffer.</summary>
public int GetUIntPerInstance() => m_UintPerInstance;
289
/// <summary>
/// Returns the uint offset of a per-instance component inside one staged instance record.
/// </summary>
/// <param name="parameterIndex">Index of the component in the description array given to the constructor.</param>
/// <returns>The offset, in uints, of the component within an instance record.</returns>
public int GetParamUIntOffset(int parameterIndex)
{
    // Check the range first: the assertions below index the per-component tables with this value.
    Assert.IsTrue(parameterIndex >= 0 && parameterIndex < m_ComponentDataIndex.Length, "Parameter index invalid.");
    Assert.IsTrue(m_ComponentIsInstanced[parameterIndex], "Component is non instanced. Can only call this function on parameters that are for all instances.");

    int uintOffset = m_ComponentDataIndex[parameterIndex];
    Assert.IsTrue(uintOffset != -1, "Parameter index is not allocated. Did you allocate proper InstanceType parameters?");
    return uintOffset;
}
297
/// <summary>
/// Validates that <typeparamref name="T"/> matches the component's stride, marks the component as written
/// for the next submit, and returns its uint offset inside one staged instance record.
/// </summary>
/// <param name="parameterIndex">Index of the component in the description array given to the constructor.</param>
/// <returns>The offset, in uints, of the component within an instance record.</returns>
public int PrepareParamWrite<T>(int parameterIndex) where T : unmanaged
{
    // Check the range before the stride table is indexed with this value.
    Assert.IsTrue(parameterIndex >= 0 && parameterIndex < m_ComponentDataIndex.Length, "Parameter index invalid.");

    int uintPerParameter = UnsafeUtility.SizeOf<T>() / UnsafeUtility.SizeOf<uint>();
    Assert.IsTrue(uintPerParameter == m_DescriptionsUintSize[parameterIndex], "Parameter to write is incompatible, must be same stride as destination.");

    // Resolve (and validate) the offset before recording the component as written.
    int uintOffset = GetParamUIntOffset(parameterIndex);

    if (!m_WritenComponentIndices.Contains(parameterIndex))
        m_WritenComponentIndices.Add(parameterIndex);

    return uintOffset;
}
306
/// <summary>
/// Declares how many instances the upcoming upload contains.
/// </summary>
/// <param name="handlesLength">Instance count; asserted not to exceed the uploader's capacity.</param>
public unsafe void AllocateUploadHandles(int handlesLength)
{
    // Instances themselves are handed to SubmitToGpu as parameters, so nothing is allocated here.
    // Recording the count lets us check the capacity and tells the write jobs how many elements to copy.
    Assert.IsTrue(handlesLength <= m_Capacity);
    m_InstanceCount = handlesLength;
}
314
/// <summary>
/// Schedules a write of <paramref name="instanceData"/> into the staging buffer without gathering:
/// the empty dummy array is passed as the gather indices.
/// </summary>
public unsafe JobHandle WriteInstanceDataJob<T>(int parameterIndex, NativeArray<T> instanceData) where T : unmanaged
    => WriteInstanceDataJob(parameterIndex, instanceData, m_DummyArray);
319
/// <summary>
/// Schedules a parallel job that copies one component's values into the staging buffer, one element per staged instance.
/// </summary>
/// <param name="parameterIndex">Index of the component in the description array given to the constructor.</param>
/// <param name="instanceData">Source values. Without gathering its length must equal the staged instance count.</param>
/// <param name="gatherIndices">
/// Optional indices into <paramref name="instanceData"/>, one per staged instance. An empty array disables gathering.
/// </param>
/// <returns>The handle of the scheduled job, or a default handle when no instances are staged.</returns>
public unsafe JobHandle WriteInstanceDataJob<T>(int parameterIndex, NativeArray<T> instanceData, NativeArray<int> gatherIndices) where T : unmanaged
{
    if (m_InstanceCount == 0)
        return default;

    // Check the range first: the assertions below index the per-component tables with this value.
    Assert.IsTrue(parameterIndex >= 0 && parameterIndex < m_ComponentDataIndex.Length, "Parameter index invalid.");
    Assert.IsTrue(m_ComponentIsInstanced[parameterIndex], "Component is non instanced. Can only call this function on parameters that are for all instances.");
    Assert.IsTrue(m_ComponentDataIndex[parameterIndex] != -1, "Parameter index is not allocated. Did you allocate proper InstanceType parameters?");

    var gatherData = gatherIndices.Length != 0;
    Assert.IsTrue(gatherData || instanceData.Length == m_InstanceCount);
    Assert.IsTrue(!gatherData || gatherIndices.Length == m_InstanceCount);

    int elementByteSize = UnsafeUtility.SizeOf<T>();
    Assert.IsTrue(elementByteSize >= UnsafeUtility.SizeOf<uint>());

    int uintPerParameter = elementByteSize / UnsafeUtility.SizeOf<uint>();
    Assert.IsTrue(uintPerParameter == m_DescriptionsUintSize[parameterIndex], "Parameter to write is incompatible, must be same stride as destination.");

    // Remember the component so that a "written params only" submit uploads it.
    if (!m_WritenComponentIndices.Contains(parameterIndex))
        m_WritenComponentIndices.Add(parameterIndex);

    var writeJob = new WriteInstanceDataParameterJob
    {
        gatherData = gatherData,
        gatherIndices = gatherIndices,
        parameterIndex = parameterIndex,
        uintPerParameter = uintPerParameter,
        uintPerInstance = m_UintPerInstance,
        componentDataIndex = m_ComponentDataIndex,
        instanceData = instanceData.Reinterpret<uint>(elementByteSize),
        tmpDataBuffer = m_TmpDataBuffer
    };

    return writeJob.Schedule(m_InstanceCount, WriteInstanceDataParameterJob.k_BatchSize);
}
353
/// <summary>
/// Uploads the staged instance data and dispatches the scatter kernel that writes it into the destination buffer.
/// Resets the staged instance count and the list of written components afterwards.
/// </summary>
/// <param name="instanceDataBuffer">Destination buffer; its version is incremented.</param>
/// <param name="gpuInstanceIndices">One GPU instance index per staged instance.</param>
/// <param name="gpuResources">Scratch buffers and compute shader used for the upload.</param>
/// <param name="submitOnlyWrittenParams">
/// When true only the components written since the last submit are bound as valid components;
/// otherwise the destination buffer's own per-instance component list is used.
/// </param>
public void SubmitToGpu(GPUInstanceDataBuffer instanceDataBuffer, NativeArray<GPUInstanceIndex> gpuInstanceIndices, ref GPUResources gpuResources, bool submitOnlyWrittenParams)
{
    if (m_InstanceCount == 0)
        return;

    Assert.IsTrue(gpuInstanceIndices.Length == m_InstanceCount);

    ++instanceDataBuffer.version;

    int instanceByteSize = m_UintPerInstance * UnsafeUtility.SizeOf<uint>();
    int componentCount = m_ComponentDataIndex.Length;
    int writtenCount = m_WritenComponentIndices.Length;

    // Grow the scratch buffers if needed, then copy the staged CPU data into them.
    gpuResources.CreateResources(m_InstanceCount, instanceByteSize, componentCount, writtenCount);
    gpuResources.instanceData.SetData(m_TmpDataBuffer, 0, 0, m_InstanceCount * m_UintPerInstance);
    gpuResources.instanceIndices.SetData(gpuInstanceIndices, 0, 0, m_InstanceCount);
    gpuResources.inputComponentOffsets.SetData(m_ComponentDataIndex, 0, 0, componentCount);

    ComputeShader uploadShader = gpuResources.cs;
    int kernel = gpuResources.kernelId;

    uploadShader.SetInt(UploadKernelIDs._InputInstanceCounts, m_InstanceCount);
    uploadShader.SetInt(UploadKernelIDs._InputInstanceByteSize, instanceByteSize);
    uploadShader.SetBuffer(kernel, UploadKernelIDs._InputInstanceData, gpuResources.instanceData);
    uploadShader.SetBuffer(kernel, UploadKernelIDs._InputInstanceIndices, gpuResources.instanceIndices);
    uploadShader.SetBuffer(kernel, UploadKernelIDs._InputComponentOffsets, gpuResources.inputComponentOffsets);

    if (!submitOnlyWrittenParams)
    {
        // Use the destination buffer's list of per-instance components.
        uploadShader.SetInt(UploadKernelIDs._InputValidComponentCounts, instanceDataBuffer.perInstanceComponentCount);
        uploadShader.SetBuffer(kernel, UploadKernelIDs._InputValidComponentIndices, instanceDataBuffer.validComponentsIndicesGpuBuffer);
    }
    else
    {
        // Restrict the upload to the components recorded as written.
        gpuResources.validComponentIndices.SetData(m_WritenComponentIndices.AsArray(), 0, 0, writtenCount);
        uploadShader.SetInt(UploadKernelIDs._InputValidComponentCounts, writtenCount);
        uploadShader.SetBuffer(kernel, UploadKernelIDs._InputValidComponentIndices, gpuResources.validComponentIndices);
    }

    uploadShader.SetBuffer(kernel, UploadKernelIDs._InputComponentAddresses, instanceDataBuffer.componentAddressesGpuBuffer);
    uploadShader.SetBuffer(kernel, UploadKernelIDs._InputComponentByteCounts, instanceDataBuffer.componentByteCountsGpuBuffer);
    uploadShader.SetBuffer(kernel, UploadKernelIDs._InputComponentInstanceIndexRanges, instanceDataBuffer.componentInstanceIndexRangesGpuBuffer);
    uploadShader.SetBuffer(kernel, UploadKernelIDs._OutputBuffer, instanceDataBuffer.gpuBuffer);

    // One thread group per 64 instances, rounded up.
    int groupCount = (m_InstanceCount + 63) / 64;
    uploadShader.Dispatch(kernel, groupCount, 1, 1);

    m_InstanceCount = 0;
    m_WritenComponentIndices.Clear();
}
393
/// <summary>
/// Converts instance handles to GPU instance indices and submits the staged data through the index-based overload.
/// </summary>
/// <param name="instanceDataBuffer">Destination buffer; also performs the handle-to-index conversion.</param>
/// <param name="instances">Instance handles, one per staged instance.</param>
/// <param name="gpuResources">Scratch buffers and compute shader used for the upload.</param>
/// <param name="submitOnlyWrittenParams">Forwarded to the index-based overload.</param>
public void SubmitToGpu(GPUInstanceDataBuffer instanceDataBuffer, NativeArray<InstanceHandle> instances, ref GPUResources gpuResources, bool submitOnlyWrittenParams)
{
    if (m_InstanceCount == 0)
        return;

    var gpuInstanceIndices = new NativeArray<GPUInstanceIndex>(instances.Length, Allocator.TempJob, NativeArrayOptions.UninitializedMemory);

    // try/finally guarantees the TempJob allocation is returned even if conversion or submission throws.
    try
    {
        instanceDataBuffer.CPUInstanceArrayToGPUInstanceArray(instances, gpuInstanceIndices);
        SubmitToGpu(instanceDataBuffer, gpuInstanceIndices, ref gpuResources, submitOnlyWrittenParams);
    }
    finally
    {
        gpuInstanceIndices.Dispose();
    }
}
406
/// <summary>Releases every native container owned by the uploader that has been created.</summary>
public void Dispose()
{
    // Per-component tables.
    if (m_ComponentDataIndex.IsCreated)
    {
        m_ComponentDataIndex.Dispose();
    }

    if (m_ComponentIsInstanced.IsCreated)
    {
        m_ComponentIsInstanced.Dispose();
    }

    if (m_DescriptionsUintSize.IsCreated)
    {
        m_DescriptionsUintSize.Dispose();
    }

    // Staging data and bookkeeping.
    if (m_TmpDataBuffer.IsCreated)
    {
        m_TmpDataBuffer.Dispose();
    }

    if (m_WritenComponentIndices.IsCreated)
    {
        m_WritenComponentIndices.Dispose();
    }

    if (m_DummyArray.IsCreated)
    {
        m_DummyArray.Dispose();
    }
}
427
/// <summary>
/// Parallel job that copies one component value per instance from a source array into the
/// interleaved staging buffer, optionally picking the source element through gather indices.
/// </summary>
[BurstCompile(DisableSafetyChecks = true, OptimizeFor = OptimizeFor.Performance)]
internal struct WriteInstanceDataParameterJob : IJobParallelFor
{
    public const int k_BatchSize = 512;

    [ReadOnly] public bool gatherData;
    [ReadOnly] public int parameterIndex;
    [ReadOnly] public int uintPerParameter;
    [ReadOnly] public int uintPerInstance;
    [ReadOnly] public NativeArray<int> componentDataIndex;
    [ReadOnly] public NativeArray<int> gatherIndices;
    [NativeDisableContainerSafetyRestriction, NoAlias][ReadOnly] public NativeArray<uint> instanceData;

    [NativeDisableContainerSafetyRestriction, NoAlias][WriteOnly] public NativeArray<uint> tmpDataBuffer;

    /// <summary>Copies the component value of the instance at <paramref name="index"/> into its staged record.</summary>
    public unsafe void Execute(int index)
    {
        // First uint of this instance's record in the staging buffer.
        int recordStart = index * uintPerInstance;
        Assert.IsTrue(recordStart < tmpDataBuffer.Length, "Trying to write to an instance buffer out of bounds.");

        // Source element: either the gathered index or the instance index itself.
        int sourceElement = gatherData ? gatherIndices[index] : index;
        int dataOffset = sourceElement * uintPerParameter;
        Assert.IsTrue(dataOffset < instanceData.Length);

        uint* source = (uint*)instanceData.GetUnsafePtr() + dataOffset;
        uint* destination = (uint*)tmpDataBuffer.GetUnsafePtr() + recordStart + componentDataIndex[parameterIndex];

        int byteCount = uintPerParameter * UnsafeUtility.SizeOf<uint>();
        UnsafeUtility.MemCpy(destination, source, byteCount);
    }
}
457 }
458
459 internal struct GPUInstanceDataBufferGrower : IDisposable
460 {
/// <summary>
/// Cached shader property IDs for the parameters and buffers of the instance copy kernel.
/// </summary>
private static class CopyInstancesKernelIDs
{
    // Scalar parameters.
    public static readonly int _InputValidComponentCounts = Shader.PropertyToID("_InputValidComponentCounts");
    public static readonly int _InstanceCounts = Shader.PropertyToID("_InstanceCounts");
    public static readonly int _InstanceOffset = Shader.PropertyToID("_InstanceOffset");
    public static readonly int _OutputInstanceOffset = Shader.PropertyToID("_OutputInstanceOffset");

    // Layout description buffers.
    public static readonly int _ValidComponentIndices = Shader.PropertyToID("_ValidComponentIndices");
    public static readonly int _ComponentByteCounts = Shader.PropertyToID("_ComponentByteCounts");
    public static readonly int _InputComponentAddresses = Shader.PropertyToID("_InputComponentAddresses");
    public static readonly int _OutputComponentAddresses = Shader.PropertyToID("_OutputComponentAddresses");
    public static readonly int _InputComponentInstanceIndexRanges = Shader.PropertyToID("_InputComponentInstanceIndexRanges");

    // Source and destination data buffers.
    public static readonly int _InputBuffer = Shader.PropertyToID("_InputBuffer");
    public static readonly int _OutputBuffer = Shader.PropertyToID("_OutputBuffer");
}
475
/// <summary>
/// Compute shader state used when copying instances from an old buffer into a grown one.
/// </summary>
public struct GPUResources : IDisposable
{
    public ComputeShader cs;
    public int kernelId;

    /// <summary>Fetches the copy compute shader and its kernel index the first time it is called.</summary>
    /// <param name="resources">Resource container providing the copy kernels.</param>
    public void LoadShaders(GPUResidentDrawerResources resources)
    {
        if (cs != null)
            return;

        cs = resources.instanceDataBufferCopyKernels;
        kernelId = cs.FindKernel("MainCopyInstances");
    }

    /// <summary>Currently a no-op: the copy kernel needs no scratch resources.</summary>
    public void CreateResources()
    {
    }

    /// <summary>Drops the shader reference so that LoadShaders fetches it again on next use.</summary>
    public void Dispose()
    {
        cs = null;
    }
}
499
// Buffer to grow from.
private GPUInstanceDataBuffer m_SrcBuffer;
// Larger replacement buffer, or null when the source buffer is already big enough.
private GPUInstanceDataBuffer m_DstBuffer;

//@ We should implement buffer shrinker too, otherwise lots of instances can be allocated for trees for example
//@ while there are no trees in scenes that are in use at all.
/// <summary>
/// Compares the requested instance counts with those of <paramref name="sourceBuffer"/> and, if any type needs
/// more instances, builds a new buffer with the same component layout sized for <paramref name="instanceNumInfo"/>.
/// </summary>
/// <param name="sourceBuffer">Existing buffer whose components are replicated.</param>
/// <param name="instanceNumInfo">Requested instance counts per type; asserted to be at least the current counts.</param>
public unsafe GPUInstanceDataBufferGrower(GPUInstanceDataBuffer sourceBuffer, in InstanceNumInfo instanceNumInfo)
{
    m_SrcBuffer = sourceBuffer;
    m_DstBuffer = null;

    bool needToGrow = false;

    for (int typeIndex = 0; typeIndex < (int)InstanceType.Count; ++typeIndex)
    {
        int requestedNum = instanceNumInfo.InstanceNums[typeIndex];
        int currentNum = sourceBuffer.instanceNumInfo.InstanceNums[typeIndex];

        Assert.IsTrue(requestedNum >= currentNum, "Shrinking GPU instance buffer is not supported yet.");

        needToGrow |= requestedNum > currentNum;
    }

    // Nothing to build when every type already fits; SubmitToGpu then hands back the source buffer.
    if (!needToGrow)
        return;

    GPUInstanceDataBufferBuilder builder = new GPUInstanceDataBufferBuilder();

    // Replicate the source layout, component by component, in the same order.
    for (int componentIndex = 0; componentIndex < sourceBuffer.descriptions.Length; ++componentIndex)
    {
        GPUInstanceComponentDesc descriptor = sourceBuffer.descriptions[componentIndex];
        builder.AddComponent(descriptor.propertyID, descriptor.isOverriden, descriptor.byteSize, descriptor.isPerInstance, descriptor.instanceType, descriptor.componentGroup);
    }

    m_DstBuffer = builder.Build(instanceNumInfo);
    builder.Dispose();
}
531
/// <summary>
/// Copies the existing instances from the source buffer into the grown buffer on the GPU.
/// </summary>
/// <param name="gpuResources">Compute shader state used for the copy.</param>
/// <returns>
/// The source buffer when no growth was needed, otherwise the new buffer
/// (returned without any copy when the source holds no instances).
/// </returns>
public GPUInstanceDataBuffer SubmitToGpu(ref GPUResources gpuResources)
{
    if (m_DstBuffer == null)
        return m_SrcBuffer;

    int totalInstanceCount = m_SrcBuffer.instanceNumInfo.GetTotalInstanceNum();

    if (totalInstanceCount == 0)
        return m_DstBuffer;

    Assert.IsTrue(m_SrcBuffer.perInstanceComponentCount == m_DstBuffer.perInstanceComponentCount);

    gpuResources.CreateResources();

    ComputeShader copyShader = gpuResources.cs;
    int kernel = gpuResources.kernelId;

    // Layout tables come from the source buffer; only the output addresses and output data come from the destination.
    copyShader.SetInt(CopyInstancesKernelIDs._InputValidComponentCounts, m_SrcBuffer.perInstanceComponentCount);
    copyShader.SetBuffer(kernel, CopyInstancesKernelIDs._ValidComponentIndices, m_SrcBuffer.validComponentsIndicesGpuBuffer);
    copyShader.SetBuffer(kernel, CopyInstancesKernelIDs._ComponentByteCounts, m_SrcBuffer.componentByteCountsGpuBuffer);
    copyShader.SetBuffer(kernel, CopyInstancesKernelIDs._InputComponentAddresses, m_SrcBuffer.componentAddressesGpuBuffer);
    copyShader.SetBuffer(kernel, CopyInstancesKernelIDs._InputComponentInstanceIndexRanges, m_SrcBuffer.componentInstanceIndexRangesGpuBuffer);
    copyShader.SetBuffer(kernel, CopyInstancesKernelIDs._OutputComponentAddresses, m_DstBuffer.componentAddressesGpuBuffer);
    copyShader.SetBuffer(kernel, CopyInstancesKernelIDs._InputBuffer, m_SrcBuffer.gpuBuffer);
    copyShader.SetBuffer(kernel, CopyInstancesKernelIDs._OutputBuffer, m_DstBuffer.gpuBuffer);

    //@ We could compute new instance indices on CPU and do one dispatch.
    //@ Otherwise in theory these multiple dispatches could overlap with no UAV barrier between them as they write to a different parts of the UAV.
    //@ Need to profile which is better.
    for (int typeIndex = 0; typeIndex < (int)InstanceType.Count; ++typeIndex)
    {
        int instanceCount = m_SrcBuffer.instanceNumInfo.GetInstanceNum((InstanceType)typeIndex);

        // Types without instances need no dispatch.
        if (instanceCount <= 0)
            continue;

        int instanceOffset = m_SrcBuffer.instancesNumPrefixSum[typeIndex];
        int outputInstanceOffset = m_DstBuffer.instancesNumPrefixSum[typeIndex];

        copyShader.SetInt(CopyInstancesKernelIDs._InstanceCounts, instanceCount);
        copyShader.SetInt(CopyInstancesKernelIDs._InstanceOffset, instanceOffset);
        copyShader.SetInt(CopyInstancesKernelIDs._OutputInstanceOffset, outputInstanceOffset);

        // One thread group per 64 instances, rounded up.
        int groupCount = (instanceCount + 63) / 64;
        copyShader.Dispatch(kernel, groupCount, 1, 1);
    }

    return m_DstBuffer;
}
574
/// <summary>
/// Intentionally empty: this method releases neither the source nor the destination buffer.
/// </summary>
public void Dispose()
{
    // Nothing to release here.
}
578 }
579}