A game about forced loneliness, made by TACStudios
at master 579 lines 31 kB view raw
using Unity.Burst;
using Unity.Collections;
using UnityEngine.Assertions;
using Unity.Collections.LowLevel.Unsafe;
using Unity.Jobs;
using System;

namespace UnityEngine.Rendering
{
    /// <summary>
    /// Collects component descriptions and builds a <see cref="GPUInstanceDataBuffer"/> whose layout
    /// places each component's data one after another in a single raw GPU buffer.
    /// </summary>
    internal struct GPUInstanceDataBufferBuilder : IDisposable
    {
        // Lazily created on the first AddComponent call (Allocator.Temp).
        private NativeList<GPUInstanceComponentDesc> m_Components;

        /// <summary>
        /// Packs a GPU byte address and the "overridden" flag into a single metadata value.
        /// The flag occupies the top bit (0x80000000), so the address must fit in the lower 31 bits.
        /// </summary>
        private MetadataValue CreateMetadataValue(int nameID, int gpuAddress, bool isOverridden)
        {
            const uint kIsOverriddenBit = 0x80000000;
            return new MetadataValue
            {
                NameID = nameID,
                Value = (uint)gpuAddress | (isOverridden ? kIsOverriddenBit : 0),
            };
        }

        /// <summary>
        /// Adds a component whose byte size is taken from <c>UnsafeUtility.SizeOf&lt;T&gt;()</c>.
        /// </summary>
        public void AddComponent<T>(int propertyID, bool isOverriden, bool isPerInstance, InstanceType instanceType, InstanceComponentGroup componentGroup = InstanceComponentGroup.Default) where T : unmanaged
        {
            AddComponent(propertyID, isOverriden, UnsafeUtility.SizeOf<T>(), isPerInstance, instanceType, componentGroup);
        }

        /// <summary>
        /// Appends a component description. Components are expected to be added in non-decreasing
        /// <paramref name="instanceType"/> order; this is asserted against the previously added component.
        /// </summary>
        public void AddComponent(int propertyID, bool isOverriden, int byteSize, bool isPerInstance, InstanceType instanceType, InstanceComponentGroup componentGroup)
        {
            if (!m_Components.IsCreated)
                m_Components = new NativeList<GPUInstanceComponentDesc>(64, Allocator.Temp);

            if (m_Components.Length > 0)
                Assert.IsTrue(m_Components[m_Components.Length - 1].instanceType <= instanceType, "Added components must be sorted by InstanceType for better memory layout.");

            m_Components.Add(new GPUInstanceComponentDesc(propertyID, byteSize, isOverriden, isPerInstance, instanceType, componentGroup));
        }

        /// <summary>
        /// Computes the buffer layout for all added components and allocates the CPU side tables
        /// (Allocator.Persistent) and GPU buffers of the resulting <see cref="GPUInstanceDataBuffer"/>.
        /// </summary>
        /// <param name="instanceNumInfo">Number of instances per <see cref="InstanceType"/>.</param>
        /// <returns>The newly created buffer. Its native arrays and GPU buffers are allocated here and returned to the caller.</returns>
        public unsafe GPUInstanceDataBuffer Build(in InstanceNumInfo instanceNumInfo)
        {
            int perInstanceComponentCounts = 0;
            var perInstanceComponentIndices = new NativeArray<int>(m_Components.Length, Allocator.Temp);
            var componentAddresses = new NativeArray<int>(m_Components.Length, Allocator.Temp);
            var componentByteSizes = new NativeArray<int>(m_Components.Length, Allocator.Temp);
            var componentInstanceIndexRanges = new NativeArray<Vector2Int>(m_Components.Length, Allocator.Temp);

            GPUInstanceDataBuffer newBuffer = new GPUInstanceDataBuffer();
            newBuffer.instanceNumInfo = instanceNumInfo;
            newBuffer.instancesNumPrefixSum = new NativeArray<int>((int)InstanceType.Count, Allocator.Persistent);
            newBuffer.instancesSpan = new NativeArray<int>((int)InstanceType.Count, Allocator.Persistent);

            // Exclusive prefix sum of the per-type instance counts: entry i is the index of the first instance of type i.
            int sum = 0;

            for (int i = 0; i < (int)InstanceType.Count; ++i)
            {
                newBuffer.instancesNumPrefixSum[i] = sum;
                sum += instanceNumInfo.InstanceNums[i];
                newBuffer.instancesSpan[i] = instanceNumInfo.GetInstanceNumIncludingChildren((InstanceType)i);
            }

            newBuffer.layoutVersion = GPUInstanceDataBuffer.NextVersion();
            newBuffer.version = 0;
            newBuffer.defaultMetadata = new NativeArray<MetadataValue>(m_Components.Length, Allocator.Persistent);
            newBuffer.descriptions = new NativeArray<GPUInstanceComponentDesc>(m_Components.Length, Allocator.Persistent);
            newBuffer.nameToMetadataMap = new NativeParallelHashMap<int, int>(m_Components.Length, Allocator.Persistent);
            newBuffer.gpuBufferComponentAddress = new NativeArray<int>(m_Components.Length, Allocator.Persistent);

            //Initial offset, must be 0, 0, 0, 0.
            // The first four Vector4 slots are reserved and uploaded as zeros after the GPU buffer is created below.
            int vec4Size = UnsafeUtility.SizeOf<Vector4>();
            int byteOffset = 4 * vec4Size;

            for (int c = 0; c < m_Components.Length; ++c)
            {
                var componentDesc = m_Components[c];
                newBuffer.descriptions[c] = componentDesc;

                int instancesBegin = newBuffer.instancesNumPrefixSum[(int)componentDesc.instanceType];
                int instancesEnd = instancesBegin + newBuffer.instancesSpan[(int)componentDesc.instanceType];
                // Components that are not per instance store exactly one element.
                int instancesNum = componentDesc.isPerInstance ? instancesEnd - instancesBegin : 1;
                Assert.IsTrue(instancesNum >= 0);

                componentInstanceIndexRanges[c] = new Vector2Int(instancesBegin, instancesBegin + instancesNum);

                // The address is biased by the first instance index of the range, so that
                // componentGPUAddress + instancesBegin * byteSize equals byteOffset.
                int componentGPUAddress = byteOffset - instancesBegin * componentDesc.byteSize;
                Assert.IsTrue(componentGPUAddress >= 0, "GPUInstanceDataBufferBuilder: GPU address is negative. This is not supported for now. See kIsOverriddenBit." +
                    "In general, if there is only one root InstanceType (MeshRenderer in our case) with a component that is larger or equal in size than any component in a derived InstanceType." +
                    "And the number of parent gpu instances are always larger or equal to the number of derived type gpu instances. Than GPU address cannot become negative.");

                newBuffer.gpuBufferComponentAddress[c] = componentGPUAddress;
                newBuffer.defaultMetadata[c] = CreateMetadataValue(componentDesc.propertyID, componentGPUAddress, componentDesc.isOverriden);

                componentAddresses[c] = componentGPUAddress;
                componentByteSizes[c] = componentDesc.byteSize;

                int componentByteSize = componentDesc.byteSize * instancesNum;
                byteOffset += componentByteSize;

                bool addedToMap = newBuffer.nameToMetadataMap.TryAdd(componentDesc.propertyID, c);
                Assert.IsTrue(addedToMap, "Repetitive metadata element added to object.");

                if (componentDesc.isPerInstance)
                {
                    perInstanceComponentIndices[perInstanceComponentCounts] = c;
                    perInstanceComponentCounts++;
                }
            }

            newBuffer.byteSize = byteOffset;
            newBuffer.gpuBuffer = new GraphicsBuffer(GraphicsBuffer.Target.Raw, newBuffer.byteSize / 4, 4);

            // Zero-initialized scratch array (default NativeArrayOptions clears memory) for the reserved header.
            var zeroHeader = new NativeArray<Vector4>(4, Allocator.Temp);
            newBuffer.gpuBuffer.SetData(zeroHeader, 0, 0, 4);

            newBuffer.validComponentsIndicesGpuBuffer = new GraphicsBuffer(GraphicsBuffer.Target.Raw, perInstanceComponentCounts, 4);
            newBuffer.validComponentsIndicesGpuBuffer.SetData(perInstanceComponentIndices, 0, 0, perInstanceComponentCounts);
            newBuffer.componentAddressesGpuBuffer = new GraphicsBuffer(GraphicsBuffer.Target.Raw, m_Components.Length, 4);
            newBuffer.componentAddressesGpuBuffer.SetData(componentAddresses, 0, 0, m_Components.Length);
            newBuffer.componentInstanceIndexRangesGpuBuffer = new GraphicsBuffer(GraphicsBuffer.Target.Raw, m_Components.Length, 8);
            newBuffer.componentInstanceIndexRangesGpuBuffer.SetData(componentInstanceIndexRanges, 0, 0, m_Components.Length);
            newBuffer.componentByteCountsGpuBuffer = new GraphicsBuffer(GraphicsBuffer.Target.Raw, m_Components.Length, 4);
            newBuffer.componentByteCountsGpuBuffer.SetData(componentByteSizes, 0, 0, m_Components.Length);
            newBuffer.perInstanceComponentCount = perInstanceComponentCounts;

            // Release every scratch array allocated by this method.
            zeroHeader.Dispose();
            perInstanceComponentIndices.Dispose();
            componentAddresses.Dispose();
            componentByteSizes.Dispose();
            componentInstanceIndexRanges.Dispose();

            return newBuffer;
        }

        /// <summary>Disposes the component list if it was created.</summary>
        public void Dispose()
        {
            if (m_Components.IsCreated)
                m_Components.Dispose();
        }
    }

    /// <summary>
    /// Stages per-instance component data for one <see cref="InstanceType"/> in a CPU buffer and
    /// scatters it into a <see cref="GPUInstanceDataBuffer"/> with a compute dispatch.
    /// </summary>
    internal struct GPUInstanceDataBufferUploader : IDisposable
    {
        private static class UploadKernelIDs
        {
            public static readonly int _InputValidComponentCounts = Shader.PropertyToID("_InputValidComponentCounts");
            public static readonly int _InputInstanceCounts = Shader.PropertyToID("_InputInstanceCounts");
            public static readonly int _InputInstanceByteSize = Shader.PropertyToID("_InputInstanceByteSize");
            public static readonly int _InputComponentOffsets = Shader.PropertyToID("_InputComponentOffsets");
            public static readonly int _InputInstanceData = Shader.PropertyToID("_InputInstanceData");
            public static readonly int _InputInstanceIndices = Shader.PropertyToID("_InputInstanceIndices");
            public static readonly int _InputValidComponentIndices = Shader.PropertyToID("_InputValidComponentIndices");
            public static readonly int _InputComponentAddresses = Shader.PropertyToID("_InputComponentAddresses");
            public static readonly int _InputComponentByteCounts = Shader.PropertyToID("_InputComponentByteCounts");
            public static readonly int _InputComponentInstanceIndexRanges = Shader.PropertyToID("_InputComponentInstanceIndexRanges");
            public static readonly int _OutputBuffer = Shader.PropertyToID("_OutputBuffer");
        }

        /// <summary>
        /// Compute shader plus the input buffers used by the upload kernel. Buffers only grow:
        /// they are recreated when a larger size than the cached one is requested.
        /// </summary>
        public struct GPUResources : IDisposable
        {
            public ComputeBuffer instanceData;
            public ComputeBuffer instanceIndices;
            public ComputeBuffer inputComponentOffsets;
            public ComputeBuffer validComponentIndices;
            public ComputeShader cs;
            public int kernelId;

            // Sizes the buffers above were last created with.
            private int m_InstanceDataByteSize;
            private int m_InstanceCount;
            private int m_ComponentCounts;
            private int m_ValidComponentIndicesCount;

            /// <summary>Caches the upload compute shader and its kernel index if not already loaded.</summary>
            public void LoadShaders(GPUResidentDrawerResources resources)
            {
                if (cs == null)
                {
                    cs = resources.instanceDataBufferUploadKernels;
                    kernelId = cs.FindKernel("MainUploadScatterInstances");
                }
            }

            /// <summary>
            /// Ensures each input buffer exists and is at least as large as requested,
            /// releasing and recreating any buffer that is missing or too small.
            /// </summary>
            public void CreateResources(int newInstanceCount, int sizePerInstance, int newComponentCounts, int validComponentIndicesCount)
            {
                int newInstanceDataByteSize = newInstanceCount * sizePerInstance;
                if (newInstanceDataByteSize > m_InstanceDataByteSize || instanceData == null)
                {
                    if (instanceData != null)
                        instanceData.Release();

                    // Raw buffer with a 4 byte stride: round the byte size up to whole uints.
                    instanceData = new ComputeBuffer((newInstanceDataByteSize + 3) / 4, 4, ComputeBufferType.Raw);
                    m_InstanceDataByteSize = newInstanceDataByteSize;
                }

                if (newInstanceCount > m_InstanceCount || instanceIndices == null)
                {
                    if (instanceIndices != null)
                        instanceIndices.Release();

                    instanceIndices = new ComputeBuffer(newInstanceCount, 4, ComputeBufferType.Raw);
                    m_InstanceCount = newInstanceCount;
                }

                if (newComponentCounts > m_ComponentCounts || inputComponentOffsets == null)
                {
                    if (inputComponentOffsets != null)
                        inputComponentOffsets.Release();

                    inputComponentOffsets = new ComputeBuffer(newComponentCounts, 4, ComputeBufferType.Raw);
                    m_ComponentCounts = newComponentCounts;
                }

                if (validComponentIndicesCount > m_ValidComponentIndicesCount || validComponentIndices == null)
                {
                    if (validComponentIndices != null)
                        validComponentIndices.Release();

                    validComponentIndices = new ComputeBuffer(validComponentIndicesCount, 4, ComputeBufferType.Raw);
                    m_ValidComponentIndicesCount = validComponentIndicesCount;
                }
            }

            /// <summary>
            /// Releases all buffers and resets the struct to its initial state. The references and cached
            /// sizes are cleared so that a later <see cref="CreateResources"/> call recreates the buffers
            /// instead of reusing released ones.
            /// </summary>
            public void Dispose()
            {
                cs = null;

                if (instanceData != null)
                    instanceData.Release();
                instanceData = null;
                m_InstanceDataByteSize = 0;

                if (instanceIndices != null)
                    instanceIndices.Release();
                instanceIndices = null;
                m_InstanceCount = 0;

                if (inputComponentOffsets != null)
                    inputComponentOffsets.Release();
                inputComponentOffsets = null;
                m_ComponentCounts = 0;

                if (validComponentIndices != null)
                    validComponentIndices.Release();
                validComponentIndices = null;
                m_ValidComponentIndicesCount = 0;
            }
        }

        int m_UintPerInstance;                      // Sum of the uint sizes of all per-instance components of the uploaded InstanceType.
        int m_Capacity;                             // Maximum number of instances the staging buffer can hold.
        int m_InstanceCount;                        // Number of instances in the current upload; reset to 0 after SubmitToGpu.
        NativeArray<bool> m_ComponentIsInstanced;   // Per component: isPerInstance flag.
        NativeArray<int> m_ComponentDataIndex;      // Per component: uint offset inside one staged instance, or -1 for other InstanceTypes.
        NativeArray<int> m_DescriptionsUintSize;    // Per component: size in uints, or 0 for other InstanceTypes.
        NativeArray<uint> m_TmpDataBuffer;          // CPU staging buffer, m_Capacity * m_UintPerInstance uints.
        NativeList<int> m_WritenComponentIndices;   // Components written since the last submit.

        // Empty array passed as "no gather indices" to the job.
        private NativeArray<int> m_DummyArray;

        /// <summary>
        /// Creates an uploader for the components in <paramref name="descriptions"/> that belong to
        /// <paramref name="instanceType"/>, with room for <paramref name="capacity"/> instances.
        /// </summary>
        public GPUInstanceDataBufferUploader(in NativeArray<GPUInstanceComponentDesc> descriptions, int capacity, InstanceType instanceType)
        {
            m_Capacity = capacity;
            m_InstanceCount = 0;
            m_UintPerInstance = 0;
            m_ComponentDataIndex = new NativeArray<int>(descriptions.Length, Allocator.TempJob, NativeArrayOptions.UninitializedMemory);
            m_ComponentIsInstanced = new NativeArray<bool>(descriptions.Length, Allocator.TempJob, NativeArrayOptions.UninitializedMemory);
            m_DescriptionsUintSize = new NativeArray<int>(descriptions.Length, Allocator.TempJob, NativeArrayOptions.UninitializedMemory);
            m_WritenComponentIndices = new NativeList<int>(descriptions.Length, Allocator.TempJob);
            m_DummyArray = new NativeArray<int>(0, Allocator.Persistent);

            int uintSize = UnsafeUtility.SizeOf<uint>();

            for (int c = 0; c < descriptions.Length; ++c)
            {
                var componentDesc = descriptions[c];
                m_ComponentIsInstanced[c] = componentDesc.isPerInstance;
                if (componentDesc.instanceType == instanceType)
                {
                    m_ComponentDataIndex[c] = m_UintPerInstance;
                    m_DescriptionsUintSize[c] = descriptions[c].byteSize / uintSize;
                    // Only per-instance components take space in the staged instance record.
                    m_UintPerInstance += componentDesc.isPerInstance ? (componentDesc.byteSize / uintSize) : 0;
                }
                else
                {
                    m_ComponentDataIndex[c] = -1;
                    m_DescriptionsUintSize[c] = 0;
                }
            }

            m_TmpDataBuffer = new NativeArray<uint>(m_Capacity * m_UintPerInstance, Allocator.TempJob, NativeArrayOptions.UninitializedMemory);
        }

        /// <summary>Returns a raw pointer to the CPU staging buffer. Asserts that an upload has been allocated.</summary>
        public unsafe IntPtr GetUploadBufferPtr()
        {
            Assert.IsTrue(m_TmpDataBuffer.IsCreated);
            Assert.IsTrue(m_TmpDataBuffer.Length > 0 && m_InstanceCount > 0);
            return new IntPtr(m_TmpDataBuffer.GetUnsafePtr());
        }

        /// <summary>Returns the number of uints one staged instance occupies.</summary>
        public int GetUIntPerInstance()
        {
            return m_UintPerInstance;
        }

        /// <summary>
        /// Returns the uint offset of the given component inside one staged instance.
        /// </summary>
        public int GetParamUIntOffset(int parameterIndex)
        {
            // Validate the index first: the remaining assertions index the per-component tables with it.
            Assert.IsTrue(parameterIndex >= 0 && parameterIndex < m_ComponentDataIndex.Length, "Parameter index invalid.");
            Assert.IsTrue(m_ComponentIsInstanced[parameterIndex], "Component is non instanced. Can only call this function on parameters that are for all instances.");
            Assert.IsTrue(m_ComponentDataIndex[parameterIndex] != -1, "Parameter index is not allocated. Did you allocate proper InstanceType parameters?");
            return m_ComponentDataIndex[parameterIndex];
        }

        /// <summary>
        /// Marks the component as written for the next submit and returns its uint offset inside one
        /// staged instance. Asserts that <typeparamref name="T"/> has the same uint size as the component.
        /// </summary>
        public int PrepareParamWrite<T>(int parameterIndex) where T : unmanaged
        {
            // Resolve (and validate) the offset before touching any other per-component table.
            int paramUIntOffset = GetParamUIntOffset(parameterIndex);

            int uintPerParameter = UnsafeUtility.SizeOf<T>() / UnsafeUtility.SizeOf<uint>();
            Assert.IsTrue(uintPerParameter == m_DescriptionsUintSize[parameterIndex], "Parameter to write is incompatible, must be same stride as destination.");

            if (!m_WritenComponentIndices.Contains(parameterIndex))
                m_WritenComponentIndices.Add(parameterIndex);

            return paramUIntOffset;
        }

        /// <summary>Sets the number of instances of the next upload. Must not exceed the capacity.</summary>
        public unsafe void AllocateUploadHandles(int handlesLength)
        {
            // No need to preallocate instances anymore, as those are passed as parameters to SubmitToGPU to avoid data duplication
            // We just set the instance count here to ensure that a) we have the correct capacity and b) write/gatherInstanceData copies the correct amount
            Assert.IsTrue(m_Capacity >= handlesLength);
            m_InstanceCount = handlesLength;
        }

        /// <summary>
        /// Schedules a job copying <paramref name="instanceData"/> (one element per instance, in order)
        /// into the staging buffer slot of the given component.
        /// </summary>
        public unsafe JobHandle WriteInstanceDataJob<T>(int parameterIndex, NativeArray<T> instanceData) where T : unmanaged
        {
            return WriteInstanceDataJob(parameterIndex, instanceData, m_DummyArray);
        }

        /// <summary>
        /// Schedules a job copying component data into the staging buffer. When
        /// <paramref name="gatherIndices"/> is non-empty, instance i reads element gatherIndices[i]
        /// of <paramref name="instanceData"/>; otherwise it reads element i.
        /// </summary>
        /// <returns>The scheduled job handle, or a default handle when no instances are allocated.</returns>
        public unsafe JobHandle WriteInstanceDataJob<T>(int parameterIndex, NativeArray<T> instanceData, NativeArray<int> gatherIndices) where T : unmanaged
        {
            if (m_InstanceCount == 0)
                return default;

            var gatherData = gatherIndices.Length != 0;
            Assert.IsTrue(gatherData || instanceData.Length == m_InstanceCount);
            Assert.IsTrue(!gatherData || gatherIndices.Length == m_InstanceCount);
            Assert.IsTrue(UnsafeUtility.SizeOf<T>() >= UnsafeUtility.SizeOf<uint>());

            int uintPerParameter = UnsafeUtility.SizeOf<T>() / UnsafeUtility.SizeOf<uint>();

            // Validate the index first: the remaining assertions index the per-component tables with it.
            Assert.IsTrue(parameterIndex >= 0 && parameterIndex < m_ComponentDataIndex.Length, "Parameter index invalid.");
            Assert.IsTrue(m_ComponentIsInstanced[parameterIndex], "Component is non instanced. Can only call this function on parameters that are for all instances.");
            Assert.IsTrue(uintPerParameter == m_DescriptionsUintSize[parameterIndex], "Parameter to write is incompatible, must be same stride as destination.");
            Assert.IsTrue(m_ComponentDataIndex[parameterIndex] != -1, "Parameter index is not allocated. Did you allocate proper InstanceType parameters?");

            if (!m_WritenComponentIndices.Contains(parameterIndex))
                m_WritenComponentIndices.Add(parameterIndex);

            var writeJob = new WriteInstanceDataParameterJob
            {
                gatherData = gatherData,
                gatherIndices = gatherIndices,
                parameterIndex = parameterIndex,
                uintPerParameter = uintPerParameter,
                uintPerInstance = m_UintPerInstance,
                componentDataIndex = m_ComponentDataIndex,
                instanceData = instanceData.Reinterpret<uint>(UnsafeUtility.SizeOf<T>()),
                tmpDataBuffer = m_TmpDataBuffer
            };

            return writeJob.Schedule(m_InstanceCount, WriteInstanceDataParameterJob.k_BatchSize);
        }

        /// <summary>
        /// Uploads the staged data and dispatches the upload kernel that writes it into
        /// <paramref name="instanceDataBuffer"/>. Increments the buffer version and resets the
        /// uploader's instance count and written-component list afterwards.
        /// </summary>
        /// <param name="submitOnlyWrittenParams">
        /// When true, only components recorded as written are passed to the kernel; otherwise the
        /// buffer's full list of per-instance components is used.
        /// </param>
        public void SubmitToGpu(GPUInstanceDataBuffer instanceDataBuffer, NativeArray<GPUInstanceIndex> gpuInstanceIndices, ref GPUResources gpuResources, bool submitOnlyWrittenParams)
        {
            if (m_InstanceCount == 0)
                return;

            Assert.IsTrue(gpuInstanceIndices.Length == m_InstanceCount);

            ++instanceDataBuffer.version;
            int uintSize = UnsafeUtility.SizeOf<uint>();
            int instanceByteSize = m_UintPerInstance * uintSize;
            gpuResources.CreateResources(m_InstanceCount, instanceByteSize, m_ComponentDataIndex.Length, m_WritenComponentIndices.Length);
            gpuResources.instanceData.SetData(m_TmpDataBuffer, 0, 0, m_InstanceCount * m_UintPerInstance);
            gpuResources.instanceIndices.SetData(gpuInstanceIndices, 0, 0, m_InstanceCount);
            gpuResources.inputComponentOffsets.SetData(m_ComponentDataIndex, 0, 0, m_ComponentDataIndex.Length);
            gpuResources.cs.SetInt(UploadKernelIDs._InputInstanceCounts, m_InstanceCount);
            gpuResources.cs.SetInt(UploadKernelIDs._InputInstanceByteSize, instanceByteSize);
            gpuResources.cs.SetBuffer(gpuResources.kernelId, UploadKernelIDs._InputInstanceData, gpuResources.instanceData);
            gpuResources.cs.SetBuffer(gpuResources.kernelId, UploadKernelIDs._InputInstanceIndices, gpuResources.instanceIndices);
            gpuResources.cs.SetBuffer(gpuResources.kernelId, UploadKernelIDs._InputComponentOffsets, gpuResources.inputComponentOffsets);
            if (submitOnlyWrittenParams)
            {
                gpuResources.validComponentIndices.SetData(m_WritenComponentIndices.AsArray(), 0, 0, m_WritenComponentIndices.Length);
                gpuResources.cs.SetInt(UploadKernelIDs._InputValidComponentCounts, m_WritenComponentIndices.Length);
                gpuResources.cs.SetBuffer(gpuResources.kernelId, UploadKernelIDs._InputValidComponentIndices, gpuResources.validComponentIndices);
            }
            else
            {
                gpuResources.cs.SetInt(UploadKernelIDs._InputValidComponentCounts, instanceDataBuffer.perInstanceComponentCount);
                gpuResources.cs.SetBuffer(gpuResources.kernelId, UploadKernelIDs._InputValidComponentIndices, instanceDataBuffer.validComponentsIndicesGpuBuffer);
            }
            gpuResources.cs.SetBuffer(gpuResources.kernelId, UploadKernelIDs._InputComponentAddresses, instanceDataBuffer.componentAddressesGpuBuffer);
            gpuResources.cs.SetBuffer(gpuResources.kernelId, UploadKernelIDs._InputComponentByteCounts, instanceDataBuffer.componentByteCountsGpuBuffer);
            gpuResources.cs.SetBuffer(gpuResources.kernelId, UploadKernelIDs._InputComponentInstanceIndexRanges, instanceDataBuffer.componentInstanceIndexRangesGpuBuffer);
            gpuResources.cs.SetBuffer(gpuResources.kernelId, UploadKernelIDs._OutputBuffer, instanceDataBuffer.gpuBuffer);
            // One thread group per 64 instances, rounded up (presumably matching the kernel's group size; confirm in the shader).
            gpuResources.cs.Dispatch(gpuResources.kernelId, (m_InstanceCount + 63) / 64, 1, 1);

            m_InstanceCount = 0;
            m_WritenComponentIndices.Clear();
        }

        /// <summary>
        /// Converts <paramref name="instances"/> to GPU instance indices and submits the staged data.
        /// </summary>
        public void SubmitToGpu(GPUInstanceDataBuffer instanceDataBuffer, NativeArray<InstanceHandle> instances, ref GPUResources gpuResources, bool submitOnlyWrittenParams)
        {
            if (m_InstanceCount == 0)
                return;

            var gpuInstanceIndices = new NativeArray<GPUInstanceIndex>(instances.Length, Allocator.TempJob, NativeArrayOptions.UninitializedMemory);
            try
            {
                instanceDataBuffer.CPUInstanceArrayToGPUInstanceArray(instances, gpuInstanceIndices);

                SubmitToGpu(instanceDataBuffer, gpuInstanceIndices, ref gpuResources, submitOnlyWrittenParams);
            }
            finally
            {
                // TempJob allocations must be released explicitly, also when the submit throws.
                gpuInstanceIndices.Dispose();
            }
        }

        /// <summary>Disposes every native container owned by the uploader that was created.</summary>
        public void Dispose()
        {
            if (m_ComponentDataIndex.IsCreated)
                m_ComponentDataIndex.Dispose();

            if (m_ComponentIsInstanced.IsCreated)
                m_ComponentIsInstanced.Dispose();

            if (m_DescriptionsUintSize.IsCreated)
                m_DescriptionsUintSize.Dispose();

            if (m_TmpDataBuffer.IsCreated)
                m_TmpDataBuffer.Dispose();

            if (m_WritenComponentIndices.IsCreated)
                m_WritenComponentIndices.Dispose();

            if (m_DummyArray.IsCreated)
                m_DummyArray.Dispose();
        }

        /// <summary>
        /// Copies one component value per instance from <c>instanceData</c> into the interleaved
        /// staging buffer <c>tmpDataBuffer</c>.
        /// </summary>
        [BurstCompile(DisableSafetyChecks = true, OptimizeFor = OptimizeFor.Performance)]
        internal struct WriteInstanceDataParameterJob : IJobParallelFor
        {
            public const int k_BatchSize = 512;

            [ReadOnly] public bool gatherData;
            [ReadOnly] public int parameterIndex;
            [ReadOnly] public int uintPerParameter;
            [ReadOnly] public int uintPerInstance;
            [ReadOnly] public NativeArray<int> componentDataIndex;
            [ReadOnly] public NativeArray<int> gatherIndices;
            [NativeDisableContainerSafetyRestriction, NoAlias][ReadOnly] public NativeArray<uint> instanceData;

            [NativeDisableContainerSafetyRestriction, NoAlias][WriteOnly] public NativeArray<uint> tmpDataBuffer;

            public unsafe void Execute(int index)
            {
                Assert.IsTrue(index * uintPerInstance < tmpDataBuffer.Length, "Trying to write to an instance buffer out of bounds.");

                // Source element: gathered through gatherIndices when enabled, otherwise the job index itself.
                int dataOffset = (gatherData ? gatherIndices[index] : index) * uintPerParameter;
                Assert.IsTrue(dataOffset < instanceData.Length);

                int uintSize = UnsafeUtility.SizeOf<uint>();

                // Destination: start of this instance's record plus the component's offset inside the record.
                uint* data = (uint*)instanceData.GetUnsafePtr() + dataOffset;
                UnsafeUtility.MemCpy((uint*)tmpDataBuffer.GetUnsafePtr() + index * uintPerInstance + componentDataIndex[parameterIndex], data,
                    uintPerParameter * uintSize);
            }
        }
    }

    /// <summary>
    /// Builds a larger <see cref="GPUInstanceDataBuffer"/> with the same components as a source buffer
    /// and copies the existing instances into it with compute dispatches.
    /// </summary>
    internal struct GPUInstanceDataBufferGrower : IDisposable
    {
        private static class CopyInstancesKernelIDs
        {
            public static readonly int _InputValidComponentCounts = Shader.PropertyToID("_InputValidComponentCounts");
            public static readonly int _InstanceCounts = Shader.PropertyToID("_InstanceCounts");
            public static readonly int _InstanceOffset = Shader.PropertyToID("_InstanceOffset");
            public static readonly int _OutputInstanceOffset = Shader.PropertyToID("_OutputInstanceOffset");
            public static readonly int _ValidComponentIndices = Shader.PropertyToID("_ValidComponentIndices");
            public static readonly int _ComponentByteCounts = Shader.PropertyToID("_ComponentByteCounts");
            public static readonly int _InputComponentAddresses = Shader.PropertyToID("_InputComponentAddresses");
            public static readonly int _OutputComponentAddresses = Shader.PropertyToID("_OutputComponentAddresses");
            public static readonly int _InputComponentInstanceIndexRanges = Shader.PropertyToID("_InputComponentInstanceIndexRanges");
            public static readonly int _InputBuffer = Shader.PropertyToID("_InputBuffer");
            public static readonly int _OutputBuffer = Shader.PropertyToID("_OutputBuffer");
        }

        /// <summary>Compute shader and kernel index used to copy instances between buffers.</summary>
        public struct GPUResources : IDisposable
        {
            public ComputeShader cs;
            public int kernelId;

            /// <summary>Caches the copy compute shader and its kernel index if not already loaded.</summary>
            public void LoadShaders(GPUResidentDrawerResources resources)
            {
                if (cs == null)
                {
                    cs = resources.instanceDataBufferCopyKernels;
                    kernelId = cs.FindKernel("MainCopyInstances");
                }
            }

            /// <summary>Intentionally empty: the copy kernel needs no buffers of its own.</summary>
            public void CreateResources()
            {
            }

            /// <summary>Drops the shader reference.</summary>
            public void Dispose()
            {
                cs = null;
            }
        }

        private GPUInstanceDataBuffer m_SrcBuffer;
        private GPUInstanceDataBuffer m_DstBuffer; // Stays null when no growth is needed.

        //@ We should implement buffer shrinker too, otherwise lots of instances can be allocated for trees for example
        //@ while there are no trees in scenes that are in use at all.
        /// <summary>
        /// Prepares growth of <paramref name="sourceBuffer"/> to the instance counts in
        /// <paramref name="instanceNumInfo"/>. A destination buffer is built only when at least one
        /// instance type grows; shrinking is asserted against.
        /// </summary>
        public unsafe GPUInstanceDataBufferGrower(GPUInstanceDataBuffer sourceBuffer, in InstanceNumInfo instanceNumInfo)
        {
            m_SrcBuffer = sourceBuffer;
            m_DstBuffer = null;

            bool needToGrow = false;

            for (int i = 0; i < (int)InstanceType.Count; ++i)
            {
                Assert.IsTrue(instanceNumInfo.InstanceNums[i] >= sourceBuffer.instanceNumInfo.InstanceNums[i], "Shrinking GPU instance buffer is not supported yet.");

                if (instanceNumInfo.InstanceNums[i] > sourceBuffer.instanceNumInfo.InstanceNums[i])
                    needToGrow = true;
            }

            if (!needToGrow)
                return;

            GPUInstanceDataBufferBuilder builder = new GPUInstanceDataBufferBuilder();
            try
            {
                // Re-declare the source layout so the destination buffer has identical components.
                foreach (GPUInstanceComponentDesc descriptor in sourceBuffer.descriptions)
                    builder.AddComponent(descriptor.propertyID, descriptor.isOverriden, descriptor.byteSize, descriptor.isPerInstance, descriptor.instanceType, descriptor.componentGroup);

                m_DstBuffer = builder.Build(instanceNumInfo);
            }
            finally
            {
                // Release the builder's component list even if Build throws.
                builder.Dispose();
            }
        }

        /// <summary>
        /// Copies the source instances into the destination buffer, one dispatch per instance type
        /// that has instances.
        /// </summary>
        /// <returns>
        /// The source buffer when no growth was needed, otherwise the destination buffer.
        /// The source buffer is not disposed here.
        /// </returns>
        public GPUInstanceDataBuffer SubmitToGpu(ref GPUResources gpuResources)
        {
            if (m_DstBuffer == null)
                return m_SrcBuffer;

            int totalInstanceCount = m_SrcBuffer.instanceNumInfo.GetTotalInstanceNum();

            // Nothing to copy from an empty source.
            if (totalInstanceCount == 0)
                return m_DstBuffer;

            Assert.IsTrue(m_SrcBuffer.perInstanceComponentCount == m_DstBuffer.perInstanceComponentCount);

            gpuResources.CreateResources();
            gpuResources.cs.SetInt(CopyInstancesKernelIDs._InputValidComponentCounts, m_SrcBuffer.perInstanceComponentCount);
            gpuResources.cs.SetBuffer(gpuResources.kernelId, CopyInstancesKernelIDs._ValidComponentIndices, m_SrcBuffer.validComponentsIndicesGpuBuffer);
            gpuResources.cs.SetBuffer(gpuResources.kernelId, CopyInstancesKernelIDs._ComponentByteCounts, m_SrcBuffer.componentByteCountsGpuBuffer);
            gpuResources.cs.SetBuffer(gpuResources.kernelId, CopyInstancesKernelIDs._InputComponentAddresses, m_SrcBuffer.componentAddressesGpuBuffer);
            gpuResources.cs.SetBuffer(gpuResources.kernelId, CopyInstancesKernelIDs._InputComponentInstanceIndexRanges, m_SrcBuffer.componentInstanceIndexRangesGpuBuffer);
            gpuResources.cs.SetBuffer(gpuResources.kernelId, CopyInstancesKernelIDs._OutputComponentAddresses, m_DstBuffer.componentAddressesGpuBuffer);
            gpuResources.cs.SetBuffer(gpuResources.kernelId, CopyInstancesKernelIDs._InputBuffer, m_SrcBuffer.gpuBuffer);
            gpuResources.cs.SetBuffer(gpuResources.kernelId, CopyInstancesKernelIDs._OutputBuffer, m_DstBuffer.gpuBuffer);

            //@ We could compute new instance indices on CPU and do one dispatch.
            //@ Otherwise in theory these multiple dispatches could overlap with no UAV barrier between them as they write to a different parts of the UAV.
            //@ Need to profile which is better.
            for (int i = 0; i < (int)InstanceType.Count; ++i)
            {
                int instanceCount = m_SrcBuffer.instanceNumInfo.GetInstanceNum((InstanceType)i);

                if (instanceCount > 0)
                {
                    // First instance index of this type in the source and destination layouts.
                    int instanceOffset = m_SrcBuffer.instancesNumPrefixSum[i];
                    int outputInstanceOffset = m_DstBuffer.instancesNumPrefixSum[i];
                    gpuResources.cs.SetInt(CopyInstancesKernelIDs._InstanceCounts, instanceCount);
                    gpuResources.cs.SetInt(CopyInstancesKernelIDs._InstanceOffset, instanceOffset);
                    gpuResources.cs.SetInt(CopyInstancesKernelIDs._OutputInstanceOffset, outputInstanceOffset);
                    gpuResources.cs.Dispatch(gpuResources.kernelId, (instanceCount + 63) / 64, 1, 1);
                }
            }

            return m_DstBuffer;
        }

        /// <summary>Intentionally empty: the grower owns no native resources of its own.</summary>
        public void Dispose()
        {
        }
    }
}