The open source OpenXR runtime
1#ifdef TRACY_ENABLE
2
3#ifdef _WIN32
4# ifndef NOMINMAX
5# define NOMINMAX
6# endif
7# include <winsock2.h>
8# include <windows.h>
9# include <tlhelp32.h>
10# include <inttypes.h>
11# include <intrin.h>
12# include "../common/TracyUwp.hpp"
13#else
14# include <sys/time.h>
15# include <sys/param.h>
16#endif
17
18#ifdef _GNU_SOURCE
19# include <errno.h>
20#endif
21
22#ifdef __linux__
23# include <dirent.h>
24# include <pthread.h>
25# include <sys/types.h>
26# include <sys/syscall.h>
27#endif
28
29#if defined __APPLE__ || defined BSD
30# include <sys/types.h>
31# include <sys/sysctl.h>
32#endif
33
34#if defined __APPLE__
35# include "TargetConditionals.h"
36# include <mach-o/dyld.h>
37#endif
38
39#ifdef __ANDROID__
40# include <sys/mman.h>
41# include <sys/system_properties.h>
42# include <stdio.h>
43# include <stdint.h>
44# include <algorithm>
45# include <vector>
46#endif
47
48#include <algorithm>
49#include <assert.h>
50#include <atomic>
51#include <chrono>
52#include <limits>
53#include <new>
54#include <stdlib.h>
55#include <string.h>
56#include <sys/stat.h>
57#include <thread>
58
59#include "../common/TracyAlign.hpp"
60#include "../common/TracyAlloc.hpp"
61#include "../common/TracySocket.hpp"
62#include "../common/TracySystem.hpp"
63#include "../common/TracyYield.hpp"
64#include "../common/tracy_lz4.hpp"
65#include "tracy_rpmalloc.hpp"
66#include "TracyCallstack.hpp"
67#include "TracyDebug.hpp"
68#include "TracyDxt1.hpp"
69#include "TracyScoped.hpp"
70#include "TracyProfiler.hpp"
71#include "TracyThread.hpp"
72#include "TracyArmCpuTable.hpp"
73#include "TracySysTrace.hpp"
74#include "../tracy/TracyC.h"
75
76#ifdef TRACY_PORT
77# ifndef TRACY_DATA_PORT
78# define TRACY_DATA_PORT TRACY_PORT
79# endif
80# ifndef TRACY_BROADCAST_PORT
81# define TRACY_BROADCAST_PORT TRACY_PORT
82# endif
83#endif
84
85#ifdef __APPLE__
86# define TRACY_DELAYED_INIT
87#else
88# ifdef __GNUC__
89# define init_order( val ) __attribute__ ((init_priority(val)))
90# else
91# define init_order(x)
92# endif
93#endif
94
95#if defined _WIN32
96# include <lmcons.h>
// Function-pointer type used to call RtlGetVersion after resolving it at
// runtime from ntdll.dll with GetProcAddress (see GetHostInfo).
extern "C" typedef LONG (WINAPI *t_RtlGetVersion)( PRTL_OSVERSIONINFOW );
// Function-pointer type with the parameter list of GetLogicalProcessorInformationEx.
// NOTE(review): its use site is not visible in this part of the file.
extern "C" typedef BOOL (WINAPI *t_GetLogicalProcessorInformationEx)( LOGICAL_PROCESSOR_RELATIONSHIP, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD );
99#else
100# include <unistd.h>
101# include <limits.h>
102#endif
103#if defined __linux__
104# include <sys/sysinfo.h>
105# include <sys/utsname.h>
106#endif
107
108#if !defined _WIN32 && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
109# include "TracyCpuid.hpp"
110#endif
111
112#if !( ( defined _WIN32 && _WIN32_WINNT >= _WIN32_WINNT_VISTA ) || defined __linux__ )
113# include <mutex>
114#endif
115
116namespace tracy
117{
118
119#ifdef __ANDROID__
120// Implementation helpers of EnsureReadable(address).
121// This is so far only needed on Android, where it is common for libraries to be mapped
122// with only executable, not readable, permissions. Typical example (line from /proc/self/maps):
123/*
124746b63b000-746b6dc000 --xp 00042000 07:48 35 /apex/com.android.runtime/lib64/bionic/libc.so
125*/
126// See https://github.com/wolfpld/tracy/issues/125 .
127// To work around this, we parse /proc/self/maps and we use mprotect to set read permissions
128// on any mappings that contain symbols addresses hit by HandleSymbolCodeQuery.
129
namespace {
// Describes one memory mapping, i.e. one parsed line of /proc/self/maps.
struct MappingInfo {
    // First address covered by the mapping (inclusive).
    uintptr_t start_address;
    // One past the last covered address (exclusive). The mapping is the
    // half-open range [start_address, end_address), exactly as printed in
    // /proc/self/maps, so its size in bytes is end_address - start_address.
    uintptr_t end_address;
    // Permission flags: readable, writable, executable.
    bool perm_r;
    bool perm_w;
    bool perm_x;
};
} // anonymous namespace
142
143 // Internal implementation helper for LookUpMapping(address).
144 //
145 // Parses /proc/self/maps returning a vector<MappingInfo>.
146 // /proc/self/maps is assumed to be sorted by ascending address, so the resulting
147 // vector is sorted by ascending address too.
148static std::vector<MappingInfo> ParseMappings()
149{
150 std::vector<MappingInfo> result;
151 FILE* file = fopen( "/proc/self/maps", "r" );
152 if( !file ) return result;
153 char line[1024];
154 while( fgets( line, sizeof( line ), file ) )
155 {
156 uintptr_t start_addr;
157 uintptr_t end_addr;
158 if( sscanf( line, "%lx-%lx", &start_addr, &end_addr ) != 2 ) continue;
159 char* first_space = strchr( line, ' ' );
160 if( !first_space ) continue;
161 char* perm = first_space + 1;
162 char* second_space = strchr( perm, ' ' );
163 if( !second_space || second_space - perm != 4 ) continue;
164 result.emplace_back();
165 auto& mapping = result.back();
166 mapping.start_address = start_addr;
167 mapping.end_address = end_addr;
168 mapping.perm_r = perm[0] == 'r';
169 mapping.perm_w = perm[1] == 'w';
170 mapping.perm_x = perm[2] == 'x';
171 }
172 fclose( file );
173 return result;
174}
175
176// Internal implementation helper for LookUpMapping(address).
177//
178// Takes as input an `address` and a known vector `mappings`, assumed to be
179// sorted by increasing addresses, as /proc/self/maps seems to be.
180// Returns a pointer to the MappingInfo describing the mapping that this
181// address belongs to, or nullptr if the address isn't in `mappings`.
182static MappingInfo* LookUpMapping(std::vector<MappingInfo>& mappings, uintptr_t address)
183{
184 // Comparison function for std::lower_bound. Returns true if all addresses in `m1`
185 // are lower than `addr`.
186 auto Compare = []( const MappingInfo& m1, uintptr_t addr ) {
187 // '<=' because the address ranges are half-open intervals, [start, end).
188 return m1.end_address <= addr;
189 };
190 auto iter = std::lower_bound( mappings.begin(), mappings.end(), address, Compare );
191 if( iter == mappings.end() || iter->start_address > address) {
192 return nullptr;
193 }
194 return &*iter;
195}
196
197// Internal implementation helper for EnsureReadable(address).
198//
199// Takes as input an `address` and returns a pointer to a MappingInfo
200// describing the mapping that this address belongs to, or nullptr if
201// the address isn't in any known mapping.
202//
203// This function is stateful and not reentrant (assumes to be called from
204// only one thread). It holds a vector of mappings parsed from /proc/self/maps.
205//
206// Attempts to react to mappings changes by re-parsing /proc/self/maps.
207static MappingInfo* LookUpMapping(uintptr_t address)
208{
209 // Static state managed by this function. Not constant, we mutate that state as
210 // we turn some mappings readable. Initially parsed once here, updated as needed below.
211 static std::vector<MappingInfo> s_mappings = ParseMappings();
212 MappingInfo* mapping = LookUpMapping( s_mappings, address );
213 if( mapping ) return mapping;
214
215 // This address isn't in any known mapping. Try parsing again, maybe
216 // mappings changed.
217 s_mappings = ParseMappings();
218 return LookUpMapping( s_mappings, address );
219}
220
221// Internal implementation helper for EnsureReadable(address).
222//
223// Attempts to make the specified `mapping` readable if it isn't already.
224// Returns true if and only if the mapping is readable.
225static bool EnsureReadable( MappingInfo& mapping )
226{
227 if( mapping.perm_r )
228 {
229 // The mapping is already readable.
230 return true;
231 }
232 int prot = PROT_READ;
233 if( mapping.perm_w ) prot |= PROT_WRITE;
234 if( mapping.perm_x ) prot |= PROT_EXEC;
235 if( mprotect( reinterpret_cast<void*>( mapping.start_address ),
236 mapping.end_address - mapping.start_address, prot ) == -1 )
237 {
238 // Failed to make the mapping readable. Shouldn't happen, hasn't
239 // been observed yet. If it happened in practice, we should consider
240 // adding a bool to MappingInfo to track this to avoid retrying mprotect
241 // everytime on such mappings.
242 return false;
243 }
244 // The mapping is now readable. Update `mapping` so the next call will be fast.
245 mapping.perm_r = true;
246 return true;
247}
248
249// Attempts to set the read permission on the entire mapping containing the
250// specified address. Returns true if and only if the mapping is now readable.
251static bool EnsureReadable( uintptr_t address )
252{
253 MappingInfo* mapping = LookUpMapping(address);
254 return mapping && EnsureReadable( *mapping );
255}
256
257#endif // defined __ANDROID__
258
259#ifndef TRACY_DELAYED_INIT
260
// Plain aggregate holding a single signed 64-bit value.
// NOTE(review): by its name this carries the profiler's init timestamp; the
// use site is outside this part of the file.
struct InitTimeWrapper
{
    int64_t val;
};
265
266struct ProducerWrapper
267{
268 tracy::moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* ptr;
269};
270
// Plain aggregate holding a single unsigned 32-bit value.
// NOTE(review): by its name this carries a thread handle/id; the use site is
// outside this part of the file.
struct ThreadHandleWrapper
{
    uint32_t val;
};
275#endif
276
277
278#if defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64
279static inline void CpuId( uint32_t* regs, uint32_t leaf )
280{
281 memset(regs, 0, sizeof(uint32_t) * 4);
282#if defined _WIN32
283 __cpuidex( (int*)regs, leaf, 0 );
284#else
285 __get_cpuid( leaf, regs, regs+1, regs+2, regs+3 );
286#endif
287}
288
// Reports a fatal profiler initialization error and terminates the process
// with exit code 1; this function does not return.
//
// On Windows the message goes to stderr when a console is available (and is
// repeated on CONOUT$ when this call attached to the parent's console);
// otherwise a message box is shown, except in UWP builds. On every other
// platform the message is written to stderr.
static void InitFailure( const char* msg )
{
#if defined _WIN32
    // A successful attach means the parent's console is now ours and stderr
    // has to be re-pointed at it. ERROR_ACCESS_DENIED means the process
    // already had a console of its own.
    const bool attachedNow = AttachConsole( ATTACH_PARENT_PROCESS ) != 0;
    const bool consoleAvailable = attachedNow || GetLastError() == ERROR_ACCESS_DENIED;
    if( !consoleAvailable )
    {
#  ifndef TRACY_UWP
        MessageBoxA( nullptr, msg, "Tracy Profiler initialization failure", MB_ICONSTOP );
#  endif
    }
    else
    {
        fprintf( stderr, "Tracy Profiler initialization failure: %s\n", msg );
        if( attachedNow )
        {
            freopen( "CONOUT$", "w", stderr );
            fprintf( stderr, "Tracy Profiler initialization failure: %s\n", msg );
        }
    }
#else
    fprintf( stderr, "Tracy Profiler initialization failure: %s\n", msg );
#endif
    exit( 1 );
}
328
329static bool CheckHardwareSupportsInvariantTSC()
330{
331 const char* noCheck = GetEnvVar( "TRACY_NO_INVARIANT_CHECK" );
332 if( noCheck && noCheck[0] == '1' ) return true;
333
334 uint32_t regs[4];
335 CpuId( regs, 1 );
336 if( !( regs[3] & ( 1 << 4 ) ) )
337 {
338#if !defined TRACY_TIMER_QPC && !defined TRACY_TIMER_FALLBACK
339 InitFailure( "CPU doesn't support RDTSC instruction." );
340#else
341 return false;
342#endif
343 }
344 CpuId( regs, 0x80000007 );
345 if( regs[3] & ( 1 << 8 ) ) return true;
346
347 return false;
348}
349
350#if defined TRACY_TIMER_FALLBACK && defined TRACY_HW_TIMER
351bool HardwareSupportsInvariantTSC()
352{
353 static bool cachedResult = CheckHardwareSupportsInvariantTSC();
354 return cachedResult;
355}
356#endif
357
358static int64_t SetupHwTimer()
359{
360#if !defined TRACY_TIMER_QPC && !defined TRACY_TIMER_FALLBACK
361 if( !CheckHardwareSupportsInvariantTSC() )
362 {
363#if defined _WIN32
364 InitFailure( "CPU doesn't support invariant TSC.\nDefine TRACY_NO_INVARIANT_CHECK=1 to ignore this error, *if you know what you are doing*.\nAlternatively you may rebuild the application with the TRACY_TIMER_QPC or TRACY_TIMER_FALLBACK define to use lower resolution timer." );
365#else
366 InitFailure( "CPU doesn't support invariant TSC.\nDefine TRACY_NO_INVARIANT_CHECK=1 to ignore this error, *if you know what you are doing*.\nAlternatively you may rebuild the application with the TRACY_TIMER_FALLBACK define to use lower resolution timer." );
367#endif
368 }
369#endif
370
371 return Profiler::GetTime();
372}
373#else
374static int64_t SetupHwTimer()
375{
376 return Profiler::GetTime();
377}
378#endif
379
// Returns the short name of the running program (no directory part), or the
// literal "unknown" when the platform offers no way to query it or the query
// fails. The returned pointer refers to static or system-owned storage and
// must not be freed.
static const char* GetProcessName()
{
    const char* processName = "unknown";
#ifdef _WIN32
    static char buf[_MAX_PATH];
    // GetModuleFileNameA returns 0 on failure; keep "unknown" in that case
    // instead of handing out an empty string.
    if( GetModuleFileNameA( nullptr, buf, _MAX_PATH ) != 0 )
    {
        // Guard against a truncated result that was left unterminated.
        buf[_MAX_PATH - 1] = '\0';
        const char* ptr = buf;
        while( *ptr != '\0' ) ptr++;
        // Walk back to just after the last path separator (or to the start).
        while( ptr > buf && ptr[-1] != '\\' && ptr[-1] != '/' ) ptr--;
        processName = ptr;
    }
#elif defined __ANDROID__
#  if __ANDROID_API__ >= 21
    auto buf = getprogname();
    if( buf ) processName = buf;
#  endif
#elif defined __linux__ && defined _GNU_SOURCE
    if( program_invocation_short_name ) processName = program_invocation_short_name;
#elif defined __APPLE__ || defined BSD
    auto buf = getprogname();
    if( buf ) processName = buf;
#endif
    return processName;
}
404
// Returns the path of the running executable as reported by the platform, or
// nullptr on platforms where no query is implemented (including Android).
// Where a static buffer is used, a failed query yields an empty string. The
// returned pointer refers to static or system-owned storage.
static const char* GetProcessExecutablePath()
{
#ifdef _WIN32
    static char buf[_MAX_PATH];
    GetModuleFileNameA( nullptr, buf, _MAX_PATH );
    return buf;
#elif defined __ANDROID__
    return nullptr;
#elif defined __linux__ && defined _GNU_SOURCE
    return program_invocation_name;
#elif defined __APPLE__
    static char buf[1024];
    uint32_t size = 1024;
    // Non-zero means the buffer was too small; do not return stale contents.
    if( _NSGetExecutablePath( buf, &size ) != 0 ) buf[0] = '\0';
    return buf;
#elif defined __DragonFly__
    static char buf[1024];
    // readlink() does not null-terminate: reserve one byte and terminate at
    // the returned length (or at 0 on failure).
    const ssize_t len = readlink( "/proc/curproc/file", buf, sizeof( buf ) - 1 );
    buf[len > 0 ? len : 0] = '\0';
    return buf;
#elif defined __FreeBSD__
    static char buf[1024];
    int mib[4];
    mib[0] = CTL_KERN;
    mib[1] = KERN_PROC;
    mib[2] = KERN_PROC_PATHNAME;
    mib[3] = -1;
    size_t cb = 1024;
    if( sysctl( mib, 4, buf, &cb, nullptr, 0 ) != 0 ) buf[0] = '\0';
    return buf;
#elif defined __NetBSD__
    static char buf[1024];
    // Same termination handling as the DragonFly branch.
    const ssize_t len = readlink( "/proc/curproc/exe", buf, sizeof( buf ) - 1 );
    buf[len > 0 ? len : 0] = '\0';
    return buf;
#else
    return nullptr;
#endif
}
442
443#if defined __linux__ && defined __ARM_ARCH
// Advances `ptr` by `skip` characters and parses the integer found there:
// base 16 when it starts with "0x", base 10 otherwise. On return `ptr` points
// at the first character that was not consumed by the number.
static uint32_t GetHex( char*& ptr, int skip )
{
    ptr += skip;

    const bool hasHexPrefix = ptr[0] == '0' && ptr[1] == 'x';
    if( hasHexPrefix ) ptr += 2;

    char* stop;
    const uint32_t value = strtol( ptr, &stop, hasHexPrefix ? 16 : 10 );
    ptr = stop;
    return value;
}
461#endif
462
463static const char* GetHostInfo()
464{
465 static char buf[1024];
466 auto ptr = buf;
467#if defined _WIN32
468# ifdef TRACY_UWP
469 auto GetVersion = &::GetVersionEx;
470# else
471 auto GetVersion = (t_RtlGetVersion)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "RtlGetVersion" );
472# endif
473 if( !GetVersion )
474 {
475# ifdef __MINGW32__
476 ptr += sprintf( ptr, "OS: Windows (MingW)\n" );
477# else
478 ptr += sprintf( ptr, "OS: Windows\n" );
479# endif
480 }
481 else
482 {
483 RTL_OSVERSIONINFOW ver = { sizeof( RTL_OSVERSIONINFOW ) };
484 GetVersion( &ver );
485
486# ifdef __MINGW32__
487 ptr += sprintf( ptr, "OS: Windows %i.%i.%i (MingW)\n", (int)ver.dwMajorVersion, (int)ver.dwMinorVersion, (int)ver.dwBuildNumber );
488# else
489 ptr += sprintf( ptr, "OS: Windows %i.%i.%i\n", ver.dwMajorVersion, ver.dwMinorVersion, ver.dwBuildNumber );
490# endif
491 }
492#elif defined __linux__
493 struct utsname utsName;
494 uname( &utsName );
495# if defined __ANDROID__
496 ptr += sprintf( ptr, "OS: Linux %s (Android)\n", utsName.release );
497# else
498 ptr += sprintf( ptr, "OS: Linux %s\n", utsName.release );
499# endif
500#elif defined __APPLE__
501# if TARGET_OS_IPHONE == 1
502 ptr += sprintf( ptr, "OS: Darwin (iOS)\n" );
503# elif TARGET_OS_MAC == 1
504 ptr += sprintf( ptr, "OS: Darwin (OSX)\n" );
505# else
506 ptr += sprintf( ptr, "OS: Darwin (unknown)\n" );
507# endif
508#elif defined __DragonFly__
509 ptr += sprintf( ptr, "OS: BSD (DragonFly)\n" );
510#elif defined __FreeBSD__
511 ptr += sprintf( ptr, "OS: BSD (FreeBSD)\n" );
512#elif defined __NetBSD__
513 ptr += sprintf( ptr, "OS: BSD (NetBSD)\n" );
514#elif defined __OpenBSD__
515 ptr += sprintf( ptr, "OS: BSD (OpenBSD)\n" );
516#else
517 ptr += sprintf( ptr, "OS: unknown\n" );
518#endif
519
520#if defined _MSC_VER
521# if defined __clang__
522 ptr += sprintf( ptr, "Compiler: MSVC clang-cl %i.%i.%i\n", __clang_major__, __clang_minor__, __clang_patchlevel__ );
523# else
524 ptr += sprintf( ptr, "Compiler: MSVC %i\n", _MSC_VER );
525# endif
526#elif defined __clang__
527 ptr += sprintf( ptr, "Compiler: clang %i.%i.%i\n", __clang_major__, __clang_minor__, __clang_patchlevel__ );
528#elif defined __GNUC__
529 ptr += sprintf( ptr, "Compiler: gcc %i.%i.%i\n", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__ );
530#else
531 ptr += sprintf( ptr, "Compiler: unknown\n" );
532#endif
533
534#if defined _WIN32
535 InitWinSock();
536
537 char hostname[512];
538 gethostname( hostname, 512 );
539
540# ifdef TRACY_UWP
541 const char* user = "";
542# else
543 DWORD userSz = UNLEN+1;
544 char user[UNLEN+1];
545 GetUserNameA( user, &userSz );
546# endif
547
548 ptr += sprintf( ptr, "User: %s@%s\n", user, hostname );
549#else
550 char hostname[_POSIX_HOST_NAME_MAX]{};
551 char user[_POSIX_LOGIN_NAME_MAX]{};
552
553 gethostname( hostname, _POSIX_HOST_NAME_MAX );
554# if defined __ANDROID__
555 const auto login = getlogin();
556 if( login )
557 {
558 strcpy( user, login );
559 }
560 else
561 {
562 memcpy( user, "(?)", 4 );
563 }
564# else
565 getlogin_r( user, _POSIX_LOGIN_NAME_MAX );
566# endif
567
568 ptr += sprintf( ptr, "User: %s@%s\n", user, hostname );
569#endif
570
571#if defined __i386 || defined _M_IX86
572 ptr += sprintf( ptr, "Arch: x86\n" );
573#elif defined __x86_64__ || defined _M_X64
574 ptr += sprintf( ptr, "Arch: x64\n" );
575#elif defined __aarch64__
576 ptr += sprintf( ptr, "Arch: ARM64\n" );
577#elif defined __ARM_ARCH
578 ptr += sprintf( ptr, "Arch: ARM\n" );
579#else
580 ptr += sprintf( ptr, "Arch: unknown\n" );
581#endif
582
583#if defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64
584 uint32_t regs[4];
585 char cpuModel[4*4*3+1] = {};
586 auto modelPtr = cpuModel;
587 for( uint32_t i=0x80000002; i<0x80000005; ++i )
588 {
589 CpuId( regs, i );
590 memcpy( modelPtr, regs, sizeof( regs ) ); modelPtr += sizeof( regs );
591 }
592
593 ptr += sprintf( ptr, "CPU: %s\n", cpuModel );
594#elif defined __linux__ && defined __ARM_ARCH
595 bool cpuFound = false;
596 FILE* fcpuinfo = fopen( "/proc/cpuinfo", "rb" );
597 if( fcpuinfo )
598 {
599 enum { BufSize = 4*1024 };
600 char buf[BufSize];
601 const auto sz = fread( buf, 1, BufSize, fcpuinfo );
602 fclose( fcpuinfo );
603 const auto end = buf + sz;
604 auto cptr = buf;
605
606 uint32_t impl = 0;
607 uint32_t var = 0;
608 uint32_t part = 0;
609 uint32_t rev = 0;
610
611 while( end - cptr > 20 )
612 {
613 while( end - cptr > 20 && memcmp( cptr, "CPU ", 4 ) != 0 )
614 {
615 cptr += 4;
616 while( end - cptr > 20 && *cptr != '\n' ) cptr++;
617 cptr++;
618 }
619 if( end - cptr <= 20 ) break;
620 cptr += 4;
621 if( memcmp( cptr, "implementer\t: ", 14 ) == 0 )
622 {
623 if( impl != 0 ) break;
624 impl = GetHex( cptr, 14 );
625 }
626 else if( memcmp( cptr, "variant\t: ", 10 ) == 0 ) var = GetHex( cptr, 10 );
627 else if( memcmp( cptr, "part\t: ", 7 ) == 0 ) part = GetHex( cptr, 7 );
628 else if( memcmp( cptr, "revision\t: ", 11 ) == 0 ) rev = GetHex( cptr, 11 );
629 while( *cptr != '\n' && *cptr != '\0' ) cptr++;
630 cptr++;
631 }
632
633 if( impl != 0 || var != 0 || part != 0 || rev != 0 )
634 {
635 cpuFound = true;
636 ptr += sprintf( ptr, "CPU: %s%s r%ip%i\n", DecodeArmImplementer( impl ), DecodeArmPart( impl, part ), var, rev );
637 }
638 }
639 if( !cpuFound )
640 {
641 ptr += sprintf( ptr, "CPU: unknown\n" );
642 }
643#elif defined __APPLE__ && TARGET_OS_IPHONE == 1
644 {
645 size_t sz;
646 sysctlbyname( "hw.machine", nullptr, &sz, nullptr, 0 );
647 auto str = (char*)tracy_malloc( sz );
648 sysctlbyname( "hw.machine", str, &sz, nullptr, 0 );
649 ptr += sprintf( ptr, "Device: %s\n", DecodeIosDevice( str ) );
650 tracy_free( str );
651 }
652#else
653 ptr += sprintf( ptr, "CPU: unknown\n" );
654#endif
655#ifdef __ANDROID__
656 char deviceModel[PROP_VALUE_MAX+1];
657 char deviceManufacturer[PROP_VALUE_MAX+1];
658 __system_property_get( "ro.product.model", deviceModel );
659 __system_property_get( "ro.product.manufacturer", deviceManufacturer );
660 ptr += sprintf( ptr, "Device: %s %s\n", deviceManufacturer, deviceModel );
661#endif
662
663 ptr += sprintf( ptr, "CPU cores: %i\n", std::thread::hardware_concurrency() );
664
665#if defined _WIN32
666 MEMORYSTATUSEX statex;
667 statex.dwLength = sizeof( statex );
668 GlobalMemoryStatusEx( &statex );
669# ifdef _MSC_VER
670 ptr += sprintf( ptr, "RAM: %I64u MB\n", statex.ullTotalPhys / 1024 / 1024 );
671# else
672 ptr += sprintf( ptr, "RAM: %llu MB\n", statex.ullTotalPhys / 1024 / 1024 );
673# endif
674#elif defined __linux__
675 struct sysinfo sysInfo;
676 sysinfo( &sysInfo );
677 ptr += sprintf( ptr, "RAM: %lu MB\n", sysInfo.totalram / 1024 / 1024 );
678#elif defined __APPLE__
679 size_t memSize;
680 size_t sz = sizeof( memSize );
681 sysctlbyname( "hw.memsize", &memSize, &sz, nullptr, 0 );
682 ptr += sprintf( ptr, "RAM: %zu MB\n", memSize / 1024 / 1024 );
683#elif defined BSD
684 size_t memSize;
685 size_t sz = sizeof( memSize );
686 sysctlbyname( "hw.physmem", &memSize, &sz, nullptr, 0 );
687 ptr += sprintf( ptr, "RAM: %zu MB\n", memSize / 1024 / 1024 );
688#else
689 ptr += sprintf( ptr, "RAM: unknown\n" );
690#endif
691
692 return buf;
693}
694
// Returns the identifier of the current process widened to 64 bits, using
// GetCurrentProcessId() on Windows and getpid() everywhere else.
static uint64_t GetPid()
{
#if defined _WIN32
    const auto pid = GetCurrentProcessId();
#else
    const auto pid = getpid();
#endif
    return uint64_t( pid );
}
703
704void Profiler::AckServerQuery()
705{
706 QueueItem item;
707 MemWrite( &item.hdr.type, QueueType::AckServerQueryNoop );
708 NeedDataSize( QueueDataSize[(int)QueueType::AckServerQueryNoop] );
709 AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::AckServerQueryNoop] );
710}
711
712void Profiler::AckSymbolCodeNotAvailable()
713{
714 QueueItem item;
715 MemWrite( &item.hdr.type, QueueType::AckSymbolCodeNotAvailable );
716 NeedDataSize( QueueDataSize[(int)QueueType::AckSymbolCodeNotAvailable] );
717 AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::AckSymbolCodeNotAvailable] );
718}
719
720static BroadcastMessage& GetBroadcastMessage( const char* procname, size_t pnsz, int& len, int port )
721{
722 static BroadcastMessage msg;
723
724 msg.broadcastVersion = BroadcastVersion;
725 msg.protocolVersion = ProtocolVersion;
726 msg.listenPort = port;
727 msg.pid = GetPid();
728
729 memcpy( msg.programName, procname, pnsz );
730 memset( msg.programName + pnsz, 0, WelcomeMessageProgramNameSize - pnsz );
731
732 len = int( offsetof( BroadcastMessage, programName ) + pnsz + 1 );
733 return msg;
734}
735
736#if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER
// Thread ids that CrashFilter leaves running when it suspends the other
// threads of the process. Both start at 0.
// NOTE(review): where they are assigned is outside this part of the file.
static DWORD s_profilerThreadId = 0;
static DWORD s_symbolThreadId = 0;
// Buffer in which CrashFilter formats the crash description.
static char s_crashText[1024];
740
741LONG WINAPI CrashFilter( PEXCEPTION_POINTERS pExp )
742{
743 if( !GetProfiler().IsConnected() ) return EXCEPTION_CONTINUE_SEARCH;
744
745 const unsigned ec = pExp->ExceptionRecord->ExceptionCode;
746 auto msgPtr = s_crashText;
747 switch( ec )
748 {
749 case EXCEPTION_ACCESS_VIOLATION:
750 msgPtr += sprintf( msgPtr, "Exception EXCEPTION_ACCESS_VIOLATION (0x%x). ", ec );
751 switch( pExp->ExceptionRecord->ExceptionInformation[0] )
752 {
753 case 0:
754 msgPtr += sprintf( msgPtr, "Read violation at address 0x%" PRIxPTR ".", pExp->ExceptionRecord->ExceptionInformation[1] );
755 break;
756 case 1:
757 msgPtr += sprintf( msgPtr, "Write violation at address 0x%" PRIxPTR ".", pExp->ExceptionRecord->ExceptionInformation[1] );
758 break;
759 case 8:
760 msgPtr += sprintf( msgPtr, "DEP violation at address 0x%" PRIxPTR ".", pExp->ExceptionRecord->ExceptionInformation[1] );
761 break;
762 default:
763 break;
764 }
765 break;
766 case EXCEPTION_ARRAY_BOUNDS_EXCEEDED:
767 msgPtr += sprintf( msgPtr, "Exception EXCEPTION_ARRAY_BOUNDS_EXCEEDED (0x%x). ", ec );
768 break;
769 case EXCEPTION_DATATYPE_MISALIGNMENT:
770 msgPtr += sprintf( msgPtr, "Exception EXCEPTION_DATATYPE_MISALIGNMENT (0x%x). ", ec );
771 break;
772 case EXCEPTION_FLT_DIVIDE_BY_ZERO:
773 msgPtr += sprintf( msgPtr, "Exception EXCEPTION_FLT_DIVIDE_BY_ZERO (0x%x). ", ec );
774 break;
775 case EXCEPTION_ILLEGAL_INSTRUCTION:
776 msgPtr += sprintf( msgPtr, "Exception EXCEPTION_ILLEGAL_INSTRUCTION (0x%x). ", ec );
777 break;
778 case EXCEPTION_IN_PAGE_ERROR:
779 msgPtr += sprintf( msgPtr, "Exception EXCEPTION_IN_PAGE_ERROR (0x%x). ", ec );
780 break;
781 case EXCEPTION_INT_DIVIDE_BY_ZERO:
782 msgPtr += sprintf( msgPtr, "Exception EXCEPTION_INT_DIVIDE_BY_ZERO (0x%x). ", ec );
783 break;
784 case EXCEPTION_PRIV_INSTRUCTION:
785 msgPtr += sprintf( msgPtr, "Exception EXCEPTION_PRIV_INSTRUCTION (0x%x). ", ec );
786 break;
787 case EXCEPTION_STACK_OVERFLOW:
788 msgPtr += sprintf( msgPtr, "Exception EXCEPTION_STACK_OVERFLOW (0x%x). ", ec );
789 break;
790 default:
791 return EXCEPTION_CONTINUE_SEARCH;
792 }
793
794 {
795 GetProfiler().SendCallstack( 60, "KiUserExceptionDispatcher" );
796
797 TracyQueuePrepare( QueueType::CrashReport );
798 item->crashReport.time = Profiler::GetTime();
799 item->crashReport.text = (uint64_t)s_crashText;
800 TracyQueueCommit( crashReportThread );
801 }
802
803 HANDLE h = CreateToolhelp32Snapshot( TH32CS_SNAPTHREAD, 0 );
804 if( h == INVALID_HANDLE_VALUE ) return EXCEPTION_CONTINUE_SEARCH;
805
806 THREADENTRY32 te = { sizeof( te ) };
807 if( !Thread32First( h, &te ) )
808 {
809 CloseHandle( h );
810 return EXCEPTION_CONTINUE_SEARCH;
811 }
812
813 const auto pid = GetCurrentProcessId();
814 const auto tid = GetCurrentThreadId();
815
816 do
817 {
818 if( te.th32OwnerProcessID == pid && te.th32ThreadID != tid && te.th32ThreadID != s_profilerThreadId && te.th32ThreadID != s_symbolThreadId )
819 {
820 HANDLE th = OpenThread( THREAD_SUSPEND_RESUME, FALSE, te.th32ThreadID );
821 if( th != INVALID_HANDLE_VALUE )
822 {
823 SuspendThread( th );
824 CloseHandle( th );
825 }
826 }
827 }
828 while( Thread32Next( h, &te ) );
829 CloseHandle( h );
830
831 {
832 TracyLfqPrepare( QueueType::Crash );
833 TracyLfqCommit;
834 }
835
836 std::this_thread::sleep_for( std::chrono::milliseconds( 500 ) );
837 GetProfiler().RequestShutdown();
838 while( !GetProfiler().HasShutdownFinished() ) { std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); };
839
840 return EXCEPTION_CONTINUE_SEARCH;
841}
842#endif
843
// File-local pointers to the profiler and to its helper threads.
// NOTE(review): they are assigned and released outside this part of the file.
static Profiler* s_instance = nullptr;
static Thread* s_thread;
#ifndef TRACY_NO_FRAME_IMAGE
// Only present when frame images are not compiled out.
static Thread* s_compressThread;
#endif
#ifdef TRACY_HAS_CALLSTACK
static Thread* s_symbolThread;
// Set to true by the Linux CrashHandler when the crash happened on the symbol
// thread itself. Not declared static, unlike its neighbours.
std::atomic<bool> s_symbolThreadGone { false };
#endif
#ifdef TRACY_HAS_SYSTEM_TRACING
static Thread* s_sysTraceThread = nullptr;
#endif
856
857#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER
// Signal that CrashHandler sends to the other threads of the process.
// Defaults to SIGPWR and can be overridden at build time.
# ifndef TRACY_CRASH_SIGNAL
# define TRACY_CRASH_SIGNAL SIGPWR
# endif

// Thread ids that CrashHandler does not signal. Both start at 0.
// NOTE(review): where they are assigned is outside this part of the file.
static long s_profilerTid = 0;
static long s_symbolTid = 0;
// Buffer in which CrashHandler builds the crash description.
static char s_crashText[1024];
// Set by the first thread entering CrashHandler; later entrants are parked.
static std::atomic<bool> s_alreadyCrashed( false );
866
// Parks the calling thread forever by sleeping in an endless loop; it never
// returns. The signal number argument is ignored.
static void ThreadFreezer( int /*signal*/ )
{
    while( true )
    {
        sleep( 1000 );
    }
}
871
// Writes `val` as lowercase hexadecimal, without prefix or leading zeros, at
// `ptr` and advances `ptr` past the digits written (1 to 16 characters).
// No terminator is appended. Uses only shifts and a table lookup.
static inline void HexPrint( char*& ptr, uint64_t val )
{
    static const char Digits[] = "0123456789abcdef";

    // Find the most significant non-zero nibble; for zero this stops at the
    // lowest nibble so that a single '0' is emitted.
    int shift = 60;
    while( shift > 0 && ( ( val >> shift ) & 0xF ) == 0 ) shift -= 4;

    // Emit nibbles from the most significant one down to the least.
    for( ; shift >= 0; shift -= 4 )
    {
        *ptr++ = Digits[( val >> shift ) & 0xF];
    }
}
897
// Signal handler (three-argument sigaction form) that reports a crash to the
// profiler and then aborts the process.
//
// Sequence:
//   1. Only the first thread to get here proceeds; any other thread is parked
//      in ThreadFreezer().
//   2. SIGABRT is reset to its default action, so the abort() calls below
//      terminate the process instead of re-entering a handler.
//   3. A description of the signal (plus the fault address, except for
//      SIGPIPE) is written into s_crashText.
//   4. A callstack and a CrashReport queue item pointing at s_crashText are sent.
//   5. TRACY_CRASH_SIGNAL is sent to every thread listed in /proc/self/task
//      except this thread and the threads in s_profilerTid / s_symbolTid.
//      NOTE(review): presumably ThreadFreezer is installed for that signal;
//      the installation is outside this part of the file.
//   6. A Crash item is queued, profiler shutdown is requested and awaited,
//      and the process is aborted.
static void CrashHandler( int signal, siginfo_t* info, void* /*ucontext*/ )
{
    bool expected = false;
    if( !s_alreadyCrashed.compare_exchange_strong( expected, true ) ) ThreadFreezer( signal );

    struct sigaction act = {};
    act.sa_handler = SIG_DFL;
    sigaction( SIGABRT, &act, nullptr );

    // msgPtr always points at the current end of the text in s_crashText.
    auto msgPtr = s_crashText;
    switch( signal )
    {
    case SIGILL:
        strcpy( msgPtr, "Illegal Instruction.\n" );
        while( *msgPtr ) msgPtr++;
        switch( info->si_code )
        {
        case ILL_ILLOPC:
            strcpy( msgPtr, "Illegal opcode.\n" );
            break;
        case ILL_ILLOPN:
            strcpy( msgPtr, "Illegal operand.\n" );
            break;
        case ILL_ILLADR:
            strcpy( msgPtr, "Illegal addressing mode.\n" );
            break;
        case ILL_ILLTRP:
            strcpy( msgPtr, "Illegal trap.\n" );
            break;
        case ILL_PRVOPC:
            strcpy( msgPtr, "Privileged opcode.\n" );
            break;
        case ILL_PRVREG:
            strcpy( msgPtr, "Privileged register.\n" );
            break;
        case ILL_COPROC:
            strcpy( msgPtr, "Coprocessor error.\n" );
            break;
        case ILL_BADSTK:
            strcpy( msgPtr, "Internal stack error.\n" );
            break;
        default:
            break;
        }
        break;
    case SIGFPE:
        strcpy( msgPtr, "Floating-point exception.\n" );
        while( *msgPtr ) msgPtr++;
        switch( info->si_code )
        {
        case FPE_INTDIV:
            strcpy( msgPtr, "Integer divide by zero.\n" );
            break;
        case FPE_INTOVF:
            strcpy( msgPtr, "Integer overflow.\n" );
            break;
        case FPE_FLTDIV:
            strcpy( msgPtr, "Floating-point divide by zero.\n" );
            break;
        case FPE_FLTOVF:
            strcpy( msgPtr, "Floating-point overflow.\n" );
            break;
        case FPE_FLTUND:
            strcpy( msgPtr, "Floating-point underflow.\n" );
            break;
        case FPE_FLTRES:
            strcpy( msgPtr, "Floating-point inexact result.\n" );
            break;
        case FPE_FLTINV:
            strcpy( msgPtr, "Floating-point invalid operation.\n" );
            break;
        case FPE_FLTSUB:
            strcpy( msgPtr, "Subscript out of range.\n" );
            break;
        default:
            break;
        }
        break;
    case SIGSEGV:
        strcpy( msgPtr, "Invalid memory reference.\n" );
        while( *msgPtr ) msgPtr++;
        switch( info->si_code )
        {
        case SEGV_MAPERR:
            strcpy( msgPtr, "Address not mapped to object.\n" );
            break;
        case SEGV_ACCERR:
            strcpy( msgPtr, "Invalid permissions for mapped object.\n" );
            break;
        // The following codes are not defined by every libc/kernel header set.
#  ifdef SEGV_BNDERR
        case SEGV_BNDERR:
            strcpy( msgPtr, "Failed address bound checks.\n" );
            break;
#  endif
#  ifdef SEGV_PKUERR
        case SEGV_PKUERR:
            strcpy( msgPtr, "Access was denied by memory protection keys.\n" );
            break;
#  endif
        default:
            break;
        }
        break;
    case SIGPIPE:
        strcpy( msgPtr, "Broken pipe.\n" );
        while( *msgPtr ) msgPtr++;
        break;
    case SIGBUS:
        strcpy( msgPtr, "Bus error.\n" );
        while( *msgPtr ) msgPtr++;
        switch( info->si_code )
        {
        case BUS_ADRALN:
            strcpy( msgPtr, "Invalid address alignment.\n" );
            break;
        case BUS_ADRERR:
            strcpy( msgPtr, "Nonexistent physical address.\n" );
            break;
        case BUS_OBJERR:
            strcpy( msgPtr, "Object-specific hardware error.\n" );
            break;
#  ifdef BUS_MCEERR_AR
        case BUS_MCEERR_AR:
            strcpy( msgPtr, "Hardware memory error consumed on a machine check; action required.\n" );
            break;
#  endif
#  ifdef BUS_MCEERR_AO
        case BUS_MCEERR_AO:
            strcpy( msgPtr, "Hardware memory error detected in process but not consumed; action optional.\n" );
            break;
#  endif
        default:
            break;
        }
        break;
    case SIGABRT:
        strcpy( msgPtr, "Abort signal from abort().\n" );
        break;
    default:
        // Any other signal is not described here: abort right away.
        abort();
    }
    // Skip past whatever the innermost strcpy() appended.
    while( *msgPtr ) msgPtr++;

    if( signal != SIGPIPE )
    {
        strcpy( msgPtr, "Fault address: 0x" );
        while( *msgPtr ) msgPtr++;
        HexPrint( msgPtr, uint64_t( info->si_addr ) );
        // No terminator is written after the newline; the text ends at the
        // next zero byte of the static buffer.
        *msgPtr++ = '\n';
    }

    {
        GetProfiler().SendCallstack( 60, "__kernel_rt_sigreturn" );

        TracyQueuePrepare( QueueType::CrashReport );
        item->crashReport.time = Profiler::GetTime();
        item->crashReport.text = (uint64_t)s_crashText;
        TracyQueueCommit( crashReportThread );
    }

    // Enumerate the threads of this process; each entry name is a thread id.
    DIR* dp = opendir( "/proc/self/task" );
    if( !dp ) abort();

    const auto selfTid = syscall( SYS_gettid );

    struct dirent* ep;
    while( ( ep = readdir( dp ) ) != nullptr )
    {
        // Skip entries whose name starts with '.', such as "." and "..".
        if( ep->d_name[0] == '.' ) continue;
        int tid = atoi( ep->d_name );
        if( tid != selfTid && tid != s_profilerTid && tid != s_symbolTid )
        {
            syscall( SYS_tkill, tid, TRACY_CRASH_SIGNAL );
        }
    }
    closedir( dp );

#ifdef TRACY_HAS_CALLSTACK
    // The crash happened on the symbol thread itself: flag it as gone.
    if( selfTid == s_symbolTid ) s_symbolThreadGone.store( true, std::memory_order_release );
#endif

    TracyLfqPrepare( QueueType::Crash );
    TracyLfqCommit;

    // Wait briefly before requesting shutdown, then wait until the profiler
    // reports that shutdown has finished.
    std::this_thread::sleep_for( std::chrono::milliseconds( 500 ) );
    GetProfiler().RequestShutdown();
    while( !GetProfiler().HasShutdownFinished() ) { std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); };

    abort();
}
1088#endif
1089
1090
// Capacity hint passed to the constructor of the global s_queue in the non-delayed-init configuration.
1091enum { QueuePrealloc = 256 * 1024 };
1092
1093TRACY_API int64_t GetFrequencyQpc()
1094{
1095#if defined _WIN32
1096 LARGE_INTEGER t;
1097 QueryPerformanceFrequency( &t );
1098 return t.QuadPart;
1099#else
1100 return 0;
1101#endif
1102}
1103
1104#ifdef TRACY_DELAYED_INIT
1105struct ThreadNameData;
1106TRACY_API moodycamel::ConcurrentQueue<QueueItem>& GetQueue();
1107
// Process-wide profiler state used when TRACY_DELAYED_INIT is defined. The accessor functions
// below (GetProfiler(), GetQueue(), GetInitTime(), ...) hand out references to these members.
1108struct ProfilerData
1109{
// Members are initialized in declaration order, so the timer setup runs before the queue and
// the Profiler object are constructed.
1110 int64_t initTime = SetupHwTimer();
1111 moodycamel::ConcurrentQueue<QueueItem> queue;
1112 Profiler profiler;
// Counters returned by GetLockCounter() and GetGpuCtxCounter().
1113 std::atomic<uint32_t> lockCounter { 0 };
1114 std::atomic<uint8_t> gpuCtxCounter { 0 };
// Pointer returned (by reference) from GetThreadNameData().
1115 std::atomic<ThreadNameData*> threadNameData { nullptr };
1116};
1117
1118struct ProducerWrapper
1119{
1120 ProducerWrapper( ProfilerData& data ) : detail( data.queue ), ptr( data.queue.get_explicit_producer( detail ) ) {}
1121 moodycamel::ProducerToken detail;
1122 tracy::moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* ptr;
1123};
1124
1125struct ProfilerThreadData
1126{
1127 ProfilerThreadData( ProfilerData& data ) : token( data ), gpuCtx( { nullptr } ) {}
1128 ProducerWrapper token;
1129 GpuCtxWrapper gpuCtx;
1130# ifdef TRACY_ON_DEMAND
1131 LuaZoneState luaZoneState;
1132# endif
1133};
1134
// Allocator initialization state. NOTE(review): these are presumably maintained by the rpmalloc
// init helpers defined elsewhere; this file only shows ShutdownProfiler() resetting RpInitDone and
// RpThreadInitDone, and ProfilerAllocatorAvailable() reading RpThreadShutdown.
1135std::atomic<int> RpInitDone { 0 };
1136std::atomic<int> RpInitLock { 0 };
// Per-thread flags; ProfilerAllocatorAvailable() reports false once RpThreadShutdown is set.
1137thread_local bool RpThreadInitDone = false;
1138thread_local bool RpThreadShutdown = false;
1139
1140# ifdef TRACY_MANUAL_LIFETIME
1141ProfilerData* s_profilerData = nullptr;
1142static ProfilerThreadData& GetProfilerThreadData();
1143TRACY_API void StartupProfiler()
1144{
1145 s_profilerData = (ProfilerData*)tracy_malloc( sizeof( ProfilerData ) );
1146 new (s_profilerData) ProfilerData();
1147 s_profilerData->profiler.SpawnWorkerThreads();
1148 GetProfilerThreadData().token = ProducerWrapper( *s_profilerData );
1149}
1150static ProfilerData& GetProfilerData()
1151{
1152 assert( s_profilerData );
1153 return *s_profilerData;
1154}
1155TRACY_API void ShutdownProfiler()
1156{
1157 s_profilerData->~ProfilerData();
1158 tracy_free( s_profilerData );
1159 s_profilerData = nullptr;
1160 rpmalloc_finalize();
1161 RpThreadInitDone = false;
1162 RpInitDone.store( 0, std::memory_order_release );
1163}
1164# else
1165static std::atomic<int> profilerDataLock { 0 };
1166static std::atomic<ProfilerData*> profilerData { nullptr };
1167
1168static ProfilerData& GetProfilerData()
1169{
1170 auto ptr = profilerData.load( std::memory_order_acquire );
1171 if( !ptr )
1172 {
1173 int expected = 0;
1174 while( !profilerDataLock.compare_exchange_weak( expected, 1, std::memory_order_release, std::memory_order_relaxed ) ) { expected = 0; YieldThread(); }
1175 ptr = profilerData.load( std::memory_order_acquire );
1176 if( !ptr )
1177 {
1178 ptr = (ProfilerData*)tracy_malloc( sizeof( ProfilerData ) );
1179 new (ptr) ProfilerData();
1180 profilerData.store( ptr, std::memory_order_release );
1181 }
1182 profilerDataLock.store( 0, std::memory_order_release );
1183 }
1184 return *ptr;
1185}
1186# endif
1187
1188// GCC prior to 8.4 had a bug with function-inline thread_local variables. Versions of glibc beginning with
1189// 2.18 may attempt to work around this issue, which manifests as a crash while running static destructors
1190// if this function is compiled into a shared object. Unfortunately, centos7 ships with glibc 2.17. If running
1191// on old GCC, use the old-fashioned way as a workaround
1192// See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85400
1193#if !defined(__clang__) && defined(__GNUC__) && ((__GNUC__ < 8) || ((__GNUC__ == 8) && (__GNUC_MINOR__ < 4)))
1194struct ProfilerThreadDataKey
1195{
1196public:
1197 ProfilerThreadDataKey()
1198 {
1199 int val = pthread_key_create(&m_key, sDestructor);
1200 static_cast<void>(val); // unused
1201 assert(val == 0);
1202 }
1203 ~ProfilerThreadDataKey()
1204 {
1205 int val = pthread_key_delete(m_key);
1206 static_cast<void>(val); // unused
1207 assert(val == 0);
1208 }
1209 ProfilerThreadData& get()
1210 {
1211 void* p = pthread_getspecific(m_key);
1212 if (!p)
1213 {
1214 p = (ProfilerThreadData*)tracy_malloc( sizeof( ProfilerThreadData ) );
1215 new (p) ProfilerThreadData(GetProfilerData());
1216 pthread_setspecific(m_key, p);
1217 }
1218 return *static_cast<ProfilerThreadData*>(p);
1219 }
1220private:
1221 pthread_key_t m_key;
1222
1223 static void sDestructor(void* p)
1224 {
1225 ((ProfilerThreadData*)p)->~ProfilerThreadData();
1226 tracy_free(p);
1227 }
1228};
1229
1230static ProfilerThreadData& GetProfilerThreadData()
1231{
1232 static ProfilerThreadDataKey key;
1233 return key.get();
1234}
1235#else
1236static ProfilerThreadData& GetProfilerThreadData()
1237{
1238 thread_local ProfilerThreadData data( GetProfilerData() );
1239 return data;
1240}
1241#endif
1242
1243TRACY_API moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* GetToken() { return GetProfilerThreadData().token.ptr; }
1244TRACY_API Profiler& GetProfiler() { return GetProfilerData().profiler; }
1245TRACY_API moodycamel::ConcurrentQueue<QueueItem>& GetQueue() { return GetProfilerData().queue; }
1246TRACY_API int64_t GetInitTime() { return GetProfilerData().initTime; }
1247TRACY_API std::atomic<uint32_t>& GetLockCounter() { return GetProfilerData().lockCounter; }
1248TRACY_API std::atomic<uint8_t>& GetGpuCtxCounter() { return GetProfilerData().gpuCtxCounter; }
1249TRACY_API GpuCtxWrapper& GetGpuCtx() { return GetProfilerThreadData().gpuCtx; }
1250TRACY_API uint32_t GetThreadHandle() { return detail::GetThreadHandleImpl(); }
1251std::atomic<ThreadNameData*>& GetThreadNameData() { return GetProfilerData().threadNameData; }
1252
1253# ifdef TRACY_ON_DEMAND
1254TRACY_API LuaZoneState& GetLuaZoneState() { return GetProfilerThreadData().luaZoneState; }
1255# endif
1256
1257# ifndef TRACY_MANUAL_LIFETIME
1258namespace
1259{
1260 const auto& __profiler_init = GetProfiler();
1261}
1262# endif
1263
1264#else
1265
1266// MSVC static initialization order solution. gcc/clang uses init_order() to avoid all this.
1267
1268// 1a. But s_queue is needed for initialization of variables in point 2.
// Forward declaration: the thread_local tokens below refer to s_queue before its definition.
1269extern moodycamel::ConcurrentQueue<QueueItem> s_queue;
1270
1271// 2. If these variables would be in the .CRT$XCB section, they would be initialized only in main thread.
// Per-thread producer token, the explicit producer obtained through it, and the cached thread handle.
1272thread_local moodycamel::ProducerToken init_order(107) s_token_detail( s_queue );
1273thread_local ProducerWrapper init_order(108) s_token { s_queue.get_explicit_producer( s_token_detail ) };
1274thread_local ThreadHandleWrapper init_order(104) s_threadHandle { detail::GetThreadHandleImpl() };
1275
1276# ifdef _MSC_VER
1277// 1. Initialize these static variables before all other variables.
1278# pragma warning( disable : 4075 )
1279# pragma init_seg( ".CRT$XCB" )
1280# endif
1281
// With gcc, init_order(n) expands to __attribute__((init_priority(n))); lower values are
// constructed first. The values below therefore order: timer (101), allocator flags (102),
// queue (103), counters and wrappers (104), profiler (105).
1282static InitTimeWrapper init_order(101) s_initTime { SetupHwTimer() };
1283std::atomic<int> init_order(102) RpInitDone( 0 );
1284std::atomic<int> init_order(102) RpInitLock( 0 );
1285thread_local bool RpThreadInitDone = false;
1286thread_local bool RpThreadShutdown = false;
// The shared queue, constructed with QueuePrealloc as its constructor argument.
1287moodycamel::ConcurrentQueue<QueueItem> init_order(103) s_queue( QueuePrealloc );
1288std::atomic<uint32_t> init_order(104) s_lockCounter( 0 );
1289std::atomic<uint8_t> init_order(104) s_gpuCtxCounter( 0 );
1290
1291thread_local GpuCtxWrapper init_order(104) s_gpuCtx { nullptr };
1292
1293struct ThreadNameData;
// s_threadNameData is a reference alias of the static instance; GetThreadNameData() returns it.
1294static std::atomic<ThreadNameData*> init_order(104) s_threadNameDataInstance( nullptr );
1295std::atomic<ThreadNameData*>& s_threadNameData = s_threadNameDataInstance;
1296
1297# ifdef TRACY_ON_DEMAND
1298thread_local LuaZoneState init_order(104) s_luaZoneState { 0, false };
1299# endif
1300
// The profiler itself; its constructor runs during static initialization of this object.
1301static Profiler init_order(105) s_profiler;
1302
1303TRACY_API moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* GetToken() { return s_token.ptr; }
1304TRACY_API Profiler& GetProfiler() { return s_profiler; }
1305TRACY_API moodycamel::ConcurrentQueue<QueueItem>& GetQueue() { return s_queue; }
1306TRACY_API int64_t GetInitTime() { return s_initTime.val; }
1307TRACY_API std::atomic<uint32_t>& GetLockCounter() { return s_lockCounter; }
1308TRACY_API std::atomic<uint8_t>& GetGpuCtxCounter() { return s_gpuCtxCounter; }
1309TRACY_API GpuCtxWrapper& GetGpuCtx() { return s_gpuCtx; }
1310TRACY_API uint32_t GetThreadHandle() { return s_threadHandle.val; }
1311
1312std::atomic<ThreadNameData*>& GetThreadNameData() { return s_threadNameData; }
1313
1314# ifdef TRACY_ON_DEMAND
1315TRACY_API LuaZoneState& GetLuaZoneState() { return s_luaZoneState; }
1316# endif
1317#endif
1318
1319TRACY_API bool ProfilerAvailable() { return s_instance != nullptr; }
1320TRACY_API bool ProfilerAllocatorAvailable() { return !RpThreadShutdown; }
1321
1322Profiler::Profiler()
1323 : m_timeBegin( 0 )
1324 , m_mainThread( detail::GetThreadHandleImpl() )
1325 , m_epoch( std::chrono::duration_cast<std::chrono::seconds>( std::chrono::system_clock::now().time_since_epoch() ).count() )
1326 , m_shutdown( false )
1327 , m_shutdownManual( false )
1328 , m_shutdownFinished( false )
1329 , m_sock( nullptr )
1330 , m_broadcast( nullptr )
1331 , m_noExit( false )
1332 , m_userPort( 0 )
1333 , m_zoneId( 1 )
1334 , m_samplingPeriod( 0 )
1335 , m_stream( LZ4_createStream() )
1336 , m_buffer( (char*)tracy_malloc( TargetFrameSize*3 ) )
1337 , m_bufferOffset( 0 )
1338 , m_bufferStart( 0 )
1339 , m_lz4Buf( (char*)tracy_malloc( LZ4Size + sizeof( lz4sz_t ) ) )
1340 , m_serialQueue( 1024*1024 )
1341 , m_serialDequeue( 1024*1024 )
1342#ifndef TRACY_NO_FRAME_IMAGE
1343 , m_fiQueue( 16 )
1344 , m_fiDequeue( 16 )
1345#endif
1346 , m_symbolQueue( 8*1024 )
1347 , m_frameCount( 0 )
1348 , m_isConnected( false )
1349#ifdef TRACY_ON_DEMAND
1350 , m_connectionId( 0 )
1351 , m_deferredQueue( 64*1024 )
1352#endif
1353 , m_paramCallback( nullptr )
1354 , m_sourceCallback( nullptr )
1355 , m_queryImage( nullptr )
1356 , m_queryData( nullptr )
1357 , m_crashHandlerInstalled( false )
1358{
1359 assert( !s_instance );
1360 s_instance = this;
1361
1362#ifndef TRACY_DELAYED_INIT
1363# ifdef _MSC_VER
1364 // 3. But these variables need to be initialized in main thread within the .CRT$XCB section. Do it here.
1365 s_token_detail = moodycamel::ProducerToken( s_queue );
1366 s_token = ProducerWrapper { s_queue.get_explicit_producer( s_token_detail ) };
1367 s_threadHandle = ThreadHandleWrapper { m_mainThread };
1368# endif
1369#endif
1370
1371 CalibrateTimer();
1372 CalibrateDelay();
1373 ReportTopology();
1374
1375#ifndef TRACY_NO_EXIT
1376 const char* noExitEnv = GetEnvVar( "TRACY_NO_EXIT" );
1377 if( noExitEnv && noExitEnv[0] == '1' )
1378 {
1379 m_noExit = true;
1380 }
1381#endif
1382
1383 const char* userPort = GetEnvVar( "TRACY_PORT" );
1384 if( userPort )
1385 {
1386 m_userPort = atoi( userPort );
1387 }
1388
1389#if !defined(TRACY_DELAYED_INIT) || !defined(TRACY_MANUAL_LIFETIME)
1390 SpawnWorkerThreads();
1391#endif
1392}
1393
1394void Profiler::SpawnWorkerThreads()
1395{
1396#ifdef TRACY_HAS_SYSTEM_TRACING
1397 if( SysTraceStart( m_samplingPeriod ) )
1398 {
1399 s_sysTraceThread = (Thread*)tracy_malloc( sizeof( Thread ) );
1400 new(s_sysTraceThread) Thread( SysTraceWorker, nullptr );
1401 std::this_thread::sleep_for( std::chrono::milliseconds( 1 ) );
1402 }
1403#endif
1404
1405 s_thread = (Thread*)tracy_malloc( sizeof( Thread ) );
1406 new(s_thread) Thread( LaunchWorker, this );
1407
1408#ifndef TRACY_NO_FRAME_IMAGE
1409 s_compressThread = (Thread*)tracy_malloc( sizeof( Thread ) );
1410 new(s_compressThread) Thread( LaunchCompressWorker, this );
1411#endif
1412
1413#ifdef TRACY_HAS_CALLSTACK
1414 s_symbolThread = (Thread*)tracy_malloc( sizeof( Thread ) );
1415 new(s_symbolThread) Thread( LaunchSymbolWorker, this );
1416#endif
1417
1418#if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER
1419 s_profilerThreadId = GetThreadId( s_thread->Handle() );
1420# ifdef TRACY_HAS_CALLSTACK
1421 s_symbolThreadId = GetThreadId( s_symbolThread->Handle() );
1422# endif
1423 m_exceptionHandler = AddVectoredExceptionHandler( 1, CrashFilter );
1424#endif
1425
1426#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER
1427 struct sigaction threadFreezer = {};
1428 threadFreezer.sa_handler = ThreadFreezer;
1429 sigaction( TRACY_CRASH_SIGNAL, &threadFreezer, &m_prevSignal.pwr );
1430
1431 struct sigaction crashHandler = {};
1432 crashHandler.sa_sigaction = CrashHandler;
1433 crashHandler.sa_flags = SA_SIGINFO;
1434 sigaction( SIGILL, &crashHandler, &m_prevSignal.ill );
1435 sigaction( SIGFPE, &crashHandler, &m_prevSignal.fpe );
1436 sigaction( SIGSEGV, &crashHandler, &m_prevSignal.segv );
1437 sigaction( SIGPIPE, &crashHandler, &m_prevSignal.pipe );
1438 sigaction( SIGBUS, &crashHandler, &m_prevSignal.bus );
1439 sigaction( SIGABRT, &crashHandler, &m_prevSignal.abrt );
1440#endif
1441
1442#ifndef TRACY_NO_CRASH_HANDLER
1443 m_crashHandlerInstalled = true;
1444#endif
1445
1446#ifdef TRACY_HAS_CALLSTACK
1447 InitCallstackCritical();
1448#endif
1449
1450 m_timeBegin.store( GetTime(), std::memory_order_relaxed );
1451}
1452
1453Profiler::~Profiler()
1454{
1455 m_shutdown.store( true, std::memory_order_relaxed );
1456
1457#if defined _WIN32 && !defined TRACY_UWP
1458 if( m_crashHandlerInstalled ) RemoveVectoredExceptionHandler( m_exceptionHandler );
1459#endif
1460
1461#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER
1462 if( m_crashHandlerInstalled )
1463 {
1464 sigaction( TRACY_CRASH_SIGNAL, &m_prevSignal.pwr, nullptr );
1465 sigaction( SIGILL, &m_prevSignal.ill, nullptr );
1466 sigaction( SIGFPE, &m_prevSignal.fpe, nullptr );
1467 sigaction( SIGSEGV, &m_prevSignal.segv, nullptr );
1468 sigaction( SIGPIPE, &m_prevSignal.pipe, nullptr );
1469 sigaction( SIGBUS, &m_prevSignal.bus, nullptr );
1470 sigaction( SIGABRT, &m_prevSignal.abrt, nullptr );
1471 }
1472#endif
1473
1474#ifdef TRACY_HAS_SYSTEM_TRACING
1475 if( s_sysTraceThread )
1476 {
1477 SysTraceStop();
1478 s_sysTraceThread->~Thread();
1479 tracy_free( s_sysTraceThread );
1480 }
1481#endif
1482
1483#ifdef TRACY_HAS_CALLSTACK
1484 s_symbolThread->~Thread();
1485 tracy_free( s_symbolThread );
1486#endif
1487
1488#ifndef TRACY_NO_FRAME_IMAGE
1489 s_compressThread->~Thread();
1490 tracy_free( s_compressThread );
1491#endif
1492
1493 s_thread->~Thread();
1494 tracy_free( s_thread );
1495
1496#ifdef TRACY_HAS_CALLSTACK
1497 EndCallstack();
1498#endif
1499
1500 tracy_free( m_lz4Buf );
1501 tracy_free( m_buffer );
1502 LZ4_freeStream( (LZ4_stream_t*)m_stream );
1503
1504 if( m_sock )
1505 {
1506 m_sock->~Socket();
1507 tracy_free( m_sock );
1508 }
1509
1510 if( m_broadcast )
1511 {
1512 m_broadcast->~UdpBroadcast();
1513 tracy_free( m_broadcast );
1514 }
1515
1516 assert( s_instance );
1517 s_instance = nullptr;
1518}
1519
1520bool Profiler::ShouldExit()
1521{
1522 return s_instance->m_shutdown.load( std::memory_order_relaxed );
1523}
1524
1525void Profiler::Worker()
1526{
1527#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER
1528 s_profilerTid = syscall( SYS_gettid );
1529#endif
1530
1531 ThreadExitHandler threadExitHandler;
1532
1533 SetThreadName( "Tracy Profiler" );
1534
1535#ifdef TRACY_DATA_PORT
1536 const bool dataPortSearch = false;
1537 auto dataPort = m_userPort != 0 ? m_userPort : TRACY_DATA_PORT;
1538#else
1539 const bool dataPortSearch = m_userPort == 0;
1540 auto dataPort = m_userPort != 0 ? m_userPort : 8086;
1541#endif
1542#ifdef TRACY_BROADCAST_PORT
1543 const auto broadcastPort = TRACY_BROADCAST_PORT;
1544#else
1545 const auto broadcastPort = 8086;
1546#endif
1547
1548 while( m_timeBegin.load( std::memory_order_relaxed ) == 0 ) std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
1549
1550#ifdef TRACY_USE_RPMALLOC
1551 rpmalloc_thread_initialize();
1552#endif
1553
1554 m_exectime = 0;
1555 const auto execname = GetProcessExecutablePath();
1556 if( execname )
1557 {
1558 struct stat st;
1559 if( stat( execname, &st ) == 0 )
1560 {
1561 m_exectime = (uint64_t)st.st_mtime;
1562 }
1563 }
1564
1565 const auto procname = GetProcessName();
1566 const auto pnsz = std::min<size_t>( strlen( procname ), WelcomeMessageProgramNameSize - 1 );
1567
1568 const auto hostinfo = GetHostInfo();
1569 const auto hisz = std::min<size_t>( strlen( hostinfo ), WelcomeMessageHostInfoSize - 1 );
1570
1571 const uint64_t pid = GetPid();
1572
1573 uint8_t flags = 0;
1574
1575#ifdef TRACY_ON_DEMAND
1576 flags |= WelcomeFlag::OnDemand;
1577#endif
1578#ifdef __APPLE__
1579 flags |= WelcomeFlag::IsApple;
1580#endif
1581#ifndef TRACY_NO_CODE_TRANSFER
1582 flags |= WelcomeFlag::CodeTransfer;
1583#endif
1584#ifdef _WIN32
1585 flags |= WelcomeFlag::CombineSamples;
1586# ifndef TRACY_NO_CONTEXT_SWITCH
1587 flags |= WelcomeFlag::IdentifySamples;
1588# endif
1589#endif
1590
1591#if defined __i386 || defined _M_IX86
1592 uint8_t cpuArch = CpuArchX86;
1593#elif defined __x86_64__ || defined _M_X64
1594 uint8_t cpuArch = CpuArchX64;
1595#elif defined __aarch64__
1596 uint8_t cpuArch = CpuArchArm64;
1597#elif defined __ARM_ARCH
1598 uint8_t cpuArch = CpuArchArm32;
1599#else
1600 uint8_t cpuArch = CpuArchUnknown;
1601#endif
1602
1603#if defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64
1604 uint32_t regs[4];
1605 char manufacturer[12];
1606 CpuId( regs, 0 );
1607 memcpy( manufacturer, regs+1, 4 );
1608 memcpy( manufacturer+4, regs+3, 4 );
1609 memcpy( manufacturer+8, regs+2, 4 );
1610
1611 CpuId( regs, 1 );
1612 uint32_t cpuId = ( regs[0] & 0xFFF ) | ( ( regs[0] & 0xFFF0000 ) >> 4 );
1613#else
1614 const char manufacturer[12] = {};
1615 uint32_t cpuId = 0;
1616#endif
1617
1618 WelcomeMessage welcome;
1619 MemWrite( &welcome.timerMul, m_timerMul );
1620 MemWrite( &welcome.initBegin, GetInitTime() );
1621 MemWrite( &welcome.initEnd, m_timeBegin.load( std::memory_order_relaxed ) );
1622 MemWrite( &welcome.delay, m_delay );
1623 MemWrite( &welcome.resolution, m_resolution );
1624 MemWrite( &welcome.epoch, m_epoch );
1625 MemWrite( &welcome.exectime, m_exectime );
1626 MemWrite( &welcome.pid, pid );
1627 MemWrite( &welcome.samplingPeriod, m_samplingPeriod );
1628 MemWrite( &welcome.flags, flags );
1629 MemWrite( &welcome.cpuArch, cpuArch );
1630 memcpy( welcome.cpuManufacturer, manufacturer, 12 );
1631 MemWrite( &welcome.cpuId, cpuId );
1632 memcpy( welcome.programName, procname, pnsz );
1633 memset( welcome.programName + pnsz, 0, WelcomeMessageProgramNameSize - pnsz );
1634 memcpy( welcome.hostInfo, hostinfo, hisz );
1635 memset( welcome.hostInfo + hisz, 0, WelcomeMessageHostInfoSize - hisz );
1636
1637 moodycamel::ConsumerToken token( GetQueue() );
1638
1639 ListenSocket listen;
1640 bool isListening = false;
1641 if( !dataPortSearch )
1642 {
1643 isListening = listen.Listen( dataPort, 4 );
1644 }
1645 else
1646 {
1647 for( uint32_t i=0; i<20; i++ )
1648 {
1649 if( listen.Listen( dataPort+i, 4 ) )
1650 {
1651 dataPort += i;
1652 isListening = true;
1653 break;
1654 }
1655 }
1656 }
1657 if( !isListening )
1658 {
1659 for(;;)
1660 {
1661 if( ShouldExit() )
1662 {
1663 m_shutdownFinished.store( true, std::memory_order_relaxed );
1664 return;
1665 }
1666
1667 ClearQueues( token );
1668 std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
1669 }
1670 }
1671
1672#ifndef TRACY_NO_BROADCAST
1673 m_broadcast = (UdpBroadcast*)tracy_malloc( sizeof( UdpBroadcast ) );
1674 new(m_broadcast) UdpBroadcast();
1675# ifdef TRACY_ONLY_LOCALHOST
1676 const char* addr = "127.255.255.255";
1677# else
1678 const char* addr = "255.255.255.255";
1679# endif
1680 if( !m_broadcast->Open( addr, broadcastPort ) )
1681 {
1682 m_broadcast->~UdpBroadcast();
1683 tracy_free( m_broadcast );
1684 m_broadcast = nullptr;
1685 }
1686#endif
1687
1688 int broadcastLen = 0;
1689 auto& broadcastMsg = GetBroadcastMessage( procname, pnsz, broadcastLen, dataPort );
1690 uint64_t lastBroadcast = 0;
1691
1692 // Connections loop.
1693 // Each iteration of the loop handles whole connection. Multiple iterations will only
1694 // happen in the on-demand mode or when handshake fails.
1695 for(;;)
1696 {
1697 // Wait for incoming connection
1698 for(;;)
1699 {
1700#ifndef TRACY_NO_EXIT
1701 if( !m_noExit && ShouldExit() )
1702 {
1703 if( m_broadcast )
1704 {
1705 broadcastMsg.activeTime = -1;
1706 m_broadcast->Send( broadcastPort, &broadcastMsg, broadcastLen );
1707 }
1708 m_shutdownFinished.store( true, std::memory_order_relaxed );
1709 return;
1710 }
1711#endif
1712 m_sock = listen.Accept();
1713 if( m_sock ) break;
1714#ifndef TRACY_ON_DEMAND
1715 ProcessSysTime();
1716#endif
1717
1718 if( m_broadcast )
1719 {
1720 const auto t = std::chrono::high_resolution_clock::now().time_since_epoch().count();
1721 if( t - lastBroadcast > 3000000000 ) // 3s
1722 {
1723 lastBroadcast = t;
1724 const auto ts = std::chrono::duration_cast<std::chrono::seconds>( std::chrono::system_clock::now().time_since_epoch() ).count();
1725 broadcastMsg.activeTime = int32_t( ts - m_epoch );
1726 assert( broadcastMsg.activeTime >= 0 );
1727 m_broadcast->Send( broadcastPort, &broadcastMsg, broadcastLen );
1728 }
1729 }
1730 }
1731
1732 if( m_broadcast )
1733 {
1734 lastBroadcast = 0;
1735 broadcastMsg.activeTime = -1;
1736 m_broadcast->Send( broadcastPort, &broadcastMsg, broadcastLen );
1737 }
1738
1739 // Handshake
1740 {
1741 char shibboleth[HandshakeShibbolethSize];
1742 auto res = m_sock->ReadRaw( shibboleth, HandshakeShibbolethSize, 2000 );
1743 if( !res || memcmp( shibboleth, HandshakeShibboleth, HandshakeShibbolethSize ) != 0 )
1744 {
1745 m_sock->~Socket();
1746 tracy_free( m_sock );
1747 m_sock = nullptr;
1748 continue;
1749 }
1750
1751 uint32_t protocolVersion;
1752 res = m_sock->ReadRaw( &protocolVersion, sizeof( protocolVersion ), 2000 );
1753 if( !res )
1754 {
1755 m_sock->~Socket();
1756 tracy_free( m_sock );
1757 m_sock = nullptr;
1758 continue;
1759 }
1760
1761 if( protocolVersion != ProtocolVersion )
1762 {
1763 HandshakeStatus status = HandshakeProtocolMismatch;
1764 m_sock->Send( &status, sizeof( status ) );
1765 m_sock->~Socket();
1766 tracy_free( m_sock );
1767 m_sock = nullptr;
1768 continue;
1769 }
1770 }
1771
1772#ifdef TRACY_ON_DEMAND
1773 const auto currentTime = GetTime();
1774 ClearQueues( token );
1775 m_connectionId.fetch_add( 1, std::memory_order_release );
1776#endif
1777 m_isConnected.store( true, std::memory_order_release );
1778
1779 HandshakeStatus handshake = HandshakeWelcome;
1780 m_sock->Send( &handshake, sizeof( handshake ) );
1781
1782 LZ4_resetStream( (LZ4_stream_t*)m_stream );
1783 m_sock->Send( &welcome, sizeof( welcome ) );
1784
1785 m_threadCtx = 0;
1786 m_refTimeSerial = 0;
1787 m_refTimeCtx = 0;
1788 m_refTimeGpu = 0;
1789
1790#ifdef TRACY_ON_DEMAND
1791 OnDemandPayloadMessage onDemand;
1792 onDemand.frames = m_frameCount.load( std::memory_order_relaxed );
1793 onDemand.currentTime = currentTime;
1794
1795 m_sock->Send( &onDemand, sizeof( onDemand ) );
1796
1797 m_deferredLock.lock();
1798 for( auto& item : m_deferredQueue )
1799 {
1800 uint64_t ptr;
1801 uint16_t size;
1802 const auto idx = MemRead<uint8_t>( &item.hdr.idx );
1803 switch( (QueueType)idx )
1804 {
1805 case QueueType::MessageAppInfo:
1806 ptr = MemRead<uint64_t>( &item.messageFat.text );
1807 size = MemRead<uint16_t>( &item.messageFat.size );
1808 SendSingleString( (const char*)ptr, size );
1809 break;
1810 case QueueType::LockName:
1811 ptr = MemRead<uint64_t>( &item.lockNameFat.name );
1812 size = MemRead<uint16_t>( &item.lockNameFat.size );
1813 SendSingleString( (const char*)ptr, size );
1814 break;
1815 case QueueType::GpuContextName:
1816 ptr = MemRead<uint64_t>( &item.gpuContextNameFat.ptr );
1817 size = MemRead<uint16_t>( &item.gpuContextNameFat.size );
1818 SendSingleString( (const char*)ptr, size );
1819 break;
1820 default:
1821 break;
1822 }
1823 AppendData( &item, QueueDataSize[idx] );
1824 }
1825 m_deferredLock.unlock();
1826#endif
1827
1828 // Main communications loop
1829 int keepAlive = 0;
1830 for(;;)
1831 {
1832 ProcessSysTime();
1833 const auto status = Dequeue( token );
1834 const auto serialStatus = DequeueSerial();
1835 if( status == DequeueStatus::ConnectionLost || serialStatus == DequeueStatus::ConnectionLost )
1836 {
1837 break;
1838 }
1839 else if( status == DequeueStatus::QueueEmpty && serialStatus == DequeueStatus::QueueEmpty )
1840 {
1841 if( ShouldExit() ) break;
1842 if( m_bufferOffset != m_bufferStart )
1843 {
1844 if( !CommitData() ) break;
1845 }
1846 if( keepAlive == 500 )
1847 {
1848 QueueItem ka;
1849 ka.hdr.type = QueueType::KeepAlive;
1850 AppendData( &ka, QueueDataSize[ka.hdr.idx] );
1851 if( !CommitData() ) break;
1852
1853 keepAlive = 0;
1854 }
1855 else if( !m_sock->HasData() )
1856 {
1857 keepAlive++;
1858 std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
1859 }
1860 }
1861 else
1862 {
1863 keepAlive = 0;
1864 }
1865
1866 bool connActive = true;
1867 while( m_sock->HasData() )
1868 {
1869 connActive = HandleServerQuery();
1870 if( !connActive ) break;
1871 }
1872 if( !connActive ) break;
1873 }
1874 if( ShouldExit() ) break;
1875
1876 m_isConnected.store( false, std::memory_order_release );
1877#ifdef TRACY_ON_DEMAND
1878 m_bufferOffset = 0;
1879 m_bufferStart = 0;
1880#endif
1881
1882 m_sock->~Socket();
1883 tracy_free( m_sock );
1884 m_sock = nullptr;
1885
1886#ifndef TRACY_ON_DEMAND
1887 // Client is no longer available here. Accept incoming connections, but reject handshake.
1888 for(;;)
1889 {
1890 if( ShouldExit() )
1891 {
1892 m_shutdownFinished.store( true, std::memory_order_relaxed );
1893 return;
1894 }
1895
1896 ClearQueues( token );
1897
1898 m_sock = listen.Accept();
1899 if( m_sock )
1900 {
1901 char shibboleth[HandshakeShibbolethSize];
1902 auto res = m_sock->ReadRaw( shibboleth, HandshakeShibbolethSize, 1000 );
1903 if( !res || memcmp( shibboleth, HandshakeShibboleth, HandshakeShibbolethSize ) != 0 )
1904 {
1905 m_sock->~Socket();
1906 tracy_free( m_sock );
1907 m_sock = nullptr;
1908 continue;
1909 }
1910
1911 uint32_t protocolVersion;
1912 res = m_sock->ReadRaw( &protocolVersion, sizeof( protocolVersion ), 1000 );
1913 if( !res )
1914 {
1915 m_sock->~Socket();
1916 tracy_free( m_sock );
1917 m_sock = nullptr;
1918 continue;
1919 }
1920
1921 HandshakeStatus status = HandshakeNotAvailable;
1922 m_sock->Send( &status, sizeof( status ) );
1923 m_sock->~Socket();
1924 tracy_free( m_sock );
1925 }
1926 }
1927#endif
1928 }
1929 // End of connections loop
1930
1931 // Wait for symbols thread to terminate. Symbol resolution will continue in this thread.
1932#ifdef TRACY_HAS_CALLSTACK
1933 while( s_symbolThreadGone.load() == false ) { YieldThread(); }
1934#endif
1935
1936 // Client is exiting. Send items remaining in queues.
1937 for(;;)
1938 {
1939 const auto status = Dequeue( token );
1940 const auto serialStatus = DequeueSerial();
1941 if( status == DequeueStatus::ConnectionLost || serialStatus == DequeueStatus::ConnectionLost )
1942 {
1943 m_shutdownFinished.store( true, std::memory_order_relaxed );
1944 return;
1945 }
1946 else if( status == DequeueStatus::QueueEmpty && serialStatus == DequeueStatus::QueueEmpty )
1947 {
1948 if( m_bufferOffset != m_bufferStart ) CommitData();
1949 break;
1950 }
1951
1952 while( m_sock->HasData() )
1953 {
1954 if( !HandleServerQuery() )
1955 {
1956 m_shutdownFinished.store( true, std::memory_order_relaxed );
1957 return;
1958 }
1959 }
1960
1961#ifdef TRACY_HAS_CALLSTACK
1962 for(;;)
1963 {
1964 auto si = m_symbolQueue.front();
1965 if( !si ) break;
1966 HandleSymbolQueueItem( *si );
1967 m_symbolQueue.pop();
1968 }
1969#endif
1970 }
1971
1972 // Send client termination notice to the server
1973 QueueItem terminate;
1974 MemWrite( &terminate.hdr.type, QueueType::Terminate );
1975 if( !SendData( (const char*)&terminate, 1 ) )
1976 {
1977 m_shutdownFinished.store( true, std::memory_order_relaxed );
1978 return;
1979 }
1980 // Handle remaining server queries
1981 for(;;)
1982 {
1983 while( m_sock->HasData() )
1984 {
1985 if( !HandleServerQuery() )
1986 {
1987 m_shutdownFinished.store( true, std::memory_order_relaxed );
1988 return;
1989 }
1990 }
1991#ifdef TRACY_HAS_CALLSTACK
1992 for(;;)
1993 {
1994 auto si = m_symbolQueue.front();
1995 if( !si ) break;
1996 HandleSymbolQueueItem( *si );
1997 m_symbolQueue.pop();
1998 }
1999#endif
2000 const auto status = Dequeue( token );
2001 const auto serialStatus = DequeueSerial();
2002 if( status == DequeueStatus::ConnectionLost || serialStatus == DequeueStatus::ConnectionLost )
2003 {
2004 m_shutdownFinished.store( true, std::memory_order_relaxed );
2005 return;
2006 }
2007 if( m_bufferOffset != m_bufferStart )
2008 {
2009 if( !CommitData() )
2010 {
2011 m_shutdownFinished.store( true, std::memory_order_relaxed );
2012 return;
2013 }
2014 }
2015 }
2016}
2017
2018#ifndef TRACY_NO_FRAME_IMAGE
2019void Profiler::CompressWorker()
2020{
2021 ThreadExitHandler threadExitHandler;
2022 SetThreadName( "Tracy DXT1" );
2023 while( m_timeBegin.load( std::memory_order_relaxed ) == 0 ) std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
2024
2025#ifdef TRACY_USE_RPMALLOC
2026 rpmalloc_thread_initialize();
2027#endif
2028
2029 for(;;)
2030 {
2031 const auto shouldExit = ShouldExit();
2032
2033 {
2034 bool lockHeld = true;
2035 while( !m_fiLock.try_lock() )
2036 {
2037 if( m_shutdownManual.load( std::memory_order_relaxed ) )
2038 {
2039 lockHeld = false;
2040 break;
2041 }
2042 }
2043 if( !m_fiQueue.empty() ) m_fiQueue.swap( m_fiDequeue );
2044 if( lockHeld )
2045 {
2046 m_fiLock.unlock();
2047 }
2048 }
2049
2050 const auto sz = m_fiDequeue.size();
2051 if( sz > 0 )
2052 {
2053 auto fi = m_fiDequeue.data();
2054 auto end = fi + sz;
2055 while( fi != end )
2056 {
2057 const auto w = fi->w;
2058 const auto h = fi->h;
2059 const auto csz = size_t( w * h / 2 );
2060 auto etc1buf = (char*)tracy_malloc( csz );
2061 CompressImageDxt1( (const char*)fi->image, etc1buf, w, h );
2062 tracy_free( fi->image );
2063
2064 TracyLfqPrepare( QueueType::FrameImage );
2065 MemWrite( &item->frameImageFat.image, (uint64_t)etc1buf );
2066 MemWrite( &item->frameImageFat.frame, fi->frame );
2067 MemWrite( &item->frameImageFat.w, w );
2068 MemWrite( &item->frameImageFat.h, h );
2069 uint8_t flip = fi->flip;
2070 MemWrite( &item->frameImageFat.flip, flip );
2071 TracyLfqCommit;
2072
2073 fi++;
2074 }
2075 m_fiDequeue.clear();
2076 }
2077 else
2078 {
2079 std::this_thread::sleep_for( std::chrono::milliseconds( 20 ) );
2080 }
2081
2082 if( shouldExit )
2083 {
2084 return;
2085 }
2086 }
2087}
2088#endif
2089
2090static void FreeAssociatedMemory( const QueueItem& item )
2091{
2092 if( item.hdr.idx >= (int)QueueType::Terminate ) return;
2093
2094 uint64_t ptr;
2095 switch( item.hdr.type )
2096 {
2097 case QueueType::ZoneText:
2098 case QueueType::ZoneName:
2099 ptr = MemRead<uint64_t>( &item.zoneTextFat.text );
2100 tracy_free( (void*)ptr );
2101 break;
2102 case QueueType::MessageColor:
2103 case QueueType::MessageColorCallstack:
2104 ptr = MemRead<uint64_t>( &item.messageColorFat.text );
2105 tracy_free( (void*)ptr );
2106 break;
2107 case QueueType::Message:
2108 case QueueType::MessageCallstack:
2109#ifndef TRACY_ON_DEMAND
2110 case QueueType::MessageAppInfo:
2111#endif
2112 ptr = MemRead<uint64_t>( &item.messageFat.text );
2113 tracy_free( (void*)ptr );
2114 break;
2115 case QueueType::ZoneBeginAllocSrcLoc:
2116 case QueueType::ZoneBeginAllocSrcLocCallstack:
2117 ptr = MemRead<uint64_t>( &item.zoneBegin.srcloc );
2118 tracy_free( (void*)ptr );
2119 break;
2120 case QueueType::GpuZoneBeginAllocSrcLoc:
2121 case QueueType::GpuZoneBeginAllocSrcLocCallstack:
2122 case QueueType::GpuZoneBeginAllocSrcLocSerial:
2123 case QueueType::GpuZoneBeginAllocSrcLocCallstackSerial:
2124 ptr = MemRead<uint64_t>( &item.gpuZoneBegin.srcloc );
2125 tracy_free( (void*)ptr );
2126 break;
2127 case QueueType::CallstackSerial:
2128 case QueueType::Callstack:
2129 ptr = MemRead<uint64_t>( &item.callstackFat.ptr );
2130 tracy_free( (void*)ptr );
2131 break;
2132 case QueueType::CallstackAlloc:
2133 ptr = MemRead<uint64_t>( &item.callstackAllocFat.nativePtr );
2134 tracy_free( (void*)ptr );
2135 ptr = MemRead<uint64_t>( &item.callstackAllocFat.ptr );
2136 tracy_free( (void*)ptr );
2137 break;
2138 case QueueType::CallstackSample:
2139 case QueueType::CallstackSampleContextSwitch:
2140 ptr = MemRead<uint64_t>( &item.callstackSampleFat.ptr );
2141 tracy_free( (void*)ptr );
2142 break;
2143 case QueueType::FrameImage:
2144 ptr = MemRead<uint64_t>( &item.frameImageFat.image );
2145 tracy_free( (void*)ptr );
2146 break;
2147#ifdef TRACY_HAS_CALLSTACK
2148 case QueueType::CallstackFrameSize:
2149 {
2150 InitRpmalloc();
2151 auto size = MemRead<uint8_t>( &item.callstackFrameSizeFat.size );
2152 auto data = (const CallstackEntry*)MemRead<uint64_t>( &item.callstackFrameSizeFat.data );
2153 for( uint8_t i=0; i<size; i++ )
2154 {
2155 const auto& frame = data[i];
2156 tracy_free_fast( (void*)frame.name );
2157 tracy_free_fast( (void*)frame.file );
2158 }
2159 tracy_free_fast( (void*)data );
2160 break;
2161 }
2162 case QueueType::SymbolInformation:
2163 {
2164 uint8_t needFree = MemRead<uint8_t>( &item.symbolInformationFat.needFree );
2165 if( needFree )
2166 {
2167 ptr = MemRead<uint64_t>( &item.symbolInformationFat.fileString );
2168 tracy_free( (void*)ptr );
2169 }
2170 break;
2171 }
2172 case QueueType::SymbolCodeMetadata:
2173 ptr = MemRead<uint64_t>( &item.symbolCodeMetadata.ptr );
2174 tracy_free( (void*)ptr );
2175 break;
2176#endif
2177#ifndef TRACY_ON_DEMAND
2178 case QueueType::LockName:
2179 ptr = MemRead<uint64_t>( &item.lockNameFat.name );
2180 tracy_free( (void*)ptr );
2181 break;
2182 case QueueType::GpuContextName:
2183 ptr = MemRead<uint64_t>( &item.gpuContextNameFat.ptr );
2184 tracy_free( (void*)ptr );
2185 break;
2186#endif
2187#ifdef TRACY_ON_DEMAND
2188 case QueueType::MessageAppInfo:
2189 case QueueType::GpuContextName:
2190 // Don't free memory associated with deferred messages.
2191 break;
2192#endif
2193#ifdef TRACY_HAS_SYSTEM_TRACING
2194 case QueueType::ExternalNameMetadata:
2195 ptr = MemRead<uint64_t>( &item.externalNameMetadata.name );
2196 tracy_free( (void*)ptr );
2197 ptr = MemRead<uint64_t>( &item.externalNameMetadata.threadName );
2198 tracy_free_fast( (void*)ptr );
2199 break;
2200#endif
2201 case QueueType::SourceCodeMetadata:
2202 ptr = MemRead<uint64_t>( &item.sourceCodeMetadata.ptr );
2203 tracy_free( (void*)ptr );
2204 break;
2205 default:
2206 break;
2207 }
2208}
2209
2210void Profiler::ClearQueues( moodycamel::ConsumerToken& token )
2211{
2212 for(;;)
2213 {
2214 const auto sz = GetQueue().try_dequeue_bulk_single( token, [](const uint64_t&){}, []( QueueItem* item, size_t sz ) { assert( sz > 0 ); while( sz-- > 0 ) FreeAssociatedMemory( *item++ ); } );
2215 if( sz == 0 ) break;
2216 }
2217
2218 ClearSerial();
2219}
2220
2221void Profiler::ClearSerial()
2222{
2223 bool lockHeld = true;
2224 while( !m_serialLock.try_lock() )
2225 {
2226 if( m_shutdownManual.load( std::memory_order_relaxed ) )
2227 {
2228 lockHeld = false;
2229 break;
2230 }
2231 }
2232 for( auto& v : m_serialQueue ) FreeAssociatedMemory( v );
2233 m_serialQueue.clear();
2234 if( lockHeld )
2235 {
2236 m_serialLock.unlock();
2237 }
2238
2239 for( auto& v : m_serialDequeue ) FreeAssociatedMemory( v );
2240 m_serialDequeue.clear();
2241}
2242
// Dequeues one batch of items from the lock-free queue and appends them to the
// outgoing data buffer.
//
// The first callback receives the producing thread's id and runs
// ThreadCtxCheck on it. The second callback walks the batch: for item types
// below QueueType::Terminate it sends any out-of-band payload (strings,
// callstacks, source locations, images), frees that payload, and rewrites
// absolute timestamps as deltas against the running reference times
// (thread / context-switch / GPU). The item itself is then appended.
//
// Returns ConnectionLost if ThreadCtxCheck or the common AppendData call
// reported failure, DataDequeued if at least one item was dequeued, and
// QueueEmpty otherwise.
Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token )
{
    bool connectionLost = false;
    const auto sz = GetQueue().try_dequeue_bulk_single( token,
        [this, &connectionLost] ( const uint32_t& threadId )
        {
            if( ThreadCtxCheck( threadId ) == ThreadCtxStatus::ConnectionLost ) connectionLost = true;
        },
        [this, &connectionLost] ( QueueItem* item, size_t sz )
        {
            // Nothing is processed (or freed) for this batch if the thread context could not be sent.
            if( connectionLost ) return;
            InitRpmalloc();
            assert( sz > 0 );
            // Work on local copies of the reference times; they are written back on every exit path below.
            int64_t refThread = m_refTimeThread;
            int64_t refCtx = m_refTimeCtx;
            int64_t refGpu = m_refTimeGpu;
            while( sz-- > 0 )
            {
                uint64_t ptr;
                uint16_t size;
                auto idx = MemRead<uint8_t>( &item->hdr.idx );
                // Only types below Terminate need pre-processing; everything else is appended verbatim.
                if( idx < (int)QueueType::Terminate )
                {
                    switch( (QueueType)idx )
                    {
                    case QueueType::ZoneText:
                    case QueueType::ZoneName:
                        ptr = MemRead<uint64_t>( &item->zoneTextFat.text );
                        size = MemRead<uint16_t>( &item->zoneTextFat.size );
                        SendSingleString( (const char*)ptr, size );
                        tracy_free_fast( (void*)ptr );
                        break;
                    case QueueType::Message:
                    case QueueType::MessageCallstack:
                        ptr = MemRead<uint64_t>( &item->messageFat.text );
                        size = MemRead<uint16_t>( &item->messageFat.size );
                        SendSingleString( (const char*)ptr, size );
                        tracy_free_fast( (void*)ptr );
                        break;
                    case QueueType::MessageColor:
                    case QueueType::MessageColorCallstack:
                        ptr = MemRead<uint64_t>( &item->messageColorFat.text );
                        size = MemRead<uint16_t>( &item->messageColorFat.size );
                        SendSingleString( (const char*)ptr, size );
                        tracy_free_fast( (void*)ptr );
                        break;
                    case QueueType::MessageAppInfo:
                        ptr = MemRead<uint64_t>( &item->messageFat.text );
                        size = MemRead<uint16_t>( &item->messageFat.size );
                        SendSingleString( (const char*)ptr, size );
                        // With TRACY_ON_DEMAND the text is kept alive (not freed here).
#ifndef TRACY_ON_DEMAND
                        tracy_free_fast( (void*)ptr );
#endif
                        break;
                    case QueueType::ZoneBeginAllocSrcLoc:
                    case QueueType::ZoneBeginAllocSrcLocCallstack:
                    {
                        // Delta-encode the timestamp, then ship the allocated source location.
                        int64_t t = MemRead<int64_t>( &item->zoneBegin.time );
                        int64_t dt = t - refThread;
                        refThread = t;
                        MemWrite( &item->zoneBegin.time, dt );
                        ptr = MemRead<uint64_t>( &item->zoneBegin.srcloc );
                        SendSourceLocationPayload( ptr );
                        tracy_free_fast( (void*)ptr );
                        break;
                    }
                    case QueueType::Callstack:
                        ptr = MemRead<uint64_t>( &item->callstackFat.ptr );
                        SendCallstackPayload( ptr );
                        tracy_free_fast( (void*)ptr );
                        break;
                    case QueueType::CallstackAlloc:
                        // The native callstack is optional (may be 0); the allocated one is always sent.
                        ptr = MemRead<uint64_t>( &item->callstackAllocFat.nativePtr );
                        if( ptr != 0 )
                        {
                            CutCallstack( (void*)ptr, "lua_pcall" );
                            SendCallstackPayload( ptr );
                            tracy_free_fast( (void*)ptr );
                        }
                        ptr = MemRead<uint64_t>( &item->callstackAllocFat.ptr );
                        SendCallstackAlloc( ptr );
                        tracy_free_fast( (void*)ptr );
                        break;
                    case QueueType::CallstackSample:
                    case QueueType::CallstackSampleContextSwitch:
                    {
                        ptr = MemRead<uint64_t>( &item->callstackSampleFat.ptr );
                        SendCallstackPayload64( ptr );
                        tracy_free_fast( (void*)ptr );
                        // Sample times are encoded against the context-switch reference, not the thread one.
                        int64_t t = MemRead<int64_t>( &item->callstackSampleFat.time );
                        int64_t dt = t - refCtx;
                        refCtx = t;
                        MemWrite( &item->callstackSampleFat.time, dt );
                        break;
                    }
                    case QueueType::FrameImage:
                    {
                        ptr = MemRead<uint64_t>( &item->frameImageFat.image );
                        const auto w = MemRead<uint16_t>( &item->frameImageFat.w );
                        const auto h = MemRead<uint16_t>( &item->frameImageFat.h );
                        // Payload size is w*h/2 bytes; the product is evaluated in int arithmetic before the cast.
                        const auto csz = size_t( w * h / 2 );
                        SendLongString( ptr, (const char*)ptr, csz, QueueType::FrameImageData );
                        tracy_free_fast( (void*)ptr );
                        break;
                    }
                    case QueueType::ZoneBegin:
                    case QueueType::ZoneBeginCallstack:
                    {
                        int64_t t = MemRead<int64_t>( &item->zoneBegin.time );
                        int64_t dt = t - refThread;
                        refThread = t;
                        MemWrite( &item->zoneBegin.time, dt );
                        break;
                    }
                    case QueueType::ZoneEnd:
                    {
                        int64_t t = MemRead<int64_t>( &item->zoneEnd.time );
                        int64_t dt = t - refThread;
                        refThread = t;
                        MemWrite( &item->zoneEnd.time, dt );
                        break;
                    }
                    case QueueType::GpuZoneBegin:
                    case QueueType::GpuZoneBeginCallstack:
                    {
                        int64_t t = MemRead<int64_t>( &item->gpuZoneBegin.cpuTime );
                        int64_t dt = t - refThread;
                        refThread = t;
                        MemWrite( &item->gpuZoneBegin.cpuTime, dt );
                        break;
                    }
                    case QueueType::GpuZoneBeginAllocSrcLoc:
                    case QueueType::GpuZoneBeginAllocSrcLocCallstack:
                    {
                        int64_t t = MemRead<int64_t>( &item->gpuZoneBegin.cpuTime );
                        int64_t dt = t - refThread;
                        refThread = t;
                        MemWrite( &item->gpuZoneBegin.cpuTime, dt );
                        ptr = MemRead<uint64_t>( &item->gpuZoneBegin.srcloc );
                        SendSourceLocationPayload( ptr );
                        tracy_free_fast( (void*)ptr );
                        break;
                    }
                    case QueueType::GpuZoneEnd:
                    {
                        int64_t t = MemRead<int64_t>( &item->gpuZoneEnd.cpuTime );
                        int64_t dt = t - refThread;
                        refThread = t;
                        MemWrite( &item->gpuZoneEnd.cpuTime, dt );
                        break;
                    }
                    case QueueType::GpuContextName:
                        ptr = MemRead<uint64_t>( &item->gpuContextNameFat.ptr );
                        size = MemRead<uint16_t>( &item->gpuContextNameFat.size );
                        SendSingleString( (const char*)ptr, size );
                        // With TRACY_ON_DEMAND the name is kept alive (not freed here).
#ifndef TRACY_ON_DEMAND
                        tracy_free_fast( (void*)ptr );
#endif
                        break;
                    case QueueType::PlotDataInt:
                    case QueueType::PlotDataFloat:
                    case QueueType::PlotDataDouble:
                    {
                        // All three plot variants access the timestamp through the plotDataInt view.
                        int64_t t = MemRead<int64_t>( &item->plotDataInt.time );
                        int64_t dt = t - refThread;
                        refThread = t;
                        MemWrite( &item->plotDataInt.time, dt );
                        break;
                    }
                    case QueueType::ContextSwitch:
                    {
                        int64_t t = MemRead<int64_t>( &item->contextSwitch.time );
                        int64_t dt = t - refCtx;
                        refCtx = t;
                        MemWrite( &item->contextSwitch.time, dt );
                        break;
                    }
                    case QueueType::ThreadWakeup:
                    {
                        int64_t t = MemRead<int64_t>( &item->threadWakeup.time );
                        int64_t dt = t - refCtx;
                        refCtx = t;
                        MemWrite( &item->threadWakeup.time, dt );
                        break;
                    }
                    case QueueType::GpuTime:
                    {
                        int64_t t = MemRead<int64_t>( &item->gpuTime.gpuTime );
                        int64_t dt = t - refGpu;
                        refGpu = t;
                        MemWrite( &item->gpuTime.gpuTime, dt );
                        break;
                    }
#ifdef TRACY_HAS_CALLSTACK
                    case QueueType::CallstackFrameSize:
                    {
                        auto data = (const CallstackEntry*)MemRead<uint64_t>( &item->callstackFrameSizeFat.data );
                        auto datasz = MemRead<uint8_t>( &item->callstackFrameSizeFat.size );
                        auto imageName = (const char*)MemRead<uint64_t>( &item->callstackFrameSizeFat.imageName );
                        SendSingleString( imageName );
                        // The header item is appended here (and the batch pointer advanced), followed by
                        // one CallstackFrame item per entry. The AppendData results are not checked on this path.
                        AppendData( item++, QueueDataSize[idx] );

                        for( uint8_t i=0; i<datasz; i++ )
                        {
                            const auto& frame = data[i];

                            SendSingleString( frame.name );
                            SendSecondString( frame.file );

                            // Note: this local shadows the batch pointer `item` for the rest of the loop body.
                            QueueItem item;
                            MemWrite( &item.hdr.type, QueueType::CallstackFrame );
                            MemWrite( &item.callstackFrame.line, frame.line );
                            MemWrite( &item.callstackFrame.symAddr, frame.symAddr );
                            MemWrite( &item.callstackFrame.symLen, frame.symLen );

                            AppendData( &item, QueueDataSize[(int)QueueType::CallstackFrame] );

                            tracy_free_fast( (void*)frame.name );
                            tracy_free_fast( (void*)frame.file );
                        }
                        tracy_free_fast( (void*)data );
                        // Skip the common AppendData below; the item was already appended above.
                        continue;
                    }
                    case QueueType::SymbolInformation:
                    {
                        auto fileString = (const char*)MemRead<uint64_t>( &item->symbolInformationFat.fileString );
                        auto needFree = MemRead<uint8_t>( &item->symbolInformationFat.needFree );
                        SendSingleString( fileString );
                        if( needFree ) tracy_free_fast( (void*)fileString );
                        break;
                    }
                    case QueueType::SymbolCodeMetadata:
                    {
                        auto symbol = MemRead<uint64_t>( &item->symbolCodeMetadata.symbol );
                        auto ptr = (const char*)MemRead<uint64_t>( &item->symbolCodeMetadata.ptr );
                        auto size = MemRead<uint32_t>( &item->symbolCodeMetadata.size );
                        SendLongString( symbol, ptr, size, QueueType::SymbolCode );
                        tracy_free_fast( (void*)ptr );
                        // Metadata item: only its payload is sent, the item itself is not appended.
                        ++item;
                        continue;
                    }
#endif
#ifdef TRACY_HAS_SYSTEM_TRACING
                    case QueueType::ExternalNameMetadata:
                    {
                        auto thread = MemRead<uint64_t>( &item->externalNameMetadata.thread );
                        auto name = (const char*)MemRead<uint64_t>( &item->externalNameMetadata.name );
                        auto threadName = (const char*)MemRead<uint64_t>( &item->externalNameMetadata.threadName );
                        SendString( thread, threadName, QueueType::ExternalThreadName );
                        SendString( thread, name, QueueType::ExternalName );
                        tracy_free_fast( (void*)threadName );
                        tracy_free_fast( (void*)name );
                        // Metadata item: only its payload is sent, the item itself is not appended.
                        ++item;
                        continue;
                    }
#endif
                    case QueueType::SourceCodeMetadata:
                    {
                        auto ptr = (const char*)MemRead<uint64_t>( &item->sourceCodeMetadata.ptr );
                        auto size = MemRead<uint32_t>( &item->sourceCodeMetadata.size );
                        auto id = MemRead<uint32_t>( &item->sourceCodeMetadata.id );
                        SendLongString( (uint64_t)id, ptr, size, QueueType::SourceCode );
                        tracy_free_fast( (void*)ptr );
                        // Metadata item: only its payload is sent, the item itself is not appended.
                        ++item;
                        continue;
                    }
                    default:
                        assert( false );
                        break;
                    }
                }
                // Common path: append the (possibly rewritten) item and advance to the next one.
                if( !AppendData( item++, QueueDataSize[idx] ) )
                {
                    // Persist the reference times before bailing out; remaining items in the batch are not visited.
                    connectionLost = true;
                    m_refTimeThread = refThread;
                    m_refTimeCtx = refCtx;
                    m_refTimeGpu = refGpu;
                    return;
                }
            }
            m_refTimeThread = refThread;
            m_refTimeCtx = refCtx;
            m_refTimeGpu = refGpu;
        }
    );
    if( connectionLost ) return DequeueStatus::ConnectionLost;
    return sz > 0 ? DequeueStatus::DataDequeued : DequeueStatus::QueueEmpty;
}
2531
2532Profiler::DequeueStatus Profiler::DequeueContextSwitches( tracy::moodycamel::ConsumerToken& token, int64_t& timeStop )
2533{
2534 const auto sz = GetQueue().try_dequeue_bulk_single( token, [] ( const uint64_t& ) {},
2535 [this, &timeStop] ( QueueItem* item, size_t sz )
2536 {
2537 assert( sz > 0 );
2538 int64_t refCtx = m_refTimeCtx;
2539 while( sz-- > 0 )
2540 {
2541 FreeAssociatedMemory( *item );
2542 if( timeStop < 0 ) return;
2543 const auto idx = MemRead<uint8_t>( &item->hdr.idx );
2544 if( idx == (uint8_t)QueueType::ContextSwitch )
2545 {
2546 const auto csTime = MemRead<int64_t>( &item->contextSwitch.time );
2547 if( csTime > timeStop )
2548 {
2549 timeStop = -1;
2550 m_refTimeCtx = refCtx;
2551 return;
2552 }
2553 int64_t dt = csTime - refCtx;
2554 refCtx = csTime;
2555 MemWrite( &item->contextSwitch.time, dt );
2556 if( !AppendData( item, QueueDataSize[(int)QueueType::ContextSwitch] ) )
2557 {
2558 timeStop = -2;
2559 m_refTimeCtx = refCtx;
2560 return;
2561 }
2562 }
2563 else if( idx == (uint8_t)QueueType::ThreadWakeup )
2564 {
2565 const auto csTime = MemRead<int64_t>( &item->threadWakeup.time );
2566 if( csTime > timeStop )
2567 {
2568 timeStop = -1;
2569 m_refTimeCtx = refCtx;
2570 return;
2571 }
2572 int64_t dt = csTime - refCtx;
2573 refCtx = csTime;
2574 MemWrite( &item->threadWakeup.time, dt );
2575 if( !AppendData( item, QueueDataSize[(int)QueueType::ThreadWakeup] ) )
2576 {
2577 timeStop = -2;
2578 m_refTimeCtx = refCtx;
2579 return;
2580 }
2581 }
2582 item++;
2583 }
2584 m_refTimeCtx = refCtx;
2585 }
2586 );
2587
2588 if( timeStop == -2 ) return DequeueStatus::ConnectionLost;
2589 return ( timeStop == -1 || sz > 0 ) ? DequeueStatus::DataDequeued : DequeueStatus::QueueEmpty;
2590}
2591
// Helper for serial-queue processing: reads the thread id stored in
// item->_name.thread and runs ThreadCtxCheck on it.
// Expects `item` and `refThread` to exist in the expanding scope, declares a
// local named `thread`, resets refThread to 0 when the thread context changed,
// and makes the *enclosing function* return DequeueStatus::ConnectionLost
// when ThreadCtxCheck reports a lost connection.
#define ThreadCtxCheckSerial( _name ) \
    uint32_t thread = MemRead<uint32_t>( &item->_name.thread ); \
    switch( ThreadCtxCheck( thread ) ) \
    { \
    case ThreadCtxStatus::Same: break; \
    case ThreadCtxStatus::Changed: assert( m_refTimeThread == 0 ); refThread = 0; break; \
    case ThreadCtxStatus::ConnectionLost: return DequeueStatus::ConnectionLost; \
    default: assert( false ); break; \
    }
2601
// Processes the serial queue: swaps m_serialQueue into m_serialDequeue under
// the serial lock, then walks m_serialDequeue, sending out-of-band payloads,
// freeing them, delta-encoding timestamps against the serial / GPU (and, with
// TRACY_FIBERS, thread) reference times, and appending every item to the
// outgoing buffer.
//
// Returns QueueEmpty when m_serialDequeue holds nothing after the swap,
// ConnectionLost when AppendData or a thread-context check fails, and
// DataDequeued otherwise.
Profiler::DequeueStatus Profiler::DequeueSerial()
{
    {
        // Spin for the serial lock; if a manual shutdown is flagged while spinning,
        // proceed without holding it.
        bool lockHeld = true;
        while( !m_serialLock.try_lock() )
        {
            if( m_shutdownManual.load( std::memory_order_relaxed ) )
            {
                lockHeld = false;
                break;
            }
        }
        // Hand the producer side's items over to the dequeue buffer in one swap.
        if( !m_serialQueue.empty() ) m_serialQueue.swap( m_serialDequeue );
        if( lockHeld )
        {
            m_serialLock.unlock();
        }
    }

    const auto sz = m_serialDequeue.size();
    if( sz > 0 )
    {
        InitRpmalloc();
        // Local copies of the reference times; written back only after the whole buffer was processed.
        int64_t refSerial = m_refTimeSerial;
        int64_t refGpu = m_refTimeGpu;
#ifdef TRACY_FIBERS
        int64_t refThread = m_refTimeThread;
#endif
        auto item = m_serialDequeue.data();
        auto end = item + sz;
        while( item != end )
        {
            uint64_t ptr;
            auto idx = MemRead<uint8_t>( &item->hdr.idx );
            // Only types below Terminate need payload handling or timestamp rewriting.
            if( idx < (int)QueueType::Terminate )
            {
                switch( (QueueType)idx )
                {
                case QueueType::CallstackSerial:
                    ptr = MemRead<uint64_t>( &item->callstackFat.ptr );
                    SendCallstackPayload( ptr );
                    tracy_free_fast( (void*)ptr );
                    break;
                case QueueType::LockWait:
                case QueueType::LockSharedWait:
                {
                    int64_t t = MemRead<int64_t>( &item->lockWait.time );
                    int64_t dt = t - refSerial;
                    refSerial = t;
                    MemWrite( &item->lockWait.time, dt );
                    break;
                }
                case QueueType::LockObtain:
                case QueueType::LockSharedObtain:
                {
                    int64_t t = MemRead<int64_t>( &item->lockObtain.time );
                    int64_t dt = t - refSerial;
                    refSerial = t;
                    MemWrite( &item->lockObtain.time, dt );
                    break;
                }
                case QueueType::LockRelease:
                case QueueType::LockSharedRelease:
                {
                    int64_t t = MemRead<int64_t>( &item->lockRelease.time );
                    int64_t dt = t - refSerial;
                    refSerial = t;
                    MemWrite( &item->lockRelease.time, dt );
                    break;
                }
                case QueueType::LockName:
                {
                    ptr = MemRead<uint64_t>( &item->lockNameFat.name );
                    uint16_t size = MemRead<uint16_t>( &item->lockNameFat.size );
                    SendSingleString( (const char*)ptr, size );
                    // With TRACY_ON_DEMAND the name is kept alive (not freed here).
#ifndef TRACY_ON_DEMAND
                    tracy_free_fast( (void*)ptr );
#endif
                    break;
                }
                case QueueType::MemAlloc:
                case QueueType::MemAllocNamed:
                case QueueType::MemAllocCallstack:
                case QueueType::MemAllocCallstackNamed:
                {
                    int64_t t = MemRead<int64_t>( &item->memAlloc.time );
                    int64_t dt = t - refSerial;
                    refSerial = t;
                    MemWrite( &item->memAlloc.time, dt );
                    break;
                }
                case QueueType::MemFree:
                case QueueType::MemFreeNamed:
                case QueueType::MemFreeCallstack:
                case QueueType::MemFreeCallstackNamed:
                {
                    int64_t t = MemRead<int64_t>( &item->memFree.time );
                    int64_t dt = t - refSerial;
                    refSerial = t;
                    MemWrite( &item->memFree.time, dt );
                    break;
                }
                case QueueType::GpuZoneBeginSerial:
                case QueueType::GpuZoneBeginCallstackSerial:
                {
                    int64_t t = MemRead<int64_t>( &item->gpuZoneBegin.cpuTime );
                    int64_t dt = t - refSerial;
                    refSerial = t;
                    MemWrite( &item->gpuZoneBegin.cpuTime, dt );
                    break;
                }
                case QueueType::GpuZoneBeginAllocSrcLocSerial:
                case QueueType::GpuZoneBeginAllocSrcLocCallstackSerial:
                {
                    int64_t t = MemRead<int64_t>( &item->gpuZoneBegin.cpuTime );
                    int64_t dt = t - refSerial;
                    refSerial = t;
                    MemWrite( &item->gpuZoneBegin.cpuTime, dt );
                    ptr = MemRead<uint64_t>( &item->gpuZoneBegin.srcloc );
                    SendSourceLocationPayload( ptr );
                    tracy_free_fast( (void*)ptr );
                    break;
                }
                case QueueType::GpuZoneEndSerial:
                {
                    int64_t t = MemRead<int64_t>( &item->gpuZoneEnd.cpuTime );
                    int64_t dt = t - refSerial;
                    refSerial = t;
                    MemWrite( &item->gpuZoneEnd.cpuTime, dt );
                    break;
                }
                case QueueType::GpuTime:
                {
                    // GPU timestamps use their own reference time.
                    int64_t t = MemRead<int64_t>( &item->gpuTime.gpuTime );
                    int64_t dt = t - refGpu;
                    refGpu = t;
                    MemWrite( &item->gpuTime.gpuTime, dt );
                    break;
                }
                case QueueType::GpuContextName:
                {
                    ptr = MemRead<uint64_t>( &item->gpuContextNameFat.ptr );
                    uint16_t size = MemRead<uint16_t>( &item->gpuContextNameFat.size );
                    SendSingleString( (const char*)ptr, size );
                    // With TRACY_ON_DEMAND the name is kept alive (not freed here).
#ifndef TRACY_ON_DEMAND
                    tracy_free_fast( (void*)ptr );
#endif
                    break;
                }
#ifdef TRACY_FIBERS
                // With TRACY_FIBERS, per-thread item types are also handled in the serial queue.
                // Each case first runs the thread-context check, which may return ConnectionLost
                // from this function, before touching the item.
                case QueueType::ZoneBegin:
                case QueueType::ZoneBeginCallstack:
                {
                    ThreadCtxCheckSerial( zoneBeginThread );
                    int64_t t = MemRead<int64_t>( &item->zoneBegin.time );
                    int64_t dt = t - refThread;
                    refThread = t;
                    MemWrite( &item->zoneBegin.time, dt );
                    break;
                }
                case QueueType::ZoneBeginAllocSrcLoc:
                case QueueType::ZoneBeginAllocSrcLocCallstack:
                {
                    ThreadCtxCheckSerial( zoneBeginThread );
                    int64_t t = MemRead<int64_t>( &item->zoneBegin.time );
                    int64_t dt = t - refThread;
                    refThread = t;
                    MemWrite( &item->zoneBegin.time, dt );
                    ptr = MemRead<uint64_t>( &item->zoneBegin.srcloc );
                    SendSourceLocationPayload( ptr );
                    tracy_free_fast( (void*)ptr );
                    break;
                }
                case QueueType::ZoneEnd:
                {
                    ThreadCtxCheckSerial( zoneEndThread );
                    int64_t t = MemRead<int64_t>( &item->zoneEnd.time );
                    int64_t dt = t - refThread;
                    refThread = t;
                    MemWrite( &item->zoneEnd.time, dt );
                    break;
                }
                case QueueType::ZoneText:
                case QueueType::ZoneName:
                {
                    ThreadCtxCheckSerial( zoneTextFatThread );
                    ptr = MemRead<uint64_t>( &item->zoneTextFat.text );
                    uint16_t size = MemRead<uint16_t>( &item->zoneTextFat.size );
                    SendSingleString( (const char*)ptr, size );
                    tracy_free_fast( (void*)ptr );
                    break;
                }
                case QueueType::Message:
                case QueueType::MessageCallstack:
                {
                    ThreadCtxCheckSerial( messageFatThread );
                    ptr = MemRead<uint64_t>( &item->messageFat.text );
                    uint16_t size = MemRead<uint16_t>( &item->messageFat.size );
                    SendSingleString( (const char*)ptr, size );
                    tracy_free_fast( (void*)ptr );
                    break;
                }
                case QueueType::MessageColor:
                case QueueType::MessageColorCallstack:
                {
                    ThreadCtxCheckSerial( messageColorFatThread );
                    ptr = MemRead<uint64_t>( &item->messageColorFat.text );
                    uint16_t size = MemRead<uint16_t>( &item->messageColorFat.size );
                    SendSingleString( (const char*)ptr, size );
                    tracy_free_fast( (void*)ptr );
                    break;
                }
                case QueueType::Callstack:
                {
                    ThreadCtxCheckSerial( callstackFatThread );
                    ptr = MemRead<uint64_t>( &item->callstackFat.ptr );
                    SendCallstackPayload( ptr );
                    tracy_free_fast( (void*)ptr );
                    break;
                }
                case QueueType::CallstackAlloc:
                {
                    ThreadCtxCheckSerial( callstackAllocFatThread );
                    // The native callstack is optional (may be 0); the allocated one is always sent.
                    ptr = MemRead<uint64_t>( &item->callstackAllocFat.nativePtr );
                    if( ptr != 0 )
                    {
                        CutCallstack( (void*)ptr, "lua_pcall" );
                        SendCallstackPayload( ptr );
                        tracy_free_fast( (void*)ptr );
                    }
                    ptr = MemRead<uint64_t>( &item->callstackAllocFat.ptr );
                    SendCallstackAlloc( ptr );
                    tracy_free_fast( (void*)ptr );
                    break;
                }
                case QueueType::FiberEnter:
                {
                    ThreadCtxCheckSerial( fiberEnter );
                    int64_t t = MemRead<int64_t>( &item->fiberEnter.time );
                    int64_t dt = t - refThread;
                    refThread = t;
                    MemWrite( &item->fiberEnter.time, dt );
                    break;
                }
                case QueueType::FiberLeave:
                {
                    ThreadCtxCheckSerial( fiberLeave );
                    int64_t t = MemRead<int64_t>( &item->fiberLeave.time );
                    int64_t dt = t - refThread;
                    refThread = t;
                    MemWrite( &item->fiberLeave.time, dt );
                    break;
                }
#endif
                default:
                    assert( false );
                    break;
                }
            }
#ifdef TRACY_FIBERS
            else
            {
                // Types at or past Terminate need no rewriting, but the listed ones still carry a
                // thread id that must go through the thread-context check.
                switch( (QueueType)idx )
                {
                case QueueType::ZoneColor:
                {
                    ThreadCtxCheckSerial( zoneColorThread );
                    break;
                }
                case QueueType::ZoneValue:
                {
                    ThreadCtxCheckSerial( zoneValueThread );
                    break;
                }
                case QueueType::ZoneValidation:
                {
                    ThreadCtxCheckSerial( zoneValidationThread );
                    break;
                }
                case QueueType::MessageLiteral:
                case QueueType::MessageLiteralCallstack:
                {
                    ThreadCtxCheckSerial( messageLiteralThread );
                    break;
                }
                case QueueType::MessageLiteralColor:
                case QueueType::MessageLiteralColorCallstack:
                {
                    ThreadCtxCheckSerial( messageColorLiteralThread );
                    break;
                }
                case QueueType::CrashReport:
                {
                    ThreadCtxCheckSerial( crashReportThread );
                    break;
                }
                default:
                    break;
                }
            }
#endif
            // NOTE(review): this early return (and the ConnectionLost return inside
            // ThreadCtxCheckSerial) leaves m_serialDequeue uncleared and the reference times
            // unsaved, although payloads of earlier entries were already freed above.
            // ClearSerial later runs FreeAssociatedMemory over m_serialDequeue - confirm this
            // cannot free the same pointers twice.
            if( !AppendData( item, QueueDataSize[idx] ) ) return DequeueStatus::ConnectionLost;
            item++;
        }
        m_refTimeSerial = refSerial;
        m_refTimeGpu = refGpu;
#ifdef TRACY_FIBERS
        m_refTimeThread = refThread;
#endif
        m_serialDequeue.clear();
    }
    else
    {
        return DequeueStatus::QueueEmpty;
    }
    return DequeueStatus::DataDequeued;
}
2919
2920Profiler::ThreadCtxStatus Profiler::ThreadCtxCheck( uint32_t threadId )
2921{
2922 if( m_threadCtx == threadId ) return ThreadCtxStatus::Same;
2923 QueueItem item;
2924 MemWrite( &item.hdr.type, QueueType::ThreadContext );
2925 MemWrite( &item.threadCtx.thread, threadId );
2926 if( !AppendData( &item, QueueDataSize[(int)QueueType::ThreadContext] ) ) return ThreadCtxStatus::ConnectionLost;
2927 m_threadCtx = threadId;
2928 m_refTimeThread = 0;
2929 return ThreadCtxStatus::Changed;
2930}
2931
2932bool Profiler::CommitData()
2933{
2934 bool ret = SendData( m_buffer + m_bufferStart, m_bufferOffset - m_bufferStart );
2935 if( m_bufferOffset > TargetFrameSize * 2 ) m_bufferOffset = 0;
2936 m_bufferStart = m_bufferOffset;
2937 return ret;
2938}
2939
2940bool Profiler::SendData( const char* data, size_t len )
2941{
2942 const lz4sz_t lz4sz = LZ4_compress_fast_continue( (LZ4_stream_t*)m_stream, data, m_lz4Buf + sizeof( lz4sz_t ), (int)len, LZ4Size, 1 );
2943 memcpy( m_lz4Buf, &lz4sz, sizeof( lz4sz ) );
2944 return m_sock->Send( m_lz4Buf, lz4sz + sizeof( lz4sz_t ) ) != -1;
2945}
2946
2947void Profiler::SendString( uint64_t str, const char* ptr, size_t len, QueueType type )
2948{
2949 assert( type == QueueType::StringData ||
2950 type == QueueType::ThreadName ||
2951 type == QueueType::PlotName ||
2952 type == QueueType::FrameName ||
2953 type == QueueType::ExternalName ||
2954 type == QueueType::ExternalThreadName ||
2955 type == QueueType::FiberName );
2956
2957 QueueItem item;
2958 MemWrite( &item.hdr.type, type );
2959 MemWrite( &item.stringTransfer.ptr, str );
2960
2961 assert( len <= std::numeric_limits<uint16_t>::max() );
2962 auto l16 = uint16_t( len );
2963
2964 NeedDataSize( QueueDataSize[(int)type] + sizeof( l16 ) + l16 );
2965
2966 AppendDataUnsafe( &item, QueueDataSize[(int)type] );
2967 AppendDataUnsafe( &l16, sizeof( l16 ) );
2968 AppendDataUnsafe( ptr, l16 );
2969}
2970
2971void Profiler::SendSingleString( const char* ptr, size_t len )
2972{
2973 QueueItem item;
2974 MemWrite( &item.hdr.type, QueueType::SingleStringData );
2975
2976 assert( len <= std::numeric_limits<uint16_t>::max() );
2977 auto l16 = uint16_t( len );
2978
2979 NeedDataSize( QueueDataSize[(int)QueueType::SingleStringData] + sizeof( l16 ) + l16 );
2980
2981 AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::SingleStringData] );
2982 AppendDataUnsafe( &l16, sizeof( l16 ) );
2983 AppendDataUnsafe( ptr, l16 );
2984}
2985
2986void Profiler::SendSecondString( const char* ptr, size_t len )
2987{
2988 QueueItem item;
2989 MemWrite( &item.hdr.type, QueueType::SecondStringData );
2990
2991 assert( len <= std::numeric_limits<uint16_t>::max() );
2992 auto l16 = uint16_t( len );
2993
2994 NeedDataSize( QueueDataSize[(int)QueueType::SecondStringData] + sizeof( l16 ) + l16 );
2995
2996 AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::SecondStringData] );
2997 AppendDataUnsafe( &l16, sizeof( l16 ) );
2998 AppendDataUnsafe( ptr, l16 );
2999}
3000
3001void Profiler::SendLongString( uint64_t str, const char* ptr, size_t len, QueueType type )
3002{
3003 assert( type == QueueType::FrameImageData ||
3004 type == QueueType::SymbolCode ||
3005 type == QueueType::SourceCode );
3006
3007 QueueItem item;
3008 MemWrite( &item.hdr.type, type );
3009 MemWrite( &item.stringTransfer.ptr, str );
3010
3011 assert( len <= std::numeric_limits<uint32_t>::max() );
3012 assert( QueueDataSize[(int)type] + sizeof( uint32_t ) + len <= TargetFrameSize );
3013 auto l32 = uint32_t( len );
3014
3015 NeedDataSize( QueueDataSize[(int)type] + sizeof( l32 ) + l32 );
3016
3017 AppendDataUnsafe( &item, QueueDataSize[(int)type] );
3018 AppendDataUnsafe( &l32, sizeof( l32 ) );
3019 AppendDataUnsafe( ptr, l32 );
3020}
3021
3022void Profiler::SendSourceLocation( uint64_t ptr )
3023{
3024 auto srcloc = (const SourceLocationData*)ptr;
3025 QueueItem item;
3026 MemWrite( &item.hdr.type, QueueType::SourceLocation );
3027 MemWrite( &item.srcloc.name, (uint64_t)srcloc->name );
3028 MemWrite( &item.srcloc.file, (uint64_t)srcloc->file );
3029 MemWrite( &item.srcloc.function, (uint64_t)srcloc->function );
3030 MemWrite( &item.srcloc.line, srcloc->line );
3031 MemWrite( &item.srcloc.b, uint8_t( ( srcloc->color ) & 0xFF ) );
3032 MemWrite( &item.srcloc.g, uint8_t( ( srcloc->color >> 8 ) & 0xFF ) );
3033 MemWrite( &item.srcloc.r, uint8_t( ( srcloc->color >> 16 ) & 0xFF ) );
3034 AppendData( &item, QueueDataSize[(int)QueueType::SourceLocation] );
3035}
3036
3037void Profiler::SendSourceLocationPayload( uint64_t _ptr )
3038{
3039 auto ptr = (const char*)_ptr;
3040
3041 QueueItem item;
3042 MemWrite( &item.hdr.type, QueueType::SourceLocationPayload );
3043 MemWrite( &item.stringTransfer.ptr, _ptr );
3044
3045 uint16_t len;
3046 memcpy( &len, ptr, sizeof( len ) );
3047 assert( len > 2 );
3048 len -= 2;
3049 ptr += 2;
3050
3051 NeedDataSize( QueueDataSize[(int)QueueType::SourceLocationPayload] + sizeof( len ) + len );
3052
3053 AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::SourceLocationPayload] );
3054 AppendDataUnsafe( &len, sizeof( len ) );
3055 AppendDataUnsafe( ptr, len );
3056}
3057
3058void Profiler::SendCallstackPayload( uint64_t _ptr )
3059{
3060 auto ptr = (uintptr_t*)_ptr;
3061
3062 QueueItem item;
3063 MemWrite( &item.hdr.type, QueueType::CallstackPayload );
3064 MemWrite( &item.stringTransfer.ptr, _ptr );
3065
3066 const auto sz = *ptr++;
3067 const auto len = sz * sizeof( uint64_t );
3068 const auto l16 = uint16_t( len );
3069
3070 NeedDataSize( QueueDataSize[(int)QueueType::CallstackPayload] + sizeof( l16 ) + l16 );
3071
3072 AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::CallstackPayload] );
3073 AppendDataUnsafe( &l16, sizeof( l16 ) );
3074
3075 if( compile_time_condition<sizeof( uintptr_t ) == sizeof( uint64_t )>::value )
3076 {
3077 AppendDataUnsafe( ptr, sizeof( uint64_t ) * sz );
3078 }
3079 else
3080 {
3081 for( uintptr_t i=0; i<sz; i++ )
3082 {
3083 const auto val = uint64_t( *ptr++ );
3084 AppendDataUnsafe( &val, sizeof( uint64_t ) );
3085 }
3086 }
3087}
3088
3089void Profiler::SendCallstackPayload64( uint64_t _ptr )
3090{
3091 auto ptr = (uint64_t*)_ptr;
3092
3093 QueueItem item;
3094 MemWrite( &item.hdr.type, QueueType::CallstackPayload );
3095 MemWrite( &item.stringTransfer.ptr, _ptr );
3096
3097 const auto sz = *ptr++;
3098 const auto len = sz * sizeof( uint64_t );
3099 const auto l16 = uint16_t( len );
3100
3101 NeedDataSize( QueueDataSize[(int)QueueType::CallstackPayload] + sizeof( l16 ) + l16 );
3102
3103 AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::CallstackPayload] );
3104 AppendDataUnsafe( &l16, sizeof( l16 ) );
3105 AppendDataUnsafe( ptr, sizeof( uint64_t ) * sz );
3106}
3107
3108void Profiler::SendCallstackAlloc( uint64_t _ptr )
3109{
3110 auto ptr = (const char*)_ptr;
3111
3112 QueueItem item;
3113 MemWrite( &item.hdr.type, QueueType::CallstackAllocPayload );
3114 MemWrite( &item.stringTransfer.ptr, _ptr );
3115
3116 uint16_t len;
3117 memcpy( &len, ptr, 2 );
3118 ptr += 2;
3119
3120 NeedDataSize( QueueDataSize[(int)QueueType::CallstackAllocPayload] + sizeof( len ) + len );
3121
3122 AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::CallstackAllocPayload] );
3123 AppendDataUnsafe( &len, sizeof( len ) );
3124 AppendDataUnsafe( ptr, len );
3125}
3126
3127void Profiler::QueueCallstackFrame( uint64_t ptr )
3128{
3129#ifdef TRACY_HAS_CALLSTACK
3130 m_symbolQueue.emplace( SymbolQueueItem { SymbolQueueItemType::CallstackFrame, ptr } );
3131#else
3132 AckServerQuery();
3133#endif
3134}
3135
3136void Profiler::QueueSymbolQuery( uint64_t symbol )
3137{
3138#ifdef TRACY_HAS_CALLSTACK
3139 // Special handling for kernel frames
3140 if( symbol >> 63 != 0 )
3141 {
3142 SendSingleString( "<kernel>" );
3143 QueueItem item;
3144 MemWrite( &item.hdr.type, QueueType::SymbolInformation );
3145 MemWrite( &item.symbolInformation.line, 0 );
3146 MemWrite( &item.symbolInformation.symAddr, symbol );
3147 AppendData( &item, QueueDataSize[(int)QueueType::SymbolInformation] );
3148 }
3149 else
3150 {
3151 m_symbolQueue.emplace( SymbolQueueItem { SymbolQueueItemType::SymbolQuery, symbol } );
3152 }
3153#else
3154 AckServerQuery();
3155#endif
3156}
3157
3158void Profiler::QueueExternalName( uint64_t ptr )
3159{
3160#ifdef TRACY_HAS_SYSTEM_TRACING
3161 m_symbolQueue.emplace( SymbolQueueItem { SymbolQueueItemType::ExternalName, ptr } );
3162#endif
3163}
3164
3165void Profiler::QueueKernelCode( uint64_t symbol, uint32_t size )
3166{
3167 assert( symbol >> 63 != 0 );
3168#ifdef TRACY_HAS_CALLSTACK
3169 m_symbolQueue.emplace( SymbolQueueItem { SymbolQueueItemType::KernelCode, symbol, size } );
3170#else
3171 AckSymbolCodeNotAvailable();
3172#endif
3173}
3174
3175void Profiler::QueueSourceCodeQuery( uint32_t id )
3176{
3177 assert( m_exectime != 0 );
3178 assert( m_queryData );
3179 m_symbolQueue.emplace( SymbolQueueItem { SymbolQueueItemType::SourceCode, uint64_t( m_queryData ), uint64_t( m_queryImage ), id } );
3180 m_queryData = nullptr;
3181 m_queryImage = nullptr;
3182}
3183
3184#ifdef TRACY_HAS_CALLSTACK
3185void Profiler::HandleSymbolQueueItem( const SymbolQueueItem& si )
3186{
3187 switch( si.type )
3188 {
3189 case SymbolQueueItemType::CallstackFrame:
3190 {
3191 const auto frameData = DecodeCallstackPtr( si.ptr );
3192 auto data = tracy_malloc_fast( sizeof( CallstackEntry ) * frameData.size );
3193 memcpy( data, frameData.data, sizeof( CallstackEntry ) * frameData.size );
3194 TracyLfqPrepare( QueueType::CallstackFrameSize );
3195 MemWrite( &item->callstackFrameSizeFat.ptr, si.ptr );
3196 MemWrite( &item->callstackFrameSizeFat.size, frameData.size );
3197 MemWrite( &item->callstackFrameSizeFat.data, (uint64_t)data );
3198 MemWrite( &item->callstackFrameSizeFat.imageName, (uint64_t)frameData.imageName );
3199 TracyLfqCommit;
3200 break;
3201 }
3202 case SymbolQueueItemType::SymbolQuery:
3203 {
3204#ifdef __ANDROID__
3205 // On Android it's common for code to be in mappings that are only executable
3206 // but not readable.
3207 if( !EnsureReadable( si.ptr ) )
3208 {
3209 TracyLfqPrepare( QueueType::AckServerQueryNoop );
3210 TracyLfqCommit;
3211 break;
3212 }
3213#endif
3214 const auto sym = DecodeSymbolAddress( si.ptr );
3215 TracyLfqPrepare( QueueType::SymbolInformation );
3216 MemWrite( &item->symbolInformationFat.line, sym.line );
3217 MemWrite( &item->symbolInformationFat.symAddr, si.ptr );
3218 MemWrite( &item->symbolInformationFat.fileString, (uint64_t)sym.file );
3219 MemWrite( &item->symbolInformationFat.needFree, (uint8_t)sym.needFree );
3220 TracyLfqCommit;
3221 break;
3222 }
3223#ifdef TRACY_HAS_SYSTEM_TRACING
3224 case SymbolQueueItemType::ExternalName:
3225 {
3226 const char* threadName;
3227 const char* name;
3228 SysTraceGetExternalName( si.ptr, threadName, name );
3229 TracyLfqPrepare( QueueType::ExternalNameMetadata );
3230 MemWrite( &item->externalNameMetadata.thread, si.ptr );
3231 MemWrite( &item->externalNameMetadata.name, (uint64_t)name );
3232 MemWrite( &item->externalNameMetadata.threadName, (uint64_t)threadName );
3233 TracyLfqCommit;
3234 break;
3235 }
3236#endif
3237 case SymbolQueueItemType::KernelCode:
3238 {
3239#ifdef _WIN32
3240 auto mod = GetKernelModulePath( si.ptr );
3241 if( mod )
3242 {
3243 auto fn = DecodeCallstackPtrFast( si.ptr );
3244 if( *fn )
3245 {
3246 auto hnd = LoadLibraryExA( mod, nullptr, DONT_RESOLVE_DLL_REFERENCES );
3247 if( hnd )
3248 {
3249 auto ptr = (const void*)GetProcAddress( hnd, fn );
3250 if( ptr )
3251 {
3252 auto buf = (char*)tracy_malloc( si.extra );
3253 memcpy( buf, ptr, si.extra );
3254 FreeLibrary( hnd );
3255 TracyLfqPrepare( QueueType::SymbolCodeMetadata );
3256 MemWrite( &item->symbolCodeMetadata.symbol, si.ptr );
3257 MemWrite( &item->symbolCodeMetadata.ptr, (uint64_t)buf );
3258 MemWrite( &item->symbolCodeMetadata.size, (uint32_t)si.extra );
3259 TracyLfqCommit;
3260 break;
3261 }
3262 FreeLibrary( hnd );
3263 }
3264 }
3265 }
3266#endif
3267 TracyLfqPrepare( QueueType::AckSymbolCodeNotAvailable );
3268 TracyLfqCommit;
3269 break;
3270 }
3271 case SymbolQueueItemType::SourceCode:
3272 HandleSourceCodeQuery( (char*)si.ptr, (char*)si.extra, si.id );
3273 break;
3274 default:
3275 assert( false );
3276 break;
3277 }
3278}
3279
3280void Profiler::SymbolWorker()
3281{
3282#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER
3283 s_symbolTid = syscall( SYS_gettid );
3284#endif
3285
3286 ThreadExitHandler threadExitHandler;
3287 SetThreadName( "Tracy Symbol Worker" );
3288#ifdef TRACY_USE_RPMALLOC
3289 InitRpmalloc();
3290#endif
3291 InitCallstack();
3292 while( m_timeBegin.load( std::memory_order_relaxed ) == 0 ) std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
3293
3294 for(;;)
3295 {
3296 const auto shouldExit = ShouldExit();
3297#ifdef TRACY_ON_DEMAND
3298 if( !IsConnected() )
3299 {
3300 if( shouldExit )
3301 {
3302 s_symbolThreadGone.store( true, std::memory_order_release );
3303 return;
3304 }
3305 while( m_symbolQueue.front() ) m_symbolQueue.pop();
3306 std::this_thread::sleep_for( std::chrono::milliseconds( 20 ) );
3307 continue;
3308 }
3309#endif
3310 auto si = m_symbolQueue.front();
3311 if( si )
3312 {
3313 HandleSymbolQueueItem( *si );
3314 m_symbolQueue.pop();
3315 }
3316 else
3317 {
3318 if( shouldExit )
3319 {
3320 s_symbolThreadGone.store( true, std::memory_order_release );
3321 return;
3322 }
3323 std::this_thread::sleep_for( std::chrono::milliseconds( 20 ) );
3324 }
3325 }
3326}
3327#endif
3328
3329bool Profiler::HandleServerQuery()
3330{
3331 ServerQueryPacket payload;
3332 if( !m_sock->Read( &payload, sizeof( payload ), 10 ) ) return false;
3333
3334 uint8_t type;
3335 uint64_t ptr;
3336 memcpy( &type, &payload.type, sizeof( payload.type ) );
3337 memcpy( &ptr, &payload.ptr, sizeof( payload.ptr ) );
3338
3339 switch( type )
3340 {
3341 case ServerQueryString:
3342 SendString( ptr, (const char*)ptr, QueueType::StringData );
3343 break;
3344 case ServerQueryThreadString:
3345 if( ptr == m_mainThread )
3346 {
3347 SendString( ptr, "Main thread", 11, QueueType::ThreadName );
3348 }
3349 else
3350 {
3351 SendString( ptr, GetThreadName( ptr ), QueueType::ThreadName );
3352 }
3353 break;
3354 case ServerQuerySourceLocation:
3355 SendSourceLocation( ptr );
3356 break;
3357 case ServerQueryPlotName:
3358 SendString( ptr, (const char*)ptr, QueueType::PlotName );
3359 break;
3360 case ServerQueryTerminate:
3361 return false;
3362 case ServerQueryCallstackFrame:
3363 QueueCallstackFrame( ptr );
3364 break;
3365 case ServerQueryFrameName:
3366 SendString( ptr, (const char*)ptr, QueueType::FrameName );
3367 break;
3368 case ServerQueryDisconnect:
3369 HandleDisconnect();
3370 return false;
3371#ifdef TRACY_HAS_SYSTEM_TRACING
3372 case ServerQueryExternalName:
3373 QueueExternalName( ptr );
3374 break;
3375#endif
3376 case ServerQueryParameter:
3377 HandleParameter( ptr );
3378 break;
3379 case ServerQuerySymbol:
3380 QueueSymbolQuery( ptr );
3381 break;
3382#ifndef TRACY_NO_CODE_TRANSFER
3383 case ServerQuerySymbolCode:
3384 HandleSymbolCodeQuery( ptr, payload.extra );
3385 break;
3386#endif
3387 case ServerQuerySourceCode:
3388 QueueSourceCodeQuery( uint32_t( ptr ) );
3389 break;
3390 case ServerQueryDataTransfer:
3391 if( m_queryData )
3392 {
3393 assert( !m_queryImage );
3394 m_queryImage = m_queryData;
3395 }
3396 m_queryDataPtr = m_queryData = (char*)tracy_malloc( ptr + 11 );
3397 AckServerQuery();
3398 break;
3399 case ServerQueryDataTransferPart:
3400 memcpy( m_queryDataPtr, &ptr, 8 );
3401 memcpy( m_queryDataPtr+8, &payload.extra, 4 );
3402 m_queryDataPtr += 12;
3403 AckServerQuery();
3404 break;
3405#ifdef TRACY_FIBERS
3406 case ServerQueryFiberName:
3407 SendString( ptr, (const char*)ptr, QueueType::FiberName );
3408 break;
3409#endif
3410 default:
3411 assert( false );
3412 break;
3413 }
3414
3415 return true;
3416}
3417
3418void Profiler::HandleDisconnect()
3419{
3420 moodycamel::ConsumerToken token( GetQueue() );
3421
3422#ifdef TRACY_HAS_SYSTEM_TRACING
3423 if( s_sysTraceThread )
3424 {
3425 auto timestamp = GetTime();
3426 for(;;)
3427 {
3428 const auto status = DequeueContextSwitches( token, timestamp );
3429 if( status == DequeueStatus::ConnectionLost )
3430 {
3431 return;
3432 }
3433 else if( status == DequeueStatus::QueueEmpty )
3434 {
3435 if( m_bufferOffset != m_bufferStart )
3436 {
3437 if( !CommitData() ) return;
3438 }
3439 }
3440 if( timestamp < 0 )
3441 {
3442 if( m_bufferOffset != m_bufferStart )
3443 {
3444 if( !CommitData() ) return;
3445 }
3446 break;
3447 }
3448 ClearSerial();
3449 if( m_sock->HasData() )
3450 {
3451 while( m_sock->HasData() )
3452 {
3453 if( !HandleServerQuery() ) return;
3454 }
3455 if( m_bufferOffset != m_bufferStart )
3456 {
3457 if( !CommitData() ) return;
3458 }
3459 }
3460 else
3461 {
3462 if( m_bufferOffset != m_bufferStart )
3463 {
3464 if( !CommitData() ) return;
3465 }
3466 std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
3467 }
3468 }
3469 }
3470#endif
3471
3472 QueueItem terminate;
3473 MemWrite( &terminate.hdr.type, QueueType::Terminate );
3474 if( !SendData( (const char*)&terminate, 1 ) ) return;
3475 for(;;)
3476 {
3477 ClearQueues( token );
3478 if( m_sock->HasData() )
3479 {
3480 while( m_sock->HasData() )
3481 {
3482 if( !HandleServerQuery() ) return;
3483 }
3484 if( m_bufferOffset != m_bufferStart )
3485 {
3486 if( !CommitData() ) return;
3487 }
3488 }
3489 else
3490 {
3491 if( m_bufferOffset != m_bufferStart )
3492 {
3493 if( !CommitData() ) return;
3494 }
3495 std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
3496 }
3497 }
3498}
3499
3500void Profiler::CalibrateTimer()
3501{
3502 m_timerMul = 1.;
3503
3504#ifdef TRACY_HW_TIMER
3505
3506# if !defined TRACY_TIMER_QPC && defined TRACY_TIMER_FALLBACK
3507 const bool needCalibration = HardwareSupportsInvariantTSC();
3508# else
3509 const bool needCalibration = true;
3510# endif
3511 if( needCalibration )
3512 {
3513 std::atomic_signal_fence( std::memory_order_acq_rel );
3514 const auto t0 = std::chrono::high_resolution_clock::now();
3515 const auto r0 = GetTime();
3516 std::atomic_signal_fence( std::memory_order_acq_rel );
3517 std::this_thread::sleep_for( std::chrono::milliseconds( 200 ) );
3518 std::atomic_signal_fence( std::memory_order_acq_rel );
3519 const auto t1 = std::chrono::high_resolution_clock::now();
3520 const auto r1 = GetTime();
3521 std::atomic_signal_fence( std::memory_order_acq_rel );
3522
3523 const auto dt = std::chrono::duration_cast<std::chrono::nanoseconds>( t1 - t0 ).count();
3524 const auto dr = r1 - r0;
3525
3526 m_timerMul = double( dt ) / double( dr );
3527 }
3528#endif
3529}
3530
3531void Profiler::CalibrateDelay()
3532{
3533 constexpr int Iterations = 50000;
3534
3535 auto mindiff = std::numeric_limits<int64_t>::max();
3536 for( int i=0; i<Iterations * 10; i++ )
3537 {
3538 const auto t0i = GetTime();
3539 const auto t1i = GetTime();
3540 const auto dti = t1i - t0i;
3541 if( dti > 0 && dti < mindiff ) mindiff = dti;
3542 }
3543 m_resolution = mindiff;
3544
3545#ifdef TRACY_DELAYED_INIT
3546 m_delay = m_resolution;
3547#else
3548 constexpr int Events = Iterations * 2; // start + end
3549 static_assert( Events < QueuePrealloc, "Delay calibration loop will allocate memory in queue" );
3550
3551 static const tracy::SourceLocationData __tracy_source_location { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 };
3552 const auto t0 = GetTime();
3553 for( int i=0; i<Iterations; i++ )
3554 {
3555 {
3556 TracyLfqPrepare( QueueType::ZoneBegin );
3557 MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
3558 MemWrite( &item->zoneBegin.srcloc, (uint64_t)&__tracy_source_location );
3559 TracyLfqCommit;
3560 }
3561 {
3562 TracyLfqPrepare( QueueType::ZoneEnd );
3563 MemWrite( &item->zoneEnd.time, GetTime() );
3564 TracyLfqCommit;
3565 }
3566 }
3567 const auto t1 = GetTime();
3568 const auto dt = t1 - t0;
3569 m_delay = dt / Events;
3570
3571 moodycamel::ConsumerToken token( GetQueue() );
3572 int left = Events;
3573 while( left != 0 )
3574 {
3575 const auto sz = GetQueue().try_dequeue_bulk_single( token, [](const uint64_t&){}, [](QueueItem* item, size_t sz){} );
3576 assert( sz > 0 );
3577 left -= (int)sz;
3578 }
3579 assert( GetQueue().size_approx() == 0 );
3580#endif
3581}
3582
3583void Profiler::ReportTopology()
3584{
3585#ifndef TRACY_DELAYED_INIT
3586 struct CpuData
3587 {
3588 uint32_t package;
3589 uint32_t core;
3590 uint32_t thread;
3591 };
3592
3593#if defined _WIN32
3594# ifdef TRACY_UWP
3595 t_GetLogicalProcessorInformationEx _GetLogicalProcessorInformationEx = &::GetLogicalProcessorInformationEx;
3596# else
3597 t_GetLogicalProcessorInformationEx _GetLogicalProcessorInformationEx = (t_GetLogicalProcessorInformationEx)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "GetLogicalProcessorInformationEx" );
3598# endif
3599 if( !_GetLogicalProcessorInformationEx ) return;
3600
3601 DWORD psz = 0;
3602 _GetLogicalProcessorInformationEx( RelationProcessorPackage, nullptr, &psz );
3603 auto packageInfo = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)tracy_malloc( psz );
3604 auto res = _GetLogicalProcessorInformationEx( RelationProcessorPackage, packageInfo, &psz );
3605 assert( res );
3606
3607 DWORD csz = 0;
3608 _GetLogicalProcessorInformationEx( RelationProcessorCore, nullptr, &csz );
3609 auto coreInfo = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)tracy_malloc( csz );
3610 res = _GetLogicalProcessorInformationEx( RelationProcessorCore, coreInfo, &csz );
3611 assert( res );
3612
3613 SYSTEM_INFO sysinfo;
3614 GetSystemInfo( &sysinfo );
3615 const uint32_t numcpus = sysinfo.dwNumberOfProcessors;
3616
3617 auto cpuData = (CpuData*)tracy_malloc( sizeof( CpuData ) * numcpus );
3618 for( uint32_t i=0; i<numcpus; i++ ) cpuData[i].thread = i;
3619
3620 int idx = 0;
3621 auto ptr = packageInfo;
3622 while( (char*)ptr < ((char*)packageInfo) + psz )
3623 {
3624 assert( ptr->Relationship == RelationProcessorPackage );
3625 // FIXME account for GroupCount
3626 auto mask = ptr->Processor.GroupMask[0].Mask;
3627 int core = 0;
3628 while( mask != 0 )
3629 {
3630 if( mask & 1 ) cpuData[core].package = idx;
3631 core++;
3632 mask >>= 1;
3633 }
3634 ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(((char*)ptr) + ptr->Size);
3635 idx++;
3636 }
3637
3638 idx = 0;
3639 ptr = coreInfo;
3640 while( (char*)ptr < ((char*)coreInfo) + csz )
3641 {
3642 assert( ptr->Relationship == RelationProcessorCore );
3643 // FIXME account for GroupCount
3644 auto mask = ptr->Processor.GroupMask[0].Mask;
3645 int core = 0;
3646 while( mask != 0 )
3647 {
3648 if( mask & 1 ) cpuData[core].core = idx;
3649 core++;
3650 mask >>= 1;
3651 }
3652 ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(((char*)ptr) + ptr->Size);
3653 idx++;
3654 }
3655
3656 for( uint32_t i=0; i<numcpus; i++ )
3657 {
3658 auto& data = cpuData[i];
3659
3660 TracyLfqPrepare( QueueType::CpuTopology );
3661 MemWrite( &item->cpuTopology.package, data.package );
3662 MemWrite( &item->cpuTopology.core, data.core );
3663 MemWrite( &item->cpuTopology.thread, data.thread );
3664
3665#ifdef TRACY_ON_DEMAND
3666 DeferItem( *item );
3667#endif
3668
3669 TracyLfqCommit;
3670 }
3671
3672 tracy_free( cpuData );
3673 tracy_free( coreInfo );
3674 tracy_free( packageInfo );
3675#elif defined __linux__
3676 const int numcpus = std::thread::hardware_concurrency();
3677 auto cpuData = (CpuData*)tracy_malloc( sizeof( CpuData ) * numcpus );
3678 memset( cpuData, 0, sizeof( CpuData ) * numcpus );
3679
3680 const char* basePath = "/sys/devices/system/cpu/cpu";
3681 for( int i=0; i<numcpus; i++ )
3682 {
3683 char path[1024];
3684 sprintf( path, "%s%i/topology/physical_package_id", basePath, i );
3685 char buf[1024];
3686 FILE* f = fopen( path, "rb" );
3687 if( !f )
3688 {
3689 tracy_free( cpuData );
3690 return;
3691 }
3692 auto read = fread( buf, 1, 1024, f );
3693 buf[read] = '\0';
3694 fclose( f );
3695 cpuData[i].package = uint32_t( atoi( buf ) );
3696 cpuData[i].thread = i;
3697 sprintf( path, "%s%i/topology/core_id", basePath, i );
3698 f = fopen( path, "rb" );
3699 read = fread( buf, 1, 1024, f );
3700 buf[read] = '\0';
3701 fclose( f );
3702 cpuData[i].core = uint32_t( atoi( buf ) );
3703 }
3704
3705 for( int i=0; i<numcpus; i++ )
3706 {
3707 auto& data = cpuData[i];
3708
3709 TracyLfqPrepare( QueueType::CpuTopology );
3710 MemWrite( &item->cpuTopology.package, data.package );
3711 MemWrite( &item->cpuTopology.core, data.core );
3712 MemWrite( &item->cpuTopology.thread, data.thread );
3713
3714#ifdef TRACY_ON_DEMAND
3715 DeferItem( *item );
3716#endif
3717
3718 TracyLfqCommit;
3719 }
3720
3721 tracy_free( cpuData );
3722#endif
3723#endif
3724}
3725
3726void Profiler::SendCallstack( int depth, const char* skipBefore )
3727{
3728#ifdef TRACY_HAS_CALLSTACK
3729 auto ptr = Callstack( depth );
3730 CutCallstack( ptr, skipBefore );
3731
3732 TracyQueuePrepare( QueueType::Callstack );
3733 MemWrite( &item->callstackFat.ptr, (uint64_t)ptr );
3734 TracyQueueCommit( callstackFatThread );
3735#endif
3736}
3737
3738void Profiler::CutCallstack( void* callstack, const char* skipBefore )
3739{
3740#ifdef TRACY_HAS_CALLSTACK
3741 auto data = (uintptr_t*)callstack;
3742 const auto sz = *data++;
3743 uintptr_t i;
3744 for( i=0; i<sz; i++ )
3745 {
3746 auto name = DecodeCallstackPtrFast( uint64_t( data[i] ) );
3747 const bool found = strcmp( name, skipBefore ) == 0;
3748 if( found )
3749 {
3750 i++;
3751 break;
3752 }
3753 }
3754
3755 if( i != sz )
3756 {
3757 memmove( data, data + i, ( sz - i ) * sizeof( uintptr_t* ) );
3758 *--data = sz - i;
3759 }
3760#endif
3761}
3762
3763#ifdef TRACY_HAS_SYSTIME
3764void Profiler::ProcessSysTime()
3765{
3766 if( m_shutdown.load( std::memory_order_relaxed ) ) return;
3767 auto t = std::chrono::high_resolution_clock::now().time_since_epoch().count();
3768 if( t - m_sysTimeLast > 100000000 ) // 100 ms
3769 {
3770 auto sysTime = m_sysTime.Get();
3771 if( sysTime >= 0 )
3772 {
3773 m_sysTimeLast = t;
3774
3775 TracyLfqPrepare( QueueType::SysTimeReport );
3776 MemWrite( &item->sysTime.time, GetTime() );
3777 MemWrite( &item->sysTime.sysTime, sysTime );
3778 TracyLfqCommit;
3779 }
3780 }
3781}
3782#endif
3783
3784void Profiler::HandleParameter( uint64_t payload )
3785{
3786 assert( m_paramCallback );
3787 const auto idx = uint32_t( payload >> 32 );
3788 const auto val = int32_t( payload & 0xFFFFFFFF );
3789 m_paramCallback( m_paramCallbackData, idx, val );
3790 AckServerQuery();
3791}
3792
3793void Profiler::HandleSymbolCodeQuery( uint64_t symbol, uint32_t size )
3794{
3795 if( symbol >> 63 != 0 )
3796 {
3797 QueueKernelCode( symbol, size );
3798 }
3799 else
3800 {
3801#ifdef __ANDROID__
3802 // On Android it's common for code to be in mappings that are only executable
3803 // but not readable.
3804 if( !EnsureReadable( symbol ) )
3805 {
3806 AckSymbolCodeNotAvailable();
3807 return;
3808 }
3809#endif
3810 SendLongString( symbol, (const char*)symbol, size, QueueType::SymbolCode );
3811 }
3812}
3813
3814void Profiler::HandleSourceCodeQuery( char* data, char* image, uint32_t id )
3815{
3816 bool ok = false;
3817 struct stat st;
3818 if( stat( data, &st ) == 0 && (uint64_t)st.st_mtime < m_exectime )
3819 {
3820 if( st.st_size < ( TargetFrameSize - 16 ) )
3821 {
3822 FILE* f = fopen( data, "rb" );
3823 if( f )
3824 {
3825 auto ptr = (char*)tracy_malloc_fast( st.st_size );
3826 auto rd = fread( ptr, 1, st.st_size, f );
3827 fclose( f );
3828 if( rd == (size_t)st.st_size )
3829 {
3830 TracyLfqPrepare( QueueType::SourceCodeMetadata );
3831 MemWrite( &item->sourceCodeMetadata.ptr, (uint64_t)ptr );
3832 MemWrite( &item->sourceCodeMetadata.size, (uint32_t)rd );
3833 MemWrite( &item->sourceCodeMetadata.id, id );
3834 TracyLfqCommit;
3835 ok = true;
3836 }
3837 }
3838 }
3839 }
3840
3841#ifdef TRACY_DEBUGINFOD
3842 else if( image && data[0] == '/' )
3843 {
3844 size_t size;
3845 auto buildid = GetBuildIdForImage( image, size );
3846 if( buildid )
3847 {
3848 auto d = debuginfod_find_source( GetDebuginfodClient(), buildid, size, data, nullptr );
3849 TracyDebug( "DebugInfo source query: %s, fn: %s, image: %s\n", d >= 0 ? " ok " : "fail", data, image );
3850 if( d >= 0 )
3851 {
3852 struct stat st;
3853 fstat( d, &st );
3854 if( st.st_size < ( TargetFrameSize - 16 ) )
3855 {
3856 lseek( d, 0, SEEK_SET );
3857 auto ptr = (char*)tracy_malloc_fast( st.st_size );
3858 auto rd = read( d, ptr, st.st_size );
3859 if( rd == (size_t)st.st_size )
3860 {
3861 TracyLfqPrepare( QueueType::SourceCodeMetadata );
3862 MemWrite( &item->sourceCodeMetadata.ptr, (uint64_t)ptr );
3863 MemWrite( &item->sourceCodeMetadata.size, (uint32_t)rd );
3864 MemWrite( &item->sourceCodeMetadata.id, id );
3865 TracyLfqCommit;
3866 ok = true;
3867 }
3868 }
3869 close( d );
3870 }
3871 }
3872 }
3873 else
3874 {
3875 TracyDebug( "DebugInfo invalid query fn: %s, image: %s\n", data, image );
3876 }
3877#endif
3878
3879 if( !ok && m_sourceCallback )
3880 {
3881 size_t sz;
3882 char* ptr = m_sourceCallback( m_sourceCallbackData, data, sz );
3883 if( ptr )
3884 {
3885 if( sz < ( TargetFrameSize - 16 ) )
3886 {
3887 TracyLfqPrepare( QueueType::SourceCodeMetadata );
3888 MemWrite( &item->sourceCodeMetadata.ptr, (uint64_t)ptr );
3889 MemWrite( &item->sourceCodeMetadata.size, (uint32_t)sz );
3890 MemWrite( &item->sourceCodeMetadata.id, id );
3891 TracyLfqCommit;
3892 ok = true;
3893 }
3894 }
3895 }
3896
3897 if( !ok )
3898 {
3899 TracyLfqPrepare( QueueType::AckSourceCodeNotAvailable );
3900 MemWrite( &item->sourceCodeNotAvailable, id );
3901 TracyLfqCommit;
3902 }
3903
3904 tracy_free_fast( data );
3905 tracy_free_fast( image );
3906}
3907
3908#if defined _WIN32 && defined TRACY_TIMER_QPC
3909int64_t Profiler::GetTimeQpc()
3910{
3911 LARGE_INTEGER t;
3912 QueryPerformanceCounter( &t );
3913 return t.QuadPart;
3914}
3915#endif
3916
3917}
3918
3919#ifdef __cplusplus
3920extern "C" {
3921#endif
3922
3923TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int active )
3924{
3925 ___tracy_c_zone_context ctx;
3926#ifdef TRACY_ON_DEMAND
3927 ctx.active = active && tracy::GetProfiler().IsConnected();
3928#else
3929 ctx.active = active;
3930#endif
3931 if( !ctx.active ) return ctx;
3932 const auto id = tracy::GetProfiler().GetNextZoneId();
3933 ctx.id = id;
3934
3935#ifndef TRACY_NO_VERIFY
3936 {
3937 TracyQueuePrepareC( tracy::QueueType::ZoneValidation );
3938 tracy::MemWrite( &item->zoneValidation.id, id );
3939 TracyQueueCommitC( zoneValidationThread );
3940 }
3941#endif
3942 {
3943 TracyQueuePrepareC( tracy::QueueType::ZoneBegin );
3944 tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() );
3945 tracy::MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
3946 TracyQueueCommitC( zoneBeginThread );
3947 }
3948 return ctx;
3949}
3950
3951TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int depth, int active )
3952{
3953 ___tracy_c_zone_context ctx;
3954#ifdef TRACY_ON_DEMAND
3955 ctx.active = active && tracy::GetProfiler().IsConnected();
3956#else
3957 ctx.active = active;
3958#endif
3959 if( !ctx.active ) return ctx;
3960 const auto id = tracy::GetProfiler().GetNextZoneId();
3961 ctx.id = id;
3962
3963#ifndef TRACY_NO_VERIFY
3964 {
3965 TracyQueuePrepareC( tracy::QueueType::ZoneValidation );
3966 tracy::MemWrite( &item->zoneValidation.id, id );
3967 TracyQueueCommitC( zoneValidationThread );
3968 }
3969#endif
3970 tracy::GetProfiler().SendCallstack( depth );
3971 {
3972 TracyQueuePrepareC( tracy::QueueType::ZoneBeginCallstack );
3973 tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() );
3974 tracy::MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
3975 TracyQueueCommitC( zoneBeginThread );
3976 }
3977 return ctx;
3978}
3979
3980TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int active )
3981{
3982 ___tracy_c_zone_context ctx;
3983#ifdef TRACY_ON_DEMAND
3984 ctx.active = active && tracy::GetProfiler().IsConnected();
3985#else
3986 ctx.active = active;
3987#endif
3988 if( !ctx.active )
3989 {
3990 tracy::tracy_free( (void*)srcloc );
3991 return ctx;
3992 }
3993 const auto id = tracy::GetProfiler().GetNextZoneId();
3994 ctx.id = id;
3995
3996#ifndef TRACY_NO_VERIFY
3997 {
3998 TracyQueuePrepareC( tracy::QueueType::ZoneValidation );
3999 tracy::MemWrite( &item->zoneValidation.id, id );
4000 TracyQueueCommitC( zoneValidationThread );
4001 }
4002#endif
4003 {
4004 TracyQueuePrepareC( tracy::QueueType::ZoneBeginAllocSrcLoc );
4005 tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() );
4006 tracy::MemWrite( &item->zoneBegin.srcloc, srcloc );
4007 TracyQueueCommitC( zoneBeginThread );
4008 }
4009 return ctx;
4010}
4011
4012TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srcloc, int depth, int active )
4013{
4014 ___tracy_c_zone_context ctx;
4015#ifdef TRACY_ON_DEMAND
4016 ctx.active = active && tracy::GetProfiler().IsConnected();
4017#else
4018 ctx.active = active;
4019#endif
4020 if( !ctx.active )
4021 {
4022 tracy::tracy_free( (void*)srcloc );
4023 return ctx;
4024 }
4025 const auto id = tracy::GetProfiler().GetNextZoneId();
4026 ctx.id = id;
4027
4028#ifndef TRACY_NO_VERIFY
4029 {
4030 TracyQueuePrepareC( tracy::QueueType::ZoneValidation );
4031 tracy::MemWrite( &item->zoneValidation.id, id );
4032 TracyQueueCommitC( zoneValidationThread );
4033 }
4034#endif
4035 tracy::GetProfiler().SendCallstack( depth );
4036 {
4037 TracyQueuePrepareC( tracy::QueueType::ZoneBeginAllocSrcLocCallstack );
4038 tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() );
4039 tracy::MemWrite( &item->zoneBegin.srcloc, srcloc );
4040 TracyQueueCommitC( zoneBeginThread );
4041 }
4042 return ctx;
4043}
4044
4045TRACY_API void ___tracy_emit_zone_end( TracyCZoneCtx ctx )
4046{
4047 if( !ctx.active ) return;
4048#ifndef TRACY_NO_VERIFY
4049 {
4050 TracyQueuePrepareC( tracy::QueueType::ZoneValidation );
4051 tracy::MemWrite( &item->zoneValidation.id, ctx.id );
4052 TracyQueueCommitC( zoneValidationThread );
4053 }
4054#endif
4055 {
4056 TracyQueuePrepareC( tracy::QueueType::ZoneEnd );
4057 tracy::MemWrite( &item->zoneEnd.time, tracy::Profiler::GetTime() );
4058 TracyQueueCommitC( zoneEndThread );
4059 }
4060}
4061
4062TRACY_API void ___tracy_emit_zone_text( TracyCZoneCtx ctx, const char* txt, size_t size )
4063{
4064 assert( size < std::numeric_limits<uint16_t>::max() );
4065 if( !ctx.active ) return;
4066 auto ptr = (char*)tracy::tracy_malloc( size );
4067 memcpy( ptr, txt, size );
4068#ifndef TRACY_NO_VERIFY
4069 {
4070 TracyQueuePrepareC( tracy::QueueType::ZoneValidation );
4071 tracy::MemWrite( &item->zoneValidation.id, ctx.id );
4072 TracyQueueCommitC( zoneValidationThread );
4073 }
4074#endif
4075 {
4076 TracyQueuePrepareC( tracy::QueueType::ZoneText );
4077 tracy::MemWrite( &item->zoneTextFat.text, (uint64_t)ptr );
4078 tracy::MemWrite( &item->zoneTextFat.size, (uint16_t)size );
4079 TracyQueueCommitC( zoneTextFatThread );
4080 }
4081}
4082
4083TRACY_API void ___tracy_emit_zone_name( TracyCZoneCtx ctx, const char* txt, size_t size )
4084{
4085 assert( size < std::numeric_limits<uint16_t>::max() );
4086 if( !ctx.active ) return;
4087 auto ptr = (char*)tracy::tracy_malloc( size );
4088 memcpy( ptr, txt, size );
4089#ifndef TRACY_NO_VERIFY
4090 {
4091 TracyQueuePrepareC( tracy::QueueType::ZoneValidation );
4092 tracy::MemWrite( &item->zoneValidation.id, ctx.id );
4093 TracyQueueCommitC( zoneValidationThread );
4094 }
4095#endif
4096 {
4097 TracyQueuePrepareC( tracy::QueueType::ZoneName );
4098 tracy::MemWrite( &item->zoneTextFat.text, (uint64_t)ptr );
4099 tracy::MemWrite( &item->zoneTextFat.size, (uint16_t)size );
4100 TracyQueueCommitC( zoneTextFatThread );
4101 }
4102}
4103
4104TRACY_API void ___tracy_emit_zone_color( TracyCZoneCtx ctx, uint32_t color ) {
4105 if( !ctx.active ) return;
4106#ifndef TRACY_NO_VERIFY
4107 {
4108 TracyQueuePrepareC( tracy::QueueType::ZoneValidation );
4109 tracy::MemWrite( &item->zoneValidation.id, ctx.id );
4110 TracyQueueCommitC( zoneValidationThread );
4111 }
4112#endif
4113 {
4114 TracyQueuePrepareC( tracy::QueueType::ZoneColor );
4115 tracy::MemWrite( &item->zoneColor.b, uint8_t( ( color ) & 0xFF ) );
4116 tracy::MemWrite( &item->zoneColor.g, uint8_t( ( color >> 8 ) & 0xFF ) );
4117 tracy::MemWrite( &item->zoneColor.r, uint8_t( ( color >> 16 ) & 0xFF ) );
4118 TracyQueueCommitC( zoneColorThread );
4119 }
4120}
4121
4122TRACY_API void ___tracy_emit_zone_value( TracyCZoneCtx ctx, uint64_t value )
4123{
4124 if( !ctx.active ) return;
4125#ifndef TRACY_NO_VERIFY
4126 {
4127 TracyQueuePrepareC( tracy::QueueType::ZoneValidation );
4128 tracy::MemWrite( &item->zoneValidation.id, ctx.id );
4129 TracyQueueCommitC( zoneValidationThread );
4130 }
4131#endif
4132 {
4133 TracyQueuePrepareC( tracy::QueueType::ZoneValue );
4134 tracy::MemWrite( &item->zoneValue.value, value );
4135 TracyQueueCommitC( zoneValueThread );
4136 }
4137}
4138
4139TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int secure ) { tracy::Profiler::MemAlloc( ptr, size, secure != 0 ); }
4140TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth, int secure ) { tracy::Profiler::MemAllocCallstack( ptr, size, depth, secure != 0 ); }
4141TRACY_API void ___tracy_emit_memory_free( const void* ptr, int secure ) { tracy::Profiler::MemFree( ptr, secure != 0 ); }
4142TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int depth, int secure ) { tracy::Profiler::MemFreeCallstack( ptr, depth, secure != 0 ); }
4143TRACY_API void ___tracy_emit_memory_alloc_named( const void* ptr, size_t size, int secure, const char* name ) { tracy::Profiler::MemAllocNamed( ptr, size, secure != 0, name ); }
4144TRACY_API void ___tracy_emit_memory_alloc_callstack_named( const void* ptr, size_t size, int depth, int secure, const char* name ) { tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, secure != 0, name ); }
4145TRACY_API void ___tracy_emit_memory_free_named( const void* ptr, int secure, const char* name ) { tracy::Profiler::MemFreeNamed( ptr, secure != 0, name ); }
4146TRACY_API void ___tracy_emit_memory_free_callstack_named( const void* ptr, int depth, int secure, const char* name ) { tracy::Profiler::MemFreeCallstackNamed( ptr, depth, secure != 0, name ); }
4147TRACY_API void ___tracy_emit_frame_mark( const char* name ) { tracy::Profiler::SendFrameMark( name ); }
4148TRACY_API void ___tracy_emit_frame_mark_start( const char* name ) { tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgStart ); }
4149TRACY_API void ___tracy_emit_frame_mark_end( const char* name ) { tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgEnd ); }
4150TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_t h, uint8_t offset, int flip ) { tracy::Profiler::SendFrameImage( image, w, h, offset, flip ); }
4151TRACY_API void ___tracy_emit_plot( const char* name, double val ) { tracy::Profiler::PlotData( name, val ); }
4152TRACY_API void ___tracy_emit_plot_float( const char* name, float val ) { tracy::Profiler::PlotData( name, val ); }
4153TRACY_API void ___tracy_emit_plot_int( const char* name, int64_t val ) { tracy::Profiler::PlotData( name, val ); }
4154TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int callstack ) { tracy::Profiler::Message( txt, size, callstack ); }
4155TRACY_API void ___tracy_emit_messageL( const char* txt, int callstack ) { tracy::Profiler::Message( txt, callstack ); }
4156TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int callstack ) { tracy::Profiler::MessageColor( txt, size, color, callstack ); }
4157TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int callstack ) { tracy::Profiler::MessageColor( txt, color, callstack ); }
4158TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size ) { tracy::Profiler::MessageAppInfo( txt, size ); }
4159
4160TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz ) {
4161 return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz );
4162}
4163
4164TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz ) {
4165 return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
4166}
4167
4168TRACY_API void ___tracy_emit_gpu_zone_begin( const struct ___tracy_gpu_zone_begin_data data )
4169{
4170 TracyLfqPrepareC( tracy::QueueType::GpuZoneBegin );
4171 tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() );
4172 tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() );
4173 tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc );
4174 tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId );
4175 tracy::MemWrite( &item->gpuZoneBegin.context, data.context );
4176 TracyLfqCommitC;
4177}
4178
4179TRACY_API void ___tracy_emit_gpu_zone_begin_callstack( const struct ___tracy_gpu_zone_begin_callstack_data data )
4180{
4181 tracy::GetProfiler().SendCallstack( data.depth );
4182 TracyLfqPrepareC( tracy::QueueType::GpuZoneBeginCallstack );
4183 tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() );
4184 tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() );
4185 tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId );
4186 tracy::MemWrite( &item->gpuZoneBegin.context, data.context );
4187 tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc );
4188 TracyLfqCommitC;
4189}
4190
4191TRACY_API void ___tracy_emit_gpu_zone_begin_alloc( const struct ___tracy_gpu_zone_begin_data data )
4192{
4193 TracyLfqPrepareC( tracy::QueueType::GpuZoneBeginAllocSrcLoc );
4194 tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() );
4195 tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() );
4196 tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc );
4197 tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId );
4198 tracy::MemWrite( &item->gpuZoneBegin.context, data.context );
4199 TracyLfqCommitC;
4200}
4201
4202TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_callstack( const struct ___tracy_gpu_zone_begin_callstack_data data )
4203{
4204 tracy::GetProfiler().SendCallstack( data.depth );
4205 TracyLfqPrepareC( tracy::QueueType::GpuZoneBeginAllocSrcLocCallstack );
4206 tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() );
4207 tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() );
4208 tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc );
4209 tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId );
4210 tracy::MemWrite( &item->gpuZoneBegin.context, data.context );
4211 TracyLfqCommitC;
4212}
4213
4214TRACY_API void ___tracy_emit_gpu_time( const struct ___tracy_gpu_time_data data )
4215{
4216 TracyLfqPrepareC( tracy::QueueType::GpuTime );
4217 tracy::MemWrite( &item->gpuTime.gpuTime, data.gpuTime );
4218 tracy::MemWrite( &item->gpuTime.queryId, data.queryId );
4219 tracy::MemWrite( &item->gpuTime.context, data.context );
4220 TracyLfqCommitC;
4221}
4222
4223TRACY_API void ___tracy_emit_gpu_zone_end( const struct ___tracy_gpu_zone_end_data data )
4224{
4225 TracyLfqPrepareC( tracy::QueueType::GpuZoneEnd );
4226 tracy::MemWrite( &item->gpuZoneEnd.cpuTime, tracy::Profiler::GetTime() );
4227 memset( &item->gpuZoneEnd.thread, 0, sizeof( item->gpuZoneEnd.thread ) );
4228 tracy::MemWrite( &item->gpuZoneEnd.queryId, data.queryId );
4229 tracy::MemWrite( &item->gpuZoneEnd.context, data.context );
4230 TracyLfqCommitC;
4231}
4232
4233TRACY_API void ___tracy_emit_gpu_new_context( ___tracy_gpu_new_context_data data )
4234{
4235 TracyLfqPrepareC( tracy::QueueType::GpuNewContext );
4236 tracy::MemWrite( &item->gpuNewContext.cpuTime, tracy::Profiler::GetTime() );
4237 tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() );
4238 tracy::MemWrite( &item->gpuNewContext.gpuTime, data.gpuTime );
4239 tracy::MemWrite( &item->gpuNewContext.period, data.period );
4240 tracy::MemWrite( &item->gpuNewContext.context, data.context );
4241 tracy::MemWrite( &item->gpuNewContext.flags, data.flags );
4242 tracy::MemWrite( &item->gpuNewContext.type, data.type );
4243 TracyLfqCommitC;
4244}
4245
4246TRACY_API void ___tracy_emit_gpu_context_name( const struct ___tracy_gpu_context_name_data data )
4247{
4248 auto ptr = (char*)tracy::tracy_malloc( data.len );
4249 memcpy( ptr, data.name, data.len );
4250
4251 TracyLfqPrepareC( tracy::QueueType::GpuContextName );
4252 tracy::MemWrite( &item->gpuContextNameFat.context, data.context );
4253 tracy::MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
4254 tracy::MemWrite( &item->gpuContextNameFat.size, data.len );
4255 TracyLfqCommitC;
4256}
4257
4258TRACY_API void ___tracy_emit_gpu_calibration( const struct ___tracy_gpu_calibration_data data )
4259{
4260 TracyLfqPrepareC( tracy::QueueType::GpuCalibration );
4261 tracy::MemWrite( &item->gpuCalibration.cpuTime, tracy::Profiler::GetTime() );
4262 tracy::MemWrite( &item->gpuCalibration.gpuTime, data.gpuTime );
4263 tracy::MemWrite( &item->gpuCalibration.cpuDelta, data.cpuDelta );
4264 tracy::MemWrite( &item->gpuCalibration.context, data.context );
4265 TracyLfqCommitC;
4266}
4267
4268TRACY_API void ___tracy_emit_gpu_zone_begin_serial( const struct ___tracy_gpu_zone_begin_data data )
4269{
4270 auto item = tracy::Profiler::QueueSerial();
4271 tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginSerial );
4272 tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() );
4273 tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc );
4274 tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() );
4275 tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId );
4276 tracy::MemWrite( &item->gpuZoneBegin.context, data.context );
4277 tracy::Profiler::QueueSerialFinish();
4278}
4279
4280TRACY_API void ___tracy_emit_gpu_zone_begin_callstack_serial( const struct ___tracy_gpu_zone_begin_callstack_data data )
4281{
4282 auto item = tracy::Profiler::QueueSerialCallstack( tracy::Callstack( data.depth ) );
4283 tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginCallstackSerial );
4284 tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() );
4285 tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc );
4286 tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() );
4287 tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId );
4288 tracy::MemWrite( &item->gpuZoneBegin.context, data.context );
4289 tracy::Profiler::QueueSerialFinish();
4290}
4291
4292TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_serial( const struct ___tracy_gpu_zone_begin_data data )
4293{
4294 auto item = tracy::Profiler::QueueSerial();
4295 tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginAllocSrcLocSerial );
4296 tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() );
4297 tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() );
4298 tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc );
4299 tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId );
4300 tracy::MemWrite( &item->gpuZoneBegin.context, data.context );
4301 tracy::Profiler::QueueSerialFinish();
4302}
4303
4304TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_callstack_serial( const struct ___tracy_gpu_zone_begin_callstack_data data )
4305{
4306 auto item = tracy::Profiler::QueueSerialCallstack( tracy::Callstack( data.depth ) );
4307 tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginAllocSrcLocCallstackSerial );
4308 tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() );
4309 tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() );
4310 tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc );
4311 tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId );
4312 tracy::MemWrite( &item->gpuZoneBegin.context, data.context );
4313 tracy::Profiler::QueueSerialFinish();
4314}
4315
4316TRACY_API void ___tracy_emit_gpu_time_serial( const struct ___tracy_gpu_time_data data )
4317{
4318 auto item = tracy::Profiler::QueueSerial();
4319 tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuTime );
4320 tracy::MemWrite( &item->gpuTime.gpuTime, data.gpuTime );
4321 tracy::MemWrite( &item->gpuTime.queryId, data.queryId );
4322 tracy::MemWrite( &item->gpuTime.context, data.context );
4323 tracy::Profiler::QueueSerialFinish();
4324}
4325
4326TRACY_API void ___tracy_emit_gpu_zone_end_serial( const struct ___tracy_gpu_zone_end_data data )
4327{
4328 auto item = tracy::Profiler::QueueSerial();
4329 tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneEndSerial );
4330 tracy::MemWrite( &item->gpuZoneEnd.cpuTime, tracy::Profiler::GetTime() );
4331 memset( &item->gpuZoneEnd.thread, 0, sizeof( item->gpuZoneEnd.thread ) );
4332 tracy::MemWrite( &item->gpuZoneEnd.queryId, data.queryId );
4333 tracy::MemWrite( &item->gpuZoneEnd.context, data.context );
4334 tracy::Profiler::QueueSerialFinish();
4335}
4336
4337TRACY_API void ___tracy_emit_gpu_new_context_serial( ___tracy_gpu_new_context_data data )
4338{
4339 auto item = tracy::Profiler::QueueSerial();
4340 tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuNewContext );
4341 tracy::MemWrite( &item->gpuNewContext.cpuTime, tracy::Profiler::GetTime() );
4342 tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() );
4343 tracy::MemWrite( &item->gpuNewContext.gpuTime, data.gpuTime );
4344 tracy::MemWrite( &item->gpuNewContext.period, data.period );
4345 tracy::MemWrite( &item->gpuNewContext.context, data.context );
4346 tracy::MemWrite( &item->gpuNewContext.flags, data.flags );
4347 tracy::MemWrite( &item->gpuNewContext.type, data.type );
4348 tracy::Profiler::QueueSerialFinish();
4349}
4350
4351TRACY_API void ___tracy_emit_gpu_context_name_serial( const struct ___tracy_gpu_context_name_data data )
4352{
4353 auto ptr = (char*)tracy::tracy_malloc( data.len );
4354 memcpy( ptr, data.name, data.len );
4355
4356 auto item = tracy::Profiler::QueueSerial();
4357 tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuContextName );
4358 tracy::MemWrite( &item->gpuContextNameFat.context, data.context );
4359 tracy::MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
4360 tracy::MemWrite( &item->gpuContextNameFat.size, data.len );
4361 tracy::Profiler::QueueSerialFinish();
4362}
4363
4364TRACY_API void ___tracy_emit_gpu_calibration_serial( const struct ___tracy_gpu_calibration_data data )
4365{
4366 auto item = tracy::Profiler::QueueSerial();
4367 tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuCalibration );
4368 tracy::MemWrite( &item->gpuCalibration.cpuTime, tracy::Profiler::GetTime() );
4369 tracy::MemWrite( &item->gpuCalibration.gpuTime, data.gpuTime );
4370 tracy::MemWrite( &item->gpuCalibration.cpuDelta, data.cpuDelta );
4371 tracy::MemWrite( &item->gpuCalibration.context, data.context );
4372 tracy::Profiler::QueueSerialFinish();
4373}
4374
4375TRACY_API int ___tracy_connected( void )
4376{
4377 return tracy::GetProfiler().IsConnected();
4378}
4379
4380#ifdef TRACY_FIBERS
4381TRACY_API void ___tracy_fiber_enter( const char* fiber ){ tracy::Profiler::EnterFiber( fiber ); }
4382TRACY_API void ___tracy_fiber_leave( void ){ tracy::Profiler::LeaveFiber(); }
4383#endif
4384
4385# ifdef TRACY_MANUAL_LIFETIME
4386TRACY_API void ___tracy_startup_profiler( void )
4387{
4388 tracy::StartupProfiler();
4389}
4390
4391TRACY_API void ___tracy_shutdown_profiler( void )
4392{
4393 tracy::ShutdownProfiler();
4394}
4395# endif
4396
4397#ifdef __cplusplus
4398}
4399#endif
4400
4401#endif