// The open source OpenXR runtime
// at main 4401 lines 150 kB view raw
#ifdef TRACY_ENABLE

// Platform headers. NOMINMAX is defined before <windows.h> is included, and
// <winsock2.h> is included ahead of <windows.h>; keep this order.
#ifdef _WIN32
#  ifndef NOMINMAX
#    define NOMINMAX
#  endif
#  include <winsock2.h>
#  include <windows.h>
#  include <tlhelp32.h>
#  include <inttypes.h>
#  include <intrin.h>
#  include "../common/TracyUwp.hpp"
#else
#  include <sys/time.h>
#  include <sys/param.h>
#endif

#ifdef _GNU_SOURCE
#  include <errno.h>
#endif

#ifdef __linux__
#  include <dirent.h>
#  include <pthread.h>
#  include <sys/types.h>
#  include <sys/syscall.h>
#endif

#if defined __APPLE__ || defined BSD
#  include <sys/types.h>
#  include <sys/sysctl.h>
#endif

#if defined __APPLE__
#  include "TargetConditionals.h"
#  include <mach-o/dyld.h>
#endif

#ifdef __ANDROID__
#  include <sys/mman.h>
#  include <sys/system_properties.h>
#  include <stdio.h>
#  include <stdint.h>
#  include <algorithm>
#  include <vector>
#endif

#include <algorithm>
#include <assert.h>
#include <atomic>
#include <chrono>
#include <limits>
#include <new>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <thread>

#include "../common/TracyAlign.hpp"
#include "../common/TracyAlloc.hpp"
#include "../common/TracySocket.hpp"
#include "../common/TracySystem.hpp"
#include "../common/TracyYield.hpp"
#include "../common/tracy_lz4.hpp"
#include "tracy_rpmalloc.hpp"
#include "TracyCallstack.hpp"
#include "TracyDebug.hpp"
#include "TracyDxt1.hpp"
#include "TracyScoped.hpp"
#include "TracyProfiler.hpp"
#include "TracyThread.hpp"
#include "TracyArmCpuTable.hpp"
#include "TracySysTrace.hpp"
#include "../tracy/TracyC.h"

// When only TRACY_PORT is given, it is used for both the data port and the
// broadcast port unless those were defined separately.
#ifdef TRACY_PORT
#  ifndef TRACY_DATA_PORT
#    define TRACY_DATA_PORT TRACY_PORT
#  endif
#  ifndef TRACY_BROADCAST_PORT
#    define TRACY_BROADCAST_PORT TRACY_PORT
#  endif
#endif

// Apple builds always use delayed initialization. Elsewhere init_order( val )
// expands to the GNU init_priority attribute when __GNUC__ is defined, and to
// nothing otherwise.
#ifdef __APPLE__
#  define TRACY_DELAYED_INIT
#else
#  ifdef __GNUC__
#    define init_order( val ) __attribute__ ((init_priority(val)))
#  else
#    define init_order(x)
#  endif
#endif

#if defined _WIN32
#  include <lmcons.h>
// Function pointer types used by code in this file that resolves these entry points at run time.
extern "C" typedef LONG (WINAPI *t_RtlGetVersion)( PRTL_OSVERSIONINFOW );
extern "C" typedef BOOL (WINAPI *t_GetLogicalProcessorInformationEx)( LOGICAL_PROCESSOR_RELATIONSHIP, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD );
#else
#  include <unistd.h>
#  include <limits.h>
#endif
#if defined __linux__
#  include <sys/sysinfo.h>
#  include <sys/utsname.h>
#endif

#if !defined _WIN32 && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
#  include "TracyCpuid.hpp"
#endif

#if !( ( defined _WIN32 && _WIN32_WINNT >= _WIN32_WINNT_VISTA ) || defined __linux__ )
#  include <mutex>
#endif

namespace tracy
{

#ifdef __ANDROID__
// Implementation helpers of EnsureReadable(address).
// This is so far only needed on Android, where it is common for libraries to be mapped
// with only executable, not readable, permissions. Typical example (line from /proc/self/maps):
/*
746b63b000-746b6dc000 --xp 00042000 07:48 35                             /apex/com.android.runtime/lib64/bionic/libc.so
*/
// See https://github.com/wolfpld/tracy/issues/125 .
// To work around this, we parse /proc/self/maps and we use mprotect to set read permissions
// on any mappings that contain symbols addresses hit by HandleSymbolCodeQuery.

namespace {
// Holds some information about a single memory mapping.
struct MappingInfo {
    // Start of address range. Inclusive.
    uintptr_t start_address;
    // End of address range. Exclusive, so the mapping is the half-open interval
    // [start, end) and its length in bytes is `end - start`. As in /proc/self/maps.
    uintptr_t end_address;
    // Read/Write/Executable permissions.
    bool perm_r, perm_w, perm_x;
};
}  // anonymous namespace

// Internal implementation helper for LookUpMapping(address).
144 // 145 // Parses /proc/self/maps returning a vector<MappingInfo>. 146 // /proc/self/maps is assumed to be sorted by ascending address, so the resulting 147 // vector is sorted by ascending address too. 148static std::vector<MappingInfo> ParseMappings() 149{ 150 std::vector<MappingInfo> result; 151 FILE* file = fopen( "/proc/self/maps", "r" ); 152 if( !file ) return result; 153 char line[1024]; 154 while( fgets( line, sizeof( line ), file ) ) 155 { 156 uintptr_t start_addr; 157 uintptr_t end_addr; 158 if( sscanf( line, "%lx-%lx", &start_addr, &end_addr ) != 2 ) continue; 159 char* first_space = strchr( line, ' ' ); 160 if( !first_space ) continue; 161 char* perm = first_space + 1; 162 char* second_space = strchr( perm, ' ' ); 163 if( !second_space || second_space - perm != 4 ) continue; 164 result.emplace_back(); 165 auto& mapping = result.back(); 166 mapping.start_address = start_addr; 167 mapping.end_address = end_addr; 168 mapping.perm_r = perm[0] == 'r'; 169 mapping.perm_w = perm[1] == 'w'; 170 mapping.perm_x = perm[2] == 'x'; 171 } 172 fclose( file ); 173 return result; 174} 175 176// Internal implementation helper for LookUpMapping(address). 177// 178// Takes as input an `address` and a known vector `mappings`, assumed to be 179// sorted by increasing addresses, as /proc/self/maps seems to be. 180// Returns a pointer to the MappingInfo describing the mapping that this 181// address belongs to, or nullptr if the address isn't in `mappings`. 182static MappingInfo* LookUpMapping(std::vector<MappingInfo>& mappings, uintptr_t address) 183{ 184 // Comparison function for std::lower_bound. Returns true if all addresses in `m1` 185 // are lower than `addr`. 186 auto Compare = []( const MappingInfo& m1, uintptr_t addr ) { 187 // '<=' because the address ranges are half-open intervals, [start, end). 
188 return m1.end_address <= addr; 189 }; 190 auto iter = std::lower_bound( mappings.begin(), mappings.end(), address, Compare ); 191 if( iter == mappings.end() || iter->start_address > address) { 192 return nullptr; 193 } 194 return &*iter; 195} 196 197// Internal implementation helper for EnsureReadable(address). 198// 199// Takes as input an `address` and returns a pointer to a MappingInfo 200// describing the mapping that this address belongs to, or nullptr if 201// the address isn't in any known mapping. 202// 203// This function is stateful and not reentrant (assumes to be called from 204// only one thread). It holds a vector of mappings parsed from /proc/self/maps. 205// 206// Attempts to react to mappings changes by re-parsing /proc/self/maps. 207static MappingInfo* LookUpMapping(uintptr_t address) 208{ 209 // Static state managed by this function. Not constant, we mutate that state as 210 // we turn some mappings readable. Initially parsed once here, updated as needed below. 211 static std::vector<MappingInfo> s_mappings = ParseMappings(); 212 MappingInfo* mapping = LookUpMapping( s_mappings, address ); 213 if( mapping ) return mapping; 214 215 // This address isn't in any known mapping. Try parsing again, maybe 216 // mappings changed. 217 s_mappings = ParseMappings(); 218 return LookUpMapping( s_mappings, address ); 219} 220 221// Internal implementation helper for EnsureReadable(address). 222// 223// Attempts to make the specified `mapping` readable if it isn't already. 224// Returns true if and only if the mapping is readable. 225static bool EnsureReadable( MappingInfo& mapping ) 226{ 227 if( mapping.perm_r ) 228 { 229 // The mapping is already readable. 
230 return true; 231 } 232 int prot = PROT_READ; 233 if( mapping.perm_w ) prot |= PROT_WRITE; 234 if( mapping.perm_x ) prot |= PROT_EXEC; 235 if( mprotect( reinterpret_cast<void*>( mapping.start_address ), 236 mapping.end_address - mapping.start_address, prot ) == -1 ) 237 { 238 // Failed to make the mapping readable. Shouldn't happen, hasn't 239 // been observed yet. If it happened in practice, we should consider 240 // adding a bool to MappingInfo to track this to avoid retrying mprotect 241 // everytime on such mappings. 242 return false; 243 } 244 // The mapping is now readable. Update `mapping` so the next call will be fast. 245 mapping.perm_r = true; 246 return true; 247} 248 249// Attempts to set the read permission on the entire mapping containing the 250// specified address. Returns true if and only if the mapping is now readable. 251static bool EnsureReadable( uintptr_t address ) 252{ 253 MappingInfo* mapping = LookUpMapping(address); 254 return mapping && EnsureReadable( *mapping ); 255} 256 257#endif // defined __ANDROID__ 258 259#ifndef TRACY_DELAYED_INIT 260 261struct InitTimeWrapper 262{ 263 int64_t val; 264}; 265 266struct ProducerWrapper 267{ 268 tracy::moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* ptr; 269}; 270 271struct ThreadHandleWrapper 272{ 273 uint32_t val; 274}; 275#endif 276 277 278#if defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 279static inline void CpuId( uint32_t* regs, uint32_t leaf ) 280{ 281 memset(regs, 0, sizeof(uint32_t) * 4); 282#if defined _WIN32 283 __cpuidex( (int*)regs, leaf, 0 ); 284#else 285 __get_cpuid( leaf, regs, regs+1, regs+2, regs+3 ); 286#endif 287} 288 289static void InitFailure( const char* msg ) 290{ 291#if defined _WIN32 292 bool hasConsole = false; 293 bool reopen = false; 294 const auto attached = AttachConsole( ATTACH_PARENT_PROCESS ); 295 if( attached ) 296 { 297 hasConsole = true; 298 reopen = true; 299 } 300 else 301 { 302 const auto err = GetLastError(); 303 if( 
err == ERROR_ACCESS_DENIED ) 304 { 305 hasConsole = true; 306 } 307 } 308 if( hasConsole ) 309 { 310 fprintf( stderr, "Tracy Profiler initialization failure: %s\n", msg ); 311 if( reopen ) 312 { 313 freopen( "CONOUT$", "w", stderr ); 314 fprintf( stderr, "Tracy Profiler initialization failure: %s\n", msg ); 315 } 316 } 317 else 318 { 319# ifndef TRACY_UWP 320 MessageBoxA( nullptr, msg, "Tracy Profiler initialization failure", MB_ICONSTOP ); 321# endif 322 } 323#else 324 fprintf( stderr, "Tracy Profiler initialization failure: %s\n", msg ); 325#endif 326 exit( 1 ); 327} 328 329static bool CheckHardwareSupportsInvariantTSC() 330{ 331 const char* noCheck = GetEnvVar( "TRACY_NO_INVARIANT_CHECK" ); 332 if( noCheck && noCheck[0] == '1' ) return true; 333 334 uint32_t regs[4]; 335 CpuId( regs, 1 ); 336 if( !( regs[3] & ( 1 << 4 ) ) ) 337 { 338#if !defined TRACY_TIMER_QPC && !defined TRACY_TIMER_FALLBACK 339 InitFailure( "CPU doesn't support RDTSC instruction." ); 340#else 341 return false; 342#endif 343 } 344 CpuId( regs, 0x80000007 ); 345 if( regs[3] & ( 1 << 8 ) ) return true; 346 347 return false; 348} 349 350#if defined TRACY_TIMER_FALLBACK && defined TRACY_HW_TIMER 351bool HardwareSupportsInvariantTSC() 352{ 353 static bool cachedResult = CheckHardwareSupportsInvariantTSC(); 354 return cachedResult; 355} 356#endif 357 358static int64_t SetupHwTimer() 359{ 360#if !defined TRACY_TIMER_QPC && !defined TRACY_TIMER_FALLBACK 361 if( !CheckHardwareSupportsInvariantTSC() ) 362 { 363#if defined _WIN32 364 InitFailure( "CPU doesn't support invariant TSC.\nDefine TRACY_NO_INVARIANT_CHECK=1 to ignore this error, *if you know what you are doing*.\nAlternatively you may rebuild the application with the TRACY_TIMER_QPC or TRACY_TIMER_FALLBACK define to use lower resolution timer." 
); 365#else 366 InitFailure( "CPU doesn't support invariant TSC.\nDefine TRACY_NO_INVARIANT_CHECK=1 to ignore this error, *if you know what you are doing*.\nAlternatively you may rebuild the application with the TRACY_TIMER_FALLBACK define to use lower resolution timer." ); 367#endif 368 } 369#endif 370 371 return Profiler::GetTime(); 372} 373#else 374static int64_t SetupHwTimer() 375{ 376 return Profiler::GetTime(); 377} 378#endif 379 380static const char* GetProcessName() 381{ 382 const char* processName = "unknown"; 383#ifdef _WIN32 384 static char buf[_MAX_PATH]; 385 GetModuleFileNameA( nullptr, buf, _MAX_PATH ); 386 const char* ptr = buf; 387 while( *ptr != '\0' ) ptr++; 388 while( ptr > buf && *ptr != '\\' && *ptr != '/' ) ptr--; 389 if( ptr > buf ) ptr++; 390 processName = ptr; 391#elif defined __ANDROID__ 392# if __ANDROID_API__ >= 21 393 auto buf = getprogname(); 394 if( buf ) processName = buf; 395# endif 396#elif defined __linux__ && defined _GNU_SOURCE 397 if( program_invocation_short_name ) processName = program_invocation_short_name; 398#elif defined __APPLE__ || defined BSD 399 auto buf = getprogname(); 400 if( buf ) processName = buf; 401#endif 402 return processName; 403} 404 405static const char* GetProcessExecutablePath() 406{ 407#ifdef _WIN32 408 static char buf[_MAX_PATH]; 409 GetModuleFileNameA( nullptr, buf, _MAX_PATH ); 410 return buf; 411#elif defined __ANDROID__ 412 return nullptr; 413#elif defined __linux__ && defined _GNU_SOURCE 414 return program_invocation_name; 415#elif defined __APPLE__ 416 static char buf[1024]; 417 uint32_t size = 1024; 418 _NSGetExecutablePath( buf, &size ); 419 return buf; 420#elif defined __DragonFly__ 421 static char buf[1024]; 422 readlink( "/proc/curproc/file", buf, 1024 ); 423 return buf; 424#elif defined __FreeBSD__ 425 static char buf[1024]; 426 int mib[4]; 427 mib[0] = CTL_KERN; 428 mib[1] = KERN_PROC; 429 mib[2] = KERN_PROC_PATHNAME; 430 mib[3] = -1; 431 size_t cb = 1024; 432 sysctl( mib, 4, buf, &cb, 
nullptr, 0 ); 433 return buf; 434#elif defined __NetBSD__ 435 static char buf[1024]; 436 readlink( "/proc/curproc/exe", buf, 1024 ); 437 return buf; 438#else 439 return nullptr; 440#endif 441} 442 443#if defined __linux__ && defined __ARM_ARCH 444static uint32_t GetHex( char*& ptr, int skip ) 445{ 446 uint32_t ret; 447 ptr += skip; 448 char* end; 449 if( ptr[0] == '0' && ptr[1] == 'x' ) 450 { 451 ptr += 2; 452 ret = strtol( ptr, &end, 16 ); 453 } 454 else 455 { 456 ret = strtol( ptr, &end, 10 ); 457 } 458 ptr = end; 459 return ret; 460} 461#endif 462 463static const char* GetHostInfo() 464{ 465 static char buf[1024]; 466 auto ptr = buf; 467#if defined _WIN32 468# ifdef TRACY_UWP 469 auto GetVersion = &::GetVersionEx; 470# else 471 auto GetVersion = (t_RtlGetVersion)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "RtlGetVersion" ); 472# endif 473 if( !GetVersion ) 474 { 475# ifdef __MINGW32__ 476 ptr += sprintf( ptr, "OS: Windows (MingW)\n" ); 477# else 478 ptr += sprintf( ptr, "OS: Windows\n" ); 479# endif 480 } 481 else 482 { 483 RTL_OSVERSIONINFOW ver = { sizeof( RTL_OSVERSIONINFOW ) }; 484 GetVersion( &ver ); 485 486# ifdef __MINGW32__ 487 ptr += sprintf( ptr, "OS: Windows %i.%i.%i (MingW)\n", (int)ver.dwMajorVersion, (int)ver.dwMinorVersion, (int)ver.dwBuildNumber ); 488# else 489 ptr += sprintf( ptr, "OS: Windows %i.%i.%i\n", ver.dwMajorVersion, ver.dwMinorVersion, ver.dwBuildNumber ); 490# endif 491 } 492#elif defined __linux__ 493 struct utsname utsName; 494 uname( &utsName ); 495# if defined __ANDROID__ 496 ptr += sprintf( ptr, "OS: Linux %s (Android)\n", utsName.release ); 497# else 498 ptr += sprintf( ptr, "OS: Linux %s\n", utsName.release ); 499# endif 500#elif defined __APPLE__ 501# if TARGET_OS_IPHONE == 1 502 ptr += sprintf( ptr, "OS: Darwin (iOS)\n" ); 503# elif TARGET_OS_MAC == 1 504 ptr += sprintf( ptr, "OS: Darwin (OSX)\n" ); 505# else 506 ptr += sprintf( ptr, "OS: Darwin (unknown)\n" ); 507# endif 508#elif defined __DragonFly__ 509 ptr += 
sprintf( ptr, "OS: BSD (DragonFly)\n" ); 510#elif defined __FreeBSD__ 511 ptr += sprintf( ptr, "OS: BSD (FreeBSD)\n" ); 512#elif defined __NetBSD__ 513 ptr += sprintf( ptr, "OS: BSD (NetBSD)\n" ); 514#elif defined __OpenBSD__ 515 ptr += sprintf( ptr, "OS: BSD (OpenBSD)\n" ); 516#else 517 ptr += sprintf( ptr, "OS: unknown\n" ); 518#endif 519 520#if defined _MSC_VER 521# if defined __clang__ 522 ptr += sprintf( ptr, "Compiler: MSVC clang-cl %i.%i.%i\n", __clang_major__, __clang_minor__, __clang_patchlevel__ ); 523# else 524 ptr += sprintf( ptr, "Compiler: MSVC %i\n", _MSC_VER ); 525# endif 526#elif defined __clang__ 527 ptr += sprintf( ptr, "Compiler: clang %i.%i.%i\n", __clang_major__, __clang_minor__, __clang_patchlevel__ ); 528#elif defined __GNUC__ 529 ptr += sprintf( ptr, "Compiler: gcc %i.%i.%i\n", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__ ); 530#else 531 ptr += sprintf( ptr, "Compiler: unknown\n" ); 532#endif 533 534#if defined _WIN32 535 InitWinSock(); 536 537 char hostname[512]; 538 gethostname( hostname, 512 ); 539 540# ifdef TRACY_UWP 541 const char* user = ""; 542# else 543 DWORD userSz = UNLEN+1; 544 char user[UNLEN+1]; 545 GetUserNameA( user, &userSz ); 546# endif 547 548 ptr += sprintf( ptr, "User: %s@%s\n", user, hostname ); 549#else 550 char hostname[_POSIX_HOST_NAME_MAX]{}; 551 char user[_POSIX_LOGIN_NAME_MAX]{}; 552 553 gethostname( hostname, _POSIX_HOST_NAME_MAX ); 554# if defined __ANDROID__ 555 const auto login = getlogin(); 556 if( login ) 557 { 558 strcpy( user, login ); 559 } 560 else 561 { 562 memcpy( user, "(?)", 4 ); 563 } 564# else 565 getlogin_r( user, _POSIX_LOGIN_NAME_MAX ); 566# endif 567 568 ptr += sprintf( ptr, "User: %s@%s\n", user, hostname ); 569#endif 570 571#if defined __i386 || defined _M_IX86 572 ptr += sprintf( ptr, "Arch: x86\n" ); 573#elif defined __x86_64__ || defined _M_X64 574 ptr += sprintf( ptr, "Arch: x64\n" ); 575#elif defined __aarch64__ 576 ptr += sprintf( ptr, "Arch: ARM64\n" ); 577#elif defined __ARM_ARCH 
578 ptr += sprintf( ptr, "Arch: ARM\n" ); 579#else 580 ptr += sprintf( ptr, "Arch: unknown\n" ); 581#endif 582 583#if defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 584 uint32_t regs[4]; 585 char cpuModel[4*4*3+1] = {}; 586 auto modelPtr = cpuModel; 587 for( uint32_t i=0x80000002; i<0x80000005; ++i ) 588 { 589 CpuId( regs, i ); 590 memcpy( modelPtr, regs, sizeof( regs ) ); modelPtr += sizeof( regs ); 591 } 592 593 ptr += sprintf( ptr, "CPU: %s\n", cpuModel ); 594#elif defined __linux__ && defined __ARM_ARCH 595 bool cpuFound = false; 596 FILE* fcpuinfo = fopen( "/proc/cpuinfo", "rb" ); 597 if( fcpuinfo ) 598 { 599 enum { BufSize = 4*1024 }; 600 char buf[BufSize]; 601 const auto sz = fread( buf, 1, BufSize, fcpuinfo ); 602 fclose( fcpuinfo ); 603 const auto end = buf + sz; 604 auto cptr = buf; 605 606 uint32_t impl = 0; 607 uint32_t var = 0; 608 uint32_t part = 0; 609 uint32_t rev = 0; 610 611 while( end - cptr > 20 ) 612 { 613 while( end - cptr > 20 && memcmp( cptr, "CPU ", 4 ) != 0 ) 614 { 615 cptr += 4; 616 while( end - cptr > 20 && *cptr != '\n' ) cptr++; 617 cptr++; 618 } 619 if( end - cptr <= 20 ) break; 620 cptr += 4; 621 if( memcmp( cptr, "implementer\t: ", 14 ) == 0 ) 622 { 623 if( impl != 0 ) break; 624 impl = GetHex( cptr, 14 ); 625 } 626 else if( memcmp( cptr, "variant\t: ", 10 ) == 0 ) var = GetHex( cptr, 10 ); 627 else if( memcmp( cptr, "part\t: ", 7 ) == 0 ) part = GetHex( cptr, 7 ); 628 else if( memcmp( cptr, "revision\t: ", 11 ) == 0 ) rev = GetHex( cptr, 11 ); 629 while( *cptr != '\n' && *cptr != '\0' ) cptr++; 630 cptr++; 631 } 632 633 if( impl != 0 || var != 0 || part != 0 || rev != 0 ) 634 { 635 cpuFound = true; 636 ptr += sprintf( ptr, "CPU: %s%s r%ip%i\n", DecodeArmImplementer( impl ), DecodeArmPart( impl, part ), var, rev ); 637 } 638 } 639 if( !cpuFound ) 640 { 641 ptr += sprintf( ptr, "CPU: unknown\n" ); 642 } 643#elif defined __APPLE__ && TARGET_OS_IPHONE == 1 644 { 645 size_t sz; 646 sysctlbyname( "hw.machine", 
nullptr, &sz, nullptr, 0 ); 647 auto str = (char*)tracy_malloc( sz ); 648 sysctlbyname( "hw.machine", str, &sz, nullptr, 0 ); 649 ptr += sprintf( ptr, "Device: %s\n", DecodeIosDevice( str ) ); 650 tracy_free( str ); 651 } 652#else 653 ptr += sprintf( ptr, "CPU: unknown\n" ); 654#endif 655#ifdef __ANDROID__ 656 char deviceModel[PROP_VALUE_MAX+1]; 657 char deviceManufacturer[PROP_VALUE_MAX+1]; 658 __system_property_get( "ro.product.model", deviceModel ); 659 __system_property_get( "ro.product.manufacturer", deviceManufacturer ); 660 ptr += sprintf( ptr, "Device: %s %s\n", deviceManufacturer, deviceModel ); 661#endif 662 663 ptr += sprintf( ptr, "CPU cores: %i\n", std::thread::hardware_concurrency() ); 664 665#if defined _WIN32 666 MEMORYSTATUSEX statex; 667 statex.dwLength = sizeof( statex ); 668 GlobalMemoryStatusEx( &statex ); 669# ifdef _MSC_VER 670 ptr += sprintf( ptr, "RAM: %I64u MB\n", statex.ullTotalPhys / 1024 / 1024 ); 671# else 672 ptr += sprintf( ptr, "RAM: %llu MB\n", statex.ullTotalPhys / 1024 / 1024 ); 673# endif 674#elif defined __linux__ 675 struct sysinfo sysInfo; 676 sysinfo( &sysInfo ); 677 ptr += sprintf( ptr, "RAM: %lu MB\n", sysInfo.totalram / 1024 / 1024 ); 678#elif defined __APPLE__ 679 size_t memSize; 680 size_t sz = sizeof( memSize ); 681 sysctlbyname( "hw.memsize", &memSize, &sz, nullptr, 0 ); 682 ptr += sprintf( ptr, "RAM: %zu MB\n", memSize / 1024 / 1024 ); 683#elif defined BSD 684 size_t memSize; 685 size_t sz = sizeof( memSize ); 686 sysctlbyname( "hw.physmem", &memSize, &sz, nullptr, 0 ); 687 ptr += sprintf( ptr, "RAM: %zu MB\n", memSize / 1024 / 1024 ); 688#else 689 ptr += sprintf( ptr, "RAM: unknown\n" ); 690#endif 691 692 return buf; 693} 694 695static uint64_t GetPid() 696{ 697#if defined _WIN32 698 return uint64_t( GetCurrentProcessId() ); 699#else 700 return uint64_t( getpid() ); 701#endif 702} 703 704void Profiler::AckServerQuery() 705{ 706 QueueItem item; 707 MemWrite( &item.hdr.type, QueueType::AckServerQueryNoop ); 708 
NeedDataSize( QueueDataSize[(int)QueueType::AckServerQueryNoop] ); 709 AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::AckServerQueryNoop] ); 710} 711 712void Profiler::AckSymbolCodeNotAvailable() 713{ 714 QueueItem item; 715 MemWrite( &item.hdr.type, QueueType::AckSymbolCodeNotAvailable ); 716 NeedDataSize( QueueDataSize[(int)QueueType::AckSymbolCodeNotAvailable] ); 717 AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::AckSymbolCodeNotAvailable] ); 718} 719 720static BroadcastMessage& GetBroadcastMessage( const char* procname, size_t pnsz, int& len, int port ) 721{ 722 static BroadcastMessage msg; 723 724 msg.broadcastVersion = BroadcastVersion; 725 msg.protocolVersion = ProtocolVersion; 726 msg.listenPort = port; 727 msg.pid = GetPid(); 728 729 memcpy( msg.programName, procname, pnsz ); 730 memset( msg.programName + pnsz, 0, WelcomeMessageProgramNameSize - pnsz ); 731 732 len = int( offsetof( BroadcastMessage, programName ) + pnsz + 1 ); 733 return msg; 734} 735 736#if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER 737static DWORD s_profilerThreadId = 0; 738static DWORD s_symbolThreadId = 0; 739static char s_crashText[1024]; 740 741LONG WINAPI CrashFilter( PEXCEPTION_POINTERS pExp ) 742{ 743 if( !GetProfiler().IsConnected() ) return EXCEPTION_CONTINUE_SEARCH; 744 745 const unsigned ec = pExp->ExceptionRecord->ExceptionCode; 746 auto msgPtr = s_crashText; 747 switch( ec ) 748 { 749 case EXCEPTION_ACCESS_VIOLATION: 750 msgPtr += sprintf( msgPtr, "Exception EXCEPTION_ACCESS_VIOLATION (0x%x). 
", ec ); 751 switch( pExp->ExceptionRecord->ExceptionInformation[0] ) 752 { 753 case 0: 754 msgPtr += sprintf( msgPtr, "Read violation at address 0x%" PRIxPTR ".", pExp->ExceptionRecord->ExceptionInformation[1] ); 755 break; 756 case 1: 757 msgPtr += sprintf( msgPtr, "Write violation at address 0x%" PRIxPTR ".", pExp->ExceptionRecord->ExceptionInformation[1] ); 758 break; 759 case 8: 760 msgPtr += sprintf( msgPtr, "DEP violation at address 0x%" PRIxPTR ".", pExp->ExceptionRecord->ExceptionInformation[1] ); 761 break; 762 default: 763 break; 764 } 765 break; 766 case EXCEPTION_ARRAY_BOUNDS_EXCEEDED: 767 msgPtr += sprintf( msgPtr, "Exception EXCEPTION_ARRAY_BOUNDS_EXCEEDED (0x%x). ", ec ); 768 break; 769 case EXCEPTION_DATATYPE_MISALIGNMENT: 770 msgPtr += sprintf( msgPtr, "Exception EXCEPTION_DATATYPE_MISALIGNMENT (0x%x). ", ec ); 771 break; 772 case EXCEPTION_FLT_DIVIDE_BY_ZERO: 773 msgPtr += sprintf( msgPtr, "Exception EXCEPTION_FLT_DIVIDE_BY_ZERO (0x%x). ", ec ); 774 break; 775 case EXCEPTION_ILLEGAL_INSTRUCTION: 776 msgPtr += sprintf( msgPtr, "Exception EXCEPTION_ILLEGAL_INSTRUCTION (0x%x). ", ec ); 777 break; 778 case EXCEPTION_IN_PAGE_ERROR: 779 msgPtr += sprintf( msgPtr, "Exception EXCEPTION_IN_PAGE_ERROR (0x%x). ", ec ); 780 break; 781 case EXCEPTION_INT_DIVIDE_BY_ZERO: 782 msgPtr += sprintf( msgPtr, "Exception EXCEPTION_INT_DIVIDE_BY_ZERO (0x%x). ", ec ); 783 break; 784 case EXCEPTION_PRIV_INSTRUCTION: 785 msgPtr += sprintf( msgPtr, "Exception EXCEPTION_PRIV_INSTRUCTION (0x%x). ", ec ); 786 break; 787 case EXCEPTION_STACK_OVERFLOW: 788 msgPtr += sprintf( msgPtr, "Exception EXCEPTION_STACK_OVERFLOW (0x%x). 
", ec ); 789 break; 790 default: 791 return EXCEPTION_CONTINUE_SEARCH; 792 } 793 794 { 795 GetProfiler().SendCallstack( 60, "KiUserExceptionDispatcher" ); 796 797 TracyQueuePrepare( QueueType::CrashReport ); 798 item->crashReport.time = Profiler::GetTime(); 799 item->crashReport.text = (uint64_t)s_crashText; 800 TracyQueueCommit( crashReportThread ); 801 } 802 803 HANDLE h = CreateToolhelp32Snapshot( TH32CS_SNAPTHREAD, 0 ); 804 if( h == INVALID_HANDLE_VALUE ) return EXCEPTION_CONTINUE_SEARCH; 805 806 THREADENTRY32 te = { sizeof( te ) }; 807 if( !Thread32First( h, &te ) ) 808 { 809 CloseHandle( h ); 810 return EXCEPTION_CONTINUE_SEARCH; 811 } 812 813 const auto pid = GetCurrentProcessId(); 814 const auto tid = GetCurrentThreadId(); 815 816 do 817 { 818 if( te.th32OwnerProcessID == pid && te.th32ThreadID != tid && te.th32ThreadID != s_profilerThreadId && te.th32ThreadID != s_symbolThreadId ) 819 { 820 HANDLE th = OpenThread( THREAD_SUSPEND_RESUME, FALSE, te.th32ThreadID ); 821 if( th != INVALID_HANDLE_VALUE ) 822 { 823 SuspendThread( th ); 824 CloseHandle( th ); 825 } 826 } 827 } 828 while( Thread32Next( h, &te ) ); 829 CloseHandle( h ); 830 831 { 832 TracyLfqPrepare( QueueType::Crash ); 833 TracyLfqCommit; 834 } 835 836 std::this_thread::sleep_for( std::chrono::milliseconds( 500 ) ); 837 GetProfiler().RequestShutdown(); 838 while( !GetProfiler().HasShutdownFinished() ) { std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); }; 839 840 return EXCEPTION_CONTINUE_SEARCH; 841} 842#endif 843 844static Profiler* s_instance = nullptr; 845static Thread* s_thread; 846#ifndef TRACY_NO_FRAME_IMAGE 847static Thread* s_compressThread; 848#endif 849#ifdef TRACY_HAS_CALLSTACK 850static Thread* s_symbolThread; 851std::atomic<bool> s_symbolThreadGone { false }; 852#endif 853#ifdef TRACY_HAS_SYSTEM_TRACING 854static Thread* s_sysTraceThread = nullptr; 855#endif 856 857#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER 858# ifndef TRACY_CRASH_SIGNAL 859# define 
TRACY_CRASH_SIGNAL SIGPWR 860# endif 861 862static long s_profilerTid = 0; 863static long s_symbolTid = 0; 864static char s_crashText[1024]; 865static std::atomic<bool> s_alreadyCrashed( false ); 866 867static void ThreadFreezer( int /*signal*/ ) 868{ 869 for(;;) sleep( 1000 ); 870} 871 872static inline void HexPrint( char*& ptr, uint64_t val ) 873{ 874 if( val == 0 ) 875 { 876 *ptr++ = '0'; 877 return; 878 } 879 880 static const char HexTable[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; 881 char buf[16]; 882 auto bptr = buf; 883 884 do 885 { 886 *bptr++ = HexTable[val%16]; 887 val /= 16; 888 } 889 while( val > 0 ); 890 891 do 892 { 893 *ptr++ = *--bptr; 894 } 895 while( bptr != buf ); 896} 897 898static void CrashHandler( int signal, siginfo_t* info, void* /*ucontext*/ ) 899{ 900 bool expected = false; 901 if( !s_alreadyCrashed.compare_exchange_strong( expected, true ) ) ThreadFreezer( signal ); 902 903 struct sigaction act = {}; 904 act.sa_handler = SIG_DFL; 905 sigaction( SIGABRT, &act, nullptr ); 906 907 auto msgPtr = s_crashText; 908 switch( signal ) 909 { 910 case SIGILL: 911 strcpy( msgPtr, "Illegal Instruction.\n" ); 912 while( *msgPtr ) msgPtr++; 913 switch( info->si_code ) 914 { 915 case ILL_ILLOPC: 916 strcpy( msgPtr, "Illegal opcode.\n" ); 917 break; 918 case ILL_ILLOPN: 919 strcpy( msgPtr, "Illegal operand.\n" ); 920 break; 921 case ILL_ILLADR: 922 strcpy( msgPtr, "Illegal addressing mode.\n" ); 923 break; 924 case ILL_ILLTRP: 925 strcpy( msgPtr, "Illegal trap.\n" ); 926 break; 927 case ILL_PRVOPC: 928 strcpy( msgPtr, "Privileged opcode.\n" ); 929 break; 930 case ILL_PRVREG: 931 strcpy( msgPtr, "Privileged register.\n" ); 932 break; 933 case ILL_COPROC: 934 strcpy( msgPtr, "Coprocessor error.\n" ); 935 break; 936 case ILL_BADSTK: 937 strcpy( msgPtr, "Internal stack error.\n" ); 938 break; 939 default: 940 break; 941 } 942 break; 943 case SIGFPE: 944 strcpy( msgPtr, "Floating-point exception.\n" ); 945 while( 
*msgPtr ) msgPtr++; 946 switch( info->si_code ) 947 { 948 case FPE_INTDIV: 949 strcpy( msgPtr, "Integer divide by zero.\n" ); 950 break; 951 case FPE_INTOVF: 952 strcpy( msgPtr, "Integer overflow.\n" ); 953 break; 954 case FPE_FLTDIV: 955 strcpy( msgPtr, "Floating-point divide by zero.\n" ); 956 break; 957 case FPE_FLTOVF: 958 strcpy( msgPtr, "Floating-point overflow.\n" ); 959 break; 960 case FPE_FLTUND: 961 strcpy( msgPtr, "Floating-point underflow.\n" ); 962 break; 963 case FPE_FLTRES: 964 strcpy( msgPtr, "Floating-point inexact result.\n" ); 965 break; 966 case FPE_FLTINV: 967 strcpy( msgPtr, "Floating-point invalid operation.\n" ); 968 break; 969 case FPE_FLTSUB: 970 strcpy( msgPtr, "Subscript out of range.\n" ); 971 break; 972 default: 973 break; 974 } 975 break; 976 case SIGSEGV: 977 strcpy( msgPtr, "Invalid memory reference.\n" ); 978 while( *msgPtr ) msgPtr++; 979 switch( info->si_code ) 980 { 981 case SEGV_MAPERR: 982 strcpy( msgPtr, "Address not mapped to object.\n" ); 983 break; 984 case SEGV_ACCERR: 985 strcpy( msgPtr, "Invalid permissions for mapped object.\n" ); 986 break; 987# ifdef SEGV_BNDERR 988 case SEGV_BNDERR: 989 strcpy( msgPtr, "Failed address bound checks.\n" ); 990 break; 991# endif 992# ifdef SEGV_PKUERR 993 case SEGV_PKUERR: 994 strcpy( msgPtr, "Access was denied by memory protection keys.\n" ); 995 break; 996# endif 997 default: 998 break; 999 } 1000 break; 1001 case SIGPIPE: 1002 strcpy( msgPtr, "Broken pipe.\n" ); 1003 while( *msgPtr ) msgPtr++; 1004 break; 1005 case SIGBUS: 1006 strcpy( msgPtr, "Bus error.\n" ); 1007 while( *msgPtr ) msgPtr++; 1008 switch( info->si_code ) 1009 { 1010 case BUS_ADRALN: 1011 strcpy( msgPtr, "Invalid address alignment.\n" ); 1012 break; 1013 case BUS_ADRERR: 1014 strcpy( msgPtr, "Nonexistent physical address.\n" ); 1015 break; 1016 case BUS_OBJERR: 1017 strcpy( msgPtr, "Object-specific hardware error.\n" ); 1018 break; 1019# ifdef BUS_MCEERR_AR 1020 case BUS_MCEERR_AR: 1021 strcpy( msgPtr, "Hardware 
memory error consumed on a machine check; action required.\n" ); 1022 break; 1023# endif 1024# ifdef BUS_MCEERR_AO 1025 case BUS_MCEERR_AO: 1026 strcpy( msgPtr, "Hardware memory error detected in process but not consumed; action optional.\n" ); 1027 break; 1028# endif 1029 default: 1030 break; 1031 } 1032 break; 1033 case SIGABRT: 1034 strcpy( msgPtr, "Abort signal from abort().\n" ); 1035 break; 1036 default: 1037 abort(); 1038 } 1039 while( *msgPtr ) msgPtr++; 1040 1041 if( signal != SIGPIPE ) 1042 { 1043 strcpy( msgPtr, "Fault address: 0x" ); 1044 while( *msgPtr ) msgPtr++; 1045 HexPrint( msgPtr, uint64_t( info->si_addr ) ); 1046 *msgPtr++ = '\n'; 1047 } 1048 1049 { 1050 GetProfiler().SendCallstack( 60, "__kernel_rt_sigreturn" ); 1051 1052 TracyQueuePrepare( QueueType::CrashReport ); 1053 item->crashReport.time = Profiler::GetTime(); 1054 item->crashReport.text = (uint64_t)s_crashText; 1055 TracyQueueCommit( crashReportThread ); 1056 } 1057 1058 DIR* dp = opendir( "/proc/self/task" ); 1059 if( !dp ) abort(); 1060 1061 const auto selfTid = syscall( SYS_gettid ); 1062 1063 struct dirent* ep; 1064 while( ( ep = readdir( dp ) ) != nullptr ) 1065 { 1066 if( ep->d_name[0] == '.' 
) continue; 1067 int tid = atoi( ep->d_name ); 1068 if( tid != selfTid && tid != s_profilerTid && tid != s_symbolTid ) 1069 { 1070 syscall( SYS_tkill, tid, TRACY_CRASH_SIGNAL ); 1071 } 1072 } 1073 closedir( dp ); 1074 1075#ifdef TRACY_HAS_CALLSTACK 1076 if( selfTid == s_symbolTid ) s_symbolThreadGone.store( true, std::memory_order_release ); 1077#endif 1078 1079 TracyLfqPrepare( QueueType::Crash ); 1080 TracyLfqCommit; 1081 1082 std::this_thread::sleep_for( std::chrono::milliseconds( 500 ) ); 1083 GetProfiler().RequestShutdown(); 1084 while( !GetProfiler().HasShutdownFinished() ) { std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); }; 1085 1086 abort(); 1087} 1088#endif 1089 1090 1091enum { QueuePrealloc = 256 * 1024 }; 1092 1093TRACY_API int64_t GetFrequencyQpc() 1094{ 1095#if defined _WIN32 1096 LARGE_INTEGER t; 1097 QueryPerformanceFrequency( &t ); 1098 return t.QuadPart; 1099#else 1100 return 0; 1101#endif 1102} 1103 1104#ifdef TRACY_DELAYED_INIT 1105struct ThreadNameData; 1106TRACY_API moodycamel::ConcurrentQueue<QueueItem>& GetQueue(); 1107 1108struct ProfilerData 1109{ 1110 int64_t initTime = SetupHwTimer(); 1111 moodycamel::ConcurrentQueue<QueueItem> queue; 1112 Profiler profiler; 1113 std::atomic<uint32_t> lockCounter { 0 }; 1114 std::atomic<uint8_t> gpuCtxCounter { 0 }; 1115 std::atomic<ThreadNameData*> threadNameData { nullptr }; 1116}; 1117 1118struct ProducerWrapper 1119{ 1120 ProducerWrapper( ProfilerData& data ) : detail( data.queue ), ptr( data.queue.get_explicit_producer( detail ) ) {} 1121 moodycamel::ProducerToken detail; 1122 tracy::moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* ptr; 1123}; 1124 1125struct ProfilerThreadData 1126{ 1127 ProfilerThreadData( ProfilerData& data ) : token( data ), gpuCtx( { nullptr } ) {} 1128 ProducerWrapper token; 1129 GpuCtxWrapper gpuCtx; 1130# ifdef TRACY_ON_DEMAND 1131 LuaZoneState luaZoneState; 1132# endif 1133}; 1134 1135std::atomic<int> RpInitDone { 0 }; 1136std::atomic<int> 
RpInitLock { 0 }; 1137thread_local bool RpThreadInitDone = false; 1138thread_local bool RpThreadShutdown = false; 1139 1140# ifdef TRACY_MANUAL_LIFETIME 1141ProfilerData* s_profilerData = nullptr; 1142static ProfilerThreadData& GetProfilerThreadData(); 1143TRACY_API void StartupProfiler() 1144{ 1145 s_profilerData = (ProfilerData*)tracy_malloc( sizeof( ProfilerData ) ); 1146 new (s_profilerData) ProfilerData(); 1147 s_profilerData->profiler.SpawnWorkerThreads(); 1148 GetProfilerThreadData().token = ProducerWrapper( *s_profilerData ); 1149} 1150static ProfilerData& GetProfilerData() 1151{ 1152 assert( s_profilerData ); 1153 return *s_profilerData; 1154} 1155TRACY_API void ShutdownProfiler() 1156{ 1157 s_profilerData->~ProfilerData(); 1158 tracy_free( s_profilerData ); 1159 s_profilerData = nullptr; 1160 rpmalloc_finalize(); 1161 RpThreadInitDone = false; 1162 RpInitDone.store( 0, std::memory_order_release ); 1163} 1164# else 1165static std::atomic<int> profilerDataLock { 0 }; 1166static std::atomic<ProfilerData*> profilerData { nullptr }; 1167 1168static ProfilerData& GetProfilerData() 1169{ 1170 auto ptr = profilerData.load( std::memory_order_acquire ); 1171 if( !ptr ) 1172 { 1173 int expected = 0; 1174 while( !profilerDataLock.compare_exchange_weak( expected, 1, std::memory_order_release, std::memory_order_relaxed ) ) { expected = 0; YieldThread(); } 1175 ptr = profilerData.load( std::memory_order_acquire ); 1176 if( !ptr ) 1177 { 1178 ptr = (ProfilerData*)tracy_malloc( sizeof( ProfilerData ) ); 1179 new (ptr) ProfilerData(); 1180 profilerData.store( ptr, std::memory_order_release ); 1181 } 1182 profilerDataLock.store( 0, std::memory_order_release ); 1183 } 1184 return *ptr; 1185} 1186# endif 1187 1188// GCC prior to 8.4 had a bug with function-inline thread_local variables. 
Versions of glibc beginning with 1189// 2.18 may attempt to work around this issue, which manifests as a crash while running static destructors 1190// if this function is compiled into a shared object. Unfortunately, centos7 ships with glibc 2.17. If running 1191// on old GCC, use the old-fashioned way as a workaround 1192// See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85400 1193#if !defined(__clang__) && defined(__GNUC__) && ((__GNUC__ < 8) || ((__GNUC__ == 8) && (__GNUC_MINOR__ < 4))) 1194struct ProfilerThreadDataKey 1195{ 1196public: 1197 ProfilerThreadDataKey() 1198 { 1199 int val = pthread_key_create(&m_key, sDestructor); 1200 static_cast<void>(val); // unused 1201 assert(val == 0); 1202 } 1203 ~ProfilerThreadDataKey() 1204 { 1205 int val = pthread_key_delete(m_key); 1206 static_cast<void>(val); // unused 1207 assert(val == 0); 1208 } 1209 ProfilerThreadData& get() 1210 { 1211 void* p = pthread_getspecific(m_key); 1212 if (!p) 1213 { 1214 p = (ProfilerThreadData*)tracy_malloc( sizeof( ProfilerThreadData ) ); 1215 new (p) ProfilerThreadData(GetProfilerData()); 1216 pthread_setspecific(m_key, p); 1217 } 1218 return *static_cast<ProfilerThreadData*>(p); 1219 } 1220private: 1221 pthread_key_t m_key; 1222 1223 static void sDestructor(void* p) 1224 { 1225 ((ProfilerThreadData*)p)->~ProfilerThreadData(); 1226 tracy_free(p); 1227 } 1228}; 1229 1230static ProfilerThreadData& GetProfilerThreadData() 1231{ 1232 static ProfilerThreadDataKey key; 1233 return key.get(); 1234} 1235#else 1236static ProfilerThreadData& GetProfilerThreadData() 1237{ 1238 thread_local ProfilerThreadData data( GetProfilerData() ); 1239 return data; 1240} 1241#endif 1242 1243TRACY_API moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* GetToken() { return GetProfilerThreadData().token.ptr; } 1244TRACY_API Profiler& GetProfiler() { return GetProfilerData().profiler; } 1245TRACY_API moodycamel::ConcurrentQueue<QueueItem>& GetQueue() { return GetProfilerData().queue; } 1246TRACY_API 
int64_t GetInitTime() { return GetProfilerData().initTime; } 1247TRACY_API std::atomic<uint32_t>& GetLockCounter() { return GetProfilerData().lockCounter; } 1248TRACY_API std::atomic<uint8_t>& GetGpuCtxCounter() { return GetProfilerData().gpuCtxCounter; } 1249TRACY_API GpuCtxWrapper& GetGpuCtx() { return GetProfilerThreadData().gpuCtx; } 1250TRACY_API uint32_t GetThreadHandle() { return detail::GetThreadHandleImpl(); } 1251std::atomic<ThreadNameData*>& GetThreadNameData() { return GetProfilerData().threadNameData; } 1252 1253# ifdef TRACY_ON_DEMAND 1254TRACY_API LuaZoneState& GetLuaZoneState() { return GetProfilerThreadData().luaZoneState; } 1255# endif 1256 1257# ifndef TRACY_MANUAL_LIFETIME 1258namespace 1259{ 1260 const auto& __profiler_init = GetProfiler(); 1261} 1262# endif 1263 1264#else 1265 1266// MSVC static initialization order solution. gcc/clang uses init_order() to avoid all this. 1267 1268// 1a. But s_queue is needed for initialization of variables in point 2. 1269extern moodycamel::ConcurrentQueue<QueueItem> s_queue; 1270 1271// 2. If these variables would be in the .CRT$XCB section, they would be initialized only in main thread. 1272thread_local moodycamel::ProducerToken init_order(107) s_token_detail( s_queue ); 1273thread_local ProducerWrapper init_order(108) s_token { s_queue.get_explicit_producer( s_token_detail ) }; 1274thread_local ThreadHandleWrapper init_order(104) s_threadHandle { detail::GetThreadHandleImpl() }; 1275 1276# ifdef _MSC_VER 1277// 1. Initialize these static variables before all other variables. 
1278# pragma warning( disable : 4075 ) 1279# pragma init_seg( ".CRT$XCB" ) 1280# endif 1281 1282static InitTimeWrapper init_order(101) s_initTime { SetupHwTimer() }; 1283std::atomic<int> init_order(102) RpInitDone( 0 ); 1284std::atomic<int> init_order(102) RpInitLock( 0 ); 1285thread_local bool RpThreadInitDone = false; 1286thread_local bool RpThreadShutdown = false; 1287moodycamel::ConcurrentQueue<QueueItem> init_order(103) s_queue( QueuePrealloc ); 1288std::atomic<uint32_t> init_order(104) s_lockCounter( 0 ); 1289std::atomic<uint8_t> init_order(104) s_gpuCtxCounter( 0 ); 1290 1291thread_local GpuCtxWrapper init_order(104) s_gpuCtx { nullptr }; 1292 1293struct ThreadNameData; 1294static std::atomic<ThreadNameData*> init_order(104) s_threadNameDataInstance( nullptr ); 1295std::atomic<ThreadNameData*>& s_threadNameData = s_threadNameDataInstance; 1296 1297# ifdef TRACY_ON_DEMAND 1298thread_local LuaZoneState init_order(104) s_luaZoneState { 0, false }; 1299# endif 1300 1301static Profiler init_order(105) s_profiler; 1302 1303TRACY_API moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* GetToken() { return s_token.ptr; } 1304TRACY_API Profiler& GetProfiler() { return s_profiler; } 1305TRACY_API moodycamel::ConcurrentQueue<QueueItem>& GetQueue() { return s_queue; } 1306TRACY_API int64_t GetInitTime() { return s_initTime.val; } 1307TRACY_API std::atomic<uint32_t>& GetLockCounter() { return s_lockCounter; } 1308TRACY_API std::atomic<uint8_t>& GetGpuCtxCounter() { return s_gpuCtxCounter; } 1309TRACY_API GpuCtxWrapper& GetGpuCtx() { return s_gpuCtx; } 1310TRACY_API uint32_t GetThreadHandle() { return s_threadHandle.val; } 1311 1312std::atomic<ThreadNameData*>& GetThreadNameData() { return s_threadNameData; } 1313 1314# ifdef TRACY_ON_DEMAND 1315TRACY_API LuaZoneState& GetLuaZoneState() { return s_luaZoneState; } 1316# endif 1317#endif 1318 1319TRACY_API bool ProfilerAvailable() { return s_instance != nullptr; } 1320TRACY_API bool ProfilerAllocatorAvailable() { 
return !RpThreadShutdown; } 1321 1322Profiler::Profiler() 1323 : m_timeBegin( 0 ) 1324 , m_mainThread( detail::GetThreadHandleImpl() ) 1325 , m_epoch( std::chrono::duration_cast<std::chrono::seconds>( std::chrono::system_clock::now().time_since_epoch() ).count() ) 1326 , m_shutdown( false ) 1327 , m_shutdownManual( false ) 1328 , m_shutdownFinished( false ) 1329 , m_sock( nullptr ) 1330 , m_broadcast( nullptr ) 1331 , m_noExit( false ) 1332 , m_userPort( 0 ) 1333 , m_zoneId( 1 ) 1334 , m_samplingPeriod( 0 ) 1335 , m_stream( LZ4_createStream() ) 1336 , m_buffer( (char*)tracy_malloc( TargetFrameSize*3 ) ) 1337 , m_bufferOffset( 0 ) 1338 , m_bufferStart( 0 ) 1339 , m_lz4Buf( (char*)tracy_malloc( LZ4Size + sizeof( lz4sz_t ) ) ) 1340 , m_serialQueue( 1024*1024 ) 1341 , m_serialDequeue( 1024*1024 ) 1342#ifndef TRACY_NO_FRAME_IMAGE 1343 , m_fiQueue( 16 ) 1344 , m_fiDequeue( 16 ) 1345#endif 1346 , m_symbolQueue( 8*1024 ) 1347 , m_frameCount( 0 ) 1348 , m_isConnected( false ) 1349#ifdef TRACY_ON_DEMAND 1350 , m_connectionId( 0 ) 1351 , m_deferredQueue( 64*1024 ) 1352#endif 1353 , m_paramCallback( nullptr ) 1354 , m_sourceCallback( nullptr ) 1355 , m_queryImage( nullptr ) 1356 , m_queryData( nullptr ) 1357 , m_crashHandlerInstalled( false ) 1358{ 1359 assert( !s_instance ); 1360 s_instance = this; 1361 1362#ifndef TRACY_DELAYED_INIT 1363# ifdef _MSC_VER 1364 // 3. But these variables need to be initialized in main thread within the .CRT$XCB section. Do it here. 
1365 s_token_detail = moodycamel::ProducerToken( s_queue ); 1366 s_token = ProducerWrapper { s_queue.get_explicit_producer( s_token_detail ) }; 1367 s_threadHandle = ThreadHandleWrapper { m_mainThread }; 1368# endif 1369#endif 1370 1371 CalibrateTimer(); 1372 CalibrateDelay(); 1373 ReportTopology(); 1374 1375#ifndef TRACY_NO_EXIT 1376 const char* noExitEnv = GetEnvVar( "TRACY_NO_EXIT" ); 1377 if( noExitEnv && noExitEnv[0] == '1' ) 1378 { 1379 m_noExit = true; 1380 } 1381#endif 1382 1383 const char* userPort = GetEnvVar( "TRACY_PORT" ); 1384 if( userPort ) 1385 { 1386 m_userPort = atoi( userPort ); 1387 } 1388 1389#if !defined(TRACY_DELAYED_INIT) || !defined(TRACY_MANUAL_LIFETIME) 1390 SpawnWorkerThreads(); 1391#endif 1392} 1393 1394void Profiler::SpawnWorkerThreads() 1395{ 1396#ifdef TRACY_HAS_SYSTEM_TRACING 1397 if( SysTraceStart( m_samplingPeriod ) ) 1398 { 1399 s_sysTraceThread = (Thread*)tracy_malloc( sizeof( Thread ) ); 1400 new(s_sysTraceThread) Thread( SysTraceWorker, nullptr ); 1401 std::this_thread::sleep_for( std::chrono::milliseconds( 1 ) ); 1402 } 1403#endif 1404 1405 s_thread = (Thread*)tracy_malloc( sizeof( Thread ) ); 1406 new(s_thread) Thread( LaunchWorker, this ); 1407 1408#ifndef TRACY_NO_FRAME_IMAGE 1409 s_compressThread = (Thread*)tracy_malloc( sizeof( Thread ) ); 1410 new(s_compressThread) Thread( LaunchCompressWorker, this ); 1411#endif 1412 1413#ifdef TRACY_HAS_CALLSTACK 1414 s_symbolThread = (Thread*)tracy_malloc( sizeof( Thread ) ); 1415 new(s_symbolThread) Thread( LaunchSymbolWorker, this ); 1416#endif 1417 1418#if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER 1419 s_profilerThreadId = GetThreadId( s_thread->Handle() ); 1420# ifdef TRACY_HAS_CALLSTACK 1421 s_symbolThreadId = GetThreadId( s_symbolThread->Handle() ); 1422# endif 1423 m_exceptionHandler = AddVectoredExceptionHandler( 1, CrashFilter ); 1424#endif 1425 1426#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER 1427 struct sigaction threadFreezer = 
{}; 1428 threadFreezer.sa_handler = ThreadFreezer; 1429 sigaction( TRACY_CRASH_SIGNAL, &threadFreezer, &m_prevSignal.pwr ); 1430 1431 struct sigaction crashHandler = {}; 1432 crashHandler.sa_sigaction = CrashHandler; 1433 crashHandler.sa_flags = SA_SIGINFO; 1434 sigaction( SIGILL, &crashHandler, &m_prevSignal.ill ); 1435 sigaction( SIGFPE, &crashHandler, &m_prevSignal.fpe ); 1436 sigaction( SIGSEGV, &crashHandler, &m_prevSignal.segv ); 1437 sigaction( SIGPIPE, &crashHandler, &m_prevSignal.pipe ); 1438 sigaction( SIGBUS, &crashHandler, &m_prevSignal.bus ); 1439 sigaction( SIGABRT, &crashHandler, &m_prevSignal.abrt ); 1440#endif 1441 1442#ifndef TRACY_NO_CRASH_HANDLER 1443 m_crashHandlerInstalled = true; 1444#endif 1445 1446#ifdef TRACY_HAS_CALLSTACK 1447 InitCallstackCritical(); 1448#endif 1449 1450 m_timeBegin.store( GetTime(), std::memory_order_relaxed ); 1451} 1452 1453Profiler::~Profiler() 1454{ 1455 m_shutdown.store( true, std::memory_order_relaxed ); 1456 1457#if defined _WIN32 && !defined TRACY_UWP 1458 if( m_crashHandlerInstalled ) RemoveVectoredExceptionHandler( m_exceptionHandler ); 1459#endif 1460 1461#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER 1462 if( m_crashHandlerInstalled ) 1463 { 1464 sigaction( TRACY_CRASH_SIGNAL, &m_prevSignal.pwr, nullptr ); 1465 sigaction( SIGILL, &m_prevSignal.ill, nullptr ); 1466 sigaction( SIGFPE, &m_prevSignal.fpe, nullptr ); 1467 sigaction( SIGSEGV, &m_prevSignal.segv, nullptr ); 1468 sigaction( SIGPIPE, &m_prevSignal.pipe, nullptr ); 1469 sigaction( SIGBUS, &m_prevSignal.bus, nullptr ); 1470 sigaction( SIGABRT, &m_prevSignal.abrt, nullptr ); 1471 } 1472#endif 1473 1474#ifdef TRACY_HAS_SYSTEM_TRACING 1475 if( s_sysTraceThread ) 1476 { 1477 SysTraceStop(); 1478 s_sysTraceThread->~Thread(); 1479 tracy_free( s_sysTraceThread ); 1480 } 1481#endif 1482 1483#ifdef TRACY_HAS_CALLSTACK 1484 s_symbolThread->~Thread(); 1485 tracy_free( s_symbolThread ); 1486#endif 1487 1488#ifndef TRACY_NO_FRAME_IMAGE 1489 
s_compressThread->~Thread(); 1490 tracy_free( s_compressThread ); 1491#endif 1492 1493 s_thread->~Thread(); 1494 tracy_free( s_thread ); 1495 1496#ifdef TRACY_HAS_CALLSTACK 1497 EndCallstack(); 1498#endif 1499 1500 tracy_free( m_lz4Buf ); 1501 tracy_free( m_buffer ); 1502 LZ4_freeStream( (LZ4_stream_t*)m_stream ); 1503 1504 if( m_sock ) 1505 { 1506 m_sock->~Socket(); 1507 tracy_free( m_sock ); 1508 } 1509 1510 if( m_broadcast ) 1511 { 1512 m_broadcast->~UdpBroadcast(); 1513 tracy_free( m_broadcast ); 1514 } 1515 1516 assert( s_instance ); 1517 s_instance = nullptr; 1518} 1519 1520bool Profiler::ShouldExit() 1521{ 1522 return s_instance->m_shutdown.load( std::memory_order_relaxed ); 1523} 1524 1525void Profiler::Worker() 1526{ 1527#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER 1528 s_profilerTid = syscall( SYS_gettid ); 1529#endif 1530 1531 ThreadExitHandler threadExitHandler; 1532 1533 SetThreadName( "Tracy Profiler" ); 1534 1535#ifdef TRACY_DATA_PORT 1536 const bool dataPortSearch = false; 1537 auto dataPort = m_userPort != 0 ? m_userPort : TRACY_DATA_PORT; 1538#else 1539 const bool dataPortSearch = m_userPort == 0; 1540 auto dataPort = m_userPort != 0 ? 
m_userPort : 8086; 1541#endif 1542#ifdef TRACY_BROADCAST_PORT 1543 const auto broadcastPort = TRACY_BROADCAST_PORT; 1544#else 1545 const auto broadcastPort = 8086; 1546#endif 1547 1548 while( m_timeBegin.load( std::memory_order_relaxed ) == 0 ) std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); 1549 1550#ifdef TRACY_USE_RPMALLOC 1551 rpmalloc_thread_initialize(); 1552#endif 1553 1554 m_exectime = 0; 1555 const auto execname = GetProcessExecutablePath(); 1556 if( execname ) 1557 { 1558 struct stat st; 1559 if( stat( execname, &st ) == 0 ) 1560 { 1561 m_exectime = (uint64_t)st.st_mtime; 1562 } 1563 } 1564 1565 const auto procname = GetProcessName(); 1566 const auto pnsz = std::min<size_t>( strlen( procname ), WelcomeMessageProgramNameSize - 1 ); 1567 1568 const auto hostinfo = GetHostInfo(); 1569 const auto hisz = std::min<size_t>( strlen( hostinfo ), WelcomeMessageHostInfoSize - 1 ); 1570 1571 const uint64_t pid = GetPid(); 1572 1573 uint8_t flags = 0; 1574 1575#ifdef TRACY_ON_DEMAND 1576 flags |= WelcomeFlag::OnDemand; 1577#endif 1578#ifdef __APPLE__ 1579 flags |= WelcomeFlag::IsApple; 1580#endif 1581#ifndef TRACY_NO_CODE_TRANSFER 1582 flags |= WelcomeFlag::CodeTransfer; 1583#endif 1584#ifdef _WIN32 1585 flags |= WelcomeFlag::CombineSamples; 1586# ifndef TRACY_NO_CONTEXT_SWITCH 1587 flags |= WelcomeFlag::IdentifySamples; 1588# endif 1589#endif 1590 1591#if defined __i386 || defined _M_IX86 1592 uint8_t cpuArch = CpuArchX86; 1593#elif defined __x86_64__ || defined _M_X64 1594 uint8_t cpuArch = CpuArchX64; 1595#elif defined __aarch64__ 1596 uint8_t cpuArch = CpuArchArm64; 1597#elif defined __ARM_ARCH 1598 uint8_t cpuArch = CpuArchArm32; 1599#else 1600 uint8_t cpuArch = CpuArchUnknown; 1601#endif 1602 1603#if defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 1604 uint32_t regs[4]; 1605 char manufacturer[12]; 1606 CpuId( regs, 0 ); 1607 memcpy( manufacturer, regs+1, 4 ); 1608 memcpy( manufacturer+4, regs+3, 4 ); 1609 memcpy( 
manufacturer+8, regs+2, 4 ); 1610 1611 CpuId( regs, 1 ); 1612 uint32_t cpuId = ( regs[0] & 0xFFF ) | ( ( regs[0] & 0xFFF0000 ) >> 4 ); 1613#else 1614 const char manufacturer[12] = {}; 1615 uint32_t cpuId = 0; 1616#endif 1617 1618 WelcomeMessage welcome; 1619 MemWrite( &welcome.timerMul, m_timerMul ); 1620 MemWrite( &welcome.initBegin, GetInitTime() ); 1621 MemWrite( &welcome.initEnd, m_timeBegin.load( std::memory_order_relaxed ) ); 1622 MemWrite( &welcome.delay, m_delay ); 1623 MemWrite( &welcome.resolution, m_resolution ); 1624 MemWrite( &welcome.epoch, m_epoch ); 1625 MemWrite( &welcome.exectime, m_exectime ); 1626 MemWrite( &welcome.pid, pid ); 1627 MemWrite( &welcome.samplingPeriod, m_samplingPeriod ); 1628 MemWrite( &welcome.flags, flags ); 1629 MemWrite( &welcome.cpuArch, cpuArch ); 1630 memcpy( welcome.cpuManufacturer, manufacturer, 12 ); 1631 MemWrite( &welcome.cpuId, cpuId ); 1632 memcpy( welcome.programName, procname, pnsz ); 1633 memset( welcome.programName + pnsz, 0, WelcomeMessageProgramNameSize - pnsz ); 1634 memcpy( welcome.hostInfo, hostinfo, hisz ); 1635 memset( welcome.hostInfo + hisz, 0, WelcomeMessageHostInfoSize - hisz ); 1636 1637 moodycamel::ConsumerToken token( GetQueue() ); 1638 1639 ListenSocket listen; 1640 bool isListening = false; 1641 if( !dataPortSearch ) 1642 { 1643 isListening = listen.Listen( dataPort, 4 ); 1644 } 1645 else 1646 { 1647 for( uint32_t i=0; i<20; i++ ) 1648 { 1649 if( listen.Listen( dataPort+i, 4 ) ) 1650 { 1651 dataPort += i; 1652 isListening = true; 1653 break; 1654 } 1655 } 1656 } 1657 if( !isListening ) 1658 { 1659 for(;;) 1660 { 1661 if( ShouldExit() ) 1662 { 1663 m_shutdownFinished.store( true, std::memory_order_relaxed ); 1664 return; 1665 } 1666 1667 ClearQueues( token ); 1668 std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); 1669 } 1670 } 1671 1672#ifndef TRACY_NO_BROADCAST 1673 m_broadcast = (UdpBroadcast*)tracy_malloc( sizeof( UdpBroadcast ) ); 1674 new(m_broadcast) UdpBroadcast(); 1675# ifdef 
TRACY_ONLY_LOCALHOST 1676 const char* addr = "127.255.255.255"; 1677# else 1678 const char* addr = "255.255.255.255"; 1679# endif 1680 if( !m_broadcast->Open( addr, broadcastPort ) ) 1681 { 1682 m_broadcast->~UdpBroadcast(); 1683 tracy_free( m_broadcast ); 1684 m_broadcast = nullptr; 1685 } 1686#endif 1687 1688 int broadcastLen = 0; 1689 auto& broadcastMsg = GetBroadcastMessage( procname, pnsz, broadcastLen, dataPort ); 1690 uint64_t lastBroadcast = 0; 1691 1692 // Connections loop. 1693 // Each iteration of the loop handles whole connection. Multiple iterations will only 1694 // happen in the on-demand mode or when handshake fails. 1695 for(;;) 1696 { 1697 // Wait for incoming connection 1698 for(;;) 1699 { 1700#ifndef TRACY_NO_EXIT 1701 if( !m_noExit && ShouldExit() ) 1702 { 1703 if( m_broadcast ) 1704 { 1705 broadcastMsg.activeTime = -1; 1706 m_broadcast->Send( broadcastPort, &broadcastMsg, broadcastLen ); 1707 } 1708 m_shutdownFinished.store( true, std::memory_order_relaxed ); 1709 return; 1710 } 1711#endif 1712 m_sock = listen.Accept(); 1713 if( m_sock ) break; 1714#ifndef TRACY_ON_DEMAND 1715 ProcessSysTime(); 1716#endif 1717 1718 if( m_broadcast ) 1719 { 1720 const auto t = std::chrono::high_resolution_clock::now().time_since_epoch().count(); 1721 if( t - lastBroadcast > 3000000000 ) // 3s 1722 { 1723 lastBroadcast = t; 1724 const auto ts = std::chrono::duration_cast<std::chrono::seconds>( std::chrono::system_clock::now().time_since_epoch() ).count(); 1725 broadcastMsg.activeTime = int32_t( ts - m_epoch ); 1726 assert( broadcastMsg.activeTime >= 0 ); 1727 m_broadcast->Send( broadcastPort, &broadcastMsg, broadcastLen ); 1728 } 1729 } 1730 } 1731 1732 if( m_broadcast ) 1733 { 1734 lastBroadcast = 0; 1735 broadcastMsg.activeTime = -1; 1736 m_broadcast->Send( broadcastPort, &broadcastMsg, broadcastLen ); 1737 } 1738 1739 // Handshake 1740 { 1741 char shibboleth[HandshakeShibbolethSize]; 1742 auto res = m_sock->ReadRaw( shibboleth, HandshakeShibbolethSize, 2000 
); 1743 if( !res || memcmp( shibboleth, HandshakeShibboleth, HandshakeShibbolethSize ) != 0 ) 1744 { 1745 m_sock->~Socket(); 1746 tracy_free( m_sock ); 1747 m_sock = nullptr; 1748 continue; 1749 } 1750 1751 uint32_t protocolVersion; 1752 res = m_sock->ReadRaw( &protocolVersion, sizeof( protocolVersion ), 2000 ); 1753 if( !res ) 1754 { 1755 m_sock->~Socket(); 1756 tracy_free( m_sock ); 1757 m_sock = nullptr; 1758 continue; 1759 } 1760 1761 if( protocolVersion != ProtocolVersion ) 1762 { 1763 HandshakeStatus status = HandshakeProtocolMismatch; 1764 m_sock->Send( &status, sizeof( status ) ); 1765 m_sock->~Socket(); 1766 tracy_free( m_sock ); 1767 m_sock = nullptr; 1768 continue; 1769 } 1770 } 1771 1772#ifdef TRACY_ON_DEMAND 1773 const auto currentTime = GetTime(); 1774 ClearQueues( token ); 1775 m_connectionId.fetch_add( 1, std::memory_order_release ); 1776#endif 1777 m_isConnected.store( true, std::memory_order_release ); 1778 1779 HandshakeStatus handshake = HandshakeWelcome; 1780 m_sock->Send( &handshake, sizeof( handshake ) ); 1781 1782 LZ4_resetStream( (LZ4_stream_t*)m_stream ); 1783 m_sock->Send( &welcome, sizeof( welcome ) ); 1784 1785 m_threadCtx = 0; 1786 m_refTimeSerial = 0; 1787 m_refTimeCtx = 0; 1788 m_refTimeGpu = 0; 1789 1790#ifdef TRACY_ON_DEMAND 1791 OnDemandPayloadMessage onDemand; 1792 onDemand.frames = m_frameCount.load( std::memory_order_relaxed ); 1793 onDemand.currentTime = currentTime; 1794 1795 m_sock->Send( &onDemand, sizeof( onDemand ) ); 1796 1797 m_deferredLock.lock(); 1798 for( auto& item : m_deferredQueue ) 1799 { 1800 uint64_t ptr; 1801 uint16_t size; 1802 const auto idx = MemRead<uint8_t>( &item.hdr.idx ); 1803 switch( (QueueType)idx ) 1804 { 1805 case QueueType::MessageAppInfo: 1806 ptr = MemRead<uint64_t>( &item.messageFat.text ); 1807 size = MemRead<uint16_t>( &item.messageFat.size ); 1808 SendSingleString( (const char*)ptr, size ); 1809 break; 1810 case QueueType::LockName: 1811 ptr = MemRead<uint64_t>( &item.lockNameFat.name ); 
1812 size = MemRead<uint16_t>( &item.lockNameFat.size ); 1813 SendSingleString( (const char*)ptr, size ); 1814 break; 1815 case QueueType::GpuContextName: 1816 ptr = MemRead<uint64_t>( &item.gpuContextNameFat.ptr ); 1817 size = MemRead<uint16_t>( &item.gpuContextNameFat.size ); 1818 SendSingleString( (const char*)ptr, size ); 1819 break; 1820 default: 1821 break; 1822 } 1823 AppendData( &item, QueueDataSize[idx] ); 1824 } 1825 m_deferredLock.unlock(); 1826#endif 1827 1828 // Main communications loop 1829 int keepAlive = 0; 1830 for(;;) 1831 { 1832 ProcessSysTime(); 1833 const auto status = Dequeue( token ); 1834 const auto serialStatus = DequeueSerial(); 1835 if( status == DequeueStatus::ConnectionLost || serialStatus == DequeueStatus::ConnectionLost ) 1836 { 1837 break; 1838 } 1839 else if( status == DequeueStatus::QueueEmpty && serialStatus == DequeueStatus::QueueEmpty ) 1840 { 1841 if( ShouldExit() ) break; 1842 if( m_bufferOffset != m_bufferStart ) 1843 { 1844 if( !CommitData() ) break; 1845 } 1846 if( keepAlive == 500 ) 1847 { 1848 QueueItem ka; 1849 ka.hdr.type = QueueType::KeepAlive; 1850 AppendData( &ka, QueueDataSize[ka.hdr.idx] ); 1851 if( !CommitData() ) break; 1852 1853 keepAlive = 0; 1854 } 1855 else if( !m_sock->HasData() ) 1856 { 1857 keepAlive++; 1858 std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); 1859 } 1860 } 1861 else 1862 { 1863 keepAlive = 0; 1864 } 1865 1866 bool connActive = true; 1867 while( m_sock->HasData() ) 1868 { 1869 connActive = HandleServerQuery(); 1870 if( !connActive ) break; 1871 } 1872 if( !connActive ) break; 1873 } 1874 if( ShouldExit() ) break; 1875 1876 m_isConnected.store( false, std::memory_order_release ); 1877#ifdef TRACY_ON_DEMAND 1878 m_bufferOffset = 0; 1879 m_bufferStart = 0; 1880#endif 1881 1882 m_sock->~Socket(); 1883 tracy_free( m_sock ); 1884 m_sock = nullptr; 1885 1886#ifndef TRACY_ON_DEMAND 1887 // Client is no longer available here. Accept incoming connections, but reject handshake. 
1888 for(;;) 1889 { 1890 if( ShouldExit() ) 1891 { 1892 m_shutdownFinished.store( true, std::memory_order_relaxed ); 1893 return; 1894 } 1895 1896 ClearQueues( token ); 1897 1898 m_sock = listen.Accept(); 1899 if( m_sock ) 1900 { 1901 char shibboleth[HandshakeShibbolethSize]; 1902 auto res = m_sock->ReadRaw( shibboleth, HandshakeShibbolethSize, 1000 ); 1903 if( !res || memcmp( shibboleth, HandshakeShibboleth, HandshakeShibbolethSize ) != 0 ) 1904 { 1905 m_sock->~Socket(); 1906 tracy_free( m_sock ); 1907 m_sock = nullptr; 1908 continue; 1909 } 1910 1911 uint32_t protocolVersion; 1912 res = m_sock->ReadRaw( &protocolVersion, sizeof( protocolVersion ), 1000 ); 1913 if( !res ) 1914 { 1915 m_sock->~Socket(); 1916 tracy_free( m_sock ); 1917 m_sock = nullptr; 1918 continue; 1919 } 1920 1921 HandshakeStatus status = HandshakeNotAvailable; 1922 m_sock->Send( &status, sizeof( status ) ); 1923 m_sock->~Socket(); 1924 tracy_free( m_sock ); 1925 } 1926 } 1927#endif 1928 } 1929 // End of connections loop 1930 1931 // Wait for symbols thread to terminate. Symbol resolution will continue in this thread. 1932#ifdef TRACY_HAS_CALLSTACK 1933 while( s_symbolThreadGone.load() == false ) { YieldThread(); } 1934#endif 1935 1936 // Client is exiting. Send items remaining in queues. 
1937 for(;;) 1938 { 1939 const auto status = Dequeue( token ); 1940 const auto serialStatus = DequeueSerial(); 1941 if( status == DequeueStatus::ConnectionLost || serialStatus == DequeueStatus::ConnectionLost ) 1942 { 1943 m_shutdownFinished.store( true, std::memory_order_relaxed ); 1944 return; 1945 } 1946 else if( status == DequeueStatus::QueueEmpty && serialStatus == DequeueStatus::QueueEmpty ) 1947 { 1948 if( m_bufferOffset != m_bufferStart ) CommitData(); 1949 break; 1950 } 1951 1952 while( m_sock->HasData() ) 1953 { 1954 if( !HandleServerQuery() ) 1955 { 1956 m_shutdownFinished.store( true, std::memory_order_relaxed ); 1957 return; 1958 } 1959 } 1960 1961#ifdef TRACY_HAS_CALLSTACK 1962 for(;;) 1963 { 1964 auto si = m_symbolQueue.front(); 1965 if( !si ) break; 1966 HandleSymbolQueueItem( *si ); 1967 m_symbolQueue.pop(); 1968 } 1969#endif 1970 } 1971 1972 // Send client termination notice to the server 1973 QueueItem terminate; 1974 MemWrite( &terminate.hdr.type, QueueType::Terminate ); 1975 if( !SendData( (const char*)&terminate, 1 ) ) 1976 { 1977 m_shutdownFinished.store( true, std::memory_order_relaxed ); 1978 return; 1979 } 1980 // Handle remaining server queries 1981 for(;;) 1982 { 1983 while( m_sock->HasData() ) 1984 { 1985 if( !HandleServerQuery() ) 1986 { 1987 m_shutdownFinished.store( true, std::memory_order_relaxed ); 1988 return; 1989 } 1990 } 1991#ifdef TRACY_HAS_CALLSTACK 1992 for(;;) 1993 { 1994 auto si = m_symbolQueue.front(); 1995 if( !si ) break; 1996 HandleSymbolQueueItem( *si ); 1997 m_symbolQueue.pop(); 1998 } 1999#endif 2000 const auto status = Dequeue( token ); 2001 const auto serialStatus = DequeueSerial(); 2002 if( status == DequeueStatus::ConnectionLost || serialStatus == DequeueStatus::ConnectionLost ) 2003 { 2004 m_shutdownFinished.store( true, std::memory_order_relaxed ); 2005 return; 2006 } 2007 if( m_bufferOffset != m_bufferStart ) 2008 { 2009 if( !CommitData() ) 2010 { 2011 m_shutdownFinished.store( true, std::memory_order_relaxed 
); 2012 return; 2013 } 2014 } 2015 } 2016} 2017 2018#ifndef TRACY_NO_FRAME_IMAGE 2019void Profiler::CompressWorker() 2020{ 2021 ThreadExitHandler threadExitHandler; 2022 SetThreadName( "Tracy DXT1" ); 2023 while( m_timeBegin.load( std::memory_order_relaxed ) == 0 ) std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); 2024 2025#ifdef TRACY_USE_RPMALLOC 2026 rpmalloc_thread_initialize(); 2027#endif 2028 2029 for(;;) 2030 { 2031 const auto shouldExit = ShouldExit(); 2032 2033 { 2034 bool lockHeld = true; 2035 while( !m_fiLock.try_lock() ) 2036 { 2037 if( m_shutdownManual.load( std::memory_order_relaxed ) ) 2038 { 2039 lockHeld = false; 2040 break; 2041 } 2042 } 2043 if( !m_fiQueue.empty() ) m_fiQueue.swap( m_fiDequeue ); 2044 if( lockHeld ) 2045 { 2046 m_fiLock.unlock(); 2047 } 2048 } 2049 2050 const auto sz = m_fiDequeue.size(); 2051 if( sz > 0 ) 2052 { 2053 auto fi = m_fiDequeue.data(); 2054 auto end = fi + sz; 2055 while( fi != end ) 2056 { 2057 const auto w = fi->w; 2058 const auto h = fi->h; 2059 const auto csz = size_t( w * h / 2 ); 2060 auto etc1buf = (char*)tracy_malloc( csz ); 2061 CompressImageDxt1( (const char*)fi->image, etc1buf, w, h ); 2062 tracy_free( fi->image ); 2063 2064 TracyLfqPrepare( QueueType::FrameImage ); 2065 MemWrite( &item->frameImageFat.image, (uint64_t)etc1buf ); 2066 MemWrite( &item->frameImageFat.frame, fi->frame ); 2067 MemWrite( &item->frameImageFat.w, w ); 2068 MemWrite( &item->frameImageFat.h, h ); 2069 uint8_t flip = fi->flip; 2070 MemWrite( &item->frameImageFat.flip, flip ); 2071 TracyLfqCommit; 2072 2073 fi++; 2074 } 2075 m_fiDequeue.clear(); 2076 } 2077 else 2078 { 2079 std::this_thread::sleep_for( std::chrono::milliseconds( 20 ) ); 2080 } 2081 2082 if( shouldExit ) 2083 { 2084 return; 2085 } 2086 } 2087} 2088#endif 2089 2090static void FreeAssociatedMemory( const QueueItem& item ) 2091{ 2092 if( item.hdr.idx >= (int)QueueType::Terminate ) return; 2093 2094 uint64_t ptr; 2095 switch( item.hdr.type ) 2096 { 2097 case 
QueueType::ZoneText: 2098 case QueueType::ZoneName: 2099 ptr = MemRead<uint64_t>( &item.zoneTextFat.text ); 2100 tracy_free( (void*)ptr ); 2101 break; 2102 case QueueType::MessageColor: 2103 case QueueType::MessageColorCallstack: 2104 ptr = MemRead<uint64_t>( &item.messageColorFat.text ); 2105 tracy_free( (void*)ptr ); 2106 break; 2107 case QueueType::Message: 2108 case QueueType::MessageCallstack: 2109#ifndef TRACY_ON_DEMAND 2110 case QueueType::MessageAppInfo: 2111#endif 2112 ptr = MemRead<uint64_t>( &item.messageFat.text ); 2113 tracy_free( (void*)ptr ); 2114 break; 2115 case QueueType::ZoneBeginAllocSrcLoc: 2116 case QueueType::ZoneBeginAllocSrcLocCallstack: 2117 ptr = MemRead<uint64_t>( &item.zoneBegin.srcloc ); 2118 tracy_free( (void*)ptr ); 2119 break; 2120 case QueueType::GpuZoneBeginAllocSrcLoc: 2121 case QueueType::GpuZoneBeginAllocSrcLocCallstack: 2122 case QueueType::GpuZoneBeginAllocSrcLocSerial: 2123 case QueueType::GpuZoneBeginAllocSrcLocCallstackSerial: 2124 ptr = MemRead<uint64_t>( &item.gpuZoneBegin.srcloc ); 2125 tracy_free( (void*)ptr ); 2126 break; 2127 case QueueType::CallstackSerial: 2128 case QueueType::Callstack: 2129 ptr = MemRead<uint64_t>( &item.callstackFat.ptr ); 2130 tracy_free( (void*)ptr ); 2131 break; 2132 case QueueType::CallstackAlloc: 2133 ptr = MemRead<uint64_t>( &item.callstackAllocFat.nativePtr ); 2134 tracy_free( (void*)ptr ); 2135 ptr = MemRead<uint64_t>( &item.callstackAllocFat.ptr ); 2136 tracy_free( (void*)ptr ); 2137 break; 2138 case QueueType::CallstackSample: 2139 case QueueType::CallstackSampleContextSwitch: 2140 ptr = MemRead<uint64_t>( &item.callstackSampleFat.ptr ); 2141 tracy_free( (void*)ptr ); 2142 break; 2143 case QueueType::FrameImage: 2144 ptr = MemRead<uint64_t>( &item.frameImageFat.image ); 2145 tracy_free( (void*)ptr ); 2146 break; 2147#ifdef TRACY_HAS_CALLSTACK 2148 case QueueType::CallstackFrameSize: 2149 { 2150 InitRpmalloc(); 2151 auto size = MemRead<uint8_t>( &item.callstackFrameSizeFat.size ); 2152 
auto data = (const CallstackEntry*)MemRead<uint64_t>( &item.callstackFrameSizeFat.data ); 2153 for( uint8_t i=0; i<size; i++ ) 2154 { 2155 const auto& frame = data[i]; 2156 tracy_free_fast( (void*)frame.name ); 2157 tracy_free_fast( (void*)frame.file ); 2158 } 2159 tracy_free_fast( (void*)data ); 2160 break; 2161 } 2162 case QueueType::SymbolInformation: 2163 { 2164 uint8_t needFree = MemRead<uint8_t>( &item.symbolInformationFat.needFree ); 2165 if( needFree ) 2166 { 2167 ptr = MemRead<uint64_t>( &item.symbolInformationFat.fileString ); 2168 tracy_free( (void*)ptr ); 2169 } 2170 break; 2171 } 2172 case QueueType::SymbolCodeMetadata: 2173 ptr = MemRead<uint64_t>( &item.symbolCodeMetadata.ptr ); 2174 tracy_free( (void*)ptr ); 2175 break; 2176#endif 2177#ifndef TRACY_ON_DEMAND 2178 case QueueType::LockName: 2179 ptr = MemRead<uint64_t>( &item.lockNameFat.name ); 2180 tracy_free( (void*)ptr ); 2181 break; 2182 case QueueType::GpuContextName: 2183 ptr = MemRead<uint64_t>( &item.gpuContextNameFat.ptr ); 2184 tracy_free( (void*)ptr ); 2185 break; 2186#endif 2187#ifdef TRACY_ON_DEMAND 2188 case QueueType::MessageAppInfo: 2189 case QueueType::GpuContextName: 2190 // Don't free memory associated with deferred messages. 
2191 break; 2192#endif 2193#ifdef TRACY_HAS_SYSTEM_TRACING 2194 case QueueType::ExternalNameMetadata: 2195 ptr = MemRead<uint64_t>( &item.externalNameMetadata.name ); 2196 tracy_free( (void*)ptr ); 2197 ptr = MemRead<uint64_t>( &item.externalNameMetadata.threadName ); 2198 tracy_free_fast( (void*)ptr ); 2199 break; 2200#endif 2201 case QueueType::SourceCodeMetadata: 2202 ptr = MemRead<uint64_t>( &item.sourceCodeMetadata.ptr ); 2203 tracy_free( (void*)ptr ); 2204 break; 2205 default: 2206 break; 2207 } 2208} 2209 2210void Profiler::ClearQueues( moodycamel::ConsumerToken& token ) 2211{ 2212 for(;;) 2213 { 2214 const auto sz = GetQueue().try_dequeue_bulk_single( token, [](const uint64_t&){}, []( QueueItem* item, size_t sz ) { assert( sz > 0 ); while( sz-- > 0 ) FreeAssociatedMemory( *item++ ); } ); 2215 if( sz == 0 ) break; 2216 } 2217 2218 ClearSerial(); 2219} 2220 2221void Profiler::ClearSerial() 2222{ 2223 bool lockHeld = true; 2224 while( !m_serialLock.try_lock() ) 2225 { 2226 if( m_shutdownManual.load( std::memory_order_relaxed ) ) 2227 { 2228 lockHeld = false; 2229 break; 2230 } 2231 } 2232 for( auto& v : m_serialQueue ) FreeAssociatedMemory( v ); 2233 m_serialQueue.clear(); 2234 if( lockHeld ) 2235 { 2236 m_serialLock.unlock(); 2237 } 2238 2239 for( auto& v : m_serialDequeue ) FreeAssociatedMemory( v ); 2240 m_serialDequeue.clear(); 2241} 2242 2243Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token ) 2244{ 2245 bool connectionLost = false; 2246 const auto sz = GetQueue().try_dequeue_bulk_single( token, 2247 [this, &connectionLost] ( const uint32_t& threadId ) 2248 { 2249 if( ThreadCtxCheck( threadId ) == ThreadCtxStatus::ConnectionLost ) connectionLost = true; 2250 }, 2251 [this, &connectionLost] ( QueueItem* item, size_t sz ) 2252 { 2253 if( connectionLost ) return; 2254 InitRpmalloc(); 2255 assert( sz > 0 ); 2256 int64_t refThread = m_refTimeThread; 2257 int64_t refCtx = m_refTimeCtx; 2258 int64_t refGpu = m_refTimeGpu; 2259 while( 
sz-- > 0 ) 2260 { 2261 uint64_t ptr; 2262 uint16_t size; 2263 auto idx = MemRead<uint8_t>( &item->hdr.idx ); 2264 if( idx < (int)QueueType::Terminate ) 2265 { 2266 switch( (QueueType)idx ) 2267 { 2268 case QueueType::ZoneText: 2269 case QueueType::ZoneName: 2270 ptr = MemRead<uint64_t>( &item->zoneTextFat.text ); 2271 size = MemRead<uint16_t>( &item->zoneTextFat.size ); 2272 SendSingleString( (const char*)ptr, size ); 2273 tracy_free_fast( (void*)ptr ); 2274 break; 2275 case QueueType::Message: 2276 case QueueType::MessageCallstack: 2277 ptr = MemRead<uint64_t>( &item->messageFat.text ); 2278 size = MemRead<uint16_t>( &item->messageFat.size ); 2279 SendSingleString( (const char*)ptr, size ); 2280 tracy_free_fast( (void*)ptr ); 2281 break; 2282 case QueueType::MessageColor: 2283 case QueueType::MessageColorCallstack: 2284 ptr = MemRead<uint64_t>( &item->messageColorFat.text ); 2285 size = MemRead<uint16_t>( &item->messageColorFat.size ); 2286 SendSingleString( (const char*)ptr, size ); 2287 tracy_free_fast( (void*)ptr ); 2288 break; 2289 case QueueType::MessageAppInfo: 2290 ptr = MemRead<uint64_t>( &item->messageFat.text ); 2291 size = MemRead<uint16_t>( &item->messageFat.size ); 2292 SendSingleString( (const char*)ptr, size ); 2293#ifndef TRACY_ON_DEMAND 2294 tracy_free_fast( (void*)ptr ); 2295#endif 2296 break; 2297 case QueueType::ZoneBeginAllocSrcLoc: 2298 case QueueType::ZoneBeginAllocSrcLocCallstack: 2299 { 2300 int64_t t = MemRead<int64_t>( &item->zoneBegin.time ); 2301 int64_t dt = t - refThread; 2302 refThread = t; 2303 MemWrite( &item->zoneBegin.time, dt ); 2304 ptr = MemRead<uint64_t>( &item->zoneBegin.srcloc ); 2305 SendSourceLocationPayload( ptr ); 2306 tracy_free_fast( (void*)ptr ); 2307 break; 2308 } 2309 case QueueType::Callstack: 2310 ptr = MemRead<uint64_t>( &item->callstackFat.ptr ); 2311 SendCallstackPayload( ptr ); 2312 tracy_free_fast( (void*)ptr ); 2313 break; 2314 case QueueType::CallstackAlloc: 2315 ptr = MemRead<uint64_t>( 
&item->callstackAllocFat.nativePtr ); 2316 if( ptr != 0 ) 2317 { 2318 CutCallstack( (void*)ptr, "lua_pcall" ); 2319 SendCallstackPayload( ptr ); 2320 tracy_free_fast( (void*)ptr ); 2321 } 2322 ptr = MemRead<uint64_t>( &item->callstackAllocFat.ptr ); 2323 SendCallstackAlloc( ptr ); 2324 tracy_free_fast( (void*)ptr ); 2325 break; 2326 case QueueType::CallstackSample: 2327 case QueueType::CallstackSampleContextSwitch: 2328 { 2329 ptr = MemRead<uint64_t>( &item->callstackSampleFat.ptr ); 2330 SendCallstackPayload64( ptr ); 2331 tracy_free_fast( (void*)ptr ); 2332 int64_t t = MemRead<int64_t>( &item->callstackSampleFat.time ); 2333 int64_t dt = t - refCtx; 2334 refCtx = t; 2335 MemWrite( &item->callstackSampleFat.time, dt ); 2336 break; 2337 } 2338 case QueueType::FrameImage: 2339 { 2340 ptr = MemRead<uint64_t>( &item->frameImageFat.image ); 2341 const auto w = MemRead<uint16_t>( &item->frameImageFat.w ); 2342 const auto h = MemRead<uint16_t>( &item->frameImageFat.h ); 2343 const auto csz = size_t( w * h / 2 ); 2344 SendLongString( ptr, (const char*)ptr, csz, QueueType::FrameImageData ); 2345 tracy_free_fast( (void*)ptr ); 2346 break; 2347 } 2348 case QueueType::ZoneBegin: 2349 case QueueType::ZoneBeginCallstack: 2350 { 2351 int64_t t = MemRead<int64_t>( &item->zoneBegin.time ); 2352 int64_t dt = t - refThread; 2353 refThread = t; 2354 MemWrite( &item->zoneBegin.time, dt ); 2355 break; 2356 } 2357 case QueueType::ZoneEnd: 2358 { 2359 int64_t t = MemRead<int64_t>( &item->zoneEnd.time ); 2360 int64_t dt = t - refThread; 2361 refThread = t; 2362 MemWrite( &item->zoneEnd.time, dt ); 2363 break; 2364 } 2365 case QueueType::GpuZoneBegin: 2366 case QueueType::GpuZoneBeginCallstack: 2367 { 2368 int64_t t = MemRead<int64_t>( &item->gpuZoneBegin.cpuTime ); 2369 int64_t dt = t - refThread; 2370 refThread = t; 2371 MemWrite( &item->gpuZoneBegin.cpuTime, dt ); 2372 break; 2373 } 2374 case QueueType::GpuZoneBeginAllocSrcLoc: 2375 case QueueType::GpuZoneBeginAllocSrcLocCallstack: 2376 
{ 2377 int64_t t = MemRead<int64_t>( &item->gpuZoneBegin.cpuTime ); 2378 int64_t dt = t - refThread; 2379 refThread = t; 2380 MemWrite( &item->gpuZoneBegin.cpuTime, dt ); 2381 ptr = MemRead<uint64_t>( &item->gpuZoneBegin.srcloc ); 2382 SendSourceLocationPayload( ptr ); 2383 tracy_free_fast( (void*)ptr ); 2384 break; 2385 } 2386 case QueueType::GpuZoneEnd: 2387 { 2388 int64_t t = MemRead<int64_t>( &item->gpuZoneEnd.cpuTime ); 2389 int64_t dt = t - refThread; 2390 refThread = t; 2391 MemWrite( &item->gpuZoneEnd.cpuTime, dt ); 2392 break; 2393 } 2394 case QueueType::GpuContextName: 2395 ptr = MemRead<uint64_t>( &item->gpuContextNameFat.ptr ); 2396 size = MemRead<uint16_t>( &item->gpuContextNameFat.size ); 2397 SendSingleString( (const char*)ptr, size ); 2398#ifndef TRACY_ON_DEMAND 2399 tracy_free_fast( (void*)ptr ); 2400#endif 2401 break; 2402 case QueueType::PlotDataInt: 2403 case QueueType::PlotDataFloat: 2404 case QueueType::PlotDataDouble: 2405 { 2406 int64_t t = MemRead<int64_t>( &item->plotDataInt.time ); 2407 int64_t dt = t - refThread; 2408 refThread = t; 2409 MemWrite( &item->plotDataInt.time, dt ); 2410 break; 2411 } 2412 case QueueType::ContextSwitch: 2413 { 2414 int64_t t = MemRead<int64_t>( &item->contextSwitch.time ); 2415 int64_t dt = t - refCtx; 2416 refCtx = t; 2417 MemWrite( &item->contextSwitch.time, dt ); 2418 break; 2419 } 2420 case QueueType::ThreadWakeup: 2421 { 2422 int64_t t = MemRead<int64_t>( &item->threadWakeup.time ); 2423 int64_t dt = t - refCtx; 2424 refCtx = t; 2425 MemWrite( &item->threadWakeup.time, dt ); 2426 break; 2427 } 2428 case QueueType::GpuTime: 2429 { 2430 int64_t t = MemRead<int64_t>( &item->gpuTime.gpuTime ); 2431 int64_t dt = t - refGpu; 2432 refGpu = t; 2433 MemWrite( &item->gpuTime.gpuTime, dt ); 2434 break; 2435 } 2436#ifdef TRACY_HAS_CALLSTACK 2437 case QueueType::CallstackFrameSize: 2438 { 2439 auto data = (const CallstackEntry*)MemRead<uint64_t>( &item->callstackFrameSizeFat.data ); 2440 auto datasz = 
MemRead<uint8_t>( &item->callstackFrameSizeFat.size ); 2441 auto imageName = (const char*)MemRead<uint64_t>( &item->callstackFrameSizeFat.imageName ); 2442 SendSingleString( imageName ); 2443 AppendData( item++, QueueDataSize[idx] ); 2444 2445 for( uint8_t i=0; i<datasz; i++ ) 2446 { 2447 const auto& frame = data[i]; 2448 2449 SendSingleString( frame.name ); 2450 SendSecondString( frame.file ); 2451 2452 QueueItem item; 2453 MemWrite( &item.hdr.type, QueueType::CallstackFrame ); 2454 MemWrite( &item.callstackFrame.line, frame.line ); 2455 MemWrite( &item.callstackFrame.symAddr, frame.symAddr ); 2456 MemWrite( &item.callstackFrame.symLen, frame.symLen ); 2457 2458 AppendData( &item, QueueDataSize[(int)QueueType::CallstackFrame] ); 2459 2460 tracy_free_fast( (void*)frame.name ); 2461 tracy_free_fast( (void*)frame.file ); 2462 } 2463 tracy_free_fast( (void*)data ); 2464 continue; 2465 } 2466 case QueueType::SymbolInformation: 2467 { 2468 auto fileString = (const char*)MemRead<uint64_t>( &item->symbolInformationFat.fileString ); 2469 auto needFree = MemRead<uint8_t>( &item->symbolInformationFat.needFree ); 2470 SendSingleString( fileString ); 2471 if( needFree ) tracy_free_fast( (void*)fileString ); 2472 break; 2473 } 2474 case QueueType::SymbolCodeMetadata: 2475 { 2476 auto symbol = MemRead<uint64_t>( &item->symbolCodeMetadata.symbol ); 2477 auto ptr = (const char*)MemRead<uint64_t>( &item->symbolCodeMetadata.ptr ); 2478 auto size = MemRead<uint32_t>( &item->symbolCodeMetadata.size ); 2479 SendLongString( symbol, ptr, size, QueueType::SymbolCode ); 2480 tracy_free_fast( (void*)ptr ); 2481 ++item; 2482 continue; 2483 } 2484#endif 2485#ifdef TRACY_HAS_SYSTEM_TRACING 2486 case QueueType::ExternalNameMetadata: 2487 { 2488 auto thread = MemRead<uint64_t>( &item->externalNameMetadata.thread ); 2489 auto name = (const char*)MemRead<uint64_t>( &item->externalNameMetadata.name ); 2490 auto threadName = (const char*)MemRead<uint64_t>( &item->externalNameMetadata.threadName ); 
2491 SendString( thread, threadName, QueueType::ExternalThreadName ); 2492 SendString( thread, name, QueueType::ExternalName ); 2493 tracy_free_fast( (void*)threadName ); 2494 tracy_free_fast( (void*)name ); 2495 ++item; 2496 continue; 2497 } 2498#endif 2499 case QueueType::SourceCodeMetadata: 2500 { 2501 auto ptr = (const char*)MemRead<uint64_t>( &item->sourceCodeMetadata.ptr ); 2502 auto size = MemRead<uint32_t>( &item->sourceCodeMetadata.size ); 2503 auto id = MemRead<uint32_t>( &item->sourceCodeMetadata.id ); 2504 SendLongString( (uint64_t)id, ptr, size, QueueType::SourceCode ); 2505 tracy_free_fast( (void*)ptr ); 2506 ++item; 2507 continue; 2508 } 2509 default: 2510 assert( false ); 2511 break; 2512 } 2513 } 2514 if( !AppendData( item++, QueueDataSize[idx] ) ) 2515 { 2516 connectionLost = true; 2517 m_refTimeThread = refThread; 2518 m_refTimeCtx = refCtx; 2519 m_refTimeGpu = refGpu; 2520 return; 2521 } 2522 } 2523 m_refTimeThread = refThread; 2524 m_refTimeCtx = refCtx; 2525 m_refTimeGpu = refGpu; 2526 } 2527 ); 2528 if( connectionLost ) return DequeueStatus::ConnectionLost; 2529 return sz > 0 ? 
DequeueStatus::DataDequeued : DequeueStatus::QueueEmpty; 2530} 2531 2532Profiler::DequeueStatus Profiler::DequeueContextSwitches( tracy::moodycamel::ConsumerToken& token, int64_t& timeStop ) 2533{ 2534 const auto sz = GetQueue().try_dequeue_bulk_single( token, [] ( const uint64_t& ) {}, 2535 [this, &timeStop] ( QueueItem* item, size_t sz ) 2536 { 2537 assert( sz > 0 ); 2538 int64_t refCtx = m_refTimeCtx; 2539 while( sz-- > 0 ) 2540 { 2541 FreeAssociatedMemory( *item ); 2542 if( timeStop < 0 ) return; 2543 const auto idx = MemRead<uint8_t>( &item->hdr.idx ); 2544 if( idx == (uint8_t)QueueType::ContextSwitch ) 2545 { 2546 const auto csTime = MemRead<int64_t>( &item->contextSwitch.time ); 2547 if( csTime > timeStop ) 2548 { 2549 timeStop = -1; 2550 m_refTimeCtx = refCtx; 2551 return; 2552 } 2553 int64_t dt = csTime - refCtx; 2554 refCtx = csTime; 2555 MemWrite( &item->contextSwitch.time, dt ); 2556 if( !AppendData( item, QueueDataSize[(int)QueueType::ContextSwitch] ) ) 2557 { 2558 timeStop = -2; 2559 m_refTimeCtx = refCtx; 2560 return; 2561 } 2562 } 2563 else if( idx == (uint8_t)QueueType::ThreadWakeup ) 2564 { 2565 const auto csTime = MemRead<int64_t>( &item->threadWakeup.time ); 2566 if( csTime > timeStop ) 2567 { 2568 timeStop = -1; 2569 m_refTimeCtx = refCtx; 2570 return; 2571 } 2572 int64_t dt = csTime - refCtx; 2573 refCtx = csTime; 2574 MemWrite( &item->threadWakeup.time, dt ); 2575 if( !AppendData( item, QueueDataSize[(int)QueueType::ThreadWakeup] ) ) 2576 { 2577 timeStop = -2; 2578 m_refTimeCtx = refCtx; 2579 return; 2580 } 2581 } 2582 item++; 2583 } 2584 m_refTimeCtx = refCtx; 2585 } 2586 ); 2587 2588 if( timeStop == -2 ) return DequeueStatus::ConnectionLost; 2589 return ( timeStop == -1 || sz > 0 ) ? 
DequeueStatus::DataDequeued : DequeueStatus::QueueEmpty; 2590} 2591 2592#define ThreadCtxCheckSerial( _name ) \ 2593 uint32_t thread = MemRead<uint32_t>( &item->_name.thread ); \ 2594 switch( ThreadCtxCheck( thread ) ) \ 2595 { \ 2596 case ThreadCtxStatus::Same: break; \ 2597 case ThreadCtxStatus::Changed: assert( m_refTimeThread == 0 ); refThread = 0; break; \ 2598 case ThreadCtxStatus::ConnectionLost: return DequeueStatus::ConnectionLost; \ 2599 default: assert( false ); break; \ 2600 } 2601 2602Profiler::DequeueStatus Profiler::DequeueSerial() 2603{ 2604 { 2605 bool lockHeld = true; 2606 while( !m_serialLock.try_lock() ) 2607 { 2608 if( m_shutdownManual.load( std::memory_order_relaxed ) ) 2609 { 2610 lockHeld = false; 2611 break; 2612 } 2613 } 2614 if( !m_serialQueue.empty() ) m_serialQueue.swap( m_serialDequeue ); 2615 if( lockHeld ) 2616 { 2617 m_serialLock.unlock(); 2618 } 2619 } 2620 2621 const auto sz = m_serialDequeue.size(); 2622 if( sz > 0 ) 2623 { 2624 InitRpmalloc(); 2625 int64_t refSerial = m_refTimeSerial; 2626 int64_t refGpu = m_refTimeGpu; 2627#ifdef TRACY_FIBERS 2628 int64_t refThread = m_refTimeThread; 2629#endif 2630 auto item = m_serialDequeue.data(); 2631 auto end = item + sz; 2632 while( item != end ) 2633 { 2634 uint64_t ptr; 2635 auto idx = MemRead<uint8_t>( &item->hdr.idx ); 2636 if( idx < (int)QueueType::Terminate ) 2637 { 2638 switch( (QueueType)idx ) 2639 { 2640 case QueueType::CallstackSerial: 2641 ptr = MemRead<uint64_t>( &item->callstackFat.ptr ); 2642 SendCallstackPayload( ptr ); 2643 tracy_free_fast( (void*)ptr ); 2644 break; 2645 case QueueType::LockWait: 2646 case QueueType::LockSharedWait: 2647 { 2648 int64_t t = MemRead<int64_t>( &item->lockWait.time ); 2649 int64_t dt = t - refSerial; 2650 refSerial = t; 2651 MemWrite( &item->lockWait.time, dt ); 2652 break; 2653 } 2654 case QueueType::LockObtain: 2655 case QueueType::LockSharedObtain: 2656 { 2657 int64_t t = MemRead<int64_t>( &item->lockObtain.time ); 2658 int64_t dt = t - 
refSerial; 2659 refSerial = t; 2660 MemWrite( &item->lockObtain.time, dt ); 2661 break; 2662 } 2663 case QueueType::LockRelease: 2664 case QueueType::LockSharedRelease: 2665 { 2666 int64_t t = MemRead<int64_t>( &item->lockRelease.time ); 2667 int64_t dt = t - refSerial; 2668 refSerial = t; 2669 MemWrite( &item->lockRelease.time, dt ); 2670 break; 2671 } 2672 case QueueType::LockName: 2673 { 2674 ptr = MemRead<uint64_t>( &item->lockNameFat.name ); 2675 uint16_t size = MemRead<uint16_t>( &item->lockNameFat.size ); 2676 SendSingleString( (const char*)ptr, size ); 2677#ifndef TRACY_ON_DEMAND 2678 tracy_free_fast( (void*)ptr ); 2679#endif 2680 break; 2681 } 2682 case QueueType::MemAlloc: 2683 case QueueType::MemAllocNamed: 2684 case QueueType::MemAllocCallstack: 2685 case QueueType::MemAllocCallstackNamed: 2686 { 2687 int64_t t = MemRead<int64_t>( &item->memAlloc.time ); 2688 int64_t dt = t - refSerial; 2689 refSerial = t; 2690 MemWrite( &item->memAlloc.time, dt ); 2691 break; 2692 } 2693 case QueueType::MemFree: 2694 case QueueType::MemFreeNamed: 2695 case QueueType::MemFreeCallstack: 2696 case QueueType::MemFreeCallstackNamed: 2697 { 2698 int64_t t = MemRead<int64_t>( &item->memFree.time ); 2699 int64_t dt = t - refSerial; 2700 refSerial = t; 2701 MemWrite( &item->memFree.time, dt ); 2702 break; 2703 } 2704 case QueueType::GpuZoneBeginSerial: 2705 case QueueType::GpuZoneBeginCallstackSerial: 2706 { 2707 int64_t t = MemRead<int64_t>( &item->gpuZoneBegin.cpuTime ); 2708 int64_t dt = t - refSerial; 2709 refSerial = t; 2710 MemWrite( &item->gpuZoneBegin.cpuTime, dt ); 2711 break; 2712 } 2713 case QueueType::GpuZoneBeginAllocSrcLocSerial: 2714 case QueueType::GpuZoneBeginAllocSrcLocCallstackSerial: 2715 { 2716 int64_t t = MemRead<int64_t>( &item->gpuZoneBegin.cpuTime ); 2717 int64_t dt = t - refSerial; 2718 refSerial = t; 2719 MemWrite( &item->gpuZoneBegin.cpuTime, dt ); 2720 ptr = MemRead<uint64_t>( &item->gpuZoneBegin.srcloc ); 2721 SendSourceLocationPayload( ptr ); 2722 
tracy_free_fast( (void*)ptr ); 2723 break; 2724 } 2725 case QueueType::GpuZoneEndSerial: 2726 { 2727 int64_t t = MemRead<int64_t>( &item->gpuZoneEnd.cpuTime ); 2728 int64_t dt = t - refSerial; 2729 refSerial = t; 2730 MemWrite( &item->gpuZoneEnd.cpuTime, dt ); 2731 break; 2732 } 2733 case QueueType::GpuTime: 2734 { 2735 int64_t t = MemRead<int64_t>( &item->gpuTime.gpuTime ); 2736 int64_t dt = t - refGpu; 2737 refGpu = t; 2738 MemWrite( &item->gpuTime.gpuTime, dt ); 2739 break; 2740 } 2741 case QueueType::GpuContextName: 2742 { 2743 ptr = MemRead<uint64_t>( &item->gpuContextNameFat.ptr ); 2744 uint16_t size = MemRead<uint16_t>( &item->gpuContextNameFat.size ); 2745 SendSingleString( (const char*)ptr, size ); 2746#ifndef TRACY_ON_DEMAND 2747 tracy_free_fast( (void*)ptr ); 2748#endif 2749 break; 2750 } 2751#ifdef TRACY_FIBERS 2752 case QueueType::ZoneBegin: 2753 case QueueType::ZoneBeginCallstack: 2754 { 2755 ThreadCtxCheckSerial( zoneBeginThread ); 2756 int64_t t = MemRead<int64_t>( &item->zoneBegin.time ); 2757 int64_t dt = t - refThread; 2758 refThread = t; 2759 MemWrite( &item->zoneBegin.time, dt ); 2760 break; 2761 } 2762 case QueueType::ZoneBeginAllocSrcLoc: 2763 case QueueType::ZoneBeginAllocSrcLocCallstack: 2764 { 2765 ThreadCtxCheckSerial( zoneBeginThread ); 2766 int64_t t = MemRead<int64_t>( &item->zoneBegin.time ); 2767 int64_t dt = t - refThread; 2768 refThread = t; 2769 MemWrite( &item->zoneBegin.time, dt ); 2770 ptr = MemRead<uint64_t>( &item->zoneBegin.srcloc ); 2771 SendSourceLocationPayload( ptr ); 2772 tracy_free_fast( (void*)ptr ); 2773 break; 2774 } 2775 case QueueType::ZoneEnd: 2776 { 2777 ThreadCtxCheckSerial( zoneEndThread ); 2778 int64_t t = MemRead<int64_t>( &item->zoneEnd.time ); 2779 int64_t dt = t - refThread; 2780 refThread = t; 2781 MemWrite( &item->zoneEnd.time, dt ); 2782 break; 2783 } 2784 case QueueType::ZoneText: 2785 case QueueType::ZoneName: 2786 { 2787 ThreadCtxCheckSerial( zoneTextFatThread ); 2788 ptr = MemRead<uint64_t>( 
&item->zoneTextFat.text ); 2789 uint16_t size = MemRead<uint16_t>( &item->zoneTextFat.size ); 2790 SendSingleString( (const char*)ptr, size ); 2791 tracy_free_fast( (void*)ptr ); 2792 break; 2793 } 2794 case QueueType::Message: 2795 case QueueType::MessageCallstack: 2796 { 2797 ThreadCtxCheckSerial( messageFatThread ); 2798 ptr = MemRead<uint64_t>( &item->messageFat.text ); 2799 uint16_t size = MemRead<uint16_t>( &item->messageFat.size ); 2800 SendSingleString( (const char*)ptr, size ); 2801 tracy_free_fast( (void*)ptr ); 2802 break; 2803 } 2804 case QueueType::MessageColor: 2805 case QueueType::MessageColorCallstack: 2806 { 2807 ThreadCtxCheckSerial( messageColorFatThread ); 2808 ptr = MemRead<uint64_t>( &item->messageColorFat.text ); 2809 uint16_t size = MemRead<uint16_t>( &item->messageColorFat.size ); 2810 SendSingleString( (const char*)ptr, size ); 2811 tracy_free_fast( (void*)ptr ); 2812 break; 2813 } 2814 case QueueType::Callstack: 2815 { 2816 ThreadCtxCheckSerial( callstackFatThread ); 2817 ptr = MemRead<uint64_t>( &item->callstackFat.ptr ); 2818 SendCallstackPayload( ptr ); 2819 tracy_free_fast( (void*)ptr ); 2820 break; 2821 } 2822 case QueueType::CallstackAlloc: 2823 { 2824 ThreadCtxCheckSerial( callstackAllocFatThread ); 2825 ptr = MemRead<uint64_t>( &item->callstackAllocFat.nativePtr ); 2826 if( ptr != 0 ) 2827 { 2828 CutCallstack( (void*)ptr, "lua_pcall" ); 2829 SendCallstackPayload( ptr ); 2830 tracy_free_fast( (void*)ptr ); 2831 } 2832 ptr = MemRead<uint64_t>( &item->callstackAllocFat.ptr ); 2833 SendCallstackAlloc( ptr ); 2834 tracy_free_fast( (void*)ptr ); 2835 break; 2836 } 2837 case QueueType::FiberEnter: 2838 { 2839 ThreadCtxCheckSerial( fiberEnter ); 2840 int64_t t = MemRead<int64_t>( &item->fiberEnter.time ); 2841 int64_t dt = t - refThread; 2842 refThread = t; 2843 MemWrite( &item->fiberEnter.time, dt ); 2844 break; 2845 } 2846 case QueueType::FiberLeave: 2847 { 2848 ThreadCtxCheckSerial( fiberLeave ); 2849 int64_t t = MemRead<int64_t>( 
&item->fiberLeave.time ); 2850 int64_t dt = t - refThread; 2851 refThread = t; 2852 MemWrite( &item->fiberLeave.time, dt ); 2853 break; 2854 } 2855#endif 2856 default: 2857 assert( false ); 2858 break; 2859 } 2860 } 2861#ifdef TRACY_FIBERS 2862 else 2863 { 2864 switch( (QueueType)idx ) 2865 { 2866 case QueueType::ZoneColor: 2867 { 2868 ThreadCtxCheckSerial( zoneColorThread ); 2869 break; 2870 } 2871 case QueueType::ZoneValue: 2872 { 2873 ThreadCtxCheckSerial( zoneValueThread ); 2874 break; 2875 } 2876 case QueueType::ZoneValidation: 2877 { 2878 ThreadCtxCheckSerial( zoneValidationThread ); 2879 break; 2880 } 2881 case QueueType::MessageLiteral: 2882 case QueueType::MessageLiteralCallstack: 2883 { 2884 ThreadCtxCheckSerial( messageLiteralThread ); 2885 break; 2886 } 2887 case QueueType::MessageLiteralColor: 2888 case QueueType::MessageLiteralColorCallstack: 2889 { 2890 ThreadCtxCheckSerial( messageColorLiteralThread ); 2891 break; 2892 } 2893 case QueueType::CrashReport: 2894 { 2895 ThreadCtxCheckSerial( crashReportThread ); 2896 break; 2897 } 2898 default: 2899 break; 2900 } 2901 } 2902#endif 2903 if( !AppendData( item, QueueDataSize[idx] ) ) return DequeueStatus::ConnectionLost; 2904 item++; 2905 } 2906 m_refTimeSerial = refSerial; 2907 m_refTimeGpu = refGpu; 2908#ifdef TRACY_FIBERS 2909 m_refTimeThread = refThread; 2910#endif 2911 m_serialDequeue.clear(); 2912 } 2913 else 2914 { 2915 return DequeueStatus::QueueEmpty; 2916 } 2917 return DequeueStatus::DataDequeued; 2918} 2919 2920Profiler::ThreadCtxStatus Profiler::ThreadCtxCheck( uint32_t threadId ) 2921{ 2922 if( m_threadCtx == threadId ) return ThreadCtxStatus::Same; 2923 QueueItem item; 2924 MemWrite( &item.hdr.type, QueueType::ThreadContext ); 2925 MemWrite( &item.threadCtx.thread, threadId ); 2926 if( !AppendData( &item, QueueDataSize[(int)QueueType::ThreadContext] ) ) return ThreadCtxStatus::ConnectionLost; 2927 m_threadCtx = threadId; 2928 m_refTimeThread = 0; 2929 return ThreadCtxStatus::Changed; 2930} 
2931 2932bool Profiler::CommitData() 2933{ 2934 bool ret = SendData( m_buffer + m_bufferStart, m_bufferOffset - m_bufferStart ); 2935 if( m_bufferOffset > TargetFrameSize * 2 ) m_bufferOffset = 0; 2936 m_bufferStart = m_bufferOffset; 2937 return ret; 2938} 2939 2940bool Profiler::SendData( const char* data, size_t len ) 2941{ 2942 const lz4sz_t lz4sz = LZ4_compress_fast_continue( (LZ4_stream_t*)m_stream, data, m_lz4Buf + sizeof( lz4sz_t ), (int)len, LZ4Size, 1 ); 2943 memcpy( m_lz4Buf, &lz4sz, sizeof( lz4sz ) ); 2944 return m_sock->Send( m_lz4Buf, lz4sz + sizeof( lz4sz_t ) ) != -1; 2945} 2946 2947void Profiler::SendString( uint64_t str, const char* ptr, size_t len, QueueType type ) 2948{ 2949 assert( type == QueueType::StringData || 2950 type == QueueType::ThreadName || 2951 type == QueueType::PlotName || 2952 type == QueueType::FrameName || 2953 type == QueueType::ExternalName || 2954 type == QueueType::ExternalThreadName || 2955 type == QueueType::FiberName ); 2956 2957 QueueItem item; 2958 MemWrite( &item.hdr.type, type ); 2959 MemWrite( &item.stringTransfer.ptr, str ); 2960 2961 assert( len <= std::numeric_limits<uint16_t>::max() ); 2962 auto l16 = uint16_t( len ); 2963 2964 NeedDataSize( QueueDataSize[(int)type] + sizeof( l16 ) + l16 ); 2965 2966 AppendDataUnsafe( &item, QueueDataSize[(int)type] ); 2967 AppendDataUnsafe( &l16, sizeof( l16 ) ); 2968 AppendDataUnsafe( ptr, l16 ); 2969} 2970 2971void Profiler::SendSingleString( const char* ptr, size_t len ) 2972{ 2973 QueueItem item; 2974 MemWrite( &item.hdr.type, QueueType::SingleStringData ); 2975 2976 assert( len <= std::numeric_limits<uint16_t>::max() ); 2977 auto l16 = uint16_t( len ); 2978 2979 NeedDataSize( QueueDataSize[(int)QueueType::SingleStringData] + sizeof( l16 ) + l16 ); 2980 2981 AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::SingleStringData] ); 2982 AppendDataUnsafe( &l16, sizeof( l16 ) ); 2983 AppendDataUnsafe( ptr, l16 ); 2984} 2985 2986void Profiler::SendSecondString( const char* ptr, 
size_t len ) 2987{ 2988 QueueItem item; 2989 MemWrite( &item.hdr.type, QueueType::SecondStringData ); 2990 2991 assert( len <= std::numeric_limits<uint16_t>::max() ); 2992 auto l16 = uint16_t( len ); 2993 2994 NeedDataSize( QueueDataSize[(int)QueueType::SecondStringData] + sizeof( l16 ) + l16 ); 2995 2996 AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::SecondStringData] ); 2997 AppendDataUnsafe( &l16, sizeof( l16 ) ); 2998 AppendDataUnsafe( ptr, l16 ); 2999} 3000 3001void Profiler::SendLongString( uint64_t str, const char* ptr, size_t len, QueueType type ) 3002{ 3003 assert( type == QueueType::FrameImageData || 3004 type == QueueType::SymbolCode || 3005 type == QueueType::SourceCode ); 3006 3007 QueueItem item; 3008 MemWrite( &item.hdr.type, type ); 3009 MemWrite( &item.stringTransfer.ptr, str ); 3010 3011 assert( len <= std::numeric_limits<uint32_t>::max() ); 3012 assert( QueueDataSize[(int)type] + sizeof( uint32_t ) + len <= TargetFrameSize ); 3013 auto l32 = uint32_t( len ); 3014 3015 NeedDataSize( QueueDataSize[(int)type] + sizeof( l32 ) + l32 ); 3016 3017 AppendDataUnsafe( &item, QueueDataSize[(int)type] ); 3018 AppendDataUnsafe( &l32, sizeof( l32 ) ); 3019 AppendDataUnsafe( ptr, l32 ); 3020} 3021 3022void Profiler::SendSourceLocation( uint64_t ptr ) 3023{ 3024 auto srcloc = (const SourceLocationData*)ptr; 3025 QueueItem item; 3026 MemWrite( &item.hdr.type, QueueType::SourceLocation ); 3027 MemWrite( &item.srcloc.name, (uint64_t)srcloc->name ); 3028 MemWrite( &item.srcloc.file, (uint64_t)srcloc->file ); 3029 MemWrite( &item.srcloc.function, (uint64_t)srcloc->function ); 3030 MemWrite( &item.srcloc.line, srcloc->line ); 3031 MemWrite( &item.srcloc.b, uint8_t( ( srcloc->color ) & 0xFF ) ); 3032 MemWrite( &item.srcloc.g, uint8_t( ( srcloc->color >> 8 ) & 0xFF ) ); 3033 MemWrite( &item.srcloc.r, uint8_t( ( srcloc->color >> 16 ) & 0xFF ) ); 3034 AppendData( &item, QueueDataSize[(int)QueueType::SourceLocation] ); 3035} 3036 3037void 
Profiler::SendSourceLocationPayload( uint64_t _ptr ) 3038{ 3039 auto ptr = (const char*)_ptr; 3040 3041 QueueItem item; 3042 MemWrite( &item.hdr.type, QueueType::SourceLocationPayload ); 3043 MemWrite( &item.stringTransfer.ptr, _ptr ); 3044 3045 uint16_t len; 3046 memcpy( &len, ptr, sizeof( len ) ); 3047 assert( len > 2 ); 3048 len -= 2; 3049 ptr += 2; 3050 3051 NeedDataSize( QueueDataSize[(int)QueueType::SourceLocationPayload] + sizeof( len ) + len ); 3052 3053 AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::SourceLocationPayload] ); 3054 AppendDataUnsafe( &len, sizeof( len ) ); 3055 AppendDataUnsafe( ptr, len ); 3056} 3057 3058void Profiler::SendCallstackPayload( uint64_t _ptr ) 3059{ 3060 auto ptr = (uintptr_t*)_ptr; 3061 3062 QueueItem item; 3063 MemWrite( &item.hdr.type, QueueType::CallstackPayload ); 3064 MemWrite( &item.stringTransfer.ptr, _ptr ); 3065 3066 const auto sz = *ptr++; 3067 const auto len = sz * sizeof( uint64_t ); 3068 const auto l16 = uint16_t( len ); 3069 3070 NeedDataSize( QueueDataSize[(int)QueueType::CallstackPayload] + sizeof( l16 ) + l16 ); 3071 3072 AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::CallstackPayload] ); 3073 AppendDataUnsafe( &l16, sizeof( l16 ) ); 3074 3075 if( compile_time_condition<sizeof( uintptr_t ) == sizeof( uint64_t )>::value ) 3076 { 3077 AppendDataUnsafe( ptr, sizeof( uint64_t ) * sz ); 3078 } 3079 else 3080 { 3081 for( uintptr_t i=0; i<sz; i++ ) 3082 { 3083 const auto val = uint64_t( *ptr++ ); 3084 AppendDataUnsafe( &val, sizeof( uint64_t ) ); 3085 } 3086 } 3087} 3088 3089void Profiler::SendCallstackPayload64( uint64_t _ptr ) 3090{ 3091 auto ptr = (uint64_t*)_ptr; 3092 3093 QueueItem item; 3094 MemWrite( &item.hdr.type, QueueType::CallstackPayload ); 3095 MemWrite( &item.stringTransfer.ptr, _ptr ); 3096 3097 const auto sz = *ptr++; 3098 const auto len = sz * sizeof( uint64_t ); 3099 const auto l16 = uint16_t( len ); 3100 3101 NeedDataSize( QueueDataSize[(int)QueueType::CallstackPayload] + sizeof( 
l16 ) + l16 ); 3102 3103 AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::CallstackPayload] ); 3104 AppendDataUnsafe( &l16, sizeof( l16 ) ); 3105 AppendDataUnsafe( ptr, sizeof( uint64_t ) * sz ); 3106} 3107 3108void Profiler::SendCallstackAlloc( uint64_t _ptr ) 3109{ 3110 auto ptr = (const char*)_ptr; 3111 3112 QueueItem item; 3113 MemWrite( &item.hdr.type, QueueType::CallstackAllocPayload ); 3114 MemWrite( &item.stringTransfer.ptr, _ptr ); 3115 3116 uint16_t len; 3117 memcpy( &len, ptr, 2 ); 3118 ptr += 2; 3119 3120 NeedDataSize( QueueDataSize[(int)QueueType::CallstackAllocPayload] + sizeof( len ) + len ); 3121 3122 AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::CallstackAllocPayload] ); 3123 AppendDataUnsafe( &len, sizeof( len ) ); 3124 AppendDataUnsafe( ptr, len ); 3125} 3126 3127void Profiler::QueueCallstackFrame( uint64_t ptr ) 3128{ 3129#ifdef TRACY_HAS_CALLSTACK 3130 m_symbolQueue.emplace( SymbolQueueItem { SymbolQueueItemType::CallstackFrame, ptr } ); 3131#else 3132 AckServerQuery(); 3133#endif 3134} 3135 3136void Profiler::QueueSymbolQuery( uint64_t symbol ) 3137{ 3138#ifdef TRACY_HAS_CALLSTACK 3139 // Special handling for kernel frames 3140 if( symbol >> 63 != 0 ) 3141 { 3142 SendSingleString( "<kernel>" ); 3143 QueueItem item; 3144 MemWrite( &item.hdr.type, QueueType::SymbolInformation ); 3145 MemWrite( &item.symbolInformation.line, 0 ); 3146 MemWrite( &item.symbolInformation.symAddr, symbol ); 3147 AppendData( &item, QueueDataSize[(int)QueueType::SymbolInformation] ); 3148 } 3149 else 3150 { 3151 m_symbolQueue.emplace( SymbolQueueItem { SymbolQueueItemType::SymbolQuery, symbol } ); 3152 } 3153#else 3154 AckServerQuery(); 3155#endif 3156} 3157 3158void Profiler::QueueExternalName( uint64_t ptr ) 3159{ 3160#ifdef TRACY_HAS_SYSTEM_TRACING 3161 m_symbolQueue.emplace( SymbolQueueItem { SymbolQueueItemType::ExternalName, ptr } ); 3162#endif 3163} 3164 3165void Profiler::QueueKernelCode( uint64_t symbol, uint32_t size ) 3166{ 3167 assert( symbol 
>> 63 != 0 ); 3168#ifdef TRACY_HAS_CALLSTACK 3169 m_symbolQueue.emplace( SymbolQueueItem { SymbolQueueItemType::KernelCode, symbol, size } ); 3170#else 3171 AckSymbolCodeNotAvailable(); 3172#endif 3173} 3174 3175void Profiler::QueueSourceCodeQuery( uint32_t id ) 3176{ 3177 assert( m_exectime != 0 ); 3178 assert( m_queryData ); 3179 m_symbolQueue.emplace( SymbolQueueItem { SymbolQueueItemType::SourceCode, uint64_t( m_queryData ), uint64_t( m_queryImage ), id } ); 3180 m_queryData = nullptr; 3181 m_queryImage = nullptr; 3182} 3183 3184#ifdef TRACY_HAS_CALLSTACK 3185void Profiler::HandleSymbolQueueItem( const SymbolQueueItem& si ) 3186{ 3187 switch( si.type ) 3188 { 3189 case SymbolQueueItemType::CallstackFrame: 3190 { 3191 const auto frameData = DecodeCallstackPtr( si.ptr ); 3192 auto data = tracy_malloc_fast( sizeof( CallstackEntry ) * frameData.size ); 3193 memcpy( data, frameData.data, sizeof( CallstackEntry ) * frameData.size ); 3194 TracyLfqPrepare( QueueType::CallstackFrameSize ); 3195 MemWrite( &item->callstackFrameSizeFat.ptr, si.ptr ); 3196 MemWrite( &item->callstackFrameSizeFat.size, frameData.size ); 3197 MemWrite( &item->callstackFrameSizeFat.data, (uint64_t)data ); 3198 MemWrite( &item->callstackFrameSizeFat.imageName, (uint64_t)frameData.imageName ); 3199 TracyLfqCommit; 3200 break; 3201 } 3202 case SymbolQueueItemType::SymbolQuery: 3203 { 3204#ifdef __ANDROID__ 3205 // On Android it's common for code to be in mappings that are only executable 3206 // but not readable. 
3207 if( !EnsureReadable( si.ptr ) ) 3208 { 3209 TracyLfqPrepare( QueueType::AckServerQueryNoop ); 3210 TracyLfqCommit; 3211 break; 3212 } 3213#endif 3214 const auto sym = DecodeSymbolAddress( si.ptr ); 3215 TracyLfqPrepare( QueueType::SymbolInformation ); 3216 MemWrite( &item->symbolInformationFat.line, sym.line ); 3217 MemWrite( &item->symbolInformationFat.symAddr, si.ptr ); 3218 MemWrite( &item->symbolInformationFat.fileString, (uint64_t)sym.file ); 3219 MemWrite( &item->symbolInformationFat.needFree, (uint8_t)sym.needFree ); 3220 TracyLfqCommit; 3221 break; 3222 } 3223#ifdef TRACY_HAS_SYSTEM_TRACING 3224 case SymbolQueueItemType::ExternalName: 3225 { 3226 const char* threadName; 3227 const char* name; 3228 SysTraceGetExternalName( si.ptr, threadName, name ); 3229 TracyLfqPrepare( QueueType::ExternalNameMetadata ); 3230 MemWrite( &item->externalNameMetadata.thread, si.ptr ); 3231 MemWrite( &item->externalNameMetadata.name, (uint64_t)name ); 3232 MemWrite( &item->externalNameMetadata.threadName, (uint64_t)threadName ); 3233 TracyLfqCommit; 3234 break; 3235 } 3236#endif 3237 case SymbolQueueItemType::KernelCode: 3238 { 3239#ifdef _WIN32 3240 auto mod = GetKernelModulePath( si.ptr ); 3241 if( mod ) 3242 { 3243 auto fn = DecodeCallstackPtrFast( si.ptr ); 3244 if( *fn ) 3245 { 3246 auto hnd = LoadLibraryExA( mod, nullptr, DONT_RESOLVE_DLL_REFERENCES ); 3247 if( hnd ) 3248 { 3249 auto ptr = (const void*)GetProcAddress( hnd, fn ); 3250 if( ptr ) 3251 { 3252 auto buf = (char*)tracy_malloc( si.extra ); 3253 memcpy( buf, ptr, si.extra ); 3254 FreeLibrary( hnd ); 3255 TracyLfqPrepare( QueueType::SymbolCodeMetadata ); 3256 MemWrite( &item->symbolCodeMetadata.symbol, si.ptr ); 3257 MemWrite( &item->symbolCodeMetadata.ptr, (uint64_t)buf ); 3258 MemWrite( &item->symbolCodeMetadata.size, (uint32_t)si.extra ); 3259 TracyLfqCommit; 3260 break; 3261 } 3262 FreeLibrary( hnd ); 3263 } 3264 } 3265 } 3266#endif 3267 TracyLfqPrepare( QueueType::AckSymbolCodeNotAvailable ); 3268 
TracyLfqCommit; 3269 break; 3270 } 3271 case SymbolQueueItemType::SourceCode: 3272 HandleSourceCodeQuery( (char*)si.ptr, (char*)si.extra, si.id ); 3273 break; 3274 default: 3275 assert( false ); 3276 break; 3277 } 3278} 3279 3280void Profiler::SymbolWorker() 3281{ 3282#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER 3283 s_symbolTid = syscall( SYS_gettid ); 3284#endif 3285 3286 ThreadExitHandler threadExitHandler; 3287 SetThreadName( "Tracy Symbol Worker" ); 3288#ifdef TRACY_USE_RPMALLOC 3289 InitRpmalloc(); 3290#endif 3291 InitCallstack(); 3292 while( m_timeBegin.load( std::memory_order_relaxed ) == 0 ) std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); 3293 3294 for(;;) 3295 { 3296 const auto shouldExit = ShouldExit(); 3297#ifdef TRACY_ON_DEMAND 3298 if( !IsConnected() ) 3299 { 3300 if( shouldExit ) 3301 { 3302 s_symbolThreadGone.store( true, std::memory_order_release ); 3303 return; 3304 } 3305 while( m_symbolQueue.front() ) m_symbolQueue.pop(); 3306 std::this_thread::sleep_for( std::chrono::milliseconds( 20 ) ); 3307 continue; 3308 } 3309#endif 3310 auto si = m_symbolQueue.front(); 3311 if( si ) 3312 { 3313 HandleSymbolQueueItem( *si ); 3314 m_symbolQueue.pop(); 3315 } 3316 else 3317 { 3318 if( shouldExit ) 3319 { 3320 s_symbolThreadGone.store( true, std::memory_order_release ); 3321 return; 3322 } 3323 std::this_thread::sleep_for( std::chrono::milliseconds( 20 ) ); 3324 } 3325 } 3326} 3327#endif 3328 3329bool Profiler::HandleServerQuery() 3330{ 3331 ServerQueryPacket payload; 3332 if( !m_sock->Read( &payload, sizeof( payload ), 10 ) ) return false; 3333 3334 uint8_t type; 3335 uint64_t ptr; 3336 memcpy( &type, &payload.type, sizeof( payload.type ) ); 3337 memcpy( &ptr, &payload.ptr, sizeof( payload.ptr ) ); 3338 3339 switch( type ) 3340 { 3341 case ServerQueryString: 3342 SendString( ptr, (const char*)ptr, QueueType::StringData ); 3343 break; 3344 case ServerQueryThreadString: 3345 if( ptr == m_mainThread ) 3346 { 3347 SendString( ptr, 
"Main thread", 11, QueueType::ThreadName ); 3348 } 3349 else 3350 { 3351 SendString( ptr, GetThreadName( ptr ), QueueType::ThreadName ); 3352 } 3353 break; 3354 case ServerQuerySourceLocation: 3355 SendSourceLocation( ptr ); 3356 break; 3357 case ServerQueryPlotName: 3358 SendString( ptr, (const char*)ptr, QueueType::PlotName ); 3359 break; 3360 case ServerQueryTerminate: 3361 return false; 3362 case ServerQueryCallstackFrame: 3363 QueueCallstackFrame( ptr ); 3364 break; 3365 case ServerQueryFrameName: 3366 SendString( ptr, (const char*)ptr, QueueType::FrameName ); 3367 break; 3368 case ServerQueryDisconnect: 3369 HandleDisconnect(); 3370 return false; 3371#ifdef TRACY_HAS_SYSTEM_TRACING 3372 case ServerQueryExternalName: 3373 QueueExternalName( ptr ); 3374 break; 3375#endif 3376 case ServerQueryParameter: 3377 HandleParameter( ptr ); 3378 break; 3379 case ServerQuerySymbol: 3380 QueueSymbolQuery( ptr ); 3381 break; 3382#ifndef TRACY_NO_CODE_TRANSFER 3383 case ServerQuerySymbolCode: 3384 HandleSymbolCodeQuery( ptr, payload.extra ); 3385 break; 3386#endif 3387 case ServerQuerySourceCode: 3388 QueueSourceCodeQuery( uint32_t( ptr ) ); 3389 break; 3390 case ServerQueryDataTransfer: 3391 if( m_queryData ) 3392 { 3393 assert( !m_queryImage ); 3394 m_queryImage = m_queryData; 3395 } 3396 m_queryDataPtr = m_queryData = (char*)tracy_malloc( ptr + 11 ); 3397 AckServerQuery(); 3398 break; 3399 case ServerQueryDataTransferPart: 3400 memcpy( m_queryDataPtr, &ptr, 8 ); 3401 memcpy( m_queryDataPtr+8, &payload.extra, 4 ); 3402 m_queryDataPtr += 12; 3403 AckServerQuery(); 3404 break; 3405#ifdef TRACY_FIBERS 3406 case ServerQueryFiberName: 3407 SendString( ptr, (const char*)ptr, QueueType::FiberName ); 3408 break; 3409#endif 3410 default: 3411 assert( false ); 3412 break; 3413 } 3414 3415 return true; 3416} 3417 3418void Profiler::HandleDisconnect() 3419{ 3420 moodycamel::ConsumerToken token( GetQueue() ); 3421 3422#ifdef TRACY_HAS_SYSTEM_TRACING 3423 if( s_sysTraceThread ) 3424 { 
3425 auto timestamp = GetTime(); 3426 for(;;) 3427 { 3428 const auto status = DequeueContextSwitches( token, timestamp ); 3429 if( status == DequeueStatus::ConnectionLost ) 3430 { 3431 return; 3432 } 3433 else if( status == DequeueStatus::QueueEmpty ) 3434 { 3435 if( m_bufferOffset != m_bufferStart ) 3436 { 3437 if( !CommitData() ) return; 3438 } 3439 } 3440 if( timestamp < 0 ) 3441 { 3442 if( m_bufferOffset != m_bufferStart ) 3443 { 3444 if( !CommitData() ) return; 3445 } 3446 break; 3447 } 3448 ClearSerial(); 3449 if( m_sock->HasData() ) 3450 { 3451 while( m_sock->HasData() ) 3452 { 3453 if( !HandleServerQuery() ) return; 3454 } 3455 if( m_bufferOffset != m_bufferStart ) 3456 { 3457 if( !CommitData() ) return; 3458 } 3459 } 3460 else 3461 { 3462 if( m_bufferOffset != m_bufferStart ) 3463 { 3464 if( !CommitData() ) return; 3465 } 3466 std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); 3467 } 3468 } 3469 } 3470#endif 3471 3472 QueueItem terminate; 3473 MemWrite( &terminate.hdr.type, QueueType::Terminate ); 3474 if( !SendData( (const char*)&terminate, 1 ) ) return; 3475 for(;;) 3476 { 3477 ClearQueues( token ); 3478 if( m_sock->HasData() ) 3479 { 3480 while( m_sock->HasData() ) 3481 { 3482 if( !HandleServerQuery() ) return; 3483 } 3484 if( m_bufferOffset != m_bufferStart ) 3485 { 3486 if( !CommitData() ) return; 3487 } 3488 } 3489 else 3490 { 3491 if( m_bufferOffset != m_bufferStart ) 3492 { 3493 if( !CommitData() ) return; 3494 } 3495 std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); 3496 } 3497 } 3498} 3499 3500void Profiler::CalibrateTimer() 3501{ 3502 m_timerMul = 1.; 3503 3504#ifdef TRACY_HW_TIMER 3505 3506# if !defined TRACY_TIMER_QPC && defined TRACY_TIMER_FALLBACK 3507 const bool needCalibration = HardwareSupportsInvariantTSC(); 3508# else 3509 const bool needCalibration = true; 3510# endif 3511 if( needCalibration ) 3512 { 3513 std::atomic_signal_fence( std::memory_order_acq_rel ); 3514 const auto t0 = 
std::chrono::high_resolution_clock::now(); 3515 const auto r0 = GetTime(); 3516 std::atomic_signal_fence( std::memory_order_acq_rel ); 3517 std::this_thread::sleep_for( std::chrono::milliseconds( 200 ) ); 3518 std::atomic_signal_fence( std::memory_order_acq_rel ); 3519 const auto t1 = std::chrono::high_resolution_clock::now(); 3520 const auto r1 = GetTime(); 3521 std::atomic_signal_fence( std::memory_order_acq_rel ); 3522 3523 const auto dt = std::chrono::duration_cast<std::chrono::nanoseconds>( t1 - t0 ).count(); 3524 const auto dr = r1 - r0; 3525 3526 m_timerMul = double( dt ) / double( dr ); 3527 } 3528#endif 3529} 3530 3531void Profiler::CalibrateDelay() 3532{ 3533 constexpr int Iterations = 50000; 3534 3535 auto mindiff = std::numeric_limits<int64_t>::max(); 3536 for( int i=0; i<Iterations * 10; i++ ) 3537 { 3538 const auto t0i = GetTime(); 3539 const auto t1i = GetTime(); 3540 const auto dti = t1i - t0i; 3541 if( dti > 0 && dti < mindiff ) mindiff = dti; 3542 } 3543 m_resolution = mindiff; 3544 3545#ifdef TRACY_DELAYED_INIT 3546 m_delay = m_resolution; 3547#else 3548 constexpr int Events = Iterations * 2; // start + end 3549 static_assert( Events < QueuePrealloc, "Delay calibration loop will allocate memory in queue" ); 3550 3551 static const tracy::SourceLocationData __tracy_source_location { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; 3552 const auto t0 = GetTime(); 3553 for( int i=0; i<Iterations; i++ ) 3554 { 3555 { 3556 TracyLfqPrepare( QueueType::ZoneBegin ); 3557 MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); 3558 MemWrite( &item->zoneBegin.srcloc, (uint64_t)&__tracy_source_location ); 3559 TracyLfqCommit; 3560 } 3561 { 3562 TracyLfqPrepare( QueueType::ZoneEnd ); 3563 MemWrite( &item->zoneEnd.time, GetTime() ); 3564 TracyLfqCommit; 3565 } 3566 } 3567 const auto t1 = GetTime(); 3568 const auto dt = t1 - t0; 3569 m_delay = dt / Events; 3570 3571 moodycamel::ConsumerToken token( GetQueue() ); 3572 int left = Events; 3573 while( 
left != 0 ) 3574 { 3575 const auto sz = GetQueue().try_dequeue_bulk_single( token, [](const uint64_t&){}, [](QueueItem* item, size_t sz){} ); 3576 assert( sz > 0 ); 3577 left -= (int)sz; 3578 } 3579 assert( GetQueue().size_approx() == 0 ); 3580#endif 3581} 3582 3583void Profiler::ReportTopology() 3584{ 3585#ifndef TRACY_DELAYED_INIT 3586 struct CpuData 3587 { 3588 uint32_t package; 3589 uint32_t core; 3590 uint32_t thread; 3591 }; 3592 3593#if defined _WIN32 3594# ifdef TRACY_UWP 3595 t_GetLogicalProcessorInformationEx _GetLogicalProcessorInformationEx = &::GetLogicalProcessorInformationEx; 3596# else 3597 t_GetLogicalProcessorInformationEx _GetLogicalProcessorInformationEx = (t_GetLogicalProcessorInformationEx)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "GetLogicalProcessorInformationEx" ); 3598# endif 3599 if( !_GetLogicalProcessorInformationEx ) return; 3600 3601 DWORD psz = 0; 3602 _GetLogicalProcessorInformationEx( RelationProcessorPackage, nullptr, &psz ); 3603 auto packageInfo = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)tracy_malloc( psz ); 3604 auto res = _GetLogicalProcessorInformationEx( RelationProcessorPackage, packageInfo, &psz ); 3605 assert( res ); 3606 3607 DWORD csz = 0; 3608 _GetLogicalProcessorInformationEx( RelationProcessorCore, nullptr, &csz ); 3609 auto coreInfo = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)tracy_malloc( csz ); 3610 res = _GetLogicalProcessorInformationEx( RelationProcessorCore, coreInfo, &csz ); 3611 assert( res ); 3612 3613 SYSTEM_INFO sysinfo; 3614 GetSystemInfo( &sysinfo ); 3615 const uint32_t numcpus = sysinfo.dwNumberOfProcessors; 3616 3617 auto cpuData = (CpuData*)tracy_malloc( sizeof( CpuData ) * numcpus ); 3618 for( uint32_t i=0; i<numcpus; i++ ) cpuData[i].thread = i; 3619 3620 int idx = 0; 3621 auto ptr = packageInfo; 3622 while( (char*)ptr < ((char*)packageInfo) + psz ) 3623 { 3624 assert( ptr->Relationship == RelationProcessorPackage ); 3625 // FIXME account for GroupCount 3626 auto mask = 
ptr->Processor.GroupMask[0].Mask; 3627 int core = 0; 3628 while( mask != 0 ) 3629 { 3630 if( mask & 1 ) cpuData[core].package = idx; 3631 core++; 3632 mask >>= 1; 3633 } 3634 ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(((char*)ptr) + ptr->Size); 3635 idx++; 3636 } 3637 3638 idx = 0; 3639 ptr = coreInfo; 3640 while( (char*)ptr < ((char*)coreInfo) + csz ) 3641 { 3642 assert( ptr->Relationship == RelationProcessorCore ); 3643 // FIXME account for GroupCount 3644 auto mask = ptr->Processor.GroupMask[0].Mask; 3645 int core = 0; 3646 while( mask != 0 ) 3647 { 3648 if( mask & 1 ) cpuData[core].core = idx; 3649 core++; 3650 mask >>= 1; 3651 } 3652 ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(((char*)ptr) + ptr->Size); 3653 idx++; 3654 } 3655 3656 for( uint32_t i=0; i<numcpus; i++ ) 3657 { 3658 auto& data = cpuData[i]; 3659 3660 TracyLfqPrepare( QueueType::CpuTopology ); 3661 MemWrite( &item->cpuTopology.package, data.package ); 3662 MemWrite( &item->cpuTopology.core, data.core ); 3663 MemWrite( &item->cpuTopology.thread, data.thread ); 3664 3665#ifdef TRACY_ON_DEMAND 3666 DeferItem( *item ); 3667#endif 3668 3669 TracyLfqCommit; 3670 } 3671 3672 tracy_free( cpuData ); 3673 tracy_free( coreInfo ); 3674 tracy_free( packageInfo ); 3675#elif defined __linux__ 3676 const int numcpus = std::thread::hardware_concurrency(); 3677 auto cpuData = (CpuData*)tracy_malloc( sizeof( CpuData ) * numcpus ); 3678 memset( cpuData, 0, sizeof( CpuData ) * numcpus ); 3679 3680 const char* basePath = "/sys/devices/system/cpu/cpu"; 3681 for( int i=0; i<numcpus; i++ ) 3682 { 3683 char path[1024]; 3684 sprintf( path, "%s%i/topology/physical_package_id", basePath, i ); 3685 char buf[1024]; 3686 FILE* f = fopen( path, "rb" ); 3687 if( !f ) 3688 { 3689 tracy_free( cpuData ); 3690 return; 3691 } 3692 auto read = fread( buf, 1, 1024, f ); 3693 buf[read] = '\0'; 3694 fclose( f ); 3695 cpuData[i].package = uint32_t( atoi( buf ) ); 3696 cpuData[i].thread = i; 3697 sprintf( path, 
"%s%i/topology/core_id", basePath, i ); 3698 f = fopen( path, "rb" ); 3699 read = fread( buf, 1, 1024, f ); 3700 buf[read] = '\0'; 3701 fclose( f ); 3702 cpuData[i].core = uint32_t( atoi( buf ) ); 3703 } 3704 3705 for( int i=0; i<numcpus; i++ ) 3706 { 3707 auto& data = cpuData[i]; 3708 3709 TracyLfqPrepare( QueueType::CpuTopology ); 3710 MemWrite( &item->cpuTopology.package, data.package ); 3711 MemWrite( &item->cpuTopology.core, data.core ); 3712 MemWrite( &item->cpuTopology.thread, data.thread ); 3713 3714#ifdef TRACY_ON_DEMAND 3715 DeferItem( *item ); 3716#endif 3717 3718 TracyLfqCommit; 3719 } 3720 3721 tracy_free( cpuData ); 3722#endif 3723#endif 3724} 3725 3726void Profiler::SendCallstack( int depth, const char* skipBefore ) 3727{ 3728#ifdef TRACY_HAS_CALLSTACK 3729 auto ptr = Callstack( depth ); 3730 CutCallstack( ptr, skipBefore ); 3731 3732 TracyQueuePrepare( QueueType::Callstack ); 3733 MemWrite( &item->callstackFat.ptr, (uint64_t)ptr ); 3734 TracyQueueCommit( callstackFatThread ); 3735#endif 3736} 3737 3738void Profiler::CutCallstack( void* callstack, const char* skipBefore ) 3739{ 3740#ifdef TRACY_HAS_CALLSTACK 3741 auto data = (uintptr_t*)callstack; 3742 const auto sz = *data++; 3743 uintptr_t i; 3744 for( i=0; i<sz; i++ ) 3745 { 3746 auto name = DecodeCallstackPtrFast( uint64_t( data[i] ) ); 3747 const bool found = strcmp( name, skipBefore ) == 0; 3748 if( found ) 3749 { 3750 i++; 3751 break; 3752 } 3753 } 3754 3755 if( i != sz ) 3756 { 3757 memmove( data, data + i, ( sz - i ) * sizeof( uintptr_t* ) ); 3758 *--data = sz - i; 3759 } 3760#endif 3761} 3762 3763#ifdef TRACY_HAS_SYSTIME 3764void Profiler::ProcessSysTime() 3765{ 3766 if( m_shutdown.load( std::memory_order_relaxed ) ) return; 3767 auto t = std::chrono::high_resolution_clock::now().time_since_epoch().count(); 3768 if( t - m_sysTimeLast > 100000000 ) // 100 ms 3769 { 3770 auto sysTime = m_sysTime.Get(); 3771 if( sysTime >= 0 ) 3772 { 3773 m_sysTimeLast = t; 3774 3775 TracyLfqPrepare( 
QueueType::SysTimeReport ); 3776 MemWrite( &item->sysTime.time, GetTime() ); 3777 MemWrite( &item->sysTime.sysTime, sysTime ); 3778 TracyLfqCommit; 3779 } 3780 } 3781} 3782#endif 3783 3784void Profiler::HandleParameter( uint64_t payload ) 3785{ 3786 assert( m_paramCallback ); 3787 const auto idx = uint32_t( payload >> 32 ); 3788 const auto val = int32_t( payload & 0xFFFFFFFF ); 3789 m_paramCallback( m_paramCallbackData, idx, val ); 3790 AckServerQuery(); 3791} 3792 3793void Profiler::HandleSymbolCodeQuery( uint64_t symbol, uint32_t size ) 3794{ 3795 if( symbol >> 63 != 0 ) 3796 { 3797 QueueKernelCode( symbol, size ); 3798 } 3799 else 3800 { 3801#ifdef __ANDROID__ 3802 // On Android it's common for code to be in mappings that are only executable 3803 // but not readable. 3804 if( !EnsureReadable( symbol ) ) 3805 { 3806 AckSymbolCodeNotAvailable(); 3807 return; 3808 } 3809#endif 3810 SendLongString( symbol, (const char*)symbol, size, QueueType::SymbolCode ); 3811 } 3812} 3813 3814void Profiler::HandleSourceCodeQuery( char* data, char* image, uint32_t id ) 3815{ 3816 bool ok = false; 3817 struct stat st; 3818 if( stat( data, &st ) == 0 && (uint64_t)st.st_mtime < m_exectime ) 3819 { 3820 if( st.st_size < ( TargetFrameSize - 16 ) ) 3821 { 3822 FILE* f = fopen( data, "rb" ); 3823 if( f ) 3824 { 3825 auto ptr = (char*)tracy_malloc_fast( st.st_size ); 3826 auto rd = fread( ptr, 1, st.st_size, f ); 3827 fclose( f ); 3828 if( rd == (size_t)st.st_size ) 3829 { 3830 TracyLfqPrepare( QueueType::SourceCodeMetadata ); 3831 MemWrite( &item->sourceCodeMetadata.ptr, (uint64_t)ptr ); 3832 MemWrite( &item->sourceCodeMetadata.size, (uint32_t)rd ); 3833 MemWrite( &item->sourceCodeMetadata.id, id ); 3834 TracyLfqCommit; 3835 ok = true; 3836 } 3837 } 3838 } 3839 } 3840 3841#ifdef TRACY_DEBUGINFOD 3842 else if( image && data[0] == '/' ) 3843 { 3844 size_t size; 3845 auto buildid = GetBuildIdForImage( image, size ); 3846 if( buildid ) 3847 { 3848 auto d = debuginfod_find_source( 
GetDebuginfodClient(), buildid, size, data, nullptr ); 3849 TracyDebug( "DebugInfo source query: %s, fn: %s, image: %s\n", d >= 0 ? " ok " : "fail", data, image ); 3850 if( d >= 0 ) 3851 { 3852 struct stat st; 3853 fstat( d, &st ); 3854 if( st.st_size < ( TargetFrameSize - 16 ) ) 3855 { 3856 lseek( d, 0, SEEK_SET ); 3857 auto ptr = (char*)tracy_malloc_fast( st.st_size ); 3858 auto rd = read( d, ptr, st.st_size ); 3859 if( rd == (size_t)st.st_size ) 3860 { 3861 TracyLfqPrepare( QueueType::SourceCodeMetadata ); 3862 MemWrite( &item->sourceCodeMetadata.ptr, (uint64_t)ptr ); 3863 MemWrite( &item->sourceCodeMetadata.size, (uint32_t)rd ); 3864 MemWrite( &item->sourceCodeMetadata.id, id ); 3865 TracyLfqCommit; 3866 ok = true; 3867 } 3868 } 3869 close( d ); 3870 } 3871 } 3872 } 3873 else 3874 { 3875 TracyDebug( "DebugInfo invalid query fn: %s, image: %s\n", data, image ); 3876 } 3877#endif 3878 3879 if( !ok && m_sourceCallback ) 3880 { 3881 size_t sz; 3882 char* ptr = m_sourceCallback( m_sourceCallbackData, data, sz ); 3883 if( ptr ) 3884 { 3885 if( sz < ( TargetFrameSize - 16 ) ) 3886 { 3887 TracyLfqPrepare( QueueType::SourceCodeMetadata ); 3888 MemWrite( &item->sourceCodeMetadata.ptr, (uint64_t)ptr ); 3889 MemWrite( &item->sourceCodeMetadata.size, (uint32_t)sz ); 3890 MemWrite( &item->sourceCodeMetadata.id, id ); 3891 TracyLfqCommit; 3892 ok = true; 3893 } 3894 } 3895 } 3896 3897 if( !ok ) 3898 { 3899 TracyLfqPrepare( QueueType::AckSourceCodeNotAvailable ); 3900 MemWrite( &item->sourceCodeNotAvailable, id ); 3901 TracyLfqCommit; 3902 } 3903 3904 tracy_free_fast( data ); 3905 tracy_free_fast( image ); 3906} 3907 3908#if defined _WIN32 && defined TRACY_TIMER_QPC 3909int64_t Profiler::GetTimeQpc() 3910{ 3911 LARGE_INTEGER t; 3912 QueryPerformanceCounter( &t ); 3913 return t.QuadPart; 3914} 3915#endif 3916 3917} 3918 3919#ifdef __cplusplus 3920extern "C" { 3921#endif 3922 3923TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* 
srcloc, int active ) 3924{ 3925 ___tracy_c_zone_context ctx; 3926#ifdef TRACY_ON_DEMAND 3927 ctx.active = active && tracy::GetProfiler().IsConnected(); 3928#else 3929 ctx.active = active; 3930#endif 3931 if( !ctx.active ) return ctx; 3932 const auto id = tracy::GetProfiler().GetNextZoneId(); 3933 ctx.id = id; 3934 3935#ifndef TRACY_NO_VERIFY 3936 { 3937 TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); 3938 tracy::MemWrite( &item->zoneValidation.id, id ); 3939 TracyQueueCommitC( zoneValidationThread ); 3940 } 3941#endif 3942 { 3943 TracyQueuePrepareC( tracy::QueueType::ZoneBegin ); 3944 tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); 3945 tracy::MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc ); 3946 TracyQueueCommitC( zoneBeginThread ); 3947 } 3948 return ctx; 3949} 3950 3951TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int depth, int active ) 3952{ 3953 ___tracy_c_zone_context ctx; 3954#ifdef TRACY_ON_DEMAND 3955 ctx.active = active && tracy::GetProfiler().IsConnected(); 3956#else 3957 ctx.active = active; 3958#endif 3959 if( !ctx.active ) return ctx; 3960 const auto id = tracy::GetProfiler().GetNextZoneId(); 3961 ctx.id = id; 3962 3963#ifndef TRACY_NO_VERIFY 3964 { 3965 TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); 3966 tracy::MemWrite( &item->zoneValidation.id, id ); 3967 TracyQueueCommitC( zoneValidationThread ); 3968 } 3969#endif 3970 tracy::GetProfiler().SendCallstack( depth ); 3971 { 3972 TracyQueuePrepareC( tracy::QueueType::ZoneBeginCallstack ); 3973 tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); 3974 tracy::MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc ); 3975 TracyQueueCommitC( zoneBeginThread ); 3976 } 3977 return ctx; 3978} 3979 3980TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int active ) 3981{ 3982 ___tracy_c_zone_context ctx; 3983#ifdef TRACY_ON_DEMAND 3984 ctx.active = active && 
tracy::GetProfiler().IsConnected(); 3985#else 3986 ctx.active = active; 3987#endif 3988 if( !ctx.active ) 3989 { 3990 tracy::tracy_free( (void*)srcloc ); 3991 return ctx; 3992 } 3993 const auto id = tracy::GetProfiler().GetNextZoneId(); 3994 ctx.id = id; 3995 3996#ifndef TRACY_NO_VERIFY 3997 { 3998 TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); 3999 tracy::MemWrite( &item->zoneValidation.id, id ); 4000 TracyQueueCommitC( zoneValidationThread ); 4001 } 4002#endif 4003 { 4004 TracyQueuePrepareC( tracy::QueueType::ZoneBeginAllocSrcLoc ); 4005 tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); 4006 tracy::MemWrite( &item->zoneBegin.srcloc, srcloc ); 4007 TracyQueueCommitC( zoneBeginThread ); 4008 } 4009 return ctx; 4010} 4011 4012TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srcloc, int depth, int active ) 4013{ 4014 ___tracy_c_zone_context ctx; 4015#ifdef TRACY_ON_DEMAND 4016 ctx.active = active && tracy::GetProfiler().IsConnected(); 4017#else 4018 ctx.active = active; 4019#endif 4020 if( !ctx.active ) 4021 { 4022 tracy::tracy_free( (void*)srcloc ); 4023 return ctx; 4024 } 4025 const auto id = tracy::GetProfiler().GetNextZoneId(); 4026 ctx.id = id; 4027 4028#ifndef TRACY_NO_VERIFY 4029 { 4030 TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); 4031 tracy::MemWrite( &item->zoneValidation.id, id ); 4032 TracyQueueCommitC( zoneValidationThread ); 4033 } 4034#endif 4035 tracy::GetProfiler().SendCallstack( depth ); 4036 { 4037 TracyQueuePrepareC( tracy::QueueType::ZoneBeginAllocSrcLocCallstack ); 4038 tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); 4039 tracy::MemWrite( &item->zoneBegin.srcloc, srcloc ); 4040 TracyQueueCommitC( zoneBeginThread ); 4041 } 4042 return ctx; 4043} 4044 4045TRACY_API void ___tracy_emit_zone_end( TracyCZoneCtx ctx ) 4046{ 4047 if( !ctx.active ) return; 4048#ifndef TRACY_NO_VERIFY 4049 { 4050 TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); 4051 
tracy::MemWrite( &item->zoneValidation.id, ctx.id ); 4052 TracyQueueCommitC( zoneValidationThread ); 4053 } 4054#endif 4055 { 4056 TracyQueuePrepareC( tracy::QueueType::ZoneEnd ); 4057 tracy::MemWrite( &item->zoneEnd.time, tracy::Profiler::GetTime() ); 4058 TracyQueueCommitC( zoneEndThread ); 4059 } 4060} 4061 4062TRACY_API void ___tracy_emit_zone_text( TracyCZoneCtx ctx, const char* txt, size_t size ) 4063{ 4064 assert( size < std::numeric_limits<uint16_t>::max() ); 4065 if( !ctx.active ) return; 4066 auto ptr = (char*)tracy::tracy_malloc( size ); 4067 memcpy( ptr, txt, size ); 4068#ifndef TRACY_NO_VERIFY 4069 { 4070 TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); 4071 tracy::MemWrite( &item->zoneValidation.id, ctx.id ); 4072 TracyQueueCommitC( zoneValidationThread ); 4073 } 4074#endif 4075 { 4076 TracyQueuePrepareC( tracy::QueueType::ZoneText ); 4077 tracy::MemWrite( &item->zoneTextFat.text, (uint64_t)ptr ); 4078 tracy::MemWrite( &item->zoneTextFat.size, (uint16_t)size ); 4079 TracyQueueCommitC( zoneTextFatThread ); 4080 } 4081} 4082 4083TRACY_API void ___tracy_emit_zone_name( TracyCZoneCtx ctx, const char* txt, size_t size ) 4084{ 4085 assert( size < std::numeric_limits<uint16_t>::max() ); 4086 if( !ctx.active ) return; 4087 auto ptr = (char*)tracy::tracy_malloc( size ); 4088 memcpy( ptr, txt, size ); 4089#ifndef TRACY_NO_VERIFY 4090 { 4091 TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); 4092 tracy::MemWrite( &item->zoneValidation.id, ctx.id ); 4093 TracyQueueCommitC( zoneValidationThread ); 4094 } 4095#endif 4096 { 4097 TracyQueuePrepareC( tracy::QueueType::ZoneName ); 4098 tracy::MemWrite( &item->zoneTextFat.text, (uint64_t)ptr ); 4099 tracy::MemWrite( &item->zoneTextFat.size, (uint16_t)size ); 4100 TracyQueueCommitC( zoneTextFatThread ); 4101 } 4102} 4103 4104TRACY_API void ___tracy_emit_zone_color( TracyCZoneCtx ctx, uint32_t color ) { 4105 if( !ctx.active ) return; 4106#ifndef TRACY_NO_VERIFY 4107 { 4108 TracyQueuePrepareC( 
tracy::QueueType::ZoneValidation ); 4109 tracy::MemWrite( &item->zoneValidation.id, ctx.id ); 4110 TracyQueueCommitC( zoneValidationThread ); 4111 } 4112#endif 4113 { 4114 TracyQueuePrepareC( tracy::QueueType::ZoneColor ); 4115 tracy::MemWrite( &item->zoneColor.b, uint8_t( ( color ) & 0xFF ) ); 4116 tracy::MemWrite( &item->zoneColor.g, uint8_t( ( color >> 8 ) & 0xFF ) ); 4117 tracy::MemWrite( &item->zoneColor.r, uint8_t( ( color >> 16 ) & 0xFF ) ); 4118 TracyQueueCommitC( zoneColorThread ); 4119 } 4120} 4121 4122TRACY_API void ___tracy_emit_zone_value( TracyCZoneCtx ctx, uint64_t value ) 4123{ 4124 if( !ctx.active ) return; 4125#ifndef TRACY_NO_VERIFY 4126 { 4127 TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); 4128 tracy::MemWrite( &item->zoneValidation.id, ctx.id ); 4129 TracyQueueCommitC( zoneValidationThread ); 4130 } 4131#endif 4132 { 4133 TracyQueuePrepareC( tracy::QueueType::ZoneValue ); 4134 tracy::MemWrite( &item->zoneValue.value, value ); 4135 TracyQueueCommitC( zoneValueThread ); 4136 } 4137} 4138 4139TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int secure ) { tracy::Profiler::MemAlloc( ptr, size, secure != 0 ); } 4140TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth, int secure ) { tracy::Profiler::MemAllocCallstack( ptr, size, depth, secure != 0 ); } 4141TRACY_API void ___tracy_emit_memory_free( const void* ptr, int secure ) { tracy::Profiler::MemFree( ptr, secure != 0 ); } 4142TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int depth, int secure ) { tracy::Profiler::MemFreeCallstack( ptr, depth, secure != 0 ); } 4143TRACY_API void ___tracy_emit_memory_alloc_named( const void* ptr, size_t size, int secure, const char* name ) { tracy::Profiler::MemAllocNamed( ptr, size, secure != 0, name ); } 4144TRACY_API void ___tracy_emit_memory_alloc_callstack_named( const void* ptr, size_t size, int depth, int secure, const char* name ) { 
tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, secure != 0, name ); } 4145TRACY_API void ___tracy_emit_memory_free_named( const void* ptr, int secure, const char* name ) { tracy::Profiler::MemFreeNamed( ptr, secure != 0, name ); } 4146TRACY_API void ___tracy_emit_memory_free_callstack_named( const void* ptr, int depth, int secure, const char* name ) { tracy::Profiler::MemFreeCallstackNamed( ptr, depth, secure != 0, name ); } 4147TRACY_API void ___tracy_emit_frame_mark( const char* name ) { tracy::Profiler::SendFrameMark( name ); } 4148TRACY_API void ___tracy_emit_frame_mark_start( const char* name ) { tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgStart ); } 4149TRACY_API void ___tracy_emit_frame_mark_end( const char* name ) { tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgEnd ); } 4150TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_t h, uint8_t offset, int flip ) { tracy::Profiler::SendFrameImage( image, w, h, offset, flip ); } 4151TRACY_API void ___tracy_emit_plot( const char* name, double val ) { tracy::Profiler::PlotData( name, val ); } 4152TRACY_API void ___tracy_emit_plot_float( const char* name, float val ) { tracy::Profiler::PlotData( name, val ); } 4153TRACY_API void ___tracy_emit_plot_int( const char* name, int64_t val ) { tracy::Profiler::PlotData( name, val ); } 4154TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int callstack ) { tracy::Profiler::Message( txt, size, callstack ); } 4155TRACY_API void ___tracy_emit_messageL( const char* txt, int callstack ) { tracy::Profiler::Message( txt, callstack ); } 4156TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int callstack ) { tracy::Profiler::MessageColor( txt, size, color, callstack ); } 4157TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int callstack ) { tracy::Profiler::MessageColor( txt, color, callstack ); } 4158TRACY_API void 
___tracy_emit_message_appinfo( const char* txt, size_t size ) { tracy::Profiler::MessageAppInfo( txt, size ); } 4159 4160TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz ) { 4161 return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz ); 4162} 4163 4164TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz ) { 4165 return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); 4166} 4167 4168TRACY_API void ___tracy_emit_gpu_zone_begin( const struct ___tracy_gpu_zone_begin_data data ) 4169{ 4170 TracyLfqPrepareC( tracy::QueueType::GpuZoneBegin ); 4171 tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); 4172 tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() ); 4173 tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); 4174 tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); 4175 tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); 4176 TracyLfqCommitC; 4177} 4178 4179TRACY_API void ___tracy_emit_gpu_zone_begin_callstack( const struct ___tracy_gpu_zone_begin_callstack_data data ) 4180{ 4181 tracy::GetProfiler().SendCallstack( data.depth ); 4182 TracyLfqPrepareC( tracy::QueueType::GpuZoneBeginCallstack ); 4183 tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() ); 4184 tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); 4185 tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); 4186 tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); 4187 tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); 4188 TracyLfqCommitC; 4189} 4190 4191TRACY_API void ___tracy_emit_gpu_zone_begin_alloc( const struct ___tracy_gpu_zone_begin_data data ) 4192{ 4193 TracyLfqPrepareC( 
tracy::QueueType::GpuZoneBeginAllocSrcLoc ); 4194 tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); 4195 tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() ); 4196 tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); 4197 tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); 4198 tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); 4199 TracyLfqCommitC; 4200} 4201 4202TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_callstack( const struct ___tracy_gpu_zone_begin_callstack_data data ) 4203{ 4204 tracy::GetProfiler().SendCallstack( data.depth ); 4205 TracyLfqPrepareC( tracy::QueueType::GpuZoneBeginAllocSrcLocCallstack ); 4206 tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); 4207 tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() ); 4208 tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); 4209 tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); 4210 tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); 4211 TracyLfqCommitC; 4212} 4213 4214TRACY_API void ___tracy_emit_gpu_time( const struct ___tracy_gpu_time_data data ) 4215{ 4216 TracyLfqPrepareC( tracy::QueueType::GpuTime ); 4217 tracy::MemWrite( &item->gpuTime.gpuTime, data.gpuTime ); 4218 tracy::MemWrite( &item->gpuTime.queryId, data.queryId ); 4219 tracy::MemWrite( &item->gpuTime.context, data.context ); 4220 TracyLfqCommitC; 4221} 4222 4223TRACY_API void ___tracy_emit_gpu_zone_end( const struct ___tracy_gpu_zone_end_data data ) 4224{ 4225 TracyLfqPrepareC( tracy::QueueType::GpuZoneEnd ); 4226 tracy::MemWrite( &item->gpuZoneEnd.cpuTime, tracy::Profiler::GetTime() ); 4227 memset( &item->gpuZoneEnd.thread, 0, sizeof( item->gpuZoneEnd.thread ) ); 4228 tracy::MemWrite( &item->gpuZoneEnd.queryId, data.queryId ); 4229 tracy::MemWrite( &item->gpuZoneEnd.context, data.context ); 4230 TracyLfqCommitC; 4231} 4232 4233TRACY_API void ___tracy_emit_gpu_new_context( 
___tracy_gpu_new_context_data data ) 4234{ 4235 TracyLfqPrepareC( tracy::QueueType::GpuNewContext ); 4236 tracy::MemWrite( &item->gpuNewContext.cpuTime, tracy::Profiler::GetTime() ); 4237 tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() ); 4238 tracy::MemWrite( &item->gpuNewContext.gpuTime, data.gpuTime ); 4239 tracy::MemWrite( &item->gpuNewContext.period, data.period ); 4240 tracy::MemWrite( &item->gpuNewContext.context, data.context ); 4241 tracy::MemWrite( &item->gpuNewContext.flags, data.flags ); 4242 tracy::MemWrite( &item->gpuNewContext.type, data.type ); 4243 TracyLfqCommitC; 4244} 4245 4246TRACY_API void ___tracy_emit_gpu_context_name( const struct ___tracy_gpu_context_name_data data ) 4247{ 4248 auto ptr = (char*)tracy::tracy_malloc( data.len ); 4249 memcpy( ptr, data.name, data.len ); 4250 4251 TracyLfqPrepareC( tracy::QueueType::GpuContextName ); 4252 tracy::MemWrite( &item->gpuContextNameFat.context, data.context ); 4253 tracy::MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr ); 4254 tracy::MemWrite( &item->gpuContextNameFat.size, data.len ); 4255 TracyLfqCommitC; 4256} 4257 4258TRACY_API void ___tracy_emit_gpu_calibration( const struct ___tracy_gpu_calibration_data data ) 4259{ 4260 TracyLfqPrepareC( tracy::QueueType::GpuCalibration ); 4261 tracy::MemWrite( &item->gpuCalibration.cpuTime, tracy::Profiler::GetTime() ); 4262 tracy::MemWrite( &item->gpuCalibration.gpuTime, data.gpuTime ); 4263 tracy::MemWrite( &item->gpuCalibration.cpuDelta, data.cpuDelta ); 4264 tracy::MemWrite( &item->gpuCalibration.context, data.context ); 4265 TracyLfqCommitC; 4266} 4267 4268TRACY_API void ___tracy_emit_gpu_zone_begin_serial( const struct ___tracy_gpu_zone_begin_data data ) 4269{ 4270 auto item = tracy::Profiler::QueueSerial(); 4271 tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginSerial ); 4272 tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); 4273 tracy::MemWrite( &item->gpuZoneBegin.srcloc, 
data.srcloc ); 4274 tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() ); 4275 tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); 4276 tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); 4277 tracy::Profiler::QueueSerialFinish(); 4278} 4279 4280TRACY_API void ___tracy_emit_gpu_zone_begin_callstack_serial( const struct ___tracy_gpu_zone_begin_callstack_data data ) 4281{ 4282 auto item = tracy::Profiler::QueueSerialCallstack( tracy::Callstack( data.depth ) ); 4283 tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginCallstackSerial ); 4284 tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); 4285 tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); 4286 tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() ); 4287 tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); 4288 tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); 4289 tracy::Profiler::QueueSerialFinish(); 4290} 4291 4292TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_serial( const struct ___tracy_gpu_zone_begin_data data ) 4293{ 4294 auto item = tracy::Profiler::QueueSerial(); 4295 tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginAllocSrcLocSerial ); 4296 tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); 4297 tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() ); 4298 tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); 4299 tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); 4300 tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); 4301 tracy::Profiler::QueueSerialFinish(); 4302} 4303 4304TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_callstack_serial( const struct ___tracy_gpu_zone_begin_callstack_data data ) 4305{ 4306 auto item = tracy::Profiler::QueueSerialCallstack( tracy::Callstack( data.depth ) ); 4307 tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginAllocSrcLocCallstackSerial ); 
4308 tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); 4309 tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() ); 4310 tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); 4311 tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); 4312 tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); 4313 tracy::Profiler::QueueSerialFinish(); 4314} 4315 4316TRACY_API void ___tracy_emit_gpu_time_serial( const struct ___tracy_gpu_time_data data ) 4317{ 4318 auto item = tracy::Profiler::QueueSerial(); 4319 tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuTime ); 4320 tracy::MemWrite( &item->gpuTime.gpuTime, data.gpuTime ); 4321 tracy::MemWrite( &item->gpuTime.queryId, data.queryId ); 4322 tracy::MemWrite( &item->gpuTime.context, data.context ); 4323 tracy::Profiler::QueueSerialFinish(); 4324} 4325 4326TRACY_API void ___tracy_emit_gpu_zone_end_serial( const struct ___tracy_gpu_zone_end_data data ) 4327{ 4328 auto item = tracy::Profiler::QueueSerial(); 4329 tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneEndSerial ); 4330 tracy::MemWrite( &item->gpuZoneEnd.cpuTime, tracy::Profiler::GetTime() ); 4331 memset( &item->gpuZoneEnd.thread, 0, sizeof( item->gpuZoneEnd.thread ) ); 4332 tracy::MemWrite( &item->gpuZoneEnd.queryId, data.queryId ); 4333 tracy::MemWrite( &item->gpuZoneEnd.context, data.context ); 4334 tracy::Profiler::QueueSerialFinish(); 4335} 4336 4337TRACY_API void ___tracy_emit_gpu_new_context_serial( ___tracy_gpu_new_context_data data ) 4338{ 4339 auto item = tracy::Profiler::QueueSerial(); 4340 tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuNewContext ); 4341 tracy::MemWrite( &item->gpuNewContext.cpuTime, tracy::Profiler::GetTime() ); 4342 tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() ); 4343 tracy::MemWrite( &item->gpuNewContext.gpuTime, data.gpuTime ); 4344 tracy::MemWrite( &item->gpuNewContext.period, data.period ); 4345 tracy::MemWrite( 
&item->gpuNewContext.context, data.context ); 4346 tracy::MemWrite( &item->gpuNewContext.flags, data.flags ); 4347 tracy::MemWrite( &item->gpuNewContext.type, data.type ); 4348 tracy::Profiler::QueueSerialFinish(); 4349} 4350 4351TRACY_API void ___tracy_emit_gpu_context_name_serial( const struct ___tracy_gpu_context_name_data data ) 4352{ 4353 auto ptr = (char*)tracy::tracy_malloc( data.len ); 4354 memcpy( ptr, data.name, data.len ); 4355 4356 auto item = tracy::Profiler::QueueSerial(); 4357 tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuContextName ); 4358 tracy::MemWrite( &item->gpuContextNameFat.context, data.context ); 4359 tracy::MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr ); 4360 tracy::MemWrite( &item->gpuContextNameFat.size, data.len ); 4361 tracy::Profiler::QueueSerialFinish(); 4362} 4363 4364TRACY_API void ___tracy_emit_gpu_calibration_serial( const struct ___tracy_gpu_calibration_data data ) 4365{ 4366 auto item = tracy::Profiler::QueueSerial(); 4367 tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuCalibration ); 4368 tracy::MemWrite( &item->gpuCalibration.cpuTime, tracy::Profiler::GetTime() ); 4369 tracy::MemWrite( &item->gpuCalibration.gpuTime, data.gpuTime ); 4370 tracy::MemWrite( &item->gpuCalibration.cpuDelta, data.cpuDelta ); 4371 tracy::MemWrite( &item->gpuCalibration.context, data.context ); 4372 tracy::Profiler::QueueSerialFinish(); 4373} 4374 4375TRACY_API int ___tracy_connected( void ) 4376{ 4377 return tracy::GetProfiler().IsConnected(); 4378} 4379 4380#ifdef TRACY_FIBERS 4381TRACY_API void ___tracy_fiber_enter( const char* fiber ){ tracy::Profiler::EnterFiber( fiber ); } 4382TRACY_API void ___tracy_fiber_leave( void ){ tracy::Profiler::LeaveFiber(); } 4383#endif 4384 4385# ifdef TRACY_MANUAL_LIFETIME 4386TRACY_API void ___tracy_startup_profiler( void ) 4387{ 4388 tracy::StartupProfiler(); 4389} 4390 4391TRACY_API void ___tracy_shutdown_profiler( void ) 4392{ 4393 tracy::ShutdownProfiler(); 4394} 4395# endif 4396 
4397#ifdef __cplusplus 4398} 4399#endif 4400 4401#endif