nixpkgs mirror (for testing) github.com/NixOS/nixpkgs
nix
at python-updates 35 lines 1.6 kB view raw
diff --git a/library/src/rocfft_aot_helper.cpp b/library/src/rocfft_aot_helper.cpp
index f0a889f4..452eb37f 100644
--- a/library/src/rocfft_aot_helper.cpp
+++ b/library/src/rocfft_aot_helper.cpp
@@ -771,26 +771,22 @@ int main(int argc, char** argv)
     for(size_t i = 0; i < NUM_THREADS; ++i)
     {
         threads.emplace_back([&queue, &gpu_archs]() {
+            int compile_count = 0;
             while(true)
             {
                 auto item = queue.pop();
                 if(item.kernel_name.empty())
                     break;
 
+                if(++compile_count % 16 == 0)
+                    std::cerr << "rocfft_aot_helper processing " << item.kernel_name << std::endl << std::flush;
                 for(const auto& gpu_arch : gpu_archs)
                 {
-                    if(item.sol_arch_name.empty())
+                    if(item.sol_arch_name.empty() || gpu_arch.find(item.sol_arch_name) != std::string::npos)
                     {
                         RTCCache::cached_compile(
                             item.kernel_name, gpu_arch, item.generate_src, generator_sum());
                     }
-                    else if(gpu_arch.find(item.sol_arch_name) != std::string::npos)
-                    {
-                        // std::cout << "arch: " << gpu_arch
-                        //           << ", solution-kernel: " << item.kernel_name << std::endl;
-                        RTCCache::cached_compile(
-                            item.kernel_name, gpu_arch, item.generate_src, generator_sum());
-                    }
                 }
             }
         });