nixpkgs mirror (for testing)
github.com/NixOS/nixpkgs
nix
1diff --git a/library/src/rocfft_aot_helper.cpp b/library/src/rocfft_aot_helper.cpp
2index f0a889f4..452eb37f 100644
3--- a/library/src/rocfft_aot_helper.cpp
4+++ b/library/src/rocfft_aot_helper.cpp
5@@ -771,26 +771,22 @@ int main(int argc, char** argv)
6 for(size_t i = 0; i < NUM_THREADS; ++i)
7 {
8 threads.emplace_back([&queue, &gpu_archs]() {
9+ int compile_count = 0;
10 while(true)
11 {
12 auto item = queue.pop();
13 if(item.kernel_name.empty())
14 break;
15
16+ if(++compile_count % 16 == 0)
17+ std::cerr << "rocfft_aot_helper processing " << item.kernel_name << std::endl << std::flush;
18 for(const auto& gpu_arch : gpu_archs)
19 {
20- if(item.sol_arch_name.empty())
21+ if(item.sol_arch_name.empty() || gpu_arch.find(item.sol_arch_name) != std::string::npos)
22 {
23 RTCCache::cached_compile(
24 item.kernel_name, gpu_arch, item.generate_src, generator_sum());
25 }
26- else if(gpu_arch.find(item.sol_arch_name) != std::string::npos)
27- {
28- // std::cout << "arch: " << gpu_arch
29- // << ", solution-kernel: " << item.kernel_name << std::endl;
30- RTCCache::cached_compile(
31- item.kernel_name, gpu_arch, item.generate_src, generator_sum());
32- }
33 }
34 }
35 });