Reactos

[UCRT] Fix GCC/Clang SIMD compilation

GCC and Clang require functions that use SSE/AVX etc. to be marked, either with a function attribute or with a pragma around the function. strlen uses a template function that uses either SSE2 or AVX2. Previously the template was surrounded with pragmas to allow both SSE2 and AVX2, but that makes GCC assume that it can use AVX2 instructions even in the SSE2 version. To fix this, the template instances are now built in individual compilation units for SSE2 and AVX2, separate from the "dispatcher" function.
Now ucrtbase doesn't crash anymore on GCC builds.

Another issue was the namespace around strnlen_mode, which confused Clang so much that it failed to instantiate the template code.

+92 -10
+12 -4
sdk/lib/ucrt/inc/corecrt_internal_simd.h
··· 19 19 #if defined _CRT_SIMD_SUPPORT_AVAILABLE 20 20 21 21 #if defined(__clang__) 22 - #define _UCRT_ENABLE_EXTENDED_ISA \ 22 + #define _UCRT_ENABLE_SSE2 \ 23 + _Pragma("clang attribute push(__attribute__((target(\"sse2\"))), apply_to=function)") 24 + #define _UCRT_ENABLE_AVX2 \ 23 25 _Pragma("clang attribute push(__attribute__((target(\"sse2,avx,avx2\"))), apply_to=function)") 24 26 #define _UCRT_RESTORE_DEFAULT_ISA \ 25 27 _Pragma("clang attribute pop") 26 28 #elif defined(__GNUC__) 27 - #define _UCRT_ENABLE_EXTENDED_ISA \ 29 + #define _UCRT_ENABLE_SSE2 \ 30 + _Pragma("GCC push_options") \ 31 + _Pragma("GCC target(\"sse2\")") 32 + #define _UCRT_ENABLE_AVX2 \ 28 33 _Pragma("GCC push_options") \ 29 34 _Pragma("GCC target(\"avx2\")") 30 35 #define _UCRT_RESTORE_DEFAULT_ISA \ 31 36 _Pragma("GCC pop_options") 32 37 #else 33 - #define _UCRT_ENABLE_EXTENDED_ISA 38 + #define _UCRT_ENABLE_SSE2 39 + #define _UCRT_ENABLE_AVX2 34 40 #define _UCRT_RESTORE_DEFAULT_ISA 35 41 #endif 36 42 37 - _UCRT_ENABLE_EXTENDED_ISA 38 43 39 44 extern "C" int __isa_available; 40 45 ··· 70 75 }; 71 76 72 77 78 + _UCRT_ENABLE_SSE2 73 79 74 80 template <> 75 81 struct __crt_simd_cleanup_guard<__crt_simd_isa::sse2> ··· 120 126 } 121 127 }; 122 128 129 + _UCRT_RESTORE_DEFAULT_ISA 123 130 131 + _UCRT_ENABLE_AVX2 124 132 125 133 template <> 126 134 struct __crt_simd_cleanup_guard<__crt_simd_isa::avx2>
+11
sdk/lib/ucrt/string/string.cmake
··· 57 57 string/wmemmove_s.cpp 58 58 ) 59 59 60 + # Special handling for GCC and Clang 61 + if(CMAKE_C_COMPILER_ID STREQUAL "GNU" OR CMAKE_C_COMPILER_ID STREQUAL "Clang") 62 + list(APPEND UCRT_STRING_SOURCES 63 + string/strnlen-avx2.cpp 64 + string/strnlen-sse2.cpp 65 + ) 66 + 67 + set_source_files_properties(string/strnlen-sse2.cpp PROPERTIES COMPILE_OPTIONS "-msse2") 68 + set_source_files_properties(string/strnlen-avx2.cpp PROPERTIES COMPILE_OPTIONS "-mavx2") 69 + endif() 70 + 60 71 if(${ARCH} STREQUAL "i386") 61 72 list(APPEND UCRT_STRING_ASM_SOURCES 62 73 string/i386/_memicmp.s
+29
sdk/lib/ucrt/string/strnlen-avx2.cpp
··· 1 + // 2 + // strnlen-avx2.cpp 3 + // 4 + // Copyright (c) Timo Kreuzer 5 + // 6 + // Explicit template instantiations for AVX2 str(n)len code 7 + // 8 + 9 + #pragma GCC target("avx2") 10 + #define _UCRT_BUILD_AVX2 11 + #include "strnlen.cpp" 12 + 13 + template 14 + size_t __cdecl common_strnlen_simd<bounded, __crt_simd_isa::avx2, uint8_t>( 15 + uint8_t const* const string, 16 + size_t const maximum_count 17 + ) throw(); 18 + 19 + template 20 + size_t __cdecl common_strnlen_simd<bounded, __crt_simd_isa::avx2, uint16_t>( 21 + uint16_t const* const string, 22 + size_t const maximum_count 23 + ) throw(); 24 + 25 + template 26 + size_t __cdecl common_strnlen_simd<unbounded, __crt_simd_isa::avx2, uint16_t>( 27 + uint16_t const* const string, 28 + size_t const maximum_count 29 + ) throw();
+29
sdk/lib/ucrt/string/strnlen-sse2.cpp
··· 1 + // 2 + // strnlen-sse2.cpp 3 + // 4 + // Copyright (c) Timo Kreuzer 5 + // 6 + // Explicit template instantiations for SSE2 str(n)len code 7 + // 8 + 9 + #pragma GCC target("sse2") 10 + #define _UCRT_BUILD_SSE2 11 + #include "strnlen.cpp" 12 + 13 + template 14 + size_t __cdecl common_strnlen_simd<bounded, __crt_simd_isa::sse2, uint8_t>( 15 + uint8_t const* const string, 16 + size_t const maximum_count 17 + ) throw(); 18 + 19 + template 20 + size_t __cdecl common_strnlen_simd<bounded, __crt_simd_isa::sse2, uint16_t>( 21 + uint16_t const* const string, 22 + size_t const maximum_count 23 + ) throw(); 24 + 25 + template 26 + size_t __cdecl common_strnlen_simd<unbounded, __crt_simd_isa::sse2, uint16_t>( 27 + uint16_t const* const string, 28 + size_t const maximum_count 29 + ) throw();
+11 -6
sdk/lib/ucrt/string/strnlen.cpp
··· 21 21 22 22 23 23 24 - namespace 25 - { 24 + //namespace // clang doesn't like this! 25 + //{ 26 26 enum strnlen_mode 27 27 { 28 28 bounded, // strnlen mode; maximum_count is respected 29 29 unbounded, // strlen mode; maximum_count is ignored 30 30 }; 31 - } 31 + //} 32 32 33 33 // This function returns true if we have reached the end of the range to be 34 34 // searched for a terminator. For the bounded strnlen functions, we must ··· 78 78 79 79 #ifdef _CRT_SIMD_SUPPORT_AVAILABLE 80 80 81 - _UCRT_ENABLE_EXTENDED_ISA 82 81 83 82 template <strnlen_mode Mode, __crt_simd_isa Isa, typename Element> 84 83 _Check_return_ 85 84 _When_(maximum_count > _String_length_(string), _Post_satisfies_(return == _String_length_(string))) 86 85 _When_(maximum_count <= _String_length_(string), _Post_satisfies_(return == maximum_count)) 87 - static __inline size_t __cdecl common_strnlen_simd( 86 + size_t __cdecl common_strnlen_simd( 88 87 Element const* const string, 89 88 size_t const maximum_count 90 89 ) throw() 90 + #if (defined(__GNUC__) || defined(__clang__)) && !defined(_UCRT_BUILD_SSE2) && !defined(_UCRT_BUILD_AVX2) 91 + ; 92 + #else 91 93 { 92 94 using traits = __crt_simd_traits<Isa, Element>; 93 95 ··· 170 172 return static_cast<size_t>(it - string); 171 173 } 172 174 173 - _UCRT_RESTORE_DEFAULT_ISA 175 + #endif // (defined(__GNUC__) || defined(__clang__)) && !defined(_UCRT_BUILD_SSE2) && !defined(_UCRT_BUILD_AVX2) 174 176 175 177 #endif // _CRT_SIMD_SUPPORT_AVAILABLE 178 + 179 + #if !defined(_UCRT_BUILD_SSE2) && !defined(_UCRT_BUILD_AVX2) 176 180 177 181 template <strnlen_mode Mode, typename Element> 178 182 _Check_return_ ··· 225 229 } 226 230 227 231 #endif // _M_ARM64 232 + #endif // !defined(_UCRT_BUILD_SSE2) && !defined(_UCRT_BUILD_AVX2)