diff --git a/lib/codec_choose.c b/lib/codec_choose.c index bd4f6ad6..27901175 100644 --- a/lib/codec_choose.c +++ b/lib/codec_choose.c @@ -1,15 +1,48 @@ #include #include #include +#include + +#if __x86_64__ || __i386__ || _M_X86 || _M_X64 +#ifdef _MSC_VER + #include + #define __cpuid_count(__level, __count, __eax, __ebx, __ecx, __edx) \ + {\ + int info[4];\ + __cpuidex(info, __level, __count);\ + __eax = info[0];\ + __ebx = info[1];\ + __ecx = info[2];\ + __edx = info[3];\ + } + #define __cpuid(__level, __eax, __ebx, __ecx, __edx) __cpuid_count(__level, 0, __eax, __ebx, __ecx, __edx) +#else + #include + #if ((__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 2) || (__clang_major__ >= 3)) + static inline uint64_t _xgetbv(unsigned int index){ + unsigned int eax, edx; + __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index)); + return ((uint64_t)edx << 32) | eax; + } + #else + #error "Platform not supported" + #endif +#endif -#if __x86_64__ || __i386__ -#include #ifndef bit_AVX2 #define bit_AVX2 (1 << 5) #endif #ifndef bit_SSSE3 #define bit_SSSE3 (1 << 9) #endif + +#define bit_XSAVE_XRSTORE (1 << 27) + +#ifndef _XCR_XFEATURE_ENABLED_MASK +#define _XCR_XFEATURE_ENABLED_MASK 0 +#endif + +#define _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS 0x6 #endif #include "../include/libbase64.h" @@ -92,19 +125,42 @@ codec_choose_arm (struct codec *codec) static bool codec_choose_x86 (struct codec *codec) { -#if (__x86_64__ || __i386__) && (HAVE_AVX2 || HAVE_SSSE3) +#if (__x86_64__ || __i386__ || _M_X86 || _M_X64) && (HAVE_AVX2 || HAVE_SSSE3) unsigned int eax, ebx = 0, ecx = 0, edx; - unsigned int max_level = __get_cpuid_max(0, NULL); + unsigned int max_level; + + #ifdef _MSC_VER + int info[4]; + __cpuidex(info, 0, 0); + max_level = info[0]; + #else + max_level = __get_cpuid_max(0, NULL); + #endif #if HAVE_AVX2 - /* Check for AVX2 support: */ + /* Check for AVX2 support: + Checking for AVX requires 3 things: + 1) CPUID indicates that the OS uses XSAVE and XRSTORE + instructions (allowing saving YMM registers on context + switch) + 2) CPUID indicates support for AVX + 3) XGETBV indicates the AVX registers will be saved and + restored on context switch + + Note that XGETBV is only available on 686 or later CPUs, so + the instruction needs to be conditionally run.*/ if (max_level >= 7) { __cpuid_count(7, 0, eax, ebx, ecx, edx); - if (ebx & bit_AVX2) { - codec->enc = base64_stream_encode_avx2; - codec->dec = base64_stream_decode_avx2; - return true; + + if ((ebx & bit_AVX2) && (ecx & bit_XSAVE_XRSTORE)) { + uint64_t xcr_mask; + xcr_mask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); + if (xcr_mask & _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS) { + codec->enc = base64_stream_encode_avx2; + codec->dec = base64_stream_decode_avx2; + return true; + } } } #endif