This commit is contained in:
Jay D Dee
2023-08-30 20:15:48 -04:00
parent 57a6b7b58b
commit 4378d2f841
72 changed files with 10184 additions and 2182 deletions

View File

@@ -174,35 +174,147 @@ static inline int cpu_fanpercent()
return 0;
}
// CPUID
// This list is incomplete, it only contains features of interest to cpuminer.
// refer to http://en.wikipedia.org/wiki/CPUID for details.
// AVX10 compatibility notes
//
// Notation used: AVX10i.[version]_[vectorwidth]
// AVX10.1_512 is a rebranding of AVX512 and is effectively the AVX* superset
// with full 512 bit vector support.
// AVX10.2_256 is effectively AVX2 + AVX512_VL, all AVX512 instructions and
// features applied only to 256 bit and 128 bit vectors.
// Future AVX10 versions will add new instructions and features.
// Register array indexes
#define EAX_Reg (0)
#define EBX_Reg (1)
#define ECX_Reg (2)
#define EDX_Reg (3)
// CPUID function number, aka leaf (EAX)
#define VENDOR_ID (0)
#define CPU_INFO (1)
#define CACHE_TLB_DESCRIPTOR (2)
#define EXTENDED_FEATURES (7)
#define AVX10_FEATURES (0x24)
#define HIGHEST_EXT_FUNCTION (0x80000000)
#define EXTENDED_CPU_INFO (0x80000001)
#define CPU_BRAND_1 (0x80000002)
#define CPU_BRAND_2 (0x80000003)
#define CPU_BRAND_3 (0x80000004)
// CPU_INFO: EAX=1, ECX=0
// ECX
#define SSE3_Flag 1
#define SSSE3_Flag (1<< 9)
#define XOP_Flag (1<<11) // obsolete
#define FMA3_Flag (1<<12)
#define SSE41_Flag (1<<19)
#define SSE42_Flag (1<<20)
#define AES_NI_Flag (1<<25)
#define XSAVE_Flag (1<<26)
#define OSXSAVE_Flag (1<<27)
#define AVX_Flag (1<<28)
// EDX
#define MMX_Flag (1<<23)
#define SSE_Flag (1<<25)
#define SSE2_Flag (1<<26)
// EXTENDED_FEATURES subleaf 0: EAX=7, ECX=0
// EBX
#define AVX2_Flag (1<< 5)
#define AVX512_F_Flag (1<<16)
#define AVX512_DQ_Flag (1<<17)
#define AVX512_IFMA_Flag (1<<21)
#define AVX512_PF_Flag (1<<26)
#define AVX512_ER_Flag (1<<27)
#define AVX512_CD_Flag (1<<28)
#define SHA_Flag (1<<29)
#define AVX512_BW_Flag (1<<30)
#define AVX512_VL_Flag (1<<31)
// ECX
#define AVX512_VBMI_Flag (1<< 1)
#define AVX512_VBMI2_Flag (1<< 6)
#define VAES_Flag (1<< 9)
#define AVX512_VNNI_Flag (1<<11)
#define AVX512_BITALG_Flag (1<<12)
#define AVX512_VPOPCNTDQ_Flag (1<<14)
// EDX
#define AVX512_4VNNIW_Flag (1<< 2)
#define AVX512_4FMAPS_Flag (1<< 3)
#define AVX512_VP2INTERSECT_Flag (1<< 8)
#define AMX_BF16_Flag (1<<22)
#define AVX512_FP16_Flag (1<<23)
#define AMX_TILE_Flag (1<<24)
#define AMX_INT8_Flag (1<<25)
// EXTENDED_FEATURES subleaf 1: EAX=7, ECX=1
// EAX
#define SHA512_Flag 1
#define SM3_Flag (1<< 1)
#define SM4_Flag (1<< 2)
#define AVX_VNNI_Flag (1<< 4)
#define AVX512_BF16_Flag (1<< 5)
#define AMX_FP16_Flag (1<<21)
#define AVX_IFMA_Flag (1<<23)
// EDX
#define AVX_VNNI_INT8_Flag (1<< 4)
#define AVX_NE_CONVERT_Flag (1<< 5)
#define AMX_COMPLEX_Flag (1<< 8)
#define AVX_VNNI_INT16_Flag (1<<10)
#define AVX10_Flag (1<<19)
#define APX_F_Flag (1<<21)
// AVX10_FEATURES: EAX=0x24, ECX=0
// EBX
#define AVX10_VERSION_mask 0xff // bits [7:0]
#define AVX10_128_Flag (1<<16)
#define AVX10_256_Flag (1<<17)
#define AVX10_512_Flag (1<<18)
// Use this to detect presence of feature
#define AVX_mask (AVX_Flag|XSAVE_Flag|OSXSAVE_Flag)
#define FMA3_mask (FMA3_Flag|AVX_mask)
#define AVX512_mask (AVX512_VL_Flag|AVX512_BW_Flag|AVX512_DQ_Flag|AVX512_F_Flag)
#ifndef __arm__
static inline void cpuid(int functionnumber, int output[4]) {
static inline void cpuid( unsigned int leaf, unsigned int subleaf,
unsigned int output[4] )
{
#if defined (_MSC_VER) || defined (__INTEL_COMPILER)
// Microsoft or Intel compiler, intrin.h included
__cpuidex(output, functionnumber, 0);
// Microsoft or Intel compiler, intrin.h included
__cpuidex(output, leaf, subleaf );
#elif defined(__GNUC__) || defined(__clang__)
// use inline assembly, Gnu/AT&T syntax
int a, b, c, d;
asm volatile("cpuid" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "a"(functionnumber), "c"(0));
output[0] = a;
output[1] = b;
output[2] = c;
output[3] = d;
// use inline assembly, Gnu/AT&T syntax
unsigned int a, b, c, d;
asm volatile( "cpuid"
: "=a"(a), "=b"(b), "=c"(c), "=d"(d)
: "a"(leaf), "c"(subleaf) );
output[ EAX_Reg ] = a;
output[ EBX_Reg ] = b;
output[ ECX_Reg ] = c;
output[ EDX_Reg ] = d;
#else
// unknown platform. try inline assembly with masm/intel syntax
__asm {
mov eax, functionnumber
xor ecx, ecx
cpuid;
mov esi, output
mov[esi], eax
mov[esi + 4], ebx
mov[esi + 8], ecx
mov[esi + 12], edx
}
// unknown platform. try inline assembly with masm/intel syntax
__asm {
mov eax, leaf
mov ecx, subleaf
cpuid;
mov esi, output
mov[esi], eax
mov[esi + 4], ebx
mov[esi + 8], ecx
mov[esi + 12], edx
}
#endif
}
#else /* !__arm__ */
#define cpuid(fn, out) out[0] = 0;
#define cpuid(leaf, subleaf, out) out[0] = 0;
#endif
static inline void cpu_getname(char *outbuf, size_t maxsz)
@@ -211,13 +323,13 @@ static inline void cpu_getname(char *outbuf, size_t maxsz)
#ifdef WIN32
char brand[256] = { 0 };
int output[4] = { 0 }, ext;
cpuid(0x80000000, output);
cpuid( 0x80000000, 0, output );
ext = output[0];
if (ext >= 0x80000004)
{
for (int i = 2; i <= (ext & 0xF); i++)
{
cpuid(0x80000000+i, output);
cpuid( 0x80000000+i, 0, output);
memcpy(&brand[(i-2) * 4*sizeof(int)], output, 4*sizeof(int));
}
snprintf(outbuf, maxsz, "%s", brand);
@@ -309,70 +421,97 @@ static inline void cpu_getmodelid(char *outbuf, size_t maxsz)
#endif
}
// http://en.wikipedia.org/wiki/CPUID
// Typical display format: AVX10.[version]_[vectorlength], if vector length is
// omitted 256 is the default.
// Ex: AVX10.1_512
// Flags:
// AVX10 128 256 512
// 0 0 0 0 = AVX10 not supported
// 1 1 1 0 = AVX10 256 bit max (version 2)
// 1 1 1 1 = AVX10 512 bit max (version 1 granite rapids)
// Other combinations are not defined.
// CPUID commands
#define VENDOR_ID (0)
#define CPU_INFO (1)
#define CACHE_TLB_DESCRIPTOR (2)
#define EXTENDED_FEATURES (7)
#define HIGHEST_EXT_FUNCTION (0x80000000)
#define EXTENDED_CPU_INFO (0x80000001)
#define CPU_BRAND_1 (0x80000002)
#define CPU_BRAND_2 (0x80000003)
#define CPU_BRAND_3 (0x80000004)
// Test AVX10_flag before AVX10_FEATURES flags.
static inline bool has_avx10()
{
#ifdef __arm__
return false;
#else
unsigned int cpu_info[4] = { 0 };
cpuid( EXTENDED_FEATURES, 1, cpu_info );
return cpu_info[ EDX_Reg ] & AVX10_Flag;
#endif
}
// Registers
#define EAX_Reg (0)
#define EBX_Reg (1)
#define ECX_Reg (2)
#define EDX_Reg (3)
static inline unsigned int avx10_version()
{
#ifdef __arm__
return 0;
#else
if ( has_avx10() )
{
unsigned int cpu_info[4] = { 0 };
cpuid( AVX10_FEATURES, 0, cpu_info );
return cpu_info[ EBX_Reg ] & AVX10_VERSION_mask;
}
return 0;
#endif
}
// Feature flags
static inline bool has_avx10_512()
{
#ifdef __arm__
return false;
#else
if ( has_avx10() )
{
unsigned int cpu_info[4] = { 0 };
cpuid( AVX10_FEATURES, 0, cpu_info );
return cpu_info[ EBX_Reg ] & AVX10_512_Flag;
}
return false;
#endif
}
// CPU_INFO ECX
#define SSE3_Flag 1
#define SSSE3_Flag (1<< 9)
#define XOP_Flag (1<<11) // obsolete, only available on pre-Ryzen AMD
#define FMA3_Flag (1<<12)
#define AES_Flag (1<<25)
#define SSE41_Flag (1<<19)
#define SSE42_Flag (1<<20)
#define AES_Flag (1<<25)
#define XSAVE_Flag (1<<26)
#define OSXSAVE_Flag (1<<27)
#define AVX_Flag (1<<28)
static inline bool has_avx10_256()
{
#ifdef __arm__
return false;
#else
if ( has_avx10() )
{
unsigned int cpu_info[4] = { 0 };
cpuid( AVX10_FEATURES, 0, cpu_info );
return cpu_info[ EBX_Reg ] & AVX10_256_Flag;
}
return false;
#endif
}
// CPU_INFO EDX
#define SSE_Flag (1<<25)
#define SSE2_Flag (1<<26)
// EXTENDED_FEATURES EBX
#define AVX2_Flag (1<< 5)
#define AVX512F_Flag (1<<16)
#define AVX512DQ_Flag (1<<17)
#define SHA_Flag (1<<29)
#define AVX512BW_Flag (1<<30)
#define AVX512VL_Flag (1<<31)
// EXTENDED_FEATURES ECX
#define AVX512VBMI_Flag (1<<1)
#define AVX512VBMI2_Flag (1<<6)
#define VAES_Flag (1<<9)
// Use this to detect presence of feature
#define AVX_mask (AVX_Flag|XSAVE_Flag|OSXSAVE_Flag)
#define FMA3_mask (FMA3_Flag|AVX_mask)
#define AVX512_mask (AVX512VL_Flag|AVX512BW_Flag|AVX512DQ_Flag|AVX512F_Flag)
// Maximum vector length
static inline unsigned int avx10_vector_length()
{
#ifdef __arm__
return 0;
#else
if ( has_avx10() )
{
unsigned int cpu_info[4] = { 0 };
cpuid( AVX10_FEATURES, 0, cpu_info );
return cpu_info[ EBX_Reg ] & AVX10_512_Flag ? 512
: ( cpu_info[ EBX_Reg ] & AVX10_256_Flag ? 256 : 0 );
}
return 0;
#endif
}
static inline bool has_sha()
{
#ifdef __arm__
return false;
#else
int cpu_info[4] = { 0 };
cpuid( EXTENDED_FEATURES, cpu_info );
unsigned int cpu_info[4] = { 0 };
cpuid( EXTENDED_FEATURES, 0, cpu_info );
return cpu_info[ EBX_Reg ] & SHA_Flag;
#endif
}
@@ -382,8 +521,8 @@ static inline bool has_sse2()
#ifdef __arm__
return false;
#else
int cpu_info[4] = { 0 };
cpuid( CPU_INFO, cpu_info );
unsigned int cpu_info[4] = { 0 };
cpuid( CPU_INFO, 0, cpu_info );
return cpu_info[ EDX_Reg ] & SSE2_Flag;
#endif
}
@@ -394,9 +533,9 @@ static inline bool has_aes_ni()
#ifdef __arm__
return false;
#else
int cpu_info[4] = { 0 };
cpuid( CPU_INFO, cpu_info );
return cpu_info[ ECX_Reg ] & AES_Flag;
unsigned int cpu_info[4] = { 0 };
cpuid( CPU_INFO, 0, cpu_info );
return cpu_info[ ECX_Reg ] & AES_NI_Flag;
#endif
}
@@ -406,8 +545,8 @@ static inline bool has_avx()
#ifdef __arm__
return false;
#else
int cpu_info[4] = { 0 };
cpuid( CPU_INFO, cpu_info );
unsigned int cpu_info[4] = { 0 };
cpuid( CPU_INFO, 0, cpu_info );
return ( ( cpu_info[ ECX_Reg ] & AVX_mask ) == AVX_mask );
#endif
}
@@ -418,8 +557,8 @@ static inline bool has_avx2()
#ifdef __arm__
return false;
#else
int cpu_info[4] = { 0 };
cpuid( EXTENDED_FEATURES, cpu_info );
unsigned int cpu_info[4] = { 0 };
cpuid( EXTENDED_FEATURES, 0, cpu_info );
return cpu_info[ EBX_Reg ] & AVX2_Flag;
#endif
}
@@ -429,9 +568,9 @@ static inline bool has_avx512f()
#ifdef __arm__
return false;
#else
int cpu_info[4] = { 0 };
cpuid( EXTENDED_FEATURES, cpu_info );
return cpu_info[ EBX_Reg ] & AVX512F_Flag;
unsigned int cpu_info[4] = { 0 };
cpuid( EXTENDED_FEATURES, 0, cpu_info );
return cpu_info[ EBX_Reg ] & AVX512_F_Flag;
#endif
}
@@ -440,9 +579,9 @@ static inline bool has_avx512dq()
#ifdef __arm__
return false;
#else
int cpu_info[4] = { 0 };
cpuid( EXTENDED_FEATURES, cpu_info );
return cpu_info[ EBX_Reg ] & AVX512DQ_Flag;
unsigned int cpu_info[4] = { 0 };
cpuid( EXTENDED_FEATURES, 0, cpu_info );
return cpu_info[ EBX_Reg ] & AVX512_DQ_Flag;
#endif
}
@@ -451,9 +590,9 @@ static inline bool has_avx512bw()
#ifdef __arm__
return false;
#else
int cpu_info[4] = { 0 };
cpuid( EXTENDED_FEATURES, cpu_info );
return cpu_info[ EBX_Reg ] & AVX512BW_Flag;
unsigned int cpu_info[4] = { 0 };
cpuid( EXTENDED_FEATURES, 0, cpu_info );
return cpu_info[ EBX_Reg ] & AVX512_BW_Flag;
#endif
}
@@ -462,9 +601,9 @@ static inline bool has_avx512vl()
#ifdef __arm__
return false;
#else
int cpu_info[4] = { 0 };
cpuid( EXTENDED_FEATURES, cpu_info );
return cpu_info[ EBX_Reg ] & AVX512VL_Flag;
unsigned int cpu_info[4] = { 0 };
cpuid( EXTENDED_FEATURES, 0, cpu_info );
return cpu_info[ EBX_Reg ] & AVX512_VL_Flag;
#endif
}
@@ -474,30 +613,19 @@ static inline bool has_avx512()
#ifdef __arm__
return false;
#else
int cpu_info[4] = { 0 };
cpuid( EXTENDED_FEATURES, cpu_info );
unsigned int cpu_info[4] = { 0 };
cpuid( EXTENDED_FEATURES, 0, cpu_info );
return ( ( cpu_info[ EBX_Reg ] & AVX512_mask ) == AVX512_mask );
#endif
}
// AMD Zen3 added support for 256 bit VAES without requiring AVX512.
// The original Intel spec requires AVX512F to support 512 bit VAES and
// requires AVX512VL to support 256 bit VAES.
// The CPUID VAES bit alone can't distiguish 256 vs 512 bit.
// If necessary:
// VAES 256 & 512 = VAES && AVX512VL
// VAES 512 = VAES && AVX512F
// VAES 256 = ( VAES && AVX512VL ) || ( VAES && !AVX512F )
// VAES 512 only = VAES && AVX512F && !AVX512VL
// VAES 256 only = VAES && !AVX512F
static inline bool has_vaes()
{
#ifdef __arm__
return false;
#else
int cpu_info[4] = { 0 };
cpuid( EXTENDED_FEATURES, cpu_info );
unsigned int cpu_info[4] = { 0 };
cpuid( EXTENDED_FEATURES, 0, cpu_info );
return cpu_info[ ECX_Reg ] & VAES_Flag;
#endif
}
@@ -507,9 +635,9 @@ static inline bool has_vbmi()
#ifdef __arm__
return false;
#else
int cpu_info[4] = { 0 };
cpuid( EXTENDED_FEATURES, cpu_info );
return cpu_info[ ECX_Reg ] & AVX512VBMI_Flag;
unsigned int cpu_info[4] = { 0 };
cpuid( EXTENDED_FEATURES, 0, cpu_info );
return cpu_info[ ECX_Reg ] & AVX512_VBMI_Flag;
#endif
}
@@ -518,9 +646,9 @@ static inline bool has_vbmi2()
#ifdef __arm__
return false;
#else
int cpu_info[4] = { 0 };
cpuid( EXTENDED_FEATURES, cpu_info );
return cpu_info[ ECX_Reg ] & AVX512VBMI2_Flag;
unsigned int cpu_info[4] = { 0 };
cpuid( EXTENDED_FEATURES, 0, cpu_info );
return cpu_info[ ECX_Reg ] & AVX512_VBMI2_Flag;
#endif
}
@@ -530,8 +658,8 @@ static inline bool has_xop()
#ifdef __arm__
return false;
#else
int cpu_info[4] = { 0 };
cpuid( EXTENDED_CPU_INFO, cpu_info );
unsigned int cpu_info[4] = { 0 };
cpuid( EXTENDED_CPU_INFO, 0, cpu_info );
return cpu_info[ ECX_Reg ] & XOP_Flag;
#endif
}
@@ -541,8 +669,8 @@ static inline bool has_fma3()
#ifdef __arm__
return false;
#else
int cpu_info[4] = { 0 };
cpuid( CPU_INFO, cpu_info );
unsigned int cpu_info[4] = { 0 };
cpuid( CPU_INFO, 0, cpu_info );
return ( ( cpu_info[ ECX_Reg ] & FMA3_mask ) == FMA3_mask );
#endif
}
@@ -552,8 +680,8 @@ static inline bool has_sse42()
#ifdef __arm__
return false;
#else
int cpu_info[4] = { 0 };
cpuid( CPU_INFO, cpu_info );
unsigned int cpu_info[4] = { 0 };
cpuid( CPU_INFO, 0, cpu_info );
return cpu_info[ ECX_Reg ] & SSE42_Flag;
#endif
}
@@ -563,16 +691,16 @@ static inline bool has_sse()
#ifdef __arm__
return false;
#else
int cpu_info[4] = { 0 };
cpuid( CPU_INFO, cpu_info );
unsigned int cpu_info[4] = { 0 };
cpuid( CPU_INFO, 0, cpu_info );
return cpu_info[ EDX_Reg ] & SSE_Flag;
#endif
}
static inline uint32_t cpuid_get_highest_function_number()
{
uint32_t cpu_info[4] = {0};
cpuid( VENDOR_ID, cpu_info);
unsigned int cpu_info[4] = {0};
cpuid( VENDOR_ID, 0, cpu_info);
return cpu_info[ EAX_Reg ];
}
@@ -605,8 +733,8 @@ static inline void cpu_bestfeature(char *outbuf, size_t maxsz)
#else
int cpu_info[4] = { 0 };
int cpu_info_adv[4] = { 0 };
cpuid( CPU_INFO, cpu_info );
cpuid( EXTENDED_FEATURES, cpu_info_adv );
cpuid( CPU_INFO, 0, cpu_info );
cpuid( EXTENDED_FEATURES, 0, cpu_info_adv );
if ( has_avx() && has_avx2() )
sprintf(outbuf, "AVX2");
@@ -634,14 +762,14 @@ static inline void cpu_brand_string( char* s )
sprintf( s, "ARM" );
#else
int cpu_info[4] = { 0 };
cpuid( VENDOR_ID, cpu_info );
cpuid( VENDOR_ID, 0, cpu_info );
if ( cpu_info[ EAX_Reg ] >= 4 )
{
cpuid( CPU_BRAND_1, cpu_info );
cpuid( CPU_BRAND_1, 0, cpu_info );
memcpy( s, cpu_info, sizeof(cpu_info) );
cpuid( CPU_BRAND_2, cpu_info );
cpuid( CPU_BRAND_2, 0, cpu_info );
memcpy( s + 16, cpu_info, sizeof(cpu_info) );
cpuid( CPU_BRAND_3, cpu_info );
cpuid( CPU_BRAND_3, 0, cpu_info );
memcpy( s + 32, cpu_info, sizeof(cpu_info) );
}
#endif