mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v25.5
This commit is contained in:
@@ -217,7 +217,9 @@ static inline __m256i mm256_not( const __m256i v )
|
||||
// Equivalent of AVX512 _mm256_movepi64_mask & _mm256_movepi32_mask.
|
||||
// Returns 4 or 8 bit integer mask from MSBit of 64 or 32 bit elements.
|
||||
// Effectively a sign test.
|
||||
|
||||
// The functions return int which can promote small integers to int when used
|
||||
// in an expression. Users should mask the slack bits strategically to maintain
|
||||
// data integrity.
|
||||
#define mm256_movmask_64( v ) \
|
||||
_mm256_movemask_pd( _mm256_castsi256_pd( v ) )
|
||||
|
||||
|
||||
@@ -14,12 +14,6 @@
|
||||
// vectors. It is therefore not technically required for any 512 bit vector
|
||||
// utilities defined below.
|
||||
|
||||
// if avx10 // avx512 is always set
|
||||
// if evex512: yes
|
||||
// else if avx512 : yes // avx512 is set but not avx10
|
||||
// else : no // avx512 not set or avx10.1 is set without evex512
|
||||
|
||||
|
||||
#if defined(SIMD512)
|
||||
|
||||
// AVX512 intrinsics have a few changes from previous conventions.
|
||||
@@ -57,7 +51,7 @@
|
||||
// - if an argument is to be referenced multiple times a C inline function
|
||||
// should be used instead of a macro to prevent an expression argument
|
||||
// from being evaluated multiple times (wasteful) or producing side
|
||||
// effects (very bad).
|
||||
// effects (very bad).
|
||||
//
|
||||
// There are 2 areas where overhead is a major concern: constants and
|
||||
// permutations.
|
||||
|
||||
@@ -4,9 +4,10 @@
|
||||
#if defined(__aarch64__) && defined(__ARM_NEON)
|
||||
|
||||
// Targeted functions supporting NEON SIMD 128 & 64 bit vectors.
|
||||
// Element size matters!
|
||||
//
|
||||
// Intel naming is generally used.
|
||||
// Intel style naming is generally used, however, this is not an attempt to emulate Intel
|
||||
// instructions. It's focussed on the functions used in this program and the best way
|
||||
// to implement them with NEON.
|
||||
//
|
||||
// Some advanced logical operations that require SHA3. Prior to GCC-13
|
||||
// they also require armv8.2
|
||||
|
||||
Reference in New Issue
Block a user