mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.17.1
This commit is contained in:
@@ -237,6 +237,25 @@ static inline void memset_128( __m128i *dst, const __m128i a, const int n )
|
||||
static inline void memcpy_128( __m128i *dst, const __m128i *src, const int n )
|
||||
{ for ( int i = 0; i < n; i ++ ) dst[i] = src[i]; }
|
||||
|
||||
#if defined(__AVX512VL__)
|
||||
|
||||
// a ^ b ^ c
|
||||
#define mm128_xor3( a, b, c ) \
|
||||
_mm_ternarylogic_epi64( a, b, c, 0x96 )
|
||||
|
||||
// a ^ ( b & c )
|
||||
#define mm128_xorand( a, b, c ) \
|
||||
_mm_ternarylogic_epi64( a, b, c, 0x78 )
|
||||
|
||||
#else
|
||||
|
||||
#define mm128_xor3( a, b, c ) \
|
||||
_mm_xor_si128( a, _mm_xor_si128( b, c ) )
|
||||
|
||||
#define mm128_xorand( a, b, c ) \
|
||||
_mm_xor_si128( a, _mm_and_si128( b, c ) )
|
||||
|
||||
#endif
|
||||
|
||||
//
|
||||
// Bit rotations
|
||||
|
@@ -275,15 +275,17 @@ static inline void memcpy_256( __m256i *dst, const __m256i *src, const int n )
|
||||
|
||||
//
|
||||
// Rotate elements accross all lanes.
|
||||
//
|
||||
// Swap 128 bit elements in 256 bit vector.
|
||||
#define mm256_swap_128( v ) _mm256_permute4x64_epi64( v, 0x4e )
|
||||
|
||||
// Rotate 256 bit vector by one 64 bit element
|
||||
#define mm256_ror_1x64( v ) _mm256_permute4x64_epi64( v, 0x39 )
|
||||
#define mm256_rol_1x64( v ) _mm256_permute4x64_epi64( v, 0x93 )
|
||||
#if defined(__AVX512VL__)
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512VL__)
|
||||
static inline __m256i mm256_swap_128( const __m256i v )
|
||||
{ return _mm256_alignr_epi64( v, v, 2 ); }
|
||||
|
||||
static inline __m256i mm256_ror_1x64( const __m256i v )
|
||||
{ return _mm256_alignr_epi64( v, v, 1 ); }
|
||||
|
||||
static inline __m256i mm256_rol_1x64( const __m256i v )
|
||||
{ return _mm256_alignr_epi64( v, v, 3 ); }
|
||||
|
||||
static inline __m256i mm256_ror_1x32( const __m256i v )
|
||||
{ return _mm256_alignr_epi32( v, v, 1 ); }
|
||||
@@ -293,6 +295,13 @@ static inline __m256i mm256_rol_1x32( const __m256i v )
|
||||
|
||||
#else // AVX2
|
||||
|
||||
// Swap 128 bit elements in 256 bit vector.
|
||||
#define mm256_swap_128( v ) _mm256_permute4x64_epi64( v, 0x4e )
|
||||
|
||||
// Rotate 256 bit vector by one 64 bit element
|
||||
#define mm256_ror_1x64( v ) _mm256_permute4x64_epi64( v, 0x39 )
|
||||
#define mm256_rol_1x64( v ) _mm256_permute4x64_epi64( v, 0x93 )
|
||||
|
||||
// Rotate 256 bit vector by one 32 bit element.
|
||||
#define mm256_ror_1x32( v ) \
|
||||
_mm256_permutevar8x32_epi32( v, \
|
||||
@@ -304,6 +313,7 @@ static inline __m256i mm256_rol_1x32( const __m256i v )
|
||||
m256_const_64( 0x0000000600000005, 0x0000000400000003, \
|
||||
0x0000000200000001, 0x0000000000000007 )
|
||||
|
||||
|
||||
#endif // AVX512 else AVX2
|
||||
|
||||
//
|
||||
|
Reference in New Issue
Block a user