mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.21.3
This commit is contained in:
@@ -409,19 +409,20 @@ static inline __m512i mm512_shuflr_x64( const __m512i v, const int n )
|
||||
// Rotate the whole 512 bit vector right by n 32 bit elements: alignr
// concatenates v with itself and shifts the pair right by n elements,
// keeping the low 512 bits.
// NOTE(review): the Intel documentation lists the count operand of
// _mm512_alignr_epi32 as an immediate, so n presumably has to resolve to a
// compile time constant after inlining - confirm for unoptimized builds.
static inline __m512i mm512_shuflr_x32( const __m512i v, const int n )
|
||||
{ return _mm512_alignr_epi32( v, v, n ); }
|
||||
|
||||
/* Not used
|
||||
#define mm512_shuflr_16( v ) \
|
||||
_mm512_permutexvar_epi16( m512_const_64( \
|
||||
0x0000001F001E001D, 0x001C001B001A0019, \
|
||||
0X0018001700160015, 0X0014001300120011, \
|
||||
0X0010000F000E000D, 0X000C000B000A0009, \
|
||||
0X0008000700060005, 0X0004000300020001 ), v )
|
||||
0x0018001700160015, 0x0014001300120011, \
|
||||
0x0010000F000E000D, 0x000C000B000A0009, \
|
||||
0x0008000700060005, 0x0004000300020001 ), v )
|
||||
|
||||
#define mm512_shufll_16( v ) \
|
||||
_mm512_permutexvar_epi16( m512_const_64( \
|
||||
0x001E001D001C001B, 0x001A001900180017, \
|
||||
0X0016001500140013, 0X001200110010000F, \
|
||||
0X000E000D000C000B, 0X000A000900080007, \
|
||||
0X0006000500040003, 0X000200010000001F ), v )
|
||||
0x0016001500140013, 0x001200110010000F, \
|
||||
0x000E000D000C000B, 0x000A000900080007, \
|
||||
0x0006000500040003, 0x000200010000001F ), v )
|
||||
|
||||
#define mm512_shuflr_8( v ) \
|
||||
_mm512_shuffle_epi8( v, m512_const_64( \
|
||||
@@ -436,6 +437,7 @@ static inline __m512i mm512_shuflr_x32( const __m512i v, const int n )
|
||||
0x2E2D2C2B2A292827, 0x262524232221201F, \
|
||||
0x1E1D1C1B1A191817, 0x161514131211100F, \
|
||||
0x0E0D0C0B0A090807, 0x060504030201003F ) )
|
||||
*/
|
||||
|
||||
// 256 bit lanes used only by lyra2, move these there
|
||||
// Rotate elements within 256 bit lanes of 512 bit vector.
|
||||
@@ -449,7 +451,7 @@ static inline __m512i mm512_shuflr_x32( const __m512i v, const int n )
|
||||
// Rotate each 256 bit lane right by one 64 bit element: control 0x39 picks
// source elements 1,2,3,0 for destination elements 0,1,2,3.
#define mm512_shuflr256_64( v ) _mm512_permutex_epi64( v, 0x39 )
|
||||
// Rotate each 256 bit lane left by one 64 bit element: control 0x93 picks
// source elements 3,0,1,2 for destination elements 0,1,2,3.
#define mm512_shufll256_64( v ) _mm512_permutex_epi64( v, 0x93 )
|
||||
|
||||
/*
|
||||
/* Not used
|
||||
// Rotate 256 bit lanes by one 32 bit element
|
||||
#define mm512_shuflr256_32( v ) \
|
||||
_mm512_permutexvar_epi32( m512_const_64( \
|
||||
@@ -496,6 +498,7 @@ static inline __m512i mm512_shuflr_x32( const __m512i v, const int n )
|
||||
//
|
||||
// Shuffle/rotate elements within 128 bit lanes of 512 bit vector.
|
||||
|
||||
/* Not used
|
||||
// Limited 2 input, 1 output shuffle, combines shuffle with blend.
|
||||
// Like most shuffles it's limited to 128 bit lanes and like some shuffles
|
||||
// destination elements must come from a specific source arg.
|
||||
@@ -506,7 +509,10 @@ static inline __m512i mm512_shuflr_x32( const __m512i v, const int n )
|
||||
// 32 bit flavour of the two input shuffle: v1 and v2 are reinterpreted as
// packed floats so _mm512_shuffle_ps can be applied, and the result is cast
// back to an integer vector. c is handed to the intrinsic unchanged as the
// shuffle control.
// The expansion deliberately has no trailing semicolon so the macro can be
// used inside a larger expression or as a function argument.
#define mm512_shuffle2_32( v1, v2, c ) \
   _mm512_castps_si512( _mm512_shuffle_ps( _mm512_castsi512_ps( v1 ), \
                                           _mm512_castsi512_ps( v2 ), c ) )
|
||||
*/
|
||||
|
||||
// These hard coded shuffles exist for consistency with AVX2 & SSE2 where
|
||||
// efficient generic versions don't exist.
|
||||
// Swap 64 bits in each 128 bit lane
|
||||
// Control 0x4e picks source elements 2,3,0,1 for destination 32 bit elements
// 0,1,2,3 of every 128 bit lane, so the two 64 bit halves trade places.
#define mm512_swap128_64( v ) _mm512_shuffle_epi32( v, 0x4e )
|
||||
// Rotating a 128 bit lane by 64 bits is the same operation as swapping its
// halves, so the rotate name is a plain alias of the swap macro.
#define mm512_shuflr128_64 mm512_swap128_64
|
||||
@@ -516,9 +522,11 @@ static inline __m512i mm512_shuflr_x32( const __m512i v, const int n )
|
||||
// Rotate each 128 bit lane right by one 32 bit element: control 0x39 picks
// source elements 1,2,3,0 for destination elements 0,1,2,3.
#define mm512_shuflr128_32( v ) _mm512_shuffle_epi32( v, 0x39 )
|
||||
// Rotate each 128 bit lane left by one 32 bit element: control 0x93 picks
// source elements 3,0,1,2 for destination elements 0,1,2,3.
#define mm512_shufll128_32( v ) _mm512_shuffle_epi32( v, 0x93 )
|
||||
|
||||
// Rotate right 128 bit lanes by c bytes, versatile and just as fast
|
||||
/* Not used
|
||||
// Rotate right 128 bit lanes by c bytes, efficient generic version of above.
|
||||
// NOTE(review): per the Intel documentation _mm512_alignr_epi8 operates on
// each 128 bit lane separately and takes its byte count as an immediate, so
// c presumably must be a compile time constant, and the result is only a
// pure rotation while c does not exceed 16 - confirm before re-enabling.
static inline __m512i mm512_shuflr128_8( const __m512i v, const int c )
|
||||
{ return _mm512_alignr_epi8( v, v, c ); }
|
||||
*/
|
||||
|
||||
// Rotate byte elements in each 64 or 32 bit lane. Redundant for AVX512, all
|
||||
// can be done with ror & rol. Defined only for convenience and consistency
|
||||
|
||||
Reference in New Issue
Block a user