This commit is contained in:
Jay D Dee
2021-07-26 15:01:37 -04:00
parent 92b3733925
commit 9b905fccc8
33 changed files with 889 additions and 565 deletions

View File

@@ -237,6 +237,25 @@ static inline void memset_128( __m128i *dst, const __m128i a, const int n )
static inline void memcpy_128( __m128i *dst, const __m128i *src, const int n )
{ for ( int i = 0; i < n; i ++ ) dst[i] = src[i]; }
#if defined(__AVX512VL__)
// a ^ b ^ c
#define mm128_xor3( a, b, c ) \
_mm_ternarylogic_epi64( a, b, c, 0x96 )
// a ^ ( b & c )
#define mm128_xorand( a, b, c ) \
_mm_ternarylogic_epi64( a, b, c, 0x78 )
#else
#define mm128_xor3( a, b, c ) \
_mm_xor_si128( a, _mm_xor_si128( b, c ) )
#define mm128_xorand( a, b, c ) \
_mm_xor_si128( a, _mm_and_si128( b, c ) )
#endif
//
// Bit rotations

View File

@@ -275,15 +275,17 @@ static inline void memcpy_256( __m256i *dst, const __m256i *src, const int n )
//
// Rotate elements accross all lanes.
//
// Swap 128 bit elements in 256 bit vector.
#define mm256_swap_128( v ) _mm256_permute4x64_epi64( v, 0x4e )
// Rotate 256 bit vector by one 64 bit element
#define mm256_ror_1x64( v ) _mm256_permute4x64_epi64( v, 0x39 )
#define mm256_rol_1x64( v ) _mm256_permute4x64_epi64( v, 0x93 )
#if defined(__AVX512VL__)
#if defined(__AVX512F__) && defined(__AVX512VL__)
static inline __m256i mm256_swap_128( const __m256i v )
{ return _mm256_alignr_epi64( v, v, 2 ); }
static inline __m256i mm256_ror_1x64( const __m256i v )
{ return _mm256_alignr_epi64( v, v, 1 ); }
static inline __m256i mm256_rol_1x64( const __m256i v )
{ return _mm256_alignr_epi64( v, v, 3 ); }
static inline __m256i mm256_ror_1x32( const __m256i v )
{ return _mm256_alignr_epi32( v, v, 1 ); }
@@ -293,6 +295,13 @@ static inline __m256i mm256_rol_1x32( const __m256i v )
#else // AVX2
// Swap 128 bit elements in 256 bit vector.
#define mm256_swap_128( v ) _mm256_permute4x64_epi64( v, 0x4e )
// Rotate 256 bit vector by one 64 bit element
#define mm256_ror_1x64( v ) _mm256_permute4x64_epi64( v, 0x39 )
#define mm256_rol_1x64( v ) _mm256_permute4x64_epi64( v, 0x93 )
// Rotate 256 bit vector by one 32 bit element.
#define mm256_ror_1x32( v ) \
_mm256_permutevar8x32_epi32( v, \
@@ -304,6 +313,7 @@ static inline __m256i mm256_rol_1x32( const __m256i v )
m256_const_64( 0x0000000600000005, 0x0000000400000003, \
0x0000000200000001, 0x0000000000000007 )
#endif // AVX512 else AVX2
//