This commit is contained in:
Jay D Dee
2023-09-13 11:48:52 -04:00
parent 4378d2f841
commit d6b5750362
28 changed files with 1626 additions and 1327 deletions

View File

@@ -1216,13 +1216,13 @@ static inline void dintrlv_16x32_512( void *dst00, void *dst01, void *dst02,
static inline void extr_lane_16x32( void *d, const void *s,
const int lane, const int bit_len )
{
((uint32_t*)d)[ 0] = ((const uint32_t*)s)[ lane ];
((uint32_t*)d)[ 1] = ((const uint32_t*)s)[ lane+16 ];
((uint32_t*)d)[ 2] = ((const uint32_t*)s)[ lane+32 ];
((uint32_t*)d)[ 3] = ((const uint32_t*)s)[ lane+48 ];
((uint32_t*)d)[ 4] = ((const uint32_t*)s)[ lane+64 ];
((uint32_t*)d)[ 5] = ((const uint32_t*)s)[ lane+80 ];
((uint32_t*)d)[ 6] = ((const uint32_t*)s)[ lane+96 ];
((uint32_t*)d)[ 0] = ((const uint32_t*)s)[ lane ];
((uint32_t*)d)[ 1] = ((const uint32_t*)s)[ lane+ 16 ];
((uint32_t*)d)[ 2] = ((const uint32_t*)s)[ lane+ 32 ];
((uint32_t*)d)[ 3] = ((const uint32_t*)s)[ lane+ 48 ];
((uint32_t*)d)[ 4] = ((const uint32_t*)s)[ lane+ 64 ];
((uint32_t*)d)[ 5] = ((const uint32_t*)s)[ lane+ 80 ];
((uint32_t*)d)[ 6] = ((const uint32_t*)s)[ lane+ 96 ];
((uint32_t*)d)[ 7] = ((const uint32_t*)s)[ lane+112 ];
if ( bit_len <= 256 ) return;
((uint32_t*)d)[ 8] = ((const uint32_t*)s)[ lane+128 ];

View File

@@ -274,11 +274,11 @@ static inline void memcpy_128( __m128i *dst, const __m128i *src, const int n )
// Returns 2 or 4 bit integer mask from MSBit of 64 or 32 bit elements.
// Effectively a sign test.
// 2 bit integer mask built from the most significant (sign) bit of each
// 64 bit element of v.
// v is reinterpreted as packed doubles purely so the SSE2 movemask
// instruction can be used; no numeric conversion happens. The intrinsic
// already yields a plain int, so the result is returned as is rather than
// being cast back to a vector type.
#define mm_movmask_64( v ) \
   _mm_movemask_pd( _mm_castsi128_pd( v ) )
// Sign test of the two 64 bit elements of v: bit i of the integer result is
// the most significant bit of element i. The vector cast only reinterprets
// the bits of v as packed doubles.
#define mm128_movmask_64( v )   ( _mm_movemask_pd( (__m128d)(v) ) )
// 4 bit integer mask built from the most significant (sign) bit of each
// 32 bit element of v.
// v is reinterpreted as packed floats purely so the SSE movemask
// instruction can be used; no numeric conversion happens. The intrinsic
// already yields a plain int, so the result is returned as is rather than
// being cast back to a vector type.
#define mm_movmask_32( v ) \
   _mm_movemask_ps( _mm_castsi128_ps( v ) )
// Sign test of the four 32 bit elements of v: bit i of the integer result is
// the most significant bit of element i. The vector cast only reinterprets
// the bits of v as packed floats.
#define mm128_movmask_32( v )   ( _mm_movemask_ps( (__m128)(v) ) )
//
// Bit rotations

View File

@@ -209,10 +209,10 @@ static inline __m256i mm256_not( const __m256i v )
// Effectively a sign test.
// 4 bit integer mask built from the most significant (sign) bit of each
// 64 bit element of v.
// v is reinterpreted as packed doubles purely so the AVX movemask
// instruction can be used. The intrinsic returns a plain int, so it must
// not be wrapped in a vector cast.
#define mm256_movmask_64( v ) \
   _mm256_movemask_pd( _mm256_castsi256_pd( v ) )
// 8 bit integer mask built from the most significant (sign) bit of each
// 32 bit element of v.
// v is reinterpreted as packed floats purely so the AVX movemask
// instruction can be used. The intrinsic returns a plain int, so it must
// not be wrapped in a vector cast.
#define mm256_movmask_32( v ) \
   _mm256_movemask_ps( _mm256_castsi256_ps( v ) )
//
// Bit rotations.