mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v3.23.3
This commit is contained in:
@@ -738,10 +738,10 @@ static inline void extr_lane_8x32( void *d, const void *s,
|
||||
// Combine byte swap & broadcast in one permute
|
||||
static inline void mm256_bswap32_intrlv80_8x32( void *d, const void *src )
|
||||
{
|
||||
const __m256i c0 = _mm256_set1_epi32( 0x00010203 );
|
||||
const __m256i c1 = _mm256_set1_epi32( 0x04050607 );
|
||||
const __m256i c2 = _mm256_set1_epi32( 0x08090a0b );
|
||||
const __m256i c3 = _mm256_set1_epi32( 0x0c0d0e0f );
|
||||
const __m256i c0 = v256_32( 0x00010203 );
|
||||
const __m256i c1 = v256_32( 0x04050607 );
|
||||
const __m256i c2 = v256_32( 0x08090a0b );
|
||||
const __m256i c3 = v256_32( 0x0c0d0e0f );
|
||||
const __m128i s0 = casti_m128i( src,0 );
|
||||
const __m128i s1 = casti_m128i( src,1 );
|
||||
const __m128i s2 = casti_m128i( src,2 );
|
||||
@@ -796,7 +796,7 @@ static inline void mm256_bswap32_intrlv80_8x32( void *d, const void *src )
|
||||
{
|
||||
const __m128i bswap_shuf = _mm_set_epi64x( 0x0c0d0e0f08090a0b,
|
||||
0x0405060700010203 );
|
||||
const __m256i c1 = _mm256_set1_epi32( 1 );
|
||||
const __m256i c1 = v256_32( 1 );
|
||||
const __m256i c2 = _mm256_add_epi32( c1, c1 );
|
||||
const __m256i c3 = _mm256_add_epi32( c2, c1 );
|
||||
|
||||
@@ -1244,10 +1244,10 @@ static inline void extr_lane_16x32( void *d, const void *s,
|
||||
// Combine byte swap & broadcast in one permute
|
||||
static inline void mm512_bswap32_intrlv80_16x32( void *d, const void *src )
|
||||
{
|
||||
const __m512i c0 = _mm512_set1_epi32( 0x00010203 );
|
||||
const __m512i c1 = _mm512_set1_epi32( 0x04050607 );
|
||||
const __m512i c2 = _mm512_set1_epi32( 0x08090a0b );
|
||||
const __m512i c3 = _mm512_set1_epi32( 0x0c0d0e0f );
|
||||
const __m512i c0 = v512_32( 0x00010203 );
|
||||
const __m512i c1 = v512_32( 0x04050607 );
|
||||
const __m512i c2 = v512_32( 0x08090a0b );
|
||||
const __m512i c3 = v512_32( 0x0c0d0e0f );
|
||||
const __m128i s0 = casti_m128i( src,0 );
|
||||
const __m128i s1 = casti_m128i( src,1 );
|
||||
const __m128i s2 = casti_m128i( src,2 );
|
||||
@@ -1302,7 +1302,7 @@ static inline void mm512_bswap32_intrlv80_16x32( void *d, const void *src )
|
||||
{
|
||||
const __m128i bswap_shuf = _mm_set_epi64x( 0x0c0d0e0f08090a0b,
|
||||
0x0405060700010203 );
|
||||
const __m512i c1 = _mm512_set1_epi32( 1 );
|
||||
const __m512i c1 = v512_32( 1 );
|
||||
const __m512i c2 = _mm512_add_epi32( c1, c1 );
|
||||
const __m512i c3 = _mm512_add_epi32( c2, c1 );
|
||||
__m128i s0 = casti_m128i( src,0 );
|
||||
@@ -1566,8 +1566,8 @@ static inline void mm256_intrlv80_4x64( void *d, const void *src )
|
||||
|
||||
static inline void mm256_bswap32_intrlv80_4x64( void *d, const void *src )
|
||||
{
|
||||
const __m256i c0 = _mm256_set1_epi64x( 0x0405060700010203 );
|
||||
const __m256i c1 = _mm256_set1_epi64x( 0x0c0d0e0f08090a0b );
|
||||
const __m256i c0 = v256_64( 0x0405060700010203 );
|
||||
const __m256i c1 = v256_64( 0x0c0d0e0f08090a0b );
|
||||
const __m128i s0 = casti_m128i( src,0 );
|
||||
const __m128i s1 = casti_m128i( src,1 );
|
||||
const __m128i s2 = casti_m128i( src,2 );
|
||||
@@ -1958,16 +1958,16 @@ static inline void mm512_intrlv80_8x64( void *dst, const void *src )
|
||||
__m512i *d = (__m512i*)dst;
|
||||
const uint64_t *s = (const uint64_t*)src;
|
||||
|
||||
d[0] = _mm512_set1_epi64( s[0] );
|
||||
d[1] = _mm512_set1_epi64( s[1] );
|
||||
d[2] = _mm512_set1_epi64( s[2] );
|
||||
d[3] = _mm512_set1_epi64( s[3] );
|
||||
d[4] = _mm512_set1_epi64( s[4] );
|
||||
d[5] = _mm512_set1_epi64( s[5] );
|
||||
d[6] = _mm512_set1_epi64( s[6] );
|
||||
d[7] = _mm512_set1_epi64( s[7] );
|
||||
d[8] = _mm512_set1_epi64( s[8] );
|
||||
d[9] = _mm512_set1_epi64( s[9] );
|
||||
d[0] = v512_64( s[0] );
|
||||
d[1] = v512_64( s[1] );
|
||||
d[2] = v512_64( s[2] );
|
||||
d[3] = v512_64( s[3] );
|
||||
d[4] = v512_64( s[4] );
|
||||
d[5] = v512_64( s[5] );
|
||||
d[6] = v512_64( s[6] );
|
||||
d[7] = v512_64( s[7] );
|
||||
d[8] = v512_64( s[8] );
|
||||
d[9] = v512_64( s[9] );
|
||||
}
|
||||
|
||||
// byte swap and broadcast to all lanes
|
||||
@@ -1977,8 +1977,8 @@ static inline void mm512_intrlv80_8x64( void *dst, const void *src )
|
||||
// Combine byte swap & broadcast in one permute
|
||||
static inline void mm512_bswap32_intrlv80_8x64( void *d, const void *src )
|
||||
{
|
||||
const __m512i c0 = _mm512_set1_epi64( 0x0405060700010203 );
|
||||
const __m512i c1 = _mm512_set1_epi64( 0x0c0d0e0f08090a0b );
|
||||
const __m512i c0 = v512_64( 0x0405060700010203 );
|
||||
const __m512i c1 = v512_64( 0x0c0d0e0f08090a0b );
|
||||
const __m128i s0 = casti_m128i( src,0 );
|
||||
const __m128i s1 = casti_m128i( src,1 );
|
||||
const __m128i s2 = casti_m128i( src,2 );
|
||||
@@ -2013,7 +2013,7 @@ static inline void mm512_bswap32_intrlv80_8x64( void *d, const void *src )
|
||||
{
|
||||
const __m128i bswap_shuf = _mm_set_epi64x( 0x0c0d0e0f08090a0b,
|
||||
0x0405060700010203 );
|
||||
const __m512i c1 = _mm512_set1_epi64( 1 );
|
||||
const __m512i c1 = v512_64( 1 );
|
||||
__m128i s0 = casti_m128i( src,0 );
|
||||
__m128i s1 = casti_m128i( src,1 );
|
||||
__m128i s2 = casti_m128i( src,2 );
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
#if defined(__SSE2__)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// 128 bit SSE vectors
|
||||
//
|
||||
@@ -20,9 +20,9 @@
|
||||
// define a local const for repeated references to the same constant.
|
||||
//
|
||||
// One common use for simd constants is as a control index for vector
|
||||
// instructions like blend and shuffle. Although the ultimate instruction
|
||||
// may execute in a single clock cycle, generating the control index adds
|
||||
// several more cycles to the entire operation.
|
||||
// shuffle instructions. Although the ultimate instruction may execute in a
|
||||
// single clock cycle, generating the control index adds several more cycles
|
||||
// to the entire operation.
|
||||
//
|
||||
// All of the utilities here assume all data is in registers except
|
||||
// in rare cases where arguments are pointers.
|
||||
@@ -32,7 +32,7 @@
|
||||
// Intrinsics automatically promote from REX to VEX when AVX is available
|
||||
// but ASM needs to be done manually.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
// Used instead of casting.
|
||||
@@ -43,8 +43,8 @@ typedef union
|
||||
} __attribute__ ((aligned (16))) m128_ovly;
|
||||
|
||||
|
||||
#define v128_64(i) _mm_set1_epi64x(i)
|
||||
#define v128_32(i) _mm_set1_epi32(i)
|
||||
#define v128_64(i64) _mm_set1_epi64x(i64)
|
||||
#define v128_32(i32) _mm_set1_epi32(i32)
|
||||
|
||||
// Deprecated. AVX512 adds EVEX encoding (3rd operand) and other improvements
|
||||
// that make these functions either unnecessary or inefficient.
|
||||
@@ -81,8 +81,6 @@ static inline __m128i mm128_mov32_128( const uint32_t n )
|
||||
// Pseudo constants
|
||||
#define m128_zero _mm_setzero_si128()
|
||||
#define m128_one_128 mm128_mov64_128( 1 )
|
||||
//#define m128_one_64 _mm_set1_epi64x( 1 )
|
||||
#define m128_one_32 _mm_set1_epi32( 1 )
|
||||
|
||||
// ASM avoids the need to initialize return variable to avoid compiler warning.
|
||||
// Macro abstracts function parentheses to look like an identifier.
|
||||
@@ -100,7 +98,7 @@ static inline __m128i mm128_neg1_fn()
|
||||
|
||||
#if defined(__SSE4_1__)
|
||||
|
||||
/////////////////////////////
|
||||
/////////////////////////////////////////////////////////////
|
||||
//
|
||||
// _mm_insert_ps( __m128i v1, __m128i v2, imm8 c )
|
||||
//
|
||||
|
||||
@@ -90,10 +90,7 @@ typedef union
|
||||
// code and therefore can't be used as compile time initializers.
|
||||
|
||||
#define m256_zero _mm256_setzero_si256()
|
||||
//#define m256_one_256 mm256_mov64_256( 1 )
|
||||
#define m256_one_128 mm256_bcast_m128( m128_one_128 )
|
||||
#define m256_one_64 _mm256_set1_epi64x( 1 )
|
||||
#define m256_one_32 _mm256_set1_epi32( 1 )
|
||||
|
||||
static inline __m256i mm256_neg1_fn()
|
||||
{
|
||||
|
||||
@@ -97,8 +97,8 @@ typedef union
|
||||
uint64_t u64[8];
|
||||
} __attribute__ ((aligned (64))) m512_ovly;
|
||||
|
||||
#define v512_64(i) _mm512_set1_epi64(i)
|
||||
#define v512_32(i) _mm512_set1_epi32(i)
|
||||
#define v512_64(i64) _mm512_set1_epi64(i64)
|
||||
#define v512_32(i32) _mm512_set1_epi32(i32)
|
||||
|
||||
// A simple 128 bit permute, using function instead of macro avoids
|
||||
// problems if the v arg is passed as an expression.
|
||||
@@ -118,9 +118,6 @@ static inline __m512i mm512_perm_128( const __m512i v, const int c )
|
||||
|
||||
// Pseudo constants.
|
||||
#define m512_zero _mm512_setzero_si512()
|
||||
// Deprecated
|
||||
#define m512_one_64 _mm512_set1_epi64( 1 )
|
||||
#define m512_one_32 _mm512_set1_epi32( 1 )
|
||||
|
||||
// use asm to avoid compiler warning for uninitialized local
|
||||
static inline __m512i mm512_neg1_fn()
|
||||
|
||||
Reference in New Issue
Block a user