mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v25.5
This commit is contained in:
@@ -75,6 +75,12 @@ If not what makes it happen or not happen?
|
|||||||
Change Log
|
Change Log
|
||||||
----------
|
----------
|
||||||
|
|
||||||
|
v25.5
|
||||||
|
|
||||||
|
x86_64: Fixed an insidious bug in sha256 early rejection optimization for AVX2 & AVX512.
|
||||||
|
x86_64: Faster sha256d, sha256dt for AVX2 & AVX512.
|
||||||
|
Other small bug fixes.
|
||||||
|
|
||||||
v25.4
|
v25.4
|
||||||
|
|
||||||
x86_64: improved handling of vector constants used for byte permutations.
|
x86_64: improved handling of vector constants used for byte permutations.
|
||||||
|
|||||||
@@ -441,57 +441,6 @@ void sha256_4x32_full( void *dst, const void *data, size_t len )
|
|||||||
W[14] = SHA256_8WAY_MEXP( W[12], W[ 7], W[15], W[14] ); \
|
W[14] = SHA256_8WAY_MEXP( W[12], W[ 7], W[15], W[14] ); \
|
||||||
W[15] = SHA256_8WAY_MEXP( W[13], W[ 8], W[ 0], W[15] );
|
W[15] = SHA256_8WAY_MEXP( W[13], W[ 8], W[ 0], W[15] );
|
||||||
|
|
||||||
#if defined(VL256)
|
|
||||||
// AVX512 or AVX10-256
|
|
||||||
|
|
||||||
#define CHx(X, Y, Z) _mm256_ternarylogic_epi32( X, Y, Z, 0xca )
|
|
||||||
|
|
||||||
#define MAJx(X, Y, Z) _mm256_ternarylogic_epi32( X, Y, Z, 0xe8 )
|
|
||||||
|
|
||||||
#define SHA256_8WAY_ROUND( A, B, C, D, E, F, G, H, i, j ) \
|
|
||||||
do { \
|
|
||||||
__m256i T0 = _mm256_add_epi32( v256_32( K256[ (j)+(i) ] ), \
|
|
||||||
W[ i ] ); \
|
|
||||||
__m256i T1 = BSG2_1x( E ); \
|
|
||||||
__m256i T2 = BSG2_0x( A ); \
|
|
||||||
T0 = _mm256_add_epi32( T0, CHx( E, F, G ) ); \
|
|
||||||
T1 = _mm256_add_epi32( T1, H ); \
|
|
||||||
T2 = _mm256_add_epi32( T2, MAJx( A, B, C ) ); \
|
|
||||||
T1 = _mm256_add_epi32( T1, T0 ); \
|
|
||||||
D = _mm256_add_epi32( D, T1 ); \
|
|
||||||
H = _mm256_add_epi32( T1, T2 ); \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
#define SHA256_8WAY_16ROUNDS( A, B, C, D, E, F, G, H, j ) \
|
|
||||||
SHA256_8WAY_ROUND( A, B, C, D, E, F, G, H, 0, j ); \
|
|
||||||
SHA256_8WAY_ROUND( H, A, B, C, D, E, F, G, 1, j ); \
|
|
||||||
SHA256_8WAY_ROUND( G, H, A, B, C, D, E, F, 2, j ); \
|
|
||||||
SHA256_8WAY_ROUND( F, G, H, A, B, C, D, E, 3, j ); \
|
|
||||||
SHA256_8WAY_ROUND( E, F, G, H, A, B, C, D, 4, j ); \
|
|
||||||
SHA256_8WAY_ROUND( D, E, F, G, H, A, B, C, 5, j ); \
|
|
||||||
SHA256_8WAY_ROUND( C, D, E, F, G, H, A, B, 6, j ); \
|
|
||||||
SHA256_8WAY_ROUND( B, C, D, E, F, G, H, A, 7, j ); \
|
|
||||||
SHA256_8WAY_ROUND( A, B, C, D, E, F, G, H, 8, j ); \
|
|
||||||
SHA256_8WAY_ROUND( H, A, B, C, D, E, F, G, 9, j ); \
|
|
||||||
SHA256_8WAY_ROUND( G, H, A, B, C, D, E, F, 10, j ); \
|
|
||||||
SHA256_8WAY_ROUND( F, G, H, A, B, C, D, E, 11, j ); \
|
|
||||||
SHA256_8WAY_ROUND( E, F, G, H, A, B, C, D, 12, j ); \
|
|
||||||
SHA256_8WAY_ROUND( D, E, F, G, H, A, B, C, 13, j ); \
|
|
||||||
SHA256_8WAY_ROUND( C, D, E, F, G, H, A, B, 14, j ); \
|
|
||||||
SHA256_8WAY_ROUND( B, C, D, E, F, G, H, A, 15, j );
|
|
||||||
|
|
||||||
// Not used with AVX512, needed to satisfy the compiler
|
|
||||||
#define SHA256_8WAY_ROUND_NOMSG( A, B, C, D, E, F, G, H, i, j ) \
|
|
||||||
{ \
|
|
||||||
__m256i T1 = mm256_add4_32( H, BSG2_1x(E), CHx(E, F, G), \
|
|
||||||
v256_32( K256[(i)+(j)] ) ); \
|
|
||||||
__m256i T2 = _mm256_add_epi32( BSG2_0x(A), MAJx(A, B, C) ); \
|
|
||||||
D = _mm256_add_epi32( D, T1 ); \
|
|
||||||
H = _mm256_add_epi32( T1, T2 ); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#else // AVX2
|
|
||||||
|
|
||||||
#define CHx(X, Y, Z) \
|
#define CHx(X, Y, Z) \
|
||||||
_mm256_xor_si256( _mm256_and_si256( _mm256_xor_si256( Y, Z ), X ), Z )
|
_mm256_xor_si256( _mm256_and_si256( _mm256_xor_si256( Y, Z ), X ), Z )
|
||||||
|
|
||||||
@@ -503,61 +452,58 @@ do { \
|
|||||||
|
|
||||||
#define SHA256_8WAY_ROUND_NOMSG( A, B, C, D, E, F, G, H, i, j ) \
|
#define SHA256_8WAY_ROUND_NOMSG( A, B, C, D, E, F, G, H, i, j ) \
|
||||||
{ \
|
{ \
|
||||||
__m256i T1 = mm256_add4_32( H, BSG2_1x(E), CHx(E, F, G), \
|
H = mm256_add4_32( H, BSG2_1x(E), CHx(E, F, G), \
|
||||||
v256_32( K256[(i)+(j)] ) ); \
|
v256_32( K256[(i)+(j)] ) ); \
|
||||||
__m256i T2 = _mm256_add_epi32( BSG2_0x(A), MAJx(A, B, C) ); \
|
__m256i T = _mm256_add_epi32( BSG2_0x(A), MAJx(A, B, C) ); \
|
||||||
Y_xor_Z = X_xor_Y; \
|
Y_xor_Z = X_xor_Y; \
|
||||||
D = _mm256_add_epi32( D, T1 ); \
|
D = _mm256_add_epi32( D, H ); \
|
||||||
H = _mm256_add_epi32( T1, T2 ); \
|
H = _mm256_add_epi32( H, T ); \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define SHA256_8WAY_ROUND( A, B, C, D, E, F, G, H, i, j ) \
|
#define SHA256_8WAY_ROUND( A, B, C, D, E, F, G, H, i, j ) \
|
||||||
do { \
|
{ \
|
||||||
__m256i T0 = _mm256_add_epi32( v256_32( K256[(j)+(i)] ), W[i] ); \
|
__m256i T1 = _mm256_add_epi32( v256_32( K256[(j)+(i)] ), W[i] ); \
|
||||||
__m256i T1 = BSG2_1x( E ); \
|
H = _mm256_add_epi32( H, BSG2_1x( E ) ); \
|
||||||
__m256i T2 = BSG2_0x( A ); \
|
__m256i T2 = BSG2_0x( A ); \
|
||||||
T0 = _mm256_add_epi32( T0, CHx( E, F, G ) ); \
|
T1 = _mm256_add_epi32( T1, CHx( E, F, G ) ); \
|
||||||
T1 = _mm256_add_epi32( T1, H ); \
|
|
||||||
T2 = _mm256_add_epi32( T2, MAJx( A, B, C ) ); \
|
T2 = _mm256_add_epi32( T2, MAJx( A, B, C ) ); \
|
||||||
T1 = _mm256_add_epi32( T1, T0 ); \
|
H = _mm256_add_epi32( H, T1 ); \
|
||||||
Y_xor_Z = X_xor_Y; \
|
Y_xor_Z = X_xor_Y; \
|
||||||
D = _mm256_add_epi32( D, T1 ); \
|
D = _mm256_add_epi32( D, H ); \
|
||||||
H = _mm256_add_epi32( T1, T2 ); \
|
H = _mm256_add_epi32( H, T2 ); \
|
||||||
} while (0)
|
}
|
||||||
|
|
||||||
// read Y_xor_Z, update X_xor_Y
|
// read Y_xor_Z, update X_xor_Y
|
||||||
#define MAJ_2step(X, Y, Z, X_xor_Y, Y_xor_Z ) \
|
#define MAJ_2step(X, Y, Z, X_xor_Y, Y_xor_Z ) \
|
||||||
_mm256_xor_si256( Y, _mm256_and_si256( X_xor_Y = _mm256_xor_si256( X, Y ), \
|
_mm256_xor_si256( Y, _mm256_and_si256( X_xor_Y = _mm256_xor_si256( X, Y ), \
|
||||||
Y_xor_Z ) )
|
Y_xor_Z ) )
|
||||||
|
|
||||||
// start with toc initialized to y^z: toc = B ^ C
|
// start with toc initialized to y^z, toc = B ^ C for first round.
|
||||||
// First round reads toc as Y_xor_Z and saves X_xor_Y as tic.
|
// First round reads toc as Y_xor_Z and saves X_xor_Y as tic.
|
||||||
// Second round reads tic as Y_xor_Z and saves X_xor_Y as toc.
|
// Second round reads tic as Y_xor_Z and saves X_xor_Y as toc.
|
||||||
|
|
||||||
#define SHA256_8WAY_2ROUNDS( A, B, C, D, E, F, G, H, i0, i1, j ) \
|
#define SHA256_8WAY_2ROUNDS( A, B, C, D, E, F, G, H, i0, i1, j ) \
|
||||||
do { \
|
{ \
|
||||||
__m256i T0 = _mm256_add_epi32( v256_32( K256[ (j)+(i0) ] ), \
|
__m256i T1 = _mm256_add_epi32( v256_32( K256[ (j)+(i0) ] ), \
|
||||||
W[ i0 ] ); \
|
W[ i0 ] ); \
|
||||||
__m256i T1 = BSG2_1x( E ); \
|
H = _mm256_add_epi32( H, BSG2_1x( E ) ); \
|
||||||
__m256i T2 = BSG2_0x( A ); \
|
__m256i T2 = BSG2_0x( A ); \
|
||||||
T0 = _mm256_add_epi32( T0, CHx( E, F, G ) ); \
|
T1 = _mm256_add_epi32( T1, CHx( E, F, G ) ); \
|
||||||
T1 = _mm256_add_epi32( T1, H ); \
|
|
||||||
T2 = _mm256_add_epi32( T2, MAJ_2step( A, B, C, tic, toc ) ); \
|
T2 = _mm256_add_epi32( T2, MAJ_2step( A, B, C, tic, toc ) ); \
|
||||||
T1 = _mm256_add_epi32( T1, T0 ); \
|
H = _mm256_add_epi32( H, T1 ); \
|
||||||
D = _mm256_add_epi32( D, T1 ); \
|
D = _mm256_add_epi32( D, H ); \
|
||||||
H = _mm256_add_epi32( T1, T2 ); \
|
H = _mm256_add_epi32( H, T2 ); \
|
||||||
\
|
\
|
||||||
T0 = _mm256_add_epi32( v256_32( K256[ (j)+(i1) ] ), \
|
T1 = _mm256_add_epi32( v256_32( K256[ (j)+(i1) ] ), \
|
||||||
W[ (i1) ] ); \
|
W[ (i1) ] ); \
|
||||||
T1 = BSG2_1x( D ); \
|
G = _mm256_add_epi32( G, BSG2_1x( D ) ); \
|
||||||
T2 = BSG2_0x( H ); \
|
T2 = BSG2_0x( H ); \
|
||||||
T0 = _mm256_add_epi32( T0, CHx( D, E, F ) ); \
|
T1 = _mm256_add_epi32( T1, CHx( D, E, F ) ); \
|
||||||
T1 = _mm256_add_epi32( T1, G ); \
|
|
||||||
T2 = _mm256_add_epi32( T2, MAJ_2step( H, A, B, toc, tic ) ); \
|
T2 = _mm256_add_epi32( T2, MAJ_2step( H, A, B, toc, tic ) ); \
|
||||||
T1 = _mm256_add_epi32( T1, T0 ); \
|
G = _mm256_add_epi32( G, T1 ); \
|
||||||
C = _mm256_add_epi32( C, T1 ); \
|
C = _mm256_add_epi32( C, G ); \
|
||||||
G = _mm256_add_epi32( T1, T2 ); \
|
G = _mm256_add_epi32( G, T2 ); \
|
||||||
} while (0)
|
}
|
||||||
|
|
||||||
#define SHA256_8WAY_16ROUNDS( A, B, C, D, E, F, G, H, j ) \
|
#define SHA256_8WAY_16ROUNDS( A, B, C, D, E, F, G, H, j ) \
|
||||||
{ \
|
{ \
|
||||||
@@ -572,8 +518,6 @@ do { \
|
|||||||
SHA256_8WAY_2ROUNDS( C, D, E, F, G, H, A, B, 14, 15, j ); \
|
SHA256_8WAY_2ROUNDS( C, D, E, F, G, H, A, B, 14, 15, j ); \
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // AVX512VL else AVX2
|
|
||||||
|
|
||||||
static inline void SHA256_8WAY_TRANSFORM( __m256i *out, __m256i *W,
|
static inline void SHA256_8WAY_TRANSFORM( __m256i *out, __m256i *W,
|
||||||
const __m256i *in ) \
|
const __m256i *in ) \
|
||||||
{
|
{
|
||||||
@@ -650,9 +594,7 @@ void sha256_8x32_prehash_3rounds( __m256i *state_mid, __m256i *X,
|
|||||||
G = _mm256_load_si256( state_in + 6 );
|
G = _mm256_load_si256( state_in + 6 );
|
||||||
H = _mm256_load_si256( state_in + 7 );
|
H = _mm256_load_si256( state_in + 7 );
|
||||||
|
|
||||||
#if !defined(VL256)
|
|
||||||
__m256i X_xor_Y, Y_xor_Z = _mm256_xor_si256( B, C );
|
__m256i X_xor_Y, Y_xor_Z = _mm256_xor_si256( B, C );
|
||||||
#endif
|
|
||||||
|
|
||||||
SHA256_8WAY_ROUND( A, B, C, D, E, F, G, H, 0, 0 );
|
SHA256_8WAY_ROUND( A, B, C, D, E, F, G, H, 0, 0 );
|
||||||
SHA256_8WAY_ROUND( H, A, B, C, D, E, F, G, 1, 0 );
|
SHA256_8WAY_ROUND( H, A, B, C, D, E, F, G, 1, 0 );
|
||||||
@@ -692,9 +634,7 @@ void sha256_8x32_final_rounds( __m256i *state_out, const __m256i *data,
|
|||||||
G = _mm256_load_si256( state_mid + 6 );
|
G = _mm256_load_si256( state_mid + 6 );
|
||||||
H = _mm256_load_si256( state_mid + 7 );
|
H = _mm256_load_si256( state_mid + 7 );
|
||||||
|
|
||||||
#if !defined(VL256)
|
|
||||||
__m256i X_xor_Y, Y_xor_Z = _mm256_xor_si256( F, G );
|
__m256i X_xor_Y, Y_xor_Z = _mm256_xor_si256( F, G );
|
||||||
#endif
|
|
||||||
|
|
||||||
// round 3 part 2, add nonces
|
// round 3 part 2, add nonces
|
||||||
A = _mm256_add_epi32( A, W[3] );
|
A = _mm256_add_epi32( A, W[3] );
|
||||||
@@ -779,10 +719,10 @@ void sha256_8x32_final_rounds( __m256i *state_out, const __m256i *data,
|
|||||||
int sha256_8x32_transform_le_short( __m256i *state_out, const __m256i *data,
|
int sha256_8x32_transform_le_short( __m256i *state_out, const __m256i *data,
|
||||||
const __m256i *state_in, const uint32_t *target )
|
const __m256i *state_in, const uint32_t *target )
|
||||||
{
|
{
|
||||||
__m256i A, B, C, D, E, F, G, H, T0, T1, T2;
|
__m256i A, B, C, D, E, F, G, H, G57, H56;
|
||||||
__m256i vmask, targ, hash;
|
__m256i vmask, targ, hash;
|
||||||
__m256i W[16]; memcpy_256( W, data, 16 );
|
__m256i W[16]; memcpy_256( W, data, 16 );
|
||||||
uint8_t flip, t6_mask;
|
uint8_t flip, t6_mask, t7_mask;
|
||||||
|
|
||||||
A = _mm256_load_si256( state_in );
|
A = _mm256_load_si256( state_in );
|
||||||
B = _mm256_load_si256( state_in+1 );
|
B = _mm256_load_si256( state_in+1 );
|
||||||
@@ -793,12 +733,10 @@ int sha256_8x32_transform_le_short( __m256i *state_out, const __m256i *data,
|
|||||||
G = _mm256_load_si256( state_in+6 );
|
G = _mm256_load_si256( state_in+6 );
|
||||||
H = _mm256_load_si256( state_in+7 );
|
H = _mm256_load_si256( state_in+7 );
|
||||||
|
|
||||||
const __m256i IV7 = H;
|
const __m256i istate6 = G;
|
||||||
const __m256i IV6 = G;
|
const __m256i istate7 = H;
|
||||||
|
|
||||||
#if !defined(VL256)
|
|
||||||
__m256i X_xor_Y, Y_xor_Z = _mm256_xor_si256( B, C );
|
__m256i X_xor_Y, Y_xor_Z = _mm256_xor_si256( B, C );
|
||||||
#endif
|
|
||||||
|
|
||||||
// rounds 0 to 16, ignore zero padding W[9..14]
|
// rounds 0 to 16, ignore zero padding W[9..14]
|
||||||
SHA256_8WAY_ROUND( A, B, C, D, E, F, G, H, 0, 0 );
|
SHA256_8WAY_ROUND( A, B, C, D, E, F, G, H, 0, 0 );
|
||||||
@@ -841,11 +779,9 @@ int sha256_8x32_transform_le_short( __m256i *state_out, const __m256i *data,
|
|||||||
W[11] = SHA256_8WAY_MEXP( W[ 9], W[ 4], W[12], W[11] );
|
W[11] = SHA256_8WAY_MEXP( W[ 9], W[ 4], W[12], W[11] );
|
||||||
W[12] = SHA256_8WAY_MEXP( W[10], W[ 5], W[13], W[12] );
|
W[12] = SHA256_8WAY_MEXP( W[10], W[ 5], W[13], W[12] );
|
||||||
|
|
||||||
#if !defined(VL256)
|
|
||||||
Y_xor_Z = _mm256_xor_si256( B, C );
|
Y_xor_Z = _mm256_xor_si256( B, C );
|
||||||
#endif
|
|
||||||
|
|
||||||
// rounds 48 to 57
|
// Rounds 48 to 55
|
||||||
SHA256_8WAY_ROUND( A, B, C, D, E, F, G, H, 0, 48 );
|
SHA256_8WAY_ROUND( A, B, C, D, E, F, G, H, 0, 48 );
|
||||||
SHA256_8WAY_ROUND( H, A, B, C, D, E, F, G, 1, 48 );
|
SHA256_8WAY_ROUND( H, A, B, C, D, E, F, G, 1, 48 );
|
||||||
SHA256_8WAY_ROUND( G, H, A, B, C, D, E, F, 2, 48 );
|
SHA256_8WAY_ROUND( G, H, A, B, C, D, E, F, 2, 48 );
|
||||||
@@ -854,77 +790,83 @@ int sha256_8x32_transform_le_short( __m256i *state_out, const __m256i *data,
|
|||||||
SHA256_8WAY_ROUND( D, E, F, G, H, A, B, C, 5, 48 );
|
SHA256_8WAY_ROUND( D, E, F, G, H, A, B, C, 5, 48 );
|
||||||
SHA256_8WAY_ROUND( C, D, E, F, G, H, A, B, 6, 48 );
|
SHA256_8WAY_ROUND( C, D, E, F, G, H, A, B, 6, 48 );
|
||||||
SHA256_8WAY_ROUND( B, C, D, E, F, G, H, A, 7, 48 );
|
SHA256_8WAY_ROUND( B, C, D, E, F, G, H, A, 7, 48 );
|
||||||
SHA256_8WAY_ROUND( A, B, C, D, E, F, G, H, 8, 48 );
|
|
||||||
SHA256_8WAY_ROUND( H, A, B, C, D, E, F, G, 9, 48 );
|
|
||||||
|
|
||||||
// round 58 to 60 part 1
|
// Round 56
|
||||||
T0 = _mm256_add_epi32( v256_32( K256[58] ),
|
H = _mm256_add_epi32( v256_32( K256[56] ),
|
||||||
|
mm256_add4_32( BSG2_1x( E ), CHx( E, F, G ), W[ 8], H ) );
|
||||||
|
D = _mm256_add_epi32( D, H );
|
||||||
|
H56 = _mm256_add_epi32( H, _mm256_add_epi32( BSG2_0x( A ),
|
||||||
|
MAJx( A, B, C ) ) );
|
||||||
|
Y_xor_Z = X_xor_Y;
|
||||||
|
|
||||||
|
// Rounds 57 to 60 part 1
|
||||||
|
G = _mm256_add_epi32( v256_32( K256[57] ),
|
||||||
|
mm256_add4_32( BSG2_1x( D ), CHx( D, E, F ), W[ 9], G ) );
|
||||||
|
C = _mm256_add_epi32( C, G );
|
||||||
|
G57 = _mm256_add_epi32( G, MAJx( H56, A, B ) );
|
||||||
|
|
||||||
|
F = _mm256_add_epi32( v256_32( K256[58] ),
|
||||||
mm256_add4_32( BSG2_1x( C ), CHx( C, D, E ), W[10], F ) );
|
mm256_add4_32( BSG2_1x( C ), CHx( C, D, E ), W[10], F ) );
|
||||||
B = _mm256_add_epi32( B, T0 );
|
B = _mm256_add_epi32( B, F );
|
||||||
|
|
||||||
T1 = _mm256_add_epi32( v256_32( K256[59] ),
|
E = _mm256_add_epi32( v256_32( K256[59] ),
|
||||||
mm256_add4_32( BSG2_1x( B ), CHx( B, C, D ), W[11], E ) );
|
mm256_add4_32( BSG2_1x( B ), CHx( B, C, D ), W[11], E ) );
|
||||||
A = _mm256_add_epi32( A, T1 );
|
A = _mm256_add_epi32( A, E );
|
||||||
|
|
||||||
T2 = _mm256_add_epi32( v256_32( K256[60] ),
|
D = _mm256_add_epi32( v256_32( K256[60] ),
|
||||||
mm256_add4_32( BSG2_1x( A ), CHx( A, B, C ), W[12], D ) );
|
mm256_add4_32( BSG2_1x( A ), CHx( A, B, C ), W[12], D ) );
|
||||||
H = _mm256_add_epi32( H, T2 );
|
H = _mm256_add_epi32( H56, D );
|
||||||
|
|
||||||
// Got H, test it.
|
// Got H, test it.
|
||||||
|
hash = mm256_bswap_32( _mm256_add_epi32( H, istate7 ) );
|
||||||
targ = v256_32( target[7] );
|
targ = v256_32( target[7] );
|
||||||
hash = mm256_bswap_32( _mm256_add_epi32( H, IV7 ) );
|
// A simple unsigned LE test is complicated by the lack of a cmple
|
||||||
if ( target[7] )
|
// instruction, and lack of unsigned compares in AVX2.
|
||||||
{
|
flip = ( (int)target[7] < 0 ? -1 : 0 ) ^ mm256_movmask_32( hash );
|
||||||
flip = ( (int)target[7] < 0 ? -1 : 0 ) ^ mm256_movmask_32( hash );
|
if ( likely( 0xff == ( t7_mask = ( flip ^
|
||||||
if ( likely( 0xff == ( flip ^
|
mm256_movmask_32( _mm256_cmpgt_epi32( hash, targ ) ) ) )))
|
||||||
mm256_movmask_32( _mm256_cmpgt_epi32( hash, targ ) ) ) ))
|
return 0;
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
t6_mask = mm256_movmask_32( vmask =_mm256_cmpeq_epi32( hash, targ ) );
|
t6_mask = mm256_movmask_32( vmask =_mm256_cmpeq_epi32( hash, targ ) );
|
||||||
|
|
||||||
// round 58 part 2
|
// Round 57 part 2
|
||||||
F = _mm256_add_epi32( T0, _mm256_add_epi32( BSG2_0x( G ),
|
G57 = _mm256_add_epi32( G57, BSG2_0x( H56 ) );
|
||||||
MAJx( G, H, A ) ) );
|
Y_xor_Z = X_xor_Y;
|
||||||
// round 61 part 1
|
|
||||||
W[13] = SHA256_8WAY_MEXP( W[11], W[ 6], W[14], W[13] );
|
|
||||||
T0 = _mm256_add_epi32( v256_32( K256[61] ),
|
|
||||||
mm256_add4_32( BSG2_1x( H ), CHx( H, A, B ), W[13], C ) );
|
|
||||||
G = _mm256_add_epi32( G, T0 );
|
|
||||||
|
|
||||||
if ( t6_mask )
|
// Round 61 part 1
|
||||||
|
W[13] = SHA256_8WAY_MEXP( W[11], W[ 6], W[14], W[13] );
|
||||||
|
C = _mm256_add_epi32( v256_32( K256[61] ),
|
||||||
|
mm256_add4_32( BSG2_1x( H ), CHx( H, A, B ), W[13], C ) );
|
||||||
|
G = _mm256_add_epi32( G57, C );
|
||||||
|
|
||||||
|
if ( t6_mask == (0xff & ~t7_mask ) )
|
||||||
{
|
{
|
||||||
// Testing H was inconclusive: hash7 == target7, need to test G
|
// Testing H was inconclusive: hash7 == target7, need to test G
|
||||||
targ = _mm256_and_si256( vmask, v256_32( target[6] ) );
|
targ = _mm256_and_si256( vmask, v256_32( target[6] ) );
|
||||||
hash = mm256_bswap_32( _mm256_add_epi32( G, IV6 ) );
|
hash = mm256_bswap_32( _mm256_add_epi32( G, istate6 ) );
|
||||||
|
flip = ( (int)target[6] < 0 ? -1 : 0 ) ^ mm256_movmask_32( hash );
|
||||||
if ( likely( 0 == ( t6_mask & mm256_movmask_32(
|
if ( likely( 0 != ( t6_mask & ( flip ^
|
||||||
_mm256_cmpeq_epi32( hash, targ ) ) ) ))
|
|
||||||
{
|
|
||||||
flip = ( (int)target[6] < 0 ? -1 : 0 ) ^ mm256_movmask_32( hash );
|
|
||||||
if ( likely( 0 != ( t6_mask & ( flip ^
|
|
||||||
mm256_movmask_32( _mm256_cmpgt_epi32( hash, targ ) ) ) ) ))
|
mm256_movmask_32( _mm256_cmpgt_epi32( hash, targ ) ) ) ) ))
|
||||||
return 0;
|
return 0;
|
||||||
if ( likely( ( target[6] == 0x80000000 )
|
|
||||||
&& ( 0 == ( t6_mask & mm256_movmask_32( _mm256_cmpgt_epi32(
|
|
||||||
hash, _mm256_xor_si256( hash, hash ) ) ) ) ) ))
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
// else inconclusive, testing targ5 isn't practical, fininsh hashing
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// At this point either the hash will be good or the test was inconclusive.
|
// Rounds 58 to 61 part 2
|
||||||
// If the latter it's probably a high target difficulty with a nearly equal
|
F = _mm256_add_epi32( F, _mm256_add_epi32( BSG2_0x( G57 ),
|
||||||
// high difficulty hash that has a good chance of being good.
|
MAJx( G57, H, A ) ) );
|
||||||
|
Y_xor_Z = X_xor_Y;
|
||||||
|
|
||||||
// rounds 59 to 61 part 2
|
E = _mm256_add_epi32( E, _mm256_add_epi32( BSG2_0x( F ),
|
||||||
E = _mm256_add_epi32( T1, _mm256_add_epi32( BSG2_0x( F ),
|
MAJx( F, G57, H ) ) );
|
||||||
MAJx( F, G, H ) ) );
|
Y_xor_Z = X_xor_Y;
|
||||||
D = _mm256_add_epi32( T2, _mm256_add_epi32( BSG2_0x( E ),
|
|
||||||
MAJx( E, F, G ) ) );
|
|
||||||
C = _mm256_add_epi32( T0, _mm256_add_epi32( BSG2_0x( D ),
|
|
||||||
MAJx( D, E, F ) ) );
|
|
||||||
|
|
||||||
// rounds 62 & 63
|
D = _mm256_add_epi32( D, _mm256_add_epi32( BSG2_0x( E ),
|
||||||
|
MAJx( E, F, G57 ) ) );
|
||||||
|
Y_xor_Z = X_xor_Y;
|
||||||
|
|
||||||
|
C = _mm256_add_epi32( C, _mm256_add_epi32( BSG2_0x( D ),
|
||||||
|
MAJx( D, E, F ) ) );
|
||||||
|
Y_xor_Z = X_xor_Y;
|
||||||
|
|
||||||
|
// Rounds 62 & 63
|
||||||
W[14] = SHA256_8WAY_MEXP( W[12], W[ 7], W[15], W[14] );
|
W[14] = SHA256_8WAY_MEXP( W[12], W[ 7], W[15], W[14] );
|
||||||
W[15] = SHA256_8WAY_MEXP( W[13], W[ 8], W[ 0], W[15] );
|
W[15] = SHA256_8WAY_MEXP( W[13], W[ 8], W[ 0], W[15] );
|
||||||
|
|
||||||
@@ -1077,40 +1019,26 @@ void sha256_8x32_full( void *dst, const void *data, size_t len )
|
|||||||
W[15] = SHA256_16WAY_MEXP( W[13], W[ 8], W[ 0], W[15] );
|
W[15] = SHA256_16WAY_MEXP( W[13], W[ 8], W[ 0], W[15] );
|
||||||
|
|
||||||
#define SHA256_16WAY_ROUND( A, B, C, D, E, F, G, H, i, j ) \
|
#define SHA256_16WAY_ROUND( A, B, C, D, E, F, G, H, i, j ) \
|
||||||
do { \
|
{ \
|
||||||
__m512i T0 = _mm512_add_epi32( v512_32( K256[(j)+(i)] ), W[i] ); \
|
__m512i T1 = _mm512_add_epi32( v512_32( K256[(j)+(i)] ), W[i] ); \
|
||||||
__m512i T1 = BSG2_1x16( E ); \
|
H = _mm512_add_epi32( H, BSG2_1x16( E ) ); \
|
||||||
__m512i T2 = BSG2_0x16( A ); \
|
__m512i T2 = BSG2_0x16( A ); \
|
||||||
T0 = _mm512_add_epi32( T0, CHx16( E, F, G ) ); \
|
T1 = _mm512_add_epi32( T1, CHx16( E, F, G ) ); \
|
||||||
T1 = _mm512_add_epi32( T1, H ); \
|
|
||||||
T2 = _mm512_add_epi32( T2, MAJx16( A, B, C ) ); \
|
T2 = _mm512_add_epi32( T2, MAJx16( A, B, C ) ); \
|
||||||
T1 = _mm512_add_epi32( T1, T0 ); \
|
H = _mm512_add_epi32( H, T1 ); \
|
||||||
D = _mm512_add_epi32( D, T1 ); \
|
D = _mm512_add_epi32( D, H ); \
|
||||||
H = _mm512_add_epi32( T1, T2 ); \
|
H = _mm512_add_epi32( H, T2 ); \
|
||||||
} while (0)
|
}
|
||||||
|
|
||||||
#define SHA256_16WAY_ROUND_NOMSG( A, B, C, D, E, F, G, H, i, j ) \
|
#define SHA256_16WAY_ROUND_NOMSG( A, B, C, D, E, F, G, H, i, j ) \
|
||||||
{ \
|
{ \
|
||||||
__m512i T1 = mm512_add4_32( H, BSG2_1x16(E), CHx16(E, F, G), \
|
H = mm512_add4_32( H, BSG2_1x16(E), CHx16(E, F, G), \
|
||||||
v512_32( K256[(i)+(j)] ) ); \
|
v512_32( K256[(i)+(j)] ) ); \
|
||||||
__m512i T2 = _mm512_add_epi32( BSG2_0x16(A), MAJx16(A, B, C) ); \
|
__m512i T = _mm512_add_epi32( BSG2_0x16(A), MAJx16(A, B, C) ); \
|
||||||
D = _mm512_add_epi32( D, T1 ); \
|
D = _mm512_add_epi32( D, H ); \
|
||||||
H = _mm512_add_epi32( T1, T2 ); \
|
H = _mm512_add_epi32( H, T ); \
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
#define SHA256_16WAY_ROUND(A, B, C, D, E, F, G, H, i, j) \
|
|
||||||
do { \
|
|
||||||
__m512i T1, T2; \
|
|
||||||
__m512i K = v512_32( K256[( (j)+(i) )] ); \
|
|
||||||
T1 = _mm512_add_epi32( H, mm512_add4_32( BSG2_1x16(E), CHx16(E, F, G), \
|
|
||||||
K, W[i] ) ); \
|
|
||||||
T2 = _mm512_add_epi32( BSG2_0x16(A), MAJx16(A, B, C) ); \
|
|
||||||
D = _mm512_add_epi32( D, T1 ); \
|
|
||||||
H = _mm512_add_epi32( T1, T2 ); \
|
|
||||||
} while (0)
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define SHA256_16WAY_16ROUNDS( A, B, C, D, E, F, G, H, j ) \
|
#define SHA256_16WAY_16ROUNDS( A, B, C, D, E, F, G, H, j ) \
|
||||||
SHA256_16WAY_ROUND( A, B, C, D, E, F, G, H, 0, j ); \
|
SHA256_16WAY_ROUND( A, B, C, D, E, F, G, H, 0, j ); \
|
||||||
SHA256_16WAY_ROUND( H, A, B, C, D, E, F, G, 1, j ); \
|
SHA256_16WAY_ROUND( H, A, B, C, D, E, F, G, 1, j ); \
|
||||||
@@ -1332,11 +1260,10 @@ void sha256_16x32_final_rounds( __m512i *state_out, const __m512i *data,
|
|||||||
int sha256_16x32_transform_le_short( __m512i *state_out, const __m512i *data,
|
int sha256_16x32_transform_le_short( __m512i *state_out, const __m512i *data,
|
||||||
const __m512i *state_in, const uint32_t *target )
|
const __m512i *state_in, const uint32_t *target )
|
||||||
{
|
{
|
||||||
__m512i A, B, C, D, E, F, G, H, hash, targ;
|
__m512i A, B, C, D, E, F, G, H, hash, targ, G57, H56;
|
||||||
__m512i T0, T1, T2;
|
|
||||||
__m512i W[16]; memcpy_512( W, data, 16 );
|
__m512i W[16]; memcpy_512( W, data, 16 );
|
||||||
__mmask16 t6_mask;
|
__mmask16 mask;
|
||||||
|
|
||||||
A = _mm512_load_si512( state_in );
|
A = _mm512_load_si512( state_in );
|
||||||
B = _mm512_load_si512( state_in+1 );
|
B = _mm512_load_si512( state_in+1 );
|
||||||
C = _mm512_load_si512( state_in+2 );
|
C = _mm512_load_si512( state_in+2 );
|
||||||
@@ -1346,9 +1273,9 @@ int sha256_16x32_transform_le_short( __m512i *state_out, const __m512i *data,
|
|||||||
G = _mm512_load_si512( state_in+6 );
|
G = _mm512_load_si512( state_in+6 );
|
||||||
H = _mm512_load_si512( state_in+7 );
|
H = _mm512_load_si512( state_in+7 );
|
||||||
|
|
||||||
const __m512i IV6 = G;
|
const __m512i istate6 = G;
|
||||||
const __m512i IV7 = H;
|
const __m512i istate7 = H;
|
||||||
|
|
||||||
// rounds 0 to 8
|
// rounds 0 to 8
|
||||||
SHA256_16WAY_ROUND( A, B, C, D, E, F, G, H, 0, 0 );
|
SHA256_16WAY_ROUND( A, B, C, D, E, F, G, H, 0, 0 );
|
||||||
SHA256_16WAY_ROUND( H, A, B, C, D, E, F, G, 1, 0 );
|
SHA256_16WAY_ROUND( H, A, B, C, D, E, F, G, 1, 0 );
|
||||||
@@ -1419,7 +1346,7 @@ int sha256_16x32_transform_le_short( __m512i *state_out, const __m512i *data,
|
|||||||
W[11] = SHA256_16WAY_MEXP( W[ 9], W[ 4], W[12], W[11] );
|
W[11] = SHA256_16WAY_MEXP( W[ 9], W[ 4], W[12], W[11] );
|
||||||
W[12] = SHA256_16WAY_MEXP( W[10], W[ 5], W[13], W[12] );
|
W[12] = SHA256_16WAY_MEXP( W[10], W[ 5], W[13], W[12] );
|
||||||
|
|
||||||
// Rounds 48 to 57
|
// Rounds 48 to 55
|
||||||
SHA256_16WAY_ROUND( A, B, C, D, E, F, G, H, 0, 48 );
|
SHA256_16WAY_ROUND( A, B, C, D, E, F, G, H, 0, 48 );
|
||||||
SHA256_16WAY_ROUND( H, A, B, C, D, E, F, G, 1, 48 );
|
SHA256_16WAY_ROUND( H, A, B, C, D, E, F, G, 1, 48 );
|
||||||
SHA256_16WAY_ROUND( G, H, A, B, C, D, E, F, 2, 48 );
|
SHA256_16WAY_ROUND( G, H, A, B, C, D, E, F, 2, 48 );
|
||||||
@@ -1428,62 +1355,67 @@ int sha256_16x32_transform_le_short( __m512i *state_out, const __m512i *data,
|
|||||||
SHA256_16WAY_ROUND( D, E, F, G, H, A, B, C, 5, 48 );
|
SHA256_16WAY_ROUND( D, E, F, G, H, A, B, C, 5, 48 );
|
||||||
SHA256_16WAY_ROUND( C, D, E, F, G, H, A, B, 6, 48 );
|
SHA256_16WAY_ROUND( C, D, E, F, G, H, A, B, 6, 48 );
|
||||||
SHA256_16WAY_ROUND( B, C, D, E, F, G, H, A, 7, 48 );
|
SHA256_16WAY_ROUND( B, C, D, E, F, G, H, A, 7, 48 );
|
||||||
SHA256_16WAY_ROUND( A, B, C, D, E, F, G, H, 8, 48 );
|
|
||||||
SHA256_16WAY_ROUND( H, A, B, C, D, E, F, G, 9, 48 );
|
|
||||||
|
|
||||||
// rounds 58 to 60 part 1
|
// Round 56
|
||||||
T0 = _mm512_add_epi32( v512_32( K256[58] ),
|
H = _mm512_add_epi32( v512_32( K256[56] ),
|
||||||
mm512_add4_32( BSG2_1x16( C ), CHx16( C, D, E ), W[10], F ) );
|
mm512_add4_32( BSG2_1x16( E ), CHx16( E, F, G ), W[ 8], H ) );
|
||||||
B = _mm512_add_epi32( B, T0 );
|
D = _mm512_add_epi32( D, H );
|
||||||
|
H56 = _mm512_add_epi32( H, _mm512_add_epi32( BSG2_0x16( A ),
|
||||||
|
MAJx16( A, B, C ) ) );
|
||||||
|
|
||||||
T1 = _mm512_add_epi32( v512_32( K256[59] ),
|
// Rounds 57 to 60 part 1
|
||||||
|
G = _mm512_add_epi32( v512_32( K256[57] ),
|
||||||
|
mm512_add4_32( BSG2_1x16( D ), CHx16( D, E, F ), W[ 9], G ) );
|
||||||
|
C = _mm512_add_epi32( C, G );
|
||||||
|
G57 = _mm512_add_epi32( G, MAJx16( H56, A, B ) );
|
||||||
|
|
||||||
|
F = _mm512_add_epi32( v512_32( K256[58] ),
|
||||||
|
mm512_add4_32( BSG2_1x16( C ), CHx16( C, D, E ), W[10], F ) );
|
||||||
|
B = _mm512_add_epi32( B, F );
|
||||||
|
|
||||||
|
E = _mm512_add_epi32( v512_32( K256[59] ),
|
||||||
mm512_add4_32( BSG2_1x16( B ), CHx16( B, C, D ), W[11], E ) );
|
mm512_add4_32( BSG2_1x16( B ), CHx16( B, C, D ), W[11], E ) );
|
||||||
A = _mm512_add_epi32( A, T1 );
|
A = _mm512_add_epi32( A, E );
|
||||||
|
|
||||||
T2 = _mm512_add_epi32( v512_32( K256[60] ),
|
D = _mm512_add_epi32( v512_32( K256[60] ),
|
||||||
mm512_add4_32( BSG2_1x16( A ), CHx16( A, B, C ), W[12], D ) );
|
mm512_add4_32( BSG2_1x16( A ), CHx16( A, B, C ), W[12], D ) );
|
||||||
H = _mm512_add_epi32( H, T2 );
|
H = _mm512_add_epi32( H56, D );
|
||||||
|
|
||||||
// got H, test it against target[7]
|
// got final H, test it against target[7]
|
||||||
hash = mm512_bswap_32( _mm512_add_epi32( H , IV7 ) );
|
hash = mm512_bswap_32( _mm512_add_epi32( H , istate7 ) );
|
||||||
targ = v512_32( target[7] );
|
targ = v512_32( target[7] );
|
||||||
if ( target[7] )
|
if ( likely( 0 == ( mask = _mm512_cmple_epu32_mask( hash, targ ) ) ))
|
||||||
if ( likely( 0 == _mm512_cmple_epu32_mask( hash, targ ) ))
|
|
||||||
return 0;
|
return 0;
|
||||||
t6_mask = _mm512_cmpeq_epi32_mask( hash, targ );
|
|
||||||
|
|
||||||
// round 58 part 2
|
// Round 57 part 2
|
||||||
F = _mm512_add_epi32( T0, _mm512_add_epi32( BSG2_0x16( G ),
|
G57 = _mm512_add_epi32( G57, BSG2_0x16( H56 ) );
|
||||||
MAJx16( G, H, A ) ) );
|
|
||||||
|
// Round 61 part 1
|
||||||
// round 61 part 1
|
|
||||||
W[13] = SHA256_16WAY_MEXP( W[11], W[ 6], W[14], W[13] );
|
W[13] = SHA256_16WAY_MEXP( W[11], W[ 6], W[14], W[13] );
|
||||||
T0 = _mm512_add_epi32( v512_32( K256[61] ),
|
C = _mm512_add_epi32( v512_32( K256[61] ),
|
||||||
mm512_add4_32( BSG2_1x16( H ), CHx16( H, A, B ), W[13], C ) );
|
mm512_add4_32( BSG2_1x16( H ), CHx16( H, A, B ), W[13], C ) );
|
||||||
G = _mm512_add_epi32( G, T0 );
|
G = _mm512_add_epi32( G57, C );
|
||||||
|
|
||||||
// got G, test it against target[6] if indicated
|
// got final G, test it against target[6] if indicated.
|
||||||
if ( (uint16_t)t6_mask )
|
if ( mask == _mm512_cmpeq_epi32_mask( hash, targ ) )
|
||||||
{
|
{
|
||||||
hash = mm512_bswap_32( _mm512_add_epi32( G, IV6 ) );
|
hash = mm512_bswap_32( _mm512_add_epi32( G, istate6 ) );
|
||||||
targ = v512_32( target[6] );
|
targ = v512_32( target[6] );
|
||||||
if ( likely( 0 == _mm512_mask_cmple_epu32_mask( t6_mask, hash, targ ) ))
|
if ( likely( 0 == _mm512_mask_cmple_epu32_mask( mask, hash, targ ) ))
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// round 59 part 2
|
// Round 58 to 61 part 2
|
||||||
E = _mm512_add_epi32( T1, _mm512_add_epi32( BSG2_0x16( F ),
|
F = _mm512_add_epi32( F, _mm512_add_epi32( BSG2_0x16( G57 ),
|
||||||
MAJx16( F, G, H ) ) );
|
MAJx16( G57, H, A ) ) );
|
||||||
|
E = _mm512_add_epi32( E, _mm512_add_epi32( BSG2_0x16( F ),
|
||||||
// round 60 part 2
|
MAJx16( F, G57, H ) ) );
|
||||||
D = _mm512_add_epi32( T2, _mm512_add_epi32( BSG2_0x16( E ),
|
D = _mm512_add_epi32( D, _mm512_add_epi32( BSG2_0x16( E ),
|
||||||
MAJx16( E, F, G ) ) );
|
MAJx16( E, F, G57 ) ) );
|
||||||
|
C = _mm512_add_epi32( C, _mm512_add_epi32( BSG2_0x16( D ),
|
||||||
// round 61 part 2
|
MAJx16( D, E, F ) ) );
|
||||||
C = _mm512_add_epi32( T0, _mm512_add_epi32( BSG2_0x16( D ),
|
|
||||||
MAJx16( D, E, F ) ) );
|
|
||||||
|
|
||||||
// rounds 62, 63
|
// Rounds 62, 63
|
||||||
W[14] = SHA256_16WAY_MEXP( W[12], W[ 7], W[15], W[14] );
|
W[14] = SHA256_16WAY_MEXP( W[12], W[ 7], W[15], W[14] );
|
||||||
W[15] = SHA256_16WAY_MEXP( W[13], W[ 8], W[ 0], W[15] );
|
W[15] = SHA256_16WAY_MEXP( W[13], W[ 8], W[ 0], W[15] );
|
||||||
|
|
||||||
|
|||||||
@@ -783,29 +783,6 @@ void sha512_8x64_ctx( sha512_8x64_context *sc, void *dst, const void *data,
|
|||||||
mm256_ror_64( x, 61 ), \
|
mm256_ror_64( x, 61 ), \
|
||||||
_mm256_srli_epi64( x, 6 ) )
|
_mm256_srli_epi64( x, 6 ) )
|
||||||
|
|
||||||
#if defined(VL256)
|
|
||||||
// 4 way is not used whith AVX512 but will be whith AVX10_256 when it
|
|
||||||
// becomes available.
|
|
||||||
|
|
||||||
#define CH( X, Y, Z ) _mm256_ternarylogic_epi64( X, Y, Z, 0xca )
|
|
||||||
|
|
||||||
#define MAJ( X, Y, Z ) _mm256_ternarylogic_epi64( X, Y, Z, 0xe8 )
|
|
||||||
|
|
||||||
#define SHA3_4WAY_STEP( A, B, C, D, E, F, G, H, i ) \
|
|
||||||
do { \
|
|
||||||
__m256i T0 = _mm256_add_epi64( v256_64( K512[i] ), W[i] ); \
|
|
||||||
__m256i T1 = BSG5_1( E ); \
|
|
||||||
__m256i T2 = BSG5_0( A ); \
|
|
||||||
T0 = _mm256_add_epi64( T0, CH( E, F, G ) ); \
|
|
||||||
T1 = _mm256_add_epi64( T1, H ); \
|
|
||||||
T2 = _mm256_add_epi64( T2, MAJ( A, B, C ) ); \
|
|
||||||
T1 = _mm256_add_epi64( T1, T0 ); \
|
|
||||||
D = _mm256_add_epi64( D, T1 ); \
|
|
||||||
H = _mm256_add_epi64( T1, T2 ); \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
#else // AVX2 only
|
|
||||||
|
|
||||||
#define CH(X, Y, Z) \
|
#define CH(X, Y, Z) \
|
||||||
_mm256_xor_si256( _mm256_and_si256( _mm256_xor_si256( Y, Z ), X ), Z )
|
_mm256_xor_si256( _mm256_and_si256( _mm256_xor_si256( Y, Z ), X ), Z )
|
||||||
|
|
||||||
@@ -827,19 +804,12 @@ do { \
|
|||||||
H = _mm256_add_epi64( T1, T2 ); \
|
H = _mm256_add_epi64( T1, T2 ); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#endif // AVX512VL AVX10_256
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
sha512_4x64_round( sha512_4x64_context *ctx, __m256i *in, __m256i r[8] )
|
sha512_4x64_round( sha512_4x64_context *ctx, __m256i *in, __m256i r[8] )
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
register __m256i A, B, C, D, E, F, G, H;
|
register __m256i A, B, C, D, E, F, G, H;
|
||||||
|
|
||||||
#if !defined(VL256)
|
|
||||||
// Disable for AVX10_256
|
|
||||||
__m256i X_xor_Y, Y_xor_Z;
|
__m256i X_xor_Y, Y_xor_Z;
|
||||||
#endif
|
|
||||||
|
|
||||||
__m256i W[80];
|
__m256i W[80];
|
||||||
|
|
||||||
mm256_block_bswap_64( W , in );
|
mm256_block_bswap_64( W , in );
|
||||||
@@ -872,10 +842,7 @@ sha512_4x64_round( sha512_4x64_context *ctx, __m256i *in, __m256i r[8] )
|
|||||||
H = v256_64( 0x5BE0CD19137E2179 );
|
H = v256_64( 0x5BE0CD19137E2179 );
|
||||||
}
|
}
|
||||||
|
|
||||||
#if !defined(VL256)
|
|
||||||
// Disable for AVX10_256
|
|
||||||
Y_xor_Z = _mm256_xor_si256( B, C );
|
Y_xor_Z = _mm256_xor_si256( B, C );
|
||||||
#endif
|
|
||||||
|
|
||||||
for ( i = 0; i < 80; i += 8 )
|
for ( i = 0; i < 80; i += 8 )
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
# during development. However the information contained may provide compilation
|
# during development. However the information contained may provide compilation
|
||||||
# tips to users.
|
# tips to users.
|
||||||
|
|
||||||
rm cpuminer-arrowlake* cpuminer-graniterapids* cpuminer-avx512-sha-vaes cpuminer-avx512 cpuminer-avx2 cpuminer-avx cpuminer-aes-sse42 cpuminer-sse42 cpuminer-ssse3 cpuminer-sse2 cpuminer-zen cpuminer-zen3 cpuminer-zen4 cpuminer-zen5 cpuminer-alderlake cpuminer-x64 cpuminer-armv8 cpuminer-armv8-aes cpuminer-armv8-sha2 cpuminer-armv8-aes-sha2 > /dev/null
|
./clean-all.sh
|
||||||
|
|
||||||
# AVX512 SHA VAES: Intel Core Icelake, Rocketlake
|
# AVX512 SHA VAES: Intel Core Icelake, Rocketlake
|
||||||
make distclean || echo clean
|
make distclean || echo clean
|
||||||
@@ -18,31 +18,31 @@ strip -s cpuminer
|
|||||||
mv cpuminer cpuminer-avx512-sha-vaes
|
mv cpuminer cpuminer-avx512-sha-vaes
|
||||||
|
|
||||||
# Intel Core Alderlake: AVX2 SHA VAES, needs gcc-12
|
# Intel Core Alderlake: AVX2 SHA VAES, needs gcc-12
|
||||||
#make clean || echo clean
|
make clean || echo clean
|
||||||
#rm -f config.status
|
rm -f config.status
|
||||||
#CFLAGS="-O3 -march=alderlake -Wall" ./configure --with-curl
|
CFLAGS="-O3 -march=alderlake -Wall" ./configure --with-curl
|
||||||
#make -j $(nproc)
|
make -j $(nproc)
|
||||||
#strip -s cpuminer
|
strip -s cpuminer
|
||||||
#mv cpuminer cpuminer-alderlake
|
mv cpuminer cpuminer-alderlake
|
||||||
|
|
||||||
# Intel Core Arrowlake-s: AVX2 SHA512 VAES, needs gcc-14
|
# Intel Core Arrowlake-s: AVX2 SHA512 VAES, needs gcc-14
|
||||||
# Arrowlake-s includes SHA512, Arrowlake does not?
|
# Arrowlake-s includes SHA512, Arrowlake does not?
|
||||||
#make clean || echo clean
|
make clean || echo clean
|
||||||
#rm -f config.status
|
rm -f config.status
|
||||||
#CFLAGS="-O3 -march=arrowlake-s -Wall" ./configure --with-curl
|
CFLAGS="-O3 -march=arrowlake-s -Wall" ./configure --with-curl
|
||||||
#make -j $(nproc)
|
make -j $(nproc)
|
||||||
#strip -s cpuminer
|
strip -s cpuminer
|
||||||
#mv cpuminer cpuminer-arrowlake-s
|
mv cpuminer cpuminer-arrowlake-s
|
||||||
|
|
||||||
# Intel Core Graniterapids: AVX512, SHA256, VAES, needs gcc-14
|
# Intel Core Graniterapids: AVX512, SHA256, VAES, needs gcc-14
|
||||||
# Graniterapids does not build with AVX10, SHA512 or APX.
|
# Graniterapids does not build with AVX10, SHA512 or APX.
|
||||||
# wait for Diamondrapids & gcc-15.
|
# wait for Diamondrapids & gcc-15.
|
||||||
#make clean || echo clean
|
make clean || echo clean
|
||||||
#rm -f config.status
|
rm -f config.status
|
||||||
#CFLAGS="-O3 -march=graniterapids -Wall" ./configure --with-curl
|
CFLAGS="-O3 -march=graniterapids -Wall" ./configure --with-curl
|
||||||
#make -j $(nproc)
|
make -j $(nproc)
|
||||||
#strip -s cpuminer
|
strip -s cpuminer
|
||||||
#mv cpuminer cpuminer-graniterapids
|
mv cpuminer cpuminer-graniterapids
|
||||||
|
|
||||||
# SHA512 AVX10.1
|
# SHA512 AVX10.1
|
||||||
#make clean || echo clean
|
#make clean || echo clean
|
||||||
@@ -69,20 +69,20 @@ mv cpuminer cpuminer-avx512-sha-vaes
|
|||||||
#mv cpuminer cpuminer-diamondrapids
|
#mv cpuminer cpuminer-diamondrapids
|
||||||
|
|
||||||
# Zen5: AVX512 SHA VAES, requires gcc-14.
|
# Zen5: AVX512 SHA VAES, requires gcc-14.
|
||||||
#make clean || echo clean
|
make clean || echo clean
|
||||||
#rm -f config.status
|
rm -f config.status
|
||||||
#CFLAGS="-O3 -march=znver5 -Wall" ./configure --with-curl
|
CFLAGS="-O3 -march=znver5 -Wall" ./configure --with-curl
|
||||||
#make -j $(nproc)
|
make -j $(nproc)
|
||||||
#strip -s cpuminer
|
strip -s cpuminer
|
||||||
#mv cpuminer cpuminer-zen5
|
mv cpuminer cpuminer-zen5
|
||||||
|
|
||||||
# Zen4: AVX512 SHA VAES
|
# Zen4: AVX512 SHA VAES
|
||||||
make clean || echo clean
|
make clean || echo clean
|
||||||
rm -f config.status
|
rm -f config.status
|
||||||
# Zen4: AVX512, SHA, VAES, needs gcc-12.3.
|
# Zen4: AVX512, SHA, VAES, needs gcc-12.3.
|
||||||
#CFLAGS="-O3 -march=znver4 -Wall" ./configure --with-curl
|
CFLAGS="-O3 -march=znver4 -Wall" ./configure --with-curl
|
||||||
# Incomplete list of Zen4 AVX512 extensions but includes all extensions used by cpuminer.
|
# Incomplete list of Zen4 AVX512 extensions but includes all extensions used by cpuminer.
|
||||||
CFLAGS="-O3 -march=znver3 -mavx512f -mavx512cd -mavx512dq -mavx512bw -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512bitalg -mavx512vpopcntdq -Wall" ./configure --with-curl
|
#CFLAGS="-O3 -march=znver3 -mavx512f -mavx512cd -mavx512dq -mavx512bw -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512bitalg -mavx512vpopcntdq -Wall" ./configure --with-curl
|
||||||
make -j $(nproc)
|
make -j $(nproc)
|
||||||
strip -s cpuminer
|
strip -s cpuminer
|
||||||
mv cpuminer cpuminer-zen4
|
mv cpuminer cpuminer-zen4
|
||||||
@@ -115,8 +115,8 @@ mv cpuminer cpuminer-avx2-sha-vaes
|
|||||||
# AVX2 SHA AES: AMD Zen1
|
# AVX2 SHA AES: AMD Zen1
|
||||||
make clean || echo done
|
make clean || echo done
|
||||||
rm -f config.status
|
rm -f config.status
|
||||||
#CFLAGS="-O3 -march=znver1 -maes -Wall" ./configure --with-curl
|
CFLAGS="-O3 -march=znver1 -maes -Wall" ./configure --with-curl
|
||||||
CFLAGS="-O3 -maes -mavx2 -msha -Wall" ./configure --with-curl
|
#CFLAGS="-O3 -maes -mavx2 -msha -Wall" ./configure --with-curl
|
||||||
make -j $(nproc)
|
make -j $(nproc)
|
||||||
strip -s cpuminer
|
strip -s cpuminer
|
||||||
mv cpuminer cpuminer-avx2-sha
|
mv cpuminer cpuminer-avx2-sha
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
#
|
#
|
||||||
# make clean and rm all the targeted executables.
|
# make clean and rm all the targeted executables.
|
||||||
|
|
||||||
rm cpuminer-avx10* cpuminer-arrowlake* cpuminer-graniterapids* cpuminer-avx512-sha-vaes cpuminer-alderlake cpuminer-avx512 cpuminer-avx2 cpuminer-avx cpuminer-aes-sse42 cpuminer-sse2 cpuminer-avx2-sha cpuminer-sse42 cpuminer-ssse3 cpuminer-avx2-sha-vaes cpuminer-zen3 cpuminer-zen4 cpuminer-x64 cpuminer-armv9 cpuminer-armv9-crypto cpuminer-armv9-crypto-sha3 cpuminer-armv8.4-crypto-sha3 cpuminer-armv8.5-crypto-sha3-sve2 cpuminer-armv8-crypto cpuminer-armv8 > /dev/null
|
rm cpuminer-avx10* cpuminer-arrowlake* cpuminer-graniterapids* cpuminer-avx512-sha-vaes cpuminer-alderlake cpuminer-avx512 cpuminer-avx2 cpuminer-avx cpuminer-aes-sse42 cpuminer-sse2 cpuminer-avx2-sha cpuminer-sse42 cpuminer-ssse3 cpuminer-avx2-sha-vaes cpuminer-zen* cpuminer-x64 cpuminer-armv* > /dev/null
|
||||||
|
|
||||||
rm cpuminer-avx512-sha-vaes.exe cpuminer-avx512-sha.exe cpuminer-avx512.exe cpuminer-avx2.exe cpuminer-avx.exe cpuminer-aes-sse42.exe cpuminer-sse2.exe cpuminer-avx2-sha.exe cpuminer-sse42.exe cpuminer-ssse3.exe cpuminer-avx2-sha-vaes.exe cpuminer-zen3.exe cpuminer-zen4.exe cpuminer-x64.exe > /dev/null
|
rm cpuminer-avx512-sha-vaes.exe cpuminer-avx512-sha.exe cpuminer-avx512.exe cpuminer-avx2.exe cpuminer-avx.exe cpuminer-aes-sse42.exe cpuminer-sse2.exe cpuminer-avx2-sha.exe cpuminer-sse42.exe cpuminer-ssse3.exe cpuminer-avx2-sha-vaes.exe cpuminer-zen3.exe cpuminer-zen4.exe cpuminer-x64.exe > /dev/null
|
||||||
|
|
||||||
|
|||||||
28
configure
vendored
28
configure
vendored
@@ -1,6 +1,6 @@
|
|||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
# Guess values for system-dependent variables and create Makefiles.
|
# Guess values for system-dependent variables and create Makefiles.
|
||||||
# Generated by GNU Autoconf 2.71 for cpuminer-opt 25.4.
|
# Generated by GNU Autoconf 2.71 for cpuminer-opt 25.5.
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
|
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
|
||||||
@@ -608,8 +608,8 @@ MAKEFLAGS=
|
|||||||
# Identity of this package.
|
# Identity of this package.
|
||||||
PACKAGE_NAME='cpuminer-opt'
|
PACKAGE_NAME='cpuminer-opt'
|
||||||
PACKAGE_TARNAME='cpuminer-opt'
|
PACKAGE_TARNAME='cpuminer-opt'
|
||||||
PACKAGE_VERSION='25.4'
|
PACKAGE_VERSION='25.5'
|
||||||
PACKAGE_STRING='cpuminer-opt 25.4'
|
PACKAGE_STRING='cpuminer-opt 25.5'
|
||||||
PACKAGE_BUGREPORT=''
|
PACKAGE_BUGREPORT=''
|
||||||
PACKAGE_URL=''
|
PACKAGE_URL=''
|
||||||
|
|
||||||
@@ -1359,7 +1359,7 @@ if test "$ac_init_help" = "long"; then
|
|||||||
# Omit some internal or obsolete options to make the list less imposing.
|
# Omit some internal or obsolete options to make the list less imposing.
|
||||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||||
cat <<_ACEOF
|
cat <<_ACEOF
|
||||||
\`configure' configures cpuminer-opt 25.4 to adapt to many kinds of systems.
|
\`configure' configures cpuminer-opt 25.5 to adapt to many kinds of systems.
|
||||||
|
|
||||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||||
|
|
||||||
@@ -1431,7 +1431,7 @@ fi
|
|||||||
|
|
||||||
if test -n "$ac_init_help"; then
|
if test -n "$ac_init_help"; then
|
||||||
case $ac_init_help in
|
case $ac_init_help in
|
||||||
short | recursive ) echo "Configuration of cpuminer-opt 25.4:";;
|
short | recursive ) echo "Configuration of cpuminer-opt 25.5:";;
|
||||||
esac
|
esac
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
|
|
||||||
@@ -1536,7 +1536,7 @@ fi
|
|||||||
test -n "$ac_init_help" && exit $ac_status
|
test -n "$ac_init_help" && exit $ac_status
|
||||||
if $ac_init_version; then
|
if $ac_init_version; then
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
cpuminer-opt configure 25.4
|
cpuminer-opt configure 25.5
|
||||||
generated by GNU Autoconf 2.71
|
generated by GNU Autoconf 2.71
|
||||||
|
|
||||||
Copyright (C) 2021 Free Software Foundation, Inc.
|
Copyright (C) 2021 Free Software Foundation, Inc.
|
||||||
@@ -1983,7 +1983,7 @@ cat >config.log <<_ACEOF
|
|||||||
This file contains any messages produced by compilers while
|
This file contains any messages produced by compilers while
|
||||||
running configure, to aid debugging if configure makes a mistake.
|
running configure, to aid debugging if configure makes a mistake.
|
||||||
|
|
||||||
It was created by cpuminer-opt $as_me 25.4, which was
|
It was created by cpuminer-opt $as_me 25.5, which was
|
||||||
generated by GNU Autoconf 2.71. Invocation command line was
|
generated by GNU Autoconf 2.71. Invocation command line was
|
||||||
|
|
||||||
$ $0$ac_configure_args_raw
|
$ $0$ac_configure_args_raw
|
||||||
@@ -3591,7 +3591,7 @@ fi
|
|||||||
|
|
||||||
# Define the identity of the package.
|
# Define the identity of the package.
|
||||||
PACKAGE='cpuminer-opt'
|
PACKAGE='cpuminer-opt'
|
||||||
VERSION='25.4'
|
VERSION='25.5'
|
||||||
|
|
||||||
|
|
||||||
printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h
|
printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h
|
||||||
@@ -5808,11 +5808,11 @@ if test x$ac_prog_cxx_stdcxx = xno
|
|||||||
then :
|
then :
|
||||||
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to enable C++11 features" >&5
|
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to enable C++11 features" >&5
|
||||||
printf %s "checking for $CXX option to enable C++11 features... " >&6; }
|
printf %s "checking for $CXX option to enable C++11 features... " >&6; }
|
||||||
if test ${ac_cv_prog_cxx_11+y}
|
if test ${ac_cv_prog_cxx_cxx11+y}
|
||||||
then :
|
then :
|
||||||
printf %s "(cached) " >&6
|
printf %s "(cached) " >&6
|
||||||
else $as_nop
|
else $as_nop
|
||||||
ac_cv_prog_cxx_11=no
|
ac_cv_prog_cxx_cxx11=no
|
||||||
ac_save_CXX=$CXX
|
ac_save_CXX=$CXX
|
||||||
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||||
/* end confdefs.h. */
|
/* end confdefs.h. */
|
||||||
@@ -5854,11 +5854,11 @@ if test x$ac_prog_cxx_stdcxx = xno
|
|||||||
then :
|
then :
|
||||||
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to enable C++98 features" >&5
|
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to enable C++98 features" >&5
|
||||||
printf %s "checking for $CXX option to enable C++98 features... " >&6; }
|
printf %s "checking for $CXX option to enable C++98 features... " >&6; }
|
||||||
if test ${ac_cv_prog_cxx_98+y}
|
if test ${ac_cv_prog_cxx_cxx98+y}
|
||||||
then :
|
then :
|
||||||
printf %s "(cached) " >&6
|
printf %s "(cached) " >&6
|
||||||
else $as_nop
|
else $as_nop
|
||||||
ac_cv_prog_cxx_98=no
|
ac_cv_prog_cxx_cxx98=no
|
||||||
ac_save_CXX=$CXX
|
ac_save_CXX=$CXX
|
||||||
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||||
/* end confdefs.h. */
|
/* end confdefs.h. */
|
||||||
@@ -7435,7 +7435,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
|||||||
# report actual input values of CONFIG_FILES etc. instead of their
|
# report actual input values of CONFIG_FILES etc. instead of their
|
||||||
# values after options handling.
|
# values after options handling.
|
||||||
ac_log="
|
ac_log="
|
||||||
This file was extended by cpuminer-opt $as_me 25.4, which was
|
This file was extended by cpuminer-opt $as_me 25.5, which was
|
||||||
generated by GNU Autoconf 2.71. Invocation command line was
|
generated by GNU Autoconf 2.71. Invocation command line was
|
||||||
|
|
||||||
CONFIG_FILES = $CONFIG_FILES
|
CONFIG_FILES = $CONFIG_FILES
|
||||||
@@ -7503,7 +7503,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
|
|||||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||||
ac_cs_config='$ac_cs_config_escaped'
|
ac_cs_config='$ac_cs_config_escaped'
|
||||||
ac_cs_version="\\
|
ac_cs_version="\\
|
||||||
cpuminer-opt config.status 25.4
|
cpuminer-opt config.status 25.5
|
||||||
configured by $0, generated by GNU Autoconf 2.71,
|
configured by $0, generated by GNU Autoconf 2.71,
|
||||||
with options \\"\$ac_cs_config\\"
|
with options \\"\$ac_cs_config\\"
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
AC_INIT([cpuminer-opt], [25.4])
|
AC_INIT([cpuminer-opt], [25.5])
|
||||||
|
|
||||||
AC_PREREQ([2.59c])
|
AC_PREREQ([2.59c])
|
||||||
AC_CANONICAL_SYSTEM
|
AC_CANONICAL_SYSTEM
|
||||||
|
|||||||
20
configure~
20
configure~
@@ -1,6 +1,6 @@
|
|||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
# Guess values for system-dependent variables and create Makefiles.
|
# Guess values for system-dependent variables and create Makefiles.
|
||||||
# Generated by GNU Autoconf 2.72 for cpuminer-opt 25.4.
|
# Generated by GNU Autoconf 2.72 for cpuminer-opt 25.5.
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
# Copyright (C) 1992-1996, 1998-2017, 2020-2023 Free Software Foundation,
|
# Copyright (C) 1992-1996, 1998-2017, 2020-2023 Free Software Foundation,
|
||||||
@@ -601,8 +601,8 @@ MAKEFLAGS=
|
|||||||
# Identity of this package.
|
# Identity of this package.
|
||||||
PACKAGE_NAME='cpuminer-opt'
|
PACKAGE_NAME='cpuminer-opt'
|
||||||
PACKAGE_TARNAME='cpuminer-opt'
|
PACKAGE_TARNAME='cpuminer-opt'
|
||||||
PACKAGE_VERSION='25.4'
|
PACKAGE_VERSION='25.5'
|
||||||
PACKAGE_STRING='cpuminer-opt 25.4'
|
PACKAGE_STRING='cpuminer-opt 25.5'
|
||||||
PACKAGE_BUGREPORT=''
|
PACKAGE_BUGREPORT=''
|
||||||
PACKAGE_URL=''
|
PACKAGE_URL=''
|
||||||
|
|
||||||
@@ -1352,7 +1352,7 @@ if test "$ac_init_help" = "long"; then
|
|||||||
# Omit some internal or obsolete options to make the list less imposing.
|
# Omit some internal or obsolete options to make the list less imposing.
|
||||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||||
cat <<_ACEOF
|
cat <<_ACEOF
|
||||||
'configure' configures cpuminer-opt 25.4 to adapt to many kinds of systems.
|
'configure' configures cpuminer-opt 25.5 to adapt to many kinds of systems.
|
||||||
|
|
||||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||||
|
|
||||||
@@ -1424,7 +1424,7 @@ fi
|
|||||||
|
|
||||||
if test -n "$ac_init_help"; then
|
if test -n "$ac_init_help"; then
|
||||||
case $ac_init_help in
|
case $ac_init_help in
|
||||||
short | recursive ) echo "Configuration of cpuminer-opt 25.4:";;
|
short | recursive ) echo "Configuration of cpuminer-opt 25.5:";;
|
||||||
esac
|
esac
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
|
|
||||||
@@ -1528,7 +1528,7 @@ fi
|
|||||||
test -n "$ac_init_help" && exit $ac_status
|
test -n "$ac_init_help" && exit $ac_status
|
||||||
if $ac_init_version; then
|
if $ac_init_version; then
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
cpuminer-opt configure 25.4
|
cpuminer-opt configure 25.5
|
||||||
generated by GNU Autoconf 2.72
|
generated by GNU Autoconf 2.72
|
||||||
|
|
||||||
Copyright (C) 2023 Free Software Foundation, Inc.
|
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||||
@@ -1949,7 +1949,7 @@ cat >config.log <<_ACEOF
|
|||||||
This file contains any messages produced by compilers while
|
This file contains any messages produced by compilers while
|
||||||
running configure, to aid debugging if configure makes a mistake.
|
running configure, to aid debugging if configure makes a mistake.
|
||||||
|
|
||||||
It was created by cpuminer-opt $as_me 25.4, which was
|
It was created by cpuminer-opt $as_me 25.5, which was
|
||||||
generated by GNU Autoconf 2.72. Invocation command line was
|
generated by GNU Autoconf 2.72. Invocation command line was
|
||||||
|
|
||||||
$ $0$ac_configure_args_raw
|
$ $0$ac_configure_args_raw
|
||||||
@@ -3764,7 +3764,7 @@ fi
|
|||||||
|
|
||||||
# Define the identity of the package.
|
# Define the identity of the package.
|
||||||
PACKAGE='cpuminer-opt'
|
PACKAGE='cpuminer-opt'
|
||||||
VERSION='25.4'
|
VERSION='25.5'
|
||||||
|
|
||||||
|
|
||||||
printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h
|
printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h
|
||||||
@@ -7450,7 +7450,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
|||||||
# report actual input values of CONFIG_FILES etc. instead of their
|
# report actual input values of CONFIG_FILES etc. instead of their
|
||||||
# values after options handling.
|
# values after options handling.
|
||||||
ac_log="
|
ac_log="
|
||||||
This file was extended by cpuminer-opt $as_me 25.4, which was
|
This file was extended by cpuminer-opt $as_me 25.5, which was
|
||||||
generated by GNU Autoconf 2.72. Invocation command line was
|
generated by GNU Autoconf 2.72. Invocation command line was
|
||||||
|
|
||||||
CONFIG_FILES = $CONFIG_FILES
|
CONFIG_FILES = $CONFIG_FILES
|
||||||
@@ -7518,7 +7518,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
|
|||||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||||
ac_cs_config='$ac_cs_config_escaped'
|
ac_cs_config='$ac_cs_config_escaped'
|
||||||
ac_cs_version="\\
|
ac_cs_version="\\
|
||||||
cpuminer-opt config.status 25.4
|
cpuminer-opt config.status 25.5
|
||||||
configured by $0, generated by GNU Autoconf 2.72,
|
configured by $0, generated by GNU Autoconf 2.72,
|
||||||
with options \\"\$ac_cs_config\\"
|
with options \\"\$ac_cs_config\\"
|
||||||
|
|
||||||
|
|||||||
@@ -3760,10 +3760,10 @@ int main(int argc, char *argv[])
|
|||||||
|
|
||||||
#if defined(_WIN32_WINNT)
|
#if defined(_WIN32_WINNT)
|
||||||
if (opt_debug)
|
if (opt_debug)
|
||||||
applog( LOG_INFO, "_WIN232_WINNT = 0x%04x", _WIN32_WINNT );
|
applog( LOG_INFO, "_WIN32_WINNT = 0x%04x", _WIN32_WINNT );
|
||||||
#else
|
#else
|
||||||
if (opt_debug)
|
if (opt_debug)
|
||||||
applog( LOG_INFO, "_WIN232_WINNT undefined." );
|
applog( LOG_INFO, "_WIN32_WINNT undefined." );
|
||||||
#endif
|
#endif
|
||||||
#if defined(WINDOWS_CPU_GROUPS_ENABLED)
|
#if defined(WINDOWS_CPU_GROUPS_ENABLED)
|
||||||
if ( opt_debug || ( !opt_quiet && num_cpugroups > 1 ) )
|
if ( opt_debug || ( !opt_quiet && num_cpugroups > 1 ) )
|
||||||
|
|||||||
@@ -217,7 +217,9 @@ static inline __m256i mm256_not( const __m256i v )
|
|||||||
// Equivalent of AVX512 _mm256_movepi64_mask & _mm256_movepi32_mask.
|
// Equivalent of AVX512 _mm256_movepi64_mask & _mm256_movepi32_mask.
|
||||||
// Returns 4 or 8 bit integer mask from MSBit of 64 or 32 bit elements.
|
// Returns 4 or 8 bit integer mask from MSBit of 64 or 32 bit elements.
|
||||||
// Effectively a sign test.
|
// Effectively a sign test.
|
||||||
|
// The functions return int which can promote small integers to int when used
|
||||||
|
// in an expression. Users should mask the slack bits strategically to maintain
|
||||||
|
// data integrity.
|
||||||
#define mm256_movmask_64( v ) \
|
#define mm256_movmask_64( v ) \
|
||||||
_mm256_movemask_pd( _mm256_castsi256_pd( v ) )
|
_mm256_movemask_pd( _mm256_castsi256_pd( v ) )
|
||||||
|
|
||||||
|
|||||||
@@ -14,12 +14,6 @@
|
|||||||
// vectors. It is therefore not technically required for any 512 bit vector
|
// vectors. It is therefore not technically required for any 512 bit vector
|
||||||
// utilities defined below.
|
// utilities defined below.
|
||||||
|
|
||||||
// if avx10 // avx512 is always set
|
|
||||||
// if evex512: yes
|
|
||||||
// else if avx512 : yes // avx512 is set but not avx10
|
|
||||||
// else : no // avx512 not set or avx10.1 is set without evex512
|
|
||||||
|
|
||||||
|
|
||||||
#if defined(SIMD512)
|
#if defined(SIMD512)
|
||||||
|
|
||||||
// AVX512 intrinsics have a few changes from previous conventions.
|
// AVX512 intrinsics have a few changes from previous conventions.
|
||||||
@@ -57,7 +51,7 @@
|
|||||||
// - if an argument is to be referenced multiple times a C inline function
|
// - if an argument is to be referenced multiple times a C inline function
|
||||||
// should be used instead of a macro to prevent an expression argument
|
// should be used instead of a macro to prevent an expression argument
|
||||||
// from being evaluated multiple times (wasteful) or produces side
|
// from being evaluated multiple times (wasteful) or produces side
|
||||||
// effects (very bad).
|
// effects (very bad).
|
||||||
//
|
//
|
||||||
// There are 2 areas where overhead is a major concern: constants and
|
// There are 2 areas where overhead is a major concern: constants and
|
||||||
// permutations.
|
// permutations.
|
||||||
|
|||||||
@@ -4,9 +4,10 @@
|
|||||||
#if defined(__aarch64__) && defined(__ARM_NEON)
|
#if defined(__aarch64__) && defined(__ARM_NEON)
|
||||||
|
|
||||||
// Targeted functions supporting NEON SIMD 128 & 64 bit vectors.
|
// Targeted functions supporting NEON SIMD 128 & 64 bit vectors.
|
||||||
// Element size matters!
|
|
||||||
//
|
//
|
||||||
// Intel naming is generally used.
|
// Intel style naming is generally used; however, this is not an attempt to emulate Intel
|
||||||
|
// instructions. It's focused on the functions used in this program and the best way
|
||||||
|
// to implement them with NEON.
|
||||||
//
|
//
|
||||||
// Some advanced logical operations that require SHA3. Prior to GCC-13
|
// Some advanced logical operations that require SHA3. Prior to GCC-13
|
||||||
// they also require armv8.2
|
// they also require armv8.2
|
||||||
|
|||||||
Reference in New Issue
Block a user