mirror of
https://github.com/JayDDee/cpuminer-opt.git
synced 2025-09-17 23:44:27 +00:00
v25.4
This commit is contained in:
@@ -597,6 +597,45 @@ static void blake2s_compress(blake2s_state *S, const void *buf) {
|
||||
v[13] = S->t[1] ^ blake2s_IV[5];
|
||||
v[14] = S->f[0] ^ blake2s_IV[6];
|
||||
v[15] = S->f[1] ^ blake2s_IV[7];
|
||||
|
||||
#if defined(__SSE2__) || defined(__ARM_NEON)
|
||||
|
||||
v128_t *V = (v128_t*)v;
|
||||
|
||||
/*
 * ROUND( r ) - one round of the BLAKE2s compression function, vectorized.
 *
 * Operates on V[0..3], the v128_t view of the working array v[] set up just
 * above, and on the message words m[] selected through blake2s_sigma[r].
 *
 * Structure of the expansion:
 *   1. First half: two add / xor-rotate passes (rotations 16, 12 then 8, 7)
 *      using message words sigma[r][0,2,4,6] and then sigma[r][1,3,5,7].
 *   2. Lane permutation of V[0], V[3] and V[2]; V[1] is left in place.
 *      NOTE(review): presumably this is the diagonalization step, rotating
 *      V[0] by one 32-bit lane, V[3] by two and V[2] by one in the opposite
 *      direction - confirm against the v128_shufll32 / v128_shuflr32 /
 *      v128_swap64 definitions.
 *   3. Second half: the same add / xor-rotate passes using message words
 *      sigma[r][14,8,10,12] and then sigma[r][15,9,11,13]; the index order
 *      matches the permuted lane layout.
 *   4. Inverse lane permutation, restoring V[0], V[3], V[2] to their
 *      original lane order.
 *
 * The expansion is a bare sequence of statements with no trailing semicolon
 * and no do { } while(0) wrapper: invoke it as a full statement, ROUND( n );
 * and never as the unbraced body of an if / for / while.
 */
#define ROUND( r ) \
|
||||
V[0] = v128_add32( V[0], v128_add32( V[1], v128_set32( \
|
||||
m[blake2s_sigma[r][ 6]], m[blake2s_sigma[r][ 4]], \
|
||||
m[blake2s_sigma[r][ 2]], m[blake2s_sigma[r][ 0]] ) ) ); \
|
||||
V[3] = v128_ror32( v128_xor( V[3], V[0] ), 16 ); \
|
||||
V[2] = v128_add32( V[2], V[3] ); \
|
||||
V[1] = v128_ror32( v128_xor( V[1], V[2] ), 12 ); \
|
||||
V[0] = v128_add32( V[0], v128_add32( V[1], v128_set32( \
|
||||
m[blake2s_sigma[r][ 7]], m[blake2s_sigma[r][ 5]], \
|
||||
m[blake2s_sigma[r][ 3]], m[blake2s_sigma[r][ 1]] ) ) ); \
|
||||
V[3] = v128_ror32( v128_xor( V[3], V[0] ), 8 ); \
|
||||
V[2] = v128_add32( V[2], V[3] ); \
|
||||
V[1] = v128_ror32( v128_xor( V[1], V[2] ), 7 ); \
|
||||
V[0] = v128_shufll32( V[0] ); \
|
||||
V[3] = v128_swap64( V[3] ); \
|
||||
V[2] = v128_shuflr32( V[2] ); \
|
||||
V[0] = v128_add32( V[0], v128_add32( V[1], v128_set32( \
|
||||
m[blake2s_sigma[r][12]], m[blake2s_sigma[r][10]], \
|
||||
m[blake2s_sigma[r][ 8]], m[blake2s_sigma[r][14]] ) ) ); \
|
||||
V[3] = v128_ror32( v128_xor( V[3], V[0] ), 16 ); \
|
||||
V[2] = v128_add32( V[2], V[3] ); \
|
||||
V[1] = v128_ror32( v128_xor( V[1], V[2] ), 12 ); \
|
||||
V[0] = v128_add32( V[0], v128_add32( V[1], v128_set32( \
|
||||
m[blake2s_sigma[r][13]], m[blake2s_sigma[r][11]], \
|
||||
m[blake2s_sigma[r][ 9]], m[blake2s_sigma[r][15]] ) ) ); \
|
||||
V[3] = v128_ror32( v128_xor( V[3], V[0] ), 8 ); \
|
||||
V[2] = v128_add32( V[2], V[3] ); \
|
||||
V[1] = v128_ror32( v128_xor( V[1], V[2] ), 7 ); \
|
||||
V[0] = v128_shuflr32( V[0] ); \
|
||||
V[3] = v128_swap64( V[3] ); \
|
||||
V[2] = v128_shufll32( V[2] )
|
||||
|
||||
#else
|
||||
|
||||
#define G(r,i,a,b,c,d) \
|
||||
do { \
|
||||
a = a + b + m[blake2s_sigma[r][2*i+0]]; \
|
||||
@@ -619,6 +658,9 @@ static void blake2s_compress(blake2s_state *S, const void *buf) {
|
||||
G(r, 6, v[ 2], v[ 7], v[ 8], v[13]); \
|
||||
G(r, 7, v[ 3], v[ 4], v[ 9], v[14]); \
|
||||
} while(0)
|
||||
|
||||
#endif
|
||||
|
||||
ROUND(0);
|
||||
ROUND(1);
|
||||
ROUND(2);
|
||||
|
@@ -336,7 +336,7 @@ static const uint32_t _ALIGN(16) finalblk_4way[4 * 16] = {
|
||||
};
|
||||
*/
|
||||
|
||||
static inline void sha256_4way_init_state( void *state )
|
||||
static inline void sha256_4x32_init_state( void *state )
|
||||
{
|
||||
casti_v128( state, 0 ) = v128_32( 0x6A09E667 );
|
||||
casti_v128( state, 1 ) = v128_32( 0xBB67AE85 );
|
||||
@@ -359,21 +359,21 @@ static inline void HMAC_SHA256_80_init_4way( const uint32_t *key,
|
||||
memcpy( pad, key + 4*16, 4*16 );
|
||||
memcpy( pad + 4*4, keypad_4way, 4*48 );
|
||||
|
||||
sha256_4way_transform_le( (v128_t*)ihash, (v128_t*)pad,
|
||||
sha256_4x32_transform_le( (v128_t*)ihash, (v128_t*)pad,
|
||||
(const v128_t*)tstate );
|
||||
|
||||
sha256_4way_init_state( tstate );
|
||||
sha256_4x32_init_state( tstate );
|
||||
|
||||
for ( i = 0; i < 4*8; i++ ) pad[i] = ihash[i] ^ 0x5c5c5c5c;
|
||||
for ( ; i < 4*16; i++ ) pad[i] = 0x5c5c5c5c;
|
||||
|
||||
sha256_4way_transform_le( (v128_t*)ostate, (v128_t*)pad,
|
||||
sha256_4x32_transform_le( (v128_t*)ostate, (v128_t*)pad,
|
||||
(const v128_t*)tstate );
|
||||
|
||||
for ( i = 0; i < 4*8; i++ ) pad[i] = ihash[i] ^ 0x36363636;
|
||||
for ( ; i < 4*16; i++ ) pad[i] = 0x36363636;
|
||||
|
||||
sha256_4way_transform_le( (v128_t*)tstate, (v128_t*)pad,
|
||||
sha256_4x32_transform_le( (v128_t*)tstate, (v128_t*)pad,
|
||||
(const v128_t*)tstate );
|
||||
}
|
||||
|
||||
@@ -386,7 +386,7 @@ static inline void PBKDF2_SHA256_80_128_4way( const uint32_t *tstate,
|
||||
uint32_t _ALIGN(16) obuf[4 * 16];
|
||||
int i, j;
|
||||
|
||||
sha256_4way_transform_le( (v128_t*)istate, (v128_t*)salt,
|
||||
sha256_4x32_transform_le( (v128_t*)istate, (v128_t*)salt,
|
||||
(const v128_t*)tstate );
|
||||
|
||||
memcpy(ibuf, salt + 4 * 16, 4 * 16);
|
||||
@@ -400,10 +400,10 @@ static inline void PBKDF2_SHA256_80_128_4way( const uint32_t *tstate,
|
||||
ibuf[4 * 4 + 2] = i + 1;
|
||||
ibuf[4 * 4 + 3] = i + 1;
|
||||
|
||||
sha256_4way_transform_le( (v128_t*)obuf, (v128_t*)ibuf,
|
||||
sha256_4x32_transform_le( (v128_t*)obuf, (v128_t*)ibuf,
|
||||
(const v128_t*)istate );
|
||||
|
||||
sha256_4way_transform_le( (v128_t*)ostate2, (v128_t*)obuf,
|
||||
sha256_4x32_transform_le( (v128_t*)ostate2, (v128_t*)obuf,
|
||||
(const v128_t*)ostate );
|
||||
|
||||
for ( j = 0; j < 4 * 8; j++ )
|
||||
@@ -418,9 +418,9 @@ static inline void PBKDF2_SHA256_128_32_4way( uint32_t *tstate,
|
||||
uint32_t _ALIGN(64) buf[4 * 16];
|
||||
int i;
|
||||
|
||||
sha256_4way_transform_be( (v128_t*)tstate, (v128_t*)salt,
|
||||
sha256_4x32_transform_be( (v128_t*)tstate, (v128_t*)salt,
|
||||
(const v128_t*)tstate );
|
||||
sha256_4way_transform_be( (v128_t*)tstate, (v128_t*)( salt + 4*16),
|
||||
sha256_4x32_transform_be( (v128_t*)tstate, (v128_t*)( salt + 4*16),
|
||||
(const v128_t*)tstate );
|
||||
|
||||
final[ 0] = v128_32( 0x00000001 );
|
||||
@@ -431,13 +431,13 @@ static inline void PBKDF2_SHA256_128_32_4way( uint32_t *tstate,
|
||||
= v128_xor( final[ 0], final[ 0] ); //_mm_setzero_si128();
|
||||
final[15] = v128_32 ( 0x00000620 );
|
||||
|
||||
sha256_4way_transform_le( (v128_t*)tstate, (v128_t*)final,
|
||||
sha256_4x32_transform_le( (v128_t*)tstate, (v128_t*)final,
|
||||
(const v128_t*)tstate );
|
||||
|
||||
memcpy(buf, tstate, 4 * 32);
|
||||
memcpy(buf + 4 * 8, outerpad_4way, 4 * 32);
|
||||
|
||||
sha256_4way_transform_le( (v128_t*)ostate, (v128_t*)buf,
|
||||
sha256_4x32_transform_le( (v128_t*)ostate, (v128_t*)buf,
|
||||
(const v128_t*)ostate );
|
||||
|
||||
for ( i = 0; i < 4 * 8; i++ )
|
||||
@@ -467,7 +467,7 @@ static const uint32_t _ALIGN(32) finalblk_8way[8 * 16] = {
|
||||
};
|
||||
*/
|
||||
|
||||
static inline void sha256_8way_init_state( void *state )
|
||||
static inline void sha256_8x32_init_state( void *state )
|
||||
{
|
||||
casti_m256i( state, 0 ) = _mm256_set1_epi32( 0x6A09E667 );
|
||||
casti_m256i( state, 1 ) = _mm256_set1_epi32( 0xBB67AE85 );
|
||||
@@ -491,21 +491,21 @@ static inline void HMAC_SHA256_80_init_8way( const uint32_t *key,
|
||||
memset( pad + 8*5, 0x00, 8*40 );
|
||||
for ( i = 0; i < 8; i++ ) pad[ 8*15 + i ] = 0x00000280;
|
||||
|
||||
sha256_8way_transform_le( (__m256i*)ihash, (__m256i*)pad,
|
||||
sha256_8x32_transform_le( (__m256i*)ihash, (__m256i*)pad,
|
||||
(const __m256i*)tstate );
|
||||
|
||||
sha256_8way_init_state( tstate );
|
||||
sha256_8x32_init_state( tstate );
|
||||
|
||||
for ( i = 0; i < 8*8; i++ ) pad[i] = ihash[i] ^ 0x5c5c5c5c;
|
||||
for ( ; i < 8*16; i++ ) pad[i] = 0x5c5c5c5c;
|
||||
|
||||
sha256_8way_transform_le( (__m256i*)ostate, (__m256i*)pad,
|
||||
sha256_8x32_transform_le( (__m256i*)ostate, (__m256i*)pad,
|
||||
(const __m256i*)tstate );
|
||||
|
||||
for ( i = 0; i < 8*8; i++ ) pad[i] = ihash[i] ^ 0x36363636;
|
||||
for ( ; i < 8*16; i++ ) pad[i] = 0x36363636;
|
||||
|
||||
sha256_8way_transform_le( (__m256i*)tstate, (__m256i*)pad,
|
||||
sha256_8x32_transform_le( (__m256i*)tstate, (__m256i*)pad,
|
||||
(const __m256i*)tstate );
|
||||
}
|
||||
|
||||
@@ -518,7 +518,7 @@ static inline void PBKDF2_SHA256_80_128_8way( const uint32_t *tstate,
|
||||
uint32_t _ALIGN(32) obuf[8 * 16];
|
||||
int i, j;
|
||||
|
||||
sha256_8way_transform_le( (__m256i*)istate, (__m256i*)salt,
|
||||
sha256_8x32_transform_le( (__m256i*)istate, (__m256i*)salt,
|
||||
(const __m256i*)tstate );
|
||||
|
||||
memcpy( ibuf, salt + 8*16, 8*16 );
|
||||
@@ -541,10 +541,10 @@ static inline void PBKDF2_SHA256_80_128_8way( const uint32_t *tstate,
|
||||
ibuf[8 * 4 + 6] = i + 1;
|
||||
ibuf[8 * 4 + 7] = i + 1;
|
||||
|
||||
sha256_8way_transform_le( (__m256i*)obuf, (__m256i*)ibuf,
|
||||
sha256_8x32_transform_le( (__m256i*)obuf, (__m256i*)ibuf,
|
||||
(const __m256i*)istate );
|
||||
|
||||
sha256_8way_transform_le( (__m256i*)ostate2, (__m256i*)obuf,
|
||||
sha256_8x32_transform_le( (__m256i*)ostate2, (__m256i*)obuf,
|
||||
(const __m256i*)ostate );
|
||||
|
||||
for ( j = 0; j < 8*8; j++ )
|
||||
@@ -559,9 +559,9 @@ static inline void PBKDF2_SHA256_128_32_8way( uint32_t *tstate,
|
||||
uint32_t _ALIGN(128) buf[ 8*16 ];
|
||||
int i;
|
||||
|
||||
sha256_8way_transform_be( (__m256i*)tstate, (__m256i*)salt,
|
||||
sha256_8x32_transform_be( (__m256i*)tstate, (__m256i*)salt,
|
||||
(const __m256i*)tstate );
|
||||
sha256_8way_transform_be( (__m256i*)tstate, (__m256i*)( salt + 8*16),
|
||||
sha256_8x32_transform_be( (__m256i*)tstate, (__m256i*)( salt + 8*16),
|
||||
(const __m256i*)tstate );
|
||||
|
||||
final[ 0] = _mm256_set1_epi32( 0x00000001 );
|
||||
@@ -572,7 +572,7 @@ static inline void PBKDF2_SHA256_128_32_8way( uint32_t *tstate,
|
||||
= _mm256_setzero_si256();
|
||||
final[15] = _mm256_set1_epi32 ( 0x00000620 );
|
||||
|
||||
sha256_8way_transform_le( (__m256i*)tstate, final,
|
||||
sha256_8x32_transform_le( (__m256i*)tstate, final,
|
||||
(const __m256i*)tstate );
|
||||
|
||||
memcpy( buf, tstate, 8*32 );
|
||||
@@ -580,7 +580,7 @@ static inline void PBKDF2_SHA256_128_32_8way( uint32_t *tstate,
|
||||
memset( buf + 8*9, 0x00, 8*24 );
|
||||
for ( i = 0; i < 8; i++ ) buf[ 8*15 + i ] = 0x00000300;
|
||||
|
||||
sha256_8way_transform_le( (__m256i*)ostate, (__m256i*)buf,
|
||||
sha256_8x32_transform_le( (__m256i*)ostate, (__m256i*)buf,
|
||||
(const __m256i*)ostate );
|
||||
|
||||
for (i = 0; i < 8 * 8; i++)
|
||||
@@ -591,7 +591,7 @@ static inline void PBKDF2_SHA256_128_32_8way( uint32_t *tstate,
|
||||
|
||||
#if defined(SIMD512)
|
||||
|
||||
static inline void sha256_16way_init_state( void *state )
|
||||
static inline void sha256_16x32_init_state( void *state )
|
||||
{
|
||||
casti_m512i( state, 0 ) = _mm512_set1_epi32( 0x6A09E667 );
|
||||
casti_m512i( state, 1 ) = _mm512_set1_epi32( 0xBB67AE85 );
|
||||
@@ -615,21 +615,21 @@ static inline void HMAC_SHA256_80_init_16way( const uint32_t *key,
|
||||
memset( pad + 16*5, 0x00, 16*40 );
|
||||
for ( i = 0; i < 16; i++ ) pad[ 16*15 + i ] = 0x00000280;
|
||||
|
||||
sha256_16way_transform_le( (__m512i*)ihash, (__m512i*)pad,
|
||||
sha256_16x32_transform_le( (__m512i*)ihash, (__m512i*)pad,
|
||||
(const __m512i*)tstate );
|
||||
|
||||
sha256_16way_init_state( tstate );
|
||||
sha256_16x32_init_state( tstate );
|
||||
|
||||
for ( i = 0; i < 16*8; i++ ) pad[i] = ihash[i] ^ 0x5c5c5c5c;
|
||||
for ( ; i < 16*16; i++ ) pad[i] = 0x5c5c5c5c;
|
||||
|
||||
sha256_16way_transform_le( (__m512i*)ostate, (__m512i*)pad,
|
||||
sha256_16x32_transform_le( (__m512i*)ostate, (__m512i*)pad,
|
||||
(const __m512i*)tstate );
|
||||
|
||||
for ( i = 0; i < 16*8; i++ ) pad[i] = ihash[i] ^ 0x36363636;
|
||||
for ( ; i < 16*16; i++ ) pad[i] = 0x36363636;
|
||||
|
||||
sha256_16way_transform_le( (__m512i*)tstate, (__m512i*)pad,
|
||||
sha256_16x32_transform_le( (__m512i*)tstate, (__m512i*)pad,
|
||||
(const __m512i*)tstate );
|
||||
}
|
||||
|
||||
@@ -642,7 +642,7 @@ static inline void PBKDF2_SHA256_80_128_16way( const uint32_t *tstate,
|
||||
uint32_t _ALIGN(128) ostate2[ 16*8 ];
|
||||
int i, j;
|
||||
|
||||
sha256_16way_transform_le( (__m512i*)istate, (__m512i*)salt,
|
||||
sha256_16x32_transform_le( (__m512i*)istate, (__m512i*)salt,
|
||||
(const __m512i*)tstate );
|
||||
|
||||
memcpy( ibuf, salt + 16*16, 16*16 );
|
||||
@@ -673,10 +673,10 @@ static inline void PBKDF2_SHA256_80_128_16way( const uint32_t *tstate,
|
||||
ibuf[ 16*4 + 14 ] = i + 1;
|
||||
ibuf[ 16*4 + 15 ] = i + 1;
|
||||
|
||||
sha256_16way_transform_le( (__m512i*)obuf, (__m512i*)ibuf,
|
||||
sha256_16x32_transform_le( (__m512i*)obuf, (__m512i*)ibuf,
|
||||
(const __m512i*)istate );
|
||||
|
||||
sha256_16way_transform_le( (__m512i*)ostate2, (__m512i*)obuf,
|
||||
sha256_16x32_transform_le( (__m512i*)ostate2, (__m512i*)obuf,
|
||||
(const __m512i*)ostate );
|
||||
|
||||
for ( j = 0; j < 16*8; j++ )
|
||||
@@ -691,9 +691,9 @@ static inline void PBKDF2_SHA256_128_32_16way( uint32_t *tstate,
|
||||
uint32_t _ALIGN(128) buf[ 16*16 ];
|
||||
int i;
|
||||
|
||||
sha256_16way_transform_be( (__m512i*)tstate, (__m512i*)salt,
|
||||
sha256_16x32_transform_be( (__m512i*)tstate, (__m512i*)salt,
|
||||
(const __m512i*)tstate );
|
||||
sha256_16way_transform_be( (__m512i*)tstate, (__m512i*)( salt + 16*16),
|
||||
sha256_16x32_transform_be( (__m512i*)tstate, (__m512i*)( salt + 16*16),
|
||||
(const __m512i*)tstate );
|
||||
|
||||
final[ 0] = _mm512_set1_epi32( 0x00000001 );
|
||||
@@ -704,7 +704,7 @@ static inline void PBKDF2_SHA256_128_32_16way( uint32_t *tstate,
|
||||
= _mm512_setzero_si512();
|
||||
final[15] = _mm512_set1_epi32 ( 0x00000620 );
|
||||
|
||||
sha256_16way_transform_le( (__m512i*)tstate, final,
|
||||
sha256_16x32_transform_le( (__m512i*)tstate, final,
|
||||
(const __m512i*)tstate );
|
||||
|
||||
memcpy( buf, tstate, 16*32 );
|
||||
@@ -712,7 +712,7 @@ static inline void PBKDF2_SHA256_128_32_16way( uint32_t *tstate,
|
||||
memset( buf + 16*9, 0x00, 16*24 );
|
||||
for ( i = 0; i < 16; i++ ) buf[ 16*15 + i ] = 0x00000300;
|
||||
|
||||
sha256_16way_transform_le( (__m512i*)ostate, (__m512i*)buf,
|
||||
sha256_16x32_transform_le( (__m512i*)ostate, (__m512i*)buf,
|
||||
(const __m512i*)ostate );
|
||||
|
||||
for ( i = 0; i < 16*8; i++ )
|
||||
|
Reference in New Issue
Block a user